├── .Rbuildignore ├── .github └── workflows │ ├── check-standard.yaml │ ├── pkgdown.yaml │ └── test-coverage.yaml ├── .gitignore ├── .version.json ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── G.R ├── auc.R ├── calc_null_dev.R ├── cv-grpreg.R ├── cv-grpsurv.R ├── data.R ├── expand_spline.R ├── gBridge.R ├── gen_nonlinear_data.R ├── grpreg-package.R ├── grpreg.R ├── grpsurv.R ├── logLik.R ├── loss.R ├── multi.R ├── newS.R ├── newXG.R ├── newY.R ├── orthogonalize.R ├── plot-cv.R ├── plot-grpsurv-func.R ├── plot.R ├── plot_spline.R ├── predict-cv.R ├── predict-surv.R ├── predict.R ├── predict_spline.R ├── residuals.R ├── se-grpsurv.R ├── select.R ├── setupLambda.R ├── setupLambdaCox.R ├── standardize.R └── summary-cv-grpreg.R ├── README.md ├── data ├── Birthwt.RData ├── Lung.RData └── birthwt.grpreg.RData ├── grpreg.Rproj ├── inst ├── CITATION └── tinytest │ ├── additive-models.r │ ├── agreement.r │ ├── auc.r │ ├── binomial.r │ ├── coerce.r │ ├── cv-grpsurv.r │ ├── cv.r │ ├── extra-features.r │ ├── gBridge.r │ ├── gaussian.r │ ├── grpsurv.r │ ├── multitask.r │ ├── noiseless.r │ ├── poisson.r │ ├── select.r │ ├── standardization-ortho.r │ ├── surv-torture.r │ └── torture.r ├── man ├── AUC.cv.grpsurv.Rd ├── Birthwt.Rd ├── Lung.Rd ├── birthwt.grpreg.Rd ├── cv.grpreg.Rd ├── expand_spline.Rd ├── gBridge.Rd ├── gen_nonlinear_data.Rd ├── grpreg-package.Rd ├── grpreg.Rd ├── grpsurv.Rd ├── logLik.grpreg.Rd ├── plot.cv.grpreg.Rd ├── plot.grpreg.Rd ├── plot.grpsurv.func.Rd ├── plot_spline.Rd ├── predict.grpreg.Rd ├── predict.grpsurv.Rd ├── residuals.grpreg.Rd ├── select.Rd └── summary.cv.grpreg.Rd ├── pkgdown ├── _pkgdown.yml └── extra.css ├── src ├── gdfit_cox.c ├── gdfit_gaussian.c ├── gdfit_glm.c ├── grpreg_init.c ├── lcdfit_cox.c ├── lcdfit_gaussian.c ├── lcdfit_glm.c ├── maxprod.c └── standardize.c ├── tests └── test.R └── vignettes ├── articles ├── _commands.tex ├── adaptive-rescaling.rmd ├── additive-models.rmd ├── models.rmd └── penalties.rmd ├── grpreg.rmd └── vignette.css /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^pkgdown$ 2 | ^docs$ 3 | ^index.Rmd$ 4 | ^\.github$ 5 | ^\.version\.json$ 6 | ^.*\.Rproj$ 7 | ^\.Rproj\.user$ 8 | -------------------------------------------------------------------------------- /.github/workflows/check-standard.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main, master] 4 | pull_request: 5 | branches: [main, master] 6 | 7 | name: R-CMD-check 8 | 9 | jobs: 10 | R-CMD-check: 11 | runs-on: ${{ matrix.config.os }} 12 | 13 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 14 | 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | config: 19 | - {os: windows-latest, r: 'release'} 20 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 21 | - {os: ubuntu-latest, r: 'release'} 22 | 23 | env: 24 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 25 | R_KEEP_PKG_SOURCE: yes 26 | R_FORCE_TEST: true 27 | 28 | steps: 29 | - uses: actions/checkout@v4 30 | 31 | - uses: r-lib/actions/setup-pandoc@v2 32 | 33 | - uses: r-lib/actions/setup-r@v2 34 | with: 35 | r-version: ${{ matrix.config.r }} 36 | http-user-agent: ${{ matrix.config.http-user-agent }} 37 | use-public-rspm: true 38 | 39 | - uses: r-lib/actions/setup-r-dependencies@v2 40 | with: 41 | extra-packages: | 42 | any::rcmdcheck 43 | any::glmnet 44 | any::ncvreg 45 | any::survival 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | args: '"--no-manual"' 50 | 51 | 
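# Note: R_FORCE_TEST (set in the env block above) is not a variable that R CMD check itself reads; it is assumed here to be a repo-specific switch that this package's tinytest files consult in order to run tests that would otherwise be skipped (e.g., slow or cross-package agreement tests).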
-------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main, master] 4 | pull_request: 5 | branches: [main, master] 6 | release: 7 | types: [published] 8 | workflow_dispatch: 9 | 10 | name: pkgdown 11 | 12 | jobs: 13 | pkgdown: 14 | runs-on: ubuntu-latest 15 | # Only restrict concurrency for non-PR jobs 16 | concurrency: 17 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | permissions: 21 | contents: write 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - uses: r-lib/actions/setup-pandoc@v2 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: | 34 | any::pkgdown 35 | local::. 36 | needs: website 37 | 38 | - name: Build site 39 | run: options(yaml.eval.expr = TRUE); pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 40 | shell: Rscript {0} 41 | 42 | - name: Deploy to GitHub pages 🚀 43 | if: github.event_name != 'pull_request' 44 | uses: JamesIves/github-pages-deploy-action@v4.5.0 45 | with: 46 | clean: false 47 | branch: gh-pages 48 | folder: docs 49 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main, master] 4 | pull_request: 5 | branches: [main, master] 6 | 7 | name: test-coverage 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | R_FORCE_TEST: true 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: | 28 | any::covr 29 | any::glmnet 30 | any::ncvreg 31 | any::survival 32 | any::tinytest 33 | any::xml2 34 | needs: coverage 35 | 36 | - name: Test coverage 37 | run: | 38 | cov <- covr::package_coverage( 39 | quiet = FALSE, 40 | clean = FALSE, 41 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 42 | ) 43 | print(cov) 44 | covr::to_cobertura(cov) 45 | shell: Rscript {0} 46 | 47 | - uses: codecov/codecov-action@v4 48 | with: 49 | # Fail on error when not on a PR, or when on a PR and the Codecov token is given 50 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} 51 | file: ./cobertura.xml 52 | plugin: noop 53 | disable_search: true 54 | token: ${{ secrets.CODECOV_TOKEN }} 55 | 56 | - name: Upload test results 57 | if: failure() 58 | uses: actions/upload-artifact@v4 59 | with: 60 | name: coverage-test-failures 61 | path: ${{ runner.temp }}/package 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vignettes/*.html 2 | .Rproj.user 3 | .Rhistory 4 | docs 5 | 6 | *.o 7 | *.so 8 | -------------------------------------------------------------------------------- /.version.json: -------------------------------------------------------------------------------- 1 | { 2 | "schemaVersion": 1, 3 | "label": "GitHub", 4 | "message": "3.5.0.3", 5 | "color": "blue" 6 | }
7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: grpreg 2 | Title: Regularization Paths for Regression Models with Grouped Covariates 3 | Version: 3.5.0.3 4 | Authors@R: c( 5 | person("Patrick", "Breheny", role=c("aut","cre"), email="patrick-breheny@uiowa.edu", comment=c(ORCID="0000-0002-0650-1119")), 6 | person("Yaohui", "Zeng", role="ctb"), 7 | person("Ryan", "Kurth", role="ctb")) 8 | Description: Efficient algorithms for fitting the regularization path of linear 9 | regression, GLM, and Cox regression models with grouped penalties. This 10 | includes group selection methods such as group lasso, group MCP, and 11 | group SCAD as well as bi-level selection methods such as the group 12 | exponential lasso, the composite MCP, and the group bridge. For more 13 | information, see Breheny and Huang (2009) <doi:10.4310/sii.2009.v2.n3.a10>, 14 | Huang, Breheny, and Ma (2012) <doi:10.1214/12-sts392>, Breheny and Huang 15 | (2015) <doi:10.1007/s11222-013-9424-2>, and Breheny (2015) 16 | <doi:10.1111/biom.12300>, or visit the package homepage 17 | <https://pbreheny.github.io/grpreg/>. 18 | License: GPL-3 19 | URL: https://pbreheny.github.io/grpreg/, https://github.com/pbreheny/grpreg 20 | BugReports: https://github.com/pbreheny/grpreg/issues 21 | Depends: R (>= 3.1.0) 22 | Imports: Matrix 23 | Suggests: knitr, rmarkdown, splines, survival, tinytest 24 | VignetteBuilder: knitr 25 | Encoding: UTF-8 26 | Roxygen: list(markdown = TRUE) 27 | RoxygenNote: 7.3.2 28 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(AUC,cv.grpsurv) 4 | S3method(coef,cv.grpreg) 5 | S3method(coef,grpreg) 6 | S3method(logLik,grpreg) 7 | S3method(logLik,grpsurv) 8 | S3method(plot,cv.grpreg) 9 | S3method(plot,grpreg) 10 | S3method(plot,grpsurv.func) 11 | S3method(predict,cv.grpreg) 12 | S3method(predict,grpreg) 13 | S3method(predict,grpsurv) 14 | S3method(print,summary.cv.grpreg) 15 | S3method(residuals,grpreg) 16 | S3method(select,grpreg) 17 | S3method(summary,cv.grpreg) 18 | export(AUC) 19 | export(cv.grpreg) 20 | export(cv.grpsurv) 21 | export(expand_spline) 22 | export(gBridge) 23 | export(gen_nonlinear_data) 24 | export(grpreg) 25 | export(grpsurv) 26 | export(plot_spline) 27 | export(select) 28 | import(grDevices) 29 | import(graphics) 30 | import(stats) 31 | import(utils) 32 | importFrom(Matrix,bdiag) 33 | useDynLib(grpreg, .registration = TRUE) 34 | -------------------------------------------------------------------------------- /R/G.R: -------------------------------------------------------------------------------- 1 | setupG <- function(group, m, bilevel) { 2 | gf <- factor(group) 3 | if (any(levels(gf)=='0')) { 4 | g <- as.integer(gf) - 1 5 | lev <- levels(gf)[levels(gf)!='0'] 6 | } else { 7 | g <- as.integer(gf) 8 | lev <- levels(gf) 9 | } 10 | if (is.numeric(group) | is.integer(group)) { 11 | lev <- paste0("G", lev) 12 | } 13 | if (missing(m)) { 14 | m <- rep(NA, length(lev)) 15 | names(m) <- lev 16 | } else { 17 | #if (all.equal(sort(names(m)), sort(group))) 18 | TRY <- try(as.integer(group)==g) 19 | if (inherits(TRY, 'try-error') || any(!TRY)) stop('Attempting to set group.multiplier is ambiguous if group is not a factor', call.=FALSE) 20 | if (length(m) != length(lev)) stop("Length of group.multiplier must equal number of penalized groups", call.=FALSE) 21 | if (storage.mode(m) != "double") storage.mode(m) <- "double" 22 | if
(any(m < 0)) stop('group.multiplier cannot be negative', call.=FALSE) 23 | } 24 | structure(g, levels=lev, m=m) 25 | } 26 | subsetG <- function(g, nz) { 27 | lev <- attr(g, 'levels') 28 | m <- attr(g, 'm') 29 | new <- g[nz] 30 | dropped <- setdiff(g, new) 31 | if (length(dropped)) { 32 | lev <- lev[-dropped] 33 | m <- m[-dropped] 34 | gf <- factor(new) 35 | new <- as.integer(gf) - 1*any(levels(gf)=='0') 36 | } 37 | structure(new, levels=lev, m=m) 38 | } 39 | reorderG <- function(g, m, bilevel) { 40 | og <- g 41 | lev <- attr(g, 'levels') 42 | m <- attr(g, 'm') 43 | if (any(g==0)) { 44 | g <- as.integer(relevel(factor(g), "0"))-1 45 | } 46 | if (any(order(g) != 1:length(g))) { 47 | reorder <- TRUE 48 | gf <- factor(g) 49 | if (any(levels(gf)=="0")) { 50 | gf <- relevel(gf, "0") 51 | g <- as.integer(gf) - 1 52 | } else { 53 | g <- as.integer(gf) 54 | } 55 | ord <- order(g) 56 | ord.inv <- match(1:length(g), ord) 57 | g <- g[ord] 58 | } else { 59 | reorder <- FALSE 60 | ord <- ord.inv <- NULL 61 | } 62 | structure(g, levels=lev, m=m, ord=ord, ord.inv=ord.inv, reorder=reorder) 63 | } 64 | -------------------------------------------------------------------------------- /R/auc.R: -------------------------------------------------------------------------------- 1 | #' Calculates AUC for cv.grpsurv objects 2 | #' 3 | #' Calculates the cross-validated AUC (concordance) from a "cv.grpsurv" object. 4 | #' 5 | #' The area under the curve (AUC), or equivalently, the concordance statistic 6 | #' (C), is calculated according to the procedure described in van Houwelingen 7 | #' and Putter (2011). The function calls `survival::concordancefit()`, except 8 | #' cross-validated linear predictors are used to guard against overfitting. 9 | #' Thus, the values returned by `AUC.cv.grpsurv()` will be lower than those you 10 | #' would obtain with `concordancefit()` if you fit the full (unpenalized) model. 11 | #' 12 | #' @aliases AUC 13 | #' 14 | #' @param obj A `cv.grpsurv` object. You must run `cv.grpsurv()` with the option `returnY=TRUE` in order for `AUC()` to work. 15 | #' @param \dots For S3 method compatibility. 16 | #' 17 | #' @seealso [cv.grpsurv()], [survival::concordancefit()] 18 | #' 19 | #' @references van Houwelingen H, Putter H (2011). *Dynamic Prediction in Clinical Survival Analysis*. CRC Press. 20 | #' 21 | #' @examples 22 | #' \dontshow{set.seed(1)} 23 | #' data(Lung) 24 | #' X <- Lung$X 25 | #' y <- Lung$y 26 | #' group <- Lung$group 27 | #' 28 | #' cvfit <- cv.grpsurv(X, y, group, returnY=TRUE) 29 | #' head(AUC(cvfit)) 30 | #' ll <- log(cvfit$fit$lambda) 31 | #' plot(ll, AUC(cvfit), xlim=rev(range(ll)), lwd=3, type='l', 32 | #' xlab=expression(log(lambda)), ylab='AUC', las=1) 33 | #' @export 34 | 35 | AUC.cv.grpsurv <- function(obj, ...) { 36 | if (!("Y" %in% names(obj))) stop("Must run cv.grpsurv with 'returnY=TRUE' in order to calculate AUC", call.=FALSE) 37 | if (!requireNamespace("survival", quietly = TRUE)) { 38 | stop("The 'survival' package is needed for AUC() to work. Please install it.", call. = FALSE) 39 | } 40 | if (utils::packageVersion("survival") < "3.2.10") stop("AUC.cv.grpsurv requires version 3.2.10 of 'survival' package or higher", call.=FALSE) 41 | S <- survival::Surv(obj$fit$time, obj$fit$fail) 42 | apply(obj$Y, 2, concord, y = S) 43 | } 44 | 45 | #' @rdname AUC.cv.grpsurv 46 | #' @export 47 | 48 | AUC <- function(obj, ...)
UseMethod("AUC") 49 | 50 | concord <- function(x, y) { 51 | survival::concordancefit(y, -x)$concordance 52 | } 53 | -------------------------------------------------------------------------------- /R/calc_null_dev.R: -------------------------------------------------------------------------------- 1 | calc_null_dev <- function(X, y, group, family) { 2 | form <- if (any(group==0)) formula(y~X[, group==0]) else formula(y~1) 3 | fit <- glm(form, family=family) 4 | mean(deviance_grpreg(y, predict(fit, type="response"), family)) 5 | } 6 | -------------------------------------------------------------------------------- /R/cv-grpsurv.R: -------------------------------------------------------------------------------- 1 | #' @rdname cv.grpreg 2 | #' @export 3 | 4 | cv.grpsurv <- function(X, y, group=1:ncol(X), ..., nfolds=10, seed, fold, se=c('quick', 'bootstrap'), returnY=FALSE, trace=FALSE) { 5 | se <- match.arg(se) 6 | 7 | # Complete data fit 8 | fit.args <- list(...) 9 | fit.args$X <- X 10 | fit.args$y <- y 11 | if (!inherits(X, "expandedMatrix")) fit.args$group <- group 12 | fit.args$returnX <- TRUE 13 | fit <- do.call("grpsurv", fit.args) 14 | 15 | # Get standardized X, y 16 | X <- fit$XG$X 17 | y <- cbind(fit$time, fit$fail) 18 | returnX <- list(...)$returnX 19 | if (is.null(returnX) || !returnX) fit$X <- NULL 20 | 21 | # Set up folds 22 | n <- nrow(X) 23 | if (!missing(seed)) { 24 | original_seed <- .GlobalEnv$.Random.seed 25 | on.exit(.GlobalEnv$.Random.seed <- original_seed) 26 | set.seed(seed) 27 | } 28 | if (missing(fold)) { 29 | ind1 <- which(fit$fail==1) 30 | ind0 <- which(fit$fail==0) 31 | n1 <- length(ind1) 32 | n0 <- length(ind0) 33 | fold1 <- 1:n1 %% nfolds 34 | fold0 <- (n1 + 1:n0) %% nfolds 35 | fold1[fold1==0] <- nfolds 36 | fold0[fold0==0] <- nfolds 37 | fold <- integer(n) 38 | fold[fit$fail==1] <- sample(fold1) 39 | fold[fit$fail==0] <- sample(fold0) 40 | } else { 41 | nfolds <- max(fold) 42 | } 43 | Y <- matrix(NA, nrow=n, ncol=length(fit$lambda)) 44 | 45 | cv.args <- list(...) 
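# The arguments captured from '...' above serve as the template for each fold-wise
# call to grpsurv(); the assignments below then pin every fold to the full-data
# fit's lambda path and its (already standardized) group structure. Illustrative
# sketch (not run): a call like cv.grpsurv(X, y, group, penalty="grMCP", nfolds=5)
# reaches this point with cv.args = list(penalty="grMCP") before lambda, group,
# and group.multiplier are filled in from 'fit'.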
46 | cv.args$lambda <- fit$lambda 47 | cv.args$group <- fit$XG$g 48 | cv.args$group.multiplier <- fit$XG$m 49 | cv.args$warn <- FALSE 50 | 51 | for (i in 1:nfolds) { 52 | if (trace) cat("Starting CV fold #", i, "\n", sep="") 53 | res <- cvf.surv(i, X, y, fold, cv.args) 54 | Y[fold==i, 1:res$nl] <- res$yhat 55 | } 56 | 57 | # Eliminate saturated lambda values, if any 58 | ind <- which(apply(is.finite(Y), 2, all)) 59 | Y <- Y[, ind] 60 | lambda <- fit$lambda[ind] 61 | 62 | # Return 63 | if (se == "quick") { 64 | L <- deviance_grpsurv(y, Y, total=FALSE) 65 | cve <- apply(L, 2, sum)/sum(fit$fail) 66 | cvse <- apply(L, 2, sd)*sqrt(nrow(L))/sum(fit$fail) 67 | } else { 68 | cve <- as.double(deviance_grpsurv(y, Y))/sum(fit$fail) 69 | cvse <- se_grpsurv(y, Y)/sum(fit$fail) 70 | } 71 | min <- which.min(cve) 72 | 73 | val <- list(cve=cve, cvse=cvse, fold=fold, lambda=lambda, fit=fit, min=min, lambda.min=lambda[min], null.dev=cve[1]) 74 | if (returnY) val$Y <- Y 75 | structure(val, class=c("cv.grpsurv", "cv.grpreg")) 76 | } 77 | cvf.surv <- function(i, XX, y, fold, cv.args) { 78 | cv.args$X <- XX[fold!=i, , drop=FALSE] 79 | cv.args$y <- y[fold!=i,] 80 | fit.i <- do.call("grpsurv", cv.args) 81 | 82 | X2 <- XX[fold==i, , drop=FALSE] 83 | y2 <- y[fold==i,] 84 | nl <- length(fit.i$lambda) 85 | yhat <- predict(fit.i, X2) 86 | 87 | list(nl=nl, yhat=yhat) 88 | } 89 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Risk Factors Associated with Low Infant Birth Weight 2 | #' 3 | #' This version of the data set has been deprecated and will not be supported 4 | #' in future versions. Please use \code{\link{Birthwt}} instead. 5 | #' 6 | #' @format This data frame contains the following columns: 7 | #' \itemize{ 8 | #' \item\code{low} Indicator of birth weight less than 2.5kg \item\code{bwt} 9 | #' Birth weight in kilograms \item\code{age1,age2,age3} Orthogonal polynomials 10 | #' of first, second, and third degree representing mother's age in years 11 | #' \item\code{lwt1,lwt2,lwt3} Orthogonal polynomials of first, second, and 12 | #' third degree representing mother's weight in pounds at last menstrual period 13 | #' \item\code{white,black} Indicator functions for mother's race; "other" is 14 | #' reference group \item\code{smoke} Smoking status during pregnancy 15 | #' \item\code{ptl1,ptl2m} Indicator functions for one or for two or more 16 | #' previous premature labors, respectively. No previous premature labors is 17 | #' the reference category. \item\code{ht} History of hypertension 18 | #' \item\code{ui} Presence of uterine irritability \item\code{ftv1,ftv2,ftv3m} 19 | #' Indicator functions for one, for two, or for three or more physician visits 20 | #' during the first trimester, respectively. No visits is the reference 21 | #' category. 22 | #' } 23 | #' @seealso \code{\link{Birthwt}} 24 | "birthwt.grpreg" 25 | 26 | 27 | #' Risk Factors Associated with Low Infant Birth Weight 28 | #' 29 | #' The `Birthwt` data contains 189 observations, 16 predictors, and an 30 | #' outcome, birthweight, available both as a continuous measure and a binary 31 | #' indicator for low birth weight. The data were collected at Baystate Medical 32 | #' Center, Springfield, Mass. during 1986. This data frame is a 33 | #' reparameterization of the `birthwt` data frame from the **MASS** package.
34 | #' 35 | #' @format The \code{Birthwt} object is a list containing four elements (`X`, `bwt`, `low`, and `group`): 36 | #' \describe{ 37 | #' \item{bwt}{Birth weight in kilograms} 38 | #' \item{low}{Indicator of birth weight less than 2.5kg} 39 | #' \item{group}{Vector describing how the columns of X are grouped} 40 | #' \item{X}{A matrix with 189 observations (rows) and 16 predictor variables (columns).} 41 | #' } 42 | #' The matrix `X` contains the following columns: 43 | #' \describe{ 44 | #' \item{age1,age2,age3}{Orthogonal polynomials of first, second, and third degree representing mother's age in years} 45 | #' \item{lwt1,lwt2,lwt3}{Orthogonal polynomials of first, second, and third degree representing mother's weight in pounds at last menstrual period} 46 | #' \item{white,black}{Indicator functions for mother's race; "other" is reference group} 47 | #' \item{smoke}{Smoking status during pregnancy} 48 | #' \item{ptl1,ptl2m}{Indicator functions for one or for two or more previous premature labors, respectively. No previous premature labors is the reference category.} 49 | #' \item{ht}{History of hypertension} 50 | #' \item{ui}{Presence of uterine irritability} 51 | #' \item{ftv1,ftv2,ftv3m}{Indicator functions for one, for two, or for three or more physician visits during the first trimester, respectively. No visits is the reference category.} 52 | #' } 53 | #' 54 | #' @seealso [MASS::birthwt], [grpreg()] 55 | #' 56 | #' @references 57 | #' \itemize{ 58 | #' \item Venables, W. N. and Ripley, B. D. (2002). *Modern Applied Statistics with S.* Fourth edition. Springer. 59 | #' \item Hosmer, D.W. and Lemeshow, S. (1989) *Applied Logistic Regression.* New York: Wiley 60 | #' } 61 | #' 62 | #' @source 63 | #' 64 | #' @examples 65 | #' data(Birthwt) 66 | #' hist(Birthwt$bwt, xlab="Child's birth weight", main="") 67 | #' table(Birthwt$low) 68 | #' ## See examples in ?birthwt (MASS package) 69 | #' ## for more about the data set 70 | #' ## See examples in ?grpreg for use of this data set 71 | #' ## with group penalized regression models 72 | "Birthwt" 73 | 74 | 75 | #' VA lung cancer data set 76 | #' 77 | #' Data from a randomised trial of two treatment regimens for lung cancer. This 78 | #' is a standard survival analysis data set from the classic textbook by 79 | #' Kalbfleisch and Prentice. 80 | #' 81 | #' @format A list of two objects: `y` and `X` 82 | #' \describe{ 83 | #' \item{y}{A two column matrix (`Surv` object) containing the follow-up 84 | #' time (in days) and an indicator variable for whether the patient died 85 | #' while on the study or not.} 86 | #' \item{X}{A matrix with 137 observations (rows) and 9 predictor variables 87 | #' (columns). 
The remainder of this list describes the columns of `X`} 88 | #' \item{trt}{Treatment indicator (1=control group, 2=treatment group)} 89 | #' \item{karno}{Karnofsky performance score (0=bad, 100=good)} 90 | #' \item{diagtime}{Time from diagnosis to randomization (months)} 91 | #' \item{age}{Age (years, at baseline)} 92 | #' \item{prior}{Prior therapy (0=no, 1=yes)} 93 | #' \item{squamous}{Indicator for whether the cancer type is squamous cell 94 | #' carcinoma (0=no, 1=yes)} 95 | #' \item{small}{Indicator for whether the cancer type is small cell lung 96 | #' cancer (0=no, 1=yes)} 97 | #' \item{adeno}{Indicator for whether the cancer type is adenocarcinoma 98 | #' (0=no, 1=yes)} 99 | #' \item{large}{Indicator for whether the cancer type is large cell carcinoma 100 | #' (0=no, 1=yes)} 101 | #' } 102 | #' 103 | #' @seealso [grpsurv()] 104 | #' @references 105 | #' \itemize{ 106 | #' \item Kalbfleisch JD and Prentice RL (1980), *The Statistical Analysis of 107 | #' Failure Time Data*. Wiley, New York. 108 | #' } 109 | #' @source \url{https://cran.r-project.org/package=survival} 110 | "Lung" 111 | -------------------------------------------------------------------------------- /R/expand_spline.R: -------------------------------------------------------------------------------- 1 | #' Expand feature matrix using basis splines 2 | #' 3 | #' Performs a basis expansion for many features at once, returning output that is compatible 4 | #' with the `grpreg()` function. Returns an expanded matrix along with a vector 5 | #' that describes its grouping. 6 | #' 7 | #' `expand_spline()` uses the function [splines::bs()] or [splines::ns()] to generate a basis 8 | #' matrix for each column of `x`. These matrices represent the spline basis for piecewise 9 | #' polynomials with specified degree evaluated separately for each original column of `x`. 10 | #' These matrices are then column-bound to form a single grouped matrix of derived features. A vector 11 | #' that describes the grouping present in the resulting matrix is also generated. The resulting 12 | #' object can be passed to [grpreg()]. 13 | #' 14 | #' This methodology was originally proposed by Ravikumar et al. (2009), who named it SpAM (sparse additive modeling). 15 | #' 16 | #' @param x Features to be expanded (numeric matrix). 17 | #' @param df Degrees of freedom (numeric; default = 3). 18 | #' @param degree Degree of the piecewise polynomial (integer; default = 3 (cubic splines)). 19 | #' @param type Type of spline, either B-spline (`"bs"`) or natural cubic spline (`"ns"`; default). 20 | #' 21 | #' @return 22 | #' An object of class `expandedMatrix` consisting of: 23 | #' * `X`: A matrix of dimension `nrow(x)` by `df*ncol(x)` 24 | #' * `group`: A vector of length `df*ncol(x)` that describes the grouping structure 25 | #' * Additional metadata on the splines, such as knot locations, required in order to evaluate the spline at new feature values (e.g., for prediction) 26 | #' 27 | #' @references 28 | #' * Ravikumar P, Lafferty J, Liu H and Wasserman L (2009). Sparse additive models. *Journal of the Royal Statistical Society Series B*, **71**: 1009-1030.
29 | #' 30 | #' @seealso [plot_spline()] to visualize the resulting nonlinear fits 31 | #' 32 | #' @examples 33 | #' \dontshow{set.seed(1)} 34 | #' Data <- gen_nonlinear_data(n=1000) 35 | #' X <- expand_spline(Data$X) 36 | #' fit <- grpreg(X, Data$y) 37 | #' plot_spline(fit, "V02", lambda = 0.03) 38 | #' @export 39 | 40 | expand_spline <- function(x, df = 3, degree = 3, type = c("ns", "bs")) { 41 | type <- match.arg(type) 42 | if(type == "ns" && degree != 3) { 43 | warning("Degree has been set to 3 for natural splines") 44 | } 45 | n <- nrow(x) 46 | p <- ncol(x) 47 | finalx <- matrix(NA, n, (p*df)) 48 | knots <- rep(list(rep(NA, (df-degree))), p) 49 | boundary <- rep(list(rep(NA, 2)), p) 50 | 51 | if(type == "bs"){ 52 | for(i in 0:(p-1)){ 53 | bs <- splines::bs(x[,i+1], df = df, degree = degree) 54 | finalx[,(df*i+1):(df*i+df)] <- bs 55 | boundary[[i+1]] <- attr(bs, "Boundary.knots") 56 | knots[[i+1]] <- attr(bs, "knots") 57 | } 58 | } 59 | else if (type == "ns") { 60 | for (i in 0:(p-1)) { 61 | ns <- splines::ns(x[,i+1], df = df) 62 | finalx[,(df*i+1):(df*i+df)] <- ns 63 | boundary[[i+1]] <- attr(ns, "Boundary.knots") 64 | knots[[i+1]] <- attr(ns, "knots") 65 | } 66 | } 67 | else { 68 | stop(paste(type, "is not a valid type"), call. = FALSE) 69 | } 70 | 71 | if (length(colnames(x)) == p) { 72 | groups <- rep(colnames(x), each = df) 73 | colnames(finalx) <- paste(groups, 1:df, sep='_') 74 | } 75 | else { 76 | groups <- rep(paste0("V", 1:p), each = df) 77 | colnames(finalx) <- paste(groups, 1:df, sep="_") 78 | } 79 | 80 | return(structure(list(X = finalx, 81 | group = groups, 82 | knots = knots, 83 | boundary = boundary, 84 | degree = degree, 85 | originalx = x, 86 | type = type), class='expandedMatrix')) 87 | } 88 | -------------------------------------------------------------------------------- /R/gBridge.R: -------------------------------------------------------------------------------- 1 | #' Fit a group bridge regression path 2 | #' 3 | #' Fit regularization paths for linear, logistic, and Poisson group bridge-penalized 4 | #' regression models over a grid of values for the regularization parameter 5 | #' lambda. 6 | #' 7 | #' This method fits the group bridge method of Huang et al. (2009). Unlike the 8 | #' penalties in \code{grpreg}, the group bridge is not differentiable at zero; 9 | #' because of this, a number of changes must be made to the algorithm, which is 10 | #' why it has its own function. Most notably, the method is unable to start at 11 | #' \code{lambda.max}; it must start at \code{lambda.min} and proceed in the 12 | #' opposite direction. 13 | #' 14 | #' In other respects, the usage and behavior of the function is similar to the 15 | #' rest of the \code{grpreg} package. 16 | #' 17 | #' @param X The design matrix, as in \code{grpreg}. 18 | #' @param y The response vector (or matrix), as in \code{grpreg}. 19 | #' @param group The grouping vector, as in \code{grpreg}. 20 | #' @param family Either "gaussian", "binomial", or "poisson", depending on the response. 21 | #' @param nlambda The number of \code{lambda} values, as in \code{grpreg}. 22 | #' @param lambda A user supplied sequence of `lambda` values, as in `grpreg()`. 23 | #' @param lambda.min The smallest value for \code{lambda}, as in \code{grpreg}. 24 | #' @param lambda.max The maximum value for \code{lambda}. Unlike the penalties 25 | #' in \code{grpreg}, it is not possible to solve for \code{lambda.max} directly 26 | #' with group bridge models. Thus, it must be specified by the user.
If it is 27 | #' not specified, \code{gBridge} will attempt to guess \code{lambda.max}, but 28 | #' this is not particularly accurate. 29 | #' @param alpha Tuning parameter for the balance between the group penalty and 30 | #' the L2 penalty, as in \code{grpreg}. 31 | #' @param eps Convergence threshold, as in \code{grpreg}. 32 | #' @param delta The group bridge penalty is not differentiable at zero, and 33 | #' requires a small number \code{delta} to bound it away from zero. There is 34 | #' typically no need to change this value. 35 | #' @param max.iter Maximum number of iterations, as in \code{grpreg}. 36 | #' @param gamma Tuning parameter of the group bridge penalty (the exponent to 37 | #' which the L1 norm of the coefficients in the group is raised). Default is 38 | #' 0.5, the square root. 39 | #' @param group.multiplier The multiplicative factor by which each group's 40 | #' penalty is to be multiplied, as in \code{grpreg}. 41 | #' @param warn Should the function give a warning if it fails to converge? As 42 | #' in \code{grpreg}. 43 | #' @param returnX Return the standardized design matrix (and associated group 44 | #' structure information)? Default is FALSE. 45 | #' @param ... Not used. 46 | #' 47 | #' @return An object with S3 class \code{"grpreg"}, as in \code{grpreg}. 48 | #' 49 | #' @seealso [grpreg()] 50 | #' 51 | #' @references 52 | #' \itemize{ 53 | #' \item Huang J, Ma S, Xie H, and Zhang C. (2009) A group bridge approach for 54 | #' variable selection. *Biometrika*, **96**: 339-355. \doi{10.1093/biomet/asp020} 55 | #' 56 | #' \item Breheny P and Huang J. (2009) Penalized methods for bi-level variable 57 | #' selection. *Statistics and its interface*, **2**: 369-380. 58 | #' \doi{10.4310/sii.2009.v2.n3.a10} 59 | #' } 60 | #' 61 | #' @examples 62 | #' data(Birthwt) 63 | #' X <- Birthwt$X 64 | #' group <- Birthwt$group 65 | #' 66 | #' ## Linear regression 67 | #' y <- Birthwt$bwt 68 | #' fit <- gBridge(X, y, group, lambda.max=0.08) 69 | #' plot(fit) 70 | #' select(fit)$beta 71 | #' 72 | #' ## Logistic regression 73 | #' y <- Birthwt$low 74 | #' fit <- gBridge(X, y, group, family="binomial", lambda.max=0.17) 75 | #' plot(fit) 76 | #' select(fit)$beta 77 | #' @export 78 | 79 | gBridge <- function(X, y, group=1:ncol(X), family=c("gaussian", "binomial", "poisson"), nlambda=100, lambda, 80 | lambda.min={if (nrow(X) > ncol(X)) .001 else .05}, lambda.max, alpha=1, eps=.001, delta=1e-7, 81 | max.iter=10000, gamma=0.5, group.multiplier, warn=TRUE, returnX=FALSE, ...) { 82 | # Error checking 83 | family <- match.arg(family) 84 | if (alpha > 1 | alpha <= 0) stop("alpha must be in (0, 1]", call.=FALSE) 85 | if (any(is.na(y)) | any(is.na(X))) stop("Missing data (NA's) detected.
Take actions (e.g., removing cases, removing features, imputation) to eliminate missing data before passing X and y to gBridge", call.=FALSE) 86 | if (length(group)!=ncol(X)) stop("group does not match X", call.=FALSE) 87 | if (delta <= 0) stop("Delta must be a positive number", call.=FALSE) 88 | 89 | # Construct XG, yy 90 | yy <- newY(y, family) 91 | m <- attr(yy, "m") 92 | XG <- newXG(X, group, group.multiplier, m, TRUE) 93 | if (nrow(XG$X) != length(yy)) stop("X and y do not have the same number of observations", call.=FALSE) 94 | 95 | # Set up lambda 96 | if (missing(lambda)) { 97 | lambda <- setupLambda.gBridge(XG$X, yy, XG$g, family, alpha, lambda.min, lambda.max, nlambda, gamma, XG$m) 98 | } else { 99 | nlambda <- length(lambda) 100 | } 101 | 102 | # Fit 103 | n <- length(yy) 104 | p <- ncol(XG$X) 105 | K <- as.integer(table(XG$g)) 106 | K0 <- as.integer(if (min(XG$g)==0) K[1] else 0) 107 | K1 <- as.integer(if (min(XG$g)==0) cumsum(K) else c(0, cumsum(K))) 108 | if (family=="gaussian") { 109 | fit <- .Call("lcdfit_gaussian", XG$X, yy, "gBridge", K1, K0, lambda, alpha, eps, delta, gamma, 0, as.integer(max.iter), as.double(XG$m), as.integer(p), as.integer(max(XG$g)), as.integer(TRUE)) 110 | b <- rbind(mean(y), matrix(fit[[1]], nrow=p)) 111 | loss <- fit[[2]] 112 | Eta <- matrix(fit[[3]], nrow=n) + mean(y) 113 | df <- fit[[4]] + 1 # Intercept 114 | iter <- fit[[5]] 115 | } else { 116 | fit <- .Call("lcdfit_glm", XG$X, yy, family, "gBridge", K1, K0, lambda, alpha, eps, delta, gamma, 0, as.integer(max.iter), as.double(XG$m), as.integer(p), as.integer(max(XG$g)), as.integer(warn), as.integer(TRUE)) 117 | b <- rbind(fit[[1]], matrix(fit[[2]], nrow=p)) 118 | loss <- fit[[3]] 119 | Eta <- matrix(fit[[4]], nrow=n) 120 | df <- fit[[5]] 121 | iter <- fit[[6]] 122 | } 123 | 124 | # Eliminate saturated lambda values, if any 125 | ind <- !is.na(iter) 126 | b <- b[, ind, drop=FALSE] 127 | iter <- iter[ind] 128 | lambda <- lambda[ind] 129 | df <- df[ind] 130 | loss <- loss[ind] 131 | if (iter[1] == max.iter) stop("Algorithm failed to converge for any values of lambda. This indicates a combination of (a) an ill-conditioned feature matrix X and (b) insufficient penalization. 
You must fix one or the other for your model to be identifiable.", call.=FALSE) 132 | if (warn & any(iter==max.iter)) warning("Algorithm failed to converge for all values of lambda", call.=FALSE) 133 | 134 | # Unstandardize 135 | if (XG$reorder) b[-1,] <- b[1+XG$ord.inv,] 136 | beta <- unstandardize(b, XG) 137 | 138 | # Names 139 | varnames <- c("(Intercept)", XG$names) 140 | if (m > 1) { 141 | beta[2:m,] <- sweep(beta[2:m, , drop=FALSE], 2, beta[1,], FUN="+") 142 | beta <- array(beta, dim=c(m, nrow(beta)/m, ncol(beta))) 143 | group <- group[-(1:(m-1))] 144 | dimnames(beta) <- list(colnames(yy), varnames, round(lambda, digits=4)) 145 | } else { 146 | dimnames(beta) <- list(varnames, round(lambda, digits=4)) 147 | } 148 | 149 | val <- structure(list(beta = beta, 150 | family = family, 151 | group = group, 152 | lambda = lambda, 153 | alpha = alpha, 154 | deviance = 2 * loss, 155 | n = n, 156 | penalty = "gBridge", 157 | df = df, 158 | iter = iter, 159 | group.multiplier = XG$m), 160 | class = "grpreg") 161 | if (returnX) { 162 | val$XG = XG 163 | val$y = yy 164 | } else if (family=="poisson") { 165 | val$y <- y 166 | } 167 | val 168 | } 169 | -------------------------------------------------------------------------------- /R/gen_nonlinear_data.R: -------------------------------------------------------------------------------- 1 | #' Generate nonlinear example data 2 | #' 3 | #' Mainly intended to demonstrate the use of basis expansion models for sparse additive modeling; intended for use with [expand_spline()]. 4 | #' 5 | #' @param n Sample size (numeric; default = 100). 6 | #' @param p Number of features (numeric; default = 16). 7 | #' @param seed Set to get different random data sets, passed to [set.seed()] 8 | #' 9 | #' @examples 10 | #' Data <- gen_nonlinear_data() 11 | #' @export 12 | 13 | gen_nonlinear_data <- function(n=100, p=16, seed) { 14 | if (!missing(seed)) { 15 | original_seed <- .GlobalEnv$.Random.seed 16 | on.exit(.GlobalEnv$.Random.seed <- original_seed) 17 | set.seed(seed) 18 | } 19 | if (!(is.numeric(p) && p >= 6)) stop('p must be at least 6', call.=FALSE) 20 | X <- matrix(runif(n*p), nrow=n, ncol=p) 21 | w <- floor(log10(p)) + 1 22 | colnames(X) <- sprintf(paste0('V%0', w, 'd'), 1:p) 23 | f <- list( 24 | function(x){2*(exp(-10*x)-exp(-10))/(1-exp(-10)) - 1}, 25 | function(x){-2*(exp(-10*x)-exp(-10))/(1-exp(-10)) + 1}, 26 | function(x){2*x-1}, 27 | function(x){-2*x+1}, 28 | function(x){8*(x-0.5)^2 - 1}, 29 | function(x){-8*(x-0.5)^2 + 1}) 30 | eta <- matrix(NA, nrow=n, ncol=6) 31 | for (j in 1:6) eta[,j] <- f[[j]](X[,j]) 32 | mu <- 5 + apply(eta, 1, sum) 33 | y <- rnorm(n, mean=mu, sd=0.25) 34 | list(X=X, y=y, mu=mu) 35 | } 36 | -------------------------------------------------------------------------------- /R/grpreg-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | #' @aliases grpreg-package NULL 3 | #' @author Patrick Breheny 4 | #' 5 | #' @references 6 | #' \itemize{ 7 | #' \item Yuan M and Lin Y. (2006) Model selection and estimation in regression 8 | #' with grouped variables. *Journal of the Royal Statistical Society Series B*, 9 | #' **68**: 49-67. \doi{10.1111/j.1467-9868.2005.00532.x} 10 | #' 11 | #' \item Huang J, Ma S, Xie H, and Zhang C. (2009) A group bridge approach for 12 | #' variable selection. *Biometrika*, **96**: 339-355. \doi{10.1093/biomet/asp020} 13 | #' 14 | #' \item Breheny P and Huang J. (2009) Penalized methods for bi-level variable 15 | #' selection. 
*Statistics and its interface*, **2**: 369-380. 16 | #' \doi{10.4310/sii.2009.v2.n3.a10} 17 | #' 18 | #' \item Huang J, Breheny P, and Ma S. (2012). A selective review of group 19 | #' selection in high dimensional models. *Statistical Science*, **27**: 481-499. 20 | #' \doi{10.1214/12-sts392} 21 | #' 22 | #' \item Breheny P and Huang J. (2015) Group descent algorithms for nonconvex 23 | #' penalized linear and logistic regression models with grouped predictors. 24 | #' *Statistics and Computing*, **25**: 173-187. \doi{10.1007/s11222-013-9424-2} 25 | #' 26 | #' \item Breheny P. (2015) The group exponential lasso for bi-level variable 27 | #' selection. *Biometrics*, **71**: 731-740. \doi{10.1111/biom.12300} 28 | #' } 29 | #' 30 | #' @examples 31 | #' \donttest{vignette("getting-started", package="grpreg")} 32 | "_PACKAGE" 33 | 34 | #' @useDynLib grpreg, .registration = TRUE 35 | #' @import stats 36 | #' @import graphics 37 | #' @import grDevices 38 | #' @import utils 39 | #' @importFrom Matrix bdiag 40 | NULL 41 | -------------------------------------------------------------------------------- /R/logLik.R: -------------------------------------------------------------------------------- 1 | #' logLik method for grpreg 2 | #' 3 | #' Calculates the log likelihood and degrees of freedom for a fitted grpreg 4 | #' object. 5 | #' 6 | #' Exists mainly for use with [stats::AIC()] and [stats::BIC()]. 7 | #' 8 | #' @aliases logLik logLik.grpreg 9 | #' 10 | #' @param object A fitted `grpreg` or `grpsurv` object, as obtained from 11 | #' [grpreg()] or [grpsurv()] 12 | #' @param df.method How should effective model parameters be calculated? One 13 | #' of: `"active"`, which counts the number of nonzero coefficients; or 14 | #' `"default"`, which uses the calculated `df` returned by 15 | #' `grpreg`. Default is `"default"`. 16 | #' @param REML Use restricted MLE for estimation of the scale parameter in a 17 | #' gaussian model? Default is FALSE. 18 | #' @param ... For S3 method compatibility. 19 | #' 20 | #' @returns Returns an object of class 'logLik', in this case consisting of a 21 | #' number (or vector of numbers) with two attributes: 'df' (the estimated 22 | #' degrees of freedom in the model) and 'nobs' (number of observations). 23 | #' 24 | #' The 'print' method for 'logLik' objects is not intended to handle vectors; 25 | #' consequently, the value of the function does not necessarily display 26 | #' correctly. However, it works with 'AIC' and 'BIC' without any glitches and 27 | #' returns the expected vectorized output. 28 | #' 29 | #' @author Patrick Breheny 30 | #' @seealso [grpreg()] 31 | #' 32 | #' @examples 33 | #' data(Birthwt) 34 | #' X <- Birthwt$X 35 | #' y <- Birthwt$bwt 36 | #' group <- Birthwt$group 37 | #' fit <- grpreg(X,y,group,penalty="cMCP") 38 | #' logLik(fit) ## Display is glitchy for vectors 39 | #' AIC(fit) 40 | #' BIC(fit) 41 | #' @export 42 | 43 | logLik.grpreg <- function(object, df.method=c("default","active"), REML=FALSE, ...) 
{ 44 | df.method <- match.arg(df.method) 45 | n <- as.integer(object$n) 46 | df <- if (df.method=="active") apply(coef(object)!=0, 2, sum) else object$df 47 | if (object$family=="gaussian") { 48 | rdf <- if (REML) n-df else n 49 | RSS <- object$deviance 50 | l <- -n/2 * (log(2*pi) + log(RSS) - log(rdf)) - rdf/2 51 | df <- df + 1 52 | } else if (object$family=='poisson') { 53 | y <- object$y 54 | ind <- y != 0 55 | l <- -object$deviance/2 + sum(y[ind]*log(y[ind])) - sum(y) - sum(lfactorial(y)) 56 | } else { 57 | l <- -object$deviance/2 58 | } 59 | structure(l, df=df, nobs=n, class='logLik') 60 | } 61 | 62 | #' @rdname logLik.grpreg 63 | #' @export 64 | 65 | logLik.grpsurv <- function(object, df.method=c("default","active"), ...) { 66 | df.method <- match.arg(df.method) 67 | n <- as.integer(object$n) 68 | df <- if (df.method=="active") apply(coef(object)!=0, 2, sum) else object$df 69 | structure(-object$deviance/2, df=df, nobs=n, class='logLik') 70 | } 71 | -------------------------------------------------------------------------------- /R/loss.R: -------------------------------------------------------------------------------- 1 | deviance_grpreg <- function(y, yhat, family) { 2 | n <- length(y) 3 | if (family=="gaussian") { 4 | val <- (y - yhat)^2 5 | } else if (family=="binomial") { 6 | yhat[yhat < 0.00001] <- 0.00001 7 | yhat[yhat > 0.99999] <- 0.99999 8 | if (is.matrix(yhat)) { 9 | val <- matrix(NA, nrow=nrow(yhat), ncol=ncol(yhat)) 10 | if (sum(y==1)) val[y==1,] <- -2*log(yhat[y==1, , drop=FALSE]) 11 | if (sum(y==0)) val[y==0,] <- -2*log(1-yhat[y==0, , drop=FALSE]) 12 | } else { 13 | val <- double(length(y)) 14 | if (sum(y==1)) val[y==1] <- -2*log(yhat[y==1]) 15 | if (sum(y==0)) val[y==0] <- -2*log(1-yhat[y==0]) 16 | } 17 | } else if (family=="poisson") { 18 | yly <- y*log(y) 19 | yly[y==0] <- 0 20 | val <- 2*(yly - y + yhat - y*log(yhat)) 21 | } 22 | val 23 | } 24 | deviance_grpsurv <- function(y, eta, total=TRUE) { 25 | ind <- order(y[,1]) 26 | d <- as.integer(y[ind,2]) 27 | if (is.matrix(eta)) { 28 | eta <- eta[ind, , drop=FALSE] 29 | r <- apply(eta, 2, function(x) rev(cumsum(rev(exp(x))))) 30 | } else { 31 | eta <- as.matrix(eta[ind]) 32 | r <- as.matrix(rev(cumsum(rev(exp(eta))))) 33 | } 34 | if (total) { 35 | return(-2*(crossprod(d, eta) - crossprod(d, log(r)))) 36 | } else { 37 | return(-2*(eta[d==1, , drop=FALSE] - log(r)[d==1, , drop=FALSE])) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /R/multi.R: -------------------------------------------------------------------------------- 1 | multiX <- function(X, m) { 2 | p <- ncol(X) 3 | n <- nrow(X) 4 | A <- matrix(0, m*n, m*p) 5 | for (i in 1:m) { 6 | A[m*(1:n)-i+1, m*(1:p)-i+1] <- X 7 | } 8 | cbind(matrix(as.double(diag(m)), m*n, m, byrow=TRUE)[,2:m], A) 9 | } 10 | multiG <- function(g, ncolY) { 11 | structure(c(rep(0, ncolY-1), rep(g, each=ncolY)), 12 | levels=attr(g, 'levels'), 13 | m=attr(g, 'm')) 14 | } 15 | -------------------------------------------------------------------------------- /R/newS.R: -------------------------------------------------------------------------------- 1 | newS <- function(y) { 2 | ind <- order(y[,1]) 3 | list(time = as.double(y[ind,1]), 4 | fail = as.double(y[ind,2]), 5 | ind = ind) 6 | } 7 | -------------------------------------------------------------------------------- /R/newXG.R: -------------------------------------------------------------------------------- 1 | newXG <- function(X, g, m, ncolY, bilevel) { 2 | # Coerce X to matrix 3 | if 
(!inherits(X, "matrix")) { 4 | tmp <- try(X <- model.matrix(~0+., data=X), silent=TRUE) 5 | if (inherits(tmp, "try-error")) stop("X must be a matrix or able to be coerced to a matrix", call.=FALSE) 6 | } 7 | if (storage.mode(X)=="integer") storage.mode(X) <- "double" 8 | if (any(is.na(X))) stop("Missing data (NA's) detected in X. You must eliminate missing data (e.g., by removing cases, removing features, or imputation) before passing X to grpreg", call.=FALSE) 9 | if (length(g) != ncol(X)) stop ("Dimensions of group is not compatible with X", call.=FALSE) 10 | xnames <- if (is.null(colnames(X))) paste("V", 1:ncol(X), sep="") else colnames(X) 11 | 12 | # Setup group 13 | G <- setupG(g, m, bilevel) 14 | 15 | # Reconfigure for multiple outcomes, if necessary 16 | if (ncolY > 1) { 17 | X <- multiX(X, ncolY) 18 | G <- multiG(G, ncolY) 19 | } 20 | 21 | # Feature-level standardization 22 | std <- .Call("standardize", X) 23 | XX <- std[[1]] 24 | center <- std[[2]] 25 | scale <- std[[3]] 26 | nz <- which(scale > 1e-6) # non-constant columns 27 | if (length(nz) != ncol(X)) { 28 | XX <- XX[, nz, drop=FALSE] 29 | G <- subsetG(G, nz) 30 | } 31 | 32 | # Reorder groups, if necessary 33 | G <- reorderG(G, attr(G, 'm'), bilevel) 34 | if (attr(G, 'reorder')) XX <- XX[, attr(G, 'ord')] 35 | 36 | # Group-level standardization 37 | if (!bilevel) { 38 | XX <- orthogonalize(XX, G) 39 | g <- attr(XX, "group") 40 | } else { 41 | g <- as.integer(G) 42 | } 43 | 44 | # Set group multiplier if missing 45 | m <- attr(G, 'm') 46 | if (all(is.na(m))) { 47 | m <- if (bilevel) rep(1, max(g)) else sqrt(table(g[g!=0])) 48 | } 49 | 50 | # Return 51 | return(list(X=XX, g=g, m=m, reorder=attr(G, 'reorder'), ord.inv=attr(G, 'ord.inv'), names=xnames, 52 | center=center, scale=scale, nz=nz)) 53 | } 54 | -------------------------------------------------------------------------------- /R/newY.R: -------------------------------------------------------------------------------- 1 | newY <- function(y, family) { 2 | if (is.data.frame(y)) y <- as.matrix(y) 3 | if (is.matrix(y)) { 4 | d <- dim(y) 5 | y <- t(y) 6 | } else { 7 | d <- c(length(y), 1) 8 | } 9 | 10 | # Convert fuzzy binomial data 11 | if (family=="binomial" && typeof(y) != "logical") { 12 | tab <- table(y) 13 | if (length(tab) > 2) stop("Attemping to use family='binomial' with non-binary data", call.=FALSE) 14 | if (!identical(names(tab), c("0", "1"))) { 15 | message(paste0("Logistic regression modeling Pr(y=", names(tab)[2], ")")) 16 | y <- as.double(as.character(y) == names(tab)[2]) 17 | if (d[2] > 1) attr(y, "dim") <- d 18 | } 19 | } 20 | 21 | # Convert to double, if necessary 22 | if (typeof(y) != "double") { 23 | tryCatch(storage.mode(y) <- "double", warning=function(w) {stop("y must be numeric or able to be coerced to numeric", call.=FALSE)}) 24 | } 25 | if (any(is.na(y))) stop("Missing data (NA's) detected in outcome y. 
You must eliminate missing data (e.g., by removing cases or imputation) before passing y to grpreg", call.=FALSE) 26 | 27 | # Handle multi 28 | if (is.matrix(y)) { 29 | if (ncol(y) > 1) { 30 | if (is.null(colnames(y))) colnames(y) <- paste("Y", 1:ncol(y), sep="") 31 | } 32 | attributes(y) <- NULL 33 | } 34 | 35 | if (family=="gaussian") { 36 | meanY <- mean(y) 37 | y <- y - meanY 38 | attr(y, "mean") <- meanY 39 | } 40 | attr(y, "m") <- d[2] 41 | y 42 | } 43 | -------------------------------------------------------------------------------- /R/orthogonalize.R: -------------------------------------------------------------------------------- 1 | orthogonalize <- function(X, group) { 2 | n <- nrow(X) 3 | J <- max(group) 4 | T <- vector("list", J) 5 | XX <- matrix(0, nrow=nrow(X), ncol=ncol(X)) 6 | XX[, which(group==0)] <- X[, which(group==0)] 7 | for (j in seq_len(J)) { 8 | ind <- which(group==j) 9 | if (length(ind)==0) next 10 | SVD <- svd(X[, ind, drop=FALSE], nu=0) 11 | r <- which(SVD$d > 1e-10) 12 | T[[j]] <- sweep(SVD$v[, r, drop=FALSE], 2, sqrt(n)/SVD$d[r], "*") 13 | XX[, ind[r]] <- X[, ind] %*% T[[j]] 14 | } 15 | nz <- !apply(XX==0, 2, all) 16 | XX <- XX[, nz, drop=FALSE] 17 | attr(XX, "T") <- T 18 | attr(XX, "group") <- group[nz] 19 | XX 20 | } 21 | unorthogonalize <- function(b, XX, group, intercept=TRUE) { 22 | ind <- !sapply(attr(XX, "T"), is.null) 23 | T <- bdiag(attr(XX, "T")[ind]) 24 | if (intercept) { 25 | ind0 <- c(1, 1+which(group==0)) 26 | val <- Matrix::as.matrix(rbind(b[ind0, , drop=FALSE], T %*% b[-ind0, , drop=FALSE])) 27 | } else if (sum(group==0)) { 28 | ind0 <- which(group==0) 29 | val <- Matrix::as.matrix(rbind(b[ind0, , drop=FALSE], T %*% b[-ind0, , drop=FALSE])) 30 | } else { 31 | val <- as.matrix(T %*% b) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /R/plot-cv.R: -------------------------------------------------------------------------------- 1 | #' Plots the cross-validation curve from a \code{cv.grpreg} object 2 | #' 3 | #' Plots the cross-validation curve from a \code{cv.grpreg} object, along with 4 | #' standard error bars. 5 | #' 6 | #' Error bars representing approximate +/- 1 SE (68\% confidence intervals) are 7 | #' plotted along with the estimates at each value of \code{lambda}. For \code{rsq} 8 | #' and \code{snr}, these confidence intervals are quite crude, especially near 9 | #' zero, and will hopefully be improved upon in later versions of 10 | #' \code{grpreg}. 11 | #' 12 | #' @param x A \code{cv.grpreg} object. 13 | #' @param log.l Should horizontal axis be on the log scale? Default is TRUE. 14 | #' @param type What to plot on the vertical axis. \code{cve} plots the 15 | #' cross-validation error (deviance); \code{rsq} plots an estimate of the 16 | #' fraction of the deviance explained by the model (R-squared); \code{snr} 17 | #' plots an estimate of the signal-to-noise ratio; \code{scale} plots, for 18 | #' \code{family="gaussian"}, an estimate of the scale parameter (standard 19 | #' deviation); \code{pred} plots, for \code{family="binomial"}, the estimated 20 | #' prediction error; \code{all} produces all of the above. 21 | #' @param selected If \code{TRUE} (the default), places an axis on top of the 22 | #' plot denoting the number of groups in the model (i.e., that contain a 23 | #' nonzero regression coefficient) at that value of \code{lambda}. 24 | #' @param vertical.line If \code{TRUE} (the default), draws a vertical line at 25 | #' the value where cross-validation error is minimized.
26 | #' @param col Controls the color of the dots (CV estimates). 27 | #' @param \dots Other graphical parameters to \code{plot} 28 | #' 29 | #' @seealso [grpreg()], [cv.grpreg()] 30 | #' 31 | #' @examples 32 | #' \dontshow{set.seed(1)} 33 | #' # Birthweight data 34 | #' data(Birthwt) 35 | #' X <- Birthwt$X 36 | #' group <- Birthwt$group 37 | #' 38 | #' # Linear regression 39 | #' y <- Birthwt$bwt 40 | #' cvfit <- cv.grpreg(X, y, group) 41 | #' plot(cvfit) 42 | #' op <- par(mfrow=c(2,2)) 43 | #' plot(cvfit, type="all") 44 | #' 45 | #' ## Logistic regression 46 | #' y <- Birthwt$low 47 | #' cvfit <- cv.grpreg(X, y, group, family="binomial") 48 | #' par(op) 49 | #' plot(cvfit) 50 | #' par(mfrow=c(2,2)) 51 | #' plot(cvfit, type="all") 52 | #' @export 53 | 54 | plot.cv.grpreg <- function(x, log.l=TRUE, type=c("cve", "rsq", "scale", "snr", "pred", "all"), selected=TRUE, vertical.line=TRUE, col="red", ...) { 55 | type <- match.arg(type) 56 | if (type=="all") { 57 | plot(x, log.l=log.l, type="cve", selected=selected, ...) 58 | plot(x, log.l=log.l, type="rsq", selected=selected, ...) 59 | plot(x, log.l=log.l, type="snr", selected=selected, ...) 60 | if (length(x$fit$family)) { 61 | if (x$fit$family == "binomial") plot(x, log.l=log.l, type="pred", selected=selected, ...) 62 | if (x$fit$family == "gaussian") plot(x, log.l=log.l, type="scale", selected=selected, ...) 63 | } 64 | return(invisible(NULL)) 65 | } 66 | l <- x$lambda 67 | if (log.l) { 68 | l <- log(l) 69 | xlab <- expression(log(lambda)) 70 | } else xlab <- expression(lambda) 71 | 72 | ## Calculate y 73 | L.cve <- x$cve - x$cvse 74 | U.cve <- x$cve + x$cvse 75 | if (type=="cve") { 76 | y <- x$cve 77 | L <- L.cve 78 | U <- U.cve 79 | ylab <- "Cross-validation error" 80 | } else if (type=="rsq" | type == "snr") { 81 | if (length(x$fit$family) && x$fit$family=='gaussian') { 82 | rsq <- pmin(pmax(1 - x$cve/x$null.dev, 0), 1) 83 | rsql <- pmin(pmax(1 - U.cve/x$null.dev, 0), 1) 84 | rsqu <- pmin(pmax(1 - L.cve/x$null.dev, 0), 1) 85 | } else { 86 | rsq <- pmin(pmax(1 - exp(x$cve-x$null.dev), 0), 1) 87 | rsql <- pmin(pmax(1 - exp(U.cve-x$null.dev), 0), 1) 88 | rsqu <- pmin(pmax(1 - exp(L.cve-x$null.dev), 0), 1) 89 | } 90 | if (type == "rsq") { 91 | y <- rsq 92 | L <- rsql 93 | U <- rsqu 94 | ylab <- ~R^2 95 | } else if(type=="snr") { 96 | y <- pmin(rsq/(1-rsq), 1e6) 97 | L <- pmin(rsql/(1-rsql), 1e6) 98 | U <- pmin(rsqu/(1-rsqu), 1e6) 99 | if (max(c(y,L,U)) == 1e6) warning('Signal-to-noise ratio is infinite') 100 | ylab <- "Signal-to-noise ratio" 101 | } 102 | } else if (type=="scale") { 103 | if (x$fit$family == "binomial") stop("Scale parameter for binomial family fixed at 1", call.=FALSE) 104 | y <- sqrt(x$cve) 105 | L <- sqrt(L.cve) 106 | U <- sqrt(U.cve) 107 | ylab <- ~hat(sigma) 108 | } else if (type=="pred") { 109 | y <- x$pe 110 | n <- x$fit$n 111 | CI <- sapply(y, function(x) {binom.test(x*n, n, conf.level=0.68)$conf.int}) 112 | L <- CI[1,] 113 | U <- CI[2,] 114 | ylab <- "Prediction error" 115 | } 116 | 117 | ind <- if (type=="pred") is.finite(l[1:length(x$pe)]) else is.finite(l[1:length(x$cve)]) 118 | ylim <- range(c(L[ind], U[ind])) 119 | aind <- ((U-L)/diff(ylim) > 1e-3) & ind 120 | plot.args = list(x=l[ind], y=y[ind], ylim=ylim, xlab=xlab, ylab=ylab, type="n", xlim=rev(range(l[ind])), las=1, bty="n") 121 | new.args = list(...) 
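# Graphical parameters supplied through '...' override the defaults assembled in
# plot.args before the empty frame is drawn. Illustrative sketch (not run):
# plot(cvfit, bty="o", ylim=c(0, 2)) arrives here with
# new.args = list(bty="o", ylim=c(0, 2)), replacing those two entries of plot.args.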
122 | if (length(new.args)) plot.args[names(new.args)] = new.args 123 | do.call("plot", plot.args) 124 | if (vertical.line) abline(v=l[x$min], lty=2, lwd=.5) 125 | suppressWarnings(arrows(x0=l[aind], x1=l[aind], y0=L[aind], y1=U[aind], code=3, angle=90, col="gray80", length=.05)) 126 | points(l[ind], y[ind], col=col, pch=19, cex=.5) 127 | if (selected) { 128 | n.s <- sapply(predict(x$fit, lambda=x$lambda, type="groups"), length) 129 | axis(3, at=l, labels=n.s, tick=FALSE, line=-0.5) 130 | mtext("Groups selected", cex=0.8, line=1.5) 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /R/plot-grpsurv-func.R: -------------------------------------------------------------------------------- 1 | #' Plot survival curve for grpsurv model 2 | #' 3 | #' Plot survival curve for a model that has been fit using \code{grpsurv} 4 | #' followed by a prediction of the survival function using 5 | #' \code{predict.grpsurv} 6 | #' 7 | #' 8 | #' @param x A \code{'grpsurv.func'} object, which is returned by 9 | #' \code{predict.grpsurv} if \code{type='survival'} is specified. See 10 | #' examples. 11 | #' @param alpha Controls alpha-blending (i.e., transparency). Useful if many 12 | #' overlapping lines are present. 13 | #' @param \dots Other graphical parameters to pass to \code{plot} 14 | #' @author Patrick Breheny 15 | #' @seealso \code{\link{grpsurv}}, \code{\link{predict.grpsurv}} 16 | #' @examples 17 | #' 18 | #' data(Lung) 19 | #' X <- Lung$X 20 | #' y <- Lung$y 21 | #' group <- Lung$group 22 | #' fit <- grpsurv(X, y, group) 23 | #' 24 | #' # A single survival curve 25 | #' S <- predict(fit, X[1,], type='survival', lambda=.05) 26 | #' plot(S, xlim=c(0,200)) 27 | #' 28 | #' # Lots of survival curves 29 | #' S <- predict(fit, X, type='survival', lambda=.05) 30 | #' plot(S, xlim=c(0,200), alpha=0.3) 31 | #' @export 32 | 33 | plot.grpsurv.func <- function(x, alpha=1, ...) { 34 | time <- attr(x, 'time') 35 | if (length(x) > 1) { 36 | Y <- sapply(x, function(f) f(time)) 37 | n <- ncol(Y) 38 | } else { 39 | Y <- x(time) 40 | n <- 1 41 | } 42 | 43 | plot.args <- list(x=1, y=1, xlim=range(time), ylim=range(Y), xlab='Time', ylab='Pr(Survival)', type="n", las=1, bty="n") 44 | new.args <- list(...) 45 | if (length(new.args)) plot.args[names(new.args)] <- new.args 46 | do.call("plot", plot.args) 47 | 48 | cols <- hcl(h=seq(15, 375, len=max(4, n+1)), l=60, c=150, alpha=alpha) 49 | cols <- if (n==2) cols[c(1,3)] else cols[1:n] 50 | line.args <- list(col=cols, lwd=1+2*exp(-n/20), lty=1, type='s') 51 | if (length(new.args)) line.args[names(new.args)] <- new.args 52 | line.args$x <- time 53 | line.args$y <- Y 54 | do.call("matlines", line.args) 55 | } 56 | -------------------------------------------------------------------------------- /R/plot.R: -------------------------------------------------------------------------------- 1 | #' Plot coefficients from a "grpreg" object 2 | #' 3 | #' Produces a plot of the coefficient paths for a fitted \code{grpreg} object. 4 | #' 5 | #' 6 | #' @param x Fitted \code{"grpreg"} model. 7 | #' @param alpha Controls alpha-blending. Default is alpha=1. 8 | #' @param legend.loc Where should the legend go? If left unspecified, no 9 | #' legend is drawn. See \code{\link[graphics]{legend}} for details. 10 | #' @param label If TRUE, annotates the plot with text labels in the right 11 | #' margin describing which variable/group the corresponding line belongs to. 12 | #' @param log.l Should horizontal axis be on the log scale? Default is FALSE. 
13 | #' @param norm If \code{TRUE}, plot the norm of each group, rather than the 14 | #' individual coefficients. 15 | #' @param \dots Other graphical parameters to \code{plot}, \code{matlines}, or 16 | #' \code{legend} 17 | #' 18 | #' @seealso [grpreg()] 19 | #' 20 | #' @examples 21 | #' # Fit model to birthweight data 22 | #' data(Birthwt) 23 | #' X <- Birthwt$X 24 | #' y <- Birthwt$bwt 25 | #' group <- Birthwt$group 26 | #' fit <- grpreg(X, y, group, penalty="grLasso") 27 | #' 28 | #' # Plot (basic) 29 | #' plot(fit) 30 | #' 31 | #' # Plot group norms, with labels in right margin 32 | #' plot(fit, norm=TRUE, label=TRUE) 33 | #' 34 | #' # Plot (miscellaneous options) 35 | #' myColors <- c("black", "red", "green", "blue", "yellow", "purple", 36 | #' "orange", "brown") 37 | #' plot(fit, legend.loc="topleft", col=myColors) 38 | #' labs <- c("Mother's Age", "# Phys. visits", "Hypertension", "Mother's weight", 39 | #' "# Premature", "Race", "Smoking", "Uterine irritability") 40 | #' plot(fit, legend.loc="topleft", lwd=6, alpha=0.5, legend=labs) 41 | #' plot(fit, norm=TRUE, legend.loc="topleft", lwd=6, alpha=0.5, legend=labs) 42 | #' @export 43 | 44 | plot.grpreg <- function(x, alpha=1, legend.loc, label=FALSE, log.l=FALSE, norm=FALSE, ...) { 45 | if (norm) { 46 | Y <- predict(x, type="norm") 47 | if (any(x$group==0)) Y <- Y[-1,] 48 | nonzero <- which(apply(abs(Y), 1, sum)!=0) 49 | Y <- Y[nonzero,] 50 | g <- 1:nrow(Y) 51 | } else { 52 | if (length(dim(x$beta))==3) { 53 | beta <- matrix(x$beta[, -1, , drop=FALSE], ncol=dim(x$beta)[3]) 54 | } else if (inherits(x, "grpsurv")) { 55 | beta <- x$beta 56 | } else { 57 | beta <- x$beta[-1, , drop=FALSE] 58 | } 59 | penalized <- which(x$group!=0) 60 | nonzero <- which(apply(abs(beta), 1, sum)!=0) 61 | ind <- intersect(penalized, nonzero) 62 | Y <- beta[ind, , drop=FALSE] 63 | g <- as.integer(as.factor(x$group[ind])) 64 | } 65 | p <- nrow(Y) 66 | l <- x$lambda 67 | n.g <- max(g) 68 | 69 | if (log.l) { 70 | l <- log(l) 71 | xlab <- expression(log(lambda)) 72 | } else xlab <- expression(lambda) 73 | 74 | plot.args <- list(x=l, y=1:length(l), ylim=range(Y), xlab=xlab, ylab="", type="n", xlim=rev(range(l)), las=1, bty="n") 75 | new.args <- list(...) 
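  # forward only arguments that plot.default() or par() recognize; line- and legend-specific arguments are handled separately below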
76 | if (length(new.args)) { 77 | new.plot.args <- new.args[names(new.args) %in% c(names(par()), names(formals(plot.default)))] 78 | plot.args[names(new.plot.args)] <- new.plot.args 79 | } 80 | do.call("plot", plot.args) 81 | if (plot.args$ylab=="") { 82 | ylab <- if (norm) expression("||"*hat(beta)*"||") else expression(hat(beta)) 83 | mtext(ylab, 2, 3.5, las=1, adj=0) 84 | } 85 | abline(h=0, lwd=0.5, col="gray") 86 | 87 | cols <- hcl(h=seq(15, 375, len=max(4, n.g+1)), l=60, c=150, alpha=alpha) 88 | cols <- if (n.g==2) cols[c(1,3)] else cols[1:n.g] 89 | line.args <- list(col=cols, lwd=1+2*exp(-p/20), lty=1, pch="") 90 | if (length(new.args)) line.args[names(new.args)] <- new.args 91 | line.args$x <- l 92 | line.args$y <- t(Y) 93 | line.args$col <- line.args$col[g] 94 | line.args$lty <- rep(line.args$lty, length.out=max(g)) 95 | line.args$lty <- line.args$lty[g] 96 | do.call("matlines", line.args) 97 | 98 | if(!missing(legend.loc)) { 99 | legend.args <- list(col=cols, lwd=line.args$lwd, lty=line.args$lty, legend=names(x$group.multiplier)) 100 | if (length(new.args)) { 101 | new.legend.args <- new.args[names(new.args) %in% names(formals(legend))] 102 | legend.args[names(new.legend.args)] <- new.legend.args 103 | } 104 | legend.args$x <- legend.loc 105 | do.call("legend", legend.args) 106 | } 107 | if (label) { 108 | ypos <- Y[, ncol(Y)] 109 | text(-0.001, ypos, names(ypos), xpd=NA, adj=c(0, NA)) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /R/predict-cv.R: -------------------------------------------------------------------------------- 1 | #' @rdname predict.grpreg 2 | #' @export 3 | 4 | predict.cv.grpreg <- function(object, X, lambda=object$lambda.min, which=object$min, type=c("link", "response", "class", "coefficients", "vars", "groups", "nvars", "ngroups", "norm"), ...) { 5 | type <- match.arg(type) 6 | if (inherits(object, 'cv.grpsurv')) { 7 | return(predict.grpsurv(object$fit, X=X, lambda=lambda, which=which, type=type, ...)) 8 | } else { 9 | return(predict.grpreg(object$fit, X=X, lambda=lambda, which=which, type=type, ...)) 10 | } 11 | } 12 | 13 | #' @rdname predict.grpreg 14 | #' @export 15 | 16 | coef.cv.grpreg <- function(object, lambda=object$lambda.min, which=object$min, ...) { 17 | coef.grpreg(object$fit, lambda=lambda, which=which, ...) 18 | } 19 | -------------------------------------------------------------------------------- /R/predict-surv.R: -------------------------------------------------------------------------------- 1 | #' Model predictions for grpsurv objects 2 | #' 3 | #' Similar to other predict methods, this function returns predictions from a fitted `grpsurv` object. 4 | #' 5 | #' Estimation of baseline survival function conditional on the estimated values of `beta` is carried out according to the method described in Chapter 4.3 of Kalbfleisch and Prentice. 6 | #' 7 | #' @param object Fitted `grpsurv` model object. 8 | #' @param X Matrix of values at which predictions are to be made. Not required for some `type` values. 9 | #' @param lambda Regularization parameter at which predictions are requested. For values of `lambda` not in the sequence of fitted models, linear interpolation is used. 10 | #' @param which Indices of the penalty parameter `lambda` at which predictions are required. Default: all indices. If `lambda` is specified, this will override `which`. 
11 | #' @param type Type of prediction: 12 | #' * `link`: linear predictors 13 | #' * `response`: risk (i.e., `exp(link)`) 14 | #' * `survival`: the estimated survival function 15 | #' * `hazard`: the estimated cumulative hazard function 16 | #' * `median`: median survival time 17 | #' * The other options are all identical to their [grpreg()] counterparts 18 | #' @param ... Not used. 19 | #' 20 | #' @return The object returned depends on `type`. 21 | #' 22 | #' @references 23 | #' * Kalbfleisch JD and Prentice RL (2002). The Statistical Analysis of Failure Time Data, 2nd edition. Wiley. 24 | #' 25 | #' @author Patrick Breheny 26 | #' 27 | #' @seealso [grpsurv()] 28 | #' 29 | #' @examples 30 | #' data(Lung) 31 | #' X <- Lung$X 32 | #' 33 | #' y <- Lung$y 34 | #' group <- Lung$group 35 | #' 36 | #' fit <- grpsurv(X, y, group) 37 | #' coef(fit, lambda=0.05) 38 | #' head(predict(fit, X, type="link", lambda=0.05)) 39 | #' head(predict(fit, X, type="response", lambda=0.05)) 40 | #' 41 | #' # Survival function 42 | #' S <- predict(fit, X[1,], type="survival", lambda=0.05) 43 | #' S(100) 44 | #' S <- predict(fit, X, type="survival", lambda=0.05) 45 | #' plot(S, xlim=c(0,200)) 46 | #' 47 | #' # Medians 48 | #' predict(fit, X[1,], type="median", lambda=0.05) 49 | #' M <- predict(fit, X, type="median") 50 | #' M[1:10, 1:10] 51 | #' 52 | #' # Nonzero coefficients 53 | #' predict(fit, type="vars", lambda=c(0.1, 0.01)) 54 | #' predict(fit, type="nvars", lambda=c(0.1, 0.01)) 55 | #' @export 56 | 57 | predict.grpsurv <- function(object, X, 58 | type=c("link", "response", "survival", "hazard", "median", "norm", 59 | "coefficients", "vars", "nvars", "groups", "ngroups"), 60 | lambda, which=1:length(object$lambda), ...) { 61 | type <- match.arg(type) 62 | if (type %in% c("norm", "coefficients", "vars", "nvars", "groups", "ngroups")) { 63 | return(predict.grpreg(object=object, X=X, type=type, lambda=lambda, which=which, ...)) 64 | } 65 | if (!missing(lambda)) { 66 | ind <- approx(object$lambda, seq(object$lambda), lambda)$y 67 | l <- floor(ind) 68 | r <- ceiling(ind) 69 | x <- ind %% 1 70 | beta <- (1-x)*object$beta[, l, drop=FALSE] + x*object$beta[, r, drop=FALSE] 71 | colnames(beta) <- round(lambda, 4) 72 | } else { 73 | beta <- object$beta[, which, drop=FALSE] 74 | } 75 | 76 | if (missing(X)) { 77 | eta <- matrix(0, 1, 1) 78 | warning('Returning "baseline" prediction; supply X for more interesting prediction') 79 | } else { 80 | if (inherits(object, "expanded")) X <- predict_spline(object, X) 81 | eta <- X %*% beta 82 | } 83 | if (type=='link') return(drop(eta)) 84 | if (type=='response') return(drop(exp(eta))) 85 | if (!missing(lambda)) { 86 | W <- (1-x)*exp(object$linear.predictors)[, l, drop=FALSE] + x*exp(object$linear.predictors)[, r, drop=FALSE] 87 | } else { 88 | W <- exp(object$linear.predictors)[, which, drop=FALSE] 89 | } 90 | 91 | if (type %in% c('survival', 'hazard') & ncol(W) > 1) stop('Can only return type="survival" or type="hazard" for a single lambda value', call.=FALSE) 92 | if (type %in% c('survival', 'hazard')) val <- vector('list', length(eta)) 93 | if (type == 'median') val <- matrix(NA, nrow(eta), ncol(eta)) 94 | for (j in 1:ncol(eta)) { 95 | # Estimate baseline hazard 96 | w <- W[,j] 97 | r <- rev(cumsum(rev(w))) 98 | a <- ifelse(object$fail, (1-w/r)^(1/w), 1) 99 | S0 <- c(1, cumprod(a)) 100 | H0 <- c(0, cumsum(1-a)) 101 | x <- c(0, object$time) 102 | for (i in 1:nrow(eta)) { 103 | S <- S0^exp(eta[i,j]) 104 | H <- H0*exp(eta[i,j]) 105 | if (type == 'survival') val[[i]] <- approxfun(x, S,
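      # step function in t, with jumps at the observed event times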
method='constant', ties="ordered") 106 | else if (type == 'hazard') val[[i]] <- approxfun(x, H, method='constant', ties="ordered") 107 | else if (type == 'median') { 108 | if (any(S < 0.5)) { 109 | val[i,j] <- x[min(which(S < .5))] 110 | } 111 | } 112 | } 113 | } 114 | if (type %in% c('survival', 'hazard')) { 115 | if (nrow(eta)==1) val <- val[[1]] 116 | class(val) <- c('grpsurv.func', class(val)) 117 | attr(val, 'time') <- object$time 118 | } else if (type == 'median') { 119 | val <- drop(val) 120 | } 121 | val 122 | } 123 | -------------------------------------------------------------------------------- /R/predict_spline.R: -------------------------------------------------------------------------------- 1 | predict_spline <- function(object, X) { 2 | p <- length(unique(object$group)) 3 | meta <- object$meta 4 | if (meta$type == 'bs') { 5 | df <- length(meta$knots[[1]]) + meta$degree 6 | } else if (meta$type == 'ns') { 7 | df <- length(meta$knots[[1]]) + 1 8 | } 9 | bsX <- matrix(NA, nrow(X), (p*df)) # make this work for vectors 10 | for(i in 1:p) { 11 | mat <- object$meta$X[,(df*(i-1)+1):(df*(i-1)+df)] 12 | attr(mat, "degree") <- meta$degree 13 | attr(mat, "knots") <- meta$knots[[i]] 14 | attr(mat, "Boundary.knots") <- meta$boundary[[i]] 15 | attr(mat, "intercept") <- FALSE 16 | attr(mat, "class") <- c(meta$type, "basis", "matrix") 17 | bsX[,(df*(i-1)+1):(df*(i-1)+df)] <- predict(mat, X[,i]) 18 | } 19 | bsX 20 | } 21 | -------------------------------------------------------------------------------- /R/residuals.R: -------------------------------------------------------------------------------- 1 | #' Extract residuals from a grpreg or grpsurv fit 2 | #' 3 | #' Currently, only deviance residuals are supported. 4 | #' 5 | #' @param object Object of class `grpreg` or `grpsurv`. 6 | #' @param lambda Values of the regularization parameter at which residuals are requested (numeric vector). For values of lambda not in the sequence of fitted models, linear interpolation is used. 7 | #' @param which Index of the penalty parameter at which residuals are requested (default = all indices). If `lambda` is specified, it takes precedence over `which`. 8 | #' @param drop By default, if a single value of lambda is supplied, a vector of residuals is returned (logical; default=`TRUE`). Set `drop=FALSE` if you wish the function to always return a matrix (see [drop()]). 9 | #' @param ... Not used. 10 | #' 11 | #' @examples 12 | #' data(Birthwt) 13 | #' X <- Birthwt$X 14 | #' y <- Birthwt$bwt 15 | #' group <- Birthwt$group 16 | #' fit <- grpreg(X, y, group, returnX=TRUE) 17 | #' residuals(fit)[1:5, 1:5] 18 | #' head(residuals(fit, lambda=0.1)) 19 | #' @export 20 | 21 | residuals.grpreg <- function(object, lambda, which=1:length(object$lambda), drop=TRUE, ...)
{ 22 | 23 | # Calculate matrix of residuals 24 | if (inherits(object, 'grpsurv')) { 25 | for (j in 1:length(object$lambda)) { 26 | h <- suppressWarnings(predict(object, which=j, type='hazard')(object$time)) 27 | M <- object$fail - h * exp(object$linear.predictors) 28 | R <- sign(M) * sqrt(-2*(M + object$fail*log(object$fail-M))) 29 | R[h==0,] <- 0 30 | R <- R[match(1:object$n, object$order),] # Return in original order 31 | } 32 | } else if (object$family == 'gaussian') { 33 | R <- object$y - object$linear.predictor 34 | } else if (object$family == 'binomial') { 35 | f <- binomial()$dev.resids 36 | M <- binomial()$linkinv(object$linear.predictor) 37 | R <- vapply(1:length(object$lambda), function(j) {dr(f, object$y, M[,j])}, double(length(object$y))) 38 | } else if (object$family == 'poisson') { 39 | f <- poisson()$dev.resids 40 | M <- poisson()$linkinv(object$linear.predictor) 41 | R <- vapply(1:length(object$lambda), function(j) {dr(f, object$y, M[,j])}, double(length(object$y))) 42 | } else { 43 | stop('Residuals not implemented for this type of grpreg object.') 44 | } 45 | 46 | # Interpolate and return 47 | if (!missing(lambda)) { 48 | ind <- approx(object$lambda, seq(object$lambda), lambda)$y 49 | l <- floor(ind) 50 | r <- ceiling(ind) 51 | w <- ind %% 1 52 | out <- (1-w)*R[, l, drop=FALSE] + w*R[, r, drop=FALSE] 53 | colnames(out) <- round(lambda, 4) 54 | } else { 55 | out <- R[, which, drop=FALSE] 56 | } 57 | if (drop) return(drop(out)) else return(out) 58 | } 59 | 60 | dr <- function(f, y, m) { 61 | sqrt(pmax(f(y, m, rep(1, length(y))), 0)) * ((y > m) * 2 - 1) 62 | } 63 | -------------------------------------------------------------------------------- /R/se-grpsurv.R: -------------------------------------------------------------------------------- 1 | # Bootstrap method for calculating CVSE for Cox models 2 | se_grpsurv <- function(y, eta, B = 100) { 3 | cve <- matrix(NA, B, ncol(eta)) 4 | for (b in 1:B) { 5 | ind <- sample(1:nrow(eta), replace=TRUE) 6 | cve[b,] <- deviance_grpsurv(y[ind,], eta[ind,]) 7 | } 8 | apply(cve, 2, sd) 9 | } 10 | -------------------------------------------------------------------------------- /R/select.R: -------------------------------------------------------------------------------- 1 | #' @rdname select 2 | #' @export 3 | select <- function(obj, ...) UseMethod("select") 4 | 5 | #' Select a value of lambda along a grpreg path 6 | #' 7 | #' Selects a point along the regularization path of a fitted grpreg object 8 | #' according to the AIC, BIC, GCV, AICc, or EBIC criteria. 9 | #' 10 | #' The criteria are defined as follows, where \eqn{L}{L} is the deviance (i.e., 11 | #' -2 times the log-likelihood), \eqn{\nu}{df} is the degrees of freedom, and 12 | #' \eqn{n}{n} is the sample size: 13 | #' 14 | #' \deqn{AIC = L + 2\nu}{AIC = L + 2*df} \deqn{BIC = L + \log(n)\nu}{BIC = L + 15 | #' log(n)*df} \deqn{GCV = \frac{L}{(1-\nu/n)^2}}{GCV = L/((1-df/n)^2)} 16 | #' \deqn{AICc = AIC + 2\frac{\nu(\nu+1)}{n-\nu-1}}{AICc = AIC + 17 | #' 2*df*(df+1)/(n-df-1)} \deqn{EBIC = BIC + 2 \log{p \choose \nu}}{EBIC = BIC + 18 | #' 2*log(p choose df)} 19 | #' 20 | #' @rdname select 21 | #' 22 | #' @param obj A fitted grpreg object. 23 | #' @param criterion The criterion by which to select the regularization 24 | #' parameter. One of \code{"AIC"}, \code{"BIC"}, \code{"GCV"}, \code{"AICc"}, 25 | #' or \code{"EBIC"}; default is \code{"BIC"}. 26 | #' @param df.method How should effective model parameters be calculated?
One 27 | #' of: \code{"active"}, which counts the number of nonzero coefficients; or 28 | #' \code{"default"}, which uses the calculated \code{df} returned by 29 | #' \code{grpreg}. Default is \code{"default"}. 30 | #' @param smooth Applies a smoother to the information criteria before 31 | #' selecting the optimal value. 32 | #' @param \dots For S3 method compatibility. 33 | #' 34 | #' @return A list containing: 35 | #' \describe{ 36 | #' \item{lambda}{The selected value of the regularization parameter, `lambda`.} 37 | #' \item{beta}{The vector of coefficients at the chosen value of `lambda`.} 38 | #' \item{df}{The effective number of model parameters at the chosen value of `lambda`.} 39 | #' \item{IC}{A vector of the calculated model selection criteria for each point on the regularization path.} 40 | #' } 41 | #' 42 | #' @seealso [grpreg()] 43 | #' 44 | #' @examples 45 | #' data(Birthwt) 46 | #' X <- Birthwt$X 47 | #' y <- Birthwt$bwt 48 | #' group <- Birthwt$group 49 | #' fit <- grpreg(X, y, group, penalty="grLasso") 50 | #' select(fit) 51 | #' select(fit,crit="AIC",df="active") 52 | #' plot(fit) 53 | #' abline(v=select(fit)$lambda) 54 | #' par(mfrow=c(1,3)) 55 | #' l <- fit$lambda 56 | #' xlim <- rev(range(l)) 57 | #' plot(l, select(fit)$IC, xlim=xlim, pch=19, type="o", ylab="BIC") 58 | #' plot(l, select(fit,"AIC")$IC, xlim=xlim, pch=19, type="o",ylab="AIC") 59 | #' plot(l, select(fit,"GCV")$IC, xlim=xlim, pch=19, type="o",ylab="GCV") 60 | #' @export 61 | 62 | select.grpreg <- function(obj, criterion=c("BIC","AIC","GCV","AICc","EBIC"), df.method=c("default","active"), smooth=FALSE, ...) { 63 | criterion <- match.arg(criterion) 64 | df.method <- match.arg(df.method) 65 | ll <- logLik(obj, df.method=df.method, ...) 66 | df <- as.double(attr(ll,"df")) 67 | d <- dim(obj$beta) 68 | p <- if (length(d)==2) d[1] - 1 else d[2] - 1 69 | j <- if(obj$family=="gaussian") df - 2 else df - 1 70 | 71 | IC <- switch(criterion, 72 | AIC = AIC(ll), 73 | BIC = BIC(ll), 74 | GCV = (1/obj$n) * (-2) * as.double(ll) / (1-df/obj$n)^2, 75 | AICc = AIC(ll) + 2*df*(df+1)/(obj$n-df-1), 76 | EBIC = BIC(ll) + 2*(lgamma(p+1) - lgamma(j+1) - lgamma(p-j+1))) 77 | n.l <- length(obj$lambda) 78 | if (smooth & (n.l < 4)) { 79 | smooth <- FALSE 80 | warning("Need at least 4 points to use smooth=TRUE", call.=FALSE) 81 | } 82 | if (smooth) { 83 | fit.ss <- smooth.spline(IC[is.finite(IC)]) 84 | d <- diff(fit.ss$y) 85 | if (all(d<0)) i <- n.l 86 | else i <- min(which(d>0))-1 87 | if (i==0) i <- 1 88 | } else i <- which.min(IC) 89 | 90 | if (min(obj$lambda) == obj$lambda[i]) { 91 | warning(paste("minimum lambda selected for", obj$penalty), call.=FALSE) 92 | } else if ((max(obj$lambda) == obj$lambda[i]) & obj$penalty=="gBridge") { 93 | warning("maximum lambda selected", call.=FALSE) 94 | } 95 | return(list(beta=obj$beta[,i], 96 | lambda=obj$lambda[i], 97 | df=df[i], 98 | IC=IC)) 99 | } 100 | 101 | -------------------------------------------------------------------------------- /R/setupLambda.R: -------------------------------------------------------------------------------- 1 | setupLambda <- function(X, y, group, family, penalty, alpha, lambda.min, log.lambda, nlambda, group.multiplier) { 2 | 3 | # Fit to unpenalized covariates 4 | n <- length(y) 5 | K <- table(group) 6 | K1 <- if (min(group)==0) cumsum(K) else c(0, cumsum(K)) 7 | storage.mode(K1) <- "integer" 8 | if (K1[1]!=0) { 9 | fit <- glm(y~X[, group==0], family=family) 10 | } else { 11 | fit <- glm(y~1, family=family) 12 | } 13 | 14 | ## Determine lambda.max 15 | if 
(family=="gaussian") { 16 | r <- fit$residuals 17 | } else { 18 | w <- fit$weights 19 | if (max(w) < 1e-4) stop("Unpenalized portion of model is already saturated; exiting...", call.=FALSE) 20 | r <- residuals(fit, "working")*w 21 | } 22 | if (strtrim(penalty, 2) == "gr") { 23 | zmax <- .Call("maxgrad", X, r, K1, as.double(group.multiplier)) / n 24 | } else { 25 | zmax <- .Call("maxprod", X, r, K1, as.double(group.multiplier)) / n 26 | } 27 | lambda.max <- zmax/alpha 28 | 29 | if (log.lambda) { # lambda sequence on log-scale 30 | if (lambda.min==0) { 31 | lambda <- c(exp(seq(log(lambda.max), log(.001*lambda.max), length=nlambda-1)), 0) 32 | } else { 33 | lambda <- exp(seq(log(lambda.max), log(lambda.min*lambda.max), length=nlambda)) 34 | } 35 | } else { # lambda sequence on linear-scale 36 | if (lambda.min==0) { 37 | lambda <- c(seq(lambda.max, 0.001*lambda.max, length = nlambda-1), 0) 38 | } else { 39 | lambda <- seq(lambda.max, lambda.min*lambda.max, length = nlambda) 40 | } 41 | } 42 | lambda 43 | } 44 | 45 | setupLambda.gBridge <- function(X, y, group, family, alpha, lambda.min, lambda.max, nlambda, gamma, group.multiplier) { 46 | ## Fit to unpenalized covariates 47 | n <- length(y) 48 | ind <- which(group!=0) 49 | if (length(ind)!=length(group)) { 50 | fit <- glm(y~X[, group==0], family=family) 51 | } else { 52 | fit <- glm(y~1, family=family) 53 | } 54 | 55 | ## Guess lambda.max 56 | if (missing(lambda.max)) { 57 | if (family=="gaussian") { 58 | z <- crossprod(X[, ind], fit$residuals) / n 59 | a <- .35 60 | } else { 61 | z <- crossprod(X[, ind], fit$weights * residuals(fit, "working")) / n 62 | a <- .2 63 | } 64 | lambda.max <- max(abs(z)/group.multiplier[group])*a^(1-gamma)/(gamma*alpha) 65 | } 66 | if (lambda.min==0) { 67 | lambda <- c(exp(seq(log(lambda.max), log(.001*lambda.max), len=nlambda-1)), 0) 68 | } else { 69 | lambda <- exp(seq(log(lambda.max), log(lambda.min*lambda.max), len=nlambda)) 70 | } 71 | return(rev(lambda)) 72 | } 73 | -------------------------------------------------------------------------------- /R/setupLambdaCox.R: -------------------------------------------------------------------------------- 1 | setupLambdaCox <- function(X, y, Delta, group, penalty, alpha, lambda.min, nlambda, group.multiplier) { 2 | n <- nrow(X) 3 | p <- ncol(X) 4 | 5 | ## Fit to unpenalized covariates 6 | K <- table(group) 7 | K1 <- as.integer(if (min(group)==0) cumsum(K) else c(0, cumsum(K))) 8 | if (K1[1]!=0) { 9 | SURV <- get("Surv", asNamespace("survival")) 10 | COXPH <- get("coxph", asNamespace("survival")) 11 | nullFit <- COXPH(SURV(y, Delta) ~ X[, group==0, drop=FALSE]) 12 | eta <- nullFit$linear.predictors 13 | rsk <- rev(cumsum(rev(exp(eta)))) 14 | s <- Delta - exp(eta)*cumsum(Delta/rsk) 15 | } else { 16 | w <- 1/(n-(1:n)+1) 17 | s <- Delta - cumsum(Delta*w) 18 | } 19 | 20 | ## Determine lambda.max 21 | if (strtrim(penalty, 2) == "gr") { 22 | zmax <- .Call("maxgrad", X, s, K1, as.double(group.multiplier)) / n 23 | } else { 24 | zmax <- .Call("maxprod", X, s, K1, as.double(group.multiplier)) / n 25 | } 26 | lambda.max <- zmax/alpha 27 | 28 | if (lambda.min==0) lambda <- c(exp(seq(log(lambda.max), log(.001*lambda.max), len=nlambda-1)), 0) 29 | else lambda <- exp(seq(log(lambda.max), log(lambda.min*lambda.max), len=nlambda)) 30 | lambda 31 | } 32 | -------------------------------------------------------------------------------- /R/standardize.R: -------------------------------------------------------------------------------- 1 | unstandardize <- function(b, XG) { 2 | beta <- 
matrix(0, nrow=1+length(XG$scale), ncol=ncol(b)) 3 | beta[1 + XG$nz,] <- b[-1,] / XG$scale[XG$nz] 4 | beta[1,] <- b[1,] - crossprod(XG$center, beta[-1, , drop=FALSE]) 5 | beta 6 | } 7 | -------------------------------------------------------------------------------- /R/summary-cv-grpreg.R: -------------------------------------------------------------------------------- 1 | #' Summarizing inferences based on cross-validation 2 | #' 3 | #' Summary method for \code{cv.grpreg} or \code{cv.grpsurv} objects 4 | #' 5 | #' 6 | #' @aliases summary.cv.grpreg print.summary.cv.grpreg 7 | #' @param object A \code{"cv.grpreg"} object. 8 | #' @param x A \code{"summary.cv.grpreg"} object. 9 | #' @param digits Number of digits past the decimal point to print out. Can be 10 | #' a vector specifying different display digits for each of the five 11 | #' non-integer printed values. 12 | #' @param \dots Further arguments passed to or from other methods. 13 | #' @return \code{summary(cvfit)} produces an object with S3 class 14 | #' \code{"summary.cv.grpreg"}. The class has its own print method and contains 15 | #' the following list elements: \item{penalty}{The penalty used by 16 | #' \code{grpreg}/\code{grpsurv}.} \item{model}{The type of model: 17 | #' \code{"linear"}, \code{"logistic"}, \code{"Poisson"}, \code{"Cox"}, etc.} 18 | #' \item{n}{Number of observations} \item{p}{Number of regression coefficients 19 | #' (not including the intercept).} \item{min}{The index of \code{lambda} with 20 | #' the smallest cross-validation error.} \item{lambda}{The sequence of 21 | #' \code{lambda} values used by \code{cv.grpreg}/\code{cv.grpsurv}.} 22 | #' \item{cve}{Cross-validation error (deviance).} \item{r.squared}{Proportion 23 | #' of variance explained by the model, as estimated by cross-validation.} 24 | #' \item{snr}{Signal to noise ratio, as estimated by cross-validation.} 25 | #' \item{sigma}{For linear regression models, the scale parameter estimate.} 26 | #' \item{pe}{For logistic regression models, the prediction error 27 | #' (misclassification error).} 28 | #' @author Patrick Breheny 29 | #' @seealso \code{\link{grpreg}}, \code{\link{cv.grpreg}}, 30 | #' \code{\link{cv.grpsurv}}, \code{\link{plot.cv.grpreg}} 31 | #' @examples 32 | #' 33 | #' # Birthweight data 34 | #' data(Birthwt) 35 | #' X <- Birthwt$X 36 | #' group <- Birthwt$group 37 | #' 38 | #' # Linear regression 39 | #' y <- Birthwt$bwt 40 | #' cvfit <- cv.grpreg(X, y, group) 41 | #' summary(cvfit) 42 | #' 43 | #' # Logistic regression 44 | #' y <- Birthwt$low 45 | #' cvfit <- cv.grpreg(X, y, group, family="binomial") 46 | #' summary(cvfit) 47 | #' 48 | #' # Cox regression 49 | #' data(Lung) 50 | #' cvfit <- with(Lung, cv.grpsurv(X, y, group)) 51 | #' summary(cvfit) 52 | #' @export 53 | 54 | summary.cv.grpreg <- function(object, ...) 
{ 55 | S <- pmax(object$null.dev - object$cve, 0) 56 | if (!inherits(object, 'cv.grpsurv') && object$fit$family=="gaussian") { 57 | rsq <- pmin(pmax(1 - object$cve/object$null.dev, 0), 1) 58 | } else { 59 | rsq <- pmin(pmax(1 - exp(object$cve-object$null.dev), 0), 1) 60 | } 61 | snr <- S/object$cve 62 | nvars <- predict(object$fit, type="nvars") 63 | ngroups <- predict(object$fit, type="ngroups") 64 | if (inherits(object, 'cv.grpsurv')) { 65 | model <- 'Cox' 66 | } else { 67 | model <- switch(object$fit$family, gaussian="linear", binomial="logistic", poisson="Poisson") 68 | } 69 | d <- dim(object$fit$beta) 70 | if (length(d)==3) { 71 | p <- d[2] - 1 72 | } else { 73 | if (model == 'Cox') { 74 | p <- d[1] 75 | } else { 76 | p <- d[1] - 1 77 | } 78 | } 79 | val <- list(penalty=object$fit$penalty, 80 | model=model, 81 | n=object$fit$n, 82 | p=p, 83 | min=object$min, 84 | lambda=object$lambda, 85 | cve=object$cve, 86 | r.squared=rsq, 87 | snr=snr, 88 | nvars=nvars, 89 | ngroups=ngroups, 90 | d=d) 91 | if (!inherits(object, 'cv.grpsurv') && object$fit$family=="gaussian") val$sigma <- sqrt(object$cve) 92 | if (!inherits(object, 'cv.grpsurv') && object$fit$family=="binomial") val$pe <- object$pe 93 | structure(val, class="summary.cv.grpreg") 94 | } 95 | 96 | #' @rdname summary.cv.grpreg 97 | #' @export 98 | 99 | print.summary.cv.grpreg <- function(x, digits, ...) { 100 | digits <- if (missing(digits)) digits <- c(2, 4, 2, 2, 3) else rep(digits, length.out=5) 101 | if (length(x$d)==3) { 102 | cat(x$penalty, "-penalized multivariate ", x$model, " regression with m=", x$d[1], ", n=", x$n/x$d[1], ", p=", x$p, "\n", sep="") 103 | } else { 104 | cat(x$penalty, "-penalized ", x$model, " regression with n=", x$n, ", p=", x$p, "\n", sep="") 105 | } 106 | cat("At minimum cross-validation error (lambda=", formatC(x$lambda[x$min], digits[2], format="f"), "):\n", sep="") 107 | cat("-------------------------------------------------\n") 108 | cat(" Nonzero coefficients: ", x$nvars[x$min], "\n", sep="") 109 | cat(" Nonzero groups: ", x$ngroups[x$min], "\n", sep="") 110 | cat(" Cross-validation error of ", formatC(min(x$cve), digits[1], format="f"), "\n", sep="") 111 | cat(" Maximum R-squared: ", formatC(max(x$r.squared), digits[3], format="f"), "\n", sep="") 112 | cat(" Maximum signal-to-noise ratio: ", formatC(max(x$snr), digits[4], format="f"), "\n", sep="") 113 | if (x$model == "logistic") cat(" Prediction error at lambda.min: ", formatC(x$pe[x$min], digits[5], format="f"), "\n", sep="") 114 | if (x$model == "linear") cat(" Scale estimate (sigma) at lambda.min: ", formatC(sqrt(x$cve[x$min]), digits[5], format="f"), "\n", sep="") 115 | } 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![GitHub version](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pbreheny/grpreg/master/.version.json&style=flat&logo=github)](https://github.com/pbreheny/grpreg) 3 | [![CRAN version](https://img.shields.io/cran/v/grpreg?logo=R)](https://cran.r-project.org/package=grpreg) 4 | [![downloads](https://cranlogs.r-pkg.org/badges/grpreg)](https://cran.r-project.org/package=grpreg) 5 | [![R-CMD-check](https://github.com/pbreheny/grpreg/workflows/R-CMD-check/badge.svg)](https://github.com/pbreheny/grpreg/actions) 6 | [![codecov.io](https://codecov.io/github/pbreheny/grpreg/coverage.svg?branch=master)](https://app.codecov.io/gh/pbreheny/grpreg) 7 | 8 | 9 | # [Regularization 
Paths for Regression Models with Grouped Covariates](https://pbreheny.github.io/grpreg/) 10 | 11 | `grpreg` is an R package for fitting the regularization path of linear regression, GLM, and Cox regression models with grouped penalties. This includes group selection methods such as group lasso, group MCP, and group SCAD as well as bi-level selection methods such as the group exponential lasso, the composite MCP, and the group bridge. Utilities for carrying out cross-validation as well as post-fitting visualization, summarization, and prediction are also provided. 12 | 13 | ### Install 14 | 15 | * To install the latest release version from CRAN: 16 | 17 | ``` r 18 | install.packages("grpreg") 19 | ``` 20 | 21 | * To install the latest development version from GitHub: 22 | 23 | ``` r 24 | remotes::install_github("pbreheny/grpreg") 25 | ``` 26 | 27 | ### Get started 28 | 29 | See the ["getting started" vignette](https://pbreheny.github.io/grpreg/articles/grpreg.html) 30 | 31 | ### Learn more 32 | 33 | Follow the links under "Articles" at the [grpreg website](https://pbreheny.github.io/grpreg/) 34 | 35 | ### References 36 | 37 | For more on the mathematical foundations and algorithmic details, see: 38 | 39 | * [Breheny, P. and Huang, J. (2009) Penalized methods for bi-level variable selection. *Statistics and its interface*, **2**: 369-380.](https://myweb.uiowa.edu/pbreheny/pdf/Breheny2009.pdf) 40 | * [Breheny, P. and Huang, J. (2015) Group descent algorithms for nonconvex penalized linear and logistic regression models with grouped predictors. *Statistics and Computing*, **25**: 173-187.](https://dx.doi.org/10.1007/s11222-013-9424-2) 41 | -------------------------------------------------------------------------------- /data/Birthwt.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbreheny/grpreg/287121410d7f196af1c078338fd437b773a34b0e/data/Birthwt.RData -------------------------------------------------------------------------------- /data/Lung.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbreheny/grpreg/287121410d7f196af1c078338fd437b773a34b0e/data/Lung.RData -------------------------------------------------------------------------------- /data/birthwt.grpreg.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbreheny/grpreg/287121410d7f196af1c078338fd437b773a34b0e/data/birthwt.grpreg.RData -------------------------------------------------------------------------------- /grpreg.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: e2a34794-455e-4924-80b5-840c61557bd9 3 | 4 | RestoreWorkspace: No 5 | SaveWorkspace: No 6 | AlwaysSaveHistory: No 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: knitr 14 | LaTeX: pdfLaTeX 15 | 16 | BuildType: Package 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("If you use any of the L2-norm penalties (penalty='grLasso', 'grMCP', or 'grSCAD'), cite the Statistics and Computing article. 
If you use the group exponential lasso (penalty='gel'), cite the Biometrics article. If you use the composite MCP (penalty='cMCP') or group bridge penalties, cite the Statistics and its Interface (2009) article; note that the penalty is referred to as the 'group MCP' in the original article, but it would be better to refer to it as 'composite MCP' in future works.") 2 | 3 | bibentry( 4 | bibtype = "Article", 5 | author = c( 6 | person("Patrick", "Breheny"), 7 | person("Jian", "Huang") 8 | ), 9 | title = "Group descent algorithms for nonconvex penalized linear and logistic regression models with grouped predictors", 10 | journal = "Statistics and Computing", 11 | year = 2015, 12 | volume = 25, 13 | number = 2, 14 | pages = "173--187", 15 | doi = "10.1007/s11222-013-9424-2", 16 | url = "https://dx.doi.org/10.1007/s11222-013-9424-2") 17 | 18 | bibentry( 19 | bibtype = "Article", 20 | author = person("Patrick", "Breheny"), 21 | title = "The group exponential lasso for bi-level variable selection", 22 | journal = "Biometrics", 23 | year = 2015, 24 | volume = 71, 25 | number = 3, 26 | pages = "731--740", 27 | doi = "10.1111/biom.12300", 28 | url = "https://dx.doi.org/10.1111/biom.12300") 29 | 30 | bibentry( 31 | bibtype = "Article", 32 | author = c( 33 | person("Patrick", "Breheny"), 34 | person("Jian", "Huang") 35 | ), 36 | title = "Penalized methods for bi-level variable selection", 37 | journal = "Statistics and Its Interface", 38 | year = 2009, 39 | volume = 2, 40 | pages = "369--380", 41 | doi = "10.4310/sii.2009.v2.n3.a10") 42 | 43 | -------------------------------------------------------------------------------- /inst/tinytest/agreement.r: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(glmnet)) 2 | 3 | # gel reproduces lasso 4 | n <- 100 5 | group <- rep(1, 10) 6 | p <- length(group) 7 | X <- matrix(rnorm(n * p), ncol = p) 8 | y <- rnorm(n) 9 | yy <- rnorm(n) > 0 10 | gel <- coef(fit <- grpreg(X, y, group, penalty = "gel", tau = 0)) 11 | par(mfrow = c(2, 2)) 12 | plot(fit, log = TRUE) 13 | lasso <- as.matrix(coef(fit <- glmnet(X, y, lambda = fit$lambda))) 14 | plot(fit, "lambda") 15 | expect_equivalent(gel, lasso, tolerance = .01) 16 | gel <- coef(fit <- grpreg(X, yy, group, penalty = "gel", family = "binomial", tau = 0)) 17 | plot(fit, log = TRUE) 18 | lasso <- as.matrix(coef(fit <- glmnet(X, yy, family = "binomial", lambda = fit$lambda))) 19 | plot(fit, "lambda") 20 | expect_equivalent(gel, lasso, tolerance = .01) 21 | gel <- coef(fit <- grpreg(X, yy, group, penalty = "gel", family = "poisson", tau = 0)) 22 | plot(fit, log = TRUE) 23 | lasso <- as.matrix(coef(fit <- glmnet(X, yy, family = "poisson", lambda = fit$lambda))) 24 | plot(fit, "lambda") 25 | expect_equivalent(gel, lasso, tolerance = .01) 26 | 27 | # grLasso reproduces lasso 28 | n <- 50 29 | group <- 1:10 30 | p <- length(group) 31 | X <- matrix(rnorm(n * p), ncol = p) 32 | y <- rnorm(n) 33 | yy <- runif(n) > .5 34 | 35 | grLasso <- coef(fit <- grpreg(X, y, group, penalty = "grLasso")) 36 | par(mfrow = c(3, 2)) 37 | plot(fit, log = TRUE) 38 | fit1 <- fit 39 | lasso <- as.matrix(coef(fit <- glmnet(X, y, lambda = fit$lambda))) 40 | plot(fit, "lambda") 41 | fit2 <- fit 42 | expect_equivalent(grLasso, lasso, tolerance = .01) 43 | 44 | grLasso <- coef(fit <- grpreg(X, yy, group, penalty = "grLasso", family = "binomial")) 45 | plot(fit, log = TRUE) 46 | lasso <- as.matrix(coef(fit <- glmnet(X, yy, family = "binomial", lambda = fit$lambda))) 47 | plot(fit, 
"lambda") 48 | expect_equivalent(grLasso, lasso, tolerance = .01) 49 | grLasso <- coef(fit <- grpreg(X, yy, group, penalty = "grLasso", family = "poisson")) 50 | plot(fit, log = TRUE) 51 | lasso <- as.matrix(coef(fit <- glmnet(X, yy, family = "poisson", lambda = fit$lambda))) 52 | plot(fit, "lambda") 53 | expect_equivalent(grLasso, lasso, tolerance = .01) 54 | 55 | 56 | # grMCP and grSCAD reproduce MCP and SCAD lasso 57 | n <- 50 58 | group <- 1:10 59 | p <- length(group) 60 | X <- matrix(rnorm(n * p), ncol = p) 61 | y <- rnorm(n) 62 | grMCP <- coef(fit <- grpreg(X, y, group, penalty = "grMCP", gamma = 3)) 63 | par(mfrow = c(2, 2)) 64 | plot(fit) 65 | mcp <- coef(fit <- ncvreg::ncvreg(X, y, lambda = fit$lambda, penalty = "MCP", gamma = 3)) 66 | plot(fit) 67 | expect_equivalent(grMCP, mcp, tolerance = .01) 68 | grSCAD <- coef(fit <- grpreg(X, y, group, penalty = "grSCAD", gamma = 4)) 69 | plot(fit) 70 | scad <- coef(fit <- ncvreg::ncvreg(X, y, lambda = fit$lambda, penalty = "SCAD", gamma = 4)) 71 | plot(fit) 72 | expect_equivalent(grSCAD, scad, tolerance = .01) 73 | -------------------------------------------------------------------------------- /inst/tinytest/auc.r: -------------------------------------------------------------------------------- 1 | if (interactive()) library(tinytest) 2 | library(survival, quietly = TRUE) 3 | 4 | # The quantities here are not exactly the same thing (one is in-sample, the 5 | # other is out-of-sample), but they should be close 6 | n <- 500 7 | X <- matrix(rnorm(n * 10), n, 10) 8 | y <- Surv(rexp(n, rate = exp(X[, 1])), rbinom(n, 1, prob = 0.8)) 9 | cvfit <- cv.grpsurv(X, y, lambda.min = 0, returnY = TRUE) 10 | a <- AUC(cvfit) 11 | a[length(a)] 12 | fit <- coxph(y ~ X) 13 | b <- concordancefit(y, -predict(fit))$concordance 14 | expect_equivalent(a[length(a)], b, tol = 0.02) 15 | -------------------------------------------------------------------------------- /inst/tinytest/binomial.r: -------------------------------------------------------------------------------- 1 | # grpreg reproduces simple logistic regression 2 | n <- 20 3 | p <- 1 4 | X <- matrix(rnorm(n * p), ncol = p) 5 | y <- runif(n) > .5 6 | group <- 1 7 | reg <- glm(y ~ X, family = "binomial")$coef 8 | nlam <- 100 9 | par(mfcol = c(3, 2)) 10 | gel <- coef( 11 | fit <- grpreg( 12 | X, 13 | y, 14 | group, 15 | penalty = "gel", 16 | nlambda = nlam, 17 | lambda.min = 0, 18 | family = "binomial", 19 | eps = 1e-10 20 | ) 21 | )[, nlam] 22 | plot(fit, main = fit$penalty) 23 | expect_equivalent(gel, reg, tolerance = 1e-7) 24 | cMCP <- coef( 25 | fit <- grpreg( 26 | X, 27 | y, 28 | group, 29 | penalty = "cMCP", 30 | nlambda = nlam, 31 | lambda.min = 0, 32 | family = "binomial", 33 | gamma = 9, 34 | eps = 1e-10 35 | ) 36 | )[, nlam] 37 | plot(fit, main = fit$penalty) 38 | expect_equivalent(cMCP, reg, tolerance = 1e-7) 39 | bridge <- coef( 40 | fit <- gBridge(X, y, group, lambda.min = 0, nlambda = nlam, family = "binomial", eps = 1e-10) 41 | )[, 1] 42 | plot(fit, main = fit$penalty) 43 | expect_equivalent(bridge, reg, tolerance = 1e-7) 44 | grLasso <- coef( 45 | fit <- grpreg( 46 | X, 47 | y, 48 | group, 49 | penalty = "grLasso", 50 | nlambda = nlam, 51 | lambda.min = 0, 52 | family = "binomial", 53 | eps = 1e-10 54 | ) 55 | )[, nlam] 56 | plot(fit, main = fit$penalty) 57 | expect_equivalent(grLasso, reg, tolerance = 1e-7) 58 | grMCP <- coef( 59 | fit <- grpreg( 60 | X, 61 | y, 62 | group, 63 | penalty = "grMCP", 64 | nlambda = nlam, 65 | lambda.min = 0, 66 | family = "binomial", 67 | eps = 1e-10 68 | ) 69 
| )[, nlam] 70 | plot(fit, main = fit$penalty) 71 | expect_equivalent(grMCP, reg, tolerance = 1e-7) 72 | grSCAD <- coef( 73 | fit <- grpreg( 74 | X, 75 | y, 76 | group, 77 | penalty = "grSCAD", 78 | nlambda = nlam, 79 | lambda.min = 0, 80 | family = "binomial", 81 | eps = 1e-10 82 | ) 83 | )[, nlam] 84 | plot(fit, main = fit$penalty) 85 | expect_equivalent(grSCAD, reg, tolerance = 1e-7) 86 | 87 | # grpreg() reproduces logistic regression 88 | n <- 100 89 | group <- rep(0:3, 1:4) 90 | p <- length(group) 91 | X <- matrix(rnorm(n * p), ncol = p) 92 | y <- runif(n) > .5 93 | fit.mle <- glm(y ~ X, family = "binomial") 94 | reg <- coef(fit.mle) 95 | par(mfcol = c(3, 2)) 96 | gel <- coef( 97 | fit <- grpreg(X, y, group, penalty = "gel", family = "binomial", eps = 1e-10, lambda.min = 0) 98 | )[, 100] 99 | plot(fit, main = fit$penalty) 100 | expect_equivalent(gel, reg, tol = 1e-6) 101 | cMCP <- coef( 102 | fit <- grpreg(X, y, group, penalty = "cMCP", family = "binomial", eps = 1e-10, lambda.min = 0) 103 | )[, 100] 104 | plot(fit, main = fit$penalty) 105 | expect_equivalent(cMCP, reg, tol = 1e-6) 106 | bridge <- coef(fit <- gBridge(X, y, group, family = "binomial", eps = 1e-10, lambda.min = 0))[, 1] 107 | plot(fit, main = fit$penalty) 108 | expect_equivalent(bridge, reg, tol = 1e-6) 109 | grLasso <- coef( 110 | fit <- grpreg(X, y, group, penalty = "grLasso", family = "binomial", eps = 1e-10, lambda.min = 0) 111 | )[, 100] 112 | plot(fit, main = fit$penalty) 113 | expect_equivalent(grLasso, reg, tol = 1e-6) 114 | grMCP <- coef( 115 | fit <- grpreg( 116 | X, 117 | y, 118 | group, 119 | penalty = "grMCP", 120 | family = "binomial", 121 | gamma = 2, 122 | eps = 1e-10, 123 | lambda.min = 0 124 | ) 125 | )[, 100] 126 | plot(fit, main = fit$penalty) 127 | expect_equivalent(grMCP, reg, tol = 1e-6) 128 | grSCAD <- coef( 129 | fit <- grpreg( 130 | X, 131 | y, 132 | group, 133 | penalty = "grSCAD", 134 | family = "binomial", 135 | gamma = 2.1, 136 | eps = 1e-10, 137 | lambda.min = 0 138 | ) 139 | )[, 100] 140 | plot(fit, main = fit$penalty) 141 | expect_equivalent(grSCAD, reg, tol = 1e-6) 142 | expect_equivalent(predict(fit, X)[, 100], predict(fit.mle), tol = 1e-6) 143 | expect_equivalent( 144 | predict(fit, X, type = "response")[, 100], 145 | predict(fit.mle, type = "response"), 146 | tol = 1e-6 147 | ) 148 | -------------------------------------------------------------------------------- /inst/tinytest/coerce.r: -------------------------------------------------------------------------------- 1 | # expect_equalcoersion of y 2 | char_y <- matrix(LETTERS[1:2], 4, 3) 3 | logi_y <- matrix(rep(c(TRUE, FALSE), each = 6), 4, 3) 4 | int_y <- matrix(1:12, 4, 3) 5 | num_y <- matrix(1.0 * 1:12, 4, 3) 6 | tools::assertError(grpreg:::newY(char_y, "gaussian")) 7 | tools::assertError(grpreg:::newY(num_y, "binomial")) 8 | tools::assertError(grpreg:::newY(int_y, "binomial")) 9 | grpreg:::newY(logi_y, "binomial") 10 | suppressMessages(grpreg:::newY(char_y, "binomial")) 11 | grpreg:::newY(num_y, "gaussian") 12 | grpreg:::newY(int_y, "gaussian") 13 | suppressMessages(grpreg:::newY(char_y[, 1], "binomial")) 14 | 15 | # expect_equalcoersion of X, y 16 | data(Birthwt, package = "grpreg") 17 | X <- data.frame(Birthwt$X) 18 | y <- factor(Birthwt$low, labels = c("No", "Yes")) 19 | suppressMessages(fit <- grpreg(X, y, family = "binomial")) 20 | 21 | # expect_equalcoersion of group 22 | y <- Birthwt$low 23 | g1 <- Birthwt$group 24 | g2 <- as.numeric(factor(g1)) 25 | g3 <- as.numeric(factor(g1, levels = sort(levels(g1)))) ## Tests 
reordering 26 | fit1 <- grpreg(X, y, group = g1, family = "binomial") 27 | fit2 <- grpreg(X, y, group = g2, family = "binomial") 28 | fit3 <- grpreg(X, y, group = g3, family = "binomial") 29 | expect_equal(coef(fit1, which = 50), coef(fit2, which = 50), tol = 0.001) 30 | expect_equal(coef(fit2, which = 50), coef(fit3, which = 50), tol = 0.001) 31 | expect_equal(coef(fit1, which = 50), coef(fit3, which = 50), tol = 0.001) 32 | -------------------------------------------------------------------------------- /inst/tinytest/cv-grpsurv.r: -------------------------------------------------------------------------------- 1 | n <- 50 2 | group <- rep(0:3, 4:1) 3 | p <- length(group) 4 | X <- matrix(rnorm(n * p), ncol = p) 5 | y <- cbind(rexp(n, exp(X[, 5] + X[, 7])), rep(0:1, c(10, n - 10))) 6 | cvfit <- cv.grpsurv(X, y) 7 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 8 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel") 9 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso", nfolds = 50) 10 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel", nfolds = 50) 11 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso", se = "bootstrap") 12 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel", se = "bootstrap") 13 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso", se = "bootstrap") 14 | 15 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 16 | op <- par(mfrow = c(2, 2)) 17 | plot(cvfit) 18 | plot(cvfit, type = "rsq") 19 | plot(cvfit, type = "snr") 20 | par(op) 21 | summary(cvfit) 22 | -------------------------------------------------------------------------------- /inst/tinytest/cv.r: -------------------------------------------------------------------------------- 1 | if (interactive()) library(tinytest) 2 | 3 | # Gaussian 4 | n <- 50 5 | group <- rep(0:4, 5:1) 6 | p <- length(group) 7 | X <- matrix(rnorm(n * p), ncol = p) 8 | y <- rnorm(n) 9 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso") 10 | cvfit <- cv.grpreg(X, y, group, penalty = "gel") 11 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso", fold = 1:50) 12 | cvfit <- cv.grpreg(X, y, group, penalty = "gel", fold = 1:50) 13 | plot(cvfit, type = "all") 14 | 15 | # Binomial 16 | n <- 50 17 | group <- rep(0:3, 4:1) 18 | p <- length(group) 19 | X <- matrix(rnorm(n * p), ncol = p) 20 | y <- runif(n) > 0.5 21 | cvfit <- cv.grpreg(X, y, group, family = "binomial", penalty = "grLasso") 22 | cvfit <- cv.grpreg(X, y, group, family = "binomial", penalty = "gel") 23 | cvfit <- cv.grpreg(X, y, group, family = "binomial", penalty = "grLasso", fold = 1:50) 24 | cvfit <- cv.grpreg(X, y, group, family = "binomial", penalty = "gel", fold = 1:50) 25 | plot(cvfit, type = "all") 26 | 27 | # Poisson 28 | n <- 50 29 | group <- rep(0:3, 4:1) 30 | p <- length(group) 31 | X <- matrix(rnorm(n * p), ncol = p) 32 | y <- sample(1:n) 33 | cvfit <- cv.grpreg(X, y, group, family = "poisson", penalty = "grLasso") 34 | cvfit <- cv.grpreg(X, y, group, family = "poisson", penalty = "gel") 35 | cvfit <- cv.grpreg(X, y, group, family = "poisson", penalty = "grLasso", fold = 1:50) 36 | cvfit <- cv.grpreg(X, y, group, family = "poisson", penalty = "gel", fold = 1:50) 37 | plot(cvfit, type = "all") 38 | summary(cvfit) 39 | 40 | # Multitask learning 41 | n <- 50 42 | p <- 10 43 | m <- 4 44 | X <- matrix(rnorm(n * p), ncol = p) 45 | Y <- matrix(rnorm(n * m), ncol = m) 46 | cvfit <- cv.grpreg(X, Y) 47 | cvfit <- cv.grpreg(X, Y, nfolds = 50) 48 | Y <- matrix(rnorm(n * m), ncol = m) > 0 49 | cvfit <- cv.grpreg(X, Y, family = "binomial") 50 | cvfit <- cv.grpreg(X, Y, 
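  # nfolds equal to n (50 here), i.e. leave-one-out cross-validation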
family = "binomial", nfolds = 50) 51 | plot(cvfit, type = "all") 52 | 53 | # p > n 54 | n <- 75 55 | p <- 200 56 | X <- matrix(rnorm(n * p), n, p) 57 | mu <- exp(apply(X[, 1:10], 1, sum) * 0.5) 58 | y <- rpois(n, mu) 59 | g <- rep(LETTERS[1:20], each = 10) 60 | cvfit <- cv.grpreg(X, y, group = g) 61 | plot(cvfit, type = "all") 62 | cvfit <- cv.grpreg(X, y > 0, group = g, family = "binomial") 63 | plot(cvfit, type = "all") 64 | cvfit <- cv.grpreg(X, y, group = g, family = "poisson") 65 | plot(cvfit, type = "all") 66 | 67 | # summary 68 | set.seed(4) 69 | n <- 75 70 | p <- 200 71 | X <- matrix(rnorm(n * p), n, p) 72 | y <- rpois(n, 1) 73 | g <- rep(LETTERS[1:20], each = 10) 74 | cvfit <- cv.grpreg(X, y, group = g) 75 | s <- summary(cvfit) 76 | expect_equivalent(s$ngroups[1], 0) 77 | 78 | # predict 79 | expect_equivalent( 80 | predict(cvfit, X[1:3, ]), 81 | predict(cvfit$fit, X[1:3, ], lambda = cvfit$lambda.min) 82 | ) 83 | 84 | 85 | # R squared --------------------------------------------------------------- 86 | 87 | # Gaussian 88 | n <- 5000 89 | group <- rep(0:4, 5:1) 90 | p <- length(group) 91 | X <- matrix(rnorm(n * p), ncol = p) 92 | y <- rnorm(n, mean = X[, 6]) 93 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso", lambda.min = 0) 94 | summary(cvfit, lambda = 0)$r.squared 95 | summary(lm(y ~ ., as.data.frame(X)))$r.squared 96 | 97 | 98 | # Return Y ---------------------------------------------------------------- 99 | 100 | n <- 50 101 | group <- rep(0:4, 5:1) 102 | p <- length(group) 103 | X <- matrix(rnorm(n * p), ncol = p) 104 | y <- rnorm(n) 105 | cvfit <- cv.grpreg(X, y, group, returnY = TRUE) 106 | expect_equivalent( 107 | cvfit$cve, 108 | apply((cvfit$Y - y)^2, 2, mean) 109 | ) 110 | -------------------------------------------------------------------------------- /inst/tinytest/gBridge.r: -------------------------------------------------------------------------------- 1 | # gBridge reproduces linear regression 2 | n <- 50 3 | p <- 10 4 | X <- matrix(rnorm(n * p), ncol = p) 5 | y <- rnorm(n) 6 | group <- rep(0:3, 4:1) 7 | fit.mle <- lm(y ~ X) 8 | reg <- coef(fit.mle) 9 | fit <- gBridge(X, y, group, lambda.min = 0, eps = 1e-10) 10 | expect_equivalent(coef(fit, which = 1), coef(fit.mle)) 11 | expect_silent(plot(fit)) 12 | 13 | # CV 14 | cvfit <- cv.grpreg(X, y, group, lambda.min = 0, gBridge = TRUE) 15 | plot(cvfit) 16 | summary(cvfit) 17 | 18 | # Should probably have more tests for logistic regression, etc. 
19 | -------------------------------------------------------------------------------- /inst/tinytest/gaussian.r: -------------------------------------------------------------------------------- 1 | # Simple linear regression ------------------------------------------------ 2 | 3 | n <- 5 4 | p <- 1 5 | X <- matrix(rnorm(n * p), ncol = p) 6 | y <- rnorm(n) 7 | group <- 1 8 | reg <- lm(y ~ X)$coef 9 | nlam <- 100 10 | par(mfcol = c(3, 2)) 11 | gel <- coef(fit <- grpreg(X, y, group, penalty = "gel", nlambda = nlam, lambda.min = 0))[, nlam] 12 | plot(fit, main = fit$penalty) 13 | expect_equivalent(gel, reg, tolerance = 1e-7) 14 | cMCP <- coef(fit <- grpreg(X, y, group, penalty = "cMCP", nlambda = nlam, lambda.min = 0))[, nlam] 15 | plot(fit, main = fit$penalty) 16 | expect_equivalent(cMCP, reg, tolerance = 1e-7) 17 | bridge <- coef(fit <- gBridge(X, y, group, nlambda = nlam, lambda.min = 0))[, 1] 18 | plot(fit, main = fit$penalty) 19 | expect_equivalent(bridge, reg, tolerance = 1e-7) 20 | grLasso <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", nlambda = nlam, lambda.min = 0))[, 21 | nlam 22 | ] 23 | plot(fit, main = fit$penalty) 24 | expect_equivalent(grLasso, reg, tolerance = 1e-7) 25 | grMCP <- coef(fit <- grpreg(X, y, group, penalty = "grMCP", nlambda = nlam, lambda.min = 0))[, nlam] 26 | plot(fit, main = fit$penalty) 27 | expect_equivalent(grMCP, reg, tolerance = 1e-7) 28 | grSCAD <- coef(fit <- grpreg(X, y, group, penalty = "grSCAD", nlambda = nlam, lambda.min = 0))[, 29 | nlam 30 | ] 31 | plot(fit, main = fit$penalty) 32 | expect_equivalent(grSCAD, reg, tolerance = 1e-7) 33 | 34 | 35 | # Linear regression ------------------------------------------------------- 36 | 37 | n <- 50 38 | p <- 10 39 | X <- matrix(rnorm(n * p), ncol = p) 40 | y <- rnorm(n) 41 | group <- rep(0:3, 4:1) 42 | fit.mle <- lm(y ~ X) 43 | reg <- coef(fit.mle) 44 | nlam <- 100 45 | par(mfcol = c(3, 2)) 46 | gel <- coef( 47 | fit <- grpreg(X, y, group, penalty = "gel", nlambda = nlam, lambda.min = 0, eps = 1e-10) 48 | )[, nlam] 49 | plot(fit, main = fit$penalty) 50 | expect_equivalent(gel, reg, tolerance = 1e-7) 51 | cMCP <- coef(fit <- grpreg(X, y, group, penalty = "cMCP", lambda.min = 0, eps = 1e-10))[, 100] 52 | plot(fit, main = fit$penalty) 53 | expect_equivalent(cMCP, reg, tolerance = 1e-7) 54 | bridge <- coef(fit <- gBridge(X, y, group, lambda.min = 0, eps = 1e-10))[, 1] 55 | plot(fit, main = fit$penalty) 56 | expect_equivalent(bridge, reg, tolerance = 1e-7) 57 | grLasso <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-10))[, 100] 58 | plot(fit, main = fit$penalty) 59 | expect_equivalent(grLasso, reg, tolerance = 1e-7) 60 | grMCP <- coef(fit <- grpreg(X, y, group, penalty = "grMCP", lambda.min = 0, eps = 1e-10))[, 100] 61 | plot(fit, main = fit$penalty) 62 | expect_equivalent(grMCP, reg, tolerance = 1e-7) 63 | grSCAD <- coef(fit <- grpreg(X, y, group, penalty = "grSCAD", lambda.min = 0, eps = 1e-10))[, 100] 64 | plot(fit, main = fit$penalty) 65 | expect_equivalent(grSCAD, reg, tolerance = 1e-7) 66 | expect_equivalent(predict(fit, X)[, 100], predict(fit.mle), tolerance = 1e-7) 67 | plot(fit, norm = TRUE, label = TRUE) 68 | plot(fit, legend.loc = "topleft") 69 | -------------------------------------------------------------------------------- /inst/tinytest/grpsurv.r: -------------------------------------------------------------------------------- 1 | if (interactive()) library(tinytest) 2 | suppressPackageStartupMessages(library(survival)) 3 | 4 | # Test that grpsurv works when 
x has 1 column 5 | y <- Surv(rexp(50), sample(rep(0:1, c(10, 40)))) 6 | X <- matrix(rnorm(50 * 1), 50, 1) 7 | g <- 1 8 | fit <- grpsurv(X, y, g, lambda.min = 0) 9 | 10 | # Test that grpsurv works 11 | y <- Surv(rexp(50), sample(rep(0:1, c(10, 40)))) 12 | X <- matrix(rnorm(50 * 6), 50, 6) 13 | g <- rep(1:3, each = 2) 14 | fit <- grpsurv(X, y, g, lambda.min = 0) 15 | 16 | # $ grpsurv equals MLE when lam=0 17 | fit.mle <- coxph(y ~ X) 18 | expect_equivalent(coef(fit)[, 100], coef(fit.mle), tol = 0.01) 19 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "grSCAD") 20 | expect_equivalent(coef(fit)[, 100], coef(fit.mle), tol = 0.01) 21 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "grMCP") 22 | expect_equivalent(coef(fit)[, 100], coef(fit.mle), tol = 0.01) 23 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "gel") 24 | expect_equivalent(coef(fit)[, 100], coef(fit.mle), tol = 0.01) 25 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "cMCP") 26 | expect_equivalent(coef(fit)[, 100], coef(fit.mle), tol = 0.01) 27 | 28 | # grpsurv returns correct logLik 29 | expect_equivalent(logLik(fit)[100], logLik(fit.mle)[1], tol = 0.1) 30 | expect_equivalent(AIC(fit)[100], AIC(fit.mle), tol = 0.1) 31 | 32 | # grpsurv returns correct linear predictors 33 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "grLasso", eps = 1e-12) 34 | expect_equivalent(fit$linear.predictors[, 100], fit.mle$linear.predictors[order(y)]) 35 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "gel", eps = 1e-12) 36 | expect_equivalent(fit$linear.predictors[, 100], fit.mle$linear.predictors[order(y)]) 37 | 38 | # residuals are correct (slightly different at final observation because baseline hazard estimated differently) 39 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "grLasso", eps = 1e-12) 40 | r1 <- residuals(fit, lambda = 0) 41 | r2 <- residuals(fit.mle, type = "deviance") 42 | plot(r1, r2) 43 | abline(0, 1) 44 | expect_equivalent( 45 | residuals(fit, lambda = 0), 46 | residuals(fit.mle, type = "deviance"), 47 | tolerance = 0.1 48 | ) 49 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "gel", eps = 1e-12) 50 | expect_equivalent( 51 | residuals(fit, lambda = 0), 52 | residuals(fit.mle, type = "deviance"), 53 | tolerance = 0.1 54 | ) 55 | 56 | # predict works for grpsurv 57 | head(predict(fit, X, "vars")) 58 | head(predict(fit, X, "nvars")) 59 | head(predict(fit, X, "groups")) 60 | head(predict(fit, X, "ngroups")) 61 | head(predict(fit, X, "link")) 62 | head(predict(fit, X, "response")) 63 | head(predict(fit, X, "coef")) 64 | head(predict(fit, X, "median")) 65 | S <- predict(fit, X, "survival", lambda = 0.1) 66 | plot(S) 67 | S <- predict(fit, X[1, ], "survival", lambda = 0.1) 68 | plot(S) 69 | H <- predict(fit, X, "hazard", lambda = 0.1) 70 | plot(H) 71 | H <- predict(fit, X[1, ], "hazard", lambda = 0.1) 72 | plot(H) 73 | 74 | # Survival curves vs survival package 75 | DF <- as.data.frame(X) 76 | fit.mle <- coxph(y ~ ., DF) 77 | S <- predict(fit, X[1, ], "survival", lambda = 0) 78 | plot(S) 79 | lines(survfit(fit.mle, DF[1, ], conf.int = FALSE)) 80 | H <- predict(fit, X[1, ], "hazard", lambda = 0) 81 | plot(H) 82 | lines(survfit(fit.mle, DF[1, ]), conf.int = FALSE, cumhaz = TRUE) 83 | 84 | # Cumulative hazard 85 | h1 <- H(fit$time) 86 | h2 <- survfit(fit.mle, DF[1, ])$cumhaz 87 | plot(h1, h2) 88 | abline(0, 1) 89 | 90 | # penalty factor works for grpsurv 91 | fit <- grpsurv(X, y, g, group.multiplier = c(1:3)) 92 | 93 | # coersion works for grpsurv 94 | fit <- grpsurv(as.data.frame(X), y) 95 | 96 | # loss 
works for grpsurv 97 | eta <- predict(fit, X, "link", lambda = 0.1) 98 | grpreg:::deviance_grpsurv(y, eta) 99 | grpreg:::deviance_grpsurv(y, eta, total = FALSE) 100 | 101 | # cross-validation works for grpsurv 102 | cvfit <- cv.grpsurv(X, y, g, lambda.min = 0) 103 | plot(cvfit) 104 | summary(cvfit) 105 | -------------------------------------------------------------------------------- /inst/tinytest/multitask.r: -------------------------------------------------------------------------------- 1 | n <- 100 2 | p <- 5 3 | 4 | # multitask learning works 5 | X <- matrix(rnorm(n * p), ncol = p) 6 | Y <- matrix(rnorm(n * 3), ncol = 3) 7 | colnames(X) <- LETTERS[1:p] 8 | fit <- grpreg(X, Y, penalty = "grLasso") 9 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 10 | fit <- grpreg(X, Y, penalty = "cMCP") 11 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 12 | fit <- gBridge(X, Y) 13 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 14 | 15 | # multitask learning works (logistic regression) 16 | fit <- grpreg(X, Y > 0, family = "binomial", penalty = "grLasso", lambda.min = 0.4) 17 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 18 | fit <- grpreg(X, Y > 0, family = "binomial", penalty = "cMCP", lambda.min = 0.4) 19 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 20 | fit <- gBridge(X, Y > 0, family = "binomial", lambda.min = 0.4) 21 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 22 | 23 | # coef/predict work for multitask learning 24 | fit <- grpreg(X, Y) 25 | l <- fit$lambda[20] 26 | coef(fit, which = 1:2) 27 | coef(fit, lambda = l) 28 | predict(fit, lambda = l, type = "nvars") 29 | predict(fit, which = c(30, 60), type = "nvars") 30 | predict(fit, lambda = l, type = "ngroups") 31 | predict(fit, which = c(30, 60), type = "ngroups") 32 | predict(fit, lambda = l, type = "groups") 33 | predict(fit, which = c(30, 60), type = "groups") 34 | predict(fit, lambda = l, type = "norm") 35 | predict(fit, which = c(30, 60), type = "norm") 36 | head(predict(fit, X, lambda = l)) 37 | predict(fit, X, which = c(30, 60))[1:10, , ] 38 | 39 | # coef/predict work for multitask learning (logistic regression) 40 | fit <- grpreg(X, Y > 0, family = "binomial") 41 | l <- fit$lambda[20] 42 | predict(fit, lambda = l, type = "nvars") 43 | predict(fit, lambda = l, type = "ngroups") 44 | predict(fit, lambda = l, type = "groups") 45 | predict(fit, lambda = l, type = "norm") 46 | head(predict(fit, X, lambda = l)) 47 | head(predict(fit, X, lambda = l, type = "response")) 48 | head(predict(fit, X, lambda = l, type = "class")) 49 | 50 | # cross-validation for multitask learning works 51 | cvfit <- cv.grpreg(X, Y) 52 | plot(cvfit) 53 | summary(cvfit) 54 | cvfit <- cv.grpreg(X, Y, penalty = "cMCP") 55 | plot(cvfit) 56 | summary(cvfit) 57 | 58 | # cross-validation for multitask learning works (logistic regression) 59 | cvfit <- cv.grpreg(X, Y > 0, family = "binomial") 60 | plot(cvfit) 61 | summary(cvfit) 62 | cvfit <- cv.grpreg(X, Y > 0, family = "binomial", penalty = "cMCP") 63 | plot(cvfit) 64 | summary(cvfit) 65 | 66 | # multitask learning reproduces linear regression 67 | fit.mle <- lm(Y ~ X) 68 | reg <- coef(fit.mle) 69 | cMCP <- coef(fit <- grpreg(X, Y, penalty = "cMCP", lambda.min = 0), which = 100) 70 | expect_equivalent(t(cMCP), reg, tolerance = 0.01) 71 | p <- predict(fit, X, which = 100) 72 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 73 | bridge <- coef(fit <- gBridge(X, Y, lambda.min = 0), which = 1) 74 | expect_equivalent(t(bridge), reg, tolerance = .01) 75 | p <- predict(fit, X, 
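  # which = 1 is the least-penalized end of the gBridge path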
which = 1) 76 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 77 | grLasso <- coef(fit <- grpreg(X, Y, penalty = "grLasso", lambda.min = 0), which = 100) 78 | expect_equivalent(t(grLasso), reg, tolerance = .01) 79 | p <- predict(fit, X, which = 100) 80 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 81 | grMCP <- coef(fit <- grpreg(X, Y, penalty = "grMCP", lambda.min = 0), which = 100) 82 | expect_equivalent(t(grMCP), reg, tolerance = .01) 83 | p <- predict(fit, X, which = 100) 84 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 85 | grSCAD <- coef(fit <- grpreg(X, Y, penalty = "grSCAD", lambda.min = 0), which = 100) 86 | expect_equivalent(t(grSCAD), reg, tolerance = .01) 87 | p <- predict(fit, X, which = 100) 88 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 89 | 90 | # multitask learning reproduces logistic regression 91 | n <- 50 92 | p <- 2 93 | X <- matrix(rnorm(n * p), ncol = p) 94 | Y <- matrix(rnorm(n * 3), ncol = 3) > 0 95 | fit.mle <- glm(Y[, 3] ~ X, family = binomial) 96 | mle <- coef(fit.mle) 97 | beta <- coef(fit <- grpreg(X, Y, lambda.min = 0, family = "binomial"), which = 100)[3, ] 98 | expect_equivalent(beta, mle, tolerance = .01) 99 | p <- predict(fit, X, which = 100)[, 3] 100 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 101 | p <- predict(fit, X, which = 100, type = "response")[, 3] 102 | expect_equivalent(p, predict(fit.mle, type = "response"), tolerance = .01) 103 | bridge <- coef(fit <- gBridge(X, Y, family = "binomial", lambda.min = 0), which = 1)[3, ] 104 | expect_equivalent(bridge, mle, tolerance = .01) 105 | p <- predict(fit, X, which = 1)[, 3] 106 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 107 | p <- predict(fit, X, which = 1, type = "response")[, 3] 108 | expect_equivalent(p, predict(fit.mle, type = "response"), tolerance = .01) 109 | -------------------------------------------------------------------------------- /inst/tinytest/noiseless.r: -------------------------------------------------------------------------------- 1 | X <- rbind(diag(8), -diag(8)) * sqrt(8) 2 | beta <- c(-1, -1, -1, -1, 2, 2, 2, 2) 3 | y <- as.numeric(X %*% beta) 4 | group <- c(1, 1, 1, 1, 2, 2, 2, 2) 5 | 6 | fit <- grpreg(X, y, group, lambda = c(2, 1.999999, 1, 0)) 7 | expect_equal(predict(fit, type = "nvars", which = 2), 4) 8 | expect_equivalent(coef(fit, lambda = 0), c(0, beta)) 9 | expect_equivalent(coef(fit, lambda = 1), c(0, 0, 0, 0, 0, 1, 1, 1, 1)) 10 | -------------------------------------------------------------------------------- /inst/tinytest/poisson.r: -------------------------------------------------------------------------------- 1 | # grpreg() reproduces poisson regression 2 | n <- 50 3 | group <- rep(0:3, 1:4) 4 | p <- length(group) 5 | X <- matrix(rnorm(n * p), ncol = p) 6 | y <- sample(1:5, n, replace = TRUE) 7 | fit.mle <- glm(y ~ X, family = "poisson") 8 | reg <- coef(fit.mle) 9 | par(mfcol = c(3, 2)) 10 | gel <- coef( 11 | fit <- grpreg(X, y, group, penalty = "gel", family = "poisson", eps = 1e-10, lambda.min = 0) 12 | )[, 100] 13 | plot(fit, main = fit$penalty) 14 | expect_equivalent(gel, reg, tolerance = 1e-7) 15 | cMCP <- coef( 16 | fit <- grpreg(X, y, group, penalty = "cMCP", family = "poisson", eps = 1e-10, lambda.min = 0) 17 | )[, 100] 18 | plot(fit, main = fit$penalty) 19 | expect_equivalent(cMCP, reg, tolerance = 1e-7) 20 | bridge <- coef(fit <- gBridge(X, y, group, family = "poisson", eps = 1e-10, lambda.min = 0))[, 1] 21 | plot(fit, main = fit$penalty) 22 | expect_equivalent(bridge, reg, 
tolerance = 1e-7) 23 | grLasso <- coef( 24 | fit <- grpreg(X, y, group, penalty = "grLasso", family = "poisson", eps = 1e-10, lambda.min = 0) 25 | )[, 100] 26 | plot(fit, main = fit$penalty) 27 | expect_equivalent(grLasso, reg, tolerance = 1e-7) 28 | grMCP <- coef( 29 | fit <- grpreg( 30 | X, 31 | y, 32 | group, 33 | penalty = "grMCP", 34 | family = "poisson", 35 | gamma = 2, 36 | eps = 1e-10, 37 | lambda.min = 0 38 | ) 39 | )[, 100] 40 | plot(fit, main = fit$penalty) 41 | expect_equivalent(grMCP, reg, tolerance = 1e-7) 42 | grSCAD <- coef( 43 | fit <- grpreg( 44 | X, 45 | y, 46 | group, 47 | penalty = "grSCAD", 48 | family = "poisson", 49 | gamma = 2.1, 50 | eps = 1e-10, 51 | lambda.min = 0 52 | ) 53 | )[, 100] 54 | plot(fit, main = fit$penalty) 55 | expect_equivalent(grSCAD, reg, tolerance = 1e-7) 56 | expect_equivalent(predict(fit, X)[, 100], predict(fit.mle), tolerance = 1e-7) 57 | expect_equivalent( 58 | predict(fit, X, type = "response")[, 100], 59 | predict(fit.mle, type = "response"), 60 | tolerance = 1e-7 61 | ) 62 | -------------------------------------------------------------------------------- /inst/tinytest/select.r: -------------------------------------------------------------------------------- 1 | if (interactive()) library(tinytest) 2 | 3 | data(Birthwt) 4 | X <- Birthwt$X 5 | y <- Birthwt$bwt 6 | group <- Birthwt$group 7 | fit <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0) 8 | lmfit <- lm(y ~ X) 9 | 10 | # BIC 11 | expect_silent(sb <- select(fit)) 12 | expect_equal(sb$IC[100], BIC(lmfit), tolerance = 1e-4) 13 | 14 | # AIC 15 | expect_warning(sa <- select(fit, criterion = "AIC", df.method = "active")) 16 | expect_silent(sa <- select(fit, criterion = "AIC")) 17 | expect_equivalent(sa$IC[100], AIC(lmfit), tolerance = 1e-4) 18 | 19 | # GCV 20 | expect_silent(s <- select(fit, criterion = "GCV", smooth = TRUE)) 21 | 22 | # AICc 23 | expect_silent(s <- select(fit, criterion = "AICc")) 24 | expect_true(all(s$IC >= sa$IC)) 25 | 26 | # EBIC 27 | expect_silent(s <- select(fit, criterion = "EBIC")) 28 | expect_true(all(s$IC >= sb$IC)) 29 | -------------------------------------------------------------------------------- /inst/tinytest/standardization-ortho.r: -------------------------------------------------------------------------------- 1 | n <- 20 2 | p <- 5 3 | l <- 5 4 | group <- c(1, 1, 2, 2, 2) 5 | zgroup <- rep(0:1, 2:3) 6 | 7 | # standardize() standardizes correctly 8 | X <- matrix(rnorm(n * p), ncol = p) 9 | XX <- .Call("standardize", X)[[1]] 10 | expect_equal(apply(XX, 2, mean), rep(0, 5)) 11 | expect_equal(apply(XX, 2, crossprod), rep(20, 5)) 12 | 13 | # unstandardize() unstandardizes correctly 14 | X <- matrix(rnorm(n * p), ncol = p) 15 | std <- .Call("standardize", X) 16 | XX <- std[[1]] 17 | center <- std[[2]] 18 | scale <- std[[3]] 19 | bb <- matrix(rnorm(l * (p + 1)), nrow = p + 1) 20 | b <- grpreg:::unstandardize(bb, list(center = center, scale = scale, nz = which(scale > 1e-10))) 21 | expect_equal(cbind(1, XX) %*% bb, cbind(1, X) %*% b) 22 | 23 | # orthogonalize() orthogonalizes correctly 24 | X <- matrix(rnorm(n * p), ncol = p) 25 | XX <- grpreg:::orthogonalize(X, group) 26 | for (j in 1:group[p]) { 27 | ind <- which(group == j) 28 | expect_equal(crossprod(XX[, ind]) / n, diag(length(ind))) 29 | } 30 | 31 | # unorthogonalize() unorthogonalizes correctly 32 | X <- matrix(rnorm(n * p), ncol = p) 33 | XX <- grpreg:::orthogonalize(X, group) 34 | bb <- matrix(rnorm(l * (p + 1)), nrow = p + 1) 35 | b <- grpreg:::unorthogonalize(bb, XX, attr(XX, "group")) 36 
| expect_equal(cbind(1, XX) %*% bb, cbind(1, X) %*% b) 37 | 38 | # unorthogonalize() unorthogonalizes correctly w/o intercept 39 | X <- matrix(rnorm(n * p), ncol = p) 40 | XX <- grpreg:::orthogonalize(X, zgroup) 41 | bb <- matrix(rnorm(l * (p)), nrow = p) 42 | b <- grpreg:::unorthogonalize(bb, XX, attr(XX, "group"), intercept = FALSE) 43 | expect_equal(XX %*% bb, X %*% b) 44 | 45 | # orthogonalize() orthogonalizes correctly w/o full rank 46 | X <- matrix(rnorm(n * p), ncol = p) 47 | X[, 5] <- X[, 4] 48 | XX <- grpreg:::orthogonalize(X, group) 49 | for (j in 1:group[p]) { 50 | ind <- which(attr(XX, "group") == j) 51 | expect_equal(crossprod(XX[, ind]) / n, diag(length(ind))) 52 | } 53 | y <- rnorm(nrow(X)) 54 | fit <- grpreg(X, y, group = LETTERS[group]) 55 | 56 | # orthogonalize() orthogonalizes correctly w/ 0's present and non-full-rank 57 | X <- matrix(rnorm(n * p), ncol = p) 58 | X[, 4] <- 0 59 | X[, 5] <- X[, 3] 60 | grp <- grpreg:::newXG(X, group, rep(1, max(group)), 1, FALSE) 61 | XX <- grp$X 62 | g <- grp$g 63 | for (j in 1:max(g)) { 64 | ind <- which(g == j) 65 | expect_equal(crossprod(XX[, ind]) / n, diag(length(ind))) 66 | } 67 | y <- rnorm(nrow(X)) 68 | fit <- grpreg(X, y, group = LETTERS[group]) 69 | -------------------------------------------------------------------------------- /inst/tinytest/surv-torture.r: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(survival)) 2 | 3 | # single lambda 4 | n <- 50 5 | group <- rep(0:3, 4:1) 6 | p <- length(group) 7 | X <- matrix(rnorm(n * p), ncol = p) 8 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 9 | b.mle <- coef(coxph(y ~ X)) 10 | b <- coef(fit <- grpsurv(X, y, group, penalty = "grLasso", lambda = 0, eps = 1e-10)) 11 | expect_equivalent(b, b.mle, tol = 0.0001) 12 | 13 | # constant columns 14 | n <- 50 15 | group <- rep(0:3, 4:1) 16 | p <- length(group) 17 | X <- matrix(rnorm(n * p), ncol = p) 18 | X[, 5] <- 0 19 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 20 | b.mle <- coef(coxph(y ~ X)) 21 | b.mle[is.na(b.mle)] <- 0 22 | b <- coef(fit <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 23 | expect_equivalent(b, b.mle, tol = 0.0001) 24 | b <- coef(fit <- grpsurv(X, y, group, penalty = "gel", lambda.min = 0, eps = 1e-7), 0) 25 | expect_equivalent(b, b.mle, tol = 0.0001) 26 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 27 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel") 28 | 29 | # constant groups 30 | n <- 50 31 | group <- rep(0:3, 4:1) 32 | p <- length(group) 33 | X <- matrix(rnorm(n * p), ncol = p) 34 | X[, group == 2] <- 0 35 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 36 | b.mle <- coef(coxph(y ~ X)) 37 | b.mle[is.na(b.mle)] <- 0 38 | b <- coef(fit <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 39 | expect_equivalent(b, b.mle, tol = 0.0001) 40 | b <- coef(fit <- grpsurv(X, y, group, penalty = "gel", lambda.min = 0, eps = 1e-7), 0) 41 | expect_equivalent(b, b.mle, tol = 0.0001) 42 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 43 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel") 44 | 45 | # constant groups w/ multiplier specified 46 | n <- 50 47 | group <- rep(0:3, 4:1) 48 | p <- length(group) 49 | X <- matrix(rnorm(n * p), ncol = p) 50 | X[, group == 2] <- 0 51 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 52 | b.mle <- coef(coxph(y ~ X)) 53 | b.mle[is.na(b.mle)] <- 0 54 | b <- coef( 55 | fit <- grpsurv( 56 | X, 57 | y, 58 | group, 59 | penalty = "grLasso", 60 | lambda.min = 
0, 61 | eps = 1e-7, 62 | group.multiplier = 1:3 63 | ), 64 | 0 65 | ) 66 | expect_equivalent(b, b.mle, tol = 0.0001) 67 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 68 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel") 69 | 70 | # grpsurv handles groups of non-full rank 71 | n <- 50 72 | group <- rep(0:3, 4:1) 73 | p <- length(group) 74 | X <- matrix(rnorm(n * p), ncol = p) 75 | X[, 7] <- X[, 6] 76 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 77 | b0 <- coef(coxph(y ~ X)) 78 | b0[6:7] <- b0[6] / 2 79 | b <- coef(fit <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 80 | expect_equivalent(b, b0, tol = 0.0001) 81 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 82 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel") 83 | 84 | # out-of-order groups #1 85 | n <- 50 86 | group <- rep(1:2, each = 2) 87 | perm <- sample(1:length(group)) 88 | p <- length(group) 89 | X <- matrix(rnorm(n * p), ncol = p) 90 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 91 | fit1 <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7) 92 | fit2 <- grpsurv(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0, eps = 1e-7) 93 | b1 <- coef(fit1, 0)[perm] 94 | b2 <- coef(fit2, 0) 95 | expect_equivalent(b1, b2, tol = 0.01) 96 | cvfit <- cv.grpsurv(X[, perm], y, group[perm], penalty = "grLasso") 97 | 98 | # out-of-order groups #2 99 | n <- 50 100 | group <- rep(0:3, 4:1) 101 | perm <- sample(1:length(group)) 102 | p <- length(group) 103 | X <- matrix(rnorm(n * p), ncol = p) 104 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 105 | fit1 <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7) 106 | fit2 <- grpsurv(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0, eps = 1e-7) 107 | b1 <- coef(fit1, 0)[perm] 108 | b2 <- coef(fit2, 0) 109 | expect_equivalent(b1, b2, tol = 0.01) 110 | cvfit <- cv.grpsurv(X[, perm], y, group[perm], penalty = "grLasso") 111 | 112 | # groups order + rank + constant col + constant grp 113 | n <- 50 114 | group <- rep(0:4, c(2, 2:5)) 115 | perm <- sample(1:length(group)) 116 | p <- length(group) 117 | X <- matrix(rnorm(n * p), ncol = p) 118 | X[, 7] <- X[, 6] # Group 2 not full rank 119 | X[, group == 3] <- 0 # Group 3 constant 120 | X[, 15] <- 0 # Group 4 contains a zero column 121 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 122 | fit1 <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0) 123 | fit2 <- grpsurv(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0) 124 | b1 <- coef(fit1, 0) 125 | b2 <- coef(fit2, 0) 126 | expect_equivalent(b1[perm], b2, tol = 0.01) # Checking perm ordering 127 | nz <- which(apply(X, 2, sd) != 0) 128 | fit3 <- grpsurv(X[, nz], y, group[nz], penalty = "grLasso", lambda.min = 0) 129 | b3 <- coef(fit3, 0) 130 | expect_equivalent(b1[nz], b3, tol = 0.01) # Checking dropped group/var 131 | expect_equivalent(coef(fit1)["V6", ], coef(fit1)["V7", ], tol = 0.01) # Checking rank handled correctly 132 | cvfit <- cv.grpsurv(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0) 133 | plot(cvfit) 134 | summary(cvfit) 135 | plot(cvfit$fit) 136 | -------------------------------------------------------------------------------- /inst/tinytest/torture.r: -------------------------------------------------------------------------------- 1 | # Single lambda 2 | n <- 50 3 | group <- rep(0:3, 4:1) 4 | p <- length(group) 5 | X <- matrix(rnorm(n * p), ncol = p) 6 | y <- rnorm(n) 7 | b.lm <- coef(lm(y ~ X)) 8 | b <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", 
lambda = 0, eps = 1e-10)) 9 | expect_equivalent(b, b.lm, tol = 0.0001) 10 | 11 | # constant columns 12 | n <- 50 13 | group <- rep(0:3, 4:1) 14 | p <- length(group) 15 | X <- matrix(rnorm(n * p), ncol = p) 16 | X[, 5] <- 0 17 | y <- rnorm(n) 18 | b.lm <- coef(lm(y ~ X)) 19 | b.lm[is.na(b.lm)] <- 0 20 | b <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 21 | expect_equivalent(b, b.lm, tol = 0.0001) 22 | b <- coef(fit <- grpreg(X, y, group, penalty = "gel", lambda.min = 0, eps = 1e-7), 0) 23 | expect_equivalent(b, b.lm, tol = 0.0001) 24 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso") 25 | cvfit <- cv.grpreg(X, y, group, penalty = "gel") 26 | 27 | # constant groups 28 | n <- 50 29 | group <- rep(0:3, 4:1) 30 | p <- length(group) 31 | X <- matrix(rnorm(n * p), ncol = p) 32 | X[, group == 2] <- 0 33 | y <- rnorm(n) 34 | b.lm <- coef(lm(y ~ X)) 35 | b.lm[is.na(b.lm)] <- 0 36 | b <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 37 | expect_equivalent(b, b.lm, tol = 0.0001) 38 | b <- coef(fit <- grpreg(X, y, group, penalty = "gel", lambda.min = 0, eps = 1e-7), 0) 39 | expect_equivalent(b, b.lm, tol = 0.0001) 40 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso") 41 | cvfit <- cv.grpreg(X, y, group, penalty = "gel") 42 | 43 | # constant groups w/ multiplier specified 44 | n <- 50 45 | group <- rep(0:3, 4:1) 46 | p <- length(group) 47 | X <- matrix(rnorm(n * p), ncol = p) 48 | X[, group == 2] <- 0 49 | y <- rnorm(n) 50 | b.lm <- coef(lm(y ~ X)) 51 | b.lm[is.na(b.lm)] <- 0 52 | b <- coef( 53 | fit <- grpreg( 54 | X, 55 | y, 56 | group, 57 | penalty = "grLasso", 58 | lambda.min = 0, 59 | eps = 1e-7, 60 | group.multiplier = 1:3 61 | ), 62 | 0 63 | ) 64 | expect_equivalent(b, b.lm, tol = 0.0001) 65 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso") 66 | cvfit <- cv.grpreg(X, y, group, penalty = "gel") 67 | 68 | # grpreg handles groups of non-full rank 69 | n <- 50 70 | group <- rep(0:3, 4:1) 71 | p <- length(group) 72 | X <- matrix(rnorm(n * p), ncol = p) 73 | X[, 7] <- X[, 6] 74 | y <- rnorm(n) 75 | b0 <- coef(lm(y ~ X)) 76 | b0[7:8] <- b0[7] / 2 77 | b <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 78 | expect_equivalent(b, b0, tol = 0.0001) 79 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso") 80 | cvfit <- cv.grpreg(X, y, group, penalty = "gel") 81 | 82 | # out-of-order groups #1 83 | n <- 50 84 | group <- rep(1:2, each = 2) 85 | perm <- sample(1:length(group)) 86 | p <- length(group) 87 | X <- matrix(rnorm(n * p), ncol = p) 88 | y <- rnorm(n) 89 | fit1 <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7) 90 | fit2 <- grpreg(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0, eps = 1e-7) 91 | b1 <- coef(fit1, 0)[-1][perm] 92 | b2 <- coef(fit2, 0)[-1] 93 | expect_equivalent(b1, b2, tol = 0.01) 94 | cvfit <- cv.grpreg(X[, perm], y, group[perm], penalty = "grLasso") 95 | 96 | # out-of-order groups #2 97 | n <- 50 98 | group <- rep(0:3, 4:1) 99 | perm <- sample(1:length(group)) 100 | p <- length(group) 101 | X <- matrix(rnorm(n * p), ncol = p) 102 | y <- rnorm(n) 103 | fit1 <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7) 104 | fit2 <- grpreg(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0, eps = 1e-7) 105 | b1 <- coef(fit1, 0)[-1][perm] 106 | b2 <- coef(fit2, 0)[-1] 107 | expect_equivalent(b1, b2, tol = 0.01) 108 | cvfit <- cv.grpreg(X[, perm], y, group[perm], penalty = "grLasso") 109 | 110 | # 
groups order + rank + constant col + constant grp 111 | n <- 50 112 | group <- rep(0:4, c(2, 2:5)) 113 | perm <- sample(1:length(group)) 114 | p <- length(group) 115 | X <- matrix(rnorm(n * p), ncol = p) 116 | X[, 7] <- X[, 6] # Group 2 not full rank 117 | X[, group == 3] <- 0 # Group 3 constant 118 | X[, 15] <- 0 # Group 4 contains a zero column 119 | y <- rnorm(n) 120 | fit1 <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0) 121 | fit2 <- grpreg(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0) 122 | b1 <- coef(fit1, 0)[-1] 123 | b2 <- coef(fit2, 0)[-1] 124 | expect_equivalent(b1[perm], b2, tol = 0.01) # Checking perm ordering 125 | nz <- which(apply(X, 2, sd) != 0) 126 | fit3 <- grpreg(X[, nz], y, group[nz], penalty = "grLasso", lambda.min = 0) 127 | b3 <- coef(fit3, 0)[-1] 128 | expect_equivalent(b1[nz], b3, tol = 0.01) # Checking dropped group/var 129 | expect_equivalent(coef(fit1)["V6", ], coef(fit1)["V7", ], tol = 0.01) # Checking rank handled correctly 130 | cvfit <- cv.grpreg(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0) 131 | plot(cvfit) 132 | summary(cvfit) 133 | plot(cvfit$fit) 134 | -------------------------------------------------------------------------------- /man/AUC.cv.grpsurv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/auc.R 3 | \name{AUC.cv.grpsurv} 4 | \alias{AUC.cv.grpsurv} 5 | \alias{AUC} 6 | \title{Calculates AUC for cv.grpsurv objects} 7 | \usage{ 8 | \method{AUC}{cv.grpsurv}(obj, ...) 9 | 10 | AUC(obj, ...) 11 | } 12 | \arguments{ 13 | \item{obj}{A \code{cv.grpsurv} object. You must run \code{cv.grpsurv()} with the option \code{returnY=TRUE} in order for \code{AUC} to work.} 14 | 15 | \item{\dots}{For S3 method compatibility.} 16 | } 17 | \description{ 18 | Calculates the cross-validated AUC (concordance) from a "cv.grpsurv" object. 19 | } 20 | \details{ 21 | The area under the curve (AUC), or equivalently, the concordance statistic 22 | (C), is calculated according to the procedure described in van Houwelingen 23 | and Putter (2011). The function calls \code{survival::concordancefit()}, except 24 | cross-validated linear predictors are used to guard against overfitting. 25 | Thus, the values returned by \code{AUC.cv.grpsurv()} will be lower than those you 26 | would obtain with \code{concordancefit()} if you fit the full (unpenalized) model. 27 | } 28 | \examples{ 29 | \dontshow{set.seed(1)} 30 | data(Lung) 31 | X <- Lung$X 32 | y <- Lung$y 33 | group <- Lung$group 34 | 35 | cvfit <- cv.grpsurv(X, y, group, returnY=TRUE) 36 | head(AUC(cvfit)) 37 | ll <- log(cvfit$fit$lambda) 38 | plot(ll, AUC(cvfit), xlim=rev(range(ll)), lwd=3, type='l', 39 | xlab=expression(log(lambda)), ylab='AUC', las=1) 40 | } 41 | \references{ 42 | van Houwelingen H, Putter H (2011). \emph{Dynamic Prediction in Clinical Survival Analysis}. CRC Press. 
43 | } 44 | \seealso{ 45 | \code{\link[=cv.grpsurv]{cv.grpsurv()}}, \code{\link[survival:concordancefit]{survival::concordancefit()}} 46 | } 47 | -------------------------------------------------------------------------------- /man/Birthwt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{Birthwt} 5 | \alias{Birthwt} 6 | \title{Risk Factors Associated with Low Infant Birth Weight} 7 | \format{ 8 | The \code{Birthwt} object is a list containing four elements (\code{X}, \code{bwt}, \code{low}, and \code{group}): 9 | \describe{ 10 | \item{bwt}{Birth weight in kilograms} 11 | \item{low}{Indicator of birth weight less than 2.5kg} 12 | \item{group}{Vector describing how the columns of X are grouped} 13 | \item{X}{A matrix with 189 observations (rows) and 16 predictor variables (columns).} 14 | } 15 | The matrix \code{X} contains the following columns: 16 | \describe{ 17 | \item{age1,age2,age3}{Orthogonal polynomials of first, second, and third degree representing mother's age in years} 18 | \item{lwt1,lwt2,lwt3}{Orthogonal polynomials of first, second, and third degree representing mother's weight in pounds at last menstrual period} 19 | \item{white,black}{Indicator functions for mother's race; "other" is reference group} 20 | \item{smoke}{Smoking status during pregnancy} 21 | \item{ptl1,ptl2m}{Indicator functions for one or for two or more previous premature labors, respectively. No previous premature labors is the reference category.} 22 | \item{ht}{History of hypertension} 23 | \item{ui}{Presence of uterine irritability} 24 | \item{ftv1,ftv2,ftv3m}{Indicator functions for one, for two, or for three or more physician visits during the first trimester, respectively. No visits is the reference category.} 25 | } 26 | } 27 | \source{ 28 | \url{https://cran.r-project.org/package=MASS} 29 | } 30 | \usage{ 31 | Birthwt 32 | } 33 | \description{ 34 | The \code{Birthwt} data contains 189 observations, 16 predictors, and an 35 | outcome, birthweight, available both as a continuous measure and a binary 36 | indicator for low birth weight. The data were collected at Baystate Medical 37 | Center, Springfield, Mass. during 1986. This data frame is a 38 | reparameterization of the \code{birthwt} data frame from the \strong{MASS} package. 39 | } 40 | \examples{ 41 | data(Birthwt) 42 | hist(Birthwt$bwt, xlab="Child's birth weight", main="") 43 | table(Birthwt$low) 44 | ## See examples in ?birthwt (MASS package) 45 | ## for more about the data set 46 | ## See examples in ?grpreg for use of this data set 47 | ## with group penalized regression models 48 | } 49 | \references{ 50 | \itemize{ 51 | \item Venables, W. N. and Ripley, B. D. (2002). \emph{Modern Applied Statistics with S.} Fourth edition. Springer. 52 | \item Hosmer, D.W. and Lemeshow, S.
(1989) \emph{Applied Logistic Regression.} New York: Wiley 53 | } 54 | } 55 | \seealso{ 56 | \link[MASS:birthwt]{MASS::birthwt}, \code{\link[=grpreg]{grpreg()}} 57 | } 58 | \keyword{datasets} 59 | -------------------------------------------------------------------------------- /man/Lung.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{Lung} 5 | \alias{Lung} 6 | \title{VA lung cancer data set} 7 | \format{ 8 | A list of two objects: \code{y} and \code{X} 9 | \describe{ 10 | \item{y}{A two column matrix (\code{Surv} object) containing the follow-up 11 | time (in days) and an indicator variable for whether the patient died 12 | while on the study or not.} 13 | \item{X}{A matrix with 137 observations (rows) and 9 predictor variables 14 | (columns). The remainder of this list describes the columns of \code{X}} 15 | \item{trt}{Treatment indicator (1=control group, 2=treatment group)} 16 | \item{karno}{Karnofsky performance score (0=bad, 100=good)} 17 | \item{diagtime}{Time from diagnosis to randomization (months)} 18 | \item{age}{Age (years, at baseline)} 19 | \item{prior}{Prior therapy (0=no, 1=yes)} 20 | \item{squamous}{Indicator for whether the cancer type is squamous cell 21 | carcinoma (0=no, 1=yes)} 22 | \item{small}{Indicator for whether the cancer type is small cell lung 23 | cancer (0=no, 1=yes)} 24 | \item{adeno}{Indicator for whether the cancer type is adenocarcinoma 25 | (0=no, 1=yes)} 26 | \item{large}{Indicator for whether the cancer type is large cell carcinoma 27 | (0=no, 1=yes)} 28 | } 29 | } 30 | \source{ 31 | \url{https://cran.r-project.org/package=survival} 32 | } 33 | \usage{ 34 | Lung 35 | } 36 | \description{ 37 | Data from a randomised trial of two treatment regimens for lung cancer. This 38 | is a standard survival analysis data set from the classic textbook by 39 | Kalbfleisch and Prentice. 40 | } 41 | \references{ 42 | \itemize{ 43 | \item Kalbfleisch D and Prentice RL (1980), \emph{The Statistical Analysis of 44 | Failure Time Data}. Wiley, New York. 45 | } 46 | } 47 | \seealso{ 48 | \code{grpsurv()} 49 | } 50 | \keyword{datasets} 51 | -------------------------------------------------------------------------------- /man/birthwt.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{birthwt.grpreg} 5 | \alias{birthwt.grpreg} 6 | \title{Risk Factors Associated with Low Infant Birth Weight} 7 | \format{ 8 | This data frame contains the following columns: 9 | \itemize{ 10 | \item\code{low} Indicator of birth weight less than 2.5kg \item\code{bwt} 11 | Birth weight in kilograms \item\code{age1,age2,age3} Orthogonal polynomials 12 | of first, second, and third degree representing mother's age in years 13 | \item\code{lwt1,lwt2,lwt3} Orthogonal polynomials of first, second, and 14 | third degree representing mother's weight in pounds at last menstrual period 15 | \item\code{white,black} Indicator functions for mother's race; "other" is 16 | reference group \item\code{smoke} smoking status during pregnancy 17 | \item\code{ptl1,ptl2m} Indicator functions for one or for two or more 18 | previous premature labors, respectively. No previous premature labors is 19 | the reference category. 
\item\code{ht} History of hypertension 20 | \item\code{ui} Presence of uterine irritability \item\code{ftv1,ftv2,ftv3m} 21 | Indicator functions for one, for two, or for three or more physician visits 22 | during the first trimester, respectively. No visits is the reference 23 | category. 24 | } 25 | } 26 | \usage{ 27 | birthwt.grpreg 28 | } 29 | \description{ 30 | This version of the data set has been deprecated and will not be supported 31 | in future versions. Please use \code{\link{Birthwt}} instead. 32 | } 33 | \seealso{ 34 | \code{\link{Birthwt}} 35 | } 36 | \keyword{datasets} 37 | -------------------------------------------------------------------------------- /man/cv.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cv-grpreg.R, R/cv-grpsurv.R 3 | \name{cv.grpreg} 4 | \alias{cv.grpreg} 5 | \alias{cv.grpsurv} 6 | \title{Cross-validation for grpreg/grpsurv} 7 | \usage{ 8 | cv.grpreg( 9 | X, 10 | y, 11 | group = 1:ncol(X), 12 | ..., 13 | nfolds = 10, 14 | seed, 15 | fold, 16 | returnY = FALSE, 17 | trace = FALSE 18 | ) 19 | 20 | cv.grpsurv( 21 | X, 22 | y, 23 | group = 1:ncol(X), 24 | ..., 25 | nfolds = 10, 26 | seed, 27 | fold, 28 | se = c("quick", "bootstrap"), 29 | returnY = FALSE, 30 | trace = FALSE 31 | ) 32 | } 33 | \arguments{ 34 | \item{X}{The design matrix, as in \code{\link[=grpreg]{grpreg()}}/\code{\link[=grpsurv]{grpsurv()}}.} 35 | 36 | \item{y}{The response vector (or matrix), as in \code{\link[=grpreg]{grpreg()}}/\code{\link[=grpsurv]{grpsurv()}}.} 37 | 38 | \item{group}{The grouping vector, as in \code{\link[=grpreg]{grpreg()}}/\code{\link[=grpsurv]{grpsurv()}}.} 39 | 40 | \item{...}{Additional arguments to \code{\link[=grpreg]{grpreg()}}/\code{\link[=grpsurv]{grpsurv()}}.} 41 | 42 | \item{nfolds}{The number of cross-validation folds. Default is 10.} 43 | 44 | \item{seed}{You may set the seed of the random number generator in order to 45 | obtain reproducible results.} 46 | 47 | \item{fold}{Which fold each observation belongs to. By default the 48 | observations are randomly assigned.} 49 | 50 | \item{returnY}{Should cv.grpreg()/cv.grpsurv() return the fitted 51 | values from the cross-validation folds? Default is FALSE; if TRUE, this 52 | will return a matrix in which the element for row i, column j is the fitted 53 | value for observation i from the fold in which observation i was excluded 54 | from the fit, at the jth value of lambda. NOTE: For \code{cv.grpsurv()}, the 55 | rows of \code{Y} are ordered by time on study, and therefore will not 56 | correspond to the original order of observations passed to \code{cv.grpsurv}.} 57 | 58 | \item{trace}{If set to TRUE, cv.grpreg will inform the user of its progress 59 | by announcing the beginning of each CV fold. Default is FALSE.} 60 | 61 | \item{se}{For \code{cv.grpsurv()}, the method by which the cross-validation 62 | standard error (CVSE) is calculated. The 'quick' approach is based on a 63 | rough approximation, but can be calculated more or less instantly.
The 64 | 'bootstrap' approach is more accurate, but requires additional computing 65 | time.} 66 | } 67 | \value{ 68 | An object with S3 class \code{"cv.grpreg"} containing: 69 | \item{cve}{The error for each value of \code{lambda}, averaged across the 70 | cross-validation folds.} \item{cvse}{The estimated standard error associated 71 | with each value of \code{cve}.} \item{lambda}{The sequence of 72 | regularization parameter values along which the cross-validation error was 73 | calculated.} \item{fit}{The fitted \code{grpreg} object for the whole data.} 74 | \item{fold}{The fold assignments for cross-validation for each observation; 75 | note that for \code{cv.grpsurv}, these are in terms of the ordered 76 | observations, not the original observations.} \item{min}{The index of 77 | \code{lambda} corresponding to \code{lambda.min}.} \item{lambda.min}{The 78 | value of \code{lambda} with the minimum cross-validation error.} 79 | \item{null.dev}{The deviance for the intercept-only model.} 80 | \item{pe}{If \code{family="binomial"}, the cross-validation prediction error for 81 | each value of \code{lambda}.} 82 | } 83 | \description{ 84 | Performs k-fold cross validation for penalized regression models with 85 | grouped covariates over a grid of values for the regularization parameter 86 | lambda. 87 | } 88 | \details{ 89 | The function calls \code{\link[=grpreg]{grpreg()}} or \code{\link[=grpsurv]{grpsurv()}} \code{nfolds} times, each 90 | time leaving out 1/\code{nfolds} of the data. The cross-validation error is 91 | based on the deviance; 92 | \href{https://pbreheny.github.io/grpreg/articles/models.html}{see here for more details}. 93 | 94 | For Gaussian and Poisson responses, the folds are chosen according to simple 95 | random sampling. For binomial responses, the numbers for each outcome class 96 | are balanced across the folds; i.e., the number of outcomes in which 97 | \code{y} is equal to 1 is the same for each fold, or possibly off by 1 if 98 | the numbers do not divide evenly. This approach is used for Cox regression 99 | as well to balance the amount of censoring across each fold. 100 | 101 | For Cox models, \code{cv.grpsurv} uses the approach of calculating the full 102 | Cox partial likelihood using the cross-validated set of linear predictors. 103 | Other approaches to cross-validation for the Cox regression model have been 104 | proposed in the literature; the strengths and weaknesses of the various 105 | methods for penalized regression in the Cox model are the subject of current 106 | research. A simple approximation to the standard error is provided, 107 | although an option to bootstrap the standard error (\code{se='bootstrap'}) 108 | is also available. 109 | 110 | As in \code{\link[=grpreg]{grpreg()}}, seemingly unrelated regressions/multitask learning can 111 | be carried out by setting \code{y} to be a matrix, in which case groups are 112 | set up automatically (see \code{\link[=grpreg]{grpreg()}} for details), and 113 | cross-validation is carried out with respect to rows of \code{y}. As 114 | mentioned in the details there, it is recommended to standardize the 115 | responses prior to fitting.
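As a rough sketch of the balanced fold assignment described above (an illustration of the idea only, not the package's internal code), folds for a binary response could be built as follows and supplied via the \code{fold} argument:
\preformatted{
# Illustrative only: balance the two outcome classes across 10 folds
nfolds <- 10
fold <- integer(length(y))
fold[y == 1] <- sample(rep(1:nfolds, length.out = sum(y == 1)))
fold[y == 0] <- sample(rep(1:nfolds, length.out = sum(y == 0)))
cvfit <- cv.grpreg(X, y, group, fold = fold)
}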
116 | } 117 | \examples{ 118 | \dontshow{set.seed(1)} 119 | data(Birthwt) 120 | X <- Birthwt$X 121 | y <- Birthwt$bwt 122 | group <- Birthwt$group 123 | 124 | cvfit <- cv.grpreg(X, y, group) 125 | plot(cvfit) 126 | summary(cvfit) 127 | coef(cvfit) ## Beta at minimum CVE 128 | 129 | cvfit <- cv.grpreg(X, y, group, penalty="gel") 130 | plot(cvfit) 131 | summary(cvfit) 132 | 133 | } 134 | \seealso{ 135 | \code{\link[=grpreg]{grpreg()}}, \code{\link[=plot.cv.grpreg]{plot.cv.grpreg()}}, \code{\link[=summary.cv.grpreg]{summary.cv.grpreg()}}, 136 | \code{\link[=predict.cv.grpreg]{predict.cv.grpreg()}} 137 | } 138 | \author{ 139 | Patrick Breheny 140 | } 141 | -------------------------------------------------------------------------------- /man/expand_spline.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expand_spline.R 3 | \name{expand_spline} 4 | \alias{expand_spline} 5 | \title{Expand feature matrix using basis splines} 6 | \usage{ 7 | expand_spline(x, df = 3, degree = 3, type = c("ns", "bs")) 8 | } 9 | \arguments{ 10 | \item{x}{Features to be expanded (numeric matrix).} 11 | 12 | \item{df}{Degrees of freedom (numeric; default = 3).} 13 | 14 | \item{degree}{Degree of the piecewise polynomial (integer; default = 3 (cubic splines)).} 15 | 16 | \item{type}{Type of spline, either B-spline (\code{"bs"}) or natural cubic spline (\code{"ns"}; default).} 17 | } 18 | \value{ 19 | An object of class \code{expandedMatrix} consisting of: 20 | \itemize{ 21 | \item \code{X}: A matrix of dimension \code{nrow(x)} by \code{df*ncol(x)} 22 | \item \code{group}: A vector of length \code{df*ncol(x)} that describes the grouping structure 23 | \item Additional metadata on the splines, such as knot locations, required in order to evaluate spline at new feature values (e.g., for prediction) 24 | } 25 | } 26 | \description{ 27 | Performs a basis expansion for many features at once, returning output that is compatible 28 | for use with the \code{grpreg()} function. Returns an expanded matrix along with a vector 29 | that describes its grouping. 30 | } 31 | \details{ 32 | \code{expand_spline()} uses the function \code{\link[splines:bs]{splines::bs()}} or \code{\link[splines:ns]{splines::ns()}} to generate a basis 33 | matrix for each column of \code{x}. These matrices represent the spline basis for piecewise 34 | polynomials with specified degree evaluated separately for each original column of \code{x}. 35 | These matrices are then column-bound to form a single grouped matrix of derived features. A vector 36 | that describes the grouping present in the resulting matrix is also generated. The resulting 37 | object can be passed to \code{\link[=grpreg]{grpreg()}}. 38 | 39 | This methodology was originally proposed by Ravikumar et al. (2009), who named it SPAM (SParse Additive Modeling). 40 | } 41 | \examples{ 42 | \dontshow{set.seed(1)} 43 | Data <- gen_nonlinear_data(n=1000) 44 | X <- expand_spline(Data$X) 45 | fit <- grpreg(X, Data$y) 46 | plot_spline(fit, "V02", lambda = 0.03) 47 | } 48 | \references{ 49 | \itemize{ 50 | \item Ravikumar P, Lafferty J, Liu H and Wasserman L (2009). Sparse additive models. \emph{Journal of the Royal Statistical Society Series B}, \strong{71}: 1009-1030. 
51 | } 52 | } 53 | \seealso{ 54 | \code{\link[=plot_spline]{plot_spline()}} to visualize the resulting nonlinear fits 55 | } 56 | -------------------------------------------------------------------------------- /man/gBridge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gBridge.R 3 | \name{gBridge} 4 | \alias{gBridge} 5 | \title{Fit a group bridge regression path} 6 | \usage{ 7 | gBridge( 8 | X, 9 | y, 10 | group = 1:ncol(X), 11 | family = c("gaussian", "binomial", "poisson"), 12 | nlambda = 100, 13 | lambda, 14 | lambda.min = { 15 | if (nrow(X) > ncol(X)) 16 | 0.001 17 | else 0.05 18 | }, 19 | lambda.max, 20 | alpha = 1, 21 | eps = 0.001, 22 | delta = 1e-07, 23 | max.iter = 10000, 24 | gamma = 0.5, 25 | group.multiplier, 26 | warn = TRUE, 27 | returnX = FALSE, 28 | ... 29 | ) 30 | } 31 | \arguments{ 32 | \item{X}{The design matrix, as in \code{grpreg}.} 33 | 34 | \item{y}{The response vector (or matrix), as in \code{grpreg}.} 35 | 36 | \item{group}{The grouping vector, as in \code{grpreg}.} 37 | 38 | \item{family}{One of "gaussian", "binomial", or "poisson", depending on the response.} 39 | 40 | \item{nlambda}{The number of \code{lambda} values, as in \code{grpreg}.} 41 | 42 | \item{lambda}{A user supplied sequence of \code{lambda} values, as in \code{grpreg()}.} 43 | 44 | \item{lambda.min}{The smallest value for \code{lambda}, as in \code{grpreg}.} 45 | 46 | \item{lambda.max}{The maximum value for \code{lambda}. Unlike the penalties 47 | in \code{grpreg}, it is not possible to solve for \code{lambda.max} directly 48 | with group bridge models. Thus, it must be specified by the user. If it is 49 | not specified, \code{gBridge} will attempt to guess \code{lambda.max}, but 50 | this is not particularly accurate.} 51 | 52 | \item{alpha}{Tuning parameter for the balance between the group penalty and 53 | the L2 penalty, as in \code{grpreg}.} 54 | 55 | \item{eps}{Convergence threshold, as in \code{grpreg}.} 56 | 57 | \item{delta}{The group bridge penalty is not differentiable at zero, and 58 | requires a small number \code{delta} to bound it away from zero. There is 59 | typically no need to change this value.} 60 | 61 | \item{max.iter}{Maximum number of iterations, as in \code{grpreg}.} 62 | 63 | \item{gamma}{Tuning parameter of the group bridge penalty (the exponent to 64 | which the L1 norm of the coefficients in the group is raised). Default is 65 | 0.5, the square root.} 66 | 67 | \item{group.multiplier}{The multiplicative factor by which each group's 68 | penalty is to be multiplied, as in \code{grpreg}.} 69 | 70 | \item{warn}{Should the function give a warning if it fails to converge? As 71 | in \code{grpreg}.} 72 | 73 | \item{returnX}{Return the standardized design matrix (and associated group 74 | structure information)? Default is FALSE.} 75 | 76 | \item{...}{Not used.} 77 | } 78 | \value{ 79 | An object with S3 class \code{"grpreg"}, as in \code{grpreg}. 80 | } 81 | \description{ 82 | Fit regularization paths for linear and logistic group bridge-penalized 83 | regression models over a grid of values for the regularization parameter 84 | lambda. 85 | } 86 | \details{ 87 | This method fits the group bridge method of Huang et al. (2009). Unlike the 88 | penalties in \code{grpreg}, the group bridge is not differentiable at zero; 89 | because of this, a number of changes must be made to the algorithm, which is 90 | why it has its own function.
Most notably, the method is unable to start at 91 | \code{lambda.max}; it must start at \code{lambda.min} and proceed in the 92 | opposite direction. 93 | 94 | In other respects, the usage and behavior of the function are similar to the 95 | rest of the \code{grpreg} package. 96 | } 97 | \examples{ 98 | data(Birthwt) 99 | X <- Birthwt$X 100 | group <- Birthwt$group 101 | 102 | ## Linear regression 103 | y <- Birthwt$bwt 104 | fit <- gBridge(X, y, group, lambda.max=0.08) 105 | plot(fit) 106 | select(fit)$beta 107 | 108 | ## Logistic regression 109 | y <- Birthwt$low 110 | fit <- gBridge(X, y, group, family="binomial", lambda.max=0.17) 111 | plot(fit) 112 | select(fit)$beta 113 | } 114 | \references{ 115 | \itemize{ 116 | \item Huang J, Ma S, Xie H, and Zhang C. (2009) A group bridge approach for 117 | variable selection. \emph{Biometrika}, \strong{96}: 339-355. \doi{10.1093/biomet/asp020} 118 | 119 | \item Breheny P and Huang J. (2009) Penalized methods for bi-level variable 120 | selection. \emph{Statistics and its interface}, \strong{2}: 369-380. 121 | \doi{10.4310/sii.2009.v2.n3.a10} 122 | } 123 | } 124 | \seealso{ 125 | \code{\link[=grpreg]{grpreg()}} 126 | } 127 | -------------------------------------------------------------------------------- /man/gen_nonlinear_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gen_nonlinear_data.R 3 | \name{gen_nonlinear_data} 4 | \alias{gen_nonlinear_data} 5 | \title{Generate nonlinear example data} 6 | \usage{ 7 | gen_nonlinear_data(n = 100, p = 16, seed) 8 | } 9 | \arguments{ 10 | \item{n}{Sample size (numeric; default = 100).} 11 | 12 | \item{p}{Number of features (numeric; default = 16).} 13 | 14 | \item{seed}{Set to get different random data sets, passed to \code{\link[=set.seed]{set.seed()}}.} 15 | } 16 | \description{ 17 | Mainly intended to demonstrate the use of basis expansion models for sparse additive modeling; designed for use with \code{\link[=expand_spline]{expand_spline()}}. 18 | } 19 | \examples{ 20 | Data <- gen_nonlinear_data() 21 | } 22 | -------------------------------------------------------------------------------- /man/grpreg-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/grpreg-package.R 3 | \docType{package} 4 | \name{grpreg-package} 5 | \alias{grpreg-package} 6 | \title{grpreg: Regularization Paths for Regression Models with Grouped Covariates} 7 | \description{ 8 | Efficient algorithms for fitting the regularization path of linear regression, GLM, and Cox regression models with grouped penalties. This includes group selection methods such as group lasso, group MCP, and group SCAD as well as bi-level selection methods such as the group exponential lasso, the composite MCP, and the group bridge. For more information, see Breheny and Huang (2009) \doi{10.4310/sii.2009.v2.n3.a10}, Huang, Breheny, and Ma (2012) \doi{10.1214/12-sts392}, Breheny and Huang (2015) \doi{10.1007/s11222-013-9424-2}, and Breheny (2015) \doi{10.1111/biom.12300}, or visit the package homepage \url{https://pbreheny.github.io/grpreg/}. 9 | } 10 | \examples{ 11 | \donttest{vignette("getting-started", package="grpreg")} 12 | } 13 | \references{ 14 | \itemize{ 15 | \item Yuan M and Lin Y. (2006) Model selection and estimation in regression 16 | with grouped variables.
\emph{Journal of the Royal Statistical Society Series B}, 17 | \strong{68}: 49-67. \doi{10.1111/j.1467-9868.2005.00532.x} 18 | 19 | \item Huang J, Ma S, Xie H, and Zhang C. (2009) A group bridge approach for 20 | variable selection. \emph{Biometrika}, \strong{96}: 339-355. \doi{10.1093/biomet/asp020} 21 | 22 | \item Breheny P and Huang J. (2009) Penalized methods for bi-level variable 23 | selection. \emph{Statistics and its interface}, \strong{2}: 369-380. 24 | \doi{10.4310/sii.2009.v2.n3.a10} 25 | 26 | \item Huang J, Breheny P, and Ma S. (2012). A selective review of group 27 | selection in high dimensional models. \emph{Statistical Science}, \strong{27}: 481-499. 28 | \doi{10.1214/12-sts392} 29 | 30 | \item Breheny P and Huang J. (2015) Group descent algorithms for nonconvex 31 | penalized linear and logistic regression models with grouped predictors. 32 | \emph{Statistics and Computing}, \strong{25}: 173-187. \doi{10.1007/s11222-013-9424-2} 33 | 34 | \item Breheny P. (2015) The group exponential lasso for bi-level variable 35 | selection. \emph{Biometrics}, \strong{71}: 731-740. \doi{10.1111/biom.12300} 36 | } 37 | } 38 | \seealso{ 39 | Useful links: 40 | \itemize{ 41 | \item \url{https://pbreheny.github.io/grpreg/} 42 | \item \url{https://github.com/pbreheny/grpreg} 43 | \item Report bugs at \url{https://github.com/pbreheny/grpreg/issues} 44 | } 45 | 46 | } 47 | \author{ 48 | Patrick Breheny 49 | } 50 | \keyword{internal} 51 | -------------------------------------------------------------------------------- /man/logLik.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/logLik.R 3 | \name{logLik.grpreg} 4 | \alias{logLik.grpreg} 5 | \alias{logLik} 6 | \alias{logLik.grpsurv} 7 | \title{logLik method for grpreg} 8 | \usage{ 9 | \method{logLik}{grpreg}(object, df.method = c("default", "active"), REML = FALSE, ...) 10 | 11 | \method{logLik}{grpsurv}(object, df.method = c("default", "active"), ...) 12 | } 13 | \arguments{ 14 | \item{object}{A fitted \code{grpreg} or \code{grpsurv} object, as obtained from 15 | \code{\link[=grpreg]{grpreg()}} or \code{\link[=grpsurv]{grpsurv()}}} 16 | 17 | \item{df.method}{How should effective model parameters be calculated? One 18 | of: \code{"active"}, which counts the number of nonzero coefficients; or 19 | \code{"default"}, which uses the calculated \code{df} returned by 20 | \code{grpreg}. Default is \code{"default"}.} 21 | 22 | \item{REML}{Use restricted MLE for estimation of the scale parameter in a 23 | gaussian model? Default is FALSE.} 24 | 25 | \item{...}{For S3 method compatibility.} 26 | } 27 | \value{ 28 | Returns an object of class 'logLik', in this case consisting of a 29 | number (or vector of numbers) with two attributes: 'df' (the estimated 30 | degrees of freedom in the model) and 'nobs' (number of observations). 31 | 32 | The 'print' method for 'logLik' objects is not intended to handle vectors; 33 | consequently, the value of the function does not necessarily display 34 | correctly. However, it works with 'AIC' and 'BIC' without any glitches and 35 | returns the expected vectorized output. 36 | } 37 | \description{ 38 | Calculates the log likelihood and degrees of freedom for a fitted grpreg 39 | object. 40 | } 41 | \details{ 42 | Exists mainly for use with \code{\link[stats:AIC]{stats::AIC()}} and \code{\link[stats:AIC]{stats::BIC()}}. 
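For instance, the values that \code{AIC()} and \code{BIC()} compute from this method can be reproduced by hand using the \code{df} and \code{nobs} attributes described above (a minimal sketch):
\preformatted{
ll <- logLik(fit)  # one log-likelihood per value of lambda
-2 * as.numeric(ll) + 2 * attr(ll, "df")                      # AIC(fit)
-2 * as.numeric(ll) + log(attr(ll, "nobs")) * attr(ll, "df")  # BIC(fit)
}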
43 | } 44 | \examples{ 45 | data(Birthwt) 46 | X <- Birthwt$X 47 | y <- Birthwt$bwt 48 | group <- Birthwt$group 49 | fit <- grpreg(X,y,group,penalty="cMCP") 50 | logLik(fit) ## Display is glitchy for vectors 51 | AIC(fit) 52 | BIC(fit) 53 | } 54 | \seealso{ 55 | \code{\link[=grpreg]{grpreg()}} 56 | } 57 | \author{ 58 | Patrick Breheny 59 | } 60 | -------------------------------------------------------------------------------- /man/plot.cv.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot-cv.R 3 | \name{plot.cv.grpreg} 4 | \alias{plot.cv.grpreg} 5 | \title{Plots the cross-validation curve from a \code{cv.grpreg} object} 6 | \usage{ 7 | \method{plot}{cv.grpreg}( 8 | x, 9 | log.l = TRUE, 10 | type = c("cve", "rsq", "scale", "snr", "pred", "all"), 11 | selected = TRUE, 12 | vertical.line = TRUE, 13 | col = "red", 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{x}{A \code{cv.grpreg} object.} 19 | 20 | \item{log.l}{Should horizontal axis be on the log scale? Default is TRUE.} 21 | 22 | \item{type}{What to plot on the vertical axis. \code{cve} plots the 23 | cross-validation error (deviance); \code{rsq} plots an estimate of the 24 | fraction of the deviance explained by the model (R-squared); \code{snr} 25 | plots an estimate of the signal-to-noise ratio; \code{scale} plots, for 26 | \code{family="gaussian"}, an estimate of the scale parameter (standard 27 | deviation); \code{pred} plots, for \code{family="binomial"}, the estimated 28 | prediction error; \code{all} produces all of the above.} 29 | 30 | \item{selected}{If \code{TRUE} (the default), places an axis on top of the 31 | plot denoting the number of groups in the model (i.e., that contain a 32 | nonzero regression coefficient) at that value of \code{lambda}.} 33 | 34 | \item{vertical.line}{If \code{TRUE} (the default), draws a vertical line at 35 | the value where cross-validation error is minimized.} 36 | 37 | \item{col}{Controls the color of the dots (CV estimates).} 38 | 39 | \item{\dots}{Other graphical parameters to \code{plot}} 40 | } 41 | \description{ 42 | Plots the cross-validation curve from a \code{cv.grpreg} object, along with 43 | standard error bars. 44 | } 45 | \details{ 46 | Error bars representing approximate +/- 1 SE (68\\% confidence intervals) are 47 | plotted along with the estimates at each value of \code{lambda}. For \code{rsq} 48 | and \code{snr}, these confidence intervals are quite crude, especially near 49 | zero, and will hopefully be improved upon in later versions of 50 | \code{grpreg}.
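All of the quantities shown are available from the fitted object, so the default \code{type="cve"} display can be approximated by hand (a rough sketch, not the actual plotting code):
\preformatted{
ll <- log(cvfit$lambda)
plot(ll, cvfit$cve, xlim = rev(range(ll)), pch = 19, col = "red",
     xlab = expression(log(lambda)), ylab = "Cross-validation error")
# +/- 1 SE error bars
arrows(ll, cvfit$cve - cvfit$cvse, ll, cvfit$cve + cvfit$cvse,
       angle = 90, code = 3, length = 0.02, col = "gray70")
}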
51 | } 52 | \examples{ 53 | \dontshow{set.seed(1)} 54 | # Birthweight data 55 | data(Birthwt) 56 | X <- Birthwt$X 57 | group <- Birthwt$group 58 | 59 | # Linear regression 60 | y <- Birthwt$bwt 61 | cvfit <- cv.grpreg(X, y, group) 62 | plot(cvfit) 63 | op <- par(mfrow=c(2,2)) 64 | plot(cvfit, type="all") 65 | 66 | ## Logistic regression 67 | y <- Birthwt$low 68 | cvfit <- cv.grpreg(X, y, group, family="binomial") 69 | par(op) 70 | plot(cvfit) 71 | par(mfrow=c(2,2)) 72 | plot(cvfit, type="all") 73 | } 74 | \seealso{ 75 | \code{\link[=grpreg]{grpreg()}}, \code{\link[=cv.grpreg]{cv.grpreg()}} 76 | } 77 | -------------------------------------------------------------------------------- /man/plot.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.R 3 | \name{plot.grpreg} 4 | \alias{plot.grpreg} 5 | \title{Plot coefficients from a "grpreg" object} 6 | \usage{ 7 | \method{plot}{grpreg}(x, alpha = 1, legend.loc, label = FALSE, log.l = FALSE, norm = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{Fitted \code{"grpreg"} model.} 11 | 12 | \item{alpha}{Controls alpha-blending. Default is alpha=1.} 13 | 14 | \item{legend.loc}{Where should the legend go? If left unspecified, no 15 | legend is drawn. See \code{\link[graphics]{legend}} for details.} 16 | 17 | \item{label}{If TRUE, annotates the plot with text labels in the right 18 | margin describing which variable/group the corresponding line belongs to.} 19 | 20 | \item{log.l}{Should horizontal axis be on the log scale? Default is FALSE.} 21 | 22 | \item{norm}{If \code{TRUE}, plot the norm of each group, rather than the 23 | individual coefficients.} 24 | 25 | \item{\dots}{Other graphical parameters to \code{plot}, \code{matlines}, or 26 | \code{legend}} 27 | } 28 | \description{ 29 | Produces a plot of the coefficient paths for a fitted \code{grpreg} object. 30 | } 31 | \examples{ 32 | # Fit model to birthweight data 33 | data(Birthwt) 34 | X <- Birthwt$X 35 | y <- Birthwt$bwt 36 | group <- Birthwt$group 37 | fit <- grpreg(X, y, group, penalty="grLasso") 38 | 39 | # Plot (basic) 40 | plot(fit) 41 | 42 | # Plot group norms, with labels in right margin 43 | plot(fit, norm=TRUE, label=TRUE) 44 | 45 | # Plot (miscellaneous options) 46 | myColors <- c("black", "red", "green", "blue", "yellow", "purple", 47 | "orange", "brown") 48 | plot(fit, legend.loc="topleft", col=myColors) 49 | labs <- c("Mother's Age", "# Phys. visits", "Hypertension", "Mother's weight", 50 | "# Premature", "Race", "Smoking", "Uterine irritability") 51 | plot(fit, legend.loc="topleft", lwd=6, alpha=0.5, legend=labs) 52 | plot(fit, norm=TRUE, legend.loc="topleft", lwd=6, alpha=0.5, legend=labs) 53 | } 54 | \seealso{ 55 | \code{\link[=grpreg]{grpreg()}} 56 | } 57 | -------------------------------------------------------------------------------- /man/plot.grpsurv.func.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot-grpsurv-func.R 3 | \name{plot.grpsurv.func} 4 | \alias{plot.grpsurv.func} 5 | \title{Plot survival curve for grpsurv model} 6 | \usage{ 7 | \method{plot}{grpsurv.func}(x, alpha = 1, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{'grpsurv.func'} object, which is returned by 11 | \code{predict.grpsurv} if \code{type='survival'} is specified. 
See 12 | examples.} 13 | 14 | \item{alpha}{Controls alpha-blending (i.e., transparency). Useful if many 15 | overlapping lines are present.} 16 | 17 | \item{\dots}{Other graphical parameters to pass to \code{plot}} 18 | } 19 | \description{ 20 | Plot survival curve for a model that has been fit using \code{grpsurv} 21 | followed by a prediction of the survival function using 22 | \code{predict.grpsurv} 23 | } 24 | \examples{ 25 | 26 | data(Lung) 27 | X <- Lung$X 28 | y <- Lung$y 29 | group <- Lung$group 30 | fit <- grpsurv(X, y, group) 31 | 32 | # A single survival curve 33 | S <- predict(fit, X[1,], type='survival', lambda=.05) 34 | plot(S, xlim=c(0,200)) 35 | 36 | # Lots of survival curves 37 | S <- predict(fit, X, type='survival', lambda=.05) 38 | plot(S, xlim=c(0,200), alpha=0.3) 39 | } 40 | \seealso{ 41 | \code{\link{grpsurv}}, \code{\link{predict.grpsurv}} 42 | } 43 | \author{ 44 | Patrick Breheny 45 | } 46 | -------------------------------------------------------------------------------- /man/plot_spline.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_spline.R 3 | \name{plot_spline} 4 | \alias{plot_spline} 5 | \title{Plot spline curve for a fitted additive model} 6 | \usage{ 7 | plot_spline( 8 | fit, 9 | variable, 10 | lambda, 11 | which = NULL, 12 | partial = FALSE, 13 | type = "contrast", 14 | warnings = TRUE, 15 | points.par = NULL, 16 | add = FALSE, 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{fit}{A \code{grpreg} object. The model must have been fit using an \code{expand_spline} object.} 22 | 23 | \item{variable}{The name of the variable which will be plotted (character).} 24 | 25 | \item{lambda}{Values of the regularization parameter \code{lambda} which will be used for the plot. If a vector is passed, a curve will be drawn for each value of lambda (numeric vector; if a \code{cv.grpreg} object is passed, the \code{lambda} value minimizing cross-validation error will be used as a default; otherwise, there is no default value)} 26 | 27 | \item{which}{Index of penalty parameter \code{lambda} which will be used for the plot. If both \code{lambda} and \code{which} are specified, \code{lambda} takes precedence (integer vector).} 28 | 29 | \item{partial}{If \code{TRUE}, a scatter plot of the partial residuals is superimposed on the curve (logical; default = \code{FALSE}). If multiple lambdas are specified, the largest value is used to calculate the residuals.} 30 | 31 | \item{type}{Type of plot to be produced (default = \code{"contrast"}). The following options are supported: 32 | \itemize{ 33 | \item If \code{"conditional"}, the plot returned shows the value of the variable on the x-axis and the change in linear predictor on the y-axis, holding all other variables constant at their mean value. 34 | \item If \code{"contrast"}, the plot returned shows the effect on the linear predictor by moving the x variable away from its mean. 35 | }} 36 | 37 | \item{warnings}{If \code{FALSE}, warnings will be suppressed (default = \code{TRUE}).} 38 | 39 | \item{points.par}{List of parameters (see \code{\link[=par]{par()}}) to pass to \code{\link[=points]{points()}} when \code{partial=TRUE}.} 40 | 41 | \item{add}{Add spline to existing plot? (default: FALSE)} 42 | 43 | \item{...}{Further arguments to be passed to \code{plot()}.
Note that these arguments also control the appearance of the lines.} 44 | } 45 | \description{ 46 | Plots a spline curve for a single variable using a \code{grpreg} or \code{cv.grpreg} object for which an additive model was fit. 47 | } 48 | \details{ 49 | \code{plot_spline()} takes a model fit using both the \code{\link[=grpreg]{grpreg()}} and \code{\link[=expand_spline]{expand_spline()}} functions and plots a spline curve for a given variable. 50 | } 51 | \examples{ 52 | \dontshow{set.seed(1)} 53 | Data <- gen_nonlinear_data(n=1000) 54 | X <- expand_spline(Data$X) 55 | fit <- grpreg(X, Data$y) 56 | plot_spline(fit, "V02", lambda = 0.03) 57 | plot_spline(fit, "V02", which = c(10, 90)) 58 | plot_spline(fit, "V02", lambda = 0.03, partial=TRUE) 59 | plot_spline(fit, "V02", lambda = 0.03, partial=TRUE, type='conditional') 60 | plot_spline(fit, "V02", lambda = 0.03, partial=TRUE, lwd=6, col='yellow', 61 | points.par=list(pch=9, col='blue')) 62 | 63 | op <- par(mfrow=c(3,2), mar=c(4.5, 4.5, 0.25, 0.25)) 64 | for (i in 1:6) plot_spline(fit, sprintf("V\%02d", i), lambda = 0.03, partial=TRUE) 65 | par(op) 66 | 67 | cvfit <- cv.grpreg(X, Data$y) 68 | plot_spline(cvfit, "V02") 69 | plot_spline(cvfit, "V02", partial=TRUE) 70 | } 71 | -------------------------------------------------------------------------------- /man/predict.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict-cv.R, R/predict.R 3 | \name{predict.cv.grpreg} 4 | \alias{predict.cv.grpreg} 5 | \alias{coef.cv.grpreg} 6 | \alias{predict.grpreg} 7 | \alias{coef.grpreg} 8 | \title{Model predictions based on a fitted \code{grpreg} object} 9 | \usage{ 10 | \method{predict}{cv.grpreg}( 11 | object, 12 | X, 13 | lambda = object$lambda.min, 14 | which = object$min, 15 | type = c("link", "response", "class", "coefficients", "vars", "groups", "nvars", 16 | "ngroups", "norm"), 17 | ... 18 | ) 19 | 20 | \method{coef}{cv.grpreg}(object, lambda = object$lambda.min, which = object$min, ...) 21 | 22 | \method{predict}{grpreg}( 23 | object, 24 | X, 25 | type = c("link", "response", "class", "coefficients", "vars", "groups", "nvars", 26 | "ngroups", "norm"), 27 | lambda, 28 | which = 1:length(object$lambda), 29 | ... 30 | ) 31 | 32 | \method{coef}{grpreg}(object, lambda, which = 1:length(object$lambda), drop = TRUE, ...) 33 | } 34 | \arguments{ 35 | \item{object}{Fitted \code{"grpreg"} or \code{"cv.grpreg"} model object.} 36 | 37 | \item{X}{Matrix of values at which predictions are to be made. Not used for 38 | \code{type="coefficients"}} 39 | 40 | \item{lambda}{Values of the regularization parameter \code{lambda} at which 41 | predictions are requested. For values of \code{lambda} not in the sequence 42 | of fitted models, linear interpolation is used.} 43 | 44 | \item{which}{Indices of the penalty parameter \code{lambda} at which 45 | predictions are required. By default, all indices are returned. 
If 46 | \code{lambda} is specified, this will override \code{which}.} 47 | 48 | \item{type}{Type of prediction: \code{"link"} returns the linear predictors; 49 | \code{"response"} gives the fitted values; \code{"class"} returns the 50 | binomial outcome with the highest probability; \code{"coefficients"} returns 51 | the coefficients; \code{"vars"} returns the indices for the nonzero 52 | coefficients; \code{"groups"} returns the indices for the groups with at 53 | least one nonzero coefficient; \code{"nvars"} returns the number of nonzero 54 | coefficients; \code{"ngroups"} returns the number of groups with at least 55 | one nonzero coefficient; \code{"norm"} returns the L2 norm of the 56 | coefficients in each group.} 57 | 58 | \item{\dots}{Not used.} 59 | 60 | \item{drop}{By default, if a single value of \code{lambda} is supplied, a 61 | vector of coefficients is returned. Set \code{drop=FALSE} if you wish to 62 | have \code{coef} always return a matrix (see \code{\link{drop}}).} 63 | } 64 | \value{ 65 | The object returned depends on \code{type}. 66 | } 67 | \description{ 68 | Similar to other predict methods, this function returns predictions from a 69 | fitted \code{"grpreg"} object. 70 | } 71 | \details{ 72 | \code{coef} and \code{predict} methods are provided for \code{"cv.grpreg"} 73 | objects as a convenience. They simply call \code{coef.grpreg} and 74 | \code{predict.grpreg} with \code{lambda} set to the value that minimizes the 75 | cross-validation error. 76 | } 77 | \examples{ 78 | # Fit penalized logistic regression model to birthweight data 79 | data(Birthwt) 80 | X <- Birthwt$X 81 | y <- Birthwt$low 82 | group <- Birthwt$group 83 | fit <- grpreg(X, y, group, penalty="grLasso", family="binomial") 84 | 85 | # Coef and predict methods 86 | coef(fit, lambda=.001) 87 | predict(fit, X, type="link", lambda=.07)[1:10] 88 | predict(fit, X, type="response", lambda=.07)[1:10] 89 | predict(fit, X, type="class", lambda=.01)[1:15] 90 | predict(fit, type="vars", lambda=.07) 91 | predict(fit, type="groups", lambda=.07) 92 | predict(fit, type="norm", lambda=.07) 93 | 94 | # Coef and predict methods for cross-validation 95 | cvfit <- cv.grpreg(X, y, group, family="binomial", penalty="grMCP") 96 | coef(cvfit) 97 | predict(cvfit, X)[1:10] 98 | predict(cvfit, X, type="response")[1:10] 99 | predict(cvfit, type="groups") 100 | } 101 | \seealso{ 102 | \code{grpreg} 103 | } 104 | \author{ 105 | Patrick Breheny 106 | } 107 | -------------------------------------------------------------------------------- /man/predict.grpsurv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict-surv.R 3 | \name{predict.grpsurv} 4 | \alias{predict.grpsurv} 5 | \title{Model predictions for grpsurv objects} 6 | \usage{ 7 | \method{predict}{grpsurv}( 8 | object, 9 | X, 10 | type = c("link", "response", "survival", "hazard", "median", "norm", "coefficients", 11 | "vars", "nvars", "groups", "ngroups"), 12 | lambda, 13 | which = 1:length(object$lambda), 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{Fitted \code{grpsurv} model object.} 19 | 20 | \item{X}{Matrix of values at which predictions are to be made.
Not required for some \code{type} values.} 21 | 22 | \item{type}{Type of prediction: 23 | \itemize{ 24 | \item \code{link}: linear predictors 25 | \item \code{response}: risk (i.e., \code{exp(link)}) 26 | \item \code{survival}: the estimated survival function 27 | \item \code{hazard}: the estimated cumulative hazard function 28 | \item \code{median}: median survival time 29 | \item The other options are all identical to their \code{\link[=grpreg]{grpreg()}} counterparts 30 | }} 31 | 32 | \item{lambda}{Regularization parameter at which predictions are requested. For values of \code{lambda} not in the sequence of fitted models, linear interpolation is used.} 33 | 34 | \item{which}{Indices of the penalty parameter \code{lambda} at which predictions are required. Default: all indices. If \code{lambda} is specified, this will override \code{which}.} 35 | 36 | \item{...}{Not used.} 37 | } 38 | \value{ 39 | The object returned depends on \code{type}. 40 | } 41 | \description{ 42 | Similar to other predict methods, this function returns predictions from a fitted \code{grpsurv} object. 43 | } 44 | \details{ 45 | Estimation of the baseline survival function, conditional on the estimated values of \code{beta}, is carried out according to the method described in Chapter 4.3 of Kalbfleisch and Prentice. 46 | } 47 | \examples{ 48 | data(Lung) 49 | X <- Lung$X 50 | 51 | y <- Lung$y 52 | group <- Lung$group 53 | 54 | fit <- grpsurv(X, y, group) 55 | coef(fit, lambda=0.05) 56 | head(predict(fit, X, type="link", lambda=0.05)) 57 | head(predict(fit, X, type="response", lambda=0.05)) 58 | 59 | # Survival function 60 | S <- predict(fit, X[1,], type="survival", lambda=0.05) 61 | S(100) 62 | S <- predict(fit, X, type="survival", lambda=0.05) 63 | plot(S, xlim=c(0,200)) 64 | 65 | # Medians 66 | predict(fit, X[1,], type="median", lambda=0.05) 67 | M <- predict(fit, X, type="median") 68 | M[1:10, 1:10] 69 | 70 | # Nonzero coefficients 71 | predict(fit, type="vars", lambda=c(0.1, 0.01)) 72 | predict(fit, type="nvars", lambda=c(0.1, 0.01)) 73 | } 74 | \references{ 75 | \itemize{ 76 | \item Kalbfleisch JD and Prentice RL (2002). The Statistical Analysis of Failure Time Data, 2nd edition. Wiley. 77 | } 78 | } 79 | \seealso{ 80 | \code{\link[=grpsurv]{grpsurv()}} 81 | } 82 | \author{ 83 | Patrick Breheny 84 | } 85 | -------------------------------------------------------------------------------- /man/residuals.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/residuals.R 3 | \name{residuals.grpreg} 4 | \alias{residuals.grpreg} 5 | \title{Extract residuals from a grpreg or grpsurv fit} 6 | \usage{ 7 | \method{residuals}{grpreg}(object, lambda, which = 1:length(object$lambda), drop = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{Object of class \code{grpreg} or \code{grpsurv}.} 11 | 12 | \item{lambda}{Values of the regularization parameter at which residuals are requested (numeric vector). For values of lambda not in the sequence of fitted models, linear interpolation is used.} 13 | 14 | \item{which}{Index of the penalty parameter at which residuals are requested (default = all indices). If \code{lambda} is specified, this takes precedence over \code{which}.} 15 | 16 | \item{drop}{By default, if a single value of lambda is supplied, a vector of residuals is returned (logical; default=\code{TRUE}).
Set \code{drop=FALSE} if you wish to have the function always return a matrix (see \code{\link[=drop]{drop()}}).} 17 | 18 | \item{...}{Not used.} 19 | } 20 | \description{ 21 | Currently, only deviance residuals are supported. 22 | } 23 | \examples{ 24 | data(Birthwt) 25 | X <- Birthwt$X 26 | y <- Birthwt$bwt 27 | group <- Birthwt$group 28 | fit <- grpreg(X, y, group, returnX=TRUE) 29 | residuals(fit)[1:5, 1:5] 30 | head(residuals(fit, lambda=0.1)) 31 | } 32 | -------------------------------------------------------------------------------- /man/select.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/select.R 3 | \name{select} 4 | \alias{select} 5 | \alias{select.grpreg} 6 | \title{Select a value of lambda along a grpreg path} 7 | \usage{ 8 | select(obj, ...) 9 | 10 | \method{select}{grpreg}( 11 | obj, 12 | criterion = c("BIC", "AIC", "GCV", "AICc", "EBIC"), 13 | df.method = c("default", "active"), 14 | smooth = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{obj}{A fitted grpreg object.} 20 | 21 | \item{\dots}{For S3 method compatibility.} 22 | 23 | \item{criterion}{The criterion by which to select the regularization 24 | parameter. One of \code{"AIC"}, \code{"BIC"}, \code{"GCV"}, \code{"AICc"}, 25 | or \code{"EBIC"}; default is \code{"BIC"}.} 26 | 27 | \item{df.method}{How should effective model parameters be calculated? One 28 | of: \code{"active"}, which counts the number of nonzero coefficients; or 29 | \code{"default"}, which uses the calculated \code{df} returned by 30 | \code{grpreg}. Default is \code{"default"}.} 31 | 32 | \item{smooth}{Applies a smoother to the information criteria before 33 | selecting the optimal value.} 34 | } 35 | \value{ 36 | A list containing: 37 | \describe{ 38 | \item{lambda}{The selected value of the regularization parameter, \code{lambda}.} 39 | \item{beta}{The vector of coefficients at the chosen value of \code{lambda}.} 40 | \item{df}{The effective number of model parameters at the chosen value of \code{lambda}.} 41 | \item{IC}{A vector of the calculated model selection criteria for each point on the regularization path.} 42 | } 43 | } 44 | \description{ 45 | Selects a point along the regularization path of a fitted grpreg object 46 | according to an information criterion: AIC, BIC, GCV, AICc, or EBIC.
47 | } 48 | \details{ 49 | The criteria are defined as follows, where \eqn{L}{L} is the deviance (i.e., 50 | -2 times the log-likelihood), \eqn{\nu}{df} is the degrees of freedom, and 51 | \eqn{n}{n} is the sample size: 52 | 53 | \deqn{AIC = L + 2\nu}{AIC = L + 2*df} \deqn{BIC = L + \log(n)\nu}{BIC = L + 54 | log(n)*df} \deqn{GCV = \frac{L}{(1-\nu/n)^2}}{GCV = L/((1-df/n)^2)} 55 | \deqn{AICc = AIC + 2\frac{\nu(\nu+1)}{n-\nu-1}}{AICc = AIC + 56 | 2*df*(df+1)/(n-df-1)} \deqn{EBIC = BIC + 2 \log{p \choose \nu}}{EBIC = BIC + 57 | 2*log(p choose df)} 58 | } 59 | \examples{ 60 | data(Birthwt) 61 | X <- Birthwt$X 62 | y <- Birthwt$bwt 63 | group <- Birthwt$group 64 | fit <- grpreg(X, y, group, penalty="grLasso") 65 | select(fit) 66 | select(fit, crit="AIC", df="active") 67 | plot(fit) 68 | abline(v=select(fit)$lambda) 69 | par(mfrow=c(1,3)) 70 | l <- fit$lambda 71 | xlim <- rev(range(l)) 72 | plot(l, select(fit)$IC, xlim=xlim, pch=19, type="o", ylab="BIC") 73 | plot(l, select(fit, "AIC")$IC, xlim=xlim, pch=19, type="o", ylab="AIC") 74 | plot(l, select(fit, "GCV")$IC, xlim=xlim, pch=19, type="o", ylab="GCV") 75 | } 76 | \seealso{ 77 | \code{\link[=grpreg]{grpreg()}} 78 | } 79 | -------------------------------------------------------------------------------- /man/summary.cv.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary-cv-grpreg.R 3 | \name{summary.cv.grpreg} 4 | \alias{summary.cv.grpreg} 5 | \alias{print.summary.cv.grpreg} 6 | \title{Summarizing inferences based on cross-validation} 7 | \usage{ 8 | \method{summary}{cv.grpreg}(object, ...) 9 | 10 | \method{print}{summary.cv.grpreg}(x, digits, ...) 11 | } 12 | \arguments{ 13 | \item{object}{A \code{"cv.grpreg"} object.} 14 | 15 | \item{\dots}{Further arguments passed to or from other methods.} 16 | 17 | \item{x}{A \code{"summary.cv.grpreg"} object.} 18 | 19 | \item{digits}{Number of digits past the decimal point to print out. Can be 20 | a vector specifying different display digits for each of the five 21 | non-integer printed values.} 22 | } 23 | \value{ 24 | \code{summary(cvfit)} produces an object with S3 class 25 | \code{"summary.cv.grpreg"}.
The class has its own print method and contains 26 | the following list elements: \item{penalty}{The penalty used by 27 | \code{grpreg}/\code{grpsurv}.} \item{model}{The type of model: 28 | \code{"linear"}, \code{"logistic"}, \code{"Poisson"}, \code{"Cox"}, etc.} 29 | \item{n}{Number of observations} \item{p}{Number of regression coefficients 30 | (not including the intercept).} \item{min}{The index of \code{lambda} with 31 | the smallest cross-validation error.} \item{lambda}{The sequence of 32 | \code{lambda} values used by \code{cv.grpreg}/\code{cv.grpsurv}.} 33 | \item{cve}{Cross-validation error (deviance).} \item{r.squared}{Proportion 34 | of variance explained by the model, as estimated by cross-validation.} 35 | \item{snr}{Signal to noise ratio, as estimated by cross-validation.} 36 | \item{sigma}{For linear regression models, the scale parameter estimate.} 37 | \item{pe}{For logistic regression models, the prediction error 38 | (misclassification error).} 39 | } 40 | \description{ 41 | Summary method for \code{cv.grpreg} or \code{cv.grpsurv} objects 42 | } 43 | \examples{ 44 | 45 | # Birthweight data 46 | data(Birthwt) 47 | X <- Birthwt$X 48 | group <- Birthwt$group 49 | 50 | # Linear regression 51 | y <- Birthwt$bwt 52 | cvfit <- cv.grpreg(X, y, group) 53 | summary(cvfit) 54 | 55 | # Logistic regression 56 | y <- Birthwt$low 57 | cvfit <- cv.grpreg(X, y, group, family="binomial") 58 | summary(cvfit) 59 | 60 | # Cox regression 61 | data(Lung) 62 | cvfit <- with(Lung, cv.grpsurv(X, y, group)) 63 | summary(cvfit) 64 | } 65 | \seealso{ 66 | \code{\link{grpreg}}, \code{\link{cv.grpreg}}, 67 | \code{\link{cv.grpsurv}}, \code{\link{plot.cv.grpreg}} 68 | } 69 | \author{ 70 | Patrick Breheny 71 | } 72 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://pbreheny.github.io/grpreg/ 2 | 3 | template: 4 | bootstrap: 5 5 | light-switch: true 6 | math-rendering: katex 7 | includes: 8 | in_header: | 9 | 10 | 11 | 12 | 13 | figures: 14 | dev: png 15 | dpi: 96 16 | fig.ext: png 17 | fig.width: 5 18 | fig.height: 5 19 | fig.retina: 2 20 | bg: "#fff" 21 | 22 | navbar: 23 | structure: 24 | left: [intro, articles, reference, news] 25 | right: [search, github] 26 | 27 | home: 28 | sidebar: 29 | structure: [links, citation, authors, dev] 30 | 31 | footer: 32 | structure: 33 | left: developed_by 34 | right: last_updated 35 | components: 36 | last_updated: !expr paste("Last updated:", format(Sys.time(), "%B %d, %Y")) 37 | 38 | articles: 39 | - title: "Learn more" 40 | navbar: ~ 41 | contents: 42 | - articles/models 43 | - articles/penalties 44 | - articles/additive-models 45 | - articles/adaptive-rescaling 46 | 47 | reference: 48 | - title: Model fitting 49 | contents: 50 | - grpreg 51 | - grpsurv 52 | - gBridge 53 | - title: Model selection and cross-validation 54 | contents: 55 | - cv.grpreg 56 | - cv.grpsurv 57 | - plot.cv.grpreg 58 | - summary.cv.grpreg 59 | - AUC.cv.grpsurv 60 | - select 61 | - title: Plotting and extracting model features 62 | contents: 63 | - logLik.grpreg 64 | - predict.grpreg 65 | - predict.grpsurv 66 | - plot.grpreg 67 | - plot.grpsurv.func 68 | - residuals.grpreg 69 | - title: Utilities for additive models 70 | contents: 71 | - expand_spline 72 | - gen_nonlinear_data 73 | - plot_spline 74 | - title: Data sets 75 | contents: 76 | - Birthwt 77 | - Lung 78 | - title: internal 79 | contents: 80 | - birthwt.grpreg 81 | - 
grpreg-package 82 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | .dropdown-menu { 2 | --bs-dropdown-link-color: #aaa; 3 | } 4 | 5 | a { 6 | text-decoration:none; 7 | } 8 | 9 | a:hover { 10 | color: #00e7a7; 11 | } 12 | 13 | .navbar { 14 | background: color-mix(in oklab, var(--bs-link-color) 50%, var(--bs-body-bg) 50%); 15 | } 16 | 17 | [data-bs-theme="dark"] pre code { color: #f8f8f2 } 18 | [data-bs-theme="dark"] pre code span.al { color: #ff858f; background-color: #2a0f15; font-weight: bold } /* alert */ 19 | [data-bs-theme="dark"] pre code span.an { color: #f8f8f2} /* annotation */ 20 | [data-bs-theme="dark"] pre code span.at { color: #00e0e0} /* attribute */ 21 | [data-bs-theme="dark"] pre code span.bn { color: #61cc33} /* base n */ 22 | [data-bs-theme="dark"] pre code span.bu { color: #61cc33} /* builtin */ 23 | [data-bs-theme="dark"] pre code span.cf { color: #df9af4; font-weight: bold} /* control flow */ 24 | [data-bs-theme="dark"] pre code span.ch { color: #61cc33} /* character */ 25 | [data-bs-theme="dark"] pre code span.cn { color: #61cc33} /* constant */ 26 | [data-bs-theme="dark"] pre code span.r-in span.co { color: #6abafb; font-style: italic } /* comment */ 27 | [data-bs-theme="dark"] pre code span.r-out.co { color: #ccc; font-style: italic } /* comment */ 28 | [data-bs-theme="dark"] pre code span.cv { color: #61cc33} /* comment variable */ 29 | [data-bs-theme="dark"] pre code span.do { color: #f8f8f2} /* documentation */ 30 | [data-bs-theme="dark"] pre code span.dt { color: #df9af4} /* data type */ 31 | [data-bs-theme="dark"] pre code span.dv { color: #61cc33} /* decimal value */ 32 | [data-bs-theme="dark"] pre code span.er { color: #ff858f; text-decoration: underline} /* error */ 33 | [data-bs-theme="dark"] pre code span.ex { color: #00e0e0; font-weight: bold} /* extension */ 34 | [data-bs-theme="dark"] pre code span.fl { color: #61cc33} /* float */ 35 | [data-bs-theme="dark"] pre code span.fu { color: #ff858f} /* function */ 36 | [data-bs-theme="dark"] pre code span.im { color: #61cc33} /* import */ 37 | [data-bs-theme="dark"] pre code span.in { color: #f8f8f2} /* information */ 38 | [data-bs-theme="dark"] pre code span.kw { color: #df9af4; font-weight: bold} /* keyword */ 39 | [data-bs-theme="dark"] pre code span.op { color: #df9af4} /* operator */ 40 | [data-bs-theme="dark"] pre code span.ot { color: #00e0e0} /* other */ 41 | [data-bs-theme="dark"] pre code span.pp { color: #f8f8f2} /* preprocessor */ 42 | [data-bs-theme="dark"] pre code span.re { color: #00e0e0; background-color: #f8f8f2} /* regular expression? 
*/ 43 | [data-bs-theme="dark"] pre code span.sc { color: #61cc33} /* special character */ 44 | [data-bs-theme="dark"] pre code span.ss { color: #61cc33} /* special string */ 45 | [data-bs-theme="dark"] pre code span.st { color: #61cc33} /* string */ 46 | [data-bs-theme="dark"] pre code span.va { color: #f8f8f2} /* variable */ 47 | [data-bs-theme="dark"] pre code span.vs { color: #61cc33} /* verbatim string */ 48 | [data-bs-theme="dark"] pre code span.wa { color: #dcc6e0} /* warning */ 49 | [data-bs-theme="dark"] pre code .r-pr { color: #777 } /* prompt */ 50 | -------------------------------------------------------------------------------- /src/gdfit_cox.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | double crossprod(double *x, double *y, int n, int j); 8 | double norm(double *x, int p); 9 | double S(double z, double l); 10 | double F(double z, double l1, double l2, double gamma); 11 | double Fs(double z, double l1, double l2, double gamma); 12 | 13 | // Group descent update -- cox 14 | void gd_cox(double *b, double *x, double *r, double *eta, double v, int g, 15 | int *K1, int n, int l, int p, const char *penalty, double lam1, 16 | double lam2, double gamma, SEXP df, double *a, double *maxChange) { 17 | 18 | // Calculate z 19 | int K = K1[g+1] - K1[g]; 20 | double *z = R_Calloc(K, double); 21 | for (int j=K1[g]; j maxChange[0]) maxChange[0] = fabs(shift); 35 | for (int i=0; i 0) REAL(df)[l] += K * len / z_norm; 45 | free(z); 46 | } 47 | 48 | SEXP gdfit_cox(SEXP X_, SEXP d_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP lambda, SEXP alpha_, SEXP eps_, SEXP max_iter_, SEXP gamma_, SEXP group_multiplier, SEXP dfmax_, SEXP gmax_, SEXP warn_, SEXP user_) { 49 | 50 | // Lengths/dimensions 51 | int n = length(d_); 52 | int L = length(lambda); 53 | int J = length(K1_) - 1; 54 | int p = length(X_)/n; 55 | 56 | // Pointers 57 | double *X = REAL(X_); 58 | double *d = REAL(d_); 59 | const char *penalty = CHAR(STRING_ELT(penalty_, 0)); 60 | int *K1 = INTEGER(K1_); 61 | int K0 = INTEGER(K0_)[0]; 62 | double *lam = REAL(lambda); 63 | double alpha = REAL(alpha_)[0]; 64 | double eps = REAL(eps_)[0]; 65 | int max_iter = INTEGER(max_iter_)[0]; 66 | int tot_iter = 0; 67 | double gamma = REAL(gamma_)[0]; 68 | double *m = REAL(group_multiplier); 69 | int dfmax = INTEGER(dfmax_)[0]; 70 | int gmax = INTEGER(gmax_)[0]; 71 | int warn = INTEGER(warn_)[0]; 72 | int user = INTEGER(user_)[0]; 73 | 74 | // Outcome 75 | SEXP res, beta, Loss, iter, df, Eta; 76 | PROTECT(res = allocVector(VECSXP, 5)); 77 | PROTECT(beta = allocVector(REALSXP, L*p)); 78 | for (int j=0; j<(L*p); j++) REAL(beta)[j] = 0; 79 | PROTECT(iter = allocVector(INTSXP, L)); 80 | for (int i=0; i=0; i--) rsk[i] = rsk[i+1] + 1; 106 | nullDev = 0; 107 | for (int i=0; i gmax || nv > dfmax || tot_iter == max_iter) { 132 | for (int ll=l; ll=0; i--) { 148 | rsk[i] = rsk[i+1] + haz[i]; 149 | } 150 | for (int i=0; i maxChange) maxChange = fabs(shift); 180 | b[l*p+j] = shift + a[j]; 181 | for (int i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | /* .Call calls */ 9 | extern SEXP gdfit_glm(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 10 | extern SEXP gdfit_cox(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 11 | extern SEXP gdfit_gaussian(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 
12 | extern SEXP lcdfit_glm(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 13 | extern SEXP lcdfit_cox(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 14 | extern SEXP lcdfit_gaussian(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 15 | extern SEXP maxgrad(SEXP, SEXP, SEXP, SEXP); 16 | extern SEXP maxprod(SEXP, SEXP, SEXP, SEXP); 17 | extern SEXP standardize(SEXP); 18 | 19 | static const R_CallMethodDef CallEntries[] = { 20 | {"gdfit_glm", (DL_FUNC) &gdfit_glm, 16}, 21 | {"gdfit_cox", (DL_FUNC) &gdfit_cox, 15}, 22 | {"gdfit_gaussian", (DL_FUNC) &gdfit_gaussian, 15}, 23 | {"lcdfit_glm", (DL_FUNC) &lcdfit_glm, 18}, 24 | {"lcdfit_cox", (DL_FUNC) &lcdfit_cox, 17}, 25 | {"lcdfit_gaussian", (DL_FUNC) &lcdfit_gaussian, 16}, 26 | {"maxgrad", (DL_FUNC) &maxgrad, 4}, 27 | {"maxprod", (DL_FUNC) &maxprod, 4}, 28 | {"standardize", (DL_FUNC) &standardize, 1}, 29 | {NULL, NULL, 0} 30 | }; 31 | 32 | void R_init_grpreg(DllInfo *dll) { 33 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 34 | R_useDynamicSymbols(dll, FALSE); 35 | } 36 | 37 | // Cross product of the jth column of x with y 38 | double crossprod(double *x, double *y, int n, int j) { 39 | double val = 0; 40 | int nn = n*j; 41 | for (int i=0; i val) val = x[i]; 73 | } 74 | return(val); 75 | } 76 | 77 | // Gaussian loss 78 | double gLoss(double *r, int n) { 79 | double l = 0; 80 | for (int i=0;i 10) { 87 | return(1); 88 | } else if (eta < -10) { 89 | return(0); 90 | } else { 91 | return(exp(eta)/(1+exp(eta))); 92 | } 93 | } 94 | 95 | // Euclidean norm 96 | double norm(double *x, int p) { 97 | double x_norm = 0; 98 | for (int j=0; j l) return(z-l); 106 | if (z < -l) return(z+l); 107 | return(0); 108 | } 109 | 110 | // Firm-thresholding operator 111 | double F(double z, double l1, double l2, double gamma) { 112 | double s=0; 113 | if (z > 0) s = 1; 114 | else if (z < 0) s = -1; 115 | if (fabs(z) <= l1) return(0); 116 | else if (fabs(z) <= gamma*l1*(1+l2)) return(s*(fabs(z)-l1)/(1+l2-1/gamma)); 117 | else return(z/(1+l2)); 118 | } 119 | 120 | // SCAD-modified firm-thresholding operator 121 | double Fs(double z, double l1, double l2, double gamma) { 122 | double s=0; 123 | if (z > 0) s = 1; 124 | else if (z < 0) s = -1; 125 | if (fabs(z) <= l1) return(0); 126 | else if (fabs(z) <= (l1*(1+l2)+l1)) return(s*(fabs(z)-l1)/(1+l2)); 127 | else if (fabs(z) <= gamma*l1*(1+l2)) return(s*(fabs(z)-gamma*l1/(gamma-1))/(1-1/(gamma-1)+l2)); 128 | else return(z/(1+l2)); 129 | } 130 | 131 | // MCP penalty 132 | double MCP(double theta, double l, double a) { 133 | theta = fabs(theta); 134 | if (theta <= a*l) return(l*theta - pow(theta,2)/(2*a)); 135 | else return(a*pow(l,2)/2); 136 | } 137 | 138 | // MCP penalization rate 139 | double dMCP(double theta, double l, double a) { 140 | theta = fabs(theta); 141 | if (theta < a*l) return(l-theta/a); 142 | else return(0); 143 | } 144 | -------------------------------------------------------------------------------- /src/maxprod.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | double crossprod(double *X, double *y, int n, int j); 8 | double norm(double *x, int p); 9 | 10 | SEXP maxprod(SEXP X_, SEXP y_, SEXP K_, SEXP m_) { 11 | 12 | // Declarations 13 | int n = nrows(X_); 14 | int J = length(K_)-1; 15 | SEXP zmax; 16 | PROTECT(zmax = 
allocVector(REALSXP, 1)); 17 | REAL(zmax)[0] = 0; 18 | double *X = REAL(X_); 19 | double *y = REAL(y_); 20 | double *m = REAL(m_); 21 | int *K = INTEGER(K_); 22 | 23 | for (int g=0; g REAL(zmax)[0]) REAL(zmax)[0] = z; 27 | } 28 | } 29 | 30 | // Return 31 | UNPROTECT(1); 32 | return(zmax); 33 | } 34 | 35 | SEXP maxgrad(SEXP X_, SEXP y_, SEXP K_, SEXP m_) { 36 | 37 | // Declarations 38 | int n = nrows(X_); 39 | int J = length(K_)-1; 40 | SEXP zmax; 41 | PROTECT(zmax = allocVector(REALSXP, 1)); 42 | REAL(zmax)[0] = 0; 43 | double *X = REAL(X_); 44 | double *y = REAL(y_); 45 | double *m = REAL(m_); 46 | int *K = INTEGER(K_); 47 | 48 | for (int g=0; g REAL(zmax)[0]) REAL(zmax)[0] = z; 56 | free(Z); 57 | } 58 | 59 | // Return 60 | UNPROTECT(1); 61 | return(zmax); 62 | } 63 | -------------------------------------------------------------------------------- /src/standardize.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | SEXP standardize(SEXP X_) { 9 | // Declarations 10 | int n = nrows(X_); 11 | int p = ncols(X_); 12 | SEXP XX_, c_, s_; 13 | PROTECT(XX_ = allocMatrix(REALSXP, n, p)); 14 | for (int j=0; j<(n*p); j++) REAL(XX_)[j] = 0; 15 | PROTECT(c_ = allocVector(REALSXP, p)); 16 | for (int j=0; j 8 | %\VignetteIndexEntry{Getting started with grpreg} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | ```{r setup, include=FALSE} 14 | library(grpreg) 15 | set.seed(4) 16 | knitr::opts_knit$set(aliases=c(h = 'fig.height', w = 'fig.width')) 17 | knitr::opts_chunk$set(comment="#", collapse=TRUE, cache=FALSE, tidy=FALSE) 18 | knitr::knit_hooks$set(small.mar = function(before, options, envir) { 19 | if (before) par(mar = c(4, 4, .1, .1)) 20 | }) 21 | ``` 22 | 23 | **[grpreg](https://pbreheny.github.io/grpreg/)** is an R package for fitting the regularization path of linear regression, GLM, and Cox regression models with grouped penalties. This includes group selection methods such as group lasso, group MCP, and group SCAD as well as bi-level selection methods such as the group exponential lasso, the composite MCP, and the group bridge. Utilities for carrying out cross-validation as well as post-fitting visualization, summarization, and prediction are also provided. 24 | 25 | **grpreg** comes with a few example data sets; we'll look at `Birthwt`, which involves identifying risk factors associated with low birth weight. The outcome can either be measured continuously (`bwt`, the birth weight in kilograms) or dichotomized (`low`) with respect to the newborn having a low birth weight (under 2.5 kg). 26 | 27 | ```{r Birthwt} 28 | data(Birthwt) 29 | X <- Birthwt$X 30 | y <- Birthwt$bwt 31 | head(X) 32 | ``` 33 | The original design matrix consisted of 8 variables, which have been expanded here into 16 features. For example, there are multiple indicator functions for race ("other" being the reference group) and several continuous factors such as age have been expanded using polynomial contrasts (splines would give a similar structure). Hence, the columns of the design matrix are *grouped*; this is what grpreg is designed for. 
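For grouped expansions of this kind, the package can also construct the design matrix for you: `expand_spline()` builds a grouped spline basis from a matrix of raw covariates, and the result can be passed straight to `grpreg()` with no separate `group` argument. A minimal sketch, using the package's own `gen_nonlinear_data()` to simulate raw covariates (these calls mirror the package's documented examples; see the additive-models article for details):

```{r expand}
# Expand each raw covariate into a grouped spline basis; the grouping is
# carried along inside the expand_spline object
Data <- gen_nonlinear_data(n=1000)
Xs <- expand_spline(Data$X)
fit_add <- grpreg(Xs, Data$y)
```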
The grouping information is encoded as follows: 34 | 35 | ```{r Birthwt_group} 36 | group <- Birthwt$group 37 | group 38 | ``` 39 | 40 | Here, groups are given as a factor; unique integer codes (which are essentially unlabeled factors) and character vectors are also allowed (character vectors do have some limitations, however, as the order of the groups is left unspecified, which can lead to ambiguity if you also try to set the `group.multiplier` option). To fit a group lasso model to this data set: 41 | 42 | ```{r fit} 43 | fit <- grpreg(X, y, group, penalty="grLasso") 44 | ``` 45 | 46 | We can then plot the coefficient paths with 47 | 48 | ```{r plot, h=4, w=6, small.mar=TRUE} 49 | plot(fit) 50 | ``` 51 | 52 | Notice that when a group enters the model (e.g., the green group), all of its coefficients become nonzero; this is what happens with group lasso models. To see what the coefficients are, we could use the `coef` function: 53 | 54 | ```{r coef} 55 | coef(fit, lambda=0.05) 56 | ``` 57 | 58 | Note that the number of physician's visits (`ftv`) is not included in the model at $\lambda=0.05$. 59 | 60 | Typically, one would carry out cross-validation to assess the predictive accuracy of the model at various values of $\lambda$. 61 | 62 | ```{r cvplot, h=5, w=6} 63 | cvfit <- cv.grpreg(X, y, group, penalty="grLasso") 64 | plot(cvfit) 65 | ``` 66 | The coefficients corresponding to the value of $\lambda$ that minimizes the cross-validation error can be obtained via `coef`: 67 | 68 | ```{r cv_coef} 69 | coef(cvfit) 70 | ``` 71 | 72 | Predicted values can be obtained via `predict`, which has a number of options: 73 | 74 | ```{r predict} 75 | predict(cvfit, X=head(X)) # Predictions for new observations 76 | predict(fit, type="ngroups", lambda=0.1) # Number of nonzero groups 77 | predict(fit, type="groups", lambda=0.1) # Identity of nonzero groups 78 | predict(fit, type="nvars", lambda=0.1) # Number of nonzero coefficients 79 | predict(fit, type="vars", lambda=0.1) # Identity of nonzero coefficients 80 | ``` 81 | 82 | Note that the original fit (to the full data set) is returned as `cvfit$fit`; it is not necessary to call both `grpreg` and `cv.grpreg` to analyze a data set. Several other penalties are available, as are methods for logistic regression and Cox proportional hazards regression; a brief sketch of these follows.
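Both use the same interface shown above. A minimal sketch, reusing the `Birthwt` objects from this vignette along with the package's `Lung` survival data (the calls mirror the package's documented examples):

```{r other_models}
# Group-penalized logistic regression on the dichotomized outcome
fit_logit <- grpreg(X, Birthwt$low, group, penalty="grLasso", family="binomial")

# Group-penalized Cox regression: grpsurv() takes a survival outcome
data(Lung)
fit_cox <- grpsurv(Lung$X, Lung$y, Lung$group)
```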
83 | -------------------------------------------------------------------------------- /vignettes/vignette.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #fff; 3 | margin: 1em auto; 4 | max-width: 700px; 5 | overflow: visible; 6 | padding-left: 2em; 7 | padding-right: 2em; 8 | margin-bottom: 50px; 9 | font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; 10 | font-size: 14px; 11 | line-height: 1.35; 12 | } 13 | table { 14 | margin: 1em auto; 15 | border-width: 1px; 16 | border-color: #DDDDDD; 17 | border-style: outset; 18 | border-collapse: collapse; 19 | } 20 | table th { 21 | border-width: 2px; 22 | padding: 5px; 23 | border-style: inset; 24 | } 25 | table td { 26 | border-width: 1px; 27 | border-style: inset; 28 | line-height: 18px; 29 | padding: 5px 5px; 30 | } 31 | table, table th, table td { 32 | border-left-style: none; 33 | border-right-style: none; 34 | } 35 | table thead, table .even { 36 | background-color: #f7f7f7; 37 | } 38 | p { 39 | margin: 0.5em 0; 40 | } 41 | blockquote { 42 | background-color: #f6f6f6; 43 | padding: 0.25em 0.75em; 44 | } 45 | hr { 46 | border-style: solid; 47 | border: none; 48 | border-top: 1px solid #777; 49 | margin: 28px 0; 50 | } 51 | dl { 52 | margin-left: 0; 53 | } 54 | dl dd { 55 | margin-bottom: 13px; 56 | margin-left: 13px; 57 | } 58 | dl dt { 59 | font-weight: bold; 60 | } 61 | ul { 62 | margin-top: 0; 63 | } 64 | ul li { 65 | list-style: circle outside; 66 | } 67 | ul ul { 68 | margin-bottom: 0; 69 | } 70 | pre, code { 71 | background-color: #f7f7f7; 72 | border-radius: 3px; 73 | color: #333; 74 | white-space: pre-wrap; 75 | } 76 | pre { 77 | border: 1px solid #eee; 78 | border-radius: 3px; 79 | margin: 5px 0px 10px 0px; 80 | padding: 10px; 81 | } 82 | pre:not([class]) { 83 | background-color: #f7f7f7; 84 | } 85 | code { 86 | font-family: Consolas, Monaco, 'Courier New', monospace; 87 | font-size: 85%; 88 | } 89 | p > code, li > code { 90 | padding: 2px 0; 91 | } 92 | .figure { 93 | text-align: center; 94 | } 95 | img { 96 | background-color: #FFFFFF; 97 | padding: 2px; 98 | display: block; 99 | margin: auto; 100 | } 101 | h1 { 102 | margin-top: 0; 103 | font-size: 35px; 104 | line-height: 40px; 105 | } 106 | h2 { 107 | border-bottom: 4px solid #f7f7f7; 108 | padding-top: 10px; 109 | padding-bottom: 2px; 110 | font-size: 145%; 111 | } 112 | h3 { 113 | border-bottom: 2px solid #f7f7f7; 114 | padding-top: 10px; 115 | font-size: 120%; 116 | } 117 | h4 { 118 | border-bottom: 1px solid #f7f7f7; 119 | margin-left: 8px; 120 | font-size: 105%; 121 | } 122 | h5, h6 { 123 | border-bottom: 1px solid #ccc; 124 | font-size: 105%; 125 | } 126 | a { 127 | color: #008dff; 128 | text-decoration: none; 129 | } 130 | a:hover { 131 | color: #00b500; 132 | } 133 | 134 | a.sourceLine:hover { 135 | text-decoration: none; 136 | } 137 | code span.dt {color: #000000;} 138 | code span.fl {color: #1514b5;} 139 | code span.fu {color: #000000;} 140 | code span.ch,code span.st {color: #036a07;} 141 | code span.kw {color: #375f84;} 142 | code span.co {color: #888888;} 143 | code span.message { color: black; font-weight: bolder;} 144 | code span.error { color: orange; font-weight: bolder;} 145 | code span.warning { color: #6A0366; font-weight: bolder;} 146 | --------------------------------------------------------------------------------