├── vignettes ├── unnamed-chunk-1-1.png └── usage_of_the_ordinis_package.Rmd ├── .gitignore ├── R ├── ordinis.R ├── RcppExports.R ├── utils.R ├── ordinis_methods.R └── ordinis_call.R ├── .Rbuildignore ├── src ├── Makevars ├── Makevars.win ├── utils.h ├── CoordBase.h ├── utils.cpp ├── RcppExports.cpp ├── ordinis_dense.cpp ├── DataStd.h ├── ordinis_dense_glm.cpp ├── CoordGaussianDense.h └── CoordLogisticDense.h ├── ordinis.Rproj ├── DESCRIPTION ├── NAMESPACE ├── man ├── logLik.Rd ├── predict.cv.ordinis.Rd ├── predict.ordinis.Rd ├── plot.Rd ├── cv.ordinis.Rd └── ordinis.Rd ├── README.Rmd └── README.md /vignettes/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaredhuling/ordinis/HEAD/vignettes/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .Rapp.history 6 | src/*.o 7 | src/*.so 8 | src/*.dll 9 | inst/doc 10 | .DS_Store 11 | -------------------------------------------------------------------------------- /R/ordinis.R: -------------------------------------------------------------------------------- 1 | #' @importFrom grDevices rainbow 2 | #' @importFrom graphics mtext par strwidth title 3 | #' @import Matrix 4 | #' @import Rcpp 5 | #' @import foreach 6 | #' @useDynLib ordinis, .registration = TRUE 7 | NULL 8 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^.*\.Rhistory$ 4 | ^cran-comments.md$ 5 | ^NEWS.md$ 6 | README.Rmd 7 | README.html 8 | ^\.travis\.yml$ 9 | ^/\.gitattributes$ 10 | ^docs$ 11 | ^\_pkgdown\.yml$ 12 | docs 13 | ^_pkgdown\.yml$ 14 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | ## Use the R_HOME indirection to support installations of multiple R versions 2 | 3 | ## KEEP 4 | #PKG_CPPFLAGS = -I${R_HOME}/library/Rcpp/include \ 5 | # -I${R_HOME}/library/RcppEigen/include -I. -DNDEBUG 6 | 7 | 8 | PKG_CXXFLAGS = -DNDEBUG 9 | PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"` $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 10 | 11 | CXX_STD = CXX11 12 | -------------------------------------------------------------------------------- /ordinis.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | 2 | ## OLD 3 | #PKG_CPPFLAGS = -I${R_HOME}/library/RcppEigen/include \ 4 | #-I${R_HOME}/library/Rcpp/include -I. 
-DNDEBUG 4 | 5 | 6 | #PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 7 | ## old 8 | 9 | 10 | #PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" --vanilla -e "Rcpp:::LdFlags()") $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 11 | PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" --vanilla -e "Rcpp:::LdFlags()") $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 12 | 13 | 14 | PKG_CPPFLAGS = -I. -DNDEBUG 15 | 16 | CXX_STD = CXX11 17 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ordinis 2 | Type: Package 3 | Title: Fits Penalized Regression Models with Coordinate Descent 4 | Version: 0.1.0 5 | Authors@R: c( 6 | person("Jared", "Huling", role = c("aut", "cre"), email = "jaredhuling@gmail.com") 7 | ) 8 | Description: Provides estimation of linear and logistic regression models penalized with the 9 | lasso, adaptive lasso, MCP, and SCAD penalties via the coordinate descent algorithm. 10 | License: GPL-2 11 | Depends: 12 | methods, 13 | Matrix 14 | Encoding: UTF-8 15 | LazyData: true 16 | Imports: 17 | Rcpp (>= 0.12.16), 18 | foreach 19 | LinkingTo: 20 | Rcpp, 21 | RcppEigen 22 | RoxygenNote: 6.1.0 23 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | coord_ordinis_dense_cpp <- function(x, y, weights, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, opts) { 5 | .Call(`_ordinis_coord_ordinis_dense_cpp`, x, y, weights, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, opts) 6 | } 7 | 8 | coord_ordinis_dense_glm_cpp <- function(x, y, weights, offset, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, glm_fam, opts) { 9 | .Call(`_ordinis_coord_ordinis_dense_glm_cpp`, x, y, weights, offset, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, glm_fam, opts) 10 | } 11 | 12 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _ordinis_UTILS_H 2 | #define _ordinis_UTILS_H 3 | 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | 15 | using Eigen::MatrixXd; 16 | using Eigen::ArrayXd; 17 | using Eigen::VectorXd; 18 | using Eigen::VectorXi; 19 | 20 | typedef Eigen::SparseVector<double> SparseVector; 21 | 22 | double threshold(double num); 23 | 24 | VectorXd cumsum(const VectorXd& x); 25 | 26 | VectorXd cumsumrev(const VectorXd& x); 27 | 28 | 29 | bool stopRule(const VectorXd& cur, const VectorXd& prev, const double& tolerance); 30 | 31 | bool stopRule(const SparseVector& cur, const SparseVector& prev, const double& tolerance); 32 | 33 | bool stopRule(SparseVector& cur, SparseVector& prev, const double& tolerance); 34 | 35 | 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(logLik,ordinis) 4 | S3method(plot,cv.ordinis) 5 | S3method(plot,ordinis) 6 | S3method(predict,cdbinomial) 7 | S3method(predict,cdgaussian) 8 | 
S3method(predict,cv.ordinis) 9 | S3method(predict,ordinis) 10 | export(cv.ordinis) 11 | export(ordinis) 12 | import(Matrix) 13 | import(Rcpp) 14 | import(foreach) 15 | importFrom(grDevices,rainbow) 16 | importFrom(graphics,abline) 17 | importFrom(graphics,axis) 18 | importFrom(graphics,matplot) 19 | importFrom(graphics,mtext) 20 | importFrom(graphics,par) 21 | importFrom(graphics,points) 22 | importFrom(graphics,segments) 23 | importFrom(graphics,strwidth) 24 | importFrom(graphics,title) 25 | importFrom(methods,as) 26 | importFrom(stats,approx) 27 | importFrom(stats,predict) 28 | importFrom(stats,quantile) 29 | importFrom(stats,runif) 30 | importFrom(stats,weighted.mean) 31 | useDynLib(ordinis, .registration = TRUE) 32 | -------------------------------------------------------------------------------- /man/logLik.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_methods.R 3 | \name{logLik.ordinis} 4 | \alias{logLik.ordinis} 5 | \title{log likelihood function for fitted ordinis objects} 6 | \usage{ 7 | \method{logLik}{ordinis}(object, REML = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{fitted "ordinis" model object.} 11 | 12 | \item{REML}{an optional logical value. If \code{TRUE} the 13 | restricted log-likelihood is returned, else, if \code{FALSE}, 14 | the log-likelihood is returned. Defaults to \code{FALSE}.} 15 | 16 | \item{...}{not used} 17 | } 18 | \description{ 19 | log likelihood function for fitted ordinis objects 20 | } 21 | \examples{ 22 | set.seed(123) 23 | n.obs <- 200 24 | n.vars <- 500 25 | 26 | true.beta <- c(runif(15, -0.25, 0.25), rep(0, n.vars - 15)) 27 | x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 28 | y <- rnorm(n.obs, sd = 3) + x \%*\% true.beta 29 | 30 | fit <- ordinis(x = x, y = y) 31 | 32 | logLik(fit) 33 | 34 | 35 | } 36 | -------------------------------------------------------------------------------- /man/predict.cv.ordinis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_methods.R 3 | \name{predict.cv.ordinis} 4 | \alias{predict.cv.ordinis} 5 | \title{Prediction function for fitted cross validation ordinis objects} 6 | \usage{ 7 | \method{predict}{cv.ordinis}(object, newx, s = c("lambda.min", 8 | "lambda.1se"), ...) 9 | } 10 | \arguments{ 11 | \item{object}{fitted \code{"cv.ordinis"} model object} 12 | 13 | \item{newx}{Matrix of new values for \code{x} at which predictions are to be made. Must be a matrix; can be sparse as in the 14 | \code{CsparseMatrix} objects of the \pkg{Matrix} package 15 | This argument is not used for \code{type = c("coefficients","nonzero")}} 16 | 17 | \item{s}{Value(s) of the penalty parameter lambda at which predictions are required. Default is the entire sequence used to create 18 | the model. 
For \code{predict.cv.ordinis()}, can also specify \code{"lambda.1se"} or \code{"lambda.min"} for best lambdas estimated by cross validation} 19 | 20 | \item{...}{used to pass the other arguments for predict.ordinis} 21 | } 22 | \value{ 23 | An object depending on the type argument 24 | } 25 | \description{ 26 | Prediction function for fitted cross validation ordinis objects 27 | } 28 | \examples{ 29 | set.seed(123) 30 | n.obs <- 1e4 31 | n.vars <- 100 32 | n.obs.test <- 1e3 33 | 34 | true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15)) 35 | 36 | x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 37 | y <- rnorm(n.obs, sd = 3) + x \%*\% true.beta 38 | x.test <- matrix(rnorm(n.obs.test * n.vars), n.obs.test, n.vars) 39 | y.test <- rnorm(n.obs.test, sd = 3) + x.test \%*\% true.beta 40 | 41 | fit <- cv.ordinis(x = x, y = y, 42 | gamma = 1.4, 43 | nlambda = 10) 44 | 45 | 46 | preds.best <- predict(fit, newx = x.test, type = "response") 47 | 48 | apply(preds.best, 2, function(x) mean((y.test - x) ^ 2)) 49 | 50 | } 51 | -------------------------------------------------------------------------------- /man/predict.ordinis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_methods.R 3 | \name{predict.ordinis} 4 | \alias{predict.ordinis} 5 | \title{Prediction method for coord lasso fitted objects} 6 | \usage{ 7 | \method{predict}{ordinis}(object, newx, s = NULL, type = c("link", 8 | "response", "coefficients", "nonzero", "class"), ...) 9 | } 10 | \arguments{ 11 | \item{object}{fitted "ordinis" model object} 12 | 13 | \item{newx}{Matrix of new values for \code{x} at which predictions are to be made. Must be a matrix; can be sparse as in the 14 | \code{CsparseMatrix} objects of the \pkg{Matrix} package. 15 | This argument is not used for \code{type=c("coefficients","nonzero")}} 16 | 17 | \item{s}{Value(s) of the penalty parameter lambda at which predictions are required. Default is the entire sequence used to create 18 | the model.} 19 | 20 | \item{type}{Type of prediction required. \code{type = "link"} gives the linear predictors for the \code{"binomial"} model; for \code{"gaussian"} models it gives the fitted values. 21 | \code{type = "response"} gives the fitted probabilities for \code{"binomial"}. \code{type = "coefficients"} computes the coefficients at the requested values for \code{s}. 
22 | \code{type = "nonzero"} returns a list of the indices of the nonzero coefficients for each value of \code{s}. \code{type = "class"} applies only to \code{"binomial"} and produces the class label corresponding to the maximum probability.} 23 | 24 | \item{...}{not used} 25 | } 26 | \value{ 27 | An object depending on the type argument 28 | } 29 | \description{ 30 | Prediction method for coord lasso fitted objects 31 | } 32 | \examples{ 33 | set.seed(123) 34 | n.obs <- 1e4 35 | n.vars <- 100 36 | n.obs.test <- 1e3 37 | 38 | true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15)) 39 | 40 | x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 41 | y <- rnorm(n.obs, sd = 3) + x \%*\% true.beta 42 | x.test <- matrix(rnorm(n.obs.test * n.vars), n.obs.test, n.vars) 43 | y.test <- rnorm(n.obs.test, sd = 3) + x.test \%*\% true.beta 44 | 45 | fit <- ordinis(x = x, y = y, nlambda = 10) 46 | 47 | preds.lasso <- predict(fit, newx = x.test, type = "response") 48 | 49 | apply(preds.lasso, 2, function(x) mean((y.test - x) ^ 2)) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_methods.R 3 | \name{plot.ordinis} 4 | \alias{plot.ordinis} 5 | \alias{plot.cv.ordinis} 6 | \title{Plot method for ordinis fitted objects} 7 | \usage{ 8 | \method{plot}{ordinis}(x, xvar = c("loglambda", "lambda", "norm"), 9 | labsize = 0.6, xlab = iname, ylab = NULL, main = x$penalty, 10 | xlim = NULL, n.print = 10L, ...) 11 | 12 | \method{plot}{cv.ordinis}(x, sign.lambda = 1, ...) 13 | } 14 | \arguments{ 15 | \item{x}{fitted "ordinis" model object or fitted "cv.ordinis" model object} 16 | 17 | \item{xvar}{What is on the X-axis. \code{"loglambda"} plots against the log-lambda sequence, \code{"lambda"} against the lambda sequence, and \code{"norm"} against the L1-norm of the coefficients} 18 | 19 | \item{labsize}{size of labels for variable names. If labsize = 0, then no variable names will be plotted} 20 | 21 | \item{xlab}{label for x-axis} 22 | 23 | \item{ylab}{label for y-axis} 24 | 25 | \item{main}{main title for plot} 26 | 27 | \item{xlim}{numeric vector of length 2 giving the range of the \code{x} coordinates.} 28 | 29 | \item{n.print}{scalar integer for the number of times along the regularization path to print the number 30 | of nonzero coefficients. 
If set to a negative value, the number of nonzero coefficients will not be printed.} 31 | 32 | \item{...}{other graphical parameters for the plot} 33 | 34 | \item{sign.lambda}{Either plot against log(lambda) (default) or its negative if \code{sign.lambda = -1}.} 35 | } 36 | \description{ 37 | Plot method for ordinis fitted objects 38 | 39 | Plot method for fitted cv.ordinis objects 40 | } 41 | \examples{ 42 | set.seed(123) 43 | n.obs <- 100 44 | n.vars <- 1000 45 | 46 | true.beta <- c(runif(5, 0.1, 1) * (2 * rbinom(5, 1, 0.5) - 1), rep(0, n.vars - 5)) 47 | 48 | x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 49 | y <- rnorm(n.obs, sd = 2) + x \%*\% true.beta 50 | 51 | fit <- ordinis(x = x, y = y, penalty = c("mcp")) 52 | 53 | plot(fit) 54 | 55 | set.seed(123) 56 | n.obs <- 100 57 | n.vars <- 200 58 | 59 | true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15)) 60 | 61 | x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 62 | y <- rnorm(n.obs, sd = 3) + x \%*\% true.beta 63 | 64 | fit <- cv.ordinis(x = x, y = y, gamma = 1.4) 65 | 66 | plot(fit) 67 | 68 | } 69 | -------------------------------------------------------------------------------- /vignettes/usage_of_the_ordinis_package.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Usage of the ordinis Package" 3 | author: "Jared Huling" 4 | date: "`r Sys.Date()`" 5 | output: 6 | rmarkdown::html_vignette: 7 | fig_width: 7 8 | fig_height: 5 9 | toc: true 10 | toc_depth: 3 11 | number_sections: true 12 | self_contained: true 13 | vignette: > 14 | %\VignetteIndexEntry{Usage of the ordinis Package} 15 | %\VignetteEngine{knitr::rmarkdown} 16 | --- 17 | 18 | 19 | 20 | # Introduction to `ordinis` 21 | 22 | 23 | ## Fitting penalized regression models 24 | 25 | 26 | ```{r fit_model} 27 | 28 | library(ordinis) 29 | 30 | 31 | set.seed(1) 32 | n <- 100 33 | p <- 1000 34 | m <- 10 35 | b <- matrix(c(runif(m, min = -1), rep(0, p - m))) 36 | x <- matrix(rnorm(n * p, sd = 3), n, p) 37 | y <- drop(x %*% b) + rnorm(n) 38 | 39 | # fitting a linear model with an MCP penalty 40 | mod <- ordinis(x, y, 41 | penalty = "mcp", 42 | gamma = 1.4) # additional tuning param for MCP 43 | 44 | plot(mod) 45 | 46 | # now applying positivity constraints to parameters and adding a ridge penalty 47 | mod2 <- ordinis(x, y, 48 | penalty = "mcp", 49 | gamma = 1.4, # additional tuning param for MCP 50 | lower.limits = rep(0, p), # force all coefficients to be positive 51 | penalty.factor = c(0, 0, rep(1, p-2)), # don't penalize first two coefficients 52 | alpha = 0.5) # use elastic net with alpha = 0.5 53 | 54 | plot(mod2) 55 | 56 | # use cross validation to select the lambda tuning parameter 57 | cvmod <- cv.ordinis(x, y, penalty = "mcp", gamma = 1.4) 58 | 59 | plot(cvmod) 60 | 61 | # return coefficients with min cv-MSE 62 | coef <- predict(cvmod, type = "coef", s = "lambda.min") 63 | 64 | # use cross validation for the lasso penalty 65 | cvmodl1 <- cv.ordinis(x, y, penalty = "lasso") 66 | 67 | # return coefficients with min cv-MSE 68 | coefl1 <- predict(cvmodl1, type = "coef", s = "lambda.min") 69 | 70 | plot(cvmodl1) 71 | 72 | # number selected by MCP 73 | sum(coef[-1] != 0) 74 | 75 | # number selected by lasso 76 | sum(coefl1[-1] != 0) 77 | 78 | # MCP 79 | round(coef[2:11], 3) 80 | 81 | # truth 82 | round(b[1:10], 3) 83 | 84 | # lasso 85 | round(coefl1[2:11], 3) 86 | ``` 87 | -------------------------------------------------------------------------------- /man/cv.ordinis.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_call.R 3 | \name{cv.ordinis} 4 | \alias{cv.ordinis} 5 | \title{CV Fitting for a Lasso Model Using the Coordinate Descent Algorithm} 6 | \usage{ 7 | cv.ordinis(x, y, lambda = numeric(0), gamma = 3.7, 8 | type.measure = c("mse", "deviance", "class", "auc", "mae"), 9 | nfolds = 10, foldid = NULL, grouped = TRUE, keep = FALSE, 10 | parallel = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{x}{The design matrix} 14 | 15 | \item{y}{The response vector} 16 | 17 | \item{lambda}{A user provided sequence of \eqn{\lambda}. If set to 18 | \code{NULL}, the program will calculate its own sequence 19 | according to \code{nlambda} and \code{lambda_min_ratio}, 20 | which starts from \eqn{\lambda_0} (with this 21 | \eqn{\lambda} all coefficients will be zero) and ends at 22 | \code{lambda0 * lambda_min_ratio}, containing 23 | \code{nlambda} values equally spaced on the log scale. 24 | It is recommended to let the program compute 25 | its own sequence.} 26 | 27 | \item{gamma}{concavity tuning parameter for the MCP/SCAD penalties} 28 | 29 | \item{type.measure}{measure to evaluate for cross-validation. The default is \code{type.measure = "deviance"}, 30 | which uses squared-error for gaussian models (a.k.a. \code{type.measure = "mse"} there) and deviance for logistic 31 | regression. \code{type.measure = "class"} applies to binomial only. \code{type.measure = "auc"} is for two-class logistic 32 | regression only. \code{type.measure = "mse"} or \code{type.measure = "mae"} (mean absolute error) can be used by all models; 33 | they measure the deviation from the fitted mean to the response.} 34 | 35 | \item{nfolds}{number of folds for cross-validation. Default is 10; 3 is the smallest value allowed.} 36 | 37 | \item{foldid}{an optional vector of values between 1 and \code{nfolds} specifying which fold each observation belongs to.} 38 | 39 | \item{grouped}{Like in \pkg{glmnet}, this is an experimental argument, with default \code{TRUE}, and can be ignored by most users. 40 | For all models, this refers to computing \code{nfolds} separate statistics, and then using their mean and estimated standard 41 | error to describe the CV curve. If \code{grouped = FALSE}, an error matrix is built up at the observation level from the 42 | predictions from the \code{nfolds} fits, and then summarized (does not apply to \code{type.measure = "auc"}).} 43 | 44 | \item{keep}{If \code{keep = TRUE}, a prevalidated list of arrays is returned containing fitted values for each observation 45 | and each value of lambda for each model. This means these fits are computed with this observation and the rest of its 46 | fold omitted. The \code{foldid} vector is also returned. Default is \code{keep = FALSE}} 47 | 48 | \item{parallel}{If TRUE, use parallel \pkg{foreach} to fit each fold. A parallel backend must be registered beforehand, e.g. with \pkg{doMC}.} 49 | 50 | \item{...}{other parameters to be passed to the \code{"ordinis"} function} 51 | } 52 | \description{ 53 | Cross validation for linear models with the lasso penalty, which minimizes 54 | \deqn{\frac{1}{2n}\Vert y - X\beta \Vert_2^2 + \lambda \Vert \beta \Vert_1,}{(1/(2n)) ||y - X beta||^2 + lambda ||beta||_1,} 55 | where \eqn{n} is the sample size and \eqn{\lambda} is a tuning 56 | parameter that controls the sparsity of \eqn{\beta}. 
57 | } 58 | \examples{ 59 | set.seed(123) 60 | n = 100 61 | p = 1000 62 | b = c(runif(10, min = 0.2, max = 1), rep(0, p - 10)) 63 | x = matrix(rnorm(n * p, sd = 3), n, p) 64 | y = drop(x \%*\% b) + rnorm(n) 65 | 66 | ## fit lasso model with 100 tuning parameter values 67 | res <- cv.ordinis(x, y) 68 | 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/CoordBase.h: -------------------------------------------------------------------------------- 1 | #ifndef COORDBASE_H 2 | #define COORDBASE_H 3 | 4 | #include 5 | #include "utils.h" 6 | 7 | 8 | template <typename VecTypeBeta> 9 | class CoordBase 10 | { 11 | protected: 12 | 13 | typedef Eigen::SparseVector<int> SparseVectori; 14 | 15 | const int nvars; // dimension of beta 16 | const int nobs; // number of rows 17 | 18 | VecTypeBeta beta; // parameters to be optimized 19 | VecTypeBeta beta_prev; // auxiliary parameters 20 | VecTypeBeta beta_prev_irls; // auxiliary parameters 21 | SparseVectori eligible_set; 22 | VectorXi ineligible_set; 23 | double loss; 24 | 25 | double tol; // tolerance for convergence 26 | 27 | int nzero; 28 | 29 | double deviance, deviance_prev, null_dev; 30 | 31 | //virtual void next_beta(VecTypeBeta &res, VectorXi &eligible) = 0; 32 | virtual void next_beta(VecTypeBeta &res, SparseVectori &eligible) = 0; 33 | virtual void next_beta(VecTypeBeta &res, VectorXi &eligible) = 0; 34 | 35 | virtual bool converged() 36 | { 37 | return (stopRule(beta, beta_prev, tol)); 38 | } 39 | 40 | bool converged_irls() 41 | { 42 | //return (stopRule(beta, beta_prev_irls, tol)); 43 | if (std::abs(deviance - deviance_prev) / (0.1 + std::abs(deviance)) < tol) 44 | { 45 | return true; 46 | } else 47 | { 48 | return false; 49 | } 50 | } 51 | 52 | 53 | void print_row(int iter) 54 | { 55 | const char sep = ' '; 56 | 57 | Rcpp::Rcout << std::left << std::setw(7) << std::setfill(sep) << iter; 58 | Rcpp::Rcout << std::endl; 59 | } 60 | void print_footer() 61 | { 62 | const int width = 80; 63 | Rcpp::Rcout << std::string(width, '=') << std::endl << std::endl; 64 | } 65 | 66 | public: 67 | CoordBase(int n_, int p_, 68 | double tol_ = 1e-6) : 69 | nvars(p_), nobs(n_), 70 | beta(p_), beta_prev(p_), beta_prev_irls(p_), // allocate space but do not set values 71 | eligible_set(p_), ineligible_set(p_), 72 | tol(tol_) 73 | {} 74 | 75 | virtual ~CoordBase() {} 76 | 77 | void update_beta(SparseVectori &eligible) 78 | { 79 | //VecTypeBeta newbeta(nvars); 80 | next_beta(beta, eligible); 81 | //beta.swap(newbeta); 82 | } 83 | 84 | void update_beta(VectorXi &eligible) 85 | { 86 | //VecTypeBeta newbeta(nvars); 87 | next_beta(beta, eligible); 88 | //beta.swap(newbeta); 89 | } 90 | 91 | int solve(int maxit) 92 | { 93 | int i; 94 | 95 | nzero = 0; 96 | 97 | for(i = 0; i < maxit; ++i) 98 | { 99 | beta_prev = beta; 100 | // old_y = dual_y; 101 | //std::copy(dual_y.data(), dual_y.data() + dim_dual, old_y.data()); 102 | 103 | update_beta(eligible_set); 104 | 105 | // print_row(i); 106 | 107 | if(converged()) 108 | break; 109 | 110 | } 111 | 112 | // print_footer(); 113 | 114 | return i + 1; 115 | } 116 | 117 | virtual VecTypeBeta get_beta() { return beta; } 118 | virtual int get_nzero() { return nzero; } 119 | double get_intercept() { return 0.0; } 120 | double get_null_dev() { return null_dev; } 121 | double get_dev() { return deviance; } 122 | virtual double get_loss() { return loss; } 123 | }; 124 | 125 | 126 | 127 | #endif // COORDBASE_H 128 | 129 | -------------------------------------------------------------------------------- /src/utils.cpp: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | #include "utils.h" 4 | 5 | double threshold(double num) 6 | { 7 | return num > 0 ? num : 0; 8 | } 9 | 10 | // computes cumulative sum of vector x 11 | VectorXd cumsum(const VectorXd& x) { 12 | const int n(x.size()); 13 | VectorXd cmsm(n); 14 | //cmsm = std::partial_sum(x.data(), x.data() + x.size(), cmsm.data(), std::plus()); 15 | cmsm(0) = x(0); 16 | 17 | for (int i = 1; i < n; i++) { 18 | cmsm(i) = cmsm(i-1) + x(i); 19 | } 20 | return (cmsm); 21 | } 22 | 23 | // computes reverse cumulative sum of vector x 24 | VectorXd cumsumrev(const VectorXd& x) { 25 | const int n(x.size()); 26 | VectorXd cmsm(n); 27 | //std::reverse(x.data(), x.data() + x.size()); 28 | //cmsm = std::partial_sum(x.data(), x.data() + x.size(), cmsm.data(), std::plus()); 29 | cmsm(0) = x(n-1); 30 | //double tmpsum = 0; 31 | 32 | for (int i = 1; i < n; i++) { 33 | //tmpsum += cmsm(i-1); 34 | cmsm(i) = cmsm(i-1) + x(n-i-1); 35 | } 36 | std::reverse(cmsm.data(), cmsm.data() + cmsm.size()); 37 | return (cmsm); 38 | } 39 | 40 | 41 | 42 | 43 | bool stopRule(const VectorXd& cur, const VectorXd& prev, const double& tolerance) { 44 | for (unsigned i = 0; i < cur.rows(); i++) { 45 | if ( (cur(i) != 0 && prev(i) == 0) || (cur(i) == 0 && prev(i) != 0) ) { 46 | return 0; 47 | } 48 | if (cur(i) != 0 && prev(i) != 0 && 49 | std::abs( (cur(i) - prev(i)) / prev(i)) > tolerance) { 50 | return 0; 51 | } 52 | } 53 | return 1; 54 | } 55 | 56 | bool stopRule(const SparseVector& cur, const SparseVector& prev, const double& tolerance) 57 | { 58 | 59 | /* 60 | for (unsigned i = 0; i < cur.size(); i++) { 61 | if ( (cur.coeff(i) != 0 && prev.coeff(i) == 0) || (cur.coeff(i) == 0 && prev.coeff(i) != 0) ) { 62 | return 0; 63 | } 64 | if (cur.coeff(i) != 0 && prev.coeff(i) != 0 && 65 | std::abs( (cur.coeff(i) - prev.coeff(i)) / prev.coeff(i)) > tolerance) { 66 | return 0; 67 | } 68 | } 69 | */ 70 | 71 | const int n1 = cur.nonZeros(), n2 = prev.nonZeros(); 72 | const double *v1_val = cur.valuePtr(), *v2_val = prev.valuePtr(); 73 | const int *v1_ind = cur.innerIndexPtr(), *v2_ind = prev.innerIndexPtr(); 74 | 75 | double eps = 1e-4; 76 | int i1 = 0, i2 = 0; 77 | while(i1 < n1 && i2 < n2) 78 | { 79 | if(v1_ind[i1] == v2_ind[i2]) 80 | { 81 | if (std::abs( (v1_val[i1] - v2_val[i2]) / (std::abs(v2_val[i2]) + eps) ) > tolerance) 82 | { 83 | return 0; 84 | } else 85 | { 86 | i1++; 87 | i2++; 88 | } 89 | } else 90 | { 91 | return 0; 92 | } 93 | } 94 | return 1; 95 | } 96 | 97 | bool stopRule(SparseVector& cur, SparseVector& prev, const double& tolerance) 98 | { 99 | 100 | /* 101 | for (unsigned i = 0; i < cur.size(); i++) { 102 | if ( (cur.coeff(i) != 0 && prev.coeff(i) == 0) || (cur.coeff(i) == 0 && prev.coeff(i) != 0) ) { 103 | return 0; 104 | } 105 | if (cur.coeff(i) != 0 && prev.coeff(i) != 0 && 106 | std::abs( (cur.coeff(i) - prev.coeff(i)) / prev.coeff(i)) > tolerance) { 107 | return 0; 108 | } 109 | } 110 | */ 111 | 112 | const int n1 = cur.nonZeros(), n2 = prev.nonZeros(); 113 | const double *v1_val = cur.valuePtr(), *v2_val = prev.valuePtr(); 114 | const int *v1_ind = cur.innerIndexPtr(), *v2_ind = prev.innerIndexPtr(); 115 | 116 | 117 | double eps = 1e-4; 118 | 119 | int i1 = 0, i2 = 0; 120 | while(i1 < n1 && i2 < n2) 121 | { 122 | if(v1_ind[i1] == v2_ind[i2]) 123 | { 124 | if (std::abs( (v1_val[i1] - v2_val[i2]) / (std::abs(v2_val[i2]) + eps)) > tolerance) 125 | { 126 | return 0; 127 | } else 128 | { 129 | i1++; 130 | i2++; 131 | } 132 | } else 133 | { 134 | 
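// the nonzero patterns of the two iterates differ here (a coefficient entered or left the active set), so do not declare convergence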
return 0; 135 | } 136 | } 137 | return 1; 138 | } 139 | 140 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include <RcppEigen.h> 5 | #include <Rcpp.h> 6 | 7 | using namespace Rcpp; 8 | 9 | // coord_ordinis_dense_cpp 10 | List coord_ordinis_dense_cpp(Rcpp::NumericMatrix x, Rcpp::NumericVector y, Rcpp::NumericVector weights, Rcpp::NumericVector lambda, Rcpp::NumericVector penalty_factor, Rcpp::NumericMatrix limits, int nlambda, double lmin_ratio, bool standardize, bool intercept, List opts); 11 | RcppExport SEXP _ordinis_coord_ordinis_dense_cpp(SEXP xSEXP, SEXP ySEXP, SEXP weightsSEXP, SEXP lambdaSEXP, SEXP penalty_factorSEXP, SEXP limitsSEXP, SEXP nlambdaSEXP, SEXP lmin_ratioSEXP, SEXP standardizeSEXP, SEXP interceptSEXP, SEXP optsSEXP) { 12 | BEGIN_RCPP 13 | Rcpp::RObject rcpp_result_gen; 14 | Rcpp::RNGScope rcpp_rngScope_gen; 15 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type x(xSEXP); 16 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type y(ySEXP); 17 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type weights(weightsSEXP); 18 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type lambda(lambdaSEXP); 19 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type penalty_factor(penalty_factorSEXP); 20 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type limits(limitsSEXP); 21 | Rcpp::traits::input_parameter< int >::type nlambda(nlambdaSEXP); 22 | Rcpp::traits::input_parameter< double >::type lmin_ratio(lmin_ratioSEXP); 23 | Rcpp::traits::input_parameter< bool >::type standardize(standardizeSEXP); 24 | Rcpp::traits::input_parameter< bool >::type intercept(interceptSEXP); 25 | Rcpp::traits::input_parameter< List >::type opts(optsSEXP); 26 | rcpp_result_gen = Rcpp::wrap(coord_ordinis_dense_cpp(x, y, weights, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, opts)); 27 | return rcpp_result_gen; 28 | END_RCPP 29 | } 30 | // coord_ordinis_dense_glm_cpp 31 | List coord_ordinis_dense_glm_cpp(Rcpp::NumericMatrix x, Rcpp::NumericVector y, Rcpp::NumericVector weights, Rcpp::NumericVector offset, Rcpp::NumericVector lambda, Rcpp::NumericVector penalty_factor, Rcpp::NumericMatrix limits, int nlambda, double lmin_ratio, bool standardize, bool intercept, bool glm_fam, List opts); 32 | RcppExport SEXP _ordinis_coord_ordinis_dense_glm_cpp(SEXP xSEXP, SEXP ySEXP, SEXP weightsSEXP, SEXP offsetSEXP, SEXP lambdaSEXP, SEXP penalty_factorSEXP, SEXP limitsSEXP, SEXP nlambdaSEXP, SEXP lmin_ratioSEXP, SEXP standardizeSEXP, SEXP interceptSEXP, SEXP glm_famSEXP, SEXP optsSEXP) { 33 | BEGIN_RCPP 34 | Rcpp::RObject rcpp_result_gen; 35 | Rcpp::RNGScope rcpp_rngScope_gen; 36 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type x(xSEXP); 37 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type y(ySEXP); 38 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type weights(weightsSEXP); 39 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type offset(offsetSEXP); 40 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type lambda(lambdaSEXP); 41 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type penalty_factor(penalty_factorSEXP); 42 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type limits(limitsSEXP); 43 | Rcpp::traits::input_parameter< int 
>::type nlambda(nlambdaSEXP); 44 | Rcpp::traits::input_parameter< double >::type lmin_ratio(lmin_ratioSEXP); 45 | Rcpp::traits::input_parameter< bool >::type standardize(standardizeSEXP); 46 | Rcpp::traits::input_parameter< bool >::type intercept(interceptSEXP); 47 | Rcpp::traits::input_parameter< bool >::type glm_fam(glm_famSEXP); 48 | Rcpp::traits::input_parameter< List >::type opts(optsSEXP); 49 | rcpp_result_gen = Rcpp::wrap(coord_ordinis_dense_glm_cpp(x, y, weights, offset, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, glm_fam, opts)); 50 | return rcpp_result_gen; 51 | END_RCPP 52 | } 53 | 54 | static const R_CallMethodDef CallEntries[] = { 55 | {"_ordinis_coord_ordinis_dense_cpp", (DL_FUNC) &_ordinis_coord_ordinis_dense_cpp, 11}, 56 | {"_ordinis_coord_ordinis_dense_glm_cpp", (DL_FUNC) &_ordinis_coord_ordinis_dense_glm_cpp, 13}, 57 | {NULL, NULL, 0} 58 | }; 59 | 60 | RcppExport void R_init_ordinis(DllInfo *dll) { 61 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 62 | R_useDynamicSymbols(dll, FALSE); 63 | } 64 | -------------------------------------------------------------------------------- /man/ordinis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_call.R 3 | \name{ordinis} 4 | \alias{ordinis} 5 | \title{Fitting Lasso-Penalized Models Using the Coordinate Descent Algorithm} 6 | \usage{ 7 | ordinis(x, y, weights = rep(1, NROW(y)), offset = NULL, 8 | family = NULL, penalty = c("lasso", "alasso", "mcp", "scad"), 9 | lambda = numeric(0), alpha = 1, gamma = ifelse(penalty == "scad", 10 | 3.7, 1.4), penalty.factor = NULL, upper.limits = rep(Inf, NCOL(x)), 11 | lower.limits = rep(-Inf, NCOL(x)), nlambda = 100L, 12 | lambda.min.ratio = NULL, intercept = TRUE, standardize = TRUE, 13 | dfmax = nvars, maxit = NULL, tol = NULL, maxit.irls = 25L, 14 | tol.irls = 0.001) 15 | } 16 | \arguments{ 17 | \item{x}{The design matrix} 18 | 19 | \item{y}{The response vector} 20 | 21 | \item{weights}{a vector of weights of length equal to length of \code{y}. \code{weights} are NOT standardized or scaled; the user must 22 | do this if desired} 23 | 24 | \item{offset}{A vector of length \code{nobs} that is included in the linear predictor} 25 | 26 | \item{family}{family of the underlying model. \code{"gaussian"} for continuous responses and \code{"binomial"} for binary responses are available} 27 | 28 | \item{penalty}{a string indicating which penalty to use. \code{"lasso"}, \code{"alasso"} (adaptive lasso), \code{"mcp"}, and \code{"scad"} 29 | are available} 30 | 31 | \item{lambda}{A user provided sequence of \eqn{\lambda}. If set to 32 | \code{NULL}, the program will calculate its own sequence 33 | according to \code{nlambda} and \code{lambda_min_ratio}, 34 | which starts from \eqn{\lambda_0} (with this 35 | \eqn{\lambda} all coefficients will be zero) and ends at 36 | \code{lambda0 * lambda_min_ratio}, containing 37 | \code{nlambda} values equally spaced on the log scale. 38 | It is recommended to let the program compute 39 | its own sequence.} 40 | 41 | \item{alpha}{mixing parameter between 0 and 1 for elastic net. \code{alpha=1} is for the lasso, \code{alpha=0} is for ridge} 42 | 43 | \item{gamma}{parameter for MCP/SCAD. Defaults to the recommended values from the papers corresponding to each penalty} 44 | 45 | \item{penalty.factor}{a vector with length equal to the number of columns in x to be multiplied by lambda. By default 46 | it is a vector of 1s. 
\code{penalty.factor} is NOT scaled} 47 | 48 | \item{upper.limits}{a vector of length \code{ncol(x)} of upper limits for each coefficient. Can be a single value, which will 49 | then be applied for each coefficient. Must be non-negative.} 50 | 51 | \item{lower.limits}{a vector of length \code{ncol(x)} of lower limits for each coefficient. Can be a single value, which will 52 | then be applied for each coefficient. Cannot be greater than 0.} 53 | 54 | \item{nlambda}{Number of values in the \eqn{\lambda} sequence. Only used 55 | when the program calculates its own \eqn{\lambda} 56 | (by setting \code{lambda = NULL}).} 57 | 58 | \item{lambda.min.ratio}{Smallest value in the \eqn{\lambda} sequence 59 | as a fraction of \eqn{\lambda_0}. See 60 | the explanation of the \code{lambda} 61 | argument. This parameter is only used when 62 | the program calculates its own \eqn{\lambda} 63 | (by setting \code{lambda = NULL}). The default 64 | value is the same as \pkg{glmnet}: 0.001 if 65 | \code{nrow(x) >= ncol(x)} and 0.05 otherwise.} 66 | 67 | \item{intercept}{Whether to fit an intercept in the model. Default is \code{TRUE}.} 68 | 69 | \item{standardize}{Whether to standardize the design matrix before 70 | fitting the model. Default is \code{TRUE}. Fitted coefficients 71 | are always returned on the original scale.} 72 | 73 | \item{dfmax}{Maximum number of variables allowed in the model} 74 | 75 | \item{maxit}{Maximum number of coordinate descent iterations.} 76 | 77 | \item{tol}{convergence tolerance parameter for the coordinate descent iterations.} 78 | 79 | \item{maxit.irls}{Maximum number of IRLS (iteratively reweighted least squares) iterations for \code{family = "binomial"}.} 80 | 81 | \item{tol.irls}{convergence tolerance parameter for the IRLS iterations.} 82 | } 83 | \description{ 84 | ordinis provides estimation of linear and logistic regression models with the lasso, adaptive lasso, MCP, and SCAD penalties 85 | } 86 | \examples{ 87 | set.seed(123) 88 | n = 100 89 | p = 1000 90 | b = c(runif(10, min = 0.1, max = 1), rep(0, p - 10)) 91 | x = matrix(rnorm(n * p, sd = 1.5), n, p) 92 | y = drop(x \%*\% b) + rnorm(n) 93 | 94 | 95 | ## fit lasso model with 100 tuning parameter values 96 | res <- ordinis(x, y) 97 | 98 | y2 <- 1 * (y > 0) 99 | y3 <- exp(y) 100 | 101 | resb <- ordinis(x, y2, family = "binomial") 102 | 103 | } 104 | -------------------------------------------------------------------------------- /src/ordinis_dense.cpp: -------------------------------------------------------------------------------- 1 | #define EIGEN_DONT_PARALLELIZE 2 | 3 | #include "CoordGaussianDense.h" 4 | #include "DataStd.h" 5 | 6 | 7 | using Eigen::MatrixXf; 8 | using Eigen::VectorXf; 9 | using Eigen::MatrixXd; 10 | using Eigen::VectorXd; 11 | using Eigen::ArrayXf; 12 | using Eigen::ArrayXd; 13 | using Eigen::ArrayXXf; 14 | using Eigen::Map; 15 | 16 | using Rcpp::wrap; 17 | using Rcpp::as; 18 | using Rcpp::List; 19 | using Rcpp::Named; 20 | using Rcpp::IntegerVector; 21 | 22 | typedef Map<VectorXd> MapVecd; 23 | typedef Map<MatrixXd> MapMatd; 24 | typedef Eigen::SparseVector<double> SpVec; 25 | typedef Eigen::SparseMatrix<double> SpMat; 26 | 27 | inline void write_beta_matrix(SpMat &betas, int col, double beta0, SpVec &coef) 28 | { 29 | betas.insert(0, col) = beta0; 30 | 31 | for(SpVec::InnerIterator iter(coef); iter; ++iter) 32 | { 33 | betas.insert(iter.index() + 1, col) = iter.value(); 34 | } 35 | } 36 | 37 | List coord_ordinis_dense(Rcpp::NumericMatrix x_, 38 | Rcpp::NumericVector y_, 39 | Rcpp::NumericVector weights_, 40 | Rcpp::NumericVector lambda_, 41 | Rcpp::NumericVector penalty_factor_, 42 | Rcpp::NumericMatrix limits_, 43 | int nlambda_, 44 | double lmin_ratio_, 45 | bool standardize_, 46 | bool 
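// whether to fit an (unpenalized) intercept; when true the data are centered via DataStd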
intercept_, 47 | List opts_) 48 | { 49 | 50 | const int n = x_.rows(); 51 | const int p = x_.cols(); 52 | 53 | MatrixXd datX(n, p); 54 | VectorXd datY(n); 55 | VectorXd weights(n); 56 | MatrixXd limits(2, p); 57 | 58 | // Copy data from the R objects into Eigen structures 59 | std::copy(x_.begin(), x_.end(), datX.data()); 60 | std::copy(y_.begin(), y_.end(), datY.data()); 61 | std::copy(weights_.begin(), weights_.end(), weights.data()); 62 | 63 | std::copy(limits_.begin(), limits_.end(), limits.data()); 64 | 65 | //Map<VectorXd> weights(as<Map<VectorXd> >(weights_)); 66 | 67 | // In glmnet, we minimize 68 | // 1/(2n) * ||y - X * beta||^2 + lambda * ||beta||_1 69 | // which is equivalent to minimizing 70 | // 1/2 * ||y - X * beta||^2 + n * lambda * ||beta||_1 71 | ArrayXd lambda(as<ArrayXd>(lambda_)); 72 | int nlambda = lambda.size(); 73 | 74 | ArrayXd penalty_factor(as<ArrayXd>(penalty_factor_)); 75 | 76 | 77 | List opts(opts_); 78 | const int maxit = as<int>(opts["maxit"]); 79 | const double tol = as<double>(opts["tol"]); 80 | const double alpha = as<double>(opts["alpha"]); 81 | const double gamma = as<double>(opts["gamma"]); 82 | const int dfmax = as<int>(opts["dfmax"]); 83 | const bool standardize = standardize_; 84 | const bool intercept = intercept_; 85 | 86 | std::vector<std::string> penalty(as< std::vector<std::string> >(opts["penalty"])); 87 | 88 | DataStd<double> datstd(n, p, standardize, intercept, true); 89 | datstd.standardize(datX, datY, limits); 90 | 91 | 92 | CoordGaussianDense *solver; 93 | solver = new CoordGaussianDense(datX, datY, weights, 94 | penalty_factor, 95 | limits, penalty[0], 96 | intercept, 97 | alpha, tol); 98 | 99 | if (nlambda < 1) 100 | { 101 | double lmax = 0.0; 102 | lmax = solver->get_lambda_zero(); // * datstd.get_scaleY(); 103 | 104 | double lmin = lmin_ratio_ * lmax; 105 | lambda.setLinSpaced(nlambda_, std::log(lmax), std::log(lmin)); 106 | lambda = lambda.exp(); 107 | nlambda = lambda.size(); 108 | } 109 | 110 | 111 | 112 | SpMat beta(p + 1, nlambda); 113 | beta.reserve(Eigen::VectorXi::Constant(nlambda, std::min(n, p))); 114 | 115 | //MatrixXd beta(p + 1, nlambda); 116 | VectorXd lossvec(nlambda); 117 | 118 | 119 | IntegerVector niter(nlambda); 120 | double ilambda = 0.0; 121 | 122 | int last = nlambda; 123 | for(int i = 0; i < nlambda; i++) 124 | { 125 | ilambda = lambda[i]; // * double(n); // / datstd.get_scaleY(); 126 | 127 | if(i == 0) 128 | solver->init(ilambda, gamma); 129 | else 130 | solver->init_warm(ilambda, gamma); 131 | 132 | niter[i] = solver->solve(maxit); 133 | SpVec res = solver->get_beta(); 134 | 135 | int nzero = solver->get_nzero(); 136 | double beta0 = 0.0; 137 | beta0 = solver->get_intercept(); 138 | datstd.recover(beta0, res); 139 | //beta(0,i) = beta0; 140 | //beta.block(1, i, p, 1) = res; 141 | write_beta_matrix(beta, i, beta0, res); 142 | 143 | lossvec(i) = solver->get_loss(); 144 | 145 | if (nzero > dfmax && i > 0) 146 | { 147 | last = i; 148 | break; 149 | } 150 | } 151 | 152 | delete solver; 153 | 154 | beta.makeCompressed(); 155 | 156 | return List::create(Named("beta") = beta, 157 | Named("niter") = niter, 158 | Named("lambda") = lambda, 159 | Named("loss") = lossvec, 160 | Named("last") = last); 161 | } 162 | 163 | // [[Rcpp::export]] 164 | List coord_ordinis_dense_cpp(Rcpp::NumericMatrix x, 165 | Rcpp::NumericVector y, 166 | Rcpp::NumericVector weights, 167 | Rcpp::NumericVector lambda, 168 | Rcpp::NumericVector penalty_factor, 169 | Rcpp::NumericMatrix limits, 170 | int nlambda, 171 | double lmin_ratio, 172 | bool standardize, 173 | bool intercept, 174 | List opts) 175 | { 176 | return coord_ordinis_dense(x, y, weights, 
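// exported wrapper: algorithm settings (maxit, tol, alpha, gamma, penalty, dfmax) are passed through the opts list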
lambda, 178 | penalty_factor, 179 | limits, 180 | nlambda, 181 | lmin_ratio, 182 | standardize, 183 | intercept, 184 | opts); 185 | } 186 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | ```{r setup, include=FALSE} 7 | library(knitr) 8 | opts_chunk$set(message = FALSE) 9 | ``` 10 | 11 | 12 | ## Introduction to `ordinis` 13 | 14 | The `ordinis` package provides computation for penalized regression problems via coordinate descent. It is mostly for my own experimentation at this stage; however, it is fairly efficient and reliable. 15 | 16 | Install using the **devtools** package: 17 | 18 | ```r 19 | devtools::install_github("jaredhuling/ordinis") 20 | ``` 21 | 22 | or by cloning and building. 23 | 24 | ## Example 25 | 26 | ```{r, warning=FALSE, message=FALSE, fig.path="vignettes/"} 27 | library(ordinis) 28 | 29 | # compute the full solution path, n > p 30 | set.seed(123) 31 | n <- 500 32 | p <- 50000 33 | m <- 50 34 | b <- matrix(c(runif(m), rep(0, p - m))) 35 | x <- matrix(rnorm(n * p, sd = 3), n, p) 36 | y <- drop(x %*% b) + rnorm(n) 37 | 38 | mod <- ordinis(x, y, 39 | penalty = "mcp", 40 | lower.limits = rep(0, p), # force all coefficients to be positive 41 | penalty.factor = c(0, 0, rep(1, p-2)), # don't penalize first two coefficients 42 | alpha = 0.95) # use elastic net with alpha = 0.95 43 | 44 | plot(mod) 45 | 46 | ## show likelihood 47 | logLik(mod) 48 | 49 | ## compute AIC 50 | AIC(mod) 51 | 52 | ## BIC 53 | BIC(mod) 54 | ``` 55 | 56 | ## Performance 57 | 58 | ### Lasso (linear regression) 59 | 60 | ```{r, warning=FALSE, message=FALSE} 61 | library(microbenchmark) 62 | library(glmnet) 63 | 64 | b <- matrix(c(runif(m, min = -1), rep(0, p - m))) 65 | x <- matrix(rnorm(n * p, sd = 3), n, p) 66 | y <- drop(x %*% b) + rnorm(n) 67 | 68 | lambdas = glmnet(x, y)$lambda 69 | 70 | microbenchmark( 71 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-10, # thresh must be very small 72 | lambda = lambdas)}, # for comparable precision 73 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 74 | tol = 1e-3)}, 75 | times = 5 76 | ) 77 | 78 | 79 | # difference of results 80 | max(abs(coef(resg) - reso$beta)) 81 | 82 | microbenchmark( 83 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-15, # thresh must be very low for comparable precision 84 | lambda = lambdas)}, 85 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 86 | tol = 1e-3)}, 87 | times = 5 88 | ) 89 | 90 | # difference of results 91 | max(abs(coef(resg) - reso$beta)) 92 | 93 | 94 | ``` 95 | 96 | ### Lasso (logistic regression) 97 | 98 | `glmnet` is clearly faster for logistic regression for the same precision 99 | 100 | ```{r, warning=FALSE, message=FALSE} 101 | library(MASS) 102 | 103 | set.seed(123) 104 | n <- 200 105 | p <- 10000 106 | m <- 20 107 | b <- matrix(c(runif(m, min = -0.5, max = 0.5), rep(0, p - m))) 108 | x <- matrix(rnorm(n * p, sd = 3), n, p) 109 | y <- 1 *(drop(x %*% b) + rnorm(n) > 0) 110 | 111 | lambdas = glmnet(x, y, family = "binomial", lambda.min.ratio = 0.02)$lambda 112 | 113 | microbenchmark( 114 | "glmnet[lasso]" = {resg <- glmnet(x, y, family = "binomial", 115 | thresh = 1e-10, 116 | lambda = lambdas)}, 117 | "ordinis[lasso]" = {reso <- ordinis(x, y, family = "binomial", 118 | lambda = lambdas, 119 | tol = 1e-3, tol.irls = 1e-3)}, 120 | times = 5 121 | ) 122 | 123 | # difference of results 
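# (maximum absolute difference between the glmnet and ordinis coefficient paths)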
max(abs(coef(resg) - reso$beta)) 125 | 126 | 127 | microbenchmark( 128 | "glmnet[lasso]" = {resg <- glmnet(x, y, family = "binomial", 129 | thresh = 1e-15, 130 | lambda = lambdas)}, 131 | "ordinis[lasso]" = {reso <- ordinis(x, y, family = "binomial", 132 | lambda = lambdas, 133 | tol = 1e-3, tol.irls = 1e-3)}, 134 | times = 5 135 | ) 136 | 137 | # difference of results 138 | max(abs(coef(resg) - reso$beta)) 139 | 140 | ``` 141 | 142 | ### Lasso (linear regression, ill-conditioned) 143 | 144 | ```{r, warning=FALSE, message=FALSE} 145 | library(MASS) 146 | 147 | set.seed(123) 148 | n <- 500 149 | p <- 1000 150 | m <- 50 151 | b <- matrix(c(runif(m, min = -1), rep(0, p - m))) 152 | sig <- matrix(0.5, ncol=p,nrow=p); diag(sig) <- 1 153 | x <- mvrnorm(n, mu=rep(0, p), Sigma = sig) 154 | y <- drop(x %*% b) + rnorm(n) 155 | 156 | lambdas = glmnet(x, y)$lambda[1:65] 157 | 158 | microbenchmark( 159 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-9, # thresh must be very small 160 | lambda = lambdas)}, # for comparable precision 161 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 162 | tol = 1e-5)}, 163 | times = 5 164 | ) 165 | 166 | 167 | # difference of results 168 | max(abs(coef(resg) - reso$beta)) 169 | 170 | microbenchmark( 171 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-11, # thresh must be very low for comparable precision 172 | lambda = lambdas)}, 173 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 174 | tol = 1e-5)}, 175 | times = 5 176 | ) 177 | 178 | # difference of results 179 | max(abs(coef(resg) - reso$beta)) 180 | 181 | ``` 182 | 183 | ### Validity of solutions with various bells and whistles 184 | 185 | Due to internal differences in standardization, we now compare with `glmnet` when using observation weights, penalty scaling factors, and parameter box constraints 186 | 187 | ```{r} 188 | 189 | set.seed(123) 190 | n = 200 191 | p = 1000 192 | m <- 15 193 | b = c(runif(m, min = -0.5, max = 0.5), rep(0, p - m)) 194 | x = (matrix(rnorm(n * p, sd = 3), n, p)) 195 | y = drop(x %*% b) + rnorm(n) 196 | y2 <- 1 * (y > rnorm(n, mean = 0.5, sd = 3)) 197 | 198 | 199 | wts <- runif(nrow(x)) 200 | wts <- wts / mean(wts) # re-scale like glmnet does, so we can compare 201 | 202 | penalty.factor <- rbinom(ncol(x), 1, 0.99) * runif(ncol(x)) * 5 203 | penalty.factor <- (penalty.factor / sum(penalty.factor)) * ncol(x) # re-scale like glmnet does, so we can compare 204 | 205 | system.time(resb <- ordinis(x, y2, family = "binomial", tol = 1e-7, tol.irls = 1e-5, 206 | penalty = "lasso", 207 | alpha = 0.5, #elastic net term 208 | lower.limits = 0, upper.limits = 0.02, # box constraints on all parameters 209 | standardize = FALSE, intercept = TRUE, 210 | weights = wts, # observation weights 211 | penalty.factor = penalty.factor)) # penalty scaling factors 212 | 213 | system.time(resg <- glmnet(x,y2, family = "binomial", 214 | lambda = resb$lambda, 215 | alpha = 0.5, #elastic net term 216 | weights = wts, # observation weights 217 | penalty.factor = penalty.factor, # penalty scaling factors 218 | lower.limits = 0, upper.limits = 0.02, # box constraints on all parameters 219 | standardize = FALSE, intercept = TRUE, 220 | thresh = 1e-16)) 221 | 222 | ## compare solutions 223 | max(abs(resb$beta[-1,] - resg$beta)) 224 | 225 | 226 | # now with no box constraints 227 | system.time(resb <- ordinis(x, y2, family = "binomial", tol = 1e-7, tol.irls = 1e-5, 228 | penalty = "lasso", 229 | alpha = 0.5, #elastic net term 230 | standardize = FALSE, intercept = TRUE, 231 | 
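# remaining settings match the glmnet call below so the solutions are comparable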
weights = wts, # observation weights 232 | penalty.factor = penalty.factor)) # penalty scaling factors 233 | 234 | system.time(resg <- glmnet(x,y2, family = "binomial", 235 | lambda = resb$lambda, 236 | alpha = 0.5, #elastic net term 237 | weights = wts, # observation weights 238 | penalty.factor = penalty.factor, # penalty scaling factors 239 | standardize = FALSE, intercept = TRUE, 240 | thresh = 1e-16)) 241 | 242 | ## compare solutions 243 | max(abs(resb$beta[-1,] - resg$beta)) 244 | 245 | ``` 246 | 247 | ### A Note on the Elastic Net and linear models 248 | 249 | Because glmnet handles scaling of the response differently, it yields slightly different solutions than both ordinis and ncvreg for Gaussian models with a ridge penalty term 250 | 251 | ```{r} 252 | library(ncvreg) 253 | 254 | ## I'm setting all methods to have high precision just so solutions are comparable. 255 | ## differences in computation time may be due in part to the arbitrariness of the 256 | ## particular precisions chosen 257 | system.time(resg <- glmnet(x, y, family = "gaussian", alpha = 0.25, 258 | thresh = 1e-15)) 259 | 260 | system.time(res <- ordinis(x, y, family = "gaussian", penalty = "lasso", alpha = 0.25, 261 | tol = 1e-10, lambda = resg$lambda)) 262 | 263 | system.time(resn <- ncvreg(x, y, family="gaussian", penalty = "lasso", 264 | lambda = resg$lambda, alpha = 0.25, max.iter = 100000, 265 | eps = 1e-10)) 266 | 267 | resgg <- res; resgg$beta[-1,] <- resg$beta 268 | 269 | # compare ordinis and glmnet 270 | max(abs(res$beta[-1,] - resg$beta)) 271 | 272 | # compare ordinis and ncvreg 273 | max(abs(res$beta - resn$beta)) 274 | 275 | # compare ncvreg and glmnet 276 | max(abs(resn$beta[-1,] - resg$beta)) 277 | ``` 278 | 279 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | 2 | ## Taken from Jerome Friedman, Trevor Hastie, Noah Simon, and Rob Tibshirani's package glmnet 3 | ## https://cran.r-project.org/web/packages/glmnet/index.html 4 | lambda.interp=function(lambda,s){ 5 | ### lambda is the index sequence that is produced by the model 6 | ### s is the new vector at which evaluations are required. 7 | ### the value is a vector of left and right indices, and a vector of fractions. 8 | ### the new values are interpolated between the two using the fraction 9 | ### Note: lambda decreases. 
you take: 10 | ### sfrac*left+(1-sfrac*right) 11 | 12 | if(length(lambda)==1){# degenerate case of only one lambda 13 | nums=length(s) 14 | left=rep(1,nums) 15 | right=left 16 | sfrac=rep(1,nums) 17 | } 18 | else{ 19 | s[s > max(lambda)] = max(lambda) 20 | s[s < min(lambda)] = min(lambda) 21 | k=length(lambda) 22 | sfrac <- (lambda[1]-s)/(lambda[1] - lambda[k]) 23 | lambda <- (lambda[1] - lambda)/(lambda[1] - lambda[k]) 24 | coord <- approx(lambda, seq(lambda), sfrac)$y 25 | left <- floor(coord) 26 | right <- ceiling(coord) 27 | sfrac=(sfrac-lambda[right])/(lambda[left] - lambda[right]) 28 | sfrac[left==right]=1 29 | } 30 | list(left=left,right=right,frac=sfrac) 31 | } 32 | 33 | ## Taken from Jerome Friedman, Trevor Hastie, Noah Simon, and Rob Tibshirani's package glmnet 34 | ## https://cran.r-project.org/web/packages/glmnet/index.html 35 | nonzeroCoef = function (beta, bystep = FALSE) 36 | { 37 | ### bystep = FALSE means which variables were ever nonzero 38 | ### bystep = TRUE means which variables are nonzero for each step 39 | nr=nrow(beta) 40 | if (nr == 1) {#degenerate case 41 | if (bystep) 42 | apply(beta, 2, function(x) if (abs(x) > 0) 43 | 1 44 | else NULL) 45 | else { 46 | if (any(abs(beta) > 0)) 47 | 1 48 | else NULL 49 | } 50 | } 51 | else { 52 | beta=abs(beta)>0 # this is sparse 53 | which=seq(nr) 54 | ones=rep(1,ncol(beta)) 55 | nz=as.vector((beta%*%ones)>0) 56 | which=which[nz] 57 | if (bystep) { 58 | if(length(which)>0){ 59 | beta=as.matrix(beta[which,,drop=FALSE]) 60 | nzel = function(x, which) if (any(x)) 61 | which[x] 62 | else NULL 63 | which=apply(beta, 2, nzel, which) 64 | if(!is.list(which))which=data.frame(which)# apply can return a matrix!! 65 | which 66 | } 67 | else{ 68 | dn=dimnames(beta)[[2]] 69 | which=vector("list",length(dn)) 70 | names(which)=dn 71 | which 72 | } 73 | 74 | } 75 | else which 76 | } 77 | } 78 | 79 | ## Taken from Rahul Mazumder, Trevor Hastie, and Jerome Friedman's sparsenet package 80 | ## https://cran.r-project.org/web/packages/sparsenet/index.html 81 | argmin=function(x){ 82 | vx=as.vector(x) 83 | imax=order(vx)[1] 84 | if(!is.matrix(x))imax 85 | else{ 86 | d=dim(x) 87 | c1=as.vector(outer(seq(d[1]),rep(1,d[2])))[imax] 88 | c2=as.vector(outer(rep(1,d[1]),seq(d[2])))[imax] 89 | c(c1,c2) 90 | 91 | } 92 | } 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | # taken from glmnet 101 | cvcompute=function(mat,weights,foldid,nlams){ 102 | ###Computes the weighted mean and SD within folds, and hence the se of the mean 103 | wisum=tapply(weights,foldid,sum) 104 | nfolds=max(foldid) 105 | outmat=matrix(NA,nfolds,ncol(mat)) 106 | good=matrix(0,nfolds,ncol(mat)) 107 | mat[is.infinite(mat)]=NA#just in case some infinities crept in 108 | for(i in seq(nfolds)){ 109 | mati=mat[foldid==i,,drop=FALSE] 110 | wi=weights[foldid==i] 111 | outmat[i,]=apply(mati,2,weighted.mean,w=wi,na.rm=TRUE) 112 | good[i,seq(nlams[i])]=1 113 | } 114 | N=apply(good,2,sum) 115 | list(cvraw=outmat,weights=wisum,N=N) 116 | } 117 | 118 | # taken from glmnet 119 | getmin=function(lambda,cvm,cvsd){ 120 | cvmin=min(cvm,na.rm=TRUE) 121 | idmin=cvm<=cvmin 122 | lambda.min=max(lambda[idmin],na.rm=TRUE) 123 | idmin=match(lambda.min,lambda) 124 | semin=(cvm+cvsd)[idmin] 125 | idmin=cvm<=semin 126 | lambda.1se=max(lambda[idmin],na.rm=TRUE) 127 | list(lambda.min=lambda.min,lambda.1se=lambda.1se) 128 | } 129 | 130 | 131 | 132 | # taken from glmnet 133 | auc=function(y,prob,w){ 134 | if(missing(w)){ 135 | rprob=rank(prob) 136 | n1=sum(y);n0=length(y)-n1 137 | u=sum(rprob[y==1])-n1*(n1+1)/2 138 | 
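# u is the Mann-Whitney U statistic; AUC = u / (n1 * n0), computed on the log scale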
exp(log(u) - log(n1) - log(n0)) 139 | } 140 | else{ 141 | rprob=runif(length(prob)) 142 | op=order(prob,rprob)#randomize ties 143 | y=y[op] 144 | w=w[op] 145 | cw=cumsum(w) 146 | w1=w[y==1] 147 | cw1=cumsum(w1) 148 | wauc = log(sum(w1*(cw[y==1]-cw1))) 149 | sumw1 = cw1[length(cw1)] 150 | sumw2 = cw[length(cw)] - sumw1 151 | exp(wauc - log(sumw1) - log(sumw2)) 152 | } 153 | } 154 | 155 | # taken from glmnet 156 | auc.mat=function(y,prob,weights=rep(1,nrow(y))){ 157 | Weights=as.vector(weights*y) 158 | ny=nrow(y) 159 | Y=rep(c(0,1),c(ny,ny)) 160 | Prob=c(prob,prob) 161 | auc(Y,Prob,Weights) 162 | } 163 | 164 | # taken from glmnet 165 | error.bars <- function(x, upper, lower, width = 0.02, ...) 166 | { 167 | xlim <- range(x) 168 | barw <- diff(xlim) * width 169 | segments(x, upper, x, lower, col = 8, lty = 5, lwd = 0.5, ...) 170 | segments(x - barw, upper, x + barw, upper, col = "grey50", lwd = 1, ...) 171 | segments(x - barw, lower, x + barw, lower, col = "grey50", lwd = 1, ...) 172 | range(upper, lower) 173 | } 174 | 175 | objective_logistic <- function(beta, x, y, lambda, penalty.factor = rep(1, ncol(x)), intercept = FALSE, alpha = 1) 176 | { 177 | if (intercept) 178 | { 179 | beta0 <- beta[1] 180 | beta <- beta[-1] 181 | 182 | xbeta <- drop(x %*% beta) + beta0 183 | } else 184 | { 185 | xbeta <- drop(x %*% beta) 186 | } 187 | neglogLik <- (-sum( y * xbeta ) + sum( log1p(exp(xbeta)) )) / nrow(x) 188 | 189 | neglogLik + sum(abs(beta) * lambda * penalty.factor * alpha) + 0.5 * sum((beta) ^ 2 * lambda * penalty.factor * (1 - alpha)) 190 | } 191 | 192 | objective_logistic <- function(beta, x, y, lambda, penalty.factor = rep(1, ncol(x)), intercept = FALSE, alpha = 1, 193 | penalty = c("lasso", "mcp", "scad"), gamma = 3.7) 194 | { 195 | penalty <- match.arg(penalty) 196 | if (intercept) 197 | { 198 | beta0 <- beta[1] 199 | beta <- beta[-1] 200 | 201 | xbeta <- drop(x %*% beta) + beta0 202 | } else 203 | { 204 | xbeta <- drop(x %*% beta) 205 | } 206 | neglogLik <- (-sum( y * xbeta ) + sum( log1p(exp(xbeta)) )) / nrow(x) 207 | 208 | if (penalty == "lasso") 209 | { 210 | penalty.part <- sum(abs(beta) * lambda * penalty.factor * alpha) 211 | } else if (penalty == "mcp") 212 | { 213 | penalty.part <- 0 214 | for (j in 1:length(beta)) 215 | { 216 | pen.cur <- penalty.factor[j] * lambda * alpha 217 | b <- beta[j] 218 | if (abs(b) <= gamma * pen.cur) 219 | { 220 | penalty.part <- penalty.part + pen.cur * abs(b) - b ^ 2 / (2 * gamma) 221 | } else 222 | { 223 | penalty.part <- penalty.part + 0.5 * (gamma) * pen.cur ^ 2 224 | } 225 | } 226 | } else 227 | { 228 | penalty.part <- 0 229 | for (j in 1:length(beta)) 230 | { 231 | pen.cur <- penalty.factor[j] * lambda * alpha 232 | b <- beta[j] 233 | if (abs(b) <= pen.cur) 234 | { 235 | penalty.part <- penalty.part + pen.cur * abs(b) 236 | } else if (abs(b) <= gamma * pen.cur) 237 | { 238 | penalty.part <- penalty.part - (abs(b) ^ 2 - 2 * gamma * pen.cur * abs(b) + pen.cur ^ 2) / (2 * (gamma - 1)) 239 | } else 240 | { 241 | penalty.part <- penalty.part + 0.5 * (gamma + 1) * pen.cur ^ 2 242 | } 243 | } 244 | } 245 | 246 | neglogLik + penalty.part + 0.5 * sum((beta) ^ 2 * lambda * penalty.factor * (1 - alpha)) 247 | } 248 | 249 | objective_linear <- function(beta, x, y, lambda, penalty.factor = rep(1, ncol(x)), intercept = FALSE, alpha = 1, 250 | penalty = c("lasso", "mcp", "scad"), gamma = 3.7) 251 | { 252 | penalty <- match.arg(penalty) 253 | if (intercept) 254 | { 255 | beta0 <- beta[1] 256 | beta <- beta[-1] 257 | 258 | xbeta <- drop(x %*% beta) + beta0 259 | 
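# the linear predictor includes the unpenalized intercept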
259 | } else
260 | {
261 | xbeta <- drop(x %*% beta)
262 | }
263 | sumsq <- 0.5 * sum( (y - xbeta) ^ 2 ) / nrow(x)
264 | 
265 | if (penalty == "lasso")
266 | {
267 | penalty.part <- sum(abs(beta) * lambda * penalty.factor * alpha)
268 | } else if (penalty == "mcp")
269 | {
270 | penalty.part <- 0
271 | for (j in 1:length(beta))
272 | {
273 | pen.cur <- penalty.factor[j] * lambda * alpha
274 | b <- beta[j]
275 | if (abs(b) <= gamma * pen.cur)
276 | {
277 | penalty.part <- penalty.part + pen.cur * abs(b) - b ^ 2 / (2 * gamma)
278 | } else
279 | {
280 | penalty.part <- penalty.part + 0.5 * (gamma) * pen.cur ^ 2
281 | }
282 | }
283 | } else
284 | {
285 | penalty.part <- 0
286 | for (j in 1:length(beta))
287 | {
288 | pen.cur <- penalty.factor[j] * lambda * alpha
289 | b <- beta[j]
290 | if (abs(b) <= pen.cur)
291 | {
292 | penalty.part <- penalty.part + pen.cur * abs(b)
293 | } else if (abs(b) <= gamma * pen.cur)
294 | {
295 | penalty.part <- penalty.part - (abs(b) ^ 2 - 2 * gamma * pen.cur * abs(b) + pen.cur ^ 2) / (2 * (gamma - 1))
296 | } else
297 | {
298 | penalty.part <- penalty.part + 0.5 * (gamma + 1) * pen.cur ^ 2
299 | }
300 | }
301 | }
302 | 
303 | sumsq + penalty.part + 0.5 * sum((beta) ^ 2 * lambda * penalty.factor * (1 - alpha))
304 | }
305 | 
--------------------------------------------------------------------------------
/src/DataStd.h:
--------------------------------------------------------------------------------
1 | #ifndef DATASTD_H
2 | #define DATASTD_H
3 | 
4 | #include <Eigen/Core>
5 | #include <Eigen/Sparse>
6 | 
7 | using Eigen::MatrixXd;
8 | 
9 | template <typename Double>
10 | class DataStd
11 | {
12 | private:
13 | typedef Eigen::SparseVector<Double> SparseVector;
14 | typedef Eigen::Matrix<Double, Eigen::Dynamic, Eigen::Dynamic> Matrix;
15 | typedef Eigen::Matrix<Double, Eigen::Dynamic, 1> Vector;
16 | typedef Eigen::Array<Double, Eigen::Dynamic, 1> Array;
17 | typedef const Eigen::Ref<const Vector> ConstGenericVector;
18 | typedef Eigen::Ref<Array> ArrayRef;
19 | 
20 | 
21 | // flag - 0: standardize = FALSE, intercept = FALSE
22 | //     directly fit model
23 | // flag - 1: standardize = TRUE, intercept = FALSE
24 | //     scale x and y by their standard deviation
25 | // flag - 2: standardize = FALSE, intercept = TRUE
26 | //     center x, standardize y
27 | // flag - 3: standardize = TRUE, intercept = TRUE
28 | //     standardize x and y
29 | const int flag;
30 | 
31 | const int n;
32 | const int p;
33 | 
34 | double meanY;
35 | double scaleY;
36 | bool glm;
37 | Array meanX;
38 | Array scaleX;
39 | 
40 | static double sd_n(ConstGenericVector &v)
41 | {
42 | double mean = v.mean();
43 | Vector v_centered = v.array() - mean;
44 | 
45 | return v_centered.norm() / std::sqrt(double(v.size()));
46 | }
47 | 
48 | // spvec -> spvec / arr, elementwise
49 | static void elementwise_quot(SparseVector &spvec, Array &arr)
50 | {
51 | for(typename SparseVector::InnerIterator iter(spvec); iter; ++iter)
52 | {
53 | iter.valueRef() /= arr[iter.index()];
54 | }
55 | }
56 | 
57 | // inner product of spvec and arr
58 | static double sparse_inner_product(SparseVector &spvec, Array &arr)
59 | {
60 | double res = 0.0;
61 | for(typename SparseVector::InnerIterator iter(spvec); iter; ++iter)
62 | {
63 | res += iter.value() * arr[iter.index()];
64 | }
65 | return res;
66 | }
67 | 
68 | public:
69 | DataStd(int n_, int p_, bool standardize, bool intercept, bool glm_ = false) :
70 | flag(int(standardize) + 2 * int(intercept)),
71 | n(n_),
72 | p(p_),
73 | meanY(0.0),
74 | scaleY(1.0),
75 | glm(glm_)
76 | {
77 | if(flag == 3 || flag == 2)
78 | meanX.resize(p);
79 | if(flag == 3 || flag == 1)
80 | scaleX.resize(p);
81 | }
82 | 
83 | void standardize(MatrixXd &X, Vector &Y, MatrixXd &limits) 84
| {
85 | double n_invsqrt = 1.0 / std::sqrt(Double(n));
86 | 
87 | // standardize Y
88 | switch(flag)
89 | {
90 | case 1:
91 | if (!glm)
92 | {
93 | scaleY = sd_n(Y);
94 | Y.array() /= scaleY;
95 | }
96 | break;
97 | case 2:
98 | case 3:
99 | if (!glm)
100 | {
101 | meanY = Y.mean();
102 | Y.array() -= meanY;
103 | scaleY = Y.norm() * n_invsqrt;
104 | Y.array() /= scaleY;
105 | }
106 | break;
107 | default:
108 | break;
109 | }
110 | 
111 | // standardize X
112 | switch(flag)
113 | {
114 | case 1:
115 | for(int i = 0; i < p; i++)
116 | {
117 | //X.col(i).array() *= wts.array();
118 | scaleX[i] = sd_n(X.col(i));
119 | X.col(i).array() *= (1.0 / scaleX[i]);
120 | limits.col(i).array() *= (scaleX[i]);
121 | }
122 | break;
123 | case 2:
124 | for(int i = 0; i < p; i++)
125 | {
126 | //X.col(i).array() *= wts.array();
127 | meanX[i] = X.col(i).mean();
128 | X.col(i).array() -= meanX[i];
129 | //limits.col(i).array() -= meanX[i];
130 | }
131 | break;
132 | case 3:
133 | for(int i = 0; i < p; i++)
134 | {
135 | /*meanX[i] = X.col(i).mean();
136 | X.col(i).array() -= meanX[i];
137 | scaleX[i] = X.col(i).norm() * n_invsqrt;
138 | X.col(i).array() /= scaleX[i];*/
139 | double *begin = &X(0, i);
140 | double *end = begin + n;
141 | //X.col(i).array() *= wts.array();
142 | meanX[i] = X.col(i).mean();
143 | std::transform(begin, end, begin, std::bind2nd(std::minus<double>(), meanX[i]));
144 | scaleX[i] = X.col(i).norm() * n_invsqrt;
145 | std::transform(begin, end, begin, std::bind2nd(std::multiplies<double>(), 1.0 / scaleX[i]));
146 | 
147 | limits.col(i).array() *= scaleX[i];
148 | //double *beginll = &limits(0, i);
149 | //double *endll = begin + 2;
150 | //X.col(i).array() *= wts.array();
151 | //std::transform(begin, endll, beginll, std::bind2nd(std::plus<double>(), meanX[i]));
152 | //std::transform(begin, endll, beginll, std::bind2nd(std::multiplies<double>(), scaleX[i]));
153 | }
154 | break;
155 | default:
156 | break;
157 | }
158 | }
159 | 
160 | void standardize(MatrixXd &X, Vector &Y, MatrixXd &limits, Vector &wts)
161 | {
162 | double n_invsqrt = 1.0 / std::sqrt(Double(n));
163 | 
164 | Vector sqrt_wts = wts.array().sqrt();
165 | 
166 | if (!glm)
167 | {
168 | Y.array() *= sqrt_wts.array();
169 | }
170 | 
171 | 
172 | 
173 | // standardize Y
174 | switch(flag)
175 | {
176 | case 1:
177 | if (!glm)
178 | {
179 | scaleY = sd_n(Y);
180 | Y.array() /= scaleY;
181 | }
182 | break;
183 | case 2:
184 | case 3:
185 | if (!glm)
186 | {
187 | meanY = Y.mean();
188 | Y.array() -= meanY;
189 | scaleY = Y.norm() * n_invsqrt;
190 | Y.array() /= scaleY;
191 | }
192 | break;
193 | default:
194 | break;
195 | }
196 | 
197 | // standardize X
198 | switch(flag)
199 | {
200 | case 1:
201 | for(int i = 0; i < p; i++)
202 | {
203 | X.col(i).array() *= sqrt_wts.array();
204 | scaleX[i] = sd_n(X.col(i));
205 | X.col(i).array() *= (1.0 / scaleX[i]);
206 | }
207 | break;
208 | case 2:
209 | for(int i = 0; i < p; i++)
210 | {
211 | X.col(i).array() *= sqrt_wts.array();
212 | meanX[i] = X.col(i).mean();
213 | X.col(i).array() -= meanX[i];
214 | }
215 | break;
216 | case 3:
217 | for(int i = 0; i < p; i++)
218 | {
219 | /*meanX[i] = X.col(i).mean();
220 | X.col(i).array() -= meanX[i];
221 | scaleX[i] = X.col(i).norm() * n_invsqrt;
222 | X.col(i).array() /= scaleX[i];*/
223 | double *begin = &X(0, i);
224 | double *end = begin + n;
225 | X.col(i).array() *= sqrt_wts.array();
226 | meanX[i] = X.col(i).mean();
227 | std::transform(begin, end, begin, std::bind2nd(std::minus<double>(), meanX[i]));
228 | scaleX[i] = X.col(i).norm() * n_invsqrt;
229 | std::transform(begin,
end, begin, std::bind2nd(std::multiplies<double>(), 1.0 / scaleX[i]));
230 | }
231 | break;
232 | default:
233 | break;
234 | }
235 | }
236 | 
237 | void recover(double &beta0, ArrayRef coef)
238 | {
239 | switch(flag)
240 | {
241 | case 0:
242 | beta0 = 0.0;
243 | break;
244 | case 1:
245 | coef /= scaleX;
246 | if (!glm)
247 | {
248 | beta0 = 0.0;
249 | coef *= scaleY;
250 | }
251 | break;
252 | case 2:
253 | if (!glm)
254 | {
255 | coef *= scaleY;
256 | beta0 = meanY - (coef * meanX).sum();
257 | } else
258 | {
259 | beta0 -= (coef * meanX).sum();
260 | }
261 | break;
262 | case 3:
263 | coef /= scaleX;
264 | if (!glm)
265 | {
266 | coef *= scaleY;
267 | beta0 = meanY - (coef * meanX).sum();
268 | } else
269 | {
270 | beta0 -= (coef * meanX).sum();
271 | }
272 | break;
273 | default:
274 | break;
275 | }
276 | }
277 | 
278 | void recover(double &beta0, SparseVector &coef)
279 | {
280 | switch(flag)
281 | {
282 | case 0:
283 | beta0 = 0.0;
284 | break;
285 | case 1:
286 | elementwise_quot(coef, scaleX);
287 | if (!glm)
288 | {
289 | coef *= scaleY;
290 | beta0 = 0.0;
291 | }
292 | break;
293 | case 2:
294 | if (!glm)
295 | {
296 | coef *= scaleY;
297 | beta0 = meanY - sparse_inner_product(coef, meanX);
298 | } else
299 | {
300 | beta0 -= sparse_inner_product(coef, meanX);
301 | }
302 | break;
303 | case 3:
304 | elementwise_quot(coef, scaleX);
305 | if (!glm)
306 | {
307 | coef *= scaleY;
308 | beta0 = meanY - sparse_inner_product(coef, meanX);
309 | } else
310 | {
311 | beta0 -= sparse_inner_product(coef, meanX);
312 | }
313 | break;
314 | default:
315 | break;
316 | }
317 | }
318 | 
319 | double get_scaleY() { return scaleY; }
320 | };
321 | 
322 | 
323 | 
324 | #endif // DATASTD_H
325 | 
--------------------------------------------------------------------------------
/src/ordinis_dense_glm.cpp:
--------------------------------------------------------------------------------
1 | #define EIGEN_DONT_PARALLELIZE
2 | 
3 | #include "CoordLogisticDense.h"
4 | #include "CoordGLMDense.h"
5 | #include "DataStd.h"
6 | 
7 | 
8 | using Eigen::MatrixXf;
9 | using Eigen::VectorXf;
10 | using Eigen::MatrixXd;
11 | using Eigen::VectorXd;
12 | using Eigen::ArrayXf;
13 | using Eigen::ArrayXd;
14 | using Eigen::ArrayXXf;
15 | using Eigen::Map;
16 | 
17 | using Rcpp::wrap;
18 | using Rcpp::as;
19 | using Rcpp::List;
20 | using Rcpp::Named;
21 | using Rcpp::IntegerVector;
22 | 
23 | typedef Map<VectorXd> MapVecd;
24 | typedef Map<MatrixXd> MapMatd;
25 | typedef Eigen::SparseVector<double> SpVec;
26 | typedef Eigen::SparseMatrix<double> SpMat;
27 | 
28 | inline void write_beta_matrix(SpMat &betas, int col, double beta0, SpVec &coef)
29 | {
30 | betas.insert(0, col) = beta0;
31 | 
32 | for(SpVec::InnerIterator iter(coef); iter; ++iter)
33 | {
34 | betas.insert(iter.index() + 1, col) = iter.value();
35 | }
36 | }
37 | 
38 | List coord_ordinis_dense_glm(Rcpp::NumericMatrix x_,
39 | Rcpp::NumericVector y_,
40 | Rcpp::NumericVector weights_,
41 | Rcpp::NumericVector offset_,
42 | Rcpp::NumericVector lambda_,
43 | Rcpp::NumericVector penalty_factor_,
44 | Rcpp::NumericMatrix limits_,
45 | int nlambda_,
46 | double lmin_ratio_,
47 | bool standardize_,
48 | bool intercept_,
49 | bool glm_fam_,
50 | List opts_)
51 | {
52 | 
53 | const int n = x_.rows();
54 | const int p = x_.cols();
55 | 
56 | MatrixXd datX(n, p);
57 | VectorXd datY(n);
58 | VectorXd weights(n);
59 | VectorXd offset(n);
60 | MatrixXd limits(2, p);
61 | 
62 | // copy data from the R objects into Eigen objects (everything stays double)
63 | std::copy(x_.begin(), x_.end(), datX.data());
64 | std::copy(y_.begin(), y_.end(), datY.data());
65 | std::copy(weights_.begin(), weights_.end(), weights.data());
66 | std::copy(offset_.begin(), offset_.end(), offset.data());
67 | 
68 | std::copy(limits_.begin(), limits_.end(), limits.data());
69 | 
70 | //Map<VectorXd> weights(as<Map<VectorXd> >(weights_));
71 | 
72 | // In glmnet, we minimize
73 | // 1/(2n) * ||y - X * beta||^2 + lambda * ||beta||_1
74 | // which is equivalent to minimizing
75 | // 1/2 * ||y - X * beta||^2 + n * lambda * ||beta||_1
76 | ArrayXd lambda(as<ArrayXd>(lambda_));
77 | int nlambda = lambda.size();
78 | 
79 | ArrayXd penalty_factor(as<ArrayXd>(penalty_factor_));
80 | 
81 | 
82 | List opts(opts_);
83 | const int maxit = as<int>(opts["maxit"]);
84 | const double tol = as<double>(opts["tol"]);
85 | const double alpha = as<double>(opts["alpha"]);
86 | const double gamma = as<double>(opts["gamma"]);
87 | const int maxit_irls = as<int>(opts["maxit.irls"]);
88 | const double tol_irls = as<double>(opts["tol.irls"]);
89 | const int dfmax = as<int>(opts["dfmax"]);
90 | const bool standardize = standardize_;
91 | const bool intercept = intercept_;
92 | const bool glm_fam = glm_fam_;
93 | 
94 | std::vector<std::string> penalty(as< std::vector<std::string> >(opts["penalty"]));
95 | 
96 | DataStd<double> datstd(n, p, standardize, intercept, true);
97 | datstd.standardize(datX, datY, limits);
98 | 
99 | 
100 | CoordGLMDense *solver = NULL;
101 | CoordLogisticDense *solver_bin = NULL;
102 | 
103 | //CoordBase<Eigen::SparseVector<double> > *solver = NULL; // obj doesn't point to anything yet
104 | 
105 | if (glm_fam)
106 | {
107 | Rcpp::Function var = as<Rcpp::Function>(opts["variance"]);
108 | Rcpp::Function mu_eta = as<Rcpp::Function>(opts["mu_eta"]);
109 | Rcpp::Function linkinv = as<Rcpp::Function>(opts["linkinv"]);
110 | Rcpp::Function linkfun = as<Rcpp::Function>(opts["linkfun"]);
111 | Rcpp::Function dev_resids = as<Rcpp::Function>(opts["dev_resids"]);
112 | Rcpp::Function valideta = as<Rcpp::Function>(opts["valideta"]);
113 | Rcpp::Function validmu = as<Rcpp::Function>(opts["validmu"]);
114 | 
115 | solver = new CoordGLMDense(datX, datY,
116 | weights, offset,
117 | penalty_factor,
118 | limits, penalty[0],
119 | var, mu_eta, linkinv, linkfun, dev_resids,
120 | valideta, validmu,
121 | intercept, alpha,
122 | tol, maxit_irls, tol_irls);
123 | 
124 | } else
125 | {
126 | solver_bin = new CoordLogisticDense(datX, datY,
127 | weights, offset,
128 | penalty_factor,
129 | limits, penalty[0],
130 | intercept, alpha,
131 | tol, maxit_irls, tol_irls);
132 | }
133 | 
134 | 
135 | 
136 | if (nlambda < 1)
137 | {
138 | double lmax = 0.0;
139 | if (glm_fam)
140 | {
141 | lmax = solver->get_lambda_zero();
142 | } else
143 | {
144 | lmax = solver_bin->get_lambda_zero();
145 | }
146 | 
147 | double lmin = lmin_ratio_ * lmax;
148 | lambda.setLinSpaced(nlambda_, std::log(lmax), std::log(lmin));
149 | lambda = lambda.exp();
150 | nlambda = lambda.size();
151 | }
152 | 
153 | 
154 | 
155 | 
156 | SpMat beta(p + 1, nlambda);
157 | beta.reserve(Eigen::VectorXi::Constant(nlambda, std::min(n, p)));
158 | 
159 | //MatrixXd beta(p + 1, nlambda);
160 | //VectorXd lossvec(nlambda);
161 | 
162 | IntegerVector niter(nlambda);
163 | double ilambda = 0.0;
164 | 
165 | double null_dev = 0.0;
166 | VectorXd deviance(nlambda);
167 | deviance.setZero();
168 | 
169 | 
170 | int last = nlambda;
171 | for(int i = 0; i < nlambda; i++)
172 | {
173 | 
174 | ilambda = lambda[i];
175 | 
176 | if (glm_fam)
177 | {
178 | if(i == 0)
179 | solver->init(ilambda, gamma);
180 | else
181 | solver->init_warm(ilambda, gamma);
182 | 
183 | niter[i] = solver->solve(maxit);
184 | 
185 | SpVec res = solver->get_beta();
186 | int nzero = solver->get_nzero();
187 | deviance(i) = solver->get_dev();
188 | 
189 | if (i == 0) null_dev = solver->get_null_dev();
190 | 
191 | if (p >= n)
192 | {
193 | if ((nzero > dfmax || deviance(i)
< 0.1 * null_dev) && i > 0 )
194 | {
195 | last = i - 1;
196 | break;
197 | }
198 | } else
199 | {
200 | if ((nzero > dfmax || deviance(i) < 0.05 * null_dev) && i > 0 )
201 | {
202 | last = i - 1;
203 | break;
204 | }
205 | }
206 | 
207 | double beta0 = 0.0;
208 | beta0 = solver->get_intercept();
209 | 
210 | datstd.recover(beta0, res);
211 | //beta(0,i) = beta0;
212 | //beta.block(1, i, p, 1) = res;
213 | write_beta_matrix(beta, i, beta0, res);
214 | } else
215 | {
216 | if(i == 0)
217 | solver_bin->init(ilambda, gamma);
218 | else
219 | solver_bin->init_warm(ilambda, gamma);
220 | 
221 | niter[i] = solver_bin->solve(maxit);
222 | 
223 | SpVec res = solver_bin->get_beta();
224 | int nzero = solver_bin->get_nzero();
225 | deviance(i) = solver_bin->get_dev();
226 | 
227 | if (i == 0) null_dev = solver_bin->get_null_dev();
228 | 
229 | if (p >= n)
230 | {
231 | if ((nzero > dfmax || deviance(i) < 0.1 * null_dev) && i > 0 )
232 | {
233 | last = i - 1;
234 | break;
235 | }
236 | } else
237 | {
238 | if ((nzero > dfmax || deviance(i) < 0.05 * null_dev) && i > 0 )
239 | {
240 | last = i - 1;
241 | break;
242 | }
243 | }
244 | 
245 | double beta0 = 0.0;
246 | beta0 = solver_bin->get_intercept();
247 | 
248 | datstd.recover(beta0, res);
249 | //beta(0,i) = beta0;
250 | //beta.block(1, i, p, 1) = res;
251 | write_beta_matrix(beta, i, beta0, res);
252 | }
253 | //lossvec(i) = solver->get_loss();
254 | }
255 | 
256 | if (glm_fam)
257 | {
258 | delete solver;
259 | } else
260 | {
261 | delete solver_bin;
262 | }
263 | 
264 | 
265 | beta.makeCompressed();
266 | 
267 | return List::create(Named("beta") = beta,
268 | Named("niter") = niter,
269 | Named("lambda") = lambda,
270 | //Named("loss") = lossvec,
271 | Named("deviance") = deviance,
272 | Named("null.deviance") = null_dev,
273 | Named("last") = last);
274 | }
275 | 
276 | // [[Rcpp::export]]
277 | List coord_ordinis_dense_glm_cpp(Rcpp::NumericMatrix x,
278 | Rcpp::NumericVector y,
279 | Rcpp::NumericVector weights,
280 | Rcpp::NumericVector offset,
281 | Rcpp::NumericVector lambda,
282 | Rcpp::NumericVector penalty_factor,
283 | Rcpp::NumericMatrix limits,
284 | int nlambda,
285 | double lmin_ratio,
286 | bool standardize,
287 | bool intercept,
288 | bool glm_fam,
289 | List opts)
290 | {
291 | return coord_ordinis_dense_glm(x, y, weights, offset,
292 | lambda, penalty_factor,
293 | limits,
294 | nlambda,
295 | lmin_ratio,
296 | standardize,
297 | intercept,
298 | glm_fam,
299 | opts);
300 | }
301 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | ## Introduction to ‘ordinis’
3 | 
4 | The ‘ordinis’ package provides computation for penalized regression
5 | problems via coordinate descent. It is mostly for my own experimentation
6 | at this stage; however, it is fairly efficient and reliable.
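For reference, the criterion being minimized has the usual elastic net
form: for a chosen penalty `P` (lasso, MCP, or SCAD), the coefficients
minimize
`loss(beta) + sum_j penalty.factor[j] * ( alpha * P(beta_j; lambda, gamma) + 0.5 * (1 - alpha) * lambda * beta_j^2 )`,
where the loss is half the mean squared error for `family = "gaussian"`
and the mean negative log-likelihood for `family = "binomial"` (this
mirrors the `objective_linear()` and `objective_logistic()` helper
functions in `R/utils.R`).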
7 | 8 | Install using the **devtools** package: 9 | 10 | ``` r 11 | devtools::install_github("jaredhuling/ordinis") 12 | ``` 13 | 14 | or by cloning and building 15 | 16 | ## Example 17 | 18 | ``` r 19 | library(ordinis) 20 | 21 | # compute the full solution path, n > p 22 | set.seed(123) 23 | n <- 500 24 | p <- 50000 25 | m <- 50 26 | b <- matrix(c(runif(m), rep(0, p - m))) 27 | x <- matrix(rnorm(n * p, sd = 3), n, p) 28 | y <- drop(x %*% b) + rnorm(n) 29 | 30 | mod <- ordinis(x, y, 31 | penalty = "mcp", 32 | lower.limits = rep(0, p), # force all coefficients to be positive 33 | penalty.factor = c(0, 0, rep(1, p-2)), # don't penalize first two coefficients 34 | alpha = 0.95) # use elastic net with alpha = 0.95 35 | 36 | plot(mod) 37 | ``` 38 | 39 | ![](vignettes/unnamed-chunk-1-1.png) 40 | 41 | ``` r 42 | ## show likelihood 43 | logLik(mod) 44 | ``` 45 | 46 | ## 'log Lik.' -1960.4981, -1959.7942, -1958.4955, -1957.2568, -1956.0766, -1954.9533, -1953.8853, -1952.3674, -1950.1291, -1945.7116, -1939.9338, -1932.9379, -1925.4526, -1917.7379, -1909.7029, -1901.5909, -1893.7027, -1885.2011, -1874.5999, -1863.6113, -1852.3128, -1840.6926, -1828.9064, -1817.2957, -1805.9049, -1794.2657, -1782.7004, -1771.0541, -1758.8450, -1744.8511, -1730.6794, -1715.8181, -1699.7919, -1683.1596, -1666.3203, -1647.3779, -1627.8234, -1608.2236, -1588.6072, -1569.0073, -1549.4624, -1529.6515, -1508.9413, -1488.3112, -1467.8201, -1446.9552, -1426.0467, -1404.6194, -1383.3848, -1361.5585, -1340.0356, -1318.9663, -1298.7914, -1279.5254, -1259.3910, -1239.3235, -1220.1175, -1201.6675, -1183.7428, -1166.4344, -1149.8871, -1133.5086, -1116.3535, -1099.6555, -1082.5834, -1065.4142, -1048.6729, -1031.4340, -1014.8887, -999.1320, -984.1856, -969.5954, -954.8587, -940.2463, -925.9616, -912.3066, -899.2996, -885.3278, -871.5722, -858.5513, -846.3830, -835.1985, -824.8354, -815.1351, -806.1005, -797.7647, -789.3363, -781.0021, -773.5596, -766.7392, -760.4534, -754.7102, -749.4411, -744.6367, -740.2854, -735.9902, -731.7368, -727.1736, -722.7904, -718.6086 (df= 3 4 4 4 4 4 4 5 7 9111213131414141619192021222222232324262628303031323434343434343535353536373738383838383840404040404040414242434344444444444546464646464747474747474747474848484848484848484950515252) 47 | 48 | ``` r 49 | ## compute AIC 50 | AIC(mod) 51 | ``` 52 | 53 | ## [1] 3926.996 3927.588 3924.991 3922.514 3920.153 3917.907 3915.771 54 | ## [8] 3914.735 3914.258 3909.423 3901.868 3889.876 3876.905 3861.476 55 | ## [15] 3847.406 3831.182 3815.405 3802.402 3787.200 3765.223 3744.626 56 | ## [22] 3723.385 3701.813 3678.591 3655.810 3634.531 3611.401 3590.108 57 | ## [29] 3569.690 3541.702 3517.359 3491.636 3459.584 3428.319 3396.641 58 | ## [36] 3362.756 3323.647 3284.447 3245.214 3206.015 3166.925 3129.303 59 | ## [43] 3087.883 3046.622 3005.640 2965.910 2926.093 2883.239 2842.770 60 | ## [50] 2799.117 2756.071 2713.933 2673.583 2635.051 2598.782 2558.647 61 | ## [57] 2520.235 2483.335 2447.486 2412.869 2379.774 2349.017 2316.707 62 | ## [64] 2283.311 2251.167 2216.828 2185.346 2150.868 2117.777 2086.264 63 | ## [71] 2056.371 2029.191 2001.717 1972.493 1943.923 1916.613 1890.599 64 | ## [78] 1864.656 1837.144 1811.103 1786.766 1764.397 1743.671 1724.270 65 | ## [85] 1706.201 1689.529 1674.673 1658.004 1643.119 1629.478 1616.907 66 | ## [92] 1605.420 1594.882 1585.273 1576.571 1569.980 1563.474 1556.347 67 | ## [99] 1549.581 1541.217 68 | 69 | ``` r 70 | ## BIC 71 | BIC(mod) 72 | ``` 73 | 74 | ## [1] 3939.640 3944.447 3941.849 3939.372 3937.012 3934.765 3932.629 75 | 
## [8] 3935.808 3943.760 3947.355 3948.228 3940.451 3931.695 3916.266 76 | ## [15] 3906.410 3890.186 3874.410 3869.836 3867.277 3845.300 3828.918 77 | ## [22] 3811.892 3794.534 3771.313 3748.531 3731.467 3708.337 3691.259 78 | ## [29] 3679.270 3651.282 3635.368 3618.074 3586.022 3558.972 3531.508 79 | ## [36] 3506.052 3466.943 3427.744 3388.511 3349.311 3310.222 3276.814 80 | ## [43] 3235.394 3194.134 3153.152 3117.636 3082.034 3039.179 3002.925 81 | ## [50] 2959.272 2916.226 2874.088 2833.738 2795.206 2767.366 2727.231 82 | ## [57] 2688.819 2651.919 2616.070 2581.453 2548.358 2521.816 2493.721 83 | ## [64] 2460.325 2432.395 2398.057 2370.789 2336.311 2303.220 2271.707 84 | ## [71] 2241.814 2218.848 2195.589 2166.365 2137.795 2110.485 2084.471 85 | ## [78] 2062.742 2035.231 2009.189 1984.853 1962.484 1941.757 1922.357 86 | ## [85] 1904.288 1887.616 1876.974 1860.305 1845.420 1831.780 1819.208 87 | ## [92] 1807.722 1797.183 1787.575 1778.872 1776.496 1774.204 1771.292 88 | ## [99] 1768.741 1760.377 89 | 90 | ## Performance 91 | 92 | ### Lasso (linear regression) 93 | 94 | ``` r 95 | library(microbenchmark) 96 | library(glmnet) 97 | 98 | b <- matrix(c(runif(m, min = -1), rep(0, p - m))) 99 | x <- matrix(rnorm(n * p, sd = 3), n, p) 100 | y <- drop(x %*% b) + rnorm(n) 101 | 102 | lambdas = glmnet(x, y)$lambda 103 | 104 | microbenchmark( 105 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-10, # thresh must be very small 106 | lambda = lambdas)}, # for comparable precision 107 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 108 | tol = 1e-3)}, 109 | times = 5 110 | ) 111 | ``` 112 | 113 | ## Unit: seconds 114 | ## expr min lq mean median uq max 115 | ## glmnet[lasso] 3.377096 3.487334 3.543365 3.545748 3.553003 3.753645 116 | ## ordinis[lasso] 5.768823 5.777944 5.836068 5.799918 5.814987 6.018668 117 | ## neval 118 | ## 5 119 | ## 5 120 | 121 | ``` r 122 | # difference of results 123 | max(abs(coef(resg) - reso$beta)) 124 | ``` 125 | 126 | ## [1] 0.0008824882 127 | 128 | ``` r 129 | microbenchmark( 130 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-15, # thresh must be very low for comparable precision 131 | lambda = lambdas)}, 132 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 133 | tol = 1e-3)}, 134 | times = 5 135 | ) 136 | ``` 137 | 138 | ## Unit: seconds 139 | ## expr min lq mean median uq max 140 | ## glmnet[lasso] 5.618403 5.748487 5.754279 5.783774 5.792731 5.827999 141 | ## ordinis[lasso] 5.961874 5.967261 6.039594 6.075738 6.088031 6.105065 142 | ## neval 143 | ## 5 144 | ## 5 145 | 146 | ``` r 147 | # difference of results 148 | max(abs(coef(resg) - reso$beta)) 149 | ``` 150 | 151 | ## [1] 1.404332e-05 152 | 153 | ### Lasso (logistic regression) 154 | 155 | `glmnet` is clearly faster for logistic regression for the same 156 | precision 157 | 158 | ``` r 159 | library(MASS) 160 | 161 | set.seed(123) 162 | n <- 200 163 | p <- 10000 164 | m <- 20 165 | b <- matrix(c(runif(m, min = -0.5, max = 0.5), rep(0, p - m))) 166 | x <- matrix(rnorm(n * p, sd = 3), n, p) 167 | y <- 1 *(drop(x %*% b) + rnorm(n) > 0) 168 | 169 | lambdas = glmnet(x, y, family = "binomial", lambda.min.ratio = 0.02)$lambda 170 | 171 | microbenchmark( 172 | "glmnet[lasso]" = {resg <- glmnet(x, y, family = "binomial", 173 | thresh = 1e-10, 174 | lambda = lambdas)}, 175 | "ordinis[lasso]" = {reso <- ordinis(x, y, family = "binomial", 176 | lambda = lambdas, 177 | tol = 1e-3, tol.irls = 1e-3)}, 178 | times = 5 179 | ) 180 | ``` 181 | 182 | ## Unit: milliseconds 183 | ## expr min lq 
mean median uq max 184 | ## glmnet[lasso] 399.9576 405.9744 410.6385 407.893 410.4411 428.9268 185 | ## ordinis[lasso] 1166.7480 1185.0670 1193.5077 1192.116 1195.2467 1228.3609 186 | ## neval 187 | ## 5 188 | ## 5 189 | 190 | ``` r 191 | # difference of results 192 | max(abs(coef(resg) - reso$beta)) 193 | ``` 194 | 195 | ## [1] 0.0003735867 196 | 197 | ``` r 198 | microbenchmark( 199 | "glmnet[lasso]" = {resg <- glmnet(x, y, family = "binomial", 200 | thresh = 1e-15, 201 | lambda = lambdas)}, 202 | "ordinis[lasso]" = {reso <- ordinis(x, y, family = "binomial", 203 | lambda = lambdas, 204 | tol = 1e-3, tol.irls = 1e-3)}, 205 | times = 5 206 | ) 207 | ``` 208 | 209 | ## Unit: milliseconds 210 | ## expr min lq mean median uq max 211 | ## glmnet[lasso] 676.8328 684.9007 697.8913 686.202 709.3506 732.1704 212 | ## ordinis[lasso] 1175.4595 1178.5731 1209.1550 1197.182 1215.1998 1279.3604 213 | ## neval 214 | ## 5 215 | ## 5 216 | 217 | ``` r 218 | # difference of results 219 | max(abs(coef(resg) - reso$beta)) 220 | ``` 221 | 222 | ## [1] 2.525457e-05 223 | 224 | ### Lasso (linear regression, ill-conditioned) 225 | 226 | ``` r 227 | library(MASS) 228 | 229 | set.seed(123) 230 | n <- 500 231 | p <- 1000 232 | m <- 50 233 | b <- matrix(c(runif(m, min = -1), rep(0, p - m))) 234 | sig <- matrix(0.5, ncol=p,nrow=p); diag(sig) <- 1 235 | x <- mvrnorm(n, mu=rep(0, p), Sigma = sig) 236 | y <- drop(x %*% b) + rnorm(n) 237 | 238 | lambdas = glmnet(x, y)$lambda[1:65] 239 | 240 | microbenchmark( 241 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-9, # thresh must be very small 242 | lambda = lambdas)}, # for comparable precision 243 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 244 | tol = 1e-5)}, 245 | times = 5 246 | ) 247 | ``` 248 | 249 | ## Unit: milliseconds 250 | ## expr min lq mean median uq max 251 | ## glmnet[lasso] 158.4697 158.7652 163.5977 159.7300 163.8546 177.1690 252 | ## ordinis[lasso] 312.3342 316.8890 317.9636 317.8517 320.2239 322.5189 253 | ## neval 254 | ## 5 255 | ## 5 256 | 257 | ``` r 258 | # difference of results 259 | max(abs(coef(resg) - reso$beta)) 260 | ``` 261 | 262 | ## [1] 0.000262974 263 | 264 | ``` r 265 | microbenchmark( 266 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-11, # thresh must be very low for comparable precision 267 | lambda = lambdas)}, 268 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 269 | tol = 1e-5)}, 270 | times = 5 271 | ) 272 | ``` 273 | 274 | ## Unit: milliseconds 275 | ## expr min lq mean median uq max 276 | ## glmnet[lasso] 324.0667 327.7087 340.9450 348.1692 348.4202 356.3602 277 | ## ordinis[lasso] 313.3590 314.2454 324.0695 320.6374 328.1055 344.0002 278 | ## neval 279 | ## 5 280 | ## 5 281 | 282 | ``` r 283 | # difference of results 284 | max(abs(coef(resg) - reso$beta)) 285 | ``` 286 | 287 | ## [1] 2.454023e-05 288 | 289 | ### Validity of solutions with various bells and whistles 290 | 291 | Due to internal differences in standardization, we now compare with 292 | `glmnet` when using observation weights, penalty scaling factors, and 293 | parameter box constraints 294 | 295 | ``` r 296 | set.seed(123) 297 | n = 200 298 | p = 1000 299 | m <- 15 300 | b = c(runif(m, min = -0.5, max = 0.5), rep(0, p - m)) 301 | x = (matrix(rnorm(n * p, sd = 3), n, p)) 302 | y = drop(x %*% b) + rnorm(n) 303 | y2 <- 1 * (y > rnorm(n, mean = 0.5, sd = 3)) 304 | 305 | 306 | wts <- runif(nrow(x)) 307 | wts <- wts / mean(wts) # re-scale like glmnet does, so we can compare 308 | 309 | penalty.factor <- rbinom(ncol(x), 1, 
0.99) * runif(ncol(x)) * 5
310 | penalty.factor <- (penalty.factor / sum(penalty.factor)) * ncol(x) # re-scale like glmnet does, so we can compare
311 | 
312 | system.time(resb <- ordinis(x, y2, family = "binomial", tol = 1e-7, tol.irls = 1e-5,
313 | penalty = "lasso",
314 | alpha = 0.5, #elastic net term
315 | lower.limits = 0, upper.limits = 0.02, # box constraints on all parameters
316 | standardize = FALSE, intercept = TRUE,
317 | weights = wts, # observation weights
318 | penalty.factor = penalty.factor)) # penalty scaling factors
319 | ```
320 | 
321 | ## user system elapsed
322 | ## 0.069 0.000 0.069
323 | 
324 | ``` r
325 | system.time(resg <- glmnet(x,y2, family = "binomial",
326 | lambda = resb$lambda,
327 | alpha = 0.5, #elastic net term
328 | weights = wts, # observation weights
329 | penalty.factor = penalty.factor, # penalty scaling factors
330 | lower.limits = 0, upper.limits = 0.02, # box constraints on all parameters
331 | standardize = FALSE, intercept = TRUE,
332 | thresh = 1e-16))
333 | ```
334 | 
335 | ## user system elapsed
336 | ## 0.041 0.001 0.042
337 | 
338 | ``` r
339 | ## compare solutions
340 | max(abs(resb$beta[-1,] - resg$beta))
341 | ```
342 | 
343 | ## [1] 3.823445e-09
344 | 
345 | ``` r
346 | # now with no box constraints
347 | system.time(resb <- ordinis(x, y2, family = "binomial", tol = 1e-7, tol.irls = 1e-5,
348 | penalty = "lasso",
349 | alpha = 0.5, #elastic net term
350 | standardize = FALSE, intercept = TRUE,
351 | weights = wts, # observation weights
352 | penalty.factor = penalty.factor)) # penalty scaling factors
353 | ```
354 | 
355 | ## user system elapsed
356 | ## 0.087 0.001 0.087
357 | 
358 | ``` r
359 | system.time(resg <- glmnet(x,y2, family = "binomial",
360 | lambda = resb$lambda,
361 | alpha = 0.5, #elastic net term
362 | weights = wts, # observation weights
363 | penalty.factor = penalty.factor, # penalty scaling factors
364 | standardize = FALSE, intercept = TRUE,
365 | thresh = 1e-16))
366 | ```
367 | 
368 | ## user system elapsed
369 | ## 0.061 0.001 0.064
370 | 
371 | ``` r
372 | ## compare solutions
373 | max(abs(resb$beta[-1,] - resg$beta))
374 | ```
375 | 
376 | ## [1] 6.005807e-09
377 | 
378 | ### A Note on the Elastic Net and linear models
379 | 
380 | Due to differences in how scaling of the response is handled in glmnet, it
381 | yields slightly different solutions from both ordinis and ncvreg for
382 | Gaussian models with a ridge penalty term.
383 | 
384 | ``` r
385 | library(ncvreg)
386 | 
387 | ## I'm setting all methods to have high precision just so solutions are comparable.
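## (each package defines its convergence threshold on its own scale, so the
## tolerance values themselves may not be directly comparable across packages)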
388 | ## differences in computation time may be due in part to the arbitrariness of the 389 | ## particular precisions chosen 390 | system.time(resg <- glmnet(x, y, family = "gaussian", alpha = 0.25, 391 | thresh = 1e-15)) 392 | ``` 393 | 394 | ## user system elapsed 395 | ## 0.481 0.006 0.492 396 | 397 | ``` r 398 | system.time(res <- ordinis(x, y, family = "gaussian", penalty = "lasso", alpha = 0.25, 399 | tol = 1e-10, lambda = resg$lambda)) 400 | ``` 401 | 402 | ## user system elapsed 403 | ## 0.363 0.002 0.366 404 | 405 | ``` r 406 | system.time(resn <- ncvreg(x, y, family="gaussian", penalty = "lasso", 407 | lambda = resg$lambda, alpha = 0.25, max.iter = 100000, 408 | eps = 1e-10)) 409 | ``` 410 | 411 | ## user system elapsed 412 | ## 0.507 0.003 0.511 413 | 414 | ``` r 415 | resgg <- res; resgg$beta[-1,] <- resg$beta 416 | 417 | # compare ordinis and glmnet 418 | max(abs(res$beta[-1,] - resg$beta)) 419 | ``` 420 | 421 | ## [1] 0.1123304 422 | 423 | ``` r 424 | # compare ordinis and ncvreg 425 | max(abs(res$beta - resn$beta)) 426 | ``` 427 | 428 | ## [1] 6.948169e-09 429 | 430 | ``` r 431 | # compare ncvreg and glmnet 432 | max(abs(resn$beta[-1,] - resg$beta)) 433 | ``` 434 | 435 | ## [1] 0.1123304 436 | -------------------------------------------------------------------------------- /R/ordinis_methods.R: -------------------------------------------------------------------------------- 1 | ## the code here is largely based on the code 2 | ## from the glmnet package (no reason to reinvent the wheel) 3 | 4 | #' Prediction method for coord lasso fitted objects 5 | #' 6 | #' @param object fitted "ordinis" model object 7 | #' @param newx Matrix of new values for \code{x} at which predictions are to be made. Must be a matrix; can be sparse as in the 8 | #' \code{CsparseMatrix} objects of the \pkg{Matrix} package. 9 | #' This argument is not used for \code{type=c("coefficients","nonzero")} 10 | #' @param s Value(s) of the penalty parameter lambda at which predictions are required. Default is the entire sequence used to create 11 | #' the model. 12 | #' @param type Type of prediction required. \code{type = "link"} gives the linear predictors for the \code{"binomial"} model; for \code{"gaussian"} models it gives the fitted values. 13 | #' \code{type = "response"} gives the fitted probabilities for \code{"binomial"}. \code{type = "coefficients"} computes the coefficients at the requested values for \code{s}. 14 | #' \code{type = "class"} applies only to \code{"binomial"} and produces the class label corresponding to the maximum probability. 15 | #' @param ... 
not used 16 | #' @importFrom graphics abline abline axis matplot points segments 17 | #' @importFrom methods as 18 | #' @importFrom stats approx predict quantile runif weighted.mean 19 | #' @return An object depending on the type argument 20 | #' @method predict ordinis 21 | #' @export 22 | #' @examples 23 | #' set.seed(123) 24 | #' n.obs <- 1e4 25 | #' n.vars <- 100 26 | #' n.obs.test <- 1e3 27 | #' 28 | #' true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15)) 29 | #' 30 | #' x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 31 | #' y <- rnorm(n.obs, sd = 3) + x %*% true.beta 32 | #' x.test <- matrix(rnorm(n.obs.test * n.vars), n.obs.test, n.vars) 33 | #' y.test <- rnorm(n.obs.test, sd = 3) + x.test %*% true.beta 34 | #' 35 | #' fit <- ordinis(x = x, y = y, nlambda = 10) 36 | #' 37 | #' preds.lasso <- predict(fit, newx = x.test, type = "response") 38 | #' 39 | #' apply(preds.lasso, 2, function(x) mean((y.test - x) ^ 2)) 40 | #' 41 | predict.ordinis <- function(object, newx, s = NULL, 42 | type = c("link", 43 | "response", 44 | "coefficients", 45 | "nonzero", 46 | "class"), ...) 47 | { 48 | type <- match.arg(type) 49 | 50 | if(missing(newx)){ 51 | if(!match(type, c("coefficients", "nonzero"), FALSE))stop("A value for 'newx' must be supplied") 52 | } 53 | nbeta <- object$beta 54 | 55 | if(!is.null(s)){ 56 | #vnames=dimnames(nbeta)[[1]] 57 | lambda <- object$lambda 58 | lamlist <- lambda.interp(object$lambda,s) 59 | nbeta <- nbeta[,lamlist$left,drop=FALSE]*lamlist$frac +nbeta[,lamlist$right,drop=FALSE]*(1-lamlist$frac) 60 | #dimnames(nbeta)=list(vnames,paste(seq(along=s))) 61 | } 62 | if (type == "coefficients") return(nbeta) 63 | if (type == "nonzero") { 64 | newbeta <- abs(as.matrix(object$beta)) > 0 65 | index <- 1:(dim(newbeta)[1]) 66 | nzel <- function(x, index) if(any(x)) index[x] else NULL 67 | betaList <- apply(newbeta, 2, nzel, index) 68 | return(betaList) 69 | } 70 | 71 | newx <- as.matrix(newx) 72 | # add constant column if needed 73 | if (ncol(newx) < nrow(nbeta)) 74 | newx <- cbind(rep(1, nrow(newx)), newx) 75 | 76 | as.matrix(newx %*% nbeta) 77 | } 78 | 79 | 80 | #' @export 81 | predict.cdgaussian <- function(object, newx, s = NULL, 82 | type = c("link", 83 | "response", 84 | "coefficients", 85 | "nonzero"), ...) 86 | { 87 | NextMethod("predict") 88 | } 89 | 90 | 91 | #' @export 92 | predict.cdbinomial <- function(object, newx, s=NULL, 93 | type=c("link", 94 | "response", 95 | "coefficients", 96 | "class", 97 | "nonzero"), ...) 
98 | {
99 | type <- match.arg(type)
100 | nfit <- NextMethod("predict")
101 | switch(type,
102 | response={
103 | prob=exp(-nfit)
104 | 1 / (1 + prob)
105 | },
106 | class={
107 | cnum=ifelse(nfit > 0, 2, 1)
108 | clet=object$classnames[cnum]
109 | if(is.matrix(cnum))clet=array(clet,dim(cnum),dimnames(cnum))
110 | clet
111 | },
112 | nfit
113 | )
114 | }
115 | 
116 | 
117 | 
118 | 
119 | cv.cdbinomial=function(outlist,lambda,x,y,foldid,type.measure,grouped,keep=FALSE){
120 | typenames=c(mse="Mean-Squared Error",mae="Mean Absolute Error",deviance="Binomial Deviance",auc="AUC",class="Misclassification Error")
121 | if(type.measure=="default")type.measure="deviance"
122 | if(!match(type.measure,c("mse","mae","deviance","auc","class"),FALSE)){
123 | warning("Only 'deviance', 'class', 'auc', 'mse' or 'mae' available for binomial models; 'deviance' used")
124 | type.measure="deviance"
125 | }
126 | 
127 | ###These are hard coded in glmnet's Fortran, so we do that here too
128 | prob_min=1e-5
129 | prob_max=1-prob_min
130 | ###Turn y into a matrix
131 | nc = dim(y)
132 | if (is.null(nc)) {
133 | y = as.factor(y)
134 | ntab = table(y)
135 | nc = as.integer(length(ntab))
136 | y = diag(nc)[as.numeric(y), ]
137 | }
138 | N=nrow(y)
139 | nfolds=max(foldid)
140 | if( (N/nfolds <10)&&type.measure=="auc"){
141 | warning("Too few (< 10) observations per fold for type.measure='auc' in cv.ordinis; changed to type.measure='deviance'. Alternatively, use smaller value for nfolds",call.=FALSE)
142 | type.measure="deviance"
143 | }
144 | if( (N/nfolds <3)&&grouped){
145 | warning("Option grouped=FALSE enforced in cv.ordinis, since < 3 observations per fold",call.=FALSE)
146 | grouped=FALSE
147 | }
148 | 
149 | 
150 | #if(!is.null(offset)){
151 | # is.offset=TRUE
152 | # offset=drop(offset)
153 | #}else is.offset=FALSE
154 | predmat=matrix(NA,nrow(y),length(lambda))
155 | nlams=double(nfolds)
156 | for(i in seq(nfolds)){
157 | which=foldid==i
158 | fitobj=outlist[[i]]
159 | #if(is.offset)off_sub=offset[which]
160 | preds=predict(fitobj, newx = x[which,,drop=FALSE], type="response")
161 | nlami=length(outlist[[i]]$lambda)
162 | predmat[which,seq(nlami)]=preds
163 | nlams[i]=nlami
164 | }
165 | ###If auc we behave differently
166 | if(type.measure=="auc"){
167 | cvraw=matrix(NA,nfolds,length(lambda))
168 | good=matrix(0,nfolds,length(lambda))
169 | for(i in seq(nfolds)){
170 | good[i,seq(nlams[i])]=1
171 | which=foldid==i
172 | for(j in seq(nlams[i])){
173 | #cvraw[i,j]=auc.mat(y[which,],predmat[which,j],weights[which])
174 | cvraw[i,j]=auc.mat(y[which,],predmat[which,j], rep(1, sum(which)))
175 | }
176 | }
177 | N=apply(good,2,sum)
178 | #weights=tapply(weights,foldid,sum)
179 | }
180 | else{
181 | ##extract weights and normalize to sum to 1
182 | #ywt=apply(y,1,sum)
183 | #y=y/ywt
184 | #weights=weights*ywt
185 | 
186 | N=nrow(y) - apply(is.na(predmat),2,sum)
187 | cvraw=switch(type.measure,
188 | "mse"=(y[,1]-(1-predmat))^2 +(y[,2]-predmat)^2,
189 | "mae"=abs(y[,1]-(1-predmat)) +abs(y[,2]-predmat),
190 | "deviance"= {
191 | predmat=pmin(pmax(predmat,prob_min),prob_max)
192 | lp=y[,1]*log(1-predmat)+y[,2]*log(predmat)
193 | ly=log(y)
194 | ly[y==0]=0
195 | ly=drop((y*ly)%*%c(1,1))
196 | 2*(ly-lp)
197 | },
198 | "class"=y[,1]*(predmat>.5) +y[,2]*(predmat<=.5)
199 | )
200 | if(grouped){
201 | cvob=cvcompute(cvraw,rep(1, nrow(y)),foldid,nlams)
202 | cvraw=cvob$cvraw;weights=cvob$weights;N=cvob$N
203 | }
204 | }
205 | #cvm=apply(cvraw,2,weighted.mean,w=weights,na.rm=TRUE)
206 | cvm=apply(cvraw,2,mean,na.rm=TRUE)
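# cvsd below is the standard error of the CV curve: for each lambda,
# sqrt( mean((loss - cvm)^2) / (N - 1) )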
207 | #cvsd=sqrt(apply(scale(cvraw,cvm,FALSE)^2,2,weighted.mean,w=rep(1, nrow(y)),na.rm=TRUE)/(N-1))
208 | cvsd=sqrt(apply(scale(cvraw,cvm,FALSE)^2,2,mean,na.rm=TRUE)/(N-1))
209 | out=list(cvm=cvm,cvsd=cvsd,name=typenames[type.measure])
210 | if(keep)out$fit.preval=predmat
211 | out
212 | 
213 | }
214 | 
215 | cv.cdgaussian <- function(outlist,lambda,x,y,foldid,type.measure,grouped,keep=FALSE)
216 | {
217 | typenames=c(deviance="Mean-Squared Error",mse="Mean-Squared Error",mae="Mean Absolute Error",auc="Area under (ROC) Curve")
218 | if(type.measure=="default")type.measure="mse"
219 | if(!match(type.measure,c("mse","mae","deviance","auc"),FALSE)){
220 | warning("Only 'mse', 'deviance', 'auc' or 'mae' available for Gaussian models; 'mse' used")
221 | type.measure="mse"
222 | }
223 | #if(!is.null(offset))y=y-drop(offset)
224 | predmat=matrix(NA,length(y),length(lambda))
225 | nfolds=max(foldid)
226 | nlams=double(nfolds)
227 | for(i in seq(nfolds)){
228 | which=foldid==i
229 | fitobj=outlist[[i]]
230 | #fitobj$offset=FALSE
231 | preds=predict(fitobj,x[which,,drop=FALSE], type="response")
232 | nlami=length(outlist[[i]]$lambda)
233 | predmat[which,seq(nlami)]=preds
234 | nlams[i]=nlami
235 | }
236 | 
237 | if (type.measure == "auc")
238 | {
239 | cvraw = matrix(NA, nfolds, length(lambda))
240 | good = matrix(0, nfolds, length(lambda))
241 | for (i in seq(nfolds)) {
242 | good[i, seq(nlams[i])] = 1
243 | which = foldid == i
244 | for (j in seq(nlams[i])) {
245 | #cvraw[i, j] = auc.mat(y[which, ], predmat[which,
246 | # j], weights[which])
247 | cvraw[i,j] <- auc(y[which],predmat[which,j], rep(1, sum(which)))
248 | }
249 | }
250 | N = apply(good, 2, sum)
251 | } else
252 | {
253 | N=length(y) - apply(is.na(predmat),2,sum)
254 | cvraw=switch(type.measure,
255 | "mse"=(y-predmat)^2,
256 | "deviance"=(y-predmat)^2,
257 | "mae"=abs(y-predmat)
258 | )
259 | if( (length(y)/nfolds <3)&&grouped){
260 | warning("Option grouped=FALSE enforced in cv.ordinis, since < 3 observations per fold",call.=FALSE)
261 | grouped=FALSE
262 | }
263 | if(grouped){
264 | cvob=cvcompute(cvraw,rep(1, length(y)),foldid,nlams)
265 | cvraw=cvob$cvraw;weights=cvob$weights;N=cvob$N
266 | }
267 | }
268 | 
269 | 
270 | #cvm=apply(cvraw,2,weighted.mean,w=weights,na.rm=TRUE)
271 | cvm=apply(cvraw,2,mean,na.rm=TRUE)
272 | #cvsd=sqrt(apply(scale(cvraw,cvm,FALSE)^2,2,weighted.mean,w=weights,na.rm=TRUE)/(N-1))
273 | cvsd=sqrt(apply(scale(cvraw,cvm,FALSE)^2,2,mean,na.rm=TRUE)/(N-1))
274 | out=list(cvm=cvm,cvsd=cvsd,name=typenames[type.measure])
275 | if(keep)out$fit.preval=predmat
276 | out
277 | }
278 | 
279 | 
280 | 
281 | 
282 | 
283 | 
284 | 
285 | #' Plot method for ordinis fitted objects
286 | #' 
287 | #' @param x fitted "ordinis" model object or fitted "cv.ordinis" model object
288 | #' @param xvar What is on the X-axis. \code{"loglambda"} plots against the log-lambda sequence, \code{"lambda"} against the lambda sequence, and \code{"norm"} against the L1-norm of the coefficients
289 | #' @param labsize size of labels for variable names. If labsize = 0, then no variable names will be plotted
290 | #' @param xlab label for x-axis
291 | #' @param ylab label for y-axis
292 | #' @param main main title for plot
293 | #' @param xlim numeric vector of length 2 giving the range for the x-axis of the plot
294 | #' @param n.print scalar integer for the number of times along the regularization path to print the number
295 | #' of nonzero coefficients. If set to a negative value, the number of nonzero coefficients will not be printed.
296 | #' @param ...
other graphical parameters for the plot 297 | #' @rdname plot 298 | #' @export 299 | #' @examples 300 | #' set.seed(123) 301 | #' n.obs <- 100 302 | #' n.vars <- 1000 303 | #' 304 | #' true.beta <- c(runif(5, 0.1, 1) * (2 * rbinom(5, 1, 0.5) - 1), rep(0, n.vars - 5)) 305 | #' 306 | #' x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 307 | #' y <- rnorm(n.obs, sd = 2) + x %*% true.beta 308 | #' 309 | #' fit <- ordinis(x = x, y = y, penalty = c("mcp")) 310 | #' 311 | #' plot(fit) 312 | #' 313 | plot.ordinis <- function(x, 314 | xvar = c("loglambda", "lambda", "norm"), 315 | labsize = 0.6, 316 | xlab = iname, ylab = NULL, 317 | main = x$penalty, 318 | xlim = NULL, 319 | n.print = 10L, 320 | ...) 321 | { 322 | 323 | xvar <- match.arg(xvar) 324 | nbeta <- as.matrix(x$beta[-1,]) ## remove intercept 325 | remove <- apply(nbeta, 1, function(betas) all(betas == 0) ) 326 | 327 | if (is.null(xlim)) 328 | { 329 | switch(xvar, 330 | "norm" = { 331 | index <- apply(abs(nbeta), 2, sum) 332 | iname <- expression(L[1] * " Norm") 333 | xlim <- range(index) 334 | approx.f <- 1 335 | }, 336 | "lambda" = { 337 | index <- x$lambda 338 | iname <- expression(lambda) 339 | xlim <- rev(range(index)) 340 | approx.f <- 0 341 | }, 342 | "loglambda" = { 343 | index <- log(x$lambda) 344 | iname <- expression(log(lambda)) 345 | xlim <- rev(range(index)) 346 | approx.f <- 1 347 | } 348 | ) 349 | } else 350 | { 351 | switch(xvar, 352 | "norm" = { 353 | index <- apply(abs(nbeta), 2, sum) 354 | iname <- expression(L[1] * " Norm") 355 | approx.f <- 1 356 | }, 357 | "lambda" = { 358 | index <- x$lambda 359 | iname <- expression(lambda) 360 | approx.f <- 0 361 | }, 362 | "loglambda" = { 363 | index <- log(x$lambda) 364 | iname <- expression(log(lambda)) 365 | approx.f <- 1 366 | } 367 | ) 368 | } 369 | 370 | if (all(remove)) stop("All beta estimates are zero for all values of lambda. No plot returned.") 371 | 372 | 373 | cols <- rainbow(sum(!remove)) 374 | 375 | ## create sequence that grabs one of ROYGBIV and repeats with 376 | ## an increment up the rainbow spectrum with each step from 1:7 on ROYGBIV 377 | n.cols <- 7L 378 | scramble.seq <- rep(((1:n.cols) - 1) * (length(cols) %/% (n.cols)) + 1, length(cols) %/% n.cols)[1:length(cols)] + 379 | (((0:(length(cols)-1)) %/% n.cols)) 380 | 381 | scramble.seq[is.na(scramble.seq)] <- which(!(1:length(cols) %in% scramble.seq)) 382 | colseq <- cols[scramble.seq] 383 | 384 | 385 | matplot(index, t(nbeta[!remove,,drop=FALSE]), 386 | lty = 1, 387 | xlab = xlab, 388 | ylab = "", 389 | col = colseq, 390 | xlim = xlim, 391 | type = 'l', ...) 392 | 393 | if (is.null(ylab)) 394 | { 395 | mtext(expression(hat(beta)), side = 2, cex = par("cex"), line = 3, las = 1) 396 | } else 397 | { 398 | mtext(ylab, side = 2, cex = par("cex"), line = 3) 399 | ylab = "" 400 | } 401 | 402 | if (n.print >= 0) 403 | { 404 | atdf <- pretty(index, n = n.print) 405 | plotnz <- approx(x = index, y = x$nzero, xout = atdf, rule = 2, method = "constant", f = approx.f)$y 406 | axis(side=3, at = atdf, labels = plotnz, tick=FALSE, line=0, ...) 407 | } 408 | 409 | title(main, line = 2.5, ...) 
410 | 
411 | 
412 | 
413 | # Adjust the margins to make sure the labels fit
414 | labwidth <- ifelse(labsize > 0, max(strwidth(rownames(nbeta[!remove,]), "inches", labsize)), 0)
415 | margins <- par("mai")
416 | par("mai" = c(margins[1:3], max(margins[4], labwidth*1.4)))
417 | if ( labsize > 0 && !is.null(rownames(nbeta)) )
418 | {
419 | take <- which(!remove)
420 | for (i in 1:sum(!remove)) {
421 | j <- take[i]
422 | axis(4, at = nbeta[j, ncol(nbeta)], labels = rownames(nbeta)[j],
423 | las=1, cex.axis=labsize, col.axis = colseq[i],
424 | lty = (i - 1) %% 5 + 1, col = colseq[i], ...)
425 | }
426 | }
427 | par("mai" = margins)
428 | }
429 | 
430 | 
431 | #' Prediction function for fitted cross validation ordinis objects
432 | #' 
433 | #' @param object fitted \code{"cv.ordinis"} model object
434 | #' @param newx Matrix of new values for \code{x} at which predictions are to be made. Must be a matrix; can be sparse as in the
435 | #' \code{CsparseMatrix} objects of the \pkg{Matrix} package.
436 | #' This argument is not used for \code{type = c("coefficients","nonzero")}
437 | #' @param s Value(s) of the penalty parameter lambda at which predictions are required. Default is the entire sequence used to create
438 | #' the model. For \code{predict.cv.ordinis()}, can also specify \code{"lambda.1se"} or \code{"lambda.min"} for best lambdas estimated by cross validation
439 | #' @param ... used to pass the other arguments for predict.ordinis
440 | #' @return An object depending on the type argument
441 | #' @method predict cv.ordinis
442 | #' @export
443 | #' @examples
444 | #' set.seed(123)
445 | #' n.obs <- 1e4
446 | #' n.vars <- 100
447 | #' n.obs.test <- 1e3
448 | #' 
449 | #' true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15))
450 | #' 
451 | #' x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars)
452 | #' y <- rnorm(n.obs, sd = 3) + x %*% true.beta
453 | #' x.test <- matrix(rnorm(n.obs.test * n.vars), n.obs.test, n.vars)
454 | #' y.test <- rnorm(n.obs.test, sd = 3) + x.test %*% true.beta
455 | #' 
456 | #' fit <- cv.ordinis(x = x, y = y,
457 | #' gamma = 1.4,
458 | #' nlambda = 10)
459 | #' 
460 | #' 
461 | #' preds.best <- predict(fit, newx = x.test, type = "response")
462 | #' 
463 | #' apply(preds.best, 2, function(x) mean((y.test - x) ^ 2))
464 | #' 
465 | predict.cv.ordinis <- function(object, newx,
466 | s=c("lambda.min", "lambda.1se"), ...)
467 | {
468 | if(is.numeric(s))lambda=s
469 | else
470 | if(is.character(s)){
471 | s=match.arg(s)
472 | lambda=object[[s]]
473 | }
474 | 
475 | else stop("Invalid form for s")
476 | predict(object$ordinis.fit, newx, s=lambda, ...)
477 | }
478 | 
479 | 
480 | 
481 | #' Plot method for fitted cv.ordinis objects
482 | #' 
483 | #' @param sign.lambda Either plot against log(lambda) (default) or its negative if \code{sign.lambda = -1}.
484 | #' @rdname plot
485 | #' @method plot cv.ordinis
486 | #' @export
487 | #' @examples
488 | #' set.seed(123)
489 | #' n.obs <- 100
490 | #' n.vars <- 200
491 | #' 
492 | #' true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15))
493 | #' 
494 | #' x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars)
495 | #' y <- rnorm(n.obs, sd = 3) + x %*% true.beta
496 | #' 
497 | #' fit <- cv.ordinis(x = x, y = y, gamma = 1.4)
498 | #' 
499 | #' plot(fit)
500 | #' 
501 | plot.cv.ordinis <- function(x, sign.lambda = 1, ...)
502 | { 503 | # modified from glmnet 504 | object = x 505 | 506 | main.txt <- "" 507 | 508 | xlab=expression(log(lambda)) 509 | if(sign.lambda<0)xlab=paste("-",xlab,sep="") 510 | plot.args=list(x = sign.lambda * log(object$lambda), 511 | y = object$cvm, 512 | ylim = range(object$cvup, object$cvlo), 513 | xlab = xlab, 514 | ylab = object$name, 515 | type = "n") 516 | new.args=list(...) 517 | if(length(new.args))plot.args[names(new.args)]=new.args 518 | do.call("plot", plot.args) 519 | error.bars(sign.lambda * log(object$lambda), 520 | object$cvup, 521 | object$cvlo, width = 0.005) 522 | points(sign.lambda*log(object$lambda), object$cvm, pch=20, col="dodgerblue") 523 | axis(side=3,at=sign.lambda*log(object$lambda),labels = paste(object$nzero), tick=FALSE, line=0, ...) 524 | abline(v = sign.lambda * log(object$lambda.min), lty=2, lwd = 2, col = "firebrick1") 525 | abline(v = sign.lambda * log(object$lambda.1se), lty=2, lwd = 2, col = "firebrick1") 526 | title(main.txt, line = 2.5, ...) 527 | invisible() 528 | } 529 | 530 | 531 | 532 | #' log likelihood function for fitted ordinis objects 533 | #' 534 | #' @param object fitted "ordinis" model object. 535 | #' @param REML an optional logical value. If \code{TRUE} the 536 | #' restricted log-likelihood is returned, else, if \code{FALSE}, 537 | #' the log-likelihood is returned. Defaults to \code{FALSE}. 538 | #' @param ... not used 539 | #' @rdname logLik 540 | #' @export 541 | #' @examples 542 | #' set.seed(123) 543 | #' n.obs <- 200 544 | #' n.vars <- 500 545 | #' 546 | #' true.beta <- c(runif(15, -0.25, 0.25), rep(0, n.vars - 15)) 547 | #' x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 548 | #' y <- rnorm(n.obs, sd = 3) + x %*% true.beta 549 | #' 550 | #' fit <- ordinis(x = x, y = y) 551 | #' 552 | #' logLik(fit) 553 | #' 554 | #' 555 | logLik.ordinis <- function(object, REML = FALSE, ...) { 556 | # taken from ncvreg. Thanks to Patrick Breheny. 557 | n <- as.numeric(object$nobs) 558 | df <- object$nzero + object$intercept 559 | 560 | if (object$family == "gaussian") 561 | { 562 | if (REML) 563 | { 564 | rdf <- n - df 565 | } else 566 | { 567 | rdf <- n 568 | } 569 | 570 | resid.ss <- object$loss 571 | logL <- -0.5 * n * (log(2 * pi) - log(rdf) + log(resid.ss)) - 0.5 * rdf 572 | } else if (object$family == "binomial") 573 | { 574 | logL <- -1 * object$deviance 575 | } else if (object$family == "poisson") 576 | { 577 | stop("poisson not complete yet") 578 | #y <- object$y 579 | #ind <- y != 0 580 | #logL <- -object$loss + sum(y[ind] * log(y[ind])) - sum(y) - sum(lfactorial(y)) 581 | } else if (object$family == "coxph") 582 | { 583 | logL <- -1e99 584 | } 585 | 586 | attr(logL,"df") <- df 587 | attr(logL,"nobs") <- n 588 | class(logL) <- "logLik" 589 | logL 590 | } 591 | -------------------------------------------------------------------------------- /src/CoordGaussianDense.h: -------------------------------------------------------------------------------- 1 | #ifndef COORDGAUSSIANDENSE_H 2 | #define COORDGAUSSIANDENSE_H 3 | 4 | #include "CoordBase.h" 5 | #include "utils.h" 6 | 7 | // minimize 1/2 * ||y - X * beta||^2 + lambda * ||beta||_1 8 | // 9 | // In ADMM form, 10 | // minimize f(x) + g(z) 11 | // s.t. 
x - z = 0
12 | //
13 | // x => beta
14 | // z => -X * beta
15 | // A => X
16 | // b => y
17 | // f(x) => 1/2 * ||Ax - b||^2
18 | // g(z) => lambda * ||z||_1
19 | class CoordGaussianDense: public CoordBase<Eigen::SparseVector<double> > //Eigen::SparseVector
20 | {
21 | protected:
22 | typedef float Scalar;
23 | typedef double Double;
24 | typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> Matrix;
25 | typedef Eigen::Matrix<double, Eigen::Dynamic, 1> Vector;
26 | typedef Eigen::Map<const Matrix> MapMat;
27 | typedef Eigen::Map<const Vector> MapVec;
28 | typedef const Eigen::Ref<const Matrix> ConstGenericMatrix;
29 | typedef const Eigen::Ref<const Vector> ConstGenericVector;
30 | typedef Eigen::SparseMatrix<double> SpMat;
31 | typedef Eigen::SparseVector<double> SparseVector;
32 | typedef Eigen::SparseVector<int> SparseVectori;
33 | 
34 | typedef SparseVector::InnerIterator InIterVec;
35 | typedef SparseVectori::InnerIterator InIterVeci;
36 | 
37 | MapMat datX; // data matrix
38 | MapVec datY; // response vector
39 | MapVec weights; // weight vector
40 | 
41 | Scalar lambda, lambda_ridge, gamma; // L1 penalty
42 | 
43 | double threshval;
44 | VectorXd resid_cur;
45 | 
46 | std::string penalty;
47 | bool intercept;
48 | ArrayXd penalty_factor; // penalty multiplication factors
49 | MapMat limits;
50 | double alpha;
51 | int penalty_factor_size;
52 | 
53 | VectorXd XY; // X'Y
54 | VectorXd Xsq; // colSums(X^2)
55 | VectorXd Xtr; // X'resid
56 | 
57 | Scalar lambda0; // minimum lambda to make coefficients all zero
58 | 
59 | double lprev;
60 | 
61 | double beta0, weights_sum, resids_sum;
62 | 
63 | // pointer we will set to one of the thresholding functions
64 | typedef double (*thresh_func_ptr)(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom);
65 | 
66 | thresh_func_ptr thresh_func;
67 | 
68 | /*
69 | static void soft_threshold(SparseVector &res, const Vector &vec, const double &penalty)
70 | {
71 | int v_size = vec.size();
72 | res.setZero();
73 | res.reserve(v_size);
74 | 
75 | const double *ptr = vec.data();
76 | for(int i = 0; i < v_size; i++)
77 | {
78 | if(ptr[i] > penalty)
79 | res.insertBack(i) = ptr[i] - penalty;
80 | else if(ptr[i] < -penalty)
81 | res.insertBack(i) = ptr[i] + penalty;
82 | }
83 | }
84 | */
85 | 
86 | void update_intercept()
87 | {
88 | 
89 | if (intercept)
90 | {
91 | resids_sum = (resid_cur).sum();
92 | 
93 | double beta0_delta = resids_sum / weights_sum;
94 | 
95 | beta0 += beta0_delta;
96 | 
97 | // update the (weighted) working residual!
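// beta0 was shifted by sum(resid) / sum(weights); remove that shift from each
// weighted residual so resid_cur stays consistent with the updated intercept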
98 | resid_cur.array() -= beta0_delta * weights.array(); 99 | } 100 | } 101 | 102 | double compute_loss() 103 | { 104 | //double sum_squares = 0.5 * (resid_cur.array().square().sum()) / double(nobs); 105 | double sum_squares = 0.5 * (resid_cur.array().square().sum()); 106 | double penalty_part = 0.0; 107 | 108 | /* 109 | if (penalty_factor_size < 1) 110 | { 111 | penalty_part = lambda * beta.array().abs().matrix().sum(); 112 | } else 113 | { 114 | penalty_part = lambda * (beta.array() * penalty_factor.array()).abs().matrix().sum(); 115 | } 116 | */ 117 | 118 | return (sum_squares + penalty_part); 119 | } 120 | 121 | static double soft_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 122 | { 123 | 124 | if (std::abs(value) <= penalty) 125 | return(0.0); 126 | else if (value > penalty) 127 | return( (value - penalty) / (denom + l2) ); 128 | else 129 | return( (value + penalty) / (denom + l2) ); 130 | 131 | /* // this ordering is slower for high-dimensional problems 132 | if(value > penalty) 133 | return( (value - penalty) / (denom + l2) ); 134 | else if(value < -penalty) 135 | return( (value + penalty) / (denom + l2) ); 136 | else 137 | return(0.0); 138 | */ 139 | } 140 | 141 | static double scad_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 142 | { 143 | double val_abs = std::abs(value); 144 | 145 | if (val_abs <= penalty) 146 | return(0.0); 147 | else if (val_abs <= penalty * (1.0 + l2) + penalty) 148 | { 149 | if(value > penalty) 150 | return((value - penalty) / ( denom + denom * l2 )); 151 | else 152 | return((value + penalty) / ( denom + denom * l2 )); 153 | } else if (val_abs <= gamma * penalty * (1.0 + l2)) 154 | { 155 | if ((gamma - 1.0) * value > gamma * penalty) 156 | return( (value - gamma * penalty / (gamma - 1.0)) / (denom * ( 1.0 - 1.0 / (gamma - 1.0) + l2 )) ); 157 | else 158 | return( (value + gamma * penalty / (gamma - 1.0)) / (denom * ( 1.0 - 1.0 / (gamma - 1.0) + l2 )) ); 159 | } else 160 | { 161 | return(value / (denom + denom * l2)); 162 | } 163 | 164 | } 165 | 166 | /* 167 | static double mcp_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 168 | { 169 | if (std::abs(value) > gamma * penalty * (denom + l2)) 170 | return(value / (denom + l2)); 171 | else if(value > penalty) 172 | return((value - penalty) / ( (denom + l2 - 1.0 / gamma) )); 173 | else if(value < -penalty) 174 | return((value + penalty) / ( (denom + l2 - 1.0 / gamma) )); 175 | else 176 | return(0.0); 177 | } 178 | */ 179 | static double mcp_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 180 | { 181 | double val_abs = std::abs(value); 182 | 183 | if (val_abs <= penalty) 184 | return(0.0); 185 | else if (val_abs <= gamma * penalty * (1.0 + l2)) 186 | { 187 | if(value > penalty) 188 | return((value - penalty) / ( denom * (1.0 - 1.0 / gamma + l2) )); 189 | else 190 | return((value + penalty) / ( denom * (1.0 - 1.0 / gamma + l2) )); 191 | } else 192 | return(value / (denom + denom * l2)); 193 | 194 | /* 195 | if (std::abs(value) > gamma * penalty * (1.0 + l2)) 196 | return(value / (denom + denom * l2)); 197 | else if(value > penalty) 198 | return((value - penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 199 | else if(value < -penalty) 200 | return((value + penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 201 | else 202 | return(0.0); 203 | */ 204 | 205 | } 206 | 207 | void 
set_threshold_func() 208 | { 209 | if (penalty == "lasso") 210 | { 211 | thresh_func = &CoordGaussianDense::soft_threshold; 212 | } else if (penalty == "mcp") 213 | { 214 | thresh_func = &CoordGaussianDense::mcp_threshold; 215 | } else 216 | { 217 | thresh_func = &CoordGaussianDense::scad_threshold; 218 | } 219 | } 220 | 221 | //void next_beta(Vector &res, VectorXi &eligible) 222 | void next_beta(SparseVector &res, SparseVectori &eligible) 223 | { 224 | 225 | // now update intercept if necessary 226 | update_intercept(); 227 | 228 | int j; 229 | double grad; 230 | 231 | // if no penalty multiplication factors specified 232 | if (penalty_factor_size < 1) 233 | { 234 | for (InIterVeci i_(eligible); i_; ++i_) 235 | { 236 | int j = i_.index(); 237 | double beta_prev = beta.coeffRef( j ); //beta(j); 238 | 239 | // surprisingly it's faster to calculate this on an iteration-basis 240 | // and not pre-calculate it within each newton iteration.. 241 | if (Xsq(j) < 0.0) Xsq(j) = (datX.col(j).array().square()).matrix().mean(); 242 | 243 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 244 | 245 | grad = Xtr(j) + beta_prev * Xsq(j); 246 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 247 | 248 | threshval = thresh_func(grad, lambda, gamma, lambda_ridge, Xsq(j)); 249 | 250 | // apply param limits 251 | if (threshval < limits(1, j)) threshval = limits(1, j); 252 | if (threshval > limits(0, j)) threshval = limits(0, j); 253 | 254 | // update residual if the coefficient changes after 255 | // thresholding. 256 | if (beta_prev != threshval) 257 | { 258 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 259 | beta.coeffRef(j) = threshval; 260 | resid_cur.array() -= (threshval - beta_prev) * datX.col(j).array() * weights.array(); 261 | 262 | // update eligible set if necessary 263 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 264 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 265 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 266 | } else 267 | { 268 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 269 | { 270 | eligible_set.coeffRef(j) = 0; 271 | } 272 | } 273 | } 274 | } else //if penalty multiplication factors are used 275 | { 276 | for (InIterVeci i_(eligible); i_; ++i_) 277 | { 278 | int j = i_.index(); 279 | double beta_prev = beta.coeff( j ); //beta(j); 280 | 281 | // surprisingly it's faster to calculate this on an iteration-basis 282 | // and not pre-calculate it within each newton iteration.. 283 | if (Xsq(j) < 0.0) Xsq(j) = (datX.col(j).array().square()).matrix().mean(); 284 | 285 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 286 | 287 | grad = Xtr(j) + beta_prev * Xsq(j); 288 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 289 | 290 | threshval = thresh_func(grad, penalty_factor(j) * lambda, gamma, penalty_factor(j) * lambda_ridge, Xsq(j)); 291 | 292 | // apply param limits 293 | if (threshval < limits(1, j)) threshval = limits(1, j); 294 | if (threshval > limits(0, j)) threshval = limits(0, j); 295 | 296 | // update residual if the coefficient changes after 297 | // thresholding. 
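// (note: when the thresholded value is nonzero, the update below is
//  damped as 0.85 * new + 0.15 * previous -- a relaxation that
//  presumably stabilizes the active-set sweeps)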
298 | if (beta_prev != threshval) 299 | { 300 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 301 | beta.coeffRef(j) = threshval; 302 | resid_cur.array() -= (threshval - beta_prev) * datX.col(j).array() * weights.array(); 303 | 304 | // update eligible set if necessary 305 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 306 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 307 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 308 | } else 309 | { 310 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 311 | { 312 | eligible_set.coeffRef(j) = 0; 313 | } 314 | } 315 | } 316 | } 317 | 318 | } 319 | 320 | //void next_beta(Vector &res, VectorXi &eligible) 321 | void next_beta(SparseVector &res, VectorXi &eligible) 322 | { 323 | 324 | // now update intercept if necessary 325 | update_intercept(); 326 | 327 | int j; 328 | double grad; 329 | 330 | 331 | // if no penalty multiplication factors specified 332 | if (penalty_factor_size < 1) 333 | { 334 | for (j = 0; j < nvars; ++j) 335 | { 336 | if (eligible(j)) 337 | { 338 | double beta_prev = beta.coeff( j ); //beta(j); 339 | 340 | // surprisingly it's faster to calculate this on an iteration-basis 341 | // and not pre-calculate it within each newton iteration.. 342 | if (Xsq(j) < 0.0) Xsq(j) = (datX.col(j).array().square()).matrix().mean(); 343 | 344 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 345 | 346 | grad = Xtr(j) + beta_prev * Xsq(j); 347 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j) ; 348 | 349 | threshval = thresh_func(grad, lambda, gamma, lambda_ridge, Xsq(j)); 350 | 351 | // apply param limits 352 | if (threshval < limits(1, j)) threshval = limits(1, j); 353 | if (threshval > limits(0, j)) threshval = limits(0, j); 354 | 355 | // update residual if the coefficient changes after 356 | // thresholding. 357 | if (beta_prev != threshval) 358 | { 359 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 360 | beta.coeffRef(j) = threshval; 361 | resid_cur.array() -= (threshval - beta_prev) * datX.col(j).array() * weights.array(); 362 | 363 | // update eligible set if necessary 364 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 365 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 366 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 367 | } else 368 | { 369 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 370 | { 371 | eligible_set.coeffRef(j) = 0; 372 | } 373 | } 374 | } // end eligible set check 375 | } 376 | } else //if penalty multiplication factors are used 377 | { 378 | for (j = 0; j < nvars; ++j) 379 | { 380 | if (eligible(j)) 381 | { 382 | double beta_prev = beta.coeff( j ); //beta(j); 383 | 384 | // surprisingly it's faster to calculate this on an iteration-basis 385 | // and not pre-calculate it within each newton iteration.. 
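// (Xsq acts as a lazy cache: init() fills it with -1.0, and each
//  column's mean square is computed only the first time that
//  coordinate is actually visited)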
386 | if (Xsq(j) < 0.0) Xsq(j) = (datX.col(j).array().square()).matrix().mean(); 387 | 388 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 389 | 390 | grad = Xtr(j) + beta_prev * Xsq(j); 391 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 392 | 393 | threshval = thresh_func(grad, penalty_factor(j) * lambda, gamma, penalty_factor(j) * lambda_ridge, Xsq(j)); 394 | 395 | // apply param limits 396 | if (threshval < limits(1,j)) threshval = limits(1,j); 397 | if (threshval > limits(0,j)) threshval = limits(0,j); 398 | 399 | // update residual if the coefficient changes after 400 | // thresholding. 401 | if (beta_prev != threshval) 402 | { 403 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 404 | beta.coeffRef(j) = threshval; 405 | resid_cur.array() -= (threshval - beta_prev) * datX.col(j).array() * weights.array(); 406 | 407 | // update eligible set if necessary 408 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 409 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 410 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 411 | } else 412 | { 413 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 414 | { 415 | eligible_set.coeffRef(j) = 0; 416 | } 417 | } 418 | } // end eligible set check 419 | } 420 | } 421 | 422 | } 423 | 424 | 425 | 426 | // Calculate ||v1 - v2||^2 when v1 and v2 are sparse 427 | static double diff_squared_norm(const SparseVector &v1, const SparseVector &v2) 428 | { 429 | const int n1 = v1.nonZeros(), n2 = v2.nonZeros(); 430 | const double *v1_val = v1.valuePtr(), *v2_val = v2.valuePtr(); 431 | const int *v1_ind = v1.innerIndexPtr(), *v2_ind = v2.innerIndexPtr(); 432 | 433 | double r = 0.0; 434 | int i1 = 0, i2 = 0; 435 | while(i1 < n1 && i2 < n2) 436 | { 437 | if(v1_ind[i1] == v2_ind[i2]) 438 | { 439 | double val = v1_val[i1] - v2_val[i2]; 440 | r += val * val; 441 | i1++; 442 | i2++; 443 | } else if(v1_ind[i1] < v2_ind[i2]) { 444 | r += v1_val[i1] * v1_val[i1]; 445 | i1++; 446 | } else { 447 | r += v2_val[i2] * v2_val[i2]; 448 | i2++; 449 | } 450 | } 451 | while(i1 < n1) 452 | { 453 | r += v1_val[i1] * v1_val[i1]; 454 | i1++; 455 | } 456 | while(i2 < n2) 457 | { 458 | r += v2_val[i2] * v2_val[i2]; 459 | i2++; 460 | } 461 | 462 | return r; 463 | } 464 | 465 | 466 | public: 467 | CoordGaussianDense(ConstGenericMatrix &datX_, 468 | ConstGenericVector &datY_, 469 | ConstGenericVector &weights_, 470 | ArrayXd &penalty_factor_, 471 | ConstGenericMatrix &limits_, 472 | std::string &penalty_, 473 | bool intercept_, 474 | double alpha_ = 1.0, 475 | double tol_ = 1e-6) : 476 | CoordBase > 477 | (datX_.rows(), datX_.cols(), tol_), 478 | datX(datX_.data(), datX_.rows(), datX_.cols()), 479 | datY(datY_.data(), datY_.size()), 480 | weights(weights_.data(), weights_.size()), 481 | resid_cur(datY_.array() * weights.array()), //assumes we start our beta estimate at 0 482 | penalty(penalty_), 483 | intercept(intercept_), 484 | penalty_factor(penalty_factor_), 485 | limits(limits_.data(), limits_.rows(), limits_.cols()), 486 | alpha(alpha_), 487 | penalty_factor_size(penalty_factor_.size()), 488 | XY((datX.transpose() * (datY.array() * weights.array()).matrix()) ), 489 | Xsq(datX_.cols()), Xtr(datX_.cols()) 490 | {} 491 | 492 | double get_lambda_zero() 493 | { 494 | if (penalty_factor_size > 0) 495 | { 496 | 497 | lambda0 = 0; 498 | for (int i = 0; i < penalty_factor.size(); ++i) 499 | { 500 | if (penalty_factor(i) != 
0.0) 501 | { 502 | double valcur = std::abs(XY(i)) / penalty_factor(i); 503 | 504 | if (valcur > lambda0) lambda0 = valcur; 505 | } 506 | } 507 | } else 508 | { 509 | lambda0 = XY.cwiseAbs().maxCoeff(); 510 | } 511 | 512 | lambda0 /= ( alpha * 1.0 * double(nobs)); //std::pow(1e-6, 1.0/(99.0)); 513 | 514 | 515 | 516 | return lambda0; 517 | } 518 | 519 | // init() is a cold start for the first lambda 520 | void init(double lambda_, double gamma_) 521 | { 522 | 523 | set_threshold_func(); 524 | 525 | beta.setZero(); 526 | 527 | lambda = lambda_ * alpha; 528 | lambda_ridge = lambda_ * (1.0 - alpha); 529 | 530 | gamma = gamma_; 531 | 532 | eligible_set.setZero(); 533 | 534 | eligible_set.reserve(std::min(nobs, nvars)); 535 | beta.reserve(std::min(nobs, nvars)); 536 | 537 | nzero = 0; 538 | beta0 = 0.0; 539 | 540 | Xsq.fill(-1.0); 541 | 542 | weights_sum = weights.sum(); 543 | 544 | double cutoff; 545 | 546 | if (penalty == "lasso") 547 | { 548 | cutoff = 2.0 * lambda - lambda0; 549 | } else if (penalty == "mcp") 550 | { 551 | cutoff = lambda + gamma / (gamma - 1.0) * (lambda - lambda0); 552 | } else 553 | { 554 | cutoff = lambda + gamma / (gamma - 2.0) * (lambda - lambda0); 555 | } 556 | 557 | 558 | 559 | if (penalty_factor_size < 1) 560 | { 561 | for (int j = 0; j < nvars; ++j) if (std::abs(XY(j)) > (cutoff)) eligible_set.coeffRef(j) = 1; 562 | } else 563 | { 564 | for (int j = 0; j < nvars; ++j) if (std::abs(XY(j)) > (cutoff * penalty_factor(j))) eligible_set.coeffRef(j) = 1; 565 | } 566 | 567 | //beta.reserve( std::max(eligible_set.sum() + 10, std::min(nvars, nobs)) ); 568 | 569 | } 570 | // when computing for the next lambda, we can use the 571 | // current main_x, aux_z, dual_y and rho as initial values 572 | void init_warm(double lambda_, double gamma_) 573 | { 574 | lprev = lambda; 575 | lambda = lambda_ * alpha; 576 | lambda_ridge = lambda_ * (1.0 - alpha); 577 | gamma = gamma_; 578 | 579 | eligible_set.setZero(); 580 | 581 | eligible_set.reserve(std::min(nobs * 2, nvars)); 582 | 583 | nzero = 0; 584 | 585 | double cutoff; 586 | 587 | if (penalty == "lasso") 588 | { 589 | cutoff = 2.0 * lambda - lprev; 590 | } else if (penalty == "mcp") 591 | { 592 | cutoff = lambda + gamma / (gamma - 1.0) * (lambda - lprev); 593 | } else 594 | { 595 | cutoff = lambda + gamma / (gamma - 2.0) * (lambda - lprev); 596 | } 597 | 598 | 599 | 600 | if (penalty_factor_size < 1) 601 | { 602 | for (int j = 0; j < nvars; ++j) if (std::abs(Xtr(j)) > std::pow(nobs, 0.85) * (cutoff)) eligible_set.coeffRef(j) = 1; 603 | } else 604 | { 605 | for (int j = 0; j < nvars; ++j) if (std::abs(Xtr(j)) > (std::pow(nobs, 0.85) * cutoff * penalty_factor(j))) eligible_set.coeffRef(j) = 1; 606 | } 607 | 608 | //beta.reserve( std::max(eligible_set.sum() + 10, std::min(nvars, nobs)) ); 609 | } 610 | 611 | 612 | int solve(int maxit) 613 | { 614 | //int i; 615 | 616 | int current_iter = 0; 617 | 618 | // run once through all variables 619 | //current_iter++; 620 | //beta_prev = beta; 621 | //ineligible_set.fill(1); 622 | 623 | //update_beta(ineligible_set); 624 | 625 | while(current_iter < maxit) 626 | { 627 | while(current_iter < maxit) 628 | { 629 | current_iter++; 630 | beta_prev = beta; 631 | 632 | update_beta(eligible_set); 633 | 634 | if(converged()) break; 635 | } 636 | 637 | current_iter++; 638 | beta_prev = beta; 639 | ineligible_set.fill(1); 640 | 641 | for (InIterVeci i_(eligible_set); i_; ++i_) 642 | { 643 | ineligible_set(i_.index()) = 0; 644 | } 645 | 646 | update_beta(ineligible_set); 647 | 648 | if(converged()) break; 
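// (active-set strategy: iterate to convergence on the current
//  eligible set, then sweep once over the remaining 'ineligible'
//  variables; if that sweep changes nothing we are done, otherwise
//  newly nonzero coordinates have joined the eligible set and the
//  outer loop repeats)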
649 | }
650 | 
651 | // force zeros to be actual zeros
652 | beta.prune(0.0);
653 | nzero = beta.nonZeros();
654 | 
655 | /*
656 | for (int j = 0; j < nvars; ++j)
657 | {
658 | if (beta(j) != 0)
659 | ++nzero;
660 | }
661 | */
662 | 
663 | 
664 | loss = compute_loss();
665 | 
666 | // print_footer();
667 | 
668 | return current_iter;
669 | }
670 | 
671 | virtual double get_intercept() { return beta0; }
672 | };
673 | 
674 | 
675 | 
676 | #endif // COORDGAUSSIANDENSE_H
677 | 
--------------------------------------------------------------------------------
/R/ordinis_call.R:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | #' Fitting Penalized Regression Models Using the Coordinate Descent Algorithm
4 | #' 
5 | #' @description ordinis provides estimation of linear models with the lasso, adaptive lasso, MCP, and SCAD penalties
6 | #' 
7 | #' 
8 | #' @param x The design matrix
9 | #' @param y The response vector
10 | #' @param weights a vector of weights of length equal to the length of \code{y}. \code{weights} are NOT standardized or scaled; the user must
11 | #' do this if desired
12 | #' @param offset A vector of length \code{nobs} that is included in the linear predictor
13 | #' @param penalty a string indicating which penalty to use. \code{"lasso"}, \code{"alasso"} (adaptive lasso), \code{"mcp"}, and \code{"scad"}
14 | #' are available
15 | #' @param lambda A user provided sequence of \eqn{\lambda}. If set to
16 | #' \code{NULL}, the program will calculate its own sequence
17 | #' according to \code{nlambda} and \code{lambda.min.ratio},
18 | #' which starts from \eqn{\lambda_0} (with this
19 | #' \eqn{\lambda} all coefficients will be zero) and ends at
20 | #' \code{lambda0 * lambda.min.ratio}, containing
21 | #' \code{nlambda} values equally spaced on the log scale.
22 | #' It is recommended to set this parameter to \code{NULL}
23 | #' (the default).
24 | #' @param alpha mixing parameter between 0 and 1 for the elastic net. \code{alpha = 1} is for the lasso, \code{alpha = 0} is for ridge regression
25 | #' @param gamma parameter for MCP/SCAD. Defaults to the values recommended in the papers corresponding to each penalty
26 | #' @param penalty.factor a vector with length equal to the number of columns in \code{x} to be multiplied by lambda. By default
27 | #' it is a vector of 1s. \code{penalty.factor} is NOT scaled
28 | #' @param upper.limits a vector of length \code{ncol(x)} of upper limits for each coefficient. Can be a single value, which will
29 | #' then be applied to each coefficient. Must be non-negative.
30 | #' @param lower.limits a vector of length \code{ncol(x)} of lower limits for each coefficient. Can be a single value, which will
31 | #' then be applied to each coefficient. Cannot be greater than 0.
32 | #' @param nlambda Number of values in the \eqn{\lambda} sequence. Only used
33 | #' when the program calculates its own \eqn{\lambda}
34 | #' (by setting \code{lambda = NULL}).
35 | #' @param lambda.min.ratio Smallest value in the \eqn{\lambda} sequence
36 | #' as a fraction of \eqn{\lambda_0}. See
37 | #' the explanation of the \code{lambda}
38 | #' argument. This parameter is only used when
39 | #' the program calculates its own \eqn{\lambda}
40 | #' (by setting \code{lambda = NULL}). The default
41 | #' value is the same as in \pkg{glmnet}: 0.001 if
42 | #' \code{nrow(x) >= ncol(x)} and 0.05 otherwise.
43 | #' @param family family of the underlying model. \code{"gaussian"} for continuous responses and \code{"binomial"} for binary responses are built in; \code{glm} family functions are accepted experimentally
44 | #' @param intercept Whether to fit an intercept in the model. Default is \code{TRUE}. 
45 | #' @param standardize Whether to standardize the design matrix before
46 | #' fitting the model. Default is \code{TRUE}. Fitted coefficients
47 | #' are always returned on the original scale.
48 | #' @param dfmax Maximum number of variables allowed in the model
49 | #' @param maxit Maximum number of coordinate descent iterations.
50 | #' @param tol convergence tolerance for the coordinate descent iterations.
51 | #' @param maxit.irls Maximum number of IRLS (iteratively reweighted least squares) iterations for non-Gaussian models.
52 | #' @param tol.irls convergence tolerance for the IRLS iterations.
53 | #' 
54 | #' @examples
55 | #' set.seed(123)
56 | #' n = 100
57 | #' p = 1000
58 | #' b = c(runif(10, min = 0.1, max = 1), rep(0, p - 10))
59 | #' x = matrix(rnorm(n * p, sd = 1.5), n, p)
60 | #' y = drop(x %*% b) + rnorm(n)
61 | #' 
62 | #' 
63 | #' ## fit a lasso model with 100 tuning parameter values
64 | #' res <- ordinis(x, y)
65 | #' 
66 | #' y2 <- 1 * (y > 0)
67 | #' y3 <- exp(y)
68 | #' 
69 | #' resb <- ordinis(x, y2, family = "binomial")
70 | #' 
71 | #' @export
72 | ordinis <- function(x,
73 | y,
74 | weights = rep(1, NROW(y)),
75 | offset = NULL,
76 | family = NULL,
77 | penalty = c("lasso", "alasso", "mcp", "scad"),
78 | lambda = numeric(0),
79 | alpha = 1,
80 | gamma = ifelse(penalty == "scad", 3.7, 1.4),
81 | penalty.factor = NULL,
82 | upper.limits = rep(Inf, NCOL(x)),
83 | lower.limits = rep(-Inf, NCOL(x)),
84 | nlambda = 100L,
85 | lambda.min.ratio = NULL,
86 | intercept = TRUE,
87 | standardize = TRUE,
88 | dfmax = nvars,
89 | maxit = NULL,
90 | tol = NULL,
91 | maxit.irls = 25L,
92 | tol.irls = 1e-3
93 | )
94 | {
95 | 
96 | x <- as.matrix(x)
97 | y <- as.numeric(y)
98 | 
99 | n <- nrow(x)
100 | p <- nvars <- ncol(x)
101 | 
102 | intercept <- as.logical(intercept)
103 | standardize <- as.logical(standardize)
104 | penalty <- match.arg(penalty)
105 | 
106 | if (is.null(family) || (is.character(family) && (family == "gaussian" || family == "binomial")))
107 | {
108 | glm_fam <- FALSE
109 | if (is.null(family)) family <- "gaussian"
110 | 
111 | if (is.null(maxit))
112 | {
113 | if (family == "gaussian")
114 | {
115 | maxit <- 5000L
116 | } else
117 | {
118 | maxit <- 500L
119 | }
120 | }
121 | 
122 | if (is.null(tol))
123 | {
124 | if (family == "gaussian")
125 | {
126 | tol <- 1e-4
127 | } else
128 | {
129 | tol <- 1e-3
130 | }
131 | }
132 | 
133 | } else
134 | {
135 | glm_fam <- TRUE
136 | message("Use of glm family() functions is still experimental. 
Does not always work yet") 137 | if (is.character(family)) 138 | { 139 | family <- get(family, mode = "function", envir = parent.frame()) 140 | } 141 | if (is.function(family)) 142 | { 143 | family <- family() 144 | } 145 | if (is.null(family$family)) 146 | { 147 | print(family) 148 | stop("'family' not recognized") 149 | } 150 | 151 | if (is.null(maxit)) 152 | { 153 | if (family$family == "gaussian") 154 | { 155 | maxit <- 5000L 156 | } else 157 | { 158 | maxit <- 1500L 159 | } 160 | } 161 | 162 | if (is.null(tol)) 163 | { 164 | if (family$family == "gaussian") 165 | { 166 | tol <- 1e-4 167 | } else 168 | { 169 | tol <- 1e-3 170 | } 171 | } 172 | } 173 | 174 | if (!is.null(dim(x))) 175 | { 176 | vnames <- colnames(x) 177 | 178 | if (is.null(vnames)) 179 | { 180 | vnames <- paste0("V", 1:p) 181 | } 182 | } 183 | 184 | if (!is.null(offset)) 185 | { 186 | if (length(offset) != n) stop("offset must be of same length as response") 187 | } else 188 | { 189 | offset <- rep(0, n) 190 | } 191 | 192 | if (!glm_fam) 193 | { 194 | if (family == "binomial") 195 | { 196 | if (length(unique(y)) != 2) stop("y must only take 2 values") 197 | 198 | #if (penalty != "lasso") warning("non-lasso penalties for non-gaussian responses are experimental") 199 | } 200 | } 201 | 202 | ## taken from glmnet 203 | if(any(lower.limits>0)) { stop("Lower limits should be non-positive") } 204 | if(any(upper.limits<0)) { stop("Upper limits should be non-negative") } 205 | lower.limits[lower.limits == -Inf] <- -1e99 206 | upper.limits[upper.limits == Inf] <- 1e99 207 | if(length(lower.limits) < nvars) 208 | { 209 | if(length(lower.limits)==1) lower.limits <- rep(lower.limits, nvars) else stop("Require length 1 or nvars lower.limits") 210 | } else 211 | { 212 | lower.limits <- lower.limits[seq(nvars)] 213 | } 214 | if(length(upper.limits) < nvars) 215 | { 216 | if(length(upper.limits)==1) upper.limits <- rep(upper.limits, nvars) else stop("Require length 1 or nvars upper.limits") 217 | } else 218 | { 219 | upper.limits <- upper.limits[seq(nvars)] 220 | } 221 | 222 | if (n != NROW(y)) 223 | { 224 | stop("number of rows in x not equal to length of y") 225 | } 226 | 227 | if (NROW(weights) != NROW(y)) 228 | { 229 | stop("length of weights not equal to length of y") 230 | } 231 | 232 | if (is.null(penalty.factor)) 233 | { 234 | penalty.factor <- numeric(0) 235 | } else 236 | { 237 | if (length(penalty.factor) != p) 238 | { 239 | stop("penalty.factor must be same length as number of columns in x") 240 | } 241 | } 242 | 243 | lambda_val <- sort(as.numeric(lambda), decreasing = TRUE) 244 | 245 | if(any(lambda_val <= 0)) 246 | { 247 | stop("lambda must be positive") 248 | } 249 | 250 | 251 | if(nlambda[1] <= 0) 252 | { 253 | stop("nlambda must be a positive integer") 254 | } 255 | 256 | if(is.null(lambda.min.ratio)) 257 | { 258 | lmr_val <- ifelse(nrow(x) < ncol(x), 0.05, 0.001) 259 | } else 260 | { 261 | lmr_val <- as.numeric(lambda.min.ratio) 262 | } 263 | 264 | if(lmr_val >= 1 | lmr_val <= 0) 265 | { 266 | stop("lambda.min.ratio must be within (0, 1)") 267 | } 268 | 269 | lambda <- lambda_val 270 | nlambda <- as.integer(nlambda[1]) 271 | lambda.min.ratio <- lmr_val 272 | 273 | if (alpha > 1 | alpha < 0) stop("alpha must be between 0 and 1") 274 | 275 | if(maxit <= 0) 276 | { 277 | stop("maxit should be positive") 278 | } 279 | if(tol < 0) 280 | { 281 | stop("tol should be nonnegative") 282 | } 283 | 284 | maxit <- as.integer(maxit[1]) 285 | tol <- as.double(tol[1]) 286 | maxit.irls <- as.integer(maxit.irls[1]) 287 | tol.irls <- 
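# (each of these tuning arguments keeps only its first element, so
# accidentally vector-valued inputs are coerced to scalars before
# being handed to the C++ routines)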
as.double(tol.irls[1]) 288 | alpha <- as.double(alpha[1]) 289 | gamma <- as.double(gamma[1]) 290 | penalty <- as.character(penalty[1]) 291 | dfmax <- as.integer(dfmax[1]) 292 | 293 | 294 | penalty_orig <- penalty 295 | 296 | if (penalty == "alasso") 297 | { 298 | penalty <- "lasso" 299 | } 300 | 301 | if (glm_fam) 302 | { 303 | opts <- list(maxit = maxit, 304 | tol = tol, 305 | alpha = alpha, 306 | gamma = gamma, 307 | penalty = penalty, 308 | maxit.irls = maxit.irls, 309 | tol.irls = tol.irls, 310 | dfmax = dfmax, 311 | variance = family$variance, 312 | mu_eta = family$mu.eta, 313 | linkinv = family$linkinv, 314 | linkfun = family$linkfun, 315 | dev_resids = family$dev.resids, 316 | valideta = family$valideta, 317 | validmu = family$validmu) 318 | } else 319 | { 320 | opts <- list(maxit = maxit, 321 | tol = tol, 322 | alpha = alpha, 323 | gamma = gamma, 324 | penalty = penalty, 325 | maxit.irls = maxit.irls, 326 | tol.irls = tol.irls, 327 | dfmax = dfmax, 328 | variance = numeric(0), 329 | mu_eta = numeric(0), 330 | linkinv = numeric(0), 331 | linkfun = numeric(0), 332 | dev_resids = numeric(0), 333 | valideta = numeric(0), 334 | validmu = numeric(0)) 335 | } 336 | 337 | if (gamma <= 1) stop("gamma must be greater than 1") 338 | 339 | if (glm_fam || family == "binomial") 340 | { 341 | 342 | res <- coord_ordinis_dense_glm_cpp(x, 343 | y, 344 | weights, 345 | drop(offset), 346 | lambda, 347 | penalty.factor, 348 | rbind(upper.limits, lower.limits), 349 | nlambda, 350 | lambda.min.ratio, 351 | standardize, 352 | intercept, 353 | glm_fam, 354 | opts 355 | ) 356 | res$beta <- res$beta[, 1:res$last, drop = FALSE] 357 | 358 | res$beta <- as(res$beta, "sparseMatrix") 359 | 360 | res$deviance <- res$deviance[1:res$last] 361 | } else if (family == "gaussian") 362 | { 363 | res <- coord_ordinis_dense_cpp(x, 364 | y - drop(offset), 365 | weights, 366 | lambda, 367 | penalty.factor, 368 | rbind(upper.limits, lower.limits), 369 | nlambda, 370 | lambda.min.ratio, 371 | standardize, 372 | intercept, 373 | opts 374 | ) 375 | res$beta <- res$beta[, 1:res$last, drop = FALSE] 376 | 377 | res$beta <- as(res$beta, "sparseMatrix") 378 | 379 | res$fitted <- as.matrix(cbind(1, x) %*% res$beta) 380 | res$resid <- matrix(rep(y, ncol(res$beta)), ncol = ncol(res$beta) ) - res$fitted 381 | res$loss <- colSums(res$resid ^ 2) 382 | 383 | } 384 | 385 | 386 | if (penalty_orig == "alasso") 387 | { 388 | penwts <- as.vector(unname(1 / abs(res$beta[-1,ncol(res$beta)]))) 389 | 390 | if (length(penalty.factor)) 391 | { 392 | penalty.factor <- penalty.factor * penwts 393 | } else 394 | { 395 | penalty.factor <- penwts 396 | } 397 | 398 | 399 | if (glm_fam || family == "binomial") 400 | { 401 | 402 | res <- coord_ordinis_dense_glm_cpp(x, 403 | y, 404 | weights, 405 | drop(offset), 406 | lambda, 407 | penalty.factor, 408 | rbind(upper.limits, lower.limits), 409 | nlambda, 410 | lambda.min.ratio, 411 | standardize, 412 | intercept, 413 | glm_fam, 414 | opts 415 | ) 416 | res$beta <- res$beta[, 1:res$last, drop = FALSE] 417 | 418 | res$beta <- as(res$beta, "sparseMatrix") 419 | 420 | res$deviance <- res$deviance[1:res$last] 421 | } else if (family == "gaussian") 422 | { 423 | res <- coord_ordinis_dense_cpp(x, 424 | y - drop(offset), 425 | weights, 426 | lambda, 427 | penalty.factor, 428 | rbind(upper.limits, lower.limits), 429 | nlambda, 430 | lambda.min.ratio, 431 | standardize, 432 | intercept, 433 | opts 434 | ) 435 | res$beta <- res$beta[, 1:res$last, drop = FALSE] 436 | 437 | res$beta <- as(res$beta, "sparseMatrix") 438 | 439 
| res$fitted <- as.matrix(cbind(1, x) %*% res$beta)
440 | res$resid <- matrix(rep(y, ncol(res$beta)), ncol = ncol(res$beta)) - res$fitted
441 | res$loss <- colSums(res$resid ^ 2)
442 | 
443 | }
444 | }
445 | 
446 | rownames(res$beta) <- c("(Intercept)", vnames)
447 | 
448 | res$niter <- res$niter[1:res$last]
449 | res$lambda <- res$lambda[1:res$last]
450 | #res$losses <- res$losses[, 1:res$last, drop = FALSE]
451 | #res$losses.iter <- res$losses.iter[, 1:res$last, drop = FALSE]
452 | 
453 | res$nzero <- colSums(res$beta[-1,,drop=FALSE] != 0)
454 | 
455 | res$glm_fam <- glm_fam
456 | res$family <- family
457 | res$penalty <- penalty
458 | res$penalty.factor <- penalty.factor
459 | res$standardize <- standardize
460 | res$intercept <- intercept
461 | res$nobs <- n
462 | res$nvars <- p
463 | 
464 | if (!glm_fam)
465 | {
466 | class2 <- switch(family,
467 | "gaussian" = "cdgaussian",
468 | "binomial" = "cdbinomial")
469 | } else
470 | {
471 | class2 <- "glm_ordinis"
472 | }
473 | 
474 | 
475 | class(res) <- c("ordinis", class2)
476 | res
477 | }
478 | 
479 | 
480 | 
481 | 
482 | #' CV Fitting for a Lasso Model Using the Coordinate Descent Algorithm
483 | #' 
484 | #' @description Cross-validation for linear models with the lasso penalty,
485 | #' \deqn{\frac{1}{2n} \Vert y - X\beta \Vert_2^2 + \lambda \Vert \beta \Vert_1,}
486 | #' where \eqn{n} is the sample size and \eqn{\lambda} is a tuning
487 | #' parameter that controls the sparsity of \eqn{\beta}.
488 | #' 
489 | #' @param x The design matrix
490 | #' @param y The response vector
491 | #' @param lambda A user provided sequence of \eqn{\lambda}. If set to
492 | #' \code{NULL}, the program will calculate its own sequence
493 | #' according to \code{nlambda} and \code{lambda.min.ratio},
494 | #' which starts from \eqn{\lambda_0} (with this
495 | #' \eqn{\lambda} all coefficients will be zero) and ends at
496 | #' \code{lambda0 * lambda.min.ratio}, containing
497 | #' \code{nlambda} values equally spaced on the log scale.
498 | #' It is recommended to set this parameter to \code{NULL}
499 | #' (the default).
500 | #' @param gamma parameter for MCP/SCAD (see \code{ordinis})
501 | #' @param type.measure measure to evaluate for cross-validation. The default is \code{type.measure = "deviance"},
502 | #' which uses squared error for gaussian models (a.k.a. \code{type.measure = "mse"} there) and deviance for logistic
503 | #' regression. \code{type.measure = "class"} applies to binomial only. \code{type.measure = "auc"} is for two-class logistic
504 | #' regression only. \code{type.measure = "mse"} or \code{type.measure = "mae"} (mean absolute error) can be used by all models;
505 | #' they measure the deviation from the fitted mean to the response.
506 | #' @param nfolds number of folds for cross-validation. Default is 10; 3 is the smallest value allowed.
507 | #' @param foldid an optional vector of values between 1 and \code{nfolds} specifying which fold each observation belongs to.
508 | #' @param grouped Like in \pkg{glmnet}, this is an experimental argument, with default \code{TRUE}, and can be ignored by most users.
509 | #' For all models, this refers to computing nfolds separate statistics, and then using their mean and estimated standard
510 | #' error to describe the CV curve. If \code{grouped = FALSE}, an error matrix is built up at the observation level from the
511 | #' predictions from the \code{nfolds} fits, and then summarized (does not apply to \code{type.measure = "auc"}).
512 | #' @param keep If \code{keep = TRUE}, a prevalidated array is returned containing fitted values for each observation
513 | #' and each value of lambda for each model. 
This means these fits are computed with this observation and the rest of its
514 | #' fold omitted. The foldid vector is also returned. Default is \code{keep = FALSE}
515 | #' @param parallel If \code{TRUE}, use parallel \code{foreach} to fit each fold. A parallel backend must be registered beforehand, e.g. via \pkg{doMC}.
516 | #' @param ... other parameters to be passed to the \code{ordinis} function
517 | #' 
518 | #' @examples set.seed(123)
519 | #' n = 100
520 | #' p = 1000
521 | #' b = c(runif(10, min = 0.2, max = 1), rep(0, p - 10))
522 | #' x = matrix(rnorm(n * p, sd = 3), n, p)
523 | #' y = drop(x %*% b) + rnorm(n)
524 | #' 
525 | #' ## fit lasso model with 100 tuning parameter values
526 | #' res <- cv.ordinis(x, y)
527 | #' 
528 | #' 
529 | #' @export
530 | cv.ordinis <- function(x,
531 | y,
532 | lambda = numeric(0),
533 | gamma = 3.7,
534 | type.measure = c("mse", "deviance", "class", "auc", "mae"),
535 | nfolds = 10,
536 | foldid = NULL,
537 | grouped = TRUE,
538 | keep = FALSE,
539 | parallel = FALSE,
540 | ...)
541 | {
542 | if (missing(type.measure))
543 | type.measure = "default"
544 | else type.measure = match.arg(type.measure)
545 | if (length(lambda) == 1)
546 | stop("Need more than one value of lambda for cv.ordinis")
547 | N = nrow(x)
548 | y = drop(y)
549 | 
550 | two.call = match.call(expand.dots = TRUE)
551 | which = match(c("type.measure", "nfolds", "foldid"), names(two.call), FALSE)
552 | if (any(which))
553 | two.call = two.call[-which]
554 | two.call[[1]] = as.name("ordinis")
555 | two.object = ordinis(x,
556 | y,
557 | lambda = lambda,
558 | gamma = gamma, ...)
559 | two.object$call = two.call
560 | 
561 | 
562 | nz = two.object$nzero
563 | 
564 | if (is.null(foldid))
565 | foldid = sample(rep(seq(nfolds), length = N))
566 | else nfolds = max(foldid)
567 | if (nfolds < 3)
568 | stop("nfolds must be at least 3; nfolds = 10 recommended")
569 | outlist = as.list(seq(nfolds))
570 | if (parallel) {
571 | outlist = foreach(i = seq(nfolds), .packages = c("ordinis")) %dopar%
572 | {
573 | which = foldid == i
574 | if (is.matrix(y))
575 | y_sub = y[!which, ]
576 | else y_sub = y[!which]
577 | 
578 | ordinis(x[!which, , drop = FALSE],
579 | y_sub,
580 | lambda = lambda,
581 | gamma = gamma,
582 | ...)
583 | }
584 | }
585 | else {
586 | for (i in seq(nfolds)) {
587 | which = foldid == i
588 | if (is.matrix(y))
589 | y_sub = y[!which, ]
590 | else y_sub = y[!which]
591 | 
592 | outlist[[i]] = ordinis(x[!which, , drop = FALSE],
593 | y_sub,
594 | lambda = lambda,
595 | gamma = gamma, ...) 
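# each fold's model is fit on the observations outside that fold;
# the held-out predictions are assembled and scored later by the
# cv.* method dispatched below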
596 | }
597 | }
598 | 
599 | min.idx <- min(min(sapply(outlist, function(obj) obj$last)), two.object$last)
600 | 
601 | two.object$beta <- two.object$beta[, 1:min.idx, drop = FALSE]
602 | two.object$niter <- two.object$niter[1:min.idx]
603 | two.object$lambda <- two.object$lambda[1:min.idx]
604 | two.object$losses <- two.object$losses[1:min.idx]
605 | two.object$losses.iter <- two.object$losses.iter[, 1:min.idx, drop = FALSE]
606 | two.object$nzero <- two.object$nzero[1:min.idx]
607 | 
608 | nz <- two.object$nzero
609 | 
610 | for (i in 1:length(outlist))
611 | {
612 | outlist[[i]]$beta <- outlist[[i]]$beta[, 1:min.idx, drop = FALSE]
613 | outlist[[i]]$niter <- outlist[[i]]$niter[1:min.idx]
614 | outlist[[i]]$lambda <- outlist[[i]]$lambda[1:min.idx]
615 | outlist[[i]]$losses <- outlist[[i]]$losses[1:min.idx]
616 | outlist[[i]]$losses.iter <- outlist[[i]]$losses.iter[, 1:min.idx, drop = FALSE]
617 | outlist[[i]]$nzero <- outlist[[i]]$nzero[1:min.idx]
618 | }
619 | 
620 | fun <- paste("cv", class(two.object)[[2]], sep = ".")
621 | cvstuff <- do.call(fun,
622 | list(outlist,
623 | two.object$lambda,
624 | x,
625 | y,
626 | foldid,
627 | type.measure,
628 | grouped,
629 | keep))
630 | cvm <- cvstuff$cvm
631 | cvsd <- cvstuff$cvsd
632 | cvname <- cvstuff$name
633 | 
634 | out <- list(lambda = two.object$lambda,
635 | cvm = cvm,
636 | cvsd = cvsd,
637 | cvup = cvm + cvsd,
638 | cvlo = cvm - cvsd,
639 | name = cvname,
640 | nzero = nz,
641 | ordinis.fit = two.object)
642 | if (keep) out <- c(out, list(fit.preval = cvstuff$fit.preval, foldid = foldid))
643 | lamin <- if (type.measure == "auc") getmin(two.object$lambda, -cvm, cvsd)
644 | else getmin(two.object$lambda, cvm, cvsd)
645 | obj <- c(out, as.list(lamin))
646 | class(obj) <- "cv.ordinis"
647 | obj
648 | 
649 | }
650 | 
651 | 
652 | 
653 | 
654 | 
655 | 
--------------------------------------------------------------------------------
/src/CoordLogisticDense.h:
--------------------------------------------------------------------------------
1 | #ifndef COORDLOGISTICDENSE_H
2 | #define COORDLOGISTICDENSE_H
3 | 
4 | #include "CoordBase.h"
5 | #include "utils.h"
6 | 
7 | // Coordinate descent, wrapped in IRLS reweighting, for the penalized
8 | // logistic regression problem:
9 | //
10 | // minimize -(1/n) * loglik(beta0, beta) + lambda * P(beta),
11 | // where 
P() is the lasso, MCP, or SCAD penalty (with an
12 | // elastic-net ridge component mixed in when alpha < 1) and loglik
13 | // is the weighted binomial log-likelihood.
14 | //
15 | // Each IRLS step builds a weighted least squares approximation to
16 | // the log-likelihood and solves it by coordinate descent over an
17 | // active ('eligible') set of variables, as in the Gaussian solver.
18 | //
19 | class CoordLogisticDense: public CoordBase<Eigen::SparseVector<double> >
20 | {
21 | protected:
22 | typedef float Scalar;
23 | typedef double Double;
24 | typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
25 | typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
26 | typedef Eigen::Map<const Matrix> MapMat;
27 | typedef Eigen::Map<const Vector> MapVec;
28 | typedef const Eigen::Ref<const Matrix> ConstGenericMatrix;
29 | typedef const Eigen::Ref<const Vector> ConstGenericVector;
30 | typedef Eigen::SparseMatrix<double> SpMat;
31 | typedef Eigen::SparseVector<double> SparseVector;
32 | typedef Eigen::SparseVector<int> SparseVectori;
33 | 
34 | typedef SparseVector::InnerIterator InIterVec;
35 | typedef SparseVectori::InnerIterator InIterVeci;
36 | 
37 | MapMat datX; // data matrix
38 | MapVec datY; // response vector
39 | MapVec weights; // weight vector
40 | MapVec offset; // offset vector
41 | 
42 | Scalar lambda, lambda_ridge, gamma; // l1 penalty, ridge penalty, and MCP/SCAD gamma
43 | 
44 | double threshval;
45 | VectorXd resid_cur, xbeta_cur, p, W, weightssqrt, z;
46 | 
47 | std::string penalty;
48 | ArrayXd penalty_factor; // penalty multiplication factors
49 | bool intercept;
50 | MapMat limits;
51 | double alpha;
52 | int maxit_irls;
53 | double tol_irls;
54 | int penalty_factor_size;
55 | 
56 | VectorXd XY; // X'Y
57 | VectorXd Xsq; // colSums(X^2)
58 | VectorXd Xtr; // X'resid
59 | 
60 | Scalar lambda0; // minimum lambda to make all coefficients zero
61 | 
62 | double lprev;
63 | 
64 | double beta0; // intercept
65 | 
66 | double weights_sum, resids_sum;
67 | 
68 | // pointer we will set to one of the thresholding functions
69 | typedef double (*thresh_func_ptr)(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom);
70 | 
71 | thresh_func_ptr thresh_func;
72 | 
73 | /*
74 | static void soft_threshold(SparseVector &res, const Vector &vec, const double &penalty)
75 | {
76 | int v_size = vec.size();
77 | res.setZero();
78 | res.reserve(v_size);
79 | 
80 | const double *ptr = vec.data();
81 | for(int i = 0; i < v_size; i++)
82 | {
83 | if(ptr[i] > penalty)
84 | res.insertBack(i) = ptr[i] - penalty;
85 | else if(ptr[i] < -penalty)
86 | res.insertBack(i) = ptr[i] + penalty;
87 | }
88 | }
89 | */
90 | 
91 | bool converged_irls()
92 | {
93 | //return (stopRule(beta, beta_prev_irls, tol_irls));
94 | 
95 | if (std::abs(deviance - deviance_prev) / (0.1 + std::abs(deviance)) < tol_irls)
96 | {
97 | return true;
98 | } else
99 | {
100 | return false;
101 | }
102 | 
103 | }
104 | 
105 | void initialize_params()
106 | {
107 | double ymean = (weights.array() * datY.array()).matrix().mean();
108 | 
109 | if (intercept)
110 | {
111 | beta0 = std::log(ymean / (1.0 - ymean));
112 | } else
113 | {
114 | beta0 = 0.0;
115 | }
116 | 
117 | xbeta_cur.array() = offset.array() + beta0;
118 | 
119 | // calculate the null deviance
120 | null_dev = (-1.0 * datY.array() * std::log(ymean) - (1.0 - datY.array()) * std::log(1.0 - ymean)).sum();
121 | }
122 | 
123 | void update_quadratic_approx()
124 | {
125 | // calculate the mean function
126 | p = 1.0 / (1.0 + ((-1.0 * xbeta_cur.array()).exp()));
127 | 
128 | // construct IRLS weights and multiply by user-specified weights
129 | W = weights.array() * p.array() * (1.0 - p.array());
130 | 
131 | // make sure no weights are too small
132 | for (int k = 0; k < nobs; ++k)
133 | {
134 | if (W(k) < 1e-5)
135 | {
136 | W(k) = 1e-5;
137 | }
138 | }
139 | 
140 | // here we update the residuals and multiply by user-specified weights, which
141 | // will be multiplied by 
X. ie X'resid_cur = X'Wz, where z is the working response from IRLS 142 | resid_cur = weights.array() * (datY.array() - p.array()); // + xbeta_cur.array() * W.array().sqrt(); 143 | 144 | //Xsq = (W.array().sqrt().matrix().asDiagonal() * datX).array().square().colwise().sum(); 145 | 146 | // we will check this in later iterations 147 | // and only update when Xsq(j) < 0 148 | // (it's always positive, so this way we can check and avoid re-calculating unnecessarily) 149 | Xsq.fill(-1.0); 150 | 151 | // this is needed for intercept updates 152 | weights_sum = W.sum(); 153 | 154 | // update deviance 155 | deviance = 0.0; 156 | for (int ii = 0; ii < nobs; ++ii) 157 | { 158 | if (datY(ii) == 1) 159 | { 160 | if (p(ii) > 1e-5) 161 | { 162 | deviance -= std::log(p(ii)); 163 | } else 164 | { 165 | // don't divide by zero 166 | deviance -= std::log(1e-5); 167 | } 168 | 169 | } else 170 | { 171 | if (p(ii) <= 1.0 - 1e-5) 172 | { 173 | deviance -= std::log((1.0 - p(ii))); 174 | } else 175 | { 176 | // don't divide by zero 177 | deviance -= std::log(1.0 - 1e-5); 178 | } 179 | 180 | } 181 | } 182 | } 183 | 184 | void update_intercept() 185 | { 186 | 187 | if (intercept) 188 | { 189 | resids_sum = (resid_cur).sum(); 190 | 191 | double beta0_delta = resids_sum / weights_sum; 192 | 193 | beta0 += beta0_delta; 194 | 195 | // update the (weighted) working residual! 196 | resid_cur.array() -= beta0_delta * W.array(); 197 | 198 | // update the linear predictor! 199 | xbeta_cur.array() += beta0_delta; 200 | } 201 | } 202 | 203 | double compute_loss() 204 | { 205 | //double sum_squares = 0.5 * (resid_cur.array().square().sum()) / double(nobs); 206 | double sum_squares = 0.5 * (resid_cur.array().square().sum()); 207 | double penalty_part = 0.0; 208 | 209 | /* 210 | if (penalty_factor_size < 1) 211 | { 212 | penalty_part = lambda * beta.array().abs().matrix().sum(); 213 | } else 214 | { 215 | penalty_part = lambda * (beta.array() * penalty_factor.array()).abs().matrix().sum(); 216 | } 217 | */ 218 | 219 | return (sum_squares + penalty_part); 220 | } 221 | 222 | static double soft_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 223 | { 224 | 225 | if (std::abs(value) <= penalty) 226 | return(0.0); 227 | else if (value > penalty) 228 | return( (value - penalty) / (denom + denom * l2) ); 229 | else 230 | return( (value + penalty) / (denom + denom * l2) ); 231 | 232 | /* // this ordering is slower for high-dimensional problems 233 | if(value > penalty) 234 | return( (value - penalty) / (denom + l2) ); 235 | else if(value < -penalty) 236 | return( (value + penalty) / (denom + l2) ); 237 | else 238 | return(0.0); 239 | */ 240 | } 241 | 242 | static double scad_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 243 | { 244 | double val_abs = std::abs(value); 245 | 246 | if (val_abs <= penalty) 247 | return(0.0); 248 | else if (val_abs <= penalty * (1.0 + l2) + penalty) 249 | { 250 | if(value > penalty) 251 | return((value - penalty) / ( denom + denom * l2 )); 252 | else 253 | return((value + penalty) / ( denom + denom * l2 )); 254 | } else if (val_abs <= gamma * penalty * (1.0 + l2)) 255 | { 256 | if ((gamma - 1.0) * value > gamma * penalty) 257 | return( (value - gamma * penalty / (gamma - 1.0)) / (denom * ( 1.0 - 1.0 / (gamma - 1.0) + l2 )) ); 258 | else 259 | return( (value + gamma * penalty / (gamma - 1.0)) / (denom * ( 1.0 - 1.0 / (gamma - 1.0) + l2 )) ); 260 | } else 261 | { 262 | return(value / 
(denom + denom * l2)); 263 | } 264 | 265 | } 266 | 267 | static double mcp_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 268 | { 269 | double val_abs = std::abs(value); 270 | 271 | if (val_abs <= penalty) 272 | return(0.0); 273 | else if (val_abs <= gamma * penalty * (1.0 + l2)) 274 | { 275 | if(value > penalty) 276 | return((value - penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 277 | else 278 | return((value + penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 279 | } else 280 | return(value / (denom + denom * l2)); 281 | 282 | /* 283 | if (std::abs(value) > gamma * penalty * (1.0 + l2)) 284 | return(value / (denom + denom * l2)); 285 | else if(value > penalty) 286 | return((value - penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 287 | else if(value < -penalty) 288 | return((value + penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 289 | else 290 | return(0.0); 291 | */ 292 | } 293 | 294 | 295 | void set_threshold_func() 296 | { 297 | if (penalty == "lasso") 298 | { 299 | thresh_func = &CoordLogisticDense::soft_threshold; 300 | } else if (penalty == "mcp") 301 | { 302 | thresh_func = &CoordLogisticDense::mcp_threshold; 303 | } else 304 | { 305 | thresh_func = &CoordLogisticDense::scad_threshold; 306 | } 307 | } 308 | 309 | //void next_beta(Vector &res, VectorXi &eligible) 310 | void next_beta(SparseVector &res, SparseVectori &eligible) 311 | { 312 | 313 | int j; 314 | double grad; 315 | 316 | // now update intercept if necessary 317 | update_intercept(); 318 | 319 | // if no penalty multiplication factors specified 320 | if (penalty_factor_size < 1) 321 | { 322 | for (InIterVeci i_(eligible); i_; ++i_) 323 | { 324 | int j = i_.index(); 325 | double beta_prev = beta.coeff( j ); //beta(j); 326 | 327 | // surprisingly it's faster to calculate this on an iteration-basis 328 | // and not pre-calculate it within each newton iteration.. 329 | if (Xsq(j) < 0) Xsq(j) = (datX.col(j).array().square() * W.array()).matrix().mean(); 330 | 331 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 332 | 333 | grad = Xtr(j) + beta_prev * Xsq(j); 334 | 335 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 336 | 337 | threshval = thresh_func(grad, lambda, gamma, lambda_ridge, Xsq(j)); 338 | 339 | // apply param limits 340 | if (threshval < limits(1,j)) threshval = limits(1,j); 341 | if (threshval > limits(0,j)) threshval = limits(0,j); 342 | 343 | // update both residual and linear predictor 344 | // if the coefficient changes after thresholding. 
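// (in the IRLS working problem, resid_cur holds w * (y - p), so any
//  coefficient change must update both the linear predictor xbeta_cur
//  and, scaled by the IRLS weights W, the working residual)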
345 | if (beta_prev != threshval) 346 | { 347 | 348 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 349 | 350 | beta.coeffRef(j) = threshval; 351 | 352 | VectorXd delta_cur = (threshval - beta_prev) * datX.col(j); 353 | 354 | xbeta_cur += delta_cur; 355 | resid_cur.array() -= delta_cur.array() * W.array(); 356 | 357 | // update eligible set if necessary 358 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 359 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 360 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 361 | } else 362 | { 363 | // here we only remove a variable from the eligible set 364 | // if it's zero twice in a row 365 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 366 | { 367 | eligible_set.coeffRef(j) = 0; 368 | } 369 | } 370 | } 371 | } else //if penalty multiplication factors are used 372 | { 373 | for (InIterVeci i_(eligible); i_; ++i_) 374 | { 375 | int j = i_.index(); 376 | double beta_prev = beta.coeff( j ); //beta(j); 377 | 378 | // surprisingly it's faster to calculate this on an iteration-basis 379 | // and not pre-calculate it within each newton iteration.. 380 | if (Xsq(j) < 0) Xsq(j) = (datX.col(j).array().square() * W.array()).matrix().mean(); 381 | 382 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 383 | 384 | grad = Xtr(j) + beta_prev * Xsq(j); 385 | 386 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 387 | 388 | threshval = thresh_func(grad, penalty_factor(j) * lambda, gamma, penalty_factor(j) * lambda_ridge, Xsq(j)); 389 | 390 | // apply param limits 391 | if (threshval < limits(1,j)) threshval = limits(1,j); 392 | if (threshval > limits(0,j)) threshval = limits(0,j); 393 | 394 | // update both residual and linear predictor 395 | // if the coefficient changes after thresholding. 396 | if (beta_prev != threshval) 397 | { 398 | 399 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 400 | 401 | beta.coeffRef(j) = threshval; 402 | 403 | VectorXd delta_cur = (threshval - beta_prev) * datX.col(j); 404 | 405 | xbeta_cur += delta_cur; 406 | resid_cur.array() -= delta_cur.array() * W.array(); 407 | 408 | // update eligible set if necessary 409 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 410 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 411 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 412 | } else 413 | { 414 | // here we only remove a variable from the eligible set 415 | // if it's zero twice in a row 416 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 417 | { 418 | eligible_set.coeffRef(j) = 0; 419 | } 420 | } 421 | } 422 | } 423 | 424 | } 425 | 426 | //void next_beta(Vector &res, VectorXi &eligible) 427 | void next_beta(SparseVector &res, VectorXi &eligible) 428 | { 429 | 430 | int j; 431 | double grad; 432 | 433 | // now update intercept if necessary 434 | update_intercept(); 435 | 436 | 437 | // if no penalty multiplication factors specified 438 | if (penalty_factor_size < 1) 439 | { 440 | for (j = 0; j < nvars; ++j) 441 | { 442 | if (eligible(j)) 443 | { 444 | double beta_prev = beta.coeff( j ); //beta(j); 445 | 446 | // surprisingly it's faster to calculate this on an iteration-basis 447 | // and not pre-calculate it within each newton iteration.. 
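// (Xsq is a lazy cache here as well: update_quadratic_approx() refills
//  it with -1.0 after each reweighting, so the W-weighted column
//  curvatures are recomputed only for coordinates actually visited)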
448 | if (Xsq(j) < 0) Xsq(j) = (datX.col(j).array().square() * W.array()).matrix().mean(); 449 | 450 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 451 | 452 | grad = Xtr(j) + beta_prev * Xsq(j); 453 | 454 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 455 | 456 | threshval = thresh_func(grad, lambda, gamma, lambda_ridge, Xsq(j)); 457 | 458 | // apply param limits 459 | if (threshval < limits(1,j)) threshval = limits(1,j); 460 | if (threshval > limits(0,j)) threshval = limits(0,j); 461 | 462 | // update both residual and linear predictor 463 | // if the coefficient changes after thresholding. 464 | if (beta_prev != threshval) 465 | { 466 | 467 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 468 | 469 | beta.coeffRef(j) = threshval; 470 | 471 | VectorXd delta_cur = (threshval - beta_prev) * datX.col(j); 472 | 473 | xbeta_cur += delta_cur; 474 | resid_cur.array() -= delta_cur.array() * W.array(); 475 | 476 | // update eligible set if necessary 477 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 478 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 479 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 480 | } else 481 | { 482 | // here we only remove a variable from the eligible set 483 | // if it's zero twice in a row 484 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 485 | { 486 | eligible_set.coeffRef(j) = 0; 487 | } 488 | } 489 | } // end eligible set check 490 | } 491 | } else //if penalty multiplication factors are used 492 | { 493 | for (j = 0; j < nvars; ++j) 494 | { 495 | if (eligible(j)) 496 | { 497 | double beta_prev = beta.coeff( j ); //beta(j); 498 | 499 | // surprisingly it's faster to calculate this on an iteration-basis 500 | // and not pre-calculate it within each newton iteration.. 501 | if (Xsq(j) < 0) Xsq(j) = (datX.col(j).array().square() * W.array()).matrix().mean(); 502 | 503 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 504 | 505 | grad = Xtr(j) + beta_prev * Xsq(j); 506 | 507 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 508 | 509 | threshval = thresh_func(grad, penalty_factor(j) * lambda, gamma, penalty_factor(j) * lambda_ridge, Xsq(j)); 510 | 511 | // apply param limits 512 | if (threshval < limits(1,j)) threshval = limits(1,j); 513 | if (threshval > limits(0,j)) threshval = limits(0,j); 514 | 515 | // update both residual and linear predictor 516 | // if the coefficient changes after thresholding. 
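// (note that the thresholding call above scales both the l1 penalty
//  lambda and the ridge penalty lambda_ridge by penalty_factor(j);
//  a factor of zero therefore leaves that coordinate unpenalized)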
517 | if (beta_prev != threshval) 518 | { 519 | 520 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 521 | 522 | beta.coeffRef(j) = threshval; 523 | 524 | VectorXd delta_cur = (threshval - beta_prev) * datX.col(j); 525 | 526 | xbeta_cur += delta_cur; 527 | resid_cur.array() -= delta_cur.array() * W.array(); 528 | 529 | // update eligible set if necessary 530 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 531 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 532 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 533 | } else 534 | { 535 | // here we only remove a variable from the eligible set 536 | // if it's zero twice in a row 537 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 538 | { 539 | eligible_set.coeffRef(j) = 0; 540 | } 541 | } 542 | } // end eligible set check 543 | } 544 | } 545 | 546 | } 547 | 548 | 549 | 550 | // Calculate ||v1 - v2||^2 when v1 and v2 are sparse 551 | static double diff_squared_norm(const SparseVector &v1, const SparseVector &v2) 552 | { 553 | const int n1 = v1.nonZeros(), n2 = v2.nonZeros(); 554 | const double *v1_val = v1.valuePtr(), *v2_val = v2.valuePtr(); 555 | const int *v1_ind = v1.innerIndexPtr(), *v2_ind = v2.innerIndexPtr(); 556 | 557 | double r = 0.0; 558 | int i1 = 0, i2 = 0; 559 | while(i1 < n1 && i2 < n2) 560 | { 561 | if(v1_ind[i1] == v2_ind[i2]) 562 | { 563 | double val = v1_val[i1] - v2_val[i2]; 564 | r += val * val; 565 | i1++; 566 | i2++; 567 | } else if(v1_ind[i1] < v2_ind[i2]) { 568 | r += v1_val[i1] * v1_val[i1]; 569 | i1++; 570 | } else { 571 | r += v2_val[i2] * v2_val[i2]; 572 | i2++; 573 | } 574 | } 575 | while(i1 < n1) 576 | { 577 | r += v1_val[i1] * v1_val[i1]; 578 | i1++; 579 | } 580 | while(i2 < n2) 581 | { 582 | r += v2_val[i2] * v2_val[i2]; 583 | i2++; 584 | } 585 | 586 | return r; 587 | } 588 | 589 | 590 | public: 591 | CoordLogisticDense(ConstGenericMatrix &datX_, 592 | ConstGenericVector &datY_, 593 | ConstGenericVector &weights_, 594 | ConstGenericVector &offset_, 595 | ArrayXd &penalty_factor_, 596 | ConstGenericMatrix &limits_, 597 | std::string &penalty_, 598 | bool intercept_, 599 | double alpha_ = 1.0, 600 | double tol_ = 1e-6, 601 | int maxit_irls_ = 100, 602 | double tol_irls_ = 1e-6) : 603 | CoordBase > 604 | (datX_.rows(), datX_.cols(), tol_), 605 | datX(datX_.data(), datX_.rows(), datX_.cols()), 606 | datY(datY_.data(), datY_.size()), 607 | weights(weights_.data(), weights_.size()), 608 | offset(offset_.data(), offset_.size()), 609 | resid_cur(datX_.rows()), 610 | xbeta_cur(datX_.rows()), 611 | p(datX_.rows()), 612 | W(datX_.rows()), weightssqrt(weights.array().sqrt()), 613 | z(datX_.rows()), 614 | penalty(penalty_), 615 | penalty_factor(penalty_factor_), 616 | intercept(intercept_), 617 | limits(limits_.data(), limits_.rows(), limits_.cols()), 618 | alpha(alpha_), maxit_irls(maxit_irls_), tol_irls(tol_irls_), 619 | penalty_factor_size(penalty_factor_.size()), 620 | XY(datX.transpose() * (datY.array() * weights.array()).matrix()), 621 | Xsq(datX_.cols()), Xtr(datX_.cols()) 622 | {} 623 | 624 | double get_lambda_zero() 625 | { 626 | if (penalty_factor_size > 0) 627 | { 628 | VectorXd XXtmp = datX.transpose().rowwise().sum(); 629 | lambda0 = 0; 630 | for (int i = 0; i < penalty_factor.size(); ++i) 631 | { 632 | if (penalty_factor(i) != 0.0) 633 | { 634 | double valcur = std::abs(XY(i)) / penalty_factor(i); 635 | 636 | if (valcur > lambda0) lambda0 = valcur; 637 | } 638 | 
} 639 | } else 640 | { 641 | lambda0 = (XY).cwiseAbs().maxCoeff(); 642 | } 643 | 644 | lambda0 /= ( alpha * 1.0 * double(nobs)); //std::pow(1e-6, 1.0/(99.0)); 645 | 646 | return lambda0; 647 | } 648 | 649 | // init() is a cold start for the first lambda 650 | void init(double lambda_, double gamma_) 651 | { 652 | 653 | set_threshold_func(); 654 | 655 | beta.setZero(); 656 | 657 | lambda = lambda_ * alpha; 658 | lambda_ridge = lambda_ * (1.0 - alpha); 659 | 660 | gamma = gamma_; 661 | 662 | eligible_set.setZero(); 663 | 664 | eligible_set.reserve(std::min(nobs, nvars)); 665 | 666 | nzero = 0; 667 | 668 | deviance = 0.0; 669 | 670 | xbeta_cur.setZero(); 671 | resid_cur.setZero(); 672 | 673 | // this starts estimate of intercept 674 | initialize_params(); 675 | 676 | double cutoff; 677 | 678 | if (penalty == "lasso") 679 | { 680 | cutoff = 2.0 * lambda - lambda0; 681 | } else if (penalty == "mcp") 682 | { 683 | cutoff = lambda + gamma / (gamma - 1.0) * (lambda - lambda0); 684 | } else 685 | { 686 | cutoff = lambda + gamma / (gamma - 2.0) * (lambda - lambda0); 687 | } 688 | 689 | 690 | 691 | if (penalty_factor_size < 1) 692 | { 693 | for (int j = 0; j < nvars; ++j) if (std::abs(XY(j)) > (cutoff)) eligible_set.coeffRef(j) = 1; 694 | } else 695 | { 696 | for (int j = 0; j < nvars; ++j) if (std::abs(XY(j)) > (cutoff * penalty_factor(j))) eligible_set.coeffRef(j) = 1; 697 | } 698 | 699 | //beta.reserve( std::max(eligible_set.sum() + 10, std::min(nvars, nobs)) ); 700 | 701 | } 702 | // when computing for the next lambda, we can use the 703 | // current main_x, aux_z, dual_y and rho as initial values 704 | void init_warm(double lambda_, double gamma_) 705 | { 706 | lprev = lambda; 707 | lambda = lambda_ * alpha; 708 | lambda_ridge = lambda_ * (1.0 - alpha); 709 | 710 | gamma = gamma_; 711 | 712 | eligible_set.setZero(); 713 | 714 | eligible_set.reserve(std::min(nobs, nvars)); 715 | 716 | nzero = 0; 717 | 718 | deviance = 0.0; 719 | 720 | double cutoff; 721 | 722 | if (penalty == "lasso") 723 | { 724 | cutoff = 2.0 * lambda - lprev; 725 | } else if (penalty == "mcp") 726 | { 727 | cutoff = lambda + gamma / (gamma - 1.0) * (lambda - lprev); 728 | } else 729 | { 730 | cutoff = lambda + gamma / (gamma - 2.0) * (lambda - lprev); 731 | } 732 | 733 | 734 | if (penalty_factor_size < 1) 735 | { 736 | for (int j = 0; j < nvars; ++j) if (std::abs(Xtr(j)) > (cutoff)) eligible_set.coeffRef(j) = 1; 737 | } else 738 | { 739 | for (int j = 0; j < nvars; ++j) if (std::abs(Xtr(j)) > cutoff * penalty_factor(j)) eligible_set.coeffRef(j) = 1; 740 | } 741 | 742 | 743 | //beta.reserve( std::max(eligible_set.sum() + 10, std::min(nvars, nobs)) ); 744 | } 745 | 746 | 747 | int solve(int maxit) 748 | { 749 | //int i; 750 | int irls_iter = 0; 751 | 752 | while(irls_iter < maxit_irls) 753 | { 754 | irls_iter++; 755 | 756 | beta_prev_irls = beta; 757 | deviance_prev = deviance; 758 | 759 | //xbeta_cur.array() = (datX * beta).array() + beta0; //this is efficient because beta is a sparse vector 760 | 761 | update_quadratic_approx(); 762 | 763 | int current_iter = 0; 764 | 765 | // run once through all variables 766 | //current_iter++; 767 | beta_prev = beta; 768 | ineligible_set.fill(1); 769 | 770 | //update_beta(ineligible_set); 771 | 772 | while(current_iter < maxit) 773 | { 774 | while(current_iter < maxit) 775 | { 776 | current_iter++; 777 | beta_prev = beta; 778 | 779 | if (current_iter % 5 == 0) update_quadratic_approx(); 780 | 781 | update_beta(eligible_set); 782 | 783 | if(converged()) break; 784 | } 785 | 786 | 
current_iter++;
787 | beta_prev = beta;
788 | ineligible_set.fill(1);
789 | 
790 | for (InIterVeci i_(eligible_set); i_; ++i_)
791 | {
792 | ineligible_set(i_.index()) = 0;
793 | }
794 | 
795 | update_quadratic_approx();
796 | 
797 | update_beta(ineligible_set);
798 | 
799 | if(converged()) break;
800 | } //end coordinate descent loop
801 | 
802 | if(converged_irls()) break;
803 | 
804 | } //end irls loop
805 | 
806 | 
807 | /*
808 | for (int j = 0; j < nvars; ++j)
809 | {
810 | if (beta(j) != 0)
811 | ++nzero;
812 | }
813 | */
814 | 
815 | // force zeros to be actual zeros
816 | beta.prune(0.0);
817 | 
818 | nzero = beta.nonZeros();
819 | 
820 | 
821 | 
822 | loss = compute_loss();
823 | 
824 | // print_footer();
825 | 
826 | return irls_iter;
827 | }
828 | 
829 | virtual double get_intercept() { return beta0; }
830 | };
831 | 
832 | 
833 | 
834 | #endif // COORDLOGISTICDENSE_H
835 | 
--------------------------------------------------------------------------------