├── vignettes ├── unnamed-chunk-1-1.png └── usage_of_the_ordinis_package.Rmd ├── .gitignore ├── R ├── ordinis.R ├── RcppExports.R ├── utils.R ├── ordinis_methods.R └── ordinis_call.R ├── .Rbuildignore ├── src ├── Makevars ├── Makevars.win ├── utils.h ├── CoordBase.h ├── utils.cpp ├── RcppExports.cpp ├── ordinis_dense.cpp ├── DataStd.h ├── ordinis_dense_glm.cpp ├── CoordGaussianDense.h └── CoordLogisticDense.h ├── ordinis.Rproj ├── DESCRIPTION ├── NAMESPACE ├── man ├── logLik.Rd ├── predict.cv.ordinis.Rd ├── predict.ordinis.Rd ├── plot.Rd ├── cv.ordinis.Rd └── ordinis.Rd ├── README.Rmd └── README.md /vignettes/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaredhuling/ordinis/HEAD/vignettes/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .Rapp.history 6 | src/*.o 7 | src/*.so 8 | src/*.dll 9 | inst/doc 10 | .DS_Store 11 | -------------------------------------------------------------------------------- /R/ordinis.R: -------------------------------------------------------------------------------- 1 | #' @importFrom grDevices rainbow 2 | #' @importFrom graphics mtext par strwidth title 3 | #' @import Matrix 4 | #' @import Rcpp 5 | #' @import foreach 6 | #' @useDynLib ordinis, .registration = TRUE 7 | NULL 8 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^.*\.Rhistory$ 4 | ^cran-comments.md$ 5 | ^NEWS.md$ 6 | README.Rmd 7 | README.html 8 | ^\.travis\.yml$ 9 | ^/\.gitattributes$ 10 | ^docs$ 11 | ^\_pkgdown\.yml$ 12 | docs 13 | ^_pkgdown\.yml$ 14 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | ## Use the R_HOME indirection to support installations of multiple R versions 2 | 3 | ## KEEP 4 | #PKG_CPPFLAGS = -I${R_HOME}/library/Rcpp/include \ 5 | # -I${R_HOME}/library/RcppEigen/include -I. -DNDEBUG 6 | 7 | 8 | PKG_CXXFLAGS = -DNDEBUG 9 | PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"` $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 10 | 11 | CXX_STD = CXX11 12 | -------------------------------------------------------------------------------- /ordinis.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | 2 | ## OLD 3 | #PKG_CPPFLAGS = -I${R_HOME}/library/RcppEigen/include \ 4 | #-I${R_HOME}/library/Rcpp/include -I. 
-DNDEBUG 4 | 5 | 6 | #PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 7 | ## old 8 | 9 | 10 | #PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" --vanilla -e "Rcpp:::LdFlags()") $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 11 | PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" --vanilla -e "Rcpp:::LdFlags()") $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 12 | 13 | 14 | PKG_CPPFLAGS = -I. -DNDEBUG 15 | 16 | CXX_STD = CXX11 17 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ordinis 2 | Type: Package 3 | Title: Fits Penalized Regression Models with Coordinate Descent 4 | Version: 0.1.0 5 | Authors@R: c( 6 | person("Jared", "Huling", role = c("aut", "cre"), email = "jaredhuling@gmail.com") 7 | ) 8 | Description: Provides estimation of linear and logistic regression models penalized with the 9 | lasso, adaptive lasso, MCP, and SCAD penalties via the coordinate descent algorithm. 10 | License: GPL-2 11 | Depends: 12 | methods, 13 | Matrix 14 | Encoding: UTF-8 15 | LazyData: true 16 | Imports: 17 | Rcpp (>= 0.12.16), 18 | foreach 19 | LinkingTo: 20 | Rcpp, 21 | RcppEigen 22 | RoxygenNote: 6.1.0 23 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | coord_ordinis_dense_cpp <- function(x, y, weights, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, opts) { 5 | .Call(`_ordinis_coord_ordinis_dense_cpp`, x, y, weights, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, opts) 6 | } 7 | 8 | coord_ordinis_dense_glm_cpp <- function(x, y, weights, offset, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, glm_fam, opts) { 9 | .Call(`_ordinis_coord_ordinis_dense_glm_cpp`, x, y, weights, offset, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, glm_fam, opts) 10 | } 11 | 12 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _ordinis_UTILS_H 2 | #define _ordinis_UTILS_H 3 | 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | 15 | using Eigen::MatrixXd; 16 | using Eigen::ArrayXd; 17 | using Eigen::VectorXd; 18 | using Eigen::VectorXi; 19 | 20 | typedef Eigen::SparseVector<double> SparseVector; 21 | 22 | double threshold(double num); 23 | 24 | VectorXd cumsum(const VectorXd& x); 25 | 26 | VectorXd cumsumrev(const VectorXd& x); 27 | 28 | 29 | bool stopRule(const VectorXd& cur, const VectorXd& prev, const double& tolerance); 30 | 31 | bool stopRule(const SparseVector& cur, const SparseVector& prev, const double& tolerance); 32 | 33 | bool stopRule(SparseVector& cur, SparseVector& prev, const double& tolerance); 34 | 35 | 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(logLik,ordinis) 4 | S3method(plot,cv.ordinis) 5 | S3method(plot,ordinis) 6 | S3method(predict,cdbinomial) 7 | S3method(predict,cdgaussian) 8 | 
S3method(predict,cv.ordinis) 9 | S3method(predict,ordinis) 10 | export(cv.ordinis) 11 | export(ordinis) 12 | import(Matrix) 13 | import(Rcpp) 14 | import(foreach) 15 | importFrom(grDevices,rainbow) 16 | importFrom(graphics,abline) 17 | importFrom(graphics,axis) 18 | importFrom(graphics,matplot) 19 | importFrom(graphics,mtext) 20 | importFrom(graphics,par) 21 | importFrom(graphics,points) 22 | importFrom(graphics,segments) 23 | importFrom(graphics,strwidth) 24 | importFrom(graphics,title) 25 | importFrom(methods,as) 26 | importFrom(stats,approx) 27 | importFrom(stats,predict) 28 | importFrom(stats,quantile) 29 | importFrom(stats,runif) 30 | importFrom(stats,weighted.mean) 31 | useDynLib(ordinis, .registration = TRUE) 32 | -------------------------------------------------------------------------------- /man/logLik.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_methods.R 3 | \name{logLik.ordinis} 4 | \alias{logLik.ordinis} 5 | \title{log likelihood function for fitted ordinis objects} 6 | \usage{ 7 | \method{logLik}{ordinis}(object, REML = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{fitted "ordinis" model object.} 11 | 12 | \item{REML}{an optional logical value. If \code{TRUE} the 13 | restricted log-likelihood is returned, else, if \code{FALSE}, 14 | the log-likelihood is returned. Defaults to \code{FALSE}.} 15 | 16 | \item{...}{not used} 17 | } 18 | \description{ 19 | log likelihood function for fitted ordinis objects 20 | } 21 | \examples{ 22 | set.seed(123) 23 | n.obs <- 200 24 | n.vars <- 500 25 | 26 | true.beta <- c(runif(15, -0.25, 0.25), rep(0, n.vars - 15)) 27 | x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 28 | y <- rnorm(n.obs, sd = 3) + x \%*\% true.beta 29 | 30 | fit <- ordinis(x = x, y = y) 31 | 32 | logLik(fit) 33 | 34 | 35 | } 36 | -------------------------------------------------------------------------------- /man/predict.cv.ordinis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_methods.R 3 | \name{predict.cv.ordinis} 4 | \alias{predict.cv.ordinis} 5 | \title{Prediction function for fitted cross validation ordinis objects} 6 | \usage{ 7 | \method{predict}{cv.ordinis}(object, newx, s = c("lambda.min", 8 | "lambda.1se"), ...) 9 | } 10 | \arguments{ 11 | \item{object}{fitted \code{"cv.ordinis"} model object} 12 | 13 | \item{newx}{Matrix of new values for \code{x} at which predictions are to be made. Must be a matrix; can be sparse as in the 14 | \code{CsparseMatrix} objects of the \pkg{Matrix} package 15 | This argument is not used for \code{type = c("coefficients","nonzero")}} 16 | 17 | \item{s}{Value(s) of the penalty parameter lambda at which predictions are required. Default is the entire sequence used to create 18 | the model. 
For \code{predict.cv.ordinis()}, can also specify \code{"lambda.1se"} or \code{"lambda.min"} for best lambdas estimated by cross validation} 19 | 20 | \item{...}{used to pass the other arguments for predict.ordinis} 21 | } 22 | \value{ 23 | An object depending on the type argument 24 | } 25 | \description{ 26 | Prediction function for fitted cross validation ordinis objects 27 | } 28 | \examples{ 29 | set.seed(123) 30 | n.obs <- 1e4 31 | n.vars <- 100 32 | n.obs.test <- 1e3 33 | 34 | true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15)) 35 | 36 | x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 37 | y <- rnorm(n.obs, sd = 3) + x \%*\% true.beta 38 | x.test <- matrix(rnorm(n.obs.test * n.vars), n.obs.test, n.vars) 39 | y.test <- rnorm(n.obs.test, sd = 3) + x.test \%*\% true.beta 40 | 41 | fit <- cv.ordinis(x = x, y = y, 42 | gamma = 1.4, 43 | nlambda = 10) 44 | 45 | 46 | preds.best <- predict(fit, newx = x.test, type = "response") 47 | 48 | apply(preds.best, 2, function(x) mean((y.test - x) ^ 2)) 49 | 50 | } 51 | -------------------------------------------------------------------------------- /man/predict.ordinis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_methods.R 3 | \name{predict.ordinis} 4 | \alias{predict.ordinis} 5 | \title{Prediction method for coord lasso fitted objects} 6 | \usage{ 7 | \method{predict}{ordinis}(object, newx, s = NULL, type = c("link", 8 | "response", "coefficients", "nonzero", "class"), ...) 9 | } 10 | \arguments{ 11 | \item{object}{fitted "ordinis" model object} 12 | 13 | \item{newx}{Matrix of new values for \code{x} at which predictions are to be made. Must be a matrix; can be sparse as in the 14 | \code{CsparseMatrix} objects of the \pkg{Matrix} package. 15 | This argument is not used for \code{type=c("coefficients","nonzero")}} 16 | 17 | \item{s}{Value(s) of the penalty parameter lambda at which predictions are required. Default is the entire sequence used to create 18 | the model.} 19 | 20 | \item{type}{Type of prediction required. \code{type = "link"} gives the linear predictors for the \code{"binomial"} model; for \code{"gaussian"} models it gives the fitted values. 21 | \code{type = "response"} gives the fitted probabilities for \code{"binomial"}. \code{type = "coefficients"} computes the coefficients at the requested values for \code{s}. 
22 | \code{type = "nonzero"} returns a list of the indices of the nonzero coefficients for each value of \code{s}. \code{type = "class"} applies only to \code{"binomial"} and produces the class label corresponding to the maximum probability.} 23 | 24 | \item{...}{not used} 25 | } 26 | \value{ 27 | An object depending on the type argument 28 | } 29 | \description{ 30 | Prediction method for coord lasso fitted objects 31 | } 32 | \examples{ 33 | set.seed(123) 34 | n.obs <- 1e4 35 | n.vars <- 100 36 | n.obs.test <- 1e3 37 | 38 | true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15)) 39 | 40 | x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 41 | y <- rnorm(n.obs, sd = 3) + x \%*\% true.beta 42 | x.test <- matrix(rnorm(n.obs.test * n.vars), n.obs.test, n.vars) 43 | y.test <- rnorm(n.obs.test, sd = 3) + x.test \%*\% true.beta 44 | 45 | fit <- ordinis(x = x, y = y, nlambda = 10) 46 | 47 | preds.lasso <- predict(fit, newx = x.test, type = "response") 48 | 49 | apply(preds.lasso, 2, function(x) mean((y.test - x) ^ 2)) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_methods.R 3 | \name{plot.ordinis} 4 | \alias{plot.ordinis} 5 | \alias{plot.cv.ordinis} 6 | \title{Plot method for ordinis fitted objects} 7 | \usage{ 8 | \method{plot}{ordinis}(x, xvar = c("loglambda", "lambda", "norm"), 9 | labsize = 0.6, xlab = iname, ylab = NULL, main = x$penalty, 10 | xlim = NULL, n.print = 10L, ...) 11 | 12 | \method{plot}{cv.ordinis}(x, sign.lambda = 1, ...) 13 | } 14 | \arguments{ 15 | \item{x}{fitted "ordinis" model object or fitted "cv.ordinis" model object} 16 | 17 | \item{xvar}{What is on the X-axis. \code{"loglambda"} plots against the log-lambda sequence, \code{"lambda"} against the lambda sequence, and \code{"norm"} against the L1-norm of the coefficients} 18 | 19 | \item{labsize}{size of labels for variable names. If labsize = 0, then no variable names will be plotted} 20 | 21 | \item{xlab}{label for x-axis} 22 | 23 | \item{ylab}{label for y-axis} 24 | 25 | \item{main}{main title for plot} 26 | 27 | \item{xlim}{numeric vector of length 2 giving the range of the \code{x} coordinates.} 28 | 29 | \item{n.print}{scalar integer for the number of times along the regularization path to print the number 30 | of nonzero coefficients. 
If set to a negative value, the number of nonzero coefficients will not be printed.} 31 | 32 | \item{...}{other graphical parameters for the plot} 33 | 34 | \item{sign.lambda}{Either plot against log(lambda) (default) or its negative if \code{sign.lambda = -1}.} 35 | } 36 | \description{ 37 | Plot method for ordinis fitted objects 38 | 39 | Plot method for fitted cv.ordinis objects 40 | } 41 | \examples{ 42 | set.seed(123) 43 | n.obs <- 100 44 | n.vars <- 1000 45 | 46 | true.beta <- c(runif(5, 0.1, 1) * (2 * rbinom(5, 1, 0.5) - 1), rep(0, n.vars - 5)) 47 | 48 | x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 49 | y <- rnorm(n.obs, sd = 2) + x \%*\% true.beta 50 | 51 | fit <- ordinis(x = x, y = y, penalty = c("mcp")) 52 | 53 | plot(fit) 54 | 55 | set.seed(123) 56 | n.obs <- 100 57 | n.vars <- 200 58 | 59 | true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15)) 60 | 61 | x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 62 | y <- rnorm(n.obs, sd = 3) + x \%*\% true.beta 63 | 64 | fit <- cv.ordinis(x = x, y = y, gamma = 1.4) 65 | 66 | plot(fit) 67 | 68 | } 69 | -------------------------------------------------------------------------------- /vignettes/usage_of_the_ordinis_package.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Usage of the ordinis Package" 3 | author: "Jared Huling" 4 | date: "`r Sys.Date()`" 5 | output: 6 | rmarkdown::html_vignette: 7 | fig_width: 7 8 | fig_height: 5 9 | toc: true 10 | toc_depth: 3 11 | number_sections: true 12 | self_contained: true 13 | vignette: > 14 | %\VignetteIndexEntry{Usage of the ordinis Package} 15 | %\VignetteEngine{knitr::rmarkdown} 16 | --- 17 | 18 | 19 | 20 | # Introduction to `ordinis` 21 | 22 | 23 | ## Fitting penalized regression models 24 | 25 | 26 | ```{r fit_model} 27 | 28 | library(ordinis) 29 | 30 | 31 | set.seed(1) 32 | n <- 100 33 | p <- 1000 34 | m <- 10 35 | b <- matrix(c(runif(m, min = -1), rep(0, p - m))) 36 | x <- matrix(rnorm(n * p, sd = 3), n, p) 37 | y <- drop(x %*% b) + rnorm(n) 38 | 39 | # fitting a linear model with an MCP penalty 40 | mod <- ordinis(x, y, 41 | penalty = "mcp", 42 | gamma = 1.4) # additional tuning param for MCP 43 | 44 | plot(mod) 45 | 46 | # now applying positivity constraints to parameters and adding a ridge penalty 47 | mod2 <- ordinis(x, y, 48 | penalty = "mcp", 49 | gamma = 1.4, # additional tuning param for MCP 50 | lower.limits = rep(0, p), # force all coefficients to be positive 51 | penalty.factor = c(0, 0, rep(1, p-2)), # don't penalize first two coefficients 52 | alpha = 0.5) # use elastic net with alpha = 0.5 53 | 54 | plot(mod2) 55 | 56 | # use cross validation to select the lambda tuning parameter 57 | cvmod <- cv.ordinis(x, y, penalty = "mcp", gamma = 1.4) 58 | 59 | plot(cvmod) 60 | 61 | # return coefficients with min cv-MSE 62 | coef <- predict(cvmod, type = "coef", s = "lambda.min") 63 | 64 | # use cross validation for the lasso penalty 65 | cvmodl1 <- cv.ordinis(x, y, penalty = "lasso") 66 | 67 | # return coefficients with min cv-MSE 68 | coefl1 <- predict(cvmodl1, type = "coef", s = "lambda.min") 69 | 70 | plot(cvmodl1) 71 | 72 | # number selected by MCP 73 | sum(coef[-1] != 0) 74 | 75 | # number selected by lasso 76 | sum(coefl1[-1] != 0) 77 | 78 | # MCP 79 | round(coef[2:11], 3) 80 | 81 | # truth 82 | round(b[1:10], 3) 83 | 84 | # lasso 85 | round(coefl1[2:11], 3) 86 | ``` 87 | -------------------------------------------------------------------------------- /man/cv.ordinis.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_call.R 3 | \name{cv.ordinis} 4 | \alias{cv.ordinis} 5 | \title{CV Fitting for a Lasso Model Using the Coordinate Descent Algorithm} 6 | \usage{ 7 | cv.ordinis(x, y, lambda = numeric(0), gamma = 3.7, 8 | type.measure = c("mse", "deviance", "class", "auc", "mae"), 9 | nfolds = 10, foldid = NULL, grouped = TRUE, keep = FALSE, 10 | parallel = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{x}{The design matrix} 14 | 15 | \item{y}{The response vector} 16 | 17 | \item{lambda}{A user provided sequence of \eqn{\lambda}. If set to 18 | \code{NULL}, the program will calculate its own sequence 19 | according to \code{nlambda} and \code{lambda_min_ratio}, 20 | which starts from \eqn{\lambda_0} (with this 21 | \eqn{\lambda} all coefficients will be zero) and ends at 22 | \code{lambda0 * lambda_min_ratio}, containing 23 | \code{nlambda} values equally spaced on the log scale. 24 | It is recommended to let the program compute 25 | its own sequence.} 26 | 27 | \item{gamma}{concavity tuning parameter for the MCP/SCAD penalties} 28 | 29 | \item{type.measure}{measure to evaluate for cross-validation. The default is \code{type.measure = "deviance"}, 30 | which uses squared-error for gaussian models (a.k.a. \code{type.measure = "mse"} there) and deviance for logistic 31 | regression. \code{type.measure = "class"} applies to binomial only. \code{type.measure = "auc"} is for two-class logistic 32 | regression only. \code{type.measure = "mse"} or \code{type.measure = "mae"} (mean absolute error) can be used by all models; 33 | they measure the deviation from the fitted mean to the response.} 34 | 35 | \item{nfolds}{number of folds for cross-validation. Default is 10; 3 is the smallest value allowed.} 36 | 37 | \item{foldid}{an optional vector of values between 1 and \code{nfolds} specifying which fold each observation belongs to.} 38 | 39 | \item{grouped}{Like in \pkg{glmnet}, this is an experimental argument, with default \code{TRUE}, and can be ignored by most users. 40 | For all models, this refers to computing \code{nfolds} separate statistics, and then using their mean and estimated standard 41 | error to describe the CV curve. If \code{grouped = FALSE}, an error matrix is built up at the observation level from the 42 | predictions from the \code{nfolds} fits, and then summarized (does not apply to \code{type.measure = "auc"}).} 43 | 44 | \item{keep}{If \code{keep = TRUE}, a prevalidated list of arrays is returned containing fitted values for each observation 45 | and each value of lambda for each model. This means these fits are computed with this observation and the rest of its 46 | fold omitted. The \code{foldid} vector is also returned. Default is \code{keep = FALSE}} 47 | 48 | \item{parallel}{If TRUE, use parallel \pkg{foreach} to fit each fold. A parallel backend must be registered beforehand, e.g. with \pkg{doMC}.} 49 | 50 | \item{...}{other parameters to be passed to the \code{"ordinis"} function} 51 | } 52 | \description{ 53 | Cross validation for linear models with the lasso penalty, which minimizes 54 | \deqn{\frac{1}{2n}\Vert y - X\beta \Vert_2^2 + \lambda \Vert \beta \Vert_1,}{(1/(2n)) ||y - X beta||^2 + lambda ||beta||_1,} 55 | where \eqn{n} is the sample size and \eqn{\lambda} is a tuning 56 | parameter that controls the sparsity of \eqn{\beta}. 
57 | } 58 | \examples{ 59 | set.seed(123) 60 | n = 100 61 | p = 1000 62 | b = c(runif(10, min = 0.2, max = 1), rep(0, p - 10)) 63 | x = matrix(rnorm(n * p, sd = 3), n, p) 64 | y = drop(x \%*\% b) + rnorm(n) 65 | 66 | ## fit lasso model with 100 tuning parameter values 67 | res <- cv.ordinis(x, y) 68 | 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/CoordBase.h: -------------------------------------------------------------------------------- 1 | #ifndef COORDBASE_H 2 | #define COORDBASE_H 3 | 4 | #include 5 | #include "utils.h" 6 | 7 | 8 | template <typename VecTypeBeta> 9 | class CoordBase 10 | { 11 | protected: 12 | 13 | typedef Eigen::SparseVector<int> SparseVectori; 14 | 15 | const int nvars; // dimension of beta 16 | const int nobs; // number of rows 17 | 18 | VecTypeBeta beta; // parameters to be optimized 19 | VecTypeBeta beta_prev; // auxiliary parameters 20 | VecTypeBeta beta_prev_irls; // auxiliary parameters 21 | SparseVectori eligible_set; 22 | VectorXi ineligible_set; 23 | double loss; 24 | 25 | double tol; // tolerance for convergence 26 | 27 | int nzero; 28 | 29 | double deviance, deviance_prev, null_dev; 30 | 31 | //virtual void next_beta(VecTypeBeta &res, VectorXi &eligible) = 0; 32 | virtual void next_beta(VecTypeBeta &res, SparseVectori &eligible) = 0; 33 | virtual void next_beta(VecTypeBeta &res, VectorXi &eligible) = 0; 34 | 35 | virtual bool converged() 36 | { 37 | return (stopRule(beta, beta_prev, tol)); 38 | } 39 | 40 | bool converged_irls() 41 | { 42 | //return (stopRule(beta, beta_prev_irls, tol)); 43 | if (std::abs(deviance - deviance_prev) / (0.1 + std::abs(deviance)) < tol) 44 | { 45 | return true; 46 | } else 47 | { 48 | return false; 49 | } 50 | } 51 | 52 | 53 | void print_row(int iter) 54 | { 55 | const char sep = ' '; 56 | 57 | Rcpp::Rcout << std::left << std::setw(7) << std::setfill(sep) << iter; 58 | Rcpp::Rcout << std::endl; 59 | } 60 | void print_footer() 61 | { 62 | const int width = 80; 63 | Rcpp::Rcout << std::string(width, '=') << std::endl << std::endl; 64 | } 65 | 66 | public: 67 | CoordBase(int n_, int p_, 68 | double tol_ = 1e-6) : 69 | nvars(p_), nobs(n_), 70 | beta(p_), beta_prev(p_), beta_prev_irls(p_), // allocate space but do not set values 71 | eligible_set(p_), ineligible_set(p_), 72 | tol(tol_) 73 | {} 74 | 75 | virtual ~CoordBase() {} 76 | 77 | void update_beta(SparseVectori &eligible) 78 | { 79 | //VecTypeBeta newbeta(nvars); 80 | next_beta(beta, eligible); 81 | //beta.swap(newbeta); 82 | } 83 | 84 | void update_beta(VectorXi &eligible) 85 | { 86 | //VecTypeBeta newbeta(nvars); 87 | next_beta(beta, eligible); 88 | //beta.swap(newbeta); 89 | } 90 | 91 | int solve(int maxit) 92 | { 93 | int i; 94 | 95 | nzero = 0; 96 | 97 | for(i = 0; i < maxit; ++i) 98 | { 99 | beta_prev = beta; 100 | // old_y = dual_y; 101 | //std::copy(dual_y.data(), dual_y.data() + dim_dual, old_y.data()); 102 | 103 | update_beta(eligible_set); 104 | 105 | // print_row(i); 106 | 107 | if(converged()) 108 | break; 109 | 110 | } 111 | 112 | // print_footer(); 113 | 114 | return i + 1; 115 | } 116 | 117 | virtual VecTypeBeta get_beta() { return beta; } 118 | virtual int get_nzero() { return nzero; } 119 | double get_intercept() { return 0.0; } 120 | double get_null_dev() { return null_dev; } 121 | double get_dev() { return deviance; } 122 | virtual double get_loss() { return loss; } 123 | }; 124 | 125 | 126 | 127 | #endif // COORDBASE_H 128 | 129 | -------------------------------------------------------------------------------- /src/utils.cpp: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | #include "utils.h" 4 | 5 | double threshold(double num) 6 | { 7 | return num > 0 ? num : 0; 8 | } 9 | 10 | // computes cumulative sum of vector x 11 | VectorXd cumsum(const VectorXd& x) { 12 | const int n(x.size()); 13 | VectorXd cmsm(n); 14 | //cmsm = std::partial_sum(x.data(), x.data() + x.size(), cmsm.data(), std::plus()); 15 | cmsm(0) = x(0); 16 | 17 | for (int i = 1; i < n; i++) { 18 | cmsm(i) = cmsm(i-1) + x(i); 19 | } 20 | return (cmsm); 21 | } 22 | 23 | // computes reverse cumulative sum of vector x 24 | VectorXd cumsumrev(const VectorXd& x) { 25 | const int n(x.size()); 26 | VectorXd cmsm(n); 27 | //std::reverse(x.data(), x.data() + x.size()); 28 | //cmsm = std::partial_sum(x.data(), x.data() + x.size(), cmsm.data(), std::plus()); 29 | cmsm(0) = x(n-1); 30 | //double tmpsum = 0; 31 | 32 | for (int i = 1; i < n; i++) { 33 | //tmpsum += cmsm(i-1); 34 | cmsm(i) = cmsm(i-1) + x(n-i-1); 35 | } 36 | std::reverse(cmsm.data(), cmsm.data() + cmsm.size()); 37 | return (cmsm); 38 | } 39 | 40 | 41 | 42 | 43 | bool stopRule(const VectorXd& cur, const VectorXd& prev, const double& tolerance) { 44 | for (unsigned i = 0; i < cur.rows(); i++) { 45 | if ( (cur(i) != 0 && prev(i) == 0) || (cur(i) == 0 && prev(i) != 0) ) { 46 | return 0; 47 | } 48 | if (cur(i) != 0 && prev(i) != 0 && 49 | std::abs( (cur(i) - prev(i)) / prev(i)) > tolerance) { 50 | return 0; 51 | } 52 | } 53 | return 1; 54 | } 55 | 56 | bool stopRule(const SparseVector& cur, const SparseVector& prev, const double& tolerance) 57 | { 58 | 59 | /* 60 | for (unsigned i = 0; i < cur.size(); i++) { 61 | if ( (cur.coeff(i) != 0 && prev.coeff(i) == 0) || (cur.coeff(i) == 0 && prev.coeff(i) != 0) ) { 62 | return 0; 63 | } 64 | if (cur.coeff(i) != 0 && prev.coeff(i) != 0 && 65 | std::abs( (cur.coeff(i) - prev.coeff(i)) / prev.coeff(i)) > tolerance) { 66 | return 0; 67 | } 68 | } 69 | */ 70 | 71 | const int n1 = cur.nonZeros(), n2 = prev.nonZeros(); 72 | const double *v1_val = cur.valuePtr(), *v2_val = prev.valuePtr(); 73 | const int *v1_ind = cur.innerIndexPtr(), *v2_ind = prev.innerIndexPtr(); 74 | 75 | double eps = 1e-4; 76 | int i1 = 0, i2 = 0; 77 | while(i1 < n1 && i2 < n2) 78 | { 79 | if(v1_ind[i1] == v2_ind[i2]) 80 | { 81 | if (std::abs( (v1_val[i1] - v2_val[i2]) / (std::abs(v2_val[i2]) + eps) ) > tolerance) 82 | { 83 | return 0; 84 | } else 85 | { 86 | i1++; 87 | i2++; 88 | } 89 | } else 90 | { 91 | return 0; 92 | } 93 | } 94 | return 1; 95 | } 96 | 97 | bool stopRule(SparseVector& cur, SparseVector& prev, const double& tolerance) 98 | { 99 | 100 | /* 101 | for (unsigned i = 0; i < cur.size(); i++) { 102 | if ( (cur.coeff(i) != 0 && prev.coeff(i) == 0) || (cur.coeff(i) == 0 && prev.coeff(i) != 0) ) { 103 | return 0; 104 | } 105 | if (cur.coeff(i) != 0 && prev.coeff(i) != 0 && 106 | std::abs( (cur.coeff(i) - prev.coeff(i)) / prev.coeff(i)) > tolerance) { 107 | return 0; 108 | } 109 | } 110 | */ 111 | 112 | const int n1 = cur.nonZeros(), n2 = prev.nonZeros(); 113 | const double *v1_val = cur.valuePtr(), *v2_val = prev.valuePtr(); 114 | const int *v1_ind = cur.innerIndexPtr(), *v2_ind = prev.innerIndexPtr(); 115 | 116 | 117 | double eps = 1e-4; 118 | 119 | int i1 = 0, i2 = 0; 120 | while(i1 < n1 && i2 < n2) 121 | { 122 | if(v1_ind[i1] == v2_ind[i2]) 123 | { 124 | if (std::abs( (v1_val[i1] - v2_val[i2]) / (std::abs(v2_val[i2]) + eps)) > tolerance) 125 | { 126 | return 0; 127 | } else 128 | { 129 | i1++; 130 | i2++; 131 | } 132 | } else 133 | { 134 | 
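// the nonzero patterns of the two iterates differ here (a coefficient entered or left the active set), so do not declare convergence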
return 0; 135 | } 136 | } 137 | return 1; 138 | } 139 | 140 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include <RcppEigen.h> 5 | #include <Rcpp.h> 6 | 7 | using namespace Rcpp; 8 | 9 | // coord_ordinis_dense_cpp 10 | List coord_ordinis_dense_cpp(Rcpp::NumericMatrix x, Rcpp::NumericVector y, Rcpp::NumericVector weights, Rcpp::NumericVector lambda, Rcpp::NumericVector penalty_factor, Rcpp::NumericMatrix limits, int nlambda, double lmin_ratio, bool standardize, bool intercept, List opts); 11 | RcppExport SEXP _ordinis_coord_ordinis_dense_cpp(SEXP xSEXP, SEXP ySEXP, SEXP weightsSEXP, SEXP lambdaSEXP, SEXP penalty_factorSEXP, SEXP limitsSEXP, SEXP nlambdaSEXP, SEXP lmin_ratioSEXP, SEXP standardizeSEXP, SEXP interceptSEXP, SEXP optsSEXP) { 12 | BEGIN_RCPP 13 | Rcpp::RObject rcpp_result_gen; 14 | Rcpp::RNGScope rcpp_rngScope_gen; 15 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type x(xSEXP); 16 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type y(ySEXP); 17 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type weights(weightsSEXP); 18 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type lambda(lambdaSEXP); 19 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type penalty_factor(penalty_factorSEXP); 20 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type limits(limitsSEXP); 21 | Rcpp::traits::input_parameter< int >::type nlambda(nlambdaSEXP); 22 | Rcpp::traits::input_parameter< double >::type lmin_ratio(lmin_ratioSEXP); 23 | Rcpp::traits::input_parameter< bool >::type standardize(standardizeSEXP); 24 | Rcpp::traits::input_parameter< bool >::type intercept(interceptSEXP); 25 | Rcpp::traits::input_parameter< List >::type opts(optsSEXP); 26 | rcpp_result_gen = Rcpp::wrap(coord_ordinis_dense_cpp(x, y, weights, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, opts)); 27 | return rcpp_result_gen; 28 | END_RCPP 29 | } 30 | // coord_ordinis_dense_glm_cpp 31 | List coord_ordinis_dense_glm_cpp(Rcpp::NumericMatrix x, Rcpp::NumericVector y, Rcpp::NumericVector weights, Rcpp::NumericVector offset, Rcpp::NumericVector lambda, Rcpp::NumericVector penalty_factor, Rcpp::NumericMatrix limits, int nlambda, double lmin_ratio, bool standardize, bool intercept, bool glm_fam, List opts); 32 | RcppExport SEXP _ordinis_coord_ordinis_dense_glm_cpp(SEXP xSEXP, SEXP ySEXP, SEXP weightsSEXP, SEXP offsetSEXP, SEXP lambdaSEXP, SEXP penalty_factorSEXP, SEXP limitsSEXP, SEXP nlambdaSEXP, SEXP lmin_ratioSEXP, SEXP standardizeSEXP, SEXP interceptSEXP, SEXP glm_famSEXP, SEXP optsSEXP) { 33 | BEGIN_RCPP 34 | Rcpp::RObject rcpp_result_gen; 35 | Rcpp::RNGScope rcpp_rngScope_gen; 36 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type x(xSEXP); 37 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type y(ySEXP); 38 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type weights(weightsSEXP); 39 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type offset(offsetSEXP); 40 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type lambda(lambdaSEXP); 41 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type penalty_factor(penalty_factorSEXP); 42 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type limits(limitsSEXP); 43 | Rcpp::traits::input_parameter< int 
>::type nlambda(nlambdaSEXP); 44 | Rcpp::traits::input_parameter< double >::type lmin_ratio(lmin_ratioSEXP); 45 | Rcpp::traits::input_parameter< bool >::type standardize(standardizeSEXP); 46 | Rcpp::traits::input_parameter< bool >::type intercept(interceptSEXP); 47 | Rcpp::traits::input_parameter< bool >::type glm_fam(glm_famSEXP); 48 | Rcpp::traits::input_parameter< List >::type opts(optsSEXP); 49 | rcpp_result_gen = Rcpp::wrap(coord_ordinis_dense_glm_cpp(x, y, weights, offset, lambda, penalty_factor, limits, nlambda, lmin_ratio, standardize, intercept, glm_fam, opts)); 50 | return rcpp_result_gen; 51 | END_RCPP 52 | } 53 | 54 | static const R_CallMethodDef CallEntries[] = { 55 | {"_ordinis_coord_ordinis_dense_cpp", (DL_FUNC) &_ordinis_coord_ordinis_dense_cpp, 11}, 56 | {"_ordinis_coord_ordinis_dense_glm_cpp", (DL_FUNC) &_ordinis_coord_ordinis_dense_glm_cpp, 13}, 57 | {NULL, NULL, 0} 58 | }; 59 | 60 | RcppExport void R_init_ordinis(DllInfo *dll) { 61 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 62 | R_useDynamicSymbols(dll, FALSE); 63 | } 64 | -------------------------------------------------------------------------------- /man/ordinis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinis_call.R 3 | \name{ordinis} 4 | \alias{ordinis} 5 | \title{Fitting Lasso-Penalized Models Using the Coordinate Descent Algorithm} 6 | \usage{ 7 | ordinis(x, y, weights = rep(1, NROW(y)), offset = NULL, 8 | family = NULL, penalty = c("lasso", "alasso", "mcp", "scad"), 9 | lambda = numeric(0), alpha = 1, gamma = ifelse(penalty == "scad", 10 | 3.7, 1.4), penalty.factor = NULL, upper.limits = rep(Inf, NCOL(x)), 11 | lower.limits = rep(-Inf, NCOL(x)), nlambda = 100L, 12 | lambda.min.ratio = NULL, intercept = TRUE, standardize = TRUE, 13 | dfmax = nvars, maxit = NULL, tol = NULL, maxit.irls = 25L, 14 | tol.irls = 0.001) 15 | } 16 | \arguments{ 17 | \item{x}{The design matrix} 18 | 19 | \item{y}{The response vector} 20 | 21 | \item{weights}{a vector of weights of length equal to length of \code{y}. \code{weights} are NOT standardized or scaled; the user must 22 | do this if desired} 23 | 24 | \item{offset}{A vector of length \code{nobs} that is included in the linear predictor} 25 | 26 | \item{family}{family of the underlying model. \code{"gaussian"} for continuous responses and \code{"binomial"} for binary responses are available} 27 | 28 | \item{penalty}{a string indicating which penalty to use. \code{"lasso"}, \code{"alasso"} (adaptive lasso), \code{"mcp"}, and \code{"scad"} 29 | are available} 30 | 31 | \item{lambda}{A user provided sequence of \eqn{\lambda}. If set to 32 | \code{NULL}, the program will calculate its own sequence 33 | according to \code{nlambda} and \code{lambda_min_ratio}, 34 | which starts from \eqn{\lambda_0} (with this 35 | \eqn{\lambda} all coefficients will be zero) and ends at 36 | \code{lambda0 * lambda_min_ratio}, containing 37 | \code{nlambda} values equally spaced on the log scale. 38 | It is recommended to let the program compute 39 | its own sequence.} 40 | 41 | \item{alpha}{mixing parameter between 0 and 1 for elastic net. \code{alpha=1} is for the lasso, \code{alpha=0} is for ridge} 42 | 43 | \item{gamma}{parameter for MCP/SCAD. Defaults to the recommended values from the papers corresponding to each penalty} 44 | 45 | \item{penalty.factor}{a vector with length equal to the number of columns in x to be multiplied by lambda. By default 46 | it is a vector of 1s. 
\code{penalty.factor} is NOT scaled} 47 | 48 | \item{upper.limits}{a vector of length \code{ncol(x)} of upper limits for each coefficient. Can be a single value, which will 49 | then be applied for each coefficient. Must be non-negative.} 50 | 51 | \item{lower.limits}{a vector of length \code{ncol(x)} of lower limits for each coefficient. Can be a single value, which will 52 | then be applied for each coefficient. Cannot be greater than 0.} 53 | 54 | \item{nlambda}{Number of values in the \eqn{\lambda} sequence. Only used 55 | when the program calculates its own \eqn{\lambda} 56 | (by setting \code{lambda = NULL}).} 57 | 58 | \item{lambda.min.ratio}{Smallest value in the \eqn{\lambda} sequence 59 | as a fraction of \eqn{\lambda_0}. See 60 | the explanation of the \code{lambda} 61 | argument. This parameter is only used when 62 | the program calculates its own \eqn{\lambda} 63 | (by setting \code{lambda = NULL}). The default 64 | value is the same as \pkg{glmnet}: 0.001 if 65 | \code{nrow(x) >= ncol(x)} and 0.05 otherwise.} 66 | 67 | \item{intercept}{Whether to fit an intercept in the model. Default is \code{TRUE}.} 68 | 69 | \item{standardize}{Whether to standardize the design matrix before 70 | fitting the model. Default is \code{TRUE}. Fitted coefficients 71 | are always returned on the original scale.} 72 | 73 | \item{dfmax}{Maximum number of variables allowed in the model} 74 | 75 | \item{maxit}{Maximum number of coordinate descent iterations.} 76 | 77 | \item{tol}{convergence tolerance parameter for the coordinate descent iterations.} 78 | 79 | \item{maxit.irls}{Maximum number of IRLS (iteratively reweighted least squares) iterations for \code{family = "binomial"}.} 80 | 81 | \item{tol.irls}{convergence tolerance parameter for the IRLS iterations.} 82 | } 83 | \description{ 84 | ordinis provides estimation of linear and logistic regression models with the lasso, adaptive lasso, MCP, and SCAD penalties 85 | } 86 | \examples{ 87 | set.seed(123) 88 | n = 100 89 | p = 1000 90 | b = c(runif(10, min = 0.1, max = 1), rep(0, p - 10)) 91 | x = matrix(rnorm(n * p, sd = 1.5), n, p) 92 | y = drop(x \%*\% b) + rnorm(n) 93 | 94 | 95 | ## fit lasso model with 100 tuning parameter values 96 | res <- ordinis(x, y) 97 | 98 | y2 <- 1 * (y > 0) 99 | y3 <- exp(y) 100 | 101 | resb <- ordinis(x, y2, family = "binomial") 102 | 103 | } 104 | -------------------------------------------------------------------------------- /src/ordinis_dense.cpp: -------------------------------------------------------------------------------- 1 | #define EIGEN_DONT_PARALLELIZE 2 | 3 | #include "CoordGaussianDense.h" 4 | #include "DataStd.h" 5 | 6 | 7 | using Eigen::MatrixXf; 8 | using Eigen::VectorXf; 9 | using Eigen::MatrixXd; 10 | using Eigen::VectorXd; 11 | using Eigen::ArrayXf; 12 | using Eigen::ArrayXd; 13 | using Eigen::ArrayXXf; 14 | using Eigen::Map; 15 | 16 | using Rcpp::wrap; 17 | using Rcpp::as; 18 | using Rcpp::List; 19 | using Rcpp::Named; 20 | using Rcpp::IntegerVector; 21 | 22 | typedef Map<VectorXd> MapVecd; 23 | typedef Map<MatrixXd> MapMatd; 24 | typedef Eigen::SparseVector<double> SpVec; 25 | typedef Eigen::SparseMatrix<double> SpMat; 26 | 27 | inline void write_beta_matrix(SpMat &betas, int col, double beta0, SpVec &coef) 28 | { 29 | betas.insert(0, col) = beta0; 30 | 31 | for(SpVec::InnerIterator iter(coef); iter; ++iter) 32 | { 33 | betas.insert(iter.index() + 1, col) = iter.value(); 34 | } 35 | } 36 | 37 | List coord_ordinis_dense(Rcpp::NumericMatrix x_, 38 | Rcpp::NumericVector y_, 39 | Rcpp::NumericVector weights_, 40 | Rcpp::NumericVector lambda_, 41 | Rcpp::NumericVector penalty_factor_, 42 | Rcpp::NumericMatrix limits_, 43 | int nlambda_, 44 | double lmin_ratio_, 45 | bool standardize_, 46 | bool 
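// whether to fit an (unpenalized) intercept; when true the data are centered via DataStd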
intercept_, 47 | List opts_) 48 | { 49 | 50 | const int n = x_.rows(); 51 | const int p = x_.cols(); 52 | 53 | MatrixXd datX(n, p); 54 | VectorXd datY(n); 55 | VectorXd weights(n); 56 | MatrixXd limits(2, p); 57 | 58 | // Copy data from the R objects into Eigen structures 59 | std::copy(x_.begin(), x_.end(), datX.data()); 60 | std::copy(y_.begin(), y_.end(), datY.data()); 61 | std::copy(weights_.begin(), weights_.end(), weights.data()); 62 | 63 | std::copy(limits_.begin(), limits_.end(), limits.data()); 64 | 65 | //Map<VectorXd> weights(as<Map<VectorXd> >(weights_)); 66 | 67 | // In glmnet, we minimize 68 | // 1/(2n) * ||y - X * beta||^2 + lambda * ||beta||_1 69 | // which is equivalent to minimizing 70 | // 1/2 * ||y - X * beta||^2 + n * lambda * ||beta||_1 71 | ArrayXd lambda(as<ArrayXd>(lambda_)); 72 | int nlambda = lambda.size(); 73 | 74 | ArrayXd penalty_factor(as<ArrayXd>(penalty_factor_)); 75 | 76 | 77 | List opts(opts_); 78 | const int maxit = as<int>(opts["maxit"]); 79 | const double tol = as<double>(opts["tol"]); 80 | const double alpha = as<double>(opts["alpha"]); 81 | const double gamma = as<double>(opts["gamma"]); 82 | const int dfmax = as<int>(opts["dfmax"]); 83 | const bool standardize = standardize_; 84 | const bool intercept = intercept_; 85 | 86 | std::vector<std::string> penalty(as< std::vector<std::string> >(opts["penalty"])); 87 | 88 | DataStd<double> datstd(n, p, standardize, intercept, true); 89 | datstd.standardize(datX, datY, limits); 90 | 91 | 92 | CoordGaussianDense *solver; 93 | solver = new CoordGaussianDense(datX, datY, weights, 94 | penalty_factor, 95 | limits, penalty[0], 96 | intercept, 97 | alpha, tol); 98 | 99 | if (nlambda < 1) 100 | { 101 | double lmax = 0.0; 102 | lmax = solver->get_lambda_zero(); // * datstd.get_scaleY(); 103 | 104 | double lmin = lmin_ratio_ * lmax; 105 | lambda.setLinSpaced(nlambda_, std::log(lmax), std::log(lmin)); 106 | lambda = lambda.exp(); 107 | nlambda = lambda.size(); 108 | } 109 | 110 | 111 | 112 | SpMat beta(p + 1, nlambda); 113 | beta.reserve(Eigen::VectorXi::Constant(nlambda, std::min(n, p))); 114 | 115 | //MatrixXd beta(p + 1, nlambda); 116 | VectorXd lossvec(nlambda); 117 | 118 | 119 | IntegerVector niter(nlambda); 120 | double ilambda = 0.0; 121 | 122 | int last = nlambda; 123 | for(int i = 0; i < nlambda; i++) 124 | { 125 | ilambda = lambda[i]; // * double(n); // / datstd.get_scaleY(); 126 | 127 | if(i == 0) 128 | solver->init(ilambda, gamma); 129 | else 130 | solver->init_warm(ilambda, gamma); 131 | 132 | niter[i] = solver->solve(maxit); 133 | SpVec res = solver->get_beta(); 134 | 135 | int nzero = solver->get_nzero(); 136 | double beta0 = 0.0; 137 | beta0 = solver->get_intercept(); 138 | datstd.recover(beta0, res); 139 | //beta(0,i) = beta0; 140 | //beta.block(1, i, p, 1) = res; 141 | write_beta_matrix(beta, i, beta0, res); 142 | 143 | lossvec(i) = solver->get_loss(); 144 | 145 | if (nzero > dfmax && i > 0) 146 | { 147 | last = i; 148 | break; 149 | } 150 | } 151 | 152 | delete solver; 153 | 154 | beta.makeCompressed(); 155 | 156 | return List::create(Named("beta") = beta, 157 | Named("niter") = niter, 158 | Named("lambda") = lambda, 159 | Named("loss") = lossvec, 160 | Named("last") = last); 161 | } 162 | 163 | // [[Rcpp::export]] 164 | List coord_ordinis_dense_cpp(Rcpp::NumericMatrix x, 165 | Rcpp::NumericVector y, 166 | Rcpp::NumericVector weights, 167 | Rcpp::NumericVector lambda, 168 | Rcpp::NumericVector penalty_factor, 169 | Rcpp::NumericMatrix limits, 170 | int nlambda, 171 | double lmin_ratio, 172 | bool standardize, 173 | bool intercept, 174 | List opts) 175 | { 176 | return coord_ordinis_dense(x, y, weights, 
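// exported wrapper: algorithm settings (maxit, tol, alpha, gamma, penalty, dfmax) are passed through the opts list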
lambda, 178 | penalty_factor, 179 | limits, 180 | nlambda, 181 | lmin_ratio, 182 | standardize, 183 | intercept, 184 | opts); 185 | } 186 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | ```{r setup, include=FALSE} 7 | library(knitr) 8 | opts_chunk$set(message = FALSE) 9 | ``` 10 | 11 | 12 | ## Introduction to `ordinis` 13 | 14 | The `ordinis` package provides computation for penalized regression problems via coordinate descent. It is mostly for my own experimentation at this stage; however, it is fairly efficient and reliable. 15 | 16 | Install using the **devtools** package: 17 | 18 | ```r 19 | devtools::install_github("jaredhuling/ordinis") 20 | ``` 21 | 22 | or by cloning and building. 23 | 24 | ## Example 25 | 26 | ```{r, warning=FALSE, message=FALSE, fig.path="vignettes/"} 27 | library(ordinis) 28 | 29 | # compute the full solution path, n > p 30 | set.seed(123) 31 | n <- 500 32 | p <- 50000 33 | m <- 50 34 | b <- matrix(c(runif(m), rep(0, p - m))) 35 | x <- matrix(rnorm(n * p, sd = 3), n, p) 36 | y <- drop(x %*% b) + rnorm(n) 37 | 38 | mod <- ordinis(x, y, 39 | penalty = "mcp", 40 | lower.limits = rep(0, p), # force all coefficients to be positive 41 | penalty.factor = c(0, 0, rep(1, p-2)), # don't penalize first two coefficients 42 | alpha = 0.95) # use elastic net with alpha = 0.95 43 | 44 | plot(mod) 45 | 46 | ## show likelihood 47 | logLik(mod) 48 | 49 | ## compute AIC 50 | AIC(mod) 51 | 52 | ## BIC 53 | BIC(mod) 54 | ``` 55 | 56 | ## Performance 57 | 58 | ### Lasso (linear regression) 59 | 60 | ```{r, warning=FALSE, message=FALSE} 61 | library(microbenchmark) 62 | library(glmnet) 63 | 64 | b <- matrix(c(runif(m, min = -1), rep(0, p - m))) 65 | x <- matrix(rnorm(n * p, sd = 3), n, p) 66 | y <- drop(x %*% b) + rnorm(n) 67 | 68 | lambdas = glmnet(x, y)$lambda 69 | 70 | microbenchmark( 71 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-10, # thresh must be very small 72 | lambda = lambdas)}, # for comparable precision 73 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 74 | tol = 1e-3)}, 75 | times = 5 76 | ) 77 | 78 | 79 | # difference of results 80 | max(abs(coef(resg) - reso$beta)) 81 | 82 | microbenchmark( 83 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-15, # thresh must be very low for comparable precision 84 | lambda = lambdas)}, 85 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 86 | tol = 1e-3)}, 87 | times = 5 88 | ) 89 | 90 | # difference of results 91 | max(abs(coef(resg) - reso$beta)) 92 | 93 | 94 | ``` 95 | 96 | ### Lasso (logistic regression) 97 | 98 | `glmnet` is clearly faster for logistic regression for the same precision 99 | 100 | ```{r, warning=FALSE, message=FALSE} 101 | library(MASS) 102 | 103 | set.seed(123) 104 | n <- 200 105 | p <- 10000 106 | m <- 20 107 | b <- matrix(c(runif(m, min = -0.5, max = 0.5), rep(0, p - m))) 108 | x <- matrix(rnorm(n * p, sd = 3), n, p) 109 | y <- 1 *(drop(x %*% b) + rnorm(n) > 0) 110 | 111 | lambdas = glmnet(x, y, family = "binomial", lambda.min.ratio = 0.02)$lambda 112 | 113 | microbenchmark( 114 | "glmnet[lasso]" = {resg <- glmnet(x, y, family = "binomial", 115 | thresh = 1e-10, 116 | lambda = lambdas)}, 117 | "ordinis[lasso]" = {reso <- ordinis(x, y, family = "binomial", 118 | lambda = lambdas, 119 | tol = 1e-3, tol.irls = 1e-3)}, 120 | times = 5 121 | ) 122 | 123 | # difference of results 
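# (maximum absolute difference between the glmnet and ordinis coefficient paths)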
max(abs(coef(resg) - reso$beta)) 125 | 126 | 127 | microbenchmark( 128 | "glmnet[lasso]" = {resg <- glmnet(x, y, family = "binomial", 129 | thresh = 1e-15, 130 | lambda = lambdas)}, 131 | "ordinis[lasso]" = {reso <- ordinis(x, y, family = "binomial", 132 | lambda = lambdas, 133 | tol = 1e-3, tol.irls = 1e-3)}, 134 | times = 5 135 | ) 136 | 137 | # difference of results 138 | max(abs(coef(resg) - reso$beta)) 139 | 140 | ``` 141 | 142 | ### Lasso (linear regression, ill-conditioned) 143 | 144 | ```{r, warning=FALSE, message=FALSE} 145 | library(MASS) 146 | 147 | set.seed(123) 148 | n <- 500 149 | p <- 1000 150 | m <- 50 151 | b <- matrix(c(runif(m, min = -1), rep(0, p - m))) 152 | sig <- matrix(0.5, ncol=p,nrow=p); diag(sig) <- 1 153 | x <- mvrnorm(n, mu=rep(0, p), Sigma = sig) 154 | y <- drop(x %*% b) + rnorm(n) 155 | 156 | lambdas = glmnet(x, y)$lambda[1:65] 157 | 158 | microbenchmark( 159 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-9, # thresh must be very small 160 | lambda = lambdas)}, # for comparable precision 161 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 162 | tol = 1e-5)}, 163 | times = 5 164 | ) 165 | 166 | 167 | # difference of results 168 | max(abs(coef(resg) - reso$beta)) 169 | 170 | microbenchmark( 171 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-11, # thresh must be very low for comparable precision 172 | lambda = lambdas)}, 173 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 174 | tol = 1e-5)}, 175 | times = 5 176 | ) 177 | 178 | # difference of results 179 | max(abs(coef(resg) - reso$beta)) 180 | 181 | ``` 182 | 183 | ### Validity of solutions with various bells and whistles 184 | 185 | Due to internal differences in standardization, we now compare with `glmnet` when using observation weights, penalty scaling factors, and parameter box constraints 186 | 187 | ```{r} 188 | 189 | set.seed(123) 190 | n = 200 191 | p = 1000 192 | m <- 15 193 | b = c(runif(m, min = -0.5, max = 0.5), rep(0, p - m)) 194 | x = (matrix(rnorm(n * p, sd = 3), n, p)) 195 | y = drop(x %*% b) + rnorm(n) 196 | y2 <- 1 * (y > rnorm(n, mean = 0.5, sd = 3)) 197 | 198 | 199 | wts <- runif(nrow(x)) 200 | wts <- wts / mean(wts) # re-scale like glmnet does, so we can compare 201 | 202 | penalty.factor <- rbinom(ncol(x), 1, 0.99) * runif(ncol(x)) * 5 203 | penalty.factor <- (penalty.factor / sum(penalty.factor)) * ncol(x) # re-scale like glmnet does, so we can compare 204 | 205 | system.time(resb <- ordinis(x, y2, family = "binomial", tol = 1e-7, tol.irls = 1e-5, 206 | penalty = "lasso", 207 | alpha = 0.5, #elastic net term 208 | lower.limits = 0, upper.limits = 0.02, # box constraints on all parameters 209 | standardize = FALSE, intercept = TRUE, 210 | weights = wts, # observation weights 211 | penalty.factor = penalty.factor)) # penalty scaling factors 212 | 213 | system.time(resg <- glmnet(x,y2, family = "binomial", 214 | lambda = resb$lambda, 215 | alpha = 0.5, #elastic net term 216 | weights = wts, # observation weights 217 | penalty.factor = penalty.factor, # penalty scaling factors 218 | lower.limits = 0, upper.limits = 0.02, # box constraints on all parameters 219 | standardize = FALSE, intercept = TRUE, 220 | thresh = 1e-16)) 221 | 222 | ## compare solutions 223 | max(abs(resb$beta[-1,] - resg$beta)) 224 | 225 | 226 | # now with no box constraints 227 | system.time(resb <- ordinis(x, y2, family = "binomial", tol = 1e-7, tol.irls = 1e-5, 228 | penalty = "lasso", 229 | alpha = 0.5, #elastic net term 230 | standardize = FALSE, intercept = TRUE, 231 | 
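# remaining settings match the glmnet call below so the solutions are comparable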
weights = wts, # observation weights 232 | penalty.factor = penalty.factor)) # penalty scaling factors 233 | 234 | system.time(resg <- glmnet(x,y2, family = "binomial", 235 | lambda = resb$lambda, 236 | alpha = 0.5, #elastic net term 237 | weights = wts, # observation weights 238 | penalty.factor = penalty.factor, # penalty scaling factors 239 | standardize = FALSE, intercept = TRUE, 240 | thresh = 1e-16)) 241 | 242 | ## compare solutions 243 | max(abs(resb$beta[-1,] - resg$beta)) 244 | 245 | ``` 246 | 247 | ### A Note on the Elastic Net and linear models 248 | 249 | Because glmnet handles scaling of the response differently, it yields slightly different solutions than both ordinis and ncvreg for Gaussian models with a ridge penalty term 250 | 251 | ```{r} 252 | library(ncvreg) 253 | 254 | ## I'm setting all methods to have high precision just so solutions are comparable. 255 | ## differences in computation time may be due in part to the arbitrariness of the 256 | ## particular precisions chosen 257 | system.time(resg <- glmnet(x, y, family = "gaussian", alpha = 0.25, 258 | thresh = 1e-15)) 259 | 260 | system.time(res <- ordinis(x, y, family = "gaussian", penalty = "lasso", alpha = 0.25, 261 | tol = 1e-10, lambda = resg$lambda)) 262 | 263 | system.time(resn <- ncvreg(x, y, family="gaussian", penalty = "lasso", 264 | lambda = resg$lambda, alpha = 0.25, max.iter = 100000, 265 | eps = 1e-10)) 266 | 267 | resgg <- res; resgg$beta[-1,] <- resg$beta 268 | 269 | # compare ordinis and glmnet 270 | max(abs(res$beta[-1,] - resg$beta)) 271 | 272 | # compare ordinis and ncvreg 273 | max(abs(res$beta - resn$beta)) 274 | 275 | # compare ncvreg and glmnet 276 | max(abs(resn$beta[-1,] - resg$beta)) 277 | ``` 278 | 279 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | 2 | ## Taken from Jerome Friedman, Trevor Hastie, Noah Simon, and Rob Tibshirani's package glmnet 3 | ## https://cran.r-project.org/web/packages/glmnet/index.html 4 | lambda.interp=function(lambda,s){ 5 | ### lambda is the index sequence that is produced by the model 6 | ### s is the new vector at which evaluations are required. 7 | ### the value is a vector of left and right indices, and a vector of fractions. 8 | ### the new values are interpolated between the two using the fraction 9 | ### Note: lambda decreases. 
you take: 10 | ### sfrac*left+(1-sfrac*right) 11 | 12 | if(length(lambda)==1){# degenerate case of only one lambda 13 | nums=length(s) 14 | left=rep(1,nums) 15 | right=left 16 | sfrac=rep(1,nums) 17 | } 18 | else{ 19 | s[s > max(lambda)] = max(lambda) 20 | s[s < min(lambda)] = min(lambda) 21 | k=length(lambda) 22 | sfrac <- (lambda[1]-s)/(lambda[1] - lambda[k]) 23 | lambda <- (lambda[1] - lambda)/(lambda[1] - lambda[k]) 24 | coord <- approx(lambda, seq(lambda), sfrac)$y 25 | left <- floor(coord) 26 | right <- ceiling(coord) 27 | sfrac=(sfrac-lambda[right])/(lambda[left] - lambda[right]) 28 | sfrac[left==right]=1 29 | } 30 | list(left=left,right=right,frac=sfrac) 31 | } 32 | 33 | ## Taken from Jerome Friedman, Trevor Hastie, Noah Simon, and Rob Tibshirani's package glmnet 34 | ## https://cran.r-project.org/web/packages/glmnet/index.html 35 | nonzeroCoef = function (beta, bystep = FALSE) 36 | { 37 | ### bystep = FALSE means which variables were ever nonzero 38 | ### bystep = TRUE means which variables are nonzero for each step 39 | nr=nrow(beta) 40 | if (nr == 1) {#degenerate case 41 | if (bystep) 42 | apply(beta, 2, function(x) if (abs(x) > 0) 43 | 1 44 | else NULL) 45 | else { 46 | if (any(abs(beta) > 0)) 47 | 1 48 | else NULL 49 | } 50 | } 51 | else { 52 | beta=abs(beta)>0 # this is sparse 53 | which=seq(nr) 54 | ones=rep(1,ncol(beta)) 55 | nz=as.vector((beta%*%ones)>0) 56 | which=which[nz] 57 | if (bystep) { 58 | if(length(which)>0){ 59 | beta=as.matrix(beta[which,,drop=FALSE]) 60 | nzel = function(x, which) if (any(x)) 61 | which[x] 62 | else NULL 63 | which=apply(beta, 2, nzel, which) 64 | if(!is.list(which))which=data.frame(which)# apply can return a matrix!! 65 | which 66 | } 67 | else{ 68 | dn=dimnames(beta)[[2]] 69 | which=vector("list",length(dn)) 70 | names(which)=dn 71 | which 72 | } 73 | 74 | } 75 | else which 76 | } 77 | } 78 | 79 | ## Taken from Rahul Mazumder, Trevor Hastie, and Jerome Friedman's sparsenet package 80 | ## https://cran.r-project.org/web/packages/sparsenet/index.html 81 | argmin=function(x){ 82 | vx=as.vector(x) 83 | imax=order(vx)[1] 84 | if(!is.matrix(x))imax 85 | else{ 86 | d=dim(x) 87 | c1=as.vector(outer(seq(d[1]),rep(1,d[2])))[imax] 88 | c2=as.vector(outer(rep(1,d[1]),seq(d[2])))[imax] 89 | c(c1,c2) 90 | 91 | } 92 | } 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | # taken from glmnet 101 | cvcompute=function(mat,weights,foldid,nlams){ 102 | ###Computes the weighted mean and SD within folds, and hence the se of the mean 103 | wisum=tapply(weights,foldid,sum) 104 | nfolds=max(foldid) 105 | outmat=matrix(NA,nfolds,ncol(mat)) 106 | good=matrix(0,nfolds,ncol(mat)) 107 | mat[is.infinite(mat)]=NA#just in case some infinities crept in 108 | for(i in seq(nfolds)){ 109 | mati=mat[foldid==i,,drop=FALSE] 110 | wi=weights[foldid==i] 111 | outmat[i,]=apply(mati,2,weighted.mean,w=wi,na.rm=TRUE) 112 | good[i,seq(nlams[i])]=1 113 | } 114 | N=apply(good,2,sum) 115 | list(cvraw=outmat,weights=wisum,N=N) 116 | } 117 | 118 | # taken from glmnet 119 | getmin=function(lambda,cvm,cvsd){ 120 | cvmin=min(cvm,na.rm=TRUE) 121 | idmin=cvm<=cvmin 122 | lambda.min=max(lambda[idmin],na.rm=TRUE) 123 | idmin=match(lambda.min,lambda) 124 | semin=(cvm+cvsd)[idmin] 125 | idmin=cvm<=semin 126 | lambda.1se=max(lambda[idmin],na.rm=TRUE) 127 | list(lambda.min=lambda.min,lambda.1se=lambda.1se) 128 | } 129 | 130 | 131 | 132 | # taken from glmnet 133 | auc=function(y,prob,w){ 134 | if(missing(w)){ 135 | rprob=rank(prob) 136 | n1=sum(y);n0=length(y)-n1 137 | u=sum(rprob[y==1])-n1*(n1+1)/2 138 | 
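# u is the Mann-Whitney U statistic; AUC = u / (n1 * n0), computed on the log scale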
exp(log(u) - log(n1) - log(n0)) 139 | } 140 | else{ 141 | rprob=runif(length(prob)) 142 | op=order(prob,rprob)#randomize ties 143 | y=y[op] 144 | w=w[op] 145 | cw=cumsum(w) 146 | w1=w[y==1] 147 | cw1=cumsum(w1) 148 | wauc = log(sum(w1*(cw[y==1]-cw1))) 149 | sumw1 = cw1[length(cw1)] 150 | sumw2 = cw[length(cw)] - sumw1 151 | exp(wauc - log(sumw1) - log(sumw2)) 152 | } 153 | } 154 | 155 | # taken from glmnet 156 | auc.mat=function(y,prob,weights=rep(1,nrow(y))){ 157 | Weights=as.vector(weights*y) 158 | ny=nrow(y) 159 | Y=rep(c(0,1),c(ny,ny)) 160 | Prob=c(prob,prob) 161 | auc(Y,Prob,Weights) 162 | } 163 | 164 | # taken from glmnet 165 | error.bars <- function(x, upper, lower, width = 0.02, ...) 166 | { 167 | xlim <- range(x) 168 | barw <- diff(xlim) * width 169 | segments(x, upper, x, lower, col = 8, lty = 5, lwd = 0.5, ...) 170 | segments(x - barw, upper, x + barw, upper, col = "grey50", lwd = 1, ...) 171 | segments(x - barw, lower, x + barw, lower, col = "grey50", lwd = 1, ...) 172 | range(upper, lower) 173 | } 174 | 175 | objective_logistic <- function(beta, x, y, lambda, penalty.factor = rep(1, ncol(x)), intercept = FALSE, alpha = 1) 176 | { 177 | if (intercept) 178 | { 179 | beta0 <- beta[1] 180 | beta <- beta[-1] 181 | 182 | xbeta <- drop(x %*% beta) + beta0 183 | } else 184 | { 185 | xbeta <- drop(x %*% beta) 186 | } 187 | neglogLik <- (-sum( y * xbeta ) + sum( log1p(exp(xbeta)) )) / nrow(x) 188 | 189 | neglogLik + sum(abs(beta) * lambda * penalty.factor * alpha) + 0.5 * sum((beta) ^ 2 * lambda * penalty.factor * (1 - alpha)) 190 | } 191 | 192 | objective_logistic <- function(beta, x, y, lambda, penalty.factor = rep(1, ncol(x)), intercept = FALSE, alpha = 1, 193 | penalty = c("lasso", "mcp", "scad"), gamma = 3.7) 194 | { 195 | penalty <- match.arg(penalty) 196 | if (intercept) 197 | { 198 | beta0 <- beta[1] 199 | beta <- beta[-1] 200 | 201 | xbeta <- drop(x %*% beta) + beta0 202 | } else 203 | { 204 | xbeta <- drop(x %*% beta) 205 | } 206 | neglogLik <- (-sum( y * xbeta ) + sum( log1p(exp(xbeta)) )) / nrow(x) 207 | 208 | if (penalty == "lasso") 209 | { 210 | penalty.part <- sum(abs(beta) * lambda * penalty.factor * alpha) 211 | } else if (penalty == "mcp") 212 | { 213 | penalty.part <- 0 214 | for (j in 1:length(beta)) 215 | { 216 | pen.cur <- penalty.factor[j] * lambda * alpha 217 | b <- beta[j] 218 | if (abs(b) <= gamma * pen.cur) 219 | { 220 | penalty.part <- penalty.part + pen.cur * abs(b) - b ^ 2 / (2 * gamma) 221 | } else 222 | { 223 | penalty.part <- penalty.part + 0.5 * (gamma) * pen.cur ^ 2 224 | } 225 | } 226 | } else 227 | { 228 | penalty.part <- 0 229 | for (j in 1:length(beta)) 230 | { 231 | pen.cur <- penalty.factor[j] * lambda * alpha 232 | b <- beta[j] 233 | if (abs(b) <= pen.cur) 234 | { 235 | penalty.part <- penalty.part + pen.cur * abs(b) 236 | } else if (abs(b) <= gamma * pen.cur) 237 | { 238 | penalty.part <- penalty.part - (abs(b) ^ 2 - 2 * gamma * pen.cur * abs(b) + pen.cur ^ 2) / (2 * (gamma - 1)) 239 | } else 240 | { 241 | penalty.part <- penalty.part + 0.5 * (gamma + 1) * pen.cur ^ 2 242 | } 243 | } 244 | } 245 | 246 | neglogLik + penalty.part + 0.5 * sum((beta) ^ 2 * lambda * penalty.factor * (1 - alpha)) 247 | } 248 | 249 | objective_linear <- function(beta, x, y, lambda, penalty.factor = rep(1, ncol(x)), intercept = FALSE, alpha = 1, 250 | penalty = c("lasso", "mcp", "scad"), gamma = 3.7) 251 | { 252 | penalty <- match.arg(penalty) 253 | if (intercept) 254 | { 255 | beta0 <- beta[1] 256 | beta <- beta[-1] 257 | 258 | xbeta <- drop(x %*% beta) + beta0 259 | 
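# the linear predictor includes the unpenalized intercept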
259 | } else
260 | {
261 | xbeta <- drop(x %*% beta)
262 | }
263 | sumsq <- 0.5 * sum( (y - xbeta) ^ 2 ) / nrow(x)
264 | 
265 | if (penalty == "lasso")
266 | {
267 | penalty.part <- sum(abs(beta) * lambda * penalty.factor * alpha)
268 | } else if (penalty == "mcp")
269 | {
270 | penalty.part <- 0
271 | for (j in 1:length(beta))
272 | {
273 | pen.cur <- penalty.factor[j] * lambda * alpha
274 | b <- beta[j]
275 | if (abs(b) <= gamma * pen.cur)
276 | {
277 | penalty.part <- penalty.part + pen.cur * abs(b) - b ^ 2 / (2 * gamma)
278 | } else
279 | {
280 | penalty.part <- penalty.part + 0.5 * (gamma) * pen.cur ^ 2
281 | }
282 | }
283 | } else
284 | {
285 | penalty.part <- 0
286 | for (j in 1:length(beta))
287 | {
288 | pen.cur <- penalty.factor[j] * lambda * alpha
289 | b <- beta[j]
290 | if (abs(b) <= pen.cur)
291 | {
292 | penalty.part <- penalty.part + pen.cur * abs(b)
293 | } else if (abs(b) <= gamma * pen.cur)
294 | {
295 | penalty.part <- penalty.part - (abs(b) ^ 2 - 2 * gamma * pen.cur * abs(b) + pen.cur ^ 2) / (2 * (gamma - 1))
296 | } else
297 | {
298 | penalty.part <- penalty.part + 0.5 * (gamma + 1) * pen.cur ^ 2
299 | }
300 | }
301 | }
302 | 
303 | sumsq + penalty.part + 0.5 * sum((beta) ^ 2 * lambda * penalty.factor * (1 - alpha))
304 | }
305 | 
--------------------------------------------------------------------------------
/src/DataStd.h:
--------------------------------------------------------------------------------
1 | #ifndef DATASTD_H
2 | #define DATASTD_H
3 | 
4 | #include <Eigen/Core>
5 | #include <Eigen/Sparse>
6 | 
7 | using Eigen::MatrixXd;
8 | 
9 | template <typename Double>
10 | class DataStd
11 | {
12 | private:
13 | typedef Eigen::SparseVector<Double> SparseVector;
14 | typedef Eigen::Matrix<Double, Eigen::Dynamic, Eigen::Dynamic> Matrix;
15 | typedef Eigen::Matrix<Double, Eigen::Dynamic, 1> Vector;
16 | typedef Eigen::Array<Double, Eigen::Dynamic, 1> Array;
17 | typedef const Eigen::Ref<const Vector> ConstGenericVector;
18 | typedef Eigen::Ref<Array> ArrayRef;
19 | 
20 | 
21 | // flag - 0: standardize = FALSE, intercept = FALSE
22 | //     directly fit model
23 | // flag - 1: standardize = TRUE, intercept = FALSE
24 | //     scale x and y by their standard deviation
25 | // flag - 2: standardize = FALSE, intercept = TRUE
26 | //     center x, standardize y
27 | // flag - 3: standardize = TRUE, intercept = TRUE
28 | //     standardize x and y
29 | const int flag;
30 | 
31 | const int n;
32 | const int p;
33 | 
34 | double meanY;
35 | double scaleY;
36 | bool glm;
37 | Array meanX;
38 | Array scaleX;
39 | 
40 | static double sd_n(ConstGenericVector &v)
41 | {
42 | double mean = v.mean();
43 | Vector v_centered = v.array() - mean;
44 | 
45 | return v_centered.norm() / std::sqrt(double(v.size()));
46 | }
47 | 
48 | // spvec -> spvec / arr, elementwise
49 | static void elementwise_quot(SparseVector &spvec, Array &arr)
50 | {
51 | for(typename SparseVector::InnerIterator iter(spvec); iter; ++iter)
52 | {
53 | iter.valueRef() /= arr[iter.index()];
54 | }
55 | }
56 | 
57 | // inner product of spvec and arr
58 | static double sparse_inner_product(SparseVector &spvec, Array &arr)
59 | {
60 | double res = 0.0;
61 | for(typename SparseVector::InnerIterator iter(spvec); iter; ++iter)
62 | {
63 | res += iter.value() * arr[iter.index()];
64 | }
65 | return res;
66 | }
67 | 
68 | public:
69 | DataStd(int n_, int p_, bool standardize, bool intercept, bool glm_ = false) :
70 | flag(int(standardize) + 2 * int(intercept)),
71 | n(n_),
72 | p(p_),
73 | meanY(0.0),
74 | scaleY(1.0),
75 | glm(glm_)
76 | {
77 | if(flag == 3 || flag == 2)
78 | meanX.resize(p);
79 | if(flag == 3 || flag == 1)
80 | scaleX.resize(p);
81 | }
82 | 
83 | void standardize(MatrixXd &X, Vector &Y, MatrixXd &limits) 84
| {
85 | double n_invsqrt = 1.0 / std::sqrt(Double(n));
86 | 
87 | // standardize Y
88 | switch(flag)
89 | {
90 | case 1:
91 | if (!glm)
92 | {
93 | scaleY = sd_n(Y);
94 | Y.array() /= scaleY;
95 | }
96 | break;
97 | case 2:
98 | case 3:
99 | if (!glm)
100 | {
101 | meanY = Y.mean();
102 | Y.array() -= meanY;
103 | scaleY = Y.norm() * n_invsqrt;
104 | Y.array() /= scaleY;
105 | }
106 | break;
107 | default:
108 | break;
109 | }
110 | 
111 | // standardize X
112 | switch(flag)
113 | {
114 | case 1:
115 | for(int i = 0; i < p; i++)
116 | {
117 | //X.col(i).array() *= wts.array();
118 | scaleX[i] = sd_n(X.col(i));
119 | X.col(i).array() *= (1.0 / scaleX[i]);
120 | limits.col(i).array() *= (scaleX[i]);
121 | }
122 | break;
123 | case 2:
124 | for(int i = 0; i < p; i++)
125 | {
126 | //X.col(i).array() *= wts.array();
127 | meanX[i] = X.col(i).mean();
128 | X.col(i).array() -= meanX[i];
129 | //limits.col(i).array() -= meanX[i];
130 | }
131 | break;
132 | case 3:
133 | for(int i = 0; i < p; i++)
134 | {
135 | /*meanX[i] = X.col(i).mean();
136 | X.col(i).array() -= meanX[i];
137 | scaleX[i] = X.col(i).norm() * n_invsqrt;
138 | X.col(i).array() /= scaleX[i];*/
139 | double *begin = &X(0, i);
140 | double *end = begin + n;
141 | //X.col(i).array() *= wts.array();
142 | meanX[i] = X.col(i).mean();
143 | std::transform(begin, end, begin, std::bind2nd(std::minus<double>(), meanX[i]));
144 | scaleX[i] = X.col(i).norm() * n_invsqrt;
145 | std::transform(begin, end, begin, std::bind2nd(std::multiplies<double>(), 1.0 / scaleX[i]));
146 | 
147 | limits.col(i).array() *= scaleX[i];
148 | //double *beginll = &limits(0, i);
149 | //double *endll = begin + 2;
150 | //X.col(i).array() *= wts.array();
151 | //std::transform(begin, endll, beginll, std::bind2nd(std::plus<double>(), meanX[i]));
152 | //std::transform(begin, endll, beginll, std::bind2nd(std::multiplies<double>(), scaleX[i]));
153 | }
154 | break;
155 | default:
156 | break;
157 | }
158 | }
159 | 
160 | void standardize(MatrixXd &X, Vector &Y, MatrixXd &limits, Vector &wts)
161 | {
162 | double n_invsqrt = 1.0 / std::sqrt(Double(n));
163 | 
164 | Vector sqrt_wts = wts.array().sqrt();
165 | 
166 | if (!glm)
167 | {
168 | Y.array() *= sqrt_wts.array();
169 | }
170 | 
171 | 
172 | 
173 | // standardize Y
174 | switch(flag)
175 | {
176 | case 1:
177 | if (!glm)
178 | {
179 | scaleY = sd_n(Y);
180 | Y.array() /= scaleY;
181 | }
182 | break;
183 | case 2:
184 | case 3:
185 | if (!glm)
186 | {
187 | meanY = Y.mean();
188 | Y.array() -= meanY;
189 | scaleY = Y.norm() * n_invsqrt;
190 | Y.array() /= scaleY;
191 | }
192 | break;
193 | default:
194 | break;
195 | }
196 | 
197 | // standardize X
198 | switch(flag)
199 | {
200 | case 1:
201 | for(int i = 0; i < p; i++)
202 | {
203 | X.col(i).array() *= sqrt_wts.array();
204 | scaleX[i] = sd_n(X.col(i));
205 | X.col(i).array() *= (1.0 / scaleX[i]);
206 | }
207 | break;
208 | case 2:
209 | for(int i = 0; i < p; i++)
210 | {
211 | X.col(i).array() *= sqrt_wts.array();
212 | meanX[i] = X.col(i).mean();
213 | X.col(i).array() -= meanX[i];
214 | }
215 | break;
216 | case 3:
217 | for(int i = 0; i < p; i++)
218 | {
219 | /*meanX[i] = X.col(i).mean();
220 | X.col(i).array() -= meanX[i];
221 | scaleX[i] = X.col(i).norm() * n_invsqrt;
222 | X.col(i).array() /= scaleX[i];*/
223 | double *begin = &X(0, i);
224 | double *end = begin + n;
225 | X.col(i).array() *= sqrt_wts.array();
226 | meanX[i] = X.col(i).mean();
227 | std::transform(begin, end, begin, std::bind2nd(std::minus<double>(), meanX[i]));
228 | scaleX[i] = X.col(i).norm() * n_invsqrt;
229 | std::transform(begin,
end, begin, std::bind2nd(std::multiplies<double>(), 1.0 / scaleX[i]));
230 | }
231 | break;
232 | default:
233 | break;
234 | }
235 | }
236 | 
237 | void recover(double &beta0, ArrayRef coef)
238 | {
239 | switch(flag)
240 | {
241 | case 0:
242 | beta0 = 0.0;
243 | break;
244 | case 1:
245 | coef /= scaleX;
246 | if (!glm)
247 | {
248 | beta0 = 0.0;
249 | coef *= scaleY;
250 | }
251 | break;
252 | case 2:
253 | if (!glm)
254 | {
255 | coef *= scaleY;
256 | beta0 = meanY - (coef * meanX).sum();
257 | } else
258 | {
259 | beta0 -= (coef * meanX).sum();
260 | }
261 | break;
262 | case 3:
263 | coef /= scaleX;
264 | if (!glm)
265 | {
266 | coef *= scaleY;
267 | beta0 = meanY - (coef * meanX).sum();
268 | } else
269 | {
270 | beta0 -= (coef * meanX).sum();
271 | }
272 | break;
273 | default:
274 | break;
275 | }
276 | }
277 | 
278 | void recover(double &beta0, SparseVector &coef)
279 | {
280 | switch(flag)
281 | {
282 | case 0:
283 | beta0 = 0.0;
284 | break;
285 | case 1:
286 | elementwise_quot(coef, scaleX);
287 | if (!glm)
288 | {
289 | coef *= scaleY;
290 | beta0 = 0.0;
291 | }
292 | break;
293 | case 2:
294 | if (!glm)
295 | {
296 | coef *= scaleY;
297 | beta0 = meanY - sparse_inner_product(coef, meanX);
298 | } else
299 | {
300 | beta0 -= sparse_inner_product(coef, meanX);
301 | }
302 | break;
303 | case 3:
304 | elementwise_quot(coef, scaleX);
305 | if (!glm)
306 | {
307 | coef *= scaleY;
308 | beta0 = meanY - sparse_inner_product(coef, meanX);
309 | } else
310 | {
311 | beta0 -= sparse_inner_product(coef, meanX);
312 | }
313 | break;
314 | default:
315 | break;
316 | }
317 | }
318 | 
319 | double get_scaleY() { return scaleY; }
320 | };
321 | 
322 | 
323 | 
324 | #endif // DATASTD_H
325 | 
--------------------------------------------------------------------------------
/src/ordinis_dense_glm.cpp:
--------------------------------------------------------------------------------
1 | #define EIGEN_DONT_PARALLELIZE
2 | 
3 | #include "CoordLogisticDense.h"
4 | #include "CoordGLMDense.h"
5 | #include "DataStd.h"
6 | 
7 | 
8 | using Eigen::MatrixXf;
9 | using Eigen::VectorXf;
10 | using Eigen::MatrixXd;
11 | using Eigen::VectorXd;
12 | using Eigen::ArrayXf;
13 | using Eigen::ArrayXd;
14 | using Eigen::ArrayXXf;
15 | using Eigen::Map;
16 | 
17 | using Rcpp::wrap;
18 | using Rcpp::as;
19 | using Rcpp::List;
20 | using Rcpp::Named;
21 | using Rcpp::IntegerVector;
22 | 
23 | typedef Map<VectorXd> MapVecd;
24 | typedef Map<MatrixXd> MapMatd;
25 | typedef Eigen::SparseVector<double> SpVec;
26 | typedef Eigen::SparseMatrix<double> SpMat;
27 | 
28 | inline void write_beta_matrix(SpMat &betas, int col, double beta0, SpVec &coef)
29 | {
30 | betas.insert(0, col) = beta0;
31 | 
32 | for(SpVec::InnerIterator iter(coef); iter; ++iter)
33 | {
34 | betas.insert(iter.index() + 1, col) = iter.value();
35 | }
36 | }
37 | 
38 | List coord_ordinis_dense_glm(Rcpp::NumericMatrix x_,
39 | Rcpp::NumericVector y_,
40 | Rcpp::NumericVector weights_,
41 | Rcpp::NumericVector offset_,
42 | Rcpp::NumericVector lambda_,
43 | Rcpp::NumericVector penalty_factor_,
44 | Rcpp::NumericMatrix limits_,
45 | int nlambda_,
46 | double lmin_ratio_,
47 | bool standardize_,
48 | bool intercept_,
49 | bool glm_fam_,
50 | List opts_)
51 | {
52 | 
53 | const int n = x_.rows();
54 | const int p = x_.cols();
55 | 
56 | MatrixXd datX(n, p);
57 | VectorXd datY(n);
58 | VectorXd weights(n);
59 | VectorXd offset(n);
60 | MatrixXd limits(2, p);
61 | 
62 | // copy data from the R objects into Eigen objects (everything stays double)
63 | std::copy(x_.begin(), x_.end(), datX.data());
64 | std::copy(y_.begin(), y_.end(), datY.data());
65 | std::copy(weights_.begin(), weights_.end(), weights.data());
66 | std::copy(offset_.begin(), offset_.end(), offset.data());
67 | 
68 | std::copy(limits_.begin(), limits_.end(), limits.data());
69 | 
70 | //Map<VectorXd> weights(as<Map<VectorXd> >(weights_));
71 | 
72 | // In glmnet, we minimize
73 | // 1/(2n) * ||y - X * beta||^2 + lambda * ||beta||_1
74 | // which is equivalent to minimizing
75 | // 1/2 * ||y - X * beta||^2 + n * lambda * ||beta||_1
76 | ArrayXd lambda(as<ArrayXd>(lambda_));
77 | int nlambda = lambda.size();
78 | 
79 | ArrayXd penalty_factor(as<ArrayXd>(penalty_factor_));
80 | 
81 | 
82 | List opts(opts_);
83 | const int maxit = as<int>(opts["maxit"]);
84 | const double tol = as<double>(opts["tol"]);
85 | const double alpha = as<double>(opts["alpha"]);
86 | const double gamma = as<double>(opts["gamma"]);
87 | const int maxit_irls = as<int>(opts["maxit.irls"]);
88 | const double tol_irls = as<double>(opts["tol.irls"]);
89 | const int dfmax = as<int>(opts["dfmax"]);
90 | const bool standardize = standardize_;
91 | const bool intercept = intercept_;
92 | const bool glm_fam = glm_fam_;
93 | 
94 | std::vector<std::string> penalty(as< std::vector<std::string> >(opts["penalty"]));
95 | 
96 | DataStd<double> datstd(n, p, standardize, intercept, true);
97 | datstd.standardize(datX, datY, limits);
98 | 
99 | 
100 | CoordGLMDense *solver = NULL;
101 | CoordLogisticDense *solver_bin = NULL;
102 | 
103 | //CoordBase<Eigen::SparseVector<double> > *solver = NULL; // obj doesn't point to anything yet
104 | 
105 | if (glm_fam)
106 | {
107 | Rcpp::Function var = as<Rcpp::Function>(opts["variance"]);
108 | Rcpp::Function mu_eta = as<Rcpp::Function>(opts["mu_eta"]);
109 | Rcpp::Function linkinv = as<Rcpp::Function>(opts["linkinv"]);
110 | Rcpp::Function linkfun = as<Rcpp::Function>(opts["linkfun"]);
111 | Rcpp::Function dev_resids = as<Rcpp::Function>(opts["dev_resids"]);
112 | Rcpp::Function valideta = as<Rcpp::Function>(opts["valideta"]);
113 | Rcpp::Function validmu = as<Rcpp::Function>(opts["validmu"]);
114 | 
115 | solver = new CoordGLMDense(datX, datY,
116 | weights, offset,
117 | penalty_factor,
118 | limits, penalty[0],
119 | var, mu_eta, linkinv, linkfun, dev_resids,
120 | valideta, validmu,
121 | intercept, alpha,
122 | tol, maxit_irls, tol_irls);
123 | 
124 | } else
125 | {
126 | solver_bin = new CoordLogisticDense(datX, datY,
127 | weights, offset,
128 | penalty_factor,
129 | limits, penalty[0],
130 | intercept, alpha,
131 | tol, maxit_irls, tol_irls);
132 | }
133 | 
134 | 
135 | 
136 | if (nlambda < 1)
137 | {
138 | double lmax = 0.0;
139 | if (glm_fam)
140 | {
141 | lmax = solver->get_lambda_zero();
142 | } else
143 | {
144 | lmax = solver_bin->get_lambda_zero();
145 | }
146 | 
147 | double lmin = lmin_ratio_ * lmax;
148 | lambda.setLinSpaced(nlambda_, std::log(lmax), std::log(lmin));
149 | lambda = lambda.exp();
150 | nlambda = lambda.size();
151 | }
152 | 
153 | 
154 | 
155 | 
156 | SpMat beta(p + 1, nlambda);
157 | beta.reserve(Eigen::VectorXi::Constant(nlambda, std::min(n, p)));
158 | 
159 | //MatrixXd beta(p + 1, nlambda);
160 | //VectorXd lossvec(nlambda);
161 | 
162 | IntegerVector niter(nlambda);
163 | double ilambda = 0.0;
164 | 
165 | double null_dev = 0.0;
166 | VectorXd deviance(nlambda);
167 | deviance.setZero();
168 | 
169 | 
170 | int last = nlambda;
171 | for(int i = 0; i < nlambda; i++)
172 | {
173 | 
174 | ilambda = lambda[i];
175 | 
176 | if (glm_fam)
177 | {
178 | if(i == 0)
179 | solver->init(ilambda, gamma);
180 | else
181 | solver->init_warm(ilambda, gamma);
182 | 
183 | niter[i] = solver->solve(maxit);
184 | 
185 | SpVec res = solver->get_beta();
186 | int nzero = solver->get_nzero();
187 | deviance(i) = solver->get_dev();
188 | 
189 | if (i == 0) null_dev = solver->get_null_dev();
190 | 
191 | if (p >= n)
192 | {
193 | if ((nzero > dfmax || deviance(i)
< 0.1 * null_dev) && i > 0 )
194 | {
195 | last = i - 1;
196 | break;
197 | }
198 | } else
199 | {
200 | if ((nzero > dfmax || deviance(i) < 0.05 * null_dev) && i > 0 )
201 | {
202 | last = i - 1;
203 | break;
204 | }
205 | }
206 | 
207 | double beta0 = 0.0;
208 | beta0 = solver->get_intercept();
209 | 
210 | datstd.recover(beta0, res);
211 | //beta(0,i) = beta0;
212 | //beta.block(1, i, p, 1) = res;
213 | write_beta_matrix(beta, i, beta0, res);
214 | } else
215 | {
216 | if(i == 0)
217 | solver_bin->init(ilambda, gamma);
218 | else
219 | solver_bin->init_warm(ilambda, gamma);
220 | 
221 | niter[i] = solver_bin->solve(maxit);
222 | 
223 | SpVec res = solver_bin->get_beta();
224 | int nzero = solver_bin->get_nzero();
225 | deviance(i) = solver_bin->get_dev();
226 | 
227 | if (i == 0) null_dev = solver_bin->get_null_dev();
228 | 
229 | if (p >= n)
230 | {
231 | if ((nzero > dfmax || deviance(i) < 0.1 * null_dev) && i > 0 )
232 | {
233 | last = i - 1;
234 | break;
235 | }
236 | } else
237 | {
238 | if ((nzero > dfmax || deviance(i) < 0.05 * null_dev) && i > 0 )
239 | {
240 | last = i - 1;
241 | break;
242 | }
243 | }
244 | 
245 | double beta0 = 0.0;
246 | beta0 = solver_bin->get_intercept();
247 | 
248 | datstd.recover(beta0, res);
249 | //beta(0,i) = beta0;
250 | //beta.block(1, i, p, 1) = res;
251 | write_beta_matrix(beta, i, beta0, res);
252 | }
253 | //lossvec(i) = solver->get_loss();
254 | }
255 | 
256 | if (glm_fam)
257 | {
258 | delete solver;
259 | } else
260 | {
261 | delete solver_bin;
262 | }
263 | 
264 | 
265 | beta.makeCompressed();
266 | 
267 | return List::create(Named("beta") = beta,
268 | Named("niter") = niter,
269 | Named("lambda") = lambda,
270 | //Named("loss") = lossvec,
271 | Named("deviance") = deviance,
272 | Named("null.deviance") = null_dev,
273 | Named("last") = last);
274 | }
275 | 
276 | // [[Rcpp::export]]
277 | List coord_ordinis_dense_glm_cpp(Rcpp::NumericMatrix x,
278 | Rcpp::NumericVector y,
279 | Rcpp::NumericVector weights,
280 | Rcpp::NumericVector offset,
281 | Rcpp::NumericVector lambda,
282 | Rcpp::NumericVector penalty_factor,
283 | Rcpp::NumericMatrix limits,
284 | int nlambda,
285 | double lmin_ratio,
286 | bool standardize,
287 | bool intercept,
288 | bool glm_fam,
289 | List opts)
290 | {
291 | return coord_ordinis_dense_glm(x, y, weights, offset,
292 | lambda, penalty_factor,
293 | limits,
294 | nlambda,
295 | lmin_ratio,
296 | standardize,
297 | intercept,
298 | glm_fam,
299 | opts);
300 | }
301 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | ## Introduction to ‘ordinis’
3 | 
4 | The ‘ordinis’ package provides computation for penalized regression
5 | problems via coordinate descent. It is mostly for my own experimentation
6 | at this stage; however, it is fairly efficient and reliable.
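For reference, the criterion being minimized has the usual elastic net
form: for a chosen penalty `P` (lasso, MCP, or SCAD), the coefficients
minimize
`loss(beta) + sum_j penalty.factor[j] * ( alpha * P(beta_j; lambda, gamma) + 0.5 * (1 - alpha) * lambda * beta_j^2 )`,
where the loss is half the mean squared error for `family = "gaussian"`
and the mean negative log-likelihood for `family = "binomial"` (this
mirrors the `objective_linear()` and `objective_logistic()` helper
functions in `R/utils.R`).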
7 | 8 | Install using the **devtools** package: 9 | 10 | ``` r 11 | devtools::install_github("jaredhuling/ordinis") 12 | ``` 13 | 14 | or by cloning and building 15 | 16 | ## Example 17 | 18 | ``` r 19 | library(ordinis) 20 | 21 | # compute the full solution path, n > p 22 | set.seed(123) 23 | n <- 500 24 | p <- 50000 25 | m <- 50 26 | b <- matrix(c(runif(m), rep(0, p - m))) 27 | x <- matrix(rnorm(n * p, sd = 3), n, p) 28 | y <- drop(x %*% b) + rnorm(n) 29 | 30 | mod <- ordinis(x, y, 31 | penalty = "mcp", 32 | lower.limits = rep(0, p), # force all coefficients to be positive 33 | penalty.factor = c(0, 0, rep(1, p-2)), # don't penalize first two coefficients 34 | alpha = 0.95) # use elastic net with alpha = 0.95 35 | 36 | plot(mod) 37 | ``` 38 | 39 | ![](vignettes/unnamed-chunk-1-1.png) 40 | 41 | ``` r 42 | ## show likelihood 43 | logLik(mod) 44 | ``` 45 | 46 | ## 'log Lik.' -1960.4981, -1959.7942, -1958.4955, -1957.2568, -1956.0766, -1954.9533, -1953.8853, -1952.3674, -1950.1291, -1945.7116, -1939.9338, -1932.9379, -1925.4526, -1917.7379, -1909.7029, -1901.5909, -1893.7027, -1885.2011, -1874.5999, -1863.6113, -1852.3128, -1840.6926, -1828.9064, -1817.2957, -1805.9049, -1794.2657, -1782.7004, -1771.0541, -1758.8450, -1744.8511, -1730.6794, -1715.8181, -1699.7919, -1683.1596, -1666.3203, -1647.3779, -1627.8234, -1608.2236, -1588.6072, -1569.0073, -1549.4624, -1529.6515, -1508.9413, -1488.3112, -1467.8201, -1446.9552, -1426.0467, -1404.6194, -1383.3848, -1361.5585, -1340.0356, -1318.9663, -1298.7914, -1279.5254, -1259.3910, -1239.3235, -1220.1175, -1201.6675, -1183.7428, -1166.4344, -1149.8871, -1133.5086, -1116.3535, -1099.6555, -1082.5834, -1065.4142, -1048.6729, -1031.4340, -1014.8887, -999.1320, -984.1856, -969.5954, -954.8587, -940.2463, -925.9616, -912.3066, -899.2996, -885.3278, -871.5722, -858.5513, -846.3830, -835.1985, -824.8354, -815.1351, -806.1005, -797.7647, -789.3363, -781.0021, -773.5596, -766.7392, -760.4534, -754.7102, -749.4411, -744.6367, -740.2854, -735.9902, -731.7368, -727.1736, -722.7904, -718.6086 (df= 3 4 4 4 4 4 4 5 7 9111213131414141619192021222222232324262628303031323434343434343535353536373738383838383840404040404040414242434344444444444546464646464747474747474747474848484848484848484950515252) 47 | 48 | ``` r 49 | ## compute AIC 50 | AIC(mod) 51 | ``` 52 | 53 | ## [1] 3926.996 3927.588 3924.991 3922.514 3920.153 3917.907 3915.771 54 | ## [8] 3914.735 3914.258 3909.423 3901.868 3889.876 3876.905 3861.476 55 | ## [15] 3847.406 3831.182 3815.405 3802.402 3787.200 3765.223 3744.626 56 | ## [22] 3723.385 3701.813 3678.591 3655.810 3634.531 3611.401 3590.108 57 | ## [29] 3569.690 3541.702 3517.359 3491.636 3459.584 3428.319 3396.641 58 | ## [36] 3362.756 3323.647 3284.447 3245.214 3206.015 3166.925 3129.303 59 | ## [43] 3087.883 3046.622 3005.640 2965.910 2926.093 2883.239 2842.770 60 | ## [50] 2799.117 2756.071 2713.933 2673.583 2635.051 2598.782 2558.647 61 | ## [57] 2520.235 2483.335 2447.486 2412.869 2379.774 2349.017 2316.707 62 | ## [64] 2283.311 2251.167 2216.828 2185.346 2150.868 2117.777 2086.264 63 | ## [71] 2056.371 2029.191 2001.717 1972.493 1943.923 1916.613 1890.599 64 | ## [78] 1864.656 1837.144 1811.103 1786.766 1764.397 1743.671 1724.270 65 | ## [85] 1706.201 1689.529 1674.673 1658.004 1643.119 1629.478 1616.907 66 | ## [92] 1605.420 1594.882 1585.273 1576.571 1569.980 1563.474 1556.347 67 | ## [99] 1549.581 1541.217 68 | 69 | ``` r 70 | ## BIC 71 | BIC(mod) 72 | ``` 73 | 74 | ## [1] 3939.640 3944.447 3941.849 3939.372 3937.012 3934.765 3932.629 75 | 
## [8] 3935.808 3943.760 3947.355 3948.228 3940.451 3931.695 3916.266 76 | ## [15] 3906.410 3890.186 3874.410 3869.836 3867.277 3845.300 3828.918 77 | ## [22] 3811.892 3794.534 3771.313 3748.531 3731.467 3708.337 3691.259 78 | ## [29] 3679.270 3651.282 3635.368 3618.074 3586.022 3558.972 3531.508 79 | ## [36] 3506.052 3466.943 3427.744 3388.511 3349.311 3310.222 3276.814 80 | ## [43] 3235.394 3194.134 3153.152 3117.636 3082.034 3039.179 3002.925 81 | ## [50] 2959.272 2916.226 2874.088 2833.738 2795.206 2767.366 2727.231 82 | ## [57] 2688.819 2651.919 2616.070 2581.453 2548.358 2521.816 2493.721 83 | ## [64] 2460.325 2432.395 2398.057 2370.789 2336.311 2303.220 2271.707 84 | ## [71] 2241.814 2218.848 2195.589 2166.365 2137.795 2110.485 2084.471 85 | ## [78] 2062.742 2035.231 2009.189 1984.853 1962.484 1941.757 1922.357 86 | ## [85] 1904.288 1887.616 1876.974 1860.305 1845.420 1831.780 1819.208 87 | ## [92] 1807.722 1797.183 1787.575 1778.872 1776.496 1774.204 1771.292 88 | ## [99] 1768.741 1760.377 89 | 90 | ## Performance 91 | 92 | ### Lasso (linear regression) 93 | 94 | ``` r 95 | library(microbenchmark) 96 | library(glmnet) 97 | 98 | b <- matrix(c(runif(m, min = -1), rep(0, p - m))) 99 | x <- matrix(rnorm(n * p, sd = 3), n, p) 100 | y <- drop(x %*% b) + rnorm(n) 101 | 102 | lambdas = glmnet(x, y)$lambda 103 | 104 | microbenchmark( 105 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-10, # thresh must be very small 106 | lambda = lambdas)}, # for comparable precision 107 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 108 | tol = 1e-3)}, 109 | times = 5 110 | ) 111 | ``` 112 | 113 | ## Unit: seconds 114 | ## expr min lq mean median uq max 115 | ## glmnet[lasso] 3.377096 3.487334 3.543365 3.545748 3.553003 3.753645 116 | ## ordinis[lasso] 5.768823 5.777944 5.836068 5.799918 5.814987 6.018668 117 | ## neval 118 | ## 5 119 | ## 5 120 | 121 | ``` r 122 | # difference of results 123 | max(abs(coef(resg) - reso$beta)) 124 | ``` 125 | 126 | ## [1] 0.0008824882 127 | 128 | ``` r 129 | microbenchmark( 130 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-15, # thresh must be very low for comparable precision 131 | lambda = lambdas)}, 132 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 133 | tol = 1e-3)}, 134 | times = 5 135 | ) 136 | ``` 137 | 138 | ## Unit: seconds 139 | ## expr min lq mean median uq max 140 | ## glmnet[lasso] 5.618403 5.748487 5.754279 5.783774 5.792731 5.827999 141 | ## ordinis[lasso] 5.961874 5.967261 6.039594 6.075738 6.088031 6.105065 142 | ## neval 143 | ## 5 144 | ## 5 145 | 146 | ``` r 147 | # difference of results 148 | max(abs(coef(resg) - reso$beta)) 149 | ``` 150 | 151 | ## [1] 1.404332e-05 152 | 153 | ### Lasso (logistic regression) 154 | 155 | `glmnet` is clearly faster for logistic regression for the same 156 | precision 157 | 158 | ``` r 159 | library(MASS) 160 | 161 | set.seed(123) 162 | n <- 200 163 | p <- 10000 164 | m <- 20 165 | b <- matrix(c(runif(m, min = -0.5, max = 0.5), rep(0, p - m))) 166 | x <- matrix(rnorm(n * p, sd = 3), n, p) 167 | y <- 1 *(drop(x %*% b) + rnorm(n) > 0) 168 | 169 | lambdas = glmnet(x, y, family = "binomial", lambda.min.ratio = 0.02)$lambda 170 | 171 | microbenchmark( 172 | "glmnet[lasso]" = {resg <- glmnet(x, y, family = "binomial", 173 | thresh = 1e-10, 174 | lambda = lambdas)}, 175 | "ordinis[lasso]" = {reso <- ordinis(x, y, family = "binomial", 176 | lambda = lambdas, 177 | tol = 1e-3, tol.irls = 1e-3)}, 178 | times = 5 179 | ) 180 | ``` 181 | 182 | ## Unit: milliseconds 183 | ## expr min lq 
mean median uq max 184 | ## glmnet[lasso] 399.9576 405.9744 410.6385 407.893 410.4411 428.9268 185 | ## ordinis[lasso] 1166.7480 1185.0670 1193.5077 1192.116 1195.2467 1228.3609 186 | ## neval 187 | ## 5 188 | ## 5 189 | 190 | ``` r 191 | # difference of results 192 | max(abs(coef(resg) - reso$beta)) 193 | ``` 194 | 195 | ## [1] 0.0003735867 196 | 197 | ``` r 198 | microbenchmark( 199 | "glmnet[lasso]" = {resg <- glmnet(x, y, family = "binomial", 200 | thresh = 1e-15, 201 | lambda = lambdas)}, 202 | "ordinis[lasso]" = {reso <- ordinis(x, y, family = "binomial", 203 | lambda = lambdas, 204 | tol = 1e-3, tol.irls = 1e-3)}, 205 | times = 5 206 | ) 207 | ``` 208 | 209 | ## Unit: milliseconds 210 | ## expr min lq mean median uq max 211 | ## glmnet[lasso] 676.8328 684.9007 697.8913 686.202 709.3506 732.1704 212 | ## ordinis[lasso] 1175.4595 1178.5731 1209.1550 1197.182 1215.1998 1279.3604 213 | ## neval 214 | ## 5 215 | ## 5 216 | 217 | ``` r 218 | # difference of results 219 | max(abs(coef(resg) - reso$beta)) 220 | ``` 221 | 222 | ## [1] 2.525457e-05 223 | 224 | ### Lasso (linear regression, ill-conditioned) 225 | 226 | ``` r 227 | library(MASS) 228 | 229 | set.seed(123) 230 | n <- 500 231 | p <- 1000 232 | m <- 50 233 | b <- matrix(c(runif(m, min = -1), rep(0, p - m))) 234 | sig <- matrix(0.5, ncol=p,nrow=p); diag(sig) <- 1 235 | x <- mvrnorm(n, mu=rep(0, p), Sigma = sig) 236 | y <- drop(x %*% b) + rnorm(n) 237 | 238 | lambdas = glmnet(x, y)$lambda[1:65] 239 | 240 | microbenchmark( 241 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-9, # thresh must be very small 242 | lambda = lambdas)}, # for comparable precision 243 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 244 | tol = 1e-5)}, 245 | times = 5 246 | ) 247 | ``` 248 | 249 | ## Unit: milliseconds 250 | ## expr min lq mean median uq max 251 | ## glmnet[lasso] 158.4697 158.7652 163.5977 159.7300 163.8546 177.1690 252 | ## ordinis[lasso] 312.3342 316.8890 317.9636 317.8517 320.2239 322.5189 253 | ## neval 254 | ## 5 255 | ## 5 256 | 257 | ``` r 258 | # difference of results 259 | max(abs(coef(resg) - reso$beta)) 260 | ``` 261 | 262 | ## [1] 0.000262974 263 | 264 | ``` r 265 | microbenchmark( 266 | "glmnet[lasso]" = {resg <- glmnet(x, y, thresh = 1e-11, # thresh must be very low for comparable precision 267 | lambda = lambdas)}, 268 | "ordinis[lasso]" = {reso <- ordinis(x, y, lambda = lambdas, 269 | tol = 1e-5)}, 270 | times = 5 271 | ) 272 | ``` 273 | 274 | ## Unit: milliseconds 275 | ## expr min lq mean median uq max 276 | ## glmnet[lasso] 324.0667 327.7087 340.9450 348.1692 348.4202 356.3602 277 | ## ordinis[lasso] 313.3590 314.2454 324.0695 320.6374 328.1055 344.0002 278 | ## neval 279 | ## 5 280 | ## 5 281 | 282 | ``` r 283 | # difference of results 284 | max(abs(coef(resg) - reso$beta)) 285 | ``` 286 | 287 | ## [1] 2.454023e-05 288 | 289 | ### Validity of solutions with various bells and whistles 290 | 291 | Due to internal differences in standardization, we now compare with 292 | `glmnet` when using observation weights, penalty scaling factors, and 293 | parameter box constraints 294 | 295 | ``` r 296 | set.seed(123) 297 | n = 200 298 | p = 1000 299 | m <- 15 300 | b = c(runif(m, min = -0.5, max = 0.5), rep(0, p - m)) 301 | x = (matrix(rnorm(n * p, sd = 3), n, p)) 302 | y = drop(x %*% b) + rnorm(n) 303 | y2 <- 1 * (y > rnorm(n, mean = 0.5, sd = 3)) 304 | 305 | 306 | wts <- runif(nrow(x)) 307 | wts <- wts / mean(wts) # re-scale like glmnet does, so we can compare 308 | 309 | penalty.factor <- rbinom(ncol(x), 1, 
0.99) * runif(ncol(x)) * 5
310 | penalty.factor <- (penalty.factor / sum(penalty.factor)) * ncol(x) # re-scale like glmnet does, so we can compare
311 | 
312 | system.time(resb <- ordinis(x, y2, family = "binomial", tol = 1e-7, tol.irls = 1e-5,
313 | penalty = "lasso",
314 | alpha = 0.5, #elastic net term
315 | lower.limits = 0, upper.limits = 0.02, # box constraints on all parameters
316 | standardize = FALSE, intercept = TRUE,
317 | weights = wts, # observation weights
318 | penalty.factor = penalty.factor)) # penalty scaling factors
319 | ```
320 | 
321 | ## user system elapsed
322 | ## 0.069 0.000 0.069
323 | 
324 | ``` r
325 | system.time(resg <- glmnet(x,y2, family = "binomial",
326 | lambda = resb$lambda,
327 | alpha = 0.5, #elastic net term
328 | weights = wts, # observation weights
329 | penalty.factor = penalty.factor, # penalty scaling factors
330 | lower.limits = 0, upper.limits = 0.02, # box constraints on all parameters
331 | standardize = FALSE, intercept = TRUE,
332 | thresh = 1e-16))
333 | ```
334 | 
335 | ## user system elapsed
336 | ## 0.041 0.001 0.042
337 | 
338 | ``` r
339 | ## compare solutions
340 | max(abs(resb$beta[-1,] - resg$beta))
341 | ```
342 | 
343 | ## [1] 3.823445e-09
344 | 
345 | ``` r
346 | # now with no box constraints
347 | system.time(resb <- ordinis(x, y2, family = "binomial", tol = 1e-7, tol.irls = 1e-5,
348 | penalty = "lasso",
349 | alpha = 0.5, #elastic net term
350 | standardize = FALSE, intercept = TRUE,
351 | weights = wts, # observation weights
352 | penalty.factor = penalty.factor)) # penalty scaling factors
353 | ```
354 | 
355 | ## user system elapsed
356 | ## 0.087 0.001 0.087
357 | 
358 | ``` r
359 | system.time(resg <- glmnet(x,y2, family = "binomial",
360 | lambda = resb$lambda,
361 | alpha = 0.5, #elastic net term
362 | weights = wts, # observation weights
363 | penalty.factor = penalty.factor, # penalty scaling factors
364 | standardize = FALSE, intercept = TRUE,
365 | thresh = 1e-16))
366 | ```
367 | 
368 | ## user system elapsed
369 | ## 0.061 0.001 0.064
370 | 
371 | ``` r
372 | ## compare solutions
373 | max(abs(resb$beta[-1,] - resg$beta))
374 | ```
375 | 
376 | ## [1] 6.005807e-09
377 | 
378 | ### A Note on the Elastic Net and linear models
379 | 
380 | Due to differences in how scaling of the response is handled in glmnet, it
381 | yields slightly different solutions from both ordinis and ncvreg for
382 | Gaussian models with a ridge penalty term.
383 | 
384 | ``` r
385 | library(ncvreg)
386 | 
387 | ## I'm setting all methods to have high precision just so solutions are comparable.
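## (each package defines its convergence threshold on its own scale, so the
## tolerance values themselves may not be directly comparable across packages)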
388 | ## differences in computation time may be due in part to the arbitrariness of the 389 | ## particular precisions chosen 390 | system.time(resg <- glmnet(x, y, family = "gaussian", alpha = 0.25, 391 | thresh = 1e-15)) 392 | ``` 393 | 394 | ## user system elapsed 395 | ## 0.481 0.006 0.492 396 | 397 | ``` r 398 | system.time(res <- ordinis(x, y, family = "gaussian", penalty = "lasso", alpha = 0.25, 399 | tol = 1e-10, lambda = resg$lambda)) 400 | ``` 401 | 402 | ## user system elapsed 403 | ## 0.363 0.002 0.366 404 | 405 | ``` r 406 | system.time(resn <- ncvreg(x, y, family="gaussian", penalty = "lasso", 407 | lambda = resg$lambda, alpha = 0.25, max.iter = 100000, 408 | eps = 1e-10)) 409 | ``` 410 | 411 | ## user system elapsed 412 | ## 0.507 0.003 0.511 413 | 414 | ``` r 415 | resgg <- res; resgg$beta[-1,] <- resg$beta 416 | 417 | # compare ordinis and glmnet 418 | max(abs(res$beta[-1,] - resg$beta)) 419 | ``` 420 | 421 | ## [1] 0.1123304 422 | 423 | ``` r 424 | # compare ordinis and ncvreg 425 | max(abs(res$beta - resn$beta)) 426 | ``` 427 | 428 | ## [1] 6.948169e-09 429 | 430 | ``` r 431 | # compare ncvreg and glmnet 432 | max(abs(resn$beta[-1,] - resg$beta)) 433 | ``` 434 | 435 | ## [1] 0.1123304 436 | -------------------------------------------------------------------------------- /R/ordinis_methods.R: -------------------------------------------------------------------------------- 1 | ## the code here is largely based on the code 2 | ## from the glmnet package (no reason to reinvent the wheel) 3 | 4 | #' Prediction method for coord lasso fitted objects 5 | #' 6 | #' @param object fitted "ordinis" model object 7 | #' @param newx Matrix of new values for \code{x} at which predictions are to be made. Must be a matrix; can be sparse as in the 8 | #' \code{CsparseMatrix} objects of the \pkg{Matrix} package. 9 | #' This argument is not used for \code{type=c("coefficients","nonzero")} 10 | #' @param s Value(s) of the penalty parameter lambda at which predictions are required. Default is the entire sequence used to create 11 | #' the model. 12 | #' @param type Type of prediction required. \code{type = "link"} gives the linear predictors for the \code{"binomial"} model; for \code{"gaussian"} models it gives the fitted values. 13 | #' \code{type = "response"} gives the fitted probabilities for \code{"binomial"}. \code{type = "coefficients"} computes the coefficients at the requested values for \code{s}. 14 | #' \code{type = "class"} applies only to \code{"binomial"} and produces the class label corresponding to the maximum probability. 15 | #' @param ... 
not used 16 | #' @importFrom graphics abline abline axis matplot points segments 17 | #' @importFrom methods as 18 | #' @importFrom stats approx predict quantile runif weighted.mean 19 | #' @return An object depending on the type argument 20 | #' @method predict ordinis 21 | #' @export 22 | #' @examples 23 | #' set.seed(123) 24 | #' n.obs <- 1e4 25 | #' n.vars <- 100 26 | #' n.obs.test <- 1e3 27 | #' 28 | #' true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15)) 29 | #' 30 | #' x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 31 | #' y <- rnorm(n.obs, sd = 3) + x %*% true.beta 32 | #' x.test <- matrix(rnorm(n.obs.test * n.vars), n.obs.test, n.vars) 33 | #' y.test <- rnorm(n.obs.test, sd = 3) + x.test %*% true.beta 34 | #' 35 | #' fit <- ordinis(x = x, y = y, nlambda = 10) 36 | #' 37 | #' preds.lasso <- predict(fit, newx = x.test, type = "response") 38 | #' 39 | #' apply(preds.lasso, 2, function(x) mean((y.test - x) ^ 2)) 40 | #' 41 | predict.ordinis <- function(object, newx, s = NULL, 42 | type = c("link", 43 | "response", 44 | "coefficients", 45 | "nonzero", 46 | "class"), ...) 47 | { 48 | type <- match.arg(type) 49 | 50 | if(missing(newx)){ 51 | if(!match(type, c("coefficients", "nonzero"), FALSE))stop("A value for 'newx' must be supplied") 52 | } 53 | nbeta <- object$beta 54 | 55 | if(!is.null(s)){ 56 | #vnames=dimnames(nbeta)[[1]] 57 | lambda <- object$lambda 58 | lamlist <- lambda.interp(object$lambda,s) 59 | nbeta <- nbeta[,lamlist$left,drop=FALSE]*lamlist$frac +nbeta[,lamlist$right,drop=FALSE]*(1-lamlist$frac) 60 | #dimnames(nbeta)=list(vnames,paste(seq(along=s))) 61 | } 62 | if (type == "coefficients") return(nbeta) 63 | if (type == "nonzero") { 64 | newbeta <- abs(as.matrix(object$beta)) > 0 65 | index <- 1:(dim(newbeta)[1]) 66 | nzel <- function(x, index) if(any(x)) index[x] else NULL 67 | betaList <- apply(newbeta, 2, nzel, index) 68 | return(betaList) 69 | } 70 | 71 | newx <- as.matrix(newx) 72 | # add constant column if needed 73 | if (ncol(newx) < nrow(nbeta)) 74 | newx <- cbind(rep(1, nrow(newx)), newx) 75 | 76 | as.matrix(newx %*% nbeta) 77 | } 78 | 79 | 80 | #' @export 81 | predict.cdgaussian <- function(object, newx, s = NULL, 82 | type = c("link", 83 | "response", 84 | "coefficients", 85 | "nonzero"), ...) 86 | { 87 | NextMethod("predict") 88 | } 89 | 90 | 91 | #' @export 92 | predict.cdbinomial <- function(object, newx, s=NULL, 93 | type=c("link", 94 | "response", 95 | "coefficients", 96 | "class", 97 | "nonzero"), ...) 
98 | {
99 | type <- match.arg(type)
100 | nfit <- NextMethod("predict")
101 | switch(type,
102 | response={
103 | prob=exp(-nfit)
104 | 1 / (1 + prob)
105 | },
106 | class={
107 | cnum=ifelse(nfit > 0, 2, 1)
108 | clet=object$classnames[cnum]
109 | if(is.matrix(cnum))clet=array(clet,dim(cnum),dimnames(cnum))
110 | clet
111 | },
112 | nfit
113 | )
114 | }
115 | 
116 | 
117 | 
118 | 
119 | cv.cdbinomial=function(outlist,lambda,x,y,foldid,type.measure,grouped,keep=FALSE){
120 | typenames=c(mse="Mean-Squared Error",mae="Mean Absolute Error",deviance="Binomial Deviance",auc="AUC",class="Misclassification Error")
121 | if(type.measure=="default")type.measure="deviance"
122 | if(!match(type.measure,c("mse","mae","deviance","auc","class"),FALSE)){
123 | warning("Only 'deviance', 'class', 'auc', 'mse' or 'mae' available for binomial models; 'deviance' used")
124 | type.measure="deviance"
125 | }
126 | 
127 | ###These are hard coded in glmnet's Fortran, so we do that here too
128 | prob_min=1e-5
129 | prob_max=1-prob_min
130 | ###Turn y into a matrix
131 | nc = dim(y)
132 | if (is.null(nc)) {
133 | y = as.factor(y)
134 | ntab = table(y)
135 | nc = as.integer(length(ntab))
136 | y = diag(nc)[as.numeric(y), ]
137 | }
138 | N=nrow(y)
139 | nfolds=max(foldid)
140 | if( (N/nfolds <10)&&type.measure=="auc"){
141 | warning("Too few (< 10) observations per fold for type.measure='auc' in cv.ordinis; changed to type.measure='deviance'. Alternatively, use smaller value for nfolds",call.=FALSE)
142 | type.measure="deviance"
143 | }
144 | if( (N/nfolds <3)&&grouped){
145 | warning("Option grouped=FALSE enforced in cv.ordinis, since < 3 observations per fold",call.=FALSE)
146 | grouped=FALSE
147 | }
148 | 
149 | 
150 | #if(!is.null(offset)){
151 | # is.offset=TRUE
152 | # offset=drop(offset)
153 | #}else is.offset=FALSE
154 | predmat=matrix(NA,nrow(y),length(lambda))
155 | nlams=double(nfolds)
156 | for(i in seq(nfolds)){
157 | which=foldid==i
158 | fitobj=outlist[[i]]
159 | #if(is.offset)off_sub=offset[which]
160 | preds=predict(fitobj, newx = x[which,,drop=FALSE], type="response")
161 | nlami=length(outlist[[i]]$lambda)
162 | predmat[which,seq(nlami)]=preds
163 | nlams[i]=nlami
164 | }
165 | ###If auc we behave differently
166 | if(type.measure=="auc"){
167 | cvraw=matrix(NA,nfolds,length(lambda))
168 | good=matrix(0,nfolds,length(lambda))
169 | for(i in seq(nfolds)){
170 | good[i,seq(nlams[i])]=1
171 | which=foldid==i
172 | for(j in seq(nlams[i])){
173 | #cvraw[i,j]=auc.mat(y[which,],predmat[which,j],weights[which])
174 | cvraw[i,j]=auc.mat(y[which,],predmat[which,j], rep(1, sum(which)))
175 | }
176 | }
177 | N=apply(good,2,sum)
178 | #weights=tapply(weights,foldid,sum)
179 | }
180 | else{
181 | ##extract weights and normalize to sum to 1
182 | #ywt=apply(y,1,sum)
183 | #y=y/ywt
184 | #weights=weights*ywt
185 | 
186 | N=nrow(y) - apply(is.na(predmat),2,sum)
187 | cvraw=switch(type.measure,
188 | "mse"=(y[,1]-(1-predmat))^2 +(y[,2]-predmat)^2,
189 | "mae"=abs(y[,1]-(1-predmat)) +abs(y[,2]-predmat),
190 | "deviance"= {
191 | predmat=pmin(pmax(predmat,prob_min),prob_max)
192 | lp=y[,1]*log(1-predmat)+y[,2]*log(predmat)
193 | ly=log(y)
194 | ly[y==0]=0
195 | ly=drop((y*ly)%*%c(1,1))
196 | 2*(ly-lp)
197 | },
198 | "class"=y[,1]*(predmat>.5) +y[,2]*(predmat<=.5)
199 | )
200 | if(grouped){
201 | cvob=cvcompute(cvraw,rep(1, nrow(y)),foldid,nlams)
202 | cvraw=cvob$cvraw;weights=cvob$weights;N=cvob$N
203 | }
204 | }
205 | #cvm=apply(cvraw,2,weighted.mean,w=weights,na.rm=TRUE)
206 | cvm=apply(cvraw,2,mean,na.rm=TRUE)
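# cvsd below is the standard error of the CV curve: for each lambda,
# sqrt( mean((loss - cvm)^2) / (N - 1) )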
207 | #cvsd=sqrt(apply(scale(cvraw,cvm,FALSE)^2,2,weighted.mean,w=rep(1, nrow(y)),na.rm=TRUE)/(N-1))
208 | cvsd=sqrt(apply(scale(cvraw,cvm,FALSE)^2,2,mean,na.rm=TRUE)/(N-1))
209 | out=list(cvm=cvm,cvsd=cvsd,name=typenames[type.measure])
210 | if(keep)out$fit.preval=predmat
211 | out
212 | 
213 | }
214 | 
215 | cv.cdgaussian <- function(outlist,lambda,x,y,foldid,type.measure,grouped,keep=FALSE)
216 | {
217 | typenames=c(deviance="Mean-Squared Error",mse="Mean-Squared Error",mae="Mean Absolute Error",auc="Area under (ROC) Curve")
218 | if(type.measure=="default")type.measure="mse"
219 | if(!match(type.measure,c("mse","mae","deviance","auc"),FALSE)){
220 | warning("Only 'mse', 'deviance', 'auc' or 'mae' available for Gaussian models; 'mse' used")
221 | type.measure="mse"
222 | }
223 | #if(!is.null(offset))y=y-drop(offset)
224 | predmat=matrix(NA,length(y),length(lambda))
225 | nfolds=max(foldid)
226 | nlams=double(nfolds)
227 | for(i in seq(nfolds)){
228 | which=foldid==i
229 | fitobj=outlist[[i]]
230 | #fitobj$offset=FALSE
231 | preds=predict(fitobj,x[which,,drop=FALSE], type="response")
232 | nlami=length(outlist[[i]]$lambda)
233 | predmat[which,seq(nlami)]=preds
234 | nlams[i]=nlami
235 | }
236 | 
237 | if (type.measure == "auc")
238 | {
239 | cvraw = matrix(NA, nfolds, length(lambda))
240 | good = matrix(0, nfolds, length(lambda))
241 | for (i in seq(nfolds)) {
242 | good[i, seq(nlams[i])] = 1
243 | which = foldid == i
244 | for (j in seq(nlams[i])) {
245 | #cvraw[i, j] = auc.mat(y[which, ], predmat[which,
246 | # j], weights[which])
247 | cvraw[i,j] <- auc(y[which],predmat[which,j], rep(1, sum(which)))
248 | }
249 | }
250 | N = apply(good, 2, sum)
251 | } else
252 | {
253 | N=length(y) - apply(is.na(predmat),2,sum)
254 | cvraw=switch(type.measure,
255 | "mse"=(y-predmat)^2,
256 | "deviance"=(y-predmat)^2,
257 | "mae"=abs(y-predmat)
258 | )
259 | if( (length(y)/nfolds <3)&&grouped){
260 | warning("Option grouped=FALSE enforced in cv.ordinis, since < 3 observations per fold",call.=FALSE)
261 | grouped=FALSE
262 | }
263 | if(grouped){
264 | cvob=cvcompute(cvraw,rep(1, length(y)),foldid,nlams)
265 | cvraw=cvob$cvraw;weights=cvob$weights;N=cvob$N
266 | }
267 | }
268 | 
269 | 
270 | #cvm=apply(cvraw,2,weighted.mean,w=weights,na.rm=TRUE)
271 | cvm=apply(cvraw,2,mean,na.rm=TRUE)
272 | #cvsd=sqrt(apply(scale(cvraw,cvm,FALSE)^2,2,weighted.mean,w=weights,na.rm=TRUE)/(N-1))
273 | cvsd=sqrt(apply(scale(cvraw,cvm,FALSE)^2,2,mean,na.rm=TRUE)/(N-1))
274 | out=list(cvm=cvm,cvsd=cvsd,name=typenames[type.measure])
275 | if(keep)out$fit.preval=predmat
276 | out
277 | }
278 | 
279 | 
280 | 
281 | 
282 | 
283 | 
284 | 
285 | #' Plot method for ordinis fitted objects
286 | #' 
287 | #' @param x fitted "ordinis" model object or fitted "cv.ordinis" model object
288 | #' @param xvar What is on the X-axis. \code{"loglambda"} plots against the log-lambda sequence, \code{"lambda"} against the lambda sequence, and \code{"norm"} against the L1-norm of the coefficients
289 | #' @param labsize size of labels for variable names. If labsize = 0, then no variable names will be plotted
290 | #' @param xlab label for x-axis
291 | #' @param ylab label for y-axis
292 | #' @param main main title for plot
293 | #' @param xlim numeric vector of length 2 giving the range for the x-axis of the plot
294 | #' @param n.print scalar integer for the number of times along the regularization path to print the number
295 | #' of nonzero coefficients. If set to a negative value, the number of nonzero coefficients will not be printed.
296 | #' @param ...
other graphical parameters for the plot 297 | #' @rdname plot 298 | #' @export 299 | #' @examples 300 | #' set.seed(123) 301 | #' n.obs <- 100 302 | #' n.vars <- 1000 303 | #' 304 | #' true.beta <- c(runif(5, 0.1, 1) * (2 * rbinom(5, 1, 0.5) - 1), rep(0, n.vars - 5)) 305 | #' 306 | #' x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 307 | #' y <- rnorm(n.obs, sd = 2) + x %*% true.beta 308 | #' 309 | #' fit <- ordinis(x = x, y = y, penalty = c("mcp")) 310 | #' 311 | #' plot(fit) 312 | #' 313 | plot.ordinis <- function(x, 314 | xvar = c("loglambda", "lambda", "norm"), 315 | labsize = 0.6, 316 | xlab = iname, ylab = NULL, 317 | main = x$penalty, 318 | xlim = NULL, 319 | n.print = 10L, 320 | ...) 321 | { 322 | 323 | xvar <- match.arg(xvar) 324 | nbeta <- as.matrix(x$beta[-1,]) ## remove intercept 325 | remove <- apply(nbeta, 1, function(betas) all(betas == 0) ) 326 | 327 | if (is.null(xlim)) 328 | { 329 | switch(xvar, 330 | "norm" = { 331 | index <- apply(abs(nbeta), 2, sum) 332 | iname <- expression(L[1] * " Norm") 333 | xlim <- range(index) 334 | approx.f <- 1 335 | }, 336 | "lambda" = { 337 | index <- x$lambda 338 | iname <- expression(lambda) 339 | xlim <- rev(range(index)) 340 | approx.f <- 0 341 | }, 342 | "loglambda" = { 343 | index <- log(x$lambda) 344 | iname <- expression(log(lambda)) 345 | xlim <- rev(range(index)) 346 | approx.f <- 1 347 | } 348 | ) 349 | } else 350 | { 351 | switch(xvar, 352 | "norm" = { 353 | index <- apply(abs(nbeta), 2, sum) 354 | iname <- expression(L[1] * " Norm") 355 | approx.f <- 1 356 | }, 357 | "lambda" = { 358 | index <- x$lambda 359 | iname <- expression(lambda) 360 | approx.f <- 0 361 | }, 362 | "loglambda" = { 363 | index <- log(x$lambda) 364 | iname <- expression(log(lambda)) 365 | approx.f <- 1 366 | } 367 | ) 368 | } 369 | 370 | if (all(remove)) stop("All beta estimates are zero for all values of lambda. No plot returned.") 371 | 372 | 373 | cols <- rainbow(sum(!remove)) 374 | 375 | ## create sequence that grabs one of ROYGBIV and repeats with 376 | ## an increment up the rainbow spectrum with each step from 1:7 on ROYGBIV 377 | n.cols <- 7L 378 | scramble.seq <- rep(((1:n.cols) - 1) * (length(cols) %/% (n.cols)) + 1, length(cols) %/% n.cols)[1:length(cols)] + 379 | (((0:(length(cols)-1)) %/% n.cols)) 380 | 381 | scramble.seq[is.na(scramble.seq)] <- which(!(1:length(cols) %in% scramble.seq)) 382 | colseq <- cols[scramble.seq] 383 | 384 | 385 | matplot(index, t(nbeta[!remove,,drop=FALSE]), 386 | lty = 1, 387 | xlab = xlab, 388 | ylab = "", 389 | col = colseq, 390 | xlim = xlim, 391 | type = 'l', ...) 392 | 393 | if (is.null(ylab)) 394 | { 395 | mtext(expression(hat(beta)), side = 2, cex = par("cex"), line = 3, las = 1) 396 | } else 397 | { 398 | mtext(ylab, side = 2, cex = par("cex"), line = 3) 399 | ylab = "" 400 | } 401 | 402 | if (n.print >= 0) 403 | { 404 | atdf <- pretty(index, n = n.print) 405 | plotnz <- approx(x = index, y = x$nzero, xout = atdf, rule = 2, method = "constant", f = approx.f)$y 406 | axis(side=3, at = atdf, labels = plotnz, tick=FALSE, line=0, ...) 407 | } 408 | 409 | title(main, line = 2.5, ...) 
410 | 
411 | 
412 | 
413 | # Adjust the margins to make sure the labels fit
414 | labwidth <- ifelse(labsize > 0, max(strwidth(rownames(nbeta[!remove,]), "inches", labsize)), 0)
415 | margins <- par("mai")
416 | par("mai" = c(margins[1:3], max(margins[4], labwidth*1.4)))
417 | if ( labsize > 0 && !is.null(rownames(nbeta)) )
418 | {
419 | take <- which(!remove)
420 | for (i in 1:sum(!remove)) {
421 | j <- take[i]
422 | axis(4, at = nbeta[j, ncol(nbeta)], labels = rownames(nbeta)[j],
423 | las=1, cex.axis=labsize, col.axis = colseq[i],
424 | lty = (i - 1) %% 5 + 1, col = colseq[i], ...)
425 | }
426 | }
427 | par("mai" = margins)
428 | }
429 | 
430 | 
431 | #' Prediction function for fitted cross validation ordinis objects
432 | #' 
433 | #' @param object fitted \code{"cv.ordinis"} model object
434 | #' @param newx Matrix of new values for \code{x} at which predictions are to be made. Must be a matrix; can be sparse as in the
435 | #' \code{CsparseMatrix} objects of the \pkg{Matrix} package.
436 | #' This argument is not used for \code{type = c("coefficients","nonzero")}
437 | #' @param s Value(s) of the penalty parameter lambda at which predictions are required. Default is the entire sequence used to create
438 | #' the model. For \code{predict.cv.ordinis()}, can also specify \code{"lambda.1se"} or \code{"lambda.min"} for best lambdas estimated by cross validation
439 | #' @param ... used to pass the other arguments for predict.ordinis
440 | #' @return An object depending on the type argument
441 | #' @method predict cv.ordinis
442 | #' @export
443 | #' @examples
444 | #' set.seed(123)
445 | #' n.obs <- 1e4
446 | #' n.vars <- 100
447 | #' n.obs.test <- 1e3
448 | #' 
449 | #' true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15))
450 | #' 
451 | #' x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars)
452 | #' y <- rnorm(n.obs, sd = 3) + x %*% true.beta
453 | #' x.test <- matrix(rnorm(n.obs.test * n.vars), n.obs.test, n.vars)
454 | #' y.test <- rnorm(n.obs.test, sd = 3) + x.test %*% true.beta
455 | #' 
456 | #' fit <- cv.ordinis(x = x, y = y,
457 | #' gamma = 1.4,
458 | #' nlambda = 10)
459 | #' 
460 | #' 
461 | #' preds.best <- predict(fit, newx = x.test, type = "response")
462 | #' 
463 | #' apply(preds.best, 2, function(x) mean((y.test - x) ^ 2))
464 | #' 
465 | predict.cv.ordinis <- function(object, newx,
466 | s=c("lambda.min", "lambda.1se"), ...)
467 | {
468 | if(is.numeric(s))lambda=s
469 | else
470 | if(is.character(s)){
471 | s=match.arg(s)
472 | lambda=object[[s]]
473 | }
474 | 
475 | else stop("Invalid form for s")
476 | predict(object$ordinis.fit, newx, s=lambda, ...)
477 | }
478 | 
479 | 
480 | 
481 | #' Plot method for fitted cv.ordinis objects
482 | #' 
483 | #' @param sign.lambda Either plot against log(lambda) (default) or its negative if \code{sign.lambda = -1}.
484 | #' @rdname plot
485 | #' @method plot cv.ordinis
486 | #' @export
487 | #' @examples
488 | #' set.seed(123)
489 | #' n.obs <- 100
490 | #' n.vars <- 200
491 | #' 
492 | #' true.beta <- c(runif(15, -0.5, 0.5), rep(0, n.vars - 15))
493 | #' 
494 | #' x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars)
495 | #' y <- rnorm(n.obs, sd = 3) + x %*% true.beta
496 | #' 
497 | #' fit <- cv.ordinis(x = x, y = y, gamma = 1.4)
498 | #' 
499 | #' plot(fit)
500 | #' 
501 | plot.cv.ordinis <- function(x, sign.lambda = 1, ...)
502 | { 503 | # modified from glmnet 504 | object = x 505 | 506 | main.txt <- "" 507 | 508 | xlab=expression(log(lambda)) 509 | if(sign.lambda<0)xlab=paste("-",xlab,sep="") 510 | plot.args=list(x = sign.lambda * log(object$lambda), 511 | y = object$cvm, 512 | ylim = range(object$cvup, object$cvlo), 513 | xlab = xlab, 514 | ylab = object$name, 515 | type = "n") 516 | new.args=list(...) 517 | if(length(new.args))plot.args[names(new.args)]=new.args 518 | do.call("plot", plot.args) 519 | error.bars(sign.lambda * log(object$lambda), 520 | object$cvup, 521 | object$cvlo, width = 0.005) 522 | points(sign.lambda*log(object$lambda), object$cvm, pch=20, col="dodgerblue") 523 | axis(side=3,at=sign.lambda*log(object$lambda),labels = paste(object$nzero), tick=FALSE, line=0, ...) 524 | abline(v = sign.lambda * log(object$lambda.min), lty=2, lwd = 2, col = "firebrick1") 525 | abline(v = sign.lambda * log(object$lambda.1se), lty=2, lwd = 2, col = "firebrick1") 526 | title(main.txt, line = 2.5, ...) 527 | invisible() 528 | } 529 | 530 | 531 | 532 | #' log likelihood function for fitted ordinis objects 533 | #' 534 | #' @param object fitted "ordinis" model object. 535 | #' @param REML an optional logical value. If \code{TRUE} the 536 | #' restricted log-likelihood is returned, else, if \code{FALSE}, 537 | #' the log-likelihood is returned. Defaults to \code{FALSE}. 538 | #' @param ... not used 539 | #' @rdname logLik 540 | #' @export 541 | #' @examples 542 | #' set.seed(123) 543 | #' n.obs <- 200 544 | #' n.vars <- 500 545 | #' 546 | #' true.beta <- c(runif(15, -0.25, 0.25), rep(0, n.vars - 15)) 547 | #' x <- matrix(rnorm(n.obs * n.vars), n.obs, n.vars) 548 | #' y <- rnorm(n.obs, sd = 3) + x %*% true.beta 549 | #' 550 | #' fit <- ordinis(x = x, y = y) 551 | #' 552 | #' logLik(fit) 553 | #' 554 | #' 555 | logLik.ordinis <- function(object, REML = FALSE, ...) { 556 | # taken from ncvreg. Thanks to Patrick Breheny. 557 | n <- as.numeric(object$nobs) 558 | df <- object$nzero + object$intercept 559 | 560 | if (object$family == "gaussian") 561 | { 562 | if (REML) 563 | { 564 | rdf <- n - df 565 | } else 566 | { 567 | rdf <- n 568 | } 569 | 570 | resid.ss <- object$loss 571 | logL <- -0.5 * n * (log(2 * pi) - log(rdf) + log(resid.ss)) - 0.5 * rdf 572 | } else if (object$family == "binomial") 573 | { 574 | logL <- -1 * object$deviance 575 | } else if (object$family == "poisson") 576 | { 577 | stop("poisson not complete yet") 578 | #y <- object$y 579 | #ind <- y != 0 580 | #logL <- -object$loss + sum(y[ind] * log(y[ind])) - sum(y) - sum(lfactorial(y)) 581 | } else if (object$family == "coxph") 582 | { 583 | logL <- -1e99 584 | } 585 | 586 | attr(logL,"df") <- df 587 | attr(logL,"nobs") <- n 588 | class(logL) <- "logLik" 589 | logL 590 | } 591 | -------------------------------------------------------------------------------- /src/CoordGaussianDense.h: -------------------------------------------------------------------------------- 1 | #ifndef COORDGAUSSIANDENSE_H 2 | #define COORDGAUSSIANDENSE_H 3 | 4 | #include "CoordBase.h" 5 | #include "utils.h" 6 | 7 | // minimize 1/2 * ||y - X * beta||^2 + lambda * ||beta||_1 8 | // 9 | // In ADMM form, 10 | // minimize f(x) + g(z) 11 | // s.t. 
x - z = 0
12 | //
13 | // x => beta
14 | // z => -X * beta
15 | // A => X
16 | // b => y
17 | // f(x) => 1/2 * ||Ax - b||^2
18 | // g(z) => lambda * ||z||_1
19 | class CoordGaussianDense: public CoordBase<Eigen::SparseVector<double> > //Eigen::SparseVector
20 | {
21 | protected:
22 | typedef float Scalar;
23 | typedef double Double;
24 | typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> Matrix;
25 | typedef Eigen::Matrix<double, Eigen::Dynamic, 1> Vector;
26 | typedef Eigen::Map<const Matrix> MapMat;
27 | typedef Eigen::Map<const Vector> MapVec;
28 | typedef const Eigen::Ref<const Matrix> ConstGenericMatrix;
29 | typedef const Eigen::Ref<const Vector> ConstGenericVector;
30 | typedef Eigen::SparseMatrix<double> SpMat;
31 | typedef Eigen::SparseVector<double> SparseVector;
32 | typedef Eigen::SparseVector<int> SparseVectori;
33 | 
34 | typedef SparseVector::InnerIterator InIterVec;
35 | typedef SparseVectori::InnerIterator InIterVeci;
36 | 
37 | MapMat datX; // data matrix
38 | MapVec datY; // response vector
39 | MapVec weights; // weight vector
40 | 
41 | Scalar lambda, lambda_ridge, gamma; // L1 penalty
42 | 
43 | double threshval;
44 | VectorXd resid_cur;
45 | 
46 | std::string penalty;
47 | bool intercept;
48 | ArrayXd penalty_factor; // penalty multiplication factors
49 | MapMat limits;
50 | double alpha;
51 | int penalty_factor_size;
52 | 
53 | VectorXd XY; // X'Y
54 | VectorXd Xsq; // colSums(X^2)
55 | VectorXd Xtr; // X'resid
56 | 
57 | Scalar lambda0; // minimum lambda to make coefficients all zero
58 | 
59 | double lprev;
60 | 
61 | double beta0, weights_sum, resids_sum;
62 | 
63 | // pointer we will set to one of the thresholding functions
64 | typedef double (*thresh_func_ptr)(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom);
65 | 
66 | thresh_func_ptr thresh_func;
67 | 
68 | /*
69 | static void soft_threshold(SparseVector &res, const Vector &vec, const double &penalty)
70 | {
71 | int v_size = vec.size();
72 | res.setZero();
73 | res.reserve(v_size);
74 | 
75 | const double *ptr = vec.data();
76 | for(int i = 0; i < v_size; i++)
77 | {
78 | if(ptr[i] > penalty)
79 | res.insertBack(i) = ptr[i] - penalty;
80 | else if(ptr[i] < -penalty)
81 | res.insertBack(i) = ptr[i] + penalty;
82 | }
83 | }
84 | */
85 | 
86 | void update_intercept()
87 | {
88 | 
89 | if (intercept)
90 | {
91 | resids_sum = (resid_cur).sum();
92 | 
93 | double beta0_delta = resids_sum / weights_sum;
94 | 
95 | beta0 += beta0_delta;
96 | 
97 | // update the (weighted) working residual!
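// beta0 was shifted by sum(resid) / sum(weights); remove that shift from each
// weighted residual so resid_cur stays consistent with the updated intercept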
98 | resid_cur.array() -= beta0_delta * weights.array(); 99 | } 100 | } 101 | 102 | double compute_loss() 103 | { 104 | //double sum_squares = 0.5 * (resid_cur.array().square().sum()) / double(nobs); 105 | double sum_squares = 0.5 * (resid_cur.array().square().sum()); 106 | double penalty_part = 0.0; 107 | 108 | /* 109 | if (penalty_factor_size < 1) 110 | { 111 | penalty_part = lambda * beta.array().abs().matrix().sum(); 112 | } else 113 | { 114 | penalty_part = lambda * (beta.array() * penalty_factor.array()).abs().matrix().sum(); 115 | } 116 | */ 117 | 118 | return (sum_squares + penalty_part); 119 | } 120 | 121 | static double soft_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 122 | { 123 | 124 | if (std::abs(value) <= penalty) 125 | return(0.0); 126 | else if (value > penalty) 127 | return( (value - penalty) / (denom + l2) ); 128 | else 129 | return( (value + penalty) / (denom + l2) ); 130 | 131 | /* // this ordering is slower for high-dimensional problems 132 | if(value > penalty) 133 | return( (value - penalty) / (denom + l2) ); 134 | else if(value < -penalty) 135 | return( (value + penalty) / (denom + l2) ); 136 | else 137 | return(0.0); 138 | */ 139 | } 140 | 141 | static double scad_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 142 | { 143 | double val_abs = std::abs(value); 144 | 145 | if (val_abs <= penalty) 146 | return(0.0); 147 | else if (val_abs <= penalty * (1.0 + l2) + penalty) 148 | { 149 | if(value > penalty) 150 | return((value - penalty) / ( denom + denom * l2 )); 151 | else 152 | return((value + penalty) / ( denom + denom * l2 )); 153 | } else if (val_abs <= gamma * penalty * (1.0 + l2)) 154 | { 155 | if ((gamma - 1.0) * value > gamma * penalty) 156 | return( (value - gamma * penalty / (gamma - 1.0)) / (denom * ( 1.0 - 1.0 / (gamma - 1.0) + l2 )) ); 157 | else 158 | return( (value + gamma * penalty / (gamma - 1.0)) / (denom * ( 1.0 - 1.0 / (gamma - 1.0) + l2 )) ); 159 | } else 160 | { 161 | return(value / (denom + denom * l2)); 162 | } 163 | 164 | } 165 | 166 | /* 167 | static double mcp_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 168 | { 169 | if (std::abs(value) > gamma * penalty * (denom + l2)) 170 | return(value / (denom + l2)); 171 | else if(value > penalty) 172 | return((value - penalty) / ( (denom + l2 - 1.0 / gamma) )); 173 | else if(value < -penalty) 174 | return((value + penalty) / ( (denom + l2 - 1.0 / gamma) )); 175 | else 176 | return(0.0); 177 | } 178 | */ 179 | static double mcp_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 180 | { 181 | double val_abs = std::abs(value); 182 | 183 | if (val_abs <= penalty) 184 | return(0.0); 185 | else if (val_abs <= gamma * penalty * (1.0 + l2)) 186 | { 187 | if(value > penalty) 188 | return((value - penalty) / ( denom * (1.0 - 1.0 / gamma + l2) )); 189 | else 190 | return((value + penalty) / ( denom * (1.0 - 1.0 / gamma + l2) )); 191 | } else 192 | return(value / (denom + denom * l2)); 193 | 194 | /* 195 | if (std::abs(value) > gamma * penalty * (1.0 + l2)) 196 | return(value / (denom + denom * l2)); 197 | else if(value > penalty) 198 | return((value - penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 199 | else if(value < -penalty) 200 | return((value + penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 201 | else 202 | return(0.0); 203 | */ 204 | 205 | } 206 | 207 | void 
set_threshold_func() 208 | { 209 | if (penalty == "lasso") 210 | { 211 | thresh_func = &CoordGaussianDense::soft_threshold; 212 | } else if (penalty == "mcp") 213 | { 214 | thresh_func = &CoordGaussianDense::mcp_threshold; 215 | } else 216 | { 217 | thresh_func = &CoordGaussianDense::scad_threshold; 218 | } 219 | } 220 | 221 | //void next_beta(Vector &res, VectorXi &eligible) 222 | void next_beta(SparseVector &res, SparseVectori &eligible) 223 | { 224 | 225 | // now update intercept if necessary 226 | update_intercept(); 227 | 228 | int j; 229 | double grad; 230 | 231 | // if no penalty multiplication factors specified 232 | if (penalty_factor_size < 1) 233 | { 234 | for (InIterVeci i_(eligible); i_; ++i_) 235 | { 236 | int j = i_.index(); 237 | double beta_prev = beta.coeffRef( j ); //beta(j); 238 | 239 | // surprisingly it's faster to calculate this on an iteration-basis 240 | // and not pre-calculate it within each newton iteration.. 241 | if (Xsq(j) < 0.0) Xsq(j) = (datX.col(j).array().square()).matrix().mean(); 242 | 243 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 244 | 245 | grad = Xtr(j) + beta_prev * Xsq(j); 246 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 247 | 248 | threshval = thresh_func(grad, lambda, gamma, lambda_ridge, Xsq(j)); 249 | 250 | // apply param limits 251 | if (threshval < limits(1, j)) threshval = limits(1, j); 252 | if (threshval > limits(0, j)) threshval = limits(0, j); 253 | 254 | // update residual if the coefficient changes after 255 | // thresholding. 256 | if (beta_prev != threshval) 257 | { 258 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 259 | beta.coeffRef(j) = threshval; 260 | resid_cur.array() -= (threshval - beta_prev) * datX.col(j).array() * weights.array(); 261 | 262 | // update eligible set if necessary 263 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 264 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 265 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 266 | } else 267 | { 268 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 269 | { 270 | eligible_set.coeffRef(j) = 0; 271 | } 272 | } 273 | } 274 | } else //if penalty multiplication factors are used 275 | { 276 | for (InIterVeci i_(eligible); i_; ++i_) 277 | { 278 | int j = i_.index(); 279 | double beta_prev = beta.coeff( j ); //beta(j); 280 | 281 | // surprisingly it's faster to calculate this on an iteration-basis 282 | // and not pre-calculate it within each newton iteration.. 283 | if (Xsq(j) < 0.0) Xsq(j) = (datX.col(j).array().square()).matrix().mean(); 284 | 285 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 286 | 287 | grad = Xtr(j) + beta_prev * Xsq(j); 288 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 289 | 290 | threshval = thresh_func(grad, penalty_factor(j) * lambda, gamma, penalty_factor(j) * lambda_ridge, Xsq(j)); 291 | 292 | // apply param limits 293 | if (threshval < limits(1, j)) threshval = limits(1, j); 294 | if (threshval > limits(0, j)) threshval = limits(0, j); 295 | 296 | // update residual if the coefficient changes after 297 | // thresholding. 
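// (note: when the thresholded value is nonzero, the update below is
//  damped as 0.85 * new + 0.15 * previous -- a relaxation that
//  presumably stabilizes the active-set sweeps)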
298 | if (beta_prev != threshval) 299 | { 300 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 301 | beta.coeffRef(j) = threshval; 302 | resid_cur.array() -= (threshval - beta_prev) * datX.col(j).array() * weights.array(); 303 | 304 | // update eligible set if necessary 305 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 306 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 307 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 308 | } else 309 | { 310 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 311 | { 312 | eligible_set.coeffRef(j) = 0; 313 | } 314 | } 315 | } 316 | } 317 | 318 | } 319 | 320 | //void next_beta(Vector &res, VectorXi &eligible) 321 | void next_beta(SparseVector &res, VectorXi &eligible) 322 | { 323 | 324 | // now update intercept if necessary 325 | update_intercept(); 326 | 327 | int j; 328 | double grad; 329 | 330 | 331 | // if no penalty multiplication factors specified 332 | if (penalty_factor_size < 1) 333 | { 334 | for (j = 0; j < nvars; ++j) 335 | { 336 | if (eligible(j)) 337 | { 338 | double beta_prev = beta.coeff( j ); //beta(j); 339 | 340 | // surprisingly it's faster to calculate this on an iteration-basis 341 | // and not pre-calculate it within each newton iteration.. 342 | if (Xsq(j) < 0.0) Xsq(j) = (datX.col(j).array().square()).matrix().mean(); 343 | 344 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 345 | 346 | grad = Xtr(j) + beta_prev * Xsq(j); 347 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j) ; 348 | 349 | threshval = thresh_func(grad, lambda, gamma, lambda_ridge, Xsq(j)); 350 | 351 | // apply param limits 352 | if (threshval < limits(1, j)) threshval = limits(1, j); 353 | if (threshval > limits(0, j)) threshval = limits(0, j); 354 | 355 | // update residual if the coefficient changes after 356 | // thresholding. 357 | if (beta_prev != threshval) 358 | { 359 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 360 | beta.coeffRef(j) = threshval; 361 | resid_cur.array() -= (threshval - beta_prev) * datX.col(j).array() * weights.array(); 362 | 363 | // update eligible set if necessary 364 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 365 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 366 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 367 | } else 368 | { 369 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 370 | { 371 | eligible_set.coeffRef(j) = 0; 372 | } 373 | } 374 | } // end eligible set check 375 | } 376 | } else //if penalty multiplication factors are used 377 | { 378 | for (j = 0; j < nvars; ++j) 379 | { 380 | if (eligible(j)) 381 | { 382 | double beta_prev = beta.coeff( j ); //beta(j); 383 | 384 | // surprisingly it's faster to calculate this on an iteration-basis 385 | // and not pre-calculate it within each newton iteration.. 
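// (Xsq acts as a lazy cache: init() fills it with -1.0, and each
//  column's mean square is computed only the first time that
//  coordinate is actually visited)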
386 | if (Xsq(j) < 0.0) Xsq(j) = (datX.col(j).array().square()).matrix().mean(); 387 | 388 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 389 | 390 | grad = Xtr(j) + beta_prev * Xsq(j); 391 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 392 | 393 | threshval = thresh_func(grad, penalty_factor(j) * lambda, gamma, penalty_factor(j) * lambda_ridge, Xsq(j)); 394 | 395 | // apply param limits 396 | if (threshval < limits(1,j)) threshval = limits(1,j); 397 | if (threshval > limits(0,j)) threshval = limits(0,j); 398 | 399 | // update residual if the coefficient changes after 400 | // thresholding. 401 | if (beta_prev != threshval) 402 | { 403 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 404 | beta.coeffRef(j) = threshval; 405 | resid_cur.array() -= (threshval - beta_prev) * datX.col(j).array() * weights.array(); 406 | 407 | // update eligible set if necessary 408 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 409 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 410 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 411 | } else 412 | { 413 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 414 | { 415 | eligible_set.coeffRef(j) = 0; 416 | } 417 | } 418 | } // end eligible set check 419 | } 420 | } 421 | 422 | } 423 | 424 | 425 | 426 | // Calculate ||v1 - v2||^2 when v1 and v2 are sparse 427 | static double diff_squared_norm(const SparseVector &v1, const SparseVector &v2) 428 | { 429 | const int n1 = v1.nonZeros(), n2 = v2.nonZeros(); 430 | const double *v1_val = v1.valuePtr(), *v2_val = v2.valuePtr(); 431 | const int *v1_ind = v1.innerIndexPtr(), *v2_ind = v2.innerIndexPtr(); 432 | 433 | double r = 0.0; 434 | int i1 = 0, i2 = 0; 435 | while(i1 < n1 && i2 < n2) 436 | { 437 | if(v1_ind[i1] == v2_ind[i2]) 438 | { 439 | double val = v1_val[i1] - v2_val[i2]; 440 | r += val * val; 441 | i1++; 442 | i2++; 443 | } else if(v1_ind[i1] < v2_ind[i2]) { 444 | r += v1_val[i1] * v1_val[i1]; 445 | i1++; 446 | } else { 447 | r += v2_val[i2] * v2_val[i2]; 448 | i2++; 449 | } 450 | } 451 | while(i1 < n1) 452 | { 453 | r += v1_val[i1] * v1_val[i1]; 454 | i1++; 455 | } 456 | while(i2 < n2) 457 | { 458 | r += v2_val[i2] * v2_val[i2]; 459 | i2++; 460 | } 461 | 462 | return r; 463 | } 464 | 465 | 466 | public: 467 | CoordGaussianDense(ConstGenericMatrix &datX_, 468 | ConstGenericVector &datY_, 469 | ConstGenericVector &weights_, 470 | ArrayXd &penalty_factor_, 471 | ConstGenericMatrix &limits_, 472 | std::string &penalty_, 473 | bool intercept_, 474 | double alpha_ = 1.0, 475 | double tol_ = 1e-6) : 476 | CoordBase > 477 | (datX_.rows(), datX_.cols(), tol_), 478 | datX(datX_.data(), datX_.rows(), datX_.cols()), 479 | datY(datY_.data(), datY_.size()), 480 | weights(weights_.data(), weights_.size()), 481 | resid_cur(datY_.array() * weights.array()), //assumes we start our beta estimate at 0 482 | penalty(penalty_), 483 | intercept(intercept_), 484 | penalty_factor(penalty_factor_), 485 | limits(limits_.data(), limits_.rows(), limits_.cols()), 486 | alpha(alpha_), 487 | penalty_factor_size(penalty_factor_.size()), 488 | XY((datX.transpose() * (datY.array() * weights.array()).matrix()) ), 489 | Xsq(datX_.cols()), Xtr(datX_.cols()) 490 | {} 491 | 492 | double get_lambda_zero() 493 | { 494 | if (penalty_factor_size > 0) 495 | { 496 | 497 | lambda0 = 0; 498 | for (int i = 0; i < penalty_factor.size(); ++i) 499 | { 500 | if (penalty_factor(i) != 
0.0) 501 | { 502 | double valcur = std::abs(XY(i)) / penalty_factor(i); 503 | 504 | if (valcur > lambda0) lambda0 = valcur; 505 | } 506 | } 507 | } else 508 | { 509 | lambda0 = XY.cwiseAbs().maxCoeff(); 510 | } 511 | 512 | lambda0 /= ( alpha * 1.0 * double(nobs)); //std::pow(1e-6, 1.0/(99.0)); 513 | 514 | 515 | 516 | return lambda0; 517 | } 518 | 519 | // init() is a cold start for the first lambda 520 | void init(double lambda_, double gamma_) 521 | { 522 | 523 | set_threshold_func(); 524 | 525 | beta.setZero(); 526 | 527 | lambda = lambda_ * alpha; 528 | lambda_ridge = lambda_ * (1.0 - alpha); 529 | 530 | gamma = gamma_; 531 | 532 | eligible_set.setZero(); 533 | 534 | eligible_set.reserve(std::min(nobs, nvars)); 535 | beta.reserve(std::min(nobs, nvars)); 536 | 537 | nzero = 0; 538 | beta0 = 0.0; 539 | 540 | Xsq.fill(-1.0); 541 | 542 | weights_sum = weights.sum(); 543 | 544 | double cutoff; 545 | 546 | if (penalty == "lasso") 547 | { 548 | cutoff = 2.0 * lambda - lambda0; 549 | } else if (penalty == "mcp") 550 | { 551 | cutoff = lambda + gamma / (gamma - 1.0) * (lambda - lambda0); 552 | } else 553 | { 554 | cutoff = lambda + gamma / (gamma - 2.0) * (lambda - lambda0); 555 | } 556 | 557 | 558 | 559 | if (penalty_factor_size < 1) 560 | { 561 | for (int j = 0; j < nvars; ++j) if (std::abs(XY(j)) > (cutoff)) eligible_set.coeffRef(j) = 1; 562 | } else 563 | { 564 | for (int j = 0; j < nvars; ++j) if (std::abs(XY(j)) > (cutoff * penalty_factor(j))) eligible_set.coeffRef(j) = 1; 565 | } 566 | 567 | //beta.reserve( std::max(eligible_set.sum() + 10, std::min(nvars, nobs)) ); 568 | 569 | } 570 | // when computing for the next lambda, we can use the 571 | // current main_x, aux_z, dual_y and rho as initial values 572 | void init_warm(double lambda_, double gamma_) 573 | { 574 | lprev = lambda; 575 | lambda = lambda_ * alpha; 576 | lambda_ridge = lambda_ * (1.0 - alpha); 577 | gamma = gamma_; 578 | 579 | eligible_set.setZero(); 580 | 581 | eligible_set.reserve(std::min(nobs * 2, nvars)); 582 | 583 | nzero = 0; 584 | 585 | double cutoff; 586 | 587 | if (penalty == "lasso") 588 | { 589 | cutoff = 2.0 * lambda - lprev; 590 | } else if (penalty == "mcp") 591 | { 592 | cutoff = lambda + gamma / (gamma - 1.0) * (lambda - lprev); 593 | } else 594 | { 595 | cutoff = lambda + gamma / (gamma - 2.0) * (lambda - lprev); 596 | } 597 | 598 | 599 | 600 | if (penalty_factor_size < 1) 601 | { 602 | for (int j = 0; j < nvars; ++j) if (std::abs(Xtr(j)) > std::pow(nobs, 0.85) * (cutoff)) eligible_set.coeffRef(j) = 1; 603 | } else 604 | { 605 | for (int j = 0; j < nvars; ++j) if (std::abs(Xtr(j)) > (std::pow(nobs, 0.85) * cutoff * penalty_factor(j))) eligible_set.coeffRef(j) = 1; 606 | } 607 | 608 | //beta.reserve( std::max(eligible_set.sum() + 10, std::min(nvars, nobs)) ); 609 | } 610 | 611 | 612 | int solve(int maxit) 613 | { 614 | //int i; 615 | 616 | int current_iter = 0; 617 | 618 | // run once through all variables 619 | //current_iter++; 620 | //beta_prev = beta; 621 | //ineligible_set.fill(1); 622 | 623 | //update_beta(ineligible_set); 624 | 625 | while(current_iter < maxit) 626 | { 627 | while(current_iter < maxit) 628 | { 629 | current_iter++; 630 | beta_prev = beta; 631 | 632 | update_beta(eligible_set); 633 | 634 | if(converged()) break; 635 | } 636 | 637 | current_iter++; 638 | beta_prev = beta; 639 | ineligible_set.fill(1); 640 | 641 | for (InIterVeci i_(eligible_set); i_; ++i_) 642 | { 643 | ineligible_set(i_.index()) = 0; 644 | } 645 | 646 | update_beta(ineligible_set); 647 | 648 | if(converged()) break; 
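// (active-set strategy: iterate to convergence on the current
//  eligible set, then sweep once over the remaining 'ineligible'
//  variables; if that sweep changes nothing we are done, otherwise
//  newly nonzero coordinates have joined the eligible set and the
//  outer loop repeats)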
649 | }
650 | 
651 | // force zeros to be actual zeros
652 | beta.prune(0.0);
653 | nzero = beta.nonZeros();
654 | 
655 | /*
656 | for (int j = 0; j < nvars; ++j)
657 | {
658 | if (beta(j) != 0)
659 | ++nzero;
660 | }
661 | */
662 | 
663 | 
664 | loss = compute_loss();
665 | 
666 | // print_footer();
667 | 
668 | return current_iter;
669 | }
670 | 
671 | virtual double get_intercept() { return beta0; }
672 | };
673 | 
674 | 
675 | 
676 | #endif // COORDGAUSSIANDENSE_H
677 | 
--------------------------------------------------------------------------------
/R/ordinis_call.R:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | #' Fitting Penalized Regression Models Using the Coordinate Descent Algorithm
4 | #' 
5 | #' @description ordinis provides estimation of linear models with the lasso, adaptive lasso, MCP, and SCAD penalties
6 | #' 
7 | #' 
8 | #' @param x The design matrix
9 | #' @param y The response vector
10 | #' @param weights a vector of weights of length equal to the length of \code{y}. \code{weights} are NOT standardized or scaled; the user must
11 | #' do this if desired
12 | #' @param offset A vector of length \code{nobs} that is included in the linear predictor
13 | #' @param penalty a string indicating which penalty to use. \code{"lasso"}, \code{"alasso"} (adaptive lasso), \code{"mcp"}, and \code{"scad"}
14 | #' are available
15 | #' @param lambda A user provided sequence of \eqn{\lambda}. If set to
16 | #' \code{NULL}, the program will calculate its own sequence
17 | #' according to \code{nlambda} and \code{lambda.min.ratio},
18 | #' which starts from \eqn{\lambda_0} (with this
19 | #' \eqn{\lambda} all coefficients will be zero) and ends at
20 | #' \code{lambda0 * lambda.min.ratio}, containing
21 | #' \code{nlambda} values equally spaced on the log scale.
22 | #' It is recommended to set this parameter to \code{NULL}
23 | #' (the default).
24 | #' @param alpha mixing parameter between 0 and 1 for the elastic net. \code{alpha = 1} is for the lasso, \code{alpha = 0} is for ridge regression
25 | #' @param gamma parameter for MCP/SCAD. Defaults to the values recommended in the papers corresponding to each penalty
26 | #' @param penalty.factor a vector with length equal to the number of columns in \code{x} to be multiplied by lambda. By default
27 | #' it is a vector of 1s. \code{penalty.factor} is NOT scaled
28 | #' @param upper.limits a vector of length \code{ncol(x)} of upper limits for each coefficient. Can be a single value, which will
29 | #' then be applied to each coefficient. Must be non-negative.
30 | #' @param lower.limits a vector of length \code{ncol(x)} of lower limits for each coefficient. Can be a single value, which will
31 | #' then be applied to each coefficient. Cannot be greater than 0.
32 | #' @param nlambda Number of values in the \eqn{\lambda} sequence. Only used
33 | #' when the program calculates its own \eqn{\lambda}
34 | #' (by setting \code{lambda = NULL}).
35 | #' @param lambda.min.ratio Smallest value in the \eqn{\lambda} sequence
36 | #' as a fraction of \eqn{\lambda_0}. See
37 | #' the explanation of the \code{lambda}
38 | #' argument. This parameter is only used when
39 | #' the program calculates its own \eqn{\lambda}
40 | #' (by setting \code{lambda = NULL}). The default
41 | #' value is the same as in \pkg{glmnet}: 0.001 if
42 | #' \code{nrow(x) >= ncol(x)} and 0.05 otherwise.
43 | #' @param family family of the underlying model. \code{"gaussian"} for continuous responses and \code{"binomial"} for binary responses are built in; \code{glm} family functions are accepted experimentally
44 | #' @param intercept Whether to fit an intercept in the model. Default is \code{TRUE}. 
45 | #' @param standardize Whether to standardize the design matrix before
46 | #' fitting the model. Default is \code{TRUE}. Fitted coefficients
47 | #' are always returned on the original scale.
48 | #' @param dfmax Maximum number of variables allowed in the model
49 | #' @param maxit Maximum number of coordinate descent iterations.
50 | #' @param tol convergence tolerance for the coordinate descent iterations.
51 | #' @param maxit.irls Maximum number of IRLS (iteratively reweighted least squares) iterations for non-Gaussian models.
52 | #' @param tol.irls convergence tolerance for the IRLS iterations.
53 | #' 
54 | #' @examples
55 | #' set.seed(123)
56 | #' n = 100
57 | #' p = 1000
58 | #' b = c(runif(10, min = 0.1, max = 1), rep(0, p - 10))
59 | #' x = matrix(rnorm(n * p, sd = 1.5), n, p)
60 | #' y = drop(x %*% b) + rnorm(n)
61 | #' 
62 | #' 
63 | #' ## fit a lasso model with 100 tuning parameter values
64 | #' res <- ordinis(x, y)
65 | #' 
66 | #' y2 <- 1 * (y > 0)
67 | #' y3 <- exp(y)
68 | #' 
69 | #' resb <- ordinis(x, y2, family = "binomial")
70 | #' 
71 | #' @export
72 | ordinis <- function(x,
73 | y,
74 | weights = rep(1, NROW(y)),
75 | offset = NULL,
76 | family = NULL,
77 | penalty = c("lasso", "alasso", "mcp", "scad"),
78 | lambda = numeric(0),
79 | alpha = 1,
80 | gamma = ifelse(penalty == "scad", 3.7, 1.4),
81 | penalty.factor = NULL,
82 | upper.limits = rep(Inf, NCOL(x)),
83 | lower.limits = rep(-Inf, NCOL(x)),
84 | nlambda = 100L,
85 | lambda.min.ratio = NULL,
86 | intercept = TRUE,
87 | standardize = TRUE,
88 | dfmax = nvars,
89 | maxit = NULL,
90 | tol = NULL,
91 | maxit.irls = 25L,
92 | tol.irls = 1e-3
93 | )
94 | {
95 | 
96 | x <- as.matrix(x)
97 | y <- as.numeric(y)
98 | 
99 | n <- nrow(x)
100 | p <- nvars <- ncol(x)
101 | 
102 | intercept <- as.logical(intercept)
103 | standardize <- as.logical(standardize)
104 | penalty <- match.arg(penalty)
105 | 
106 | if (is.null(family) || (is.character(family) && (family == "gaussian" || family == "binomial")))
107 | {
108 | glm_fam <- FALSE
109 | if (is.null(family)) family <- "gaussian"
110 | 
111 | if (is.null(maxit))
112 | {
113 | if (family == "gaussian")
114 | {
115 | maxit <- 5000L
116 | } else
117 | {
118 | maxit <- 500L
119 | }
120 | }
121 | 
122 | if (is.null(tol))
123 | {
124 | if (family == "gaussian")
125 | {
126 | tol <- 1e-4
127 | } else
128 | {
129 | tol <- 1e-3
130 | }
131 | }
132 | 
133 | } else
134 | {
135 | glm_fam <- TRUE
136 | message("Use of glm family() functions is still experimental. 
Does not always work yet") 137 | if (is.character(family)) 138 | { 139 | family <- get(family, mode = "function", envir = parent.frame()) 140 | } 141 | if (is.function(family)) 142 | { 143 | family <- family() 144 | } 145 | if (is.null(family$family)) 146 | { 147 | print(family) 148 | stop("'family' not recognized") 149 | } 150 | 151 | if (is.null(maxit)) 152 | { 153 | if (family$family == "gaussian") 154 | { 155 | maxit <- 5000L 156 | } else 157 | { 158 | maxit <- 1500L 159 | } 160 | } 161 | 162 | if (is.null(tol)) 163 | { 164 | if (family$family == "gaussian") 165 | { 166 | tol <- 1e-4 167 | } else 168 | { 169 | tol <- 1e-3 170 | } 171 | } 172 | } 173 | 174 | if (!is.null(dim(x))) 175 | { 176 | vnames <- colnames(x) 177 | 178 | if (is.null(vnames)) 179 | { 180 | vnames <- paste0("V", 1:p) 181 | } 182 | } 183 | 184 | if (!is.null(offset)) 185 | { 186 | if (length(offset) != n) stop("offset must be of same length as response") 187 | } else 188 | { 189 | offset <- rep(0, n) 190 | } 191 | 192 | if (!glm_fam) 193 | { 194 | if (family == "binomial") 195 | { 196 | if (length(unique(y)) != 2) stop("y must only take 2 values") 197 | 198 | #if (penalty != "lasso") warning("non-lasso penalties for non-gaussian responses are experimental") 199 | } 200 | } 201 | 202 | ## taken from glmnet 203 | if(any(lower.limits>0)) { stop("Lower limits should be non-positive") } 204 | if(any(upper.limits<0)) { stop("Upper limits should be non-negative") } 205 | lower.limits[lower.limits == -Inf] <- -1e99 206 | upper.limits[upper.limits == Inf] <- 1e99 207 | if(length(lower.limits) < nvars) 208 | { 209 | if(length(lower.limits)==1) lower.limits <- rep(lower.limits, nvars) else stop("Require length 1 or nvars lower.limits") 210 | } else 211 | { 212 | lower.limits <- lower.limits[seq(nvars)] 213 | } 214 | if(length(upper.limits) < nvars) 215 | { 216 | if(length(upper.limits)==1) upper.limits <- rep(upper.limits, nvars) else stop("Require length 1 or nvars upper.limits") 217 | } else 218 | { 219 | upper.limits <- upper.limits[seq(nvars)] 220 | } 221 | 222 | if (n != NROW(y)) 223 | { 224 | stop("number of rows in x not equal to length of y") 225 | } 226 | 227 | if (NROW(weights) != NROW(y)) 228 | { 229 | stop("length of weights not equal to length of y") 230 | } 231 | 232 | if (is.null(penalty.factor)) 233 | { 234 | penalty.factor <- numeric(0) 235 | } else 236 | { 237 | if (length(penalty.factor) != p) 238 | { 239 | stop("penalty.factor must be same length as number of columns in x") 240 | } 241 | } 242 | 243 | lambda_val <- sort(as.numeric(lambda), decreasing = TRUE) 244 | 245 | if(any(lambda_val <= 0)) 246 | { 247 | stop("lambda must be positive") 248 | } 249 | 250 | 251 | if(nlambda[1] <= 0) 252 | { 253 | stop("nlambda must be a positive integer") 254 | } 255 | 256 | if(is.null(lambda.min.ratio)) 257 | { 258 | lmr_val <- ifelse(nrow(x) < ncol(x), 0.05, 0.001) 259 | } else 260 | { 261 | lmr_val <- as.numeric(lambda.min.ratio) 262 | } 263 | 264 | if(lmr_val >= 1 | lmr_val <= 0) 265 | { 266 | stop("lambda.min.ratio must be within (0, 1)") 267 | } 268 | 269 | lambda <- lambda_val 270 | nlambda <- as.integer(nlambda[1]) 271 | lambda.min.ratio <- lmr_val 272 | 273 | if (alpha > 1 | alpha < 0) stop("alpha must be between 0 and 1") 274 | 275 | if(maxit <= 0) 276 | { 277 | stop("maxit should be positive") 278 | } 279 | if(tol < 0) 280 | { 281 | stop("tol should be nonnegative") 282 | } 283 | 284 | maxit <- as.integer(maxit[1]) 285 | tol <- as.double(tol[1]) 286 | maxit.irls <- as.integer(maxit.irls[1]) 287 | tol.irls <- 
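# (each of these tuning arguments keeps only its first element, so
# accidentally vector-valued inputs are coerced to scalars before
# being handed to the C++ routines)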
as.double(tol.irls[1]) 288 | alpha <- as.double(alpha[1]) 289 | gamma <- as.double(gamma[1]) 290 | penalty <- as.character(penalty[1]) 291 | dfmax <- as.integer(dfmax[1]) 292 | 293 | 294 | penalty_orig <- penalty 295 | 296 | if (penalty == "alasso") 297 | { 298 | penalty <- "lasso" 299 | } 300 | 301 | if (glm_fam) 302 | { 303 | opts <- list(maxit = maxit, 304 | tol = tol, 305 | alpha = alpha, 306 | gamma = gamma, 307 | penalty = penalty, 308 | maxit.irls = maxit.irls, 309 | tol.irls = tol.irls, 310 | dfmax = dfmax, 311 | variance = family$variance, 312 | mu_eta = family$mu.eta, 313 | linkinv = family$linkinv, 314 | linkfun = family$linkfun, 315 | dev_resids = family$dev.resids, 316 | valideta = family$valideta, 317 | validmu = family$validmu) 318 | } else 319 | { 320 | opts <- list(maxit = maxit, 321 | tol = tol, 322 | alpha = alpha, 323 | gamma = gamma, 324 | penalty = penalty, 325 | maxit.irls = maxit.irls, 326 | tol.irls = tol.irls, 327 | dfmax = dfmax, 328 | variance = numeric(0), 329 | mu_eta = numeric(0), 330 | linkinv = numeric(0), 331 | linkfun = numeric(0), 332 | dev_resids = numeric(0), 333 | valideta = numeric(0), 334 | validmu = numeric(0)) 335 | } 336 | 337 | if (gamma <= 1) stop("gamma must be greater than 1") 338 | 339 | if (glm_fam || family == "binomial") 340 | { 341 | 342 | res <- coord_ordinis_dense_glm_cpp(x, 343 | y, 344 | weights, 345 | drop(offset), 346 | lambda, 347 | penalty.factor, 348 | rbind(upper.limits, lower.limits), 349 | nlambda, 350 | lambda.min.ratio, 351 | standardize, 352 | intercept, 353 | glm_fam, 354 | opts 355 | ) 356 | res$beta <- res$beta[, 1:res$last, drop = FALSE] 357 | 358 | res$beta <- as(res$beta, "sparseMatrix") 359 | 360 | res$deviance <- res$deviance[1:res$last] 361 | } else if (family == "gaussian") 362 | { 363 | res <- coord_ordinis_dense_cpp(x, 364 | y - drop(offset), 365 | weights, 366 | lambda, 367 | penalty.factor, 368 | rbind(upper.limits, lower.limits), 369 | nlambda, 370 | lambda.min.ratio, 371 | standardize, 372 | intercept, 373 | opts 374 | ) 375 | res$beta <- res$beta[, 1:res$last, drop = FALSE] 376 | 377 | res$beta <- as(res$beta, "sparseMatrix") 378 | 379 | res$fitted <- as.matrix(cbind(1, x) %*% res$beta) 380 | res$resid <- matrix(rep(y, ncol(res$beta)), ncol = ncol(res$beta) ) - res$fitted 381 | res$loss <- colSums(res$resid ^ 2) 382 | 383 | } 384 | 385 | 386 | if (penalty_orig == "alasso") 387 | { 388 | penwts <- as.vector(unname(1 / abs(res$beta[-1,ncol(res$beta)]))) 389 | 390 | if (length(penalty.factor)) 391 | { 392 | penalty.factor <- penalty.factor * penwts 393 | } else 394 | { 395 | penalty.factor <- penwts 396 | } 397 | 398 | 399 | if (glm_fam || family == "binomial") 400 | { 401 | 402 | res <- coord_ordinis_dense_glm_cpp(x, 403 | y, 404 | weights, 405 | drop(offset), 406 | lambda, 407 | penalty.factor, 408 | rbind(upper.limits, lower.limits), 409 | nlambda, 410 | lambda.min.ratio, 411 | standardize, 412 | intercept, 413 | glm_fam, 414 | opts 415 | ) 416 | res$beta <- res$beta[, 1:res$last, drop = FALSE] 417 | 418 | res$beta <- as(res$beta, "sparseMatrix") 419 | 420 | res$deviance <- res$deviance[1:res$last] 421 | } else if (family == "gaussian") 422 | { 423 | res <- coord_ordinis_dense_cpp(x, 424 | y - drop(offset), 425 | weights, 426 | lambda, 427 | penalty.factor, 428 | rbind(upper.limits, lower.limits), 429 | nlambda, 430 | lambda.min.ratio, 431 | standardize, 432 | intercept, 433 | opts 434 | ) 435 | res$beta <- res$beta[, 1:res$last, drop = FALSE] 436 | 437 | res$beta <- as(res$beta, "sparseMatrix") 438 | 439 
| res$fitted <- as.matrix(cbind(1, x) %*% res$beta)
440 | res$resid <- matrix(rep(y, ncol(res$beta)), ncol = ncol(res$beta)) - res$fitted
441 | res$loss <- colSums(res$resid ^ 2)
442 | 
443 | }
444 | }
445 | 
446 | rownames(res$beta) <- c("(Intercept)", vnames)
447 | 
448 | res$niter <- res$niter[1:res$last]
449 | res$lambda <- res$lambda[1:res$last]
450 | #res$losses <- res$losses[, 1:res$last, drop = FALSE]
451 | #res$losses.iter <- res$losses.iter[, 1:res$last, drop = FALSE]
452 | 
453 | res$nzero <- colSums(res$beta[-1,,drop=FALSE] != 0)
454 | 
455 | res$glm_fam <- glm_fam
456 | res$family <- family
457 | res$penalty <- penalty
458 | res$penalty.factor <- penalty.factor
459 | res$standardize <- standardize
460 | res$intercept <- intercept
461 | res$nobs <- n
462 | res$nvars <- p
463 | 
464 | if (!glm_fam)
465 | {
466 | class2 <- switch(family,
467 | "gaussian" = "cdgaussian",
468 | "binomial" = "cdbinomial")
469 | } else
470 | {
471 | class2 <- "glm_ordinis"
472 | }
473 | 
474 | 
475 | class(res) <- c("ordinis", class2)
476 | res
477 | }
478 | 
479 | 
480 | 
481 | 
482 | #' CV Fitting for a Lasso Model Using the Coordinate Descent Algorithm
483 | #' 
484 | #' @description Cross-validation for linear models with the lasso penalty,
485 | #' \deqn{\frac{1}{2n} \Vert y - X\beta \Vert_2^2 + \lambda \Vert \beta \Vert_1,}
486 | #' where \eqn{n} is the sample size and \eqn{\lambda} is a tuning
487 | #' parameter that controls the sparsity of \eqn{\beta}.
488 | #' 
489 | #' @param x The design matrix
490 | #' @param y The response vector
491 | #' @param lambda A user provided sequence of \eqn{\lambda}. If set to
492 | #' \code{NULL}, the program will calculate its own sequence
493 | #' according to \code{nlambda} and \code{lambda.min.ratio},
494 | #' which starts from \eqn{\lambda_0} (with this
495 | #' \eqn{\lambda} all coefficients will be zero) and ends at
496 | #' \code{lambda0 * lambda.min.ratio}, containing
497 | #' \code{nlambda} values equally spaced on the log scale.
498 | #' It is recommended to set this parameter to \code{NULL}
499 | #' (the default).
500 | #' @param gamma parameter for MCP/SCAD (see \code{ordinis})
501 | #' @param type.measure measure to evaluate for cross-validation. The default is \code{type.measure = "deviance"},
502 | #' which uses squared error for gaussian models (a.k.a. \code{type.measure = "mse"} there) and deviance for logistic
503 | #' regression. \code{type.measure = "class"} applies to binomial only. \code{type.measure = "auc"} is for two-class logistic
504 | #' regression only. \code{type.measure = "mse"} or \code{type.measure = "mae"} (mean absolute error) can be used by all models;
505 | #' they measure the deviation from the fitted mean to the response.
506 | #' @param nfolds number of folds for cross-validation. Default is 10; 3 is the smallest value allowed.
507 | #' @param foldid an optional vector of values between 1 and \code{nfolds} specifying which fold each observation belongs to.
508 | #' @param grouped Like in \pkg{glmnet}, this is an experimental argument, with default \code{TRUE}, and can be ignored by most users.
509 | #' For all models, this refers to computing nfolds separate statistics, and then using their mean and estimated standard
510 | #' error to describe the CV curve. If \code{grouped = FALSE}, an error matrix is built up at the observation level from the
511 | #' predictions from the \code{nfolds} fits, and then summarized (does not apply to \code{type.measure = "auc"}).
512 | #' @param keep If \code{keep = TRUE}, a prevalidated array is returned containing fitted values for each observation
513 | #' and each value of lambda for each model. 
This means these fits are computed with this observation and the rest of its
514 | #' fold omitted. The foldid vector is also returned. Default is \code{keep = FALSE}
515 | #' @param parallel If \code{TRUE}, use parallel \code{foreach} to fit each fold. A parallel backend must be registered beforehand, e.g. via \pkg{doMC}.
516 | #' @param ... other parameters to be passed to the \code{ordinis} function
517 | #' 
518 | #' @examples set.seed(123)
519 | #' n = 100
520 | #' p = 1000
521 | #' b = c(runif(10, min = 0.2, max = 1), rep(0, p - 10))
522 | #' x = matrix(rnorm(n * p, sd = 3), n, p)
523 | #' y = drop(x %*% b) + rnorm(n)
524 | #' 
525 | #' ## fit lasso model with 100 tuning parameter values
526 | #' res <- cv.ordinis(x, y)
527 | #' 
528 | #' 
529 | #' @export
530 | cv.ordinis <- function(x,
531 | y,
532 | lambda = numeric(0),
533 | gamma = 3.7,
534 | type.measure = c("mse", "deviance", "class", "auc", "mae"),
535 | nfolds = 10,
536 | foldid = NULL,
537 | grouped = TRUE,
538 | keep = FALSE,
539 | parallel = FALSE,
540 | ...)
541 | {
542 | if (missing(type.measure))
543 | type.measure = "default"
544 | else type.measure = match.arg(type.measure)
545 | if (length(lambda) == 1)
546 | stop("Need more than one value of lambda for cv.ordinis")
547 | N = nrow(x)
548 | y = drop(y)
549 | 
550 | two.call = match.call(expand.dots = TRUE)
551 | which = match(c("type.measure", "nfolds", "foldid"), names(two.call), FALSE)
552 | if (any(which))
553 | two.call = two.call[-which]
554 | two.call[[1]] = as.name("ordinis")
555 | two.object = ordinis(x,
556 | y,
557 | lambda = lambda,
558 | gamma = gamma, ...)
559 | two.object$call = two.call
560 | 
561 | 
562 | nz = two.object$nzero
563 | 
564 | if (is.null(foldid))
565 | foldid = sample(rep(seq(nfolds), length = N))
566 | else nfolds = max(foldid)
567 | if (nfolds < 3)
568 | stop("nfolds must be at least 3; nfolds = 10 recommended")
569 | outlist = as.list(seq(nfolds))
570 | if (parallel) {
571 | outlist = foreach(i = seq(nfolds), .packages = c("ordinis")) %dopar%
572 | {
573 | which = foldid == i
574 | if (is.matrix(y))
575 | y_sub = y[!which, ]
576 | else y_sub = y[!which]
577 | 
578 | ordinis(x[!which, , drop = FALSE],
579 | y_sub,
580 | lambda = lambda,
581 | gamma = gamma,
582 | ...)
583 | }
584 | }
585 | else {
586 | for (i in seq(nfolds)) {
587 | which = foldid == i
588 | if (is.matrix(y))
589 | y_sub = y[!which, ]
590 | else y_sub = y[!which]
591 | 
592 | outlist[[i]] = ordinis(x[!which, , drop = FALSE],
593 | y_sub,
594 | lambda = lambda,
595 | gamma = gamma, ...) 
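# each fold's model is fit on the observations outside that fold;
# the held-out predictions are assembled and scored later by the
# cv.* method dispatched below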
596 | }
597 | }
598 | 
599 | min.idx <- min(min(sapply(outlist, function(obj) obj$last)), two.object$last)
600 | 
601 | two.object$beta <- two.object$beta[, 1:min.idx, drop = FALSE]
602 | two.object$niter <- two.object$niter[1:min.idx]
603 | two.object$lambda <- two.object$lambda[1:min.idx]
604 | two.object$losses <- two.object$losses[1:min.idx]
605 | two.object$losses.iter <- two.object$losses.iter[, 1:min.idx, drop = FALSE]
606 | two.object$nzero <- two.object$nzero[1:min.idx]
607 | 
608 | nz <- two.object$nzero
609 | 
610 | for (i in 1:length(outlist))
611 | {
612 | outlist[[i]]$beta <- outlist[[i]]$beta[, 1:min.idx, drop = FALSE]
613 | outlist[[i]]$niter <- outlist[[i]]$niter[1:min.idx]
614 | outlist[[i]]$lambda <- outlist[[i]]$lambda[1:min.idx]
615 | outlist[[i]]$losses <- outlist[[i]]$losses[1:min.idx]
616 | outlist[[i]]$losses.iter <- outlist[[i]]$losses.iter[, 1:min.idx, drop = FALSE]
617 | outlist[[i]]$nzero <- outlist[[i]]$nzero[1:min.idx]
618 | }
619 | 
620 | fun <- paste("cv", class(two.object)[[2]], sep = ".")
621 | cvstuff <- do.call(fun,
622 | list(outlist,
623 | two.object$lambda,
624 | x,
625 | y,
626 | foldid,
627 | type.measure,
628 | grouped,
629 | keep))
630 | cvm <- cvstuff$cvm
631 | cvsd <- cvstuff$cvsd
632 | cvname <- cvstuff$name
633 | 
634 | out <- list(lambda = two.object$lambda,
635 | cvm = cvm,
636 | cvsd = cvsd,
637 | cvup = cvm + cvsd,
638 | cvlo = cvm - cvsd,
639 | name = cvname,
640 | nzero = nz,
641 | ordinis.fit = two.object)
642 | if (keep) out <- c(out, list(fit.preval = cvstuff$fit.preval, foldid = foldid))
643 | lamin <- if (type.measure == "auc") getmin(two.object$lambda, -cvm, cvsd)
644 | else getmin(two.object$lambda, cvm, cvsd)
645 | obj <- c(out, as.list(lamin))
646 | class(obj) <- "cv.ordinis"
647 | obj
648 | 
649 | }
650 | 
651 | 
652 | 
653 | 
654 | 
655 | 
--------------------------------------------------------------------------------
/src/CoordLogisticDense.h:
--------------------------------------------------------------------------------
1 | #ifndef COORDLOGISTICDENSE_H
2 | #define COORDLOGISTICDENSE_H
3 | 
4 | #include "CoordBase.h"
5 | #include "utils.h"
6 | 
7 | // Coordinate descent, wrapped in IRLS reweighting, for the penalized
8 | // logistic regression problem:
9 | //
10 | // minimize -(1/n) * loglik(beta0, beta) + lambda * P(beta),
11 | // where 
P() is the lasso, MCP, or SCAD penalty (with an
12 | // elastic-net ridge component mixed in when alpha < 1) and loglik
13 | // is the weighted binomial log-likelihood.
14 | //
15 | // Each IRLS step builds a weighted least squares approximation to
16 | // the log-likelihood and solves it by coordinate descent over an
17 | // active ('eligible') set of variables, as in the Gaussian solver.
18 | //
19 | class CoordLogisticDense: public CoordBase<Eigen::SparseVector<double> >
20 | {
21 | protected:
22 | typedef float Scalar;
23 | typedef double Double;
24 | typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
25 | typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
26 | typedef Eigen::Map<const Matrix> MapMat;
27 | typedef Eigen::Map<const Vector> MapVec;
28 | typedef const Eigen::Ref<const Matrix> ConstGenericMatrix;
29 | typedef const Eigen::Ref<const Vector> ConstGenericVector;
30 | typedef Eigen::SparseMatrix<double> SpMat;
31 | typedef Eigen::SparseVector<double> SparseVector;
32 | typedef Eigen::SparseVector<int> SparseVectori;
33 | 
34 | typedef SparseVector::InnerIterator InIterVec;
35 | typedef SparseVectori::InnerIterator InIterVeci;
36 | 
37 | MapMat datX; // data matrix
38 | MapVec datY; // response vector
39 | MapVec weights; // weight vector
40 | MapVec offset; // offset vector
41 | 
42 | Scalar lambda, lambda_ridge, gamma; // l1 penalty, ridge penalty, and MCP/SCAD gamma
43 | 
44 | double threshval;
45 | VectorXd resid_cur, xbeta_cur, p, W, weightssqrt, z;
46 | 
47 | std::string penalty;
48 | ArrayXd penalty_factor; // penalty multiplication factors
49 | bool intercept;
50 | MapMat limits;
51 | double alpha;
52 | int maxit_irls;
53 | double tol_irls;
54 | int penalty_factor_size;
55 | 
56 | VectorXd XY; // X'Y
57 | VectorXd Xsq; // colSums(X^2)
58 | VectorXd Xtr; // X'resid
59 | 
60 | Scalar lambda0; // minimum lambda to make all coefficients zero
61 | 
62 | double lprev;
63 | 
64 | double beta0; // intercept
65 | 
66 | double weights_sum, resids_sum;
67 | 
68 | // pointer we will set to one of the thresholding functions
69 | typedef double (*thresh_func_ptr)(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom);
70 | 
71 | thresh_func_ptr thresh_func;
72 | 
73 | /*
74 | static void soft_threshold(SparseVector &res, const Vector &vec, const double &penalty)
75 | {
76 | int v_size = vec.size();
77 | res.setZero();
78 | res.reserve(v_size);
79 | 
80 | const double *ptr = vec.data();
81 | for(int i = 0; i < v_size; i++)
82 | {
83 | if(ptr[i] > penalty)
84 | res.insertBack(i) = ptr[i] - penalty;
85 | else if(ptr[i] < -penalty)
86 | res.insertBack(i) = ptr[i] + penalty;
87 | }
88 | }
89 | */
90 | 
91 | bool converged_irls()
92 | {
93 | //return (stopRule(beta, beta_prev_irls, tol_irls));
94 | 
95 | if (std::abs(deviance - deviance_prev) / (0.1 + std::abs(deviance)) < tol_irls)
96 | {
97 | return true;
98 | } else
99 | {
100 | return false;
101 | }
102 | 
103 | }
104 | 
105 | void initialize_params()
106 | {
107 | double ymean = (weights.array() * datY.array()).matrix().mean();
108 | 
109 | if (intercept)
110 | {
111 | beta0 = std::log(ymean / (1.0 - ymean));
112 | } else
113 | {
114 | beta0 = 0.0;
115 | }
116 | 
117 | xbeta_cur.array() = offset.array() + beta0;
118 | 
119 | // calculate the null deviance
120 | null_dev = (-1.0 * datY.array() * std::log(ymean) - (1.0 - datY.array()) * std::log(1.0 - ymean)).sum();
121 | }
122 | 
123 | void update_quadratic_approx()
124 | {
125 | // calculate the mean function
126 | p = 1.0 / (1.0 + ((-1.0 * xbeta_cur.array()).exp()));
127 | 
128 | // construct IRLS weights and multiply by user-specified weights
129 | W = weights.array() * p.array() * (1.0 - p.array());
130 | 
131 | // make sure no weights are too small
132 | for (int k = 0; k < nobs; ++k)
133 | {
134 | if (W(k) < 1e-5)
135 | {
136 | W(k) = 1e-5;
137 | }
138 | }
139 | 
140 | // here we update the residuals and multiply by user-specified weights, which
141 | // will be multiplied by 
X. ie X'resid_cur = X'Wz, where z is the working response from IRLS 142 | resid_cur = weights.array() * (datY.array() - p.array()); // + xbeta_cur.array() * W.array().sqrt(); 143 | 144 | //Xsq = (W.array().sqrt().matrix().asDiagonal() * datX).array().square().colwise().sum(); 145 | 146 | // we will check this in later iterations 147 | // and only update when Xsq(j) < 0 148 | // (it's always positive, so this way we can check and avoid re-calculating unnecessarily) 149 | Xsq.fill(-1.0); 150 | 151 | // this is needed for intercept updates 152 | weights_sum = W.sum(); 153 | 154 | // update deviance 155 | deviance = 0.0; 156 | for (int ii = 0; ii < nobs; ++ii) 157 | { 158 | if (datY(ii) == 1) 159 | { 160 | if (p(ii) > 1e-5) 161 | { 162 | deviance -= std::log(p(ii)); 163 | } else 164 | { 165 | // don't divide by zero 166 | deviance -= std::log(1e-5); 167 | } 168 | 169 | } else 170 | { 171 | if (p(ii) <= 1.0 - 1e-5) 172 | { 173 | deviance -= std::log((1.0 - p(ii))); 174 | } else 175 | { 176 | // don't divide by zero 177 | deviance -= std::log(1.0 - 1e-5); 178 | } 179 | 180 | } 181 | } 182 | } 183 | 184 | void update_intercept() 185 | { 186 | 187 | if (intercept) 188 | { 189 | resids_sum = (resid_cur).sum(); 190 | 191 | double beta0_delta = resids_sum / weights_sum; 192 | 193 | beta0 += beta0_delta; 194 | 195 | // update the (weighted) working residual! 196 | resid_cur.array() -= beta0_delta * W.array(); 197 | 198 | // update the linear predictor! 199 | xbeta_cur.array() += beta0_delta; 200 | } 201 | } 202 | 203 | double compute_loss() 204 | { 205 | //double sum_squares = 0.5 * (resid_cur.array().square().sum()) / double(nobs); 206 | double sum_squares = 0.5 * (resid_cur.array().square().sum()); 207 | double penalty_part = 0.0; 208 | 209 | /* 210 | if (penalty_factor_size < 1) 211 | { 212 | penalty_part = lambda * beta.array().abs().matrix().sum(); 213 | } else 214 | { 215 | penalty_part = lambda * (beta.array() * penalty_factor.array()).abs().matrix().sum(); 216 | } 217 | */ 218 | 219 | return (sum_squares + penalty_part); 220 | } 221 | 222 | static double soft_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 223 | { 224 | 225 | if (std::abs(value) <= penalty) 226 | return(0.0); 227 | else if (value > penalty) 228 | return( (value - penalty) / (denom + denom * l2) ); 229 | else 230 | return( (value + penalty) / (denom + denom * l2) ); 231 | 232 | /* // this ordering is slower for high-dimensional problems 233 | if(value > penalty) 234 | return( (value - penalty) / (denom + l2) ); 235 | else if(value < -penalty) 236 | return( (value + penalty) / (denom + l2) ); 237 | else 238 | return(0.0); 239 | */ 240 | } 241 | 242 | static double scad_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 243 | { 244 | double val_abs = std::abs(value); 245 | 246 | if (val_abs <= penalty) 247 | return(0.0); 248 | else if (val_abs <= penalty * (1.0 + l2) + penalty) 249 | { 250 | if(value > penalty) 251 | return((value - penalty) / ( denom + denom * l2 )); 252 | else 253 | return((value + penalty) / ( denom + denom * l2 )); 254 | } else if (val_abs <= gamma * penalty * (1.0 + l2)) 255 | { 256 | if ((gamma - 1.0) * value > gamma * penalty) 257 | return( (value - gamma * penalty / (gamma - 1.0)) / (denom * ( 1.0 - 1.0 / (gamma - 1.0) + l2 )) ); 258 | else 259 | return( (value + gamma * penalty / (gamma - 1.0)) / (denom * ( 1.0 - 1.0 / (gamma - 1.0) + l2 )) ); 260 | } else 261 | { 262 | return(value / 
(denom + denom * l2)); 263 | } 264 | 265 | } 266 | 267 | static double mcp_threshold(double &value, const double &penalty, const double &gamma, const double &l2, const double &denom) 268 | { 269 | double val_abs = std::abs(value); 270 | 271 | if (val_abs <= penalty) 272 | return(0.0); 273 | else if (val_abs <= gamma * penalty * (1.0 + l2)) 274 | { 275 | if(value > penalty) 276 | return((value - penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 277 | else 278 | return((value + penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 279 | } else 280 | return(value / (denom + denom * l2)); 281 | 282 | /* 283 | if (std::abs(value) > gamma * penalty * (1.0 + l2)) 284 | return(value / (denom + denom * l2)); 285 | else if(value > penalty) 286 | return((value - penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 287 | else if(value < -penalty) 288 | return((value + penalty) / ( denom * (1.0 + l2 - 1.0 / gamma) )); 289 | else 290 | return(0.0); 291 | */ 292 | } 293 | 294 | 295 | void set_threshold_func() 296 | { 297 | if (penalty == "lasso") 298 | { 299 | thresh_func = &CoordLogisticDense::soft_threshold; 300 | } else if (penalty == "mcp") 301 | { 302 | thresh_func = &CoordLogisticDense::mcp_threshold; 303 | } else 304 | { 305 | thresh_func = &CoordLogisticDense::scad_threshold; 306 | } 307 | } 308 | 309 | //void next_beta(Vector &res, VectorXi &eligible) 310 | void next_beta(SparseVector &res, SparseVectori &eligible) 311 | { 312 | 313 | int j; 314 | double grad; 315 | 316 | // now update intercept if necessary 317 | update_intercept(); 318 | 319 | // if no penalty multiplication factors specified 320 | if (penalty_factor_size < 1) 321 | { 322 | for (InIterVeci i_(eligible); i_; ++i_) 323 | { 324 | int j = i_.index(); 325 | double beta_prev = beta.coeff( j ); //beta(j); 326 | 327 | // surprisingly it's faster to calculate this on an iteration-basis 328 | // and not pre-calculate it within each newton iteration.. 329 | if (Xsq(j) < 0) Xsq(j) = (datX.col(j).array().square() * W.array()).matrix().mean(); 330 | 331 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 332 | 333 | grad = Xtr(j) + beta_prev * Xsq(j); 334 | 335 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 336 | 337 | threshval = thresh_func(grad, lambda, gamma, lambda_ridge, Xsq(j)); 338 | 339 | // apply param limits 340 | if (threshval < limits(1,j)) threshval = limits(1,j); 341 | if (threshval > limits(0,j)) threshval = limits(0,j); 342 | 343 | // update both residual and linear predictor 344 | // if the coefficient changes after thresholding. 
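// (in the IRLS working problem, resid_cur holds w * (y - p), so any
//  coefficient change must update both the linear predictor xbeta_cur
//  and, scaled by the IRLS weights W, the working residual)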
345 | if (beta_prev != threshval) 346 | { 347 | 348 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 349 | 350 | beta.coeffRef(j) = threshval; 351 | 352 | VectorXd delta_cur = (threshval - beta_prev) * datX.col(j); 353 | 354 | xbeta_cur += delta_cur; 355 | resid_cur.array() -= delta_cur.array() * W.array(); 356 | 357 | // update eligible set if necessary 358 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 359 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 360 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 361 | } else 362 | { 363 | // here we only remove a variable from the eligible set 364 | // if it's zero twice in a row 365 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 366 | { 367 | eligible_set.coeffRef(j) = 0; 368 | } 369 | } 370 | } 371 | } else //if penalty multiplication factors are used 372 | { 373 | for (InIterVeci i_(eligible); i_; ++i_) 374 | { 375 | int j = i_.index(); 376 | double beta_prev = beta.coeff( j ); //beta(j); 377 | 378 | // surprisingly it's faster to calculate this on an iteration-basis 379 | // and not pre-calculate it within each newton iteration.. 380 | if (Xsq(j) < 0) Xsq(j) = (datX.col(j).array().square() * W.array()).matrix().mean(); 381 | 382 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 383 | 384 | grad = Xtr(j) + beta_prev * Xsq(j); 385 | 386 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 387 | 388 | threshval = thresh_func(grad, penalty_factor(j) * lambda, gamma, penalty_factor(j) * lambda_ridge, Xsq(j)); 389 | 390 | // apply param limits 391 | if (threshval < limits(1,j)) threshval = limits(1,j); 392 | if (threshval > limits(0,j)) threshval = limits(0,j); 393 | 394 | // update both residual and linear predictor 395 | // if the coefficient changes after thresholding. 396 | if (beta_prev != threshval) 397 | { 398 | 399 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 400 | 401 | beta.coeffRef(j) = threshval; 402 | 403 | VectorXd delta_cur = (threshval - beta_prev) * datX.col(j); 404 | 405 | xbeta_cur += delta_cur; 406 | resid_cur.array() -= delta_cur.array() * W.array(); 407 | 408 | // update eligible set if necessary 409 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 410 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 411 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 412 | } else 413 | { 414 | // here we only remove a variable from the eligible set 415 | // if it's zero twice in a row 416 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 417 | { 418 | eligible_set.coeffRef(j) = 0; 419 | } 420 | } 421 | } 422 | } 423 | 424 | } 425 | 426 | //void next_beta(Vector &res, VectorXi &eligible) 427 | void next_beta(SparseVector &res, VectorXi &eligible) 428 | { 429 | 430 | int j; 431 | double grad; 432 | 433 | // now update intercept if necessary 434 | update_intercept(); 435 | 436 | 437 | // if no penalty multiplication factors specified 438 | if (penalty_factor_size < 1) 439 | { 440 | for (j = 0; j < nvars; ++j) 441 | { 442 | if (eligible(j)) 443 | { 444 | double beta_prev = beta.coeff( j ); //beta(j); 445 | 446 | // surprisingly it's faster to calculate this on an iteration-basis 447 | // and not pre-calculate it within each newton iteration.. 
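// (Xsq is a lazy cache here as well: update_quadratic_approx() refills
//  it with -1.0 after each reweighting, so the W-weighted column
//  curvatures are recomputed only for coordinates actually visited)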
448 | if (Xsq(j) < 0) Xsq(j) = (datX.col(j).array().square() * W.array()).matrix().mean(); 449 | 450 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 451 | 452 | grad = Xtr(j) + beta_prev * Xsq(j); 453 | 454 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 455 | 456 | threshval = thresh_func(grad, lambda, gamma, lambda_ridge, Xsq(j)); 457 | 458 | // apply param limits 459 | if (threshval < limits(1,j)) threshval = limits(1,j); 460 | if (threshval > limits(0,j)) threshval = limits(0,j); 461 | 462 | // update both residual and linear predictor 463 | // if the coefficient changes after thresholding. 464 | if (beta_prev != threshval) 465 | { 466 | 467 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 468 | 469 | beta.coeffRef(j) = threshval; 470 | 471 | VectorXd delta_cur = (threshval - beta_prev) * datX.col(j); 472 | 473 | xbeta_cur += delta_cur; 474 | resid_cur.array() -= delta_cur.array() * W.array(); 475 | 476 | // update eligible set if necessary 477 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 478 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 479 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 480 | } else 481 | { 482 | // here we only remove a variable from the eligible set 483 | // if it's zero twice in a row 484 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 485 | { 486 | eligible_set.coeffRef(j) = 0; 487 | } 488 | } 489 | } // end eligible set check 490 | } 491 | } else //if penalty multiplication factors are used 492 | { 493 | for (j = 0; j < nvars; ++j) 494 | { 495 | if (eligible(j)) 496 | { 497 | double beta_prev = beta.coeff( j ); //beta(j); 498 | 499 | // surprisingly it's faster to calculate this on an iteration-basis 500 | // and not pre-calculate it within each newton iteration.. 501 | if (Xsq(j) < 0) Xsq(j) = (datX.col(j).array().square() * W.array()).matrix().mean(); 502 | 503 | Xtr(j) = datX.col(j).dot(resid_cur) / double(nobs); 504 | 505 | grad = Xtr(j) + beta_prev * Xsq(j); 506 | 507 | //grad = datX.col(j).dot(resid_cur) / double(nobs) + beta_prev * Xsq(j); 508 | 509 | threshval = thresh_func(grad, penalty_factor(j) * lambda, gamma, penalty_factor(j) * lambda_ridge, Xsq(j)); 510 | 511 | // apply param limits 512 | if (threshval < limits(1,j)) threshval = limits(1,j); 513 | if (threshval > limits(0,j)) threshval = limits(0,j); 514 | 515 | // update both residual and linear predictor 516 | // if the coefficient changes after thresholding. 
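// (note that the thresholding call above scales both the l1 penalty
//  lambda and the ridge penalty lambda_ridge by penalty_factor(j);
//  a factor of zero therefore leaves that coordinate unpenalized)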
517 | if (beta_prev != threshval) 518 | { 519 | 520 | if (threshval != 0.0) threshval = 0.85 * threshval + 0.15 * beta_prev; 521 | 522 | beta.coeffRef(j) = threshval; 523 | 524 | VectorXd delta_cur = (threshval - beta_prev) * datX.col(j); 525 | 526 | xbeta_cur += delta_cur; 527 | resid_cur.array() -= delta_cur.array() * W.array(); 528 | 529 | // update eligible set if necessary 530 | if (threshval != 0.0 && eligible_set.coeff(j) == 0) eligible_set.coeffRef(j) = 1; 531 | //if (threshval == 0.0 && eligible_set(j) == 1 && beta_nz_prev(j) == 0) eligible_set(j) = 0; 532 | //if (threshval == 0.0 && eligible_set.coeff(j) == 1) eligible_set.coeffRef(j) = 0; 533 | } else 534 | { 535 | // here we only remove a variable from the eligible set 536 | // if it's zero twice in a row 537 | if (beta_prev == 0.0 && eligible_set.coeff(j) == 1) 538 | { 539 | eligible_set.coeffRef(j) = 0; 540 | } 541 | } 542 | } // end eligible set check 543 | } 544 | } 545 | 546 | } 547 | 548 | 549 | 550 | // Calculate ||v1 - v2||^2 when v1 and v2 are sparse 551 | static double diff_squared_norm(const SparseVector &v1, const SparseVector &v2) 552 | { 553 | const int n1 = v1.nonZeros(), n2 = v2.nonZeros(); 554 | const double *v1_val = v1.valuePtr(), *v2_val = v2.valuePtr(); 555 | const int *v1_ind = v1.innerIndexPtr(), *v2_ind = v2.innerIndexPtr(); 556 | 557 | double r = 0.0; 558 | int i1 = 0, i2 = 0; 559 | while(i1 < n1 && i2 < n2) 560 | { 561 | if(v1_ind[i1] == v2_ind[i2]) 562 | { 563 | double val = v1_val[i1] - v2_val[i2]; 564 | r += val * val; 565 | i1++; 566 | i2++; 567 | } else if(v1_ind[i1] < v2_ind[i2]) { 568 | r += v1_val[i1] * v1_val[i1]; 569 | i1++; 570 | } else { 571 | r += v2_val[i2] * v2_val[i2]; 572 | i2++; 573 | } 574 | } 575 | while(i1 < n1) 576 | { 577 | r += v1_val[i1] * v1_val[i1]; 578 | i1++; 579 | } 580 | while(i2 < n2) 581 | { 582 | r += v2_val[i2] * v2_val[i2]; 583 | i2++; 584 | } 585 | 586 | return r; 587 | } 588 | 589 | 590 | public: 591 | CoordLogisticDense(ConstGenericMatrix &datX_, 592 | ConstGenericVector &datY_, 593 | ConstGenericVector &weights_, 594 | ConstGenericVector &offset_, 595 | ArrayXd &penalty_factor_, 596 | ConstGenericMatrix &limits_, 597 | std::string &penalty_, 598 | bool intercept_, 599 | double alpha_ = 1.0, 600 | double tol_ = 1e-6, 601 | int maxit_irls_ = 100, 602 | double tol_irls_ = 1e-6) : 603 | CoordBase > 604 | (datX_.rows(), datX_.cols(), tol_), 605 | datX(datX_.data(), datX_.rows(), datX_.cols()), 606 | datY(datY_.data(), datY_.size()), 607 | weights(weights_.data(), weights_.size()), 608 | offset(offset_.data(), offset_.size()), 609 | resid_cur(datX_.rows()), 610 | xbeta_cur(datX_.rows()), 611 | p(datX_.rows()), 612 | W(datX_.rows()), weightssqrt(weights.array().sqrt()), 613 | z(datX_.rows()), 614 | penalty(penalty_), 615 | penalty_factor(penalty_factor_), 616 | intercept(intercept_), 617 | limits(limits_.data(), limits_.rows(), limits_.cols()), 618 | alpha(alpha_), maxit_irls(maxit_irls_), tol_irls(tol_irls_), 619 | penalty_factor_size(penalty_factor_.size()), 620 | XY(datX.transpose() * (datY.array() * weights.array()).matrix()), 621 | Xsq(datX_.cols()), Xtr(datX_.cols()) 622 | {} 623 | 624 | double get_lambda_zero() 625 | { 626 | if (penalty_factor_size > 0) 627 | { 628 | VectorXd XXtmp = datX.transpose().rowwise().sum(); 629 | lambda0 = 0; 630 | for (int i = 0; i < penalty_factor.size(); ++i) 631 | { 632 | if (penalty_factor(i) != 0.0) 633 | { 634 | double valcur = std::abs(XY(i)) / penalty_factor(i); 635 | 636 | if (valcur > lambda0) lambda0 = valcur; 637 | } 638 | 
} 639 | } else 640 | { 641 | lambda0 = (XY).cwiseAbs().maxCoeff(); 642 | } 643 | 644 | lambda0 /= ( alpha * 1.0 * double(nobs)); //std::pow(1e-6, 1.0/(99.0)); 645 | 646 | return lambda0; 647 | } 648 | 649 | // init() is a cold start for the first lambda 650 | void init(double lambda_, double gamma_) 651 | { 652 | 653 | set_threshold_func(); 654 | 655 | beta.setZero(); 656 | 657 | lambda = lambda_ * alpha; 658 | lambda_ridge = lambda_ * (1.0 - alpha); 659 | 660 | gamma = gamma_; 661 | 662 | eligible_set.setZero(); 663 | 664 | eligible_set.reserve(std::min(nobs, nvars)); 665 | 666 | nzero = 0; 667 | 668 | deviance = 0.0; 669 | 670 | xbeta_cur.setZero(); 671 | resid_cur.setZero(); 672 | 673 | // this starts estimate of intercept 674 | initialize_params(); 675 | 676 | double cutoff; 677 | 678 | if (penalty == "lasso") 679 | { 680 | cutoff = 2.0 * lambda - lambda0; 681 | } else if (penalty == "mcp") 682 | { 683 | cutoff = lambda + gamma / (gamma - 1.0) * (lambda - lambda0); 684 | } else 685 | { 686 | cutoff = lambda + gamma / (gamma - 2.0) * (lambda - lambda0); 687 | } 688 | 689 | 690 | 691 | if (penalty_factor_size < 1) 692 | { 693 | for (int j = 0; j < nvars; ++j) if (std::abs(XY(j)) > (cutoff)) eligible_set.coeffRef(j) = 1; 694 | } else 695 | { 696 | for (int j = 0; j < nvars; ++j) if (std::abs(XY(j)) > (cutoff * penalty_factor(j))) eligible_set.coeffRef(j) = 1; 697 | } 698 | 699 | //beta.reserve( std::max(eligible_set.sum() + 10, std::min(nvars, nobs)) ); 700 | 701 | } 702 | // when computing for the next lambda, we can use the 703 | // current main_x, aux_z, dual_y and rho as initial values 704 | void init_warm(double lambda_, double gamma_) 705 | { 706 | lprev = lambda; 707 | lambda = lambda_ * alpha; 708 | lambda_ridge = lambda_ * (1.0 - alpha); 709 | 710 | gamma = gamma_; 711 | 712 | eligible_set.setZero(); 713 | 714 | eligible_set.reserve(std::min(nobs, nvars)); 715 | 716 | nzero = 0; 717 | 718 | deviance = 0.0; 719 | 720 | double cutoff; 721 | 722 | if (penalty == "lasso") 723 | { 724 | cutoff = 2.0 * lambda - lprev; 725 | } else if (penalty == "mcp") 726 | { 727 | cutoff = lambda + gamma / (gamma - 1.0) * (lambda - lprev); 728 | } else 729 | { 730 | cutoff = lambda + gamma / (gamma - 2.0) * (lambda - lprev); 731 | } 732 | 733 | 734 | if (penalty_factor_size < 1) 735 | { 736 | for (int j = 0; j < nvars; ++j) if (std::abs(Xtr(j)) > (cutoff)) eligible_set.coeffRef(j) = 1; 737 | } else 738 | { 739 | for (int j = 0; j < nvars; ++j) if (std::abs(Xtr(j)) > cutoff * penalty_factor(j)) eligible_set.coeffRef(j) = 1; 740 | } 741 | 742 | 743 | //beta.reserve( std::max(eligible_set.sum() + 10, std::min(nvars, nobs)) ); 744 | } 745 | 746 | 747 | int solve(int maxit) 748 | { 749 | //int i; 750 | int irls_iter = 0; 751 | 752 | while(irls_iter < maxit_irls) 753 | { 754 | irls_iter++; 755 | 756 | beta_prev_irls = beta; 757 | deviance_prev = deviance; 758 | 759 | //xbeta_cur.array() = (datX * beta).array() + beta0; //this is efficient because beta is a sparse vector 760 | 761 | update_quadratic_approx(); 762 | 763 | int current_iter = 0; 764 | 765 | // run once through all variables 766 | //current_iter++; 767 | beta_prev = beta; 768 | ineligible_set.fill(1); 769 | 770 | //update_beta(ineligible_set); 771 | 772 | while(current_iter < maxit) 773 | { 774 | while(current_iter < maxit) 775 | { 776 | current_iter++; 777 | beta_prev = beta; 778 | 779 | if (current_iter % 5 == 0) update_quadratic_approx(); 780 | 781 | update_beta(eligible_set); 782 | 783 | if(converged()) break; 784 | } 785 | 786 | 
current_iter++;
787 | beta_prev = beta;
788 | ineligible_set.fill(1);
789 | 
790 | for (InIterVeci i_(eligible_set); i_; ++i_)
791 | {
792 | ineligible_set(i_.index()) = 0;
793 | }
794 | 
795 | update_quadratic_approx();
796 | 
797 | update_beta(ineligible_set);
798 | 
799 | if(converged()) break;
800 | } //end coordinate descent loop
801 | 
802 | if(converged_irls()) break;
803 | 
804 | } //end irls loop
805 | 
806 | 
807 | /*
808 | for (int j = 0; j < nvars; ++j)
809 | {
810 | if (beta(j) != 0)
811 | ++nzero;
812 | }
813 | */
814 | 
815 | // force zeros to be actual zeros
816 | beta.prune(0.0);
817 | 
818 | nzero = beta.nonZeros();
819 | 
820 | 
821 | 
822 | loss = compute_loss();
823 | 
824 | // print_footer();
825 | 
826 | return irls_iter;
827 | }
828 | 
829 | virtual double get_intercept() { return beta0; }
830 | };
831 | 
832 | 
833 | 
834 | #endif // COORDLOGISTICDENSE_H
835 | 
--------------------------------------------------------------------------------