├── .github
│   ├── .gitignore
│   ├── ISSUE_TEMPLATE
│   │   └── bug_report.md
│   └── workflows
│       ├── pkgdown.yaml
│       ├── lint-changed-files.yaml
│       └── R-CMD-check.yaml
├── vignettes
│   ├── .gitignore
│   ├── simu.Rmd
│   └── HTLR.bib
├── data
│   ├── colon.rda
│   └── diabetes392.rda
├── tests
│   ├── testthat.R
│   └── testthat
│       ├── test-warmup.R
│       ├── test-gendata.r
│       ├── test-util.r
│       └── test-bplrhmc.r
├── data-raw
│   ├── prostate.rda
│   └── pre_diabetes.R
├── man
│   ├── figures
│   │   └── README-pressure-1.png
│   ├── pipe.Rd
│   ├── order_plain.Rd
│   ├── order_ftest.Rd
│   ├── order_kruskal.Rd
│   ├── nzero_idx.Rd
│   ├── evaluate_pred.Rd
│   ├── predict.htlr.fit.Rd
│   ├── split_data.Rd
│   ├── colon.Rd
│   ├── as.matrix.htlr.fit.Rd
│   ├── lasso_deltas.Rd
│   ├── summary.htlr.fit.Rd
│   ├── gendata_MLR.Rd
│   ├── htlr_predict.Rd
│   ├── bcbcsf_deltas.Rd
│   ├── diabetes392.Rd
│   ├── htlr_prior.Rd
│   ├── std.Rd
│   ├── gendata_FAM.Rd
│   ├── htlr_fit.Rd
│   └── htlr.Rd
├── src
│   ├── Makevars
│   ├── Makevars.win
│   ├── utils.h
│   ├── main.cpp
│   ├── sampler.h
│   ├── sampler.cpp
│   ├── ars.h
│   ├── utils.cpp
│   ├── gibbs.h
│   ├── ars_demo.cpp
│   ├── RcppExports.cpp
│   ├── gibbs.cpp
│   └── ars.cpp
├── .gitignore
├── inst
│   ├── install_old_htlr.R
│   └── CITATION
├── .Rbuildignore
├── R
│   ├── initial-state.R
│   ├── RcppExports.R
│   ├── order.r
│   ├── data.R
│   ├── std.R
│   ├── gendata.r
│   ├── predict.R
│   ├── lassorda.r
│   ├── htlr.R
│   ├── util.r
│   ├── mccoef.r
│   ├── compred.r
│   └── core.r
├── HTLR.Rproj
├── pkgdown
│   └── _pkgdown.yml
├── NAMESPACE
├── DESCRIPTION
├── NEWS.md
├── README.md
└── README.Rmd
/.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /data/colon.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longhaiSK/HTLR/HEAD/data/colon.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(HTLR) 3 | 4 | test_check("HTLR") 5 | -------------------------------------------------------------------------------- /data-raw/prostate.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longhaiSK/HTLR/HEAD/data-raw/prostate.rda -------------------------------------------------------------------------------- /data/diabetes392.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longhaiSK/HTLR/HEAD/data/diabetes392.rda -------------------------------------------------------------------------------- /man/figures/README-pressure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longhaiSK/HTLR/HEAD/man/figures/README-pressure-1.png -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = 
$(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .DS_Store 6 | src/*.o 7 | src/*.so 8 | src/*.dll 9 | tmp/ 10 | .vscode/ 11 | docs/ 12 | -------------------------------------------------------------------------------- /inst/install_old_htlr.R: -------------------------------------------------------------------------------- 1 | if (!require(devtools)) { 2 | install.packages("devtools") 3 | } 4 | devtools::install_github("cran/rda") 5 | devtools::install_github("longhaiSK/HTLR", ref = "legacy") 6 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^docs$ 4 | ^pkgdown$ 5 | ^\.travis\.yml$ 6 | ^README\.Rmd$ 7 | ^index\.md$ 8 | ^tmp$ 9 | ^.vscode$ 10 | ^data-raw$ 11 | ^LICENSE$ 12 | ^.github$ 13 | ^tests/testthat\.rda$ 14 | ^\.github$ 15 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.r 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \description{ 10 | See \code{magrittr::\link[magrittr]{\%>\%}} for details. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /R/initial-state.R: -------------------------------------------------------------------------------- 1 | as.htlr.init <- function(x) UseMethod("as.htlr.init") 2 | 3 | #' @exportS3Method 4 | as.htlr.init.cv.glmnet <- function(x) 5 | { 6 | coefs <- coef(x, s = "lambda.min") %>% Reduce(f = cbind) %>% as.matrix() 7 | deltas <- coefs[, -1, drop = FALSE] - coefs[, 1] 8 | return (deltas) 9 | } 10 | -------------------------------------------------------------------------------- /HTLR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageCheckArgs: --as-cran 19 | PackageRoxygenize: rd,collate,namespace,vignette 20 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include "RcppArmadillo.h" 5 | 6 | double log_sum_exp(const arma::vec &a); 7 | arma::vec log_sum_exp(const arma::mat &A); 8 | arma::mat find_normlv(const arma::mat &lv); 9 | arma::vec spl_sgm_ig(double alpha, int K, double w, const arma::vec &vardeltas); 10 | 11 | inline arma::rowvec col_sum(const arma::mat &A) 12 | { 13 | return arma::sum(A, 0); 14 | } 15 | 16 | inline arma::vec row_sum(const arma::mat &A) 17 | { 18 | return arma::sum(A, 1); 19 | } 20 | 21 | #endif 22 | 
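A rough R sketch of the log-sum-exp trick declared in utils.h above (illustration only, not part of the package; HTLR computes this in C++ and already exposes it to R as log_sum_exp() via R/RcppExports.R — the helper name below is hypothetical):

# Numerically stable log(sum(exp(a))): shift by the maximum before
# exponentiating so no term overflows, then add the shift back on log scale.
log_sum_exp_r <- function(a) {
  m <- max(a)
  m + log(sum(exp(a - m)))
}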
-------------------------------------------------------------------------------- /man/order_plain.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/order.r 3 | \name{order_plain} 4 | \alias{order_plain} 5 | \title{Plain order function} 6 | \usage{ 7 | order_plain(X, y) 8 | } 9 | \arguments{ 10 | \item{X}{Input matrix, of dimension \code{nobs} by \code{nvars}; each row is an observation vector.} 11 | 12 | \item{y}{Vector of response variables.} 13 | } 14 | \value{ 15 | Sequence starting from 1 to \code{nvars}. 16 | } 17 | \description{ 18 | A placeholder order function that returns the original order of given features. 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **R session info** 20 | Put your R sessionInfo here. 21 | 22 | **Screenshots** 23 | If applicable, add screenshots to help explain your problem. 24 | 25 | **Additional context** 26 | Add any other context about the problem here. 27 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite the HTLR package in publications, please use:") 2 | 3 | bibentry( 4 | bibtype = "Article", 5 | title = "Fully Bayesian logistic regression with hyper-LASSO priors for high-dimensional feature selection", 6 | journal = "Journal of Statistical Computation and Simulation", 7 | year = "2018", 8 | author = "Longhai Li and Weixin Yao", 9 | volume = "88", 10 | number = "14", 11 | pages = "2827--2851", 12 | publisher = "Taylor & Francis", 13 | textVersion = "Li, L., & Yao, W. (2018). Fully Bayesian logistic regression with hyper-LASSO priors for high-dimensional feature selection. Journal of Statistical Computation and Simulation, 88(14), 2827-2851." 14 | ) 15 | -------------------------------------------------------------------------------- /man/order_ftest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/order.r 3 | \name{order_ftest} 4 | \alias{order_ftest} 5 | \title{Order features by F-statistic} 6 | \usage{ 7 | order_ftest(X, y) 8 | } 9 | \arguments{ 10 | \item{X}{Input matrix, of dimension \code{nobs} by \code{nvars}; each row is an observation vector.} 11 | 12 | \item{y}{Vector of response variables.} 13 | } 14 | \value{ 15 | Order of all features of length \code{nvars}. 16 | } 17 | \description{ 18 | This function orders all features in terms of ANOVA F-statistic. 
19 | } 20 | \examples{ 21 | data("diabetes392") 22 | order_ftest(diabetes392$X, diabetes392$y) 23 | 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/order_kruskal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/order.r 3 | \name{order_kruskal} 4 | \alias{order_kruskal} 5 | \title{Order features by Kruskal-Wallis test} 6 | \usage{ 7 | order_kruskal(X, y) 8 | } 9 | \arguments{ 10 | \item{X}{Input matrix, of dimension \code{nobs} by \code{nvars}; each row is an observation vector.} 11 | 12 | \item{y}{Vector of response variables.} 13 | } 14 | \value{ 15 | Order of all features of length \code{nvars}. 16 | } 17 | \description{ 18 | This function orders all features in terms of Kruskal-Wallis test p-value. 19 | } 20 | \examples{ 21 | data("diabetes392") 22 | order_kruskal(diabetes392$X, diabetes392$y) 23 | 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/nzero_idx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mccoef.r 3 | \name{nzero_idx} 4 | \alias{nzero_idx} 5 | \title{Get Indices of Non-Zero Coefficients} 6 | \usage{ 7 | nzero_idx(fit, cut = 0.1) 8 | } 9 | \arguments{ 10 | \item{fit}{An object of S3 class \code{htlr.fit}.} 11 | 12 | \item{cut}{Threshold on relative SDB to distinguish zero coefficients.} 13 | } 14 | \value{ 15 | Indices vector of non-zero coefficients in the model. 16 | } 17 | \description{ 18 | Get the indices of non-zero coefficients from fitted HTLR model objects. 
19 | } 20 | \examples{ 21 | set.seed(12345) 22 | data("colon") 23 | 24 | fit <- htlr(X = colon$X, y = colon$y, fsel = 1:100, iter = 20) 25 | nzero_idx(fit) 26 | 27 | } 28 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://longhaisk.github.io/HTLR/ 2 | 3 | template: 4 | bootstrap: 5 5 | bootswatch: sandstone 6 | 7 | reference: 8 | - title: Model fitting 9 | contents: 10 | - htlr 11 | - htlr_prior 12 | - title: Prediction and inference 13 | contents: 14 | - predict.htlr.fit 15 | - as.matrix.htlr.fit 16 | - nzero_idx 17 | - summary.htlr.fit 18 | - evaluate_pred 19 | - title: Generating simulation data 20 | contents: 21 | - gendata_FAM 22 | - gendata_MLR 23 | #- title: Feature screening 24 | # contents: 25 | # - order_ftest 26 | # - order_kruskal 27 | - title: Utility functions 28 | contents: 29 | - std 30 | - split_data 31 | - title: Data sets 32 | contents: 33 | - colon 34 | - diabetes392 35 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "RcppArmadillo.h" 2 | #include "gibbs.h" 3 | 4 | // [[Rcpp::depends(RcppArmadillo)]] 5 | 6 | // [[Rcpp::export]] 7 | Rcpp::List htlr_fit_helper( 8 | int p, int K, int n, 9 | arma::mat &X, arma::mat &ymat, arma::uvec &ybase, 10 | std::string ptype, double alpha, double s, double eta, 11 | int iters_rmc, int iters_h, int thin, 12 | int leap_L, int leap_L_h, double leap_step, 13 | double hmc_sgmcut, arma::mat &deltas, arma::vec &sigmasbt, 14 | bool keep_warmup_hist, int silence, bool legacy) 15 | { 16 | auto f = Fit( 17 | p, K, n, X, ymat, ybase, 18 | ptype, alpha, s, eta, 19 | iters_rmc, iters_h, thin, 20 | leap_L, leap_L_h, leap_step, 21 | hmc_sgmcut, deltas, sigmasbt, 22 | keep_warmup_hist, silence, legacy); 23 | f.StartSampling(); 24 | return f.OutputR(); 25 | } 26 | -------------------------------------------------------------------------------- /data-raw/pre_diabetes.R: -------------------------------------------------------------------------------- 1 | library(corrplot) 2 | 3 | diabetes <- read.csv("data-raw/diabetes.csv", header = TRUE) 4 | 5 | summary(diabetes) 6 | str(diabetes) 7 | 8 | # removing those observation rows with 0 in any of the variables 9 | for (i in 2:6) { 10 | diabetes <- diabetes[-which(diabetes[, i] == 0), ] 11 | } 12 | 13 | # scale the covariates for easier comparison of coefficient posteriors 14 | for (i in 1:8) { 15 | diabetes[i] <- scale(diabetes[i]) 16 | } 17 | 18 | # modify the data column names slightly for easier typing 19 | names(diabetes)[7] <- "PedigreeFunction" 20 | names(diabetes) <- tolower(names(diabetes)) 21 | 22 | str(diabetes) 23 | 24 | corrplot(cor(diabetes[, c(9, 1:8)])) 25 | 26 | # preparing the dataset 27 | diabetes392 <- list("X" = model.matrix(outcome ~ . - 1, data = diabetes), 28 | "y" = factor(diabetes$outcome)) 29 | 30 | usethis::use_data(diabetes392) 31 | -------------------------------------------------------------------------------- /man/evaluate_pred.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compred.r 3 | \name{evaluate_pred} 4 | \alias{evaluate_pred} 5 | \title{Evaluate Prediction Results} 6 | \usage{ 7 | evaluate_pred(y.pred, y.true, caseid = names(y.true), showplot = TRUE, ...) 
8 | } 9 | \arguments{ 10 | \item{y.pred}{A matrix of predicted probabilities, as returned by a classifier.} 11 | 12 | \item{y.true}{Ground truth labels vector.} 13 | 14 | \item{caseid}{The names of test cases which we take into account. By default all test cases are used for evaluation.} 15 | 16 | \item{showplot}{Logical; if \code{TRUE}, a summary plot will be generated.} 17 | 18 | \item{...}{Not used.} 19 | } 20 | \value{ 21 | A summary of the evaluation results. 22 | } 23 | \description{ 24 | This function compares the prediction results returned by a classifier with the ground truth, 25 | and gives a summary of the evaluation. 26 | } 27 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as.htlr.init,cv.glmnet) 4 | S3method(as.matrix,htlr.fit) 5 | S3method(nobs,htlr.fit) 6 | S3method(predict,htlr.fit) 7 | S3method(print,htlr.fit) 8 | S3method(summary,htlr.fit) 9 | export("%>%") 10 | export(bcbcsf_deltas) 11 | export(evaluate_pred) 12 | export(gendata_FAM) 13 | export(gendata_MLR) 14 | export(htlr) 15 | export(htlr_fit) 16 | export(htlr_predict) 17 | export(htlr_prior) 18 | export(lasso_deltas) 19 | export(nzero_idx) 20 | export(order_ftest) 21 | export(order_kruskal) 22 | export(order_plain) 23 | export(split_data) 24 | export(std) 25 | import(Rcpp) 26 | import(grDevices) 27 | import(graphics) 28 | import(stats) 29 | importFrom(BCBCSF,bcbcsf_fitpred) 30 | importFrom(BCBCSF,bcbcsf_sumfit) 31 | importFrom(glmnet,cv.glmnet) 32 | importFrom(glmnet,glmnet) 33 | importFrom(magrittr,"%>%") 34 | importFrom(magrittr,extract2) 35 | importFrom(utils,read.table) 36 | useDynLib(HTLR) 37 | -------------------------------------------------------------------------------- /tests/testthat/test-warmup.R: -------------------------------------------------------------------------------- 1 | SEED <- 1234 2 | 3 | set.seed(SEED) 4 | dat <- gendata_MLR(n = 100, p = 10) 5 | dat <- split_data(dat$X, dat$y, p.train = 0.8) 6 | 7 | set.seed(SEED) 8 | fit.with.warmup <- htlr(X = dat$x.tr, y = dat$y.tr, iter = 10, keep.warmup.hist = T) 9 | 10 | set.seed(SEED) 11 | fit.wout.warmup <- htlr(X = dat$x.tr, y = dat$y.tr, iter = 10, keep.warmup.hist = F) 12 | 13 | test_that("predict() can handle fit with warmup records", { 14 | expect_equal(predict(fit.wout.warmup, dat$x.te), predict(fit.with.warmup, dat$x.te)) 15 | }) 16 | 17 | test_that("as.matrix() can handle fit with warmup records", { 18 | # include.warmup has no effect if the warmup record is not available 19 | expect_equal(as.matrix(fit.with.warmup, include.warmup = F, k = 1), as.matrix(fit.wout.warmup, include.warmup = T, k = 1)) 20 | expect_equal(as.matrix(fit.with.warmup, include.warmup = F, k = 1), as.matrix(fit.wout.warmup, include.warmup = F, k = 1)) 21 | }) 22 | -------------------------------------------------------------------------------- /man/predict.htlr.fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{predict.htlr.fit} 4 | \alias{predict.htlr.fit} 5 | \title{Make Prediction on New Data} 6 | \usage{ 7 | \method{predict}{htlr.fit}(object, newx, type = c("response", "class"), ...)
8 | } 9 | \arguments{ 10 | \item{object}{A fitted model object with S3 class \code{htlr.fit}.} 11 | 12 | \item{newx}{A matrix of values at which predictions are to be made.} 13 | 14 | \item{type}{Type of prediction required. Type "response" gives the fitted probabilities. 15 | Type "class" produces the class label corresponding to the maximum probability.} 16 | 17 | \item{...}{Advanced options to specify the Markov chain iterations used for inference. 18 | See \code{\link{htlr_predict}}.} 19 | } 20 | \value{ 21 | The object returned depends on \code{type}. 22 | } 23 | \description{ 24 | Similar to other predict methods, this function returns predictions from a fitted \code{htlr.fit} object. 25 | } 26 | -------------------------------------------------------------------------------- /man/split_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.r 3 | \name{split_data} 4 | \alias{split_data} 5 | \title{Split Data into Train and Test Partitions} 6 | \usage{ 7 | split_data(X, y, p.train = 0.7, n.train = round(nrow(X) * p.train)) 8 | } 9 | \arguments{ 10 | \item{X}{Input matrix, of dimension \code{nobs} by \code{nvars}; each row is an observation vector.} 11 | 12 | \item{y}{Vector of response variables.} 13 | 14 | \item{p.train}{Proportion of cases assigned to the training set.} 15 | 16 | \item{n.train}{Number of cases for training; will override \code{p.train} if specified.} 17 | } 18 | \value{ 19 | List of training data \code{x.tr}, \code{y.tr} and testing data \code{x.te}, \code{y.te}. 20 | } 21 | \description{ 22 | This function splits the input data and response variables into training and testing parts. 23 | } 24 | \examples{ 25 | dat <- gendata_MLR(n = 100, p = 10) 26 | dat <- split_data(dat$X, dat$y, p.train = 0.7) 27 | dim(dat$x.tr) 28 | dim(dat$x.te) 29 | 30 | } 31 | -------------------------------------------------------------------------------- /man/colon.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{colon} 5 | \alias{colon} 6 | \title{Colon Tissues} 7 | \format{ 8 | A list containing data matrix \code{X} and response vector \code{y}: 9 | \describe{ 10 | \item{X}{A matrix with 62 rows (observations) and 2000 columns (features).} 11 | \item{y}{A binary vector where 0 indicates normal colon tissues and 1 indicates tumor colon tissues.} 12 | } 13 | } 14 | \usage{ 15 | data("colon") 16 | } 17 | \description{ 18 | In this dataset, expression levels of 40 tumor and 22 normal colon tissues 19 | for 6500 human genes are measured using the Affymetrix technology. 20 | A selection of 2000 genes with highest minimal intensity across the samples 21 | has been made by Alon et al. (1999). The data is preprocessed by carrying out 22 | a base 10 logarithmic transformation and standardizing each tissue sample to 23 | zero mean and unit variance across the genes. 24 | } 25 | \references{ 26 | Dettling, Marcel, and Peter Bühlmann (2002). Supervised clustering of genes. 27 | \emph{Genome biology}, 3(12), research0069-1.
28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /tests/testthat/test-gendata.r: -------------------------------------------------------------------------------- 1 | skip_on_ci() 2 | skip_on_cran() 3 | 4 | SEED <- 1001 5 | 6 | test_that("sim MLR works", { 7 | set.seed(SEED) 8 | expect <- HTLR.old::htlr_gendata(n = 50, p = 10) 9 | 10 | set.seed(SEED) 11 | actual <- gendata_MLR(n = 50, p = 10) 12 | 13 | expect_equal(unname(actual$X), expect$X) 14 | expect_equal(actual$y, expect$y) 15 | expect_equal(actual$deltas, expect$deltas) 16 | }) 17 | 18 | test_that("sim FAM works", { 19 | n <- 100 20 | p <- 10 21 | 22 | means <- rbind( 23 | c(0, 1, 0), 24 | c(0, 0, 0), 25 | c(0, 0, 1), 26 | c(0, 0, 1), 27 | c(0, 0, 1) 28 | ) * 2 29 | means <- rbind(means, matrix(0, p - 5, 3)) 30 | 31 | A <- diag(1, p) 32 | A[1:5, 1:3] <- rbind( 33 | c(1, 0, 0), 34 | c(2, 1, 0), 35 | c(0, 0, 1), 36 | c(0, 0, 1), 37 | c(0, 0, 1) 38 | ) 39 | 40 | set.seed(SEED) 41 | expect <- HTLR.old::gendata_fam(n, means, A, sd_g = 0.5, stdx = TRUE) 42 | 43 | set.seed(SEED) 44 | actual <- gendata_FAM(n, means, A, sd_g = 0.5, stdx = TRUE) 45 | 46 | expect_equal(unname(actual$X), expect$X) 47 | expect_equal(actual$y, expect$y) 48 | expect_equal(actual$muj, expect$muj) 49 | expect_equal(actual$SGM, expect$SGM) 50 | }) 51 | -------------------------------------------------------------------------------- /man/as.matrix.htlr.fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mccoef.r 3 | \name{as.matrix.htlr.fit} 4 | \alias{as.matrix.htlr.fit} 5 | \title{Create a Matrix of Markov Chain Samples} 6 | \usage{ 7 | \method{as.matrix}{htlr.fit}(x, k = NULL, include.warmup = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An object of S3 class \code{htlr.fit}.} 11 | 12 | \item{k}{Coefficients associated with class \code{k} will be drawn. Must be a positive integer in 13 | 1,2,\ldots,C-1 for C-class training labels (base class 0 cannot be chosen). By default the last class 14 | is selected. For a binary logistic model this argument can be ignored.} 15 | 16 | \item{include.warmup}{Whether or not to include warmup samples.} 17 | 18 | \item{...}{Not used.} 19 | } 20 | \value{ 21 | A matrix with \code{(p + 1)} columns and \code{i} rows, where \code{p} is the number of features 22 | excluding the intercept, and \code{i} is the number of iterations after burnin. 23 | } 24 | \description{ 25 | The Markov chain samples (without warmup) included in an \code{htlr.fit} object will be coerced to a matrix. 26 | } 27 | \examples{ 28 | ## No. of features used: 100; No. of iterations after burnin: 15 29 | fit <- htlr(X = colon$X, y = colon$y, fsel = 1:100, iter = 20, warmup = 5) 30 | 31 | dim(as.matrix(fit)) 32 | 33 | } 34 | -------------------------------------------------------------------------------- /man/lasso_deltas.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lassorda.r 3 | \name{lasso_deltas} 4 | \alias{lasso_deltas} 5 | \title{Lasso Initial State} 6 | \usage{ 7 | lasso_deltas( 8 | X, 9 | y, 10 | lambda = NULL, 11 | verbose = FALSE, 12 | alpha = 1, 13 | rank_fn = order_plain, 14 | k = ncol(X) 15 | ) 16 | } 17 | \arguments{ 18 | \item{X}{Design matrix of training data; 19 | rows should be for the cases, and columns for different features.} 20 | 21 | \item{y}{Vector of class labels in training or test data set. 22 | Must be coded as non-negative integers, e.g., 1,2,\ldots,C for C classes.} 23 | 24 | \item{lambda}{A user-supplied lambda sequence for \code{glmnet} cross-validation. 25 | \code{NULL} by default, and it will be generated by \code{glmnet}.} 26 | 27 | \item{alpha}{The elasticnet mixing parameter for \code{glmnet}.} 28 | } 29 | \value{ 30 | A matrix - the initial state of the Markov chain for HTLR model fitting. 31 | } 32 | \description{ 33 | Generate initial Markov chain state with Lasso. 34 | } 35 | \references{ 36 | Jerome Friedman, Trevor Hastie, Robert Tibshirani (2010). 37 | Regularization Paths for Generalized Linear Models via Coordinate 38 | Descent. \emph{Journal of Statistical Software}, 33(1), 1-22. 39 | } 40 | \seealso{ 41 | \code{\link{bcbcsf_deltas}} 42 | } 43 | \keyword{internal} 44 | -------------------------------------------------------------------------------- /man/summary.htlr.fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mccoef.r 3 | \name{summary.htlr.fit} 4 | \alias{summary.htlr.fit} 5 | \title{Posterior Summaries} 6 | \usage{ 7 | \method{summary}{htlr.fit}( 8 | object, 9 | features = 1L:object$p, 10 | method = median, 11 | usedmc = get_sample_indice(dim(object$mcdeltas)[3], object$mc.param$iter.rmc), 12 | ... 13 | ) 14 | } 15 | \arguments{ 16 | \item{object}{An object of S3 class \code{htlr.fit}.} 17 | 18 | \item{features}{A vector of indices (int) or names (char) that specify the parameters we will look at. 19 | By default all parameters are selected.} 20 | 21 | \item{method}{A function that is used to aggregate the MCMC samples. The default is \code{median}, 22 | other built-in/customized statistical functions such as \code{mean}, \code{sd}, and \code{mad} 23 | can also be used.} 24 | 25 | \item{usedmc}{Indices of Markov chain iterations used for inference. By default all iterations are used.} 26 | 27 | \item{...}{Not used.} 28 | } 29 | \value{ 30 | A point summary of MCMC samples. 31 | } 32 | \description{ 33 | This function gives a summary of the posterior of parameters.
34 | } 35 | \examples{ 36 | set.seed(12345) 37 | data("colon") 38 | 39 | fit <- htlr(X = colon$X, y = colon$y, fsel = 1:100, iter = 20) 40 | summary(fit, features = 1:16) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/sampler.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLER_H 2 | #define SAMPLER_H 3 | 4 | #include "RcppArmadillo.h" 5 | #include "ars.h" 6 | 7 | class SamplerSgm : public SampleTarget 8 | { 9 | protected: 10 | 11 | int idx_; 12 | const int p_, K_; 13 | const double alpha_, log_aw_; 14 | const arma::vec vardeltas_; 15 | 16 | public: 17 | 18 | SamplerSgm(int p, const arma::vec &vardeltas, int K, double alpha, double log_aw); 19 | void set_idx(int i); 20 | }; 21 | 22 | class SamplerSgmNeg : public SamplerSgm 23 | { 24 | public: 25 | 26 | SamplerSgmNeg(int p, const arma::vec &vardeltas, int K, double alpha, double log_aw); 27 | void eval_logf(const double x, double &logf, double &dlogf) override; 28 | }; 29 | 30 | class SamplerSgmGhs : public SamplerSgm 31 | { 32 | public: 33 | 34 | SamplerSgmGhs(int p, const arma::vec &vardeltas, int K, double alpha, double log_aw); 35 | void eval_logf(const double x, double &logf, double &dlogf) override; 36 | }; 37 | 38 | class SamplerLogw : public SampleTarget 39 | { 40 | protected: 41 | 42 | const int p_, K_; 43 | const double nu_, s_, eta_; 44 | const arma::vec vardeltas_; 45 | 46 | public: 47 | 48 | SamplerLogw(int p, const arma::vec &vardeltas, int K, 49 | double nu, double s, double eta); 50 | 51 | void eval_logf(const double x, double &logf, double &dlogf) override; 52 | }; 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /man/gendata_MLR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gendata.r 3 | \name{gendata_MLR} 4 | \alias{gendata_MLR} 5 | \title{Generate Simulated Data with Multinomial Logistic Regression Model} 6 | \usage{ 7 | gendata_MLR(n, p, NC = 3, nu = 2, w = 1, X = NULL, betas = NULL) 8 | } 9 | \arguments{ 10 | \item{n}{Number of observations.} 11 | 12 | \item{p}{Number of features.} 13 | 14 | \item{NC}{Number of classes for response variables.} 15 | 16 | \item{nu, w}{If \code{betas} is not supplied (default), the regression coefficients are generated with 17 | a t prior with df = \code{nu} and scale = \code{sqrt(w)}; will be ignored if \code{betas} is supplied.} 18 | 19 | \item{X}{The design matrix; will be generated from a standard normal distribution if not supplied.} 20 | 21 | \item{betas}{User-supplied regression coefficients.} 22 | } 23 | \value{ 24 | A list containing input matrix \code{X}, response variables \code{y}, and regression coefficients \code{deltas}. 25 | } 26 | \description{ 27 | This function generates the response variables \code{y} given 28 | optionally supplied \code{X} using a multinomial logistic regression model.
29 | } 30 | \examples{ 31 | set.seed(12345) 32 | dat <- gendata_MLR(n = 100, p = 10) 33 | ggplot2::qplot(dat$y, bins = 6) 34 | corrplot::corrplot(cor(dat$X)) 35 | 36 | } 37 | \seealso{ 38 | \code{\link{gendata_FAM}} 39 | } 40 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: HTLR 2 | Version: 1.0 3 | Title: Bayesian Logistic Regression with Heavy-Tailed Priors 4 | Authors@R: c(person(given = "Longhai", family = "Li", role = c("aut"), email = "longhai@math.usask.ca", 5 | comment=c(ORCID="0000-0002-3074-8584")), person(given = "Steven", family = "Liu", role = c("aut", "cre"), 6 | email = "shinyu.lieu@gmail.com")) 7 | Description: Efficient Bayesian multinomial logistic regression based on heavy-tailed 8 | (hyper-LASSO, non-convex) priors. The posterior of coefficients and hyper-parameters 9 | is sampled with restricted Gibbs sampling for leveraging the high dimensionality and 10 | Hamiltonian Monte Carlo for handling the high correlation among coefficients. A detailed 11 | description of the method: Li and Yao (2018), 12 | Journal of Statistical Computation and Simulation, 88:14, 2827-2851, <doi:10.1080/00949655.2018.1490418>. 13 | License: GPL-3 14 | URL: https://longhaisk.github.io/HTLR/ 15 | BugReports: https://github.com/longhaiSK/HTLR/issues 16 | Depends: R (>= 3.6.2) 17 | Suggests: 18 | ggplot2, 19 | corrplot, 20 | testthat, 21 | bayesplot, 22 | knitr, 23 | rmarkdown 24 | Imports: 25 | Rcpp (>= 1.0.0), 26 | BCBCSF, 27 | glmnet, 28 | magrittr 29 | LinkingTo: Rcpp (>= 1.0.0), RcppArmadillo 30 | NeedsCompilation: yes 31 | LazyData: true 32 | Encoding: UTF-8 33 | RoxygenNote: 7.3.2 34 | VignetteBuilder: knitr 35 | -------------------------------------------------------------------------------- /man/htlr_predict.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{htlr_predict} 4 | \alias{htlr_predict} 5 | \title{Make Prediction on New Data (Advanced)} 6 | \usage{ 7 | htlr_predict( 8 | X_ts, 9 | fithtlr = NULL, 10 | deltas = NULL, 11 | burn = NULL, 12 | thin = 1, 13 | usedmc = NULL, 14 | rep.legacy = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{X_ts}{Matrix of values at which predictions are to be made.} 19 | 20 | \item{fithtlr}{Fitted HTLR model object.} 21 | 22 | \item{deltas}{The values of deltas (for example true deltas) used to make predictions; 23 | will override \code{fithtlr} if provided.} 24 | 25 | \item{burn, thin}{The first \code{burn} Markov chain (super)iterations will be discarded for prediction, 26 | and only every \code{thin}-th of the remaining iterations is used.} 27 | 28 | \item{usedmc}{Indices of Markov chain iterations used for inference. 29 | If supplied, \code{burn} and \code{thin} will be ignored.} 30 | 31 | \item{rep.legacy}{Whether to reproduce the (actually incorrect) results of the legacy version. 32 | See \url{https://github.com/longhaiSK/HTLR/issues/7}.} 33 | } 34 | \value{ 35 | A matrix of predictive probabilities, with rows for cases, cols for classes. 36 | } 37 | \description{ 38 | This function uses MCMC samples from a fitted \code{htlr.fit} object OR user-supplied 39 | regression coefficients to predict the class labels of test cases.
40 | } 41 | \keyword{internal} 42 | -------------------------------------------------------------------------------- /tests/testthat/test-util.r: -------------------------------------------------------------------------------- 1 | set.seed(1234) 2 | 3 | mat_80x90 <- matrix(rnorm(n = 80 * 90, mean = 100, sd = 50), 80, 90) 4 | mat_800x900 <- matrix(rnorm(n = 800 * 900, mean = 100, sd = 50), 800, 900) 5 | 6 | test_that("std works", { 7 | mat_80x90_nuj <- apply(mat_80x90, 2, median) 8 | mat_80x90_sdj <- apply(mat_80x90, 2, sd) 9 | mat_80x90_std_exp <- sweep(mat_80x90, 2, mat_80x90_nuj, "-") 10 | mat_80x90_std_exp <- sweep(mat_80x90_std_exp, 2, mat_80x90_sdj, "/") 11 | 12 | mat_80x90_std_act <- std(mat_80x90) 13 | expect_equal(c(mat_80x90_std_act), c(mat_80x90_std_exp)) 14 | expect_equal(attr(mat_80x90_std_act, "center"), mat_80x90_nuj) 15 | expect_equal(attr(mat_80x90_std_act, "scale"), mat_80x90_sdj) 16 | expect_equal(attr(mat_80x90_std_act, "nonsingular"), 1L:ncol(mat_80x90)) 17 | }) 18 | 19 | ## compute V (delta) 20 | comp_vardeltas_r <- function(deltas) 21 | { 22 | K <- ncol(deltas) 23 | SUMdeltas <- rowSums(deltas) 24 | SUMsqdeltas <- rowSums(deltas^2) 25 | SUMsqdeltas - SUMdeltas^2 / (K + 1) 26 | } 27 | 28 | test_that("comp_vardeltas works", { 29 | expect_equal(c(comp_vardeltas(mat_80x90)), comp_vardeltas_r(mat_80x90)) 30 | }) 31 | 32 | comp_lsl_r <- function(lv) 33 | { 34 | log_sum_exp(cbind(0, lv)) 35 | } 36 | 37 | test_that("comp_lsl works", { 38 | expect_equal(comp_lsl(mat_80x90), comp_lsl_r(mat_80x90)) 39 | }) 40 | 41 | log_normcons_r <- function(lv) 42 | { 43 | sum(comp_lsl_r(lv)) 44 | } 45 | 46 | test_that("log_normcons works", { 47 | expect_equal(log_normcons(mat_80x90), log_normcons_r(mat_80x90)) 48 | }) 49 | -------------------------------------------------------------------------------- /man/bcbcsf_deltas.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcbcsf.r 3 | \name{bcbcsf_deltas} 4 | \alias{bcbcsf_deltas} 5 | \title{Bias-corrected Bayesian classification initial state} 6 | \usage{ 7 | bcbcsf_deltas(X, y, alpha = 0) 8 | } 9 | \arguments{ 10 | \item{X}{Design matrix of training data; 11 | rows should be for the cases, and columns for different features.} 12 | 13 | \item{y}{Vector of class labels in training or test data set. 14 | Must be coded as non-negative integers, e.g., 1,2,\ldots,C for C classes.} 15 | 16 | \item{alpha}{The regularization proportion (between 0 and 1) for mixing the 17 | diagonal covariance estimates and the sample covariance estimated with the 18 | training samples. The default is 0, in which case the covariance matrix is assumed to be diagonal, 19 | which is the most robust choice.} 20 | } 21 | \value{ 22 | A matrix - the initial state of the Markov chain for HTLR model fitting. 23 | } 24 | \description{ 25 | Generate initial Markov chain state with Bias-corrected Bayesian classification. 26 | } 27 | \details{ 28 | Caveat: This method can be used only for continuous predictors such as gene expression profiles, 29 | and it does not make sense for categorical predictors such as SNP profiles. 30 | } 31 | \references{ 32 | Longhai Li (2012). Bias-corrected hierarchical Bayesian classification 33 | with a selected subset of high-dimensional features. 34 | \emph{Journal of the American Statistical Association}, 107(497), 120-134.
35 | } 36 | \seealso{ 37 | \code{\link{lasso_deltas}} 38 | } 39 | \keyword{internal} 40 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown.yaml 13 | 14 | permissions: read-all 15 | 16 | jobs: 17 | pkgdown: 18 | runs-on: ubuntu-latest 19 | # Only restrict concurrency for non-PR jobs 20 | concurrency: 21 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 22 | env: 23 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 24 | permissions: 25 | contents: write 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - uses: r-lib/actions/setup-pandoc@v2 30 | 31 | - uses: r-lib/actions/setup-r@v2 32 | with: 33 | use-public-rspm: true 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::pkgdown, local::. 38 | needs: website 39 | 40 | - name: Build site 41 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE, lazy = TRUE) 42 | shell: Rscript {0} 43 | 44 | - name: Deploy to GitHub pages 🚀 45 | if: github.event_name != 'pull_request' 46 | uses: JamesIves/github-pages-deploy-action@v4.5.0 47 | with: 48 | clean: false 49 | branch: gh-pages 50 | folder: docs 51 | -------------------------------------------------------------------------------- /.github/workflows/lint-changed-files.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | pull_request: 5 | branches: [main, master] 6 | 7 | name: lint-changed-files.yaml 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | lint-changed-files: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - uses: r-lib/actions/setup-r@v2 20 | 21 | - uses: r-lib/actions/setup-r-dependencies@v2 22 | with: 23 | extra-packages: | 24 | any::gh 25 | any::lintr 26 | any::purrr 27 | needs: check 28 | 29 | - name: Add lintr options 30 | run: | 31 | cat('\noptions(lintr.linter_file = ".lintr")\n', file = "~/.Rprofile", append = TRUE) 32 | shell: Rscript {0} 33 | 34 | - name: Install package 35 | run: R CMD INSTALL . 
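# The step below asks the GitHub API for the files touched by this PR, then
# lints only those files: every other file in the package is passed to
# lintr::lint_package() as an exclusion.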
36 | 37 | - name: Extract and lint files changed by this PR 38 | run: | 39 | files <- gh::gh("GET https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files") 40 | changed_files <- purrr::map_chr(files, "filename") 41 | all_files <- list.files(recursive = TRUE) 42 | exclusions_list <- as.list(setdiff(all_files, changed_files)) 43 | lintr::lint_package(exclusions = exclusions_list) 44 | shell: Rscript {0} 45 | env: 46 | LINTR_ERROR_ON_LINT: true 47 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check.yaml 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: any::rcmdcheck 47 | needs: check 48 | 49 | - uses: r-lib/actions/check-r-package@v2 50 | with: 51 | upload-snapshots: true 52 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 53 | -------------------------------------------------------------------------------- /man/diabetes392.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{diabetes392} 5 | \alias{diabetes392} 6 | \title{Pima Indians Diabetes} 7 | \format{ 8 | A list containing data matrix \code{X} and response vector \code{y}: 9 | \describe{ 10 | \item{X}{A matrix with 392 rows (observations) and 8 columns (features).} 11 | \item{y}{A binary vector where 1 indicates diabetes patients and 0 otherwise.} 12 | } 13 | } 14 | \source{ 15 | \url{https://www.kaggle.com/uciml/pima-indians-diabetes-database} 16 | } 17 | \usage{ 18 | data("diabetes392") 19 | } 20 | \description{ 21 | This dataset is originally from the National Institute of Diabetes and Digestive and Kidney Diseases. 22 | The objective of the dataset is to diagnostically predict whether or not a patient has diabetes, 23 | based on certain diagnostic measurements included in the dataset. Several constraints were placed 24 | on the selection of these instances from a larger database. In particular, all patients here are 25 | females at least 21 years old of Pima Indian heritage.
Different from the UCI original version, 26 | the dataset has been preprocessed such that rows with missing values are removed, and features are scaled. 27 | } 28 | \references{ 29 | Smith, J.W., Everhart, J.E., Dickson, W.C., Knowler, W.C., & Johannes, R.S. (1988). 30 | Using the ADAP learning algorithm to forecast the onset of diabetes mellitus. 31 | In \emph{Proceedings of the Symposium on Computer Applications and Medical Care} (pp. 261--265). 32 | IEEE Computer Society Press. 33 | } 34 | \seealso{ 35 | \url{https://avehtari.github.io/modelselection/diabetes.html} 36 | } 37 | \keyword{datasets} 38 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # HTLR 0.4-3 2 | 3 | ## New Features 4 | 5 | * Added option for users to keep samples of warmup iterations. 6 | 7 | ## Improvements 8 | 9 | * Bug fix [[#7](https://github.com/longhaiSK/HTLR/issues/7)]. 10 | 11 | # HTLR 0.4-2 12 | 13 | ## New Features 14 | 15 | * Added new function `std()` for feature standardization. 16 | 17 | * Added new dataset `diabetes392`. 18 | 19 | ## Improvements 20 | 21 | * `htlr()` and `predict.htlr.fit()` now handle non-matrix input, e.g. data.frame. 22 | 23 | * Minor speed improvement on `htlr()` and `gendata_FAM()`. 24 | 25 | * Updated documentation of `htlr()`. 26 | 27 | ## Note 28 | 29 | * Changed package license from GPLv2 to GPLv3. 30 | 31 | # HTLR 0.4-1 32 | 33 | ## Bug Fixes 34 | 35 | * Fixed potential memory leak issue in ARS module. 36 | 37 | # HTLR 0.4 38 | 39 | ## New Features 40 | 41 | * This is the first released version of revamped HTLR. 42 | 43 | * The Gibbs sampling routine is completely refactored using RcppArmadillo, which leads to a significant performance gain on multi-core/distributed machines. 44 | 45 | * The fitted model object is registered to S3 class `htlr.fit`, coming with a set of useful S3 methods `print()`, `summary()`, `predict()`, `as.matrix()`, and `nobs()`. 46 | 47 | * New model fitting function `htlr()` has a more accessible interface, while `htlr_fit()` and `htlr_predict()` are still kept for the best possible backward compatibility. 48 | 49 | * Better cohesion with `bayesplot` and other packages of the RStan toolchain. 50 | 51 | * Added new dataset `colon`. 52 | 53 | # HTLR 0.3 54 | 55 | ## Note 56 | 57 | * This is HTLR originally created by Longhai Li with legacy version number 3.1-1. 58 | 59 | * Not compatible with macOS. 60 | -------------------------------------------------------------------------------- /man/htlr_prior.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.r 3 | \name{htlr_prior} 4 | \alias{htlr_prior} 5 | \title{Generate Prior Configuration} 6 | \usage{ 7 | htlr_prior( 8 | ptype = c("t", "ghs", "neg"), 9 | df = 1, 10 | logw = -(1/df) * 10, 11 | eta = ifelse(df > 1, 3, 0), 12 | sigmab0 = 2000 13 | ) 14 | } 15 | \arguments{ 16 | \item{ptype}{The prior to be applied to the model. Either "t" (student-t, default), "ghs" (horseshoe), 17 | or "neg" (normal-exponential-gamma).} 18 | 19 | \item{df}{The degrees of freedom (aka alpha) of t/ghs/neg prior for coefficients.} 20 | 21 | \item{logw}{The log scale of priors for coefficients.} 22 | 23 | \item{eta}{The \code{sd} of the normal prior for logw. When it is set to 0, logw is fixed.
24 | Otherwise, logw is assigned a normal prior and it will be updated during sampling.} 25 | 26 | \item{sigmab0}{The \code{sd} of the normal prior for the intercept.} 27 | } 28 | \value{ 29 | A configuration list containing \code{ptype}, \code{alpha}, \code{logw}, \code{eta}, and \code{sigmab0}. 30 | } 31 | \description{ 32 | Configure prior hyper-parameters for HTLR model fitting. 33 | } 34 | \details{ 35 | The output is a configuration list which is to be passed to the \code{prior} argument of \code{htlr}. 36 | For novice users, you only need to specify the prior type and degrees of freedom, then the other hyper-parameters 37 | will be chosen automatically. For advanced users, you can supply each prior hyper-parameter yourself. 38 | For suggestions on picking hyper-parameters, see the references. 39 | } 40 | \references{ 41 | Longhai Li and Weixin Yao (2018). Fully Bayesian Logistic Regression 42 | with Hyper-Lasso Priors for High-dimensional Feature Selection. 43 | \emph{Journal of Statistical Computation and Simulation}, 88:14, 2827-2851. 44 | } 45 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | sample_trunc_norm <- function(n, lb, ub, verbose = FALSE) { 5 | .Call('_HTLR_sample_trunc_norm', PACKAGE = 'HTLR', n, lb, ub, verbose) 6 | } 7 | 8 | sample_post_ichi <- function(n, sigmasq, alpha1, alpha0 = 1E-5, w0 = 1E-5, verbose = FALSE) { 9 | .Call('_HTLR_sample_post_ichi', PACKAGE = 'HTLR', n, sigmasq, alpha1, alpha0, w0, verbose) 10 | } 11 | 12 | sample_trunc_beta <- function(n, alpha, beta, lb = 0, ub = 1, verbose = FALSE) { 13 | .Call('_HTLR_sample_trunc_beta', PACKAGE = 'HTLR', n, alpha, beta, lb, ub, verbose) 14 | } 15 | 16 | htlr_fit_helper <- function(p, K, n, X, ymat, ybase, ptype, alpha, s, eta, iters_rmc, iters_h, thin, leap_L, leap_L_h, leap_step, hmc_sgmcut, deltas, sigmasbt, keep_warmup_hist, silence, legacy) { 17 | .Call('_HTLR_htlr_fit_helper', PACKAGE = 'HTLR', p, K, n, X, ymat, ybase, ptype, alpha, s, eta, iters_rmc, iters_h, thin, leap_L, leap_L_h, leap_step, hmc_sgmcut, deltas, sigmasbt, keep_warmup_hist, silence, legacy) 18 | } 19 | 20 | log_sum_exp <- function(A) { 21 | .Call('_HTLR_log_sum_exp', PACKAGE = 'HTLR', A) 22 | } 23 | 24 | spl_sgm_ig <- function(alpha, K, w, vardeltas) { 25 | .Call('_HTLR_spl_sgm_ig', PACKAGE = 'HTLR', alpha, K, w, vardeltas) 26 | } 27 | 28 | std_helper <- function(A) { 29 | .Call('_HTLR_std_helper', PACKAGE = 'HTLR', A) 30 | } 31 | 32 | comp_vardeltas <- function(deltas) { 33 | .Call('_HTLR_comp_vardeltas', PACKAGE = 'HTLR', deltas) 34 | } 35 | 36 | comp_lsl <- function(A) { 37 | .Call('_HTLR_comp_lsl', PACKAGE = 'HTLR', A) 38 | } 39 | 40 | log_normcons <- function(A) { 41 | .Call('_HTLR_log_normcons', PACKAGE = 'HTLR', A) 42 | } 43 | 44 | gendata_FAM_helper <- function(n, muj, muj_rep, A, sd_g, stdx) { 45 | .Call('_HTLR_gendata_FAM_helper', PACKAGE = 'HTLR', n, muj, muj_rep, A, sd_g, stdx) 46 | } 47 | -------------------------------------------------------------------------------- /man/std.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/std.R 3 | \name{std} 4 | \alias{std} 5 | \title{Standardizes a Design Matrix} 6 | \usage{ 7 | std(X,
tol = 1e-06) 8 | } 9 | \arguments{ 10 | \item{X}{Design matrix, of dimension \code{nobs} by \code{nvars}; each row is an observation vector; 11 | can also be an object that can be coerced to a matrix, e.g. a data.frame.} 12 | 13 | \item{tol}{The tolerance value; a column of \code{X} is considered as singular if the \code{sd} 14 | of its entries (observations) is less than \code{tol}. Singular columns will be dropped from the output.} 15 | } 16 | \value{ 17 | The standardized design matrix with the following attributes: 18 | \describe{ 19 | \item{nonsingular}{Indices of non-singular columns.} 20 | \item{center}{Median of each non-singular column which is used for standardization.} 21 | \item{scale}{Standard deviation of each non-singular column which is used for standardization.} 22 | } 23 | } 24 | \description{ 25 | This function accepts a design matrix and returns a standardized version of that matrix; 26 | the statistics of each column, such as \code{median} and \code{sd}, are also provided. 27 | } 28 | \details{ 29 | For each column of \code{X}, the standardization is done by first subtracting its median, 30 | then dividing by its sample standard deviation, while the original version in \code{ncvreg} uses 31 | mean and population standard deviation. Its speed is slower than \code{ncvreg} because of the 32 | complexity of median finding, but still substantially faster than \code{scale()} provided by R base. 33 | } 34 | \examples{ 35 | set.seed(123) 36 | mat <- matrix(rnorm(n = 80 * 90, mean = 100, sd = 50), 80, 90) 37 | mat \%>\% as.numeric() \%>\% ggplot2::qplot(bins = 30, xlab = '') 38 | mat \%>\% std() \%>\% as.numeric() \%>\% ggplot2::qplot(bins = 30, xlab = '') 39 | 40 | } 41 | \seealso{ 42 | \url{http://pbreheny.github.io/ncvreg/reference/std.html} 43 | } 44 | \author{ 45 | Patrick Breheny (original) \cr Steven Liu (modification) 46 | } 47 | -------------------------------------------------------------------------------- /man/gendata_FAM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gendata.r 3 | \name{gendata_FAM} 4 | \alias{gendata_FAM} 5 | \title{Generate Simulated Data with Factor Analysis Model} 6 | \usage{ 7 | gendata_FAM(n, muj, A, sd_g = 0, stdx = FALSE) 8 | } 9 | \arguments{ 10 | \item{n}{Number of observations.} 11 | 12 | \item{muj}{C by p matrix, with row c representing y = c, and column j representing \eqn{x_j}. 13 | Used to specify \code{y}.} 14 | 15 | \item{A}{Factor loading matrix of size p by p, see details.} 16 | 17 | \item{sd_g}{Numeric value indicating noise level \eqn{\delta}, see details.} 18 | 19 | \item{stdx}{Logical; if \code{TRUE}, data \code{X} is standardized to have \code{mean = 0} and \code{sd = 1}.} 20 | } 21 | \value{ 22 | A list containing input matrix \code{X}, response variables \code{y}, 23 | covariance matrix \code{SGM} and \code{muj} (standardized if \code{stdx = TRUE}). 24 | } 25 | \description{ 26 | This function generates inputs \code{X} given the response variable \code{y} 27 | using a multivariate normal model. 28 | } 29 | \details{ 30 | The means of each covariate \eqn{x_j} depend on \code{y}, as specified by the 31 | matrix \code{muj}; the covariance matrix \eqn{\Sigma} of the multivariate normal 32 | is equal to \eqn{AA^t + \delta^2 I}, where \code{A} is the factor loading matrix 33 | and \eqn{\delta} is the noise level.
34 | } 35 | \examples{ 36 | ## feature #1: marginally related feature 37 | ## feature #2: marginally unrelated feature, but feature #2 is correlated with feature #1 38 | ## feature #3-5: marginally related features and also internally correlated 39 | ## feature #6-10: noise features without relationship with the y 40 | 41 | set.seed(12345) 42 | n <- 100 43 | p <- 10 44 | 45 | means <- rbind( 46 | c(0, 1, 0), 47 | c(0, 0, 0), 48 | c(0, 0, 1), 49 | c(0, 0, 1), 50 | c(0, 0, 1) 51 | ) * 2 52 | 53 | means <- rbind(means, matrix(0, p - 5, 3)) 54 | 55 | A <- diag(1, p) 56 | A[1:5, 1:3] <- rbind( 57 | c(1, 0, 0), 58 | c(2, 1, 0), 59 | c(0, 0, 1), 60 | c(0, 0, 1), 61 | c(0, 0, 1) 62 | ) 63 | 64 | dat <- gendata_FAM(n, means, A, sd_g = 0.5, stdx = TRUE) 65 | ggplot2::qplot(dat$y, bins = 6) 66 | corrplot::corrplot(cor(dat$X)) 67 | 68 | } 69 | \seealso{ 70 | \code{\link{gendata_MLR}} 71 | } 72 | -------------------------------------------------------------------------------- /src/sampler.cpp: -------------------------------------------------------------------------------- 1 | #include "sampler.h" 2 | 3 | SamplerSgm::SamplerSgm(int p, const arma::vec &vardeltas, int K, double alpha, double log_aw) 4 | : p_(p), K_(K), alpha_(alpha), log_aw_(log_aw), vardeltas_(vardeltas) 5 | { 6 | } 7 | 8 | void SamplerSgm::set_idx(int i) 9 | { 10 | idx_ = i; 11 | } 12 | 13 | SamplerSgmNeg::SamplerSgmNeg(int p, const arma::vec &vardeltas, int K, double alpha, double log_aw) 14 | : SamplerSgm(p, vardeltas, K, alpha, log_aw) 15 | { 16 | } 17 | 18 | void SamplerSgmNeg::eval_logf(const double x, double &logf, double &dlogf) 19 | { 20 | logf = -(K_ / 2.0 - 1.0) * x; 21 | dlogf = -(K_ / 2.0 - 1.0); 22 | 23 | double vexi = vardeltas_[idx_] / 2.0 / exp(x); 24 | logf -= vexi; 25 | dlogf += vexi; 26 | 27 | double eximinlogaw = exp(x - log_aw_); 28 | logf -= (alpha_ / 2.0 + 1.0) * log(1.0 + 2.0 * eximinlogaw); 29 | dlogf -= (alpha_ + 2.0) * eximinlogaw / (1.0 + 2.0 * eximinlogaw); 30 | } 31 | 32 | SamplerSgmGhs::SamplerSgmGhs(int p, const arma::vec &vardeltas, int K, double alpha, double log_aw) 33 | : SamplerSgm(p, vardeltas, K, alpha, log_aw) 34 | { 35 | } 36 | 37 | void SamplerSgmGhs::eval_logf(const double x, double &logf, double &dlogf) 38 | { 39 | logf = -(K_ - 1.0) / 2.0 * x; 40 | dlogf = -(K_ - 1.0) / 2.0; 41 | 42 | double vexi = vardeltas_[idx_] / 2.0 / exp(x); 43 | logf -= vexi; 44 | dlogf += vexi; 45 | 46 | double eximinlogaw = exp(x - log_aw_); 47 | logf -= (alpha_ + 1.0) / 2.0 * log(1.0 + eximinlogaw); 48 | dlogf -= (alpha_ + 1.0) / 2.0 * eximinlogaw / (1.0 + eximinlogaw); 49 | } 50 | 51 | SamplerLogw::SamplerLogw(int p, const arma::vec &vardeltas, int K, 52 | double nu, double s, double eta) 53 | : p_(p), K_(K), nu_(nu), s_(s), eta_(eta), vardeltas_(vardeltas) 54 | { 55 | } 56 | 57 | void SamplerLogw::eval_logf(const double x, double &logf, double &dlogf) 58 | { 59 | double w = exp(x); 60 | double sdu = (x - s_) / eta_; 61 | double wnu = w * nu_; 62 | 63 | dlogf = arma::accu(wnu / (vardeltas_ + wnu)); 64 | logf = arma::accu(arma::log(vardeltas_ + wnu)); 65 | 66 | logf *= (-(nu_ + K_) / 2); 67 | dlogf *= (-(nu_ + K_) / 2); 68 | logf += p_ * nu_ / 2 * x; 69 | dlogf += p_ * nu_ / 2; 70 | logf += -R_pow_di(sdu, 2) / 2 - log(eta_); 71 | dlogf += -sdu / eta_; 72 | } 73 | -------------------------------------------------------------------------------- /R/order.r: -------------------------------------------------------------------------------- 1 | #' Order features by Kruskal-Wallis test 2 | #' 3 | #' This function orders 
all features by Kruskal-Wallis test p-values.
4 | #'
5 | #' @param X Input matrix, of dimension \code{nobs} by \code{nvars}; each row is an observation vector.
6 | #'
7 | #' @param y Vector of response variables.
8 | #'
9 | #' @return Order of all features of length \code{nvars}.
10 | #'
11 | #' @export
12 | #'
13 | #' @keywords internal
14 | #'
15 | #' @examples
16 | #' data("diabetes392")
17 | #' order_kruskal(diabetes392$X, diabetes392$y)
18 | #'
19 | order_kruskal <- function(X, y)
20 | {
21 |   pv_kt <- function(x, g) { kruskal.test(x, g)$p.value }
22 |   order(apply(X, 2, pv_kt, g = y))
23 | }
24 |
25 | #' Plain order function
26 | #'
27 | #' A placeholder order function that returns the original order of given features.
28 | #'
29 | #' @param X Input matrix, of dimension \code{nobs} by \code{nvars}; each row is an observation vector.
30 | #'
31 | #' @param y Vector of response variables.
32 | #'
33 | #' @return Sequence starting from 1 to \code{nvars}.
34 | #'
35 | #' @export
36 | #'
37 | #' @keywords internal
38 | #'
39 | order_plain <- function(X, y) { 1L:ncol(X) }
40 |
41 | #' Order features by F-statistic
42 | #'
43 | #' This function orders all features by ANOVA F-test p-values.
44 | #'
45 | #' @param X Input matrix, of dimension \code{nobs} by \code{nvars}; each row is an observation vector.
46 | #'
47 | #' @param y Vector of response variables.
48 | #'
49 | #' @return Order of all features of length \code{nvars}.
50 | #'
51 | #' @export
52 | #'
53 | #' @keywords internal
54 | #'
55 | #' @examples
56 | #' data("diabetes392")
57 | #' order_ftest(diabetes392$X, diabetes392$y)
58 | #'
59 | order_ftest <- function(X, y) { order(panova(X, y)) }
60 |
61 | # Helper function for order_ftest()
62 | panova <- function(X, y)
63 | {
64 |   n <- length(y)
65 |   nos_g <- as.vector(table(y))
66 |   G <- length(nos_g)
67 |
68 |   gsum_X <- rowsum(X, y)
69 |   sum_X2 <- colSums(X ^ 2)
70 |   sum_gsum2 <- colSums(gsum_X ^ 2 / nos_g)
71 |
72 |   pvars <- (sum_X2 - sum_gsum2) / (n - G) # pooled variances
73 |
74 |   sum_X <- colSums(X)
75 |   gvars <- (sum_gsum2 - sum_X ^ 2 / n) / (G - 1) # variances between groups
76 |   # F-statistic
77 |   fstats <- gvars / pvars
78 |
79 |   pvalues <- 1 - pf(fstats, df1 = G - 1, df2 = n - G, ncp = 0)
80 |   pvalues
81 | }
82 |
-------------------------------------------------------------------------------- /src/ars.h: --------------------------------------------------------------------------------
1 | #ifndef ARS_H
2 | #define ARS_H
3 |
4 | #include <Rcpp.h>
5 |
6 | // An interface of sampling target, to be extended by users (see the sketch below).
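// A minimal usage sketch (assumption: it mirrors the pattern of the exported
// demos in src/ars_demo.cpp; the class name NormTarget is hypothetical):
// subclass SampleTarget, fill in log f(x) and d/dx log f(x) inside eval_logf(),
// then hand the target to the ARS class declared below and call Sample().
// For a standard normal target:
//
//   class NormTarget : public SampleTarget
//   {
//   public:
//     void eval_logf(const double x, double &logf, double &dlogf) override
//     {
//       logf = -x * x / 2; // log density, up to an additive constant
//       dlogf = -x;        // its derivative
//     }
//   };
//
//   // NormTarget target; ARS(100, &target, 0).Sample() then draws 100 values.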
7 | class SampleTarget
8 | {
9 | public:
10 |   virtual void eval_logf(const double x, double &logf, double &dlogf) = 0;
11 |   virtual ~SampleTarget() = default;
12 | };
13 |
14 | // Main ARS class
15 | class ARS
16 | {
17 | private:
18 |
19 |   const int n_; // number of samples
20 |
21 |   const double lb_, ub_; // lower and upper bounds of logf
22 |   //const double ini_tpoint; // initial tangent point
23 |   const bool verbose_; // flag whether to print ARS information
24 |
25 |   /* optional User Settings */
26 |
27 |   const int max_nhull_; // maximum number of pieces of linear hulls
28 |   double stepout_; // size of stepout in initializing linear hulls
29 |
30 |   // the smallest difference of derivatives that can be thought of as the same
31 |   // so that we don't need to insert one more hull
32 |   double tol_dlogf_is0_, tol_ddlogf_is0_;
33 |
34 |   /* global working vectors used to represent linear hulls */
35 |
36 |   double
37 |     *tpoints_, // tangent points
38 |     *lws_, // log integrals of exp hulls
39 |     *lowerbounds_, *upperbounds_, // bounds of hulls
40 |     *logfvs_, *dlogfvs_, // values of logf and dlogf at tangent points
41 |     *slopes_leftsq_, *slopes_ritesq_; // slopes of left and right squeezings
42 |
43 |   int *lefthulls_, *ritehulls_; // indices of left and right hulls
44 |   int no_hulls_; // total number of hulls
45 |
46 |   double newx_, newlogf_, newdlogf_; // a new tangent point to be inserted
47 |   int h_; // index of the hull where newx is from
48 |
49 |   // the sampling target object which provides function for evaluating logf and dlogf
50 |   SampleTarget *target_;
51 |
52 |   void update_hulls(const int h, const double newx, const double logfv, const double dlogfv);
53 |   double eval_upperhull(const int h, const double newx);
54 |   double eval_lowerhull(const int h, const double newx);
55 |   void Initialize();
56 |
57 | public:
58 |
59 |   ARS(int n, SampleTarget *target, double ini_tpoint,
60 |       double lb = R_NegInf, double ub = R_PosInf,
61 |       bool verbose = false, int max_nhull = 1000, double stepout = 10,
62 |       double tol_dlogf_is0 = 1E-5, double tol_ddlogf_is0 = 1E-5);
63 |
64 |   ~ARS();
65 |
66 |   Rcpp::NumericVector Sample();
67 |
68 | };
69 |
70 | #endif
71 |
-------------------------------------------------------------------------------- /R/data.R: --------------------------------------------------------------------------------
1 | #' Colon Tissues
2 | #'
3 | #' In this dataset, expression levels of 40 tumor and 22 normal colon tissues
4 | #' for 6500 human genes are measured using the Affymetrix technology.
5 | #' A selection of 2000 genes with the highest minimal intensity across the samples
6 | #' has been made by Alon et al. (1999). The data is preprocessed by carrying out
7 | #' a base 10 logarithmic transformation and standardizing each tissue sample to
8 | #' zero mean and unit variance across the genes.
9 | #'
10 | #' @docType data
11 | #'
12 | #' @keywords datasets
13 | #'
14 | #' @format A list containing data matrix \code{X} and response vector \code{y}:
15 | #' \describe{
16 | #'   \item{X}{A matrix with 62 rows (observations) and 2000 columns (features).}
17 | #'   \item{y}{A binary vector where 0 indicates normal colon tissues and 1 indicates tumor colon tissues.}
18 | #' }
19 | #'
20 | #' @usage data("colon")
21 | #'
22 | #' @references Dettling, Marcel, and Peter Bühlmann (2002). Supervised clustering of genes.
23 | #' \emph{Genome biology}, 3(12), research0069-1.
24 | #'
25 | "colon"
26 |
27 | #' Pima Indians Diabetes
28 | #'
29 | #' This dataset is originally from the National Institute of Diabetes and Digestive and Kidney Diseases.
30 | #' The objective of the dataset is to diagnostically predict whether or not a patient has diabetes,
31 | #' based on certain diagnostic measurements included in the dataset. Several constraints were placed
32 | #' on the selection of these instances from a larger database. In particular, all patients here are
33 | #' females at least 21 years old of Pima Indian heritage. Unlike the original UCI version,
34 | #' the dataset has been preprocessed such that rows with missing values are removed, and features are scaled.
35 | #'
36 | #' @docType data
37 | #'
38 | #' @keywords datasets
39 | #'
40 | #' @format A list containing data matrix \code{X} and response vector \code{y}:
41 | #' \describe{
42 | #'   \item{X}{A matrix with 392 rows (observations) and 8 columns (features).}
43 | #'   \item{y}{A binary vector where 1 indicates diabetes patients and 0 otherwise.}
44 | #' }
45 | #'
46 | #' @usage data("diabetes392")
47 | #'
48 | #' @source \url{https://www.kaggle.com/uciml/pima-indians-diabetes-database}
49 | #'
50 | #' @seealso \url{https://avehtari.github.io/modelselection/diabetes.html}
51 | #'
52 | #' @references Smith, J.W., Everhart, J.E., Dickson, W.C., Knowler, W.C., & Johannes, R.S. (1988).
53 | #' Using the ADAP learning algorithm to forecast the onset of diabetes mellitus.
54 | #' \emph{In Proceedings of the Symposium on Computer Applications and Medical Care} (pp. 261--265).
55 | #' IEEE Computer Society Press.
56 | #'
57 | "diabetes392"
58 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 |
2 |
3 |
4 | ## HTLR: Bayesian Logistic Regression with Heavy-tailed Priors
5 |
6 |
7 |
8 | [![CRAN
9 | status](https://www.r-pkg.org/badges/version/HTLR)](https://CRAN.R-project.org/package=HTLR)
10 | [![build](https://github.com/longhaiSK/HTLR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/longhaiSK/HTLR/actions/workflows/R-CMD-check.yaml)
11 | [![Lifecycle:
12 | stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable)
13 | [![downloads](https://cranlogs.r-pkg.org/badges/grand-total/HTLR)](https://cran.r-project.org/package=HTLR)
14 |
15 |
16 |
17 | *HTLR* performs classification and feature selection by fitting Bayesian
18 | polychotomous (multiclass, multinomial) logistic regression models based
19 | on heavy-tailed priors with small degrees of freedom. This package is
20 | suitable for classification with high-dimensional features, such as gene
21 | expression profiles. Heavy-tailed priors can impose stronger shrinkage
22 | (compared to Gaussian and Laplace priors) to the coefficients associated
23 | with a large number of useless features, but still allow coefficients of
24 | a small number of useful features to stand out with little punishment.
25 | Heavy-tailed priors can also automatically make selection within a large
26 | number of correlated features. The posterior of coefficients and
27 | hyperparameters is sampled with restricted Gibbs sampling for
28 | leveraging high-dimensionality and Hamiltonian Monte Carlo for handling
29 | high-correlations among coefficients.
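A minimal usage sketch (the functions and the bundled `colon` demo data below
are all exported by this package; the settings are for illustration only):

``` r
library(HTLR)
data("colon")  # gene expression data with binary tissue labels

# fit a model with a heavy-tailed (student-t) prior on a subset of features
fit <- htlr(X = colon$X, y = colon$y, fsel = 1:100, prior = "t", iter = 2000)

# posterior predictive probabilities and class labels
prob <- predict(fit, colon$X, type = "response")
yhat <- predict(fit, colon$X, type = "class")
```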
30 |
31 | ## Installation
32 |
33 | [CRAN](https://CRAN.R-project.org) version (recommended):
34 |
35 | ``` r
36 | install.packages("HTLR")
37 | ```
38 |
39 | Development version on [GitHub](https://github.com/):
40 |
41 | ``` r
42 | # install.packages("devtools")
43 | devtools::install_github("longhaiSK/HTLR")
44 | ```
45 |
46 | This package uses the library [Armadillo](https://arma.sourceforge.net/) for
47 | carrying out most of the matrix operations; you may get speed benefits from
48 | using an alternative BLAS library such as
49 | [ATLAS](https://math-atlas.sourceforge.net/),
50 | [OpenBLAS](https://www.openblas.net/) or Intel MKL. Check out this
51 | [post](https://brettklamer.com/diversions/statistical/faster-blas-in-r/)
52 | for the comparison and the installation guide.
53 |
54 | ## Reference
55 |
56 | Longhai Li and Weixin Yao (2018). Fully Bayesian Logistic Regression
57 | with Hyper-Lasso Priors for High-dimensional Feature Selection.
58 | *Journal of Statistical Computation and Simulation*, 88:14, 2827-2851, [the published
59 | version](https://www.tandfonline.com/doi/full/10.1080/00949655.2018.1490418),
60 | or [arXiv version](https://arxiv.org/pdf/1405.3319.pdf).
-------------------------------------------------------------------------------- /src/utils.cpp: --------------------------------------------------------------------------------
1 | #include "utils.h"
2 |
3 | double log_sum_exp(const arma::vec &a)
4 | {
5 |   double m = a.max();
6 |   return log(accu(arma::exp(a - m))) + m;
7 | }
8 |
9 | // [[Rcpp::export]]
10 | arma::vec log_sum_exp(const arma::mat &A)
11 | {
12 |   arma::colvec m = arma::max(A, 1);
13 |   return
14 |     arma::log(
15 |       row_sum(
16 |         arma::exp(
17 |           A.each_col() - m
18 |         )
19 |       )
20 |     ) + m;
21 | }
22 |
23 | arma::mat find_normlv(const arma::mat &lv)
24 | {
25 |   return lv.each_col() - log_sum_exp(lv);
26 | }
27 |
28 | // [[Rcpp::export]]
29 | arma::vec spl_sgm_ig(double alpha, int K, double w, const arma::vec &vardeltas)
30 | {
31 |   arma::vec rn_gamma = Rcpp::rgamma(vardeltas.n_elem, (alpha + K) / 2);
32 |   return (1.0 / rn_gamma % (alpha * w + vardeltas) / 2.0);
33 | }
34 |
35 | // [[Rcpp::export]]
36 | Rcpp::List std_helper(const arma::mat &A)
37 | {
38 |   arma::rowvec nuj = arma::median(A);
39 |   arma::rowvec sdj = arma::stddev(A);
40 |   arma::mat A_std = A.each_row() - nuj;
41 |   A_std.each_row() /= sdj;
42 |
43 |   return Rcpp::List::create(
44 |     Rcpp::Named("median") = nuj,
45 |     Rcpp::Named("sd") = sdj,
46 |     Rcpp::Named("X") = A_std
47 |   );
48 | }
49 |
50 | // [[Rcpp::export]]
51 | arma::vec comp_vardeltas(const arma::mat &deltas)
52 | {
53 |   arma::vec sum_deltas = row_sum(deltas);
54 |   arma::vec sum_sq_deltas = row_sum(arma::square(deltas));
55 |   return sum_sq_deltas - (arma::square(sum_deltas) / (deltas.n_cols + 1));
56 | }
57 |
58 | // [[Rcpp::export]]
59 | arma::vec comp_lsl(arma::mat &A)
60 | {
61 |   A.insert_cols(0, 1);
62 |   return log_sum_exp(A);
63 | }
64 |
65 | // [[Rcpp::export]]
66 | double log_normcons(arma::mat &A)
67 | {
68 |   return arma::accu(comp_lsl(A));
69 | }
70 |
71 | // [[Rcpp::export]]
72 | Rcpp::List gendata_FAM_helper(int n, arma::mat &muj, const arma::mat &muj_rep, const arma::mat &A, double sd_g, bool stdx)
73 | {
74 |   int p = muj.n_rows;
75 |   // int c = muj.n_cols;
76 |   int k = A.n_cols;
77 |
78 |   arma::vec rn_nk = Rcpp::rnorm(n * k);
79 |   arma::mat KN = arma::reshape(rn_nk, k, n);
80 |   arma::mat X = A * KN + muj_rep;
81 |
82 |   arma::vec rn_np = Rcpp::rnorm(n * p);
83 |   arma::mat NP = arma::reshape(rn_np, arma::size(X));
84 |   X += NP * sd_g;
85 |
86 |   arma::mat SGM = A * A.t() +
arma::eye(p, p) * R_pow_di(sd_g, 2);
87 |
88 |   if (stdx)
89 |   {
90 |     arma::vec mux = arma::mean(muj, 1);
91 |     arma::vec sdx = arma::sqrt(SGM.diag() + arma::var(muj, 1, 1));
92 |     muj.each_col() -= mux;
93 |     muj.each_col() /= sdx;
94 |     X.each_col() -= mux;
95 |     X.each_col() /= sdx;
96 |     SGM.each_col() /= sdx;
97 |     SGM.each_row() /= sdx.t();
98 |   }
99 |
100 |   return Rcpp::List::create(
101 |     Rcpp::Named("X") = X.t(),
102 |     Rcpp::Named("muj") = muj,
103 |     Rcpp::Named("SGM") = SGM
104 |   );
105 | }
106 |
-------------------------------------------------------------------------------- /README.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r, include = FALSE}
8 | knitr::opts_chunk$set(
9 |   collapse = TRUE,
10 |   comment = "#>",
11 |   fig.path = "man/figures/README-",
12 |   out.width = "100%"
13 | )
14 | ```
15 | ## HTLR: Bayesian Logistic Regression with Heavy-tailed Priors
16 |
17 |
18 | [![CRAN status](https://www.r-pkg.org/badges/version/HTLR)](https://CRAN.R-project.org/package=HTLR)
19 | [![build](https://github.com/longhaiSK/HTLR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/longhaiSK/HTLR/actions/workflows/R-CMD-check.yaml)
20 | [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable)
21 | [![downloads](https://cranlogs.r-pkg.org/badges/grand-total/HTLR)](https://cran.r-project.org/package=HTLR)
22 |
23 |
24 |
25 | *HTLR* performs classification and feature selection by fitting Bayesian polychotomous (multiclass, multinomial) logistic regression models based on heavy-tailed priors with small degrees of freedom. This package is suitable for classification with high-dimensional features, such as gene expression profiles. Heavy-tailed priors can impose stronger shrinkage (compared to Gaussian and Laplace priors) to the coefficients associated with a large number of useless features, but still allow coefficients of a small number of useful features to stand out with little punishment. Heavy-tailed priors can also automatically make selection within a large number of correlated features. The posterior of coefficients and hyperparameters is sampled with restricted Gibbs sampling for leveraging high-dimensionality and Hamiltonian Monte Carlo for handling high-correlations among coefficients.
26 |
27 | ## Installation
28 |
29 | [CRAN](https://CRAN.R-project.org) version (recommended):
30 |
31 | ``` r
32 | install.packages("HTLR")
33 | ```
34 |
35 | Development version on [GitHub](https://github.com/):
36 |
37 | ``` r
38 | # install.packages("devtools")
39 | devtools::install_github("longhaiSK/HTLR")
40 |
41 | ```
42 |
43 | This package uses the library [Armadillo](https://arma.sourceforge.net/) for carrying out most of the matrix operations; you may get speed benefits from using an alternative BLAS library such as [ATLAS](https://math-atlas.sourceforge.net/), [OpenBLAS](https://www.openblas.net/) or Intel MKL. Check out this [post](https://brettklamer.com/diversions/statistical/faster-blas-in-r/) for the comparison and the installation guide.
44 |
45 | ## Reference
46 |
47 | Longhai Li and Weixin Yao (2018). Fully Bayesian Logistic Regression with Hyper-Lasso Priors for High-dimensional Feature Selection.
*Journal of Statistical Computation and Simulation*, 2018, 88:14, 2827-2851, [the published version](https://www.tandfonline.com/doi/full/10.1080/00949655.2018.1490418), or [arXiv version](https://arxiv.org/pdf/1405.3319.pdf).
48 |
-------------------------------------------------------------------------------- /src/gibbs.h: --------------------------------------------------------------------------------
1 | #ifndef GIBBS_H
2 | #define GIBBS_H
3 | #define ARMA_NO_DEBUG
4 |
5 | #include "RcppArmadillo.h"
6 | #include "utils.h"
7 | #include "sampler.h"
8 |
9 | class Fit
10 | {
11 | private:
12 |
13 |   // data
14 |   const int p_, K_, C_, n_;
15 |   const arma::mat X_;
16 |   const arma::mat ymat_;
17 |   const arma::uvec ybase_;
18 |
19 |   // prior
20 |   const std::string ptype_;
21 |   const double alpha_, s_, eta_;
22 |
23 |   // sampling
24 |   const int iters_rmc_, iters_h_, thin_, leap_L_, leap_L_h_;
25 |   const double leap_step_;
26 |   const double sgmsq_cut_;
27 |
28 |   // stepsize for HMC
29 |   const arma::rowvec DDNloglike_;
30 |
31 |   // fit result
32 |   arma::cube mc_deltas_;
33 |   arma::mat mc_sigmasbt_, mc_var_deltas_;
34 |   arma::vec mc_logw_, mc_loglike_, mc_uvar_, mc_hmcrej_;
35 |
36 |   // other control or result
37 |   const bool keep_warmup_hist_;
38 |   const int silence_;
39 |   const bool legacy_;
40 |
41 |   // internal
42 |   const int nvar_;
43 |   double logw_;
44 |   int nuvar_, nfvar_;
45 |   arma::uvec ids_update_, ids_fix_, iup_;
46 |
47 |   arma::mat
48 |     lv_, lv_old_, lv_fix_, norm_lv_,
49 |     pred_prob_, pred_prob_old_,
50 |     DNloglike_, DNloglike_old_,
51 |     deltas_, deltas_old_, momt_,
52 |     DNlogprior_, DNlogprior_old_, DNlogpost_;
53 |
54 |   arma::vec
55 |     sumsq_deltas_, sumsq_deltas_old_,
56 |     sum_deltas_, sum_deltas_old_,
57 |     var_deltas_, var_deltas_old_,
58 |     step_sizes_, sigmasbt_;
59 |   double loglike_, loglike_old_;
60 |
61 |   arma::uvec GetIdsUpdate() {return iup_;}
62 |   arma::uvec GetIdsFix() {return ids_fix_.head(nfvar_);}
63 |
64 |   void WhichUpdate(bool init = false);
65 |   void DetachFixlv();
66 |   void UpdatePredProb();
67 |   void UpdateDNlogLike();
68 |   void UpdateLogLike();
69 |   void UpdateDNlogPrior();
70 |   void UpdateDNlogPost();
71 |   void UpdateMomtAndDeltas();
72 |   void UpdateVarDeltas();
73 |   void UpdateSigmas();
74 |   void UpdateSigmasT();
75 |   void UpdateSigmasSgm(SamplerSgm *target);
76 |   void UpdateSigmasNeg();
77 |   void UpdateSigmasGhs();
78 |   void UpdateLogw();
79 |   double CompNegEnergy();
80 |   void GenMomt();
81 |   void MoveMomt();
82 |   void UpdateStepSizes();
83 |   void CacheOldValues();
84 |   void RestoreOldValues();
85 |   bool IsFault(double cri = 20);
86 |   void Initialize();
87 |   void Traject(int i_mc);
88 |
89 | public:
90 |
91 |   Fit(int p, int K, int n,
92 |       const arma::mat &X, const arma::mat &ymat, const arma::uvec &ybase,
93 |       std::string ptype, double alpha, double s, double eta,
94 |       int iters_rmc, int iters_h, int thin,
95 |       int leap_L, int leap_L_h, double leap_step,
96 |       double hmc_sgmcut, const arma::mat &deltas, const arma::vec &sigmasbt,
97 |       bool keep_warmup_hist, int silence, bool legacy);
98 |
99 |   void StartSampling();
100 |   Rcpp::List OutputR();
101 |
102 | };
103 |
104 | #endif
105 |
-------------------------------------------------------------------------------- /R/std.R: --------------------------------------------------------------------------------
1 | #################################################### COPYLEFT NOTICE ####################################################
2 | ## This is a derivative work based on
https://github.com/pbreheny/ncvreg/blob/master/R/std.R under GPLv3 licence.########
3 | ## Author of original version: Patrick Breheny, modified by Steven Liu. For more information see details section below.##
4 | #########################################################################################################################
5 |
6 | #' Standardizes a Design Matrix
7 | #'
8 | #' This function accepts a design matrix and returns a standardized version of that matrix;
9 | #' the statistics of each column, such as \code{median} and \code{sd}, are also provided.
10 | #'
11 | #' @param X Design matrix, of dimension \code{nobs} by \code{nvars}; each row is an observation vector;
12 | #' can also be an object that can be coerced to a matrix, e.g. a data.frame.
13 | #'
14 | #' @param tol The tolerance value; a column of \code{X} is considered as singular if the \code{sd}
15 | #' of its entries (observations) is less than \code{tol}. Singular columns will be dropped from the result.
16 | #'
17 | #' @return The standardized design matrix with the following attributes:
18 | #' \describe{
19 | #'   \item{nonsingular}{Indices of non-singular columns.}
20 | #'   \item{center}{Median of each non-singular column which is used for standardization.}
21 | #'   \item{scale}{Standard deviation of each non-singular column which is used for standardization.}
22 | #' }
23 | #'
24 | #' @details For each column of \code{X}, the standardization is done by first subtracting its median,
25 | #' then dividing by its sample standard deviation, whereas the original version in \code{ncvreg} uses
26 | #' the mean and population standard deviation. It is slower than \code{ncvreg} because of the
27 | #' complexity of median finding, but still substantially faster than \code{scale()} provided by base R.
28 | #'
29 | #' @author Patrick Breheny (original) \cr Steven Liu (modification)
30 | #'
31 | #' @seealso \url{http://pbreheny.github.io/ncvreg/reference/std.html}
32 | #'
33 | #' @export
34 | #'
35 | #' @examples
36 | #' set.seed(123)
37 | #' mat <- matrix(rnorm(n = 80 * 90, mean = 100, sd = 50), 80, 90)
38 | #' mat %>% as.numeric() %>% ggplot2::qplot(bins = 30, xlab = '')
39 | #' mat %>% std() %>% as.numeric() %>% ggplot2::qplot(bins = 30, xlab = '')
40 | #'
41 | std <- function(X, tol = 1e-6)
42 | {
43 |   if (!is.matrix(X))
44 |   {
45 |     tmp <- try(X <- model.matrix(~0+., data = X), silent=TRUE)
46 |     if (inherits(tmp, "try-error"))
47 |       stop("X must be a matrix or able to be coerced to a matrix")
48 |   }
49 |   # checking NAs
50 |   if (anyNA(X)) stop("data contains NA(s)")
51 |   # calling std_helper() at src/utils.cpp
52 |   std.info <- std_helper(X)
53 |   # copying dimnames
54 |   dimnames(std.info$X) <- dimnames(X)
55 |   # checking singular columns
56 |   vars.ns <- which(std.info$sd > tol)
57 |
58 |   val <- std.info$X[, vars.ns, drop = FALSE]
59 |   if (length(vars.ns) < ncol(X))
60 |   {
61 |     message(
62 |       sprintf(
63 |         "dropped %d singular column(s)",
64 |         ncol(X) - length(vars.ns)
65 |       )
66 |     )
67 |   }
68 |
69 |   attr(val, "nonsingular") <- vars.ns
70 |   attr(val, "center") <- std.info$median[, vars.ns]
71 |   attr(val, "scale") <- std.info$sd[, vars.ns]
72 |   return(val)
73 | }
74 |
-------------------------------------------------------------------------------- /src/ars_demo.cpp: --------------------------------------------------------------------------------
1 | #include "ars.h"
2 |
3 | // [[Rcpp::export]]
4 | Rcpp::NumericVector sample_trunc_norm(
5 |     const int n, const double lb, const double ub, const bool verbose = false)
6 | {
7 |   class TruncNormTarget : public SampleTarget
8 |   {
9 |   private:
10 |
11 |     const double lb_, ub_;
12 |
13 |   public:
14 |
15 |     TruncNormTarget(double lb, double ub) : lb_(lb), ub_(ub) {}
16 |
17 |     void eval_logf(const double x, double &logf, double &dlogf) override
18 |     {
19 |       // if ub and lb cannot be found explicitly, this checking condition can be
20 |       // replaced by another checking expression, e.g. pow(x, 2) < 1
21 |       if ((x > ub_) || (x < lb_))
22 |       {
23 |         logf = R_NegInf;
24 |         dlogf = R_NaN;
25 |         return;
26 |       }
27 |       logf = -pow(x, 2) / 2;
28 |       dlogf = -x;
29 |     }
30 |   };
31 |
32 |   double init_tpoint = 0;
33 |   if (R_FINITE(lb) && R_FINITE(ub))
34 |     init_tpoint = (lb + ub) / 2;
35 |   if (R_FINITE(lb) && (!R_FINITE(ub)))
36 |     init_tpoint = lb + 1;
37 |   if ((!R_FINITE(lb)) && R_FINITE(ub))
38 |     init_tpoint = ub - 1;
39 |   if ((!R_FINITE(lb)) && (!R_FINITE(ub)))
40 |     init_tpoint = 0;
41 |
42 |   // Do adaptive rejection sampling here
43 |   // Note that the ARS sampler itself is given (-Inf, +Inf) as bounds; the
44 |   // truncation to [lb, ub] is enforced by eval_logf() returning -Inf outside it.
45 |   auto target = TruncNormTarget(lb, ub);
46 |   auto sampler = ARS(n, &target, init_tpoint, R_NegInf, R_PosInf, verbose);
47 |   return sampler.Sample();
48 | }
49 |
50 | // [[Rcpp::export]]
51 | Rcpp::NumericVector sample_post_ichi(
52 |     const int n, const Rcpp::NumericVector &sigmasq, const double alpha1,
53 |     const double alpha0 = 1E-5, const double w0 = 1E-5,
54 |     const bool verbose = false)
55 | {
56 |   class IchiTarget : public SampleTarget
57 |   {
58 |   private:
59 |
60 |     double lambda_p_, alpha_p_, lambda0_;
61 |
62 |   public:
63 |
64 |     IchiTarget(Rcpp::NumericVector sigmasq, double alpha1, double alpha0, double w0)
65 |     {
66 |       lambda_p_ = 0;
67 |       int p = sigmasq.length();
68 |       for (int i = 0; i < p; i++)
69 |       {
70 |         lambda_p_ += 1 / sigmasq[i];
71 |       }
72 |       lambda_p_ *= alpha1 / 2.0;
73 |
74 |       alpha_p_ = (p * alpha1 - alpha0) / 2.0;
75 |       lambda0_ = alpha0 * w0 / 2.0;
76 |
77 |       if (alpha_p_ < 1.0)
78 |       {
79 |         Rcpp::stop(
80 |           "Error in 'R_sample_post_ichi':\n"
81 |           "Posterior alpha is less than 1, not log-concave\n");
82 |       }
83 |     }
84 |
85 |     void eval_logf(const double x, double &logf, double &dlogf) override
86 |     {
87 |       double exps = exp(x);
88 |       double iexps = 1 / exps;
89 |       logf = alpha_p_ * x - lambda_p_ * exp(x) - lambda0_ * iexps;
90 |       dlogf = alpha_p_ - lambda_p_ * exps + lambda0_ * iexps;
91 |     }
92 |   };
93 |
94 |   // Do adaptive rejection sampling here
95 |   auto target = IchiTarget(sigmasq, alpha1, alpha0, w0);
96 |   auto sampler = ARS(n, &target, 0, R_NegInf, R_PosInf, verbose);
97 |   return sampler.Sample();
98 | }
99 |
100 | // [[Rcpp::export]]
101 | Rcpp::NumericVector sample_trunc_beta(
102 |     const int n, const double alpha, const double beta,
103 |     const double lb = 0, const double ub = 1, const bool verbose = false)
104 | {
105 |   class TruncBetaTarget : public SampleTarget
106 |   {
107 |   private:
108 |
109 |     const double alpha_, beta_;
110 |
111 |   public:
112 |
113 |     TruncBetaTarget(double alpha, double beta) : alpha_(alpha), beta_(beta)
114 |     {
115 |     }
116 |
117 |     void eval_logf(const double x, double &logf, double &dlogf) override
118 |     {
119 |       logf = alpha_ * x - (alpha_ + beta_) * log(1 + exp(x));
120 |       dlogf = alpha_ - (alpha_ + beta_) / (1 + exp(-x));
121 |     }
122 |   };
123 |
124 |   // Do adaptive rejection sampling here
125 |   double m = (lb + ub) / 2.0;
126 |   auto target = TruncBetaTarget(alpha, beta);
127 |   auto sampler = ARS(n, &target,
128 |                      log(m) - log(1 - m),
129 |                      log(lb) - log(1 - lb),
130 |                      log(ub) - log(1 - ub),
131 |                      verbose);
132 |   return 1 / (1 + exp(-sampler.Sample()));
133 | }
134 |
-------------------------------------------------------------------------------- /vignettes/simu.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Multinomial Logistic Regression with Heavy-Tailed Priors"
3 | author: "Longhai Li and Steven Liu"
4 | date: "`r Sys.Date()`"
5 | bibliography: HTLR.bib
6 | output:
7 |   rmarkdown::html_document:
8 |     toc: true
9 |     toc_float: false
10 |     toc_depth: 4
11 |     number_sections: true
12 | vignette: >
13 |   %\VignetteIndexEntry{intro}
14 |   %\VignetteEngine{knitr::rmarkdown}
15 |   %\VignetteEncoding{UTF-8}
16 | ---
17 |
18 | ```{r, include = FALSE}
19 | knitr::opts_chunk$set(
20 |   collapse = TRUE,
21 |   comment = "#>"
22 | )
23 | ```
24 |
25 | # Data Generation
26 |
27 | Load the necessary libraries:
28 |
29 | ```{r setup}
30 | library(HTLR)
31 | library(bayesplot)
32 | ```
33 |
34 | A description of the data generating scheme can be found in @li2018fully.
35 |
36 | There are 4 groups of features:
37 |
38 | * feature #1: marginally related feature
39 |
40 | * feature #2: marginally unrelated feature, but feature #2 is correlated with feature #1
41 |
42 | * feature #3 - #10: marginally related features and also internally correlated
43 |
44 | * feature #11 - #2000: noise features with no relationship to y
45 |
46 | ```{r}
47 | SEED <- 123
48 |
49 | n <- 510
50 | p <- 2000
51 |
52 | means <- rbind(
53 |   c(0, 1, 0),
54 |   c(0, 0, 0),
55 |   c(0, 0, 1),
56 |   c(0, 0, 1),
57 |   c(0, 0, 1),
58 |   c(0, 0, 1),
59 |   c(0, 0, 1),
60 |   c(0, 0, 1),
61 |   c(0, 0, 1),
62 |   c(0, 0, 1)
63 | ) * 2
64 |
65 | means <- rbind(means, matrix(0, p - 10, 3))
66 |
67 | A <- diag(1, p)
68 |
69 | A[1:10, 1:3] <-
70 |   rbind(
71 |     c(1, 0, 0),
72 |     c(2, 1, 0),
73 |     c(0, 0, 1),
74 |     c(0, 0, 1),
75 |     c(0, 0, 1),
76 |     c(0, 0, 1),
77 |     c(0, 0, 1),
78 |     c(0, 0, 1),
79 |     c(0, 0, 1),
80 |     c(0, 0, 1)
81 |   )
82 |
83 | set.seed(SEED)
84 | dat <- gendata_FAM(n, means, A, sd_g = 0.5, stdx = TRUE)
85 | str(dat)
86 | ```
87 |
88 | Look at the correlation between features:
89 | ```{r}
90 | # require(corrplot)
91 | cor(dat$X[ , 1:11]) %>% corrplot::corrplot(tl.pos = "n")
92 | ```
93 |
94 | Split the data into training and testing sets:
95 | ```{r}
96 | set.seed(SEED)
97 | dat <- split_data(dat$X, dat$y, n.train = 500)
98 | str(dat)
99 | ```
100 |
101 | # Model Fitting
102 |
103 | Fit a HTLR model with all default settings:
104 | ```{r}
105 | set.seed(SEED)
106 | system.time(
107 |   fit.t <- htlr(dat$x.tr, dat$y.tr)
108 | )
109 | print(fit.t)
110 | ```
111 |
112 | With another configuration:
113 | ```{r}
114 | set.seed(SEED)
115 | system.time(
116 |   fit.t2 <- htlr(X = dat$x.tr, y = dat$y.tr,
117 |                  prior = htlr_prior("t", df = 1, logw = -20, sigmab0 = 1500),
118 |                  iter = 4000, init = "bcbc", keep.warmup.hist = T)
119 | )
120 | print(fit.t2)
121 | ```
122 |
123 | # Model Inspection
124 |
125 | Look at the point summaries of the posterior of selected parameters:
126 | ```{r}
127 | summary(fit.t2, features = c(1:10, 100, 200, 1000, 2000), method = median)
128 | ```
129 |
130 | Plot interval estimates from posterior draws using [bayesplot](https://mc-stan.org/bayesplot/index.html):
131 | ```{r}
132 | post.t <- as.matrix(fit.t2, k = 2)
133 | ## signal parameters
134 | mcmc_intervals(post.t, pars = c("Intercept", "V1", "V2", "V3", "V1000"))
135 | ```
136 |
137 | Trace plot of MCMC draws:
138 | ```{r}
139 | as.matrix(fit.t2, k = 2, include.warmup = T) %>%
140 |   mcmc_trace(c("V1", "V1000"), facet_args = list("nrow" = 2), n_warmup = 2000)
141 | ```
142 |
143 | The coefficients of unrelated features (noise) are not updated during some iterations due to the restricted Gibbs sampling [@li2018fully]; hence the computational cost is greatly reduced.
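One way to see this effect directly (a rough check, not part of the original analysis; recall that `V1000` is one of the noise features in this simulation) is to measure how often consecutive saved draws of a noise coefficient are identical, i.e. how often it was skipped by the restricted Gibbs updates:

```{r}
draws <- as.matrix(fit.t2, k = 2)
# proportion of consecutive draws where V1000 was left unchanged
mean(diff(draws[, "V1000"]) == 0)
```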
144 |
145 | # Make Predictions
146 |
147 | A glance at the prediction accuracy:
148 | ```{r}
149 | y.class <- predict(fit.t, dat$x.te, type = "class")
150 | y.class
151 | print(paste0("prediction accuracy of model 1 = ",
152 |              sum(y.class == dat$y.te) / length(y.class)))
153 |
154 | y.class2 <- predict(fit.t2, dat$x.te, type = "class")
155 | print(paste0("prediction accuracy of model 2 = ",
156 |              sum(y.class2 == dat$y.te) / length(y.class2)))
157 |
158 | ```
159 |
160 | More details about the prediction result:
161 | ```{r}
162 | predict(fit.t, dat$x.te, type = "response") %>%
163 |   evaluate_pred(y.true = dat$y.te)
164 | ```
165 |
-------------------------------------------------------------------------------- /R/gendata.r: --------------------------------------------------------------------------------
1 | #' Generate Simulated Data with Multinomial Logistic Regression Model
2 | #'
3 | #' This function generates the response variables \code{y} given
4 | #' optionally supplied \code{X} using a multinomial logistic regression model.
5 | #'
6 | #' @param n Number of observations.
7 | #' @param p Number of features.
8 | #' @param NC Number of classes for response variables.
9 | #' @param nu,w If \code{betas} is not supplied (default), the regression coefficients are generated with
10 | #' a t prior with df = \code{nu} and scale = \code{sqrt(w)}; ignored if \code{betas} is supplied.
11 | #' @param X The design matrix; will be generated from standard normal distribution if not supplied.
12 | #' @param betas User supplied regression coefficients.
13 | #'
14 | #' @return A list containing input matrix \code{X}, response variables \code{y}, and regression coefficients \code{deltas}.
15 | #'
16 | #' @export
17 | #'
18 | #' @examples
19 | #' set.seed(12345)
20 | #' dat <- gendata_MLR(n = 100, p = 10)
21 | #' ggplot2::qplot(dat$y, bins = 6)
22 | #' corrplot::corrplot(cor(dat$X))
23 | #'
24 | #' @seealso \code{\link{gendata_FAM}}
25 | #'
26 | gendata_MLR <- function(n, p, NC = 3, nu = 2, w = 1, X = NULL, betas = NULL)
27 | {
28 |   if (is.null(X)) {
29 |     X <- matrix(rnorm(n * p), n, p)
30 |     colnames(X) <- paste0("V", 1:p)
31 |   }
32 |
33 |   if (is.null(betas))
34 |   {
35 |     sigmasbt <- 1 / rgamma(p, nu / 2, nu / 2) * w
36 |     betas <- replicate(NC, rnorm(p + 1)) * c(0, sqrt(sigmasbt))
37 |   }
38 |
39 |   deltas <- betas[, -1, drop = FALSE] - betas[, 1]
40 |   lv <- cbind(1, X) %*% betas
41 |   probs <- exp(lv - as.vector(log_sum_exp(lv)))
42 |
43 |   y <- apply(probs, 1, function(prob){sample(1:NC, 1, TRUE, prob)})
44 |
45 |   list("X" = X, "y" = y, "deltas" = deltas)
46 | }
47 |
48 | #' Generate Simulated Data with Factor Analysis Model
49 | #'
50 | #' This function generates inputs \code{X} given the response variable \code{y}
51 | #' using a multivariate normal model.
52 | #'
53 | #' The means of each covariate \eqn{x_j} depend on \code{y} as specified by the
54 | #' matrix \code{muj}; the covariance matrix \eqn{\Sigma} of the multivariate normal
55 | #' is equal to \eqn{AA^t + \delta^2 I}, where \code{A} is the factor loading matrix
56 | #' and \eqn{\delta} is the noise level.
57 | #'
58 | #' @param n Number of observations.
59 | #' @param muj p by C matrix, with row j representing \eqn{x_j}, and column c representing y = c.
60 | #' Used to specify \code{y}.
61 | #' @param A Factor loading matrix of size p by p, see details.
62 | #' @param sd_g Numeric value indicating noise level \eqn{\delta}, see details.
63 | #' @param stdx Logical; if \code{TRUE}, data \code{X} is standardized to have \code{mean = 0} and \code{sd = 1}.
64 | #'
65 | #' @return A list containing input matrix \code{X}, response variables \code{y},
66 | #' covariance matrix \code{SGM} and \code{muj} (standardized if \code{stdx = TRUE}).
67 | #'
68 | #' @export
69 | #'
70 | #' @examples
71 | #' ## feature #1: marginally related feature
72 | #' ## feature #2: marginally unrelated feature, but feature #2 is correlated with feature #1
73 | #' ## feature #3-5: marginally related features and also internally correlated
74 | #' ## feature #6-10: noise features with no relationship to y
75 | #'
76 | #' set.seed(12345)
77 | #' n <- 100
78 | #' p <- 10
79 | #'
80 | #' means <- rbind(
81 | #'   c(0, 1, 0),
82 | #'   c(0, 0, 0),
83 | #'   c(0, 0, 1),
84 | #'   c(0, 0, 1),
85 | #'   c(0, 0, 1)
86 | #' ) * 2
87 | #'
88 | #' means <- rbind(means, matrix(0, p - 5, 3))
89 | #'
90 | #' A <- diag(1, p)
91 | #' A[1:5, 1:3] <- rbind(
92 | #'   c(1, 0, 0),
93 | #'   c(2, 1, 0),
94 | #'   c(0, 0, 1),
95 | #'   c(0, 0, 1),
96 | #'   c(0, 0, 1)
97 | #' )
98 | #'
99 | #' dat <- gendata_FAM(n, means, A, sd_g = 0.5, stdx = TRUE)
100 | #' ggplot2::qplot(dat$y, bins = 6)
101 | #' corrplot::corrplot(cor(dat$X))
102 | #'
103 | #' @seealso \code{\link{gendata_MLR}}
104 | #'
105 | gendata_FAM <- function(n, muj, A, sd_g = 0, stdx = FALSE)
106 | {
107 |   y <- rep(1L:ncol(muj), len = n)
108 |   muj_rep <- muj[, y] # repeat each col of muj until ncol(muj_rep) == length(y) == n
109 |   dat <- gendata_FAM_helper(n, muj, muj_rep, A, sd_g, stdx)
110 |   dat$y <- y
111 |   colnames(dat$X) <- paste0("V", 1L:nrow(muj))
112 |   return(dat)
113 | }
114 |
-------------------------------------------------------------------------------- /R/predict.R: --------------------------------------------------------------------------------
1 | #' Make Prediction on New Data (Advanced)
2 | #'
3 | #' This function uses MCMC samples from a fitted \code{htlrfit} object OR user-supplied
4 | #' regression coefficients to predict the class labels of test cases.
5 | #'
6 | #' @param X_ts Matrix of values at which predictions are to be made.
7 | #'
8 | #' @param fithtlr Fitted HTLR model object.
9 | #'
10 | #' @param deltas The values of deltas (for example true deltas) used to make prediction;
11 | #' will override \code{fithtlr} if provided.
12 | #'
13 | #' @param burn,thin \code{burn} of Markov chain (super)iterations will be discarded for prediction,
14 | #' and only every \code{thin}-th iteration is used.
15 | #'
16 | #' @param usedmc Indices of Markov chain iterations used for inference.
17 | #' If supplied, \code{burn} and \code{thin} will be ignored.
18 | #'
19 | #' @param rep.legacy To reproduce (actually incorrect) results in the legacy version.
20 | #' See \url{https://github.com/longhaiSK/HTLR/issues/7}.
21 | #'
22 | #' @return A matrix of predictive probabilities, with rows for cases, columns for classes.
23 | #'
24 | #' @export
25 | #'
26 | #' @keywords internal
27 | #'
28 | htlr_predict <- function(X_ts, fithtlr = NULL, deltas = NULL, burn = NULL, thin = 1, usedmc = NULL, rep.legacy = TRUE)
29 | {
30 |   if (is.vector(X_ts))
31 |     X_ts <- matrix(X_ts, 1)
32 |   else if (!is.matrix(X_ts))
33 |     X_ts <- as.matrix(X_ts)
34 |   no_ts <- nrow(X_ts)
35 |
36 |   if (is.null(deltas) && !is.null(fithtlr))
37 |   {
38 |     mcdims <- dim(fithtlr$mcdeltas)
39 |     p <- mcdims[1] - 1
40 |     K <- mcdims[2]
41 |     no_mcspl <- mcdims[3]
42 |
43 |     ## index of mc iters used for inference
44 |     if (is.null(usedmc))
45 |     {
46 |       if (is.null(burn))
47 |         usedmc <- get_sample_indice(no_mcspl, fithtlr$mc.param$iter.rmc, p.burn.extra = 0.1, thin = thin, ignore.first = !rep.legacy)
48 |       else
49 |         usedmc <- get_sample_indice(no_mcspl, fithtlr$mc.param$iter.rmc, n.burn.extra = burn, thin = thin, ignore.first = !rep.legacy)
50 |     }
51 |
52 |     no_used <- length(usedmc)
53 |
54 |     ## read deltas for prediction
55 |     longdeltas <- matrix(fithtlr$mcdeltas[, , usedmc], nrow = p + 1)
56 |
57 |     ## selecting features and standardizing
58 |     fsel <- fithtlr$feature$fsel
59 |     X_ts <- X_ts[, fsel, drop = FALSE]
60 |     nuj <- fithtlr$feature$nuj
61 |     sdj <- fithtlr$feature$sdj
62 |     X_ts <- sweep(X_ts, 2, nuj, "-")
63 |     X_ts <- sweep(X_ts, 2, sdj, "/")
64 |   }
65 |   else
66 |   {
67 |     if (is.vector(deltas) || is.matrix(deltas))
68 |     {
69 |       deltas <- matrix(deltas, nrow = ncol(X_ts) + 1)
70 |       p <- nrow(deltas) - 1
71 |       K <- 1
72 |       longdeltas <- deltas
73 |       no_used <- 1
74 |     }
75 |   }
76 |
77 |   ## add intercept to all cases
78 |   X_addint_ts <- cbind(1, X_ts)
79 |
80 |   longlv <- X_addint_ts %*% longdeltas
81 |   arraylv <- array(longlv, dim = c(no_ts, K, no_used))
82 |   logsumlv <- apply(arraylv, 3, comp_lsl)
83 |   array_normlv <- sweep(arraylv, c(1, 3), logsumlv)
84 |   array_predprobs <- exp(array_normlv)
85 |   probs_pred <- apply(array_predprobs, c(1, 2), mean)
86 |
87 |   predprobs_c1 <- pmax(0, 1 - apply(probs_pred, 1, sum))
88 |   probs_pred <- cbind(predprobs_c1, probs_pred)
89 |   colnames(probs_pred) <- paste("class", levels(factor(fithtlr$feature$y)))
90 |
91 |   return(probs_pred)
92 | }
93 |
94 | #' Make Prediction on New Data
95 | #'
96 | #' Similar to other predict methods, this function returns predictions from a fitted \code{htlrfit} object.
97 | #'
98 | #' @param object A fitted model object with S3 class \code{htlrfit}.
99 | #'
100 | #' @param newx A matrix of values at which predictions are to be made.
101 | #'
102 | #' @param type Type of prediction required. Type "response" gives the fitted probabilities.
103 | #' Type "class" produces the class label corresponding to the maximum probability.
104 | #'
105 | #' @param ... Advanced options to specify the Markov chain iterations used for inference.
106 | #' See \code{\link{htlr_predict}}.
107 | #'
108 | #' @return The object returned depends on type.
109 | #'
110 | #' @export
111 | #'
112 | predict.htlr.fit <- function(object, newx, type = c("response", "class"), ...)
113 | {
114 |   dots <- list(...)  # advanced options ('burn', 'usedmc', 'thin') passed via '...'
115 |   burn <- dots$burn; usedmc <- dots$usedmc
116 |   thin <- if (is.null(dots$thin)) 1 else dots$thin
117 |
118 |   pred.prob <- htlr_predict(X_ts = newx, fithtlr = object, burn = burn, thin = thin, usedmc = usedmc, rep.legacy = FALSE)
119 |
120 |   type <- match.arg(type)
121 |   if (type == "response")
122 |     return(pred.prob)
123 |   if (type == "class") {
124 |     classes <- object$feature$y %>%
125 |       factor() %>%
126 |       levels() %>%
127 |       as.numeric()
128 |     newy <- classes[apply(pred.prob, 1, which.max)] %>% as.matrix()
129 |     colnames(newy) <- "y.pred"
130 |     return(newy)
131 |   }
132 | }
133 |
-------------------------------------------------------------------------------- /tests/testthat/test-bplrhmc.r: --------------------------------------------------------------------------------
1 | skip_on_ci()
2 | skip_on_cran()
3 |
4 | SEED <- 1001
5 |
6 | set.seed(SEED)
7 |
8 | data("colon")
9 | dat <- split_data(colon$X, colon$y, p.train = 0.9)
10 |
11 | set.seed(SEED)
12 | mlr <- gendata_MLR(n = 50, p = 10)
13 | dat3 <- split_data(mlr$X, mlr$y, p.train = 0.8)
14 |
15 | expect_equal_predict <- function(actual, expected) {
16 |   expect_equal(unname(actual$probs_pred), unname(expected$probs_pred))
17 | }
18 |
19 | expect_equal_htlr <- function(actual, expected)
20 | {
21 |   expect_equal(actual$mcdeltas, expected$mcdeltas)
22 |   expect_equal(as.vector(actual$mclogw), expected$mclogw)
23 |   expect_equal(as.vector(actual$mcloglike), expected$mcloglike)
24 |   expect_equal(as.vector(actual$mcuvar), expected$mcuvar)
25 |   expect_equal(as.vector(actual$mchmcrej), expected$mchmcrej)
26 |   expect_equal(actual$mcsigmasbt, expected$mcsigmasbt)
27 |   expect_equal(actual$mcvardeltas, expected$mcvardeltas)
28 |   expect_equal_predict(actual, expected)
29 | }
30 |
31 | test_that("colon500_t1_s10_eta10_bcbc_bi", {
32 |
33 |   set.seed(SEED)
34 |   fit <- htlr_fit(X_tr = dat$x.tr, y_tr = dat$y.tr, X_ts = dat$x.te,
35 |                   fsel = 1L:500, ptype = "t", alpha = 1, s = -10, eta = 10,
36 |                   initial_state = "bcbcsfrda", iters_h = 5, iters_rmc = 5, thin = 1)
37 |
38 |   set.seed(SEED)
39 |   expected <- HTLR.old::htlr_fit(X_tr = dat$x.tr, y_tr = dat$y.tr, X_ts = dat$x.te,
40 |                                  fsel = 1L:500, ptype = "t", alpha = 1, s = -10, eta = 10,
41 |                                  initial_state = "bcbcsfrda", iters_h = 5, iters_rmc = 5, thin = 1)
42 |
43 |   expect_equal_htlr(fit, expected)
44 | })
45 |
46 | test_that("sim_t1_s10_eta10_bcbc_mul", {
47 |
48 |   set.seed(SEED)
49 |   fit <- htlr_fit(X_tr = dat3$x.tr, y_tr = dat3$y.tr, X_ts = dat3$x.te,
50 |                   ptype = "t", alpha = 1, s = -10, eta = 10,
51 |                   initial_state = "bcbcsfrda", iters_h = 5, iters_rmc = 5, thin = 1)
52 |
53 |   set.seed(SEED)
54 |   expected <- HTLR.old::htlr_fit(X_tr = dat3$x.tr, y_tr = dat3$y.tr, X_ts = dat3$x.te,
55 |                                  ptype = "t", alpha = 1, s = -10, eta = 10,
56 |                                  initial_state = "bcbcsfrda", iters_h = 5, iters_rmc = 5, thin = 1)
57 |
58 |   expect_equal_htlr(fit, expected)
59 | })
60 |
61 | test_that("colon500_neg1_s10_eta0_bcbc_bi", {
62 |
63 |   set.seed(SEED)
64 |   fit <- htlr_fit(X_tr = dat$x.tr, y_tr = dat$y.tr, X_ts = dat$x.te,
65 |                   fsel = 1L:500, ptype = "neg", alpha = 1, s = -10,
66 |                   initial_state = "bcbcsfrda", iters_h = 5, iters_rmc = 5, thin = 1)
67 |
68 |   set.seed(SEED)
69 |   expected <- HTLR.old::htlr_fit(X_tr = dat$x.tr, y_tr = dat$y.tr, X_ts = dat$x.te,
70 |                                  fsel = 1L:500, ptype = "neg", alpha = 1, s = -10,
71 |                                  initial_state = "bcbcsfrda", iters_h = 5, iters_rmc = 5, thin = 1)
72 |
73 |   expect_equal_htlr(fit, expected)
74 | })
75 |
76 | test_that("colon500_ghs1_s10_eta0_bcbc_bi", {
77 |
78 |   set.seed(SEED)
79 |   fit <- htlr_fit(X_tr = dat$x.tr, y_tr = dat$y.tr, X_ts = dat$x.te,
80 |                   fsel = 1L:500, ptype = "ghs", alpha = 1, s = -10,
81 |                   initial_state = "bcbcsfrda", iters_h = 5, iters_rmc = 5, thin = 1)
82 |
83 |   set.seed(SEED)
84 |   expected <- HTLR.old::htlr_fit(X_tr = dat$x.tr, y_tr = dat$y.tr, X_ts = dat$x.te,
85 |                                  fsel = 1L:500, ptype = "ghs", alpha = 1, s = -10,
86 |                                  initial_state = "bcbcsfrda", iters_h = 5, iters_rmc = 5, thin = 1)
87 |
88 |   expect_equal_htlr(fit, expected)
89 | })
90 |
91 | # test_that("colon500_t1_s10_eta10_lasso_bi", {
92 | #   set.seed(SEED)
93 | #   fit <- htlr_fit(X_tr = dat$x.tr, y_tr = dat$y.tr, X_ts = dat$x.te,
94 | #                   fsel = 1L:500, ptype = "t", alpha = 1, s = -10, eta = 10,
95 | #                   initial_state = "lasso", iters_h = 0, iters_rmc = 1, thin = 1)
96 | #
97 | #   set.seed(SEED)
98 | #   expected <- HTLR.old::htlr_fit(X_tr = dat$x.tr, y_tr = dat$y.tr, X_ts = dat$x.te,
99 | #                                  fsel = 1L:500, ptype = "t", alpha = 1, s = -10, eta = 10,
100 | #                                  initial_state = "lasso", iters_h = 0, iters_rmc = 1, thin = 1)
101 | #   expect_equal_htlr(fit, expected)
102 | # })
103 |
104 | # HoSC <- list("x.train" = read.csv("../../data-raw/HoCS_train_data.csv", header = F),
105 | #              "x.test" = read.csv("../../data-raw/HoCS_test_data.csv", header = F),
106 | #              "y.train" = rep(1:3, each = 10),
107 | #              "y.test" = c(rep(1, 50), rep(2, 27), rep(3, 52)))
108 | #
109 | # test_that("data_with_singular_col", {
110 | #   fit.1 <- htlr(X = HoSC$x.train, y = HoSC$y.train, init = "bcbc", iter = 10)
111 | #   fit.2 <- htlr_fit(X_tr = HoSC$x.train, y_tr = HoSC$y.train, ptype = "t",
112 | #                     initial_state = "lasso", iters_h = 5, iters_rmc = 5, thin = 1)
113 | #
114 | #   expect_equal(length(fit.1$featur$fsel), 86)
115 | #   expect_equal(length(fit.2$featur$fsel), 86)
116 | #
117 | #   pred <- predict(fit.1, HoSC$x.test)
118 | # })
119 |
-------------------------------------------------------------------------------- /man/htlr_fit.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/core.r
3 | \name{htlr_fit}
4 | \alias{htlr_fit}
5 | \title{Fit a HTLR Model (Internal API)}
6 | \usage{
7 | htlr_fit(
8 |   X_tr,
9 |   y_tr,
10 |   fsel = 1:ncol(X_tr),
11 |   stdzx = TRUE,
12 |   ptype = c("t", "ghs", "neg"),
13 |   sigmab0 = 2000,
14 |   alpha = 1,
15 |   s = -10,
16 |   eta = 0,
17 |   iters_h = 1000,
18 |   iters_rmc = 1000,
19 |   thin = 1,
20 |   leap_L = 50,
21 |   leap_L_h = 5,
22 |   leap_step = 0.3,
23 |   hmc_sgmcut = 0.05,
24 |   initial_state = "lasso",
25 |   keep.warmup.hist = FALSE,
26 |   silence = TRUE,
27 |   rep.legacy = TRUE,
28 |   alpha.rda = 0.2,
29 |   lasso.lambda = seq(0.05, 0.01, by = -0.01),
30 |   X_ts = NULL,
31 |   predburn = NULL,
32 |   predthin = 1
33 | )
34 | }
35 | \arguments{
36 | \item{X_tr}{Input matrix, of dimension nobs by nvars; each row is an observation vector.}
37 |
38 | \item{y_tr}{Vector of response variables. Must be coded as non-negative integers,
39 | e.g., 1,2,\ldots,C for C classes, label 0 is also allowed.}
40 |
41 | \item{fsel}{Subsets of features selected before fitting, such as by univariate screening.}
42 |
43 | \item{stdzx}{Logical; if \code{TRUE}, the original feature values are standardized to have \code{mean = 0}
44 | and \code{sd = 1}.}
45 |
46 | \item{ptype}{The prior to be applied to the model.
Either "t" (student-t, default), 47 | "ghs" (horseshoe), or "neg" (normal-exponential-gamma).} 48 | 49 | \item{sigmab0}{The \code{sd} of the normal prior for the intercept.} 50 | 51 | \item{alpha}{The degree freedom of t/ghs/neg prior for coefficients.} 52 | 53 | \item{s}{The log scale of priors (logw) for coefficients.} 54 | 55 | \item{eta}{The \code{sd} of the normal prior for logw. When it is set to 0, logw is fixed. 56 | Otherwise, logw is assigned with a normal prior and it will be updated during sampling.} 57 | 58 | \item{iters_h}{A positive integer specifying the number of warmup (aka burnin).} 59 | 60 | \item{iters_rmc}{A positive integer specifying the number of iterations after warmup.} 61 | 62 | \item{thin}{A positive integer specifying the period for saving samples.} 63 | 64 | \item{leap_L}{The length of leapfrog trajectory in sampling phase.} 65 | 66 | \item{leap_L_h}{The length of leapfrog trajectory in burnin phase.} 67 | 68 | \item{leap_step}{The stepsize adjustment multiplied to the second-order partial derivatives of log posterior.} 69 | 70 | \item{hmc_sgmcut}{The coefficients smaller than this criteria will be fixed in 71 | each HMC updating step.} 72 | 73 | \item{initial_state}{The initial state of Markov Chain; can be a previously 74 | fitted \code{fithtlr} object, or a user supplied initial state vector, or 75 | a character string matches the following: 76 | \itemize{ 77 | \item "lasso" - (Default) Use Lasso initial state with \code{lambda} chosen by 78 | cross-validation. Users may specify their own candidate \code{lambda} values via 79 | optional argument \code{lasso.lambda}. Further customized Lasso initial 80 | states can be generated by \code{\link{lasso_deltas}}. 81 | \item "bcbcsfrda" - Use initial state generated by package \code{BCBCSF} 82 | (Bias-corrected Bayesian classification). Further customized BCBCSF initial 83 | states can be generated by \code{\link{bcbcsf_deltas}}. WARNING: This type of 84 | initial states can be used for continuous features such as gene expression profiles, 85 | but it should not be used for categorical features such as SNP profiles. 86 | \item "random" - Use random initial values sampled from N(0, 1). 87 | }} 88 | 89 | \item{keep.warmup.hist}{Warmup iterations are not recorded by default, set \code{TRUE} to enable it.} 90 | 91 | \item{silence}{Setting it to \code{FALSE} for tracking MCMC sampling iterations.} 92 | 93 | \item{rep.legacy}{Logical; if \code{TRUE}, the output produced in \code{HTLR} versions up to 94 | legacy-3.1-1 is reproduced. The speed would be typically slower than non-legacy mode on 95 | multi-core machine.} 96 | 97 | \item{alpha.rda}{A user supplied alpha value for \code{\link{bcbcsf_deltas}} when 98 | setting up BCBCSF initial state. Default: 0.2.} 99 | 100 | \item{lasso.lambda}{- A user supplied lambda sequence for \code{\link{lasso_deltas}} when 101 | setting up Lasso initial state. Default: \{.01, .02, \ldots, .05\}. Will be ignored if 102 | \code{rep.legacy} is set to \code{TRUE}.} 103 | 104 | \item{X_ts}{Test data which predictions are to be made.} 105 | 106 | \item{predburn, predthin}{For prediction base on \code{X_ts} (when supplied), \code{predburn} of 107 | Markov chain (super)iterations will be discarded, and only every \code{predthin} are used for inference.} 108 | } 109 | \value{ 110 | A list of fitting results. If \code{X_ts} is not provided, the list is an object 111 | with S3 class \code{htlr.fit}. 
112 | }
113 | \description{
114 | This function trains linear logistic regression models with HMC in restricted Gibbs sampling.
115 | It also makes predictions for test cases if \code{X_ts} is provided.
116 | }
117 | \references{
118 | Longhai Li and Weixin Yao (2018). Fully Bayesian Logistic Regression
119 | with Hyper-Lasso Priors for High-dimensional Feature Selection.
120 | \emph{Journal of Statistical Computation and Simulation}, 2018, 88:14, 2827-2851.
121 | }
122 | \keyword{internal}
123 |
-------------------------------------------------------------------------------- /man/htlr.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/htlr.R
3 | \name{htlr}
4 | \alias{htlr}
5 | \title{Fit a HTLR Model}
6 | \usage{
7 | htlr(
8 |   X,
9 |   y,
10 |   fsel = 1:ncol(X),
11 |   stdx = TRUE,
12 |   prior = "t",
13 |   df = 1,
14 |   iter = 2000,
15 |   warmup = floor(iter/2),
16 |   thin = 1,
17 |   init = "lasso",
18 |   leap = 50,
19 |   leap.warm = floor(leap/10),
20 |   leap.stepsize = 0.3,
21 |   cut = 0.05,
22 |   verbose = FALSE,
23 |   rep.legacy = FALSE,
24 |   keep.warmup.hist = FALSE,
25 |   ...
26 | )
27 | }
28 | \arguments{
29 | \item{X}{Input matrix, of dimension nobs by nvars; each row is an observation vector.}
30 |
31 | \item{y}{Vector of response variables. Must be coded as non-negative integers,
32 | e.g., 1,2,\ldots,C for C classes, label 0 is also allowed.}
33 |
34 | \item{fsel}{Subsets of features selected before fitting, such as by univariate screening.}
35 |
36 | \item{stdx}{Logical; if \code{TRUE}, the original feature values are standardized to have \code{mean = 0}
37 | and \code{sd = 1}.}
38 |
39 | \item{prior}{The prior to be applied to the model. Either a list of hyperparameter settings
40 | returned by \code{\link{htlr_prior}} or a character string from "t" (student-t), "ghs" (horseshoe),
41 | and "neg" (normal-exponential-gamma).}
42 |
43 | \item{df}{The degrees of freedom of the t/ghs/neg prior for coefficients. Will be ignored if the
44 | configuration list from \code{\link{htlr_prior}} is passed to \code{prior}.}
45 |
46 | \item{iter}{A positive integer specifying the number of iterations (including warmup).}
47 |
48 | \item{warmup}{A positive integer specifying the number of warmup iterations (aka burn-in).
49 | The number of warmup iterations should not be larger than \code{iter}; the default is \code{iter / 2}.}
50 |
51 | \item{thin}{A positive integer specifying the period for saving samples.}
52 |
53 | \item{init}{The initial state of Markov Chain; it accepts three forms:
54 | \itemize{
55 |   \item a previously fitted \code{fithtlr} object,
56 |   \item a user supplied initial coefficient matrix of (p+1)*K, where p is the number of features, K is the number of classes in y minus 1,
57 |   \item a character string matching the following:
58 |   \itemize{
59 |     \item "lasso" - (Default) Use Lasso initial state with \code{lambda} chosen by
60 |     cross-validation. Users may specify their own candidate \code{lambda} values via
61 |     optional argument \code{lasso.lambda}. Further customized Lasso initial
62 |     states can be generated by \code{\link{lasso_deltas}}.
63 |     \item "bcbc" - Use initial state generated by package \code{BCBCSF}
64 |     (Bias-corrected Bayesian classification). Further customized BCBCSF initial
65 |     states can be generated by \code{\link{bcbcsf_deltas}}.
WARNING: This type of
66 |     initial states can be used for continuous features such as gene expression profiles,
67 |     but it should not be used for categorical features such as SNP profiles.
68 |     \item "random" - Use random initial values sampled from N(0, 1).
69 |   }
70 |
71 | }}
72 |
73 | \item{leap}{The length of leapfrog trajectory in sampling phase.}
74 |
75 | \item{leap.warm}{The length of leapfrog trajectory in burnin phase.}
76 |
77 | \item{leap.stepsize}{The integrator step size used in the Hamiltonian simulation.}
78 |
79 | \item{cut}{The coefficients smaller than this criterion will be fixed in each HMC updating step.}
80 |
81 | \item{verbose}{Logical; set it to \code{TRUE} to track MCMC sampling iterations.}
82 |
83 | \item{rep.legacy}{Logical; if \code{TRUE}, the output produced in \code{HTLR} versions up to
84 | legacy-3.1-1 is reproduced. The speed will typically be slower than non-legacy mode on
85 | multi-core machines. Default is \code{FALSE}.}
86 |
87 | \item{keep.warmup.hist}{Warmup iterations are not recorded by default, set \code{TRUE} to enable it.}
88 |
89 | \item{...}{Other optional parameters:
90 | \itemize{
91 |   \item rda.alpha - A user supplied alpha value for \code{\link{bcbcsf_deltas}}. Default: 0.2.
92 |   \item lasso.lambda - A user supplied lambda sequence for \code{\link{lasso_deltas}}.
93 |   Default: \{.01, .02, \ldots, .05\}. Will be ignored if \code{rep.legacy} is set to \code{TRUE}.
94 | }}
95 | }
96 | \value{
97 | An object with S3 class \code{htlr.fit}.
98 | }
99 | \description{
100 | This function trains linear logistic regression models with HMC in restricted Gibbs sampling.
101 | }
102 | \examples{
103 | set.seed(12345)
104 | data("colon")
105 |
106 | ## fit HTLR models with selected features, note that the chain length setting is for demo only
107 |
108 | ## using t prior with 1 df and log-scale fixed to -10
109 | fit.t <- htlr(X = colon$X, y = colon$y, fsel = 1:100,
110 |               prior = htlr_prior("t", df = 1, logw = -10),
111 |               init = "bcbc", iter = 20, thin = 1)
112 |
113 | ## using NEG prior with 1 df and log-scale fixed to -10
114 | fit.neg <- htlr(X = colon$X, y = colon$y, fsel = 1:100,
115 |                 prior = htlr_prior("neg", df = 1, logw = -10),
116 |                 init = "bcbc", iter = 20, thin = 1)
117 |
118 | ## using horseshoe prior with 1 df and auto-selected log-scale
119 | fit.ghs <- htlr(X = colon$X, y = colon$y, fsel = 1:100,
120 |                 prior = "ghs", df = 1, init = "bcbc",
121 |                 iter = 20, thin = 1)
122 |
123 | }
124 | \references{
125 | Longhai Li and Weixin Yao (2018). Fully Bayesian Logistic Regression
126 | with Hyper-Lasso Priors for High-dimensional Feature Selection.
127 | \emph{Journal of Statistical Computation and Simulation}, 2018, 88:14, 2827-2851.
128 | }
129 |
-------------------------------------------------------------------------------- /R/lassorda.r: --------------------------------------------------------------------------------
1 | #' Lasso Initial State
2 | #'
3 | #' Generate initial Markov chain state with Lasso.
4 | #'
5 | #' @param X Design matrix of training data;
6 | #' rows should be for the cases, and columns for different features.
7 | #'
8 | #' @param y Vector of class labels in training or test data set.
9 | #' Must be coded as non-negative integers, e.g., 1,2,\ldots,C for C classes.
10 | #'
11 | #' @param lambda A user supplied lambda sequence for \code{glmnet} cross-validation.
12 | #' \code{NULL} by default, and it will be generated by \code{glmnet}.
13 | #'
14 | #' @param alpha The elasticnet mixing parameter for \code{glmnet}.
15 | #' 16 | #' @return A matrix - the initial state of Markov Chain for HTLR model fitting. 17 | #' 18 | #' @references Jerome Friedman, Trevor Hastie, Robert Tibshirani (2010). 19 | #' Regularization Paths for Generalized Linear Models via Coordinate 20 | #' Descent. \emph{Journal of Statistical Software}, 33(1), 1-22. 21 | #' 22 | #' @importFrom glmnet glmnet cv.glmnet 23 | #' 24 | #' @export 25 | #' 26 | #' @keywords internal 27 | #' 28 | #' @seealso \code{\link{bcbcsf_deltas}} 29 | lasso_deltas <- function(X, y, lambda = NULL, verbose = FALSE, alpha = 1, rank_fn = order_plain, k = ncol(X)) 30 | { 31 | #try_require("glmnet") 32 | 33 | if (k < ncol(X)) 34 | { 35 | fsel <- rank_fn(X, y)[1:k] 36 | X <- X[, fsel, drop = FALSE] 37 | } 38 | 39 | if (is.null(lambda)) # pre.legacy == TRUE 40 | { 41 | lasso.fit <- cv.glmnet(x = X, y = y, alpha = alpha, nlambda = 500, family = "multinomial") 42 | } 43 | else 44 | { 45 | lasso.fit <- cv.glmnet(x = X, y = y, alpha = alpha, lambda = lambda, 46 | family = "multinomial", nfolds = 5) 47 | } 48 | 49 | if (verbose) 50 | message("The best lambda chosen by CV: ", lasso.fit$lambda.min ,"\n") 51 | 52 | betas <- coef(lasso.fit, s = "lambda.min") 53 | mbetas <- as.matrix(Reduce(cbind, betas)) 54 | deltas <- mbetas[, -1, drop = FALSE] - mbetas[, 1] 55 | return (deltas) 56 | } 57 | 58 | # lasso_fitpred <- function (X_tr, y_tr, X_ts = NULL, rank_fn = rank_plain, k = ncol (X_tr)) 59 | # { 60 | # #try_require ("glmnet") 61 | # ## read information about data 62 | # n <- nrow (X_tr) ## numbers of obs 63 | # p <- ncol (X_tr) 64 | # ## find number of observations in each group 65 | # nos_g <- as.vector (tapply (rep(1, n), INDEX = y_tr, sum)) 66 | # G <- length (nos_g) 67 | # if (any(nos_g < 2)) 68 | # stop ("Less than 2 cases in some group") 69 | # 70 | # ## feature selection 71 | # if (k < p) 72 | # { 73 | # fsel <- rank_fn (X_tr, y_tr) [1:k] 74 | # X_tr <- X_tr [, fsel, drop = FALSE] 75 | # X_ts <- X_ts[, fsel, drop = FALSE] 76 | # } 77 | # 78 | # ## choosing the best lambda 79 | # cvfit <- cv.glmnet ( 80 | # x = X_tr, 81 | # y = y_tr, 82 | # nlambda = 500, 83 | # family = "multinomial" 84 | # ) 85 | # lambda <- cvfit$lambda[which.min(cvfit$cvm)] 86 | # cat ("The best lambda chosen by CV:", lambda, "\n") 87 | # 88 | # ## fit lasso with the best lambda 89 | # lassofit <- glmnet ( 90 | # x = X_tr, 91 | # y = y_tr, 92 | # nlambda = 500, 93 | # family = "multinomial" 94 | # ) 95 | # 96 | # betas <- coef (lassofit, s = lambda) 97 | # 98 | # mbetas <- matrix (0, p + 1, G) 99 | # for (g in 1:G) 100 | # { 101 | # mbetas[, g] <- as.numeric (betas[[g]]) 102 | # } 103 | # deltas <- mbetas[,-1, drop = FALSE] - mbetas[, 1] 104 | # 105 | # ## predicting for new cases 106 | # if (is.null (X_ts)) 107 | # { 108 | # return (deltas) 109 | # } 110 | # else 111 | # { 112 | # probs_pred <- matrix(predict ( 113 | # lassofit, 114 | # newx = X_ts, 115 | # s = lambda, 116 | # type = "response" 117 | # )[, , 1], 118 | # nrow = nrow (X_ts)) 119 | # return (list ( 120 | # probs_pred = probs_pred, 121 | # values_pred = apply (probs_pred, 1, which.max), 122 | # deltas = deltas 123 | # )) 124 | # } 125 | # } 126 | 127 | 128 | # lassocv_fsel_trpr <- function (y_tr, X_tr, X_ts, nos_fsel = ncol (X_tr), rankf = rank_k) 129 | # { 130 | # no_ts <- nrow (X_ts) 131 | # rankedf <- rankf (X = X_tr, y = y_tr) 132 | # 133 | # nfsel <- length (nos_fsel) 134 | # NC <- length (unique (y_tr)) 135 | # array_probs_pred <- array (0, dim = c(no_ts, NC, nfsel)) 136 | # 137 | # for (i in 1:nfsel) 138 | # { 139 | # fsel 
<- rankedf [1:nos_fsel[i]] 140 | # array_probs_pred[,,i] <- trpr_lassocv ( 141 | # y_tr = y_tr, X_tr = X_tr[, fsel, drop = FALSE], 142 | # X_ts = X_ts[, fsel, drop = FALSE])$probs_pred 143 | # } 144 | # 145 | # list (nos_fsel = nos_fsel, array_probs_pred = array_probs_pred) 146 | # } 147 | # 148 | # convert_ix <- function (n, nrow, ncol) 149 | # { 150 | # m <- floor ((n - 1) %/% nrow) 151 | # r <- n - 1 - m * nrow 152 | # c (r + 1, m + 1) 153 | # } 154 | # 155 | # trpr_rdacv <- function (X_tr, y_tr, X_ts, nos_fsel = ncol (X_tr), rankf = rank_plain) 156 | # { 157 | # try_require("rda") 158 | # 159 | # topgenes <- rankf (X_tr, y_tr) 160 | # x_tr <- t (X_tr) 161 | # x_ts <- t (X_ts) 162 | # 163 | # n <- ncol (x_ts) 164 | # C <- length (unique (y_tr)) 165 | # 166 | # K <- length (nos_fsel) 167 | # 168 | # array_probs_pred <- array (0, dim = c(n, C, K)) 169 | # 170 | # for (k in 1:K) 171 | # { 172 | # nofsel <- nos_fsel [k] 173 | # fsel <- topgenes [1:nofsel] 174 | # x_tr_sel <- x_tr [fsel,, drop = FALSE] 175 | # x_ts_sel <- x_ts [fsel,, drop = FALSE] 176 | # 177 | # fit <- rda::rda (x = x_tr_sel, y = y_tr) 178 | # fitcv <- rda::rda.cv (fit, x = x_tr_sel, y = y_tr) 179 | # no_delta <- length (fitcv$delta) 180 | # no_alpha <- length (fitcv$alpha) 181 | # ixmin <- which.min(fitcv$cv.err) 182 | # ixad <- convert_ix (ixmin, no_alpha, no_delta) 183 | # opt_alpha <- fitcv$alpha[ixad[1]] 184 | # opt_delta <- fitcv$delta[ixad[2]] 185 | # 186 | # array_probs_pred[,,k] <- rda::rda (x = x_tr_sel, y = y_tr, xnew = x_ts_sel, 187 | # alpha = opt_alpha, delta = opt_delta )$posterior[1,1,,] 188 | # 189 | # array_probs_pred[,,k] <- exp (array_probs_pred[,,k]) 190 | # sumprobs <- apply (array_probs_pred[,,k], 1, sum) 191 | # array_probs_pred[,,k] <- array_probs_pred[,,k] / sumprobs 192 | # } 193 | # 194 | # array_probs_pred 195 | # 196 | # } 197 | # 198 | # 199 | # rdacv_fsel_trpr <- function (X_tr, y_tr, X_ts, nos_fsel = ncol (X_tr), 200 | # rankf = rank_plain) 201 | # { 202 | # list (array_probs_pred = trpr_rdacv (X_tr = X_tr, y_tr = y_tr, X_ts = X_ts, nos_fsel = nos_fsel, rankf = rankf), nos_fsel = nos_fsel ) 203 | # } 204 | 205 | -------------------------------------------------------------------------------- /R/htlr.R: -------------------------------------------------------------------------------- 1 | #' Fit an HTLR Model 2 | #' 3 | #' This function trains linear logistic regression models with HMC in restricted Gibbs sampling. 4 | #' 5 | #' @param X Input matrix, of dimension nobs by nvars; each row is an observation vector. 6 | #' 7 | #' @param y Vector of response variables. Must be coded as non-negative integers, 8 | #' e.g., 1,2,\ldots,C for C classes; label 0 is also allowed. 9 | #' 10 | #' @param fsel Subset of features selected before fitting, such as by univariate screening. 11 | #' @param stdx Logical; if \code{TRUE}, the original feature values are standardized to have \code{mean = 0} 12 | #' and \code{sd = 1}. 13 | #' 14 | #' @param iter A positive integer specifying the number of iterations (including warmup). 15 | #' @param warmup A positive integer specifying the number of warmup iterations (aka burn-in). 16 | #' The number of warmup iterations should not be larger than \code{iter}; the default is \code{iter / 2}. 17 | #' 18 | #' @param thin A positive integer specifying the period for saving samples. 19 | #' 20 | #' @param leap The length of the leapfrog trajectory in the sampling phase. 21 | #' @param leap.warm The length of the leapfrog trajectory in the burn-in phase. 
22 | #' @param leap.stepsize The integrator step size used in the Hamiltonian simulation. 23 | #' 24 | #' @param cut The coefficients smaller than this criterion will be fixed in each HMC updating step. 25 | #' 26 | #' @param init The initial state of the Markov chain; it accepts three forms: 27 | #' \itemize{ 28 | #' \item a previously fitted \code{fithtlr} object, 29 | #' \item a user supplied initial coefficient matrix of dimension (p+1)*K, where p is the number of features, and K is the number of classes in y minus 1, 30 | #' \item a character string matching one of the following: 31 | #' \itemize{ 32 | #' \item "lasso" - (Default) Use Lasso initial state with \code{lambda} chosen by 33 | #' cross-validation. Users may specify their own candidate \code{lambda} values via 34 | #' optional argument \code{lasso.lambda}. Further customized Lasso initial 35 | #' states can be generated by \code{\link{lasso_deltas}}. 36 | #' \item "bcbc" - Use initial state generated by package \code{BCBCSF} 37 | #' (Bias-corrected Bayesian classification). Further customized BCBCSF initial 38 | #' states can be generated by \code{\link{bcbcsf_deltas}}. WARNING: This type of 39 | #' initial state can be used for continuous features such as gene expression profiles, 40 | #' but it should not be used for categorical features such as SNP profiles. 41 | #' \item "random" - Use random initial values sampled from N(0, 1). 42 | #' } 43 | #' 44 | #' } 45 | #' 46 | #' @param prior The prior to be applied to the model. Either a list of hyperparameter settings 47 | #' returned by \code{\link{htlr_prior}} or a character string from "t" (student-t), "ghs" (horseshoe), 48 | #' and "neg" (normal-exponential-gamma). 49 | #' 50 | #' @param df The degrees of freedom of the t/ghs/neg prior for coefficients. Will be ignored if the 51 | #' configuration list from \code{\link{htlr_prior}} is passed to \code{prior}. 52 | #' 53 | #' @param verbose Logical; set it to \code{TRUE} to track MCMC sampling iterations. 54 | #' 55 | #' @param rep.legacy Logical; if \code{TRUE}, the output produced in \code{HTLR} versions up to 56 | #' legacy-3.1-1 is reproduced. The speed will typically be slower than non-legacy mode on 57 | #' multi-core machines. Default is \code{FALSE}. 58 | #' 59 | #' @param keep.warmup.hist Warmup iterations are not recorded by default; set \code{TRUE} to record them. 60 | #' 61 | #' @param ... Other optional parameters: 62 | #' \itemize{ 63 | #' \item rda.alpha - A user supplied alpha value for \code{\link{bcbcsf_deltas}}. Default: 0.2. 64 | #' \item lasso.lambda - A user supplied lambda sequence for \code{\link{lasso_deltas}}. 65 | #' Default: \{.01, .02, \ldots, .05\}. Will be ignored if \code{rep.legacy} is set to \code{TRUE}. 66 | #' } 67 | #' 68 | #' @return An object with S3 class \code{htlr.fit}. 69 | #' 70 | #' @references 71 | #' Longhai Li and Weixin Yao (2018). Fully Bayesian Logistic Regression 72 | #' with Hyper-Lasso Priors for High-dimensional Feature Selection. 73 | #' \emph{Journal of Statistical Computation and Simulation} 2018, 88:14, 2827-2851. 
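#'
#' @seealso \code{\link{htlr_prior}}, \code{\link{predict.htlr.fit}}, \code{\link{summary.htlr.fit}}, \code{\link{nzero_idx}}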
74 | #' 75 | #' @export 76 | #' 77 | #' @examples 78 | #' set.seed(12345) 79 | #' data("colon") 80 | #' 81 | #' ## fit HTLR models with selected features, note that the chain length setting is for demo only 82 | #' 83 | #' ## using t prior with 1 df and log-scale fixed to -10 84 | #' fit.t <- htlr(X = colon$X, y = colon$y, fsel = 1:100, 85 | #' prior = htlr_prior("t", df = 1, logw = -10), 86 | #' init = "bcbc", iter = 20, thin = 1) 87 | #' 88 | #' ## using NEG prior with 1 df and log-scale fixed to -10 89 | #' fit.neg <- htlr(X = colon$X, y = colon$y, fsel = 1:100, 90 | #' prior = htlr_prior("neg", df = 1, logw = -10), 91 | #' init = "bcbc", iter = 20, thin = 1) 92 | #' 93 | #' ## using horseshoe prior with 1 df and auto-selected log-scale 94 | #' fit.ghs <- htlr(X = colon$X, y = colon$y, fsel = 1:100, 95 | #' prior = "ghs", df = 1, init = "bcbc", 96 | #' iter = 20, thin = 1) 97 | #' 98 | htlr <- 99 | function (X, y, 100 | fsel = 1:ncol(X), 101 | stdx = TRUE, 102 | prior = "t", 103 | df = 1, 104 | iter = 2000, 105 | warmup = floor(iter/2), 106 | thin = 1, 107 | init = "lasso", 108 | leap = 50, 109 | leap.warm = floor(leap/10), 110 | leap.stepsize = 0.3, 111 | cut = 0.05, 112 | verbose = FALSE, 113 | rep.legacy = FALSE, 114 | keep.warmup.hist = FALSE, 115 | ... 116 | ) 117 | { 118 | stopifnot(iter > warmup, warmup > 0, thin > 0, leap > 0, leap.warm > 0) 119 | 120 | dots <- list(...) # exists() cannot see arguments passed via '...', so capture them explicitly 121 | if (!is.null(dots$rda.alpha)) { 122 | rda.alpha <- dots$rda.alpha 123 | if (init != "bcbc") 124 | warning("not using 'BCBCSF' init, input 'rda.alpha' will be ignored") 125 | } else rda.alpha <- 0.2 126 | 127 | if (!is.null(dots$lasso.lambda)) { 128 | lasso.lambda <- dots$lasso.lambda 129 | if (init != "lasso") 130 | warning("not using 'LASSO' init, input 'lasso.lambda' will be ignored") 131 | else if (rep.legacy) 132 | warning("rep.legacy == TRUE, input 'lasso.lambda' will be ignored") 133 | } else lasso.lambda <- seq(.05, .01, by = -.01) 134 | 135 | 136 | if (is.character(prior)) 137 | prior <- htlr_prior(prior, df) 138 | 139 | # htlr_fit() will take care of input checking 140 | htlr_fit(X_tr = X, y_tr = y, fsel = fsel, stdzx = stdx, 141 | ptype = prior$ptype, alpha = prior$alpha, s = prior$logw, 142 | #eta = prior$eta, 143 | sigmab0 = prior$sigmab0, 144 | iters_h = warmup, iters_rmc = (iter - warmup), thin = thin, 145 | leap_L = leap, leap_L_h = leap.warm, leap_step = leap.stepsize, 146 | hmc_sgmcut = cut, initial_state = init, 147 | silence = !verbose, rep.legacy = rep.legacy, keep.warmup.hist = keep.warmup.hist) 148 | 149 | } 150 | -------------------------------------------------------------------------------- /R/util.r: -------------------------------------------------------------------------------- 1 | #' Generate Prior Configuration 2 | #' 3 | #' Configure prior hyper-parameters for HTLR model fitting. 4 | #' 5 | #' The output is a configuration list which is to be passed to the \code{prior} argument of \code{htlr}. 6 | #' Most users only need to specify the prior type and degrees of freedom; the other hyper-parameters 7 | #' will then be chosen automatically. Advanced users can supply each prior hyper-parameter themselves. 8 | #' For suggestions on picking hyper-parameters, see the \code{references}. 9 | #' 10 | #' @param ptype The prior to be applied to the model. Either "t" (student-t, default), "ghs" (horseshoe), 11 | #' or "neg" (normal-exponential-gamma). 12 | #' @param df The degrees of freedom (aka alpha) of t/ghs/neg prior for coefficients. 13 | #' @param logw The log scale of priors for coefficients. 14 | #' @param eta The \code{sd} of the normal prior for logw. 
When it is set to 0, logw is fixed. 15 | #' Otherwise, logw is assigned a normal prior and will be updated during sampling. 16 | #' @param sigmab0 The \code{sd} of the normal prior for the intercept. 17 | #' 18 | #' @return A configuration list containing \code{ptype}, \code{alpha}, \code{logw}, \code{eta}, and \code{sigmab0}. 19 | #' 20 | #' @references 21 | #' Longhai Li and Weixin Yao. (2018). Fully Bayesian Logistic Regression 22 | #' with Hyper-Lasso Priors for High-dimensional Feature Selection. 23 | #' \emph{Journal of Statistical Computation and Simulation} 2018, 88:14, 2827-2851. 24 | #' 25 | #' @export 26 | #' 27 | htlr_prior <- function(ptype = c("t", "ghs", "neg"), 28 | df = 1, 29 | logw = -(1 / df) * 10, 30 | eta = ifelse(df > 1, 3, 0), 31 | sigmab0 = 2000) 32 | { 33 | ptype <- match.arg(ptype) 34 | if (ptype != "t" && eta != 0) 35 | warning("random logw currently only supports t prior") 36 | list( 37 | "ptype" = ptype, 38 | "alpha" = df, 39 | "logw" = logw, 40 | "eta" = eta, 41 | "sigmab0" = sigmab0 42 | ) 43 | } 44 | 45 | #' Split Data into Train and Test Partitions 46 | #' 47 | #' This function splits the input data and response variables into training and testing parts. 48 | #' 49 | #' @param X Input matrix, of dimension \code{nobs} by \code{nvars}; each row is an observation vector. 50 | #' 51 | #' @param y Vector of response variables. 52 | #' 53 | #' @param p.train Proportion of cases in the training set. 54 | #' 55 | #' @param n.train Number of cases for training; will override \code{p.train} if specified. 56 | #' 57 | #' @return List of training data \code{x.tr}, \code{y.tr} and testing data \code{x.te}, \code{y.te}. 58 | #' 59 | #' @export 60 | #' 61 | #' @examples 62 | #' dat <- gendata_MLR(n = 100, p = 10) 63 | #' dat <- split_data(dat$X, dat$y, p.train = 0.7) 64 | #' dim(dat$x.tr) 65 | #' dim(dat$x.te) 66 | #' 67 | split_data <- function(X, 68 | y, 69 | p.train = 0.7, 70 | n.train = round(nrow(X) * p.train)) 71 | { 72 | stopifnot(nrow(X) == length(y), p.train > 0, p.train < 1) 73 | 74 | tr.row <- sample(1L:nrow(X), n.train, replace = FALSE) 75 | 76 | x.tr <- X[tr.row, , drop = FALSE] 77 | y.tr <- y[tr.row] 78 | x.te <- X[-tr.row, , drop = FALSE] 79 | y.te <- y[-tr.row] 80 | 81 | list( 82 | "x.tr" = x.tr, 83 | "y.tr" = y.tr, 84 | "x.te" = x.te, 85 | "y.te" = y.te 86 | ) 87 | } 88 | 89 | # compute sd of betas 90 | # @export 91 | comp_sdb <- function (deltas, removeint = TRUE, normalize = FALSE) 92 | { 93 | C <- ncol (deltas) + 1 94 | if (removeint) 95 | { 96 | deltas <- deltas[-1,,drop = FALSE] 97 | } 98 | 99 | vardeltas <- comp_vardeltas (deltas) 100 | sdb <- sqrt (vardeltas/C) 101 | 102 | if (normalize) sdb <- sdb / max(sdb) 103 | 104 | sdb 105 | } 106 | 107 | #' @export 108 | nobs.htlr.fit <- function(object, ...) 109 | { 110 | object$n 111 | } 112 | 113 | #' @export 114 | print.htlr.fit <- function(x, ...) 
115 | { 116 | info.data <- sprintf("Data:\n 117 | response:\t%d-class 118 | observations:\t%d 119 | predictors:\t%d (w/ intercept) 120 | standardised:\t%s", 121 | x$K + 1, x$n, x$p + 1, as.character(x$feature$stdx)) 122 | 123 | info.model <- sprintf("Model:\n 124 | prior dist:\t%s (df = %d, log(w) = %.1f) 125 | init state:\t%s 126 | burn-in:\t%d 127 | sample:\t%d (posterior sample size)", 128 | x$prior$ptype, x$prior$alpha, x$prior$logw, 129 | x$mc.param$init, x$mc.param$iter.warm, x$mc.param$iter.rmc) 130 | 131 | info.est <- sprintf("Estimates:\n 132 | model size:\t%d (w/ intercept) 133 | coefficients: see help('summary.htlr.fit')", 134 | length(nzero_idx(x)) + 1) 135 | 136 | cat("Fitted HTLR model", "\n\n", info.data, "\n\n", info.model, "\n\n", info.est) 137 | } 138 | 139 | # try to install suggested packages when needed 140 | # @author: Michael W. Kearney 141 | # @source: https://github.com/ropensci/rtweet/blob/master/R/utils.R 142 | try_require <- function(pkg, f = NULL) { 143 | if (is.null(f)) 144 | f <- "this action" 145 | else 146 | f <- paste0("`", f, "`") 147 | 148 | if (requireNamespace(pkg, quietly = TRUE)) 149 | { 150 | library(pkg, character.only = TRUE) 151 | return(invisible()) 152 | } 153 | 154 | stop(paste0("Package `", pkg, "` required for ", f , ".\n", 155 | "Please install and try again."), call. = FALSE) 156 | } 157 | 158 | # @param rstudio launch in rstudio viewer instead of web browser? 159 | # @param ... passed to shiny::runApp 160 | # launch_shiny <- function(launch.browser = TRUE, ...) { 161 | # try_require("shiny", f = "launch_shiny()") 162 | # shiny::runApp(system.file("app", package = "HTLR"), 163 | # launch.browser = launch.browser, ...) 164 | # } 165 | 166 | #' Pipe operator 167 | #' 168 | #' See \code{magrittr::\link[magrittr]{\%>\%}} for details. 169 | #' 170 | #' @name %>% 171 | #' @rdname pipe 172 | #' @keywords internal 173 | #' @export 174 | #' @importFrom magrittr %>% 175 | #' @usage lhs \%>\% rhs 176 | NULL 177 | 178 | #Plots feature importance scores 179 | # 180 | #This function plots feature importance scores or coefficients using histogram line. 181 | # 182 | #param fscores Scores measuring feature importance, such as \code{wsdbs}, \code{msdbs}, or coefficients values. 183 | # 184 | #export 185 | # 186 | #seealso htlr_fss 187 | # plot_fscore <- function (fscores, fsel=1:length (fscores), show_ix = 0.1, do.plot = TRUE, ...) 188 | # { 189 | # if (show_ix > 1) stop ("show_ix must be less than 1") 190 | # afscores <- abs (fscores) 191 | # mfscore <- max (afscores) 192 | # 193 | # plotargs <- list (...) 
194 | # 195 | # p <- length (fscores) 196 | # 197 | # if (is.null (plotargs$log)) plotargs$log <- "" 198 | # if (is.null (plotargs$type)) plotargs$type <- "h" 199 | # 200 | # if (is.null (plotargs$ylab) ) plotargs$ylab = "Feature Score" 201 | # if (is.null(plotargs$xlab))plotargs$xlab <-"Feature Index" 202 | # if (is.null (plotargs$cex.axis)) plotargs$cex.axis <- 0.8 203 | # 204 | # # plot fscores 205 | # if (do.plot) { 206 | # do.call (plot, c(list (x = fscores), plotargs) ) 207 | # # show thresholds 0.1 and 0.01 208 | # abline (h = mfscore * c(-0.01,0.01), lty = 2, col = "grey") 209 | # abline (h = mfscore * c(-0.1,0.1), lty = 1, col = "grey") 210 | # } 211 | # 212 | # # show top features 213 | # itops <- which (afscores >= show_ix * mfscore) 214 | # if (do.plot) 215 | # text (itops, fscores [itops], fsel[itops], col = "red", srt = 0, adj = - 0.2, cex = 0.7) 216 | # 217 | # a <- fsel[itops] 218 | # 219 | # } 220 | -------------------------------------------------------------------------------- /R/mccoef.r: -------------------------------------------------------------------------------- 1 | #' Create a Matrix of Markov Chain Samples 2 | #' 3 | #' The Markov chain samples (without warmup) included in a \code{htlr.fit} object will be coerced to a matrix. 4 | #' 5 | #' @param x An object of S3 class \code{htlr.fit}. 6 | #' 7 | #' @param k Coefficients associated with class \code{k} will be drawn. Must be a positive integer in 8 | #' 1,2,\ldots,C-1 for C-class training labels (base class 0 cannot be chosen). By default the last class 9 | #' is selected. For a binary logistic model this argument can be ignored. 10 | #' 11 | #' @param include.warmup Whether or not to include warmup samples. 12 | #' 13 | #' @param ... Not used. 14 | #' 15 | #' @return A matrix with \code{(p + 1)} columns and \code{i} rows, where \code{p} is the number of features 16 | #' excluding intercept, and \code{i} is the number of iterations after burn-in. 17 | #' 18 | #' @export 19 | #' 20 | #' @examples 21 | #' ## No. of features used: 100; No. of iterations after burn-in: 15 22 | #' fit <- htlr(X = colon$X, y = colon$y, fsel = 1:100, iter = 20, warmup = 5) 23 | #' 24 | #' dim(as.matrix(fit)) 25 | #' 26 | as.matrix.htlr.fit <- function(x, k = NULL, include.warmup = FALSE, ...) 27 | { 28 | if (is.null(k)) 29 | { 30 | k <- x$K 31 | if (k > 1) 32 | message( 33 | "'k' was not specified, coefficients associated with the last class will be drawn" 34 | ) 35 | } 36 | if (include.warmup) 37 | mcdeltas <- t(x$mcdeltas[ , k, -1]) 38 | else 39 | mcdeltas <- t(x$mcdeltas[ , k, get_sample_indice(dim(x$mcdeltas)[3], x$mc.param$iter.rmc)]) 40 | colnames(mcdeltas) <- colnames(x$feature$X) 41 | mcdeltas 42 | } 43 | 44 | #' Posterior Summaries 45 | #' 46 | #' This function gives a summary of the posterior distribution of the parameters. 47 | #' 48 | #' @param object An object of S3 class \code{htlr.fit}. 49 | #' 50 | #' @param usedmc Indices of Markov chain iterations used for inference. By default all iterations are used. 51 | #' 52 | #' @param method A function that is used to aggregate the MCMC samples. The default is \code{median}, 53 | #' other built-in/customized statistical functions such as \code{mean}, \code{sd}, and \code{mad} 54 | #' can also be used. 55 | #' 56 | #' @param features A vector of indices (int) or names (char) that specify the parameters we will look at. 57 | #' By default all parameters are selected. 58 | #' 59 | #' @param ... Not used. 60 | #' 61 | #' @return A point summary of MCMC samples. 
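#'
#' @seealso \code{\link{as.matrix.htlr.fit}}, \code{\link{nzero_idx}}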
62 | #' 63 | #' @importFrom magrittr extract2 64 | #' 65 | #' @export 66 | #' 67 | #' @examples 68 | #' set.seed(12345) 69 | #' data("colon") 70 | #' 71 | #' fit <- htlr(X = colon$X, y = colon$y, fsel = 1:100, iter = 20) 72 | #' summary(fit, features = 1:16) 73 | #' 74 | summary.htlr.fit <- 75 | function (object, 76 | features = 1L:object$p, 77 | method = median, 78 | usedmc = get_sample_indice(dim(object$mcdeltas)[3], object$mc.param$iter.rmc), 79 | ...) 80 | { 81 | ix.f <- object$feature$fsel[features] %>% na.omit() 82 | 83 | mddeltas <- object$mcdeltas[c(1, ix.f + 1), , usedmc, drop = FALSE] %>% 84 | apply(MARGIN = c(1, 2), FUN = method) 85 | 86 | attr(mddeltas, "stats") <- 87 | match.call() %>% 88 | as.list() %>% 89 | extract2("method") %>% 90 | as.character() 91 | if (is.null(colnames(mddeltas))) 92 | attr(mddeltas, "stats") <- "median" 93 | rownames(mddeltas) <- c("Intercept", names(ix.f)) 94 | colnames(mddeltas) <- paste("class", levels(factor(object$feature$y)))[-1] 95 | 96 | return(mddeltas) 97 | } 98 | 99 | # @param p.burn.extra Percentage of iterations to be burned out after known warmup iterations 100 | # @param n.burn.extra Number of iterations to be burned out after known warmup iterations, 101 | # will overwrite p.burn.extra if specified 102 | # @param ignore.first First entry should be ignored because it is supposed to be the initial state, 103 | # in rare cases you may want it be FALSE 104 | get_sample_indice <- function(n.sample, 105 | iter.rmc, 106 | thin = 1, 107 | p.burn.extra = 0, 108 | n.burn.extra = floor(iter.rmc * p.burn.extra), 109 | ignore.first = TRUE) 110 | { 111 | stopifnot(p.burn.extra >= 0, p.burn.extra < 1) 112 | n.warmup <- n.sample - iter.rmc 113 | seq((n.warmup + n.burn.extra + ignore.first), n.sample, thin) 114 | } 115 | 116 | # @export 117 | htlr_sdb <- function(fit, 118 | burn = NULL, 119 | usedmc = NULL, 120 | thin = 1) 121 | { 122 | if (is.null(usedmc)) 123 | { 124 | if (is.null(burn)) 125 | usedmc <- get_sample_indice(dim(fit$mcdeltas)[3], fit$mc.param$iter.rmc, p.burn.extra = 0.2, thin = thin) 126 | else 127 | usedmc <- get_sample_indice(dim(fit$mcdeltas)[3], fit$mc.param$iter.rmc, n.burn.extra = burn, thin = thin) 128 | } 129 | deltas <- summary(fit, usedmc = usedmc, method = mean) 130 | comp_sdb(deltas, removeint = TRUE, normalize = FALSE) 131 | } 132 | 133 | #' Get Indices of Non-Zero Coefficients 134 | #' 135 | #' Get the indices of non-zero coefficients from fitted HTLR model objects. 136 | #' 137 | #' @param fit An object of S3 class \code{htlr.fit}. 138 | #' 139 | #' @param cut Threshold on relative SDB to distinguish zero coefficients. 140 | #' 141 | #' @return Indices vector of non-zero coefficients in the model. 142 | #' 143 | #' @export 144 | #' 145 | #' @examples 146 | #' set.seed(12345) 147 | #' data("colon") 148 | #' 149 | #' fit <- htlr(X = colon$X, y = colon$y, fsel = 1:100, iter = 20) 150 | #' nzero_idx(fit) 151 | #' 152 | nzero_idx <- function(fit, cut = 0.1) 153 | { 154 | sdb <- htlr_sdb(fit) 155 | which(sdb > cut * max(sdb)) 156 | } 157 | 158 | # Plots Markov chain trace or scatterplot 159 | # 160 | # This function plots Markov chain samples of 1 or 2 features. In plotting for 2 features, 161 | # gray lines show Markov chain transitions. 162 | # 163 | # @param fithtlr A list containing fitting results by \code{\link{htlr_fit}}. 164 | # @param features A vector of 1 or 2 numbers representing 1 or 2 features one wishes to look. 165 | # @param class Coefficients associated with \code{class} will be drawn. 
Must be a positive integer in 166 | # 1,2,\ldots,C for C-class training labels. 167 | # @param usedmc Indices of Markov chain iterations used in the plots; one can set it to the 168 | # indices of Markov chain iterations belonging to the ith feature subset, \code{mcids[[i]]}, 169 | # found by \code{\link{htlr_fss}}. 170 | # 171 | # @return A vector of Markov chain samples of 1 coefficient, 172 | # or an array of Markov chain samples of 2 coefficients. 173 | # 174 | # @export 175 | # 176 | htlr_mccoef <- function (fithtlr, 177 | features = 1, 178 | class = 2, 179 | usedmc = get_sample_indice(dim(fithtlr$mcdeltas)[3], fithtlr$mc.param$iter.rmc), 180 | symlim = FALSE, 181 | drawq = c(0, 1), 182 | truedeltas = NULL 183 | ) 184 | { 185 | mcdims <- dim (fithtlr$mcdeltas) 186 | p <- mcdims [1] - 1 187 | K <- mcdims [2] 188 | features <- features [!is.na (features)] 189 | 190 | if (length (features) == 1) 191 | { 192 | if (features == 0) j <- 1 193 | else j <- which(fithtlr$feature$fsel == features) + 1 194 | k <- class - 1 195 | deltas <- fithtlr$mcdeltas[j, k,usedmc, drop = FALSE] 196 | 197 | plot (deltas, pch = 20, 198 | xlab = "Markov Chain Index (after burn-in and thinning)", 199 | ylab = sprintf ("Coef. Value of Feature %d", features), 200 | main = sprintf("MC Coefficients for Feature %d (Class %d)", 201 | features, class) 202 | ) 203 | qdeltas <- quantile (deltas, probs = c(0.025,0.5,0.975)) 204 | abline (h = qdeltas[2], lwd = 2) 205 | abline (h = qdeltas[1], lty = 2, lwd = 2) 206 | abline (h = qdeltas[3], lty = 2, lwd = 2) 207 | 208 | if (!is.null (truedeltas)) 209 | { 210 | abline (h = truedeltas [j,k], lwd = 2, col = "red") 211 | } 212 | } 213 | else 214 | { 215 | j <- 1:2 216 | if (features[1] == 0) j[1] <- 1 else 217 | j[1] <- which (fithtlr$feature$fsel == features[1]) + 1 218 | if (features[2] == 0) j[2] <- 1 else 219 | j[2] <- which (fithtlr$feature$fsel == features[2]) + 1 220 | k <- class - 1 221 | deltas <- fithtlr$mcdeltas[j, k,usedmc, drop = TRUE] 222 | 223 | if (symlim) 224 | { 225 | lim <- quantile (deltas, probs = drawq) 226 | xlim <- lim 227 | ylim <- lim 228 | } 229 | else 230 | { 231 | xlim <- quantile (deltas[1,], probs = drawq) 232 | ylim <- quantile (deltas[2,], probs = drawq) 233 | } 234 | 235 | plot (t(deltas), 236 | xlab = sprintf ("feature %d", features[1]), 237 | ylab = sprintf ("feature %d", features[2]), 238 | xlim = xlim, 239 | ylim = ylim, 240 | type = "l", col = "grey", 241 | main = sprintf("MC Coef. for Features %d and %d (Class %d)", 242 | features[1], features[2], class) ) 243 | points (t(deltas), pch = 20) 244 | } 245 | invisible (deltas) 246 | } 247 | -------------------------------------------------------------------------------- /R/compred.r: -------------------------------------------------------------------------------- 1 | #' Evaluate Prediction Results 2 | #' 3 | #' This function compares the prediction results returned by a classifier with ground truth, 4 | #' and gives a summary of the evaluation. 5 | #' 6 | #' @param y.pred A matrix of predicted probabilities, as returned by a classifier. 7 | #' 8 | #' @param y.true Ground truth labels vector. 9 | #' 10 | #' @param caseid The names of the test cases to take into account. By default all test cases are used for evaluation. 11 | #' 12 | #' @param showplot Logical; if \code{TRUE}, a summary plot will be generated. 13 | #' 14 | #' @param ... Not used. 15 | #' 16 | #' @return A summary of the evaluation results. 
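#'
#' @examples
#' ## a minimal sketch with hypothetical toy values (not package data):
#' ## two test cases, three classes; case 2 is misclassified
#' p.hat <- matrix(c(0.8, 0.1, 0.1,
#'                   0.5, 0.2, 0.3), nrow = 2, byrow = TRUE)
#' evaluate_pred(p.hat, y.true = c(1, 3), showplot = FALSE)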
17 | #' 18 | #' @export 19 | #' 20 | evaluate_pred <- function(y.pred, y.true, caseid = names(y.true), showplot = TRUE, ...) 21 | { 22 | if (is.null(caseid)) 23 | caseid <- 1:length(y.true) 24 | 25 | C <- ncol(y.pred) 26 | values_pred <- apply(y.pred, 1, which.max) 27 | 28 | if (min(y.true) == 0) y.true <- y.true + 1 29 | 30 | table_eval <- data.frame(caseid, y.true, y.pred, 1 * (values_pred != y.true)) 31 | colnames(table_eval) <- c("Case ID", "True Label", paste ("Pred. Prob", 1:C), "Wrong?") 32 | 33 | eval_tab_pred(table_eval, showplot = showplot) 34 | } 35 | 36 | probs_attrue_bplr <- function (probs_pred, y) 37 | { 38 | tp <- rep(0, nrow(probs_pred)) 39 | for(i in 1:nrow(probs_pred)) 40 | tp[i] <- probs_pred[i, y[i]] 41 | 42 | tp 43 | } 44 | 45 | #' @importFrom utils read.table 46 | #' @import grDevices graphics 47 | eval_tab_pred <- function (table_eval, showplot = TRUE, method = "Prediction", ...) 48 | { 49 | if (is.character (table_eval)) 50 | { 51 | table_eval <- as.matrix (read.table (table_eval)) 52 | } 53 | 54 | C <- ncol (table_eval) - 3 55 | colnames (table_eval) <- c("Case ID", "True Label", paste ("Pred. Prob", 1:C), "Wrong?") 56 | 57 | probs_pred <- table_eval [, 2+(1:C)] 58 | y <- table_eval[,2] 59 | probs_at_truelabels <- probs_attrue_bplr (probs_pred, y) 60 | which.wrong <- which (table_eval[,C+3] == 1) 61 | n <- nrow (table_eval) 62 | 63 | amlp <- - mean (log (probs_at_truelabels)) 64 | no_errors <- sum (table_eval[, C+3]) 65 | er <- no_errors/n 66 | 67 | yl <- y; if (C == 2) yl[y==2] <- 3 68 | 69 | plotargs <- list (...) 70 | if (is.null (plotargs$ylab)) 71 | plotargs$ylab <- "Predictive Prob at True Label" 72 | if (is.null (plotargs$xlab)) plotargs$xlab <- "Case Index" 73 | if (is.null (plotargs$ylim)) plotargs$ylim <- c(0,1) 74 | if (is.null (plotargs$pch)) plotargs$pch <- yl 75 | if (is.null (plotargs$col)) plotargs$col <- 1+table_eval[, C+3] 76 | if (showplot) 77 | { 78 | plotargs$x <- probs_at_truelabels 79 | do.call (plot, plotargs) 80 | 81 | if (C == 2) abline (h = 0.5) 82 | abline (h = 0.1,lty = 2) 83 | 84 | title (main = sprintf ("%s: AMLP = %5.3f, Error Rate = %4.2f%% (%d/%d)", 85 | method,amlp, er*100, no_errors, n), 86 | cex = 0.8, line = 0.5) 87 | 88 | # if (no_errors > 0) { 89 | # text (which.wrong, probs_at_truelabels[which.wrong], labels = which.wrong, 90 | # srt = 90, adj = - 0.4, cex = 0.9, col = "red") 91 | # } 92 | } 93 | list ("prob_at_truelabels" = probs_at_truelabels, 94 | "table_eval" = table_eval, 95 | "amlp" = amlp, 96 | "err_rate" = er, 97 | "which.wrong" = which.wrong) 98 | } 99 | 100 | # plot_features <- function (X, features, predfile = NULL, colcases = NULL, ...) 101 | # { 102 | # n <- nrow (X) 103 | # 104 | # if (is.null (colcases)) { 105 | # if (!is.null (predfile)) 106 | # { 107 | # which.wrong <- eval_pred (predfile, showplot = FALSE)$which.wrong 108 | # colcases <- rep (1, n) 109 | # colcases [which.wrong] <- 2 110 | # } 111 | # else 112 | # { 113 | # which.wrong <- NULL 114 | # colcases <- rep (1,n) 115 | # 116 | # } 117 | # } 118 | # if (length (features) == 2) 119 | # { 120 | # plot (X[,features], col = colcases, xlab = "", ylab = "", ...) 
121 | # 122 | # title (xlab = sprintf ("Expression Level of Gene %d", features[1])) 123 | # title (ylab = sprintf ("Expression Level of Gene %d", features[2])) 124 | # 125 | # title( main = sprintf("Scatterplot of Genes %d and %d", 126 | # features[1],features[2]), line = 0.5) 127 | # for (i in which.wrong) 128 | # { 129 | # text (X[i,features[1]], X[i, features[2]], i, 130 | # srt =90, adj = 1.5, col = "red", cex = 0.8) 131 | # } 132 | # } 133 | # 134 | # if (length (features) == 1) 135 | # { 136 | # plot (X[,features],col = colcases, xlab = "", ylab = "", ...) 137 | # 138 | # title (xlab = "Case Index", line = 2) 139 | # title (ylab = sprintf ("Expression Level of Gene %d", features)) 140 | # 141 | # for (i in which.wrong) 142 | # { 143 | # text (i,X[i,features],i,srt=90,adj = -0.4,col="red",cex = 0.8) 144 | # } 145 | # } 146 | # 147 | # if (length (features) == 3) 148 | # { 149 | # scatterplot3d (X[,features[1]], X[,features[2]], X[,features[3]], 150 | # xlab = paste ("Gene", features[1]), 151 | # ylab = paste ("Gene", features[2]), 152 | # zlab = paste ("Gene", features[3]), 153 | # main = sprintf("3D Scatterplot of Genes %d, %d and %d", 154 | # features [1], features [2], features [3] ), 155 | # color = colcases,...) 156 | # } 157 | # 158 | # } 159 | 160 | #import grDevices 161 | # compare2 <- function (a1, a2, m1, m2, item, filename = item, sign = FALSE, ...) 162 | # { 163 | # psfile <- sprintf ("%s.ps", filename) 164 | # 165 | # postscript (file = psfile, title = psfile, 166 | # paper = "special", width = 5, height = 5, horizontal = F) 167 | # par (mar = c(4,4,2,0.5)) 168 | # if (!sign) xlim <- range (a1, a2) 169 | # else { 170 | # xlim <- max (abs(range (a1, a2, na.rm = TRUE, finite = TRUE))) 171 | # xlim <- c(-xlim, +xlim) 172 | # } 173 | # plot (a1, a2, xlim = xlim, ylim = xlim, 174 | # main = sprintf("%s by %s and %s", item, m1, m2), 175 | # xlab = m1, ylab = m2, ...) 176 | # abline (a = 0, b = 1, col = "grey") 177 | # if (sign) abline (a = 0, b = -1, col = "grey") 178 | # dev.off() 179 | # } 180 | 181 | 182 | comp_amlp <- function(probs_pred, y) 183 | { 184 | mlp <- rep(0, nrow(probs_pred)) 185 | for(i in 1:nrow(probs_pred)) 186 | mlp[i] <- log(probs_pred[i,y[i]]) 187 | 188 | - mean(mlp) 189 | } 190 | 191 | ## Mloss -- a matrix specifying losses, with row for true values, and 192 | ## column for predicted values. 193 | comp_loss <- function(probs_pred, y, Mloss = NULL) 194 | { 195 | G <- ncol (probs_pred) 196 | 197 | if (is.null (Mloss)) 198 | { 199 | Mloss <- matrix(1,G,G) 200 | diag(Mloss) <- 0 201 | } 202 | 203 | loss_pred <- probs_pred %*% Mloss 204 | y_pred <- apply(loss_pred,1,which.min) 205 | 206 | loss <- 0 207 | for(i in 1:nrow(probs_pred)) { 208 | loss <- loss + Mloss[y[i],y_pred[i]] 209 | } 210 | 211 | loss / length (y) 212 | } 213 | 214 | ### old not used any more functions 215 | #if (FALSE) 216 | #{ 217 | 218 | ### partition all cases into nfold subsets 219 | ### This function partitions a set of observations into subsets of almost 220 | ### equal size. 
The result is used in crossvalidation 221 | #mk_folds <- function(y, nfold = 10, random = TRUE) 222 | #{ 223 | # n <- length (y) 224 | # nos_g <- table (y) 225 | # G <- length (nos_g) 226 | # nfold <- min (nfold, nos_g) 227 | 228 | # folds <- rep (0, n) 229 | # 230 | # for (g in 1:G) 231 | # { 232 | # ng <- nos_g [g] 233 | # m <- ceiling (ng/nfold) 234 | 235 | # if (random) 236 | # { 237 | # gfolds <- c( replicate (m, sample (1:nfold) ) ) [1:ng] 238 | # } 239 | # else 240 | # { 241 | # gfolds <- rep (1:nfold, m)[1:ng] 242 | # } 243 | # 244 | # folds [y == g] <- gfolds 245 | # } 246 | # 247 | # ## create fold list 248 | # foldlist <- rep (list (""),nfold) 249 | # for (i in 1:nfold) 250 | # { 251 | # foldlist [[i]] <- which (folds == i) 252 | # } 253 | # 254 | # foldlist 255 | # } 256 | 257 | ##################### a generic crossvalidation function #################### 258 | ### X --- features with rows for cases 259 | ### y --- a vector of response values 260 | ### nfold --- number of folds in cross validation 261 | ### trpr_fn --- function for training and prediction: 262 | ### the arguments of trpr_fn must include X_tr, y_tr, X_ts 263 | ### the outputs of trpr_fn must include probs_pred 264 | ### ... --- other arguments needed by trpr_fn other than X_tr, y_tr, X_ts 265 | #cross_vld <- function ( 266 | # trpr_fn, folds = NULL, nfold = 10, X, y,randomcv = TRUE, ...) 267 | #{ 268 | # if (!is.matrix(X)) stop ("'X' must be a matrix with rows for cases") 269 | 270 | # n <- nrow(X) 271 | # nos_g <- as.vector (tapply (rep(1,n), INDEX = y, sum)) 272 | # if (any(nos_g < 2)) stop ("less than 2 cases in some group in your data") 273 | # G <- length (nos_g) 274 | 275 | 276 | # if (is.null (folds)) folds <- mk_folds (y, nfold) 277 | # 278 | # nfold <- length (folds) 279 | 280 | # array_probs_pred <- NULL 281 | # vector_ts <- NULL 282 | 283 | # for (i_fold in 1:nfold) 284 | # { 285 | # cat ( "=============== CV: Fold",i_fold, "===============\n") 286 | # 287 | # ts <- folds [[i_fold]] 288 | # vector_ts <- c (vector_ts, ts) 289 | # tr <- (1:n)[- (ts)] 290 | 291 | # array_probs_pred <- abind ( array_probs_pred, 292 | # trpr_fn ( 293 | # X_tr = X[tr,, drop = FALSE], y_tr = y[tr], 294 | # X_ts = X[ts,, drop = FALSE], ...)$array_probs_pred, 295 | # along = 1) 296 | # } 297 | 298 | # ## make the order of cases in array_probs_pred is the same as X 299 | # array_probs_pred <- array_probs_pred [order (vector_ts),,,drop = FALSE] 300 | 301 | # list (folds = folds, array_probs_pred = array_probs_pred) 302 | #} 303 | 304 | #} 305 | 306 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 12 | #endif 13 | 14 | // sample_trunc_norm 15 | Rcpp::NumericVector sample_trunc_norm(const int n, const double lb, const double ub, const bool verbose); 16 | RcppExport SEXP _HTLR_sample_trunc_norm(SEXP nSEXP, SEXP lbSEXP, SEXP ubSEXP, SEXP verboseSEXP) { 17 | BEGIN_RCPP 18 | Rcpp::RObject rcpp_result_gen; 19 | Rcpp::RNGScope rcpp_rngScope_gen; 20 | Rcpp::traits::input_parameter< const int >::type n(nSEXP); 21 | Rcpp::traits::input_parameter< const double 
>::type lb(lbSEXP); 22 | Rcpp::traits::input_parameter< const double >::type ub(ubSEXP); 23 | Rcpp::traits::input_parameter< const bool >::type verbose(verboseSEXP); 24 | rcpp_result_gen = Rcpp::wrap(sample_trunc_norm(n, lb, ub, verbose)); 25 | return rcpp_result_gen; 26 | END_RCPP 27 | } 28 | // sample_post_ichi 29 | Rcpp::NumericVector sample_post_ichi(const int n, const Rcpp::NumericVector& sigmasq, const double alpha1, const double alpha0, const double w0, const bool verbose); 30 | RcppExport SEXP _HTLR_sample_post_ichi(SEXP nSEXP, SEXP sigmasqSEXP, SEXP alpha1SEXP, SEXP alpha0SEXP, SEXP w0SEXP, SEXP verboseSEXP) { 31 | BEGIN_RCPP 32 | Rcpp::RObject rcpp_result_gen; 33 | Rcpp::RNGScope rcpp_rngScope_gen; 34 | Rcpp::traits::input_parameter< const int >::type n(nSEXP); 35 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type sigmasq(sigmasqSEXP); 36 | Rcpp::traits::input_parameter< const double >::type alpha1(alpha1SEXP); 37 | Rcpp::traits::input_parameter< const double >::type alpha0(alpha0SEXP); 38 | Rcpp::traits::input_parameter< const double >::type w0(w0SEXP); 39 | Rcpp::traits::input_parameter< const bool >::type verbose(verboseSEXP); 40 | rcpp_result_gen = Rcpp::wrap(sample_post_ichi(n, sigmasq, alpha1, alpha0, w0, verbose)); 41 | return rcpp_result_gen; 42 | END_RCPP 43 | } 44 | // sample_trunc_beta 45 | Rcpp::NumericVector sample_trunc_beta(const int n, const double alpha, const double beta, const double lb, const double ub, const bool verbose); 46 | RcppExport SEXP _HTLR_sample_trunc_beta(SEXP nSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP lbSEXP, SEXP ubSEXP, SEXP verboseSEXP) { 47 | BEGIN_RCPP 48 | Rcpp::RObject rcpp_result_gen; 49 | Rcpp::RNGScope rcpp_rngScope_gen; 50 | Rcpp::traits::input_parameter< const int >::type n(nSEXP); 51 | Rcpp::traits::input_parameter< const double >::type alpha(alphaSEXP); 52 | Rcpp::traits::input_parameter< const double >::type beta(betaSEXP); 53 | Rcpp::traits::input_parameter< const double >::type lb(lbSEXP); 54 | Rcpp::traits::input_parameter< const double >::type ub(ubSEXP); 55 | Rcpp::traits::input_parameter< const bool >::type verbose(verboseSEXP); 56 | rcpp_result_gen = Rcpp::wrap(sample_trunc_beta(n, alpha, beta, lb, ub, verbose)); 57 | return rcpp_result_gen; 58 | END_RCPP 59 | } 60 | // htlr_fit_helper 61 | Rcpp::List htlr_fit_helper(int p, int K, int n, arma::mat& X, arma::mat& ymat, arma::uvec& ybase, std::string ptype, double alpha, double s, double eta, int iters_rmc, int iters_h, int thin, int leap_L, int leap_L_h, double leap_step, double hmc_sgmcut, arma::mat& deltas, arma::vec& sigmasbt, bool keep_warmup_hist, int silence, bool legacy); 62 | RcppExport SEXP _HTLR_htlr_fit_helper(SEXP pSEXP, SEXP KSEXP, SEXP nSEXP, SEXP XSEXP, SEXP ymatSEXP, SEXP ybaseSEXP, SEXP ptypeSEXP, SEXP alphaSEXP, SEXP sSEXP, SEXP etaSEXP, SEXP iters_rmcSEXP, SEXP iters_hSEXP, SEXP thinSEXP, SEXP leap_LSEXP, SEXP leap_L_hSEXP, SEXP leap_stepSEXP, SEXP hmc_sgmcutSEXP, SEXP deltasSEXP, SEXP sigmasbtSEXP, SEXP keep_warmup_histSEXP, SEXP silenceSEXP, SEXP legacySEXP) { 63 | BEGIN_RCPP 64 | Rcpp::RObject rcpp_result_gen; 65 | Rcpp::RNGScope rcpp_rngScope_gen; 66 | Rcpp::traits::input_parameter< int >::type p(pSEXP); 67 | Rcpp::traits::input_parameter< int >::type K(KSEXP); 68 | Rcpp::traits::input_parameter< int >::type n(nSEXP); 69 | Rcpp::traits::input_parameter< arma::mat& >::type X(XSEXP); 70 | Rcpp::traits::input_parameter< arma::mat& >::type ymat(ymatSEXP); 71 | Rcpp::traits::input_parameter< arma::uvec& >::type ybase(ybaseSEXP); 72 | 
Rcpp::traits::input_parameter< std::string >::type ptype(ptypeSEXP); 73 | Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); 74 | Rcpp::traits::input_parameter< double >::type s(sSEXP); 75 | Rcpp::traits::input_parameter< double >::type eta(etaSEXP); 76 | Rcpp::traits::input_parameter< int >::type iters_rmc(iters_rmcSEXP); 77 | Rcpp::traits::input_parameter< int >::type iters_h(iters_hSEXP); 78 | Rcpp::traits::input_parameter< int >::type thin(thinSEXP); 79 | Rcpp::traits::input_parameter< int >::type leap_L(leap_LSEXP); 80 | Rcpp::traits::input_parameter< int >::type leap_L_h(leap_L_hSEXP); 81 | Rcpp::traits::input_parameter< double >::type leap_step(leap_stepSEXP); 82 | Rcpp::traits::input_parameter< double >::type hmc_sgmcut(hmc_sgmcutSEXP); 83 | Rcpp::traits::input_parameter< arma::mat& >::type deltas(deltasSEXP); 84 | Rcpp::traits::input_parameter< arma::vec& >::type sigmasbt(sigmasbtSEXP); 85 | Rcpp::traits::input_parameter< bool >::type keep_warmup_hist(keep_warmup_histSEXP); 86 | Rcpp::traits::input_parameter< int >::type silence(silenceSEXP); 87 | Rcpp::traits::input_parameter< bool >::type legacy(legacySEXP); 88 | rcpp_result_gen = Rcpp::wrap(htlr_fit_helper(p, K, n, X, ymat, ybase, ptype, alpha, s, eta, iters_rmc, iters_h, thin, leap_L, leap_L_h, leap_step, hmc_sgmcut, deltas, sigmasbt, keep_warmup_hist, silence, legacy)); 89 | return rcpp_result_gen; 90 | END_RCPP 91 | } 92 | // log_sum_exp 93 | arma::vec log_sum_exp(const arma::mat& A); 94 | RcppExport SEXP _HTLR_log_sum_exp(SEXP ASEXP) { 95 | BEGIN_RCPP 96 | Rcpp::RObject rcpp_result_gen; 97 | Rcpp::RNGScope rcpp_rngScope_gen; 98 | Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP); 99 | rcpp_result_gen = Rcpp::wrap(log_sum_exp(A)); 100 | return rcpp_result_gen; 101 | END_RCPP 102 | } 103 | // spl_sgm_ig 104 | arma::vec spl_sgm_ig(double alpha, int K, double w, const arma::vec& vardeltas); 105 | RcppExport SEXP _HTLR_spl_sgm_ig(SEXP alphaSEXP, SEXP KSEXP, SEXP wSEXP, SEXP vardeltasSEXP) { 106 | BEGIN_RCPP 107 | Rcpp::RObject rcpp_result_gen; 108 | Rcpp::RNGScope rcpp_rngScope_gen; 109 | Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); 110 | Rcpp::traits::input_parameter< int >::type K(KSEXP); 111 | Rcpp::traits::input_parameter< double >::type w(wSEXP); 112 | Rcpp::traits::input_parameter< const arma::vec& >::type vardeltas(vardeltasSEXP); 113 | rcpp_result_gen = Rcpp::wrap(spl_sgm_ig(alpha, K, w, vardeltas)); 114 | return rcpp_result_gen; 115 | END_RCPP 116 | } 117 | // std_helper 118 | Rcpp::List std_helper(const arma::mat& A); 119 | RcppExport SEXP _HTLR_std_helper(SEXP ASEXP) { 120 | BEGIN_RCPP 121 | Rcpp::RObject rcpp_result_gen; 122 | Rcpp::RNGScope rcpp_rngScope_gen; 123 | Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP); 124 | rcpp_result_gen = Rcpp::wrap(std_helper(A)); 125 | return rcpp_result_gen; 126 | END_RCPP 127 | } 128 | // comp_vardeltas 129 | arma::vec comp_vardeltas(const arma::mat& deltas); 130 | RcppExport SEXP _HTLR_comp_vardeltas(SEXP deltasSEXP) { 131 | BEGIN_RCPP 132 | Rcpp::RObject rcpp_result_gen; 133 | Rcpp::RNGScope rcpp_rngScope_gen; 134 | Rcpp::traits::input_parameter< const arma::mat& >::type deltas(deltasSEXP); 135 | rcpp_result_gen = Rcpp::wrap(comp_vardeltas(deltas)); 136 | return rcpp_result_gen; 137 | END_RCPP 138 | } 139 | // comp_lsl 140 | arma::vec comp_lsl(arma::mat& A); 141 | RcppExport SEXP _HTLR_comp_lsl(SEXP ASEXP) { 142 | BEGIN_RCPP 143 | Rcpp::RObject rcpp_result_gen; 144 | Rcpp::RNGScope rcpp_rngScope_gen; 145 | 
Rcpp::traits::input_parameter< arma::mat& >::type A(ASEXP); 146 | rcpp_result_gen = Rcpp::wrap(comp_lsl(A)); 147 | return rcpp_result_gen; 148 | END_RCPP 149 | } 150 | // log_normcons 151 | double log_normcons(arma::mat& A); 152 | RcppExport SEXP _HTLR_log_normcons(SEXP ASEXP) { 153 | BEGIN_RCPP 154 | Rcpp::RObject rcpp_result_gen; 155 | Rcpp::RNGScope rcpp_rngScope_gen; 156 | Rcpp::traits::input_parameter< arma::mat& >::type A(ASEXP); 157 | rcpp_result_gen = Rcpp::wrap(log_normcons(A)); 158 | return rcpp_result_gen; 159 | END_RCPP 160 | } 161 | // gendata_FAM_helper 162 | Rcpp::List gendata_FAM_helper(int n, arma::mat& muj, const arma::mat& muj_rep, const arma::mat& A, double sd_g, bool stdx); 163 | RcppExport SEXP _HTLR_gendata_FAM_helper(SEXP nSEXP, SEXP mujSEXP, SEXP muj_repSEXP, SEXP ASEXP, SEXP sd_gSEXP, SEXP stdxSEXP) { 164 | BEGIN_RCPP 165 | Rcpp::RObject rcpp_result_gen; 166 | Rcpp::RNGScope rcpp_rngScope_gen; 167 | Rcpp::traits::input_parameter< int >::type n(nSEXP); 168 | Rcpp::traits::input_parameter< arma::mat& >::type muj(mujSEXP); 169 | Rcpp::traits::input_parameter< const arma::mat& >::type muj_rep(muj_repSEXP); 170 | Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP); 171 | Rcpp::traits::input_parameter< double >::type sd_g(sd_gSEXP); 172 | Rcpp::traits::input_parameter< bool >::type stdx(stdxSEXP); 173 | rcpp_result_gen = Rcpp::wrap(gendata_FAM_helper(n, muj, muj_rep, A, sd_g, stdx)); 174 | return rcpp_result_gen; 175 | END_RCPP 176 | } 177 | 178 | static const R_CallMethodDef CallEntries[] = { 179 | {"_HTLR_sample_trunc_norm", (DL_FUNC) &_HTLR_sample_trunc_norm, 4}, 180 | {"_HTLR_sample_post_ichi", (DL_FUNC) &_HTLR_sample_post_ichi, 6}, 181 | {"_HTLR_sample_trunc_beta", (DL_FUNC) &_HTLR_sample_trunc_beta, 6}, 182 | {"_HTLR_htlr_fit_helper", (DL_FUNC) &_HTLR_htlr_fit_helper, 22}, 183 | {"_HTLR_log_sum_exp", (DL_FUNC) &_HTLR_log_sum_exp, 1}, 184 | {"_HTLR_spl_sgm_ig", (DL_FUNC) &_HTLR_spl_sgm_ig, 4}, 185 | {"_HTLR_std_helper", (DL_FUNC) &_HTLR_std_helper, 1}, 186 | {"_HTLR_comp_vardeltas", (DL_FUNC) &_HTLR_comp_vardeltas, 1}, 187 | {"_HTLR_comp_lsl", (DL_FUNC) &_HTLR_comp_lsl, 1}, 188 | {"_HTLR_log_normcons", (DL_FUNC) &_HTLR_log_normcons, 1}, 189 | {"_HTLR_gendata_FAM_helper", (DL_FUNC) &_HTLR_gendata_FAM_helper, 6}, 190 | {NULL, NULL, 0} 191 | }; 192 | 193 | RcppExport void R_init_HTLR(DllInfo *dll) { 194 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 195 | R_useDynamicSymbols(dll, FALSE); 196 | } 197 | -------------------------------------------------------------------------------- /vignettes/HTLR.bib: -------------------------------------------------------------------------------- 1 | @article{xiao2015msaenet, 2 | title={Multi-step adaptive elastic-net: reducing false positives in high-dimensional variable selection}, 3 | author={Xiao, Nan and Xu, Qing-Song}, 4 | journal={Journal of Statistical Computation and Simulation}, 5 | volume={85}, 6 | number={18}, 7 | pages={3755--3765}, 8 | year={2015}, 9 | publisher={Taylor \& Francis} 10 | } 11 | 12 | @article{breheny2011coordinate, 13 | title={Coordinate descent algorithms for nonconvex penalized regression, with applications to biological feature selection}, 14 | author={Breheny, Patrick and Huang, Jian}, 15 | journal={The annals of applied statistics}, 16 | volume={5}, 17 | number={1}, 18 | pages={232}, 19 | year={2011}, 20 | publisher={NIH Public Access} 21 | } 22 | 23 | @article{li2018fully, 24 | title={Fully Bayesian logistic regression with hyper-LASSO priors for high-dimensional feature 
selection}, 25 | author={Li, Longhai and Yao, Weixin}, 26 | journal={Journal of Statistical Computation and Simulation}, 27 | volume={88}, 28 | number={14}, 29 | pages={2827--2851}, 30 | year={2018}, 31 | publisher={Taylor \& Francis} 32 | } 33 | 34 | @article{kyung2010penalized, 35 | title={Penalized regression, standard errors, and Bayesian lassos}, 36 | author={Kyung, Minjung and Gill, Jeff and Ghosh, Malay and Casella, George and others}, 37 | journal={Bayesian Analysis}, 38 | volume={5}, 39 | number={2}, 40 | pages={369--411}, 41 | year={2010}, 42 | publisher={International Society for Bayesian Analysis} 43 | } 44 | 45 | @article{zou2005regularization, 46 | title={Regularization and variable selection via the elastic net}, 47 | author={Zou, Hui and Hastie, Trevor}, 48 | journal={Journal of the royal statistical society: series B (statistical methodology)}, 49 | volume={67}, 50 | number={2}, 51 | pages={301--320}, 52 | year={2005}, 53 | publisher={Wiley Online Library} 54 | } 55 | 56 | @article{sur2018modern, 57 | title={A modern maximum-likelihood theory for high-dimensional logistic regression}, 58 | author={Sur, Pragya and Cand{\`e}s, Emmanuel J}, 59 | journal={arXiv preprint arXiv:1803.06964}, 60 | year={2018} 61 | } 62 | 63 | @inbook{james2013introduction, 64 | title={An introduction to statistical learning}, 65 | author={James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert}, 66 | volume={112}, 67 | year={2013}, 68 | pages = {239-241}, 69 | publisher={Springer} 70 | } 71 | 72 | @article{hoerl1970ridge, 73 | title={Ridge regression: Biased estimation for nonorthogonal problems}, 74 | author={Hoerl, Arthur E and Kennard, Robert W}, 75 | journal={Technometrics}, 76 | volume={12}, 77 | number={1}, 78 | pages={55--67}, 79 | year={1970}, 80 | publisher={Taylor \& Francis Group} 81 | } 82 | 83 | @article{tibshirani1996regression, 84 | title={Regression shrinkage and selection via the lasso}, 85 | author={Tibshirani, Robert}, 86 | journal={Journal of the Royal Statistical Society: Series B (Methodological)}, 87 | volume={58}, 88 | number={1}, 89 | pages={267--288}, 90 | year={1996}, 91 | publisher={Wiley Online Library} 92 | } 93 | 94 | @article{fan2001variable, 95 | title={Variable selection via nonconcave penalized likelihood and its oracle properties}, 96 | author={Fan, Jianqing and Li, Runze}, 97 | journal={Journal of the American statistical Association}, 98 | volume={96}, 99 | number={456}, 100 | pages={1348--1360}, 101 | year={2001}, 102 | publisher={Taylor \& Francis} 103 | } 104 | 105 | @article{zhang2010nearly, 106 | title={Nearly unbiased variable selection under minimax concave penalty}, 107 | author={Zhang, Cun-Hui and others}, 108 | journal={The Annals of statistics}, 109 | volume={38}, 110 | number={2}, 111 | pages={894--942}, 112 | year={2010}, 113 | publisher={Institute of Mathematical Statistics} 114 | } 115 | 116 | @article{zou2006adaptive, 117 | title={The adaptive lasso and its oracle properties}, 118 | author={Zou, Hui}, 119 | journal={Journal of the American statistical association}, 120 | volume={101}, 121 | number={476}, 122 | pages={1418--1429}, 123 | year={2006}, 124 | publisher={Taylor \& Francis} 125 | } 126 | 127 | @article{meinshausen2007relaxed, 128 | title={Relaxed lasso}, 129 | author={Meinshausen, Nicolai}, 130 | journal={Computational Statistics \& Data Analysis}, 131 | volume={52}, 132 | number={1}, 133 | pages={374--393}, 134 | year={2007}, 135 | publisher={Elsevier} 136 | } 137 | 138 | @article{gelman2008weakly, 139 | title={A 
weakly informative default prior distribution for logistic and other regression models}, 140 | author={Gelman, Andrew and Jakulin, Aleks and Pittau, Maria Grazia and Su, Yu-Sung and others}, 141 | journal={The Annals of Applied Statistics}, 142 | volume={2}, 143 | number={4}, 144 | pages={1360--1383}, 145 | year={2008}, 146 | publisher={Institute of Mathematical Statistics} 147 | } 148 | 149 | @article{griffin2011bayesian, 150 | title={Bayesian hyper-lassos with non-convex penalization}, 151 | author={Griffin, Jim E and Brown, Philip J}, 152 | journal={Australian \& New Zealand Journal of Statistics}, 153 | volume={53}, 154 | number={4}, 155 | pages={423--442}, 156 | year={2011}, 157 | publisher={Wiley Online Library} 158 | } 159 | 160 | @article{carvalho2010horseshoe, 161 | title={The horseshoe estimator for sparse signals}, 162 | author={Carvalho, Carlos M and Polson, Nicholas G and Scott, James G}, 163 | journal={Biometrika}, 164 | volume={97}, 165 | number={2}, 166 | pages={465--480}, 167 | year={2010}, 168 | publisher={Oxford University Press} 169 | } 170 | 171 | @misc{trevor2009elements, 172 | title={The elements of statistical learning: data mining, inference, and prediction}, 173 | author={Trevor, Hastie and Robert, Tibshirani and JH, Friedman}, 174 | year={2009}, 175 | publisher={New York, NY: Springer} 176 | } 177 | 178 | @article{meier2008group, 179 | title={The group lasso for logistic regression}, 180 | author={Meier, Lukas and Van De Geer, Sara and B{\"u}hlmann, Peter}, 181 | journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, 182 | volume={70}, 183 | number={1}, 184 | pages={53--71}, 185 | year={2008}, 186 | publisher={Wiley Online Library} 187 | } 188 | 189 | @inproceedings{ogutu2012genomic, 190 | title={Genomic selection using regularized linear regression models: ridge regression, lasso, elastic net and their extensions}, 191 | author={Ogutu, Joseph O and Schulz-Streeck, Torben and Piepho, Hans-Peter}, 192 | booktitle={BMC proceedings}, 193 | volume={6}, 194 | pages={S10}, 195 | year={2012}, 196 | organization={BioMed Central} 197 | } 198 | 199 | @article{waldmann2013evaluation, 200 | title={Evaluation of the lasso and the elastic net in genome-wide association studies}, 201 | author={Waldmann, Patrik and M{\'e}sz{\'a}ros, G{\'a}bor and Gredler, Birgit and Fuerst, Christian and S{\"o}lkner, Johann}, 202 | journal={Frontiers in genetics}, 203 | volume={4}, 204 | pages={270}, 205 | year={2013}, 206 | publisher={Frontiers} 207 | } 208 | 209 | @inproceedings{kakade2010learning, 210 | title={Learning exponential families in high-dimensions: Strong convexity and sparsity}, 211 | author={Kakade, Sham and Shamir, Ohad and Sindharan, Karthik and Tewari, Ambuj}, 212 | booktitle={Proceedings of the thirteenth international conference on artificial intelligence and statistics}, 213 | pages={381--388}, 214 | year={2010} 215 | } 216 | 217 | @article{zou2009adaptive, 218 | title={On the adaptive elastic-net with a diverging number of parameters}, 219 | author={Zou, Hui and Zhang, Hao Helen}, 220 | journal={Annals of statistics}, 221 | volume={37}, 222 | number={4}, 223 | pages={1733}, 224 | year={2009}, 225 | publisher={NIH Public Access} 226 | } 227 | 228 | @Manual{xiao2018msa, 229 | title = {msaenet: Multi-Step Adaptive Estimation Methods for Sparse Regressions}, 230 | author = {Xiao, Nan}, 231 | year = {2017}, 232 | note = {R package version 2.3}, 233 | url = {https://nanx.me/msaenet/}, 234 | } 235 | 236 | @article{andrieu2003introduction, 237 | 
title={An introduction to MCMC for machine learning}, 238 | author={Andrieu, Christophe and De Freitas, Nando and Doucet, Arnaud and Jordan, Michael I}, 239 | journal={Machine Learning}, 240 | volume={50}, 241 | number={1-2}, 242 | pages={5--43}, 243 | year={2003}, 244 | publisher={Springer} 245 | } 246 | 247 | @article{neal2011mcmc, 248 | title={MCMC using Hamiltonian dynamics}, 249 | author={Neal, Radford M and others}, 250 | journal={Handbook of Markov Chain Monte Carlo}, 251 | volume={2}, 252 | number={11}, 253 | pages={2}, 254 | year={2011} 255 | } 256 | 257 | @article{hanley1982meaning, 258 | title={The meaning and use of the area under a receiver operating characteristic (ROC) curve.}, 259 | author={Hanley, James A and McNeil, Barbara J}, 260 | journal={Radiology}, 261 | volume={143}, 262 | number={1}, 263 | pages={29--36}, 264 | year={1982} 265 | } 266 | 267 | @Article{Friedman2010glmnet, 268 | title = {Regularization Paths for Generalized Linear Models via Coordinate Descent}, 269 | author = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert}, 270 | journal = {Journal of Statistical Software}, 271 | year = {2010}, 272 | volume = {33}, 273 | number = {1}, 274 | pages = {1--22}, 275 | url = {http://www.jstatsoft.org/v33/i01/}, 276 | } 277 | 278 | @Manual{R2019, 279 | title = {R: A Language and Environment for Statistical Computing}, 280 | author = {{R Core Team}}, 281 | organization = {R Foundation for Statistical Computing}, 282 | address = {Vienna, Austria}, 283 | year = {2019}, 284 | url = {https://www.R-project.org/}, 285 | } 286 | 287 | @Manual{caret, 288 | title = {caret: Classification and Regression Training}, 289 | author = {Max Kuhn and Jed Wing and Steve Weston and Andre Williams and Chris Keefer and Allan Engelhardt and Tony Cooper and Zachary Mayer and Brenton Kenkel and the R Core Team and Michael Benesty and Reynald Lescarbeau and Andrew Ziem and Luca Scrucca and Yuan Tang and Can Candan and Tyler Hunt}, 290 | year = {2018}, 291 | note = {R package version 6.0-81}, 292 | url = {https://CRAN.R-project.org/package=caret}, 293 | } 294 | 295 | @Manual{EBglmnet, 296 | title = {EBglmnet: Empirical Bayesian Lasso and Elastic Net Methods for Generalized 297 | Linear Models}, 298 | author = {Anhui Huang and Dianting Liu}, 299 | year = {2016}, 300 | note = {R package version 4.1}, 301 | url = {https://CRAN.R-project.org/package=EBglmnet}, 302 | } 303 | 304 | @Manual{bayesCL, 305 | title = {bayesCL: Bayesian Inference on a GPU using OpenCL}, 306 | author = {Rok Cesnovar and Erik Strumbelj}, 307 | year = {2017}, 308 | note = {R package version 0.0.1}, 309 | url = {https://CRAN.R-project.org/package=bayesCL}, 310 | } 311 | -------------------------------------------------------------------------------- /src/gibbs.cpp: -------------------------------------------------------------------------------- 1 | #include <RcppArmadillo.h> 2 | #include "gibbs.h" 3 | 4 | Fit::Fit(int p, int K, int n, const arma::mat &X, const arma::mat &ymat, const arma::uvec &ybase, 5 | std::string ptype, double alpha, double s, double eta, 6 | int iters_rmc, int iters_h, int thin, 7 | int leap_L, int leap_L_h, double leap_step, 8 | double hmc_sgmcut, const arma::mat &deltas, const arma::vec &sigmasbt, 9 | bool keep_warmup_hist, int silence, bool legacy) 10 | : p_(p), K_(K), C_(K + 1), n_(n), X_(X), ymat_(ymat), ybase_(ybase), 11 | ptype_(ptype), alpha_(alpha), s_(s), eta_(eta), 12 | iters_rmc_(iters_rmc), iters_h_(iters_h), thin_(thin), 13 | leap_L_(leap_L), leap_L_h_(leap_L_h), leap_step_(leap_step), 14 |
sgmsq_cut_(hmc_sgmcut > 0 ? R_pow_di(hmc_sgmcut, 2) : hmc_sgmcut), 15 | DDNloglike_(col_sum(arma::square(X)) / 4), keep_warmup_hist_(keep_warmup_hist), 16 | silence_(silence), legacy_(legacy), nvar_(p + 1), logw_(s), sigmasbt_(sigmasbt) 17 | { 18 | ids_update_ = arma::uvec(nvar_, arma::fill::zeros); 19 | ids_fix_ = arma::uvec(nvar_, arma::fill::zeros); 20 | 21 | int hist_len = keep_warmup_hist_ ? (iters_rmc_ + iters_h_ + 1) : (iters_rmc_ + 1); 22 | 23 | mc_logw_ = arma::vec(hist_len, arma::fill::zeros); 24 | mc_logw_[0] = logw_; 25 | 26 | mc_sigmasbt_ = arma::mat(nvar_, hist_len, arma::fill::zeros); 27 | mc_sigmasbt_.col(0) = sigmasbt_; 28 | 29 | deltas_ = deltas; 30 | mc_deltas_ = arma::cube(nvar_, K, hist_len, arma::fill::zeros); 31 | mc_deltas_.slice(0) = deltas; 32 | 33 | mc_var_deltas_ = arma::mat(nvar_, hist_len, arma::fill::zeros); 34 | mc_loglike_ = arma::vec(hist_len, arma::fill::zeros); 35 | mc_uvar_ = arma::vec(hist_len, arma::fill::zeros); 36 | mc_hmcrej_ = arma::vec(hist_len, arma::fill::zeros); 37 | 38 | lv_ = arma::mat(n, C_, arma::fill::zeros); 39 | lv_fix_ = arma::mat(n, C_, arma::fill::zeros); 40 | 41 | DNloglike_ = arma::mat(nvar_, K, arma::fill::zeros); 42 | momt_ = arma::mat(nvar_, K, arma::fill::zeros); 43 | DNlogprior_ = arma::mat(nvar_, K, arma::fill::zeros); 44 | DNlogpost_ = arma::mat(nvar_, K, arma::fill::zeros); 45 | 46 | sumsq_deltas_ = arma::vec(nvar_, arma::fill::zeros); 47 | sum_deltas_ = arma::vec(nvar_, arma::fill::zeros); 48 | var_deltas_ = arma::vec(nvar_, arma::fill::zeros); 49 | step_sizes_ = arma::vec(nvar_, arma::fill::zeros); 50 | } 51 | 52 | void Fit::StartSampling() 53 | { 54 | Initialize(); 55 | 56 | /************************ start gibbs sampling **************************/ 57 | for (int i_mc = 0; i_mc < iters_h_ + iters_rmc_; i_mc++) 58 | { 59 | /***************** thin iterations of Gibbs sampling ******************/ 60 | double no_uvar = 0; 61 | double rej = 0; 62 | for (int i_thin = 0; i_thin < thin_; i_thin++) 63 | { 64 | /*********************** HMC Metropolis Update ********************/ 65 | 66 | // initialize HMC 67 | WhichUpdate(); 68 | no_uvar += nuvar_; 69 | 70 | GenMomt(); 71 | UpdateStepSizes(); 72 | 73 | DetachFixlv(); 74 | CacheOldValues(); 75 | 76 | double nenergy_old = CompNegEnergy(); 77 | 78 | // start trajectory 79 | UpdateDNlogPrior(); 80 | UpdateDNlogLike(); 81 | UpdateDNlogPost(); 82 | Traject(i_mc); 83 | 84 | // decide whether to accept it 85 | UpdateLogLike(); 86 | UpdateVarDeltas(); 87 | double nenergy = CompNegEnergy(); 88 | 89 | GetRNGstate(); 90 | if (log(R::runif(0, 1)) > (nenergy - nenergy_old) || IsFault()) 91 | { 92 | RestoreOldValues(); 93 | rej++; 94 | } 95 | PutRNGstate(); 96 | 97 | UpdateSigmas(); 98 | } 99 | 100 | no_uvar /= thin_; 101 | rej /= thin_; 102 | 103 | /****************** record the markov chain state ********************/ 104 | int i_rmc = keep_warmup_hist_ ? 
(i_mc + 1) : (i_mc - iters_h_ + 1); 105 | if (i_rmc > 0) 106 | { 107 | mc_deltas_.slice(i_rmc) = deltas_; 108 | mc_sigmasbt_.col(i_rmc) = sigmasbt_; 109 | mc_var_deltas_.col(i_rmc) = var_deltas_; 110 | mc_logw_[i_rmc] = logw_; 111 | mc_loglike_[i_rmc] = loglike_; 112 | mc_uvar_[i_rmc] = no_uvar; 113 | mc_hmcrej_[i_rmc] = rej; 114 | } 115 | 116 | // print some results on screen 117 | if (silence_ == 0) 118 | { 119 | Rprintf( 120 | "Iter%4d: deviance=%5.3f, logw=%6.2f, nuvar=%3.0f, hmcrej=%4.2f\n", 121 | i_mc - iters_h_, -loglike_ / n_, logw_, no_uvar, rej); 122 | } 123 | 124 | if (i_mc % 256 == 0) R_CheckUserInterrupt(); 125 | } 126 | } 127 | 128 | Rcpp::List Fit::OutputR() 129 | { 130 | auto mc_param = Rcpp::List::create( 131 | Rcpp::Named("iter.rmc") = iters_rmc_, 132 | Rcpp::Named("iter.warm") = iters_h_, 133 | Rcpp::Named("thin") = thin_, 134 | Rcpp::Named("leap") = leap_L_, 135 | Rcpp::Named("leap.warm") = leap_L_h_, 136 | Rcpp::Named("leap.step") = leap_step_, 137 | Rcpp::Named("sgmsq.cut") = sgmsq_cut_, 138 | Rcpp::Named("DDNloglike") = DDNloglike_); 139 | 140 | return Rcpp::List::create( 141 | Rcpp::Named("p") = p_, 142 | Rcpp::Named("n") = n_, 143 | Rcpp::Named("K") = K_, 144 | Rcpp::Named("mc.param") = mc_param, 145 | Rcpp::Named("mcdeltas") = mc_deltas_, 146 | Rcpp::Named("mclogw") = mc_logw_, 147 | Rcpp::Named("mcsigmasbt") = mc_sigmasbt_, 148 | Rcpp::Named("mcvardeltas") = mc_var_deltas_, 149 | Rcpp::Named("mcloglike") = mc_loglike_, 150 | Rcpp::Named("mcuvar") = mc_uvar_, 151 | Rcpp::Named("mchmcrej") = mc_hmcrej_); 152 | } 153 | 154 | // This function determines which features are to be updated. 155 | // Modified: nuvar_, nfvar_, ids_update_, ids_fix_ 156 | void Fit::WhichUpdate(bool init) 157 | { 158 | nuvar_ = 0; 159 | nfvar_ = 0; 160 | double cut = init ?
-1 : sgmsq_cut_; 161 | 162 | for (int j = 0; j < nvar_; j++) 163 | { 164 | if (sigmasbt_(j) > cut) 165 | ids_update_(nuvar_++) = j; 166 | else 167 | ids_fix_(nfvar_++) = j; 168 | } 169 | iup_ = ids_update_.head(nuvar_); // save a quick reference 170 | } 171 | 172 | // X: n * nvar 173 | // deltas: nvar * K 174 | // lv: n * (1 + K) 175 | // Modified: lv, norm_lv, pred_prob 176 | void Fit::UpdatePredProb() 177 | { 178 | lv_.tail_cols(K_) = lv_fix_.tail_cols(K_); 179 | for (int j : iup_) 180 | { 181 | for (int k = 0; k < K_; k++) 182 | { 183 | for (int i = 0; i < n_; i++) 184 | { 185 | lv_(i, k + 1) += X_(i, j) * deltas_(j, k); 186 | } 187 | } 188 | } 189 | norm_lv_ = find_normlv(lv_); 190 | pred_prob_ = arma::exp(norm_lv_); 191 | } 192 | 193 | // lv: n * (1 + K) 194 | // deltas: nvar * K 195 | // X: n * nvar 196 | // Modified: lv_fix 197 | void Fit::DetachFixlv() 198 | { 199 | if (nuvar_ <= nvar_ / 2) 200 | { 201 | lv_fix_.tail_cols(K_) = lv_.tail_cols(K_); 202 | // remove updated part 203 | for (int j : iup_) 204 | { 205 | for (int k = 0; k < K_; k++) 206 | { 207 | for (int i = 0; i < n_; i++) 208 | { 209 | lv_fix_(i, k + 1) -= X_(i, j) * deltas_(j, k); 210 | } 211 | } 212 | } 213 | } 214 | else 215 | { 216 | lv_fix_.tail_cols(K_) = arma::mat(n_, K_, arma::fill::zeros); 217 | // add fixed part 218 | for (int j : GetIdsFix()) 219 | { 220 | for (int k = 0; k < K_; k++) 221 | { 222 | for (int i = 0; i < n_; i++) 223 | { 224 | lv_fix_(i, k + 1) += X_(i, j) * deltas_(j, k); 225 | } 226 | } 227 | } 228 | } 229 | } 230 | 231 | // DNloglike: nvar * K 232 | // X: n * nvar 233 | // pred_prob: n * (1 + K) 234 | // ymat: n * K 235 | // Modified: DNloglike 236 | void Fit::UpdateDNlogLike() 237 | { 238 | arma::mat tmp = pred_prob_.tail_cols(K_) - ymat_; 239 | for (int j : iup_) 240 | { 241 | for (int k = 0; k < K_; k++) 242 | { 243 | DNloglike_(j, k) = 0; 244 | for (int i = 0; i < n_; i++) 245 | { 246 | DNloglike_(j, k) += X_(i, j) * tmp(i, k); 247 | } 248 | } 249 | } 250 | } 251 | 252 | // norm_lv: n * C 253 | // Modified: loglike 254 | void Fit::UpdateLogLike() 255 | { 256 | loglike_ = 0; 257 | for (int i = 0; i < n_; i++) 258 | { 259 | loglike_ += norm_lv_(i, ybase_(i)); 260 | } 261 | } 262 | 263 | // deltas: nvar * K 264 | // DNlogprior: nvar * K 265 | // sum_deltas: nvar 266 | // Modified: sum_deltas, DNlogprior 267 | void Fit::UpdateDNlogPrior() 268 | { 269 | arma::mat deltas_tmp = deltas_.rows(iup_); 270 | sum_deltas_(iup_) = row_sum(deltas_tmp); 271 | DNlogprior_.rows(iup_) = deltas_tmp.each_col() - sum_deltas_(iup_) / C_; 272 | } 273 | 274 | // DNloglike: nvar * K 275 | // DNlogprior: nvar * K 276 | // DNlogpost: nvar * K 277 | // sigmasbt: nvar 278 | // Modified: DNlogpost 279 | void Fit::UpdateDNlogPost() 280 | { 281 | arma::mat DNlogprior_tmp = DNlogprior_.rows(iup_); 282 | DNlogpost_.rows(iup_) = DNloglike_.rows(iup_) + DNlogprior_tmp.each_col() / sigmasbt_(iup_); 283 | } 284 | 285 | // This function is called at the beginning of the trajectory loop.
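// Reading aid (derived from the code below, not part of the original sources): with potential energy U(deltas) = -log posterior and per-coordinate step sizes eps_j kept in step_sizes_, UpdateMomtAndDeltas() performs the half momentum step momt_j <- momt_j - (eps_j / 2) * dU/ddeltas_j followed by the full position step deltas_j <- deltas_j + eps_j * momt_j; MoveMomt(), called after the gradients are refreshed at the end of each Traject() iteration, applies the matching second half momentum step, completing one standard leapfrog update.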
286 | // momt: nvar * K 287 | // step_sizes: nvar 288 | // DNlogpost: nvar * K 289 | // deltas: nvar * K 290 | // Modified: momt, deltas 291 | void Fit::UpdateMomtAndDeltas() 292 | { 293 | arma::mat DNlogpost_tmp = DNlogpost_.rows(iup_); 294 | momt_.rows(iup_) -= step_sizes_(iup_) / 2 % DNlogpost_tmp.each_col(); 295 | arma::mat momt_tmp = momt_.rows(iup_); 296 | deltas_.rows(iup_) += step_sizes_(iup_) % momt_tmp.each_col(); 297 | } 298 | 299 | void Fit::UpdateSigmas() 300 | { 301 | if (ptype_.compare("t") == 0) 302 | UpdateSigmasT(); 303 | else if (ptype_.compare("ghs") == 0) 304 | UpdateSigmasGhs(); 305 | else if (ptype_.compare("neg") == 0) 306 | UpdateSigmasNeg(); 307 | else 308 | Rcpp::stop("Unsupported prior type %s", ptype_); 309 | } 310 | 311 | void Fit::UpdateSigmasT() 312 | { 313 | double alpha_post = (alpha_ + K_) / 2; 314 | if (legacy_) 315 | { 316 | for (int j = 1; j < nvar_; j++) 317 | { 318 | GetRNGstate(); 319 | sigmasbt_(j) = 320 | 1.0 / R::rgamma(alpha_post, 1.0) * (alpha_ * exp(logw_) + var_deltas_[j]) / 2.0; 321 | PutRNGstate(); 322 | } 323 | } 324 | else 325 | { 326 | arma::vec var_deltas_p = var_deltas_.tail(p_); 327 | sigmasbt_.tail(p_) = spl_sgm_ig(alpha_, K_, exp(logw_), var_deltas_p); 328 | } 329 | 330 | UpdateLogw(); 331 | } 332 | 333 | void Fit::UpdateLogw() 334 | { 335 | // logw Update 336 | if (eta_ > 1E-10) 337 | { 338 | if (eta_ < 0.01) 339 | logw_ = s_; 340 | else 341 | { 342 | arma::vec tmp = var_deltas_.tail(p_); 343 | auto target = SamplerLogw(p_, tmp, K_, alpha_, s_, eta_); 344 | auto spl = ARS(1, &target, logw_); 345 | logw_ = spl.Sample()[0]; 346 | } 347 | } 348 | } 349 | 350 | // Helper function for UpdateSigmasGhs and UpdateSigmasNeg 351 | void Fit::UpdateSigmasSgm(SamplerSgm *target) 352 | { 353 | if (legacy_) 354 | { 355 | for (int j = 1; j < nvar_; j++) 356 | { 357 | target->set_idx(j); 358 | auto spl = ARS(1, target, log(var_deltas_(j) / K_)); 359 | sigmasbt_(j) = exp(spl.Sample()[0]); // perform ars on log(sigma_j) 360 | } 361 | } 362 | else 363 | { 364 | arma::vec tmp = arma::linspace(1, p_, p_); 365 | tmp.for_each([this, &target](arma::vec::elem_type &val) { 366 | target->set_idx(val); 367 | auto spl = ARS(1, target, log(var_deltas_(val) / K_)); 368 | val = exp(spl.Sample()[0]); // perform ars on log(sigma_j) 369 | }); 370 | sigmasbt_.tail(p_) = tmp; 371 | } 372 | } 373 | 374 | void Fit::UpdateSigmasGhs() 375 | { 376 | auto *target = 377 | new SamplerSgmGhs(nvar_, var_deltas_, K_, alpha_, logw_ + log(alpha_)); 378 | UpdateSigmasSgm(target); 379 | delete target; 380 | } 381 | 382 | void Fit::UpdateSigmasNeg() 383 | { 384 | auto *target = 385 | new SamplerSgmNeg(nvar_, var_deltas_, K_, alpha_, logw_ + log(alpha_)); 386 | UpdateSigmasSgm(target); 387 | delete target; 388 | } 389 | 390 | void Fit::Traject(int i_mc) 391 | { 392 | int L; 393 | 394 | if (i_mc < iters_h_ / 2.0) 395 | { 396 | L = leap_L_h_; 397 | logw_ = -10; 398 | } 399 | else if (i_mc < iters_h_) 400 | { 401 | L = leap_L_h_; 402 | logw_ = s_; 403 | } 404 | else 405 | { 406 | L = leap_L_; 407 | logw_ = s_; 408 | } 409 | 410 | for (int i_trj = 0; i_trj < L; i_trj++) 411 | { 412 | UpdateMomtAndDeltas(); 413 | UpdatePredProb(); 414 | UpdateDNlogPrior(); 415 | UpdateDNlogLike(); 416 | UpdateDNlogPost(); 417 | MoveMomt(); 418 | } 419 | } 420 | 421 | // deltas: nvar * K 422 | // sumsq_deltas: nvar 423 | // var_deltas: nvar 424 | // Modified: sumsq_deltas, var_deltas 425 | void Fit::UpdateVarDeltas() 426 | { 427 | sumsq_deltas_(iup_) = row_sum(arma::square(deltas_.rows(iup_))); 428 | 
var_deltas_(iup_) = sumsq_deltas_(iup_) - arma::square(sum_deltas_(iup_)) / C_; 429 | } 430 | 431 | // momt: nvar * K 432 | double Fit::CompNegEnergy() 433 | { 434 | double logprior = arma::sum(var_deltas_(iup_) / sigmasbt_(iup_)); 435 | double logprior_momt = arma::accu(arma::square(momt_.rows(iup_))); 436 | return (loglike_ - logprior / 2 - logprior_momt / 2); 437 | } 438 | 439 | // momt: nvar * K 440 | // Modified: momt 441 | void Fit::GenMomt() 442 | { 443 | if (true) 444 | { 445 | for (int j : iup_) 446 | { 447 | for (int k = 0; k < K_; k++) 448 | { 449 | GetRNGstate(); 450 | momt_(j, k) = R::rnorm(0, 1); 451 | PutRNGstate(); 452 | } 453 | } 454 | } 455 | else // unreachable alternative; might have a problem 456 | { 457 | arma::vec rn = Rcpp::rnorm(nuvar_ * K_); 458 | momt_.rows(iup_) = arma::reshape(rn, nuvar_, K_); 459 | } 460 | } 461 | 462 | // This function moves the momentum with new derivatives. 463 | // step_sizes: nvar 464 | // DNlogpost: nvar * K 465 | // momt: nvar * K 466 | // Modified: momt 467 | void Fit::MoveMomt() 468 | { 469 | arma::mat DNlogpost_tmp = DNlogpost_.rows(iup_); 470 | momt_.rows(iup_) -= step_sizes_(iup_) / 2 % DNlogpost_tmp.each_col(); 471 | } 472 | 473 | // step_sizes: nvar 474 | // DDNloglike_: nvar 475 | // sigmasbt: nvar 476 | // Modified: step_sizes 477 | void Fit::UpdateStepSizes() 478 | { 479 | step_sizes_(iup_) = 480 | leap_step_ / arma::sqrt(DDNloglike_(iup_) + K_ / sigmasbt_(iup_) / C_); 481 | } 482 | 483 | void Fit::CacheOldValues() 484 | { 485 | lv_old_ = lv_; 486 | pred_prob_old_ = pred_prob_; 487 | deltas_old_ = deltas_; 488 | DNlogprior_old_ = DNlogprior_; 489 | var_deltas_old_ = var_deltas_; 490 | loglike_old_ = loglike_; 491 | } 492 | 493 | void Fit::RestoreOldValues() 494 | { 495 | lv_ = lv_old_; 496 | pred_prob_ = pred_prob_old_; 497 | deltas_ = deltas_old_; 498 | DNlogprior_ = DNlogprior_old_; 499 | var_deltas_ = var_deltas_old_; 500 | loglike_ = loglike_old_; 501 | } 502 | 503 | bool Fit::IsFault(double cri) 504 | { 505 | for (int j : iup_) 506 | { 507 | for (int k = 0; k < K_; k++) 508 | { 509 | if (fabs(deltas_(j, k)) > cri) 510 | { 511 | return true; 512 | } 513 | } 514 | } 515 | return false; 516 | } 517 | 518 | // This function is called once at the beginning of the sampling process.
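// Descriptive note on the initialization pass (derived from the code below): Initialize() runs a single preparatory pass in which WhichUpdate(true) uses cut = -1 so every coordinate is flagged for update, then the linear predictors, the training log likelihood, and the per-feature coefficient variances are computed and stored as iteration 0 of the recorded Markov chain history.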
519 | void Fit::Initialize() 520 | { 521 | WhichUpdate(true); // set to update all 522 | UpdatePredProb(); // lv is computed here 523 | 524 | UpdateLogLike(); 525 | mc_loglike_[0] = loglike_; 526 | 527 | UpdateDNlogPrior(); 528 | UpdateVarDeltas(); 529 | mc_var_deltas_.col(0) = var_deltas_; 530 | } 531 | -------------------------------------------------------------------------------- /src/ars.cpp: -------------------------------------------------------------------------------- 1 | #include "ars.h" 2 | 3 | // NOTE: in this program, I call a piece of a linear function a 'hull', 4 | // the piecewise linear function above logf the upper hull, and 5 | // the piecewise linear function below logf the lower hull 6 | 7 | // Header of helper functions 8 | int sample_disc(const int k, const double *lw); 9 | double sample_elin(const double lower, const double upper, 10 | const double dlogf, const double tol_dlogf_is0_); 11 | double logint_elin(const double logf, const double dlogf, const double t, 12 | const double lower, const double upper, const double tol_dlogf_is0_); 13 | double interc(const double t1, const double t2, 14 | const double logf1, const double logf2, 15 | const double dlogf1, const double dlogf2, 16 | const double tol_ddlogf_is0_); 17 | 18 | // this function updates the envelope and squeezing functions. 19 | // newx --- new point to be inserted 20 | // h --- index of the hull where newx is from 21 | // logfv, dlogfv --- values of logf and dlogf at newx 22 | void ARS::update_hulls(const int h, const double newx, const double logfv, const double dlogfv) 23 | { 24 | int lh, rh, nh; 25 | 26 | if (no_hulls_ == max_nhull_) return; // reaching the limit of the working vector 27 | 28 | // specify left and right hulls of the new hull 29 | if (newx > tpoints_[h]) // to insert to the right of hull h 30 | { 31 | lh = h; rh = ritehulls_[h]; 32 | // if logfv is -infinity, only update the rightmost hull's right bound and lw 33 | if ((rh == max_nhull_) && (logfv == R_NegInf)) 34 | { 35 | if (upperbounds_[h] != newx) 36 | { 37 | upperbounds_[h] = newx; 38 | lws_[h] = logint_elin(logfvs_[h], dlogfvs_[h], tpoints_[h], 39 | lowerbounds_[h], upperbounds_[h], tol_dlogf_is0_); 40 | } 41 | return; 42 | } 43 | } 44 | else // to insert to the left of hull h 45 | { 46 | lh = lefthulls_[h]; rh = h; 47 | // if logfv is -infinity, only update the leftmost hull's left bound and lw 48 | if ((lh == -1) && (logfv == R_NegInf)) 49 | { 50 | if (lowerbounds_[h] != newx) 51 | { 52 | lowerbounds_[h] = newx; 53 | lws_[h] = logint_elin(logfvs_[h], dlogfvs_[h], tpoints_[h], 54 | lowerbounds_[h], upperbounds_[h], tol_dlogf_is0_); 55 | } 56 | return; 57 | } 58 | } 59 | 60 | // insert a new hull 61 | nh = no_hulls_; 62 | no_hulls_++; 63 | tpoints_[nh] = newx; 64 | logfvs_[nh] = logfv; 65 | dlogfvs_[nh] = dlogfv; 66 | lefthulls_[nh] = lh; 67 | ritehulls_[nh] = rh; 68 | 69 | if (lh == -1) // nh will be the new leftmost hull 70 | { 71 | lowerbounds_[nh] = lowerbounds_[h]; 72 | slopes_leftsq_[nh] = R_PosInf; 73 | } 74 | else 75 | { 76 | lowerbounds_[nh] = interc( 77 | tpoints_[lh], tpoints_[nh], logfvs_[lh], logfvs_[nh], 78 | dlogfvs_[lh], dlogfvs_[nh], tol_ddlogf_is0_); 79 | slopes_leftsq_[nh] = (logfvs_[nh] - logfvs_[lh]) / 80 | (tpoints_[nh] - tpoints_[lh]); 81 | } 82 | if (rh == max_nhull_) 83 | { 84 | upperbounds_[nh] = upperbounds_[h]; 85 | slopes_ritesq_[nh] = R_NegInf; 86 | } 87 | else 88 | { 89 | upperbounds_[nh] = 90 | interc(tpoints_[nh], tpoints_[rh], logfvs_[nh], logfvs_[rh], 91 | dlogfvs_[nh], dlogfvs_[rh], tol_ddlogf_is0_); 92 |
slopes_ritesq_[nh] = (logfvs_[nh] - logfvs_[rh]) / 93 | (tpoints_[nh] - tpoints_[rh]); 94 | } 95 | 96 | lws_[nh] = logint_elin(logfvs_[nh], dlogfvs_[nh], tpoints_[nh], 97 | lowerbounds_[nh], upperbounds_[nh], tol_dlogf_is0_); 98 | 99 | // update left hull of the new hull 100 | if (lh != -1) 101 | { 102 | upperbounds_[lh] = lowerbounds_[nh]; 103 | ritehulls_[lh] = nh; 104 | slopes_ritesq_[lh] = slopes_leftsq_[nh]; 105 | lws_[lh] = logint_elin(logfvs_[lh], dlogfvs_[lh], tpoints_[lh], 106 | lowerbounds_[lh], upperbounds_[lh], tol_dlogf_is0_); 107 | } 108 | 109 | // update right hull of the new hull if it exists 110 | if (rh != max_nhull_) 111 | { 112 | lowerbounds_[rh] = upperbounds_[nh]; 113 | lefthulls_[rh] = nh; 114 | slopes_leftsq_[rh] = slopes_ritesq_[nh]; 115 | 116 | lws_[rh] = logint_elin(logfvs_[rh], dlogfvs_[rh], tpoints_[rh], 117 | lowerbounds_[rh], upperbounds_[rh], tol_dlogf_is0_); 118 | } 119 | } 120 | 121 | double ARS::eval_upperhull(const int h, const double newx) 122 | { 123 | return ((newx - tpoints_[h]) * dlogfvs_[h] + logfvs_[h]); 124 | } 125 | 126 | double ARS::eval_lowerhull(const int h, const double newx) 127 | { 128 | if (newx >= tpoints_[h]) 129 | { 130 | return ((newx - tpoints_[h]) * slopes_ritesq_[h] + logfvs_[h]); 131 | } 132 | else 133 | { 134 | return ((newx - tpoints_[h]) * slopes_leftsq_[h] + logfvs_[h]); 135 | } 136 | } 137 | 138 | void ARS::Initialize() 139 | { 140 | // if lb is finite, bound the first hull at the left, 141 | // or insert a hull tangent at lb if logf at lb is finite too 142 | if (R_FINITE(lb_)) 143 | { 144 | h_ = 0; 145 | newx_ = lb_; 146 | target_->eval_logf(newx_, newlogf_, newdlogf_); 147 | update_hulls(h_, newx_, newlogf_, newdlogf_); 148 | } 149 | // expand to the left until reaching a bound or until the integral is finite 150 | else 151 | { 152 | newx_ = tpoints_[0] - stepout_; 153 | do 154 | { 155 | if (no_hulls_ == max_nhull_) 156 | { 157 | Rcpp::stop( 158 | "Error in Rejection Sampling: (expanding left)\n" 159 | "'max_nhull' is set too small, or your log-PDF is NOT concave.\n"); 160 | } 161 | h_ = 0; 162 | target_->eval_logf(newx_, newlogf_, newdlogf_); 163 | update_hulls(h_, newx_, newlogf_, newdlogf_); 164 | // found a new left bound, quit expanding 165 | if (newlogf_ == R_NegInf) break; 166 | newx_ -= stepout_; 167 | h_ = no_hulls_ - 1; 168 | } 169 | while (newdlogf_ < tol_dlogf_is0_); 170 | } 171 | 172 | // if ub is finite, bound the first hull at the right, 173 | // or insert a hull tangent at ub if logf at ub is finite too 174 | if (R_FINITE(ub_)) 175 | { 176 | h_ = 0; 177 | newx_ = ub_; 178 | target_->eval_logf(newx_, newlogf_, newdlogf_); 179 | update_hulls(h_, newx_, newlogf_, newdlogf_); 180 | } 181 | else // expand to the right until reaching a bound or until the integral is finite 182 | { 183 | h_ = 0; 184 | newx_ = tpoints_[0] + stepout_; 185 | do 186 | { 187 | if (no_hulls_ == max_nhull_) 188 | { 189 | //Rcpp::Rcerr << no_hulls << " " << max_nhull << "\n"; 190 | Rcpp::stop( 191 | "Error in Rejection Sampling: (expanding right)\n" 192 | "'max_nhull' is set too small, or your log-PDF is NOT concave.\n"); 193 | } 194 | target_->eval_logf(newx_, newlogf_, newdlogf_); 195 | update_hulls(h_, newx_, newlogf_, newdlogf_); 196 | if (!R_FINITE(newlogf_)) break; 197 | newx_ += stepout_; 198 | h_ = no_hulls_ - 1; 199 | } 200 | while (newdlogf_ > -tol_dlogf_is0_); 201 | } 202 | } 203 | 204 | ARS::ARS(int n, SampleTarget *target_, double ini_tpoint, 205 | double lb/*= -INFINITY*/, double ub/*= +INFINITY*/, 206 | bool verbose/*=false*/, int max_nhull_/*=1000*/, double
stepout_/*=10*/, 207 | double tol_dlogf_is0_/*= 1E-5*/, double tol_ddlogf_is0_/*= 1E-5*/) 208 | : n_(n), lb_(lb), ub_(ub), verbose_(verbose), max_nhull_(max_nhull_), stepout_(stepout_), 209 | tol_dlogf_is0_(tol_dlogf_is0_), tol_ddlogf_is0_(tol_ddlogf_is0_), target_(target_) 210 | { 211 | // construct the first hull 212 | logfvs_ = new double[max_nhull_] {0}; 213 | dlogfvs_ = new double[max_nhull_] {0}; 214 | tpoints_ = new double[max_nhull_] {0}; 215 | tpoints_[0] = ini_tpoint; // the tangent point 216 | target_->eval_logf(tpoints_[0], logfvs_[0], dlogfvs_[0]); 217 | if (!R_FINITE(logfvs_[0])) 218 | { 219 | Rcpp::stop( 220 | "Error in adaptive rejection sampling:\n" 221 | "the first tangent point doesn't have positive probability.\n"); 222 | } 223 | 224 | lowerbounds_ = new double[max_nhull_] {0}; 225 | upperbounds_ = new double[max_nhull_] {0}; 226 | lowerbounds_[0] = fmax(lb, R_NegInf); // lower bound of the hull 227 | upperbounds_[0] = fmin(ub, R_PosInf); // upper bound of the hull 228 | 229 | lefthulls_ = new int[max_nhull_] {0}; 230 | ritehulls_ = new int[max_nhull_] {0}; 231 | lefthulls_[0] = -1; // index of left hull 232 | ritehulls_[0] = max_nhull_; // index of right hull 233 | 234 | slopes_leftsq_ = new double[max_nhull_] {0}; 235 | slopes_ritesq_ = new double[max_nhull_] {0}; 236 | slopes_leftsq_[0] = R_PosInf; // slope of left squeezing arc 237 | slopes_ritesq_[0] = R_NegInf; // slope of right squeezing arc 238 | 239 | lws_ = new double[max_nhull_] {0}; 240 | lws_[0] = R_PosInf; // placeholder log weight; recomputed by update_hulls 241 | no_hulls_ = 1; 242 | } 243 | 244 | ARS::~ARS() 245 | { 246 | delete[] logfvs_; 247 | delete[] dlogfvs_; 248 | delete[] tpoints_; 249 | delete[] lowerbounds_; 250 | delete[] upperbounds_; 251 | delete[] lefthulls_; 252 | delete[] ritehulls_; 253 | delete[] slopes_leftsq_; 254 | delete[] slopes_ritesq_; 255 | delete[] lws_; 256 | } 257 | 258 | // Do adaptive rejection sampling 259 | Rcpp::NumericVector ARS::Sample() 260 | { 261 | Initialize(); 262 | 263 | /* define parameters used while sampling */ 264 | double 265 | upperhullv, // value of upper hull at newx 266 | lowerhullv, // value of lower (squeezing) hull at newx 267 | u, // a random number used to determine whether to accept 268 | logacceptv; // if logacceptv is smaller than logf, newx accepted 269 | int no_rejs = 0; 270 | Rcpp::NumericVector output (n_); // sampling output 271 | 272 | for (int i = 0; i < n_; i++) 273 | { 274 | bool rejected = true; 275 | while (rejected) 276 | { 277 | // draw a new point and a uniform random number 278 | h_ = sample_disc(no_hulls_, lws_); 279 | newx_ = sample_elin(lowerbounds_[h_], upperbounds_[h_], 280 | dlogfvs_[h_], tol_dlogf_is0_); 281 | upperhullv = eval_upperhull(h_, newx_); 282 | 283 | GetRNGstate(); 284 | u = unif_rand(); 285 | PutRNGstate(); 286 | 287 | logacceptv = upperhullv + log(u); 288 | lowerhullv = eval_lowerhull(h_, newx_); 289 | // check acceptance with the squeezing function 290 | if (logacceptv <= lowerhullv) 291 | { 292 | output[i] = newx_; 293 | rejected = false; 294 | } 295 | else 296 | { 297 | // check acceptance with logf 298 | // eval logf at newx and insert a new hull 299 | target_->eval_logf(newx_, newlogf_, newdlogf_); 300 | update_hulls(h_, newx_, newlogf_, newdlogf_); 301 | if (logacceptv <= newlogf_) 302 | { 303 | output[i] = newx_; 304 | rejected = false; 305 | } 306 | else 307 | no_rejs++; 308 | } 309 | } 310 | } 311 | if (verbose_) 312 | { 313 | double rej_rate = (no_rejs + 0.0) / (no_rejs + n_ + 0.0); 314 | Rprintf("Sampling complete.
Number of hulls: %d, Rejection rate: %4.2f\n", 315 | no_hulls_, rej_rate); 316 | } 317 | return output; 318 | } 319 | 320 | // find maximum value in double array a with length n 321 | double fmaxm(const int n, const double *a) 322 | { 323 | double ma = a[0]; 324 | if (n > 1) 325 | { 326 | for (int i = 1; i < n; i++) 327 | ma = fmax(a[i], ma); 328 | } 329 | return ma; 330 | } 331 | 332 | // k --- number of discrete values 333 | // lw --- log of probabilities 334 | int sample_disc(const int k, const double *lw) 335 | { 336 | // constructing probabilities from log probabilities 337 | double max_lw = fmaxm(k, lw); 338 | double *cw = new double[k]; 339 | cw[0] = exp(lw[0] - max_lw); 340 | for (int i = 1; i < k; i++) 341 | cw[i] = cw[i - 1] + exp(lw[i] - max_lw); 342 | 343 | GetRNGstate(); 344 | double u = unif_rand() * cw[k - 1]; 345 | PutRNGstate(); 346 | // convert u into a discrete value 347 | int i = 0; 348 | while (i < k) 349 | { 350 | if (u <= cw[i]) 351 | break; 352 | i++; 353 | } 354 | delete[] cw; 355 | return i; 356 | } 357 | 358 | // this function samples one point from exp(dlogf * x), truncated to [lower, upper] 359 | double sample_elin(const double lower, const double upper, 360 | const double dlogf, const double tol_dlogf_is0_) 361 | { 362 | // tol_dlogf_is0_ is the smallest derivative magnitude treated as 0 363 | int type_lin = -1; 364 | bool isfault = false; 365 | 366 | // checking linear function type and fault 367 | if (fabs(dlogf) <= tol_dlogf_is0_) 368 | { 369 | if (!(R_FINITE(lower) && R_FINITE(upper))) 370 | isfault = true; 371 | else 372 | type_lin = 0; // slope is zero 373 | } 374 | 375 | if (dlogf > tol_dlogf_is0_) 376 | { 377 | if (!R_FINITE(upper)) 378 | isfault = true; 379 | else 380 | type_lin = 1; // slope is positive 381 | } 382 | 383 | if (dlogf < -tol_dlogf_is0_) 384 | { 385 | if (!R_FINITE(lower)) 386 | isfault = true; 387 | else 388 | type_lin = 2; // slope is negative 389 | } 390 | 391 | if (isfault) 392 | { 393 | REprintf("(dlogf = %4.2f, lowerbound = %4.2f, upperbound = %4.2f)\n", 394 | dlogf, lower, upper); 395 | Rcpp::stop( 396 | "Error: in C function 'sample_elin':\n" 397 | "the exp linear function integrates to NAN/INFINITY\n"); 398 | } 399 | 400 | double dx = upper - lower; 401 | GetRNGstate(); 402 | double y = R::runif(0, 1); 403 | PutRNGstate(); 404 | 405 | double output; 406 | 407 | if (type_lin == 0) 408 | output = lower + y * dx; 409 | else if (type_lin == 1) 410 | output = upper + log((1 - y) * exp(-dlogf * dx) + y) / dlogf; 411 | else if (type_lin == 2) 412 | output = lower + log(1 - y + y * exp(dlogf * dx)) / dlogf; 413 | else 414 | Rcpp::stop("Error: in C function 'sample_elin': unexpected type_lin value\n"); 415 | 416 | return output; 417 | } 418 | 419 | // this function evaluates the log of the integral of an exp linear hull 420 | // logf --- value of linear hull at t 421 | // dlogf --- derivative of linear hull 422 | // t --- tangent point where logf is calculated 423 | // lower and upper --- lower and upper bounds of linear hull 424 | double logint_elin(const double logf, const double dlogf, const double t, 425 | const double lower, const double upper, const double tol_dlogf_is0_) 426 | { 427 | double output; 428 | 429 | double dx = upper - lower; 430 | double abs_dlogf = fabs(dlogf); 431 | 432 | if (abs_dlogf <= tol_dlogf_is0_) // slope is 0 433 | { 434 | output = logf + log(dx); 435 | } 436 | else 437 | { 438 | if (dlogf > tol_dlogf_is0_) // slope is positive 439 | { 440 | output = logf + dlogf * (upper - t) - log(abs_dlogf) + 441 | log(1 -
exp(-abs_dlogf * dx)); 442 | } 443 | else // slope is negative 444 | { 445 | output = logf + dlogf * (lower - t) - log(abs_dlogf) + 446 | log(1 - exp(-abs_dlogf * dx)); 447 | } 448 | } 449 | return output; 450 | } 451 | 452 | // this function finds the intersection point of the tangent lines at t1 and t2 453 | double interc(const double t1, const double t2, 454 | const double logf1, const double logf2, 455 | const double dlogf1, const double dlogf2, 456 | const double tol_ddlogf_is0_) 457 | { 458 | if (fabs(dlogf1 - dlogf2) > tol_ddlogf_is0_) 459 | return ((logf2 - logf1 - dlogf2 * t2 + dlogf1 * t1) / (dlogf1 - dlogf2)); 460 | else 461 | return ((t1 + t2) / 2.0); 462 | } 463 | -------------------------------------------------------------------------------- /R/core.r: -------------------------------------------------------------------------------- 1 | #' Fit a HTLR Model (Internal API) 2 | #' 3 | #' This function trains linear logistic regression models with HMC within restricted Gibbs sampling. 4 | #' It also makes predictions for test cases if \code{X_ts} is provided. 5 | #' 6 | #' @param y_tr Vector of response variables. Must be coded as non-negative integers, 7 | #' e.g., 1,2,\ldots,C for C classes; label 0 is also allowed. 8 | #' @param X_tr Input matrix, of dimension nobs by nvars; each row is an observation vector. 9 | #' @param fsel Subsets of features selected before fitting, such as by univariate screening. 10 | #' @param stdzx Logical; if \code{TRUE}, the original feature values are standardized to have \code{mean = 0} 11 | #' and \code{sd = 1}. 12 | #' 13 | #' @param iters_h A positive integer specifying the number of warmup (aka burn-in) iterations. 14 | #' @param iters_rmc A positive integer specifying the number of iterations after warmup. 15 | #' @param thin A positive integer specifying the period for saving samples. 16 | #' 17 | #' @param leap_L The length of the leapfrog trajectory in the sampling phase. 18 | #' @param leap_L_h The length of the leapfrog trajectory in the burn-in phase. 19 | #' @param leap_step The stepsize adjustment multiplied by the second-order partial derivatives of the log posterior. 20 | #' 21 | #' @param initial_state The initial state of the Markov chain; can be a previously 22 | #' fitted \code{fithtlr} object, or a user-supplied initial state vector, or 23 | #' a character string matching one of the following: 24 | #' \itemize{ 25 | #' \item "lasso" - (Default) Use Lasso initial state with \code{lambda} chosen by 26 | #' cross-validation. Users may specify their own candidate \code{lambda} values via 27 | #' optional argument \code{lasso.lambda}. Further customized Lasso initial 28 | #' states can be generated by \code{\link{lasso_deltas}}. 29 | #' \item "bcbcsfrda" - Use initial state generated by package \code{BCBCSF} 30 | #' (Bias-corrected Bayesian classification). Further customized BCBCSF initial 31 | #' states can be generated by \code{\link{bcbcsf_deltas}}. WARNING: This type of 32 | #' initial state can be used for continuous features such as gene expression profiles, 33 | #' but it should not be used for categorical features such as SNP profiles. 34 | #' \item "random" - Use random initial values sampled from N(0, 1). 35 | #' } 36 | #' 37 | #' @param ptype The prior to be applied to the model. Either "t" (student-t, default), 38 | #' "ghs" (horseshoe), or "neg" (normal-exponential-gamma). 39 | #' 40 | #' @param sigmab0 The \code{sd} of the normal prior for the intercept. 41 | #' @param alpha The degrees of freedom of the t/ghs/neg prior for coefficients.
42 | #' @param s The log scale of priors (logw) for coefficients. 43 | #' @param eta The \code{sd} of the normal prior for logw. When it is set to 0, logw is fixed. 44 | #' Otherwise, logw is assigned a normal prior and will be updated during sampling. 45 | #' 46 | #' @param hmc_sgmcut The coefficients smaller than this criterion will be fixed in 47 | #' each HMC updating step. 48 | #' 49 | #' @param silence Set it to \code{FALSE} to track MCMC sampling iterations. 50 | #' @param rep.legacy Logical; if \code{TRUE}, the output produced in \code{HTLR} versions up to 51 | #' legacy-3.1-1 is reproduced. The speed is typically slower than non-legacy mode on 52 | #' multi-core machines. 53 | #' 54 | #' @param keep.warmup.hist Warmup iterations are not recorded by default; set \code{TRUE} to record them. 55 | #' 56 | #' @param X_ts Test data on which predictions are to be made. 57 | #' @param predburn,predthin For prediction based on \code{X_ts} (when supplied), the first \code{predburn} 58 | #' Markov chain (super)iterations will be discarded, and only every \code{predthin}-th one is used for inference. 59 | #' 60 | #' @param alpha.rda A user-supplied alpha value for \code{\link{bcbcsf_deltas}} when 61 | #' setting up the BCBCSF initial state. Default: 0.2. 62 | #' @param lasso.lambda A user-supplied lambda sequence for \code{\link{lasso_deltas}} when 63 | #' setting up the Lasso initial state. Default: \{.01, .02, \ldots, .05\}. Will be ignored if 64 | #' \code{rep.legacy} is set to \code{TRUE}. 65 | #' 66 | #' @return A list of fitting results. If \code{X_ts} is not provided, the list is an object 67 | #' with S3 class \code{htlr.fit}. 68 | #' 69 | #' @references 70 | #' Longhai Li and Weixin Yao (2018). Fully Bayesian Logistic Regression 71 | #' with Hyper-Lasso Priors for High-dimensional Feature Selection. 72 | #' \emph{Journal of Statistical Computation and Simulation} 2018, 88:14, 2827-2851.
73 | #' 74 | #' @useDynLib HTLR 75 | #' @import Rcpp stats 76 | #' 77 | #' @export 78 | #' @keywords internal 79 | #' 80 | htlr_fit <- function ( 81 | X_tr, y_tr, fsel = 1:ncol(X_tr), stdzx = TRUE, ## data 82 | ptype = c("t", "ghs", "neg"), sigmab0 = 2000, alpha = 1, s = -10, eta = 0, ## prior 83 | iters_h = 1000, iters_rmc = 1000, thin = 1, ## mc iterations 84 | leap_L = 50, leap_L_h = 5, leap_step = 0.3, hmc_sgmcut = 0.05, ## hmc 85 | initial_state = "lasso", keep.warmup.hist = FALSE, silence = TRUE, rep.legacy = TRUE, 86 | alpha.rda = 0.2, lasso.lambda = seq(.05, .01, by = -.01), 87 | X_ts = NULL, predburn = NULL, predthin = 1) 88 | { 89 | #------------------------------- Input Checking -------------------------------# 90 | ptype <- match.arg(ptype) # collapse the default vector to a single prior type 91 | stopifnot(iters_rmc > 0, iters_h >= 0, thin > 0, leap_L > 0, leap_L_h > 0, 92 | alpha > 0, eta >= 0, sigmab0 >= 0) 93 | 94 | if (length(y_tr) != nrow(X_tr)) stop ("'y' and 'X' mismatch") 95 | 96 | yfreq <- table(y_tr) 97 | if (length(yfreq) < 2) 98 | stop("fewer than 2 classes in response") 99 | if (any(yfreq < 2)) 100 | stop("fewer than 2 cases in some group") 101 | 102 | #----------------------------- Data preprocessing -----------------------------# 103 | y1 <- as.numeric(y_tr) 104 | if (min(y1) == 0) 105 | y1 <- y1 + 1 106 | 107 | ybase <- as.integer(y1 - 1) 108 | ymat <- model.matrix( ~ factor(y1) - 1)[, -1] 109 | C <- length(unique(ybase)) 110 | K <- C - 1 111 | 112 | ## feature selection 113 | X_tr <- X_tr[, fsel, drop = FALSE] 114 | names(fsel) <- colnames(X_tr) 115 | 116 | ## standardize selected features 117 | nuj <- rep(0, length(fsel)) 118 | sdj <- rep(1, length(fsel)) 119 | if (stdzx == TRUE) 120 | { 121 | if (is.numeric(initial_state)) 122 | { 123 | message("skipping feature standardization because a customized initial state is provided") 124 | } 125 | else 126 | { 127 | X_tr <- std(X_tr) 128 | nuj <- attr(X_tr, "center") 129 | sdj <- attr(X_tr, "scale") 130 | fsel <- fsel[attr(X_tr, "nonsingular")] 131 | } 132 | } 133 | else 134 | { 135 | if (!is.matrix(X_tr)) { 136 | message(sprintf("coercing %s 'X' to matrix", class(X_tr))) 137 | X_tr <- as.matrix(X_tr) 138 | } 139 | } 140 | p <- ncol(X_tr) 141 | n <- nrow(X_tr) 142 | 143 | ## add intercept 144 | X_addint <- cbind(1, X_tr) 145 | if (!is.null(colnames(X_tr))) 146 | colnames(X_addint) <- c("Intercept", colnames(X_tr)) 147 | 148 | #---------------------- Markov chain state initialization ----------------------# 149 | 150 | if (is.list(initial_state)) # use the last iteration of the Markov chain 151 | { 152 | no.mcspl <- length(initial_state$mclogw) 153 | deltas <- matrix(initial_state$mcdeltas[, , no.mcspl], nrow = p + 1) 154 | sigmasbt <- initial_state$mcsigmasbt[, no.mcspl] 155 | s <- initial_state$mclogw[no.mcspl] 156 | init.type <- "htlr" 157 | } 158 | else 159 | { 160 | if (is.matrix(initial_state)) # user supplied deltas 161 | { 162 | deltas <- initial_state 163 | if (nrow(deltas) != p + 1 || ncol(deltas) != K) 164 | { 165 | stop( 166 | sprintf( 167 | "Initial `deltas' does not match the data.
Expected: nrow=%d, ncol=%d; Actual: nrow=%d, ncol=%d.", 168 | p + 1, K, nrow(deltas), ncol(deltas)) 169 | ) 170 | } 171 | init.type <- "customized" 172 | } 173 | else if (initial_state == "lasso") 174 | { 175 | if (rep.legacy) 176 | lasso.lambda <- NULL # will be chosen by CV 177 | deltas <- lasso_deltas(X_tr, y1, lambda = lasso.lambda, verbose = !silence) 178 | init.type <- "lasso" 179 | } 180 | else if (substr(initial_state, 1, 4) == "bcbc") 181 | { 182 | deltas <- bcbcsf_deltas(X_tr, y1, alpha.rda) 183 | init.type <- "bcbc" 184 | } 185 | else if (initial_state == "random") 186 | { 187 | deltas <- matrix(rnorm((p + 1) * K) * 2, p + 1, K) 188 | init.type <- "random" 189 | } 190 | else stop("unsupported initial state type") 191 | 192 | vardeltas <- comp_vardeltas(deltas)[-1] 193 | sigmasbt <- c(sigmab0, spl_sgm_ig(alpha, K, exp(s), vardeltas)) 194 | } 195 | 196 | #-------------------------- Do Gibbs sampling --------------------------# 197 | 198 | fit <- htlr_fit_helper( 199 | ## data 200 | p = p, K = K, n = n, 201 | X = X_addint, 202 | ymat = as.matrix(ymat), 203 | ybase = ybase, 204 | ## prior 205 | ptype = ptype, alpha = alpha, s = s, eta = eta, 206 | ## sampling 207 | iters_rmc = iters_rmc, iters_h = iters_h, thin = thin, 208 | leap_L = leap_L, leap_L_h = leap_L_h, leap_step = leap_step, 209 | hmc_sgmcut = hmc_sgmcut, 210 | ## init state 211 | deltas = deltas, sigmasbt = sigmasbt, 212 | ## other control 213 | keep_warmup_hist = keep.warmup.hist, silence = as.integer(silence), legacy = rep.legacy) 214 | 215 | # add prior hyperparameter info 216 | fit$prior <- htlr_prior(ptype, alpha, s, sigmab0) 217 | 218 | # add initial state info 219 | fit$mc.param$init <- init.type 220 | 221 | # add data preprocessing info 222 | fit$feature <- list("y" = y_tr, "X" = X_addint, "stdx" = stdzx, 223 | "fsel" = fsel, "nuj" = nuj, "sdj" = sdj) 224 | 225 | # add call 226 | fit$call <- match.call() 227 | 228 | # register S3 229 | attr(fit, "class") <- "htlr.fit" 230 | 231 | #---------------------- Prediction for test cases ----------------------# 232 | if (!is.null(X_ts)) 233 | { 234 | fit$probs_pred <- htlr_predict( 235 | X_ts = X_ts, 236 | fithtlr = fit, 237 | burn = predburn, 238 | thin = predthin 239 | ) 240 | } 241 | 242 | return(fit) 243 | } 244 | 245 | ######################## some functions not currently used ################### 246 | 247 | # htlr_ci <- function (fithtlr, usedmc = NULL) 248 | # { 249 | # mcdims <- dim (fithtlr$mcdeltas) 250 | # p <- mcdims [1] - 1 251 | # K <- mcdims [2] 252 | # no_mcspl <- mcdims[3] 253 | # 254 | # ## index of mc iters used for inference 255 | # 256 | # mcdeltas <- fithtlr$mcdeltas[,,usedmc, drop = FALSE] 257 | # 258 | # cideltas <- array (0, dim = c(p+1, K, 3)) 259 | # for (j in 1:(p+1)) 260 | # { 261 | # for (k in 1:K) { 262 | # cideltas [j,k,] <- 263 | # quantile (mcdeltas[j,k,], probs = c(1-cp, 1, 1 + cp)/2) 264 | # } 265 | # } 266 | # 267 | # cideltas 268 | # } 269 | # 270 | # ## this function plots confidence intervals 271 | # htlr_plotci <- function (fithtlr, usedmc = NULL, 272 | # cp = 0.95, truedeltas = NULL, ...) 273 | # { 274 | # 275 | # cideltas <- htlr_coefs (fithtlr, usedmc = usedmc, showci = TRUE, cp = cp) 276 | # K <- dim (cideltas)[2] 277 | # 278 | # for (k in 1:K) 279 | # { 280 | # plotmci (cideltas[,k,], truedeltas = truedeltas[,k], 281 | # main = sprintf ("%d%% MC C.I. of Coefs (Class %d)", 282 | # cp * 100, k+1), 283 | # ...) 284 | # 285 | # } 286 | # 287 | # return (cideltas) 288 | # } 289 | # 290 | # 291 | # htlr_outpred <- function (x,y,...)
292 | # { 293 | # X_ts <- cbind (x, rep (y, each = length (x))) 294 | # probs_pred <- htlr_predict (X_ts = X_ts, ...)$probs_pred[,2] 295 | # matrix (probs_pred, nrow = length (x) ) 296 | # } 297 | # 298 | # 299 | # norm_coef <- function (deltas) 300 | # { 301 | # slope <- sqrt (sum(deltas^2)) 302 | # deltas/slope 303 | # } 304 | # 305 | # pie_coef <- function (deltas) 306 | # { 307 | # slope <- sum(abs(deltas)) 308 | # deltas/slope 309 | # } 310 | # 311 | # norm_mcdeltas <- function (mcdeltas) 312 | # { 313 | # sqnorm <- function (a) sqrt(sum (a^2)) 314 | # dim_mcd <- dim (mcdeltas) 315 | # 316 | # slopes <- apply (mcdeltas[-1,,,drop=FALSE], MARGIN = c(2,3), sqnorm) 317 | # 318 | # mcthetas <- sweep (x = mcdeltas, MARGIN = c(2,3), STATS = slopes, FUN = "/") 319 | # 320 | # list (mcthetas = mcthetas, slopes = as.vector(slopes)) 321 | # } 322 | # 323 | # pie_mcdeltas <- function (mcdeltas) 324 | # { 325 | # sumabs <- function (a) sum (abs(a)) 326 | # dim_mcd <- dim (mcdeltas) 327 | # 328 | # slopes <- apply (mcdeltas[-1,,,drop=FALSE], MARGIN = c(2,3), sumabs) 329 | # 330 | # mcthetas <- sweep (x = mcdeltas, MARGIN = c(2,3), STATS = slopes, FUN = "/") 331 | # 332 | # list (mcthetas = mcthetas, slopes = as.vector(slopes)) 333 | # } 334 | # 335 | # plotmci <- function (CI, truedeltas = NULL, ...) 336 | # { 337 | # p <- nrow (CI) - 1 338 | # 339 | # plotargs <- list (...) 340 | # 341 | # if (is.null (plotargs$ylim)) plotargs$ylim <- range (CI) 342 | # if (is.null (plotargs$pch)) plotargs$pch <- 4 343 | # if (is.null (plotargs$xlab)) 344 | # plotargs$xlab <- "Feature Index in Training Data" 345 | # if (is.null (plotargs$ylab)) plotargs$ylab <- "Coefficient Value" 346 | # 347 | # do.call (plot, c (list(x= 0:p, y=CI[,2]), plotargs)) 348 | # 349 | # abline (h = 0) 350 | # 351 | # for (j in 0:p) 352 | # { 353 | # 354 | # points (c(j,j), CI[j+1,-2], type = "l", lwd = 2) 355 | # } 356 | # 357 | # if (!is.null (truedeltas)) 358 | # { 359 | # points (0:p, truedeltas, col = "red", cex = 1.2, pch = 20) 360 | # } 361 | # 362 | # } 363 | # 364 | # 365 | # 366 | # 367 | # htlr_plotleapfrog <- function () 368 | # { 369 | # if (looklf & i_mc %% iters_imc == 0 & i_mc >=0 ) 370 | # { 371 | # if (!file.exists ("leapfrogplots")) dir.create ("leapfrogplots") 372 | # 373 | # postscript (file = sprintf ("leapfrogplots/ch%d.ps", i_sup), 374 | # title = "leapfrogplots-ch", paper = "special", 375 | # width = 8, height = 4, horiz = FALSE) 376 | # par (mar = c(5,4,3,1)) 377 | # plot (-olp$nenergy_trj + olp$nenergy_trj[1], 378 | # xlab = "Index of Trajectory", type = "l", 379 | # ylab = "Hamiltonian Value", 380 | # main = 381 | # sprintf (paste( "Hamiltonian Values with the Starting Value", 382 | # "Subtracted\n(P(acceptance)=%.2f)", sep = ""), 383 | # min(1, exp(olp$nenergy_trj[L+1]-olp$nenergy_trj[1]) ) 384 | # ) 385 | # ) 386 | # abline (h = c (-1,1)) 387 | # dev.off() 388 | # 389 | # postscript (file = sprintf ("leapfrogplots/dd%d.ps", i_sup+1), 390 | # title = sprintf("leapfrogplots-dd%d", i_sup + 1), 391 | # paper = "special", 392 | # width = 8, height = 4, horiz = FALSE) 393 | # par (mar = c(5,4,3,1)) 394 | # plot (olp$ddeltas_trj, xlab = "Index of Trajectory",type = "l", 395 | # ylab = "square distance of Deltas", 396 | # main = "Square Distance of `Deltas'") 397 | # dev.off () 398 | # 399 | # postscript (file = sprintf ("leapfrogplots/ll%d.ps", i_sup), 400 | # title = "leapfrogplots-ll", paper = "special", 401 | # width = 8, height = 4, horiz = FALSE) 402 | # par (mar = c(5,4,3,1)) 403 | # plot (olp$loglike_trj, 
xlab = "Index of Trajectory", type = "l", 404 | # ylab = "log likelihood", 405 | # main = "Log likelihood of Training Cases") 406 | # dev.off() 407 | # } 408 | # } 409 | --------------------------------------------------------------------------------
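A minimal usage sketch of the htlr_fit() API documented in R/core.r above. This is an illustration, not part of the repository sources: the gendata_MLR() call and its $X/$y return components are assumptions based on its man page entry, and the short chains are chosen only to keep the run fast; all htlr_fit()/htlr_predict() arguments match the signatures shown above.

    library(HTLR)
    set.seed(12345)

    # simulate a small multinomial-logistic data set (assumed generator API)
    dat <- gendata_MLR(n = 100, p = 10)

    # fit with the default student-t prior; short chains for illustration only
    fit <- htlr_fit(
      X_tr = dat$X, y_tr = dat$y,
      ptype = "t", alpha = 1, s = -10,
      iters_h = 200, iters_rmc = 200, thin = 1,
      initial_state = "random", silence = TRUE
    )

    # posterior predictive probabilities for (here) the training cases
    probs <- htlr_predict(X_ts = dat$X, fithtlr = fit, burn = NULL, thin = 1)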