├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ └── R-CMD-check.yml ├── .gitignore ├── CONTRIBUTING.md ├── CRAN-SUBMISSION ├── DESCRIPTION ├── LICENSE ├── Makefile ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── find_dupes.R ├── formula_hal9001.R ├── hal.R ├── hal9000.R ├── hal9001-package.R ├── hal_quotes.R ├── make_basis.R ├── predict.R ├── reduce_basis_filter.R ├── sl_hal9001.R ├── squash_hal.R ├── summary.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── cran-comments.md ├── data └── hal_quotes.rda ├── docs ├── 404.html ├── CONTRIBUTING.html ├── LICENSE-text.html ├── articles │ ├── index.html │ ├── intro_hal9001.html │ └── intro_hal9001_files │ │ ├── accessible-code-block-0.0.1 │ │ └── empty-anchor.js │ │ ├── figure-html │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-2-1.png │ │ ├── unnamed-chunk-3-1.png │ │ ├── unnamed-chunk-3-2.png │ │ ├── unnamed-chunk-3-3.png │ │ ├── unnamed-chunk-3-4.png │ │ ├── unnamed-chunk-5-1.png │ │ ├── unnamed-chunk-5-2.png │ │ └── unnamed-chunk-5-3.png │ │ └── header-attrs-2.7.2 │ │ └── header-attrs.js ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── Rplot001.png │ ├── SL.hal9001.html │ ├── apply_copy_map.html │ ├── as_dgCMatrix.html │ ├── basis_list_cols.html │ ├── basis_of_degree.html │ ├── calc_pnz.html │ ├── calc_xscale.html │ ├── cv_lasso.html │ ├── cv_lasso_early_stopping.html │ ├── enumerate_basis.html │ ├── enumerate_edge_basis.html │ ├── evaluate_basis.html │ ├── fit_hal.html │ ├── formula_hal.html │ ├── formula_helpers.html │ ├── generate_all_rules.html │ ├── h.html │ ├── hal9000.html │ ├── hal9001.html │ ├── hal_quotes.html │ ├── index.html │ ├── index_first_copy.html │ ├── lassi.html │ ├── lassi_fit_module.html │ ├── lassi_origami.html │ ├── lassi_predict.html │ ├── make_basis_list.html │ ├── 
make_copy_map.html │ ├── make_design_matrix.html │ ├── make_reduced_basis_map.html │ ├── meets_basis.html │ ├── num_knots_generator.html │ ├── plus-.formula_hal9001.html │ ├── predict.SL.hal9001.html │ ├── predict.hal9001.html │ ├── predict.lassi.html │ ├── print.formula_hal9001.html │ ├── print.summary.hal9001.html │ ├── quantizer.html │ ├── squash_hal_fit.html │ └── summary.hal9001.html └── sitemap.xml ├── hal9001.Rproj ├── inst ├── CITATION └── REFERENCES.bib ├── man ├── SL.hal9001.Rd ├── apply_copy_map.Rd ├── as_dgCMatrix.Rd ├── basis_list_cols.Rd ├── basis_of_degree.Rd ├── calc_pnz.Rd ├── calc_xscale.Rd ├── enumerate_basis.Rd ├── enumerate_edge_basis.Rd ├── evaluate_basis.Rd ├── fit_hal.Rd ├── formula_hal.Rd ├── formula_helpers.Rd ├── generate_all_rules.Rd ├── h.Rd ├── hal9000.Rd ├── hal9001.Rd ├── hal_quotes.Rd ├── index_first_copy.Rd ├── make_basis_list.Rd ├── make_copy_map.Rd ├── make_design_matrix.Rd ├── make_reduced_basis_map.Rd ├── meets_basis.Rd ├── num_knots_generator.Rd ├── plus-.formula_hal9001.Rd ├── predict.SL.hal9001.Rd ├── predict.hal9001.Rd ├── print.formula_hal9001.Rd ├── print.summary.hal9001.Rd ├── quantizer.Rd ├── squash_hal_fit.Rd └── summary.hal9001.Rd ├── paper ├── paper.md └── refs.bib ├── sandbox ├── benchmarks_hal9001.Rmd ├── benchmarks_hal9001_files │ └── figure-html │ │ ├── hal_benchmark_drtmle_estimateG-1.png │ │ ├── hal_benchmark_drtmle_estimateG_SL-1.png │ │ ├── hal_benchmark_drtmle_estimateQ-1.png │ │ └── hal_microbenchmark-1.png ├── cate_hal.R ├── hal_benchmarks.Rmd ├── makeSparseMat.R ├── mangolassi_types.h ├── prof.gif ├── prof_summary.txt ├── rcpp_hal.Rmd └── toprof.R ├── src ├── .gitignore ├── Makevars ├── RcppExports.cpp ├── dedupe.cpp ├── hal9001_types.h ├── make_hal_basis.cpp ├── utils.cpp └── utils.h ├── tests ├── testthat.R └── testthat │ ├── test-basis.R │ ├── test-cv_lasso.R │ ├── test-dedupe.R │ ├── test-formula.R │ ├── test-general_families.R │ ├── test-hal_binomial.R │ ├── test-hal_comparison.R │ ├── 
test-hal_hazards.R │ ├── test-hal_multivariate.R │ ├── test-hal_nocv.R │ ├── test-higher_order_smoothness.R │ ├── test-lasso.R │ ├── test-make_basis_additional_args.R │ ├── test-reduce_basis_filter.R │ ├── test-screen_experimental.R │ ├── test-single_lambda.R │ ├── test-sl_ecpolley.R │ ├── test-stat_performance.R │ ├── test-summary.R │ ├── test-varselect_hal.R │ └── test-x_unpenalized.R └── vignettes └── intro_hal9001.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | # Rproj and version control. 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | ^\.github$ 5 | ^\.gitignore$ 6 | 7 | # Related to using README Rmd. 8 | ^README\.Rmd$ 9 | ^README\.html$ 10 | ^README-.*\.png$ 11 | ^README-refs\.bib$ 12 | # Automated testing files. 13 | ^\.travis\.yml$ 14 | ^appveyor\.yml$ 15 | ^codecov\.yml$ 16 | # Extra miscellaneous support files/directories. 17 | ^cran-comments\.md$ 18 | ^CONTRIBUTING\.md$ 19 | ^Makefile$ 20 | ^LICENSE$ 21 | ^sandbox$ 22 | ^paper$ 23 | ^docs$ 24 | ^_pkgdown\.yml$ 25 | ^CRAN-RELEASE$ 26 | ^CRAN-SUBMISSION$ 27 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | - devel 7 | pull_request: 8 | branches: 9 | - main 10 | - master 11 | - devel 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | config: 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: macOS-latest, r: 'release'} 26 | - {os: windows-latest, r: 'release'} 27 | 28 | env: 29 | 
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | 32 | steps: 33 | - name: Checkout repo 34 | uses: actions/checkout@v4 35 | 36 | - name: Setup R 37 | uses: r-lib/actions/setup-r@v2 38 | with: 39 | r-version: ${{ matrix.config.r }} 40 | 41 | - name: Install pandoc 42 | uses: r-lib/actions/setup-pandoc@v2 43 | 44 | - name: Install tinyTeX 45 | uses: r-lib/actions/setup-tinytex@v2 46 | 47 | - name: Install system dependencies 48 | if: runner.os == 'Linux' 49 | run: | 50 | sudo apt install -y curl libcurl4-doc libcurl4-openssl-dev 51 | 52 | - name: Install package dependencies 53 | run: | 54 | install.packages(c("remotes", "rcmdcheck", "covr", "sessioninfo", "pak")) 55 | pak::local_install_deps(ask = FALSE, dependencies = TRUE) 56 | shell: Rscript {0} 57 | 58 | - name: Check package 59 | run: | 60 | options(crayon.enabled = TRUE) 61 | rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error") 62 | shell: Rscript {0} 63 | 64 | - name: Upload code coverage 65 | if: runner.os == 'Linux' 66 | run: | 67 | covr::codecov() 68 | shell: Rscript {0} 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | inst/lib 2 | .Rproj.user 3 | src/*.o 4 | src/*.so 5 | src/*.dll 6 | src/symbols.rds 7 | .Rhistory 8 | .RData 9 | .Ruserdata 10 | Rplots.pdf 11 | README.html 12 | sandbox/*.html 13 | vignettes/*.html 14 | .ipynb_checkpoints 15 | 16 | ## macOS 17 | .DS_Store 18 | 19 | ## QtCreator 20 | Rcpp.pro 21 | Rcpp.pro.user 22 | *.autosave 23 | .#* 24 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to `hal9001` development 2 | 3 | We, the authors of the `hal9001` R package, use the same guide as is used for 4 | contributing to the development of the popular `tidyverse` 
ecosystem of R 5 | packages. This document is simply a formal re-statement of that fact. 6 | 7 | The goal of this guide is to help you get up and contributing to `hal9001` as 8 | quickly as possible. The guide is divided into two main pieces: 9 | 10 | * Filing a bug report or feature request in an issue. 11 | * Suggesting a change via a pull request. 12 | 13 | ## Issues 14 | 15 | When filing an issue, the most important thing is to include a minimal 16 | reproducible example so that we can quickly verify the problem, and then figure 17 | out how to fix it. There are three things you need to include to make your 18 | example reproducible: required packages, data, code. 19 | 20 | 1. **Packages** should be loaded at the top of the script, so it's easy to 21 | see which ones the example needs. 22 | 23 | 2. The easiest way to include **data** is to use `dput()` to generate the R 24 | code to recreate it. 25 | 26 | 3. Spend a little bit of time ensuring that your **code** is easy for others to 27 | read: 28 | 29 | * make sure you've used spaces and your variable names are concise, but 30 | informative 31 | 32 | * use comments to indicate where your problem lies 33 | 34 | * do your best to remove everything that is not related to the problem. 35 | The shorter your code is, the easier it is to understand. 36 | 37 | You can check you have actually made a reproducible example by starting up a 38 | fresh R session and pasting your script in. 39 | 40 | (Unless you've been specifically asked for it, please don't include the output 41 | of `sessionInfo()`.) 42 | 43 | ## Pull requests 44 | 45 | To contribute a change to `hal9001`, you follow these steps: 46 | 47 | 1. Create a branch in git and make your changes. 48 | 2. Push branch to GitHub and issue pull request (PR). 49 | 3. Discuss the pull request. 50 | 4. Iterate until either we accept the PR or decide that it's not a good fit for 51 | `hal9001`. 52 | 53 | Each of these steps are described in more detail below. 
This might feel 54 | overwhelming the first time you get set up, but it gets easier with practice. 55 | 56 | If you're not familiar with git or GitHub, please start by reading 57 | 58 | 59 | Pull requests will be evaluated against a checklist: 60 | 61 | 1. __Motivation__. Your pull request should clearly and concisely motivates the 62 | need for change. Please describe the problem your PR addresses and show 63 | how your pull request solves it as concisely as possible. 64 | 65 | Also include this motivation in `NEWS` so that when a new release of 66 | `hal9001` comes out it's easy for users to see what's changed. Add your 67 | item at the top of the file and use markdown for formatting. The 68 | news item should end with `(@yourGithubUsername, #the_issue_number)`. 69 | 70 | 2. __Only related changes__. Before you submit your pull request, please 71 | check to make sure that you haven't accidentally included any unrelated 72 | changes. These make it harder to see exactly what's changed, and to 73 | evaluate any unexpected side effects. 74 | 75 | Each PR corresponds to a git branch, so if you expect to submit 76 | multiple changes make sure to create multiple branches. If you have 77 | multiple changes that depend on each other, start with the first one 78 | and don't submit any others until the first one has been processed. 79 | 80 | 3. __Use `hal9001` coding style__. To do so, please follow the [official 81 | `tidyverse` style guide](http://style.tidyverse.org). Maintaining a 82 | consistent style across the whole code base makes it much easier to jump 83 | into the code. If you're modifying existing `hal9001` code that doesn't 84 | follow the style guide, a separate pull request to fix the style would be 85 | greatly appreciated. 86 | 87 | 4. If you're adding new parameters or a new function, you'll also need 88 | to document them with [`roxygen2`](https://github.com/klutometis/roxygen). 
89 | Make sure to re-run `devtools::document()` on the code before submitting. 90 | 91 | This seems like a lot of work but don't worry if your pull request isn't 92 | perfect. It's a learning process. A pull request is a process, and unless 93 | you've submitted a few in the past it's unlikely that your pull request will be 94 | accepted as is. Please don't submit pull requests that change existing 95 | behaviour. Instead, think about how you can add a new feature in a minimally 96 | invasive way. 97 | 98 | -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 0.4.6 2 | Date: 2023-11-08 21:59:50 UTC 3 | SHA: 35009e61c892eccd4545efc4aaa02fc5449ef877 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: hal9001 2 | Title: The Scalable Highly Adaptive Lasso 3 | Version: 0.4.6 4 | Authors@R: c( 5 | person("Jeremy", "Coyle", email = "jeremyrcoyle@gmail.com", 6 | role = c("aut", "cre"), 7 | comment = c(ORCID = "0000-0002-9874-6649")), 8 | person("Nima", "Hejazi", email = "nh@nimahejazi.org", 9 | role = "aut", 10 | comment = c(ORCID = "0000-0002-7127-2789")), 11 | person("Rachael", "Phillips", email = "rachaelvphillips@berkeley.edu", 12 | role = "aut", comment = c(ORCID = "0000-0002-8474-591X")), 13 | person("Lars", "van der Laan", email = "vanderlaanlars@yahoo.com", 14 | role = "aut"), 15 | person("David", "Benkeser", email = "benkeser@emory.edu", 16 | role = "ctb", 17 | comment = c(ORCID = "0000-0002-1019-8343")), 18 | person("Oleg", "Sofrygin", email = "oleg.sofrygin@gmail.com", 19 | role = "ctb"), 20 | person("Weixin", "Cai", email = "wcai@berkeley.edu", 21 | role = "ctb", 22 | comment = c(ORCID = "0000-0003-2680-3066")), 23 | person("Mark", "van der Laan", email = "laan@berkeley.edu", 24 | role 
= c("aut", "cph", "ths"), 25 | comment = c(ORCID = "0000-0003-1432-5511")) 26 | ) 27 | Description: A scalable implementation of the highly adaptive lasso algorithm, 28 | including routines for constructing sparse matrices of basis functions of the 29 | observed data, as well as a custom implementation of Lasso regression tailored 30 | to enhance efficiency when the matrix of predictors is composed exclusively of 31 | indicator functions. For ease of use and increased flexibility, the Lasso 32 | fitting routines invoke code from the 'glmnet' package by default. The highly 33 | adaptive lasso was first formulated and described by MJ van der Laan (2017) 34 | , with practical demonstrations of its performance 35 | given by Benkeser and van der Laan (2016) . This 36 | implementation of the highly adaptive lasso algorithm was described by Hejazi, 37 | Coyle, and van der Laan (2020) . 38 | Depends: 39 | R (>= 3.1.0), 40 | Rcpp 41 | License: GPL-3 42 | URL: https://github.com/tlverse/hal9001 43 | BugReports: https://github.com/tlverse/hal9001/issues 44 | Encoding: UTF-8 45 | LazyData: true 46 | Imports: 47 | Matrix, 48 | stats, 49 | utils, 50 | methods, 51 | assertthat, 52 | origami (>= 1.0.3), 53 | glmnet, 54 | data.table, 55 | stringr 56 | Suggests: 57 | testthat, 58 | knitr, 59 | rmarkdown, 60 | microbenchmark, 61 | future, 62 | ggplot2, 63 | dplyr, 64 | tidyr, 65 | survival, 66 | SuperLearner 67 | LinkingTo: 68 | Rcpp, 69 | RcppEigen 70 | VignetteBuilder: knitr 71 | RoxygenNote: 7.3.1 72 | Roxygen: list(markdown = TRUE) 73 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | md: 2 | Rscript -e "rmarkdown::render('README.Rmd', output_file = 'README.md')" 3 | 4 | site: 5 | Rscript -e "rmarkdown::render('README.Rmd', output_file = 'README.md')" 6 | Rscript -e "pkgdown::build_site()" 7 | 8 | check: 9 | Rscript -e "devtools::check()" 10 | 11 | 
checkfast: 12 | Rscript -e "devtools::check(build_args = '--no-build-vignettes', run_dont_test = TRUE)" 13 | 14 | test: 15 | Rscript -e "devtools::test()" 16 | 17 | doc: 18 | Rscript -e "devtools::document()" 19 | 20 | build: 21 | Rscript -e "devtools::build()" 22 | 23 | buildfast: 24 | Rscript -e "devtools::build(vignettes = FALSE)" 25 | 26 | style: 27 | Rscript -e "styler::style_pkg()" 28 | 29 | pr: style checkfast site 30 | echo "If all checks have passed, you are ready to submit a new PR." 31 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("+",formula_hal9001) 4 | S3method(predict,SL.hal9001) 5 | S3method(predict,hal9001) 6 | S3method(print,formula_hal9001) 7 | S3method(print,summary.hal9001) 8 | S3method(summary,hal9001) 9 | export(SL.hal9001) 10 | export(apply_copy_map) 11 | export(enumerate_basis) 12 | export(fit_hal) 13 | export(formula_hal) 14 | export(h) 15 | export(make_copy_map) 16 | export(make_design_matrix) 17 | export(make_reduced_basis_map) 18 | export(squash_hal_fit) 19 | importFrom(Matrix,tcrossprod) 20 | importFrom(Rcpp,sourceCpp) 21 | importFrom(assertthat,assert_that) 22 | importFrom(data.table,`:=`) 23 | importFrom(data.table,data.table) 24 | importFrom(data.table,rbindlist) 25 | importFrom(data.table,setorder) 26 | importFrom(glmnet,cv.glmnet) 27 | importFrom(glmnet,glmnet) 28 | importFrom(methods,is) 29 | importFrom(origami,folds2foldvec) 30 | importFrom(origami,make_folds) 31 | importFrom(stats,aggregate) 32 | importFrom(stats,as.formula) 33 | importFrom(stats,coef) 34 | importFrom(stats,median) 35 | importFrom(stats,plogis) 36 | importFrom(stats,predict) 37 | importFrom(stats,quantile) 38 | importFrom(stringr,str_detect) 39 | importFrom(stringr,str_extract) 40 | importFrom(stringr,str_match) 41 | importFrom(stringr,str_match_all) 42 | 
importFrom(stringr,str_remove) 43 | importFrom(stringr,str_replace) 44 | importFrom(stringr,str_split) 45 | importFrom(utils,combn) 46 | importFrom(utils,data) 47 | useDynLib(hal9001) 48 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # hal9001 0.4.6 2 | * Fixed predict method to address changes required by Matrix 1.6.2 3 | * Reduced default number of knots used for zero-order and first-order basis 4 | expansions as follows: 5 | * `base_num_knots_0` set to 20, used to be 200 6 | * `base_num_knots_1` set to 10, used to be 50 7 | 8 | # hal9001 0.4.5 9 | * Added multivariate outcome prediction 10 | 11 | # hal9001 0.4.4 12 | * Fixed bug with `prediction_bounds` (a `fit_hal` argument in `fit_control` 13 | list), which would error when it was specified as a numeric vector. Also, 14 | added a check to assert this argument is correctly specified, and tests 15 | to ensure a numeric vector of bounds is provided. 16 | * Simplified `fit_control` list arguments in `fit_hal`. Users can still specify 17 | additional arguments to `cv.glmnet` and `glmnet` in this list. 18 | * Defined `weights` as a formal argument in `fit_hal`, opposed to an optional 19 | argument in `fit_control`, to facilitate specification and avoid confusion. 20 | This increases flexibility with SuperLearner wrapper `SL.hal9001` as well; 21 | `fit_control` can now be customized with `SL.hal9001`. 22 | 23 | # hal9001 0.4.3 24 | * Version bump for CRAN resubmission following archiving. 25 | 26 | # hal9001 0.4.2 27 | * Version bump for CRAN resubmission following archiving. 28 | 29 | # hal9001 0.4.1 30 | * Minor adjustments to speed up unit tests and examples. 31 | * Version bump for CRAN resubmission. 
32 | 33 | # hal9001 0.4.0 34 | 35 | As of September 2021: 36 | * Minor change to how binning is performed when `num_knots = 1`, ensuring that 37 | the minimal number of knots is chosen when `num_knots = 1`. This results in 38 | HAL agreeing with (main terms) `glmnet` when `smoothness_orders = 1` and 39 | `num_knots = 1`. 40 | * Revised formula interface with enhanced capabilities, allowing specifciation 41 | of penalization factors, smoothness_orders, and the number of knots for each 42 | variable, for every single term separately using the new `h` function. It is 43 | possible to specify, e.g., `h(X) + h(W)` which will generate and concatenate 44 | the two basis function terms. 45 | 46 | As of April 2021: 47 | * The default of `fit_hal` is now a first order smoothed HAL with binning. 48 | * Updated documentation for `formula_hal`, `fit_hal` and `predict`; and 49 | added `fit_control` and `formula_control` lists for arguments. Moved much of 50 | the text to details sections, and shortened the argument descriptions. 51 | * Updated `summary` to support higher-order HAL fit interpretations. 52 | * Added checks to `fit_hal` for missingness and dimensionality correspondence 53 | between `X`, `Y`, and `X_unpenalized`. These checks lead to quickly-produced 54 | errors, opposed to enumerating the basis list and then letting `glmnet` error 55 | on something trivial like this. 56 | * Modified formula interface in `fit_hal`, so `formula` is now provided 57 | directly to `fit_hal` and `formula_hal` is run within `fit_hal`. Due to these 58 | changes, it no longer made sense for `formula_hal` to accept `data`, so it 59 | now takes as input `X`. Also, the `formula_fit_hal` function was removed as 60 | it is no longer needed. 61 | * Support for the custom lasso procedure implemented in `Rcpp` has been 62 | discontinued. Accordingly, the `"lassi"` option and argument `fit_type` have 63 | been removed from `fit_hal`. 
64 | * Re-added `lambda.min.ratio` as a `fit_control` argument to `fit_hal`. We've 65 | seen that not setting `lambda.min.ratio` in `glmnet` can lead to no `lambda` 66 | values that fit the data sufficiently well, so it seems appropriate to 67 | override the `glmnet` default. 68 | 69 | # hal9001 0.3.0 70 | 71 | As of February 2021: 72 | * Support _higher order_ HAL via the new `smoothness_orders` argument 73 | * `smoothness_orders` is a vector of length 1 or length `ncol(X)`. 74 | * If `smoothness_orders` is of length 1 then its values are recycled to form 75 | a vector of length `ncol(X)`. 76 | * Given such a vector of length `ncol(X)`, the ith element gives the level of 77 | smoothness for the variable corresponding to the ith column in `X`. 78 | * Degree-dependant binning. Higher order terms are binned more coarsely; the 79 | `num_knots` argument is a vector up to `max_degree` controlling the 80 | degree-specific binning. 81 | * Adds `formula_hal` which allows a formula specification of a HAL model. 82 | 83 | # hal9001 0.2.8 84 | 85 | As of November 2020: 86 | * Allow support for Poisson family to `glmnet()`. 87 | * Begins consideration of supporting arbitrary `stats::family()` objects to be 88 | passed through to calls to `glmnet()`. 89 | * Simplifies output of `fit_hal()` by unifying the redundant `hal_lasso` and 90 | `glmnet_lasso` slots into the new `lasso_fit` slot. 91 | * Cleans up of methods throughout and improves documentation, reducing a few 92 | redundancies for cleaner/simpler code in `summary.hal9001`. 93 | * Adds link to DOI of the published _Journal of Open Source Software_ paper in 94 | `DESCRIPTION`. 95 | 96 | # hal9001 0.2.7 97 | 98 | As of September 2020: 99 | * Adds a `summary` method for interpreting HAL regressions 100 | (https://github.com/tlverse/hal9001/pull/64). 101 | * Adds a software paper for publication in the _Journal of Open Source 102 | Software_ (https://github.com/tlverse/hal9001/pull/71). 
103 | 104 | # hal9001 0.2.6 105 | 106 | As of June 2020: 107 | * Address bugs/inconsistencies reported in the prediction method when trying to 108 | specify a value of lambda not included in initial fitting. 109 | * Addresses a bug arising from a silent failure in `glmnet` in which it ignores 110 | the argument `lambda.min.ratio` when `family = "gaussian"` is not set. 111 | * Adds a short software paper for submission to JOSS. 112 | * Minor documentation updates. 113 | 114 | # hal9001 0.2.5 115 | 116 | As of March 2020 117 | * First CRAN release. 118 | -------------------------------------------------------------------------------- /R/find_dupes.R: -------------------------------------------------------------------------------- 1 | #' Build Copy Maps 2 | #' 3 | #' @param x_basis A design matrix consisting of basis (indicator) functions for 4 | #' covariates (X) and terms for interactions thereof. 5 | #' 6 | #' @export 7 | #' 8 | #' @examples 9 | #' \donttest{ 10 | #' gendata <- function(n) { 11 | #' W1 <- runif(n, -3, 3) 12 | #' W2 <- rnorm(n) 13 | #' W3 <- runif(n) 14 | #' W4 <- rnorm(n) 15 | #' g0 <- plogis(0.5 * (-0.8 * W1 + 0.39 * W2 + 0.08 * W3 - 0.12 * W4)) 16 | #' A <- rbinom(n, 1, g0) 17 | #' Q0 <- plogis(0.15 * (2 * A + 2 * A * W1 + 6 * A * W3 * W4 - 3)) 18 | #' Y <- rbinom(n, 1, Q0) 19 | #' data.frame(A, W1, W2, W3, W4, Y) 20 | #' } 21 | #' set.seed(1234) 22 | #' data <- gendata(100) 23 | #' covars <- setdiff(names(data), "Y") 24 | #' X <- as.matrix(data[, covars, drop = FALSE]) 25 | #' basis_list <- enumerate_basis(X) 26 | #' x_basis <- make_design_matrix(X, basis_list) 27 | #' copy_map <- make_copy_map(x_basis) 28 | #' } 29 | #' 30 | #' @return A \code{list} of \code{numeric} vectors indicating indices of basis 31 | #' functions that are identical in the training set. 
32 | make_copy_map <- function(x_basis) { 33 | copy_indices <- index_first_copy(x_basis) 34 | copy_map <- split(seq_along(copy_indices), copy_indices) 35 | return(copy_map) 36 | } 37 | -------------------------------------------------------------------------------- /R/hal9000.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("hal_quotes")) 2 | 3 | #' HAL 9000 Quotes 4 | #' 5 | #' Prints a quote from the HAL 9000 robot from 2001: A Space Odyssey 6 | #' 7 | #' @importFrom utils data 8 | # 9 | hal9000 <- function() { 10 | utils::data("hal_quotes", envir = environment()) 11 | 12 | # pick a HAL 9000 quote to print 13 | hal_says <- hal_quotes[sample(seq_along(hal_quotes), 1)] 14 | 15 | # special case, for David 16 | names <- Sys.info()[c(6, 7, 8)] 17 | if ("david" %in% names | "benkeser" %in% names) { 18 | hal_says <- hal_quotes[6] 19 | } 20 | print(hal_says) 21 | } 22 | -------------------------------------------------------------------------------- /R/hal9001-package.R: -------------------------------------------------------------------------------- 1 | #' hal9001 2 | #' 3 | #' Package for fitting the Highly Adaptive LASSO (HAL) estimator 4 | #' 5 | #' @name hal9001 6 | #' 7 | #' @importFrom Rcpp sourceCpp 8 | #' @useDynLib hal9001 9 | # 10 | NULL 11 | -------------------------------------------------------------------------------- /R/hal_quotes.R: -------------------------------------------------------------------------------- 1 | #' HAL9000 Quotes from "2001: A Space Odyssey" 2 | #' 3 | #' Curated selection of quotes from the HAL9000 computer, from the critically 4 | #' acclaimed epic science-fiction film "2001: A Space Odyssey" (1968). 5 | #' 6 | #' @format A vector of quotes. 
7 | #' 8 | "hal_quotes" 9 | -------------------------------------------------------------------------------- /R/reduce_basis_filter.R: -------------------------------------------------------------------------------- 1 | #' Mass-based reduction of basis functions 2 | #' 3 | #' A helper function that finds which basis functions to keep (and equivalently 4 | #' which to discard) based on the proportion of 1's (observations, i.e., 5 | #' "mass") included in a given basis function. 6 | #' 7 | #' @param x_basis A matrix of basis functions with all redundant basis 8 | #' functions already removed. 9 | #' @param reduce_basis_crit A scalar \code{numeric} value bounded in the open 10 | #' interval (0,1) indicating the minimum proportion of 1's in a basis function 11 | #' column needed for the basis function to be included in the procedure to fit 12 | #' the Lasso. Any basis functions with a lower proportion of 1's than the 13 | #' specified cutoff will be removed. This argument defaults to \code{NULL}, in 14 | #' which case all basis functions are used in the lasso-fitting stage of the 15 | #' HAL algorithm. 16 | #' 17 | #' @return A binary \code{numeric} vector indicating which columns of the 18 | #' matrix of basis functions to keep (given a one) and which to discard (given 19 | #' a zero). 
20 | #' 21 | #' @importFrom assertthat assert_that 22 | #' 23 | #' @export 24 | make_reduced_basis_map <- function(x_basis, reduce_basis_crit) { 25 | # check that the provided option is a proportion 26 | assertthat::assert_that(reduce_basis_crit < 1 && reduce_basis_crit > 0) 27 | 28 | # filter over the matrix of basis functions 29 | basis_filled_prop <- calc_pnz(x_basis) 30 | reduced_basis_col_ind <- which(basis_filled_prop > reduce_basis_crit) 31 | return(reduced_basis_col_ind) 32 | } 33 | -------------------------------------------------------------------------------- /R/sl_hal9001.R: -------------------------------------------------------------------------------- 1 | #' Wrapper for Classic SuperLearner 2 | #' 3 | #' Wrapper for \pkg{SuperLearner} for objects of class \code{hal9001} 4 | #' 5 | #' @param Y A \code{numeric} vector of observations of the outcome variable. 6 | #' @param X An input \code{matrix} with dimensions number of observations -by- 7 | #' number of covariates that will be used to derive the design matrix of basis 8 | #' functions. 9 | #' @param newX A matrix of new observations on which to obtain predictions. The 10 | #' default of \code{NULL} computes predictions on training inputs \code{X}. 11 | #' @param family A \code{\link[stats]{family}} object (one that is supported 12 | #' by \code{\link[glmnet]{glmnet}}) specifying the error/link family for a 13 | #' generalized linear model. 14 | #' @param obsWeights A \code{numeric} vector of observational-level weights. 15 | #' @param id A \code{numeric} vector of IDs. 16 | #' @param max_degree The highest order of interaction terms for which basis 17 | #' functions ought to be generated. 18 | #' @param smoothness_orders An \code{integer} vector of length 1 or greater, 19 | #' specifying the smoothness of the basis functions. See the argument 20 | #' \code{smoothness_orders} of \code{\link{fit_hal}} for more information. 
21 | #' @param num_knots An \code{integer} vector of length 1 or \code{max_degree}, 22 | #' specifying the maximum number of knot points (i.e., bins) for each 23 | #' covariate for generating basis functions. See \code{num_knots} argument in 24 | #' \code{\link{fit_hal}} for more information. 25 | #' @param ... Additional arguments to \code{\link{fit_hal}}. 26 | #' 27 | #' @importFrom stats predict 28 | #' 29 | #' @export 30 | #' 31 | #' @return An object of class \code{SL.hal9001} with a fitted \code{hal9001} 32 | #' object and corresponding predictions based on the input data. 33 | SL.hal9001 <- function(Y, 34 | X, 35 | newX, 36 | family, 37 | obsWeights, 38 | id, 39 | max_degree = 2, 40 | smoothness_orders = 1, 41 | num_knots = 5, 42 | ...) { 43 | # create matrix version of X and newX for use with hal9001::fit_hal 44 | if (!is.matrix(X)) X <- as.matrix(X) 45 | if (!is.null(newX) & !is.matrix(newX)) newX <- as.matrix(newX) 46 | 47 | # fit hal 48 | hal_fit <- hal9001::fit_hal( 49 | Y = Y, X = X, family = family$family, weights = obsWeights, id = id, 50 | max_degree = max_degree, smoothness_orders = smoothness_orders, 51 | num_knots = num_knots, ... 52 | ) 53 | 54 | # compute predictions based on `newX` or input `X` 55 | if (!is.null(newX)) { 56 | pred <- stats::predict(hal_fit, new_data = newX) 57 | } else { 58 | pred <- stats::predict(hal_fit, new_data = X) 59 | } 60 | 61 | # build output object 62 | fit <- list(object = hal_fit) 63 | class(fit) <- "SL.hal9001" 64 | out <- list(pred = pred, fit = fit) 65 | return(out) 66 | } 67 | 68 | ############################################################################### 69 | 70 | #' predict.SL.hal9001 71 | #' 72 | #' Predict method for objects of class \code{SL.hal9001} 73 | #' 74 | #' @param object A fitted object of class \code{hal9001}. 75 | #' @param newdata A matrix of new observations on which to obtain predictions. 76 | #' @param ... Not used. 
77 | #' 78 | #' @importFrom stats predict 79 | #' 80 | #' @export 81 | #' 82 | #' @return A \code{numeric} vector of predictions from a \code{SL.hal9001} 83 | #' object based on the provided \code{newdata}. 84 | predict.SL.hal9001 <- function(object, newdata, ...) { 85 | # coerce newdata to matrix if not already so 86 | if (!is.matrix(newdata)) newdata <- as.matrix(newdata) 87 | 88 | # generate predictions and return 89 | pred <- stats::predict(object$object, new_data = newdata) 90 | return(pred) 91 | } 92 | -------------------------------------------------------------------------------- /R/squash_hal.R: -------------------------------------------------------------------------------- 1 | #' Squash HAL objects 2 | #' 3 | #' Reduce footprint by dropping basis functions with coefficients of zero 4 | #' 5 | #' @param object An object of class \code{hal9001}, containing the results of 6 | #' fitting the Highly Adaptive LASSO, as produced by a call to \code{fit_hal}. 7 | #' 8 | #' @importFrom methods is 9 | #' @importFrom assertthat assert_that 10 | #' 11 | #' @return Object of class \code{hal9001}, similar to the input object but 12 | #' reduced such that coefficients belonging to bases with coefficients equal 13 | #' to zero are removed. 
14 | #' 15 | #' @export 16 | #' 17 | #' @examples 18 | #' \donttest{ 19 | #' # generate simple test data 20 | #' n <- 100 21 | #' p <- 3 22 | #' x <- matrix(rnorm(n * p), n, p) 23 | #' y <- sin(x[, 1]) * sin(x[, 2]) + rnorm(n, mean = 0, sd = 0.2) 24 | #' 25 | #' # fit HAL model and squash resulting object to reduce footprint 26 | #' hal_fit <- fit_hal(X = x, Y = y, yolo = FALSE) 27 | #' squashed <- squash_hal_fit(hal_fit) 28 | #' } 29 | squash_hal_fit <- function(object) { 30 | assertthat::assert_that(is(object, "hal9001")) 31 | 32 | # find indices for basis functions with non-zero coefficients 33 | nz_coefs <- which(as.vector(object$coefs)[-1] != 0) 34 | new_coefs <- object$coefs[c(1, nz_coefs + 1)] 35 | 36 | # extract all basis functions belonging to any group with nonzero coefficient 37 | nz_basis_groups <- object$copy_map[nz_coefs] 38 | all_nz_basis_index <- sort(unlist(nz_basis_groups)) 39 | new_basis <- object$basis_list[all_nz_basis_index] 40 | 41 | # now, reindex and rekey the copy_map 42 | new_copy_map <- lapply(nz_basis_groups, match, all_nz_basis_index) 43 | new_keys <- sapply(new_copy_map, `[[`, 1) 44 | names(new_copy_map) <- new_keys 45 | 46 | # create fit object 47 | fit <- list( 48 | basis_list = new_basis, 49 | copy_map = new_copy_map, 50 | coefs = as.matrix(new_coefs), 51 | times = object$times, 52 | lambda_star = object$lambda_star, 53 | prediction_bounds = object$prediction_bounds, 54 | family = object$family, 55 | unpenalized_covariates = object$unpenalized_covariates, 56 | p_reserve = object$p_reserve 57 | ) 58 | class(fit) <- "hal9001" 59 | return(fit) 60 | } 61 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(...) 
{ 2 | packageStartupMessage(paste0( 3 | "hal9001 v", 4 | utils::packageDescription("hal9001")$Version, 5 | ": The Scalable Highly Adaptive Lasso\n", 6 | "note: fit_hal defaults have changed. See ?fit_hal for details" 7 | )) 8 | } 9 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | rmarkdown::github_document 4 | bibliography: "inst/REFERENCES.bib" 5 | --- 6 | 7 | 8 | 9 | ```{r, echo = FALSE} 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>", 13 | fig.path = "README-" 14 | ) 15 | ``` 16 | 17 | # R/`hal9001` 18 | 19 | [![R-CMD-check](https://github.com/tlverse/hal9001/workflows/R-CMD-check/badge.svg)](https://github.com/tlverse/hal9001/actions) 20 | [![Coverage Status](https://codecov.io/gh/tlverse/hal9001/branch/master/graph/badge.svg)](https://app.codecov.io/gh/tlverse/hal9001) 21 | [![CRAN](https://www.r-pkg.org/badges/version/hal9001)](https://www.r-pkg.org/pkg/hal9001) 22 | [![CRAN downloads](https://cranlogs.r-pkg.org/badges/hal9001)](https://CRAN.R-project.org/package=hal9001) 23 | [![CRAN total downloads](http://cranlogs.r-pkg.org/badges/grand-total/hal9001)](https://CRAN.R-project.org/package=hal9001) 24 | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) 25 | [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](http://www.gnu.org/licenses/gpl-3.0) 26 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3558313.svg)](https://doi.org/10.5281/zenodo.3558313) 27 | [![DOI](https://joss.theoj.org/papers/10.21105/joss.02526/status.svg)](https://doi.org/10.21105/joss.02526) 28 | 29 | > The _Scalable_ Highly Adaptive Lasso 30 | 31 | __Authors:__ [Jeremy Coyle](https://github.com/tlverse), [Nima 32 | 
Hejazi](https://nimahejazi.org), [Rachael 33 | Phillips](https://github.com/rachaelvp), [Lars van der 34 | Laan](https://github.com/Larsvanderlaan), and [Mark van der 35 | Laan](https://vanderlaan-lab.org/) 36 | 37 | --- 38 | 39 | ## What's `hal9001`? 40 | 41 | `hal9001` is an R package providing an implementation of the scalable _highly 42 | adaptive lasso_ (HAL), a nonparametric regression estimator that applies 43 | L1-regularized lasso regression to a design matrix composed of indicator 44 | functions corresponding to the support of the functional over a set of 45 | covariates and interactions thereof. HAL regression allows for arbitrarily 46 | complex functional forms to be estimated at fast (near-parametric) convergence 47 | rates under only global smoothness assumptions [@vdl2017generally; 48 | @bibaut2019fast]. For detailed theoretical discussions of the highly adaptive 49 | lasso estimator, consider consulting, for example, @vdl2017generally, 50 | @vdl2017finite, and @vdl2017uniform. For a computational demonstration of the 51 | versatility of HAL regression, see @benkeser2016hal. Recent theoretical works 52 | have demonstrated success in building efficient estimators of complex 53 | parameters when particular variations of HAL regression are used to estimate 54 | nuisance parameters [e.g., @vdl2019efficient; @ertefaie2020nonparametric]. 
55 | 56 | --- 57 | 58 | ## Installation 59 | 60 | For standard use, we recommend installing the package from 61 | [CRAN](https://CRAN.R-project.org/package=hal9001) via 62 | 63 | ```{r cran-installation, eval = FALSE} 64 | install.packages("hal9001") 65 | ``` 66 | 67 | To contribute, install the _development version_ of `hal9001` from GitHub via 68 | [`remotes`](https://CRAN.R-project.org/package=remotes): 69 | 70 | ```{r gh-master-installation, eval = FALSE} 71 | remotes::install_github("tlverse/hal9001") 72 | ``` 73 | 74 | --- 75 | 76 | ## Issues 77 | 78 | If you encounter any bugs or have any specific feature requests, please [file an 79 | issue](https://github.com/tlverse/hal9001/issues). 80 | 81 | --- 82 | 83 | ## Example 84 | 85 | Consider the following minimal example in using `hal9001` to generate 86 | predictions via Highly Adaptive Lasso regression: 87 | 88 | ```{r example} 89 | # load the package and set a seed 90 | library(hal9001) 91 | set.seed(385971) 92 | 93 | # simulate data 94 | n <- 100 95 | p <- 3 96 | x <- matrix(rnorm(n * p), n, p) 97 | y <- x[, 1] * sin(x[, 2]) + rnorm(n, mean = 0, sd = 0.2) 98 | 99 | # fit the HAL regression 100 | hal_fit <- fit_hal(X = x, Y = y, yolo = TRUE) 101 | hal_fit$times 102 | 103 | # training sample prediction 104 | preds <- predict(hal_fit, new_data = x) 105 | mean(hal_mse <- (preds - y)^2) 106 | ``` 107 | 108 | --- 109 | 110 | ## Contributions 111 | 112 | Contributions are very welcome. Interested contributors should consult our 113 | [contribution 114 | guidelines](https://github.com/tlverse/hal9001/blob/master/CONTRIBUTING.md) 115 | prior to submitting a pull request. 
116 | 117 | --- 118 | 119 | ## Citation 120 | 121 | After using the `hal9001` R package, please cite both of the following: 122 | 123 | @software{coyle2022hal9001-rpkg, 124 | author = {Coyle, Jeremy R and Hejazi, Nima S and Phillips, Rachael V 125 | and {van der Laan}, Lars and {van der Laan}, Mark J}, 126 | title = {{hal9001}: The scalable highly adaptive lasso}, 127 | year = {2022}, 128 | url = {https://doi.org/10.5281/zenodo.3558313}, 129 | doi = {10.5281/zenodo.3558313}, 130 | note = {{R} package version 0.4.2} 131 | } 132 | 133 | @article{hejazi2020hal9001-joss, 134 | author = {Hejazi, Nima S and Coyle, Jeremy R and {van der Laan}, Mark 135 | J}, 136 | title = {{hal9001}: Scalable highly adaptive lasso regression in 137 | {R}}, 138 | year = {2020}, 139 | url = {https://doi.org/10.21105/joss.02526}, 140 | doi = {10.21105/joss.02526}, 141 | journal = {Journal of Open Source Software}, 142 | publisher = {The Open Journal} 143 | } 144 | 145 | --- 146 | 147 | ## License 148 | 149 | © 2017-2024 [Jeremy Coyle](https://github.com/tlverse) and [Nima 150 | Hejazi](https://nimahejazi.org) 151 | 152 | The contents of this repository are distributed under the GPL-3 license. See 153 | file `LICENSE` for details. 
154 | 155 | --- 156 | 157 | ## References 158 | 159 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | title: hal9001 2 | url: https://tlverse.org/hal9001 3 | template: 4 | params: 5 | bootswatch: flatly 6 | ganalytics: UA-115145808-1 7 | navbar: 8 | title: "" 9 | left: 10 | - text: "tlverse" 11 | href: https://tlverse.org 12 | - text: "Overview" 13 | href: "articles/intro_hal9001.html" 14 | - text: "Articles" 15 | href: "articles/index.html" 16 | - text: "Reference" 17 | href: "reference/index.html" 18 | right: 19 | - icon: fa-github fa-lg 20 | href: https://github.com/tlverse/hal9001 21 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | * ubuntu 20.04 (local + GitHub Actions), R 4.1.1 3 | * macOS 10.15 (local + GitHub Actions), R 4.1.1 4 | * windows 2019 (on GitHub Actions), R 4.1.1 5 | 6 | ## R CMD check results 7 | There were no ERRORs or WARNINGs. 8 | * There was 1 NOTE: 9 | installed size is 8.2Mb 10 | sub-directories of 1Mb or more: 11 | libs 8.0Mb 12 | 13 | ## Downstream dependencies 14 | * The `haldensify` and `txshift` packages rely upon this package. 15 | 16 | ## Additional notes 17 | * This package was recently identified as being among a set of packages that 18 | "have inst/CITATION files with persons using the deprecated 'first' 19 | or 'middle' arguments instead of 'given'...Can you please change to use 20 | 'given' instead?" We have updated the inst/CITATION file accordingly. 
21 | -------------------------------------------------------------------------------- /data/hal_quotes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/data/hal_quotes.rda -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • hal9001 9 | 10 | 11 | 12 | 13 | 14 | 15 | 25 | 26 | 27 | 28 | 29 |
30 |
73 | 74 | 75 | 76 | 77 |
78 |
79 | 82 | 83 | Content not found. Please use links in the navbar. 84 | 85 |
86 | 87 | 91 | 92 |
93 | 94 | 95 | 96 |
100 | 101 |
102 |

103 |

Site built with pkgdown 2.1.1.

104 |
105 | 106 |
107 |
108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | Articles • hal9001 12 | 13 | 14 |
15 |
52 | 53 | 54 | 55 |
56 |
57 | 60 | 61 |
62 |

All vignettes

63 |

64 | 65 |
Fitting the Highly Adaptive Lasso with `hal9001`
66 |
67 |
68 |
69 |
70 | 71 | 72 |
75 | 76 |
77 |

Site built with pkgdown 2.1.1.

78 |
79 | 80 |
81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-3-2.png -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-3-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-3-3.png -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-3-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-3-4.png -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-5-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-5-2.png -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-5-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/docs/articles/intro_hal9001_files/figure-html/unnamed-chunk-5-3.png -------------------------------------------------------------------------------- /docs/articles/intro_hal9001_files/header-attrs-2.7.2/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | 
nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | return $(''); 51 | }, 52 | 53 | createChildNavList: function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 
| $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = 
this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
  • , and enclosing
  • if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | 
clipboardBtnCopies.on('error', function() { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: '3.5' 2 | pkgdown: 2.1.1 3 | pkgdown_sha: ~ 4 | articles: 5 | intro_hal9001: intro_hal9001.html 6 | last_built: 2024-11-19T19:38Z 7 | urls: 8 | reference: https://tlverse.org/hal9001/reference 9 | article: https://tlverse.org/hal9001/articles 10 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/calc_pnz.html: -------------------------------------------------------------------------------- 1 | 2 | Calculate Proportion of Nonzero Entries — calc_pnz • hal9001 12 | 13 | 14 |
    15 |
    52 | 53 | 54 | 55 |
    56 |
    57 | 62 | 63 |
    64 |

    Calculate Proportion of Nonzero Entries

    65 |
    66 | 67 |
    68 |
    calc_pnz(X)
    69 |
    70 | 71 | 72 |
    73 | 76 |
    77 | 78 | 79 |
    82 | 83 |
    84 |

    Site built with pkgdown 2.1.1.

    85 |
    86 | 87 |
    88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /docs/reference/hal9000.html: -------------------------------------------------------------------------------- 1 | 2 | HAL 9000 Quotes — hal9000 • hal9001 12 | 13 | 14 |
    15 |
    52 | 53 | 54 | 55 |
    56 |
    57 | 62 | 63 |
    64 |

    Prints a quote from the HAL 9000 robot from 2001: A Space Odyssey

    65 |
    66 | 67 |
    68 |
    hal9000()
    69 |
    70 | 71 | 72 |
    73 | 76 |
    77 | 78 | 79 |
    82 | 83 |
    84 |

    Site built with pkgdown 2.1.1.

    85 |
    86 | 87 |
    88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /docs/reference/hal9001.html: -------------------------------------------------------------------------------- 1 | 2 | hal9001 — hal9001 • hal9001 12 | 13 | 14 |
    15 |
    52 | 53 | 54 | 55 |
    56 |
    57 | 62 | 63 |
    64 |

    Package for fitting the Highly Adaptive LASSO (HAL) estimator

    65 |
    66 | 67 | 68 | 69 |
    70 | 73 |
    74 | 75 | 76 |
    79 | 80 |
    81 |

    Site built with pkgdown 2.1.1.

    82 |
    83 | 84 |
    85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /docs/reference/hal_quotes.html: -------------------------------------------------------------------------------- 1 | 2 | HAL9000 Quotes from "2001: A Space Odyssey" — hal_quotes • hal9001 13 | 14 | 15 |
    16 |
    53 | 54 | 55 | 56 |
    57 |
    58 | 63 | 64 |
    65 |

    Curated selection of quotes from the HAL9000 computer, from the critically 66 | acclaimed epic science-fiction film "2001: A Space Odyssey" (1968).

    67 |
    68 | 69 |
    70 |
    hal_quotes
    71 |
    72 | 73 |
    74 |

    Format

    75 |

    A vector of quotes.

    76 |
    77 | 78 |
    79 | 82 |
    83 | 84 | 85 |
    88 | 89 |
    90 |

    Site built with pkgdown 2.1.1.

    91 |
    92 | 93 |
    94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /docs/reference/lassi_fit_module.html: -------------------------------------------------------------------------------- 1 | 2 | Rcpp module: lassi_fit_module — lassi_fit_module • hal9001 12 | 13 | 14 |
    15 |
    52 | 53 | 54 | 55 |
    56 |
    57 | 62 | 63 |
    64 |

    Rcpp module: lassi_fit_module

    65 |
    66 | 67 | 68 | 69 |
    70 | 73 |
    74 | 75 | 76 |
    79 | 80 |
    81 |

    Site built with pkgdown 2.0.3.

    82 |
    83 | 84 |
    85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | https://tlverse.org/hal9001/404.html 3 | https://tlverse.org/hal9001/CONTRIBUTING.html 4 | https://tlverse.org/hal9001/LICENSE-text.html 5 | https://tlverse.org/hal9001/articles/index.html 6 | https://tlverse.org/hal9001/articles/intro_hal9001.html 7 | https://tlverse.org/hal9001/authors.html 8 | https://tlverse.org/hal9001/index.html 9 | https://tlverse.org/hal9001/news/index.html 10 | https://tlverse.org/hal9001/reference/SL.hal9001.html 11 | https://tlverse.org/hal9001/reference/apply_copy_map.html 12 | https://tlverse.org/hal9001/reference/as_dgCMatrix.html 13 | https://tlverse.org/hal9001/reference/basis_list_cols.html 14 | https://tlverse.org/hal9001/reference/basis_of_degree.html 15 | https://tlverse.org/hal9001/reference/calc_pnz.html 16 | https://tlverse.org/hal9001/reference/calc_xscale.html 17 | https://tlverse.org/hal9001/reference/cv_lasso.html 18 | https://tlverse.org/hal9001/reference/cv_lasso_early_stopping.html 19 | https://tlverse.org/hal9001/reference/enumerate_basis.html 20 | https://tlverse.org/hal9001/reference/enumerate_edge_basis.html 21 | https://tlverse.org/hal9001/reference/evaluate_basis.html 22 | https://tlverse.org/hal9001/reference/fit_hal.html 23 | https://tlverse.org/hal9001/reference/formula_hal.html 24 | https://tlverse.org/hal9001/reference/formula_helpers.html 25 | https://tlverse.org/hal9001/reference/generate_all_rules.html 26 | https://tlverse.org/hal9001/reference/h.html 27 | https://tlverse.org/hal9001/reference/hal9000.html 28 | https://tlverse.org/hal9001/reference/hal9001.html 29 | https://tlverse.org/hal9001/reference/hal_quotes.html 30 | https://tlverse.org/hal9001/reference/index.html 31 | https://tlverse.org/hal9001/reference/index_first_copy.html 32 | 
https://tlverse.org/hal9001/reference/lassi.html 33 | https://tlverse.org/hal9001/reference/lassi_fit_module.html 34 | https://tlverse.org/hal9001/reference/lassi_origami.html 35 | https://tlverse.org/hal9001/reference/lassi_predict.html 36 | https://tlverse.org/hal9001/reference/make_basis_list.html 37 | https://tlverse.org/hal9001/reference/make_copy_map.html 38 | https://tlverse.org/hal9001/reference/make_design_matrix.html 39 | https://tlverse.org/hal9001/reference/make_reduced_basis_map.html 40 | https://tlverse.org/hal9001/reference/meets_basis.html 41 | https://tlverse.org/hal9001/reference/num_knots_generator.html 42 | https://tlverse.org/hal9001/reference/plus-.formula_hal9001.html 43 | https://tlverse.org/hal9001/reference/predict.SL.hal9001.html 44 | https://tlverse.org/hal9001/reference/predict.hal9001.html 45 | https://tlverse.org/hal9001/reference/predict.lassi.html 46 | https://tlverse.org/hal9001/reference/print.formula_hal9001.html 47 | https://tlverse.org/hal9001/reference/print.summary.hal9001.html 48 | https://tlverse.org/hal9001/reference/quantizer.html 49 | https://tlverse.org/hal9001/reference/squash_hal_fit.html 50 | https://tlverse.org/hal9001/reference/summary.hal9001.html 51 | 52 | 53 | -------------------------------------------------------------------------------- /hal9001.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /inst/CITATION: 
-------------------------------------------------------------------------------- 1 | citHeader("Please cite the following works when using the 'hal9001' software 2 | package, including both the software tool and any articles 3 | describing the software package and/or statistical methodology.") 4 | 5 | year <- format(Sys.Date(), "%Y") 6 | note <- sprintf("R package version %s", meta$Version) 7 | 8 | bibentry(bibtype = "Manual", 9 | title = "{hal9001}: The scalable highly adaptive lasso", 10 | author = c(person("Jeremy R", "Coyle"), 11 | person("Nima S", "Hejazi"), 12 | person("Rachael V", "Phillips"), 13 | person("Lars WP", "van der Laan"), 14 | person("Mark J", "van der Laan")), 15 | year = year, 16 | note = note, 17 | doi = "10.5281/zenodo.3558313", 18 | url = "https://github.com/tlverse/hal9001") 19 | 20 | bibentry(bibtype = "Article", 21 | title = "{hal9001}: Scalable highly adaptive lasso regression in {R}", 22 | author = c(person("Nima S", "Hejazi"), 23 | person("Jeremy R", "Coyle"), 24 | person("Mark J", "van der Laan")), 25 | year = "2020", 26 | journal = "Journal of Open Source Software", 27 | publisher = "The Open Journal", 28 | doi = "10.21105/joss.02526", 29 | url = "https://doi.org/10.21105/joss.02526") 30 | -------------------------------------------------------------------------------- /inst/REFERENCES.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{benkeser2016hal, 2 | doi = {10.1109/dsaa.2016.93}, 3 | url = {https://doi.org/10.1109/dsaa.2016.93}, 4 | year = {2016}, 5 | publisher = {{IEEE}}, 6 | author = {Benkeser, David and {van der Laan}, Mark J}, 7 | title = {The Highly Adaptive Lasso Estimator}, 8 | booktitle = {2016 {IEEE} International Conference on Data Science and 9 | Advanced Analytics ({DSAA})} 10 | } 11 | 12 | @article{vdl2017generally, 13 | doi = {10.1515/ijb-2015-0097}, 14 | url = {https://doi.org/10.1515/ijb-2015-0097}, 15 | title = {A Generally Efficient Targeted Minimum Loss 
Based Estimator based on 16 | the {Highly Adaptive Lasso}}, 17 | author = {{van der Laan}, Mark J}, 18 | journal = {The International Journal of Biostatistics}, 19 | year = {2017}, 20 | publisher = {De Gruyter} 21 | } 22 | 23 | @article{vdl2017finite, 24 | doi = {}, 25 | url = {https://arxiv.org/abs/1708.09502}, 26 | year = {2017}, 27 | publisher = {}, 28 | journal = {}, 29 | volume = {}, 30 | number = {}, 31 | pages = {}, 32 | author = {{van der Laan}, Mark J}, 33 | title = {Finite sample inference for {Targeted Learning}} 34 | } 35 | 36 | @article{bibaut2019fast, 37 | doi = {}, 38 | url = {https://arxiv.org/abs/1907.09244}, 39 | year = {2019}, 40 | publisher = {}, 41 | journal = {}, 42 | volume = {}, 43 | number = {}, 44 | pages = {}, 45 | author = {Bibaut, Aur{\'e}lien F and {van der Laan}, Mark J}, 46 | title = {Fast rates for empirical risk minimization over 47 | c\`{a}dl\`{a}g functions with bounded sectional variation norm} 48 | } 49 | 50 | @article{vdl2019efficient, 51 | doi = {}, 52 | url = {https://arxiv.org/abs/1908.05607}, 53 | year = {2019}, 54 | publisher = {}, 55 | journal = {}, 56 | volume = {}, 57 | number = {}, 58 | pages = {}, 59 | author = {{van der Laan}, Mark J and Benkeser, David and Cai, Weixin}, 60 | title = {Efficient estimation of pathwise differentiable target parameters 61 | with the undersmoothed highly adaptive lasso} 62 | } 63 | 64 | @article{vdl2017uniform, 65 | doi = {}, 66 | url = {https://arxiv.org/abs/1709.06256}, 67 | year = {2017}, 68 | publisher = {}, 69 | journal = {}, 70 | volume = {}, 71 | number = {}, 72 | pages = {}, 73 | author = {{van der Laan}, Mark J and Bibaut, Aur{\'e}lien F}, 74 | title = {Uniform consistency of the highly adaptive lasso estimator of 75 | infinite-dimensional parameters} 76 | } 77 | 78 | @article{ertefaie2020nonparametric, 79 | doi = {}, 80 | url = {https://arxiv.org/abs/2005.11303}, 81 | year = {2020}, 82 | publisher = {}, 83 | journal = {}, 84 | volume = {}, 85 | number = {}, 86 | pages = {}, 87 
| author = {Ertefaie, Ashkan and Hejazi, Nima S and {van der Laan}, Mark J}, 88 | title = {Nonparametric inverse probability weighted estimators based on the 89 | highly adaptive lasso} 90 | } 91 | 92 | @article{hejazi2020hal9001, 93 | doi = {10.21105/joss.02526}, 94 | url = {https://doi.org/10.21105/joss.02526}, 95 | year = {2020}, 96 | month = {9}, 97 | publisher = {The Open Journal}, 98 | journal = {Journal of Open Source Software}, 99 | volume = {5}, 100 | number = {53}, 101 | pages = {2526}, 102 | author = {Hejazi, Nima S and Coyle, Jeremy R and {van der Laan}, Mark J}, 103 | title = {{hal9001}: Scalable highly adaptive lasso regression in {R}} 104 | } 105 | 106 | @article{friedman2010glmnet, 107 | title={Regularization paths for generalized linear models via coordinate 108 | descent}, 109 | author={Friedman, Jerome and Hastie, Trevor and Tibshirani, Rob}, 110 | journal={Journal of Statistical Software}, 111 | volume={33}, 112 | number={1}, 113 | pages={1}, 114 | year={2010} 115 | } 116 | 117 | @software{coyle-gh-hal9001, 118 | doi = {10.5281/zenodo.3558313}, 119 | url = {https://CRAN.R-project.org/package=hal9001}, 120 | note = {{\texttt{R}} package}, 121 | version = {0.2.7}, 122 | author = {Coyle, Jeremy R and Hejazi, Nima S and {van der Laan}, Mark J}, 123 | title = {{\texttt{hal9001}}: The scalable highly adaptive lasso} 124 | } 125 | -------------------------------------------------------------------------------- /man/SL.hal9001.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sl_hal9001.R 3 | \name{SL.hal9001} 4 | \alias{SL.hal9001} 5 | \title{Wrapper for Classic SuperLearner} 6 | \usage{ 7 | SL.hal9001( 8 | Y, 9 | X, 10 | newX, 11 | family, 12 | obsWeights, 13 | id, 14 | max_degree = 2, 15 | smoothness_orders = 1, 16 | num_knots = 5, 17 | ... 
18 | ) 19 | } 20 | \arguments{ 21 | \item{Y}{A \code{numeric} vector of observations of the outcome variable.} 22 | 23 | \item{X}{An input \code{matrix} with dimensions number of observations -by- 24 | number of covariates that will be used to derive the design matrix of basis 25 | functions.} 26 | 27 | \item{newX}{A matrix of new observations on which to obtain predictions. The 28 | default of \code{NULL} computes predictions on training inputs \code{X}.} 29 | 30 | \item{family}{A \code{\link[stats]{family}} object (one that is supported 31 | by \code{\link[glmnet]{glmnet}}) specifying the error/link family for a 32 | generalized linear model.} 33 | 34 | \item{obsWeights}{A \code{numeric} vector of observational-level weights.} 35 | 36 | \item{id}{A \code{numeric} vector of IDs.} 37 | 38 | \item{max_degree}{The highest order of interaction terms for which basis 39 | functions ought to be generated.} 40 | 41 | \item{smoothness_orders}{An \code{integer} vector of length 1 or greater, 42 | specifying the smoothness of the basis functions. See the argument 43 | \code{smoothness_orders} of \code{\link{fit_hal}} for more information.} 44 | 45 | \item{num_knots}{An \code{integer} vector of length 1 or \code{max_degree}, 46 | specifying the maximum number of knot points (i.e., bins) for each 47 | covariate for generating basis functions. See \code{num_knots} argument in 48 | \code{\link{fit_hal}} for more information.} 49 | 50 | \item{...}{Additional arguments to \code{\link{fit_hal}}.} 51 | } 52 | \value{ 53 | An object of class \code{SL.hal9001} with a fitted \code{hal9001} 54 | object and corresponding predictions based on the input data. 
55 | } 56 | \description{ 57 | Wrapper for \pkg{SuperLearner} for objects of class \code{hal9001} 58 | } 59 | -------------------------------------------------------------------------------- /man/apply_copy_map.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{apply_copy_map} 4 | \alias{apply_copy_map} 5 | \title{Apply copy map} 6 | \usage{ 7 | apply_copy_map(X, copy_map) 8 | } 9 | \arguments{ 10 | \item{X}{Sparse matrix containing columns of indicator functions.} 11 | 12 | \item{copy_map}{the copy map} 13 | } 14 | \value{ 15 | A \code{dgCMatrix} sparse matrix corresponding to the design matrix 16 | for a zero-th order highly adaptive lasso, but with all duplicated columns 17 | (basis functions) removed. 18 | } 19 | \description{ 20 | OR duplicate training set columns together 21 | } 22 | \examples{ 23 | \donttest{ 24 | gendata <- function(n) { 25 | W1 <- runif(n, -3, 3) 26 | W2 <- rnorm(n) 27 | W3 <- runif(n) 28 | W4 <- rnorm(n) 29 | g0 <- plogis(0.5 * (-0.8 * W1 + 0.39 * W2 + 0.08 * W3 - 0.12 * W4)) 30 | A <- rbinom(n, 1, g0) 31 | Q0 <- plogis(0.15 * (2 * A + 2 * A * W1 + 6 * A * W3 * W4 - 3)) 32 | Y <- rbinom(n, 1, Q0) 33 | data.frame(A, W1, W2, W3, W4, Y) 34 | } 35 | set.seed(1234) 36 | data <- gendata(100) 37 | covars <- setdiff(names(data), "Y") 38 | X <- as.matrix(data[, covars, drop = FALSE]) 39 | basis_list <- enumerate_basis(X) 40 | x_basis <- make_design_matrix(X, basis_list) 41 | copy_map <- make_copy_map(x_basis) 42 | x_basis_uniq <- apply_copy_map(x_basis, copy_map) 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /man/as_dgCMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{as_dgCMatrix} 4 | 
\alias{as_dgCMatrix} 5 | \title{Fast Coercion to Sparse Matrix} 6 | \usage{ 7 | as_dgCMatrix(XX_) 8 | } 9 | \arguments{ 10 | \item{XX_}{An object of class \code{Matrix} that has a sparse structure 11 | suitable for coercion to a sparse matrix format of \code{dgCMatrix}.} 12 | } 13 | \value{ 14 | An object of class \code{dgCMatrix}, coerced from input \code{XX_}. 15 | } 16 | \description{ 17 | Fast and efficient coercion of standard matrix objects to sparse matrices. 18 | Borrowed from http://gallery.rcpp.org/articles/sparse-matrix-coercion/. 19 | INTERNAL USE ONLY. 20 | } 21 | -------------------------------------------------------------------------------- /man/basis_list_cols.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make_basis.R 3 | \name{basis_list_cols} 4 | \alias{basis_list_cols} 5 | \title{List Basis Functions} 6 | \usage{ 7 | basis_list_cols( 8 | cols, 9 | x, 10 | smoothness_orders, 11 | include_zero_order, 12 | include_lower_order = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{cols}{Index or indices (as \code{numeric}) of covariates (columns) of 17 | interest in the data matrix \code{x} for which basis functions ought to be 18 | generated. Note that basis functions for interactions of these columns are 19 | computed automatically.} 20 | 21 | \item{x}{A \code{matrix} containing observations in the rows and covariates 22 | in the columns. Basis functions are computed for these covariates.} 23 | 24 | \item{smoothness_orders}{An integer vector of length \code{ncol(x)} 25 | specifying the desired smoothness of the function in each covariate. k = 0 26 | is no smoothness (indicator basis), k = 1 is first order smoothness, and so 27 | on. For an additive model, the component function for each covariate will 28 | have the degree of smoothness as specified by smoothness_orders. 
For 29 | non-additive components (tensor products of univariate basis functions), 30 | the univariate basis functions in each tensor product have smoothness 31 | degree as specified by smoothness_orders.} 32 | 33 | \item{include_zero_order}{A \code{logical}, indicating whether the zeroth 34 | order basis functions are included for each covariate (if \code{TRUE}), in 35 | addition to the smooth basis functions given by \code{smoothness_orders}. 36 | This allows the algorithm to data-adaptively choose the appropriate degree 37 | of smoothness.} 38 | 39 | \item{include_lower_order}{A \code{logical}, like \code{include_zero_order}, 40 | except including all basis functions of lower smoothness degrees than 41 | specified via \code{smoothness_orders}.} 42 | } 43 | \value{ 44 | A \code{list} containing the basis functions generated from a set of 45 | input columns. 46 | } 47 | \description{ 48 | Build a list of basis functions from a set of columns 49 | } 50 | -------------------------------------------------------------------------------- /man/basis_of_degree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make_basis.R 3 | \name{basis_of_degree} 4 | \alias{basis_of_degree} 5 | \title{Compute Degree of Basis Functions} 6 | \usage{ 7 | basis_of_degree( 8 | x, 9 | degree, 10 | smoothness_orders, 11 | include_zero_order, 12 | include_lower_order 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{An input \code{matrix} containing observations and covariates 17 | following standard conventions in problems of statistical learning.} 18 | 19 | \item{degree}{The highest order of interaction terms for which the basis 20 | functions ought to be generated. 
The default (\code{NULL}) corresponds to 21 | generating basis functions for the full dimensionality of the input matrix.} 22 | 23 | \item{smoothness_orders}{An integer vector of length \code{ncol(x)} 24 | specifying the desired smoothness of the function in each covariate. k = 0 25 | is no smoothness (indicator basis), k = 1 is first order smoothness, and so 26 | on. For an additive model, the component function for each covariate will 27 | have the degree of smoothness as specified by smoothness_orders. For 28 | non-additive components (tensor products of univariate basis functions), 29 | the univariate basis functions in each tensor product have smoothness 30 | degree as specified by smoothness_orders.} 31 | 32 | \item{include_zero_order}{A \code{logical}, indicating whether the zeroth 33 | order basis functions are included for each covariate (if \code{TRUE}), in 34 | addition to the smooth basis functions given by \code{smoothness_orders}. 35 | This allows the algorithm to data-adaptively choose the appropriate degree 36 | of smoothness.} 37 | 38 | \item{include_lower_order}{A \code{logical}, like \code{include_zero_order}, 39 | except including all basis functions of lower smoothness degrees than 40 | specified via \code{smoothness_orders}.} 41 | } 42 | \value{ 43 | A \code{list} containing basis functions and cutoffs generated from 44 | a set of input columns up to a particular pre-specified degree. 
45 | } 46 | \description{ 47 | Find the full list of basis functions up to a particular degree 48 | } 49 | -------------------------------------------------------------------------------- /man/calc_pnz.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{calc_pnz} 4 | \alias{calc_pnz} 5 | \title{Calculate Proportion of Nonzero Entries} 6 | \usage{ 7 | calc_pnz(X) 8 | } 9 | \description{ 10 | Calculate Proportion of Nonzero Entries 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/calc_xscale.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{calc_xscale} 4 | \alias{calc_xscale} 5 | \title{Calculating Centered and Scaled Matrices} 6 | \usage{ 7 | calc_xscale(X, xcenter) 8 | } 9 | \arguments{ 10 | \item{X}{A sparse matrix, to be centered.} 11 | 12 | \item{xcenter}{A vector of column means to be used for centering X.} 13 | } 14 | \description{ 15 | Calculating Centered and Scaled Matrices 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/enumerate_basis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make_basis.R 3 | \name{enumerate_basis} 4 | \alias{enumerate_basis} 5 | \title{Enumerate Basis Functions} 6 | \usage{ 7 | enumerate_basis( 8 | x, 9 | max_degree = NULL, 10 | smoothness_orders = rep(0, ncol(x)), 11 | include_zero_order = FALSE, 12 | include_lower_order = FALSE, 13 | num_knots = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{x}{An input \code{matrix} containing observations and covariates 18 | following standard 
conventions in problems of statistical learning.} 19 | 20 | \item{max_degree}{The highest order of interaction terms for which the basis 21 | functions ought to be generated. The default (\code{NULL}) corresponds to 22 | generating basis functions for the full dimensionality of the input matrix.} 23 | 24 | \item{smoothness_orders}{An integer vector of length \code{ncol(x)} 25 | specifying the desired smoothness of the function in each covariate. k = 0 26 | is no smoothness (indicator basis), k = 1 is first order smoothness, and so 27 | on. For an additive model, the component function for each covariate will 28 | have the degree of smoothness as specified by smoothness_orders. For 29 | non-additive components (tensor products of univariate basis functions), 30 | the univariate basis functions in each tensor product have smoothness 31 | degree as specified by smoothness_orders.} 32 | 33 | \item{include_zero_order}{A \code{logical}, indicating whether the zeroth 34 | order basis functions are included for each covariate (if \code{TRUE}), in 35 | addition to the smooth basis functions given by \code{smoothness_orders}. 36 | This allows the algorithm to data-adaptively choose the appropriate degree 37 | of smoothness.} 38 | 39 | \item{include_lower_order}{A \code{logical}, like \code{include_zero_order}, 40 | except including all basis functions of lower smoothness degrees than 41 | specified via \code{smoothness_orders}.} 42 | 43 | \item{num_knots}{A vector of length \code{max_degree}, which determines how 44 | granular the knot points to generate basis functions should be for each 45 | degree of basis function. The first entry of \code{num_knots} determines 46 | the number of knot points to be used for each univariate basis function. 47 | More generally, The kth entry of \code{num_knots} determines the number of 48 | knot points to be used for the kth degree basis functions. 
Specifically, 49 | for a kth degree basis function, which is the tensor product of k 50 | univariate basis functions, this determines the number of knot points to be 51 | used for each univariate basis function in the tensor product.} 52 | } 53 | \value{ 54 | A \code{list} of basis functions generated for all covariates and 55 | interaction thereof up to a pre-specified degree. 56 | } 57 | \description{ 58 | Generate basis functions for all covariates and interaction terms thereof up 59 | to a specified order/degree. 60 | } 61 | \examples{ 62 | \donttest{ 63 | gendata <- function(n) { 64 | W1 <- runif(n, -3, 3) 65 | W2 <- rnorm(n) 66 | W3 <- runif(n) 67 | W4 <- rnorm(n) 68 | g0 <- plogis(0.5 * (-0.8 * W1 + 0.39 * W2 + 0.08 * W3 - 0.12 * W4)) 69 | A <- rbinom(n, 1, g0) 70 | Q0 <- plogis(0.15 * (2 * A + 2 * A * W1 + 6 * A * W3 * W4 - 3)) 71 | Y <- rbinom(n, 1, Q0) 72 | data.frame(A, W1, W2, W3, W4, Y) 73 | } 74 | set.seed(1234) 75 | data <- gendata(100) 76 | covars <- setdiff(names(data), "Y") 77 | X <- as.matrix(data[, covars, drop = FALSE]) 78 | basis_list <- enumerate_basis(X) 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /man/enumerate_edge_basis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make_basis.R 3 | \name{enumerate_edge_basis} 4 | \alias{enumerate_edge_basis} 5 | \title{Enumerate Basis Functions at Generalized Edges} 6 | \usage{ 7 | enumerate_edge_basis( 8 | x, 9 | max_degree = 3, 10 | smoothness_orders = rep(0, ncol(x)), 11 | include_zero_order = FALSE, 12 | include_lower_order = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{An input \code{matrix} containing observations and covariates 17 | following standard conventions in problems of statistical learning.} 18 | 19 | \item{max_degree}{The highest order of interaction terms for which the basis 20 | functions ought to 
be generated. The default (\code{NULL}) corresponds to 21 | generating basis functions for the full dimensionality of the input matrix.} 22 | 23 | \item{smoothness_orders}{An integer vector of length \code{ncol(x)} 24 | specifying the desired smoothness of the function in each covariate. k = 0 25 | is no smoothness (indicator basis), k = 1 is first order smoothness, and so 26 | on. For an additive model, the component function for each covariate will 27 | have the degree of smoothness as specified by smoothness_orders. For 28 | non-additive components (tensor products of univariate basis functions), 29 | the univariate basis functions in each tensor product have smoothness 30 | degree as specified by smoothness_orders.} 31 | 32 | \item{include_zero_order}{A \code{logical}, indicating whether the zeroth 33 | order basis functions are included for each covariate (if \code{TRUE}), in 34 | addition to the smooth basis functions given by \code{smoothness_orders}. 35 | This allows the algorithm to data-adaptively choose the appropriate degree 36 | of smoothness.} 37 | 38 | \item{include_lower_order}{A \code{logical}, like \code{include_zero_order}, 39 | except including all basis functions of lower smoothness degrees than 40 | specified via \code{smoothness_orders}.} 41 | } 42 | \description{ 43 | For degrees of smoothness greater than 1, we must generate the lower order 44 | smoothness basis functions using the knot points at the "edge" of the 45 | hypercube. For example, consider f(x) = x^2 + x, which is second-order 46 | smooth, but will not be generated by purely quadratic basis functions. We 47 | also need to include the y = x function (which corresponds to first-order 48 | HAL basis functions at the left most value/edge of x). 
49 | } 50 | \keyword{internal} 51 | -------------------------------------------------------------------------------- /man/evaluate_basis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{evaluate_basis} 4 | \alias{evaluate_basis} 5 | \title{Generate Basis Functions} 6 | \usage{ 7 | evaluate_basis(basis, X, x_basis, basis_col) 8 | } 9 | \arguments{ 10 | \item{basis}{The basis function.} 11 | 12 | \item{X}{The design matrix, containing the original data.} 13 | 14 | \item{x_basis}{The HAL design matrix, containing indicator functions.} 15 | 16 | \item{basis_col}{Numeric indicating which column to populate.} 17 | } 18 | \description{ 19 | Populates a column (indexed by basis_col) of x_basis with basis indicators. 20 | } 21 | -------------------------------------------------------------------------------- /man/formula_hal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/formula_hal9001.R 3 | \name{formula_hal} 4 | \alias{formula_hal} 5 | \title{HAL Formula: Convert formula or string to \code{formula_HAL} object.} 6 | \usage{ 7 | formula_hal(formula, smoothness_orders, num_knots, X = NULL) 8 | } 9 | \arguments{ 10 | \item{formula}{A \code{formula_hal9001} object as outputted by \code{h}.} 11 | 12 | \item{smoothness_orders}{A default value for \code{s} if not provided 13 | explicitly to the function \code{h}.} 14 | 15 | \item{num_knots}{A default value for \code{k} if not provided explicitly to 16 | the function \code{h}.} 17 | 18 | \item{X}{Controls inheritance of the variable \code{X} from parent environment. 19 | When \code{NULL} (the default), such a variable is inherited.} 20 | } 21 | \description{ 22 | HAL Formula: Convert formula or string to \code{formula_HAL} object. 
23 | } 24 | -------------------------------------------------------------------------------- /man/formula_helpers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/formula_hal9001.R 3 | \name{fill_dots} 4 | \alias{fill_dots} 5 | \title{Formula Helpers} 6 | \usage{ 7 | fill_dots(var_names, .) 8 | } 9 | \arguments{ 10 | \item{var_names}{A \code{character} vector of variable names representing a single type of interaction 11 | var_names may include the wildcard variable "." in which case the argument \code{.} must be specified 12 | so that all interactions matching the form of var_names are generated.} 13 | 14 | \item{.}{Specification of variables for use in the formula. 15 | This function takes a character vector \code{var_names} of the form c(name1, name2, ".", name3, ".") 16 | with any number of name{int} variables and any number of wild card variables ".". 17 | It returns a list of character vectors of the form c(name1, name2, wildcard1, name3, wildcard2) 18 | where wildcard1 and wildcard2 are iterated over all possible character names given in the argument \code{.}.} 19 | } 20 | \description{ 21 | Formula Helpers 22 | } 23 | -------------------------------------------------------------------------------- /man/generate_all_rules.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.R 3 | \name{generate_all_rules} 4 | \alias{generate_all_rules} 5 | \title{Generates rules based on knot points of the fitted HAL basis functions with 6 | non-zero coefficients.} 7 | \usage{ 8 | generate_all_rules(basis_list, coefs, X_colnames) 9 | } 10 | \description{ 11 | Generates rules based on knot points of the fitted HAL basis functions with 12 | non-zero coefficients. 
13 | } 14 | \keyword{internal} 15 | -------------------------------------------------------------------------------- /man/h.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/formula_hal9001.R 3 | \name{h} 4 | \alias{h} 5 | \title{HAL Formula term: Generate a single term of the HAL basis} 6 | \usage{ 7 | h( 8 | ..., 9 | k = NULL, 10 | s = NULL, 11 | pf = 1, 12 | monotone = c("none", "i", "d"), 13 | . = NULL, 14 | dot_args_as_string = FALSE, 15 | X = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{...}{Variables for which to generate multivariate interaction basis 20 | function where the variables can be found in a matrix \code{X} in a parent 21 | environment/frame. Note, just like standard \code{formula} objects, the 22 | variables should not be characters (e.g. do h(W1,W2) not h("W1", "W2")) 23 | h(W1,W2,W3) will generate three-way HAL basis functions between W1, W2, and 24 | W3. It will \code{not} generate the lower dimensional basis functions.} 25 | 26 | \item{k}{The number of knots for each univariate basis function used to 27 | generate the tensor product basis functions. If a single value then this 28 | value is used for the univariate basis functions for each variable. 29 | Otherwise, this should be a variable named list that specifies for each 30 | variable how many knots points should be used. 31 | \code{h(W1,W2,W3, k = list(W1 = 3, W2 = 2, W3=1))} is equivalent to first 32 | binning the variables \code{W1}, \code{W2} and \code{W3} into \code{3}, \code{2} and \code{1} unique 33 | values and then calling \code{h(W1,W2,W3)}. This coarsening of the data ensures 34 | that fewer basis functions are generated, which can lead to substantial 35 | computational speed-ups. 
If not provided and the variable \code{num_knots} 36 | is in the parent environment, then \code{k} will be set to 37 | \code{num_knots}.} 38 | 39 | \item{s}{The \code{smoothness_orders} for the basis functions. The possible 40 | values are \code{0} for piece-wise constant zero-order splines or \code{1} for 41 | piece-wise linear first-order splines. If not provided and the variable 42 | \code{smoothness_orders} is in the parent environment, then \code{s} will 43 | be set to \code{smoothness_orders}.} 44 | 45 | \item{pf}{A \code{penalty.factor} value for the generated basis functions that is 46 | used by \code{glmnet} in the LASSO penalization procedure. \code{pf = 1} 47 | (default) is the standard penalization factor used by \code{glmnet} and 48 | \code{pf = 0} means the generated basis functions are unpenalized.} 49 | 50 | \item{monotone}{Whether the basis functions should enforce monotonicity of 51 | the interaction term. If \verb{\code{s} = 0}, this is monotonicity of the 52 | function, and, if \verb{\code{s} = 1}, this is monotonicity of its derivative 53 | (e.g., enforcing a convex fit). Set \code{"none"} for no constraints, \code{"i"} for 54 | a monotone increasing constraint, and \code{"d"} for a monotone decreasing 55 | constraint. Using \code{"i"} constrains the basis functions to have positive 56 | coefficients in the fit, and \code{"d"} constrains the basis functions to have 57 | negative coefficients.} 58 | 59 | \item{.}{Just like with \code{formula}, \code{.} as in \code{h(.)} or \code{h(.,.)} is 60 | treated as a wildcard variable that generates terms using all variables in 61 | the data. The argument \code{.} should be a character vector of variable 62 | names that \code{.} iterates over. Specifically, 63 | \code{h(., k=1, . = c("W1", "W2", "W3"))} is equivalent to 64 | \code{h(W1, k=1) + h(W2, k=1) + h(W3, k=1)}, and 65 | \code{h(., ., k=1, .
= c("W1", "W2", "W3"))} is equivalent to 66 | \code{h(W1,W2, k=1) + h(W2,W3, k=1) + h(W1, W3, k=1)}} 67 | 68 | \item{dot_args_as_string}{Whether the arguments \code{...} are characters or 69 | character vectors and should thus be evaluated directly. When \code{TRUE}, the 70 | expression h("W1", "W2") can be used.} 71 | 72 | \item{X}{An optional design matrix where the variables given in \code{...} 73 | can be found. Otherwise, \code{X} is taken from the parent environment.} 74 | } 75 | \description{ 76 | HAL Formula term: Generate a single term of the HAL basis 77 | } 78 | -------------------------------------------------------------------------------- /man/hal9000.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hal9000.R 3 | \name{hal9000} 4 | \alias{hal9000} 5 | \title{HAL 9000 Quotes} 6 | \usage{ 7 | hal9000() 8 | } 9 | \description{ 10 | Prints a quote from the HAL 9000 robot from 2001: A Space Odyssey 11 | } 12 | -------------------------------------------------------------------------------- /man/hal9001.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hal9001-package.R 3 | \name{hal9001} 4 | \alias{hal9001} 5 | \title{hal9001} 6 | \description{ 7 | Package for fitting the Highly Adaptive LASSO (HAL) estimator 8 | } 9 | -------------------------------------------------------------------------------- /man/hal_quotes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hal_quotes.R 3 | \docType{data} 4 | \name{hal_quotes} 5 | \alias{hal_quotes} 6 | \title{HAL9000 Quotes from "2001: A Space Odyssey"} 7 | \format{ 8 | A vector of quotes. 
9 | } 10 | \usage{ 11 | hal_quotes 12 | } 13 | \description{ 14 | Curated selection of quotes from the HAL9000 computer, from the critically 15 | acclaimed epic science-fiction film "2001: A Space Odyssey" (1968). 16 | } 17 | \keyword{datasets} 18 | -------------------------------------------------------------------------------- /man/index_first_copy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{index_first_copy} 4 | \alias{index_first_copy} 5 | \title{Find Copies of Columns} 6 | \usage{ 7 | index_first_copy(X) 8 | } 9 | \arguments{ 10 | \item{X}{Sparse matrix containing columns of indicator functions.} 11 | } 12 | \description{ 13 | Index vector that, for each column in X, indicates the index of the first 14 | copy of that column 15 | } 16 | -------------------------------------------------------------------------------- /man/make_basis_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{make_basis_list} 4 | \alias{make_basis_list} 5 | \title{Sort Basis Functions} 6 | \usage{ 7 | make_basis_list(X_sub, cols, order_map) 8 | } 9 | \arguments{ 10 | \item{X_sub}{A subset of the columns of X, the original design matrix.} 11 | 12 | \item{cols}{An index of the columns that were reduced to by sub-setting.} 13 | 14 | \item{order_map}{A vector with length the original unsubsetted matrix X which specifies the smoothness of the function in each covariate.} 15 | } 16 | \description{ 17 | Build a sorted list of unique basis functions based on columns, where each 18 | basis function is a list 19 | } 20 | \details{ 21 | Note that sorting of columns is performed such that the basis order 22 | equals cols.length() and each basis function is a list(cols, cutoffs). 
23 | } 24 | -------------------------------------------------------------------------------- /man/make_copy_map.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_dupes.R 3 | \name{make_copy_map} 4 | \alias{make_copy_map} 5 | \title{Build Copy Maps} 6 | \usage{ 7 | make_copy_map(x_basis) 8 | } 9 | \arguments{ 10 | \item{x_basis}{A design matrix consisting of basis (indicator) functions for 11 | covariates (X) and terms for interactions thereof.} 12 | } 13 | \value{ 14 | A \code{list} of \code{numeric} vectors indicating indices of basis 15 | functions that are identical in the training set. 16 | } 17 | \description{ 18 | Build Copy Maps 19 | } 20 | \examples{ 21 | \donttest{ 22 | gendata <- function(n) { 23 | W1 <- runif(n, -3, 3) 24 | W2 <- rnorm(n) 25 | W3 <- runif(n) 26 | W4 <- rnorm(n) 27 | g0 <- plogis(0.5 * (-0.8 * W1 + 0.39 * W2 + 0.08 * W3 - 0.12 * W4)) 28 | A <- rbinom(n, 1, g0) 29 | Q0 <- plogis(0.15 * (2 * A + 2 * A * W1 + 6 * A * W3 * W4 - 3)) 30 | Y <- rbinom(n, 1, Q0) 31 | data.frame(A, W1, W2, W3, W4, Y) 32 | } 33 | set.seed(1234) 34 | data <- gendata(100) 35 | covars <- setdiff(names(data), "Y") 36 | X <- as.matrix(data[, covars, drop = FALSE]) 37 | basis_list <- enumerate_basis(X) 38 | x_basis <- make_design_matrix(X, basis_list) 39 | copy_map <- make_copy_map(x_basis) 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /man/make_design_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{make_design_matrix} 4 | \alias{make_design_matrix} 5 | \title{Build HAL Design Matrix} 6 | \usage{ 7 | make_design_matrix(X, blist, p_reserve = 0.5) 8 | } 9 | \arguments{ 10 | \item{X}{Matrix of covariates containing observed data in the 
columns.} 11 | 12 | \item{blist}{List of basis functions with which to build HAL design matrix.} 13 | 14 | \item{p_reserve}{Sparse matrix pre-allocation proportion. Default value is 0.5. 15 | If one expects a dense HAL design matrix, it is useful to set p_reserve to a higher value.} 16 | } 17 | \value{ 18 | A \code{dgCMatrix} sparse matrix of indicator basis functions 19 | corresponding to the design matrix in a zero-order highly adaptive lasso. 20 | } 21 | \description{ 22 | Make a HAL design matrix based on original design matrix X and a list of 23 | basis functions in argument blist 24 | } 25 | \examples{ 26 | \donttest{ 27 | gendata <- function(n) { 28 | W1 <- runif(n, -3, 3) 29 | W2 <- rnorm(n) 30 | W3 <- runif(n) 31 | W4 <- rnorm(n) 32 | g0 <- plogis(0.5 * (-0.8 * W1 + 0.39 * W2 + 0.08 * W3 - 0.12 * W4)) 33 | A <- rbinom(n, 1, g0) 34 | Q0 <- plogis(0.15 * (2 * A + 2 * A * W1 + 6 * A * W3 * W4 - 3)) 35 | Y <- rbinom(n, 1, Q0) 36 | data.frame(A, W1, W2, W3, W4, Y) 37 | } 38 | set.seed(1234) 39 | data <- gendata(100) 40 | covars <- setdiff(names(data), "Y") 41 | X <- as.matrix(data[, covars, drop = FALSE]) 42 | basis_list <- enumerate_basis(X) 43 | x_basis <- make_design_matrix(X, basis_list) 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /man/make_reduced_basis_map.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reduce_basis_filter.R 3 | \name{make_reduced_basis_map} 4 | \alias{make_reduced_basis_map} 5 | \title{Mass-based reduction of basis functions} 6 | \usage{ 7 | make_reduced_basis_map(x_basis, reduce_basis_crit) 8 | } 9 | \arguments{ 10 | \item{x_basis}{A matrix of basis functions with all redundant basis 11 | functions already removed.} 12 | 13 | \item{reduce_basis_crit}{A scalar \code{numeric} value bounded in the open 14 | interval (0,1) indicating the minimum proportion of 
1's in a basis function 15 | column needed for the basis function to be included in the procedure to fit 16 | the Lasso. Any basis functions with a lower proportion of 1's than the 17 | specified cutoff will be removed. This argument defaults to \code{NULL}, in 18 | which case all basis functions are used in the lasso-fitting stage of the 19 | HAL algorithm.} 20 | } 21 | \value{ 22 | A binary \code{numeric} vector indicating which columns of the 23 | matrix of basis functions to keep (given a one) and which to discard (given 24 | a zero). 25 | } 26 | \description{ 27 | A helper function that finds which basis functions to keep (and equivalently 28 | which to discard) based on the proportion of 1's (observations, i.e., 29 | "mass") included in a given basis function. 30 | } 31 | -------------------------------------------------------------------------------- /man/meets_basis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{meets_basis} 4 | \alias{meets_basis} 5 | \title{Compute Values of Basis Functions} 6 | \usage{ 7 | meets_basis(X, row_num, cols, cutoffs, orders) 8 | } 9 | \arguments{ 10 | \item{X}{The design matrix, containing the original data.} 11 | 12 | \item{row_num}{Numeric for a row index over which to evaluate.} 13 | 14 | \item{cols}{Numeric for the column indices of the basis function.} 15 | 16 | \item{cutoffs}{Numeric providing thresholds.} 17 | 18 | \item{orders}{Numeric providing smoothness orders.} 19 | } 20 | \description{ 21 | Computes and returns the indicator value for the basis described by 22 | cols and cutoffs for a given row of X 23 | } 24 | -------------------------------------------------------------------------------- /man/num_knots_generator.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit
documentation in R/hal.R 3 | \name{num_knots_generator} 4 | \alias{num_knots_generator} 5 | \title{A default generator for the \code{num_knots} argument for each degree of 6 | interactions and the smoothness orders.} 7 | \usage{ 8 | num_knots_generator( 9 | max_degree, 10 | smoothness_orders, 11 | base_num_knots_0 = 500, 12 | base_num_knots_1 = 200 13 | ) 14 | } 15 | \arguments{ 16 | \item{max_degree}{interaction degree.} 17 | 18 | \item{smoothness_orders}{see \code{\link{fit_hal}}.} 19 | 20 | \item{base_num_knots_0}{The base number of knots for zeroth-order smoothness 21 | basis functions. The number of knots by degree interaction decays as 22 | \code{base_num_knots_0/2^(d-1)} where \code{d} is the interaction degree of the basis 23 | function.} 24 | 25 | \item{base_num_knots_1}{The base number of knots for 1 or greater order 26 | smoothness basis functions. The number of knots by degree interaction 27 | decays as \code{base_num_knots_1/2^(d-1)} where \code{d} is the interaction degree of 28 | the basis function.} 29 | } 30 | \description{ 31 | A default generator for the \code{num_knots} argument for each degree of 32 | interactions and the smoothness orders. 
33 | } 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /man/plus-.formula_hal9001.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/formula_hal9001.R 3 | \name{+.formula_hal9001} 4 | \alias{+.formula_hal9001} 5 | \title{HAL Formula addition: Adding formula term object together into a single 6 | formula object term.} 7 | \usage{ 8 | \method{+}{formula_hal9001}(x, y) 9 | } 10 | \arguments{ 11 | \item{x}{A \code{formula_hal9001} object as outputted by \code{h}.} 12 | 13 | \item{y}{A \code{formula_hal9001} object as outputted by \code{h}.} 14 | } 15 | \description{ 16 | HAL Formula addition: Adding formula term object together into a single 17 | formula object term. 18 | } 19 | -------------------------------------------------------------------------------- /man/predict.SL.hal9001.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sl_hal9001.R 3 | \name{predict.SL.hal9001} 4 | \alias{predict.SL.hal9001} 5 | \title{predict.SL.hal9001} 6 | \usage{ 7 | \method{predict}{SL.hal9001}(object, newdata, ...) 8 | } 9 | \arguments{ 10 | \item{object}{A fitted object of class \code{hal9001}.} 11 | 12 | \item{newdata}{A matrix of new observations on which to obtain predictions.} 13 | 14 | \item{...}{Not used.} 15 | } 16 | \value{ 17 | A \code{numeric} vector of predictions from a \code{SL.hal9001} 18 | object based on the provide \code{newdata}. 
19 | } 20 | \description{ 21 | Predict method for objects of class \code{SL.hal9001} 22 | } 23 | -------------------------------------------------------------------------------- /man/predict.hal9001.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{predict.hal9001} 4 | \alias{predict.hal9001} 5 | \title{Prediction from HAL fits} 6 | \usage{ 7 | \method{predict}{hal9001}( 8 | object, 9 | new_data, 10 | new_X_unpenalized = NULL, 11 | offset = NULL, 12 | type = c("response", "link"), 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{object}{An object of class \code{hal9001}, containing the results of 18 | fitting the Highly Adaptive Lasso, as produced by \code{\link{fit_hal}}.} 19 | 20 | \item{new_data}{A \code{matrix} or \code{data.frame} containing new data 21 | (i.e., observations not used for fitting the \code{hal9001} object that's 22 | passed in via the \code{object} argument) for which the \code{hal9001} 23 | object will compute predicted values.} 24 | 25 | \item{new_X_unpenalized}{If the user supplied \code{X_unpenalized} during 26 | training, then user should also supply this matrix with the same number of 27 | observations as \code{new_data}.} 28 | 29 | \item{offset}{A vector of offsets. Must be provided if provided at training.} 30 | 31 | \item{type}{Either "response" for predictions of the response, or "link" for 32 | un-transformed predictions (on the scale of the link function).} 33 | 34 | \item{...}{Additional arguments passed to \code{predict} as necessary.} 35 | } 36 | \value{ 37 | A \code{numeric} vector of predictions from a \code{hal9001} object. 38 | } 39 | \description{ 40 | Prediction from HAL fits 41 | } 42 | \details{ 43 | Method for computing and extracting predictions from fits of the 44 | Highly Adaptive Lasso estimator, returned as a single S3 objects of class 45 | \code{hal9001}. 
46 | } 47 | \note{ 48 | This prediction method does not function similarly to the equivalent 49 | method from \pkg{glmnet}. In particular, this procedure will not return a 50 | subset of lambdas originally specified in calling \code{\link{fit_hal}} 51 | nor result in re-fitting. Instead, it will return predictions for all of 52 | the lambdas specified in the call to \code{\link{fit_hal}} that constructs 53 | \code{object}, when \code{fit_control}'s \code{cv_select} is set to 54 | \code{FALSE}. When \code{fit_control}'s \code{cv_select} is set to 55 | \code{TRUE}, predictions will only be returned for the value of lambda 56 | selected by cross-validation. 57 | } 58 | -------------------------------------------------------------------------------- /man/print.formula_hal9001.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/formula_hal9001.R 3 | \name{print.formula_hal9001} 4 | \alias{print.formula_hal9001} 5 | \title{Print formula_hal9001 object} 6 | \usage{ 7 | \method{print}{formula_hal9001}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A formula_hal9001 object.} 11 | 12 | \item{...}{Other arguments (ignored).} 13 | } 14 | \description{ 15 | Print formula_hal9001 object 16 | } 17 | -------------------------------------------------------------------------------- /man/print.summary.hal9001.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.R 3 | \name{print.summary.hal9001} 4 | \alias{print.summary.hal9001} 5 | \title{Print Method for Summary Class of HAL fits} 6 | \usage{ 7 | \method{print}{summary.hal9001}(x, length = NULL, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{summary.hal9001}.} 11 | 12 | \item{length}{The number of ranked coefficients to be summarized.} 13 | 14 | \item{...}{Other arguments (ignored).} 15 | } 16 | \description{ 17 | Print Method for Summary Class of HAL fits 18 | } 19 | -------------------------------------------------------------------------------- /man/quantizer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make_basis.R 3 | \name{quantizer} 4 | \alias{quantizer} 5 | \title{Discretize Variables into Number of Bins by Unique Values} 6 | \usage{ 7 | quantizer(X, bins) 8 | } 9 | \arguments{ 10 | \item{X}{A \code{numeric} vector to be discretized.} 11 | 12 | \item{bins}{A \code{numeric} scalar indicating the number of bins into which 13 | \code{X} should be discretized..} 14 | } 15 | \description{ 16 | Discretize Variables into Number of Bins by Unique Values 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/squash_hal_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/squash_hal.R 3 | \name{squash_hal_fit} 4 | \alias{squash_hal_fit} 5 | \title{Squash HAL objects} 6 | \usage{ 7 | squash_hal_fit(object) 8 | } 9 | \arguments{ 10 | \item{object}{An object of class \code{hal9001}, containing the results of 11 | fitting the Highly Adaptive LASSO, as produced by a call to \code{fit_hal}.} 12 | } 13 | \value{ 14 | Object of class \code{hal9001}, similar to the input object but 15 | reduced such that coefficients belonging to bases with coefficients equal 16 | to zero removed. 
17 | } 18 | \description{ 19 | Reduce footprint by dropping basis functions with coefficients of zero 20 | } 21 | \examples{ 22 | \donttest{ 23 | # generate simple test data 24 | n <- 100 25 | p <- 3 26 | x <- matrix(rnorm(n * p), n, p) 27 | y <- sin(x[, 1]) * sin(x[, 2]) + rnorm(n, mean = 0, sd = 0.2) 28 | 29 | # fit HAL model and squash resulting object to reduce footprint 30 | hal_fit <- fit_hal(X = x, Y = y, yolo = FALSE) 31 | squashed <- squash_hal_fit(hal_fit) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /man/summary.hal9001.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.R 3 | \name{summary.hal9001} 4 | \alias{summary.hal9001} 5 | \title{Summary Method for HAL fit objects} 6 | \usage{ 7 | \method{summary}{hal9001}( 8 | object, 9 | lambda = NULL, 10 | only_nonzero_coefs = TRUE, 11 | include_redundant_terms = FALSE, 12 | round_cutoffs = 3, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{object}{An object of class \code{hal9001}, containing the results of 18 | fitting the Highly Adaptive Lasso, as produced by \code{\link{fit_hal}}.} 19 | 20 | \item{lambda}{Optional \code{numeric} value of the lambda tuning 21 | parameter, for which corresponding coefficient values will be summarized. 22 | Defaults to \code{\link{fit_hal}}'s optimal value, \code{lambda_star}, or 23 | the minimum value of \code{lambda_star}.} 24 | 25 | \item{only_nonzero_coefs}{A \code{logical} specifying whether the summary 26 | should include only terms with non-zero coefficients.} 27 | 28 | \item{include_redundant_terms}{A \code{logical} specifying whether the 29 | summary should remove so-called "redundant terms". 
We define a redundant 30 | term (say x1) as a term (1) with basis function corresponding to an 31 | existing basis function, a duplicate; and (2) the duplicate contains the 32 | x1 term as part of its term, so that x1 terms inclusion would be redundant. 33 | For example, say the same coefficient corresponds to these three terms: 34 | (1) "I(age >= 50)*I(bmi >= 18)", (2) "I(age >= 50)", and (3) 35 | "I(education >= 16)". When \code{include_redundant_terms} is 36 | \code{FALSE} (default), the second basis function is omitted.} 37 | 38 | \item{round_cutoffs}{An \code{integer} indicating the number of decimal 39 | places to be used for rounding cutoff values in the term. For example, if 40 | "bmi" was numeric that was rounded to the third decimal, in the example 41 | above we would have needed to specify \code{round_cutoffs = 0} in order to 42 | yield a term like "I(bmi >= 18)" opposed to something like 43 | "I(bmi >= 18.111)". This rounding is intended to simplify the term-wise 44 | part of the output and only rounds the basis cutoffs, the \code{hal9001} 45 | model's coefficients are not rounded.} 46 | 47 | \item{...}{Additional arguments passed to \code{summary}, not supported.} 48 | } 49 | \value{ 50 | A list summarizing a \code{hal9001} object's coefficients. 51 | } 52 | \description{ 53 | Summary Method for HAL fit objects 54 | } 55 | \details{ 56 | Method for summarizing the coefficients of the Highly Adaptive 57 | Lasso estimator in terms of the basis functions corresponding to covariates 58 | and interactions of covariates, returned as a single S3 object of class 59 | \code{hal9001}. 60 | 61 | Due to the nature of the basis function terms, the summary tables can be 62 | extremely wide. The R environment might not be the optimal location to view 63 | the summary. Tables can be exported from R to LaTeX with \pkg{xtable} 64 | package (or similar). Here's an example: 65 | \code{print(xtable(summary(fit)$table, type = "latex"), file = "dt.tex")}. 
66 | } 67 | -------------------------------------------------------------------------------- /sandbox/benchmarks_hal9001_files/figure-html/hal_benchmark_drtmle_estimateG-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/sandbox/benchmarks_hal9001_files/figure-html/hal_benchmark_drtmle_estimateG-1.png -------------------------------------------------------------------------------- /sandbox/benchmarks_hal9001_files/figure-html/hal_benchmark_drtmle_estimateG_SL-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/sandbox/benchmarks_hal9001_files/figure-html/hal_benchmark_drtmle_estimateG_SL-1.png -------------------------------------------------------------------------------- /sandbox/benchmarks_hal9001_files/figure-html/hal_benchmark_drtmle_estimateQ-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/sandbox/benchmarks_hal9001_files/figure-html/hal_benchmark_drtmle_estimateQ-1.png -------------------------------------------------------------------------------- /sandbox/benchmarks_hal9001_files/figure-html/hal_microbenchmark-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/sandbox/benchmarks_hal9001_files/figure-html/hal_microbenchmark-1.png -------------------------------------------------------------------------------- /sandbox/cate_hal.R: -------------------------------------------------------------------------------- 1 | # Estimate the CATE via HAL using the delta method 2 | devtools::load_all() 3 | n_obs <- 1000 4 | W <- replicate(3, rbinom(n_obs, 1, 0.5)) 5 | 
A <- rbinom(n_obs, 1, plogis(rowSums(W[, -3]))) 6 | Y <- A - rowSums(W) + rnorm(n_obs) 7 | 8 | g_AW <- glm(A ~ W, family = "binomial") 9 | pred_g <- as.numeric(predict(g_AW)) 10 | 11 | 12 | hal_cate_delta <- function(tmle_task, g_fit, ci_level = 0.95, ...) { 13 | 14 | # get multiplier for Wald-style confidence intervals 15 | ci_mult <- (c(-1, 1) * stats::qnorm((1 - ci_level) / 2)) 16 | 17 | # get output from TMLE task object 18 | Y <- tmle_task$get_tmle_node("Y") 19 | A <- tmle_task$get_tmle_node("A") 20 | W <- tmle_task$get_tmle_node("W") 21 | 22 | # fit a HAL model to the pseudo-CATE transformed values and predict on W 23 | cate <- as.numeric((Y * 2 * A - 1) / g_fit) # NOTE(review): is this meant to be Y * (2 * A - 1) / g_fit? confirm pseudo-outcome parenthesization 24 | hal_cate <- hal9001::fit_hal(X = as.matrix(W), Y = cate, ...) 25 | pred_cate <- stats::predict(hal_cate, new_data = W) 26 | 27 | # compute residuals; extract basis function matrix and HAL coefficients 28 | resids_cate <- as.numeric(Y - pred_cate) # NOTE(review): residual is taken against Y, not the pseudo-outcome cate -- verify intent 29 | phi_basis <- as.matrix(hal_cate$x_basis) 30 | coefs_hal <- hal_cate$coefs 31 | 32 | # J x J matrix of basis function values -- analogous to EIF matrix? 33 | cnb <- tcrossprod(crossprod(phi_basis, resids_cate), coefs_hal)[, -1] 34 | cnb_inv <- MASS::ginv(as.matrix(cnb)) 35 | 36 | # compute individual-level estimates of basis function contributions 37 | cate_est_obs <- tcrossprod(phi_basis, 38 | t(crossprod(cnb_inv, 39 | crossprod(phi_basis, resids_cate)))) # fixed: `resids` was undefined (object not found); residuals are bound as resids_cate above 40 | 41 | # compute variance of CATE, parameter estimate, and get inference 42 | cate_var <- var(cate_est_obs) 43 | cate_est <- mean(cate_est_obs) 44 | cate_ci <- cate_est + ci_mult * as.numeric(sqrt(cate_var / length(Y))) 45 | 46 | # generate output table 47 | ci_out <- data.table::data.table(cate_ci[2], cate_est, cate_ci[1]) 48 | data.table::setnames(ci_out, c("ci_lwr", "est", "ci_upr")) 49 | return(ci_out) 50 | } 51 | 52 | -------------------------------------------------------------------------------- /sandbox/hal_benchmarks.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "hal90001 Benchmarks" 3 | author: "Jeremy Coyle" 4 | date: "10/5/2017" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE, results='hide'} 9 | library(knitr) 10 | knitr::opts_chunk$set(echo = TRUE) 11 | library(sl3) 12 | library(delayed) 13 | library(SuperLearner) 14 | library(future) 15 | library(ggplot2) 16 | library(data.table) 17 | library(stringr) 18 | library(scales) 19 | ``` 20 | 21 | ## Introduction 22 | 23 | This document consists of some simple benchmarks for various choices of SuperLearner implementation, wrapper functions, and parallelization schemes. The purpose of this document is two-fold: 24 | 25 | 1. Compare the computational performance of these methods 26 | 2.
Illustrate the use of these different methods 27 | 28 | ## Test Setup 29 | 30 | 31 | ### Test System 32 | 33 | ```{r systemInfo, echo=FALSE, results="asis"} 34 | uname <- system("uname -a", intern = TRUE) 35 | os <- sub(" .*", "", uname) 36 | if(os=="Darwin"){ 37 | cpu_model <- system("sysctl -n machdep.cpu.brand_string", intern = TRUE) 38 | cpus_physical <- as.numeric(system("sysctl -n hw.physicalcpu", intern = TRUE)) 39 | cpus_logical <- as.numeric(system("sysctl -n hw.logicalcpu", intern = TRUE)) 40 | cpu_clock <- system("sysctl -n hw.cpufrequency_max", intern = TRUE) 41 | memory <- system("sysctl -n hw.memsize", intern = TRUE) 42 | } else if(os=="Linux"){ 43 | cpu_model <- system("lscpu | grep 'Model name'", intern = TRUE) 44 | cpu_model <- gsub("Model name:[[:blank:]]*","", cpu_model) 45 | cpus_logical <- system("lscpu | grep '^CPU(s)'", intern = TRUE) 46 | cpus_logical <- as.numeric(gsub("^.*:[[:blank:]]*","", cpus_logical)) 47 | tpc <- system("lscpu | grep '^Thread(s) per core'", intern = TRUE) 48 | tpc <- as.numeric(gsub("^.*:[[:blank:]]*","", tpc)) 49 | cpus_physical <- cpus_logical/tpc 50 | cpu_clock <- as.numeric(gsub("GHz","",gsub("^.*@","",cpu_model)))*10^9 51 | memory <- system("cat /proc/meminfo | grep '^MemTotal'", intern = TRUE) 52 | memory <- as.numeric(gsub("kB","",gsub("^.*:","",memory)))*2^10 53 | } else { 54 | stop("unsupported OS") 55 | } 56 | ``` 57 | 58 | * CPU model: `r cpu_model` 59 | * Physical cores: `r as.numeric(cpus_physical)` 60 | * Logical cores: `r as.numeric(cpus_logical)` 61 | * Clock speed: `r as.numeric(cpu_clock)/10^9`GHz 62 | * Memory: `r round(as.numeric(memory)/2^30, 1)`GB 63 | 64 | ### Test Data 65 | 66 | ### Tests 67 | 68 | ```{r lassi} 69 | microbenchmark({ 70 | glmnet::glmnet(x = x_basis, y = y, intercept = TRUE, nlambda = 100, 71 | lambda.min.ratio = 0.01, family = "gaussian", alpha = 1, standardize = TRUE) 72 | }, times = 10) 73 | microbenchmark({ 74 | lassi(x_basis, y, nlambda=100, lambda_min_ratio = 0.01, center = 
FALSE) 75 | }, times = 10) 76 | 77 | microbenchmark({ 78 | lassi(x_basis, y, nlambda=100, lambda_min_ratio = 0.01, center = TRUE) 79 | }, times = 10) 80 | 81 | microbenchmark({ 82 | glmnet::cv.glmnet(x = x_basis, y = y, intercept = TRUE, nlambda = 100, 83 | lambda.min.ratio = 0.01, family = "gaussian", alpha = 1, standardize = TRUE) 84 | }, times = 1) 85 | 86 | microbenchmark({ 87 | cv_lasso(x_basis, y, center = FALSE) 88 | }, times = 1) 89 | 90 | microbenchmark({ 91 | cv_lasso(x_basis, y, center = TRUE) 92 | }, times = 1) 93 | 94 | set.seed(1234) 95 | cv_l_full <- cv_lasso(x_basis, y, center = FALSE) 96 | set.seed(1234) 97 | cv_l_es <- cv_lasso_early_stopping(x_basis, y) 98 | plot(cv_l_es) 99 | plot(cv_l_full$lambdas_cvmse) 100 | microbenchmark({ 101 | cv_lasso_early_stopping(x_basis, y) 102 | }, times = 1) 103 | 104 | ``` 105 | 106 | ## Session Information 107 | 108 | ```{r sessionInfo, echo=FALSE, results="asis"} 109 | sessionInfo() 110 | ``` -------------------------------------------------------------------------------- /sandbox/mangolassi_types.h: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppEigen)]] 2 | #include 3 | using namespace Rcpp; 4 | typedef Eigen::MappedSparseMatrix MSpMat; 5 | typedef MSpMat::InnerIterator MInIterMat; 6 | typedef Eigen::SparseMatrix SpMat; 7 | typedef Eigen::SparseMatrix IntSpMat; 8 | typedef SpMat::InnerIterator InIterMat; 9 | typedef SpMat::InnerVectorReturnType InVec; 10 | typedef Eigen::SparseVector SpVec; 11 | typedef SpVec::InnerIterator InIterVec; 12 | 13 | struct cmpMatrixRow { 14 | bool operator()(const NumericVector& a, const NumericVector& b) const { 15 | 16 | int i = 0; 17 | 18 | int smaller_length=a.size(); 19 | if (b.size() < smaller_length) { 20 | smaller_length = b.size(); 21 | } 22 | 23 | for (i = 0; i < smaller_length; i++) { 24 | if (a[i] ==b[i]) { 25 | //skip anything at the beginning that matches 26 | continue; 27 | } else { 28 | //once there's a 
mismatch, determine which one is bigger 29 | return(a[i] < b[i]); 30 | } 31 | } 32 | return(a.size() < b.size()); 33 | } 34 | }; 35 | 36 | typedef std::map BasisMap; 37 | 38 | typedef std::pair MSpMatCol; 39 | 40 | struct cmpCol { 41 | bool operator()(const MSpMatCol& a, const MSpMatCol& b) const { 42 | //returns true if a is strictly less than b 43 | const MSpMat& X = a.first; 44 | int col_a = a.second; 45 | int col_b = b.second; 46 | 47 | MInIterMat iter_b(X, col_b); 48 | for (MInIterMat iter_a(X, col_a); iter_a; ++iter_a, ++iter_b) { 49 | if (!iter_b) { 50 | //we've matched the entirety of b to a, but there's still more elements in a, so it comes after 51 | //iter_b is shorter 52 | return(false); 53 | } 54 | int index_a = iter_a.index(); 55 | int index_b = iter_b.index(); 56 | 57 | // Rcout << index_a << " " << index_b << std::endl; 58 | if (index_a == index_b) { 59 | //skip anything at the beginning that matches 60 | continue; 61 | } else { 62 | //once there's a mismatch, determine sort order 63 | //if iter_a has a lower index, it comes later in the sort order 64 | return(index_a > index_b); 65 | } 66 | } 67 | //we've matched the entirety of a to b 68 | //if there are more elements in b, it comes after, otherwise they're a match 69 | return(iter_b); 70 | } 71 | }; 72 | 73 | typedef std::map ColMap; 74 | 75 | -------------------------------------------------------------------------------- /sandbox/prof.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlverse/hal9001/00fe70f32bcf32e006ad415fe5b1bd8947be8b6f/sandbox/prof.gif -------------------------------------------------------------------------------- /sandbox/toprof.R: -------------------------------------------------------------------------------- 1 | library(hal9001) 2 | library(hal) 3 | x <- xmat <- matrix(rnorm(1000 * 3), 1000, 3) 4 | cols <- c(2, 3) 5 | x_sub <- x[, cols] 6 | basis_list <- mangolassi:::make_basis_list(x_sub) 7 | for (i 
in 1:100) { 8 | z <- mangolassi:::evaluate_basis_list(x_sub, basis_list) 9 | } 10 | # CPUPROFILE='myprof.log' Rscript inst/toprof.R 11 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | //CPPFLAGS=-O3 2 | //CXXFLAGS=-O3 3 | //PKG_CPPFLAGS=-O3 4 | //PKG_CXXFLAGS=-O3 5 | -------------------------------------------------------------------------------- /src/dedupe.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppEigen)]] 2 | #include 3 | #include "hal9001_types.h" 4 | using namespace Rcpp; 5 | //------------------------------------------------------------------------------ 6 | 7 | //' Find Copies of Columns 8 | //' 9 | //' Index vector that, for each column in X, indicates the index of the first 10 | //' copy of that column 11 | //' 12 | //' @param X Sparse matrix containing columns of indicator functions. 
13 | //' 14 | // [[Rcpp::export]] 15 | IntegerVector index_first_copy(const MSpMat& X) { 16 | int p = X.cols(); 17 | 18 | ColMap col_map; 19 | IntegerVector copy_index(p); 20 | 21 | for (int j = 0; j < p; j++) { 22 | MSpMatCol current_col(X, j); 23 | 24 | //https://stackoverflow.com/questions/97050/stdmap-insert-or-stdmap-find 25 | ColMap::iterator match = col_map.lower_bound(current_col); 26 | if (match != col_map.end() && 27 | !(col_map.key_comp()(current_col, match->first))) { 28 | // column already exists 29 | copy_index[j] = match->second + 1; //use 1-indexing 30 | } else { 31 | // column not yet in map 32 | col_map.insert(match, ColMap::value_type(current_col, j)); 33 | copy_index[j] = j+1; //use 1-indexing 34 | } 35 | } 36 | return(copy_index); 37 | } 38 | 39 | //------------------------------------------------------------------------------ 40 | 41 | //' Apply copy map 42 | //' 43 | //' OR duplicate training set columns together 44 | //' 45 | //' @param X Sparse matrix containing columns of indicator functions. 
46 | //' @param copy_map the copy map 47 | //' 48 | //' @export 49 | //' 50 | //' @examples 51 | //' \donttest{ 52 | //' gendata <- function(n) { 53 | //' W1 <- runif(n, -3, 3) 54 | //' W2 <- rnorm(n) 55 | //' W3 <- runif(n) 56 | //' W4 <- rnorm(n) 57 | //' g0 <- plogis(0.5 * (-0.8 * W1 + 0.39 * W2 + 0.08 * W3 - 0.12 * W4)) 58 | //' A <- rbinom(n, 1, g0) 59 | //' Q0 <- plogis(0.15 * (2 * A + 2 * A * W1 + 6 * A * W3 * W4 - 3)) 60 | //' Y <- rbinom(n, 1, Q0) 61 | //' data.frame(A, W1, W2, W3, W4, Y) 62 | //' } 63 | //' set.seed(1234) 64 | //' data <- gendata(100) 65 | //' covars <- setdiff(names(data), "Y") 66 | //' X <- as.matrix(data[, covars, drop = FALSE]) 67 | //' basis_list <- enumerate_basis(X) 68 | //' x_basis <- make_design_matrix(X, basis_list) 69 | //' copy_map <- make_copy_map(x_basis) 70 | //' x_basis_uniq <- apply_copy_map(x_basis, copy_map) 71 | //' } 72 | //' 73 | //' @return A \code{dgCMatrix} sparse matrix corresponding to the design matrix 74 | //' for a zero-th order highly adaptive lasso, but with all duplicated columns 75 | //' (basis functions) removed. 
76 | // [[Rcpp::export]] 77 | SpMat apply_copy_map(const MSpMat X, const List& copy_map) { 78 | int n = X.rows(); 79 | int basis_p = copy_map.size(); 80 | 81 | SpMat x_unique(n, basis_p); 82 | x_unique.reserve(0.5 * n * basis_p); 83 | 84 | for(int j=0; j 5 | using namespace Rcpp; 6 | typedef Eigen::SparseMatrix SpMat; 7 | typedef Eigen::SparseMatrix IntSpMat; 8 | typedef Eigen::Map MSpMat; 9 | typedef MSpMat::InnerIterator MInIterMat; 10 | 11 | typedef SpMat::InnerIterator InIterMat; 12 | typedef SpMat::InnerVectorReturnType InVec; 13 | typedef Eigen::SparseVector SpVec; 14 | typedef SpVec::InnerIterator InIterVec; 15 | 16 | struct cmpMatrixRow { 17 | bool operator()(const NumericVector& a, const NumericVector& b) const { 18 | 19 | int i = 0; 20 | 21 | int smaller_length = a.size(); 22 | if (b.size() BasisMap; 40 | 41 | typedef std::pair MSpMatCol; 42 | 43 | struct cmpCol { 44 | bool operator()(const MSpMatCol& a, const MSpMatCol& b) const { 45 | //returns true if a is strictly less than b 46 | const MSpMat& X = a.first; 47 | int col_a = a.second; 48 | int col_b = b.second; 49 | 50 | MInIterMat iter_b(X, col_b); 51 | for (MInIterMat iter_a(X, col_a); iter_a; ++iter_a,++iter_b) { 52 | if (!iter_b) { 53 | //we've matched the entirety of b to a, but there's still more... 
54 | //...elements in a, so it comes after 55 | //iter_b is shorter 56 | return(false); 57 | } 58 | int index_a = iter_a.index(); 59 | int index_b = iter_b.index(); 60 | 61 | // Rcout << index_a << " " << index_b << std::endl; 62 | if (index_a == index_b) { 63 | //skip anything at the beginning that matches 64 | continue; 65 | } else { 66 | //once there's a mismatch, determine sort order 67 | //if iter_a has a lower index, it comes later in the sort order 68 | return(index_a > index_b); 69 | } 70 | } 71 | //we've matched the entirety of a to b 72 | //if there are more elements in b, it comes after, otherwise they're a match 73 | return(iter_b); 74 | } 75 | }; 76 | 77 | typedef std::map ColMap; 78 | 79 | #endif //HAL9001_TYPES_H 80 | -------------------------------------------------------------------------------- /src/utils.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppEigen)]] 2 | #include 3 | #include 4 | #include "hal9001_types.h" 5 | using namespace Rcpp; 6 | //------------------------------------------------------------------------------ 7 | 8 | //' Fast Coercion to Sparse Matrix 9 | //' 10 | //' Fast and efficient coercion of standard matrix objects to sparse matrices. 11 | //' Borrowed from http://gallery.rcpp.org/articles/sparse-matrix-coercion/. 12 | //' INTERNAL USE ONLY. 13 | //' 14 | //' @param XX_ An object of class \code{Matrix} that has a sparse structure 15 | //' suitable for coercion to a sparse matrix format of \code{dgCMatrix}. 16 | //' 17 | //' @return An object of class \code{dgCMatrix}, coerced from input \code{XX_}. 
18 | //' 19 | // [[Rcpp::export]] 20 | SEXP as_dgCMatrix( SEXP XX_ ) { 21 | typedef Eigen::SparseMatrix SpMat; 22 | typedef Eigen::Map MapMatd; // Input: must be double 23 | MapMatd X(Rcpp::as(XX_)); 24 | SpMat Xsparse = X.sparseView(); // Output: sparse matrix 25 | S4 Xout(wrap(Xsparse)); // Output: as S4 object 26 | NumericMatrix Xin(XX_); // Copy dimnames 27 | Xout.slot("Dimnames") = clone(List(Xin.attr("dimnames"))); 28 | return(Xout); 29 | } 30 | 31 | //------------------------------------------------------------------------------ 32 | 33 | 34 | // Find Nonzero Entries 35 | IntegerVector non_zeros(const MSpMat& X) { 36 | int p = X.cols(); 37 | int j; 38 | int nz; 39 | 40 | IntegerVector non_zeros(p); 41 | 42 | for (j = 0; j < p; ++j) { 43 | nz = 0; 44 | for (MInIterMat i_(X, j); i_; ++i_) { 45 | nz++; 46 | } 47 | non_zeros[j] = nz; 48 | } 49 | return(non_zeros); 50 | } 51 | 52 | //------------------------------------------------------------------------------ 53 | 54 | //' Calculate Proportion of Nonzero Entries 55 | //' 56 | //' @keywords internal 57 | //' 58 | // [[Rcpp::export]] 59 | NumericVector calc_pnz(const MSpMat& X) { 60 | IntegerVector nz = non_zeros(X); 61 | int n = X.rows(); 62 | NumericVector pnz = as(nz)/n; 63 | 64 | return(pnz); 65 | } 66 | 67 | //------------------------------------------------------------------------------ 68 | 69 | // Safer Square Root 70 | NumericVector not_dumb_sqrt(const NumericVector& x){ 71 | NumericVector res(x.length()); 72 | for(int i=0; i lambda) { 113 | beta -= lambda; 114 | } else if (beta < -1 * lambda) { 115 | beta += lambda; 116 | } else { 117 | beta = 0; 118 | } 119 | return(beta); 120 | } 121 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppEigen)]] 2 | 3 | #include 4 | #include "hal9001_types.h" 5 | using namespace Rcpp; 6 | 7 | NumericVector get_pnz(const 
MSpMat& X); 8 | NumericVector get_xscale(const MSpMat& X, const NumericVector& xcenter); 9 | NumericVector calc_pnz(const MSpMat& X); 10 | NumericVector calc_xscale(const MSpMat& X, const NumericVector& xcenter); 11 | bool equal_double(double x, double y); 12 | double soft_max(double beta, double lambda); -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(methods) 3 | library(data.table) 4 | library(microbenchmark) 5 | library(glmnet) 6 | library(SuperLearner) 7 | library(hal9001) 8 | 9 | test_check("hal9001") 10 | -------------------------------------------------------------------------------- /tests/testthat/test-basis.R: -------------------------------------------------------------------------------- 1 | context("Unit test for procedures relating to basis functions.") 2 | # library(microbenchmark) 3 | 4 | 5 | # Rcpp::compileAttributes() load_all() 6 | 7 | compare_basis <- function(ab1, ab2) { 8 | basis_str1 <- apply(ab1, 2, paste, collapse = "") 9 | basis_str2 <- apply(ab2, 2, paste, collapse = "") 10 | all(basis_str1 %in% basis_str2) && all(basis_str2 %in% basis_str1) 11 | } 12 | 13 | if ("hal" %in% installed.packages()) { 14 | basis_test <- function(x) { 15 | basis_list <- enumerate_basis(x) 16 | x_basis <- make_design_matrix(x, basis_list) 17 | x_basis_hal <- hal:::makeSparseMat(x) 18 | expect_true(compare_basis(x_basis, x_basis_hal)) 19 | } 20 | 21 | basis_timing <- function(x) { 22 | basis_list <- enumerate_basis(x) 23 | microbenchmark( 24 | { 25 | hal:::makeSparseMat(x) 26 | }, 27 | { 28 | basis_list <- enumerate_basis(x) 29 | }, 30 | { 31 | x_basis <- make_design_matrix(x, basis_list) 32 | }, 33 | times = 1 34 | ) 35 | } 36 | 37 | n <- 100 38 | p <- 10 39 | x_mat_1 <- matrix(rnorm(n * p), n, p) 40 | basis_test(x_mat_1) 41 | 42 | # basis_timing(x_mat_1) 43 | 44 | # n <- 1000 45 | # p <- 3 46 | #
x_mat_2 <- matrix(rnorm(n * p), n, p) 47 | # basis_test(x_mat_2) 48 | # basis_timing(x_mat_2) 49 | 50 | # x_mat_3 <- matrix(rbinom(n * p, 1, 0.5), n, p) 51 | # basis_test(x_mat_3) 52 | # basis_timing(x_mat_3) 53 | } 54 | -------------------------------------------------------------------------------- /tests/testthat/test-cv_lasso.R: -------------------------------------------------------------------------------- 1 | context("Unit test for the generic cross-validated LASSO estimation procedure.") 2 | # library(glmnet) 3 | library(origami) 4 | set.seed(749125) 5 | 6 | ################################################################################ 7 | ## SIMULATION SETUP 8 | ################################################################################ 9 | 10 | # number of CV folds 11 | n_folds <- 3 12 | 13 | # generate simple test data 14 | n <- 100 15 | p <- 3 16 | x <- xmat <- matrix(rnorm(n * p), n, p) 17 | y <- x[, 1] + rnorm(n, mean = 0, sd = 1) 18 | 19 | test_n <- 1e4 20 | test_x <- matrix(rnorm(test_n * p), test_n, p) 21 | test_y <- test_x[, 1] + rnorm(test_n, mean = 0, sd = 0.5) 22 | 23 | # fit design matrix for HAL 24 | basis_list <- hal9001:::enumerate_basis(x) 25 | x_basis <- hal9001:::make_design_matrix(x, basis_list) 26 | 27 | # catalog and eliminate duplicates 28 | copy_map <- hal9001:::make_copy_map(x_basis) 29 | unique_columns <- as.numeric(names(copy_map)) 30 | x_basis <- x_basis[, unique_columns] 31 | 32 | ################################################################################ 33 | # cv.glmnet reference 34 | ################################################################################ 35 | 36 | # create fold ID object for using the same folds between cv.glmnet and origami 37 | folds <- make_folds(n) 38 | fold_id <- origami:::folds2foldvec(folds) 39 | 40 | # just use the standard implementation available in glmnet 41 | lasso_glmnet <- glmnet::cv.glmnet( 42 | x = x_basis, y = y, nfolds = n_folds, 43 | foldid = fold_id 44 | ) 45 | 
lambda_minmse_cvglmnet <- lasso_glmnet$lambda.min 46 | lambda_1se_cvglmnet <- lasso_glmnet$lambda.1se 47 | coef_minmse_cvglmnet <- as.numeric(coef(lasso_glmnet, "lambda.min")) 48 | coef_1se_cvglmnet <- as.numeric(coef(lasso_glmnet, "lambda.1se")) 49 | betas_cvglmnet <- cbind(coef_1se_cvglmnet, coef_minmse_cvglmnet) 50 | -------------------------------------------------------------------------------- /tests/testthat/test-dedupe.R: -------------------------------------------------------------------------------- 1 | context("Unit test for removing duplicate columns of indicator functions.") 2 | 3 | # generate design matrix for HAL 4 | n <- 100 5 | p <- 3 6 | x <- matrix(rnorm(n * p), n, p) 7 | basis_list <- enumerate_basis(x) 8 | x_basis <- make_design_matrix(x, basis_list) 9 | copy_map <- make_copy_map(x_basis) 10 | 11 | compare_basis <- function(ab1, ab2) { 12 | basis_str1 <- apply(ab1, 2, paste, collapse = "") 13 | basis_str2 <- apply(ab2, 2, paste, collapse = "") 14 | all(basis_str1 %in% basis_str2) && all(basis_str2 %in% basis_str1) 15 | } 16 | 17 | unique_columns <- as.numeric(names(copy_map)) 18 | x_basis_uniq <- x_basis[, unique_columns] 19 | test_that("Information preserved after reduction to unique basis functions", { 20 | expect_true(compare_basis(x_basis, x_basis_uniq)) 21 | }) 22 | 23 | # now that we've removed duplicates, the copy map should be all length 1 24 | new_copy_map <- make_copy_map(x_basis_uniq) 25 | largest_group <- max(sapply(new_copy_map, length)) 26 | test_that("Copy map simple after reduction", { 27 | expect_equal(largest_group, 1) 28 | }) 29 | 30 | x_basis_uniq2 <- apply_copy_map(x_basis, copy_map) 31 | test_that("apply_copy_map matches unique columns for original data", { 32 | expect_equivalent(x_basis_uniq, x_basis_uniq2) 33 | }) 34 | 35 | # test for or_duplicate_columns 36 | mat <- Matrix::sparseMatrix( 37 | i = c(1, 2), j = c(2, 5), x = c(1, 1), 38 | dims = c(2, 5) 39 | ) 40 | copy_map <- list(c(3, 3), c(1, 2)) 41 | reduced <-
apply_copy_map(mat, copy_map) 42 | 43 | copy_group <- copy_map[[1]] 44 | simple <- sapply( 45 | copy_map, 46 | function(copy_group) apply(mat[, copy_group], 1, max) 47 | ) 48 | 49 | test_that("apply_copy_map results in correct dimenson for output", { 50 | expect_equal(dim(simple), dim(reduced)) 51 | }) 52 | 53 | max_diff <- max(abs(simple - reduced)) 54 | test_that("apply_copy_map matches a simple R implementation", { 55 | expect_equal(max_diff, 0) 56 | }) 57 | -------------------------------------------------------------------------------- /tests/testthat/test-formula.R: -------------------------------------------------------------------------------- 1 | context("check formula function") 2 | 3 | 4 | n <- 500 5 | p <- 3 6 | X <- xmat <- matrix(rnorm(n * p), n, p) 7 | colnames(X) <- c("X1", "X2", "X3") 8 | 9 | 10 | 11 | test_that("Check formula", { 12 | smoothness_orders <- 1 13 | num_knots <- 3 14 | expect_true(length(h(X1)$basis_list) == num_knots) 15 | expect_true(h(X1)$basis_list[[1]]$orders == 1) 16 | expect_true(all(h(X1)$penalty.factors == 1)) 17 | out <- h(X1, pf = 0) 18 | expect_true(all(out$penalty.factors == 0)) 19 | out <- h(X1, X2, k = 5) 20 | 21 | expect_true(length(out$basis_list) == 25) 22 | out <- h(X1, X2, k = 5, monotone = "i") 23 | expect_true(all(out$lower.limits == 0)) 24 | expect_true(length((h(X1) + h(X2))$basis_list) == 6) 25 | formula <- ~ h(X1) + h(X2) 26 | expect_true(length(setdiff(formula_hal(formula)$basis_list, (h(X1) + h(X2))$basis_list)) == 0) 27 | formula <- "~ h(X1) + h(X2)" 28 | expect_true(length(setdiff(formula_hal(formula)$basis_list, (h(X1) + h(X2))$basis_list)) == 0) 29 | expect_true(length(formula_hal(formula, num_knots = 3)$basis_list) == length(formula_hal(formula)$basis_list)) 30 | expect_true(length(formula_hal(formula, num_knots = 10)$basis_list) != length(formula_hal(formula)$basis_list)) 31 | formula <- h(., k = 2)$basis_list 32 | expect_true(length(formula[[1]]$cols) == 1) 33 | formula <- h(., ., k = 2)$basis_list 
34 | expect_true(length(formula[[1]]$cols) == 2) 35 | }) 36 | 37 | 38 | 39 | 40 | # 41 | # n <- 500 42 | # p <- 3 43 | # X <- xmat <- matrix(rnorm(n * p), n, p) 44 | # colnames(X) <- c("X1", "X2", "X3") 45 | # smoothness_orders <- 1 46 | # num_knots <- 1 47 | # length(h(W1)$basis_list) 48 | # 49 | # 50 | # 51 | # test_that("Check formula", { 52 | # formula <- formula_hal("Y ~ h(X1) + h(X2) + h(X3)", x, num_knots = 5) 53 | # expect_true(length(formula$basis_list) == p * 5) 54 | # }) 55 | # 56 | # test_that("Check formula", { 57 | # formula <- formula_hal("~ .", x, num_knots = 5) 58 | # expect_true(length(formula$basis_list) == p * 5) 59 | # }) 60 | # 61 | # 62 | # formula <- formula_hal("Y ~ h(X1) + h(X2) + h(X3) + h(X1,X2) + h(X2,X3) + h(X1,X3)", x, num_knots = c(5, 5)) 63 | # blist1 <- formula$basis_list 64 | # formula <- formula_hal("Y ~ .^2", x, num_knots = 5) 65 | # blist2 <- formula$basis_list 66 | # formula <- formula_hal("Y ~ h(.) + h(.,.)", x, num_knots = 5) 67 | # blist3 <- formula$basis_list 68 | # formula <- formula_hal("Y ~ h(X1) + h(X2) +h(X1) + h(X3) + h(X1,X2) + h(X2,X3) + h(X1,X3) +.^2 +.", x, num_knots = 5) 69 | # blist4 <- formula$basis_list 70 | # 71 | # formula <- formula_hal("Y ~ h(a) + h(a,b) + h(a,a)", x, num_knots = 5, custom_group = list("a" = c("X1", "X2", "X3"), "b" = c("X1", "X2", "X3"))) 72 | # blist5 <- formula$basis_list 73 | # 74 | # test_that("Check formula", { 75 | # expect_true(length(blist1) == length(blist2) && length(setdiff(blist1, blist2)) == 0) 76 | # expect_true(length(blist1) == length(blist3) && length(setdiff(blist1, blist3)) == 0) 77 | # expect_true(length(blist1) == length(blist4) && length(setdiff(blist1, blist4)) == 0) 78 | # expect_true(length(blist1) == length(blist5) && length(setdiff(blist1, blist5)) == 0) 79 | # }) 80 | # 81 | # 82 | # 83 | # formula <- formula_hal("Y ~ i(.) 
+ i(.,.)", x, num_knots = 3) 84 | # upper <- formula$upper.limits 85 | # lower <- formula$lower.limits 86 | # 87 | # 88 | # test_that("Check formula", { 89 | # expect_true(all(upper == Inf) && all(lower == 0)) 90 | # }) 91 | # 92 | # formula <- formula_hal("Y ~ h(.) + h(.,.)", x, num_knots = 3) 93 | # upper <- formula$upper.limits 94 | # lower <- formula$lower.limits 95 | # 96 | # 97 | # test_that("Check formula", { 98 | # expect_true(all(upper == Inf) && all(lower == -Inf)) 99 | # }) 100 | # formula <- formula_hal("Y ~ d(.) + d(.,.)", x, num_knots = 3) 101 | # upper <- formula$upper.limits 102 | # lower <- formula$lower.limits 103 | # 104 | # test_that("Check formula", { 105 | # expect_true(all(upper == 0) && all(lower == -Inf)) 106 | # }) 107 | -------------------------------------------------------------------------------- /tests/testthat/test-general_families.R: -------------------------------------------------------------------------------- 1 | context("HAL with general familes.") 2 | set.seed(45791) 3 | 4 | # easily compute MSE 5 | mse <- function(preds, y) { 6 | mean((preds - y)^2) 7 | } 8 | 9 | # generate simple test data 10 | n <- 100 11 | p <- 3 12 | x <- xmat <- matrix(rnorm(n * p), n, p) 13 | y_prob <- plogis(3 * sin(x[, 1]) + sin(x[, 2])) 14 | y <- rbinom(n = n, size = 1, prob = y_prob) 15 | 16 | test_n <- 100 17 | test_x <- matrix(rnorm(test_n * p), test_n, p) 18 | test_y_prob <- plogis(3 * sin(test_x[, 1]) + sin(test_x[, 2])) 19 | test_y <- rbinom(n = test_n, size = 1, prob = y_prob) 20 | fit_control <- list(prediction_bounds = c(0.01, 0.99)) 21 | # ml implementation 22 | ml_hal_fit <- suppressWarnings( 23 | fit_hal(X = x, Y = y, family = "binomial", fit_control = fit_control) 24 | ) 25 | ml_hal_fit$times 26 | x_basis <- make_design_matrix(x, ml_hal_fit$basis_list) 27 | 28 | # training sample prediction 29 | preds <- predict(ml_hal_fit, new_data = x) 30 | ml_hal_mse1 <- mse(preds, y_prob) 31 | set.seed(45791) 32 | 33 | ml_hal_fit <- suppressWarnings( 
34 | fit_hal(X = x, Y = y, family = binomial(), fit_control = fit_control) 35 | ) 36 | ml_hal_fit$times 37 | x_basis <- make_design_matrix(x, ml_hal_fit$basis_list) 38 | 39 | # training sample prediction 40 | preds <- predict(ml_hal_fit, new_data = x) 41 | ml_hal_mse2 <- mse(preds, y_prob) 42 | 43 | test_that("MSE for logistic regression close to logistic family object pred", { 44 | expect_true(abs(ml_hal_mse1 - ml_hal_mse2) < 0.01) 45 | }) 46 | 47 | # ml implementation 48 | ml_hal_fit <- suppressWarnings(fit_hal(X = x, Y = y, family = "poisson")) 49 | ml_hal_fit$times 50 | x_basis <- make_design_matrix(x, ml_hal_fit$basis_list) 51 | 52 | # training sample prediction 53 | preds <- predict(ml_hal_fit, new_data = x) 54 | ml_hal_mse1 <- mse(preds, y_prob) 55 | set.seed(45791) 56 | 57 | ml_hal_fit <- suppressWarnings(fit_hal(X = x, Y = y, family = poisson())) 58 | ml_hal_fit$times 59 | x_basis <- make_design_matrix(x, ml_hal_fit$basis_list) 60 | 61 | # training sample prediction 62 | preds <- predict(ml_hal_fit, new_data = x) 63 | ml_hal_mse2 <- mse(preds, y_prob) 64 | 65 | test_that("MSE for logistic regression close to logistic family object pred", { 66 | expect_true(abs(ml_hal_mse1 - ml_hal_mse2) < 0.01) 67 | }) 68 | 69 | test_that("Error when prediction_bounds is incorrectly formatted", { 70 | fit_control <- list(prediction_bounds = 9) 71 | expect_error(fit_hal(X = x, Y = y, fit_control = fit_control)) 72 | }) 73 | 74 | test_that("Message when standardize set to TRUE", { 75 | fit_control <- list(standardize = TRUE) 76 | expect_message(fit_hal(X = x, Y = y, fit_control = fit_control)) 77 | }) 78 | 79 | test_that("Warning when reduce_basis without zero-order smoothness", { 80 | expect_warning(fit_hal(X = x, Y = y, reduce_basis = 0.95)) 81 | }) 82 | -------------------------------------------------------------------------------- /tests/testthat/test-hal_binomial.R: -------------------------------------------------------------------------------- 1 | context("HAL with 
binary outcomes: regularized logistic regression.") 2 | set.seed(45791) 3 | 4 | # easily compute MSE 5 | mse <- function(preds, y) { 6 | mean((preds - y)^2) 7 | } 8 | 9 | 10 | # generate simple test data 11 | n <- 100 12 | p <- 3 13 | x <- xmat <- matrix(rnorm(n * p), n, p) 14 | y_prob <- plogis(3 * sin(x[, 1]) + sin(x[, 2])) 15 | y <- rbinom(n = n, size = 1, prob = y_prob) 16 | 17 | test_n <- 100 18 | test_x <- matrix(rnorm(test_n * p), test_n, p) 19 | test_y_prob <- plogis(3 * sin(test_x[, 1]) + sin(test_x[, 2])) 20 | test_y <- rbinom(n = test_n, size = 1, prob = y_prob) 21 | 22 | # ml implementation 23 | ml_hal_fit <- fit_hal(X = x, Y = y, family = "binomial", yolo = FALSE) 24 | ml_hal_fit$times 25 | x_basis <- make_design_matrix(x, ml_hal_fit$basis_list) 26 | 27 | # training sample prediction 28 | preds <- predict(ml_hal_fit, new_data = x) 29 | ml_hal_mse <- mse(preds, y_prob) 30 | 31 | test_that("MSE for logistic regression results is less than for null model", { 32 | expect_lt(ml_hal_mse, mse(rep(mean(y), n), y_prob)) 33 | }) 34 | 35 | # out-of-bag prediction 36 | oob_preds <- predict(ml_hal_fit, new_data = test_x) 37 | oob_ml_hal_mse <- mse(oob_preds, y = test_y_prob) 38 | 39 | test_that("MSE for logistic regression on test set is less than for nulll", { 40 | expect_lt(oob_ml_hal_mse, mse(rep(mean(y), test_n), test_y_prob)) 41 | }) 42 | 43 | test_that("Prediction bounds respected when numeric vector supplied", { 44 | ml_hal_fit <- fit_hal( 45 | X = x, Y = y, family = "binomial", 46 | fit_control = list(prediction_bounds = c(0.4, 0.7)) 47 | ) 48 | preds <- predict(ml_hal_fit, new_data = x) 49 | expect_true(min(preds) >= 0.4) 50 | expect_true(max(preds) <= 0.7) 51 | }) 52 | 53 | test_that("Check of prediction_bounds formatting errors", { 54 | kitty_fit_control <- list(prediction_bounds = 9) 55 | expect_error( 56 | fit_hal(X = x, Y = y, family = "binomial", fit_control = kitty_fit_control) 57 | ) 58 | }) 59 | 60 | test_that("Check of fit_control formatting 
errors", { 61 | kitty_fit_control <- list("kitty" = TRUE) 62 | expect_warning( 63 | fit_hal(X = x, Y = y, family = "binomial", fit_control = kitty_fit_control) 64 | ) 65 | }) 66 | -------------------------------------------------------------------------------- /tests/testthat/test-hal_comparison.R: -------------------------------------------------------------------------------- 1 | context("Unit test for the HAL estimation procedure.") 2 | set.seed(45791) 3 | 4 | # easily compute MSE 5 | mse <- function(preds, y) { 6 | mean((preds - y)^2) 7 | } 8 | 9 | # generate simple test data 10 | n <- 100 11 | p <- 3 12 | x <- xmat <- matrix(rnorm(n * p), n, p) 13 | y <- sin(x[, 1]) * sin(x[, 2]) + rnorm(n, mean = 0, sd = 0.2) 14 | 15 | test_n <- 100 16 | test_x <- matrix(rnorm(test_n * p), test_n, p) 17 | test_y <- sin(test_x[, 1]) * sin(test_x[, 2]) + rnorm( 18 | test_n, 19 | mean = 0, 20 | sd = 0.2 21 | ) 22 | 23 | # original implementation 24 | if ("hal" %in% installed.packages()) { 25 | classic_hal_fit <- hal::hal(Y = y, X = x, verbose = FALSE) 26 | classic_hal_fit$times 27 | } 28 | 29 | # hal9001 implementation 30 | hal_fit <- fit_hal(X = x, Y = y, yolo = FALSE) 31 | hal_fit$times 32 | 33 | # training sample prediction 34 | preds <- predict(hal_fit, new_data = x) 35 | hal_mse <- mse(preds, y) 36 | 37 | # out-of-bag prediction 38 | oob_preds <- predict(hal_fit, new_data = test_x) 39 | oob_ml_hal_mse <- mse(oob_preds, y = test_y) 40 | 41 | # squash object 42 | squashed <- squash_hal_fit(hal_fit) 43 | test_that("Squashed HAL objects are smaller than before squashing", { 44 | expect_lt(object.size(squashed), object.size(hal_fit)) 45 | }) 46 | 47 | # verify squashing does not impact prediction on original data 48 | sq_preds <- predict(squashed, new_data = x) 49 | test_that("Sqashing HAL objects does not impact prediction (in sample)", { 50 | expect_equal(preds, sq_preds) 51 | }) 52 | 53 | # verify squashing does not impact prediction on test data 54 | sq_oob_preds <-
predict(squashed, new_data = test_x) 55 | test_that("Sqashing HAL objects does not impact prediction (out of sample)", { 56 | expect_equal(oob_preds, sq_oob_preds) 57 | }) 58 | -------------------------------------------------------------------------------- /tests/testthat/test-hal_hazards.R: -------------------------------------------------------------------------------- 1 | # NOTE: https://stats.stackexchange.com/questions/46532/cox-baseline-hazard 2 | context("Hazard estimation with HAL based on penalized Cox.") 3 | set.seed(74296) 4 | library(glmnet) 5 | library(survival) 6 | 7 | # create survival data structures 8 | data(cancer, package = "survival") 9 | y_surv <- Surv(kidney$time, kidney$status) 10 | x_surv <- kidney[, c("age", "sex", "disease", "frail")] 11 | x_surv$disease <- as.numeric(x_surv$disease) 12 | x_surv <- as.matrix(x_surv) 13 | 14 | # fit Cox model for baseline hazard (actually gives cumulative baseline hazard) 15 | cph <- coxph(Surv(time, status) ~ age + sex + disease + frail, kidney, 16 | method = "breslow" 17 | ) 18 | lambda0_cum <- basehaz(cph, centered = FALSE) 19 | 20 | # convert from cumulative baseline hazard to standard baseline hazard 21 | haz <- exp(diff(lambda0_cum[, 1]) * diff(lambda0_cum[, 2])) 22 | lambda0 <- rep(c(lambda0_cum$hazard[1], diff(lambda0_cum$hazard)), 23 | times = table(sort(kidney$time)) 24 | ) 25 | 26 | # fit CV-lasso with Cox penalty and predict 27 | cv_coxnet <- cv.glmnet(x = x_surv, y = y_surv, family = "cox") 28 | coxnet_pred <- as.numeric(predict(cv_coxnet, x_surv, type = "response")) 29 | 30 | # try with hal9001 instead of glmnet 31 | cv_halcox <- suppressWarnings(fit_hal( 32 | X = x_surv, Y = y_surv, family = "cox", yolo = FALSE 33 | )) 34 | halcox_pred <- predict(cv_halcox, new_data = x_surv) 35 | 36 | 37 | 38 | # fit lasso with Cox penalty over a grid of lambda and predict 39 | nocv_coxnet <- glmnet(x = x_surv, y = y_surv, family = "cox", nlambda = 200) 40 | nocv_coxnet_pred <- as.matrix(predict(nocv_coxnet,
x_surv, type = "response")) 41 | 42 | # fit HAL with Cox penalty over a grid of lambda and predict 43 | nocv_halcox <- suppressWarnings(fit_hal( 44 | X = x_surv, Y = y_surv, family = "cox", 45 | fit_control = list(cv_select = FALSE, nlambda = 200), 46 | yolo = FALSE 47 | )) 48 | nocv_halcox_pred <- predict(nocv_halcox, new_data = x_surv) 49 | -------------------------------------------------------------------------------- /tests/testthat/test-hal_multivariate.R: -------------------------------------------------------------------------------- 1 | context("Multivariate outcome prediction with HAL") 2 | 3 | library(glmnet) 4 | data(MultiGaussianExample) 5 | 6 | # get hal fit 7 | set.seed(74296) 8 | hal_fit <- fit_hal( 9 | X = MultiGaussianExample$x, Y = MultiGaussianExample$y, family = "mgaussian", 10 | return_x_basis = TRUE 11 | ) 12 | hal_summary <- summary(hal_fit) 13 | 14 | test_that("HAL and glmnet predictions match for multivariate outcome", { 15 | # get hal preds 16 | hal_pred <- predict(hal_fit, new_data = MultiGaussianExample$x) 17 | # get glmnet preds 18 | set.seed(74296) 19 | glmnet_fit <- cv.glmnet( 20 | x = hal_fit$x_basis, y = MultiGaussianExample$y, 21 | family = "mgaussian", standardize = FALSE, 22 | lambda.min.ratio = 1e-4 23 | ) 24 | glmnet_pred <- predict(glmnet_fit, hal_fit$x_basis, s = hal_fit$lambda_star)[, , 1] 25 | # test equivalence 26 | colnames(glmnet_pred) <- colnames(hal_pred) 27 | expect_equivalent(glmnet_pred, hal_pred) 28 | }) 29 | 30 | test_that("HAL summarizes coefs for each multivariate outcome prediction", { 31 | expect_equal(ncol(MultiGaussianExample$y), length(hal_summary)) 32 | }) 33 | 34 | test_that("HAL summarizes coefs appropriately for multivariate outcome", { 35 | # this checks intercept matches 36 | lapply(seq_along(hal_summary), function(i) { 37 | expect_equal(hal_fit$coefs[[i]][1, ], as.numeric(hal_summary[[i]]$table[1, 1])) 38 | }) 39 | }) 40 | 41 | test_that("Error when prediction_bounds is incorrectly formatted", { 42 
fit_control <- list(prediction_bounds = 9)
  expect_error(fit_hal(
    X = MultiGaussianExample$x, Y = MultiGaussianExample$y,
    family = "mgaussian", fit_control = fit_control
  ))
})

test_that("HAL summary for multivariate outcome predictions prints", {
  hal_summary2 <- summary(hal_fit, only_nonzero_coefs = FALSE)
  expect_output(print(hal_summary, length = 2))
  expect_output(print(hal_summary))
  expect_output(print(hal_summary2, length = 2))
  expect_output(print(hal_summary2))
})

# -- File: tests/testthat/test-hal_nocv.R ------------------------------------

context("HAL without CV-selection of regularization parameter.")
set.seed(45791)

# generate simple test data
n_obs <- 100
p_dim <- 3
x <- matrix(rnorm(n_obs * p_dim), n_obs, p_dim)
y_prob <- plogis(3 * sin(x[, 1]) + sin(x[, 2]))
y <- rbinom(n = n_obs, size = 1, prob = y_prob)

# HAL without
hal_fit_nocv <- fit_hal(
  X = x, Y = y,
  family = "binomial",
  fit_control = list(cv_select = FALSE)
)

# training sample prediction
n_lambda <- length(hal_fit_nocv$lambda_star)
preds <- predict(hal_fit_nocv, new_data = x)

test_that("Predictions are the right shape when no CV-selection performed", {
  # are the predictions a matrix?
  expect_true(is.matrix(preds))

  # are the predictions the right shape?
  expect_equal(nrow(preds), n_obs)
  expect_equal(ncol(preds), n_lambda)
})

# -- File: tests/testthat/test-higher_order_smoothness.R ---------------------

context("Higher order smoothness HAL")
library(hal9001)
set.seed(1234)
n <- 100
p <- 3
x <- xmat <- matrix(rnorm(n * p), n, p)
y <- sin(x[, 1]) + rnorm(n, mean = 0, sd = 0.2)

test_n <- 500
test_x <- matrix(rnorm(test_n * p), test_n, p)
test_y <- sin(test_x[, 1]) #* sin(test_x[, 2])
# + rnorm(
#   test_n,
#   mean = 0,
#   sd = 0.2
# )

fit0 <- fit_hal(x, y, max_degree = 1, smoothness_orders = 0, num_knots = 5)
fit1 <- fit_hal(x, y, max_degree = 1, smoothness_orders = 1, num_knots = 5)
fit2 <- fit_hal(x, y, max_degree = 1, smoothness_orders = 2, num_knots = 5)

# visual check
plot(predict(fit0, new_data = test_x), test_y)
plot(predict(fit1, new_data = test_x), test_y)
plot(predict(fit2, new_data = test_x), test_y)

# MSE
mse0 <- mean((predict(fit0, new_data = test_x) - test_y)^2)
mse1 <- mean((predict(fit1, new_data = test_x) - test_y)^2)
mse2 <- mean((predict(fit2, new_data = test_x) - test_y)^2)

# these tests might fail at random???
test_that("0th-order HAL has worse MSE than 1st-order w/ fewer knot points", {
  expect_true(mse0 >= mse1)
})

test_that("1st-order HAL has worse MSE than 2nd-order w/ fewer knot points", {
  expect_true(mse1 >= mse2)
})

# -- File: tests/testthat/test-lasso.R ---------------------------------------

context("Unit test for the generic LASSO estimation procedure.")
library(glmnet)
library(methods)
set.seed(749125)

# generate simple test data
n <- 100
p <- 3
x <- xmat <- matrix(rnorm(n * p), n, p)
y <- sin(x[, 1]) * sin(x[, 2]) + rnorm(n, 0, 0.2)

test_n <- 100
test_x <- matrix(rnorm(test_n * p), test_n, p)
test_y <- sin(test_x[, 1]) * sin(test_x[, 2]) + rnorm(test_n, 0, 0.2)

system.time({
  # generate design matrix for HAL
  basis_list <- hal9001:::enumerate_basis(x)
  x_basis <- hal9001:::make_design_matrix(x, basis_list)
  time_design_matrix <- proc.time()
})

system.time({
  # catalog and eliminate duplicates
  copy_map <- hal9001:::make_copy_map(x_basis)
  unique_columns <- as.numeric(names(copy_map))
  x_basis <- x_basis[, unique_columns]
})

#################################################
# use glmnet fit as reference
system.time({
  glmnet_fit <- glmnet::glmnet(
    x = x_basis, y = y, intercept = TRUE,
    nlambda = 100, lambda.min.ratio = 0.01, family = "gaussian",
    alpha = 1, standardize.response = FALSE, standardize = TRUE
  )
})

################################################################################
# PREDICTION
################################################################################
# format test data set
new_data <- as.matrix(test_x)
pred_x_basis <- hal9001:::make_design_matrix(new_data, basis_list)
pred_x_basis <- hal9001:::apply_copy_map(pred_x_basis, copy_map)
gpred_mat <- predict(glmnet_fit, pred_x_basis)
gmses <- apply(gpred_mat, 2, function(preds) {
  mean((preds - test_y)^2)
})

# -- File: tests/testthat/test-make_basis_additional_args.R ------------------

context("Make basis additional args: num_knots, smoothness_orders, include_...")
n <- 100
p <- 3
x <- xmat <- matrix(rnorm(n * p), n, p)
y <- sin(x[, 1]) * sin(x[, 2]) + rnorm(n, mean = 0, sd = 0.2)

test_n <- 100
test_x <- matrix(rnorm(test_n * p), test_n, p)
test_y <- sin(test_x[, 1]) * sin(test_x[, 2])

basis_list1 <- enumerate_basis(x, max_degree = 1, smoothness_orders = rep(1, ncol(x)), num_knots = c(5))
basis_list2 <- enumerate_basis(x, max_degree = 1, smoothness_orders = rep(1, ncol(x)), num_knots = c(10))

test_that("Argument num_knots reduces number of basis function as expected", {
  expect_equal(length(basis_list1), 5 * p)
  expect_equal(length(basis_list2), 10 * p)
})

basis_list <- enumerate_basis(x, max_degree = 1, smoothness_orders = rep(1, ncol(x)), num_knots = NULL)
test_that("Argument smoothness_orders = 1 gives basis list with orders = 1", {
  expect_equal(all(unlist(lapply(basis_list, function(basis) {
    all(basis$orders == 1)
  }))), TRUE)
})
basis_list <- enumerate_basis(x, max_degree = 1, smoothness_orders = rep(2, ncol(x)), num_knots = 25, include_lower_order = T, include_zero_order = T)

number_0 <- sum(sapply(basis_list, function(basis) {
  all(basis$orders == 0)
}))
number_1 <- sum(sapply(basis_list, function(basis) {
  all(basis$orders == 1)
}))
number_2 <- sum(sapply(basis_list, function(basis) {
  all(basis$orders == 2)
}))

test_that("Arguments include_zero_order and
include_lower_order work", {
  expect_equal(number_0, 25 * p)
  expect_equal(number_1, 25 * p)
  expect_equal(number_2, 25 * p)
})


basis_list <- enumerate_edge_basis(x, max_degree = 3, smoothness_orders = rep(1, ncol(x)))
length(basis_list)


test_that("enumerate_edge_basis generates correct number of edge basis functions", {
  expect_equal(length(basis_list), 7)
})

# -- File: tests/testthat/test-reduce_basis_filter.R -------------------------

context("Unit test for elementary basis function reduction procedure.")
#### NOTE: The default hal parameters changed so this test fails.

set.seed(45791)
library(origami)

# generate simple test data
n <- 100
p <- 5
x <- xmat <- matrix(rnorm(n * p), n, p)
y <- sin(x[, 1]) + sin(x[, 2]) + rnorm(n, mean = 0, sd = 0.2)

system.time({
  new_i <- 1
  offset <- rep(mean(y), n)
  current_i <- NULL
  good_i <- NULL
  old_mse <- Inf
  mse <- var(y)
  folds <- make_folds(n, V = 5)
  foldid <- folds2foldvec(folds)
  old_basis <- NULL
  mses <- NULL
  continue <- TRUE
  while (continue) {
    current_i <- c(current_i, new_i)
    #
    # b1 = enumerate_basis(x[new_i,,drop=FALSE],1:3)
    # x_basis <- make_design_matrix(x,c(old_basis,b1))
    # screen_glmnet <- cv.glmnet(x = x_basis, y = y, family = "gaussian",
    #   intercept = TRUE, maxit=1, thresh=1,
    #   foldid=foldid, nlambda=10, keep=TRUE)
    # lambda_min_index <- which.min(screen_glmnet$cvm)
    # cvm_min <- min(screen_glmnet$cvm)
    # preds <- screen_glmnet$fit.preval[,lambda_min_index]

    b1 <- enumerate_basis(x[new_i, , drop = FALSE], 1:3)
    x_basis <- make_design_matrix(x, b1)
    screen_glmnet <- cv.glmnet(
      x = x_basis, y = y, family = "gaussian", offset = offset,
      intercept = FALSE, maxit = 10, thresh = 1e-1, foldid = foldid,
      nlambda = 10,
      keep = TRUE
    )
    lambda_min_index <- which.min(screen_glmnet$cvm)
    cvm_min <- min(screen_glmnet$cvm)
    preds <- screen_glmnet$fit.preval[, lambda_min_index] + offset

    se <- (preds - y)^2
    mse <- mean(se)
    se[c(current_i, new_i)] <- 0
    new_i <- which.max(se)
    # print(sprintf("%f, %f", old_mse, mse))
    continue <- mse < 1.1 * old_mse
    if (mse < old_mse) {
      good_i <- unique(c(good_i, new_i))
      offset <- preds
      old_mse <- mse
      coefs <- as.vector(coef(screen_glmnet, s = "lambda.min"))[-1]
      # old_basis <- union(old_basis,c(old_basis,b1)[which(coefs!=0)])
      # print(length(old_basis))
      old_basis <- unique(c(old_basis, b1))
    }

    mses <- c(mses, old_mse)
    recent_mses <- mses[(max(length(mses) - 10, 0) + 1):length(mses)]
    r <- lm.fit(
      cbind(rep(1, length(recent_mses)), 1:length(recent_mses)),
      recent_mses
    )
    rate <- unlist(coef(r)[2] / coef(r)[1])
    if (is.na(rate)) {
      rate <- -Inf
    }
    # print(rate)
    continue <- (-1 * rate) > 1e-4
    continue <- TRUE
    continue <- length(current_i) < n
  }
})

folds <- make_folds(n, V = 5)
foldid <- folds2foldvec(folds)

x_basis <- make_design_matrix(x, old_basis)
red_glmnet <- cv.glmnet(x_basis, y, keep = TRUE, foldid = foldid)
lambda_min_index <- which.min(red_glmnet$cvm)
preds <- red_glmnet$fit.preval[, lambda_min_index]
mean((preds - y)^2)

system.time({
  # rand_n <- sample(n,length(good_i))
  # full_basis <- enumerate_basis(x[rand_n,],1:3)
  full_basis <- enumerate_basis(x, 1:3)
  # rand_b <- sample(length(full_basis),length(old_basis))
  x_basis <- make_design_matrix(x, full_basis)
  full_glmnet <- cv.glmnet(x_basis, y, keep = TRUE, foldid = foldid)
  lambda_min_index <- which.min(full_glmnet$cvm)
  preds <- full_glmnet$fit.preval[,
lambda_min_index]
  mean((preds - y)^2)
})

fit <- glmnet(
  x = x_basis, y = y, family = "gaussian", offset = offset,
  intercept = FALSE, lambda = 0.03
)
b1 <- coef(fit)

fit <- glmnet(
  x = x_basis, y = y, family = "gaussian", offset = offset,
  intercept = FALSE, maxit = 2, thresh = 1, lambda = 0.03
)
b2 <- coef(fit)

fit <- glmnet(
  x = x_basis, y = y, family = "gaussian", offset = offset,
  intercept = FALSE, maxit = 2, thresh = 1, lambda = 0.03
)
b3 <- coef(fit)

# hal9001 implementation without basis function reduction
system.time({
  hal_fit_full <- fit_hal(
    X = x, Y = y,
    return_lasso = TRUE,
    max_degree = 3,
    num_knots = length(y),
    smoothness_orders = 0,
    yolo = FALSE
  )
})
hff_preds <- predict(hal_fit_full, new_data = x)
mean((y - hff_preds + mean(hff_preds))^2)
hal_fit_full$times
hal_pred_full <- predict(hal_fit_full, new_data = x)
mse_hal_full <- mean((y - hal_pred_full)^2)

# hal9001 implementation with basis function reduction
hal_fit_reduced <- fit_hal(
  X = x, Y = y,
  return_lasso = TRUE,
  reduce_basis = 1 / sqrt(n),
  max_degree = 3,
  num_knots = length(y),
  smoothness_orders = 0,
  yolo = FALSE
)

hal_fit_reduced$times
hal_pred_reduced <- predict(hal_fit_reduced, new_data = x)
mse_hal_reduced <- mean((y - hal_pred_reduced)^2)

# TEST: reduced HAL object contains fewer lasso coefficients than full object
test_that("Basis reduction passes fewer beta estimates to the lasso model", {
  coef_hal_reduced <- dim(coef(hal_fit_reduced$lasso_fit))[1]
  # FIX: originally this line copy-pasted `hal_fit_reduced`, so the reduced
  # fit was compared against itself and the expectation was vacuously true;
  # the comparison must be against the full (unreduced) fit.
  coef_hal_full <- dim(coef(hal_fit_full$lasso_fit))[1]
  expect_lte(coef_hal_reduced, coef_hal_full)
})

test_that("Predictions are not too different when reducing basis functions", {
  expect_lt(mean((hal_pred_full - hal_pred_reduced)^2), 0.02)
})

# ensure hal fit with reduce_basis works with new data for prediction
newx <- matrix(rnorm(n * p), n, p)
hal_pred_reduced_newx <- predict(hal_fit_reduced, new_data = newx)

# -- File: tests/testthat/test-screen_experimental.R -------------------------

# 06 April 2020 - test is failing, corresponding code needs review/re-haul
if (FALSE) {
  context("Unit test for HAL screening procedure")
  library(glmnet)
  set.seed(749125)

  n <- 100
  p <- 5
  x <- xmat <- matrix(rnorm(n * p), n, p)
  y <- 10 * x[, 1] + 5 * x[, 2] + 6 * x[, 1] * x[, 2] +
    rnorm(n, mean = 0, sd = 0.2)

  testn <- 10000
  testx <- xmat <- matrix(rnorm(testn * p), testn, p)
  testy <- 10 * testx[, 1] + 5 * testx[, 2] + 6 * testx[, 1] * testx[, 2] +
    rnorm(n, mean = 0, sd = 0.2)

  select_list <- 2
  select_rank1 <- hal_screen_rank(x, y, k = 1, family = "gaussian")
  test_that("Rank function works properly with k(k!=NULL)", {
    expect_equal(select_list, select_rank1) # k=length(select_list), equal
  })

  select_list <- c(2, 3)
  select_rank2 <- hal_screen_rank(x, y, family = "gaussian")

  test_that("Rank function works properly without k", {
    expect_equal(select_list, select_rank2) # k=NULL, equal
  })

  # x_interaction_basis <- cbind(x, x[,1]*x[,2], x[,1]*x[,3], x[,2]*x[,3])# generate main terms and 2-way interaction
  # x_basis_lists <- list(1, 2, 3, c(1,2), c(1,3), c(2,3))#generate the column lists
  x_basis_lists <- list(1, 2, c(1, 2))
  goodbasis <- hal_screen_goodbasis(x, y,
    actual_max_degree = 2, k = NULL,
    family = "gaussian"
  )


  test_that("Goodbasis function works properly with interaction", {
    x_basis_str <-
lapply(x_basis_lists, paste, collapse = ",")
    goodbasis_str <- lapply(goodbasis, paste, collapse = ",")
    # when k=6, they must be equal, all columns would be selected
    expect_setequal(x_basis_str, goodbasis_str)
  })
  #
  # x_basis<-matrix(nrow = n, ncol = 1)
  #
  # basis_list <- c()
  # for (i in seq_along(x_basis_lists)) {
  #   col_list <- x_basis_lists[[i]]
  #   basis_list <- c(basis_list,basis_list_cols(col_list, x))
  #
  # }
  #
  # x_basis <- make_design_matrix(x, basis_list)#generate k*n basis functions
  #
  # test_x_basis <- make_design_matrix(testx, basis_list)

  hal_with_screening <- fit_hal(x, y, screen_basis = TRUE)
  hal_without_screening <- fit_hal(x, y, screen_basis = FALSE)

  preds <- predict(hal_with_screening, new_data = testx)
  mse_w_screening <- mean((preds - testy)^2)
  preds <- predict(hal_without_screening, new_data = testx)
  mse_wo_screening <- mean((preds - testy)^2)

  hal_with_screening$times
  hal_without_screening$times


  test_that("screening makes things faster", {
    with_time <- hal_with_screening$times["total", "elapsed"]
    wo_time <- hal_without_screening$times["total", "elapsed"]
    expect_lt(with_time, wo_time)
  })

  test_that("screening doesn't hurt mse too much", {
    expect_lt(mse_w_screening, mse_wo_screening * 1.2)
  })
}

# -- File: tests/testthat/test-single_lambda.R -------------------------------

context("Feeding single lambda into hal9001 (glmnet version) will not error.")
set.seed(1234)
n <- 100
x <- rnorm(n)
y <- as.numeric(plogis(2 * x + rnorm(n)) > 0.5)
wts <- rep(1, n)

# fit via call to glmnet::glmnet for a single value of lambda
hal_fit <- fit_hal(
  X = x,
  Y = y,
  fit_control = list(weights = wts, use_min = TRUE, cv_select = FALSE),
  yolo = FALSE,
  family = "binomial",
  lambda = 2e-2,
  return_lasso = TRUE
)

test_that("Output object is `glmnet`.", {
  expect_true("glmnet" %in% class(hal_fit$lasso_fit))
})

test_that("Output object is not `cv.glmnet`.", {
  expect_false("cv.glmnet" %in% class(hal_fit$lasso_fit))
})

# -- File: tests/testthat/test-sl_ecpolley.R ---------------------------------

context("Fits and prediction of SuperLearner package.")
library(SuperLearner)

# easily compute MSE
mse <- function(preds, y) {
  mean((preds - y)^2)
}

# simulation constants
set.seed(479512)
p <- 3 # dimensionality
n <- 100 # observations

# simulate data
x <- as.data.frame(replicate(p, rnorm(n)))
y <- sin(1 / x[, 2]) + rnorm(n, mean = 0, sd = 0.2)
test_x <- as.data.frame(replicate(p, rnorm(n)))
test_y <- sin(1 / test_x[, 2]) + cos(test_x[, 3]) +
  rnorm(n, mean = 0, sd = 0.2)

# run HAL by itself
hal <- fit_hal(X = x, Y = y, yolo = FALSE)
pred_hal_train <- predict(hal, new_data = x)
pred_hal_test <- predict(hal, new_data = test_x)

# run SL-classic with glmnet and get predictions
hal_sl <- SuperLearner(Y = y, X = x, SL.lib = "SL.hal9001")
sl_hal_fit <- SL.hal9001(
  Y = y, X = x, newX = NULL,
  family = stats::gaussian(),
  obsWeights = rep(1, length(y)),
  id = seq_along(y)
)
# hal9001:::predict.SL.hal9001(sl_hal_fit$fit,newX=x,newdata=x)
pred_hal_sl_train <- as.numeric(predict(hal_sl, newX = x)$pred)
pred_hal_sl_test <- as.numeric(predict(hal_sl, newX = test_x)$pred)

# run an SL with HAL and some parametric learners
sl <- SuperLearner(
  Y = y, X = x, SL.lib = c("SL.mean", "SL.hal9001"),
  cvControl = list(validRows =
hal_sl$validRows)
)

# test for HAL vs. SL-HAL: outputs are the same length
test_that("HAL and SuperLearner-HAL produce results of same shape", {
  expect_equal(length(pred_hal_train), length(pred_hal_sl_train))
  expect_equal(length(pred_hal_test), length(pred_hal_sl_test))
})

# test of MSEs being close: SL-HAL and SL dominated by HAL should be very close
# (hence the rather low tolerance, esp. given an additive scale)
test_that("HAL dominates other algorithms when used in SuperLearner", {
  pred_sl_test <- as.numeric(predict(sl, newX = test_x)$pred)
  expect_equal(
    mse(pred_sl_test, test_y),
    expected = mse(pred_hal_sl_test, test_y),
    scale = mse(pred_hal_sl_test, test_y), tolerance = 0.05
  )
})

# test of SL-HAL risk: HAL has lowest CV-risk in the learner library
test_that("HAL has the lowest CV-risk amongst algorithms in Super Learner", {
  expect_equivalent(names(which.min(sl$cvRisk)), "SL.hal9001_All")
})

# -- File: tests/testthat/test-stat_performance.R ----------------------------

context("Verify Statistical Performance")
library(glmnet)

# generate training and test data
# adapted from https://github.com/tlverse/hal9001/issues/9
g0_linear <- function(W1, W2, W3, W4) {
  plogis(0.5 * (-0.8 * W1 + 0.39 * W2 + 0.08 * W3 - 0.12 * W4 - 0.15))
}

Q0_trig1 <- function(A, W1, W2, W3, W4) {
  plogis(0.14 * (2 * A +
    2 * A * W1 +
    20 * cos(W1) * A -
    3 * W1 * sin(2 * W2) +
    cos(W1) -
    3 * W2 +
    4 * A * (W2^2) +
    3 * cos(W4) * A +
    A * W1^2 -
    2 * sin(W2) * W4 -
    6 * A * W3 * W4 -
    3))
}

gendata <- function(n, g0, Q0) {
  W1 <- runif(n, -3, 3)
  W2 <- rnorm(n)
  W3 <- runif(n)
  W4 <- rnorm(n)
  A <- rbinom(n, 1, g0(W1, W2, W3, W4))
  Y <- rbinom(n, 1, Q0(A, W1, W2, W3, W4))
  data.frame(A, W1, W2, W3, W4, Y)
}

set.seed(1234)
data <- gendata(100, g0 = g0_linear, Q0 = Q0_trig1)
covars <- setdiff(names(data), "Y")
X <- data[, covars, drop = FALSE]
Y <- data$Y
testdata <- gendata(100, g0 = g0_linear, Q0 = Q0_trig1)
testY <- Y # testdata$Y
testX <- X # testdata[, covars, drop = F]


#########################################
# hal classic fit and prediction

if ("hal" %in% installed.packages()) {
  # NOTE: see https://github.com/benkeser/halplus
  library(hal)
  set.seed(1234) # attempt to control randomness in cv.glmnet fold generation
  halres <- hal(Y = Y, newX = testX, X = X, verbose = FALSE, parallel = FALSE)
  pred <- halres$pred

  # should be nonzero
  length(halres$dupInds)

  # how many basis functions did we generate?
  nbasis <- length(coef(halres$object))
  coefs <- coef(halres$object, "lambda.min")

  # compute MSE
  mean((pred - testY)^2)
}

#########################################
# hal9001 with default arguments
# fold_id <- sample(1:10,length(Y),replace=T)
# attempt to control randomness in cv.glmnet fold generation
X <- as.matrix(X)
# test <- hal_screen_basis(X, Y,family="gaussian", verbose=TRUE, main_terms = FALSE)
halres9001 <- fit_hal(
  Y = Y, X = X,
  yolo = FALSE
  # NOTE: hal_screen_goodbasis is broken
  # screen_basis = TRUE
  # screen_lambda = TRUE
)
pred9001 <- predict(halres9001, new_data = testX)

# compute MSE
mean((pred9001 - testY)^2)

default_coef <- halres9001$coef
#########################################
# attempt to match hal classic
# good reason to believe basis function code is working (see (test_basis.R)),
# so let's use our basis code

# training
X <- as.matrix(X)
basis_list <-
hal9001:::enumerate_basis(X)
x_basis <- hal9001:::make_design_matrix(X, basis_list)

copy_map <- hal9001:::make_copy_map(x_basis)
unique_columns <- as.numeric(names(copy_map))
x_basis <- x_basis[, unique_columns]
nbasis9001 <- ncol(x_basis)

set.seed(1234)
# attempt to control randomness in cv.glmnet fold generation
# try to match hal param
hal_lasso <- glmnet::cv.glmnet(
  x = x_basis, y = Y, nlambda = 100,
  lambda.min.ratio = 0.001, nfolds = 10,
  family = "gaussian", alpha = 1
)

# prediction
new_data <- as.matrix(testX)
pred_x_basis <- hal9001:::make_design_matrix(new_data, basis_list)
pred_x_basis_uniq <- apply_copy_map(pred_x_basis, copy_map)

# still doesn't quite match
match_pred <- predict(hal_lasso, pred_x_basis_uniq, "lambda.min")
mean((match_pred - testY)^2)
# plot(pred9001, match_pred)

# -- File: tests/testthat/test-summary.R -------------------------------------

context("Summary method.")
set.seed(45791)

n <- 50
p <- 3
x <- matrix(rnorm(n * p), n, p)
y <- sin(x[, 1]) + sin(x[, 2]) + rnorm(n, mean = 0, sd = 0.2)
colnames(x) <- c("col1", "col2", "col3")

hal_fit <- fit_hal(X = x, Y = y, fit_control = list(use_min = FALSE))
hal_fit_nolasso <- fit_hal(X = x, Y = y, yolo = FALSE, return_lasso = FALSE)
hal_fit_nocv <- fit_hal(
  X = x, Y = y, yolo = FALSE, fit_control = list(cv_select = FALSE)
)
hal_fit_nocv_nolasso <- fit_hal(
  X = x, Y = y, yolo = FALSE, fit_control = list(cv_select = FALSE),
  return_lasso = FALSE, return_x_basis = TRUE
)

# Basic summary works
summ <- summary(hal_fit)

# Basic summary works when lambda is provided
summ <- summary(hal_fit, lambda = hal_fit$lambda_star)
summ <- summary(hal_fit, lambda = hal_fit$lasso_fit$lambda[7])


# Summary with all coefficients works
summ <- summary(hal_fit, only_nonzero_coefs = FALSE)

# Summary with nonzero coefs and include_redundant_terms TRUE works
summary_all_nonzero_terms <- summary(hal_fit,
  include_redundant_terms = TRUE
)


# Summary with all coefs and include_redundant_terms TRUE works
summary_all_terms <- summary(
  hal_fit,
  only_nonzero_coefs = FALSE, include_redundant_terms = TRUE
)

# Print works
summary_short <- summary(hal_fit)
summary_long <- summary(hal_fit, only_nonzero_coefs = FALSE)
sink(tempfile())
print(summary_short)
print(summary_long)
print(summary_short, length = 10)
print(summary_long, length = 10)
sink()

test_that("Errors work", {
  expect_error(
    summary(hal_fit, lambda = c(1, 2)),
    "Cannot summarize over multiple values of lambda."
  )
  expect_error(
    summary(hal_fit, lambda = 1),
    "Coefficients for the specified lambda do not exist."
61 | ) 62 | expect_error( 63 | summary(hal_fit_nolasso, lambda = 1) 64 | ) 65 | }) 66 | 67 | test_that("Warnings work", { 68 | expect_warning( 69 | summary(hal_fit_nocv) 70 | ) 71 | expect_warning( 72 | summary(hal_fit_nocv_nolasso) 73 | ) 74 | }) 75 | -------------------------------------------------------------------------------- /tests/testthat/test-varselect_hal.R: -------------------------------------------------------------------------------- 1 | # context("HAL with screening for high-dimensional data") 2 | # set.seed(45791) 3 | 4 | # easily compute MSE 5 | # mse <- function(preds, y) { 6 | # mean((preds - y)^2) 7 | # } 8 | 9 | # generate simple test data 10 | # n <- 1000 11 | # p <- 100 12 | # x <- xmat <- matrix(rnorm(n * p), n, p) 13 | # y_prob <- plogis(3 * sin(x[, 1]) + 3 * sin(x[, 2])) 14 | # y <- rbinom(n = n, size = 1, prob = y_prob) 15 | 16 | # test_n <- 10000 17 | # test_x <- matrix(rnorm(test_n * p), test_n, p) 18 | # test_y_prob <- plogis(3 * sin(test_x[, 1]) + sin(test_x[, 2])) 19 | # test_y <- rbinom(n = test_n, size = 1, prob = y_prob) 20 | 21 | # col_lists <- as.list(1:p) 22 | # i <- 1 23 | 24 | # linear_glmnet <- suppressWarnings(glmnet( 25 | # x = cbind(y, y), y = y, family = "binomial", maxit = 1, 26 | # thresh = 0.01 27 | # )) 28 | # linear_glmnet$lambda 29 | 30 | # TODO: test screening functionality 31 | -------------------------------------------------------------------------------- /tests/testthat/test-x_unpenalized.R: -------------------------------------------------------------------------------- 1 | context("Train and predict with X_unpenalized argument will not error.") 2 | set.seed(1234) 3 | n <- 100 4 | x <- rnorm(n) 5 | a <- rnorm(n) 6 | y <- 2 * x + 5 * a + rnorm(n) 7 | 8 | hal_fit <- fit_hal( 9 | X = as.matrix(x), 10 | Y = y, 11 | X_unpenalized = as.matrix(a), 12 | fit_control = list(use_min = TRUE, cv_select = FALSE), 13 | yolo = FALSE, 14 | family = "gaussian", 15 | lambda = 2e-2, 16 | return_lasso = TRUE 17 | ) 18 | beta_hat 
<- hal_fit$coefs[, 1] 19 | test_that("Training: The coefficient is not penalized heavily.", { 20 | expect_true( 21 | all.equal(tail(beta_hat, 1), 5, tolerance = 0.1, check.attributes = FALSE) 22 | ) 23 | }) 24 | 25 | test_that("Training: input is not a matrix.", { 26 | expect_error(fit_hal( 27 | X = x, 28 | Y = y, 29 | X_unpenalized = a, 30 | fit_control = list(use_min = TRUE, cv_select = FALSE), 31 | yolo = FALSE, 32 | family = "gaussian", 33 | lambda = 2e-2, 34 | return_lasso = TRUE 35 | )) 36 | }) 37 | test_that("Training: Number of rows do not match.", { 38 | expect_error(fit_hal( 39 | X = x, 40 | Y = y, 41 | X_unpenalized = as.matrix(a[-1]), 42 | yolo = FALSE, 43 | family = "gaussian", 44 | lambda = 2e-2, 45 | fit_control = list(use_min = TRUE, cv_select = FALSE), 46 | return_lasso = TRUE 47 | )) 48 | }) 49 | 50 | yhat <- predict(hal_fit, new_data = x, new_X_unpenalized = as.matrix(a)) 51 | test_that("Predict: input not a matrix.", { 52 | expect_error(predict(hal_fit, new_data = x, new_X_unpenalized = a)) 53 | }) 54 | test_that("Predict: new_X_unpenalized not supplied when used in training.", { 55 | expect_error(predict(hal_fit, new_data = x, new_X_unpenalized = NULL)) 56 | }) 57 | test_that("Predict: Number of rows not match.", { 58 | expect_error( 59 | predict(hal_fit, new_data = x, new_X_unpenalized = as.matrix(a[-1])) 60 | ) 61 | }) 62 | test_that("Predict: Number of columns do not match those from training.", { 63 | expect_error(predict(hal_fit, new_data = x, new_X_unpenalized = cbind(a, a))) 64 | }) 65 | --------------------------------------------------------------------------------