├── .Rbuildignore ├── .github └── workflows │ ├── cpp-test.yml │ ├── pypi-wheels.yml │ ├── python-test.yml │ ├── r-cran-branch.yml │ ├── r-devel-check.yml │ └── r-test.yml ├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── CMakeLists.txt ├── CPP_DEPS_LICENSE.md ├── DESCRIPTION ├── Doxyfile ├── LICENSE ├── LICENSE.md ├── MANIFEST.in ├── NAMESPACE ├── NEWS.md ├── R ├── bart.R ├── bcf.R ├── calibration.R ├── config.R ├── cpp11.R ├── data.R ├── forest.R ├── generics.R ├── kernel.R ├── model.R ├── random_effects.R ├── serialization.R ├── stochtree-package.R ├── utils.R └── variance.R ├── README.md ├── R_README.md ├── _pkgdown.yml ├── cmake └── Sanitizer.cmake ├── cran-bootstrap.R ├── cran-cleanup.R ├── cran-comments.md ├── debug ├── README.md ├── api_debug.cpp └── data │ ├── heterosked_test.csv │ └── heterosked_train.csv ├── demo ├── data │ ├── python_r_debug_test.csv │ └── python_r_debug_train.csv ├── debug │ ├── causal_inference.py │ ├── causal_inference_binary_outcome.py │ ├── classification.py │ ├── kernel.py │ ├── multi_chain.py │ ├── multivariate_treatment_causal_inference.py │ ├── parallel_multi_chain.py │ ├── r_comparison_debug.py │ ├── random_effects.py │ ├── rfx_serialization.py │ ├── serialization.py │ ├── supervised_learning.py │ └── supervised_learning_binary_outcome.py └── notebooks │ ├── causal_inference.ipynb │ ├── causal_inference_feature_subsets.ipynb │ ├── heteroskedastic_supervised_learning.ipynb │ ├── multivariate_treatment_causal_inference.ipynb │ ├── prototype_interface.ipynb │ ├── serialization.ipynb │ ├── supervised_learning.ipynb │ ├── supervised_learning_classification.ipynb │ └── tree_inspection.ipynb ├── include ├── nlohmann │ └── json.hpp └── stochtree │ ├── category_tracker.h │ ├── common.h │ ├── container.h │ ├── cutpoint_candidates.h │ ├── data.h │ ├── ensemble.h │ ├── export.h │ ├── gamma_sampler.h │ ├── ig_sampler.h │ ├── io.h │ ├── leaf_model.h │ ├── log.h │ ├── mainpage.h │ ├── meta.h │ ├── normal_sampler.h │ ├── partition_tracker.h 
│ ├── prior.h │ ├── random.h │ ├── random_effects.h │ ├── tree.h │ ├── tree_sampler.h │ └── variance_model.h ├── inst └── COPYRIGHTS ├── man ├── CppJson.Rd ├── CppRNG.Rd ├── Forest.Rd ├── ForestDataset.Rd ├── ForestModel.Rd ├── ForestModelConfig.Rd ├── ForestSamples.Rd ├── GlobalModelConfig.Rd ├── Outcome.Rd ├── RandomEffectSamples.Rd ├── RandomEffectsDataset.Rd ├── RandomEffectsModel.Rd ├── RandomEffectsTracker.Rd ├── bart.Rd ├── bcf.Rd ├── calibrateInverseGammaErrorVariance.Rd ├── computeForestLeafIndices.Rd ├── computeForestLeafVariances.Rd ├── computeForestMaxLeafIndex.Rd ├── convertPreprocessorToJson.Rd ├── createBARTModelFromCombinedJson.Rd ├── createBARTModelFromCombinedJsonString.Rd ├── createBARTModelFromJson.Rd ├── createBARTModelFromJsonFile.Rd ├── createBARTModelFromJsonString.Rd ├── createBCFModelFromCombinedJson.Rd ├── createBCFModelFromCombinedJsonString.Rd ├── createBCFModelFromJson.Rd ├── createBCFModelFromJsonFile.Rd ├── createBCFModelFromJsonString.Rd ├── createCppJson.Rd ├── createCppJsonFile.Rd ├── createCppJsonString.Rd ├── createCppRNG.Rd ├── createForest.Rd ├── createForestDataset.Rd ├── createForestModel.Rd ├── createForestModelConfig.Rd ├── createForestSamples.Rd ├── createGlobalModelConfig.Rd ├── createOutcome.Rd ├── createPreprocessorFromJson.Rd ├── createPreprocessorFromJsonString.Rd ├── createRandomEffectSamples.Rd ├── createRandomEffectsDataset.Rd ├── createRandomEffectsModel.Rd ├── createRandomEffectsTracker.Rd ├── getRandomEffectSamples.Rd ├── getRandomEffectSamples.bartmodel.Rd ├── getRandomEffectSamples.bcfmodel.Rd ├── loadForestContainerCombinedJson.Rd ├── loadForestContainerCombinedJsonString.Rd ├── loadForestContainerJson.Rd ├── loadRandomEffectSamplesCombinedJson.Rd ├── loadRandomEffectSamplesCombinedJsonString.Rd ├── loadRandomEffectSamplesJson.Rd ├── loadScalarJson.Rd ├── loadVectorJson.Rd ├── predict.bartmodel.Rd ├── predict.bcfmodel.Rd ├── preprocessPredictionData.Rd ├── preprocessTrainData.Rd ├── resetActiveForest.Rd ├── 
resetForestModel.Rd ├── resetRandomEffectsModel.Rd ├── resetRandomEffectsTracker.Rd ├── rootResetRandomEffectsModel.Rd ├── rootResetRandomEffectsTracker.Rd ├── sampleGlobalErrorVarianceOneIteration.Rd ├── sampleLeafVarianceOneIteration.Rd ├── saveBARTModelToJson.Rd ├── saveBARTModelToJsonFile.Rd ├── saveBARTModelToJsonString.Rd ├── saveBCFModelToJson.Rd ├── saveBCFModelToJsonFile.Rd ├── saveBCFModelToJsonString.Rd ├── savePreprocessorToJsonString.Rd └── stochtree-package.Rd ├── pyproject.toml ├── python_docs ├── Makefile ├── README.md ├── make.bat ├── requirements.txt └── source │ ├── api.rst │ ├── causal.rst │ ├── conf.py │ ├── index.rst │ ├── install.rst │ └── supervised.rst ├── requirements.txt ├── setup.py ├── src ├── Makevars ├── R_data.cpp ├── R_random_effects.cpp ├── container.cpp ├── cpp11.cpp ├── cutpoint_candidates.cpp ├── data.cpp ├── forest.cpp ├── io.cpp ├── kernel.cpp ├── leaf_model.cpp ├── partition_tracker.cpp ├── py_stochtree.cpp ├── random_effects.cpp ├── sampler.cpp ├── serialization.cpp ├── stochtree_types.h └── tree.cpp ├── stochtree ├── __init__.py ├── bart.py ├── bcf.py ├── calibration.py ├── config.py ├── data.py ├── forest.py ├── kernel.py ├── preprocessing.py ├── random_effects.py ├── sampler.py ├── serialization.py └── utils.py ├── test ├── R │ ├── testthat.R │ └── testthat │ │ ├── test-bart.R │ │ ├── test-bcf.R │ │ ├── test-categorical.R │ │ ├── test-data-preprocessing.R │ │ ├── test-forest-container.R │ │ ├── test-forest.R │ │ ├── test-predict.R │ │ ├── test-residual.R │ │ └── test-serialization.R ├── README.md ├── cpp │ ├── test_category_tracker.cpp │ ├── test_cutpoints.cpp │ ├── test_data.cpp │ ├── test_forest.cpp │ ├── test_json.cpp │ ├── test_model.cpp │ ├── test_predict.cpp │ ├── test_random_effects.cpp │ ├── test_sorted_partition_tracker.cpp │ ├── test_tree.cpp │ ├── test_unsorted_partition_tracker.cpp │ ├── testutils.cpp │ └── testutils.h └── python │ ├── test_bart.py │ ├── test_bcf.py │ ├── test_calibration.py │ ├── 
test_config.py │ ├── test_forest.py │ ├── test_forest_container.py │ ├── test_json.py │ ├── test_kernel.py │ ├── test_predict.py │ ├── test_preprocessor.py │ ├── test_random_effects.py │ ├── test_residual.py │ └── test_utils.py ├── tools ├── data │ ├── python_r_debug_test.csv │ └── python_r_debug_train.csv ├── debug │ ├── additive_lm.R │ ├── bart_profile.R │ ├── bcf_json.R │ ├── bcf_rfx.R │ ├── continuous_treatment_bcf.R │ ├── debug.R │ ├── dgps.R │ ├── forest_reset_debug.R │ ├── heteroskedastic_bart.R │ ├── json_debug.R │ ├── multichain_seq.R │ ├── multivariate_bart_debug.R │ ├── parallel_warmstart.R │ ├── parallel_warmstart_bcf.R │ ├── python_comparison_debug.R │ ├── python_r_debug.R │ ├── r_kernel.R │ └── restricted_sweep.R ├── perf │ ├── bart_microbenchmark.R │ ├── bcf_microbenchmark.R │ ├── bcf_performance_metrics.R │ └── custom_loop_microbenchmark.R ├── setup │ └── setup_r_dependencies.R └── simulations │ ├── bart_comparison.R │ ├── bcf-sim-study.R │ └── bcf_comparison.R └── vignettes ├── BayesianSupervisedLearning.Rmd ├── CausalInference.Rmd ├── CustomSamplingRoutine.Rmd ├── EnsembleKernel.Rmd ├── Heteroskedasticity.Rmd ├── ModelSerialization.Rmd ├── MultiChain.Rmd ├── PriorCalibration.Rmd ├── TreeInspection.Rmd └── vignettes.bib /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^cran-comments\.md$ 4 | -------------------------------------------------------------------------------- /.github/workflows/pypi-wheels.yml: -------------------------------------------------------------------------------- 1 | name: Build Python Wheels for PyPI 2 | # Note: this file is based in part on the example workflow in the cibuildwheel docs 3 | # https://cibuildwheel.pypa.io/en/stable/setup/#github-actions 4 | # and in part on matplotlib's wheel build workflow: 5 | # https://github.com/matplotlib/matplotlib/blob/main/.github/workflows/cibuildwheel.yml 6 | 7 | on: 8 | push: 9 | branches: 
[main] 10 | pull_request: 11 | branches: [main] 12 | workflow_dispatch: 13 | 14 | jobs: 15 | build_wheels: 16 | name: Build wheels on ${{ matrix.os }} 17 | runs-on: ${{ matrix.os }} 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | include: 22 | - os: ubuntu-latest 23 | cibw_archs: "x86_64" 24 | - os: ubuntu-24.04-arm 25 | cibw_archs: "aarch64" 26 | - os: windows-latest 27 | cibw_archs: "auto64" 28 | - os: macos-13 29 | cibw_archs: "x86_64" 30 | - os: macos-14 31 | cibw_archs: "arm64" 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | with: 36 | submodules: 'recursive' 37 | 38 | - name: Build wheels 39 | uses: pypa/cibuildwheel@v2.23.2 40 | env: 41 | CIBW_SKIP: "pp* *-musllinux_* *-win32" 42 | CIBW_ARCHS: ${{ matrix.cibw_archs }} 43 | MACOSX_DEPLOYMENT_TARGET: "10.13" 44 | 45 | - uses: actions/upload-artifact@v4 46 | with: 47 | name: cibw-wheels-${{ matrix.os }}-${{ matrix.cibw_archs }} 48 | path: ./wheelhouse/*.whl 49 | -------------------------------------------------------------------------------- /.github/workflows/python-test.yml: -------------------------------------------------------------------------------- 1 | name: Python Package Unit Tests 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | jobs: 13 | testing: 14 | name: test-python 15 | runs-on: ${{ matrix.os }} 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | os: [ubuntu-latest, windows-latest, macos-latest] 21 | 22 | steps: 23 | - name: Checkout repository 24 | uses: actions/checkout@v4 25 | with: 26 | submodules: 'recursive' 27 | 28 | - name: Setup Python 3.10 29 | uses: actions/setup-python@v5 30 | with: 31 | python-version: "3.10" 32 | cache: "pip" 33 | 34 | - name: Install Package with Relevant Dependencies 35 | run: | 36 | pip install --upgrade pip 37 | pip install -r requirements.txt 38 | pip install . 
39 | 40 | - name: Run Pytest 41 | run: | 42 | pytest test/python 43 | -------------------------------------------------------------------------------- /.github/workflows/r-cran-branch.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main] 4 | pull_request: 5 | branches: [main] 6 | release: 7 | types: [published] 8 | workflow_dispatch: 9 | 10 | name: Update R Package Dev Branch 11 | 12 | jobs: 13 | testing: 14 | name: r-cran-branch 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | permissions: 19 | contents: write 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | with: 24 | submodules: 'recursive' 25 | 26 | - uses: r-lib/actions/setup-pandoc@v2 27 | 28 | - uses: r-lib/actions/setup-r@v2 29 | with: 30 | use-public-rspm: true 31 | 32 | - uses: r-lib/actions/setup-r-dependencies@v2 33 | with: 34 | extra-packages: any::testthat, any::decor 35 | 36 | - name: Create CRAN-formatted source package in stochtree_cran subfolder 37 | run: | 38 | Rscript cran-bootstrap.R 39 | 40 | - name: Deploy to CRAN dev branch 41 | if: github.event_name != 'pull_request' 42 | uses: JamesIves/github-pages-deploy-action@v4.5.0 43 | with: 44 | clean: false 45 | branch: r-dev 46 | folder: stochtree_cran -------------------------------------------------------------------------------- /.github/workflows/r-devel-check.yml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | workflow_dispatch: 5 | 6 | name: R Devel CRAN Checks and Unit Tests 7 | 8 | jobs: 9 | testing: 10 | name: test-r 11 | runs-on: ${{ matrix.os }} 12 | 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: [ubuntu-latest, windows-latest, macos-latest] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | submodules: 'recursive' 22 | 23 | - uses: r-lib/actions/setup-pandoc@v2 24 | 25 | - uses: r-lib/actions/setup-r@v2 26 | with: 27 | r-version: 'devel' 28 | use-public-rspm: true 29 | 30 | - uses: r-lib/actions/setup-r-dependencies@v2 31 | with: 32 | extra-packages: any::testthat, any::decor, any::rcmdcheck 33 | needs: check 34 | 35 | - name: Create a CRAN-ready version of the R package 36 | run: | 37 | Rscript cran-bootstrap.R 0 0 1 38 | 39 | - uses: r-lib/actions/check-r-package@v2 40 | with: 41 | working-directory: 'stochtree_cran' 42 | -------------------------------------------------------------------------------- /.github/workflows/r-test.yml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: R Package Unit Tests 13 | 14 | jobs: 15 | testing: 16 | name: test-r 17 | runs-on: ${{ matrix.os }} 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | os: [ubuntu-latest, windows-latest, macos-latest] 23 | 24 | steps: 25 | - uses: actions/checkout@v4 26 | with: 27 | submodules: 'recursive' 28 | 29 | - uses: r-lib/actions/setup-pandoc@v2 30 | 31 | - uses: r-lib/actions/setup-r@v2 32 | with: 33 | use-public-rspm: true 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::testthat, any::decor, any::rcmdcheck 38 | needs: check 39 | 40 | - name: Create a CRAN-ready version of the R package 41 | run: | 42 | Rscript cran-bootstrap.R 0 0 1 43 | 44 | - uses: r-lib/actions/check-r-package@v2 45 | with: 46 | working-directory: 'stochtree_cran' 47 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "deps/fast_double_parser"] 2 | path = deps/fast_double_parser 3 | url = https://github.com/lemire/fast_double_parser/ 4 | branch = master 5 | [submodule "deps/fmt"] 6 | path = deps/fmt 7 | url = https://github.com/fmtlib/fmt/ 8 | branch = master 9 | [submodule "deps/boost_math"] 10 | path = deps/boost_math 11 | url = https://github.com/boostorg/math 12 | branch = master 13 | [submodule "deps/eigen"] 14 | path = deps/eigen 15 | url = https://gitlab.com/libeigen/eigen 16 | branch = 3.4 17 | [submodule "deps/pybind11"] 18 | path = deps/pybind11 19 | url = https://github.com/pybind/pybind11 20 | branch = v2.12 21 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 
| # Changelog 2 | 3 | # stochtree 0.1.2 4 | 5 | ## New Features 6 | 7 | * Support for binary outcomes in BART and BCF with a probit link ([#164](https://github.com/StochasticTree/stochtree/pull/164)) 8 | 9 | ## Bug Fixes 10 | 11 | * Fixed indexing bug in cleanup of grow-from-root (GFR) samples in BART and BCF models 12 | * Avoid using covariate preprocessor in `computeForestLeafIndices` R function when a `ForestSamples` object is provided (instead of a `bartmodel` or `bcfmodel` object) 13 | 14 | # stochtree 0.1.1 15 | 16 | ## Bug Fixes 17 | 18 | * Fixed initialization bug in several R package code examples for random effects models 19 | 20 | # stochtree 0.1.0 21 | 22 | Initial "alpha" release 23 | 24 | ## New Features 25 | 26 | * Support for sampling stochastic tree ensembles using two algorithms: MCMC and Grow-From-Root (GFR) 27 | * High-level model types supported: 28 | * Supervised learning with constant leaves or user-specified leaf regression models 29 | * Causal effect estimation with binary or continuous treatments 30 | * Additional high-level modeling features: 31 | * Forest-based variance function estimation (heteroskedasticity) 32 | * Additive (univariate or multivariate) group random effects 33 | * Multi-chain sampling and support for parallelism 34 | * "Warm-start" initialization of MCMC forest samplers via the Grow-From-Root (GFR) algorithm 35 | * Automated preprocessing / handling of categorical variables 36 | * Low-level interface: 37 | * Ability to combine a forest sampler with other (additive) model terms, without using C++ 38 | * Combine and sample an arbitrary number of forests or random effects terms 39 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: stochtree 2 | Title: Stochastic Tree Ensembles (XBART and BART) for Supervised Learning and Causal Inference 3 | Version: 0.1.1 4 | Authors@R: 5 | c( 6 | 
person("Drew", "Herren", email = "drewherrenopensource@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4109-6611")), 7 | person("Richard", "Hahn", role = "aut"), 8 | person("Jared", "Murray", role = "aut"), 9 | person("Carlos", "Carvalho", role = "aut"), 10 | person("Jingyu", "He", role = "aut"), 11 | person("Pedro", "Lima", role = "ctb"), 12 | person("stochtree", "contributors", role = c("cph")), 13 | person("Eigen", "contributors", role = c("cph"), comment = "C++ source uses the Eigen library for matrix operations, see inst/COPYRIGHTS"), 14 | person("xgboost", "contributors", role = c("cph"), comment = "C++ tree code and related operations include or are inspired by code from the xgboost library, see inst/COPYRIGHTS"), 15 | person("treelite", "contributors", role = c("cph"), comment = "C++ tree code and related operations include or are inspired by code from the treelite library, see inst/COPYRIGHTS"), 16 | person("Microsoft", "Corporation", role = c("cph"), comment = "C++ I/O and various project structure code include or are inspired by code from the LightGBM library, which is a copyright of Microsoft, see inst/COPYRIGHTS"), 17 | person("Niels", "Lohmann", role = c("cph"), comment = "C++ source uses the JSON for Modern C++ library for JSON operations, see inst/COPYRIGHTS"), 18 | person("Daniel", "Lemire", role = c("cph"), comment = "C++ source uses the fast_double_parser library internally, see inst/COPYRIGHTS"), 19 | person("Victor", "Zverovich", role = c("cph"), comment = "C++ source uses the fmt library internally, see inst/COPYRIGHTS") 20 | ) 21 | Copyright: Copyright details for stochtree's C++ dependencies, which are vendored along with the core stochtree source code, are detailed in inst/COPYRIGHTS 22 | Description: Flexible stochastic tree ensemble software. 
23 | Robust implementations of Bayesian Additive Regression Trees (BART) 24 | Chipman, George, McCulloch (2010) 25 | for supervised learning and Bayesian Causal Forests (BCF) 26 | Hahn, Murray, Carvalho (2020) 27 | for causal inference. Enables model serialization and parallel sampling 28 | and provides a low-level interface for custom stochastic forest samplers. 29 | License: MIT + file LICENSE 30 | Encoding: UTF-8 31 | Roxygen: list(markdown = TRUE) 32 | RoxygenNote: 7.3.2 33 | LinkingTo: 34 | cpp11, BH 35 | Suggests: 36 | testthat (>= 3.0.0), 37 | doParallel, 38 | foreach, 39 | ggplot2, 40 | knitr, 41 | latex2exp, 42 | Matrix, 43 | MASS, 44 | mvtnorm, 45 | rmarkdown, 46 | tgp 47 | VignetteBuilder: knitr 48 | SystemRequirements: C++17 49 | Imports: 50 | R6, 51 | stats 52 | URL: https://stochtree.ai/, https://github.com/StochasticTree/stochtree 53 | BugReports: https://github.com/StochasticTree/stochtree/issues 54 | Config/testthat/edition: 3 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2025 2 | COPYRIGHT HOLDER: stochtree contributors -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023-2025 stochtree authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | 
copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Include cmake build instructions 2 | include CMakeLists.txt 3 | 4 | # Vendor package header files 5 | recursive-include include *.h 6 | recursive-include include *.hpp 7 | 8 | # Vendor package source files (excluding R-specific) 9 | recursive-include src *.cpp 10 | recursive-include src *.h 11 | exclude src/R_data.cpp src/R_random_effects.cpp 12 | 13 | # Remove the CRAN extensionless LICENSE file 14 | exclude LICENSE 15 | 16 | # Remove egg info 17 | prune stochtree.egg-info 18 | 19 | # Vendor pybind11 dependencies in PyPI source distribution 20 | recursive-include deps/pybind11/include/pybind11 *.h 21 | recursive-include deps/pybind11/tools * 22 | include deps/pybind11/CMakeLists.txt 23 | include deps/pybind11/pyproject.toml 24 | include deps/pybind11/setup.cfg 25 | include deps/pybind11/setup.py 26 | 27 | # Vendor fmt header files in PyPI source distribution 28 | recursive-include deps/fmt/include/fmt *.h 29 | 30 | # Vendor fast_double_parser header file in PyPI source distribution 31 | include deps/fast_double_parser/include/fast_double_parser.h 32 | 33 | # Vendor Eigen header files in PyPI source distribution 34 | include deps/eigen/Eigen/Cholesky 35 | include deps/eigen/Eigen/Core 36 | include deps/eigen/Eigen/Dense 37 | include 
deps/eigen/Eigen/Eigenvalues 38 | include deps/eigen/Eigen/Geometry 39 | include deps/eigen/Eigen/Householder 40 | include deps/eigen/Eigen/IterativeLinearSolvers 41 | include deps/eigen/Eigen/Jacobi 42 | include deps/eigen/Eigen/LU 43 | include deps/eigen/Eigen/OrderingMethods 44 | include deps/eigen/Eigen/QR 45 | include deps/eigen/Eigen/SVD 46 | include deps/eigen/Eigen/Sparse 47 | include deps/eigen/Eigen/SparseCholesky 48 | include deps/eigen/Eigen/SparseCore 49 | include deps/eigen/Eigen/SparseQR 50 | include deps/eigen/Eigen/misc 51 | include deps/eigen/Eigen/plugins 52 | recursive-include deps/eigen/Eigen/src/Cholesky *.h 53 | recursive-include deps/eigen/Eigen/src/Core *.h 54 | recursive-include deps/eigen/Eigen/src/Dense *.h 55 | recursive-include deps/eigen/Eigen/src/Eigenvalues *.h 56 | recursive-include deps/eigen/Eigen/src/Geometry *.h 57 | recursive-include deps/eigen/Eigen/src/Householder *.h 58 | recursive-include deps/eigen/Eigen/src/IterativeLinearSolvers *.h 59 | recursive-include deps/eigen/Eigen/src/Jacobi *.h 60 | recursive-include deps/eigen/Eigen/src/LU *.h 61 | recursive-include deps/eigen/Eigen/src/OrderingMethods *.h 62 | recursive-include deps/eigen/Eigen/src/QR *.h 63 | recursive-include deps/eigen/Eigen/src/SVD *.h 64 | recursive-include deps/eigen/Eigen/src/Sparse *.h 65 | recursive-include deps/eigen/Eigen/src/SparseCholesky *.h 66 | recursive-include deps/eigen/Eigen/src/SparseCore *.h 67 | recursive-include deps/eigen/Eigen/src/SparseQR *.h 68 | recursive-include deps/eigen/Eigen/src/misc *.h 69 | recursive-include deps/eigen/Eigen/src/plugins *.h -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(getRandomEffectSamples,bartmodel) 4 | S3method(getRandomEffectSamples,bcfmodel) 5 | S3method(predict,bartmodel) 6 | S3method(predict,bcfmodel) 7 | 
export(bart) 8 | export(bcf) 9 | export(calibrateInverseGammaErrorVariance) 10 | export(computeForestLeafIndices) 11 | export(computeForestLeafVariances) 12 | export(computeForestMaxLeafIndex) 13 | export(convertPreprocessorToJson) 14 | export(createBARTModelFromCombinedJson) 15 | export(createBARTModelFromCombinedJsonString) 16 | export(createBARTModelFromJson) 17 | export(createBARTModelFromJsonFile) 18 | export(createBARTModelFromJsonString) 19 | export(createBCFModelFromCombinedJson) 20 | export(createBCFModelFromCombinedJsonString) 21 | export(createBCFModelFromJson) 22 | export(createBCFModelFromJsonFile) 23 | export(createBCFModelFromJsonString) 24 | export(createCppJson) 25 | export(createCppJsonFile) 26 | export(createCppJsonString) 27 | export(createCppRNG) 28 | export(createForest) 29 | export(createForestDataset) 30 | export(createForestModel) 31 | export(createForestModelConfig) 32 | export(createForestSamples) 33 | export(createGlobalModelConfig) 34 | export(createOutcome) 35 | export(createPreprocessorFromJson) 36 | export(createPreprocessorFromJsonString) 37 | export(createRandomEffectSamples) 38 | export(createRandomEffectsDataset) 39 | export(createRandomEffectsModel) 40 | export(createRandomEffectsTracker) 41 | export(getRandomEffectSamples) 42 | export(loadForestContainerCombinedJson) 43 | export(loadForestContainerCombinedJsonString) 44 | export(loadForestContainerJson) 45 | export(loadRandomEffectSamplesCombinedJson) 46 | export(loadRandomEffectSamplesCombinedJsonString) 47 | export(loadRandomEffectSamplesJson) 48 | export(loadScalarJson) 49 | export(loadVectorJson) 50 | export(preprocessPredictionData) 51 | export(preprocessTrainData) 52 | export(resetActiveForest) 53 | export(resetForestModel) 54 | export(resetRandomEffectsModel) 55 | export(resetRandomEffectsTracker) 56 | export(rootResetRandomEffectsModel) 57 | export(rootResetRandomEffectsTracker) 58 | export(sampleGlobalErrorVarianceOneIteration) 59 | 
export(sampleLeafVarianceOneIteration) 60 | export(saveBARTModelToJson) 61 | export(saveBARTModelToJsonFile) 62 | export(saveBARTModelToJsonString) 63 | export(saveBCFModelToJson) 64 | export(saveBCFModelToJsonFile) 65 | export(saveBCFModelToJsonString) 66 | export(savePreprocessorToJsonString) 67 | importFrom(R6,R6Class) 68 | importFrom(stats,coef) 69 | importFrom(stats,dnorm) 70 | importFrom(stats,lm) 71 | importFrom(stats,model.matrix) 72 | importFrom(stats,pnorm) 73 | importFrom(stats,predict) 74 | importFrom(stats,qgamma) 75 | importFrom(stats,qnorm) 76 | importFrom(stats,resid) 77 | importFrom(stats,rnorm) 78 | importFrom(stats,runif) 79 | importFrom(stats,sd) 80 | importFrom(stats,sigma) 81 | importFrom(stats,var) 82 | useDynLib(stochtree, .registration = TRUE) 83 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # stochtree 0.1.2 2 | 3 | ## New Features 4 | 5 | * Support for binary outcomes in BART and BCF with a probit link ([#164](https://github.com/StochasticTree/stochtree/pull/164)) 6 | 7 | ## Bug Fixes 8 | 9 | * Fixed indexing bug in cleanup of grow-from-root (GFR) samples in BART and BCF models 10 | * Avoid using covariate preprocessor in `computeForestLeafIndices` function when a `ForestSamples` object is provided (rather than a `bartmodel` or `bcfmodel` object) 11 | 12 | # stochtree 0.1.1 13 | 14 | * Fixed initialization bug in several R package code examples for random effects models 15 | 16 | # stochtree 0.1.0 17 | 18 | * Initial release on CRAN. 
#' Calibrate the scale parameter on an inverse gamma prior for the global error variance as in Chipman et al (2022)
#'
#' Chipman, H., George, E., Hahn, R., McCulloch, R., Pratola, M. and Sparapani, R. (2022).
#' Bayesian Additive Regression Trees, Computational Approaches. In Wiley StatsRef:
#' Statistics Reference Online. https://doi.org/10.1002/9781118445112.stat08288
#'
#' @param y Outcome to be modeled using BART, BCF or another nonparametric ensemble method.
#' @param X Covariates to be used to partition trees in an ensemble or series of ensembles.
#' @param W (Optional) Basis used to define a "leaf regression" model for each decision tree.
#'   The "classic" BART model assumes a constant leaf parameter, which is equivalent to a
#'   "leaf regression" on a basis of all ones, though it is not necessary to pass a vector
#'   of ones, here or to the BART function. Default: `NULL`.
#' @param nu The shape parameter for the global error variance's IG prior. The scale
#'   parameter in the Sparapani et al (2021) parameterization is defined as `nu*lambda`
#'   where `lambda` is the output of this function. Default: `3`.
#' @param quant (Optional) Quantile of the inverse gamma prior distribution represented by
#'   a linear-regression-based overestimate of `sigma^2`. Default: `0.9`.
#' @param standardize (Optional) Whether or not outcome should be standardized
#'   (`(y-mean(y))/sd(y)`) before calibration of `lambda`. Default: `TRUE`.
#'
#' @return Value of `lambda` which determines the scale parameter of the global error
#'   variance prior (`sigma^2 ~ IG(nu,nu*lambda)`)
#' @export
#'
#' @examples
#' n <- 100
#' p <- 5
#' X <- matrix(runif(n*p), ncol = p)
#' y <- 10*X[,1] - 20*X[,2] + rnorm(n)
#' nu <- 3
#' lambda <- calibrateInverseGammaErrorVariance(y, X, nu = nu)
#' sigma2hat <- mean(resid(lm(y~X))^2)
#' mean(var(y)/rgamma(100000, nu, rate = nu*lambda) < sigma2hat)
calibrateInverseGammaErrorVariance <- function(y, X, W = NULL, nu = 3, quant = 0.9, standardize = TRUE) {
  # Assemble the design matrix for the pilot regression: covariates alone,
  # or covariates augmented with the leaf basis when one is supplied
  basis <- if (is.null(W)) X else cbind(X, W)
  # Optionally put the outcome on the standardized scale before fitting
  if (standardize) {
    y <- (y - mean(y)) / sd(y)
  }
  # Linear-regression-based overestimate of sigma^2 (mean squared residual)
  pilot_fit <- lm(y ~ basis)
  sigma2hat <- mean(resid(pilot_fit)^2)
  # Solve for lambda so that sigma2hat falls at the requested quantile
  # of the implied IG(nu, nu*lambda) prior
  (sigma2hat * qgamma(1 - quant, nu)) / nu
}
function for extracting random effect samples from a model object (BCF, BART, etc...) 2 | #' 3 | #' @param object Fitted model object from which to extract random effects 4 | #' @param ... Other parameters to be used in random effects extraction 5 | #' @return List of random effect samples 6 | #' @export 7 | #' 8 | #' @examples 9 | #' n <- 100 10 | #' p <- 10 11 | #' X <- matrix(runif(n*p), ncol = p) 12 | #' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 13 | #' rfx_basis <- rep(1.0, n) 14 | #' y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 15 | #' bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, 16 | #' rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) 17 | #' rfx_samples <- getRandomEffectSamples(bart_model) 18 | getRandomEffectSamples <- function(object, ...) UseMethod("getRandomEffectSamples") 19 | -------------------------------------------------------------------------------- /R/stochtree-package.R: -------------------------------------------------------------------------------- 1 | ## usethis namespace: start 2 | #' @importFrom stats coef 3 | #' @importFrom stats dnorm 4 | #' @importFrom stats lm 5 | #' @importFrom stats model.matrix 6 | #' @importFrom stats predict 7 | #' @importFrom stats qgamma 8 | #' @importFrom stats qnorm 9 | #' @importFrom stats pnorm 10 | #' @importFrom stats resid 11 | #' @importFrom stats rnorm 12 | #' @importFrom stats runif 13 | #' @importFrom stats sd 14 | #' @importFrom stats sigma 15 | #' @importFrom stats var 16 | #' @importFrom R6 R6Class 17 | ## usethis namespace: end 18 | NULL 19 | 20 | #' @useDynLib stochtree, .registration = TRUE 21 | "_PACKAGE" -------------------------------------------------------------------------------- /R/variance.R: -------------------------------------------------------------------------------- 1 | #' Sample one iteration of the (inverse gamma) global variance model 2 | #' 3 | #' @param residual Outcome class 4 | #' 
@param dataset ForestDataset class 5 | #' @param rng C++ random number generator 6 | #' @param a Global variance shape parameter 7 | #' @param b Global variance scale parameter 8 | #' @return Sampled value of the global error variance (scalar) 9 | #' @export 10 | #' 11 | #' @examples 12 | #' X <- matrix(runif(10*100), ncol = 10) 13 | #' y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 14 | #' y_std <- (y-mean(y))/sd(y) 15 | #' forest_dataset <- createForestDataset(X) 16 | #' outcome <- createOutcome(y_std) 17 | #' rng <- createCppRNG(1234) 18 | #' a <- 1.0 19 | #' b <- 1.0 20 | #' sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome, forest_dataset, rng, a, b) 21 | sampleGlobalErrorVarianceOneIteration <- function(residual, dataset, rng, a, b) { 22 | return(sample_sigma2_one_iteration_cpp(residual$data_ptr, dataset$data_ptr, rng$rng_ptr, a, b)) 23 | } 24 | 25 | #' Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) 26 | #' 27 | #' @param forest C++ forest 28 | #' @param rng C++ random number generator 29 | #' @param a Leaf variance shape parameter 30 | #' @param b Leaf variance scale parameter 31 | #' @return Sampled value of the leaf scale parameter (scalar) 32 | #' @export 33 | #' 34 | #' @examples 35 | #' num_trees <- 100 36 | #' leaf_dimension <- 1 37 | #' is_leaf_constant <- TRUE 38 | #' is_exponentiated <- FALSE 39 | #' active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 40 | #' rng <- createCppRNG(1234) 41 | #' a <- 1.0 42 | #' b <- 1.0 43 | #' tau <- sampleLeafVarianceOneIteration(active_forest, rng, a, b) 44 | sampleLeafVarianceOneIteration <- function(forest, rng, a, b) { 45 | return(sample_tau_one_iteration_cpp(forest$forest_ptr, rng$rng_ptr, a, b)) 46 | } 47 | -------------------------------------------------------------------------------- /R_README.md: -------------------------------------------------------------------------------- 1 | # stochtree R package 2 | 3 | Software for building stochastic tree ensembles (i.e.
BART, XBART) for supervised learning and causal inference. 4 | 5 | ## Getting started 6 | 7 | `stochtree` can be installed from CRAN via 8 | 9 | ``` 10 | install.packages("stochtree") 11 | ``` 12 | 13 | The development version of stochtree can be installed from github via 14 | 15 | ``` 16 | remotes::install_github("StochasticTree/stochtree", ref="r-dev") 17 | ``` 18 | -------------------------------------------------------------------------------- /cmake/Sanitizer.cmake: -------------------------------------------------------------------------------- 1 | # Set appropriate compiler and linker flags for sanitizers. 2 | # 3 | # Usage of this module: 4 | # enable_sanitizers("address;leak") 5 | 6 | # Add flags 7 | macro(enable_sanitizer sanitizer) 8 | if(${sanitizer} MATCHES "address") 9 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=address") 10 | 11 | elseif(${sanitizer} MATCHES "thread") 12 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=thread") 13 | 14 | elseif(${sanitizer} MATCHES "leak") 15 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=leak") 16 | 17 | elseif(${sanitizer} MATCHES "undefined") 18 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=undefined -fno-sanitize-recover=undefined") 19 | 20 | else() 21 | message(FATAL_ERROR "Sanitizer ${sanitizer} not supported.") 22 | endif() 23 | endmacro() 24 | 25 | macro(enable_sanitizers SANITIZERS) 26 | # Check sanitizers compatibility.
27 | foreach(_san ${SANITIZERS}) 28 | string(TOLOWER ${_san} _san) 29 | if(_san MATCHES "thread") 30 | if(${_use_other_sanitizers}) 31 | message(FATAL_ERROR "thread sanitizer is not compatible with ${_san} sanitizer.") 32 | endif() 33 | set(_use_thread_sanitizer 1) 34 | else() 35 | if(${_use_thread_sanitizer}) 36 | message(FATAL_ERROR "${_san} sanitizer is not compatible with thread sanitizer.") 37 | endif() 38 | set(_use_other_sanitizers 1) 39 | endif() 40 | endforeach() 41 | 42 | message(STATUS "Sanitizers: ${SANITIZERS}") 43 | 44 | foreach(_san ${SANITIZERS}) 45 | string(TOLOWER ${_san} _san) 46 | enable_sanitizer(${_san}) 47 | endforeach() 48 | message(STATUS "Sanitizers compile flags: ${SAN_COMPILE_FLAGS}") 49 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_COMPILE_FLAGS}") 50 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_COMPILE_FLAGS}") 51 | endmacro() 52 | -------------------------------------------------------------------------------- /cran-cleanup.R: -------------------------------------------------------------------------------- 1 | # Create the stochtree_cran folder 2 | cran_dir <- "stochtree_cran" 3 | if (dir.exists(cran_dir)) { 4 | # cran_subfolder_files <- list.files(cran_dir, recursive = TRUE, full.names = TRUE) 5 | unlink(cran_dir, recursive = TRUE) 6 | } -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 3 notes 4 | 5 | * This is a new release. 6 | * Checking installed package size ... 
NOTE installed size is 46.3Mb (linux-only) 7 | * Possibly misspelled words in DESCRIPTION: All of the words are proper nouns or technical terms (BCF, Carvalho, Chipman, McCulloch, XBART) 8 | 9 | ## CRAN comments (20250206) 10 | 11 | Below are responses to the initial comments received from CRAN on Feb 6, 2025 12 | 13 | ### Copyright 14 | 15 | > Please always add all authors, contributors and copyright holders in the Authors@R field with the appropriate roles." 16 | 17 | stochtree's C++ core has several vendored dependencies. The license and copyright details for each of these dependencies are delineated in the inst/COPYRIGHTS file. We have included the authors / contributors of each of these dependencies as copyright holders in the authors list of the DESCRIPTION file and also included a "Copyright:" section in the DESCRIPTION file explaining this. 18 | 19 | ### TRUE / FALSE 20 | 21 | > Please write TRUE and FALSE instead of T and F. 22 | 23 | We have converted `T` and `F` to `TRUE` and `FALSE` in the R code. 24 | 25 | ### Examples with commented code 26 | 27 | > Some code lines in examples are commented out. Please never do that. 28 | 29 | We no longer do this, and apologize for the oversight. 30 | 31 | ## CRAN comments (20250207) 32 | 33 | Below we address issues raised by CRAN on Feb 7, 2025 34 | 35 | ### Valgrind 36 | 37 | A valgrind-instrumented version of R exposed memory issues in several examples 38 | in the `stochtree` documentation. The specific issue is 39 | 40 | > Conditional jump or move depends on uninitialised value(s) 41 | 42 | The examples that triggered this were in fact working with Eigen matrices 43 | with uninitialized values. 44 | 45 | This has been corrected and we have verified that running the `stochtree` 46 | examples no longer produce this memcheck error. 
47 | -------------------------------------------------------------------------------- /debug/README.md: -------------------------------------------------------------------------------- 1 | # Debugging 2 | 3 | This subdirectory contains a debug program for the C++ codebase. 4 | The program takes several command line arguments (in order): 5 | 6 | 1. Which data-generating process (DGP) to run (integer-coded, see below for a detailed description) 7 | 1. Which leaf model to sample (integer-coded, see below for a detailed description) 8 | 3. Whether or not to include random effects (0 = no, 1 = yes) 9 | 4. Number of grow-from-root (GFR) samples 10 | 5. Number of MCMC samples 11 | 6. Seed for random number generator (-1 means we defer to C++ `std::random_device`) 12 | 7. [Optional] name of data file to load for training, instead of simulating data (leave this blank as `""` if simulated data is desired) 13 | 8. [Optional] index of outcome column in data file (leave this blank as `0`) 14 | 9. [Optional] comma-delimited string of column indices of covariates (leave this blank as `""`) 15 | 10. [Optional] comma-delimited string of column indices of leaf regression bases (leave this blank as `""`) 16 | 17 | The DGPs are numbered as follows: 18 | 19 | 0. Simple leaf regression model with a univariate basis for the leaf model 20 | 1. Constant leaf model with a large number of deep interactions between features 21 | 2. Simple leaf regression model with a multivariate basis for the leaf model 22 | 3. Simple "variance-only" model with a mean of zero but covariate-moderated variance function 23 | 24 | The models are numbered as follows: 25 | 26 | 0. Constant leaf tree model (the "classic" BART / XBART model) 27 | 1. "Univariate basis" leaf regression model 28 | 2. "Multivariate basis" leaf regression model 29 | 3. 
Log linear heteroskedastic variance model 30 | 31 | For an example of how to run this progam for DGP 0, leaf model 1, no random effects, 10 GFR samples, 100 MCMC samples and a default seed (`-1`), run 32 | 33 | `./build/debugstochtree 0 1 0 10 100 -1 "" 0 "" ""` 34 | 35 | from the main `stochtree` project directory after building with `BUILD_DEBUG_TARGETS` set to `ON`. 36 | -------------------------------------------------------------------------------- /demo/debug/classification.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import roc_curve, auc 7 | 8 | from stochtree import BARTModel 9 | 10 | # RNG 11 | rng = np.random.default_rng() 12 | 13 | # Generate covariates 14 | n = 1000 15 | p_X = 10 16 | X = rng.uniform(0, 1, (n, p_X)) 17 | 18 | 19 | # Define the outcome mean function 20 | def outcome_mean(X): 21 | return np.where( 22 | (X[:, 0] >= 0.0) & (X[:, 0] < 0.25), 23 | -7.5 * X[:, 1], 24 | np.where( 25 | (X[:, 0] >= 0.25) & (X[:, 0] < 0.5), 26 | -2.5 * X[:, 1], 27 | np.where((X[:, 0] >= 0.5) & (X[:, 0] < 0.75), 2.5 * X[:, 1], 7.5 * X[:, 1]), 28 | ), 29 | ) 30 | 31 | 32 | # Generate outcome 33 | epsilon = rng.normal(0, 1, n) 34 | z = outcome_mean(X) + epsilon 35 | y = np.where(z >= 0, 1, 0) 36 | 37 | # Test-train split 38 | sample_inds = np.arange(n) 39 | train_inds, test_inds = train_test_split(sample_inds, test_size=0.5) 40 | X_train = X[train_inds, :] 41 | X_test = X[test_inds, :] 42 | z_train = z[train_inds] 43 | z_test = z[test_inds] 44 | y_train = y[train_inds] 45 | y_test = y[test_inds] 46 | 47 | # Fit Probit BART 48 | bart_model = BARTModel() 49 | general_params = {"num_chains": 1} 50 | mean_forest_params = {"probit_outcome_model": True} 51 | bart_model.sample( 52 | X_train=X_train, 53 | y_train=y_train, 54 | X_test=X_test, 55 | 
num_gfr=10, 56 | num_mcmc=100, 57 | general_params=general_params, 58 | mean_forest_params=mean_forest_params 59 | ) 60 | -------------------------------------------------------------------------------- /demo/debug/kernel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from stochtree import Dataset, ForestContainer, compute_forest_leaf_indices 3 | 4 | # Create dataset 5 | X = np.array( 6 | [[1.5, 8.7, 1.2], 7 | [2.7, 3.4, 5.4], 8 | [3.6, 1.2, 9.3], 9 | [4.4, 5.4, 10.4], 10 | [5.3, 9.3, 3.6], 11 | [6.1, 10.4, 4.4]] 12 | ) 13 | n, p = X.shape 14 | num_trees = 2 15 | output_dim = 1 16 | forest_dataset = Dataset() 17 | forest_dataset.add_covariates(X) 18 | forest_samples = ForestContainer(num_trees, output_dim, True, False) 19 | 20 | # Initialize a forest with constant root predictions 21 | forest_samples.add_sample(0.) 22 | 23 | # Split the root of the first tree in the ensemble at X[,1] > 4.0 24 | forest_samples.add_numeric_split(0, 0, 0, 0, 4.0, -5., 5.) 
25 | 26 | # Check that regular and "raw" predictions are the same (since the leaf is constant) 27 | computed_indices = compute_forest_leaf_indices(forest_samples, X) 28 | 29 | # Split the left leaf of the first tree in the ensemble at X[,2] > 4.0 30 | forest_samples.add_numeric_split(0, 0, 1, 1, 4.0, -7.5, -2.5) 31 | 32 | # Check that regular and "raw" predictions are the same (since the leaf is constant) 33 | computed_indices = compute_forest_leaf_indices(forest_samples, X) 34 | -------------------------------------------------------------------------------- /demo/debug/multivariate_treatment_causal_inference.py: -------------------------------------------------------------------------------- 1 | # Load necessary libraries 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | import matplotlib.pyplot as plt 6 | from stochtree import BCFModel 7 | from sklearn.model_selection import train_test_split 8 | 9 | # RNG 10 | rng = np.random.default_rng() 11 | 12 | # Generate covariates and basis 13 | n = 1000 14 | p_X = 5 15 | X = rng.uniform(0, 1, (n, p_X)) 16 | pi_X = 0.25 + 0.5*X[:,0] 17 | Z = rng.uniform(0, 1, (n, 2)) 18 | 19 | # Define the outcome mean functions (prognostic and treatment effects) 20 | mu_X = pi_X*5 + 2*X[:,2] 21 | tau_X = np.stack((X[:,1], X[:,2]), axis=-1) 22 | 23 | # Generate outcome 24 | epsilon = rng.normal(0, 1, n) 25 | treatment_term = np.multiply(tau_X, Z).sum(axis=1) 26 | y = mu_X + treatment_term + epsilon 27 | 28 | # Test-train split 29 | sample_inds = np.arange(n) 30 | train_inds, test_inds = train_test_split(sample_inds, test_size=0.5) 31 | X_train = X[train_inds,:] 32 | X_test = X[test_inds,:] 33 | Z_train = Z[train_inds,:] 34 | Z_test = Z[test_inds,:] 35 | y_train = y[train_inds] 36 | y_test = y[test_inds] 37 | pi_train = pi_X[train_inds] 38 | pi_test = pi_X[test_inds] 39 | mu_train = mu_X[train_inds] 40 | mu_test = mu_X[test_inds] 41 | tau_train = tau_X[train_inds,:] 42 | tau_test = tau_X[test_inds,:] 43 | 44 | # 
Run BCF 45 | bcf_model = BCFModel() 46 | bcf_model.sample(X_train, Z_train, y_train, pi_train, X_test, Z_test, pi_test, num_gfr=10, num_mcmc=100) 47 | -------------------------------------------------------------------------------- /demo/debug/r_comparison_debug.py: -------------------------------------------------------------------------------- 1 | # R Comparison Demo Script 2 | 3 | # Load necessary libraries 4 | import numpy as np 5 | import pandas as pd 6 | from stochtree import BARTModel 7 | 8 | # Load data 9 | df = pd.read_csv("debug/data/heterosked_train.csv") 10 | y = df.loc[:,'y'].to_numpy() 11 | X = df.loc[:,['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10']].to_numpy() 12 | y = y.astype(np.float64) 13 | X = X.astype(np.float64) 14 | 15 | # Run BART 16 | bart_model = BARTModel() 17 | bart_model.sample(X_train=X, y_train=y, num_gfr=0, num_mcmc=10, general_params={'random_seed': 1234, 'standardize': False, 'sample_sigma2_global': True}) 18 | 19 | # Inspect the MCMC (BART) samples 20 | y_avg_mcmc = np.squeeze(bart_model.y_hat_train).mean(axis = 1, keepdims = True) 21 | print(y_avg_mcmc[:20]) 22 | print(bart_model.global_var_samples) 23 | -------------------------------------------------------------------------------- /demo/debug/random_effects.py: -------------------------------------------------------------------------------- 1 | # Random Effects Demo Script 2 | 3 | # Load necessary libraries 4 | import numpy as np 5 | import pandas as pd 6 | import seaborn as sns 7 | import matplotlib.pyplot as plt 8 | from stochtree import ( 9 | RandomEffectsContainer, 10 | RandomEffectsDataset, 11 | RandomEffectsModel, 12 | RandomEffectsTracker, 13 | Residual, 14 | RNG, 15 | ) 16 | # from sklearn.model_selection import train_test_split 17 | 18 | # Generate sample data 19 | # RNG 20 | random_seed = 1234 21 | rng = np.random.default_rng(random_seed) 22 | 23 | # Generate group labels and random effects basis 24 | num_observations = 1000 25 | num_basis = 2 26 | num_groups = 
4 27 | group_labels = rng.choice(num_groups, size=num_observations) 28 | basis = np.empty((num_observations, num_basis)) 29 | basis[:, 0] = 1.0 30 | if num_basis > 1: 31 | basis[:, 1:] = rng.uniform(-1, 1, (num_observations, num_basis - 1)) 32 | 33 | 34 | # Define the group rfx function 35 | def outcome_mean(group_labels, basis): 36 | return np.where( 37 | group_labels == 0, 38 | 0 - 1 * basis[:, 1], 39 | np.where( 40 | group_labels == 1, 41 | 4 + 1 * basis[:, 1], 42 | np.where(group_labels == 2, 8 + 3 * basis[:, 1], 12 + 5 * basis[:, 1]), 43 | ), 44 | ) 45 | 46 | 47 | # Generate outcome 48 | epsilon = rng.normal(0, 1, num_observations) 49 | rfx_term = outcome_mean(group_labels, basis) 50 | y = rfx_term + epsilon 51 | 52 | # Standardize outcome 53 | y_bar = np.mean(y) 54 | y_std = np.std(y) 55 | resid = (y - y_bar) / y_std 56 | 57 | # Construct python objects used for rfx sampling 58 | outcome = Residual(resid) 59 | rfx_dataset = RandomEffectsDataset() 60 | rfx_dataset.add_group_labels(group_labels) 61 | rfx_dataset.add_basis(basis) 62 | rfx_tracker = RandomEffectsTracker(group_labels) 63 | rfx_model = RandomEffectsModel(num_basis, num_groups) 64 | rfx_model.set_working_parameter(np.ones(num_basis)) 65 | rfx_model.set_group_parameters(np.ones((num_basis, num_groups))) 66 | rfx_model.set_working_parameter_covariance(np.identity(num_basis)) 67 | rfx_model.set_group_parameter_covariance(np.identity(num_basis)) 68 | rfx_model.set_variance_prior_shape(1.0) 69 | rfx_model.set_variance_prior_scale(1.0) 70 | rfx_container = RandomEffectsContainer(num_basis, num_groups, rfx_tracker) 71 | # cpp_rng = RNG(random_seed) 72 | cpp_rng = RNG() 73 | 74 | # Sample the model 75 | rfx_model.sample(rfx_dataset, outcome, rfx_tracker, rfx_container, True, 1.0, cpp_rng) 76 | 77 | # Inspect the samples 78 | rfx_preds = rfx_container.predict(group_labels, basis) * y_std + y_bar 79 | rfx_comparison_df = pd.DataFrame( 80 | np.concatenate((rfx_preds, np.expand_dims(rfx_term, axis=1)), axis=1), 
81 | columns=["Predicted", "Actual"], 82 | ) 83 | sns.scatterplot(data=rfx_comparison_df, x="Predicted", y="Actual") 84 | plt.axline((0, 0), slope=1, color="black", linestyle=(0, (3, 3))) 85 | plt.show() 86 | -------------------------------------------------------------------------------- /demo/debug/rfx_serialization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from stochtree import BARTModel 3 | 4 | # RNG 5 | random_seed = 1234 6 | rng = np.random.default_rng(random_seed) 7 | 8 | # Generate covariates and basis 9 | n = 1000 10 | p_X = 10 11 | p_W = 1 12 | X = rng.uniform(0, 1, (n, p_X)) 13 | W = rng.uniform(0, 1, (n, p_W)) 14 | 15 | # Generate random effects terms 16 | num_basis = 2 17 | num_groups = 4 18 | group_labels = rng.choice(num_groups, size=n) 19 | basis = np.empty((n, num_basis)) 20 | basis[:, 0] = 1.0 21 | if num_basis > 1: 22 | basis[:, 1:] = rng.uniform(-1, 1, (n, num_basis - 1)) 23 | 24 | # Define the outcome mean function 25 | def outcome_mean(X, W): 26 | return np.where( 27 | (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * W[:,0], 28 | np.where( 29 | (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * W[:,0], 30 | np.where( 31 | (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * W[:,0], 32 | 7.5 * W[:,0] 33 | ) 34 | ) 35 | ) 36 | 37 | # Define the group rfx function 38 | def rfx_mean(group_labels, basis): 39 | return np.where( 40 | group_labels == 0, 41 | 0 - 1 * basis[:, 1], 42 | np.where( 43 | group_labels == 1, 44 | 4 + 1 * basis[:, 1], 45 | np.where( 46 | group_labels == 2, 8 + 3 * basis[:, 1], 12 + 5 * basis[:, 1] 47 | ), 48 | ), 49 | ) 50 | 51 | # Generate outcome 52 | epsilon = rng.normal(0, 1, n) 53 | forest_term = outcome_mean(X, W) 54 | rfx_term = rfx_mean(group_labels, basis) 55 | y = forest_term + rfx_term + epsilon 56 | 57 | # Run BART 58 | bart_orig = BARTModel() 59 | bart_orig.sample(X_train=X, y_train=y, leaf_basis_train=W, rfx_group_ids_train=group_labels, 60 | rfx_basis_train=basis, 
num_gfr=10, num_mcmc=10) 61 | 62 | # Extract predictions from the sampler 63 | y_hat_orig = bart_orig.predict(X, W, group_labels, basis) 64 | 65 | # "Round-trip" the model to JSON string and back and check that the predictions agree 66 | bart_json_string = bart_orig.to_json() 67 | bart_reloaded = BARTModel() 68 | bart_reloaded.from_json(bart_json_string) 69 | y_hat_reloaded = bart_reloaded.predict(X, W, group_labels, basis) 70 | np.testing.assert_almost_equal(y_hat_orig, y_hat_reloaded) -------------------------------------------------------------------------------- /include/stochtree/export.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Export macros ensure that the C++ code can be used as a library cross-platform 3 | * (declspec needed to load names from a DLL on windows) and can be wrapped in a 4 | * C program. 5 | * 6 | * This code modifies (changing names of) the export macros in LightGBM, which carries 7 | * the following copyright information: 8 | * 9 | * Copyright (c) 2017 Microsoft Corporation. All rights reserved. 10 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 
11 | */ 12 | #ifndef STOCHTREE_EXPORT_H_ 13 | #define STOCHTREE_EXPORT_H_ 14 | 15 | /** Macros for exporting symbols in MSVC/GCC/CLANG **/ 16 | 17 | #ifdef __cplusplus 18 | #define STOCHTREE_EXTERN_C extern "C" 19 | #else 20 | #define STOCHTREE_EXTERN_C 21 | #endif 22 | 23 | #ifdef _MSC_VER 24 | #define STOCHTREE_EXPORT __declspec(dllexport) 25 | #define STOCHTREE_C_EXPORT STOCHTREE_EXTERN_C __declspec(dllexport) 26 | #else 27 | #define STOCHTREE_EXPORT __attribute__ ((visibility ("default"))) 28 | #define STOCHTREE_C_EXPORT STOCHTREE_EXTERN_C __attribute__ ((visibility ("default"))) 29 | #endif 30 | 31 | #endif /** STOCHTREE_EXPORT_H_ **/ 32 | -------------------------------------------------------------------------------- /include/stochtree/gamma_sampler.h: -------------------------------------------------------------------------------- 1 | /*! Copyright (c) 2024 stochtree authors. All rights reserved. */ 2 | #ifndef STOCHTREE_GAMMA_SAMPLER_H_ 3 | #define STOCHTREE_GAMMA_SAMPLER_H_ 4 | 5 | #include <random> 6 | 7 | namespace StochTree { 8 | 9 | /*! \brief Draws gamma random variates; `b` is interpreted as a rate parameter by default */ 10 | class GammaSampler { 11 | public: 12 | GammaSampler() {} 13 | ~GammaSampler() {} 14 | double Sample(double a, double b, std::mt19937& gen, bool rate_param = true) { 15 | // std::gamma_distribution is parameterized by scale, so invert b when it is a rate
 double scale = rate_param ? 1./b : b; 16 | gamma_dist_ = std::gamma_distribution<double>(a, scale); 17 | return gamma_dist_(gen); 18 | } 19 | private: 20 | /*! \brief Gamma distribution used for sampling */ 21 | std::gamma_distribution<double> gamma_dist_; 22 | }; 23 | 24 | } // namespace StochTree 25 | 26 | #endif // STOCHTREE_GAMMA_SAMPLER_H_ -------------------------------------------------------------------------------- /include/stochtree/ig_sampler.h: -------------------------------------------------------------------------------- 1 | /*! Copyright (c) 2024 stochtree authors. All rights reserved.
*/ 2 | #ifndef STOCHTREE_IG_SAMPLER_H_ 3 | #define STOCHTREE_IG_SAMPLER_H_ 4 | 5 | #include <random> 6 | 7 | namespace StochTree { 8 | 9 | class InverseGammaSampler { 10 | public: 11 | InverseGammaSampler() {} 12 | ~InverseGammaSampler() {} 13 | double Sample(double a, double b, std::mt19937& gen, bool scale_param = true) { 14 | // C++ standard library provides a gamma distribution with scale 15 | // parameter, but the correspondence between gamma and IG is that 16 | // 1 / gamma(a,b) ~ IG(a,b) when b is a __rate__ parameter. 17 | // Before sampling, we convert ig_scale to a gamma scale parameter by 18 | // taking its multiplicative inverse. 19 | double gamma_scale = scale_param ? 1./b : b; 20 | gamma_dist_ = std::gamma_distribution<double>(a, gamma_scale); 21 | return (1/gamma_dist_(gen)); 22 | } 23 | private: 24 | /*! \brief Gamma distribution whose draws are inverted to yield IG samples */ 25 | std::gamma_distribution<double> gamma_dist_; 26 | }; 27 | 28 | } // namespace StochTree 29 | 30 | #endif // STOCHTREE_IG_SAMPLER_H_ -------------------------------------------------------------------------------- /include/stochtree/normal_sampler.h: -------------------------------------------------------------------------------- 1 | /*! Copyright (c) 2024 stochtree authors. All rights reserved. */ 2 | #ifndef STOCHTREE_NORMAL_SAMPLER_H_ 3 | #define STOCHTREE_NORMAL_SAMPLER_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace StochTree { 11 | 12 | class UnivariateNormalSampler { 13 | public: 14 | UnivariateNormalSampler() {std_normal_dist_ = std::normal_distribution(0.,1.);} 15 | ~UnivariateNormalSampler() {} 16 | double Sample(double mean, double variance, std::mt19937& gen) { 17 | return mean + std::sqrt(variance) * std_normal_dist_(gen); 18 | } 19 | private: 20 | /*!
\brief Standard normal distribution */ 21 | std::normal_distribution std_normal_dist_; 22 | }; 23 | 24 | class MultivariateNormalSampler { 25 | public: 26 | MultivariateNormalSampler() {std_normal_dist_ = std::normal_distribution(0.,1.);} 27 | ~MultivariateNormalSampler() {} 28 | std::vector Sample(Eigen::VectorXd& mean, Eigen::MatrixXd& covariance, std::mt19937& gen) { 29 | // Dimension extraction and checks 30 | int mean_cols = mean.size(); 31 | int cov_rows = covariance.rows(); 32 | int cov_cols = covariance.cols(); 33 | CHECK_EQ(mean_cols, cov_cols); 34 | 35 | // Variance cholesky decomposition 36 | Eigen::LLT decomposition(covariance); 37 | Eigen::MatrixXd covariance_chol = decomposition.matrixL(); 38 | 39 | // Sample a vector of standard normal random variables 40 | Eigen::VectorXd std_norm_vec(cov_rows); 41 | for (int i = 0; i < cov_rows; i++) { 42 | std_norm_vec(i) = std_normal_dist_(gen); 43 | } 44 | 45 | // Compute and return the sampled value 46 | Eigen::VectorXd sampled_values_raw = mean + covariance_chol * std_norm_vec; 47 | std::vector result(cov_rows); 48 | for (int i = 0; i < cov_rows; i++) { 49 | result[i] = sampled_values_raw(i, 0); 50 | } 51 | return result; 52 | } 53 | Eigen::VectorXd SampleEigen(Eigen::VectorXd& mean, Eigen::MatrixXd& covariance, std::mt19937& gen) { 54 | // Dimension extraction and checks 55 | int mean_cols = mean.size(); 56 | int cov_rows = covariance.rows(); 57 | int cov_cols = covariance.cols(); 58 | CHECK_EQ(mean_cols, cov_cols); 59 | 60 | // Variance cholesky decomposition 61 | Eigen::LLT decomposition(covariance); 62 | Eigen::MatrixXd covariance_chol = decomposition.matrixL(); 63 | 64 | // Sample a vector of standard normal random variables 65 | Eigen::VectorXd std_norm_vec(cov_rows); 66 | for (int i = 0; i < cov_rows; i++) { 67 | std_norm_vec(i) = std_normal_dist_(gen); 68 | } 69 | 70 | // Compute and return the sampled value 71 | return mean + covariance_chol * std_norm_vec; 72 | } 73 | private: 74 | /*! 
\brief Standard normal distribution */ 75 | std::normal_distribution std_normal_dist_; 76 | }; 77 | 78 | } // namespace StochTree 79 | 80 | #endif // STOCHTREE_NORMAL_SAMPLER_H_ -------------------------------------------------------------------------------- /include/stochtree/prior.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2024 stochtree authors. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | #ifndef STOCHTREE_PRIOR_H_ 6 | #define STOCHTREE_PRIOR_H_ 7 | 8 | #include 9 | #include 10 | 11 | namespace StochTree { 12 | 13 | class RandomEffectsGaussianPrior { 14 | public: 15 | RandomEffectsGaussianPrior() {} 16 | virtual ~RandomEffectsGaussianPrior() = default; 17 | }; 18 | 19 | class RandomEffectsRegressionGaussianPrior : public RandomEffectsGaussianPrior { 20 | public: 21 | RandomEffectsRegressionGaussianPrior(double a, double b, int32_t num_components, int32_t num_groups) { 22 | a_ = a; 23 | b_ = b; 24 | num_components_ = num_components; 25 | num_groups_ = num_groups; 26 | } 27 | ~RandomEffectsRegressionGaussianPrior() {} 28 | double GetPriorVarianceShape() {return a_;} 29 | double GetPriorVarianceScale() {return b_;} 30 | int32_t GetNumComponents() {return num_components_;} 31 | int32_t GetNumGroups() {return num_groups_;} 32 | void SetPriorVarianceShape(double a) {a_ = a;} 33 | void SetPriorVarianceScale(double b) {b_ = b;} 34 | void SetNumComponents(int32_t num_components) {num_components_ = num_components;} 35 | void SetNumGroups(int32_t num_groups) {num_groups_ = num_groups;} 36 | private: 37 | double a_; 38 | double b_; 39 | int32_t num_components_; 40 | int32_t num_groups_; 41 | }; 42 | 43 | class TreePrior { 44 | public: 45 | TreePrior(double alpha, double beta, int32_t min_samples_in_leaf, int32_t max_depth = -1) { 46 | alpha_ = alpha; 47 | beta_ = beta; 48 | min_samples_in_leaf_ = min_samples_in_leaf; 49 
| max_depth_ = max_depth; 50 | } 51 | ~TreePrior() {} 52 | double GetAlpha() {return alpha_;} 53 | double GetBeta() {return beta_;} 54 | int32_t GetMinSamplesLeaf() {return min_samples_in_leaf_;} 55 | int32_t GetMaxDepth() {return max_depth_;} 56 | void SetAlpha(double alpha) {alpha_ = alpha;} 57 | void SetBeta(double beta) {beta_ = beta;} 58 | void SetMinSamplesLeaf(int32_t min_samples_in_leaf) {min_samples_in_leaf_ = min_samples_in_leaf;} 59 | void SetMaxDepth(int32_t max_depth) {max_depth_ = max_depth;} 60 | private: 61 | double alpha_; 62 | double beta_; 63 | int32_t min_samples_in_leaf_; 64 | int32_t max_depth_; 65 | }; 66 | 67 | class IGVariancePrior { 68 | public: 69 | IGVariancePrior(double shape, double scale) { 70 | shape_ = shape; 71 | scale_ = scale; 72 | } 73 | ~IGVariancePrior() {} 74 | double GetShape() {return shape_;} 75 | double GetScale() {return scale_;} 76 | void SetShape(double shape) {shape_ = shape;} 77 | void SetScale(double scale) {scale_ = scale;} 78 | private: 79 | double shape_; 80 | double scale_; 81 | }; 82 | 83 | } // namespace StochTree 84 | 85 | #endif // STOCHTREE_PRIOR_H_ -------------------------------------------------------------------------------- /man/CppRNG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model.R 3 | \name{CppRNG} 4 | \alias{CppRNG} 5 | \title{Class that wraps a C++ random number generator (for reproducibility)} 6 | \description{ 7 | Persists a C++ random number generator throughout an R session to 8 | ensure reproducibility from a given random seed. If no seed is provided, 9 | the C++ random number generator is initialized using \code{std::random_device}. 10 | } 11 | \section{Public fields}{ 12 | \if{html}{\out{
}} 13 | \describe{ 14 | \item{\code{rng_ptr}}{External pointer to a C++ std::mt19937 class} 15 | } 16 | \if{html}{\out{
}} 17 | } 18 | \section{Methods}{ 19 | \subsection{Public methods}{ 20 | \itemize{ 21 | \item \href{#method-CppRNG-new}{\code{CppRNG$new()}} 22 | } 23 | } 24 | \if{html}{\out{
}} 25 | \if{html}{\out{}} 26 | \if{latex}{\out{\hypertarget{method-CppRNG-new}{}}} 27 | \subsection{Method \code{new()}}{ 28 | Create a new CppRNG object. 29 | \subsection{Usage}{ 30 | \if{html}{\out{
}}\preformatted{CppRNG$new(random_seed = -1)}\if{html}{\out{
}} 31 | } 32 | 33 | \subsection{Arguments}{ 34 | \if{html}{\out{
}} 35 | \describe{ 36 | \item{\code{random_seed}}{(Optional) random seed for sampling} 37 | } 38 | \if{html}{\out{
}} 39 | } 40 | \subsection{Returns}{ 41 | A new \code{CppRNG} object. 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /man/GlobalModelConfig.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{GlobalModelConfig} 4 | \alias{GlobalModelConfig} 5 | \title{Object used to get / set global parameters and other global model 6 | configuration options in the "low-level" stochtree interface} 7 | \value{ 8 | Global error variance parameter 9 | } 10 | \description{ 11 | The "low-level" stochtree interface enables a high degree of sampler 12 | customization, in which users employ R wrappers around C++ objects 13 | like ForestDataset, Outcome, CppRNG, and ForestModel to run the 14 | Gibbs sampler of a BART model with custom modifications. 15 | GlobalModelConfig allows users to specify / query the global parameters 16 | of a model they wish to run. 17 | } 18 | \section{Public fields}{ 19 | \if{html}{\out{
}} 20 | \describe{ 21 | \item{\code{global_error_variance}}{Global error variance parameter 22 | Create a new GlobalModelConfig object.} 23 | } 24 | \if{html}{\out{
}} 25 | } 26 | \section{Methods}{ 27 | \subsection{Public methods}{ 28 | \itemize{ 29 | \item \href{#method-GlobalModelConfig-new}{\code{GlobalModelConfig$new()}} 30 | \item \href{#method-GlobalModelConfig-update_global_error_variance}{\code{GlobalModelConfig$update_global_error_variance()}} 31 | \item \href{#method-GlobalModelConfig-get_global_error_variance}{\code{GlobalModelConfig$get_global_error_variance()}} 32 | } 33 | } 34 | \if{html}{\out{
}} 35 | \if{html}{\out{}} 36 | \if{latex}{\out{\hypertarget{method-GlobalModelConfig-new}{}}} 37 | \subsection{Method \code{new()}}{ 38 | \subsection{Usage}{ 39 | \if{html}{\out{
}}\preformatted{GlobalModelConfig$new(global_error_variance = 1)}\if{html}{\out{
}} 40 | } 41 | 42 | \subsection{Arguments}{ 43 | \if{html}{\out{
}} 44 | \describe{ 45 | \item{\code{global_error_variance}}{Global error variance parameter (default: \code{1.0})} 46 | } 47 | \if{html}{\out{
}} 48 | } 49 | \subsection{Returns}{ 50 | A new GlobalModelConfig object. 51 | } 52 | } 53 | \if{html}{\out{
}} 54 | \if{html}{\out{}} 55 | \if{latex}{\out{\hypertarget{method-GlobalModelConfig-update_global_error_variance}{}}} 56 | \subsection{Method \code{update_global_error_variance()}}{ 57 | Update global error variance parameter 58 | \subsection{Usage}{ 59 | \if{html}{\out{
}}\preformatted{GlobalModelConfig$update_global_error_variance(global_error_variance)}\if{html}{\out{
}} 60 | } 61 | 62 | \subsection{Arguments}{ 63 | \if{html}{\out{
}} 64 | \describe{ 65 | \item{\code{global_error_variance}}{Global error variance parameter} 66 | } 67 | \if{html}{\out{
}} 68 | } 69 | } 70 | \if{html}{\out{
}} 71 | \if{html}{\out{}} 72 | \if{latex}{\out{\hypertarget{method-GlobalModelConfig-get_global_error_variance}{}}} 73 | \subsection{Method \code{get_global_error_variance()}}{ 74 | Query global error variance parameter for this GlobalModelConfig object 75 | \subsection{Usage}{ 76 | \if{html}{\out{
}}\preformatted{GlobalModelConfig$get_global_error_variance()}\if{html}{\out{
}} 77 | } 78 | 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /man/RandomEffectsTracker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{RandomEffectsTracker} 4 | \alias{RandomEffectsTracker} 5 | \title{Class that defines a "tracker" for random effects models, most notably 6 | storing the data indices available in each group for quicker posterior 7 | computation and sampling of random effects terms.} 8 | \description{ 9 | Stores a mapping from every observation to its group index, a mapping 10 | from group indices to the training sample observations available in that 11 | group, and predictions for each observation. 12 | } 13 | \section{Public fields}{ 14 | \if{html}{\out{
}} 15 | \describe{ 16 | \item{\code{rfx_tracker_ptr}}{External pointer to a C++ StochTree::RandomEffectsTracker class} 17 | } 18 | \if{html}{\out{
}} 19 | } 20 | \section{Methods}{ 21 | \subsection{Public methods}{ 22 | \itemize{ 23 | \item \href{#method-RandomEffectsTracker-new}{\code{RandomEffectsTracker$new()}} 24 | } 25 | } 26 | \if{html}{\out{
}} 27 | \if{html}{\out{}} 28 | \if{latex}{\out{\hypertarget{method-RandomEffectsTracker-new}{}}} 29 | \subsection{Method \code{new()}}{ 30 | Create a new RandomEffectsTracker object. 31 | \subsection{Usage}{ 32 | \if{html}{\out{
}}\preformatted{RandomEffectsTracker$new(rfx_group_indices)}\if{html}{\out{
}} 33 | } 34 | 35 | \subsection{Arguments}{ 36 | \if{html}{\out{
}} 37 | \describe{ 38 | \item{\code{rfx_group_indices}}{Integer indices indicating groups used to define random effects} 39 | } 40 | \if{html}{\out{
}} 41 | } 42 | \subsection{Returns}{ 43 | A new \code{RandomEffectsTracker} object. 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /man/calibrateInverseGammaErrorVariance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calibration.R 3 | \name{calibrateInverseGammaErrorVariance} 4 | \alias{calibrateInverseGammaErrorVariance} 5 | \title{Calibrate the scale parameter on an inverse gamma prior for the global error variance as in Chipman et al (2022)} 6 | \usage{ 7 | calibrateInverseGammaErrorVariance( 8 | y, 9 | X, 10 | W = NULL, 11 | nu = 3, 12 | quant = 0.9, 13 | standardize = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{y}{Outcome to be modeled using BART, BCF or another nonparametric ensemble method.} 18 | 19 | \item{X}{Covariates to be used to partition trees in an ensemble or series of ensemble.} 20 | 21 | \item{W}{(Optional) Basis used to define a "leaf regression" model for each decision tree. The "classic" BART model assumes a constant leaf parameter, which is equivalent to a "leaf regression" on a basis of all ones, though it is not necessary to pass a vector of ones, here or to the BART function. Default: \code{NULL}.} 22 | 23 | \item{nu}{The shape parameter for the global error variance's IG prior. The scale parameter in the Sparapani et al (2021) parameterization is defined as \code{nu*lambda} where \code{lambda} is the output of this function. Default: \code{3}.} 24 | 25 | \item{quant}{(Optional) Quantile of the inverse gamma prior distribution represented by a linear-regression-based overestimate of \code{sigma^2}. Default: \code{0.9}.} 26 | 27 | \item{standardize}{(Optional) Whether or not outcome should be standardized (\code{(y-mean(y))/sd(y)}) before calibration of \code{lambda}. 
Default: \code{TRUE}.} 28 | } 29 | \value{ 30 | Value of \code{lambda} which determines the scale parameter of the global error variance prior (\code{sigma^2 ~ IG(nu,nu*lambda)}) 31 | } 32 | \description{ 33 | Chipman, H., George, E., Hahn, R., McCulloch, R., Pratola, M. and Sparapani, R. (2022). Bayesian Additive Regression Trees, Computational Approaches. In Wiley StatsRef: Statistics Reference Online (eds N. Balakrishnan, T. Colton, B. Everitt, W. Piegorsch, F. Ruggeri and J.L. Teugels). https://doi.org/10.1002/9781118445112.stat08288 34 | } 35 | \examples{ 36 | n <- 100 37 | p <- 5 38 | X <- matrix(runif(n*p), ncol = p) 39 | y <- 10*X[,1] - 20*X[,2] + rnorm(n) 40 | nu <- 3 41 | lambda <- calibrateInverseGammaErrorVariance(y, X, nu = nu) 42 | sigma2hat <- mean(resid(lm(y~X))^2) 43 | mean(var(y)/rgamma(100000, nu, rate = nu*lambda) < sigma2hat) 44 | } 45 | -------------------------------------------------------------------------------- /man/computeForestLeafIndices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kernel.R 3 | \name{computeForestLeafIndices} 4 | \alias{computeForestLeafIndices} 5 | \title{Compute vector of forest leaf indices} 6 | \usage{ 7 | computeForestLeafIndices( 8 | model_object, 9 | covariates, 10 | forest_type = NULL, 11 | propensity = NULL, 12 | forest_inds = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{model_object}{Object of type \code{bartmodel}, \code{bcfmodel}, or \code{ForestSamples} corresponding to a BART / BCF model with at least one forest sample, or a low-level \code{ForestSamples} object.} 17 | 18 | \item{covariates}{Covariates to use for prediction. Must have the same dimensions / column types as the data used to train a forest.} 19 | 20 | \item{forest_type}{Which forest to use from \code{model_object}. 
21 | Valid inputs depend on the model type, and whether or not a given forest was sampled in that model. 22 | 23 | \strong{1. BART} 24 | \itemize{ 25 | \item \code{'mean'}: Extracts leaf indices for the mean forest 26 | \item \code{'variance'}: Extracts leaf indices for the variance forest 27 | } 28 | 29 | \strong{2. BCF} 30 | \itemize{ 31 | \item \code{'prognostic'}: Extracts leaf indices for the prognostic forest 32 | \item \code{'treatment'}: Extracts leaf indices for the treatment effect forest 33 | \item \code{'variance'}: Extracts leaf indices for the variance forest 34 | } 35 | 36 | \strong{3. ForestSamples} 37 | \itemize{ 38 | \item \code{NULL}: It is not necessary to disambiguate when this function is called directly on a \code{ForestSamples} object. This is the default value of this argument. 39 | }} 40 | 41 | \item{propensity}{(Optional) Propensities used for prediction (BCF-only).} 42 | 43 | \item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute leaf indices. If not provided, 44 | this function will return leaf indices for every sample of a forest. 45 | This function uses 0-indexing, so the first forest sample corresponds to \code{forest_num = 0}, and so on.} 46 | } 47 | \value{ 48 | Vector of size \code{num_obs * num_trees}, where \code{num_obs = nrow(covariates)} 49 | and \code{num_trees} is the number of trees in the relevant forest of \code{model_object}. 50 | } 51 | \description{ 52 | Compute and return a vector representation of a forest's leaf predictions for 53 | every observation in a dataset. 54 | 55 | The vector has a "row-major" format that can be easily re-represented 56 | as a CSR sparse matrix: elements are organized so that the first \code{n} elements 57 | correspond to leaf predictions for all \code{n} observations in a dataset for the 58 | first tree in an ensemble, the next \code{n} elements correspond to predictions for 59 | the second tree and so on.
The "data" for each element corresponds to a uniquely 60 | mapped column index that corresponds to a single leaf of a single tree (i.e. 61 | if tree 1 has 3 leaves, its column indices range from 0 to 2, and then tree 2's 62 | leaf indices begin at 3, etc...). 63 | } 64 | \examples{ 65 | X <- matrix(runif(10*100), ncol = 10) 66 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 67 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 68 | computeForestLeafIndices(bart_model, X, "mean") 69 | computeForestLeafIndices(bart_model, X, "mean", 0) 70 | computeForestLeafIndices(bart_model, X, "mean", c(1,3,9)) 71 | } 72 | -------------------------------------------------------------------------------- /man/computeForestLeafVariances.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kernel.R 3 | \name{computeForestLeafVariances} 4 | \alias{computeForestLeafVariances} 5 | \title{Compute vector of forest leaf scale parameters} 6 | \usage{ 7 | computeForestLeafVariances(model_object, forest_type, forest_inds = NULL) 8 | } 9 | \arguments{ 10 | \item{model_object}{Object of type \code{bartmodel} or \code{bcfmodel} corresponding to a BART / BCF model with at least one forest sample} 11 | 12 | \item{forest_type}{Which forest to use from \code{model_object}. 13 | Valid inputs depend on the model type, and whether or not a given forest was sampled in that model. 14 | 15 | \strong{1. BART} 16 | \itemize{ 17 | \item \code{'mean'}: Extracts leaf indices for the mean forest 18 | \item \code{'variance'}: Extracts leaf indices for the variance forest 19 | } 20 | 21 | \strong{2. 
BCF} 22 | \itemize{ 23 | \item \code{'prognostic'}: Extracts leaf indices for the prognostic forest 24 | \item \code{'treatment'}: Extracts leaf indices for the treatment effect forest 25 | \item \code{'variance'}: Extracts leaf indices for the variance forest 26 | }} 27 | 28 | \item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute leaf indices. If not provided, 29 | this function will return leaf indices for every sample of a forest. 30 | This function uses 0-indexing, so the first forest sample corresponds to \code{forest_num = 0}, and so on.} 31 | } 32 | \value{ 33 | Vector of size \code{length(forest_inds)} with the leaf scale parameter for each requested forest. 34 | } 35 | \description{ 36 | Return each forest's leaf node scale parameters. 37 | 38 | If leaf scale is not sampled for the forest in question, throws an error that the 39 | leaf model does not have a stochastic scale parameter. 40 | } 41 | \examples{ 42 | X <- matrix(runif(10*100), ncol = 10) 43 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 44 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 45 | computeForestLeafVariances(bart_model, "mean") 46 | computeForestLeafVariances(bart_model, "mean", 0) 47 | computeForestLeafVariances(bart_model, "mean", c(1,3,5)) 48 | } 49 | -------------------------------------------------------------------------------- /man/computeForestMaxLeafIndex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kernel.R 3 | \name{computeForestMaxLeafIndex} 4 | \alias{computeForestMaxLeafIndex} 5 | \title{Compute and return the largest possible leaf index computable by \code{computeForestLeafIndices} for the forests in a designated forest sample container.} 6 | \usage{ 7 | computeForestMaxLeafIndex(model_object, forest_type = NULL, forest_inds = NULL) 8 | } 9 | \arguments{ 10 | \item{model_object}{Object of type \code{bartmodel}, 
\code{bcfmodel}, or \code{ForestSamples} corresponding to a BART / BCF model with at least one forest sample, or a low-level \code{ForestSamples} object.} 11 | 12 | \item{forest_type}{Which forest to use from \code{model_object}. 13 | Valid inputs depend on the model type, and whether or not a given forest was sampled in that model. 14 | 15 | \strong{1. BART} 16 | \itemize{ 17 | \item \code{'mean'}: Extracts leaf indices for the mean forest 18 | \item \code{'variance'}: Extracts leaf indices for the variance forest 19 | } 20 | 21 | \strong{2. BCF} 22 | \itemize{ 23 | \item \code{'prognostic'}: Extracts leaf indices for the prognostic forest 24 | \item \code{'treatment'}: Extracts leaf indices for the treatment effect forest 25 | \item \code{'variance'}: Extracts leaf indices for the variance forest 26 | } 27 | 28 | \strong{3. ForestSamples} 29 | \itemize{ 30 | \item \code{NULL}: It is not necessary to disambiguate when this function is called directly on a \code{ForestSamples} object. This is the default value of this argument. 31 | }} 32 | 33 | \item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute max leaf indices. If not provided, 34 | this function will return max leaf indices for every sample of a forest. 35 | This function uses 0-indexing, so the first forest sample corresponds to \code{forest_num = 0}, and so on.} 36 | } 37 | \value{ 38 | Vector containing the largest possible leaf index computable by \code{computeForestLeafIndices} for the forests in a designated forest sample container. 39 | } 40 | \description{ 41 | Compute and return the largest possible leaf index computable by \code{computeForestLeafIndices} for the forests in a designated forest sample container.
42 | } 43 | \examples{ 44 | X <- matrix(runif(10*100), ncol = 10) 45 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 46 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 47 | computeForestMaxLeafIndex(bart_model, "mean") 48 | computeForestMaxLeafIndex(bart_model, "mean", 0) 49 | computeForestMaxLeafIndex(bart_model, "mean", c(1,3,9)) 50 | } 51 | -------------------------------------------------------------------------------- /man/convertPreprocessorToJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{convertPreprocessorToJson} 4 | \alias{convertPreprocessorToJson} 5 | \title{Convert the persistent aspects of a covariate preprocessor to (in-memory) C++ JSON object} 6 | \usage{ 7 | convertPreprocessorToJson(object) 8 | } 9 | \arguments{ 10 | \item{object}{List containing information on variables, including train set 11 | categories for categorical variables} 12 | } 13 | \value{ 14 | wrapper around in-memory C++ JSON object 15 | } 16 | \description{ 17 | Convert the persistent aspects of a covariate preprocessor to (in-memory) C++ JSON object 18 | } 19 | \examples{ 20 | cov_mat <- matrix(1:12, ncol = 3) 21 | preprocess_list <- preprocessTrainData(cov_mat) 22 | preprocessor_json <- convertPreprocessorToJson(preprocess_list$metadata) 23 | } 24 | -------------------------------------------------------------------------------- /man/createBARTModelFromCombinedJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{createBARTModelFromCombinedJson} 4 | \alias{createBARTModelFromCombinedJson} 5 | \title{Convert a list of (in-memory) JSON representations of a BART model to a single combined BART model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | 
createBARTModelFromCombinedJson(json_object_list) 9 | } 10 | \arguments{ 11 | \item{json_object_list}{List of objects of type \code{CppJson} containing Json representation of a BART model} 12 | } 13 | \value{ 14 | Object of type \code{bartmodel} 15 | } 16 | \description{ 17 | Convert a list of (in-memory) JSON representations of a BART model to a single combined BART model object 18 | which can be used for prediction, etc... 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | bart_json <- list(saveBARTModelToJson(bart_model)) 44 | bart_model_roundtrip <- createBARTModelFromCombinedJson(bart_json) 45 | } 46 | -------------------------------------------------------------------------------- /man/createBARTModelFromCombinedJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{createBARTModelFromCombinedJsonString} 4 | \alias{createBARTModelFromCombinedJsonString} 5 | \title{Convert a list of (in-memory) JSON strings that represent BART models to a single combined BART model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | createBARTModelFromCombinedJsonString(json_string_list) 
9 | } 10 | \arguments{ 11 | \item{json_string_list}{List of JSON strings which can be parsed to objects of type \code{CppJson} containing Json representation of a BART model} 12 | } 13 | \value{ 14 | Object of type \code{bartmodel} 15 | } 16 | \description{ 17 | Convert a list of (in-memory) JSON strings that represent BART models to a single combined BART model object 18 | which can be used for prediction, etc... 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | bart_json_string_list <- list(saveBARTModelToJsonString(bart_model)) 44 | bart_model_roundtrip <- createBARTModelFromCombinedJsonString(bart_json_string_list) 45 | } 46 | -------------------------------------------------------------------------------- /man/createBARTModelFromJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{createBARTModelFromJson} 4 | \alias{createBARTModelFromJson} 5 | \title{Convert an (in-memory) JSON representation of a BART model to a BART model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | createBARTModelFromJson(json_object) 9 | } 10 | \arguments{ 11 | \item{json_object}{Object of type 
\code{CppJson} containing Json representation of a BART model} 12 | } 13 | \value{ 14 | Object of type \code{bartmodel} 15 | } 16 | \description{ 17 | Convert an (in-memory) JSON representation of a BART model to a BART model object 18 | which can be used for prediction, etc... 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | bart_json <- saveBARTModelToJson(bart_model) 44 | bart_model_roundtrip <- createBARTModelFromJson(bart_json) 45 | } 46 | -------------------------------------------------------------------------------- /man/createBARTModelFromJsonFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{createBARTModelFromJsonFile} 4 | \alias{createBARTModelFromJsonFile} 5 | \title{Convert a JSON file containing sample information on a trained BART model 6 | to a BART model object which can be used for prediction, etc...} 7 | \usage{ 8 | createBARTModelFromJsonFile(json_filename) 9 | } 10 | \arguments{ 11 | \item{json_filename}{String of filepath, must end in ".json"} 12 | } 13 | \value{ 14 | Object of type \code{bartmodel} 15 | } 16 | \description{ 17 | Convert a JSON file containing sample 
information on a trained BART model 18 | to a BART model object which can be used for prediction, etc... 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | tmpjson <- tempfile(fileext = ".json") 44 | saveBARTModelToJsonFile(bart_model, file.path(tmpjson)) 45 | bart_model_roundtrip <- createBARTModelFromJsonFile(file.path(tmpjson)) 46 | unlink(tmpjson) 47 | } 48 | -------------------------------------------------------------------------------- /man/createBARTModelFromJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{createBARTModelFromJsonString} 4 | \alias{createBARTModelFromJsonString} 5 | \title{Convert a JSON string containing sample information on a trained BART model 6 | to a BART model object which can be used for prediction, etc...} 7 | \usage{ 8 | createBARTModelFromJsonString(json_string) 9 | } 10 | \arguments{ 11 | \item{json_string}{JSON string dump} 12 | } 13 | \value{ 14 | Object of type \code{bartmodel} 15 | } 16 | \description{ 17 | Convert a JSON string containing sample information on a trained BART model 18 | to a BART model object which can be used for prediction, 
etc... 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | bart_json <- saveBARTModelToJsonString(bart_model) 44 | bart_model_roundtrip <- createBARTModelFromJsonString(bart_json) 45 | y_hat_mean_roundtrip <- rowMeans(predict(bart_model_roundtrip, X_train)$y_hat) 46 | } 47 | -------------------------------------------------------------------------------- /man/createBCFModelFromCombinedJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{createBCFModelFromCombinedJson} 4 | \alias{createBCFModelFromCombinedJson} 5 | \title{Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | createBCFModelFromCombinedJson(json_object_list) 9 | } 10 | \arguments{ 11 | \item{json_object_list}{List of objects of type \code{CppJson} containing Json representation of a BCF model} 12 | } 13 | \value{ 14 | Object of type \code{bcfmodel} 15 | } 16 | \description{ 17 | Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object 18 | which can be used for prediction, 
etc... 19 | } 20 | \examples{ 21 | n <- 500 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | mu_x <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | pi_x <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 35 | ) 36 | tau_x <- ( 37 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 38 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 39 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 40 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 41 | ) 42 | Z <- rbinom(n, 1, pi_x) 43 | E_XZ <- mu_x + Z*tau_x 44 | snr <- 3 45 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 46 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 47 | rfx_basis <- cbind(1, runif(n, -1, 1)) 48 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 49 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | pi_test <- pi_x[test_inds] 58 | pi_train <- pi_x[train_inds] 59 | Z_test <- Z[test_inds] 60 | Z_train <- Z[train_inds] 61 | y_test <- y[test_inds] 62 | y_train <- y[train_inds] 63 | mu_test <- mu_x[test_inds] 64 | mu_train <- mu_x[train_inds] 65 | tau_test <- tau_x[test_inds] 66 | tau_train <- tau_x[train_inds] 67 | rfx_group_ids_test <- rfx_group_ids[test_inds] 68 | rfx_group_ids_train <- rfx_group_ids[train_inds] 69 | rfx_basis_test <- rfx_basis[test_inds,] 70 | rfx_basis_train <- rfx_basis[train_inds,] 71 | rfx_term_test <- rfx_term[test_inds] 72 | rfx_term_train <- rfx_term[train_inds] 73 | bcf_model <- bcf(X_train = X_train, Z_train = 
Z_train, y_train = y_train, 74 | propensity_train = pi_train, 75 | rfx_group_ids_train = rfx_group_ids_train, 76 | rfx_basis_train = rfx_basis_train, X_test = X_test, 77 | Z_test = Z_test, propensity_test = pi_test, 78 | rfx_group_ids_test = rfx_group_ids_test, 79 | rfx_basis_test = rfx_basis_test, 80 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 81 | bcf_json_list <- list(saveBCFModelToJson(bcf_model)) 82 | bcf_model_roundtrip <- createBCFModelFromCombinedJson(bcf_json_list) 83 | } 84 | -------------------------------------------------------------------------------- /man/createBCFModelFromCombinedJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{createBCFModelFromCombinedJsonString} 4 | \alias{createBCFModelFromCombinedJsonString} 5 | \title{Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | createBCFModelFromCombinedJsonString(json_string_list) 9 | } 10 | \arguments{ 11 | \item{json_string_list}{List of JSON strings which can be parsed to objects of type \code{CppJson} containing Json representation of a BCF model} 12 | } 13 | \value{ 14 | Object of type \code{bcfmodel} 15 | } 16 | \description{ 17 | Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object 18 | which can be used for prediction, etc... 
19 | } 20 | \examples{ 21 | n <- 500 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | mu_x <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | pi_x <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 35 | ) 36 | tau_x <- ( 37 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 38 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 39 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 40 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 41 | ) 42 | Z <- rbinom(n, 1, pi_x) 43 | E_XZ <- mu_x + Z*tau_x 44 | snr <- 3 45 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 46 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 47 | rfx_basis <- cbind(1, runif(n, -1, 1)) 48 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 49 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | pi_test <- pi_x[test_inds] 58 | pi_train <- pi_x[train_inds] 59 | Z_test <- Z[test_inds] 60 | Z_train <- Z[train_inds] 61 | y_test <- y[test_inds] 62 | y_train <- y[train_inds] 63 | mu_test <- mu_x[test_inds] 64 | mu_train <- mu_x[train_inds] 65 | tau_test <- tau_x[test_inds] 66 | tau_train <- tau_x[train_inds] 67 | rfx_group_ids_test <- rfx_group_ids[test_inds] 68 | rfx_group_ids_train <- rfx_group_ids[train_inds] 69 | rfx_basis_test <- rfx_basis[test_inds,] 70 | rfx_basis_train <- rfx_basis[train_inds,] 71 | rfx_term_test <- rfx_term[test_inds] 72 | rfx_term_train <- rfx_term[train_inds] 73 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, 
y_train = y_train, 74 | propensity_train = pi_train, 75 | rfx_group_ids_train = rfx_group_ids_train, 76 | rfx_basis_train = rfx_basis_train, X_test = X_test, 77 | Z_test = Z_test, propensity_test = pi_test, 78 | rfx_group_ids_test = rfx_group_ids_test, 79 | rfx_basis_test = rfx_basis_test, 80 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 81 | bcf_json_string_list <- list(saveBCFModelToJsonString(bcf_model)) 82 | bcf_model_roundtrip <- createBCFModelFromCombinedJsonString(bcf_json_string_list) 83 | } 84 | -------------------------------------------------------------------------------- /man/createBCFModelFromJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{createBCFModelFromJson} 4 | \alias{createBCFModelFromJson} 5 | \title{Convert an (in-memory) JSON representation of a BCF model to a BCF model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | createBCFModelFromJson(json_object) 9 | } 10 | \arguments{ 11 | \item{json_object}{Object of type \code{CppJson} containing Json representation of a BCF model} 12 | } 13 | \value{ 14 | Object of type \code{bcfmodel} 15 | } 16 | \description{ 17 | Convert an (in-memory) JSON representation of a BCF model to a BCF model object 18 | which can be used for prediction, etc... 
19 | } 20 | \examples{ 21 | n <- 500 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | mu_x <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | pi_x <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 35 | ) 36 | tau_x <- ( 37 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 38 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 39 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 40 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 41 | ) 42 | Z <- rbinom(n, 1, pi_x) 43 | E_XZ <- mu_x + Z*tau_x 44 | snr <- 3 45 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 46 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 47 | rfx_basis <- cbind(1, runif(n, -1, 1)) 48 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 49 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | pi_test <- pi_x[test_inds] 58 | pi_train <- pi_x[train_inds] 59 | Z_test <- Z[test_inds] 60 | Z_train <- Z[train_inds] 61 | y_test <- y[test_inds] 62 | y_train <- y[train_inds] 63 | mu_test <- mu_x[test_inds] 64 | mu_train <- mu_x[train_inds] 65 | tau_test <- tau_x[test_inds] 66 | tau_train <- tau_x[train_inds] 67 | rfx_group_ids_test <- rfx_group_ids[test_inds] 68 | rfx_group_ids_train <- rfx_group_ids[train_inds] 69 | rfx_basis_test <- rfx_basis[test_inds,] 70 | rfx_basis_train <- rfx_basis[train_inds,] 71 | rfx_term_test <- rfx_term[test_inds] 72 | rfx_term_train <- rfx_term[train_inds] 73 | mu_params <- list(sample_sigma2_leaf = TRUE) 74 | 
tau_params <- list(sample_sigma2_leaf = FALSE) 75 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, 76 | propensity_train = pi_train, 77 | rfx_group_ids_train = rfx_group_ids_train, 78 | rfx_basis_train = rfx_basis_train, X_test = X_test, 79 | Z_test = Z_test, propensity_test = pi_test, 80 | rfx_group_ids_test = rfx_group_ids_test, 81 | rfx_basis_test = rfx_basis_test, 82 | num_gfr = 10, num_burnin = 0, num_mcmc = 10, 83 | prognostic_forest_params = mu_params, 84 | treatment_effect_forest_params = tau_params) 85 | bcf_json <- saveBCFModelToJson(bcf_model) 86 | bcf_model_roundtrip <- createBCFModelFromJson(bcf_json) 87 | } 88 | -------------------------------------------------------------------------------- /man/createBCFModelFromJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{createBCFModelFromJsonString} 4 | \alias{createBCFModelFromJsonString} 5 | \title{Convert a JSON string containing sample information on a trained BCF model 6 | to a BCF model object which can be used for prediction, etc...} 7 | \usage{ 8 | createBCFModelFromJsonString(json_string) 9 | } 10 | \arguments{ 11 | \item{json_string}{JSON string dump} 12 | } 13 | \value{ 14 | Object of type \code{bcfmodel} 15 | } 16 | \description{ 17 | Convert a JSON string containing sample information on a trained BCF model 18 | to a BCF model object which can be used for prediction, etc... 
19 | } 20 | \examples{ 21 | n <- 500 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | mu_x <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | pi_x <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 35 | ) 36 | tau_x <- ( 37 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 38 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 39 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 40 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 41 | ) 42 | Z <- rbinom(n, 1, pi_x) 43 | E_XZ <- mu_x + Z*tau_x 44 | snr <- 3 45 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 46 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 47 | rfx_basis <- cbind(1, runif(n, -1, 1)) 48 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 49 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | pi_test <- pi_x[test_inds] 58 | pi_train <- pi_x[train_inds] 59 | Z_test <- Z[test_inds] 60 | Z_train <- Z[train_inds] 61 | y_test <- y[test_inds] 62 | y_train <- y[train_inds] 63 | mu_test <- mu_x[test_inds] 64 | mu_train <- mu_x[train_inds] 65 | tau_test <- tau_x[test_inds] 66 | tau_train <- tau_x[train_inds] 67 | rfx_group_ids_test <- rfx_group_ids[test_inds] 68 | rfx_group_ids_train <- rfx_group_ids[train_inds] 69 | rfx_basis_test <- rfx_basis[test_inds,] 70 | rfx_basis_train <- rfx_basis[train_inds,] 71 | rfx_term_test <- rfx_term[test_inds] 72 | rfx_term_train <- rfx_term[train_inds] 73 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, 
y_train = y_train, 74 | propensity_train = pi_train, 75 | rfx_group_ids_train = rfx_group_ids_train, 76 | rfx_basis_train = rfx_basis_train, X_test = X_test, 77 | Z_test = Z_test, propensity_test = pi_test, 78 | rfx_group_ids_test = rfx_group_ids_test, 79 | rfx_basis_test = rfx_basis_test, 80 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 81 | bcf_json <- saveBCFModelToJsonString(bcf_model) 82 | bcf_model_roundtrip <- createBCFModelFromJsonString(bcf_json) 83 | } 84 | -------------------------------------------------------------------------------- /man/createCppJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{createCppJson} 4 | \alias{createCppJson} 5 | \title{Create a new (empty) C++ Json object} 6 | \usage{ 7 | createCppJson() 8 | } 9 | \value{ 10 | \code{CppJson} object 11 | } 12 | \description{ 13 | Create a new (empty) C++ Json object 14 | } 15 | \examples{ 16 | example_vec <- runif(10) 17 | example_json <- createCppJson() 18 | example_json$add_vector("myvec", example_vec) 19 | } 20 | -------------------------------------------------------------------------------- /man/createCppJsonFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{createCppJsonFile} 4 | \alias{createCppJsonFile} 5 | \title{Create a C++ Json object from a Json file} 6 | \usage{ 7 | createCppJsonFile(json_filename) 8 | } 9 | \arguments{ 10 | \item{json_filename}{Name of file to read. 
Must end in \code{.json}.} 11 | } 12 | \value{ 13 | \code{CppJson} object 14 | } 15 | \description{ 16 | Create a C++ Json object from a Json file 17 | } 18 | \examples{ 19 | example_vec <- runif(10) 20 | example_json <- createCppJson() 21 | example_json$add_vector("myvec", example_vec) 22 | tmpjson <- tempfile(fileext = ".json") 23 | example_json$save_file(file.path(tmpjson)) 24 | example_json_roundtrip <- createCppJsonFile(file.path(tmpjson)) 25 | unlink(tmpjson) 26 | } 27 | -------------------------------------------------------------------------------- /man/createCppJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{createCppJsonString} 4 | \alias{createCppJsonString} 5 | \title{Create a C++ Json object from a Json string} 6 | \usage{ 7 | createCppJsonString(json_string) 8 | } 9 | \arguments{ 10 | \item{json_string}{JSON string dump} 11 | } 12 | \value{ 13 | \code{CppJson} object 14 | } 15 | \description{ 16 | Create a C++ Json object from a Json string 17 | } 18 | \examples{ 19 | example_vec <- runif(10) 20 | example_json <- createCppJson() 21 | example_json$add_vector("myvec", example_vec) 22 | example_json_string <- example_json$return_json_string() 23 | example_json_roundtrip <- createCppJsonString(example_json_string) 24 | } 25 | -------------------------------------------------------------------------------- /man/createCppRNG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model.R 3 | \name{createCppRNG} 4 | \alias{createCppRNG} 5 | \title{Create an R class that wraps a C++ random number generator} 6 | \usage{ 7 | createCppRNG(random_seed = -1) 8 | } 9 | \arguments{ 10 | \item{random_seed}{(Optional) random seed for sampling} 11 | } 12 | \value{ 13 | \code{CppRng} object 14 | } 15 
| \description{ 16 | Create an R class that wraps a C++ random number generator 17 | } 18 | \examples{ 19 | rng <- createCppRNG(1234) 20 | rng <- createCppRNG() 21 | } 22 | -------------------------------------------------------------------------------- /man/createForest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forest.R 3 | \name{createForest} 4 | \alias{createForest} 5 | \title{Create a forest} 6 | \usage{ 7 | createForest( 8 | num_trees, 9 | leaf_dimension = 1, 10 | is_leaf_constant = FALSE, 11 | is_exponentiated = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{num_trees}{Number of trees in the forest} 16 | 17 | \item{leaf_dimension}{Dimensionality of the outcome model} 18 | 19 | \item{is_leaf_constant}{Whether leaf is constant} 20 | 21 | \item{is_exponentiated}{Whether forest predictions should be exponentiated before being returned} 22 | } 23 | \value{ 24 | \code{Forest} object 25 | } 26 | \description{ 27 | Create a forest 28 | } 29 | \examples{ 30 | num_trees <- 100 31 | leaf_dimension <- 2 32 | is_leaf_constant <- FALSE 33 | is_exponentiated <- FALSE 34 | forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 35 | } 36 | -------------------------------------------------------------------------------- /man/createForestDataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{createForestDataset} 4 | \alias{createForestDataset} 5 | \title{Create a forest dataset object} 6 | \usage{ 7 | createForestDataset(covariates, basis = NULL, variance_weights = NULL) 8 | } 9 | \arguments{ 10 | \item{covariates}{Matrix of covariates} 11 | 12 | \item{basis}{(Optional) Matrix of bases used to define a leaf regression} 13 | 14 | \item{variance_weights}{(Optional) Vector of 
observation-specific variance weights} 15 | } 16 | \value{ 17 | \code{ForestDataset} object 18 | } 19 | \description{ 20 | Create a forest dataset object 21 | } 22 | \examples{ 23 | covariate_matrix <- matrix(runif(10*100), ncol = 10) 24 | basis_matrix <- matrix(rnorm(3*100), ncol = 3) 25 | weight_vector <- rnorm(100) 26 | forest_dataset <- createForestDataset(covariate_matrix) 27 | forest_dataset <- createForestDataset(covariate_matrix, basis_matrix) 28 | forest_dataset <- createForestDataset(covariate_matrix, basis_matrix, weight_vector) 29 | } 30 | -------------------------------------------------------------------------------- /man/createForestModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model.R 3 | \name{createForestModel} 4 | \alias{createForestModel} 5 | \title{Create a forest model object} 6 | \usage{ 7 | createForestModel(forest_dataset, forest_model_config, global_model_config) 8 | } 9 | \arguments{ 10 | \item{forest_dataset}{ForestDataset object, used to initialize forest sampling data structures} 11 | 12 | \item{forest_model_config}{ForestModelConfig object containing forest model parameters and settings} 13 | 14 | \item{global_model_config}{GlobalModelConfig object containing global model parameters and settings} 15 | } 16 | \value{ 17 | \code{ForestModel} object 18 | } 19 | \description{ 20 | Create a forest model object 21 | } 22 | \examples{ 23 | num_trees <- 100 24 | n <- 100 25 | p <- 10 26 | alpha <- 0.95 27 | beta <- 2.0 28 | min_samples_leaf <- 2 29 | max_depth <- 10 30 | feature_types <- as.integer(rep(0, p)) 31 | X <- matrix(runif(n*p), ncol = p) 32 | forest_dataset <- createForestDataset(X) 33 | forest_model_config <- createForestModelConfig(feature_types=feature_types, 34 | num_trees=num_trees, num_features=p, 35 | num_observations=n, alpha=alpha, beta=beta, 36 | min_samples_leaf=min_samples_leaf, 37 | 
max_depth=max_depth, leaf_model_type=1) 38 | global_model_config <- createGlobalModelConfig(global_error_variance=1.0) 39 | forest_model <- createForestModel(forest_dataset, forest_model_config, global_model_config) 40 | } 41 | -------------------------------------------------------------------------------- /man/createForestModelConfig.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{createForestModelConfig} 4 | \alias{createForestModelConfig} 5 | \title{Create a forest model config object} 6 | \usage{ 7 | createForestModelConfig( 8 | feature_types = NULL, 9 | sweep_update_indices = NULL, 10 | num_trees = NULL, 11 | num_features = NULL, 12 | num_observations = NULL, 13 | variable_weights = NULL, 14 | leaf_dimension = 1, 15 | alpha = 0.95, 16 | beta = 2, 17 | min_samples_leaf = 5, 18 | max_depth = -1, 19 | leaf_model_type = 1, 20 | leaf_model_scale = NULL, 21 | variance_forest_shape = 1, 22 | variance_forest_scale = 1, 23 | cutpoint_grid_size = 100 24 | ) 25 | } 26 | \arguments{ 27 | \item{feature_types}{Vector of integer-coded feature types (integers where 0 = numeric, 1 = ordered categorical, 2 = unordered categorical)} 28 | 29 | \item{sweep_update_indices}{Vector of (0-indexed) indices of trees to update in a sweep} 30 | 31 | \item{num_trees}{Number of trees in the forest being sampled} 32 | 33 | \item{num_features}{Number of features in training dataset} 34 | 35 | \item{num_observations}{Number of observations in training dataset} 36 | 37 | \item{variable_weights}{Vector specifying sampling probability for all p covariates in ForestDataset} 38 | 39 | \item{leaf_dimension}{Dimension of the leaf model (default: \code{1})} 40 | 41 | \item{alpha}{Root node split probability in tree prior (default: \code{0.95})} 42 | 43 | \item{beta}{Depth prior penalty in tree prior (default: \code{2.0})} 44 | 45 | 
\item{min_samples_leaf}{Minimum number of samples in a tree leaf (default: \code{5})} 46 | 47 | \item{max_depth}{Maximum depth of any tree in the ensemble in the model. Setting to \code{-1} does not enforce any depth limits on trees. Default: \code{-1}.} 48 | 49 | \item{leaf_model_type}{Integer specifying the leaf model type (0 = constant leaf, 1 = univariate leaf regression, 2 = multivariate leaf regression). Default: \code{1}.} 50 | 51 | \item{leaf_model_scale}{Scale parameter used in Gaussian leaf models (can either be a scalar or a q x q matrix, where q is the dimensionality of the basis and is only >1 when \code{leaf_model_type = 2}). Calibrated internally as \code{1/num_trees}, propagated along diagonal if needed for multivariate leaf models.} 52 | 53 | \item{variance_forest_shape}{Shape parameter for IG leaf models (applicable when \code{leaf_model_type = 3}). Default: \code{1}.} 54 | 55 | \item{variance_forest_scale}{Scale parameter for IG leaf models (applicable when \code{leaf_model_type = 3}). 
Default: \code{1}.} 56 | 57 | \item{cutpoint_grid_size}{Number of unique cutpoints to consider (default: \code{100})} 58 | } 59 | \value{ 60 | ForestModelConfig object 61 | } 62 | \description{ 63 | Create a forest model config object 64 | } 65 | \examples{ 66 | config <- createForestModelConfig(num_trees = 10, num_features = 5, num_observations = 100) 67 | } 68 | -------------------------------------------------------------------------------- /man/createForestSamples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forest.R 3 | \name{createForestSamples} 4 | \alias{createForestSamples} 5 | \title{Create a container of forest samples} 6 | \usage{ 7 | createForestSamples( 8 | num_trees, 9 | leaf_dimension = 1, 10 | is_leaf_constant = FALSE, 11 | is_exponentiated = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{num_trees}{Number of trees} 16 | 17 | \item{leaf_dimension}{Dimensionality of the outcome model} 18 | 19 | \item{is_leaf_constant}{Whether leaf is constant} 20 | 21 | \item{is_exponentiated}{Whether forest predictions should be exponentiated before being returned} 22 | } 23 | \value{ 24 | \code{ForestSamples} object 25 | } 26 | \description{ 27 | Create a container of forest samples 28 | } 29 | \examples{ 30 | num_trees <- 100 31 | leaf_dimension <- 2 32 | is_leaf_constant <- FALSE 33 | is_exponentiated <- FALSE 34 | forest_samples <- createForestSamples(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 35 | } 36 | -------------------------------------------------------------------------------- /man/createGlobalModelConfig.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{createGlobalModelConfig} 4 | \alias{createGlobalModelConfig} 5 | \title{Create a global model config object} 6 | 
\usage{ 7 | createGlobalModelConfig(global_error_variance = 1) 8 | } 9 | \arguments{ 10 | \item{global_error_variance}{Global error variance parameter (default: \code{1.0})} 11 | } 12 | \value{ 13 | GlobalModelConfig object 14 | } 15 | \description{ 16 | Create a global model config object 17 | } 18 | \examples{ 19 | config <- createGlobalModelConfig(global_error_variance = 100) 20 | } 21 | -------------------------------------------------------------------------------- /man/createOutcome.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{createOutcome} 4 | \alias{createOutcome} 5 | \title{Create an outcome object} 6 | \usage{ 7 | createOutcome(outcome) 8 | } 9 | \arguments{ 10 | \item{outcome}{Vector of outcome values} 11 | } 12 | \value{ 13 | \code{Outcome} object 14 | } 15 | \description{ 16 | Create an outcome object 17 | } 18 | \examples{ 19 | X <- matrix(runif(10*100), ncol = 10) 20 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 21 | outcome <- createOutcome(y) 22 | } 23 | -------------------------------------------------------------------------------- /man/createPreprocessorFromJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{createPreprocessorFromJson} 4 | \alias{createPreprocessorFromJson} 5 | \title{Reload a covariate preprocessor object from a JSON string containing a serialized preprocessor} 6 | \usage{ 7 | createPreprocessorFromJson(json_object) 8 | } 9 | \arguments{ 10 | \item{json_object}{in-memory wrapper around JSON C++ object containing covariate preprocessor metadata} 11 | } 12 | \value{ 13 | Preprocessor object that can be used with the \code{preprocessPredictionData} function 14 | } 15 | \description{ 16 | Reload a covariate preprocessor object from a JSON string 
containing a serialized preprocessor 17 | } 18 | \examples{ 19 | cov_mat <- matrix(1:12, ncol = 3) 20 | preprocess_list <- preprocessTrainData(cov_mat) 21 | preprocessor_json <- convertPreprocessorToJson(preprocess_list$metadata) 22 | preprocessor_roundtrip <- createPreprocessorFromJson(preprocessor_json) 23 | } 24 | -------------------------------------------------------------------------------- /man/createPreprocessorFromJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{createPreprocessorFromJsonString} 4 | \alias{createPreprocessorFromJsonString} 5 | \title{Reload a covariate preprocessor object from a JSON string containing a serialized preprocessor} 6 | \usage{ 7 | createPreprocessorFromJsonString(json_string) 8 | } 9 | \arguments{ 10 | \item{json_string}{in-memory JSON string containing covariate preprocessor metadata} 11 | } 12 | \value{ 13 | Preprocessor object that can be used with the \code{preprocessPredictionData} function 14 | } 15 | \description{ 16 | Reload a covariate preprocessor object from a JSON string containing a serialized preprocessor 17 | } 18 | \examples{ 19 | cov_mat <- matrix(1:12, ncol = 3) 20 | preprocess_list <- preprocessTrainData(cov_mat) 21 | preprocessor_json_string <- savePreprocessorToJsonString(preprocess_list$metadata) 22 | preprocessor_roundtrip <- createPreprocessorFromJsonString(preprocessor_json_string) 23 | } 24 | -------------------------------------------------------------------------------- /man/createRandomEffectSamples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{createRandomEffectSamples} 4 | \alias{createRandomEffectSamples} 5 | \title{Create a \code{RandomEffectSamples} object} 6 | \usage{ 7 | 
createRandomEffectSamples(num_components, num_groups, random_effects_tracker) 8 | } 9 | \arguments{ 10 | \item{num_components}{Number of "components" or bases defining the random effects regression} 11 | 12 | \item{num_groups}{Number of random effects groups} 13 | 14 | \item{random_effects_tracker}{Object of type \code{RandomEffectsTracker}} 15 | } 16 | \value{ 17 | \code{RandomEffectSamples} object 18 | } 19 | \description{ 20 | Create a \code{RandomEffectSamples} object 21 | } 22 | \examples{ 23 | n <- 100 24 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 25 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 26 | num_groups <- length(unique(rfx_group_ids)) 27 | num_components <- ncol(rfx_basis) 28 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 29 | rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) 30 | } 31 | -------------------------------------------------------------------------------- /man/createRandomEffectsDataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{createRandomEffectsDataset} 4 | \alias{createRandomEffectsDataset} 5 | \title{Create a random effects dataset object} 6 | \usage{ 7 | createRandomEffectsDataset(group_labels, basis, variance_weights = NULL) 8 | } 9 | \arguments{ 10 | \item{group_labels}{Vector of group labels} 11 | 12 | \item{basis}{Matrix of bases used to define the random effects regression (for an intercept-only model, pass an array of ones)} 13 | 14 | \item{variance_weights}{(Optional) Vector of observation-specific variance weights} 15 | } 16 | \value{ 17 | \code{RandomEffectsDataset} object 18 | } 19 | \description{ 20 | Create a random effects dataset object 21 | } 22 | \examples{ 23 | rfx_group_ids <- sample(1:2, size = 100, replace = TRUE) 24 | rfx_basis <- matrix(rnorm(3*100), ncol = 3) 25 | weight_vector <- rnorm(100) 26 | rfx_dataset <- 
createRandomEffectsDataset(rfx_group_ids, rfx_basis) 27 | rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis, weight_vector) 28 | } 29 | -------------------------------------------------------------------------------- /man/createRandomEffectsModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{createRandomEffectsModel} 4 | \alias{createRandomEffectsModel} 5 | \title{Create a \code{RandomEffectsModel} object} 6 | \usage{ 7 | createRandomEffectsModel(num_components, num_groups) 8 | } 9 | \arguments{ 10 | \item{num_components}{Number of "components" or bases defining the random effects regression} 11 | 12 | \item{num_groups}{Number of random effects groups} 13 | } 14 | \value{ 15 | \code{RandomEffectsModel} object 16 | } 17 | \description{ 18 | Create a \code{RandomEffectsModel} object 19 | } 20 | \examples{ 21 | n <- 100 22 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 23 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 24 | num_groups <- length(unique(rfx_group_ids)) 25 | num_components <- ncol(rfx_basis) 26 | rfx_model <- createRandomEffectsModel(num_components, num_groups) 27 | } 28 | -------------------------------------------------------------------------------- /man/createRandomEffectsTracker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{createRandomEffectsTracker} 4 | \alias{createRandomEffectsTracker} 5 | \title{Create a \code{RandomEffectsTracker} object} 6 | \usage{ 7 | createRandomEffectsTracker(rfx_group_indices) 8 | } 9 | \arguments{ 10 | \item{rfx_group_indices}{Integer indices indicating groups used to define random effects} 11 | } 12 | \value{ 13 | \code{RandomEffectsTracker} object 14 | } 15 | \description{ 16 | Create a 
\code{RandomEffectsTracker} object 17 | } 18 | \examples{ 19 | n <- 100 20 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 21 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 22 | num_groups <- length(unique(rfx_group_ids)) 23 | num_components <- ncol(rfx_basis) 24 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 25 | } 26 | -------------------------------------------------------------------------------- /man/getRandomEffectSamples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generics.R 3 | \name{getRandomEffectSamples} 4 | \alias{getRandomEffectSamples} 5 | \title{Generic function for extracting random effect samples from a model object (BCF, BART, etc...)} 6 | \usage{ 7 | getRandomEffectSamples(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{Fitted model object from which to extract random effects} 11 | 12 | \item{...}{Other parameters to be used in random effects extraction} 13 | } 14 | \value{ 15 | List of random effect samples 16 | } 17 | \description{ 18 | Generic function for extracting random effect samples from a model object (BCF, BART, etc...) 
19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 10 23 | X <- matrix(runif(n*p), ncol = p) 24 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 25 | rfx_basis <- rep(1.0, n) 26 | y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 27 | bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, 28 | rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) 29 | rfx_samples <- getRandomEffectSamples(bart_model) 30 | } 31 | -------------------------------------------------------------------------------- /man/getRandomEffectSamples.bartmodel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{getRandomEffectSamples.bartmodel} 4 | \alias{getRandomEffectSamples.bartmodel} 5 | \title{Extract raw sample values for each of the random effect parameter terms.} 6 | \usage{ 7 | \method{getRandomEffectSamples}{bartmodel}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} 11 | 12 | \item{...}{Other parameters to be used in random effects extraction} 13 | } 14 | \value{ 15 | List of arrays. The alpha array has dimension (\code{num_components}, \code{num_samples}) and is simply a vector if \code{num_components = 1}. 16 | The xi and beta arrays have dimension (\code{num_components}, \code{num_groups}, \code{num_samples}) and are each simply a matrix if \code{num_components = 1}. 17 | The sigma array has dimension (\code{num_components}, \code{num_samples}) and is simply a vector if \code{num_components = 1}. 18 | } 19 | \description{ 20 | Extract raw sample values for each of the random effect parameter terms. 
21 | } 22 | \examples{ 23 | n <- 100 24 | p <- 5 25 | X <- matrix(runif(n*p), ncol = p) 26 | f_XW <- ( 27 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 28 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 29 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 30 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 31 | ) 32 | snr <- 3 33 | group_ids <- rep(c(1,2), n \%/\% 2) 34 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 35 | rfx_basis <- cbind(1, runif(n, -1, 1)) 36 | rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) 37 | E_y <- f_XW + rfx_term 38 | y <- E_y + rnorm(n, 0, 1)*(sd(E_y)/snr) 39 | test_set_pct <- 0.2 40 | n_test <- round(test_set_pct*n) 41 | n_train <- n - n_test 42 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 43 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 44 | X_test <- X[test_inds,] 45 | X_train <- X[train_inds,] 46 | y_test <- y[test_inds] 47 | y_train <- y[train_inds] 48 | rfx_group_ids_test <- group_ids[test_inds] 49 | rfx_group_ids_train <- group_ids[train_inds] 50 | rfx_basis_test <- rfx_basis[test_inds,] 51 | rfx_basis_train <- rfx_basis[train_inds,] 52 | rfx_term_test <- rfx_term[test_inds] 53 | rfx_term_train <- rfx_term[train_inds] 54 | bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test, 55 | rfx_group_ids_train = rfx_group_ids_train, 56 | rfx_group_ids_test = rfx_group_ids_test, 57 | rfx_basis_train = rfx_basis_train, 58 | rfx_basis_test = rfx_basis_test, 59 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 60 | rfx_samples <- getRandomEffectSamples(bart_model) 61 | } 62 | -------------------------------------------------------------------------------- /man/loadForestContainerCombinedJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadForestContainerCombinedJson} 4 | \alias{loadForestContainerCombinedJson} 5 | \title{Combine multiple JSON model objects 
containing forests (with the same hierarchy / schema) into a single forest_container} 6 | \usage{ 7 | loadForestContainerCombinedJson(json_object_list, json_forest_label) 8 | } 9 | \arguments{ 10 | \item{json_object_list}{List of objects of class \code{CppJson}} 11 | 12 | \item{json_forest_label}{Label referring to a particular forest (i.e. "forest_0") in the overall json hierarchy (must exist in every json object in the list)} 13 | } 14 | \value{ 15 | \code{ForestSamples} object 16 | } 17 | \description{ 18 | Combine multiple JSON model objects containing forests (with the same hierarchy / schema) into a single forest_container 19 | } 20 | \examples{ 21 | X <- matrix(runif(10*100), ncol = 10) 22 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 23 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 24 | bart_json <- list(saveBARTModelToJson(bart_model)) 25 | mean_forest <- loadForestContainerCombinedJson(bart_json, "forest_0") 26 | } 27 | -------------------------------------------------------------------------------- /man/loadForestContainerCombinedJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadForestContainerCombinedJsonString} 4 | \alias{loadForestContainerCombinedJsonString} 5 | \title{Combine multiple JSON strings representing model objects containing forests (with the same hierarchy / schema) into a single forest_container} 6 | \usage{ 7 | loadForestContainerCombinedJsonString(json_string_list, json_forest_label) 8 | } 9 | \arguments{ 10 | \item{json_string_list}{List of strings that parse into objects of type \code{CppJson}} 11 | 12 | \item{json_forest_label}{Label referring to a particular forest (i.e. 
"forest_0") in the overall json hierarchy (must exist in every json object in the list)} 13 | } 14 | \value{ 15 | \code{ForestSamples} object 16 | } 17 | \description{ 18 | Combine multiple JSON strings representing model objects containing forests (with the same hierarchy / schema) into a single forest_container 19 | } 20 | \examples{ 21 | X <- matrix(runif(10*100), ncol = 10) 22 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 23 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 24 | bart_json_string <- list(saveBARTModelToJsonString(bart_model)) 25 | mean_forest <- loadForestContainerCombinedJsonString(bart_json_string, "forest_0") 26 | } 27 | -------------------------------------------------------------------------------- /man/loadForestContainerJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadForestContainerJson} 4 | \alias{loadForestContainerJson} 5 | \title{Load a container of forest samples from json} 6 | \usage{ 7 | loadForestContainerJson(json_object, json_forest_label) 8 | } 9 | \arguments{ 10 | \item{json_object}{Object of class \code{CppJson}} 11 | 12 | \item{json_forest_label}{Label referring to a particular forest (i.e. 
"forest_0") in the overall json hierarchy} 13 | } 14 | \value{ 15 | \code{ForestSamples} object 16 | } 17 | \description{ 18 | Load a container of forest samples from json 19 | } 20 | \examples{ 21 | X <- matrix(runif(10*100), ncol = 10) 22 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 23 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 24 | bart_json <- saveBARTModelToJson(bart_model) 25 | mean_forest <- loadForestContainerJson(bart_json, "forest_0") 26 | } 27 | -------------------------------------------------------------------------------- /man/loadRandomEffectSamplesCombinedJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadRandomEffectSamplesCombinedJson} 4 | \alias{loadRandomEffectSamplesCombinedJson} 5 | \title{Combine multiple JSON model objects containing random effects (with the same hierarchy / schema) into a single container} 6 | \usage{ 7 | loadRandomEffectSamplesCombinedJson(json_object_list, json_rfx_num) 8 | } 9 | \arguments{ 10 | \item{json_object_list}{List of objects of class \code{CppJson}} 11 | 12 | \item{json_rfx_num}{Integer index indicating the position of the random effects term to be unpacked} 13 | } 14 | \value{ 15 | \code{RandomEffectSamples} object 16 | } 17 | \description{ 18 | Combine multiple JSON model objects containing random effects (with the same hierarchy / schema) into a single container 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 10 23 | X <- matrix(runif(n*p), ncol = p) 24 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 25 | rfx_basis <- rep(1.0, n) 26 | y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 27 | bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, 28 | rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) 29 | bart_json <- list(saveBARTModelToJson(bart_model)) 30 | rfx_samples <- 
loadRandomEffectSamplesCombinedJson(bart_json, 0) 31 | } 32 | -------------------------------------------------------------------------------- /man/loadRandomEffectSamplesCombinedJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadRandomEffectSamplesCombinedJsonString} 4 | \alias{loadRandomEffectSamplesCombinedJsonString} 5 | \title{Combine multiple JSON strings representing model objects containing random effects (with the same hierarchy / schema) into a single container} 6 | \usage{ 7 | loadRandomEffectSamplesCombinedJsonString(json_string_list, json_rfx_num) 8 | } 9 | \arguments{ 10 | \item{json_string_list}{List of strings that parse into objects of type \code{CppJson}} 11 | 12 | \item{json_rfx_num}{Integer index indicating the position of the random effects term to be unpacked} 13 | } 14 | \value{ 15 | \code{RandomEffectSamples} object 16 | } 17 | \description{ 18 | Combine multiple JSON strings representing model objects containing random effects (with the same hierarchy / schema) into a single container 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 10 23 | X <- matrix(runif(n*p), ncol = p) 24 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 25 | rfx_basis <- rep(1.0, n) 26 | y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 27 | bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, 28 | rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) 29 | bart_json_string <- list(saveBARTModelToJsonString(bart_model)) 30 | rfx_samples <- loadRandomEffectSamplesCombinedJsonString(bart_json_string, 0) 31 | } 32 | -------------------------------------------------------------------------------- /man/loadRandomEffectSamplesJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please 
edit documentation in R/serialization.R 3 | \name{loadRandomEffectSamplesJson} 4 | \alias{loadRandomEffectSamplesJson} 5 | \title{Load a container of random effect samples from json} 6 | \usage{ 7 | loadRandomEffectSamplesJson(json_object, json_rfx_num) 8 | } 9 | \arguments{ 10 | \item{json_object}{Object of class \code{CppJson}} 11 | 12 | \item{json_rfx_num}{Integer index indicating the position of the random effects term to be unpacked} 13 | } 14 | \value{ 15 | \code{RandomEffectSamples} object 16 | } 17 | \description{ 18 | Load a container of random effect samples from json 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 10 23 | X <- matrix(runif(n*p), ncol = p) 24 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 25 | rfx_basis <- rep(1.0, n) 26 | y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 27 | bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, 28 | rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) 29 | bart_json <- saveBARTModelToJson(bart_model) 30 | rfx_samples <- loadRandomEffectSamplesJson(bart_json, 0) 31 | } 32 | -------------------------------------------------------------------------------- /man/loadScalarJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadScalarJson} 4 | \alias{loadScalarJson} 5 | \title{Load a scalar from json} 6 | \usage{ 7 | loadScalarJson(json_object, json_scalar_label, subfolder_name = NULL) 8 | } 9 | \arguments{ 10 | \item{json_object}{Object of class \code{CppJson}} 11 | 12 | \item{json_scalar_label}{Label referring to a particular scalar / string value (i.e. 
"num_samples") in the overall json hierarchy} 13 | 14 | \item{subfolder_name}{(Optional) Name of the subfolder / hierarchy under which scalar sits} 15 | } 16 | \value{ 17 | R scalar 18 | } 19 | \description{ 20 | Load a scalar from json 21 | } 22 | \examples{ 23 | example_scalar <- 5.4 24 | example_json <- createCppJson() 25 | example_json$add_scalar("myscalar", example_scalar) 26 | roundtrip_scalar <- loadScalarJson(example_json, "myscalar") 27 | } 28 | -------------------------------------------------------------------------------- /man/loadVectorJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadVectorJson} 4 | \alias{loadVectorJson} 5 | \title{Load a vector from json} 6 | \usage{ 7 | loadVectorJson(json_object, json_vector_label, subfolder_name = NULL) 8 | } 9 | \arguments{ 10 | \item{json_object}{Object of class \code{CppJson}} 11 | 12 | \item{json_vector_label}{Label referring to a particular vector (i.e. 
"sigma2_global_samples") in the overall json hierarchy} 13 | 14 | \item{subfolder_name}{(Optional) Name of the subfolder / hierarchy under which vector sits} 15 | } 16 | \value{ 17 | R vector 18 | } 19 | \description{ 20 | Load a vector from json 21 | } 22 | \examples{ 23 | example_vec <- runif(10) 24 | example_json <- createCppJson() 25 | example_json$add_vector("myvec", example_vec) 26 | roundtrip_vec <- loadVectorJson(example_json, "myvec") 27 | } 28 | -------------------------------------------------------------------------------- /man/predict.bartmodel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{predict.bartmodel} 4 | \alias{predict.bartmodel} 5 | \title{Predict from a sampled BART model on new data} 6 | \usage{ 7 | \method{predict}{bartmodel}( 8 | object, 9 | X, 10 | leaf_basis = NULL, 11 | rfx_group_ids = NULL, 12 | rfx_basis = NULL, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{object}{Object of type \code{bartmodel} containing draws of a regression forest and associated sampling outputs.} 18 | 19 | \item{X}{Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe.} 20 | 21 | \item{leaf_basis}{(Optional) Bases used for prediction (by e.g. dot product with leaf values). Default: \code{NULL}.} 22 | 23 | \item{rfx_group_ids}{(Optional) Test set group labels used for an additive random effects model. 24 | We do not currently support (but plan to in the near future), test set evaluation for group labels 25 | that were not in the training set.} 26 | 27 | \item{rfx_basis}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} 28 | 29 | \item{...}{(Optional) Other prediction parameters.} 30 | } 31 | \value{ 32 | List of prediction matrices. 
If model does not have random effects, the list has one element -- the predictions from the forest. 33 | If the model does have random effects, the list has three elements -- forest predictions, random effects predictions, and their sum (\code{y_hat}). 34 | } 35 | \description{ 36 | Predict from a sampled BART model on new data 37 | } 38 | \examples{ 39 | n <- 100 40 | p <- 5 41 | X <- matrix(runif(n*p), ncol = p) 42 | f_XW <- ( 43 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 44 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 45 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 46 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 47 | ) 48 | noise_sd <- 1 49 | y <- f_XW + rnorm(n, 0, noise_sd) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | y_test <- y[test_inds] 58 | y_train <- y[train_inds] 59 | bart_model <- bart(X_train = X_train, y_train = y_train, 60 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 61 | y_hat_test <- predict(bart_model, X_test)$y_hat 62 | } 63 | -------------------------------------------------------------------------------- /man/predict.bcfmodel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{predict.bcfmodel} 4 | \alias{predict.bcfmodel} 5 | \title{Predict from a sampled BCF model on new data} 6 | \usage{ 7 | \method{predict}{bcfmodel}( 8 | object, 9 | X, 10 | Z, 11 | propensity = NULL, 12 | rfx_group_ids = NULL, 13 | rfx_basis = NULL, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} 19 | 20 | \item{X}{Covariates used to determine tree leaf predictions for each observation. 
Must be passed as a matrix or dataframe.} 21 | 22 | \item{Z}{Treatments used for prediction.} 23 | 24 | \item{propensity}{(Optional) Propensities used for prediction.} 25 | 26 | \item{rfx_group_ids}{(Optional) Test set group labels used for an additive random effects model. 27 | We do not currently support (but plan to in the near future), test set evaluation for group labels 28 | that were not in the training set.} 29 | 30 | \item{rfx_basis}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} 31 | 32 | \item{...}{(Optional) Other prediction parameters.} 33 | } 34 | \value{ 35 | List of 3-5 \code{nrow(X)} by \code{object$num_samples} matrices: prognostic function estimates, treatment effect estimates, (optionally) random effects predictions, (optionally) variance forest predictions, and outcome predictions. 36 | } 37 | \description{ 38 | Predict from a sampled BCF model on new data 39 | } 40 | \examples{ 41 | n <- 500 42 | p <- 5 43 | X <- matrix(runif(n*p), ncol = p) 44 | mu_x <- ( 45 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 46 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 47 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 48 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 49 | ) 50 | pi_x <- ( 51 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 52 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 53 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 54 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 55 | ) 56 | tau_x <- ( 57 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 58 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 59 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 60 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 61 | ) 62 | Z <- rbinom(n, 1, pi_x) 63 | noise_sd <- 1 64 | y <- mu_x + tau_x*Z + rnorm(n, 0, noise_sd) 65 | test_set_pct <- 0.2 66 | n_test <- round(test_set_pct*n) 67 | n_train <- n - n_test 68 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 69 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 70 | X_test <- X[test_inds,] 71 | 
X_train <- X[train_inds,] 72 | pi_test <- pi_x[test_inds] 73 | pi_train <- pi_x[train_inds] 74 | Z_test <- Z[test_inds] 75 | Z_train <- Z[train_inds] 76 | y_test <- y[test_inds] 77 | y_train <- y[train_inds] 78 | mu_test <- mu_x[test_inds] 79 | mu_train <- mu_x[train_inds] 80 | tau_test <- tau_x[test_inds] 81 | tau_train <- tau_x[train_inds] 82 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, 83 | propensity_train = pi_train, num_gfr = 10, 84 | num_burnin = 0, num_mcmc = 10) 85 | preds <- predict(bcf_model, X_test, Z_test, pi_test) 86 | } 87 | -------------------------------------------------------------------------------- /man/preprocessPredictionData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{preprocessPredictionData} 4 | \alias{preprocessPredictionData} 5 | \title{Preprocess covariates. DataFrames will be preprocessed based on their column 6 | types. Matrices will be passed through assuming all columns are numeric.} 7 | \usage{ 8 | preprocessPredictionData(input_data, metadata) 9 | } 10 | \arguments{ 11 | \item{input_data}{Covariates, provided as either a dataframe or a matrix} 12 | 13 | \item{metadata}{List containing information on variables, including train set 14 | categories for categorical variables} 15 | } 16 | \value{ 17 | Preprocessed data with categorical variables appropriately handled 18 | } 19 | \description{ 20 | Preprocess covariates. DataFrames will be preprocessed based on their column 21 | types. Matrices will be passed through assuming all columns are numeric. 
22 | } 23 | \examples{ 24 | cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) 25 | metadata <- list(num_ordered_cat_vars = 0, num_unordered_cat_vars = 0, 26 | num_numeric_vars = 3, numeric_vars = c("x1", "x2", "x3")) 27 | X_preprocessed <- preprocessPredictionData(cov_df, metadata) 28 | } 29 | -------------------------------------------------------------------------------- /man/preprocessTrainData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{preprocessTrainData} 4 | \alias{preprocessTrainData} 5 | \title{Preprocess covariates. DataFrames will be preprocessed based on their column 6 | types. Matrices will be passed through assuming all columns are numeric.} 7 | \usage{ 8 | preprocessTrainData(input_data) 9 | } 10 | \arguments{ 11 | \item{input_data}{Covariates, provided as either a dataframe or a matrix} 12 | } 13 | \value{ 14 | List with preprocessed (unmodified) data and details on the number of each type 15 | of variable, unique categories associated with categorical variables, and the 16 | vector of feature types needed for calls to BART and BCF. 17 | } 18 | \description{ 19 | Preprocess covariates. DataFrames will be preprocessed based on their column 20 | types. Matrices will be passed through assuming all columns are numeric. 
21 | } 22 | \examples{ 23 | cov_mat <- matrix(1:12, ncol = 3) 24 | preprocess_list <- preprocessTrainData(cov_mat) 25 | X <- preprocess_list$X 26 | } 27 | -------------------------------------------------------------------------------- /man/resetActiveForest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forest.R 3 | \name{resetActiveForest} 4 | \alias{resetActiveForest} 5 | \title{Reset an active forest, either from a specific forest in a \code{ForestContainer} 6 | or to an ensemble of single-node (i.e. root) trees} 7 | \usage{ 8 | resetActiveForest(active_forest, forest_samples = NULL, forest_num = NULL) 9 | } 10 | \arguments{ 11 | \item{active_forest}{Current active forest} 12 | 13 | \item{forest_samples}{(Optional) Container of forest samples from which to re-initialize active forest. If not provided, active forest will be reset to an ensemble of single-node (i.e. root) trees.} 14 | 15 | \item{forest_num}{(Optional) Index of forest samples from which to initialize active forest. If not provided, active forest will be reset to an ensemble of single-node (i.e. root) trees.} 16 | } 17 | \value{ 18 | None 19 | } 20 | \description{ 21 | Reset an active forest, either from a specific forest in a \code{ForestContainer} 22 | or to an ensemble of single-node (i.e. 
root) trees 23 | } 24 | \examples{ 25 | num_trees <- 100 26 | leaf_dimension <- 1 27 | is_leaf_constant <- TRUE 28 | is_exponentiated <- FALSE 29 | active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 30 | forest_samples <- createForestSamples(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 31 | forest_samples$add_forest_with_constant_leaves(0.0) 32 | forest_samples$add_numeric_split_tree(0, 0, 0, 0, 0.5, -1.0, 1.0) 33 | forest_samples$add_numeric_split_tree(0, 1, 0, 1, 0.75, 3.4, 0.75) 34 | active_forest$set_root_leaves(0.1) 35 | resetActiveForest(active_forest, forest_samples, 0) 36 | resetActiveForest(active_forest) 37 | } 38 | -------------------------------------------------------------------------------- /man/resetForestModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forest.R 3 | \name{resetForestModel} 4 | \alias{resetForestModel} 5 | \title{Re-initialize a forest model (tracking data structures) from a specific forest in a \code{ForestContainer}} 6 | \usage{ 7 | resetForestModel(forest_model, forest, dataset, residual, is_mean_model) 8 | } 9 | \arguments{ 10 | \item{forest_model}{Forest model with tracking data structures} 11 | 12 | \item{forest}{Forest from which to re-initialize forest model} 13 | 14 | \item{dataset}{Training dataset object} 15 | 16 | \item{residual}{Residual which will also be updated} 17 | 18 | \item{is_mean_model}{Whether the model being updated is a conditional mean model} 19 | } 20 | \value{ 21 | None 22 | } 23 | \description{ 24 | Re-initialize a forest model (tracking data structures) from a specific forest in a \code{ForestContainer} 25 | } 26 | \examples{ 27 | n <- 100 28 | p <- 10 29 | num_trees <- 100 30 | leaf_dimension <- 1 31 | is_leaf_constant <- TRUE 32 | is_exponentiated <- FALSE 33 | alpha <- 0.95 34 | beta <- 2.0 35 | min_samples_leaf <- 2 
36 | max_depth <- 10 37 | feature_types <- as.integer(rep(0, p)) 38 | leaf_model <- 0 39 | sigma2 <- 1.0 40 | leaf_scale <- as.matrix(1.0) 41 | variable_weights <- rep(1/p, p) 42 | a_forest <- 1 43 | b_forest <- 1 44 | cutpoint_grid_size <- 100 45 | X <- matrix(runif(n*p), ncol = p) 46 | forest_dataset <- createForestDataset(X) 47 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(n) 48 | outcome <- createOutcome(y) 49 | rng <- createCppRNG(1234) 50 | global_model_config <- createGlobalModelConfig(global_error_variance=sigma2) 51 | forest_model_config <- createForestModelConfig(feature_types=feature_types, 52 | num_trees=num_trees, num_observations=n, 53 | num_features=p, alpha=alpha, beta=beta, 54 | min_samples_leaf=min_samples_leaf, 55 | max_depth=max_depth, 56 | variable_weights=variable_weights, 57 | cutpoint_grid_size=cutpoint_grid_size, 58 | leaf_model_type=leaf_model, 59 | leaf_model_scale=leaf_scale) 60 | forest_model <- createForestModel(forest_dataset, forest_model_config, global_model_config) 61 | active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 62 | forest_samples <- createForestSamples(num_trees, leaf_dimension, 63 | is_leaf_constant, is_exponentiated) 64 | active_forest$prepare_for_sampler(forest_dataset, outcome, forest_model, 0, 0.) 
65 | forest_model$sample_one_iteration( 66 | forest_dataset, outcome, forest_samples, active_forest, 67 | rng, forest_model_config, global_model_config, 68 | keep_forest = TRUE, gfr = FALSE 69 | ) 70 | resetActiveForest(active_forest, forest_samples, 0) 71 | resetForestModel(forest_model, active_forest, forest_dataset, outcome, TRUE) 72 | } 73 | -------------------------------------------------------------------------------- /man/resetRandomEffectsModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{resetRandomEffectsModel} 4 | \alias{resetRandomEffectsModel} 5 | \title{Reset a \code{RandomEffectsModel} object based on the parameters indexed by \code{sample_num} in a \code{RandomEffectsSamples} object} 6 | \usage{ 7 | resetRandomEffectsModel(rfx_model, rfx_samples, sample_num, sigma_alpha_init) 8 | } 9 | \arguments{ 10 | \item{rfx_model}{Object of type \code{RandomEffectsModel}.} 11 | 12 | \item{rfx_samples}{Object of type \code{RandomEffectSamples}.} 13 | 14 | \item{sample_num}{Index of sample stored in \code{rfx_samples} from which to reset the state of a random effects model. 
Zero-indexed, so resetting based on the first sample would require setting \code{sample_num = 0}.} 15 | 16 | \item{sigma_alpha_init}{Initial value of the "working parameter" scale parameter.} 17 | } 18 | \value{ 19 | None 20 | } 21 | \description{ 22 | Reset a \code{RandomEffectsModel} object based on the parameters indexed by \code{sample_num} in a \code{RandomEffectsSamples} object 23 | } 24 | \examples{ 25 | n <- 100 26 | p <- 10 27 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 28 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 29 | rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) 30 | y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 31 | y_std <- (y-mean(y))/sd(y) 32 | outcome <- createOutcome(y_std) 33 | rng <- createCppRNG(1234) 34 | num_groups <- length(unique(rfx_group_ids)) 35 | num_components <- ncol(rfx_basis) 36 | rfx_model <- createRandomEffectsModel(num_components, num_groups) 37 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 38 | rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) 39 | alpha_init <- rep(1,num_components) 40 | xi_init <- matrix(rep(alpha_init, num_groups),num_components,num_groups) 41 | sigma_alpha_init <- diag(1,num_components,num_components) 42 | sigma_xi_init <- diag(1,num_components,num_components) 43 | sigma_xi_shape <- 1 44 | sigma_xi_scale <- 1 45 | rfx_model$set_working_parameter(alpha_init) 46 | rfx_model$set_group_parameters(xi_init) 47 | rfx_model$set_working_parameter_cov(sigma_alpha_init) 48 | rfx_model$set_group_parameter_cov(sigma_xi_init) 49 | rfx_model$set_variance_prior_shape(sigma_xi_shape) 50 | rfx_model$set_variance_prior_scale(sigma_xi_scale) 51 | for (i in 1:3) { 52 | rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, 53 | rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, 54 | keep_sample=TRUE, global_variance=1.0, rng=rng) 55 | } 56 | resetRandomEffectsModel(rfx_model, rfx_samples, 0, 1.0) 57 | } 58 | 
-------------------------------------------------------------------------------- /man/resetRandomEffectsTracker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{resetRandomEffectsTracker} 4 | \alias{resetRandomEffectsTracker} 5 | \title{Reset a \code{RandomEffectsTracker} object based on the parameters indexed by \code{sample_num} in a \code{RandomEffectsSamples} object} 6 | \usage{ 7 | resetRandomEffectsTracker( 8 | rfx_tracker, 9 | rfx_model, 10 | rfx_dataset, 11 | residual, 12 | rfx_samples 13 | ) 14 | } 15 | \arguments{ 16 | \item{rfx_tracker}{Object of type \code{RandomEffectsTracker}.} 17 | 18 | \item{rfx_model}{Object of type \code{RandomEffectsModel}.} 19 | 20 | \item{rfx_dataset}{Object of type \code{RandomEffectsDataset}.} 21 | 22 | \item{residual}{Object of type \code{Outcome}.} 23 | 24 | \item{rfx_samples}{Object of type \code{RandomEffectSamples}.} 25 | } 26 | \value{ 27 | None 28 | } 29 | \description{ 30 | Reset a \code{RandomEffectsTracker} object based on the parameters indexed by \code{sample_num} in a \code{RandomEffectsSamples} object 31 | } 32 | \examples{ 33 | n <- 100 34 | p <- 10 35 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 36 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 37 | rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) 38 | y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 39 | y_std <- (y-mean(y))/sd(y) 40 | outcome <- createOutcome(y_std) 41 | rng <- createCppRNG(1234) 42 | num_groups <- length(unique(rfx_group_ids)) 43 | num_components <- ncol(rfx_basis) 44 | rfx_model <- createRandomEffectsModel(num_components, num_groups) 45 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 46 | rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) 47 | alpha_init <- rep(1,num_components) 48 | xi_init <- 
matrix(rep(alpha_init, num_groups),num_components,num_groups) 49 | sigma_alpha_init <- diag(1,num_components,num_components) 50 | sigma_xi_init <- diag(1,num_components,num_components) 51 | sigma_xi_shape <- 1 52 | sigma_xi_scale <- 1 53 | rfx_model$set_working_parameter(alpha_init) 54 | rfx_model$set_group_parameters(xi_init) 55 | rfx_model$set_working_parameter_cov(sigma_alpha_init) 56 | rfx_model$set_group_parameter_cov(sigma_xi_init) 57 | rfx_model$set_variance_prior_shape(sigma_xi_shape) 58 | rfx_model$set_variance_prior_scale(sigma_xi_scale) 59 | for (i in 1:3) { 60 | rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, 61 | rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, 62 | keep_sample=TRUE, global_variance=1.0, rng=rng) 63 | } 64 | resetRandomEffectsModel(rfx_model, rfx_samples, 0, 1.0) 65 | resetRandomEffectsTracker(rfx_tracker, rfx_model, rfx_dataset, outcome, rfx_samples) 66 | } 67 | -------------------------------------------------------------------------------- /man/rootResetRandomEffectsModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{rootResetRandomEffectsModel} 4 | \alias{rootResetRandomEffectsModel} 5 | \title{Reset a \code{RandomEffectsModel} object to its "default" state} 6 | \usage{ 7 | rootResetRandomEffectsModel( 8 | rfx_model, 9 | alpha_init, 10 | xi_init, 11 | sigma_alpha_init, 12 | sigma_xi_init, 13 | sigma_xi_shape, 14 | sigma_xi_scale 15 | ) 16 | } 17 | \arguments{ 18 | \item{rfx_model}{Object of type \code{RandomEffectsModel}.} 19 | 20 | \item{alpha_init}{Initial value of the "working parameter".} 21 | 22 | \item{xi_init}{Initial value of the "group parameters".} 23 | 24 | \item{sigma_alpha_init}{Initial value of the "working parameter" scale parameter.} 25 | 26 | \item{sigma_xi_init}{Initial value of the "group parameters" scale parameter.} 27 | 28 | 
\item{sigma_xi_shape}{Shape parameter for the inverse gamma variance model on the group parameters.} 29 | 30 | \item{sigma_xi_scale}{Scale parameter for the inverse gamma variance model on the group parameters.} 31 | } 32 | \value{ 33 | None 34 | } 35 | \description{ 36 | Reset a \code{RandomEffectsModel} object to its "default" state 37 | } 38 | \examples{ 39 | n <- 100 40 | p <- 10 41 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 42 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 43 | rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) 44 | y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 45 | y_std <- (y-mean(y))/sd(y) 46 | outcome <- createOutcome(y_std) 47 | rng <- createCppRNG(1234) 48 | num_groups <- length(unique(rfx_group_ids)) 49 | num_components <- ncol(rfx_basis) 50 | rfx_model <- createRandomEffectsModel(num_components, num_groups) 51 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 52 | rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) 53 | alpha_init <- rep(1,num_components) 54 | xi_init <- matrix(rep(alpha_init, num_groups),num_components,num_groups) 55 | sigma_alpha_init <- diag(1,num_components,num_components) 56 | sigma_xi_init <- diag(1,num_components,num_components) 57 | sigma_xi_shape <- 1 58 | sigma_xi_scale <- 1 59 | rfx_model$set_working_parameter(alpha_init) 60 | rfx_model$set_group_parameters(xi_init) 61 | rfx_model$set_working_parameter_cov(sigma_alpha_init) 62 | rfx_model$set_group_parameter_cov(sigma_xi_init) 63 | rfx_model$set_variance_prior_shape(sigma_xi_shape) 64 | rfx_model$set_variance_prior_scale(sigma_xi_scale) 65 | for (i in 1:3) { 66 | rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, 67 | rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, 68 | keep_sample=TRUE, global_variance=1.0, rng=rng) 69 | } 70 | rootResetRandomEffectsModel(rfx_model, alpha_init, xi_init, sigma_alpha_init, 71 | sigma_xi_init, sigma_xi_shape, 
sigma_xi_scale) 72 | } 73 | -------------------------------------------------------------------------------- /man/rootResetRandomEffectsTracker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{rootResetRandomEffectsTracker} 4 | \alias{rootResetRandomEffectsTracker} 5 | \title{Reset a \code{RandomEffectsTracker} object to its "default" state} 6 | \usage{ 7 | rootResetRandomEffectsTracker(rfx_tracker, rfx_model, rfx_dataset, residual) 8 | } 9 | \arguments{ 10 | \item{rfx_tracker}{Object of type \code{RandomEffectsTracker}.} 11 | 12 | \item{rfx_model}{Object of type \code{RandomEffectsModel}.} 13 | 14 | \item{rfx_dataset}{Object of type \code{RandomEffectsDataset}.} 15 | 16 | \item{residual}{Object of type \code{Outcome}.} 17 | } 18 | \value{ 19 | None 20 | } 21 | \description{ 22 | Reset a \code{RandomEffectsTracker} object to its "default" state 23 | } 24 | \examples{ 25 | n <- 100 26 | p <- 10 27 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 28 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 29 | rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) 30 | y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 31 | y_std <- (y-mean(y))/sd(y) 32 | outcome <- createOutcome(y_std) 33 | rng <- createCppRNG(1234) 34 | num_groups <- length(unique(rfx_group_ids)) 35 | num_components <- ncol(rfx_basis) 36 | rfx_model <- createRandomEffectsModel(num_components, num_groups) 37 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 38 | rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) 39 | alpha_init <- rep(1,num_components) 40 | xi_init <- matrix(rep(alpha_init, num_groups),num_components,num_groups) 41 | sigma_alpha_init <- diag(1,num_components,num_components) 42 | sigma_xi_init <- diag(1,num_components,num_components) 43 | sigma_xi_shape <- 1 44 | sigma_xi_scale <- 
1 45 | rfx_model$set_working_parameter(alpha_init) 46 | rfx_model$set_group_parameters(xi_init) 47 | rfx_model$set_working_parameter_cov(sigma_alpha_init) 48 | rfx_model$set_group_parameter_cov(sigma_xi_init) 49 | rfx_model$set_variance_prior_shape(sigma_xi_shape) 50 | rfx_model$set_variance_prior_scale(sigma_xi_scale) 51 | for (i in 1:3) { 52 | rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, 53 | rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, 54 | keep_sample=TRUE, global_variance=1.0, rng=rng) 55 | } 56 | rootResetRandomEffectsModel(rfx_model, alpha_init, xi_init, sigma_alpha_init, 57 | sigma_xi_init, sigma_xi_shape, sigma_xi_scale) 58 | rootResetRandomEffectsTracker(rfx_tracker, rfx_model, rfx_dataset, outcome) 59 | } 60 | -------------------------------------------------------------------------------- /man/sampleGlobalErrorVarianceOneIteration.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/variance.R 3 | \name{sampleGlobalErrorVarianceOneIteration} 4 | \alias{sampleGlobalErrorVarianceOneIteration} 5 | \title{Sample one iteration of the (inverse gamma) global variance model} 6 | \usage{ 7 | sampleGlobalErrorVarianceOneIteration(residual, dataset, rng, a, b) 8 | } 9 | \arguments{ 10 | \item{residual}{Outcome class} 11 | 12 | \item{dataset}{ForestDataset class} 13 | 14 | \item{rng}{C++ random number generator} 15 | 16 | \item{a}{Global variance shape parameter} 17 | 18 | \item{b}{Global variance scale parameter} 19 | } 20 | \value{ 21 | None 22 | } 23 | \description{ 24 | Sample one iteration of the (inverse gamma) global variance model 25 | } 26 | \examples{ 27 | X <- matrix(runif(10*100), ncol = 10) 28 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 29 | y_std <- (y-mean(y))/sd(y) 30 | forest_dataset <- createForestDataset(X) 31 | outcome <- createOutcome(y_std) 32 | rng <- createCppRNG(1234) 33 | a <- 1.0 34 | b <- 
1.0 35 | sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome, forest_dataset, rng, a, b) 36 | } 37 | -------------------------------------------------------------------------------- /man/sampleLeafVarianceOneIteration.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/variance.R 3 | \name{sampleLeafVarianceOneIteration} 4 | \alias{sampleLeafVarianceOneIteration} 5 | \title{Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!)} 6 | \usage{ 7 | sampleLeafVarianceOneIteration(forest, rng, a, b) 8 | } 9 | \arguments{ 10 | \item{forest}{C++ forest} 11 | 12 | \item{rng}{C++ random number generator} 13 | 14 | \item{a}{Leaf variance shape parameter} 15 | 16 | \item{b}{Leaf variance scale parameter} 17 | } 18 | \value{ 19 | None 20 | } 21 | \description{ 22 | Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) 
23 | } 24 | \examples{ 25 | num_trees <- 100 26 | leaf_dimension <- 1 27 | is_leaf_constant <- TRUE 28 | is_exponentiated <- FALSE 29 | active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 30 | rng <- createCppRNG(1234) 31 | a <- 1.0 32 | b <- 1.0 33 | tau <- sampleLeafVarianceOneIteration(active_forest, rng, a, b) 34 | } 35 | -------------------------------------------------------------------------------- /man/saveBARTModelToJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{saveBARTModelToJson} 4 | \alias{saveBARTModelToJson} 5 | \title{Convert the persistent aspects of a BART model to (in-memory) JSON} 6 | \usage{ 7 | saveBARTModelToJson(object) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} 11 | } 12 | \value{ 13 | Object of type \code{CppJson} 14 | } 15 | \description{ 16 | Convert the persistent aspects of a BART model to (in-memory) JSON 17 | } 18 | \examples{ 19 | n <- 100 20 | p <- 5 21 | X <- matrix(runif(n*p), ncol = p) 22 | f_XW <- ( 23 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 24 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 25 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 26 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 27 | ) 28 | noise_sd <- 1 29 | y <- f_XW + rnorm(n, 0, noise_sd) 30 | test_set_pct <- 0.2 31 | n_test <- round(test_set_pct*n) 32 | n_train <- n - n_test 33 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 34 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 35 | X_test <- X[test_inds,] 36 | X_train <- X[train_inds,] 37 | y_test <- y[test_inds] 38 | y_train <- y[train_inds] 39 | bart_model <- bart(X_train = X_train, y_train = y_train, 40 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 41 | bart_json <- saveBARTModelToJson(bart_model) 42 | } 43 | 
-------------------------------------------------------------------------------- /man/saveBARTModelToJsonFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{saveBARTModelToJsonFile} 4 | \alias{saveBARTModelToJsonFile} 5 | \title{Convert the persistent aspects of a BART model to (in-memory) JSON and save to a file} 6 | \usage{ 7 | saveBARTModelToJsonFile(object, filename) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} 11 | 12 | \item{filename}{String of filepath, must end in ".json"} 13 | } 14 | \value{ 15 | None 16 | } 17 | \description{ 18 | Convert the persistent aspects of a BART model to (in-memory) JSON and save to a file 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | tmpjson <- tempfile(fileext = ".json") 44 | saveBARTModelToJsonFile(bart_model, file.path(tmpjson)) 45 | unlink(tmpjson) 46 | } 47 | -------------------------------------------------------------------------------- /man/saveBARTModelToJsonString.Rd: -------------------------------------------------------------------------------- 1 | 
% Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{saveBARTModelToJsonString} 4 | \alias{saveBARTModelToJsonString} 5 | \title{Convert the persistent aspects of a BART model to (in-memory) JSON string} 6 | \usage{ 7 | saveBARTModelToJsonString(object) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} 11 | } 12 | \value{ 13 | in-memory JSON string 14 | } 15 | \description{ 16 | Convert the persistent aspects of a BART model to (in-memory) JSON string 17 | } 18 | \examples{ 19 | n <- 100 20 | p <- 5 21 | X <- matrix(runif(n*p), ncol = p) 22 | f_XW <- ( 23 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 24 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 25 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 26 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 27 | ) 28 | noise_sd <- 1 29 | y <- f_XW + rnorm(n, 0, noise_sd) 30 | test_set_pct <- 0.2 31 | n_test <- round(test_set_pct*n) 32 | n_train <- n - n_test 33 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 34 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 35 | X_test <- X[test_inds,] 36 | X_train <- X[train_inds,] 37 | y_test <- y[test_inds] 38 | y_train <- y[train_inds] 39 | bart_model <- bart(X_train = X_train, y_train = y_train, 40 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 41 | bart_json_string <- saveBARTModelToJsonString(bart_model) 42 | } 43 | -------------------------------------------------------------------------------- /man/saveBCFModelToJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{saveBCFModelToJson} 4 | \alias{saveBCFModelToJson} 5 | \title{Convert the persistent aspects of a BCF model to (in-memory) JSON} 6 | \usage{ 7 | saveBCFModelToJson(object) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bcfmodel} 
containing draws of a Bayesian causal forest model and associated sampling outputs.} 11 | } 12 | \value{ 13 | Object of type \code{CppJson} 14 | } 15 | \description{ 16 | Convert the persistent aspects of a BCF model to (in-memory) JSON 17 | } 18 | \examples{ 19 | n <- 500 20 | p <- 5 21 | X <- matrix(runif(n*p), ncol = p) 22 | mu_x <- ( 23 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 24 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 25 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 26 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 27 | ) 28 | pi_x <- ( 29 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 30 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 31 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 32 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 33 | ) 34 | tau_x <- ( 35 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 36 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 37 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 38 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 39 | ) 40 | Z <- rbinom(n, 1, pi_x) 41 | E_XZ <- mu_x + Z*tau_x 42 | snr <- 3 43 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 44 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 45 | rfx_basis <- cbind(1, runif(n, -1, 1)) 46 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 47 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 48 | test_set_pct <- 0.2 49 | n_test <- round(test_set_pct*n) 50 | n_train <- n - n_test 51 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 52 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 53 | X_test <- X[test_inds,] 54 | X_train <- X[train_inds,] 55 | pi_test <- pi_x[test_inds] 56 | pi_train <- pi_x[train_inds] 57 | Z_test <- Z[test_inds] 58 | Z_train <- Z[train_inds] 59 | y_test <- y[test_inds] 60 | y_train <- y[train_inds] 61 | mu_test <- mu_x[test_inds] 62 | mu_train <- mu_x[train_inds] 63 | tau_test <- tau_x[test_inds] 64 | tau_train <- tau_x[train_inds] 65 | rfx_group_ids_test <- rfx_group_ids[test_inds] 66 | rfx_group_ids_train <- rfx_group_ids[train_inds] 67 | 
rfx_basis_test <- rfx_basis[test_inds,] 68 | rfx_basis_train <- rfx_basis[train_inds,] 69 | rfx_term_test <- rfx_term[test_inds] 70 | rfx_term_train <- rfx_term[train_inds] 71 | mu_params <- list(sample_sigma2_leaf = TRUE) 72 | tau_params <- list(sample_sigma2_leaf = FALSE) 73 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, 74 | propensity_train = pi_train, 75 | rfx_group_ids_train = rfx_group_ids_train, 76 | rfx_basis_train = rfx_basis_train, X_test = X_test, 77 | Z_test = Z_test, propensity_test = pi_test, 78 | rfx_group_ids_test = rfx_group_ids_test, 79 | rfx_basis_test = rfx_basis_test, 80 | num_gfr = 10, num_burnin = 0, num_mcmc = 10, 81 | prognostic_forest_params = mu_params, 82 | treatment_effect_forest_params = tau_params) 83 | bcf_json <- saveBCFModelToJson(bcf_model) 84 | } 85 | -------------------------------------------------------------------------------- /man/saveBCFModelToJsonFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{saveBCFModelToJsonFile} 4 | \alias{saveBCFModelToJsonFile} 5 | \title{Convert the persistent aspects of a BCF model to (in-memory) JSON and save to a file} 6 | \usage{ 7 | saveBCFModelToJsonFile(object, filename) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} 11 | 12 | \item{filename}{String of filepath, must end in ".json"} 13 | } 14 | \value{ 15 | None 16 | } 17 | \description{ 18 | Convert the persistent aspects of a BCF model to (in-memory) JSON and save to a file 19 | } 20 | \examples{ 21 | n <- 500 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | mu_x <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & 
(1 > X[,1])) * (7.5) 29 | ) 30 | pi_x <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 35 | ) 36 | tau_x <- ( 37 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 38 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 39 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 40 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 41 | ) 42 | Z <- rbinom(n, 1, pi_x) 43 | E_XZ <- mu_x + Z*tau_x 44 | snr <- 3 45 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 46 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 47 | rfx_basis <- cbind(1, runif(n, -1, 1)) 48 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 49 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | pi_test <- pi_x[test_inds] 58 | pi_train <- pi_x[train_inds] 59 | Z_test <- Z[test_inds] 60 | Z_train <- Z[train_inds] 61 | y_test <- y[test_inds] 62 | y_train <- y[train_inds] 63 | mu_test <- mu_x[test_inds] 64 | mu_train <- mu_x[train_inds] 65 | tau_test <- tau_x[test_inds] 66 | tau_train <- tau_x[train_inds] 67 | rfx_group_ids_test <- rfx_group_ids[test_inds] 68 | rfx_group_ids_train <- rfx_group_ids[train_inds] 69 | rfx_basis_test <- rfx_basis[test_inds,] 70 | rfx_basis_train <- rfx_basis[train_inds,] 71 | rfx_term_test <- rfx_term[test_inds] 72 | rfx_term_train <- rfx_term[train_inds] 73 | mu_params <- list(sample_sigma2_leaf = TRUE) 74 | tau_params <- list(sample_sigma2_leaf = FALSE) 75 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, 76 | propensity_train = pi_train, 77 | rfx_group_ids_train = rfx_group_ids_train, 78 | rfx_basis_train = rfx_basis_train, X_test = X_test, 79 | Z_test = 
Z_test, propensity_test = pi_test, 80 | rfx_group_ids_test = rfx_group_ids_test, 81 | rfx_basis_test = rfx_basis_test, 82 | num_gfr = 10, num_burnin = 0, num_mcmc = 10, 83 | prognostic_forest_params = mu_params, 84 | treatment_effect_forest_params = tau_params) 85 | tmpjson <- tempfile(fileext = ".json") 86 | saveBCFModelToJsonFile(bcf_model, file.path(tmpjson)) 87 | unlink(tmpjson) 88 | } 89 | -------------------------------------------------------------------------------- /man/saveBCFModelToJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{saveBCFModelToJsonString} 4 | \alias{saveBCFModelToJsonString} 5 | \title{Convert the persistent aspects of a BCF model to (in-memory) JSON string} 6 | \usage{ 7 | saveBCFModelToJsonString(object) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} 11 | } 12 | \value{ 13 | JSON string 14 | } 15 | \description{ 16 | Convert the persistent aspects of a BCF model to (in-memory) JSON string 17 | } 18 | \examples{ 19 | n <- 500 20 | p <- 5 21 | X <- matrix(runif(n*p), ncol = p) 22 | mu_x <- ( 23 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 24 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 25 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 26 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 27 | ) 28 | pi_x <- ( 29 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 30 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 31 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 32 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 33 | ) 34 | tau_x <- ( 35 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 36 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 37 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 38 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 39 | ) 40 | Z <- rbinom(n, 1, pi_x) 41 | E_XZ <- mu_x + Z*tau_x 42 | snr <- 3 43 | 
rfx_group_ids <- rep(c(1,2), n \%/\% 2) 44 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 45 | rfx_basis <- cbind(1, runif(n, -1, 1)) 46 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 47 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 48 | test_set_pct <- 0.2 49 | n_test <- round(test_set_pct*n) 50 | n_train <- n - n_test 51 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 52 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 53 | X_test <- X[test_inds,] 54 | X_train <- X[train_inds,] 55 | pi_test <- pi_x[test_inds] 56 | pi_train <- pi_x[train_inds] 57 | Z_test <- Z[test_inds] 58 | Z_train <- Z[train_inds] 59 | y_test <- y[test_inds] 60 | y_train <- y[train_inds] 61 | mu_test <- mu_x[test_inds] 62 | mu_train <- mu_x[train_inds] 63 | tau_test <- tau_x[test_inds] 64 | tau_train <- tau_x[train_inds] 65 | rfx_group_ids_test <- rfx_group_ids[test_inds] 66 | rfx_group_ids_train <- rfx_group_ids[train_inds] 67 | rfx_basis_test <- rfx_basis[test_inds,] 68 | rfx_basis_train <- rfx_basis[train_inds,] 69 | rfx_term_test <- rfx_term[test_inds] 70 | rfx_term_train <- rfx_term[train_inds] 71 | mu_params <- list(sample_sigma2_leaf = TRUE) 72 | tau_params <- list(sample_sigma2_leaf = FALSE) 73 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, 74 | propensity_train = pi_train, 75 | rfx_group_ids_train = rfx_group_ids_train, 76 | rfx_basis_train = rfx_basis_train, X_test = X_test, 77 | Z_test = Z_test, propensity_test = pi_test, 78 | rfx_group_ids_test = rfx_group_ids_test, 79 | rfx_basis_test = rfx_basis_test, 80 | num_gfr = 10, num_burnin = 0, num_mcmc = 10, 81 | prognostic_forest_params = mu_params, 82 | treatment_effect_forest_params = tau_params) 83 | saveBCFModelToJsonString(bcf_model) 84 | } 85 | -------------------------------------------------------------------------------- /man/savePreprocessorToJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by 
roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{savePreprocessorToJsonString} 4 | \alias{savePreprocessorToJsonString} 5 | \title{Convert the persistent aspects of a covariate preprocessor to (in-memory) JSON string} 6 | \usage{ 7 | savePreprocessorToJsonString(object) 8 | } 9 | \arguments{ 10 | \item{object}{List containing information on variables, including train set 11 | categories for categorical variables} 12 | } 13 | \value{ 14 | in-memory JSON string 15 | } 16 | \description{ 17 | Convert the persistent aspects of a covariate preprocessor to (in-memory) JSON string 18 | } 19 | \examples{ 20 | cov_mat <- matrix(1:12, ncol = 3) 21 | preprocess_list <- preprocessTrainData(cov_mat) 22 | preprocessor_json_string <- savePreprocessorToJsonString(preprocess_list$metadata) 23 | } 24 | -------------------------------------------------------------------------------- /man/stochtree-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/stochtree-package.R 3 | \docType{package} 4 | \name{stochtree-package} 5 | \alias{stochtree} 6 | \alias{stochtree-package} 7 | \title{stochtree: Stochastic Tree Ensembles (XBART and BART) for Supervised Learning and Causal Inference} 8 | \description{ 9 | Flexible stochastic tree ensemble software. Robust implementations of Bayesian Additive Regression Trees (BART) Chipman, George, McCulloch (2010) \doi{10.1214/09-AOAS285} for supervised learning and Bayesian Causal Forests (BCF) Hahn, Murray, Carvalho (2020) \doi{10.1214/19-BA1195} for causal inference. Enables model serialization and parallel sampling and provides a low-level interface for custom stochastic forest samplers. 
10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://stochtree.ai/} 15 | \item \url{https://github.com/StochasticTree/stochtree} 16 | \item Report bugs at \url{https://github.com/StochasticTree/stochtree/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Drew Herren \email{drewherrenopensource@gmail.com} (\href{https://orcid.org/0000-0003-4109-6611}{ORCID}) 22 | 23 | Authors: 24 | \itemize{ 25 | \item Richard Hahn 26 | \item Jared Murray 27 | \item Carlos Carvalho 28 | \item Jingyu He 29 | } 30 | 31 | Other contributors: 32 | \itemize{ 33 | \item Pedro Lima [contributor] 34 | \item stochtree contributors [copyright holder] 35 | \item Eigen contributors (C++ source uses the Eigen library for matrix operations, see inst/COPYRIGHTS) [copyright holder] 36 | \item xgboost contributors (C++ tree code and related operations include or are inspired by code from the xgboost library, see inst/COPYRIGHTS) [copyright holder] 37 | \item treelite contributors (C++ tree code and related operations include or are inspired by code from the treelite library, see inst/COPYRIGHTS) [copyright holder] 38 | \item Microsoft Corporation (C++ I/O and various project structure code include or are inspired by code from the LightGBM library, which is a copyright of Microsoft, see inst/COPYRIGHTS) [copyright holder] 39 | \item Niels Lohmann (C++ source uses the JSON for Modern C++ library for JSON operations, see inst/COPYRIGHTS) [copyright holder] 40 | \item Daniel Lemire (C++ source uses the fast_double_parser library internally, see inst/COPYRIGHTS) [copyright holder] 41 | \item Victor Zverovich (C++ source uses the fmt library internally, see inst/COPYRIGHTS) [copyright holder] 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel", 5 
| "ninja", 6 | "cmake>=3.12", 7 | "numpy", 8 | "pandas", 9 | "scipy", 10 | "scikit-learn" 11 | ] 12 | build-backend = "setuptools.build_meta" 13 | 14 | [project] 15 | name = "stochtree" 16 | version = "0.1.0" 17 | dynamic = ["readme", "optional-dependencies", "license"] 18 | description = "Stochastic Tree Ensembles for Machine Learning and Causal Inference" 19 | requires-python = ">=3.8.0" 20 | classifiers = [ 21 | "Development Status :: 3 - Alpha", 22 | "Intended Audience :: Science/Research", 23 | "License :: OSI Approved :: MIT License", 24 | "Operating System :: MacOS", 25 | "Operating System :: Microsoft :: Windows", 26 | "Operating System :: POSIX :: Linux", 27 | "Programming Language :: Python :: 3.8", 28 | "Programming Language :: Python :: 3.9", 29 | "Programming Language :: Python :: 3.10", 30 | "Programming Language :: Python :: 3.11", 31 | "Programming Language :: Python :: 3.12", 32 | "Programming Language :: Python :: 3.13", 33 | "Topic :: Scientific/Engineering :: Artificial Intelligence" 34 | ] 35 | authors = [ 36 | {name = "Drew Herren", email = "drewherrenopensource@gmail.com"} 37 | ] 38 | 39 | [project.urls] 40 | Homepage = "https://stochtree.ai/" 41 | Documentation = "https://stochtree.ai/python_docs/index.html" 42 | Repository = "https://github.com/StochasticTree/stochtree" 43 | Issues = "https://github.com/StochasticTree/stochtree/issues" 44 | -------------------------------------------------------------------------------- /python_docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /python_docs/README.md: -------------------------------------------------------------------------------- 1 | # Python Package Documentation 2 | 3 | ## Building Documentation Locally 4 | 5 | The online documentation is built in the doc-specific `StochasticTree/stochtree-python` repo (see [here](https://github.com/StochasticTree/stochtree-python/blob/main/.github/workflows/docs.yml) for the Github workflow). 6 | To build the documentation locally, first ensure that you have [Sphinx](https://www.sphinx-doc.org/en/master/) installed, then navigate to the python package's main directory (i.e. `cd [path/to/stochtree]`), 7 | install the package, and run `sphinx-build` as below 8 | 9 | ``` 10 | pip install --upgrade pip 11 | pip install -r python_docs/requirements.txt 12 | pip install . 13 | sphinx-build -M html python_docs/source/ python_docs/build/ 14 | ``` 15 | 16 | ## Documentation Style 17 | 18 | Module (class, function, etc...) documentation follows [the numpy standard](https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard), 19 | applied in Sphinx using the [napoleon](https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html) extension. 
20 | 21 | -------------------------------------------------------------------------------- /python_docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /python_docs/requirements.txt: -------------------------------------------------------------------------------- 1 | alabaster==0.7.13 2 | Babel==2.15.0 3 | beautifulsoup4==4.12.3 4 | certifi==2024.2.2 5 | charset-normalizer==3.3.2 6 | docutils==0.20.1 7 | furo==2024.5.6 8 | idna==3.7 9 | imagesize==1.4.1 10 | importlib_metadata==7.1.0 11 | Jinja2==3.1.4 12 | joblib==1.4.2 13 | MarkupSafe==2.1.5 14 | numpy==1.24.4 15 | packaging==24.0 16 | pandas==2.0.3 17 | pybind11==2.12.0 18 | Pygments==2.18.0 19 | python-dateutil==2.9.0.post0 20 | pytz==2024.1 21 | requests==2.32.2 22 | scikit-learn==1.3.2 23 | scipy==1.10.1 24 | six==1.16.0 25 | snowballstemmer==2.2.0 26 | soupsieve==2.5 27 | Sphinx==7.1.2 28 | sphinx-basic-ng==1.0.0b2 29 | sphinxcontrib-applehelp==1.0.4 30 | 
sphinxcontrib-devhelp==1.0.2 31 | sphinxcontrib-htmlhelp==2.0.1 32 | sphinxcontrib-jsmath==1.0.1 33 | sphinxcontrib-qthelp==1.0.3 34 | sphinxcontrib-serializinghtml==1.1.5 35 | threadpoolctl==3.5.0 36 | tzdata==2024.1 37 | urllib3==2.2.1 38 | zipp==3.18.2 39 | -------------------------------------------------------------------------------- /python_docs/source/api.rst: -------------------------------------------------------------------------------- 1 | StochTree API 2 | ============= 3 | 4 | BART 5 | ---- 6 | 7 | .. autoclass:: stochtree.bart.BARTModel 8 | :members: sample, predict 9 | 10 | BCF 11 | --- 12 | 13 | .. autoclass:: stochtree.bcf.BCFModel 14 | :members: sample, predict, predict_tau 15 | -------------------------------------------------------------------------------- /python_docs/source/causal.rst: -------------------------------------------------------------------------------- 1 | Causal Inference 2 | ================ 3 | 4 | This vignette provides a quick overview (using simulated data) of how to use ``stochtree`` for causal inference. 5 | Start by loading stochtree's ``BCFModel`` class and a number of other packages. 6 | 7 | .. code-block:: python 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import seaborn as sns 12 | import matplotlib.pyplot as plt 13 | from stochtree import BCFModel 14 | from sklearn.model_selection import train_test_split 15 | 16 | Now, we generate a simulated causal inference problem 17 | 18 | .. 
code-block:: python 19 | 20 | # RNG 21 | random_seed = 101 22 | rng = np.random.default_rng(random_seed) 23 | 24 | # Generate covariates and basis 25 | n = 1000 26 | p_X = 5 27 | X = rng.uniform(0, 1, (n, p_X)) 28 | pi_X = 0.25 + 0.5*X[:,0] 29 | Z = rng.binomial(1, pi_X, n).astype(float) 30 | 31 | # Define the outcome mean functions (prognostic and treatment effects) 32 | mu_X = pi_X*5 33 | # tau_X = np.sin(X[:,1]*2*np.pi) 34 | tau_X = X[:,1]*2 35 | 36 | # Generate outcome 37 | epsilon = rng.normal(0, 1, n) 38 | y = mu_X + tau_X*Z + epsilon 39 | 40 | Split the dataset into train and test sets 41 | 42 | .. code-block:: python 43 | 44 | sample_inds = np.arange(n) 45 | train_inds, test_inds = train_test_split(sample_inds, test_size=0.5) 46 | X_train = X[train_inds,:] 47 | X_test = X[test_inds,:] 48 | Z_train = Z[train_inds] 49 | Z_test = Z[test_inds] 50 | y_train = y[train_inds] 51 | y_test = y[test_inds] 52 | pi_train = pi_X[train_inds] 53 | pi_test = pi_X[test_inds] 54 | mu_train = mu_X[train_inds] 55 | mu_test = mu_X[test_inds] 56 | tau_train = tau_X[train_inds] 57 | tau_test = tau_X[test_inds] 58 | 59 | Initialize and run a BCF sampler for 1000 iterations (after 10 "warm-start" draws) 60 | 61 | .. code-block:: python 62 | 63 | bcf_model = BCFModel() 64 | bcf_model.sample(X_train, Z_train, y_train, pi_train, X_test, Z_test, pi_test, num_gfr=10, num_mcmc=1000) 65 | -------------------------------------------------------------------------------- /python_docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # import os 7 | # import sys 8 | # sys.path.insert(0, os.path.abspath('../..')) 9 | 10 | # -- Project information ----------------------------------------------------- 11 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 12 | 13 | project = 'stochtree' 14 | copyright = '2024, Drew Herren' 15 | author = 'Drew Herren' 16 | release = '0.0.1' 17 | 18 | # -- General configuration --------------------------------------------------- 19 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 20 | 21 | extensions = [ 22 | 'sphinx.ext.autodoc', 23 | 'sphinx.ext.autosummary', 24 | ] 25 | 26 | templates_path = ['_templates'] 27 | exclude_patterns = [] 28 | 29 | 30 | 31 | # -- Options for HTML output ------------------------------------------------- 32 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 33 | 34 | html_theme = 'furo' 35 | html_static_path = ['_static'] 36 | -------------------------------------------------------------------------------- /python_docs/source/index.rst: -------------------------------------------------------------------------------- 1 | StochTree 2 | ========= 3 | 4 | ``stochtree`` runs stochastic machine learning algorithms for supervised learning and causal inference. 5 | For details on installing the package, see the :doc:`Installation ` page. Once you have ``stochtree`` installed, 6 | the :doc:`Supervised Learning ` and :doc:`Causal Inference ` vignettes provide some guidance on 7 | using the package for your use case. 8 | 9 | .. We also support a lower-level interface to the underlying C++ data structures which can allow for custom sampling routines 10 | .. (i.e. interspersing a BART forest with a neural network, a complicated variance sampler, etc...). 
This interface is introduced 11 | .. in the :doc:`Prototype ` vignette. 12 | 13 | For complete function / class documentation, see the :doc:`API ` page. 14 | 15 | .. toctree:: 16 | install 17 | supervised 18 | causal 19 | api 20 | -------------------------------------------------------------------------------- /python_docs/source/supervised.rst: -------------------------------------------------------------------------------- 1 | Supervised Learning 2 | =================== 3 | 4 | This vignette provides a quick overview (using simulated data) of how to use ``stochtree`` for supervised learning. 5 | Start by loading stochtree's ``BARTModel`` class and a number of other packages. 6 | 7 | .. code-block:: python 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import seaborn as sns 12 | import matplotlib.pyplot as plt 13 | from stochtree import BARTModel 14 | from sklearn.model_selection import train_test_split 15 | 16 | Now, we generate a simulated prediction problem 17 | 18 | .. code-block:: python 19 | 20 | # RNG 21 | random_seed = 1234 22 | rng = np.random.default_rng(random_seed) 23 | 24 | # Generate covariates and basis 25 | n = 1000 26 | p_X = 10 27 | p_W = 1 28 | X = rng.uniform(0, 1, (n, p_X)) 29 | W = rng.uniform(0, 1, (n, p_W)) 30 | 31 | # Define the outcome mean function 32 | def outcome_mean(X, W): 33 | return np.where( 34 | (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * W[:,0], 35 | np.where( 36 | (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * W[:,0], 37 | np.where( 38 | (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * W[:,0], 39 | 7.5 * W[:,0] 40 | ) 41 | ) 42 | ) 43 | 44 | # Generate outcome 45 | epsilon = rng.normal(0, 1, n) 46 | y = outcome_mean(X, W) + epsilon 47 | 48 | # Standardize outcome 49 | y_bar = np.mean(y) 50 | y_std = np.std(y) 51 | resid = (y-y_bar)/y_std 52 | 53 | Split the dataset into train and test sets 54 | 55 | .. 
code-block:: python 56 | 57 | sample_inds = np.arange(n) 58 | train_inds, test_inds = train_test_split(sample_inds, test_size=0.5) 59 | X_train = X[train_inds,:] 60 | X_test = X[test_inds,:] 61 | basis_train = W[train_inds,:] 62 | basis_test = W[test_inds,:] 63 | y_train = y[train_inds] 64 | y_test = y[test_inds] 65 | 66 | Initialize and run a BART sampler for 100 iterations (after 10 "warm-start" draws) 67 | 68 | .. code-block:: python 69 | 70 | bart_model = BARTModel() 71 | bart_model.sample(X_train=X_train, y_train=y_train, leaf_basis_train=basis_train, X_test=X_test, leaf_basis_test=basis_test, num_gfr=10, num_mcmc=100) 72 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | exceptiongroup==1.2.1 2 | iniconfig==2.0.0 3 | joblib==1.4.2 4 | numpy==1.24.4 5 | packaging==24.1 6 | pandas==2.0.3 7 | pluggy==1.5.0 8 | pybind11==2.12.0 9 | pytest==8.2.2 10 | python-dateutil==2.9.0.post0 11 | pytz==2024.1 12 | scikit-learn==1.3.2 13 | scipy==1.10.1 14 | six==1.16.0 15 | threadpoolctl==3.5.0 16 | tomli==2.0.1 17 | tzdata==2024.1 18 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | # package root 2 | PKGROOT=.. 
3 | 4 | STOCHTREE_CPPFLAGS = -DSTOCHTREE_R_BUILD 5 | 6 | # PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/deps/eigen -I$(PKGROOT)/deps/fmt/include -I$(PKGROOT)/deps/fast_double_parser/include -I$(PKGROOT)/deps/boost_math/include $(STOCHTREE_CPPFLAGS) 7 | PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/deps/eigen -I$(PKGROOT)/deps/fmt/include -I$(PKGROOT)/deps/fast_double_parser/include $(STOCHTREE_CPPFLAGS) 8 | 9 | CXX_STD=CXX17 10 | 11 | OBJECTS = \ 12 | forest.o \ 13 | kernel.o \ 14 | R_data.o \ 15 | R_random_effects.o \ 16 | sampler.o \ 17 | serialization.o \ 18 | cpp11.o \ 19 | container.o \ 20 | cutpoint_candidates.o \ 21 | data.o \ 22 | io.o \ 23 | leaf_model.o \ 24 | partition_tracker.o \ 25 | random_effects.o \ 26 | tree.o 27 | -------------------------------------------------------------------------------- /src/kernel.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "stochtree_types.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | typedef Eigen::Map> DoubleMatrixType; 10 | typedef Eigen::Map> IntMatrixType; 11 | 12 | [[cpp11::register]] 13 | int forest_container_get_max_leaf_index_cpp(cpp11::external_pointer forest_container, int forest_num) { 14 | return forest_container->GetEnsemble(forest_num)->GetMaxLeafIndex() - 1; 15 | } 16 | 17 | [[cpp11::register]] 18 | cpp11::writable::integers_matrix<> compute_leaf_indices_cpp( 19 | cpp11::external_pointer forest_container, 20 | cpp11::doubles_matrix<> covariates, cpp11::integers forest_nums 21 | ) { 22 | // Wrap an Eigen Map around the raw data of the covariate matrix 23 | StochTree::data_size_t num_obs = covariates.nrow(); 24 | int num_covariates = covariates.ncol(); 25 | double* covariate_data_ptr = REAL(PROTECT(covariates)); 26 | DoubleMatrixType covariates_eigen(covariate_data_ptr, num_obs, num_covariates); 27 | 28 | // Extract other output dimensions 29 | int num_trees = forest_container->NumTrees(); 30 | int 
num_samples = forest_nums.size(); 31 | 32 | // Declare outputs 33 | cpp11::writable::integers_matrix<> output_matrix(num_obs*num_trees, num_samples); 34 | 35 | // Wrap Eigen Maps around kernel and kernel inverse matrices 36 | int* output_data_ptr = INTEGER(PROTECT(output_matrix)); 37 | IntMatrixType output_eigen(output_data_ptr, num_obs*num_trees, num_samples); 38 | 39 | // Compute leaf indices 40 | std::vector forest_indices(forest_nums.begin(), forest_nums.end()); 41 | forest_container->PredictLeafIndicesInplace(covariates_eigen, output_eigen, forest_indices, num_trees, num_obs); 42 | 43 | // Unprotect pointers to R data 44 | UNPROTECT(2); 45 | 46 | // Return matrix 47 | return output_matrix; 48 | } 49 | -------------------------------------------------------------------------------- /src/stochtree_types.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | enum ForestLeafModel { 10 | kConstant, 11 | kUnivariateRegression, 12 | kMultivariateRegression 13 | }; 14 | -------------------------------------------------------------------------------- /stochtree/__init__.py: -------------------------------------------------------------------------------- 1 | from .bart import BARTModel 2 | from .bcf import BCFModel 3 | from .calibration import calibrate_global_error_variance 4 | from .config import ForestModelConfig, GlobalModelConfig 5 | from .data import Dataset, Residual 6 | from .forest import Forest, ForestContainer 7 | from .kernel import ( 8 | compute_forest_leaf_indices, 9 | compute_forest_max_leaf_index 10 | ) 11 | from .preprocessing import CovariatePreprocessor 12 | from .random_effects import ( 13 | RandomEffectsContainer, 14 | RandomEffectsDataset, 15 | RandomEffectsModel, 16 | RandomEffectsTracker, 17 | ) 18 | from .sampler import ( 19 | RNG, 20 | ForestSampler, 21 | GlobalVarianceModel, 22 | LeafVarianceModel 23 | ) 24 | from 
.serialization import JSONSerializer 25 | from .utils import ( 26 | NotSampledError, 27 | _check_array_integer, 28 | _check_array_numeric, 29 | _check_is_int, 30 | _check_is_numeric, 31 | _check_matrix_square, 32 | _standardize_array_to_list, 33 | _standardize_array_to_np, 34 | ) 35 | 36 | __all__ = [ 37 | "BARTModel", 38 | "BCFModel", 39 | "Dataset", 40 | "Residual", 41 | "ForestContainer", 42 | "Forest", 43 | "CovariatePreprocessor", 44 | "RNG", 45 | "ForestSampler", 46 | "RandomEffectsContainer", 47 | "RandomEffectsDataset", 48 | "RandomEffectsModel", 49 | "RandomEffectsTracker", 50 | "GlobalVarianceModel", 51 | "LeafVarianceModel", 52 | "ForestModelConfig", 53 | "GlobalModelConfig", 54 | "JSONSerializer", 55 | "NotSampledError", 56 | "_check_array_integer", 57 | "_check_array_numeric", 58 | "_check_is_int", 59 | "_check_is_numeric", 60 | "_check_matrix_square", 61 | "_standardize_array_to_list", 62 | "_standardize_array_to_np", 63 | "compute_forest_leaf_indices", 64 | "compute_forest_max_leaf_index", 65 | "calibrate_global_error_variance", 66 | ] 67 | -------------------------------------------------------------------------------- /test/R/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 
5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview 7 | # * https://testthat.r-lib.org/articles/special-files.html 8 | 9 | library(testthat) 10 | library(stochtree) 11 | 12 | test_check("stochtree") 13 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Unit Testing 2 | 3 | This directory contains unit tests for the R and Python packages as well as the C++ core. Below, we detail how to run each test suite. 4 | 5 | ## R Package 6 | 7 | To run the R unit tests, first build the package (either via `R CMD build` at the command line or via "Shift + Command + B" in RStudio). 8 | Then in an R console, run `testthat::test_dir("test/R")`. 9 | 10 | ## Python Package 11 | 12 | To run the Python unit tests, first build the package at the command line (activating your virtual environment, if desired, beforehand): 13 | 14 | ```{bash} 15 | rm -rf stochtree.egg-info; rm -rf .pytest_cache; rm -rf build 16 | pip install . 17 | ``` 18 | 19 | Then run 20 | 21 | ```{bash} 22 | pytest test/python 23 | ``` 24 | 25 | ## C++ Core 26 | 27 | To run the C++ unit tests, you must build the test executable, which is activated via the `BUILD_TEST` CMake option 28 | 29 | ```{bash} 30 | rm -rf build 31 | mkdir build 32 | cmake -S . 
-B build -DBUILD_TEST=ON -DBUILD_DEBUG_TARGETS=OFF 33 | cmake --build build 34 | ``` 35 | 36 | Then run the unit test suite by running the test executable 37 | 38 | ```{bash} 39 | ./build/teststochtree 40 | ``` 41 | -------------------------------------------------------------------------------- /test/cpp/test_category_tracker.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | TEST(CategorySampleTracker, BasicOperations) { 13 | // Create a vector of categorical data 14 | std::vector category_data { 15 | 3, 4, 3, 2, 2, 4, 3, 3, 3, 4, 3, 4 16 | }; 17 | 18 | // Create a CategorySamplerTracker 19 | StochTree::CategorySampleTracker category_tracker = StochTree::CategorySampleTracker(category_data); 20 | 21 | // Extract the label map 22 | std::map label_map = category_tracker.GetLabelMap(); 23 | std::map expected_label_map {{2, 0}, {3, 1}, {4, 2}}; 24 | 25 | // Check that the map was constructed as expected 26 | ASSERT_EQ(label_map[2], 0); 27 | ASSERT_EQ(label_map[3], 1); 28 | ASSERT_EQ(label_map[4], 2); 29 | ASSERT_EQ(label_map, expected_label_map); 30 | } 31 | -------------------------------------------------------------------------------- /test/cpp/testutils.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2022 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 
4 | */ 5 | #ifndef STOCHTREE_TESTUTILS_H_ 6 | #define STOCHTREE_TESTUTILS_H_ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace StochTree { 13 | 14 | namespace TestUtils { 15 | 16 | struct TestDataset { 17 | Eigen::Matrix covariates; 18 | Eigen::Matrix omega; 19 | Eigen::Matrix rfx_basis; 20 | Eigen::VectorXd outcome; 21 | std::vector rfx_groups; 22 | int n; 23 | int x_cols; 24 | int omega_cols; 25 | int rfx_basis_cols; 26 | int rfx_num_groups; 27 | bool row_major{true}; 28 | }; 29 | 30 | /*! Creates a small dataset (10 observations) */ 31 | TestDataset LoadSmallDatasetUnivariateBasis(); 32 | 33 | /*! Creates a small dataset (10 observations) with a multivariate basis for leaf regression applications */ 34 | TestDataset LoadSmallDatasetMultivariateBasis(); 35 | 36 | /*! Creates a small dataset (10 observations) with a multivariate basis and several random effects terms */ 37 | TestDataset LoadSmallRFXDatasetMultivariateBasis(); 38 | 39 | /*! Creates a modest dataset (100 observations) */ 40 | TestDataset LoadMediumDatasetUnivariateBasis(); 41 | 42 | } // namespace TestUtils 43 | 44 | } // namespace StochTree 45 | 46 | #endif // STOCHTREE_TESTUTILS_H_ 47 | -------------------------------------------------------------------------------- /test/python/test_calibration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from scipy.stats import gamma 4 | from sklearn import linear_model 5 | from sklearn.metrics import mean_squared_error 6 | 7 | from stochtree import calibrate_global_error_variance 8 | 9 | 10 | class TestCalibration: 11 | def test_full_rank(self): 12 | n = 100 13 | p = 5 14 | nu = 3 15 | q = 0.9 16 | X = np.random.uniform(size=(n, p)) 17 | y = 1 + X[:, 0] * 0.1 - X[:, 1] * 0.2 + np.random.normal(size=n) 18 | y_std = (y - np.mean(y)) / np.std(y) 19 | reg_model = linear_model.LinearRegression() 20 | reg_model.fit(X, y_std) 21 | mse = mean_squared_error(y_std, 
reg_model.predict(X)) 22 | lamb = calibrate_global_error_variance(X=X, y=y, nu=nu, q=q, standardize=True) 23 | assert lamb == pytest.approx((mse * gamma.ppf(1 - q, nu)) / nu) 24 | 25 | def test_rank_deficient(self): 26 | n = 100 27 | p = 5 28 | nu = 3 29 | q = 0.9 30 | X = np.random.uniform(size=(n, p)) 31 | X[:, 4] = X[:, 2] 32 | y = 1 + X[:, 0] * 0.1 - X[:, 1] * 0.2 + np.random.normal(size=n) 33 | y_std = (y - np.mean(y)) / np.std(y) 34 | reg_model = linear_model.LinearRegression() 35 | reg_model.fit(X, y_std) 36 | mse = mean_squared_error(y_std, reg_model.predict(X)) 37 | if reg_model.rank_ < p: 38 | with pytest.warns(UserWarning): 39 | lamb = calibrate_global_error_variance( 40 | X=X, y=y, nu=nu, q=q, standardize=True 41 | ) 42 | else: 43 | lamb = calibrate_global_error_variance( 44 | X=X, y=y, nu=nu, q=q, standardize=True 45 | ) 46 | assert lamb == pytest.approx((mse * gamma.ppf(1 - q, nu)) / nu) 47 | 48 | def test_overdetermined(self): 49 | n = 100 50 | p = 101 51 | nu = 3 52 | q = 0.9 53 | X = np.random.uniform(size=(n, p)) 54 | y = 1 + X[:, 0] * 0.1 - X[:, 1] * 0.2 + np.random.normal(size=n) 55 | y_std = (y - np.mean(y)) / np.std(y) 56 | reg_model = linear_model.LinearRegression() 57 | reg_model.fit(X, y_std) 58 | with pytest.warns(UserWarning): 59 | lamb = calibrate_global_error_variance( 60 | X=X, y=y, nu=nu, q=q, standardize=True 61 | ) 62 | assert lamb == pytest.approx(np.var(y) * (gamma.ppf(1 - q, nu)) / nu) 63 | -------------------------------------------------------------------------------- /test/python/test_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from stochtree.config import ForestModelConfig, GlobalModelConfig 5 | 6 | 7 | class TestConfig: 8 | def test_forest_config(self): 9 | with pytest.warns(): 10 | _ = ForestModelConfig(num_trees=10, num_features=5, num_observations=100) 11 | _ = ForestModelConfig(num_trees=1, num_features=1, num_observations=1) 
12 | _ = ForestModelConfig( 13 | num_trees=10, 14 | num_features=5, 15 | num_observations=100, 16 | feature_types=[0, 0, 0, 0, 1], 17 | ) 18 | _ = ForestModelConfig( 19 | num_trees=1, num_features=1, num_observations=1, feature_types=[2] 20 | ) 21 | _ = ForestModelConfig( 22 | num_trees=10, 23 | num_features=5, 24 | num_observations=100, 25 | variable_weights=[0.2, 0.2, 0.2, 0.2, 0.2], 26 | ) 27 | _ = ForestModelConfig( 28 | num_trees=1, num_features=1, num_observations=1, variable_weights=[1.0] 29 | ) 30 | 31 | with pytest.raises(ValueError): 32 | _ = ForestModelConfig() 33 | _ = ForestModelConfig( 34 | num_trees=10, 35 | num_features=6, 36 | num_observations=100, 37 | feature_types=[0, 0, 0, 0, 1], 38 | ) 39 | _ = ForestModelConfig( 40 | num_trees=10, 41 | num_features=1, 42 | num_observations=100, 43 | feature_types=[0, 0, 0, 0, 1], 44 | ) 45 | _ = ForestModelConfig( 46 | num_trees=10, 47 | num_features=6, 48 | num_observations=100, 49 | variable_weights=[0.2, 0.2, 0.2, 0.2, 0.2], 50 | ) 51 | _ = ForestModelConfig( 52 | num_trees=10, 53 | num_features=1, 54 | num_observations=100, 55 | variable_weight=[0.2, 0.2, 0.2, 0.2, 0.2], 56 | ) 57 | _ = ForestModelConfig( 58 | num_trees=10, 59 | num_features=1, 60 | num_observations=100, 61 | leaf_dimension=2, 62 | leaf_model_scale=np.array([2, 3], [3, 4], [5, 6]), 63 | ) 64 | _ = ForestModelConfig( 65 | num_trees=10, num_features=1, num_observations=100, leaf_model_type=4 66 | ) 67 | _ = ForestModelConfig( 68 | num_trees=10, num_features=1, num_observations=100, leaf_model_type=-1 69 | ) 70 | 71 | def test_global_config(self): 72 | with pytest.raises(ValueError): 73 | _ = GlobalModelConfig(global_error_variance=0.0) 74 | _ = GlobalModelConfig(global_error_variance=-1.0) 75 | -------------------------------------------------------------------------------- /test/python/test_kernel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from 
stochtree import ( 5 | Dataset, 6 | Forest, 7 | ForestContainer, 8 | compute_forest_leaf_indices, 9 | compute_forest_max_leaf_index 10 | ) 11 | 12 | 13 | class TestKernel: 14 | def test_forest(self): 15 | # Create dataset 16 | X = np.array( 17 | [[1.5, 8.7, 1.2], 18 | [2.7, 3.4, 5.4], 19 | [3.6, 1.2, 9.3], 20 | [4.4, 5.4, 10.4], 21 | [5.3, 9.3, 3.6], 22 | [6.1, 10.4, 4.4]] 23 | ) 24 | n, p = X.shape 25 | num_trees = 2 26 | output_dim = 1 27 | forest_dataset = Dataset() 28 | forest_dataset.add_covariates(X) 29 | forest_samples = ForestContainer(num_trees, output_dim, True, False) 30 | 31 | # Initialize a forest with constant root predictions 32 | forest_samples.add_sample(0.) 33 | 34 | # Split the root of the first tree in the ensemble at X[,1] > 4.0 35 | forest_samples.add_numeric_split(0, 0, 0, 0, 4.0, -5., 5.) 36 | 37 | # Check that regular and "raw" predictions are the same (since the leaf is constant) 38 | computed = compute_forest_leaf_indices(forest_samples, X) 39 | max_leaf_index = compute_forest_max_leaf_index(forest_samples) 40 | expected = np.array([ 41 | [0], 42 | [0], 43 | [0], 44 | [1], 45 | [1], 46 | [1], 47 | [2], 48 | [2], 49 | [2], 50 | [2], 51 | [2], 52 | [2] 53 | ]) 54 | 55 | # Assertion 56 | np.testing.assert_almost_equal(computed, expected) 57 | assert max_leaf_index == [2] 58 | 59 | # Split the left leaf of the first tree in the ensemble at X[,2] > 4.0 60 | forest_samples.add_numeric_split(0, 0, 1, 1, 4.0, -7.5, -2.5) 61 | 62 | # Check that regular and "raw" predictions are the same (since the leaf is constant) 63 | computed = compute_forest_leaf_indices(forest_samples, X) 64 | max_leaf_index = compute_forest_max_leaf_index(forest_samples) 65 | expected = np.array([ 66 | [2], 67 | [1], 68 | [1], 69 | [0], 70 | [0], 71 | [0], 72 | [3], 73 | [3], 74 | [3], 75 | [3], 76 | [3], 77 | [3] 78 | ]) 79 | 80 | # Assertion 81 | np.testing.assert_almost_equal(computed, expected) 82 | assert max_leaf_index == [3] 83 | 
-------------------------------------------------------------------------------- /tools/debug/bart_profile.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## Profiling BART on multiple platforms 3 | ################################################################################ 4 | 5 | library(stochtree) 6 | Rprof() 7 | 8 | start_time <- Sys.time() 9 | n <- 10000 10 | p <- 50 11 | X <- matrix(runif(n*p), ncol = p) 12 | f_XW <- ( 13 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 14 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 15 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 16 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 17 | ) 18 | noise_sd <- 1 19 | y <- f_XW + rnorm(n, 0, noise_sd) 20 | test_set_pct <- 0.2 21 | n_test <- round(test_set_pct*n) 22 | n_train <- n - n_test 23 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 24 | train_inds <- (1:n)[!((1:n) %in% test_inds)] 25 | X_test <- X[test_inds,] 26 | X_train <- X[train_inds,] 27 | y_test <- y[test_inds] 28 | y_train <- y[train_inds] 29 | bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test) 30 | end_time <- Sys.time() 31 | print(paste("runtime:", end_time - start_time)) 32 | 33 | summaryRprof() 34 | Rprof(NULL) 35 | -------------------------------------------------------------------------------- /tools/debug/continuous_treatment_bcf.R: -------------------------------------------------------------------------------- 1 | library(stochtree) 2 | 3 | # Generate data with a continuous treatment 4 | n <- 500 5 | snr <- 3 6 | x1 <- rnorm(n) 7 | x2 <- rnorm(n) 8 | x3 <- rnorm(n) 9 | x4 <- rnorm(n) 10 | x5 <- rnorm(n) 11 | X <- cbind(x1,x2,x3,x4,x5) 12 | p <- ncol(X) 13 | mu_x <- 1 + 2*x1 - 4*(x2 < 0) + 4*(x2 >= 0) + 3*(abs(x3) - sqrt(2/pi)) 14 | tau_x <- 1 + 2*x4 15 | u <- runif(n) 16 | pi_x <- ((mu_x-1)/4) + 4*(u-0.5) 17 | Z <- pi_x + rnorm(n,0,1) 18 | E_XZ <- mu_x + 
Z*tau_x 19 | y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 20 | X <- as.data.frame(X) 21 | 22 | # Split data into test and train sets 23 | test_set_pct <- 0.2 24 | n_test <- round(test_set_pct*n) 25 | n_train <- n - n_test 26 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 27 | train_inds <- (1:n)[!((1:n) %in% test_inds)] 28 | X_test <- X[test_inds,] 29 | X_train <- X[train_inds,] 30 | pi_test <- pi_x[test_inds] 31 | pi_train <- pi_x[train_inds] 32 | Z_test <- Z[test_inds] 33 | Z_train <- Z[train_inds] 34 | y_test <- y[test_inds] 35 | y_train <- y[train_inds] 36 | mu_test <- mu_x[test_inds] 37 | mu_train <- mu_x[train_inds] 38 | tau_test <- tau_x[test_inds] 39 | tau_train <- tau_x[train_inds] 40 | 41 | # Run continuous treatment BCF 42 | num_gfr <- 10 43 | num_burnin <- 0 44 | num_mcmc <- 1000 45 | num_samples <- num_gfr + num_burnin + num_mcmc 46 | bcf_model_warmstart <- bcf( 47 | X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, 48 | X_test = X_test, Z_test = Z_test, pi_test = pi_test, 49 | num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, 50 | sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F, verbose = T 51 | ) 52 | 53 | # Inspect results 54 | mu_hat_train <- rowMeans(bcf_model_warmstart$mu_hat_train) 55 | tau_hat_train <- rowMeans(bcf_model_warmstart$tau_hat_train) 56 | mu_hat_test <- rowMeans(bcf_model_warmstart$mu_hat_test) 57 | tau_hat_test <- rowMeans(bcf_model_warmstart$tau_hat_test) 58 | plot(mu_train, mu_hat_train); abline(0,1,lwd=3,lty=3,col="red") 59 | plot(tau_train, tau_hat_train); abline(0,1,lwd=3,lty=3,col="red") 60 | plot(mu_test, mu_hat_test); abline(0,1,lwd=3,lty=3,col="red") 61 | plot(tau_test, tau_hat_test); abline(0,1,lwd=3,lty=3,col="red") 62 | -------------------------------------------------------------------------------- /tools/debug/dgps.R: -------------------------------------------------------------------------------- 1 | dgp_levels <- c("dgp_prediction_partitioned_lm", 
"dgp_prediction_step_function") 2 | 3 | dgp_prediction_partitioned_lm <- function(n, p_x, p_w, snr = NULL) { 4 | X <- matrix(runif(n*p_x), ncol = p_x) 5 | W <- matrix(runif(n*p_w), ncol = p_w) 6 | f_XW <- ( 7 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + 8 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + 9 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + 10 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) 11 | ) 12 | if (!is.null(snr)) { 13 | if (snr > 0) { 14 | noise_sd <- sd(f_XW) / snr 15 | snr_used <- snr 16 | } else { 17 | noise_sd <- 1 18 | snr_used <- sd(f_XW) / noise_sd 19 | } 20 | } else { 21 | noise_sd <- 1 22 | snr_used <- sd(f_XW) / noise_sd 23 | } 24 | y <- f_XW + rnorm(n, 0, noise_sd) 25 | return(list(has_basis=T,X=X,W=W,y=y,noise_sd=noise_sd,snr=snr_used)) 26 | } 27 | 28 | dgp_prediction_step_function <- function(n, p_x, snr = NULL) { 29 | X <- matrix(runif(n*p_x), ncol = p_x) 30 | f_XW <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 35 | ) 36 | if (!is.null(snr)) { 37 | if (snr > 0) { 38 | noise_sd <- sd(f_XW) / snr 39 | snr_used <- snr 40 | } else { 41 | noise_sd <- 1 42 | snr_used <- sd(f_XW) / noise_sd 43 | } 44 | } else { 45 | noise_sd <- 1 46 | snr_used <- sd(f_XW) / noise_sd 47 | } 48 | y <- f_XW + rnorm(n, 0, noise_sd) 49 | return(list(has_basis=F,X=X,W=NULL,y=y,noise_sd=noise_sd,snr=snr_used)) 50 | } 51 | -------------------------------------------------------------------------------- /tools/debug/heteroskedastic_bart.R: -------------------------------------------------------------------------------- 1 | # Load libraries 2 | library(stochtree) 3 | library(here) 4 | 5 | # Load train and test data 6 | from_file <- T 7 | if (from_file) { 8 | project_dir <- here() 9 | train_set_path <- file.path(project_dir, "debug", "data", "heterosked_train.csv") 10 | test_set_path <- 
file.path(project_dir, "debug", "data", "heterosked_test.csv")
  # Load pre-generated train/test sets: column 1 is the outcome, columns 2-11
  # are covariates, column 12 is the conditional mean f(x), and column 13 is
  # the conditional standard deviation s(x)
  train_df <- read.csv(train_set_path)
  test_df <- read.csv(test_set_path)
  y_train <- train_df[, 1]
  y_test <- test_df[, 1]
  X_train <- train_df[, 2:11]
  X_test <- test_df[, 2:11]
  f_x_train <- train_df[, 12]
  f_x_test <- test_df[, 12]
  s_x_train <- train_df[, 13]
  s_x_test <- test_df[, 13]
} else {
  # Simulate a heteroskedastic DGP: zero conditional mean, piecewise
  # conditional standard deviation driven by X1 (bin) and X3 (slope)
  n <- 500
  p_x <- 10
  X <- matrix(runif(n * p_x), ncol = p_x)
  # FIX: f_XW was the scalar 0; `f_XW[test_inds]` / `f_XW[train_inds]` below
  # index past position 1 of a length-1 vector and return NA for every
  # element. Use a length-n zero vector so f_x_test/f_x_train are all zeros.
  f_XW <- rep(0, n)
  s_XW <- (
    ((0 <= X[, 1]) & (0.25 > X[, 1])) * (0.5 * X[, 3]) +
    ((0.25 <= X[, 1]) & (0.5 > X[, 1])) * (1 * X[, 3]) +
    ((0.5 <= X[, 1]) & (0.75 > X[, 1])) * (2 * X[, 3]) +
    ((0.75 <= X[, 1]) & (1 > X[, 1])) * (3 * X[, 3])
  )
  y <- f_XW + rnorm(n, 0, 1) * s_XW

  # Split data into test and train sets
  test_set_pct <- 0.2
  n_test <- round(test_set_pct * n)
  n_train <- n - n_test
  test_inds <- sort(sample(seq_len(n), n_test, replace = FALSE))
  train_inds <- setdiff(seq_len(n), test_inds)
  X_test <- as.data.frame(X[test_inds, ])
  X_train <- as.data.frame(X[train_inds, ])
  W_test <- NULL
  W_train <- NULL
  y_test <- y[test_inds]
  y_train <- y[train_inds]
  f_x_test <- f_XW[test_inds]
  f_x_train <- f_XW[train_inds]
  s_x_test <- s_XW[test_inds]
  s_x_train <- s_XW[train_inds]
}

# Run BART with a variance forest only (num_trees_mean = 0, so the mean is
# not modeled; m variance trees capture s(x))
num_gfr <- 10
num_burnin <- 0
num_mcmc <- 200
num_samples <- num_gfr + num_burnin + num_mcmc
m <- 50
a_0 <- sqrt(1 / 2)
sigma0 <- 1 / 2
bart_model <- stochtree::bart(
  X_train = X_train, y_train = y_train, X_test = X_test,
  num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc,
  num_trees_mean = 0, num_trees_variance = m,
  alpha_mean = 0.8, beta_mean = 3, min_samples_leaf_mean = 5,
  max_depth_mean = 3, alpha_variance = 0.95, beta_variance = 1.25,
  min_samples_leaf_variance = 1, max_depth_variance = 10,
  sample_sigma = FALSE, sample_tau = FALSE, keep_gfr = TRUE,
  sigma2_init = sigma0,
  # a_forest = m/(a_0^2) + 1, b_forest = m/(a_0^2)
  a_forest = 3, b_forest = 2
)

# Compare predicted vs. true conditional standard deviations (train set)
s_x_hat_train <- rowMeans(bart_model$sigma_x_hat_train)
plot(s_x_hat_train, s_x_train,
     main = "Conditional std dev as a function of x",
     xlab = "Predicted", ylab = "Actual")
abline(0, 1, col = "red", lty = 3, lwd = 3)
sqrt(mean((s_x_hat_train - s_x_train)^2))

# Compare predicted vs. true conditional standard deviations (test set)
s_x_hat_test <- rowMeans(bart_model$sigma_x_hat_test)
plot(s_x_hat_test, s_x_test,
     main = "Conditional std dev as a function of x",
     xlab = "Predicted", ylab = "Actual")
abline(0, 1, col = "red", lty = 3, lwd = 3)
sqrt(mean((s_x_hat_test - s_x_test)^2))
--------------------------------------------------------------------------------
/tools/debug/multichain_seq.R:
--------------------------------------------------------------------------------
library(stochtree)

# Simulate a piecewise-linear DGP whose leaf regression basis is W
n <- 500
p_x <- 10
p_w <- 1
snr <- 3
X <- matrix(runif(n * p_x), ncol = p_x)
W <- matrix(runif(n * p_w), ncol = p_w)
f_XW <- (
  ((0 <= X[, 1]) & (0.25 > X[, 1])) * (-7.5 * W[, 1]) +
  ((0.25 <= X[, 1]) & (0.5 > X[, 1])) * (-2.5 * W[, 1]) +
  ((0.5 <= X[, 1]) & (0.75 > X[, 1])) * (2.5 * W[, 1]) +
  ((0.75 <= X[, 1]) & (1 > X[, 1])) * (7.5 * W[, 1])
)
noise_sd <- sd(f_XW) / snr
y <- f_XW + rnorm(n, 0, 1) * noise_sd

# Train/test split
test_set_pct <- 0.2
n_test <- round(test_set_pct * n)
n_train <- n - n_test
test_inds <- sort(sample(seq_len(n), n_test, replace = FALSE))
train_inds <- setdiff(seq_len(n), test_inds)
X_test <- as.data.frame(X[test_inds, ])
X_train <- as.data.frame(X[train_inds, ])
W_test <- W[test_inds, ]
W_train <- W[train_inds, ]
y_test <- y[test_inds]
y_train <- y[train_inds]

# Sample several independent BART chains sequentially
num_chains <- 4
num_gfr <- 10
num_burnin <- 0
num_mcmc <- 100
num_trees <- 100
bart_models <- list()
for (i in seq_len(num_chains)) {
  bart_models[[i]] <- stochtree::bart(
    X_train = X_train, W_train = W_train, y_train = y_train,
    X_test = X_test,
W_test = W_test, num_trees = num_trees, 37 | num_gfr = num_gfr, num_burnin = num_burnin, 38 | num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T 39 | ) 40 | } 41 | json_string_list <- list() 42 | for (i in 1:num_chains) { 43 | json_string_list[[i]] <- saveBARTModelToJsonString(bart_models[[i]]) 44 | } 45 | combined_forests <- loadForestContainerCombinedJsonString(json_string_list, "forest_0") 46 | test_dataset <- createForestDataset(as.matrix(X_test), W_test) 47 | yhat_combined <- combined_forests$predict(test_dataset) -------------------------------------------------------------------------------- /tools/debug/multivariate_bart_debug.R: -------------------------------------------------------------------------------- 1 | library(stochtree) 2 | 3 | # Generate the data 4 | n <- 500 5 | p_x <- 10 6 | p_w <- 2 7 | snr <- 3 8 | X <- matrix(runif(n*p_x), ncol = p_x) 9 | W <- matrix(runif(n*p_w), ncol = p_w) 10 | f_XW <- ( 11 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + 12 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + 13 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + 14 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) 15 | ) 16 | noise_sd <- sd(f_XW) / snr 17 | y <- f_XW + rnorm(n, 0, 1)*noise_sd 18 | 19 | # Split data into test and train sets 20 | test_set_pct <- 0.2 21 | n_test <- round(test_set_pct*n) 22 | n_train <- n - n_test 23 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 24 | train_inds <- (1:n)[!((1:n) %in% test_inds)] 25 | X_test <- as.data.frame(X[test_inds,]) 26 | X_train <- as.data.frame(X[train_inds,]) 27 | W_test <- W[test_inds,] 28 | W_train <- W[train_inds,] 29 | y_test <- y[test_inds] 30 | y_train <- y[train_inds] 31 | 32 | # Sample BART model 33 | num_gfr <- 10 34 | num_burnin <- 0 35 | num_mcmc <- 100 36 | num_samples <- num_gfr + num_burnin + num_mcmc 37 | bart_params <- list(sample_sigma_global = T, sample_sigma_leaf = F, num_trees_mean = 100) 38 | bart_model_warmstart <- stochtree::bart( 39 | X_train = X_train, 
W_train = W_train, y_train = y_train, X_test = X_test, W_test = W_test, 40 | num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, 41 | params = bart_params 42 | ) 43 | -------------------------------------------------------------------------------- /tools/debug/python_comparison_debug.R: -------------------------------------------------------------------------------- 1 | library(stochtree) 2 | 3 | df <- read.csv("debug/data/heterosked_train.csv") 4 | y <- df[,"y"] 5 | X <- df[,c('X1','X2','X3','X4','X5','X6','X7','X8','X9','X10')] 6 | 7 | num_gfr <- 0 8 | num_burnin <- 0 9 | num_mcmc <- 10 10 | general_params <- list(random_seed = 1234, standardize = F, sample_sigma2_global = T) 11 | bart_model <- stochtree::bart( 12 | X_train = X, y_train = y, 13 | num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, 14 | general_params = general_params 15 | ) 16 | 17 | rowMeans(bart_model$y_hat_train)[1:20] 18 | bart_model$sigma2_global_samples -------------------------------------------------------------------------------- /tools/debug/r_kernel.R: -------------------------------------------------------------------------------- 1 | library(stochtree) 2 | library(tgp) 3 | 4 | # Generate the data, add many "noise variables" 5 | n <- 500 6 | p_extra <- 10 7 | friedman.df <- friedman.1.data(n=n) 8 | train_inds <- sort(sample(1:n, floor(0.8*n), replace = FALSE)) 9 | test_inds <- (1:n)[!((1:n) %in% train_inds)] 10 | X <- as.matrix(friedman.df)[,1:10] 11 | X <- cbind(X, matrix(runif(n*p_extra), ncol = p_extra)) 12 | y <- as.matrix(friedman.df)[,12] + rnorm(n,0,1)*(sd(as.matrix(friedman.df)[,11])/2) 13 | X_train <- X[train_inds,] 14 | X_test <- X[test_inds,] 15 | y_train <- y[train_inds] 16 | y_test <- y[test_inds] 17 | 18 | # Run BART on the data 19 | X_train <- as.data.frame(X_train) 20 | X_test <- as.data.frame(X_test) 21 | bart_params <- list(num_trees_mean=200, num_trees_variance=50) 22 | bart_model <- bart(X_train=X_train, y_train=y_train, X_test=X_test, 
params = bart_params, num_mcmc=1000) 23 | 24 | # Compute leaf indices for selected samples from the mean forest 25 | leaf_mat <- computeForestLeafIndices(bart_model, X_test, forest_type = "mean", 26 | forest_inds = c(99,100)) 27 | 28 | # Compute leaf indices for all samples from the mean forest 29 | leaf_mat <- computeForestLeafIndices(bart_model, X_test, forest_type = "mean") 30 | 31 | # Construct sparse matrix of leaf membership 32 | W <- Matrix::sparseMatrix(i=rep(1:length(y_test),200), j=leaf_mat[,forest_num] + 1, x=1) 33 | tcrossprod(W) 34 | 35 | # Compute leaf indices for selected samples from the variance forest 36 | leaf_mat <- computeForestLeafIndices(bart_model, X_test, forest_type = "variance", 37 | forest_inds = c(99,100)) 38 | 39 | # Compute leaf indices for all samples from the variance forest 40 | leaf_mat <- computeForestLeafIndices(bart_model, X_test, forest_type = "variance") -------------------------------------------------------------------------------- /tools/perf/bart_microbenchmark.R: -------------------------------------------------------------------------------- 1 | library(microbenchmark) 2 | library(stochtree) 3 | 4 | # Generate data needed to train BART model 5 | n <- 10000 6 | p <- 20 7 | X <- matrix(runif(n*p), ncol = p) 8 | f_XW <- ( 9 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 10 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 11 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 12 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 13 | ) 14 | noise_sd <- 1 15 | y <- f_XW + rnorm(n, 0, noise_sd) 16 | test_set_pct <- 0.2 17 | n_test <- round(test_set_pct*n) 18 | n_train <- n - n_test 19 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 20 | train_inds <- (1:n)[!((1:n) %in% test_inds)] 21 | X_test <- X[test_inds,] 22 | X_train <- X[train_inds,] 23 | y_test <- y[test_inds] 24 | y_train <- y[train_inds] 25 | 26 | # Run microbenchmark 27 | bench_results <- microbenchmark( 28 | bart(X_train = X_train, y_train = y_train, X_test = X_test, 
num_gfr = 10, num_mcmc = 100), 29 | times = 10 30 | ) 31 | -------------------------------------------------------------------------------- /tools/perf/bcf_microbenchmark.R: -------------------------------------------------------------------------------- 1 | library(microbenchmark) 2 | library(stochtree) 3 | 4 | # Generate data needed to train BCF 5 | n <- 500 6 | x1 <- rnorm(n) 7 | x2 <- rnorm(n) 8 | x3 <- rnorm(n) 9 | x4 <- as.numeric(rbinom(n,1,0.5)) 10 | x5 <- as.numeric(sample(1:3,n,replace=TRUE)) 11 | X <- cbind(x1,x2,x3,x4,x5) 12 | p <- ncol(X) 13 | g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,4))} 14 | mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} 15 | mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} 16 | tau1 <- function(x) {rep(3,nrow(x))} 17 | tau2 <- function(x) {1+2*x[,2]*x[,4]} 18 | mu_x <- mu1(X) 19 | tau_x <- tau2(X) 20 | pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 21 | Z <- rbinom(n,1,pi_x) 22 | E_XZ <- mu_x + Z*tau_x 23 | snr <- 4 24 | y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 25 | test_set_pct <- 0.2 26 | n_test <- round(test_set_pct*n) 27 | n_train <- n - n_test 28 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 29 | train_inds <- (1:n)[!((1:n) %in% test_inds)] 30 | X_test <- X[test_inds,] 31 | X_train <- X[train_inds,] 32 | pi_test <- pi_x[test_inds] 33 | pi_train <- pi_x[train_inds] 34 | Z_test <- Z[test_inds] 35 | Z_train <- Z[train_inds] 36 | y_test <- y[test_inds] 37 | y_train <- y[train_inds] 38 | mu_test <- mu_x[test_inds] 39 | mu_train <- mu_x[train_inds] 40 | tau_test <- tau_x[test_inds] 41 | tau_train <- tau_x[train_inds] 42 | 43 | # Run microbenchmark 44 | microbenchmark( 45 | bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, 46 | X_test = X_test, Z_test = Z_test, pi_test = pi_test, num_gfr = 10, 47 | num_mcmc = 1000, sample_sigma_leaf_tau = F) 48 | ) 49 | -------------------------------------------------------------------------------- 
/tools/setup/setup_r_dependencies.R:
--------------------------------------------------------------------------------
################################################################################
## This script is a modified version of the setup-r-dependencies Github action
## for local use and debugging. The source for the action is:
## https://github.com/r-lib/actions/blob/v2-branch/setup-r-dependencies/action.yaml
################################################################################

# Set site library path
cat("::group::Set site library path\n")
if (Sys.getenv("RENV_PROJECT") != "") {
  # Under renv the project library takes precedence; record it for pak and exit
  message("renv project detected, no need to set R_LIBS_SITE")
  cat(sprintf("R_LIB_FOR_PAK=%s\n", .libPaths()[1]), file = Sys.getenv("GITHUB_ENV"), append = TRUE)
  q("no")
}
lib <- .libPaths()[[1]]
if (lib == "") {
  # No user/site library configured yet: derive a site-library path next to
  # the base library and export it for both R and pak
  lib <- file.path(dirname(.Library), "site-library")
  Sys.setenv(R_LIBS_SITE = strsplit(lib, .Platform$path.sep)[[1]][[1]])
  Sys.setenv(R_LIB_FOR_PAK = strsplit(lib, .Platform$path.sep)[[1]][[1]])
  message("Setting R_LIBS_SITE to ", lib)
  message("Setting R_LIB_FOR_PAK to ", lib)
} else {
  # A library already exists; just point pak at its first entry
  message("R_LIBS_SITE is already set to ", lib)
  Sys.setenv(R_LIB_FOR_PAK = strsplit(lib, .Platform$path.sep)[[1]][[1]])
  message("R_LIB_FOR_PAK is now set to ", lib)
}
cat("::endgroup::\n")

# Install pak
cat("::group::Install pak\n")
lib <- Sys.getenv("R_LIB_FOR_PAK")
dir.create(lib, showWarnings = FALSE, recursive = TRUE)
# Install the pak binary matching this platform/R build from r-lib's repo
install.packages("pak", lib = lib, repos = sprintf(
  "https://r-lib.github.io/p/pak/%s/%s/%s/%s",
  "stable",
  .Platform$pkgType,
  R.Version()$os,
  R.Version()$arch
))
cat("::endgroup::\n")

# Dependency resolution
cat("::group::Dependency resolution\n")
cat("os-version=", sessionInfo()$running, "\n", sep = "", append = TRUE)
# For R-devel, build a cache key that changes with the graphics engine
# version and internals ID, so stale binaries are not reused across builds
r_version <-
  if (grepl("development", R.version.string)) {
    pdf(tempfile())
    ge_ver <- attr(recordPlot(), "engineVersion")
    dev.off()
    paste0("R version ", getRversion(), " (ge:", ge_ver, "; iid:", .Internal(internalsID()), ")")
  } else {
    R.version.string
  }
cat("r-version=", r_version, "\n", sep = "", append = TRUE)
# No Config/Needs fields requested here (empty split yields character(0))
needs <- sprintf("Config/Needs/%s", strsplit("", "[[:space:],]+")[[1]])
deps <- strsplit("any::cpp11, any::R6, any::knitr, any::rmarkdown, any::Matrix, any::tgp, any::MASS, any::mvtnorm, any::ggplot2, any::latex2exp, any::testthat, any::sessioninfo", "[[:space:],]+")[[1]]
extra_deps <- strsplit("any::testthat, any::decor, github::StochasticTree/stochtree-r", "[[:space:],]+")[[1]]
dir.create("install_temp", showWarnings = FALSE)
Sys.setenv("PKGCACHE_HTTP_VERSION" = "2")
library(pak, lib.loc = Sys.getenv("R_LIB_FOR_PAK"))
# Resolve everything into a lockfile first so the install step is reproducible
pak::lockfile_create(
  c(deps, extra_deps),
  lockfile = "install_temp/pkg.lock",
  upgrade = FALSE,
  dependencies = c(needs, "all"),
  lib = NULL
)
cat("::endgroup::\n")
cat("::group::Show Lockfile\n")
writeLines(readLines("install_temp/pkg.lock"))
cat("::endgroup::\n")

# Install/Update packages
cat("::group::Install/update packages\n")
Sys.setenv("PKGCACHE_HTTP_VERSION" = "2")
library(pak, lib.loc = Sys.getenv("R_LIB_FOR_PAK"))
pak::lockfile_install("install_temp/pkg.lock")

# Clean up temporary pkg.lock install directory
unlink("install_temp", recursive = TRUE)
cat("::endgroup::\n")
--------------------------------------------------------------------------------