├── .Rbuildignore ├── .github └── workflows │ ├── cpp-test.yml │ ├── pypi-wheels.yml │ ├── python-test.yml │ ├── r-cran-branch.yml │ ├── r-devel-check.yml │ └── r-test.yml ├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── CMakeLists.txt ├── CPP_DEPS_LICENSE.md ├── DESCRIPTION ├── Doxyfile ├── LICENSE ├── LICENSE.md ├── MANIFEST.in ├── NAMESPACE ├── NEWS.md ├── R ├── bart.R ├── bcf.R ├── calibration.R ├── config.R ├── cpp11.R ├── data.R ├── forest.R ├── generics.R ├── kernel.R ├── model.R ├── random_effects.R ├── serialization.R ├── stochtree-package.R ├── utils.R └── variance.R ├── README.md ├── R_README.md ├── _pkgdown.yml ├── cmake └── Sanitizer.cmake ├── cran-bootstrap.R ├── cran-cleanup.R ├── cran-comments.md ├── debug ├── README.md ├── api_debug.cpp └── data │ ├── heterosked_test.csv │ └── heterosked_train.csv ├── demo ├── data │ ├── python_r_debug_test.csv │ └── python_r_debug_train.csv ├── debug │ ├── causal_inference.py │ ├── causal_inference_binary_outcome.py │ ├── classification.py │ ├── kernel.py │ ├── multi_chain.py │ ├── multivariate_treatment_causal_inference.py │ ├── parallel_multi_chain.py │ ├── r_comparison_debug.py │ ├── random_effects.py │ ├── rfx_serialization.py │ ├── serialization.py │ ├── supervised_learning.py │ └── supervised_learning_binary_outcome.py └── notebooks │ ├── causal_inference.ipynb │ ├── causal_inference_feature_subsets.ipynb │ ├── heteroskedastic_supervised_learning.ipynb │ ├── multivariate_treatment_causal_inference.ipynb │ ├── prototype_interface.ipynb │ ├── serialization.ipynb │ ├── supervised_learning.ipynb │ ├── supervised_learning_classification.ipynb │ └── tree_inspection.ipynb ├── include ├── nlohmann │ └── json.hpp └── stochtree │ ├── category_tracker.h │ ├── common.h │ ├── container.h │ ├── cutpoint_candidates.h │ ├── data.h │ ├── ensemble.h │ ├── export.h │ ├── gamma_sampler.h │ ├── ig_sampler.h │ ├── io.h │ ├── leaf_model.h │ ├── log.h │ ├── mainpage.h │ ├── meta.h │ ├── normal_sampler.h │ ├── partition_tracker.h 
│ ├── prior.h │ ├── random.h │ ├── random_effects.h │ ├── tree.h │ ├── tree_sampler.h │ └── variance_model.h ├── inst └── COPYRIGHTS ├── man ├── CppJson.Rd ├── CppRNG.Rd ├── Forest.Rd ├── ForestDataset.Rd ├── ForestModel.Rd ├── ForestModelConfig.Rd ├── ForestSamples.Rd ├── GlobalModelConfig.Rd ├── Outcome.Rd ├── RandomEffectSamples.Rd ├── RandomEffectsDataset.Rd ├── RandomEffectsModel.Rd ├── RandomEffectsTracker.Rd ├── bart.Rd ├── bcf.Rd ├── calibrateInverseGammaErrorVariance.Rd ├── computeForestLeafIndices.Rd ├── computeForestLeafVariances.Rd ├── computeForestMaxLeafIndex.Rd ├── convertPreprocessorToJson.Rd ├── createBARTModelFromCombinedJson.Rd ├── createBARTModelFromCombinedJsonString.Rd ├── createBARTModelFromJson.Rd ├── createBARTModelFromJsonFile.Rd ├── createBARTModelFromJsonString.Rd ├── createBCFModelFromCombinedJson.Rd ├── createBCFModelFromCombinedJsonString.Rd ├── createBCFModelFromJson.Rd ├── createBCFModelFromJsonFile.Rd ├── createBCFModelFromJsonString.Rd ├── createCppJson.Rd ├── createCppJsonFile.Rd ├── createCppJsonString.Rd ├── createCppRNG.Rd ├── createForest.Rd ├── createForestDataset.Rd ├── createForestModel.Rd ├── createForestModelConfig.Rd ├── createForestSamples.Rd ├── createGlobalModelConfig.Rd ├── createOutcome.Rd ├── createPreprocessorFromJson.Rd ├── createPreprocessorFromJsonString.Rd ├── createRandomEffectSamples.Rd ├── createRandomEffectsDataset.Rd ├── createRandomEffectsModel.Rd ├── createRandomEffectsTracker.Rd ├── getRandomEffectSamples.Rd ├── getRandomEffectSamples.bartmodel.Rd ├── getRandomEffectSamples.bcfmodel.Rd ├── loadForestContainerCombinedJson.Rd ├── loadForestContainerCombinedJsonString.Rd ├── loadForestContainerJson.Rd ├── loadRandomEffectSamplesCombinedJson.Rd ├── loadRandomEffectSamplesCombinedJsonString.Rd ├── loadRandomEffectSamplesJson.Rd ├── loadScalarJson.Rd ├── loadVectorJson.Rd ├── predict.bartmodel.Rd ├── predict.bcfmodel.Rd ├── preprocessPredictionData.Rd ├── preprocessTrainData.Rd ├── resetActiveForest.Rd ├── 
resetForestModel.Rd ├── resetRandomEffectsModel.Rd ├── resetRandomEffectsTracker.Rd ├── rootResetRandomEffectsModel.Rd ├── rootResetRandomEffectsTracker.Rd ├── sampleGlobalErrorVarianceOneIteration.Rd ├── sampleLeafVarianceOneIteration.Rd ├── saveBARTModelToJson.Rd ├── saveBARTModelToJsonFile.Rd ├── saveBARTModelToJsonString.Rd ├── saveBCFModelToJson.Rd ├── saveBCFModelToJsonFile.Rd ├── saveBCFModelToJsonString.Rd ├── savePreprocessorToJsonString.Rd └── stochtree-package.Rd ├── pyproject.toml ├── python_docs ├── Makefile ├── README.md ├── make.bat ├── requirements.txt └── source │ ├── api.rst │ ├── causal.rst │ ├── conf.py │ ├── index.rst │ ├── install.rst │ └── supervised.rst ├── requirements.txt ├── setup.py ├── src ├── Makevars ├── R_data.cpp ├── R_random_effects.cpp ├── container.cpp ├── cpp11.cpp ├── cutpoint_candidates.cpp ├── data.cpp ├── forest.cpp ├── io.cpp ├── kernel.cpp ├── leaf_model.cpp ├── partition_tracker.cpp ├── py_stochtree.cpp ├── random_effects.cpp ├── sampler.cpp ├── serialization.cpp ├── stochtree_types.h └── tree.cpp ├── stochtree ├── __init__.py ├── bart.py ├── bcf.py ├── calibration.py ├── config.py ├── data.py ├── forest.py ├── kernel.py ├── preprocessing.py ├── random_effects.py ├── sampler.py ├── serialization.py └── utils.py ├── test ├── R │ ├── testthat.R │ └── testthat │ │ ├── test-bart.R │ │ ├── test-bcf.R │ │ ├── test-categorical.R │ │ ├── test-data-preprocessing.R │ │ ├── test-forest-container.R │ │ ├── test-forest.R │ │ ├── test-predict.R │ │ ├── test-residual.R │ │ └── test-serialization.R ├── README.md ├── cpp │ ├── test_category_tracker.cpp │ ├── test_cutpoints.cpp │ ├── test_data.cpp │ ├── test_forest.cpp │ ├── test_json.cpp │ ├── test_model.cpp │ ├── test_predict.cpp │ ├── test_random_effects.cpp │ ├── test_sorted_partition_tracker.cpp │ ├── test_tree.cpp │ ├── test_unsorted_partition_tracker.cpp │ ├── testutils.cpp │ └── testutils.h └── python │ ├── test_bart.py │ ├── test_bcf.py │ ├── test_calibration.py │ ├── 
test_config.py │ ├── test_forest.py │ ├── test_forest_container.py │ ├── test_json.py │ ├── test_kernel.py │ ├── test_predict.py │ ├── test_preprocessor.py │ ├── test_random_effects.py │ ├── test_residual.py │ └── test_utils.py ├── tools ├── data │ ├── python_r_debug_test.csv │ └── python_r_debug_train.csv ├── debug │ ├── additive_lm.R │ ├── bart_profile.R │ ├── bcf_json.R │ ├── bcf_rfx.R │ ├── continuous_treatment_bcf.R │ ├── debug.R │ ├── dgps.R │ ├── forest_reset_debug.R │ ├── heteroskedastic_bart.R │ ├── json_debug.R │ ├── multichain_seq.R │ ├── multivariate_bart_debug.R │ ├── parallel_warmstart.R │ ├── parallel_warmstart_bcf.R │ ├── python_comparison_debug.R │ ├── python_r_debug.R │ ├── r_kernel.R │ └── restricted_sweep.R ├── perf │ ├── bart_microbenchmark.R │ ├── bcf_microbenchmark.R │ ├── bcf_performance_metrics.R │ └── custom_loop_microbenchmark.R ├── setup │ └── setup_r_dependencies.R └── simulations │ ├── bart_comparison.R │ ├── bcf-sim-study.R │ └── bcf_comparison.R └── vignettes ├── BayesianSupervisedLearning.Rmd ├── CausalInference.Rmd ├── CustomSamplingRoutine.Rmd ├── EnsembleKernel.Rmd ├── Heteroskedasticity.Rmd ├── ModelSerialization.Rmd ├── MultiChain.Rmd ├── PriorCalibration.Rmd ├── TreeInspection.Rmd └── vignettes.bib /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^cran-comments\.md$ 4 | -------------------------------------------------------------------------------- /.github/workflows/pypi-wheels.yml: -------------------------------------------------------------------------------- 1 | name: Build Python Wheels for PyPI 2 | # Note: this file is based in part on the example workflow in the cibuildwheel docs 3 | # https://cibuildwheel.pypa.io/en/stable/setup/#github-actions 4 | # and in part on matplotlib's wheel build workflow: 5 | # https://github.com/matplotlib/matplotlib/blob/main/.github/workflows/cibuildwheel.yml 6 | 7 | on: 8 | push: 9 | branches: 
[main] 10 | pull_request: 11 | branches: [main] 12 | workflow_dispatch: 13 | 14 | jobs: 15 | build_wheels: 16 | name: Build wheels on ${{ matrix.os }} 17 | runs-on: ${{ matrix.os }} 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | include: 22 | - os: ubuntu-latest 23 | cibw_archs: "x86_64" 24 | - os: ubuntu-24.04-arm 25 | cibw_archs: "aarch64" 26 | - os: windows-latest 27 | cibw_archs: "auto64" 28 | - os: macos-13 29 | cibw_archs: "x86_64" 30 | - os: macos-14 31 | cibw_archs: "arm64" 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | with: 36 | submodules: 'recursive' 37 | 38 | - name: Build wheels 39 | uses: pypa/cibuildwheel@v2.23.2 40 | env: 41 | CIBW_SKIP: "pp* *-musllinux_* *-win32" 42 | CIBW_ARCHS: ${{ matrix.cibw_archs }} 43 | MACOSX_DEPLOYMENT_TARGET: "10.13" 44 | 45 | - uses: actions/upload-artifact@v4 46 | with: 47 | name: cibw-wheels-${{ matrix.os }}-${{ matrix.cibw_archs }} 48 | path: ./wheelhouse/*.whl 49 | -------------------------------------------------------------------------------- /.github/workflows/python-test.yml: -------------------------------------------------------------------------------- 1 | name: Python Package Unit Tests 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | jobs: 13 | testing: 14 | name: test-python 15 | runs-on: ${{ matrix.os }} 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | os: [ubuntu-latest, windows-latest, macos-latest] 21 | 22 | steps: 23 | - name: Checkout repository 24 | uses: actions/checkout@v4 25 | with: 26 | submodules: 'recursive' 27 | 28 | - name: Setup Python 3.10 29 | uses: actions/setup-python@v5 30 | with: 31 | python-version: "3.10" 32 | cache: "pip" 33 | 34 | - name: Install Package with Relevant Dependencies 35 | run: | 36 | pip install --upgrade pip 37 | pip install -r requirements.txt 38 | pip install . 
39 | 40 | - name: Run Pytest 41 | run: | 42 | pytest test/python 43 | -------------------------------------------------------------------------------- /.github/workflows/r-cran-branch.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main] 4 | pull_request: 5 | branches: [main] 6 | release: 7 | types: [published] 8 | workflow_dispatch: 9 | 10 | name: Update R Package Dev Branch 11 | 12 | jobs: 13 | testing: 14 | name: r-cran-branch 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | permissions: 19 | contents: write 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | with: 24 | submodules: 'recursive' 25 | 26 | - uses: r-lib/actions/setup-pandoc@v2 27 | 28 | - uses: r-lib/actions/setup-r@v2 29 | with: 30 | use-public-rspm: true 31 | 32 | - uses: r-lib/actions/setup-r-dependencies@v2 33 | with: 34 | extra-packages: any::testthat, any::decor 35 | 36 | - name: Create CRAN-formatted source package in stochtree_cran subfolder 37 | run: | 38 | Rscript cran-bootstrap.R 39 | 40 | - name: Deploy to CRAN dev branch 41 | if: github.event_name != 'pull_request' 42 | uses: JamesIves/github-pages-deploy-action@v4.5.0 43 | with: 44 | clean: false 45 | branch: r-dev 46 | folder: stochtree_cran -------------------------------------------------------------------------------- /.github/workflows/r-devel-check.yml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | workflow_dispatch: 5 | 6 | name: R Devel CRAN Checks and Unit Tests 7 | 8 | jobs: 9 | testing: 10 | name: test-r 11 | runs-on: ${{ matrix.os }} 12 | 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: [ubuntu-latest, windows-latest, macos-latest] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | submodules: 'recursive' 22 | 23 | - uses: r-lib/actions/setup-pandoc@v2 24 | 25 | - uses: r-lib/actions/setup-r@v2 26 | with: 27 | r-version: 'devel' 28 | use-public-rspm: true 29 | 30 | - uses: r-lib/actions/setup-r-dependencies@v2 31 | with: 32 | extra-packages: any::testthat, any::decor, any::rcmdcheck 33 | needs: check 34 | 35 | - name: Create a CRAN-ready version of the R package 36 | run: | 37 | Rscript cran-bootstrap.R 0 0 1 38 | 39 | - uses: r-lib/actions/check-r-package@v2 40 | with: 41 | working-directory: 'stochtree_cran' 42 | -------------------------------------------------------------------------------- /.github/workflows/r-test.yml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: R Package Unit Tests 13 | 14 | jobs: 15 | testing: 16 | name: test-r 17 | runs-on: ${{ matrix.os }} 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | os: [ubuntu-latest, windows-latest, macos-latest] 23 | 24 | steps: 25 | - uses: actions/checkout@v4 26 | with: 27 | submodules: 'recursive' 28 | 29 | - uses: r-lib/actions/setup-pandoc@v2 30 | 31 | - uses: r-lib/actions/setup-r@v2 32 | with: 33 | use-public-rspm: true 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::testthat, any::decor, any::rcmdcheck 38 | needs: check 39 | 40 | - name: Create a CRAN-ready version of the R package 41 | run: | 42 | Rscript cran-bootstrap.R 0 0 1 43 | 44 | - uses: r-lib/actions/check-r-package@v2 45 | with: 46 | working-directory: 'stochtree_cran' 47 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "deps/fast_double_parser"] 2 | path = deps/fast_double_parser 3 | url = https://github.com/lemire/fast_double_parser/ 4 | branch = master 5 | [submodule "deps/fmt"] 6 | path = deps/fmt 7 | url = https://github.com/fmtlib/fmt/ 8 | branch = master 9 | [submodule "deps/boost_math"] 10 | path = deps/boost_math 11 | url = https://github.com/boostorg/math 12 | branch = master 13 | [submodule "deps/eigen"] 14 | path = deps/eigen 15 | url = https://gitlab.com/libeigen/eigen 16 | branch = 3.4 17 | [submodule "deps/pybind11"] 18 | path = deps/pybind11 19 | url = https://github.com/pybind/pybind11 20 | branch = v2.12 21 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 
| # Changelog 2 | 3 | # stochtree 0.1.2 4 | 5 | ## New Features 6 | 7 | * Support for binary outcomes in BART and BCF with a probit link ([#164](https://github.com/StochasticTree/stochtree/pull/164)) 8 | 9 | ## Bug Fixes 10 | 11 | * Fixed indexing bug in cleanup of grow-from-root (GFR) samples in BART and BCF models 12 | * Avoid using covariate preprocessor in `computeForestLeafIndices` R function when a `ForestSamples` object is provided (instead of a `bartmodel` or `bcfmodel` object) 13 | 14 | # stochtree 0.1.1 15 | 16 | ## Bug Fixes 17 | 18 | * Fixed initialization bug in several R package code examples for random effects models 19 | 20 | # stochtree 0.1.0 21 | 22 | Initial "alpha" release 23 | 24 | ## New Features 25 | 26 | * Support for sampling stochastic tree ensembles using two algorithms: MCMC and Grow-From-Root (GFR) 27 | * High-level model types supported: 28 | * Supervised learning with constant leaves or user-specified leaf regression models 29 | * Causal effect estimation with binary or continuous treatments 30 | * Additional high-level modeling features: 31 | * Forest-based variance function estimation (heteroskedasticity) 32 | * Additive (univariate or multivariate) group random effects 33 | * Multi-chain sampling and support for parallelism 34 | * "Warm-start" initialization of MCMC forest samplers via the Grow-From-Root (GFR) algorithm 35 | * Automated preprocessing / handling of categorical variables 36 | * Low-level interface: 37 | * Ability to combine a forest sampler with other (additive) model terms, without using C++ 38 | * Combine and sample an arbitrary number of forests or random effects terms 39 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: stochtree 2 | Title: Stochastic Tree Ensembles (XBART and BART) for Supervised Learning and Causal Inference 3 | Version: 0.1.1 4 | Authors@R: 5 | c( 6 | 
person("Drew", "Herren", email = "drewherrenopensource@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4109-6611")), 7 | person("Richard", "Hahn", role = "aut"), 8 | person("Jared", "Murray", role = "aut"), 9 | person("Carlos", "Carvalho", role = "aut"), 10 | person("Jingyu", "He", role = "aut"), 11 | person("Pedro", "Lima", role = "ctb"), 12 | person("stochtree", "contributors", role = c("cph")), 13 | person("Eigen", "contributors", role = c("cph"), comment = "C++ source uses the Eigen library for matrix operations, see inst/COPYRIGHTS"), 14 | person("xgboost", "contributors", role = c("cph"), comment = "C++ tree code and related operations include or are inspired by code from the xgboost library, see inst/COPYRIGHTS"), 15 | person("treelite", "contributors", role = c("cph"), comment = "C++ tree code and related operations include or are inspired by code from the treelite library, see inst/COPYRIGHTS"), 16 | person("Microsoft", "Corporation", role = c("cph"), comment = "C++ I/O and various project structure code include or are inspired by code from the LightGBM library, which is a copyright of Microsoft, see inst/COPYRIGHTS"), 17 | person("Niels", "Lohmann", role = c("cph"), comment = "C++ source uses the JSON for Modern C++ library for JSON operations, see inst/COPYRIGHTS"), 18 | person("Daniel", "Lemire", role = c("cph"), comment = "C++ source uses the fast_double_parser library internally, see inst/COPYRIGHTS"), 19 | person("Victor", "Zverovich", role = c("cph"), comment = "C++ source uses the fmt library internally, see inst/COPYRIGHTS") 20 | ) 21 | Copyright: Copyright details for stochtree's C++ dependencies, which are vendored along with the core stochtree source code, are detailed in inst/COPYRIGHTS 22 | Description: Flexible stochastic tree ensemble software. 
23 | Robust implementations of Bayesian Additive Regression Trees (BART) 24 | Chipman, George, McCulloch (2010) 25 | for supervised learning and Bayesian Causal Forests (BCF) 26 | Hahn, Murray, Carvalho (2020) 27 | for causal inference. Enables model serialization and parallel sampling 28 | and provides a low-level interface for custom stochastic forest samplers. 29 | License: MIT + file LICENSE 30 | Encoding: UTF-8 31 | Roxygen: list(markdown = TRUE) 32 | RoxygenNote: 7.3.2 33 | LinkingTo: 34 | cpp11, BH 35 | Suggests: 36 | testthat (>= 3.0.0), 37 | doParallel, 38 | foreach, 39 | ggplot2, 40 | knitr, 41 | latex2exp, 42 | Matrix, 43 | MASS, 44 | mvtnorm, 45 | rmarkdown, 46 | tgp 47 | VignetteBuilder: knitr 48 | SystemRequirements: C++17 49 | Imports: 50 | R6, 51 | stats 52 | URL: https://stochtree.ai/, https://github.com/StochasticTree/stochtree 53 | BugReports: https://github.com/StochasticTree/stochtree/issues 54 | Config/testthat/edition: 3 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2025 2 | COPYRIGHT HOLDER: stochtree contributors -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023-2025 stochtree authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | 
copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Include cmake build instructions 2 | include CMakeLists.txt 3 | 4 | # Vendor package header files 5 | recursive-include include *.h 6 | recursive-include include *.hpp 7 | 8 | # Vendor package source files (excluding R-specific) 9 | recursive-include src *.cpp 10 | recursive-include src *.h 11 | exclude src/R_data.cpp src/R_random_effects.cpp 12 | 13 | # Remove the CRAN extensionless LICENSE file 14 | exclude LICENSE 15 | 16 | # Remove egg info 17 | prune stochtree.egg-info 18 | 19 | # Vendor pybind11 dependencies in PyPI source distribution 20 | recursive-include deps/pybind11/include/pybind11 *.h 21 | recursive-include deps/pybind11/tools * 22 | include deps/pybind11/CMakeLists.txt 23 | include deps/pybind11/pyproject.toml 24 | include deps/pybind11/setup.cfg 25 | include deps/pybind11/setup.py 26 | 27 | # Vendor fmt header files in PyPI source distribution 28 | recursive-include deps/fmt/include/fmt *.h 29 | 30 | # Vendor fast_double_parser header file in PyPI source distribution 31 | include deps/fast_double_parser/include/fast_double_parser.h 32 | 33 | # Vendor Eigen header files in PyPI source distribution 34 | include deps/eigen/Eigen/Cholesky 35 | include deps/eigen/Eigen/Core 36 | include deps/eigen/Eigen/Dense 37 | include 
deps/eigen/Eigen/Eigenvalues 38 | include deps/eigen/Eigen/Geometry 39 | include deps/eigen/Eigen/Householder 40 | include deps/eigen/Eigen/IterativeLinearSolvers 41 | include deps/eigen/Eigen/Jacobi 42 | include deps/eigen/Eigen/LU 43 | include deps/eigen/Eigen/OrderingMethods 44 | include deps/eigen/Eigen/QR 45 | include deps/eigen/Eigen/SVD 46 | include deps/eigen/Eigen/Sparse 47 | include deps/eigen/Eigen/SparseCholesky 48 | include deps/eigen/Eigen/SparseCore 49 | include deps/eigen/Eigen/SparseQR 50 | include deps/eigen/Eigen/misc 51 | include deps/eigen/Eigen/plugins 52 | recursive-include deps/eigen/Eigen/src/Cholesky *.h 53 | recursive-include deps/eigen/Eigen/src/Core *.h 54 | recursive-include deps/eigen/Eigen/src/Dense *.h 55 | recursive-include deps/eigen/Eigen/src/Eigenvalues *.h 56 | recursive-include deps/eigen/Eigen/src/Geometry *.h 57 | recursive-include deps/eigen/Eigen/src/Householder *.h 58 | recursive-include deps/eigen/Eigen/src/IterativeLinearSolvers *.h 59 | recursive-include deps/eigen/Eigen/src/Jacobi *.h 60 | recursive-include deps/eigen/Eigen/src/LU *.h 61 | recursive-include deps/eigen/Eigen/src/OrderingMethods *.h 62 | recursive-include deps/eigen/Eigen/src/QR *.h 63 | recursive-include deps/eigen/Eigen/src/SVD *.h 64 | recursive-include deps/eigen/Eigen/src/Sparse *.h 65 | recursive-include deps/eigen/Eigen/src/SparseCholesky *.h 66 | recursive-include deps/eigen/Eigen/src/SparseCore *.h 67 | recursive-include deps/eigen/Eigen/src/SparseQR *.h 68 | recursive-include deps/eigen/Eigen/src/misc *.h 69 | recursive-include deps/eigen/Eigen/src/plugins *.h -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(getRandomEffectSamples,bartmodel) 4 | S3method(getRandomEffectSamples,bcfmodel) 5 | S3method(predict,bartmodel) 6 | S3method(predict,bcfmodel) 7 | 
export(bart) 8 | export(bcf) 9 | export(calibrateInverseGammaErrorVariance) 10 | export(computeForestLeafIndices) 11 | export(computeForestLeafVariances) 12 | export(computeForestMaxLeafIndex) 13 | export(convertPreprocessorToJson) 14 | export(createBARTModelFromCombinedJson) 15 | export(createBARTModelFromCombinedJsonString) 16 | export(createBARTModelFromJson) 17 | export(createBARTModelFromJsonFile) 18 | export(createBARTModelFromJsonString) 19 | export(createBCFModelFromCombinedJson) 20 | export(createBCFModelFromCombinedJsonString) 21 | export(createBCFModelFromJson) 22 | export(createBCFModelFromJsonFile) 23 | export(createBCFModelFromJsonString) 24 | export(createCppJson) 25 | export(createCppJsonFile) 26 | export(createCppJsonString) 27 | export(createCppRNG) 28 | export(createForest) 29 | export(createForestDataset) 30 | export(createForestModel) 31 | export(createForestModelConfig) 32 | export(createForestSamples) 33 | export(createGlobalModelConfig) 34 | export(createOutcome) 35 | export(createPreprocessorFromJson) 36 | export(createPreprocessorFromJsonString) 37 | export(createRandomEffectSamples) 38 | export(createRandomEffectsDataset) 39 | export(createRandomEffectsModel) 40 | export(createRandomEffectsTracker) 41 | export(getRandomEffectSamples) 42 | export(loadForestContainerCombinedJson) 43 | export(loadForestContainerCombinedJsonString) 44 | export(loadForestContainerJson) 45 | export(loadRandomEffectSamplesCombinedJson) 46 | export(loadRandomEffectSamplesCombinedJsonString) 47 | export(loadRandomEffectSamplesJson) 48 | export(loadScalarJson) 49 | export(loadVectorJson) 50 | export(preprocessPredictionData) 51 | export(preprocessTrainData) 52 | export(resetActiveForest) 53 | export(resetForestModel) 54 | export(resetRandomEffectsModel) 55 | export(resetRandomEffectsTracker) 56 | export(rootResetRandomEffectsModel) 57 | export(rootResetRandomEffectsTracker) 58 | export(sampleGlobalErrorVarianceOneIteration) 59 | 
export(sampleLeafVarianceOneIteration) 60 | export(saveBARTModelToJson) 61 | export(saveBARTModelToJsonFile) 62 | export(saveBARTModelToJsonString) 63 | export(saveBCFModelToJson) 64 | export(saveBCFModelToJsonFile) 65 | export(saveBCFModelToJsonString) 66 | export(savePreprocessorToJsonString) 67 | importFrom(R6,R6Class) 68 | importFrom(stats,coef) 69 | importFrom(stats,dnorm) 70 | importFrom(stats,lm) 71 | importFrom(stats,model.matrix) 72 | importFrom(stats,pnorm) 73 | importFrom(stats,predict) 74 | importFrom(stats,qgamma) 75 | importFrom(stats,qnorm) 76 | importFrom(stats,resid) 77 | importFrom(stats,rnorm) 78 | importFrom(stats,runif) 79 | importFrom(stats,sd) 80 | importFrom(stats,sigma) 81 | importFrom(stats,var) 82 | useDynLib(stochtree, .registration = TRUE) 83 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # stochtree 0.1.2 2 | 3 | ## New Features 4 | 5 | * Support for binary outcomes in BART and BCF with a probit link ([#164](https://github.com/StochasticTree/stochtree/pull/164)) 6 | 7 | ## Bug Fixes 8 | 9 | * Fixed indexing bug in cleanup of grow-from-root (GFR) samples in BART and BCF models 10 | * Avoid using covariate preprocessor in `computeForestLeafIndices` function when a `ForestSamples` object is provided (rather than a `bartmodel` or `bcfmodel` object) 11 | 12 | # stochtree 0.1.1 13 | 14 | * Fixed initialization bug in several R package code examples for random effects models 15 | 16 | # stochtree 0.1.0 17 | 18 | * Initial release on CRAN. 
#' Calibrate the scale parameter on an inverse gamma prior for the global error variance as in Chipman et al (2022)
#'
#' Chipman, H., George, E., Hahn, R., McCulloch, R., Pratola, M. and Sparapani, R. (2022).
#' Bayesian Additive Regression Trees, Computational Approaches. In Wiley StatsRef:
#' Statistics Reference Online. https://doi.org/10.1002/9781118445112.stat08288
#'
#' @param y Outcome to be modeled using BART, BCF or another nonparametric ensemble method.
#' @param X Covariates to be used to partition trees in an ensemble or series of ensembles.
#' @param W (Optional) Basis used to define a "leaf regression" model for each decision tree.
#'   The "classic" BART model assumes a constant leaf parameter, which is equivalent to a
#'   "leaf regression" on a basis of all ones, though it is not necessary to pass a vector
#'   of ones, here or to the BART function. Default: `NULL`.
#' @param nu The shape parameter for the global error variance's IG prior. The scale
#'   parameter in the Sparapani et al (2021) parameterization is defined as `nu*lambda`
#'   where `lambda` is the output of this function. Default: `3`.
#' @param quant (Optional) Quantile of the inverse gamma prior distribution represented by
#'   a linear-regression-based overestimate of `sigma^2`. Default: `0.9`.
#' @param standardize (Optional) Whether or not outcome should be standardized
#'   (`(y-mean(y))/sd(y)`) before calibration of `lambda`. Default: `TRUE`.
#'
#' @return Value of `lambda` which determines the scale parameter of the global error
#'   variance prior (`sigma^2 ~ IG(nu,nu*lambda)`)
#' @export
#'
#' @examples
#' n <- 100
#' p <- 5
#' X <- matrix(runif(n*p), ncol = p)
#' y <- 10*X[,1] - 20*X[,2] + rnorm(n)
#' nu <- 3
#' lambda <- calibrateInverseGammaErrorVariance(y, X, nu = nu)
#' sigma2hat <- mean(resid(lm(y~X))^2)
#' mean(var(y)/rgamma(100000, nu, rate = nu*lambda) < sigma2hat)
calibrateInverseGammaErrorVariance <- function(y, X, W = NULL, nu = 3, quant = 0.9, standardize = TRUE) {
  # Assemble the design matrix for the pilot regression: covariates alone,
  # or covariates augmented with the leaf basis when one is supplied
  basis <- if (is.null(W)) X else cbind(X, W)
  # Optionally put the outcome on the standardized scale before fitting
  if (standardize) {
    y <- (y - mean(y)) / sd(y)
  }
  # Linear-regression-based overestimate of sigma^2 (mean squared residual)
  pilot_fit <- lm(y ~ basis)
  sigma2hat <- mean(resid(pilot_fit)^2)
  # Solve for lambda so that sigma2hat falls at the requested quantile
  # of the implied IG(nu, nu*lambda) prior
  (sigma2hat * qgamma(1 - quant, nu)) / nu
}
function for extracting random effect samples from a model object (BCF, BART, etc...) 2 | #' 3 | #' @param object Fitted model object from which to extract random effects 4 | #' @param ... Other parameters to be used in random effects extraction 5 | #' @return List of random effect samples 6 | #' @export 7 | #' 8 | #' @examples 9 | #' n <- 100 10 | #' p <- 10 11 | #' X <- matrix(runif(n*p), ncol = p) 12 | #' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 13 | #' rfx_basis <- rep(1.0, n) 14 | #' y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 15 | #' bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, 16 | #' rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) 17 | #' rfx_samples <- getRandomEffectSamples(bart_model) 18 | getRandomEffectSamples <- function(object, ...) UseMethod("getRandomEffectSamples") 19 | -------------------------------------------------------------------------------- /R/stochtree-package.R: -------------------------------------------------------------------------------- 1 | ## usethis namespace: start 2 | #' @importFrom stats coef 3 | #' @importFrom stats dnorm 4 | #' @importFrom stats lm 5 | #' @importFrom stats model.matrix 6 | #' @importFrom stats predict 7 | #' @importFrom stats qgamma 8 | #' @importFrom stats qnorm 9 | #' @importFrom stats pnorm 10 | #' @importFrom stats resid 11 | #' @importFrom stats rnorm 12 | #' @importFrom stats runif 13 | #' @importFrom stats sd 14 | #' @importFrom stats sigma 15 | #' @importFrom stats var 16 | #' @importFrom R6 R6Class 17 | ## usethis namespace: end 18 | NULL 19 | 20 | #' @useDynLib stochtree, .registration = TRUE 21 | "_PACKAGE" -------------------------------------------------------------------------------- /R/variance.R: -------------------------------------------------------------------------------- 1 | #' Sample one iteration of the (inverse gamma) global variance model 2 | #' 3 | #' @param residual Outcome class 4 | #' 
@param dataset ForestDataset class 5 | #' @param rng C++ random number generator 6 | #' @param a Global variance shape parameter 7 | #' @param b Global variance scale parameter 8 | #' @return Sampled value of the global error variance (scalar) 9 | #' @export 10 | #' 11 | #' @examples 12 | #' X <- matrix(runif(10*100), ncol = 10) 13 | #' y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 14 | #' y_std <- (y-mean(y))/sd(y) 15 | #' forest_dataset <- createForestDataset(X) 16 | #' outcome <- createOutcome(y_std) 17 | #' rng <- createCppRNG(1234) 18 | #' a <- 1.0 19 | #' b <- 1.0 20 | #' sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome, forest_dataset, rng, a, b) 21 | sampleGlobalErrorVarianceOneIteration <- function(residual, dataset, rng, a, b) { 22 | return(sample_sigma2_one_iteration_cpp(residual$data_ptr, dataset$data_ptr, rng$rng_ptr, a, b)) 23 | } 24 | 25 | #' Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) 26 | #' 27 | #' @param forest C++ forest 28 | #' @param rng C++ random number generator 29 | #' @param a Leaf variance shape parameter 30 | #' @param b Leaf variance scale parameter 31 | #' @return Sampled value of the leaf scale parameter (scalar) 32 | #' @export 33 | #' 34 | #' @examples 35 | #' num_trees <- 100 36 | #' leaf_dimension <- 1 37 | #' is_leaf_constant <- TRUE 38 | #' is_exponentiated <- FALSE 39 | #' active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 40 | #' rng <- createCppRNG(1234) 41 | #' a <- 1.0 42 | #' b <- 1.0 43 | #' tau <- sampleLeafVarianceOneIteration(active_forest, rng, a, b) 44 | sampleLeafVarianceOneIteration <- function(forest, rng, a, b) { 45 | return(sample_tau_one_iteration_cpp(forest$forest_ptr, rng$rng_ptr, a, b)) 46 | } 47 | -------------------------------------------------------------------------------- /R_README.md: -------------------------------------------------------------------------------- 1 | # stochtree R package 2 | 3 | Software for building stochastic tree ensembles (i.e.
BART, XBART) for supervised learning and causal inference. 4 | 5 | ## Getting started 6 | 7 | `stochtree` can be installed from CRAN via 8 | 9 | ``` 10 | install.packages("stochtree") 11 | ``` 12 | 13 | The development version of stochtree can be installed from github via 14 | 15 | ``` 16 | remotes::install_github("StochasticTree/stochtree", ref="r-dev") 17 | ``` 18 | -------------------------------------------------------------------------------- /cmake/Sanitizer.cmake: -------------------------------------------------------------------------------- 1 | # Set appropriate compiler and linker flags for sanitizers. 2 | # 3 | # Usage of this module: 4 | # enable_sanitizers("address;leak") 5 | 6 | # Add flags 7 | macro(enable_sanitizer sanitizer) 8 | if(${sanitizer} MATCHES "address") 9 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=address") 10 | 11 | elseif(${sanitizer} MATCHES "thread") 12 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=thread") 13 | 14 | elseif(${sanitizer} MATCHES "leak") 15 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=leak") 16 | 17 | elseif(${sanitizer} MATCHES "undefined") 18 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=undefined -fno-sanitize-recover=undefined") 19 | 20 | else() 21 | message(FATAL_ERROR "Sanitizer ${sanitizer} not supported.") 22 | endif() 23 | endmacro() 24 | 25 | macro(enable_sanitizers SANITIZERS) 26 | # Check sanitizers compatibility.
27 | foreach(_san ${SANITIZERS}) 28 | string(TOLOWER ${_san} _san) 29 | if(_san MATCHES "thread") 30 | if(${_use_other_sanitizers}) 31 | message(FATAL_ERROR "thread sanitizer is not compatible with ${_san} sanitizer.") 32 | endif() 33 | set(_use_thread_sanitizer 1) 34 | else() 35 | if(${_use_thread_sanitizer}) 36 | message(FATAL_ERROR "${_san} sanitizer is not compatible with thread sanitizer.") 37 | endif() 38 | set(_use_other_sanitizers 1) 39 | endif() 40 | endforeach() 41 | 42 | message(STATUS "Sanitizers: ${SANITIZERS}") 43 | 44 | foreach(_san ${SANITIZERS}) 45 | string(TOLOWER ${_san} _san) 46 | enable_sanitizer(${_san}) 47 | endforeach() 48 | message(STATUS "Sanitizers compile flags: ${SAN_COMPILE_FLAGS}") 49 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_COMPILE_FLAGS}") 50 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_COMPILE_FLAGS}") 51 | endmacro() 52 | -------------------------------------------------------------------------------- /cran-cleanup.R: -------------------------------------------------------------------------------- 1 | # Create the stochtree_cran folder 2 | cran_dir <- "stochtree_cran" 3 | if (dir.exists(cran_dir)) { 4 | # cran_subfolder_files <- list.files(cran_dir, recursive = TRUE, full.names = TRUE) 5 | unlink(cran_dir, recursive = TRUE) 6 | } -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 3 notes 4 | 5 | * This is a new release. 6 | * Checking installed package size ... 
NOTE installed size is 46.3Mb (linux-only) 7 | * Possibly misspelled words in DESCRIPTION: All of the words are proper nouns or technical terms (BCF, Carvalho, Chipman, McCulloch, XBART) 8 | 9 | ## CRAN comments (20250206) 10 | 11 | Below are responses to the initial comments received from CRAN on Feb 6, 2025 12 | 13 | ### Copyright 14 | 15 | > Please always add all authors, contributors and copyright holders in the Authors@R field with the appropriate roles." 16 | 17 | stochtree's C++ core has several vendored dependencies. The license and copyright details for each of these dependencies are delineated in the inst/COPYRIGHTS file. We have included the authors / contributors of each of these dependencies as copyright holders in the authors list of the DESCRIPTION file and also included a "Copyright:" section in the DESCRIPTION file explaining this. 18 | 19 | ### TRUE / FALSE 20 | 21 | > Please write TRUE and FALSE instead of T and F. 22 | 23 | We have converted `T` and `F` to `TRUE` and `FALSE` in the R code. 24 | 25 | ### Examples with commented code 26 | 27 | > Some code lines in examples are commented out. Please never do that. 28 | 29 | We no longer do this, and apologize for the oversight. 30 | 31 | ## CRAN comments (20250207) 32 | 33 | Below we address issues raised by CRAN on Feb 7, 2025 34 | 35 | ### Valgrind 36 | 37 | A valgrind-instrumented version of R exposed memory issues in several examples 38 | in the `stochtree` documentation. The specific issue is 39 | 40 | > Conditional jump or move depends on uninitialised value(s) 41 | 42 | The examples that triggered this were in fact working with Eigen matrices 43 | with uninitialized values. 44 | 45 | This has been corrected and we have verified that running the `stochtree` 46 | examples no longer produce this memcheck error. 
47 | -------------------------------------------------------------------------------- /debug/README.md: -------------------------------------------------------------------------------- 1 | # Debugging 2 | 3 | This subdirectory contains a debug program for the C++ codebase. 4 | The program takes several command line arguments (in order): 5 | 6 | 1. Which data-generating process (DGP) to run (integer-coded, see below for a detailed description) 7 | 1. Which leaf model to sample (integer-coded, see below for a detailed description) 8 | 3. Whether or not to include random effects (0 = no, 1 = yes) 9 | 4. Number of grow-from-root (GFR) samples 10 | 5. Number of MCMC samples 11 | 6. Seed for random number generator (-1 means we defer to C++ `std::random_device`) 12 | 7. [Optional] name of data file to load for training, instead of simulating data (leave this blank as `""` if simulated data is desired) 13 | 8. [Optional] index of outcome column in data file (leave this blank as `0`) 14 | 9. [Optional] comma-delimited string of column indices of covariates (leave this blank as `""`) 15 | 10. [Optional] comma-delimited string of column indices of leaf regression bases (leave this blank as `""`) 16 | 17 | The DGPs are numbered as follows: 18 | 19 | 0. Simple leaf regression model with a univariate basis for the leaf model 20 | 1. Constant leaf model with a large number of deep interactions between features 21 | 2. Simple leaf regression model with a multivariate basis for the leaf model 22 | 3. Simple "variance-only" model with a mean of zero but covariate-moderated variance function 23 | 24 | The models are numbered as follows: 25 | 26 | 0. Constant leaf tree model (the "classic" BART / XBART model) 27 | 1. "Univariate basis" leaf regression model 28 | 2. "Multivariate basis" leaf regression model 29 | 3. 
Log linear heteroskedastic variance model 30 | 31 | For an example of how to run this progam for DGP 0, leaf model 1, no random effects, 10 GFR samples, 100 MCMC samples and a default seed (`-1`), run 32 | 33 | `./build/debugstochtree 0 1 0 10 100 -1 "" 0 "" ""` 34 | 35 | from the main `stochtree` project directory after building with `BUILD_DEBUG_TARGETS` set to `ON`. 36 | -------------------------------------------------------------------------------- /demo/debug/classification.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import roc_curve, auc 7 | 8 | from stochtree import BARTModel 9 | 10 | # RNG 11 | rng = np.random.default_rng() 12 | 13 | # Generate covariates 14 | n = 1000 15 | p_X = 10 16 | X = rng.uniform(0, 1, (n, p_X)) 17 | 18 | 19 | # Define the outcome mean function 20 | def outcome_mean(X): 21 | return np.where( 22 | (X[:, 0] >= 0.0) & (X[:, 0] < 0.25), 23 | -7.5 * X[:, 1], 24 | np.where( 25 | (X[:, 0] >= 0.25) & (X[:, 0] < 0.5), 26 | -2.5 * X[:, 1], 27 | np.where((X[:, 0] >= 0.5) & (X[:, 0] < 0.75), 2.5 * X[:, 1], 7.5 * X[:, 1]), 28 | ), 29 | ) 30 | 31 | 32 | # Generate outcome 33 | epsilon = rng.normal(0, 1, n) 34 | z = outcome_mean(X) + epsilon 35 | y = np.where(z >= 0, 1, 0) 36 | 37 | # Test-train split 38 | sample_inds = np.arange(n) 39 | train_inds, test_inds = train_test_split(sample_inds, test_size=0.5) 40 | X_train = X[train_inds, :] 41 | X_test = X[test_inds, :] 42 | z_train = z[train_inds] 43 | z_test = z[test_inds] 44 | y_train = y[train_inds] 45 | y_test = y[test_inds] 46 | 47 | # Fit Probit BART 48 | bart_model = BARTModel() 49 | general_params = {"num_chains": 1} 50 | mean_forest_params = {"probit_outcome_model": True} 51 | bart_model.sample( 52 | X_train=X_train, 53 | y_train=y_train, 54 | X_test=X_test, 55 | 
num_gfr=10, 56 | num_mcmc=100, 57 | general_params=general_params, 58 | mean_forest_params=mean_forest_params 59 | ) 60 | -------------------------------------------------------------------------------- /demo/debug/kernel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from stochtree import Dataset, ForestContainer, compute_forest_leaf_indices 3 | 4 | # Create dataset 5 | X = np.array( 6 | [[1.5, 8.7, 1.2], 7 | [2.7, 3.4, 5.4], 8 | [3.6, 1.2, 9.3], 9 | [4.4, 5.4, 10.4], 10 | [5.3, 9.3, 3.6], 11 | [6.1, 10.4, 4.4]] 12 | ) 13 | n, p = X.shape 14 | num_trees = 2 15 | output_dim = 1 16 | forest_dataset = Dataset() 17 | forest_dataset.add_covariates(X) 18 | forest_samples = ForestContainer(num_trees, output_dim, True, False) 19 | 20 | # Initialize a forest with constant root predictions 21 | forest_samples.add_sample(0.) 22 | 23 | # Split the root of the first tree in the ensemble at X[,1] > 4.0 24 | forest_samples.add_numeric_split(0, 0, 0, 0, 4.0, -5., 5.) 
25 | 26 | # Check that regular and "raw" predictions are the same (since the leaf is constant) 27 | computed_indices = compute_forest_leaf_indices(forest_samples, X) 28 | 29 | # Split the left leaf of the first tree in the ensemble at X[,2] > 4.0 30 | forest_samples.add_numeric_split(0, 0, 1, 1, 4.0, -7.5, -2.5) 31 | 32 | # Check that regular and "raw" predictions are the same (since the leaf is constant) 33 | computed_indices = compute_forest_leaf_indices(forest_samples, X) 34 | -------------------------------------------------------------------------------- /demo/debug/multivariate_treatment_causal_inference.py: -------------------------------------------------------------------------------- 1 | # Load necessary libraries 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | import matplotlib.pyplot as plt 6 | from stochtree import BCFModel 7 | from sklearn.model_selection import train_test_split 8 | 9 | # RNG 10 | rng = np.random.default_rng() 11 | 12 | # Generate covariates and basis 13 | n = 1000 14 | p_X = 5 15 | X = rng.uniform(0, 1, (n, p_X)) 16 | pi_X = 0.25 + 0.5*X[:,0] 17 | Z = rng.uniform(0, 1, (n, 2)) 18 | 19 | # Define the outcome mean functions (prognostic and treatment effects) 20 | mu_X = pi_X*5 + 2*X[:,2] 21 | tau_X = np.stack((X[:,1], X[:,2]), axis=-1) 22 | 23 | # Generate outcome 24 | epsilon = rng.normal(0, 1, n) 25 | treatment_term = np.multiply(tau_X, Z).sum(axis=1) 26 | y = mu_X + treatment_term + epsilon 27 | 28 | # Test-train split 29 | sample_inds = np.arange(n) 30 | train_inds, test_inds = train_test_split(sample_inds, test_size=0.5) 31 | X_train = X[train_inds,:] 32 | X_test = X[test_inds,:] 33 | Z_train = Z[train_inds,:] 34 | Z_test = Z[test_inds,:] 35 | y_train = y[train_inds] 36 | y_test = y[test_inds] 37 | pi_train = pi_X[train_inds] 38 | pi_test = pi_X[test_inds] 39 | mu_train = mu_X[train_inds] 40 | mu_test = mu_X[test_inds] 41 | tau_train = tau_X[train_inds,:] 42 | tau_test = tau_X[test_inds,:] 43 | 44 | # 
Run BCF 45 | bcf_model = BCFModel() 46 | bcf_model.sample(X_train, Z_train, y_train, pi_train, X_test, Z_test, pi_test, num_gfr=10, num_mcmc=100) 47 | -------------------------------------------------------------------------------- /demo/debug/r_comparison_debug.py: -------------------------------------------------------------------------------- 1 | # R Comparison Demo Script 2 | 3 | # Load necessary libraries 4 | import numpy as np 5 | import pandas as pd 6 | from stochtree import BARTModel 7 | 8 | # Load data 9 | df = pd.read_csv("debug/data/heterosked_train.csv") 10 | y = df.loc[:,'y'].to_numpy() 11 | X = df.loc[:,['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10']].to_numpy() 12 | y = y.astype(np.float64) 13 | X = X.astype(np.float64) 14 | 15 | # Run BART 16 | bart_model = BARTModel() 17 | bart_model.sample(X_train=X, y_train=y, num_gfr=0, num_mcmc=10, general_params={'random_seed': 1234, 'standardize': False, 'sample_sigma2_global': True}) 18 | 19 | # Inspect the MCMC (BART) samples 20 | y_avg_mcmc = np.squeeze(bart_model.y_hat_train).mean(axis = 1, keepdims = True) 21 | print(y_avg_mcmc[:20]) 22 | print(bart_model.global_var_samples) 23 | -------------------------------------------------------------------------------- /demo/debug/random_effects.py: -------------------------------------------------------------------------------- 1 | # Random Effects Demo Script 2 | 3 | # Load necessary libraries 4 | import numpy as np 5 | import pandas as pd 6 | import seaborn as sns 7 | import matplotlib.pyplot as plt 8 | from stochtree import ( 9 | RandomEffectsContainer, 10 | RandomEffectsDataset, 11 | RandomEffectsModel, 12 | RandomEffectsTracker, 13 | Residual, 14 | RNG, 15 | ) 16 | # from sklearn.model_selection import train_test_split 17 | 18 | # Generate sample data 19 | # RNG 20 | random_seed = 1234 21 | rng = np.random.default_rng(random_seed) 22 | 23 | # Generate group labels and random effects basis 24 | num_observations = 1000 25 | num_basis = 2 26 | num_groups = 
4 27 | group_labels = rng.choice(num_groups, size=num_observations) 28 | basis = np.empty((num_observations, num_basis)) 29 | basis[:, 0] = 1.0 30 | if num_basis > 1: 31 | basis[:, 1:] = rng.uniform(-1, 1, (num_observations, num_basis - 1)) 32 | 33 | 34 | # Define the group rfx function 35 | def outcome_mean(group_labels, basis): 36 | return np.where( 37 | group_labels == 0, 38 | 0 - 1 * basis[:, 1], 39 | np.where( 40 | group_labels == 1, 41 | 4 + 1 * basis[:, 1], 42 | np.where(group_labels == 2, 8 + 3 * basis[:, 1], 12 + 5 * basis[:, 1]), 43 | ), 44 | ) 45 | 46 | 47 | # Generate outcome 48 | epsilon = rng.normal(0, 1, num_observations) 49 | rfx_term = outcome_mean(group_labels, basis) 50 | y = rfx_term + epsilon 51 | 52 | # Standardize outcome 53 | y_bar = np.mean(y) 54 | y_std = np.std(y) 55 | resid = (y - y_bar) / y_std 56 | 57 | # Construct python objects used for rfx sampling 58 | outcome = Residual(resid) 59 | rfx_dataset = RandomEffectsDataset() 60 | rfx_dataset.add_group_labels(group_labels) 61 | rfx_dataset.add_basis(basis) 62 | rfx_tracker = RandomEffectsTracker(group_labels) 63 | rfx_model = RandomEffectsModel(num_basis, num_groups) 64 | rfx_model.set_working_parameter(np.ones(num_basis)) 65 | rfx_model.set_group_parameters(np.ones((num_basis, num_groups))) 66 | rfx_model.set_working_parameter_covariance(np.identity(num_basis)) 67 | rfx_model.set_group_parameter_covariance(np.identity(num_basis)) 68 | rfx_model.set_variance_prior_shape(1.0) 69 | rfx_model.set_variance_prior_scale(1.0) 70 | rfx_container = RandomEffectsContainer(num_basis, num_groups, rfx_tracker) 71 | # cpp_rng = RNG(random_seed) 72 | cpp_rng = RNG() 73 | 74 | # Sample the model 75 | rfx_model.sample(rfx_dataset, outcome, rfx_tracker, rfx_container, True, 1.0, cpp_rng) 76 | 77 | # Inspect the samples 78 | rfx_preds = rfx_container.predict(group_labels, basis) * y_std + y_bar 79 | rfx_comparison_df = pd.DataFrame( 80 | np.concatenate((rfx_preds, np.expand_dims(rfx_term, axis=1)), axis=1), 
81 | columns=["Predicted", "Actual"], 82 | ) 83 | sns.scatterplot(data=rfx_comparison_df, x="Predicted", y="Actual") 84 | plt.axline((0, 0), slope=1, color="black", linestyle=(0, (3, 3))) 85 | plt.show() 86 | -------------------------------------------------------------------------------- /demo/debug/rfx_serialization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from stochtree import BARTModel 3 | 4 | # RNG 5 | random_seed = 1234 6 | rng = np.random.default_rng(random_seed) 7 | 8 | # Generate covariates and basis 9 | n = 1000 10 | p_X = 10 11 | p_W = 1 12 | X = rng.uniform(0, 1, (n, p_X)) 13 | W = rng.uniform(0, 1, (n, p_W)) 14 | 15 | # Generate random effects terms 16 | num_basis = 2 17 | num_groups = 4 18 | group_labels = rng.choice(num_groups, size=n) 19 | basis = np.empty((n, num_basis)) 20 | basis[:, 0] = 1.0 21 | if num_basis > 1: 22 | basis[:, 1:] = rng.uniform(-1, 1, (n, num_basis - 1)) 23 | 24 | # Define the outcome mean function 25 | def outcome_mean(X, W): 26 | return np.where( 27 | (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * W[:,0], 28 | np.where( 29 | (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * W[:,0], 30 | np.where( 31 | (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * W[:,0], 32 | 7.5 * W[:,0] 33 | ) 34 | ) 35 | ) 36 | 37 | # Define the group rfx function 38 | def rfx_mean(group_labels, basis): 39 | return np.where( 40 | group_labels == 0, 41 | 0 - 1 * basis[:, 1], 42 | np.where( 43 | group_labels == 1, 44 | 4 + 1 * basis[:, 1], 45 | np.where( 46 | group_labels == 2, 8 + 3 * basis[:, 1], 12 + 5 * basis[:, 1] 47 | ), 48 | ), 49 | ) 50 | 51 | # Generate outcome 52 | epsilon = rng.normal(0, 1, n) 53 | forest_term = outcome_mean(X, W) 54 | rfx_term = rfx_mean(group_labels, basis) 55 | y = forest_term + rfx_term + epsilon 56 | 57 | # Run BART 58 | bart_orig = BARTModel() 59 | bart_orig.sample(X_train=X, y_train=y, leaf_basis_train=W, rfx_group_ids_train=group_labels, 60 | rfx_basis_train=basis, 
num_gfr=10, num_mcmc=10) 61 | 62 | # Extract predictions from the sampler 63 | y_hat_orig = bart_orig.predict(X, W, group_labels, basis) 64 | 65 | # "Round-trip" the model to JSON string and back and check that the predictions agree 66 | bart_json_string = bart_orig.to_json() 67 | bart_reloaded = BARTModel() 68 | bart_reloaded.from_json(bart_json_string) 69 | y_hat_reloaded = bart_reloaded.predict(X, W, group_labels, basis) 70 | np.testing.assert_almost_equal(y_hat_orig, y_hat_reloaded) -------------------------------------------------------------------------------- /include/stochtree/export.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Export macros ensure that the C++ code can be used as a library cross-platform 3 | * (declspec needed to load names from a DLL on windows) and can be wrapped in a 4 | * C program. 5 | * 6 | * This code modifies (changing names of) the export macros in LightGBM, which carries 7 | * the following copyright information: 8 | * 9 | * Copyright (c) 2017 Microsoft Corporation. All rights reserved. 10 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 
11 | */ 12 | #ifndef STOCHTREE_EXPORT_H_ 13 | #define STOCHTREE_EXPORT_H_ 14 | 15 | /** Macros for exporting symbols in MSVC/GCC/CLANG **/ 16 | 17 | #ifdef __cplusplus 18 | #define STOCHTREE_EXTERN_C extern "C" 19 | #else 20 | #define STOCHTREE_EXTERN_C 21 | #endif 22 | 23 | #ifdef _MSC_VER 24 | #define STOCHTREE_EXPORT __declspec(dllexport) 25 | #define STOCHTREE_C_EXPORT STOCHTREE_EXTERN_C __declspec(dllexport) 26 | #else 27 | #define STOCHTREE_EXPORT __attribute__ ((visibility ("default"))) 28 | #define STOCHTREE_C_EXPORT STOCHTREE_EXTERN_C __attribute__ ((visibility ("default"))) 29 | #endif 30 | 31 | #endif /** STOCHTREE_EXPORT_H_ **/ 32 | -------------------------------------------------------------------------------- /include/stochtree/gamma_sampler.h: -------------------------------------------------------------------------------- 1 | /*! Copyright (c) 2024 stochtree authors. All rights reserved. */ 2 | #ifndef STOCHTREE_GAMMA_SAMPLER_H_ 3 | #define STOCHTREE_GAMMA_SAMPLER_H_ 4 | 5 | #include <random> 6 | 7 | namespace StochTree { 8 | 9 | /*! \brief Draws gamma random variates; `b` is interpreted as a rate parameter by default */ 10 | class GammaSampler { 11 | public: 12 | GammaSampler() {} 13 | ~GammaSampler() {} 14 | double Sample(double a, double b, std::mt19937& gen, bool rate_param = true) { 15 | // std::gamma_distribution is parameterized by scale, so invert b when it is a rate
 double scale = rate_param ? 1./b : b; 16 | gamma_dist_ = std::gamma_distribution<double>(a, scale); 17 | return gamma_dist_(gen); 18 | } 19 | private: 20 | /*! \brief Gamma distribution used for sampling */ 21 | std::gamma_distribution<double> gamma_dist_; 22 | }; 23 | 24 | } // namespace StochTree 25 | 26 | #endif // STOCHTREE_GAMMA_SAMPLER_H_ -------------------------------------------------------------------------------- /include/stochtree/ig_sampler.h: -------------------------------------------------------------------------------- 1 | /*! Copyright (c) 2024 stochtree authors. All rights reserved.
*/ 2 | #ifndef STOCHTREE_IG_SAMPLER_H_ 3 | #define STOCHTREE_IG_SAMPLER_H_ 4 | 5 | #include <random> 6 | 7 | namespace StochTree { 8 | 9 | class InverseGammaSampler { 10 | public: 11 | InverseGammaSampler() {} 12 | ~InverseGammaSampler() {} 13 | double Sample(double a, double b, std::mt19937& gen, bool scale_param = true) { 14 | // C++ standard library provides a gamma distribution with scale 15 | // parameter, but the correspondence between gamma and IG is that 16 | // 1 / gamma(a,b) ~ IG(a,b) when b is a __rate__ parameter. 17 | // Before sampling, we convert ig_scale to a gamma scale parameter by 18 | // taking its multiplicative inverse. 19 | double gamma_scale = scale_param ? 1./b : b; 20 | gamma_dist_ = std::gamma_distribution<double>(a, gamma_scale); 21 | return (1/gamma_dist_(gen)); 22 | } 23 | private: 24 | /*! \brief Gamma distribution whose draws are inverted to yield IG samples */ 25 | std::gamma_distribution<double> gamma_dist_; 26 | }; 27 | 28 | } // namespace StochTree 29 | 30 | #endif // STOCHTREE_IG_SAMPLER_H_ -------------------------------------------------------------------------------- /include/stochtree/normal_sampler.h: -------------------------------------------------------------------------------- 1 | /*! Copyright (c) 2024 stochtree authors. All rights reserved. */ 2 | #ifndef STOCHTREE_NORMAL_SAMPLER_H_ 3 | #define STOCHTREE_NORMAL_SAMPLER_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace StochTree { 11 | 12 | class UnivariateNormalSampler { 13 | public: 14 | UnivariateNormalSampler() {std_normal_dist_ = std::normal_distribution(0.,1.);} 15 | ~UnivariateNormalSampler() {} 16 | double Sample(double mean, double variance, std::mt19937& gen) { 17 | return mean + std::sqrt(variance) * std_normal_dist_(gen); 18 | } 19 | private: 20 | /*!
\brief Standard normal distribution */ 21 | std::normal_distribution std_normal_dist_; 22 | }; 23 | 24 | class MultivariateNormalSampler { 25 | public: 26 | MultivariateNormalSampler() {std_normal_dist_ = std::normal_distribution(0.,1.);} 27 | ~MultivariateNormalSampler() {} 28 | std::vector Sample(Eigen::VectorXd& mean, Eigen::MatrixXd& covariance, std::mt19937& gen) { 29 | // Dimension extraction and checks 30 | int mean_cols = mean.size(); 31 | int cov_rows = covariance.rows(); 32 | int cov_cols = covariance.cols(); 33 | CHECK_EQ(mean_cols, cov_cols); 34 | 35 | // Variance cholesky decomposition 36 | Eigen::LLT decomposition(covariance); 37 | Eigen::MatrixXd covariance_chol = decomposition.matrixL(); 38 | 39 | // Sample a vector of standard normal random variables 40 | Eigen::VectorXd std_norm_vec(cov_rows); 41 | for (int i = 0; i < cov_rows; i++) { 42 | std_norm_vec(i) = std_normal_dist_(gen); 43 | } 44 | 45 | // Compute and return the sampled value 46 | Eigen::VectorXd sampled_values_raw = mean + covariance_chol * std_norm_vec; 47 | std::vector result(cov_rows); 48 | for (int i = 0; i < cov_rows; i++) { 49 | result[i] = sampled_values_raw(i, 0); 50 | } 51 | return result; 52 | } 53 | Eigen::VectorXd SampleEigen(Eigen::VectorXd& mean, Eigen::MatrixXd& covariance, std::mt19937& gen) { 54 | // Dimension extraction and checks 55 | int mean_cols = mean.size(); 56 | int cov_rows = covariance.rows(); 57 | int cov_cols = covariance.cols(); 58 | CHECK_EQ(mean_cols, cov_cols); 59 | 60 | // Variance cholesky decomposition 61 | Eigen::LLT decomposition(covariance); 62 | Eigen::MatrixXd covariance_chol = decomposition.matrixL(); 63 | 64 | // Sample a vector of standard normal random variables 65 | Eigen::VectorXd std_norm_vec(cov_rows); 66 | for (int i = 0; i < cov_rows; i++) { 67 | std_norm_vec(i) = std_normal_dist_(gen); 68 | } 69 | 70 | // Compute and return the sampled value 71 | return mean + covariance_chol * std_norm_vec; 72 | } 73 | private: 74 | /*! 
\brief Standard normal distribution */ 75 | std::normal_distribution std_normal_dist_; 76 | }; 77 | 78 | } // namespace StochTree 79 | 80 | #endif // STOCHTREE_NORMAL_SAMPLER_H_ -------------------------------------------------------------------------------- /include/stochtree/prior.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2024 stochtree authors. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | #ifndef STOCHTREE_PRIOR_H_ 6 | #define STOCHTREE_PRIOR_H_ 7 | 8 | #include 9 | #include 10 | 11 | namespace StochTree { 12 | 13 | class RandomEffectsGaussianPrior { 14 | public: 15 | RandomEffectsGaussianPrior() {} 16 | virtual ~RandomEffectsGaussianPrior() = default; 17 | }; 18 | 19 | class RandomEffectsRegressionGaussianPrior : public RandomEffectsGaussianPrior { 20 | public: 21 | RandomEffectsRegressionGaussianPrior(double a, double b, int32_t num_components, int32_t num_groups) { 22 | a_ = a; 23 | b_ = b; 24 | num_components_ = num_components; 25 | num_groups_ = num_groups; 26 | } 27 | ~RandomEffectsRegressionGaussianPrior() {} 28 | double GetPriorVarianceShape() {return a_;} 29 | double GetPriorVarianceScale() {return b_;} 30 | int32_t GetNumComponents() {return num_components_;} 31 | int32_t GetNumGroups() {return num_groups_;} 32 | void SetPriorVarianceShape(double a) {a_ = a;} 33 | void SetPriorVarianceScale(double b) {b_ = b;} 34 | void SetNumComponents(int32_t num_components) {num_components_ = num_components;} 35 | void SetNumGroups(int32_t num_groups) {num_groups_ = num_groups;} 36 | private: 37 | double a_; 38 | double b_; 39 | int32_t num_components_; 40 | int32_t num_groups_; 41 | }; 42 | 43 | class TreePrior { 44 | public: 45 | TreePrior(double alpha, double beta, int32_t min_samples_in_leaf, int32_t max_depth = -1) { 46 | alpha_ = alpha; 47 | beta_ = beta; 48 | min_samples_in_leaf_ = min_samples_in_leaf; 49 
| max_depth_ = max_depth; 50 | } 51 | ~TreePrior() {} 52 | double GetAlpha() {return alpha_;} 53 | double GetBeta() {return beta_;} 54 | int32_t GetMinSamplesLeaf() {return min_samples_in_leaf_;} 55 | int32_t GetMaxDepth() {return max_depth_;} 56 | void SetAlpha(double alpha) {alpha_ = alpha;} 57 | void SetBeta(double beta) {beta_ = beta;} 58 | void SetMinSamplesLeaf(int32_t min_samples_in_leaf) {min_samples_in_leaf_ = min_samples_in_leaf;} 59 | void SetMaxDepth(int32_t max_depth) {max_depth_ = max_depth;} 60 | private: 61 | double alpha_; 62 | double beta_; 63 | int32_t min_samples_in_leaf_; 64 | int32_t max_depth_; 65 | }; 66 | 67 | class IGVariancePrior { 68 | public: 69 | IGVariancePrior(double shape, double scale) { 70 | shape_ = shape; 71 | scale_ = scale; 72 | } 73 | ~IGVariancePrior() {} 74 | double GetShape() {return shape_;} 75 | double GetScale() {return scale_;} 76 | void SetShape(double shape) {shape_ = shape;} 77 | void SetScale(double scale) {scale_ = scale;} 78 | private: 79 | double shape_; 80 | double scale_; 81 | }; 82 | 83 | } // namespace StochTree 84 | 85 | #endif // STOCHTREE_PRIOR_H_ -------------------------------------------------------------------------------- /man/CppRNG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model.R 3 | \name{CppRNG} 4 | \alias{CppRNG} 5 | \title{Class that wraps a C++ random number generator (for reproducibility)} 6 | \description{ 7 | Persists a C++ random number generator throughout an R session to 8 | ensure reproducibility from a given random seed. If no seed is provided, 9 | the C++ random number generator is initialized using \code{std::random_device}. 10 | } 11 | \section{Public fields}{ 12 | \if{html}{\out{
}} 13 | \describe{ 14 | \item{\code{rng_ptr}}{External pointer to a C++ std::mt19937 class} 15 | } 16 | \if{html}{\out{
}} 17 | } 18 | \section{Methods}{ 19 | \subsection{Public methods}{ 20 | \itemize{ 21 | \item \href{#method-CppRNG-new}{\code{CppRNG$new()}} 22 | } 23 | } 24 | \if{html}{\out{
}} 25 | \if{html}{\out{}} 26 | \if{latex}{\out{\hypertarget{method-CppRNG-new}{}}} 27 | \subsection{Method \code{new()}}{ 28 | Create a new CppRNG object. 29 | \subsection{Usage}{ 30 | \if{html}{\out{
}}\preformatted{CppRNG$new(random_seed = -1)}\if{html}{\out{
}} 31 | } 32 | 33 | \subsection{Arguments}{ 34 | \if{html}{\out{
}} 35 | \describe{ 36 | \item{\code{random_seed}}{(Optional) random seed for sampling} 37 | } 38 | \if{html}{\out{
}} 39 | } 40 | \subsection{Returns}{ 41 | A new \code{CppRNG} object. 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /man/GlobalModelConfig.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{GlobalModelConfig} 4 | \alias{GlobalModelConfig} 5 | \title{Object used to get / set global parameters and other global model 6 | configuration options in the "low-level" stochtree interface} 7 | \value{ 8 | Global error variance parameter 9 | } 10 | \description{ 11 | The "low-level" stochtree interface enables a high degree of sampler 12 | customization, in which users employ R wrappers around C++ objects 13 | like ForestDataset, Outcome, CppRNG, and ForestModel to run the 14 | Gibbs sampler of a BART model with custom modifications. 15 | GlobalModelConfig allows users to specify / query the global parameters 16 | of a model they wish to run. 17 | } 18 | \section{Public fields}{ 19 | \if{html}{\out{
}} 20 | \describe{ 21 | \item{\code{global_error_variance}}{Global error variance parameter 22 | Create a new GlobalModelConfig object.} 23 | } 24 | \if{html}{\out{
}} 25 | } 26 | \section{Methods}{ 27 | \subsection{Public methods}{ 28 | \itemize{ 29 | \item \href{#method-GlobalModelConfig-new}{\code{GlobalModelConfig$new()}} 30 | \item \href{#method-GlobalModelConfig-update_global_error_variance}{\code{GlobalModelConfig$update_global_error_variance()}} 31 | \item \href{#method-GlobalModelConfig-get_global_error_variance}{\code{GlobalModelConfig$get_global_error_variance()}} 32 | } 33 | } 34 | \if{html}{\out{
}} 35 | \if{html}{\out{}} 36 | \if{latex}{\out{\hypertarget{method-GlobalModelConfig-new}{}}} 37 | \subsection{Method \code{new()}}{ 38 | \subsection{Usage}{ 39 | \if{html}{\out{
}}\preformatted{GlobalModelConfig$new(global_error_variance = 1)}\if{html}{\out{
}} 40 | } 41 | 42 | \subsection{Arguments}{ 43 | \if{html}{\out{
}} 44 | \describe{ 45 | \item{\code{global_error_variance}}{Global error variance parameter (default: \code{1.0})} 46 | } 47 | \if{html}{\out{
}} 48 | } 49 | \subsection{Returns}{ 50 | A new GlobalModelConfig object. 51 | } 52 | } 53 | \if{html}{\out{
}} 54 | \if{html}{\out{}} 55 | \if{latex}{\out{\hypertarget{method-GlobalModelConfig-update_global_error_variance}{}}} 56 | \subsection{Method \code{update_global_error_variance()}}{ 57 | Update global error variance parameter 58 | \subsection{Usage}{ 59 | \if{html}{\out{
}}\preformatted{GlobalModelConfig$update_global_error_variance(global_error_variance)}\if{html}{\out{
}} 60 | } 61 | 62 | \subsection{Arguments}{ 63 | \if{html}{\out{
}} 64 | \describe{ 65 | \item{\code{global_error_variance}}{Global error variance parameter} 66 | } 67 | \if{html}{\out{
}} 68 | } 69 | } 70 | \if{html}{\out{
}} 71 | \if{html}{\out{}} 72 | \if{latex}{\out{\hypertarget{method-GlobalModelConfig-get_global_error_variance}{}}} 73 | \subsection{Method \code{get_global_error_variance()}}{ 74 | Query global error variance parameter for this GlobalModelConfig object 75 | \subsection{Usage}{ 76 | \if{html}{\out{
}}\preformatted{GlobalModelConfig$get_global_error_variance()}\if{html}{\out{
}} 77 | } 78 | 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /man/RandomEffectsTracker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{RandomEffectsTracker} 4 | \alias{RandomEffectsTracker} 5 | \title{Class that defines a "tracker" for random effects models, most notably 6 | storing the data indices available in each group for quicker posterior 7 | computation and sampling of random effects terms.} 8 | \description{ 9 | Stores a mapping from every observation to its group index, a mapping 10 | from group indices to the training sample observations available in that 11 | group, and predictions for each observation. 12 | } 13 | \section{Public fields}{ 14 | \if{html}{\out{
}} 15 | \describe{ 16 | \item{\code{rfx_tracker_ptr}}{External pointer to a C++ StochTree::RandomEffectsTracker class} 17 | } 18 | \if{html}{\out{
}} 19 | } 20 | \section{Methods}{ 21 | \subsection{Public methods}{ 22 | \itemize{ 23 | \item \href{#method-RandomEffectsTracker-new}{\code{RandomEffectsTracker$new()}} 24 | } 25 | } 26 | \if{html}{\out{
}} 27 | \if{html}{\out{}} 28 | \if{latex}{\out{\hypertarget{method-RandomEffectsTracker-new}{}}} 29 | \subsection{Method \code{new()}}{ 30 | Create a new RandomEffectsTracker object. 31 | \subsection{Usage}{ 32 | \if{html}{\out{
}}\preformatted{RandomEffectsTracker$new(rfx_group_indices)}\if{html}{\out{
}} 33 | } 34 | 35 | \subsection{Arguments}{ 36 | \if{html}{\out{
}} 37 | \describe{ 38 | \item{\code{rfx_group_indices}}{Integer indices indicating groups used to define random effects} 39 | } 40 | \if{html}{\out{
}} 41 | } 42 | \subsection{Returns}{ 43 | A new \code{RandomEffectsTracker} object. 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /man/calibrateInverseGammaErrorVariance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calibration.R 3 | \name{calibrateInverseGammaErrorVariance} 4 | \alias{calibrateInverseGammaErrorVariance} 5 | \title{Calibrate the scale parameter on an inverse gamma prior for the global error variance as in Chipman et al (2022)} 6 | \usage{ 7 | calibrateInverseGammaErrorVariance( 8 | y, 9 | X, 10 | W = NULL, 11 | nu = 3, 12 | quant = 0.9, 13 | standardize = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{y}{Outcome to be modeled using BART, BCF or another nonparametric ensemble method.} 18 | 19 | \item{X}{Covariates to be used to partition trees in an ensemble or series of ensemble.} 20 | 21 | \item{W}{(Optional) Basis used to define a "leaf regression" model for each decision tree. The "classic" BART model assumes a constant leaf parameter, which is equivalent to a "leaf regression" on a basis of all ones, though it is not necessary to pass a vector of ones, here or to the BART function. Default: \code{NULL}.} 22 | 23 | \item{nu}{The shape parameter for the global error variance's IG prior. The scale parameter in the Sparapani et al (2021) parameterization is defined as \code{nu*lambda} where \code{lambda} is the output of this function. Default: \code{3}.} 24 | 25 | \item{quant}{(Optional) Quantile of the inverse gamma prior distribution represented by a linear-regression-based overestimate of \code{sigma^2}. Default: \code{0.9}.} 26 | 27 | \item{standardize}{(Optional) Whether or not outcome should be standardized (\code{(y-mean(y))/sd(y)}) before calibration of \code{lambda}. 
Default: \code{TRUE}.} 28 | } 29 | \value{ 30 | Value of \code{lambda} which determines the scale parameter of the global error variance prior (\code{sigma^2 ~ IG(nu,nu*lambda)}) 31 | } 32 | \description{ 33 | Chipman, H., George, E., Hahn, R., McCulloch, R., Pratola, M. and Sparapani, R. (2022). Bayesian Additive Regression Trees, Computational Approaches. In Wiley StatsRef: Statistics Reference Online (eds N. Balakrishnan, T. Colton, B. Everitt, W. Piegorsch, F. Ruggeri and J.L. Teugels). https://doi.org/10.1002/9781118445112.stat08288 34 | } 35 | \examples{ 36 | n <- 100 37 | p <- 5 38 | X <- matrix(runif(n*p), ncol = p) 39 | y <- 10*X[,1] - 20*X[,2] + rnorm(n) 40 | nu <- 3 41 | lambda <- calibrateInverseGammaErrorVariance(y, X, nu = nu) 42 | sigma2hat <- mean(resid(lm(y~X))^2) 43 | mean(var(y)/rgamma(100000, nu, rate = nu*lambda) < sigma2hat) 44 | } 45 | -------------------------------------------------------------------------------- /man/computeForestLeafIndices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kernel.R 3 | \name{computeForestLeafIndices} 4 | \alias{computeForestLeafIndices} 5 | \title{Compute vector of forest leaf indices} 6 | \usage{ 7 | computeForestLeafIndices( 8 | model_object, 9 | covariates, 10 | forest_type = NULL, 11 | propensity = NULL, 12 | forest_inds = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{model_object}{Object of type \code{bartmodel}, \code{bcfmodel}, or \code{ForestSamples} corresponding to a BART / BCF model with at least one forest sample, or a low-level \code{ForestSamples} object.} 17 | 18 | \item{covariates}{Covariates to use for prediction. Must have the same dimensions / column types as the data used to train a forest.} 19 | 20 | \item{forest_type}{Which forest to use from \code{model_object}. 
21 | Valid inputs depend on the model type, and whether or not a given forest was sampled in that model. 22 | 23 | \strong{1. BART} 24 | \itemize{ 25 | \item \code{'mean'}: Extracts leaf indices for the mean forest 26 | \item \code{'variance'}: Extracts leaf indices for the variance forest 27 | } 28 | 29 | \strong{2. BCF} 30 | \itemize{ 31 | \item \code{'prognostic'}: Extracts leaf indices for the prognostic forest 32 | \item \code{'treatment'}: Extracts leaf indices for the treatment effect forest 33 | \item \code{'variance'}: Extracts leaf indices for the variance forest 34 | } 35 | 36 | \strong{3. ForestSamples} 37 | \itemize{ 38 | \item \code{NULL}: It is not necessary to disambiguate when this function is called directly on a \code{ForestSamples} object. This is the default value of this argument. 39 | }} 40 | 41 | \item{propensity}{(Optional) Propensities used for prediction (BCF-only).} 42 | 43 | \item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute leaf indices. If not provided, 44 | this function will return leaf indices for every sample of a forest. 45 | This function uses 0-indexing, so the first forest sample corresponds to \code{forest_num = 0}, and so on.} 46 | } 47 | \value{ 48 | Vector of size \code{num_obs * num_trees}, where \code{num_obs = nrow(covariates)} 49 | and \code{num_trees} is the number of trees in the relevant forest of \code{model_object}. 50 | } 51 | \description{ 52 | Compute and return a vector representation of a forest's leaf predictions for 53 | every observation in a dataset. 54 | 55 | The vector has a "row-major" format that can be easily re-represented 56 | as a CSR sparse matrix: elements are organized so that the first \code{n} elements 57 | correspond to leaf predictions for all \code{n} observations in a dataset for the 58 | first tree in an ensemble, the next \code{n} elements correspond to predictions for 59 | the second tree and so on.
The "data" for each element corresponds to a uniquely 60 | mapped column index that corresponds to a single leaf of a single tree (i.e. 61 | if tree 1 has 3 leaves, its column indices range from 0 to 2, and then tree 2's 62 | leaf indices begin at 3, etc...). 63 | } 64 | \examples{ 65 | X <- matrix(runif(10*100), ncol = 10) 66 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 67 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 68 | computeForestLeafIndices(bart_model, X, "mean") 69 | computeForestLeafIndices(bart_model, X, "mean", 0) 70 | computeForestLeafIndices(bart_model, X, "mean", c(1,3,9)) 71 | } 72 | -------------------------------------------------------------------------------- /man/computeForestLeafVariances.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kernel.R 3 | \name{computeForestLeafVariances} 4 | \alias{computeForestLeafVariances} 5 | \title{Compute vector of forest leaf scale parameters} 6 | \usage{ 7 | computeForestLeafVariances(model_object, forest_type, forest_inds = NULL) 8 | } 9 | \arguments{ 10 | \item{model_object}{Object of type \code{bartmodel} or \code{bcfmodel} corresponding to a BART / BCF model with at least one forest sample} 11 | 12 | \item{forest_type}{Which forest to use from \code{model_object}. 13 | Valid inputs depend on the model type, and whether or not a given forest was sampled in that model. 14 | 15 | \strong{1. BART} 16 | \itemize{ 17 | \item \code{'mean'}: Extracts leaf indices for the mean forest 18 | \item \code{'variance'}: Extracts leaf indices for the variance forest 19 | } 20 | 21 | \strong{2. 
BCF} 22 | \itemize{ 23 | \item \code{'prognostic'}: Extracts leaf indices for the prognostic forest 24 | \item \code{'treatment'}: Extracts leaf indices for the treatment effect forest 25 | \item \code{'variance'}: Extracts leaf indices for the variance forest 26 | }} 27 | 28 | \item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute leaf indices. If not provided, 29 | this function will return leaf indices for every sample of a forest. 30 | This function uses 0-indexing, so the first forest sample corresponds to \code{forest_num = 0}, and so on.} 31 | } 32 | \value{ 33 | Vector of size \code{length(forest_inds)} with the leaf scale parameter for each requested forest. 34 | } 35 | \description{ 36 | Return each forest's leaf node scale parameters. 37 | 38 | If leaf scale is not sampled for the forest in question, throws an error that the 39 | leaf model does not have a stochastic scale parameter. 40 | } 41 | \examples{ 42 | X <- matrix(runif(10*100), ncol = 10) 43 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 44 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 45 | computeForestLeafVariances(bart_model, "mean") 46 | computeForestLeafVariances(bart_model, "mean", 0) 47 | computeForestLeafVariances(bart_model, "mean", c(1,3,5)) 48 | } 49 | -------------------------------------------------------------------------------- /man/computeForestMaxLeafIndex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kernel.R 3 | \name{computeForestMaxLeafIndex} 4 | \alias{computeForestMaxLeafIndex} 5 | \title{Compute and return the largest possible leaf index computable by \code{computeForestLeafIndices} for the forests in a designated forest sample container.} 6 | \usage{ 7 | computeForestMaxLeafIndex(model_object, forest_type = NULL, forest_inds = NULL) 8 | } 9 | \arguments{ 10 | \item{model_object}{Object of type \code{bartmodel}, 
\code{bcfmodel}, or \code{ForestSamples} corresponding to a BART / BCF model with at least one forest sample, or a low-level \code{ForestSamples} object.} 11 | 12 | \item{forest_type}{Which forest to use from \code{model_object}. 13 | Valid inputs depend on the model type, and whether or not a given forest was sampled in that model. 14 | 15 | \strong{1. BART} 16 | \itemize{ 17 | \item \code{'mean'}: Extracts leaf indices for the mean forest 18 | \item \code{'variance'}: Extracts leaf indices for the variance forest 19 | } 20 | 21 | \strong{2. BCF} 22 | \itemize{ 23 | \item \code{'prognostic'}: Extracts leaf indices for the prognostic forest 24 | \item \code{'treatment'}: Extracts leaf indices for the treatment effect forest 25 | \item \code{'variance'}: Extracts leaf indices for the variance forest 26 | } 27 | 28 | \strong{3. ForestSamples} 29 | \itemize{ 30 | \item \code{NULL}: It is not necessary to disambiguate when this function is called directly on a \code{ForestSamples} object. This is the default value of this argument. 31 | }} 32 | 33 | \item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute max leaf indices. If not provided, 34 | this function will return max leaf indices for every sample of a forest. 35 | This function uses 0-indexing, so the first forest sample corresponds to \code{forest_num = 0}, and so on.} 36 | } 37 | \value{ 38 | Vector containing the largest possible leaf index computable by \code{computeForestLeafIndices} for the forests in a designated forest sample container. 39 | } 40 | \description{ 41 | Compute and return the largest possible leaf index computable by \code{computeForestLeafIndices} for the forests in a designated forest sample container.
42 | } 43 | \examples{ 44 | X <- matrix(runif(10*100), ncol = 10) 45 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 46 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 47 | computeForestMaxLeafIndex(bart_model, "mean") 48 | computeForestMaxLeafIndex(bart_model, "mean", 0) 49 | computeForestMaxLeafIndex(bart_model, "mean", c(1,3,9)) 50 | } 51 | -------------------------------------------------------------------------------- /man/convertPreprocessorToJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{convertPreprocessorToJson} 4 | \alias{convertPreprocessorToJson} 5 | \title{Convert the persistent aspects of a covariate preprocessor to (in-memory) C++ JSON object} 6 | \usage{ 7 | convertPreprocessorToJson(object) 8 | } 9 | \arguments{ 10 | \item{object}{List containing information on variables, including train set 11 | categories for categorical variables} 12 | } 13 | \value{ 14 | wrapper around in-memory C++ JSON object 15 | } 16 | \description{ 17 | Convert the persistent aspects of a covariate preprocessor to (in-memory) C++ JSON object 18 | } 19 | \examples{ 20 | cov_mat <- matrix(1:12, ncol = 3) 21 | preprocess_list <- preprocessTrainData(cov_mat) 22 | preprocessor_json <- convertPreprocessorToJson(preprocess_list$metadata) 23 | } 24 | -------------------------------------------------------------------------------- /man/createBARTModelFromCombinedJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{createBARTModelFromCombinedJson} 4 | \alias{createBARTModelFromCombinedJson} 5 | \title{Convert a list of (in-memory) JSON representations of a BART model to a single combined BART model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | 
createBARTModelFromCombinedJson(json_object_list) 9 | } 10 | \arguments{ 11 | \item{json_object_list}{List of objects of type \code{CppJson} containing Json representation of a BART model} 12 | } 13 | \value{ 14 | Object of type \code{bartmodel} 15 | } 16 | \description{ 17 | Convert a list of (in-memory) JSON representations of a BART model to a single combined BART model object 18 | which can be used for prediction, etc... 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | bart_json <- list(saveBARTModelToJson(bart_model)) 44 | bart_model_roundtrip <- createBARTModelFromCombinedJson(bart_json) 45 | } 46 | -------------------------------------------------------------------------------- /man/createBARTModelFromCombinedJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{createBARTModelFromCombinedJsonString} 4 | \alias{createBARTModelFromCombinedJsonString} 5 | \title{Convert a list of (in-memory) JSON strings that represent BART models to a single combined BART model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | createBARTModelFromCombinedJsonString(json_string_list) 
9 | } 10 | \arguments{ 11 | \item{json_string_list}{List of JSON strings which can be parsed to objects of type \code{CppJson} containing Json representation of a BART model} 12 | } 13 | \value{ 14 | Object of type \code{bartmodel} 15 | } 16 | \description{ 17 | Convert a list of (in-memory) JSON strings that represent BART models to a single combined BART model object 18 | which can be used for prediction, etc... 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | bart_json_string_list <- list(saveBARTModelToJsonString(bart_model)) 44 | bart_model_roundtrip <- createBARTModelFromCombinedJsonString(bart_json_string_list) 45 | } 46 | -------------------------------------------------------------------------------- /man/createBARTModelFromJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{createBARTModelFromJson} 4 | \alias{createBARTModelFromJson} 5 | \title{Convert an (in-memory) JSON representation of a BART model to a BART model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | createBARTModelFromJson(json_object) 9 | } 10 | \arguments{ 11 | \item{json_object}{Object of type 
\code{CppJson} containing Json representation of a BART model} 12 | } 13 | \value{ 14 | Object of type \code{bartmodel} 15 | } 16 | \description{ 17 | Convert an (in-memory) JSON representation of a BART model to a BART model object 18 | which can be used for prediction, etc... 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | bart_json <- saveBARTModelToJson(bart_model) 44 | bart_model_roundtrip <- createBARTModelFromJson(bart_json) 45 | } 46 | -------------------------------------------------------------------------------- /man/createBARTModelFromJsonFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{createBARTModelFromJsonFile} 4 | \alias{createBARTModelFromJsonFile} 5 | \title{Convert a JSON file containing sample information on a trained BART model 6 | to a BART model object which can be used for prediction, etc...} 7 | \usage{ 8 | createBARTModelFromJsonFile(json_filename) 9 | } 10 | \arguments{ 11 | \item{json_filename}{String of filepath, must end in ".json"} 12 | } 13 | \value{ 14 | Object of type \code{bartmodel} 15 | } 16 | \description{ 17 | Convert a JSON file containing sample 
information on a trained BART model 18 | to a BART model object which can be used for prediction, etc... 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | tmpjson <- tempfile(fileext = ".json") 44 | saveBARTModelToJsonFile(bart_model, file.path(tmpjson)) 45 | bart_model_roundtrip <- createBARTModelFromJsonFile(file.path(tmpjson)) 46 | unlink(tmpjson) 47 | } 48 | -------------------------------------------------------------------------------- /man/createBARTModelFromJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{createBARTModelFromJsonString} 4 | \alias{createBARTModelFromJsonString} 5 | \title{Convert a JSON string containing sample information on a trained BART model 6 | to a BART model object which can be used for prediction, etc...} 7 | \usage{ 8 | createBARTModelFromJsonString(json_string) 9 | } 10 | \arguments{ 11 | \item{json_string}{JSON string dump} 12 | } 13 | \value{ 14 | Object of type \code{bartmodel} 15 | } 16 | \description{ 17 | Convert a JSON string containing sample information on a trained BART model 18 | to a BART model object which can be used for prediction, 
etc... 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | bart_json <- saveBARTModelToJsonString(bart_model) 44 | bart_model_roundtrip <- createBARTModelFromJsonString(bart_json) 45 | y_hat_mean_roundtrip <- rowMeans(predict(bart_model_roundtrip, X_train)$y_hat) 46 | } 47 | -------------------------------------------------------------------------------- /man/createBCFModelFromCombinedJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{createBCFModelFromCombinedJson} 4 | \alias{createBCFModelFromCombinedJson} 5 | \title{Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | createBCFModelFromCombinedJson(json_object_list) 9 | } 10 | \arguments{ 11 | \item{json_object_list}{List of objects of type \code{CppJson} containing Json representation of a BCF model} 12 | } 13 | \value{ 14 | Object of type \code{bcfmodel} 15 | } 16 | \description{ 17 | Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object 18 | which can be used for prediction, 
etc... 19 | } 20 | \examples{ 21 | n <- 500 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | mu_x <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | pi_x <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 35 | ) 36 | tau_x <- ( 37 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 38 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 39 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 40 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 41 | ) 42 | Z <- rbinom(n, 1, pi_x) 43 | E_XZ <- mu_x + Z*tau_x 44 | snr <- 3 45 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 46 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 47 | rfx_basis <- cbind(1, runif(n, -1, 1)) 48 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 49 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | pi_test <- pi_x[test_inds] 58 | pi_train <- pi_x[train_inds] 59 | Z_test <- Z[test_inds] 60 | Z_train <- Z[train_inds] 61 | y_test <- y[test_inds] 62 | y_train <- y[train_inds] 63 | mu_test <- mu_x[test_inds] 64 | mu_train <- mu_x[train_inds] 65 | tau_test <- tau_x[test_inds] 66 | tau_train <- tau_x[train_inds] 67 | rfx_group_ids_test <- rfx_group_ids[test_inds] 68 | rfx_group_ids_train <- rfx_group_ids[train_inds] 69 | rfx_basis_test <- rfx_basis[test_inds,] 70 | rfx_basis_train <- rfx_basis[train_inds,] 71 | rfx_term_test <- rfx_term[test_inds] 72 | rfx_term_train <- rfx_term[train_inds] 73 | bcf_model <- bcf(X_train = X_train, Z_train = 
Z_train, y_train = y_train, 74 | propensity_train = pi_train, 75 | rfx_group_ids_train = rfx_group_ids_train, 76 | rfx_basis_train = rfx_basis_train, X_test = X_test, 77 | Z_test = Z_test, propensity_test = pi_test, 78 | rfx_group_ids_test = rfx_group_ids_test, 79 | rfx_basis_test = rfx_basis_test, 80 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 81 | bcf_json_list <- list(saveBCFModelToJson(bcf_model)) 82 | bcf_model_roundtrip <- createBCFModelFromCombinedJson(bcf_json_list) 83 | } 84 | -------------------------------------------------------------------------------- /man/createBCFModelFromCombinedJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{createBCFModelFromCombinedJsonString} 4 | \alias{createBCFModelFromCombinedJsonString} 5 | \title{Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | createBCFModelFromCombinedJsonString(json_string_list) 9 | } 10 | \arguments{ 11 | \item{json_string_list}{List of JSON strings which can be parsed to objects of type \code{CppJson} containing Json representation of a BCF model} 12 | } 13 | \value{ 14 | Object of type \code{bcfmodel} 15 | } 16 | \description{ 17 | Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object 18 | which can be used for prediction, etc... 
19 | } 20 | \examples{ 21 | n <- 500 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | mu_x <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | pi_x <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 35 | ) 36 | tau_x <- ( 37 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 38 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 39 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 40 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 41 | ) 42 | Z <- rbinom(n, 1, pi_x) 43 | E_XZ <- mu_x + Z*tau_x 44 | snr <- 3 45 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 46 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 47 | rfx_basis <- cbind(1, runif(n, -1, 1)) 48 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 49 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | pi_test <- pi_x[test_inds] 58 | pi_train <- pi_x[train_inds] 59 | Z_test <- Z[test_inds] 60 | Z_train <- Z[train_inds] 61 | y_test <- y[test_inds] 62 | y_train <- y[train_inds] 63 | mu_test <- mu_x[test_inds] 64 | mu_train <- mu_x[train_inds] 65 | tau_test <- tau_x[test_inds] 66 | tau_train <- tau_x[train_inds] 67 | rfx_group_ids_test <- rfx_group_ids[test_inds] 68 | rfx_group_ids_train <- rfx_group_ids[train_inds] 69 | rfx_basis_test <- rfx_basis[test_inds,] 70 | rfx_basis_train <- rfx_basis[train_inds,] 71 | rfx_term_test <- rfx_term[test_inds] 72 | rfx_term_train <- rfx_term[train_inds] 73 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, 
y_train = y_train, 74 | propensity_train = pi_train, 75 | rfx_group_ids_train = rfx_group_ids_train, 76 | rfx_basis_train = rfx_basis_train, X_test = X_test, 77 | Z_test = Z_test, propensity_test = pi_test, 78 | rfx_group_ids_test = rfx_group_ids_test, 79 | rfx_basis_test = rfx_basis_test, 80 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 81 | bcf_json_string_list <- list(saveBCFModelToJsonString(bcf_model)) 82 | bcf_model_roundtrip <- createBCFModelFromCombinedJsonString(bcf_json_string_list) 83 | } 84 | -------------------------------------------------------------------------------- /man/createBCFModelFromJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{createBCFModelFromJson} 4 | \alias{createBCFModelFromJson} 5 | \title{Convert an (in-memory) JSON representation of a BCF model to a BCF model object 6 | which can be used for prediction, etc...} 7 | \usage{ 8 | createBCFModelFromJson(json_object) 9 | } 10 | \arguments{ 11 | \item{json_object}{Object of type \code{CppJson} containing Json representation of a BCF model} 12 | } 13 | \value{ 14 | Object of type \code{bcfmodel} 15 | } 16 | \description{ 17 | Convert an (in-memory) JSON representation of a BCF model to a BCF model object 18 | which can be used for prediction, etc... 
19 | } 20 | \examples{ 21 | n <- 500 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | mu_x <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | pi_x <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 35 | ) 36 | tau_x <- ( 37 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 38 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 39 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 40 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 41 | ) 42 | Z <- rbinom(n, 1, pi_x) 43 | E_XZ <- mu_x + Z*tau_x 44 | snr <- 3 45 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 46 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 47 | rfx_basis <- cbind(1, runif(n, -1, 1)) 48 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 49 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | pi_test <- pi_x[test_inds] 58 | pi_train <- pi_x[train_inds] 59 | Z_test <- Z[test_inds] 60 | Z_train <- Z[train_inds] 61 | y_test <- y[test_inds] 62 | y_train <- y[train_inds] 63 | mu_test <- mu_x[test_inds] 64 | mu_train <- mu_x[train_inds] 65 | tau_test <- tau_x[test_inds] 66 | tau_train <- tau_x[train_inds] 67 | rfx_group_ids_test <- rfx_group_ids[test_inds] 68 | rfx_group_ids_train <- rfx_group_ids[train_inds] 69 | rfx_basis_test <- rfx_basis[test_inds,] 70 | rfx_basis_train <- rfx_basis[train_inds,] 71 | rfx_term_test <- rfx_term[test_inds] 72 | rfx_term_train <- rfx_term[train_inds] 73 | mu_params <- list(sample_sigma2_leaf = TRUE) 74 | 
tau_params <- list(sample_sigma2_leaf = FALSE) 75 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, 76 | propensity_train = pi_train, 77 | rfx_group_ids_train = rfx_group_ids_train, 78 | rfx_basis_train = rfx_basis_train, X_test = X_test, 79 | Z_test = Z_test, propensity_test = pi_test, 80 | rfx_group_ids_test = rfx_group_ids_test, 81 | rfx_basis_test = rfx_basis_test, 82 | num_gfr = 10, num_burnin = 0, num_mcmc = 10, 83 | prognostic_forest_params = mu_params, 84 | treatment_effect_forest_params = tau_params) 85 | bcf_json <- saveBCFModelToJson(bcf_model) 86 | bcf_model_roundtrip <- createBCFModelFromJson(bcf_json) 87 | } 88 | -------------------------------------------------------------------------------- /man/createBCFModelFromJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{createBCFModelFromJsonString} 4 | \alias{createBCFModelFromJsonString} 5 | \title{Convert a JSON string containing sample information on a trained BCF model 6 | to a BCF model object which can be used for prediction, etc...} 7 | \usage{ 8 | createBCFModelFromJsonString(json_string) 9 | } 10 | \arguments{ 11 | \item{json_string}{JSON string dump} 12 | } 13 | \value{ 14 | Object of type \code{bcfmodel} 15 | } 16 | \description{ 17 | Convert a JSON string containing sample information on a trained BCF model 18 | to a BCF model object which can be used for prediction, etc... 
19 | } 20 | \examples{ 21 | n <- 500 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | mu_x <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | pi_x <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 35 | ) 36 | tau_x <- ( 37 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 38 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 39 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 40 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 41 | ) 42 | Z <- rbinom(n, 1, pi_x) 43 | E_XZ <- mu_x + Z*tau_x 44 | snr <- 3 45 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 46 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 47 | rfx_basis <- cbind(1, runif(n, -1, 1)) 48 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 49 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | pi_test <- pi_x[test_inds] 58 | pi_train <- pi_x[train_inds] 59 | Z_test <- Z[test_inds] 60 | Z_train <- Z[train_inds] 61 | y_test <- y[test_inds] 62 | y_train <- y[train_inds] 63 | mu_test <- mu_x[test_inds] 64 | mu_train <- mu_x[train_inds] 65 | tau_test <- tau_x[test_inds] 66 | tau_train <- tau_x[train_inds] 67 | rfx_group_ids_test <- rfx_group_ids[test_inds] 68 | rfx_group_ids_train <- rfx_group_ids[train_inds] 69 | rfx_basis_test <- rfx_basis[test_inds,] 70 | rfx_basis_train <- rfx_basis[train_inds,] 71 | rfx_term_test <- rfx_term[test_inds] 72 | rfx_term_train <- rfx_term[train_inds] 73 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, 
y_train = y_train, 74 | propensity_train = pi_train, 75 | rfx_group_ids_train = rfx_group_ids_train, 76 | rfx_basis_train = rfx_basis_train, X_test = X_test, 77 | Z_test = Z_test, propensity_test = pi_test, 78 | rfx_group_ids_test = rfx_group_ids_test, 79 | rfx_basis_test = rfx_basis_test, 80 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 81 | bcf_json <- saveBCFModelToJsonString(bcf_model) 82 | bcf_model_roundtrip <- createBCFModelFromJsonString(bcf_json) 83 | } 84 | -------------------------------------------------------------------------------- /man/createCppJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{createCppJson} 4 | \alias{createCppJson} 5 | \title{Create a new (empty) C++ Json object} 6 | \usage{ 7 | createCppJson() 8 | } 9 | \value{ 10 | \code{CppJson} object 11 | } 12 | \description{ 13 | Create a new (empty) C++ Json object 14 | } 15 | \examples{ 16 | example_vec <- runif(10) 17 | example_json <- createCppJson() 18 | example_json$add_vector("myvec", example_vec) 19 | } 20 | -------------------------------------------------------------------------------- /man/createCppJsonFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{createCppJsonFile} 4 | \alias{createCppJsonFile} 5 | \title{Create a C++ Json object from a Json file} 6 | \usage{ 7 | createCppJsonFile(json_filename) 8 | } 9 | \arguments{ 10 | \item{json_filename}{Name of file to read. 
Must end in \code{.json}.} 11 | } 12 | \value{ 13 | \code{CppJson} object 14 | } 15 | \description{ 16 | Create a C++ Json object from a Json file 17 | } 18 | \examples{ 19 | example_vec <- runif(10) 20 | example_json <- createCppJson() 21 | example_json$add_vector("myvec", example_vec) 22 | tmpjson <- tempfile(fileext = ".json") 23 | example_json$save_file(file.path(tmpjson)) 24 | example_json_roundtrip <- createCppJsonFile(file.path(tmpjson)) 25 | unlink(tmpjson) 26 | } 27 | -------------------------------------------------------------------------------- /man/createCppJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{createCppJsonString} 4 | \alias{createCppJsonString} 5 | \title{Create a C++ Json object from a Json string} 6 | \usage{ 7 | createCppJsonString(json_string) 8 | } 9 | \arguments{ 10 | \item{json_string}{JSON string dump} 11 | } 12 | \value{ 13 | \code{CppJson} object 14 | } 15 | \description{ 16 | Create a C++ Json object from a Json string 17 | } 18 | \examples{ 19 | example_vec <- runif(10) 20 | example_json <- createCppJson() 21 | example_json$add_vector("myvec", example_vec) 22 | example_json_string <- example_json$return_json_string() 23 | example_json_roundtrip <- createCppJsonString(example_json_string) 24 | } 25 | -------------------------------------------------------------------------------- /man/createCppRNG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model.R 3 | \name{createCppRNG} 4 | \alias{createCppRNG} 5 | \title{Create an R class that wraps a C++ random number generator} 6 | \usage{ 7 | createCppRNG(random_seed = -1) 8 | } 9 | \arguments{ 10 | \item{random_seed}{(Optional) random seed for sampling} 11 | } 12 | \value{ 13 | \code{CppRng} object 14 | } 15 
| \description{ 16 | Create an R class that wraps a C++ random number generator 17 | } 18 | \examples{ 19 | rng <- createCppRNG(1234) 20 | rng <- createCppRNG() 21 | } 22 | -------------------------------------------------------------------------------- /man/createForest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forest.R 3 | \name{createForest} 4 | \alias{createForest} 5 | \title{Create a forest} 6 | \usage{ 7 | createForest( 8 | num_trees, 9 | leaf_dimension = 1, 10 | is_leaf_constant = FALSE, 11 | is_exponentiated = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{num_trees}{Number of trees in the forest} 16 | 17 | \item{leaf_dimension}{Dimensionality of the outcome model} 18 | 19 | \item{is_leaf_constant}{Whether leaf is constant} 20 | 21 | \item{is_exponentiated}{Whether forest predictions should be exponentiated before being returned} 22 | } 23 | \value{ 24 | \code{Forest} object 25 | } 26 | \description{ 27 | Create a forest 28 | } 29 | \examples{ 30 | num_trees <- 100 31 | leaf_dimension <- 2 32 | is_leaf_constant <- FALSE 33 | is_exponentiated <- FALSE 34 | forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 35 | } 36 | -------------------------------------------------------------------------------- /man/createForestDataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{createForestDataset} 4 | \alias{createForestDataset} 5 | \title{Create a forest dataset object} 6 | \usage{ 7 | createForestDataset(covariates, basis = NULL, variance_weights = NULL) 8 | } 9 | \arguments{ 10 | \item{covariates}{Matrix of covariates} 11 | 12 | \item{basis}{(Optional) Matrix of bases used to define a leaf regression} 13 | 14 | \item{variance_weights}{(Optional) Vector of 
observation-specific variance weights} 15 | } 16 | \value{ 17 | \code{ForestDataset} object 18 | } 19 | \description{ 20 | Create a forest dataset object 21 | } 22 | \examples{ 23 | covariate_matrix <- matrix(runif(10*100), ncol = 10) 24 | basis_matrix <- matrix(rnorm(3*100), ncol = 3) 25 | weight_vector <- rnorm(100) 26 | forest_dataset <- createForestDataset(covariate_matrix) 27 | forest_dataset <- createForestDataset(covariate_matrix, basis_matrix) 28 | forest_dataset <- createForestDataset(covariate_matrix, basis_matrix, weight_vector) 29 | } 30 | -------------------------------------------------------------------------------- /man/createForestModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model.R 3 | \name{createForestModel} 4 | \alias{createForestModel} 5 | \title{Create a forest model object} 6 | \usage{ 7 | createForestModel(forest_dataset, forest_model_config, global_model_config) 8 | } 9 | \arguments{ 10 | \item{forest_dataset}{ForestDataset object, used to initialize forest sampling data structures} 11 | 12 | \item{forest_model_config}{ForestModelConfig object containing forest model parameters and settings} 13 | 14 | \item{global_model_config}{GlobalModelConfig object containing global model parameters and settings} 15 | } 16 | \value{ 17 | \code{ForestModel} object 18 | } 19 | \description{ 20 | Create a forest model object 21 | } 22 | \examples{ 23 | num_trees <- 100 24 | n <- 100 25 | p <- 10 26 | alpha <- 0.95 27 | beta <- 2.0 28 | min_samples_leaf <- 2 29 | max_depth <- 10 30 | feature_types <- as.integer(rep(0, p)) 31 | X <- matrix(runif(n*p), ncol = p) 32 | forest_dataset <- createForestDataset(X) 33 | forest_model_config <- createForestModelConfig(feature_types=feature_types, 34 | num_trees=num_trees, num_features=p, 35 | num_observations=n, alpha=alpha, beta=beta, 36 | min_samples_leaf=min_samples_leaf, 37 | 
max_depth=max_depth, leaf_model_type=1) 38 | global_model_config <- createGlobalModelConfig(global_error_variance=1.0) 39 | forest_model <- createForestModel(forest_dataset, forest_model_config, global_model_config) 40 | } 41 | -------------------------------------------------------------------------------- /man/createForestModelConfig.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{createForestModelConfig} 4 | \alias{createForestModelConfig} 5 | \title{Create a forest model config object} 6 | \usage{ 7 | createForestModelConfig( 8 | feature_types = NULL, 9 | sweep_update_indices = NULL, 10 | num_trees = NULL, 11 | num_features = NULL, 12 | num_observations = NULL, 13 | variable_weights = NULL, 14 | leaf_dimension = 1, 15 | alpha = 0.95, 16 | beta = 2, 17 | min_samples_leaf = 5, 18 | max_depth = -1, 19 | leaf_model_type = 1, 20 | leaf_model_scale = NULL, 21 | variance_forest_shape = 1, 22 | variance_forest_scale = 1, 23 | cutpoint_grid_size = 100 24 | ) 25 | } 26 | \arguments{ 27 | \item{feature_types}{Vector of integer-coded feature types (integers where 0 = numeric, 1 = ordered categorical, 2 = unordered categorical)} 28 | 29 | \item{sweep_update_indices}{Vector of (0-indexed) indices of trees to update in a sweep} 30 | 31 | \item{num_trees}{Number of trees in the forest being sampled} 32 | 33 | \item{num_features}{Number of features in training dataset} 34 | 35 | \item{num_observations}{Number of observations in training dataset} 36 | 37 | \item{variable_weights}{Vector specifying sampling probability for all p covariates in ForestDataset} 38 | 39 | \item{leaf_dimension}{Dimension of the leaf model (default: \code{1})} 40 | 41 | \item{alpha}{Root node split probability in tree prior (default: \code{0.95})} 42 | 43 | \item{beta}{Depth prior penalty in tree prior (default: \code{2.0})} 44 | 45 | 
\item{min_samples_leaf}{Minimum number of samples in a tree leaf (default: \code{5})} 46 | 47 | \item{max_depth}{Maximum depth of any tree in the ensemble in the model. Setting to \code{-1} does not enforce any depth limits on trees. Default: \code{-1}.} 48 | 49 | \item{leaf_model_type}{Integer specifying the leaf model type (0 = constant leaf, 1 = univariate leaf regression, 2 = multivariate leaf regression). Default: \code{1}.} 50 | 51 | \item{leaf_model_scale}{Scale parameter used in Gaussian leaf models (can either be a scalar or a q x q matrix, where q is the dimensionality of the basis and is only >1 when \code{leaf_model_type = 2}). Calibrated internally as \code{1/num_trees}, propagated along diagonal if needed for multivariate leaf models.} 52 | 53 | \item{variance_forest_shape}{Shape parameter for IG leaf models (applicable when \code{leaf_model_type = 3}). Default: \code{1}.} 54 | 55 | \item{variance_forest_scale}{Scale parameter for IG leaf models (applicable when \code{leaf_model_type = 3}). 
Default: \code{1}.} 56 | 57 | \item{cutpoint_grid_size}{Number of unique cutpoints to consider (default: \code{100})} 58 | } 59 | \value{ 60 | ForestModelConfig object 61 | } 62 | \description{ 63 | Create a forest model config object 64 | } 65 | \examples{ 66 | config <- createForestModelConfig(num_trees = 10, num_features = 5, num_observations = 100) 67 | } 68 | -------------------------------------------------------------------------------- /man/createForestSamples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forest.R 3 | \name{createForestSamples} 4 | \alias{createForestSamples} 5 | \title{Create a container of forest samples} 6 | \usage{ 7 | createForestSamples( 8 | num_trees, 9 | leaf_dimension = 1, 10 | is_leaf_constant = FALSE, 11 | is_exponentiated = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{num_trees}{Number of trees} 16 | 17 | \item{leaf_dimension}{Dimensionality of the outcome model} 18 | 19 | \item{is_leaf_constant}{Whether leaf is constant} 20 | 21 | \item{is_exponentiated}{Whether forest predictions should be exponentiated before being returned} 22 | } 23 | \value{ 24 | \code{ForestSamples} object 25 | } 26 | \description{ 27 | Create a container of forest samples 28 | } 29 | \examples{ 30 | num_trees <- 100 31 | leaf_dimension <- 2 32 | is_leaf_constant <- FALSE 33 | is_exponentiated <- FALSE 34 | forest_samples <- createForestSamples(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 35 | } 36 | -------------------------------------------------------------------------------- /man/createGlobalModelConfig.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{createGlobalModelConfig} 4 | \alias{createGlobalModelConfig} 5 | \title{Create a global model config object} 6 | 
\usage{ 7 | createGlobalModelConfig(global_error_variance = 1) 8 | } 9 | \arguments{ 10 | \item{global_error_variance}{Global error variance parameter (default: \code{1.0})} 11 | } 12 | \value{ 13 | GlobalModelConfig object 14 | } 15 | \description{ 16 | Create a global model config object 17 | } 18 | \examples{ 19 | config <- createGlobalModelConfig(global_error_variance = 100) 20 | } 21 | -------------------------------------------------------------------------------- /man/createOutcome.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{createOutcome} 4 | \alias{createOutcome} 5 | \title{Create an outcome object} 6 | \usage{ 7 | createOutcome(outcome) 8 | } 9 | \arguments{ 10 | \item{outcome}{Vector of outcome values} 11 | } 12 | \value{ 13 | \code{Outcome} object 14 | } 15 | \description{ 16 | Create an outcome object 17 | } 18 | \examples{ 19 | X <- matrix(runif(10*100), ncol = 10) 20 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 21 | outcome <- createOutcome(y) 22 | } 23 | -------------------------------------------------------------------------------- /man/createPreprocessorFromJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{createPreprocessorFromJson} 4 | \alias{createPreprocessorFromJson} 5 | \title{Reload a covariate preprocessor object from a JSON string containing a serialized preprocessor} 6 | \usage{ 7 | createPreprocessorFromJson(json_object) 8 | } 9 | \arguments{ 10 | \item{json_object}{in-memory wrapper around JSON C++ object containing covariate preprocessor metadata} 11 | } 12 | \value{ 13 | Preprocessor object that can be used with the \code{preprocessPredictionData} function 14 | } 15 | \description{ 16 | Reload a covariate preprocessor object from a JSON string 
containing a serialized preprocessor 17 | } 18 | \examples{ 19 | cov_mat <- matrix(1:12, ncol = 3) 20 | preprocess_list <- preprocessTrainData(cov_mat) 21 | preprocessor_json <- convertPreprocessorToJson(preprocess_list$metadata) 22 | preprocessor_roundtrip <- createPreprocessorFromJson(preprocessor_json) 23 | } 24 | -------------------------------------------------------------------------------- /man/createPreprocessorFromJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{createPreprocessorFromJsonString} 4 | \alias{createPreprocessorFromJsonString} 5 | \title{Reload a covariate preprocessor object from a JSON string containing a serialized preprocessor} 6 | \usage{ 7 | createPreprocessorFromJsonString(json_string) 8 | } 9 | \arguments{ 10 | \item{json_string}{in-memory JSON string containing covariate preprocessor metadata} 11 | } 12 | \value{ 13 | Preprocessor object that can be used with the \code{preprocessPredictionData} function 14 | } 15 | \description{ 16 | Reload a covariate preprocessor object from a JSON string containing a serialized preprocessor 17 | } 18 | \examples{ 19 | cov_mat <- matrix(1:12, ncol = 3) 20 | preprocess_list <- preprocessTrainData(cov_mat) 21 | preprocessor_json_string <- savePreprocessorToJsonString(preprocess_list$metadata) 22 | preprocessor_roundtrip <- createPreprocessorFromJsonString(preprocessor_json_string) 23 | } 24 | -------------------------------------------------------------------------------- /man/createRandomEffectSamples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{createRandomEffectSamples} 4 | \alias{createRandomEffectSamples} 5 | \title{Create a \code{RandomEffectSamples} object} 6 | \usage{ 7 | 
createRandomEffectSamples(num_components, num_groups, random_effects_tracker) 8 | } 9 | \arguments{ 10 | \item{num_components}{Number of "components" or bases defining the random effects regression} 11 | 12 | \item{num_groups}{Number of random effects groups} 13 | 14 | \item{random_effects_tracker}{Object of type \code{RandomEffectsTracker}} 15 | } 16 | \value{ 17 | \code{RandomEffectSamples} object 18 | } 19 | \description{ 20 | Create a \code{RandomEffectSamples} object 21 | } 22 | \examples{ 23 | n <- 100 24 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 25 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 26 | num_groups <- length(unique(rfx_group_ids)) 27 | num_components <- ncol(rfx_basis) 28 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 29 | rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) 30 | } 31 | -------------------------------------------------------------------------------- /man/createRandomEffectsDataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{createRandomEffectsDataset} 4 | \alias{createRandomEffectsDataset} 5 | \title{Create a random effects dataset object} 6 | \usage{ 7 | createRandomEffectsDataset(group_labels, basis, variance_weights = NULL) 8 | } 9 | \arguments{ 10 | \item{group_labels}{Vector of group labels} 11 | 12 | \item{basis}{Matrix of bases used to define the random effects regression (for an intercept-only model, pass an array of ones)} 13 | 14 | \item{variance_weights}{(Optional) Vector of observation-specific variance weights} 15 | } 16 | \value{ 17 | \code{RandomEffectsDataset} object 18 | } 19 | \description{ 20 | Create a random effects dataset object 21 | } 22 | \examples{ 23 | rfx_group_ids <- sample(1:2, size = 100, replace = TRUE) 24 | rfx_basis <- matrix(rnorm(3*100), ncol = 3) 25 | weight_vector <- rnorm(100) 26 | rfx_dataset <- 
createRandomEffectsDataset(rfx_group_ids, rfx_basis) 27 | rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis, weight_vector) 28 | } 29 | -------------------------------------------------------------------------------- /man/createRandomEffectsModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{createRandomEffectsModel} 4 | \alias{createRandomEffectsModel} 5 | \title{Create a \code{RandomEffectsModel} object} 6 | \usage{ 7 | createRandomEffectsModel(num_components, num_groups) 8 | } 9 | \arguments{ 10 | \item{num_components}{Number of "components" or bases defining the random effects regression} 11 | 12 | \item{num_groups}{Number of random effects groups} 13 | } 14 | \value{ 15 | \code{RandomEffectsModel} object 16 | } 17 | \description{ 18 | Create a \code{RandomEffectsModel} object 19 | } 20 | \examples{ 21 | n <- 100 22 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 23 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 24 | num_groups <- length(unique(rfx_group_ids)) 25 | num_components <- ncol(rfx_basis) 26 | rfx_model <- createRandomEffectsModel(num_components, num_groups) 27 | } 28 | -------------------------------------------------------------------------------- /man/createRandomEffectsTracker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{createRandomEffectsTracker} 4 | \alias{createRandomEffectsTracker} 5 | \title{Create a \code{RandomEffectsTracker} object} 6 | \usage{ 7 | createRandomEffectsTracker(rfx_group_indices) 8 | } 9 | \arguments{ 10 | \item{rfx_group_indices}{Integer indices indicating groups used to define random effects} 11 | } 12 | \value{ 13 | \code{RandomEffectsTracker} object 14 | } 15 | \description{ 16 | Create a 
\code{RandomEffectsTracker} object 17 | } 18 | \examples{ 19 | n <- 100 20 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 21 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 22 | num_groups <- length(unique(rfx_group_ids)) 23 | num_components <- ncol(rfx_basis) 24 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 25 | } 26 | -------------------------------------------------------------------------------- /man/getRandomEffectSamples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generics.R 3 | \name{getRandomEffectSamples} 4 | \alias{getRandomEffectSamples} 5 | \title{Generic function for extracting random effect samples from a model object (BCF, BART, etc...)} 6 | \usage{ 7 | getRandomEffectSamples(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{Fitted model object from which to extract random effects} 11 | 12 | \item{...}{Other parameters to be used in random effects extraction} 13 | } 14 | \value{ 15 | List of random effect samples 16 | } 17 | \description{ 18 | Generic function for extracting random effect samples from a model object (BCF, BART, etc...) 
19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 10 23 | X <- matrix(runif(n*p), ncol = p) 24 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 25 | rfx_basis <- rep(1.0, n) 26 | y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 27 | bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, 28 | rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) 29 | rfx_samples <- getRandomEffectSamples(bart_model) 30 | } 31 | -------------------------------------------------------------------------------- /man/getRandomEffectSamples.bartmodel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{getRandomEffectSamples.bartmodel} 4 | \alias{getRandomEffectSamples.bartmodel} 5 | \title{Extract raw sample values for each of the random effect parameter terms.} 6 | \usage{ 7 | \method{getRandomEffectSamples}{bartmodel}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} 11 | 12 | \item{...}{Other parameters to be used in random effects extraction} 13 | } 14 | \value{ 15 | List of arrays. The alpha array has dimension (\code{num_components}, \code{num_samples}) and is simply a vector if \code{num_components = 1}. 16 | The xi and beta arrays have dimension (\code{num_components}, \code{num_groups}, \code{num_samples}) and are each simply a matrix if \code{num_components = 1}. 17 | The sigma array has dimension (\code{num_components}, \code{num_samples}) and is simply a vector if \code{num_components = 1}. 18 | } 19 | \description{ 20 | Extract raw sample values for each of the random effect parameter terms. 
21 | } 22 | \examples{ 23 | n <- 100 24 | p <- 5 25 | X <- matrix(runif(n*p), ncol = p) 26 | f_XW <- ( 27 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 28 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 29 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 30 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 31 | ) 32 | snr <- 3 33 | group_ids <- rep(c(1,2), n \%/\% 2) 34 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 35 | rfx_basis <- cbind(1, runif(n, -1, 1)) 36 | rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) 37 | E_y <- f_XW + rfx_term 38 | y <- E_y + rnorm(n, 0, 1)*(sd(E_y)/snr) 39 | test_set_pct <- 0.2 40 | n_test <- round(test_set_pct*n) 41 | n_train <- n - n_test 42 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 43 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 44 | X_test <- X[test_inds,] 45 | X_train <- X[train_inds,] 46 | y_test <- y[test_inds] 47 | y_train <- y[train_inds] 48 | rfx_group_ids_test <- group_ids[test_inds] 49 | rfx_group_ids_train <- group_ids[train_inds] 50 | rfx_basis_test <- rfx_basis[test_inds,] 51 | rfx_basis_train <- rfx_basis[train_inds,] 52 | rfx_term_test <- rfx_term[test_inds] 53 | rfx_term_train <- rfx_term[train_inds] 54 | bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test, 55 | rfx_group_ids_train = rfx_group_ids_train, 56 | rfx_group_ids_test = rfx_group_ids_test, 57 | rfx_basis_train = rfx_basis_train, 58 | rfx_basis_test = rfx_basis_test, 59 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 60 | rfx_samples <- getRandomEffectSamples(bart_model) 61 | } 62 | -------------------------------------------------------------------------------- /man/loadForestContainerCombinedJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadForestContainerCombinedJson} 4 | \alias{loadForestContainerCombinedJson} 5 | \title{Combine multiple JSON model objects 
containing forests (with the same hierarchy / schema) into a single forest_container} 6 | \usage{ 7 | loadForestContainerCombinedJson(json_object_list, json_forest_label) 8 | } 9 | \arguments{ 10 | \item{json_object_list}{List of objects of class \code{CppJson}} 11 | 12 | \item{json_forest_label}{Label referring to a particular forest (i.e. "forest_0") in the overall json hierarchy (must exist in every json object in the list)} 13 | } 14 | \value{ 15 | \code{ForestSamples} object 16 | } 17 | \description{ 18 | Combine multiple JSON model objects containing forests (with the same hierarchy / schema) into a single forest_container 19 | } 20 | \examples{ 21 | X <- matrix(runif(10*100), ncol = 10) 22 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 23 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 24 | bart_json <- list(saveBARTModelToJson(bart_model)) 25 | mean_forest <- loadForestContainerCombinedJson(bart_json, "forest_0") 26 | } 27 | -------------------------------------------------------------------------------- /man/loadForestContainerCombinedJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadForestContainerCombinedJsonString} 4 | \alias{loadForestContainerCombinedJsonString} 5 | \title{Combine multiple JSON strings representing model objects containing forests (with the same hierarchy / schema) into a single forest_container} 6 | \usage{ 7 | loadForestContainerCombinedJsonString(json_string_list, json_forest_label) 8 | } 9 | \arguments{ 10 | \item{json_string_list}{List of strings that parse into objects of type \code{CppJson}} 11 | 12 | \item{json_forest_label}{Label referring to a particular forest (i.e. 
"forest_0") in the overall json hierarchy (must exist in every json object in the list)} 13 | } 14 | \value{ 15 | \code{ForestSamples} object 16 | } 17 | \description{ 18 | Combine multiple JSON strings representing model objects containing forests (with the same hierarchy / schema) into a single forest_container 19 | } 20 | \examples{ 21 | X <- matrix(runif(10*100), ncol = 10) 22 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 23 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 24 | bart_json_string <- list(saveBARTModelToJsonString(bart_model)) 25 | mean_forest <- loadForestContainerCombinedJsonString(bart_json_string, "forest_0") 26 | } 27 | -------------------------------------------------------------------------------- /man/loadForestContainerJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadForestContainerJson} 4 | \alias{loadForestContainerJson} 5 | \title{Load a container of forest samples from json} 6 | \usage{ 7 | loadForestContainerJson(json_object, json_forest_label) 8 | } 9 | \arguments{ 10 | \item{json_object}{Object of class \code{CppJson}} 11 | 12 | \item{json_forest_label}{Label referring to a particular forest (i.e. 
"forest_0") in the overall json hierarchy} 13 | } 14 | \value{ 15 | \code{ForestSamples} object 16 | } 17 | \description{ 18 | Load a container of forest samples from json 19 | } 20 | \examples{ 21 | X <- matrix(runif(10*100), ncol = 10) 22 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 23 | bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) 24 | bart_json <- saveBARTModelToJson(bart_model) 25 | mean_forest <- loadForestContainerJson(bart_json, "forest_0") 26 | } 27 | -------------------------------------------------------------------------------- /man/loadRandomEffectSamplesCombinedJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadRandomEffectSamplesCombinedJson} 4 | \alias{loadRandomEffectSamplesCombinedJson} 5 | \title{Combine multiple JSON model objects containing random effects (with the same hierarchy / schema) into a single container} 6 | \usage{ 7 | loadRandomEffectSamplesCombinedJson(json_object_list, json_rfx_num) 8 | } 9 | \arguments{ 10 | \item{json_object_list}{List of objects of class \code{CppJson}} 11 | 12 | \item{json_rfx_num}{Integer index indicating the position of the random effects term to be unpacked} 13 | } 14 | \value{ 15 | \code{RandomEffectSamples} object 16 | } 17 | \description{ 18 | Combine multiple JSON model objects containing random effects (with the same hierarchy / schema) into a single container 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 10 23 | X <- matrix(runif(n*p), ncol = p) 24 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 25 | rfx_basis <- rep(1.0, n) 26 | y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 27 | bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, 28 | rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) 29 | bart_json <- list(saveBARTModelToJson(bart_model)) 30 | rfx_samples <- 
loadRandomEffectSamplesCombinedJson(bart_json, 0) 31 | } 32 | -------------------------------------------------------------------------------- /man/loadRandomEffectSamplesCombinedJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadRandomEffectSamplesCombinedJsonString} 4 | \alias{loadRandomEffectSamplesCombinedJsonString} 5 | \title{Combine multiple JSON strings representing model objects containing random effects (with the same hierarchy / schema) into a single container} 6 | \usage{ 7 | loadRandomEffectSamplesCombinedJsonString(json_string_list, json_rfx_num) 8 | } 9 | \arguments{ 10 | \item{json_string_list}{List of strings that parse into objects of type \code{CppJson}} 11 | 12 | \item{json_rfx_num}{Integer index indicating the position of the random effects term to be unpacked} 13 | } 14 | \value{ 15 | \code{RandomEffectSamples} object 16 | } 17 | \description{ 18 | Combine multiple JSON strings representing model objects containing random effects (with the same hierarchy / schema) into a single container 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 10 23 | X <- matrix(runif(n*p), ncol = p) 24 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 25 | rfx_basis <- rep(1.0, n) 26 | y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 27 | bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, 28 | rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) 29 | bart_json_string <- list(saveBARTModelToJsonString(bart_model)) 30 | rfx_samples <- loadRandomEffectSamplesCombinedJsonString(bart_json_string, 0) 31 | } 32 | -------------------------------------------------------------------------------- /man/loadRandomEffectSamplesJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please 
edit documentation in R/serialization.R 3 | \name{loadRandomEffectSamplesJson} 4 | \alias{loadRandomEffectSamplesJson} 5 | \title{Load a container of random effect samples from json} 6 | \usage{ 7 | loadRandomEffectSamplesJson(json_object, json_rfx_num) 8 | } 9 | \arguments{ 10 | \item{json_object}{Object of class \code{CppJson}} 11 | 12 | \item{json_rfx_num}{Integer index indicating the position of the random effects term to be unpacked} 13 | } 14 | \value{ 15 | \code{RandomEffectSamples} object 16 | } 17 | \description{ 18 | Load a container of random effect samples from json 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 10 23 | X <- matrix(runif(n*p), ncol = p) 24 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 25 | rfx_basis <- rep(1.0, n) 26 | y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 27 | bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, 28 | rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) 29 | bart_json <- saveBARTModelToJson(bart_model) 30 | rfx_samples <- loadRandomEffectSamplesJson(bart_json, 0) 31 | } 32 | -------------------------------------------------------------------------------- /man/loadScalarJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadScalarJson} 4 | \alias{loadScalarJson} 5 | \title{Load a scalar from json} 6 | \usage{ 7 | loadScalarJson(json_object, json_scalar_label, subfolder_name = NULL) 8 | } 9 | \arguments{ 10 | \item{json_object}{Object of class \code{CppJson}} 11 | 12 | \item{json_scalar_label}{Label referring to a particular scalar / string value (i.e. 
"num_samples") in the overall json hierarchy} 13 | 14 | \item{subfolder_name}{(Optional) Name of the subfolder / hierarchy under which scalar sits} 15 | } 16 | \value{ 17 | R scalar 18 | } 19 | \description{ 20 | Load a scalar from json 21 | } 22 | \examples{ 23 | example_scalar <- 5.4 24 | example_json <- createCppJson() 25 | example_json$add_scalar("myscalar", example_scalar) 26 | roundtrip_scalar <- loadScalarJson(example_json, "myscalar") 27 | } 28 | -------------------------------------------------------------------------------- /man/loadVectorJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/serialization.R 3 | \name{loadVectorJson} 4 | \alias{loadVectorJson} 5 | \title{Load a vector from json} 6 | \usage{ 7 | loadVectorJson(json_object, json_vector_label, subfolder_name = NULL) 8 | } 9 | \arguments{ 10 | \item{json_object}{Object of class \code{CppJson}} 11 | 12 | \item{json_vector_label}{Label referring to a particular vector (i.e. 
"sigma2_global_samples") in the overall json hierarchy} 13 | 14 | \item{subfolder_name}{(Optional) Name of the subfolder / hierarchy under which vector sits} 15 | } 16 | \value{ 17 | R vector 18 | } 19 | \description{ 20 | Load a vector from json 21 | } 22 | \examples{ 23 | example_vec <- runif(10) 24 | example_json <- createCppJson() 25 | example_json$add_vector("myvec", example_vec) 26 | roundtrip_vec <- loadVectorJson(example_json, "myvec") 27 | } 28 | -------------------------------------------------------------------------------- /man/predict.bartmodel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{predict.bartmodel} 4 | \alias{predict.bartmodel} 5 | \title{Predict from a sampled BART model on new data} 6 | \usage{ 7 | \method{predict}{bartmodel}( 8 | object, 9 | X, 10 | leaf_basis = NULL, 11 | rfx_group_ids = NULL, 12 | rfx_basis = NULL, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{object}{Object of type \code{bartmodel} containing draws of a regression forest and associated sampling outputs.} 18 | 19 | \item{X}{Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe.} 20 | 21 | \item{leaf_basis}{(Optional) Bases used for prediction (by e.g. dot product with leaf values). Default: \code{NULL}.} 22 | 23 | \item{rfx_group_ids}{(Optional) Test set group labels used for an additive random effects model. 24 | We do not currently support (but plan to in the near future), test set evaluation for group labels 25 | that were not in the training set.} 26 | 27 | \item{rfx_basis}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} 28 | 29 | \item{...}{(Optional) Other prediction parameters.} 30 | } 31 | \value{ 32 | List of prediction matrices. 
If model does not have random effects, the list has one element -- the predictions from the forest. 33 | If the model does have random effects, the list has three elements -- forest predictions, random effects predictions, and their sum (\code{y_hat}). 34 | } 35 | \description{ 36 | Predict from a sampled BART model on new data 37 | } 38 | \examples{ 39 | n <- 100 40 | p <- 5 41 | X <- matrix(runif(n*p), ncol = p) 42 | f_XW <- ( 43 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 44 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 45 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 46 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 47 | ) 48 | noise_sd <- 1 49 | y <- f_XW + rnorm(n, 0, noise_sd) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | y_test <- y[test_inds] 58 | y_train <- y[train_inds] 59 | bart_model <- bart(X_train = X_train, y_train = y_train, 60 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 61 | y_hat_test <- predict(bart_model, X_test)$y_hat 62 | } 63 | -------------------------------------------------------------------------------- /man/predict.bcfmodel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{predict.bcfmodel} 4 | \alias{predict.bcfmodel} 5 | \title{Predict from a sampled BCF model on new data} 6 | \usage{ 7 | \method{predict}{bcfmodel}( 8 | object, 9 | X, 10 | Z, 11 | propensity = NULL, 12 | rfx_group_ids = NULL, 13 | rfx_basis = NULL, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} 19 | 20 | \item{X}{Covariates used to determine tree leaf predictions for each observation. 
Must be passed as a matrix or dataframe.} 21 | 22 | \item{Z}{Treatments used for prediction.} 23 | 24 | \item{propensity}{(Optional) Propensities used for prediction.} 25 | 26 | \item{rfx_group_ids}{(Optional) Test set group labels used for an additive random effects model. 27 | We do not currently support (but plan to in the near future), test set evaluation for group labels 28 | that were not in the training set.} 29 | 30 | \item{rfx_basis}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} 31 | 32 | \item{...}{(Optional) Other prediction parameters.} 33 | } 34 | \value{ 35 | List of 3-5 \code{nrow(X)} by \code{object$num_samples} matrices: prognostic function estimates, treatment effect estimates, (optionally) random effects predictions, (optionally) variance forest predictions, and outcome predictions. 36 | } 37 | \description{ 38 | Predict from a sampled BCF model on new data 39 | } 40 | \examples{ 41 | n <- 500 42 | p <- 5 43 | X <- matrix(runif(n*p), ncol = p) 44 | mu_x <- ( 45 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 46 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 47 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 48 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 49 | ) 50 | pi_x <- ( 51 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 52 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 53 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 54 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 55 | ) 56 | tau_x <- ( 57 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 58 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 59 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 60 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 61 | ) 62 | Z <- rbinom(n, 1, pi_x) 63 | noise_sd <- 1 64 | y <- mu_x + tau_x*Z + rnorm(n, 0, noise_sd) 65 | test_set_pct <- 0.2 66 | n_test <- round(test_set_pct*n) 67 | n_train <- n - n_test 68 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 69 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 70 | X_test <- X[test_inds,] 71 | 
X_train <- X[train_inds,] 72 | pi_test <- pi_x[test_inds] 73 | pi_train <- pi_x[train_inds] 74 | Z_test <- Z[test_inds] 75 | Z_train <- Z[train_inds] 76 | y_test <- y[test_inds] 77 | y_train <- y[train_inds] 78 | mu_test <- mu_x[test_inds] 79 | mu_train <- mu_x[train_inds] 80 | tau_test <- tau_x[test_inds] 81 | tau_train <- tau_x[train_inds] 82 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, 83 | propensity_train = pi_train, num_gfr = 10, 84 | num_burnin = 0, num_mcmc = 10) 85 | preds <- predict(bcf_model, X_test, Z_test, pi_test) 86 | } 87 | -------------------------------------------------------------------------------- /man/preprocessPredictionData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{preprocessPredictionData} 4 | \alias{preprocessPredictionData} 5 | \title{Preprocess covariates. DataFrames will be preprocessed based on their column 6 | types. Matrices will be passed through assuming all columns are numeric.} 7 | \usage{ 8 | preprocessPredictionData(input_data, metadata) 9 | } 10 | \arguments{ 11 | \item{input_data}{Covariates, provided as either a dataframe or a matrix} 12 | 13 | \item{metadata}{List containing information on variables, including train set 14 | categories for categorical variables} 15 | } 16 | \value{ 17 | Preprocessed data with categorical variables appropriately handled 18 | } 19 | \description{ 20 | Preprocess covariates. DataFrames will be preprocessed based on their column 21 | types. Matrices will be passed through assuming all columns are numeric. 
22 | } 23 | \examples{ 24 | cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) 25 | metadata <- list(num_ordered_cat_vars = 0, num_unordered_cat_vars = 0, 26 | num_numeric_vars = 3, numeric_vars = c("x1", "x2", "x3")) 27 | X_preprocessed <- preprocessPredictionData(cov_df, metadata) 28 | } 29 | -------------------------------------------------------------------------------- /man/preprocessTrainData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{preprocessTrainData} 4 | \alias{preprocessTrainData} 5 | \title{Preprocess covariates. DataFrames will be preprocessed based on their column 6 | types. Matrices will be passed through assuming all columns are numeric.} 7 | \usage{ 8 | preprocessTrainData(input_data) 9 | } 10 | \arguments{ 11 | \item{input_data}{Covariates, provided as either a dataframe or a matrix} 12 | } 13 | \value{ 14 | List with preprocessed (unmodified) data and details on the number of each type 15 | of variable, unique categories associated with categorical variables, and the 16 | vector of feature types needed for calls to BART and BCF. 17 | } 18 | \description{ 19 | Preprocess covariates. DataFrames will be preprocessed based on their column 20 | types. Matrices will be passed through assuming all columns are numeric. 
21 | } 22 | \examples{ 23 | cov_mat <- matrix(1:12, ncol = 3) 24 | preprocess_list <- preprocessTrainData(cov_mat) 25 | X <- preprocess_list$X 26 | } 27 | -------------------------------------------------------------------------------- /man/resetActiveForest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forest.R 3 | \name{resetActiveForest} 4 | \alias{resetActiveForest} 5 | \title{Reset an active forest, either from a specific forest in a \code{ForestContainer} 6 | or to an ensemble of single-node (i.e. root) trees} 7 | \usage{ 8 | resetActiveForest(active_forest, forest_samples = NULL, forest_num = NULL) 9 | } 10 | \arguments{ 11 | \item{active_forest}{Current active forest} 12 | 13 | \item{forest_samples}{(Optional) Container of forest samples from which to re-initialize active forest. If not provided, active forest will be reset to an ensemble of single-node (i.e. root) trees.} 14 | 15 | \item{forest_num}{(Optional) Index of forest samples from which to initialize active forest. If not provided, active forest will be reset to an ensemble of single-node (i.e. root) trees.} 16 | } 17 | \value{ 18 | None 19 | } 20 | \description{ 21 | Reset an active forest, either from a specific forest in a \code{ForestContainer} 22 | or to an ensemble of single-node (i.e. 
root) trees 23 | } 24 | \examples{ 25 | num_trees <- 100 26 | leaf_dimension <- 1 27 | is_leaf_constant <- TRUE 28 | is_exponentiated <- FALSE 29 | active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 30 | forest_samples <- createForestSamples(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 31 | forest_samples$add_forest_with_constant_leaves(0.0) 32 | forest_samples$add_numeric_split_tree(0, 0, 0, 0, 0.5, -1.0, 1.0) 33 | forest_samples$add_numeric_split_tree(0, 1, 0, 1, 0.75, 3.4, 0.75) 34 | active_forest$set_root_leaves(0.1) 35 | resetActiveForest(active_forest, forest_samples, 0) 36 | resetActiveForest(active_forest) 37 | } 38 | -------------------------------------------------------------------------------- /man/resetForestModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forest.R 3 | \name{resetForestModel} 4 | \alias{resetForestModel} 5 | \title{Re-initialize a forest model (tracking data structures) from a specific forest in a \code{ForestContainer}} 6 | \usage{ 7 | resetForestModel(forest_model, forest, dataset, residual, is_mean_model) 8 | } 9 | \arguments{ 10 | \item{forest_model}{Forest model with tracking data structures} 11 | 12 | \item{forest}{Forest from which to re-initialize forest model} 13 | 14 | \item{dataset}{Training dataset object} 15 | 16 | \item{residual}{Residual which will also be updated} 17 | 18 | \item{is_mean_model}{Whether the model being updated is a conditional mean model} 19 | } 20 | \value{ 21 | None 22 | } 23 | \description{ 24 | Re-initialize a forest model (tracking data structures) from a specific forest in a \code{ForestContainer} 25 | } 26 | \examples{ 27 | n <- 100 28 | p <- 10 29 | num_trees <- 100 30 | leaf_dimension <- 1 31 | is_leaf_constant <- TRUE 32 | is_exponentiated <- FALSE 33 | alpha <- 0.95 34 | beta <- 2.0 35 | min_samples_leaf <- 2 
36 | max_depth <- 10 37 | feature_types <- as.integer(rep(0, p)) 38 | leaf_model <- 0 39 | sigma2 <- 1.0 40 | leaf_scale <- as.matrix(1.0) 41 | variable_weights <- rep(1/p, p) 42 | a_forest <- 1 43 | b_forest <- 1 44 | cutpoint_grid_size <- 100 45 | X <- matrix(runif(n*p), ncol = p) 46 | forest_dataset <- createForestDataset(X) 47 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(n) 48 | outcome <- createOutcome(y) 49 | rng <- createCppRNG(1234) 50 | global_model_config <- createGlobalModelConfig(global_error_variance=sigma2) 51 | forest_model_config <- createForestModelConfig(feature_types=feature_types, 52 | num_trees=num_trees, num_observations=n, 53 | num_features=p, alpha=alpha, beta=beta, 54 | min_samples_leaf=min_samples_leaf, 55 | max_depth=max_depth, 56 | variable_weights=variable_weights, 57 | cutpoint_grid_size=cutpoint_grid_size, 58 | leaf_model_type=leaf_model, 59 | leaf_model_scale=leaf_scale) 60 | forest_model <- createForestModel(forest_dataset, forest_model_config, global_model_config) 61 | active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 62 | forest_samples <- createForestSamples(num_trees, leaf_dimension, 63 | is_leaf_constant, is_exponentiated) 64 | active_forest$prepare_for_sampler(forest_dataset, outcome, forest_model, 0, 0.) 
65 | forest_model$sample_one_iteration( 66 | forest_dataset, outcome, forest_samples, active_forest, 67 | rng, forest_model_config, global_model_config, 68 | keep_forest = TRUE, gfr = FALSE 69 | ) 70 | resetActiveForest(active_forest, forest_samples, 0) 71 | resetForestModel(forest_model, active_forest, forest_dataset, outcome, TRUE) 72 | } 73 | -------------------------------------------------------------------------------- /man/resetRandomEffectsModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{resetRandomEffectsModel} 4 | \alias{resetRandomEffectsModel} 5 | \title{Reset a \code{RandomEffectsModel} object based on the parameters indexed by \code{sample_num} in a \code{RandomEffectsSamples} object} 6 | \usage{ 7 | resetRandomEffectsModel(rfx_model, rfx_samples, sample_num, sigma_alpha_init) 8 | } 9 | \arguments{ 10 | \item{rfx_model}{Object of type \code{RandomEffectsModel}.} 11 | 12 | \item{rfx_samples}{Object of type \code{RandomEffectSamples}.} 13 | 14 | \item{sample_num}{Index of sample stored in \code{rfx_samples} from which to reset the state of a random effects model. 
Zero-indexed, so resetting based on the first sample would require setting \code{sample_num = 0}.} 15 | 16 | \item{sigma_alpha_init}{Initial value of the "working parameter" scale parameter.} 17 | } 18 | \value{ 19 | None 20 | } 21 | \description{ 22 | Reset a \code{RandomEffectsModel} object based on the parameters indexed by \code{sample_num} in a \code{RandomEffectsSamples} object 23 | } 24 | \examples{ 25 | n <- 100 26 | p <- 10 27 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 28 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 29 | rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) 30 | y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 31 | y_std <- (y-mean(y))/sd(y) 32 | outcome <- createOutcome(y_std) 33 | rng <- createCppRNG(1234) 34 | num_groups <- length(unique(rfx_group_ids)) 35 | num_components <- ncol(rfx_basis) 36 | rfx_model <- createRandomEffectsModel(num_components, num_groups) 37 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 38 | rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) 39 | alpha_init <- rep(1,num_components) 40 | xi_init <- matrix(rep(alpha_init, num_groups),num_components,num_groups) 41 | sigma_alpha_init <- diag(1,num_components,num_components) 42 | sigma_xi_init <- diag(1,num_components,num_components) 43 | sigma_xi_shape <- 1 44 | sigma_xi_scale <- 1 45 | rfx_model$set_working_parameter(alpha_init) 46 | rfx_model$set_group_parameters(xi_init) 47 | rfx_model$set_working_parameter_cov(sigma_alpha_init) 48 | rfx_model$set_group_parameter_cov(sigma_xi_init) 49 | rfx_model$set_variance_prior_shape(sigma_xi_shape) 50 | rfx_model$set_variance_prior_scale(sigma_xi_scale) 51 | for (i in 1:3) { 52 | rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, 53 | rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, 54 | keep_sample=TRUE, global_variance=1.0, rng=rng) 55 | } 56 | resetRandomEffectsModel(rfx_model, rfx_samples, 0, 1.0) 57 | } 58 | 
-------------------------------------------------------------------------------- /man/resetRandomEffectsTracker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{resetRandomEffectsTracker} 4 | \alias{resetRandomEffectsTracker} 5 | \title{Reset a \code{RandomEffectsTracker} object based on the parameters indexed by \code{sample_num} in a \code{RandomEffectsSamples} object} 6 | \usage{ 7 | resetRandomEffectsTracker( 8 | rfx_tracker, 9 | rfx_model, 10 | rfx_dataset, 11 | residual, 12 | rfx_samples 13 | ) 14 | } 15 | \arguments{ 16 | \item{rfx_tracker}{Object of type \code{RandomEffectsTracker}.} 17 | 18 | \item{rfx_model}{Object of type \code{RandomEffectsModel}.} 19 | 20 | \item{rfx_dataset}{Object of type \code{RandomEffectsDataset}.} 21 | 22 | \item{residual}{Object of type \code{Outcome}.} 23 | 24 | \item{rfx_samples}{Object of type \code{RandomEffectSamples}.} 25 | } 26 | \value{ 27 | None 28 | } 29 | \description{ 30 | Reset a \code{RandomEffectsTracker} object based on the parameters indexed by \code{sample_num} in a \code{RandomEffectsSamples} object 31 | } 32 | \examples{ 33 | n <- 100 34 | p <- 10 35 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 36 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 37 | rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) 38 | y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 39 | y_std <- (y-mean(y))/sd(y) 40 | outcome <- createOutcome(y_std) 41 | rng <- createCppRNG(1234) 42 | num_groups <- length(unique(rfx_group_ids)) 43 | num_components <- ncol(rfx_basis) 44 | rfx_model <- createRandomEffectsModel(num_components, num_groups) 45 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 46 | rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) 47 | alpha_init <- rep(1,num_components) 48 | xi_init <- 
matrix(rep(alpha_init, num_groups),num_components,num_groups) 49 | sigma_alpha_init <- diag(1,num_components,num_components) 50 | sigma_xi_init <- diag(1,num_components,num_components) 51 | sigma_xi_shape <- 1 52 | sigma_xi_scale <- 1 53 | rfx_model$set_working_parameter(alpha_init) 54 | rfx_model$set_group_parameters(xi_init) 55 | rfx_model$set_working_parameter_cov(sigma_alpha_init) 56 | rfx_model$set_group_parameter_cov(sigma_xi_init) 57 | rfx_model$set_variance_prior_shape(sigma_xi_shape) 58 | rfx_model$set_variance_prior_scale(sigma_xi_scale) 59 | for (i in 1:3) { 60 | rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, 61 | rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, 62 | keep_sample=TRUE, global_variance=1.0, rng=rng) 63 | } 64 | resetRandomEffectsModel(rfx_model, rfx_samples, 0, 1.0) 65 | resetRandomEffectsTracker(rfx_tracker, rfx_model, rfx_dataset, outcome, rfx_samples) 66 | } 67 | -------------------------------------------------------------------------------- /man/rootResetRandomEffectsModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{rootResetRandomEffectsModel} 4 | \alias{rootResetRandomEffectsModel} 5 | \title{Reset a \code{RandomEffectsModel} object to its "default" state} 6 | \usage{ 7 | rootResetRandomEffectsModel( 8 | rfx_model, 9 | alpha_init, 10 | xi_init, 11 | sigma_alpha_init, 12 | sigma_xi_init, 13 | sigma_xi_shape, 14 | sigma_xi_scale 15 | ) 16 | } 17 | \arguments{ 18 | \item{rfx_model}{Object of type \code{RandomEffectsModel}.} 19 | 20 | \item{alpha_init}{Initial value of the "working parameter".} 21 | 22 | \item{xi_init}{Initial value of the "group parameters".} 23 | 24 | \item{sigma_alpha_init}{Initial value of the "working parameter" scale parameter.} 25 | 26 | \item{sigma_xi_init}{Initial value of the "group parameters" scale parameter.} 27 | 28 | 
\item{sigma_xi_shape}{Shape parameter for the inverse gamma variance model on the group parameters.} 29 | 30 | \item{sigma_xi_scale}{Scale parameter for the inverse gamma variance model on the group parameters.} 31 | } 32 | \value{ 33 | None 34 | } 35 | \description{ 36 | Reset a \code{RandomEffectsModel} object to its "default" state 37 | } 38 | \examples{ 39 | n <- 100 40 | p <- 10 41 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 42 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 43 | rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) 44 | y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 45 | y_std <- (y-mean(y))/sd(y) 46 | outcome <- createOutcome(y_std) 47 | rng <- createCppRNG(1234) 48 | num_groups <- length(unique(rfx_group_ids)) 49 | num_components <- ncol(rfx_basis) 50 | rfx_model <- createRandomEffectsModel(num_components, num_groups) 51 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 52 | rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) 53 | alpha_init <- rep(1,num_components) 54 | xi_init <- matrix(rep(alpha_init, num_groups),num_components,num_groups) 55 | sigma_alpha_init <- diag(1,num_components,num_components) 56 | sigma_xi_init <- diag(1,num_components,num_components) 57 | sigma_xi_shape <- 1 58 | sigma_xi_scale <- 1 59 | rfx_model$set_working_parameter(alpha_init) 60 | rfx_model$set_group_parameters(xi_init) 61 | rfx_model$set_working_parameter_cov(sigma_alpha_init) 62 | rfx_model$set_group_parameter_cov(sigma_xi_init) 63 | rfx_model$set_variance_prior_shape(sigma_xi_shape) 64 | rfx_model$set_variance_prior_scale(sigma_xi_scale) 65 | for (i in 1:3) { 66 | rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, 67 | rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, 68 | keep_sample=TRUE, global_variance=1.0, rng=rng) 69 | } 70 | rootResetRandomEffectsModel(rfx_model, alpha_init, xi_init, sigma_alpha_init, 71 | sigma_xi_init, sigma_xi_shape, 
sigma_xi_scale) 72 | } 73 | -------------------------------------------------------------------------------- /man/rootResetRandomEffectsTracker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random_effects.R 3 | \name{rootResetRandomEffectsTracker} 4 | \alias{rootResetRandomEffectsTracker} 5 | \title{Reset a \code{RandomEffectsTracker} object to its "default" state} 6 | \usage{ 7 | rootResetRandomEffectsTracker(rfx_tracker, rfx_model, rfx_dataset, residual) 8 | } 9 | \arguments{ 10 | \item{rfx_tracker}{Object of type \code{RandomEffectsTracker}.} 11 | 12 | \item{rfx_model}{Object of type \code{RandomEffectsModel}.} 13 | 14 | \item{rfx_dataset}{Object of type \code{RandomEffectsDataset}.} 15 | 16 | \item{residual}{Object of type \code{Outcome}.} 17 | } 18 | \value{ 19 | None 20 | } 21 | \description{ 22 | Reset a \code{RandomEffectsTracker} object to its "default" state 23 | } 24 | \examples{ 25 | n <- 100 26 | p <- 10 27 | rfx_group_ids <- sample(1:2, size = n, replace = TRUE) 28 | rfx_basis <- matrix(rep(1.0, n), ncol=1) 29 | rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) 30 | y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) 31 | y_std <- (y-mean(y))/sd(y) 32 | outcome <- createOutcome(y_std) 33 | rng <- createCppRNG(1234) 34 | num_groups <- length(unique(rfx_group_ids)) 35 | num_components <- ncol(rfx_basis) 36 | rfx_model <- createRandomEffectsModel(num_components, num_groups) 37 | rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) 38 | rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) 39 | alpha_init <- rep(1,num_components) 40 | xi_init <- matrix(rep(alpha_init, num_groups),num_components,num_groups) 41 | sigma_alpha_init <- diag(1,num_components,num_components) 42 | sigma_xi_init <- diag(1,num_components,num_components) 43 | sigma_xi_shape <- 1 44 | sigma_xi_scale <- 
1 45 | rfx_model$set_working_parameter(alpha_init) 46 | rfx_model$set_group_parameters(xi_init) 47 | rfx_model$set_working_parameter_cov(sigma_alpha_init) 48 | rfx_model$set_group_parameter_cov(sigma_xi_init) 49 | rfx_model$set_variance_prior_shape(sigma_xi_shape) 50 | rfx_model$set_variance_prior_scale(sigma_xi_scale) 51 | for (i in 1:3) { 52 | rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, 53 | rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, 54 | keep_sample=TRUE, global_variance=1.0, rng=rng) 55 | } 56 | rootResetRandomEffectsModel(rfx_model, alpha_init, xi_init, sigma_alpha_init, 57 | sigma_xi_init, sigma_xi_shape, sigma_xi_scale) 58 | rootResetRandomEffectsTracker(rfx_tracker, rfx_model, rfx_dataset, outcome) 59 | } 60 | -------------------------------------------------------------------------------- /man/sampleGlobalErrorVarianceOneIteration.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/variance.R 3 | \name{sampleGlobalErrorVarianceOneIteration} 4 | \alias{sampleGlobalErrorVarianceOneIteration} 5 | \title{Sample one iteration of the (inverse gamma) global variance model} 6 | \usage{ 7 | sampleGlobalErrorVarianceOneIteration(residual, dataset, rng, a, b) 8 | } 9 | \arguments{ 10 | \item{residual}{Outcome class} 11 | 12 | \item{dataset}{ForestDataset class} 13 | 14 | \item{rng}{C++ random number generator} 15 | 16 | \item{a}{Global variance shape parameter} 17 | 18 | \item{b}{Global variance scale parameter} 19 | } 20 | \value{ 21 | None 22 | } 23 | \description{ 24 | Sample one iteration of the (inverse gamma) global variance model 25 | } 26 | \examples{ 27 | X <- matrix(runif(10*100), ncol = 10) 28 | y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) 29 | y_std <- (y-mean(y))/sd(y) 30 | forest_dataset <- createForestDataset(X) 31 | outcome <- createOutcome(y_std) 32 | rng <- createCppRNG(1234) 33 | a <- 1.0 34 | b <- 
1.0 35 | sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome, forest_dataset, rng, a, b) 36 | } 37 | -------------------------------------------------------------------------------- /man/sampleLeafVarianceOneIteration.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/variance.R 3 | \name{sampleLeafVarianceOneIteration} 4 | \alias{sampleLeafVarianceOneIteration} 5 | \title{Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!)} 6 | \usage{ 7 | sampleLeafVarianceOneIteration(forest, rng, a, b) 8 | } 9 | \arguments{ 10 | \item{forest}{C++ forest} 11 | 12 | \item{rng}{C++ random number generator} 13 | 14 | \item{a}{Leaf variance shape parameter} 15 | 16 | \item{b}{Leaf variance scale parameter} 17 | } 18 | \value{ 19 | None 20 | } 21 | \description{ 22 | Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) 
23 | } 24 | \examples{ 25 | num_trees <- 100 26 | leaf_dimension <- 1 27 | is_leaf_constant <- TRUE 28 | is_exponentiated <- FALSE 29 | active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) 30 | rng <- createCppRNG(1234) 31 | a <- 1.0 32 | b <- 1.0 33 | tau <- sampleLeafVarianceOneIteration(active_forest, rng, a, b) 34 | } 35 | -------------------------------------------------------------------------------- /man/saveBARTModelToJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{saveBARTModelToJson} 4 | \alias{saveBARTModelToJson} 5 | \title{Convert the persistent aspects of a BART model to (in-memory) JSON} 6 | \usage{ 7 | saveBARTModelToJson(object) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} 11 | } 12 | \value{ 13 | Object of type \code{CppJson} 14 | } 15 | \description{ 16 | Convert the persistent aspects of a BART model to (in-memory) JSON 17 | } 18 | \examples{ 19 | n <- 100 20 | p <- 5 21 | X <- matrix(runif(n*p), ncol = p) 22 | f_XW <- ( 23 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 24 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 25 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 26 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 27 | ) 28 | noise_sd <- 1 29 | y <- f_XW + rnorm(n, 0, noise_sd) 30 | test_set_pct <- 0.2 31 | n_test <- round(test_set_pct*n) 32 | n_train <- n - n_test 33 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 34 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 35 | X_test <- X[test_inds,] 36 | X_train <- X[train_inds,] 37 | y_test <- y[test_inds] 38 | y_train <- y[train_inds] 39 | bart_model <- bart(X_train = X_train, y_train = y_train, 40 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 41 | bart_json <- saveBARTModelToJson(bart_model) 42 | } 43 | 
-------------------------------------------------------------------------------- /man/saveBARTModelToJsonFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{saveBARTModelToJsonFile} 4 | \alias{saveBARTModelToJsonFile} 5 | \title{Convert the persistent aspects of a BART model to (in-memory) JSON and save to a file} 6 | \usage{ 7 | saveBARTModelToJsonFile(object, filename) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} 11 | 12 | \item{filename}{String of filepath, must end in ".json"} 13 | } 14 | \value{ 15 | None 16 | } 17 | \description{ 18 | Convert the persistent aspects of a BART model to (in-memory) JSON and save to a file 19 | } 20 | \examples{ 21 | n <- 100 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | f_XW <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 29 | ) 30 | noise_sd <- 1 31 | y <- f_XW + rnorm(n, 0, noise_sd) 32 | test_set_pct <- 0.2 33 | n_test <- round(test_set_pct*n) 34 | n_train <- n - n_test 35 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 36 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 37 | X_test <- X[test_inds,] 38 | X_train <- X[train_inds,] 39 | y_test <- y[test_inds] 40 | y_train <- y[train_inds] 41 | bart_model <- bart(X_train = X_train, y_train = y_train, 42 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 43 | tmpjson <- tempfile(fileext = ".json") 44 | saveBARTModelToJsonFile(bart_model, file.path(tmpjson)) 45 | unlink(tmpjson) 46 | } 47 | -------------------------------------------------------------------------------- /man/saveBARTModelToJsonString.Rd: -------------------------------------------------------------------------------- 1 | 
% Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bart.R 3 | \name{saveBARTModelToJsonString} 4 | \alias{saveBARTModelToJsonString} 5 | \title{Convert the persistent aspects of a BART model to (in-memory) JSON string} 6 | \usage{ 7 | saveBARTModelToJsonString(object) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} 11 | } 12 | \value{ 13 | in-memory JSON string 14 | } 15 | \description{ 16 | Convert the persistent aspects of a BART model to (in-memory) JSON string 17 | } 18 | \examples{ 19 | n <- 100 20 | p <- 5 21 | X <- matrix(runif(n*p), ncol = p) 22 | f_XW <- ( 23 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 24 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 25 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 26 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 27 | ) 28 | noise_sd <- 1 29 | y <- f_XW + rnorm(n, 0, noise_sd) 30 | test_set_pct <- 0.2 31 | n_test <- round(test_set_pct*n) 32 | n_train <- n - n_test 33 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 34 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 35 | X_test <- X[test_inds,] 36 | X_train <- X[train_inds,] 37 | y_test <- y[test_inds] 38 | y_train <- y[train_inds] 39 | bart_model <- bart(X_train = X_train, y_train = y_train, 40 | num_gfr = 10, num_burnin = 0, num_mcmc = 10) 41 | bart_json_string <- saveBARTModelToJsonString(bart_model) 42 | } 43 | -------------------------------------------------------------------------------- /man/saveBCFModelToJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{saveBCFModelToJson} 4 | \alias{saveBCFModelToJson} 5 | \title{Convert the persistent aspects of a BCF model to (in-memory) JSON} 6 | \usage{ 7 | saveBCFModelToJson(object) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bcfmodel} 
containing draws of a Bayesian causal forest model and associated sampling outputs.} 11 | } 12 | \value{ 13 | Object of type \code{CppJson} 14 | } 15 | \description{ 16 | Convert the persistent aspects of a BCF model to (in-memory) JSON 17 | } 18 | \examples{ 19 | n <- 500 20 | p <- 5 21 | X <- matrix(runif(n*p), ncol = p) 22 | mu_x <- ( 23 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 24 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 25 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 26 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 27 | ) 28 | pi_x <- ( 29 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 30 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 31 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 32 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 33 | ) 34 | tau_x <- ( 35 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 36 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 37 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 38 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 39 | ) 40 | Z <- rbinom(n, 1, pi_x) 41 | E_XZ <- mu_x + Z*tau_x 42 | snr <- 3 43 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 44 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 45 | rfx_basis <- cbind(1, runif(n, -1, 1)) 46 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 47 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 48 | test_set_pct <- 0.2 49 | n_test <- round(test_set_pct*n) 50 | n_train <- n - n_test 51 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 52 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 53 | X_test <- X[test_inds,] 54 | X_train <- X[train_inds,] 55 | pi_test <- pi_x[test_inds] 56 | pi_train <- pi_x[train_inds] 57 | Z_test <- Z[test_inds] 58 | Z_train <- Z[train_inds] 59 | y_test <- y[test_inds] 60 | y_train <- y[train_inds] 61 | mu_test <- mu_x[test_inds] 62 | mu_train <- mu_x[train_inds] 63 | tau_test <- tau_x[test_inds] 64 | tau_train <- tau_x[train_inds] 65 | rfx_group_ids_test <- rfx_group_ids[test_inds] 66 | rfx_group_ids_train <- rfx_group_ids[train_inds] 67 | 
rfx_basis_test <- rfx_basis[test_inds,] 68 | rfx_basis_train <- rfx_basis[train_inds,] 69 | rfx_term_test <- rfx_term[test_inds] 70 | rfx_term_train <- rfx_term[train_inds] 71 | mu_params <- list(sample_sigma2_leaf = TRUE) 72 | tau_params <- list(sample_sigma2_leaf = FALSE) 73 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, 74 | propensity_train = pi_train, 75 | rfx_group_ids_train = rfx_group_ids_train, 76 | rfx_basis_train = rfx_basis_train, X_test = X_test, 77 | Z_test = Z_test, propensity_test = pi_test, 78 | rfx_group_ids_test = rfx_group_ids_test, 79 | rfx_basis_test = rfx_basis_test, 80 | num_gfr = 10, num_burnin = 0, num_mcmc = 10, 81 | prognostic_forest_params = mu_params, 82 | treatment_effect_forest_params = tau_params) 83 | bcf_json <- saveBCFModelToJson(bcf_model) 84 | } 85 | -------------------------------------------------------------------------------- /man/saveBCFModelToJsonFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{saveBCFModelToJsonFile} 4 | \alias{saveBCFModelToJsonFile} 5 | \title{Convert the persistent aspects of a BCF model to (in-memory) JSON and save to a file} 6 | \usage{ 7 | saveBCFModelToJsonFile(object, filename) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} 11 | 12 | \item{filename}{String of filepath, must end in ".json"} 13 | } 14 | \value{ 15 | None 16 | } 17 | \description{ 18 | Convert the persistent aspects of a BCF model to (in-memory) JSON and save to a file 19 | } 20 | \examples{ 21 | n <- 500 22 | p <- 5 23 | X <- matrix(runif(n*p), ncol = p) 24 | mu_x <- ( 25 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 26 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 27 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 28 | ((0.75 <= X[,1]) & 
(1 > X[,1])) * (7.5) 29 | ) 30 | pi_x <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 35 | ) 36 | tau_x <- ( 37 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 38 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 39 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 40 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 41 | ) 42 | Z <- rbinom(n, 1, pi_x) 43 | E_XZ <- mu_x + Z*tau_x 44 | snr <- 3 45 | rfx_group_ids <- rep(c(1,2), n \%/\% 2) 46 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 47 | rfx_basis <- cbind(1, runif(n, -1, 1)) 48 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 49 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 50 | test_set_pct <- 0.2 51 | n_test <- round(test_set_pct*n) 52 | n_train <- n - n_test 53 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 54 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 55 | X_test <- X[test_inds,] 56 | X_train <- X[train_inds,] 57 | pi_test <- pi_x[test_inds] 58 | pi_train <- pi_x[train_inds] 59 | Z_test <- Z[test_inds] 60 | Z_train <- Z[train_inds] 61 | y_test <- y[test_inds] 62 | y_train <- y[train_inds] 63 | mu_test <- mu_x[test_inds] 64 | mu_train <- mu_x[train_inds] 65 | tau_test <- tau_x[test_inds] 66 | tau_train <- tau_x[train_inds] 67 | rfx_group_ids_test <- rfx_group_ids[test_inds] 68 | rfx_group_ids_train <- rfx_group_ids[train_inds] 69 | rfx_basis_test <- rfx_basis[test_inds,] 70 | rfx_basis_train <- rfx_basis[train_inds,] 71 | rfx_term_test <- rfx_term[test_inds] 72 | rfx_term_train <- rfx_term[train_inds] 73 | mu_params <- list(sample_sigma2_leaf = TRUE) 74 | tau_params <- list(sample_sigma2_leaf = FALSE) 75 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, 76 | propensity_train = pi_train, 77 | rfx_group_ids_train = rfx_group_ids_train, 78 | rfx_basis_train = rfx_basis_train, X_test = X_test, 79 | Z_test = 
Z_test, propensity_test = pi_test, 80 | rfx_group_ids_test = rfx_group_ids_test, 81 | rfx_basis_test = rfx_basis_test, 82 | num_gfr = 10, num_burnin = 0, num_mcmc = 10, 83 | prognostic_forest_params = mu_params, 84 | treatment_effect_forest_params = tau_params) 85 | tmpjson <- tempfile(fileext = ".json") 86 | saveBCFModelToJsonFile(bcf_model, file.path(tmpjson)) 87 | unlink(tmpjson) 88 | } 89 | -------------------------------------------------------------------------------- /man/saveBCFModelToJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bcf.R 3 | \name{saveBCFModelToJsonString} 4 | \alias{saveBCFModelToJsonString} 5 | \title{Convert the persistent aspects of a BCF model to (in-memory) JSON string} 6 | \usage{ 7 | saveBCFModelToJsonString(object) 8 | } 9 | \arguments{ 10 | \item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} 11 | } 12 | \value{ 13 | JSON string 14 | } 15 | \description{ 16 | Convert the persistent aspects of a BCF model to (in-memory) JSON string 17 | } 18 | \examples{ 19 | n <- 500 20 | p <- 5 21 | X <- matrix(runif(n*p), ncol = p) 22 | mu_x <- ( 23 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 24 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 25 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 26 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 27 | ) 28 | pi_x <- ( 29 | ((0 <= X[,1]) & (0.25 > X[,1])) * (0.2) + 30 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (0.4) + 31 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (0.6) + 32 | ((0.75 <= X[,1]) & (1 > X[,1])) * (0.8) 33 | ) 34 | tau_x <- ( 35 | ((0 <= X[,2]) & (0.25 > X[,2])) * (0.5) + 36 | ((0.25 <= X[,2]) & (0.5 > X[,2])) * (1.0) + 37 | ((0.5 <= X[,2]) & (0.75 > X[,2])) * (1.5) + 38 | ((0.75 <= X[,2]) & (1 > X[,2])) * (2.0) 39 | ) 40 | Z <- rbinom(n, 1, pi_x) 41 | E_XZ <- mu_x + Z*tau_x 42 | snr <- 3 43 | 
rfx_group_ids <- rep(c(1,2), n \%/\% 2) 44 | rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) 45 | rfx_basis <- cbind(1, runif(n, -1, 1)) 46 | rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) 47 | y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 48 | test_set_pct <- 0.2 49 | n_test <- round(test_set_pct*n) 50 | n_train <- n - n_test 51 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 52 | train_inds <- (1:n)[!((1:n) \%in\% test_inds)] 53 | X_test <- X[test_inds,] 54 | X_train <- X[train_inds,] 55 | pi_test <- pi_x[test_inds] 56 | pi_train <- pi_x[train_inds] 57 | Z_test <- Z[test_inds] 58 | Z_train <- Z[train_inds] 59 | y_test <- y[test_inds] 60 | y_train <- y[train_inds] 61 | mu_test <- mu_x[test_inds] 62 | mu_train <- mu_x[train_inds] 63 | tau_test <- tau_x[test_inds] 64 | tau_train <- tau_x[train_inds] 65 | rfx_group_ids_test <- rfx_group_ids[test_inds] 66 | rfx_group_ids_train <- rfx_group_ids[train_inds] 67 | rfx_basis_test <- rfx_basis[test_inds,] 68 | rfx_basis_train <- rfx_basis[train_inds,] 69 | rfx_term_test <- rfx_term[test_inds] 70 | rfx_term_train <- rfx_term[train_inds] 71 | mu_params <- list(sample_sigma2_leaf = TRUE) 72 | tau_params <- list(sample_sigma2_leaf = FALSE) 73 | bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, 74 | propensity_train = pi_train, 75 | rfx_group_ids_train = rfx_group_ids_train, 76 | rfx_basis_train = rfx_basis_train, X_test = X_test, 77 | Z_test = Z_test, propensity_test = pi_test, 78 | rfx_group_ids_test = rfx_group_ids_test, 79 | rfx_basis_test = rfx_basis_test, 80 | num_gfr = 10, num_burnin = 0, num_mcmc = 10, 81 | prognostic_forest_params = mu_params, 82 | treatment_effect_forest_params = tau_params) 83 | saveBCFModelToJsonString(bcf_model) 84 | } 85 | -------------------------------------------------------------------------------- /man/savePreprocessorToJsonString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by 
roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{savePreprocessorToJsonString} 4 | \alias{savePreprocessorToJsonString} 5 | \title{Convert the persistent aspects of a covariate preprocessor to (in-memory) JSON string} 6 | \usage{ 7 | savePreprocessorToJsonString(object) 8 | } 9 | \arguments{ 10 | \item{object}{List containing information on variables, including train set 11 | categories for categorical variables} 12 | } 13 | \value{ 14 | in-memory JSON string 15 | } 16 | \description{ 17 | Convert the persistent aspects of a covariate preprocessor to (in-memory) JSON string 18 | } 19 | \examples{ 20 | cov_mat <- matrix(1:12, ncol = 3) 21 | preprocess_list <- preprocessTrainData(cov_mat) 22 | preprocessor_json_string <- savePreprocessorToJsonString(preprocess_list$metadata) 23 | } 24 | -------------------------------------------------------------------------------- /man/stochtree-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/stochtree-package.R 3 | \docType{package} 4 | \name{stochtree-package} 5 | \alias{stochtree} 6 | \alias{stochtree-package} 7 | \title{stochtree: Stochastic Tree Ensembles (XBART and BART) for Supervised Learning and Causal Inference} 8 | \description{ 9 | Flexible stochastic tree ensemble software. Robust implementations of Bayesian Additive Regression Trees (BART) Chipman, George, McCulloch (2010) \doi{10.1214/09-AOAS285} for supervised learning and Bayesian Causal Forests (BCF) Hahn, Murray, Carvalho (2020) \doi{10.1214/19-BA1195} for causal inference. Enables model serialization and parallel sampling and provides a low-level interface for custom stochastic forest samplers. 
10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://stochtree.ai/} 15 | \item \url{https://github.com/StochasticTree/stochtree} 16 | \item Report bugs at \url{https://github.com/StochasticTree/stochtree/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Drew Herren \email{drewherrenopensource@gmail.com} (\href{https://orcid.org/0000-0003-4109-6611}{ORCID}) 22 | 23 | Authors: 24 | \itemize{ 25 | \item Richard Hahn 26 | \item Jared Murray 27 | \item Carlos Carvalho 28 | \item Jingyu He 29 | } 30 | 31 | Other contributors: 32 | \itemize{ 33 | \item Pedro Lima [contributor] 34 | \item stochtree contributors [copyright holder] 35 | \item Eigen contributors (C++ source uses the Eigen library for matrix operations, see inst/COPYRIGHTS) [copyright holder] 36 | \item xgboost contributors (C++ tree code and related operations include or are inspired by code from the xgboost library, see inst/COPYRIGHTS) [copyright holder] 37 | \item treelite contributors (C++ tree code and related operations include or are inspired by code from the treelite library, see inst/COPYRIGHTS) [copyright holder] 38 | \item Microsoft Corporation (C++ I/O and various project structure code include or are inspired by code from the LightGBM library, which is a copyright of Microsoft, see inst/COPYRIGHTS) [copyright holder] 39 | \item Niels Lohmann (C++ source uses the JSON for Modern C++ library for JSON operations, see inst/COPYRIGHTS) [copyright holder] 40 | \item Daniel Lemire (C++ source uses the fast_double_parser library internally, see inst/COPYRIGHTS) [copyright holder] 41 | \item Victor Zverovich (C++ source uses the fmt library internally, see inst/COPYRIGHTS) [copyright holder] 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel", 5 
| "ninja", 6 | "cmake>=3.12", 7 | "numpy", 8 | "pandas", 9 | "scipy", 10 | "scikit-learn" 11 | ] 12 | build-backend = "setuptools.build_meta" 13 | 14 | [project] 15 | name = "stochtree" 16 | version = "0.1.0" 17 | dynamic = ["readme", "optional-dependencies", "license"] 18 | description = "Stochastic Tree Ensembles for Machine Learning and Causal Inference" 19 | requires-python = ">=3.8.0" 20 | classifiers = [ 21 | "Development Status :: 3 - Alpha", 22 | "Intended Audience :: Science/Research", 23 | "License :: OSI Approved :: MIT License", 24 | "Operating System :: MacOS", 25 | "Operating System :: Microsoft :: Windows", 26 | "Operating System :: POSIX :: Linux", 27 | "Programming Language :: Python :: 3.8", 28 | "Programming Language :: Python :: 3.9", 29 | "Programming Language :: Python :: 3.10", 30 | "Programming Language :: Python :: 3.11", 31 | "Programming Language :: Python :: 3.12", 32 | "Programming Language :: Python :: 3.13", 33 | "Topic :: Scientific/Engineering :: Artificial Intelligence" 34 | ] 35 | authors = [ 36 | {name = "Drew Herren", email = "drewherrenopensource@gmail.com"} 37 | ] 38 | 39 | [project.urls] 40 | Homepage = "https://stochtree.ai/" 41 | Documentation = "https://stochtree.ai/python_docs/index.html" 42 | Repository = "https://github.com/StochasticTree/stochtree" 43 | Issues = "https://github.com/StochasticTree/stochtree/issues" 44 | -------------------------------------------------------------------------------- /python_docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /python_docs/README.md: -------------------------------------------------------------------------------- 1 | # Python Package Documentation 2 | 3 | ## Building Documentation Locally 4 | 5 | The online documentation is built in the doc-specific `StochasticTree/stochtree-python` repo (see [here](https://github.com/StochasticTree/stochtree-python/blob/main/.github/workflows/docs.yml) for the Github workflow). 6 | To build the documentation locally, first ensure that you have [Sphinx](https://www.sphinx-doc.org/en/master/) installed, then navigate to the python package's main directory (i.e. `cd [path/to/stochtree]`), 7 | install the package, and run `sphinx-build` as below 8 | 9 | ``` 10 | pip install --upgrade pip 11 | pip install -r python_docs/requirements.txt 12 | pip install . 13 | sphinx-build -M html python_docs/source/ python_docs/build/ 14 | ``` 15 | 16 | ## Documentation Style 17 | 18 | Module (class, function, etc...) documentation follows [the numpy standard](https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard), 19 | applied in Sphinx using the [napoleon](https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html) extension. 
20 | 21 | -------------------------------------------------------------------------------- /python_docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /python_docs/requirements.txt: -------------------------------------------------------------------------------- 1 | alabaster==0.7.13 2 | Babel==2.15.0 3 | beautifulsoup4==4.12.3 4 | certifi==2024.2.2 5 | charset-normalizer==3.3.2 6 | docutils==0.20.1 7 | furo==2024.5.6 8 | idna==3.7 9 | imagesize==1.4.1 10 | importlib_metadata==7.1.0 11 | Jinja2==3.1.4 12 | joblib==1.4.2 13 | MarkupSafe==2.1.5 14 | numpy==1.24.4 15 | packaging==24.0 16 | pandas==2.0.3 17 | pybind11==2.12.0 18 | Pygments==2.18.0 19 | python-dateutil==2.9.0.post0 20 | pytz==2024.1 21 | requests==2.32.2 22 | scikit-learn==1.3.2 23 | scipy==1.10.1 24 | six==1.16.0 25 | snowballstemmer==2.2.0 26 | soupsieve==2.5 27 | Sphinx==7.1.2 28 | sphinx-basic-ng==1.0.0b2 29 | sphinxcontrib-applehelp==1.0.4 30 | 
sphinxcontrib-devhelp==1.0.2 31 | sphinxcontrib-htmlhelp==2.0.1 32 | sphinxcontrib-jsmath==1.0.1 33 | sphinxcontrib-qthelp==1.0.3 34 | sphinxcontrib-serializinghtml==1.1.5 35 | threadpoolctl==3.5.0 36 | tzdata==2024.1 37 | urllib3==2.2.1 38 | zipp==3.18.2 39 | -------------------------------------------------------------------------------- /python_docs/source/api.rst: -------------------------------------------------------------------------------- 1 | StochTree API 2 | ============= 3 | 4 | BART 5 | ---- 6 | 7 | .. autoclass:: stochtree.bart.BARTModel 8 | :members: sample, predict 9 | 10 | BCF 11 | --- 12 | 13 | .. autoclass:: stochtree.bcf.BCFModel 14 | :members: sample, predict, predict_tau 15 | -------------------------------------------------------------------------------- /python_docs/source/causal.rst: -------------------------------------------------------------------------------- 1 | Causal Inference 2 | ================ 3 | 4 | This vignette provides a quick overview (using simulated data) of how to use ``stochtree`` for causal inference. 5 | Start by loading stochtree's ``BCFModel`` class and a number of other packages. 6 | 7 | .. code-block:: python 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import seaborn as sns 12 | import matplotlib.pyplot as plt 13 | from stochtree import BCFModel 14 | from sklearn.model_selection import train_test_split 15 | 16 | Now, we generate a simulated causal inference problem 17 | 18 | .. 
code-block:: python 19 | 20 | # RNG 21 | random_seed = 101 22 | rng = np.random.default_rng(random_seed) 23 | 24 | # Generate covariates and basis 25 | n = 1000 26 | p_X = 5 27 | X = rng.uniform(0, 1, (n, p_X)) 28 | pi_X = 0.25 + 0.5*X[:,0] 29 | Z = rng.binomial(1, pi_X, n).astype(float) 30 | 31 | # Define the outcome mean functions (prognostic and treatment effects) 32 | mu_X = pi_X*5 33 | # tau_X = np.sin(X[:,1]*2*np.pi) 34 | tau_X = X[:,1]*2 35 | 36 | # Generate outcome 37 | epsilon = rng.normal(0, 1, n) 38 | y = mu_X + tau_X*Z + epsilon 39 | 40 | Split the dataset into train and test sets 41 | 42 | .. code-block:: python 43 | 44 | sample_inds = np.arange(n) 45 | train_inds, test_inds = train_test_split(sample_inds, test_size=0.5) 46 | X_train = X[train_inds,:] 47 | X_test = X[test_inds,:] 48 | Z_train = Z[train_inds] 49 | Z_test = Z[test_inds] 50 | y_train = y[train_inds] 51 | y_test = y[test_inds] 52 | pi_train = pi_X[train_inds] 53 | pi_test = pi_X[test_inds] 54 | mu_train = mu_X[train_inds] 55 | mu_test = mu_X[test_inds] 56 | tau_train = tau_X[train_inds] 57 | tau_test = tau_X[test_inds] 58 | 59 | Initialize and run a BCF sampler for 1000 iterations (after 10 "warm-start" draws) 60 | 61 | .. code-block:: python 62 | 63 | bcf_model = BCFModel() 64 | bcf_model.sample(X_train, Z_train, y_train, pi_train, X_test, Z_test, pi_test, num_gfr=10, num_mcmc=1000) 65 | -------------------------------------------------------------------------------- /python_docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # import os 7 | # import sys 8 | # sys.path.insert(0, os.path.abspath('../..')) 9 | 10 | # -- Project information ----------------------------------------------------- 11 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 12 | 13 | project = 'stochtree' 14 | copyright = '2024, Drew Herren' 15 | author = 'Drew Herren' 16 | release = '0.0.1' 17 | 18 | # -- General configuration --------------------------------------------------- 19 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 20 | 21 | extensions = [ 22 | 'sphinx.ext.autodoc', 23 | 'sphinx.ext.autosummary', 24 | ] 25 | 26 | templates_path = ['_templates'] 27 | exclude_patterns = [] 28 | 29 | 30 | 31 | # -- Options for HTML output ------------------------------------------------- 32 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 33 | 34 | html_theme = 'furo' 35 | html_static_path = ['_static'] 36 | -------------------------------------------------------------------------------- /python_docs/source/index.rst: -------------------------------------------------------------------------------- 1 | StochTree 2 | ========= 3 | 4 | ``stochtree`` runs stochastic machine learning algorithms for supervised learning and causal inference. 5 | For details on installing the package, see the :doc:`Installation ` page. Once you have ``stochtree`` installed, 6 | the :doc:`Supervised Learning ` and :doc:`Causal Inference ` vignettes provide some guidance on 7 | using the package for your use case. 8 | 9 | .. We also support a lower-level interface to the underlying C++ data structures which can allow for custom sampling routines 10 | .. (i.e. interspersing a BART forest with a neural network, a complicated variance sampler, etc...). 
This interface is introduced 11 | .. in the :doc:`Prototype ` vignette. 12 | 13 | For complete function / class documentation, see the :doc:`API ` page. 14 | 15 | .. toctree:: 16 | install 17 | supervised 18 | causal 19 | api 20 | -------------------------------------------------------------------------------- /python_docs/source/supervised.rst: -------------------------------------------------------------------------------- 1 | Supervised Learning 2 | =================== 3 | 4 | This vignette provides a quick overview (using simulated data) of how to use ``stochtree`` for supervised learning. 5 | Start by loading stochtree's ``BARTModel`` class and a number of other packages. 6 | 7 | .. code-block:: python 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import seaborn as sns 12 | import matplotlib.pyplot as plt 13 | from stochtree import BARTModel 14 | from sklearn.model_selection import train_test_split 15 | 16 | Now, we generate a simulated prediction problem 17 | 18 | .. code-block:: python 19 | 20 | # RNG 21 | random_seed = 1234 22 | rng = np.random.default_rng(random_seed) 23 | 24 | # Generate covariates and basis 25 | n = 1000 26 | p_X = 10 27 | p_W = 1 28 | X = rng.uniform(0, 1, (n, p_X)) 29 | W = rng.uniform(0, 1, (n, p_W)) 30 | 31 | # Define the outcome mean function 32 | def outcome_mean(X, W): 33 | return np.where( 34 | (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * W[:,0], 35 | np.where( 36 | (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * W[:,0], 37 | np.where( 38 | (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * W[:,0], 39 | 7.5 * W[:,0] 40 | ) 41 | ) 42 | ) 43 | 44 | # Generate outcome 45 | epsilon = rng.normal(0, 1, n) 46 | y = outcome_mean(X, W) + epsilon 47 | 48 | # Standardize outcome 49 | y_bar = np.mean(y) 50 | y_std = np.std(y) 51 | resid = (y-y_bar)/y_std 52 | 53 | Split the dataset into train and test sets 54 | 55 | .. 
code-block:: python 56 | 57 | sample_inds = np.arange(n) 58 | train_inds, test_inds = train_test_split(sample_inds, test_size=0.5) 59 | X_train = X[train_inds,:] 60 | X_test = X[test_inds,:] 61 | basis_train = W[train_inds,:] 62 | basis_test = W[test_inds,:] 63 | y_train = y[train_inds] 64 | y_test = y[test_inds] 65 | 66 | Initialize and run a BART sampler for 100 iterations (after 10 "warm-start" draws) 67 | 68 | .. code-block:: python 69 | 70 | bart_model = BARTModel() 71 | bart_model.sample(X_train=X_train, y_train=y_train, leaf_basis_train=basis_train, X_test=X_test, leaf_basis_test=basis_test, num_gfr=10, num_mcmc=100) 72 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | exceptiongroup==1.2.1 2 | iniconfig==2.0.0 3 | joblib==1.4.2 4 | numpy==1.24.4 5 | packaging==24.1 6 | pandas==2.0.3 7 | pluggy==1.5.0 8 | pybind11==2.12.0 9 | pytest==8.2.2 10 | python-dateutil==2.9.0.post0 11 | pytz==2024.1 12 | scikit-learn==1.3.2 13 | scipy==1.10.1 14 | six==1.16.0 15 | threadpoolctl==3.5.0 16 | tomli==2.0.1 17 | tzdata==2024.1 18 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | # package root 2 | PKGROOT=.. 
3 | 4 | STOCHTREE_CPPFLAGS = -DSTOCHTREE_R_BUILD 5 | 6 | # PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/deps/eigen -I$(PKGROOT)/deps/fmt/include -I$(PKGROOT)/deps/fast_double_parser/include -I$(PKGROOT)/deps/boost_math/include $(STOCHTREE_CPPFLAGS) 7 | PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/deps/eigen -I$(PKGROOT)/deps/fmt/include -I$(PKGROOT)/deps/fast_double_parser/include $(STOCHTREE_CPPFLAGS) 8 | 9 | CXX_STD=CXX17 10 | 11 | OBJECTS = \ 12 | forest.o \ 13 | kernel.o \ 14 | R_data.o \ 15 | R_random_effects.o \ 16 | sampler.o \ 17 | serialization.o \ 18 | cpp11.o \ 19 | container.o \ 20 | cutpoint_candidates.o \ 21 | data.o \ 22 | io.o \ 23 | leaf_model.o \ 24 | partition_tracker.o \ 25 | random_effects.o \ 26 | tree.o 27 | -------------------------------------------------------------------------------- /src/kernel.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "stochtree_types.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | typedef Eigen::Map> DoubleMatrixType; 10 | typedef Eigen::Map> IntMatrixType; 11 | 12 | [[cpp11::register]] 13 | int forest_container_get_max_leaf_index_cpp(cpp11::external_pointer forest_container, int forest_num) { 14 | return forest_container->GetEnsemble(forest_num)->GetMaxLeafIndex() - 1; 15 | } 16 | 17 | [[cpp11::register]] 18 | cpp11::writable::integers_matrix<> compute_leaf_indices_cpp( 19 | cpp11::external_pointer forest_container, 20 | cpp11::doubles_matrix<> covariates, cpp11::integers forest_nums 21 | ) { 22 | // Wrap an Eigen Map around the raw data of the covariate matrix 23 | StochTree::data_size_t num_obs = covariates.nrow(); 24 | int num_covariates = covariates.ncol(); 25 | double* covariate_data_ptr = REAL(PROTECT(covariates)); 26 | DoubleMatrixType covariates_eigen(covariate_data_ptr, num_obs, num_covariates); 27 | 28 | // Extract other output dimensions 29 | int num_trees = forest_container->NumTrees(); 30 | int 
num_samples = forest_nums.size(); 31 | 32 | // Declare outputs 33 | cpp11::writable::integers_matrix<> output_matrix(num_obs*num_trees, num_samples); 34 | 35 | // Wrap Eigen Maps around kernel and kernel inverse matrices 36 | int* output_data_ptr = INTEGER(PROTECT(output_matrix)); 37 | IntMatrixType output_eigen(output_data_ptr, num_obs*num_trees, num_samples); 38 | 39 | // Compute leaf indices 40 | std::vector forest_indices(forest_nums.begin(), forest_nums.end()); 41 | forest_container->PredictLeafIndicesInplace(covariates_eigen, output_eigen, forest_indices, num_trees, num_obs); 42 | 43 | // Unprotect pointers to R data 44 | UNPROTECT(2); 45 | 46 | // Return matrix 47 | return output_matrix; 48 | } 49 | -------------------------------------------------------------------------------- /src/stochtree_types.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | enum ForestLeafModel { 10 | kConstant, 11 | kUnivariateRegression, 12 | kMultivariateRegression 13 | }; 14 | -------------------------------------------------------------------------------- /stochtree/__init__.py: -------------------------------------------------------------------------------- 1 | from .bart import BARTModel 2 | from .bcf import BCFModel 3 | from .calibration import calibrate_global_error_variance 4 | from .config import ForestModelConfig, GlobalModelConfig 5 | from .data import Dataset, Residual 6 | from .forest import Forest, ForestContainer 7 | from .kernel import ( 8 | compute_forest_leaf_indices, 9 | compute_forest_max_leaf_index 10 | ) 11 | from .preprocessing import CovariatePreprocessor 12 | from .random_effects import ( 13 | RandomEffectsContainer, 14 | RandomEffectsDataset, 15 | RandomEffectsModel, 16 | RandomEffectsTracker, 17 | ) 18 | from .sampler import ( 19 | RNG, 20 | ForestSampler, 21 | GlobalVarianceModel, 22 | LeafVarianceModel 23 | ) 24 | from 
.serialization import JSONSerializer 25 | from .utils import ( 26 | NotSampledError, 27 | _check_array_integer, 28 | _check_array_numeric, 29 | _check_is_int, 30 | _check_is_numeric, 31 | _check_matrix_square, 32 | _standardize_array_to_list, 33 | _standardize_array_to_np, 34 | ) 35 | 36 | __all__ = [ 37 | "BARTModel", 38 | "BCFModel", 39 | "Dataset", 40 | "Residual", 41 | "ForestContainer", 42 | "Forest", 43 | "CovariatePreprocessor", 44 | "RNG", 45 | "ForestSampler", 46 | "RandomEffectsContainer", 47 | "RandomEffectsDataset", 48 | "RandomEffectsModel", 49 | "RandomEffectsTracker", 50 | "GlobalVarianceModel", 51 | "LeafVarianceModel", 52 | "ForestModelConfig", 53 | "GlobalModelConfig", 54 | "JSONSerializer", 55 | "NotSampledError", 56 | "_check_array_integer", 57 | "_check_array_numeric", 58 | "_check_is_int", 59 | "_check_is_numeric", 60 | "_check_matrix_square", 61 | "_standardize_array_to_list", 62 | "_standardize_array_to_np", 63 | "compute_forest_leaf_indices", 64 | "compute_forest_max_leaf_index", 65 | "calibrate_global_error_variance", 66 | ] 67 | -------------------------------------------------------------------------------- /test/R/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 
5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview 7 | # * https://testthat.r-lib.org/articles/special-files.html 8 | 9 | library(testthat) 10 | library(stochtree) 11 | 12 | test_check("stochtree") 13 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Unit Testing 2 | 3 | This directory contains unit tests for the R and Python packages as well as the C++ core. Below, we detail how to run each test suite. 4 | 5 | ## R Package 6 | 7 | To run the R unit tests, first build the package (either via `R CMD build` at the command line or via "Shift + Command + B" in RStudio). 8 | Then in an R console, run `testthat::test_dir("test/R")`. 9 | 10 | ## Python Package 11 | 12 | To run the Python unit tests, first build the package at the command line (activating your virtual environment, if desired, beforehand): 13 | 14 | ```{bash} 15 | rm -rf stochtree.egg-info; rm -rf .pytest_cache; rm -rf build 16 | pip install . 17 | ``` 18 | 19 | Then run 20 | 21 | ```{bash} 22 | pytest test/python 23 | ``` 24 | 25 | ## C++ Core 26 | 27 | To run the C++ unit tests, you must build the test executable, which is activated via the `BUILD_TEST` CMake option 28 | 29 | ```{bash} 30 | rm -rf build 31 | mkdir build 32 | cmake -S . 
-B build -DBUILD_TEST=ON -DBUILD_DEBUG_TARGETS=OFF 33 | cmake --build build 34 | ``` 35 | 36 | Then run the unit test suite by running the test executable 37 | 38 | ```{bash} 39 | ./build/teststochtree 40 | ``` 41 | -------------------------------------------------------------------------------- /test/cpp/test_category_tracker.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | TEST(CategorySampleTracker, BasicOperations) { 13 | // Create a vector of categorical data 14 | std::vector category_data { 15 | 3, 4, 3, 2, 2, 4, 3, 3, 3, 4, 3, 4 16 | }; 17 | 18 | // Create a CategorySamplerTracker 19 | StochTree::CategorySampleTracker category_tracker = StochTree::CategorySampleTracker(category_data); 20 | 21 | // Extract the label map 22 | std::map label_map = category_tracker.GetLabelMap(); 23 | std::map expected_label_map {{2, 0}, {3, 1}, {4, 2}}; 24 | 25 | // Check that the map was constructed as expected 26 | ASSERT_EQ(label_map[2], 0); 27 | ASSERT_EQ(label_map[3], 1); 28 | ASSERT_EQ(label_map[4], 2); 29 | ASSERT_EQ(label_map, expected_label_map); 30 | } 31 | -------------------------------------------------------------------------------- /test/cpp/testutils.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2022 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 
4 | */ 5 | #ifndef STOCHTREE_TESTUTILS_H_ 6 | #define STOCHTREE_TESTUTILS_H_ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace StochTree { 13 | 14 | namespace TestUtils { 15 | 16 | struct TestDataset { 17 | Eigen::Matrix covariates; 18 | Eigen::Matrix omega; 19 | Eigen::Matrix rfx_basis; 20 | Eigen::VectorXd outcome; 21 | std::vector rfx_groups; 22 | int n; 23 | int x_cols; 24 | int omega_cols; 25 | int rfx_basis_cols; 26 | int rfx_num_groups; 27 | bool row_major{true}; 28 | }; 29 | 30 | /*! Creates a small dataset (10 observations) */ 31 | TestDataset LoadSmallDatasetUnivariateBasis(); 32 | 33 | /*! Creates a small dataset (10 observations) with a multivariate basis for leaf regression applications */ 34 | TestDataset LoadSmallDatasetMultivariateBasis(); 35 | 36 | /*! Creates a small dataset (10 observations) with a multivariate basis and several random effects terms */ 37 | TestDataset LoadSmallRFXDatasetMultivariateBasis(); 38 | 39 | /*! Creates a modest dataset (100 observations) */ 40 | TestDataset LoadMediumDatasetUnivariateBasis(); 41 | 42 | } // namespace TestUtils 43 | 44 | } // namespace StochTree 45 | 46 | #endif // STOCHTREE_TESTUTILS_H_ 47 | -------------------------------------------------------------------------------- /test/python/test_calibration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from scipy.stats import gamma 4 | from sklearn import linear_model 5 | from sklearn.metrics import mean_squared_error 6 | 7 | from stochtree import calibrate_global_error_variance 8 | 9 | 10 | class TestCalibration: 11 | def test_full_rank(self): 12 | n = 100 13 | p = 5 14 | nu = 3 15 | q = 0.9 16 | X = np.random.uniform(size=(n, p)) 17 | y = 1 + X[:, 0] * 0.1 - X[:, 1] * 0.2 + np.random.normal(size=n) 18 | y_std = (y - np.mean(y)) / np.std(y) 19 | reg_model = linear_model.LinearRegression() 20 | reg_model.fit(X, y_std) 21 | mse = mean_squared_error(y_std, 
reg_model.predict(X)) 22 | lamb = calibrate_global_error_variance(X=X, y=y, nu=nu, q=q, standardize=True) 23 | assert lamb == pytest.approx((mse * gamma.ppf(1 - q, nu)) / nu) 24 | 25 | def test_rank_deficient(self): 26 | n = 100 27 | p = 5 28 | nu = 3 29 | q = 0.9 30 | X = np.random.uniform(size=(n, p)) 31 | X[:, 4] = X[:, 2] 32 | y = 1 + X[:, 0] * 0.1 - X[:, 1] * 0.2 + np.random.normal(size=n) 33 | y_std = (y - np.mean(y)) / np.std(y) 34 | reg_model = linear_model.LinearRegression() 35 | reg_model.fit(X, y_std) 36 | mse = mean_squared_error(y_std, reg_model.predict(X)) 37 | if reg_model.rank_ < p: 38 | with pytest.warns(UserWarning): 39 | lamb = calibrate_global_error_variance( 40 | X=X, y=y, nu=nu, q=q, standardize=True 41 | ) 42 | else: 43 | lamb = calibrate_global_error_variance( 44 | X=X, y=y, nu=nu, q=q, standardize=True 45 | ) 46 | assert lamb == pytest.approx((mse * gamma.ppf(1 - q, nu)) / nu) 47 | 48 | def test_overdetermined(self): 49 | n = 100 50 | p = 101 51 | nu = 3 52 | q = 0.9 53 | X = np.random.uniform(size=(n, p)) 54 | y = 1 + X[:, 0] * 0.1 - X[:, 1] * 0.2 + np.random.normal(size=n) 55 | y_std = (y - np.mean(y)) / np.std(y) 56 | reg_model = linear_model.LinearRegression() 57 | reg_model.fit(X, y_std) 58 | with pytest.warns(UserWarning): 59 | lamb = calibrate_global_error_variance( 60 | X=X, y=y, nu=nu, q=q, standardize=True 61 | ) 62 | assert lamb == pytest.approx(np.var(y) * (gamma.ppf(1 - q, nu)) / nu) 63 | -------------------------------------------------------------------------------- /test/python/test_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from stochtree.config import ForestModelConfig, GlobalModelConfig 5 | 6 | 7 | class TestConfig: 8 | def test_forest_config(self): 9 | with pytest.warns(): 10 | _ = ForestModelConfig(num_trees=10, num_features=5, num_observations=100) 11 | _ = ForestModelConfig(num_trees=1, num_features=1, num_observations=1) 
12 | _ = ForestModelConfig( 13 | num_trees=10, 14 | num_features=5, 15 | num_observations=100, 16 | feature_types=[0, 0, 0, 0, 1], 17 | ) 18 | _ = ForestModelConfig( 19 | num_trees=1, num_features=1, num_observations=1, feature_types=[2] 20 | ) 21 | _ = ForestModelConfig( 22 | num_trees=10, 23 | num_features=5, 24 | num_observations=100, 25 | variable_weights=[0.2, 0.2, 0.2, 0.2, 0.2], 26 | ) 27 | _ = ForestModelConfig( 28 | num_trees=1, num_features=1, num_observations=1, variable_weights=[1.0] 29 | ) 30 | 31 | with pytest.raises(ValueError): 32 | _ = ForestModelConfig() 33 | _ = ForestModelConfig( 34 | num_trees=10, 35 | num_features=6, 36 | num_observations=100, 37 | feature_types=[0, 0, 0, 0, 1], 38 | ) 39 | _ = ForestModelConfig( 40 | num_trees=10, 41 | num_features=1, 42 | num_observations=100, 43 | feature_types=[0, 0, 0, 0, 1], 44 | ) 45 | _ = ForestModelConfig( 46 | num_trees=10, 47 | num_features=6, 48 | num_observations=100, 49 | variable_weights=[0.2, 0.2, 0.2, 0.2, 0.2], 50 | ) 51 | _ = ForestModelConfig( 52 | num_trees=10, 53 | num_features=1, 54 | num_observations=100, 55 | variable_weight=[0.2, 0.2, 0.2, 0.2, 0.2], 56 | ) 57 | _ = ForestModelConfig( 58 | num_trees=10, 59 | num_features=1, 60 | num_observations=100, 61 | leaf_dimension=2, 62 | leaf_model_scale=np.array([2, 3], [3, 4], [5, 6]), 63 | ) 64 | _ = ForestModelConfig( 65 | num_trees=10, num_features=1, num_observations=100, leaf_model_type=4 66 | ) 67 | _ = ForestModelConfig( 68 | num_trees=10, num_features=1, num_observations=100, leaf_model_type=-1 69 | ) 70 | 71 | def test_global_config(self): 72 | with pytest.raises(ValueError): 73 | _ = GlobalModelConfig(global_error_variance=0.0) 74 | _ = GlobalModelConfig(global_error_variance=-1.0) 75 | -------------------------------------------------------------------------------- /test/python/test_kernel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from 
stochtree import ( 5 | Dataset, 6 | Forest, 7 | ForestContainer, 8 | compute_forest_leaf_indices, 9 | compute_forest_max_leaf_index 10 | ) 11 | 12 | 13 | class TestKernel: 14 | def test_forest(self): 15 | # Create dataset 16 | X = np.array( 17 | [[1.5, 8.7, 1.2], 18 | [2.7, 3.4, 5.4], 19 | [3.6, 1.2, 9.3], 20 | [4.4, 5.4, 10.4], 21 | [5.3, 9.3, 3.6], 22 | [6.1, 10.4, 4.4]] 23 | ) 24 | n, p = X.shape 25 | num_trees = 2 26 | output_dim = 1 27 | forest_dataset = Dataset() 28 | forest_dataset.add_covariates(X) 29 | forest_samples = ForestContainer(num_trees, output_dim, True, False) 30 | 31 | # Initialize a forest with constant root predictions 32 | forest_samples.add_sample(0.) 33 | 34 | # Split the root of the first tree in the ensemble at X[,1] > 4.0 35 | forest_samples.add_numeric_split(0, 0, 0, 0, 4.0, -5., 5.) 36 | 37 | # Check that regular and "raw" predictions are the same (since the leaf is constant) 38 | computed = compute_forest_leaf_indices(forest_samples, X) 39 | max_leaf_index = compute_forest_max_leaf_index(forest_samples) 40 | expected = np.array([ 41 | [0], 42 | [0], 43 | [0], 44 | [1], 45 | [1], 46 | [1], 47 | [2], 48 | [2], 49 | [2], 50 | [2], 51 | [2], 52 | [2] 53 | ]) 54 | 55 | # Assertion 56 | np.testing.assert_almost_equal(computed, expected) 57 | assert max_leaf_index == [2] 58 | 59 | # Split the left leaf of the first tree in the ensemble at X[,2] > 4.0 60 | forest_samples.add_numeric_split(0, 0, 1, 1, 4.0, -7.5, -2.5) 61 | 62 | # Check that regular and "raw" predictions are the same (since the leaf is constant) 63 | computed = compute_forest_leaf_indices(forest_samples, X) 64 | max_leaf_index = compute_forest_max_leaf_index(forest_samples) 65 | expected = np.array([ 66 | [2], 67 | [1], 68 | [1], 69 | [0], 70 | [0], 71 | [0], 72 | [3], 73 | [3], 74 | [3], 75 | [3], 76 | [3], 77 | [3] 78 | ]) 79 | 80 | # Assertion 81 | np.testing.assert_almost_equal(computed, expected) 82 | assert max_leaf_index == [3] 83 | 
-------------------------------------------------------------------------------- /tools/debug/bart_profile.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## Profiling BART on multiple platforms 3 | ################################################################################ 4 | 5 | library(stochtree) 6 | Rprof() 7 | 8 | start_time <- Sys.time() 9 | n <- 10000 10 | p <- 50 11 | X <- matrix(runif(n*p), ncol = p) 12 | f_XW <- ( 13 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 14 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 15 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 16 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 17 | ) 18 | noise_sd <- 1 19 | y <- f_XW + rnorm(n, 0, noise_sd) 20 | test_set_pct <- 0.2 21 | n_test <- round(test_set_pct*n) 22 | n_train <- n - n_test 23 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 24 | train_inds <- (1:n)[!((1:n) %in% test_inds)] 25 | X_test <- X[test_inds,] 26 | X_train <- X[train_inds,] 27 | y_test <- y[test_inds] 28 | y_train <- y[train_inds] 29 | bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test) 30 | end_time <- Sys.time() 31 | print(paste("runtime:", end_time - start_time)) 32 | 33 | summaryRprof() 34 | Rprof(NULL) 35 | -------------------------------------------------------------------------------- /tools/debug/continuous_treatment_bcf.R: -------------------------------------------------------------------------------- 1 | library(stochtree) 2 | 3 | # Generate data with a continuous treatment 4 | n <- 500 5 | snr <- 3 6 | x1 <- rnorm(n) 7 | x2 <- rnorm(n) 8 | x3 <- rnorm(n) 9 | x4 <- rnorm(n) 10 | x5 <- rnorm(n) 11 | X <- cbind(x1,x2,x3,x4,x5) 12 | p <- ncol(X) 13 | mu_x <- 1 + 2*x1 - 4*(x2 < 0) + 4*(x2 >= 0) + 3*(abs(x3) - sqrt(2/pi)) 14 | tau_x <- 1 + 2*x4 15 | u <- runif(n) 16 | pi_x <- ((mu_x-1)/4) + 4*(u-0.5) 17 | Z <- pi_x + rnorm(n,0,1) 18 | E_XZ <- mu_x + 
Z*tau_x 19 | y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 20 | X <- as.data.frame(X) 21 | 22 | # Split data into test and train sets 23 | test_set_pct <- 0.2 24 | n_test <- round(test_set_pct*n) 25 | n_train <- n - n_test 26 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 27 | train_inds <- (1:n)[!((1:n) %in% test_inds)] 28 | X_test <- X[test_inds,] 29 | X_train <- X[train_inds,] 30 | pi_test <- pi_x[test_inds] 31 | pi_train <- pi_x[train_inds] 32 | Z_test <- Z[test_inds] 33 | Z_train <- Z[train_inds] 34 | y_test <- y[test_inds] 35 | y_train <- y[train_inds] 36 | mu_test <- mu_x[test_inds] 37 | mu_train <- mu_x[train_inds] 38 | tau_test <- tau_x[test_inds] 39 | tau_train <- tau_x[train_inds] 40 | 41 | # Run continuous treatment BCF 42 | num_gfr <- 10 43 | num_burnin <- 0 44 | num_mcmc <- 1000 45 | num_samples <- num_gfr + num_burnin + num_mcmc 46 | bcf_model_warmstart <- bcf( 47 | X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, 48 | X_test = X_test, Z_test = Z_test, pi_test = pi_test, 49 | num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, 50 | sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F, verbose = T 51 | ) 52 | 53 | # Inspect results 54 | mu_hat_train <- rowMeans(bcf_model_warmstart$mu_hat_train) 55 | tau_hat_train <- rowMeans(bcf_model_warmstart$tau_hat_train) 56 | mu_hat_test <- rowMeans(bcf_model_warmstart$mu_hat_test) 57 | tau_hat_test <- rowMeans(bcf_model_warmstart$tau_hat_test) 58 | plot(mu_train, mu_hat_train); abline(0,1,lwd=3,lty=3,col="red") 59 | plot(tau_train, tau_hat_train); abline(0,1,lwd=3,lty=3,col="red") 60 | plot(mu_test, mu_hat_test); abline(0,1,lwd=3,lty=3,col="red") 61 | plot(tau_test, tau_hat_test); abline(0,1,lwd=3,lty=3,col="red") 62 | -------------------------------------------------------------------------------- /tools/debug/dgps.R: -------------------------------------------------------------------------------- 1 | dgp_levels <- c("dgp_prediction_partitioned_lm", 
"dgp_prediction_step_function") 2 | 3 | dgp_prediction_partitioned_lm <- function(n, p_x, p_w, snr = NULL) { 4 | X <- matrix(runif(n*p_x), ncol = p_x) 5 | W <- matrix(runif(n*p_w), ncol = p_w) 6 | f_XW <- ( 7 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + 8 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + 9 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + 10 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) 11 | ) 12 | if (!is.null(snr)) { 13 | if (snr > 0) { 14 | noise_sd <- sd(f_XW) / snr 15 | snr_used <- snr 16 | } else { 17 | noise_sd <- 1 18 | snr_used <- sd(f_XW) / noise_sd 19 | } 20 | } else { 21 | noise_sd <- 1 22 | snr_used <- sd(f_XW) / noise_sd 23 | } 24 | y <- f_XW + rnorm(n, 0, noise_sd) 25 | return(list(has_basis=T,X=X,W=W,y=y,noise_sd=noise_sd,snr=snr_used)) 26 | } 27 | 28 | dgp_prediction_step_function <- function(n, p_x, snr = NULL) { 29 | X <- matrix(runif(n*p_x), ncol = p_x) 30 | f_XW <- ( 31 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 32 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 33 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 34 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 35 | ) 36 | if (!is.null(snr)) { 37 | if (snr > 0) { 38 | noise_sd <- sd(f_XW) / snr 39 | snr_used <- snr 40 | } else { 41 | noise_sd <- 1 42 | snr_used <- sd(f_XW) / noise_sd 43 | } 44 | } else { 45 | noise_sd <- 1 46 | snr_used <- sd(f_XW) / noise_sd 47 | } 48 | y <- f_XW + rnorm(n, 0, noise_sd) 49 | return(list(has_basis=F,X=X,W=NULL,y=y,noise_sd=noise_sd,snr=snr_used)) 50 | } 51 | -------------------------------------------------------------------------------- /tools/debug/heteroskedastic_bart.R: -------------------------------------------------------------------------------- 1 | # Load libraries 2 | library(stochtree) 3 | library(here) 4 | 5 | # Load train and test data 6 | from_file <- T 7 | if (from_file) { 8 | project_dir <- here() 9 | train_set_path <- file.path(project_dir, "debug", "data", "heterosked_train.csv") 10 | test_set_path <- 
file.path(project_dir, "debug", "data", "heterosked_test.csv")
  # Load pre-generated train/test sets: column 1 is the outcome, columns 2-11
  # are covariates, column 12 is the conditional mean f(x), and column 13 is
  # the conditional standard deviation s(x)
  train_df <- read.csv(train_set_path)
  test_df <- read.csv(test_set_path)
  y_train <- train_df[, 1]
  y_test <- test_df[, 1]
  X_train <- train_df[, 2:11]
  X_test <- test_df[, 2:11]
  f_x_train <- train_df[, 12]
  f_x_test <- test_df[, 12]
  s_x_train <- train_df[, 13]
  s_x_test <- test_df[, 13]
} else {
  # Simulate a heteroskedastic DGP: zero conditional mean, piecewise
  # conditional standard deviation driven by X1 (bin) and X3 (slope)
  n <- 500
  p_x <- 10
  X <- matrix(runif(n * p_x), ncol = p_x)
  # FIX: f_XW was the scalar 0; `f_XW[test_inds]` / `f_XW[train_inds]` below
  # index past position 1 of a length-1 vector and return NA for every
  # element. Use a length-n zero vector so f_x_test/f_x_train are all zeros.
  f_XW <- rep(0, n)
  s_XW <- (
    ((0 <= X[, 1]) & (0.25 > X[, 1])) * (0.5 * X[, 3]) +
    ((0.25 <= X[, 1]) & (0.5 > X[, 1])) * (1 * X[, 3]) +
    ((0.5 <= X[, 1]) & (0.75 > X[, 1])) * (2 * X[, 3]) +
    ((0.75 <= X[, 1]) & (1 > X[, 1])) * (3 * X[, 3])
  )
  y <- f_XW + rnorm(n, 0, 1) * s_XW

  # Split data into test and train sets
  test_set_pct <- 0.2
  n_test <- round(test_set_pct * n)
  n_train <- n - n_test
  test_inds <- sort(sample(seq_len(n), n_test, replace = FALSE))
  train_inds <- setdiff(seq_len(n), test_inds)
  X_test <- as.data.frame(X[test_inds, ])
  X_train <- as.data.frame(X[train_inds, ])
  W_test <- NULL
  W_train <- NULL
  y_test <- y[test_inds]
  y_train <- y[train_inds]
  f_x_test <- f_XW[test_inds]
  f_x_train <- f_XW[train_inds]
  s_x_test <- s_XW[test_inds]
  s_x_train <- s_XW[train_inds]
}

# Run BART with a variance forest only (num_trees_mean = 0, so the mean is
# not modeled; m variance trees capture s(x))
num_gfr <- 10
num_burnin <- 0
num_mcmc <- 200
num_samples <- num_gfr + num_burnin + num_mcmc
m <- 50
a_0 <- sqrt(1 / 2)
sigma0 <- 1 / 2
bart_model <- stochtree::bart(
  X_train = X_train, y_train = y_train, X_test = X_test,
  num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc,
  num_trees_mean = 0, num_trees_variance = m,
  alpha_mean = 0.8, beta_mean = 3, min_samples_leaf_mean = 5,
  max_depth_mean = 3, alpha_variance = 0.95, beta_variance = 1.25,
  min_samples_leaf_variance = 1, max_depth_variance = 10,
  sample_sigma = FALSE, sample_tau = FALSE, keep_gfr = TRUE,
  sigma2_init = sigma0,
  # a_forest = m/(a_0^2) + 1, b_forest = m/(a_0^2)
  a_forest = 3, b_forest = 2
)

# Compare predicted vs. true conditional standard deviations (train set)
s_x_hat_train <- rowMeans(bart_model$sigma_x_hat_train)
plot(s_x_hat_train, s_x_train,
     main = "Conditional std dev as a function of x",
     xlab = "Predicted", ylab = "Actual")
abline(0, 1, col = "red", lty = 3, lwd = 3)
sqrt(mean((s_x_hat_train - s_x_train)^2))

# Compare predicted vs. true conditional standard deviations (test set)
s_x_hat_test <- rowMeans(bart_model$sigma_x_hat_test)
plot(s_x_hat_test, s_x_test,
     main = "Conditional std dev as a function of x",
     xlab = "Predicted", ylab = "Actual")
abline(0, 1, col = "red", lty = 3, lwd = 3)
sqrt(mean((s_x_hat_test - s_x_test)^2))
--------------------------------------------------------------------------------
/tools/debug/multichain_seq.R:
--------------------------------------------------------------------------------
library(stochtree)

# Simulate a piecewise-linear DGP whose leaf regression basis is W
n <- 500
p_x <- 10
p_w <- 1
snr <- 3
X <- matrix(runif(n * p_x), ncol = p_x)
W <- matrix(runif(n * p_w), ncol = p_w)
f_XW <- (
  ((0 <= X[, 1]) & (0.25 > X[, 1])) * (-7.5 * W[, 1]) +
  ((0.25 <= X[, 1]) & (0.5 > X[, 1])) * (-2.5 * W[, 1]) +
  ((0.5 <= X[, 1]) & (0.75 > X[, 1])) * (2.5 * W[, 1]) +
  ((0.75 <= X[, 1]) & (1 > X[, 1])) * (7.5 * W[, 1])
)
noise_sd <- sd(f_XW) / snr
y <- f_XW + rnorm(n, 0, 1) * noise_sd

# Train/test split
test_set_pct <- 0.2
n_test <- round(test_set_pct * n)
n_train <- n - n_test
test_inds <- sort(sample(seq_len(n), n_test, replace = FALSE))
train_inds <- setdiff(seq_len(n), test_inds)
X_test <- as.data.frame(X[test_inds, ])
X_train <- as.data.frame(X[train_inds, ])
W_test <- W[test_inds, ]
W_train <- W[train_inds, ]
y_test <- y[test_inds]
y_train <- y[train_inds]

# Sample several independent BART chains sequentially
num_chains <- 4
num_gfr <- 10
num_burnin <- 0
num_mcmc <- 100
num_trees <- 100
bart_models <- list()
for (i in seq_len(num_chains)) {
  bart_models[[i]] <- stochtree::bart(
    X_train = X_train, W_train = W_train, y_train = y_train,
    X_test = X_test,
W_test = W_test, num_trees = num_trees, 37 | num_gfr = num_gfr, num_burnin = num_burnin, 38 | num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T 39 | ) 40 | } 41 | json_string_list <- list() 42 | for (i in 1:num_chains) { 43 | json_string_list[[i]] <- saveBARTModelToJsonString(bart_models[[i]]) 44 | } 45 | combined_forests <- loadForestContainerCombinedJsonString(json_string_list, "forest_0") 46 | test_dataset <- createForestDataset(as.matrix(X_test), W_test) 47 | yhat_combined <- combined_forests$predict(test_dataset) -------------------------------------------------------------------------------- /tools/debug/multivariate_bart_debug.R: -------------------------------------------------------------------------------- 1 | library(stochtree) 2 | 3 | # Generate the data 4 | n <- 500 5 | p_x <- 10 6 | p_w <- 2 7 | snr <- 3 8 | X <- matrix(runif(n*p_x), ncol = p_x) 9 | W <- matrix(runif(n*p_w), ncol = p_w) 10 | f_XW <- ( 11 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + 12 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + 13 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + 14 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) 15 | ) 16 | noise_sd <- sd(f_XW) / snr 17 | y <- f_XW + rnorm(n, 0, 1)*noise_sd 18 | 19 | # Split data into test and train sets 20 | test_set_pct <- 0.2 21 | n_test <- round(test_set_pct*n) 22 | n_train <- n - n_test 23 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 24 | train_inds <- (1:n)[!((1:n) %in% test_inds)] 25 | X_test <- as.data.frame(X[test_inds,]) 26 | X_train <- as.data.frame(X[train_inds,]) 27 | W_test <- W[test_inds,] 28 | W_train <- W[train_inds,] 29 | y_test <- y[test_inds] 30 | y_train <- y[train_inds] 31 | 32 | # Sample BART model 33 | num_gfr <- 10 34 | num_burnin <- 0 35 | num_mcmc <- 100 36 | num_samples <- num_gfr + num_burnin + num_mcmc 37 | bart_params <- list(sample_sigma_global = T, sample_sigma_leaf = F, num_trees_mean = 100) 38 | bart_model_warmstart <- stochtree::bart( 39 | X_train = X_train, 
W_train = W_train, y_train = y_train, X_test = X_test, W_test = W_test, 40 | num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, 41 | params = bart_params 42 | ) 43 | -------------------------------------------------------------------------------- /tools/debug/python_comparison_debug.R: -------------------------------------------------------------------------------- 1 | library(stochtree) 2 | 3 | df <- read.csv("debug/data/heterosked_train.csv") 4 | y <- df[,"y"] 5 | X <- df[,c('X1','X2','X3','X4','X5','X6','X7','X8','X9','X10')] 6 | 7 | num_gfr <- 0 8 | num_burnin <- 0 9 | num_mcmc <- 10 10 | general_params <- list(random_seed = 1234, standardize = F, sample_sigma2_global = T) 11 | bart_model <- stochtree::bart( 12 | X_train = X, y_train = y, 13 | num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, 14 | general_params = general_params 15 | ) 16 | 17 | rowMeans(bart_model$y_hat_train)[1:20] 18 | bart_model$sigma2_global_samples -------------------------------------------------------------------------------- /tools/debug/r_kernel.R: -------------------------------------------------------------------------------- 1 | library(stochtree) 2 | library(tgp) 3 | 4 | # Generate the data, add many "noise variables" 5 | n <- 500 6 | p_extra <- 10 7 | friedman.df <- friedman.1.data(n=n) 8 | train_inds <- sort(sample(1:n, floor(0.8*n), replace = FALSE)) 9 | test_inds <- (1:n)[!((1:n) %in% train_inds)] 10 | X <- as.matrix(friedman.df)[,1:10] 11 | X <- cbind(X, matrix(runif(n*p_extra), ncol = p_extra)) 12 | y <- as.matrix(friedman.df)[,12] + rnorm(n,0,1)*(sd(as.matrix(friedman.df)[,11])/2) 13 | X_train <- X[train_inds,] 14 | X_test <- X[test_inds,] 15 | y_train <- y[train_inds] 16 | y_test <- y[test_inds] 17 | 18 | # Run BART on the data 19 | X_train <- as.data.frame(X_train) 20 | X_test <- as.data.frame(X_test) 21 | bart_params <- list(num_trees_mean=200, num_trees_variance=50) 22 | bart_model <- bart(X_train=X_train, y_train=y_train, X_test=X_test, 
params = bart_params, num_mcmc=1000) 23 | 24 | # Compute leaf indices for selected samples from the mean forest 25 | leaf_mat <- computeForestLeafIndices(bart_model, X_test, forest_type = "mean", 26 | forest_inds = c(99,100)) 27 | 28 | # Compute leaf indices for all samples from the mean forest 29 | leaf_mat <- computeForestLeafIndices(bart_model, X_test, forest_type = "mean") 30 | 31 | # Construct sparse matrix of leaf membership 32 | W <- Matrix::sparseMatrix(i=rep(1:length(y_test),200), j=leaf_mat[,forest_num] + 1, x=1) 33 | tcrossprod(W) 34 | 35 | # Compute leaf indices for selected samples from the variance forest 36 | leaf_mat <- computeForestLeafIndices(bart_model, X_test, forest_type = "variance", 37 | forest_inds = c(99,100)) 38 | 39 | # Compute leaf indices for all samples from the variance forest 40 | leaf_mat <- computeForestLeafIndices(bart_model, X_test, forest_type = "variance") -------------------------------------------------------------------------------- /tools/perf/bart_microbenchmark.R: -------------------------------------------------------------------------------- 1 | library(microbenchmark) 2 | library(stochtree) 3 | 4 | # Generate data needed to train BART model 5 | n <- 10000 6 | p <- 20 7 | X <- matrix(runif(n*p), ncol = p) 8 | f_XW <- ( 9 | ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + 10 | ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + 11 | ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + 12 | ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) 13 | ) 14 | noise_sd <- 1 15 | y <- f_XW + rnorm(n, 0, noise_sd) 16 | test_set_pct <- 0.2 17 | n_test <- round(test_set_pct*n) 18 | n_train <- n - n_test 19 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 20 | train_inds <- (1:n)[!((1:n) %in% test_inds)] 21 | X_test <- X[test_inds,] 22 | X_train <- X[train_inds,] 23 | y_test <- y[test_inds] 24 | y_train <- y[train_inds] 25 | 26 | # Run microbenchmark 27 | bench_results <- microbenchmark( 28 | bart(X_train = X_train, y_train = y_train, X_test = X_test, 
num_gfr = 10, num_mcmc = 100), 29 | times = 10 30 | ) 31 | -------------------------------------------------------------------------------- /tools/perf/bcf_microbenchmark.R: -------------------------------------------------------------------------------- 1 | library(microbenchmark) 2 | library(stochtree) 3 | 4 | # Generate data needed to train BCF 5 | n <- 500 6 | x1 <- rnorm(n) 7 | x2 <- rnorm(n) 8 | x3 <- rnorm(n) 9 | x4 <- as.numeric(rbinom(n,1,0.5)) 10 | x5 <- as.numeric(sample(1:3,n,replace=TRUE)) 11 | X <- cbind(x1,x2,x3,x4,x5) 12 | p <- ncol(X) 13 | g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,4))} 14 | mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} 15 | mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} 16 | tau1 <- function(x) {rep(3,nrow(x))} 17 | tau2 <- function(x) {1+2*x[,2]*x[,4]} 18 | mu_x <- mu1(X) 19 | tau_x <- tau2(X) 20 | pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 21 | Z <- rbinom(n,1,pi_x) 22 | E_XZ <- mu_x + Z*tau_x 23 | snr <- 4 24 | y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) 25 | test_set_pct <- 0.2 26 | n_test <- round(test_set_pct*n) 27 | n_train <- n - n_test 28 | test_inds <- sort(sample(1:n, n_test, replace = FALSE)) 29 | train_inds <- (1:n)[!((1:n) %in% test_inds)] 30 | X_test <- X[test_inds,] 31 | X_train <- X[train_inds,] 32 | pi_test <- pi_x[test_inds] 33 | pi_train <- pi_x[train_inds] 34 | Z_test <- Z[test_inds] 35 | Z_train <- Z[train_inds] 36 | y_test <- y[test_inds] 37 | y_train <- y[train_inds] 38 | mu_test <- mu_x[test_inds] 39 | mu_train <- mu_x[train_inds] 40 | tau_test <- tau_x[test_inds] 41 | tau_train <- tau_x[train_inds] 42 | 43 | # Run microbenchmark 44 | microbenchmark( 45 | bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, 46 | X_test = X_test, Z_test = Z_test, pi_test = pi_test, num_gfr = 10, 47 | num_mcmc = 1000, sample_sigma_leaf_tau = F) 48 | ) 49 | -------------------------------------------------------------------------------- 
/tools/setup/setup_r_dependencies.R:
--------------------------------------------------------------------------------
################################################################################
## This script is a modified version of the setup-r-dependencies Github action
## for local use and debugging. The source for the action is:
## https://github.com/r-lib/actions/blob/v2-branch/setup-r-dependencies/action.yaml
################################################################################

# Set site library path
cat("::group::Set site library path\n")
if (Sys.getenv("RENV_PROJECT") != "") {
  # Under renv the project library takes precedence; record it for pak and exit
  message("renv project detected, no need to set R_LIBS_SITE")
  cat(sprintf("R_LIB_FOR_PAK=%s\n", .libPaths()[1]), file = Sys.getenv("GITHUB_ENV"), append = TRUE)
  q("no")
}
lib <- .libPaths()[[1]]
if (lib == "") {
  # No user/site library configured yet: derive a site-library path next to
  # the base library and export it for both R and pak
  lib <- file.path(dirname(.Library), "site-library")
  Sys.setenv(R_LIBS_SITE = strsplit(lib, .Platform$path.sep)[[1]][[1]])
  Sys.setenv(R_LIB_FOR_PAK = strsplit(lib, .Platform$path.sep)[[1]][[1]])
  message("Setting R_LIBS_SITE to ", lib)
  message("Setting R_LIB_FOR_PAK to ", lib)
} else {
  # A library already exists; just point pak at its first entry
  message("R_LIBS_SITE is already set to ", lib)
  Sys.setenv(R_LIB_FOR_PAK = strsplit(lib, .Platform$path.sep)[[1]][[1]])
  message("R_LIB_FOR_PAK is now set to ", lib)
}
cat("::endgroup::\n")

# Install pak
cat("::group::Install pak\n")
lib <- Sys.getenv("R_LIB_FOR_PAK")
dir.create(lib, showWarnings = FALSE, recursive = TRUE)
# Install the pak binary matching this platform/R build from r-lib's repo
install.packages("pak", lib = lib, repos = sprintf(
  "https://r-lib.github.io/p/pak/%s/%s/%s/%s",
  "stable",
  .Platform$pkgType,
  R.Version()$os,
  R.Version()$arch
))
cat("::endgroup::\n")

# Dependency resolution
cat("::group::Dependency resolution\n")
cat("os-version=", sessionInfo()$running, "\n", sep = "", append = TRUE)
# For R-devel, build a cache key that changes with the graphics engine
# version and internals ID, so stale binaries are not reused across builds
r_version <-
  if (grepl("development", R.version.string)) {
    pdf(tempfile())
    ge_ver <- attr(recordPlot(), "engineVersion")
    dev.off()
    paste0("R version ", getRversion(), " (ge:", ge_ver, "; iid:", .Internal(internalsID()), ")")
  } else {
    R.version.string
  }
cat("r-version=", r_version, "\n", sep = "", append = TRUE)
# No Config/Needs fields requested here (empty split yields character(0))
needs <- sprintf("Config/Needs/%s", strsplit("", "[[:space:],]+")[[1]])
deps <- strsplit("any::cpp11, any::R6, any::knitr, any::rmarkdown, any::Matrix, any::tgp, any::MASS, any::mvtnorm, any::ggplot2, any::latex2exp, any::testthat, any::sessioninfo", "[[:space:],]+")[[1]]
extra_deps <- strsplit("any::testthat, any::decor, github::StochasticTree/stochtree-r", "[[:space:],]+")[[1]]
dir.create("install_temp", showWarnings = FALSE)
Sys.setenv("PKGCACHE_HTTP_VERSION" = "2")
library(pak, lib.loc = Sys.getenv("R_LIB_FOR_PAK"))
# Resolve everything into a lockfile first so the install step is reproducible
pak::lockfile_create(
  c(deps, extra_deps),
  lockfile = "install_temp/pkg.lock",
  upgrade = FALSE,
  dependencies = c(needs, "all"),
  lib = NULL
)
cat("::endgroup::\n")
cat("::group::Show Lockfile\n")
writeLines(readLines("install_temp/pkg.lock"))
cat("::endgroup::\n")

# Install/Update packages
cat("::group::Install/update packages\n")
Sys.setenv("PKGCACHE_HTTP_VERSION" = "2")
library(pak, lib.loc = Sys.getenv("R_LIB_FOR_PAK"))
pak::lockfile_install("install_temp/pkg.lock")

# Clean up temporary pkg.lock install directory
unlink("install_temp", recursive = TRUE)
cat("::endgroup::\n")
--------------------------------------------------------------------------------