├── .clang-format ├── .github └── workflows │ ├── document_protos.yaml │ ├── publish_base_image.yaml │ ├── publish_env_image.yaml │ └── test.yaml ├── .gitignore ├── .gitmodules ├── .readthedocs.yaml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── INSTALL.md ├── LICENSE ├── R ├── .gitignore ├── README.md ├── bayesmixr │ ├── DESCRIPTION │ ├── LICENSE │ ├── NAMESPACE │ ├── R │ │ ├── build_bayesmix.R │ │ ├── decoder.R │ │ ├── run_mcmc.R │ │ ├── utils.R │ │ └── zzz.R │ ├── cleanup │ ├── cleanup.win │ ├── configure │ ├── configure.win │ ├── man │ │ ├── DecodeVarint32.Rd │ │ ├── VarintDecoder.Rd │ │ ├── build_bayesmix.Rd │ │ ├── import_protobuf_messages.Rd │ │ ├── maybe_print_proto_to_file.Rd │ │ ├── read_many_proto_from_file.Rd │ │ └── run_mcmc.Rd │ └── tests │ │ ├── testthat.R │ │ └── testthat │ │ ├── test_build.R │ │ └── test_run.R └── notebooks │ └── gaussian_mix_uni.Rmd ├── README.md ├── benchmarks ├── CMakeLists.txt ├── eval_lpdf.cc ├── lpd_grid.cc ├── main.cpp ├── mcmc_runs.cc └── nnw_marg_lpdf.cc ├── cmake ├── FindSphinx.cmake ├── ProtobufUtils.cmake ├── math.cmake ├── matplotplusplus.cmake └── protobuf.cmake ├── docs ├── .gitignore ├── CMakeLists.txt ├── Doxyfile.in ├── algorithms.rst ├── collectors.rst ├── conf.py ├── hierarchies.rst ├── index.rst ├── likelihoods.rst ├── mixings.rst ├── prior_models.rst ├── protos.html ├── protos.rst ├── python_interface.rst ├── requirements.txt ├── states.rst ├── tutorial.rst ├── updaters.rst └── utils.rst ├── examples ├── CMakeLists.txt ├── fa_hierarchy │ ├── in │ │ ├── algo.asciipb │ │ ├── data.csv │ │ ├── dp_gamma.asciipb │ │ └── fa.asciipb │ ├── out │ │ └── .gitignore │ └── run.sh ├── gamma_hierarchy │ ├── gamma_likelihood.h │ ├── gamma_prior_model.h │ ├── gammagamma_hierarchy.h │ ├── gammagamma_updater.h │ └── run_gamma_gamma.cc └── tutorial │ ├── 2dplot.sh │ ├── 2drun.sh │ ├── plot.sh │ └── run.sh ├── executables ├── plot_mcmc.cc └── run_mcmc.cc ├── install-tbb.bat ├── lib ├── argparse │ └── argparse.h └── progressbar │ └── 
progressbar.h ├── pre-commit-config.yaml ├── python ├── .gitignore ├── README.md ├── __init__.py ├── bayesmixpy │ ├── __init__.py │ ├── build_bayesmix.py │ ├── io_utils.py │ ├── proto │ │ └── __init__.py │ ├── run.py │ └── shell_utils.py ├── notebooks │ ├── gaussian_mix_NNxIG.ipynb │ ├── gaussian_mix_multi.ipynb │ ├── gaussian_mix_uni.ipynb │ └── split_merge_benchmarking.ipynb ├── pyproject.toml ├── requirements.txt ├── scripts │ ├── __init__.py │ ├── generate_asciipb.py │ └── populate_benchmark_datasets.py ├── setup.cfg ├── setup.py └── tests │ ├── __init__.py │ ├── test_build.py │ └── test_run.py ├── resources ├── .gitignore ├── 2d │ └── .gitignore ├── algo_cond_settings.asciipb ├── algo_marg_settings.asciipb ├── bash │ ├── cleanup_tbb.sh │ ├── push_containers.sh │ └── setup_pre_commit.sh ├── benchmarks │ ├── chains │ │ └── __init__.py │ └── default_algo_params.asciipb ├── datasets │ ├── dde.csv │ ├── dde_covs.csv │ ├── dde_covs_grid.csv │ ├── dde_grid.csv │ ├── faithful.csv │ ├── faithful_grid.csv │ ├── galaxy.csv │ └── galaxy_grid.csv ├── docker │ ├── base │ │ └── Dockerfile │ ├── env │ │ └── Dockerfile │ └── test │ │ └── Dockerfile ├── logo_full.svg ├── logo_icon.svg ├── patches │ └── matplotplusplus.patch └── tutorial │ ├── .gitignore │ ├── algo.asciipb │ ├── data.csv │ ├── dp_gamma.asciipb │ ├── grid.csv │ ├── lapnig_fixed.asciipb │ ├── mfm_fixed.asciipb │ ├── nnig_ngg.asciipb │ ├── nnw_ngiw.asciipb │ └── out │ └── .gitignore ├── src ├── CMakeLists.txt ├── algorithms │ ├── CMakeLists.txt │ ├── base_algorithm.cc │ ├── base_algorithm.h │ ├── blocked_gibbs_algorithm.cc │ ├── blocked_gibbs_algorithm.h │ ├── conditional_algorithm.cc │ ├── conditional_algorithm.h │ ├── load_algorithms.h │ ├── marginal_algorithm.cc │ ├── marginal_algorithm.h │ ├── neal2_algorithm.cc │ ├── neal2_algorithm.h │ ├── neal3_algorithm.cc │ ├── neal3_algorithm.h │ ├── neal8_algorithm.cc │ ├── neal8_algorithm.h │ ├── semihdp_sampler.cc │ ├── semihdp_sampler.h │ ├── slice_sampler.cc │ ├── 
slice_sampler.h │ ├── split_and_merge_algorithm.cc │ └── split_and_merge_algorithm.h ├── collectors │ ├── .gitignore │ ├── CMakeLists.txt │ ├── base_collector.h │ ├── file_collector.cc │ ├── file_collector.h │ ├── memory_collector.cc │ └── memory_collector.h ├── hierarchies │ ├── CMakeLists.txt │ ├── README.md │ ├── abstract_hierarchy.h │ ├── base_hierarchy.h │ ├── fa_hierarchy.h │ ├── lapnig_hierarchy.h │ ├── likelihoods │ │ ├── CMakeLists.txt │ │ ├── abstract_likelihood.h │ │ ├── base_likelihood.h │ │ ├── fa_likelihood.cc │ │ ├── fa_likelihood.h │ │ ├── laplace_likelihood.cc │ │ ├── laplace_likelihood.h │ │ ├── likelihood_internal.h │ │ ├── multi_norm_likelihood.cc │ │ ├── multi_norm_likelihood.h │ │ ├── states │ │ │ ├── CMakeLists.txt │ │ │ ├── base_state.h │ │ │ ├── fa_state.h │ │ │ ├── includes.h │ │ │ ├── multi_ls_state.h │ │ │ ├── uni_lin_reg_ls_state.h │ │ │ └── uni_ls_state.h │ │ ├── uni_lin_reg_likelihood.cc │ │ ├── uni_lin_reg_likelihood.h │ │ ├── uni_norm_likelihood.cc │ │ └── uni_norm_likelihood.h │ ├── lin_reg_uni_hierarchy.h │ ├── load_hierarchies.h │ ├── nnig_hierarchy.h │ ├── nnw_hierarchy.h │ ├── nnxig_hierarchy.h │ ├── priors │ │ ├── CMakeLists.txt │ │ ├── abstract_prior_model.h │ │ ├── base_prior_model.h │ │ ├── fa_prior_model.cc │ │ ├── fa_prior_model.h │ │ ├── hyperparams.h │ │ ├── mnig_prior_model.cc │ │ ├── mnig_prior_model.h │ │ ├── nig_prior_model.cc │ │ ├── nig_prior_model.h │ │ ├── nw_prior_model.cc │ │ ├── nw_prior_model.h │ │ ├── nxig_prior_model.cc │ │ ├── nxig_prior_model.h │ │ └── prior_model_internal.h │ └── updaters │ │ ├── CMakeLists.txt │ │ ├── abstract_updater.h │ │ ├── fa_updater.cc │ │ ├── fa_updater.h │ │ ├── mala_updater.h │ │ ├── metropolis_updater.h │ │ ├── mnig_updater.cc │ │ ├── mnig_updater.h │ │ ├── nnig_updater.cc │ │ ├── nnig_updater.h │ │ ├── nnw_updater.cc │ │ ├── nnw_updater.h │ │ ├── nnxig_updater.cc │ │ ├── nnxig_updater.h │ │ ├── random_walk_updater.h │ │ ├── semi_conjugate_updater.h │ │ └── 
target_lpdf_unconstrained.h ├── includes.h ├── mixings │ ├── CMakeLists.txt │ ├── abstract_mixing.h │ ├── base_mixing.h │ ├── dirichlet_mixing.cc │ ├── dirichlet_mixing.h │ ├── load_mixings.h │ ├── logit_sb_mixing.cc │ ├── logit_sb_mixing.h │ ├── mixture_finite_mixing.cc │ ├── mixture_finite_mixing.h │ ├── pityor_mixing.cc │ ├── pityor_mixing.h │ ├── truncated_sb_mixing.cc │ └── truncated_sb_mixing.h ├── plots │ ├── CMakeLists.txt │ ├── plot_utils.cc │ └── plot_utils.h ├── proto │ ├── .gitignore │ ├── CMakeLists.txt │ ├── __init__.py │ ├── algorithm_id.proto │ ├── algorithm_params.proto │ ├── algorithm_state.proto │ ├── cpp │ │ └── .gitignore │ ├── distribution.proto │ ├── hierarchy_id.proto │ ├── hierarchy_prior.proto │ ├── ls_state.proto │ ├── matrix.proto │ ├── mixing_id.proto │ ├── mixing_prior.proto │ ├── mixing_state.proto │ ├── py │ │ ├── .gitignore │ │ └── __init__.py │ └── semihdp.proto ├── runtime │ ├── CMakeLists.txt │ └── factory.h └── utils │ ├── CMakeLists.txt │ ├── cluster_utils.cc │ ├── cluster_utils.h │ ├── covariates_getter.h │ ├── distributions.cc │ ├── distributions.h │ ├── eigen_utils.cc │ ├── eigen_utils.h │ ├── eval_like.cc │ ├── eval_like.h │ ├── io_utils.cc │ ├── io_utils.h │ ├── proto_utils.cc │ ├── proto_utils.h │ ├── rng.h │ ├── testing_utils.cc │ └── testing_utils.h └── test ├── CMakeLists.txt ├── collectors.cc ├── distributions.cc ├── eigen_utils.cc ├── gradient.cc ├── hierarchies.cc ├── likelihoods.cc ├── logit_sb.cc ├── lpdf.cc ├── prior_models.cc ├── proto_utils.cc ├── rng.cc ├── runtime.cc ├── semi_hdp.cc ├── slice_sampler.cc └── write_proto.cc /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: google 2 | ColumnLimit: 79 3 | -------------------------------------------------------------------------------- /.github/workflows/document_protos.yaml: -------------------------------------------------------------------------------- 1 | name: document_protos 2 | 3 | on: 4 | 
pull_request_target: 5 | branches: [master] 6 | paths: 7 | - "**.proto" 8 | 9 | workflow_dispatch: 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: check pwd 19 | run: echo $(pwd) 20 | 21 | - name: build_html 22 | run: docker run --rm --platform linux/amd64 -v $(pwd)/docs:/out -v $(pwd)/src/proto:/protos pseudomuto/protoc-gen-doc --doc_opt=html,protos.html 23 | 24 | - name: Commit changes 25 | uses: EndBug/add-and-commit@v7 26 | with: 27 | author_name: bayesmix-devs 28 | message: "auto update of docs/protos.html" 29 | add: "docs/protos.html" 30 | -------------------------------------------------------------------------------- /.github/workflows/publish_base_image.yaml: -------------------------------------------------------------------------------- 1 | name: Publish bayesmix-base image 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | workflow_dispatch: 8 | 9 | jobs: 10 | push_to_registry: 11 | name: Push bayesmix-base Docker image to Docker Hub 12 | if: github.repository == 'bayesmix-dev/bayesmix' 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Check out the repo 16 | uses: actions/checkout@v3 17 | 18 | - name: Set up Docker Buildx 19 | uses: docker/setup-buildx-action@v3 20 | 21 | - name: Login to Docker Hub 22 | uses: docker/login-action@v3 23 | with: 24 | username: ${{ secrets.MARIO_DOCKERHUB_USERNAME }} 25 | password: ${{ secrets.MARIO_DOCKERHUB_PASSWORD }} 26 | 27 | - name: Build and push bayesmix-base 28 | uses: docker/build-push-action@v5 29 | with: 30 | context: . 
31 | push: true 32 | file: resources/docker/base/Dockerfile 33 | tags: mberaha/bayesmix-base:latest 34 | -------------------------------------------------------------------------------- /.github/workflows/publish_env_image.yaml: -------------------------------------------------------------------------------- 1 | name: Publish bayesmix-env image 2 | 3 | on: workflow_dispatch 4 | 5 | jobs: 6 | push_to_registry: 7 | name: Push bayesmix-env Docker image to Docker Hub 8 | if: github.repository == 'bayesmix-dev/bayesmix' 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Check out the repo 12 | uses: actions/checkout@v3 13 | 14 | - name: Set up Docker Buildx 15 | uses: docker/setup-buildx-action@v3 16 | 17 | - name: Login to Docker Hub 18 | uses: docker/login-action@v3 19 | with: 20 | username: ${{ secrets.MARIO_DOCKERHUB_USERNAME }} 21 | password: ${{ secrets.MARIO_DOCKERHUB_PASSWORD }} 22 | 23 | - name: Build and push bayesmix-env 24 | uses: docker/build-push-action@v5 25 | with: 26 | context: . 
27 | push: true 28 | file: resources/docker/env/Dockerfile 29 | tags: mberaha/bayesmix-env:latest 30 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | branches: [master] 6 | 7 | workflow_dispatch: 8 | 9 | jobs: 10 | # This workflow contains a single job called "build" 11 | build: 12 | if: github.repository == 'bayesmix-dev/bayesmix' && github.event.pull_request.draft == false 13 | # The type of runner that the job will run on 14 | runs-on: ubuntu-latest 15 | # Steps represent a sequence of tasks that will be executed as part of the job 16 | steps: 17 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 18 | - name: Check out the repo 19 | uses: actions/checkout@v3 20 | 21 | - name: build 22 | run: docker 23 | 24 | - name: Build test Docker image 25 | run: docker build -f resources/docker/test/Dockerfile -t test . 
26 | 27 | - name: Run C++ tests 28 | run: docker run test ./build/test/test_bayesmix 29 | 30 | - name: Run Python tests 31 | run: docker run test /bin/bash -c "cd python && pytest" 32 | 33 | - name: Run R tests 34 | run: docker run test Rscript --vanilla -e "testthat::test_package('bayesmixr')" 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *pb2.py 2 | # Build folder 3 | build/ 4 | # Visual Studio Code folder 5 | .vscode 6 | # SFTP configuration file 7 | sftp-config.json 8 | # Sublime Text files 9 | *.sublime-* 10 | # Python compilation files 11 | *.pyc 12 | # File collectors 13 | *.recordio 14 | # PDF output files 15 | *.pdf 16 | # Local files 17 | *.local.* 18 | # MacOS storage files 19 | .DS_Store 20 | .dockerignore 21 | .ipynb_checkpoints/ 22 | docs/_build/ 23 | resources/benchmarks/datasets 24 | resources/2d 25 | # CLion cache 26 | .idea/ 27 | # Build debug folder 28 | cmake-build-debug/ 29 | # lib/_deps/ folder 30 | lib/_deps/ 31 | # .old folders 32 | test/.old/ 33 | src/hierarchies/updaters/.old/ 34 | examples/gamma_hierarchy/.old/ 35 | # .env file 36 | .env 37 | # R stuff 38 | .Rproj.user 39 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/.gitmodules -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python:
"3.11" 12 | 13 | # Build documentation in the docs/ directory with Sphinx 14 | sphinx: 15 | configuration: docs/conf.py 16 | 17 | # We recommend specifying your dependencies to enable reproducible builds: 18 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 19 | python: 20 | install: 21 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, bayesmix-dev 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /R/.gitignore: -------------------------------------------------------------------------------- 1 | # Files 2 | .Rbuildignore 3 | *.Rproj 4 | *.Rhistory 5 | *.Rdata 6 | *.nb.html 7 | 8 | # Folders 9 | build/ 10 | .Rproj.user/ 11 | -------------------------------------------------------------------------------- /R/bayesmixr/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bayesmixr 2 | Title: An R interface to BayesMix 3 | Version: 0.1.3 4 | Author: Matteo Gianella 5 | Maintainer: Matteo Gianella 6 | Description: This package provides a light-weight R interface for BayesMix C++ library. 
7 | License: BSD_3_clause + file LICENSE 8 | Encoding: UTF-8 9 | Roxygen: list(markdown = TRUE) 10 | RoxygenNote: 7.2.3 11 | Suggests: 12 | devtools (>= 2.4.5), 13 | testthat (>= 3.1.5) 14 | Config/testthat/edition: 3 15 | Imports: 16 | bitops (>= 1.0.7), 17 | RProtoBuf (>= 0.4.20), 18 | utils (>= 4.3.1), 19 | withr (>= 2.5.0) 20 | -------------------------------------------------------------------------------- /R/bayesmixr/LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2020 2 | COPYRIGHT HOLDER: bayesmix-dev 3 | ORGANIZATION: bayesmix 4 | -------------------------------------------------------------------------------- /R/bayesmixr/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(build_bayesmix) 4 | export(import_protobuf_messages) 5 | export(read_many_proto_from_file) 6 | export(run_mcmc) 7 | -------------------------------------------------------------------------------- /R/bayesmixr/R/decoder.R: -------------------------------------------------------------------------------- 1 | #' Return a decoder for a basic varint value (does not include tag). 2 | #' 3 | #' Decoded values will be bitwise-anded with the given mask before being 4 | #' returned, e.g. to limit them to 32 bits. The returned decoder does not take 5 | #' the usual "end" parameter -- the caller is expected to do bounds checking 6 | #' after the fact (often the caller can defer such checking until later). The 7 | #' decoder returns a (value, new_pos) pair. 
8 | #' 9 | #' @keywords internal 10 | VarintDecoder = function(mask, result_type) { 11 | 12 | # Define DecodeVarint function 13 | DecodeVarint <- function(buffer, pos) { 14 | result = 0 15 | shift = 0 16 | while (TRUE) { 17 | b = as.numeric(buffer[pos]) 18 | result = bitops::bitOr(result, bitops::bitShiftL(bitops::bitAnd(b, 0x7f), shift)) 19 | pos = pos + 1 20 | if (!bitops::bitAnd(b, 0x80)) { 21 | result <- bitops::bitAnd(result, mask) 22 | result <- result_type(result) 23 | return(list(result = result, pos = as.integer(pos))) 24 | } 25 | shift <- shift + 7 26 | if (shift >= 64) { 27 | stop('Too many bytes when decoding varint.') 28 | } 29 | } 30 | } 31 | 32 | # Return the decoder as result 33 | return(DecodeVarint) 34 | } 35 | 36 | #' Use this decoder version for values which must be limited to 32 bits. 37 | #' 38 | #' @keywords internal 39 | DecodeVarint32 = VarintDecoder(2^32 - 1, as.integer) 40 | -------------------------------------------------------------------------------- /R/bayesmixr/R/zzz.R: -------------------------------------------------------------------------------- 1 | # Parse internal renviron file to set BAYESMIX_EXE variable 2 | .onAttach <- function(...) { 3 | readRenviron(system.file("bayesmixr.Renviron", package = "bayesmixr")) 4 | } 5 | 6 | # Unset BAYESMIX_EXE variable on detaching 7 | .onDetach <- function(...) 
{ 8 | Sys.unsetenv("BAYESMIXR_HOME") 9 | Sys.unsetenv("BAYESMIX_EXE") 10 | Sys.unsetenv("TBB_PATH") 11 | } 12 | -------------------------------------------------------------------------------- /R/bayesmixr/cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Clean inst/ directory after install 4 | rm -rf ./inst 5 | -------------------------------------------------------------------------------- /R/bayesmixr/cleanup.win: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Clean inst/ directory after install 4 | rm -rf ./inst 5 | -------------------------------------------------------------------------------- /R/bayesmixr/configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Set BAYESMIXR_HOME environment variable 4 | mkdir -p ./inst && echo BAYESMIXR_HOME=$PWD > ./inst/bayesmixr.Renviron 5 | -------------------------------------------------------------------------------- /R/bayesmixr/configure.win: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Set BAYESMIXR_HOME environment variable 4 | mkdir -p ./inst && echo BAYESMIXR_HOME=$(cygpath -m $PWD) > ./inst/bayesmixr.Renviron 5 | -------------------------------------------------------------------------------- /R/bayesmixr/man/DecodeVarint32.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoder.R 3 | \name{DecodeVarint32} 4 | \alias{DecodeVarint32} 5 | \title{Use this decoder version for values which must be limited to 32 bits.} 6 | \usage{ 7 | DecodeVarint32(buffer, pos) 8 | } 9 | \description{ 10 | Use this decoder version for values which must be limited to 32 bits. 
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /R/bayesmixr/man/VarintDecoder.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decoder.R 3 | \name{VarintDecoder} 4 | \alias{VarintDecoder} 5 | \title{Return a decoder for a basic varint value (does not include tag).} 6 | \usage{ 7 | VarintDecoder(mask, result_type) 8 | } 9 | \description{ 10 | Decoded values will be bitwise-anded with the given mask before being 11 | returned, e.g. to limit them to 32 bits. The returned decoder does not take 12 | the usual "end" parameter -- the caller is expected to do bounds checking 13 | after the fact (often the caller can defer such checking until later). The 14 | decoder returns a (value, new_pos) pair. 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /R/bayesmixr/man/build_bayesmix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/build_bayesmix.R 3 | \name{build_bayesmix} 4 | \alias{build_bayesmix} 5 | \title{Builds the BayesMix executable} 6 | \usage{ 7 | build_bayesmix( 8 | nproc = ceiling(parallel::detectCores()/2), 9 | build_subdir = "build" 10 | ) 11 | } 12 | \arguments{ 13 | \item{nproc}{Number of processes to use for parallel compilation. Thanks to \code{parallel} package, 14 | this parameter defaults to half of the available processes (through \code{\link[parallel]{detectCores}} function)} 15 | 16 | \item{build_subdir}{Name for the sub-directory of \code{bayesmix/} folder in which configuration and compilation happens. 
17 | Default value is \code{build}.} 18 | } 19 | \value{ 20 | No output if build is successful, it raises errors otherwise 21 | } 22 | \description{ 23 | After the build, if no error has occurred, it saves the path into the \code{BAYESMIX_EXE} environment variable. 24 | Such variable is defined only when this package is loaded in the R session. 25 | } 26 | -------------------------------------------------------------------------------- /R/bayesmixr/man/import_protobuf_messages.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{import_protobuf_messages} 4 | \alias{import_protobuf_messages} 5 | \title{Import Protocol Buffers Descriptors of bayesmix} 6 | \usage{ 7 | import_protobuf_messages() 8 | } 9 | \description{ 10 | This utility loads in the workspace the protocol buffers descriptors defined 11 | in the \code{bayesmix} library, via \code{RProtoBuf} package. These 12 | descriptors can be used to handle the MCMC chain output of 13 | \code{\link{run_mcmc}} function. 14 | } 15 | -------------------------------------------------------------------------------- /R/bayesmixr/man/maybe_print_proto_to_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{maybe_print_proto_to_file} 4 | \alias{maybe_print_proto_to_file} 5 | \title{Print a protobuf message to file only if input is not a file} 6 | \usage{ 7 | maybe_print_proto_to_file(maybe_proto, proto_name = NULL, out_dir = NULL) 8 | } 9 | \description{ 10 | If \code{maybe_proto} is a file, returns the file name. If \code{maybe_proto} 11 | is a string representing a message, prints the message to a file and returns 12 | the file name. 
13 | } 14 | \keyword{internal} 15 | -------------------------------------------------------------------------------- /R/bayesmixr/man/read_many_proto_from_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{read_many_proto_from_file} 4 | \alias{read_many_proto_from_file} 5 | \title{Read many protobuf messages of the same type from a file} 6 | \usage{ 7 | read_many_proto_from_file(filename, msg_type) 8 | } 9 | \value{ 10 | A list of \code{RProtoBuf::Message} of type \code{msg_type} 11 | } 12 | \description{ 13 | This function parse the file given by \code{filename} and deserialize all 14 | protobuf messages of type \code{msg_type}. The latter is of type 15 | \code{RProtoBuf::Descriptor} 16 | } 17 | -------------------------------------------------------------------------------- /R/bayesmixr/tests/testthat.R: -------------------------------------------------------------------------------- 1 | test_check("bayesmixr") 2 | -------------------------------------------------------------------------------- /R/bayesmixr/tests/testthat/test_build.R: -------------------------------------------------------------------------------- 1 | test_that("build_bayesmix() is successful", { 2 | cat("\n") 3 | testthat::expect_no_error(build_bayesmix()) 4 | }) 5 | -------------------------------------------------------------------------------- /benchmarks/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.13.0) 2 | project(benchmark_bayesmix) 3 | enable_testing() 4 | 5 | add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../lib/math/lib/benchmark_1.5.1 build) 6 | 7 | add_executable(benchmark_bayesmix $ 8 | nnw_marg_lpdf.cc 9 | mcmc_runs.cc 10 | eval_lpdf.cc 11 | lpd_grid.cc 12 | main.cpp 13 | ) 14 | 15 | target_include_directories(benchmark_bayesmix PUBLIC ${INCLUDE_PATHS}) 16 
| target_link_libraries(benchmark_bayesmix PUBLIC 17 | ${LINK_LIBRARIES} benchmark::benchmark benchmark::benchmark_main) 18 | target_compile_options(benchmark_bayesmix PUBLIC ${COMPILE_OPTIONS}) 19 | -------------------------------------------------------------------------------- /benchmarks/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | BENCHMARK_MAIN(); 4 | -------------------------------------------------------------------------------- /benchmarks/mcmc_runs.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "benchmarks/utils.h" 4 | #include "src/includes.h" 5 | 6 | void run(std::shared_ptr& algorithm, 7 | const Eigen::MatrixXd& data, MemoryCollector* collector) { 8 | algorithm->set_data(data); 9 | algorithm->run(collector); 10 | } 11 | 12 | Eigen::MatrixXd get_data(int dim) { 13 | const char delim = ' '; 14 | Eigen::MatrixXd out; 15 | if (dim == 1) { 16 | out = bayesmix::read_eigen_matrix( 17 | "../resources/benchmarks/datasets/univariate_gaussian.csv", delim); 18 | } else { 19 | out = bayesmix::read_eigen_matrix( 20 | "../resources/benchmarks/datasets/multi_gaussian_dim_" + 21 | std::to_string(dim) + ".csv", 22 | delim); 23 | } 24 | return out; 25 | } 26 | 27 | std::string get_output_file(std::string algo_id, int dim) { 28 | std::string outfile; 29 | if (dim == 1) { 30 | outfile = "../resources/benchmarks/chains/" + algo_id + 31 | "_univariate_gaussian.recordio"; 32 | } else { 33 | outfile = "../resources/benchmarks/chains/" + algo_id + 34 | "_multi_gaussian_dim_" + std::to_string(dim) + ".recordio"; 35 | } 36 | return outfile; 37 | } 38 | 39 | static void BM_Neal2(benchmark::State& state) { 40 | int dim = state.range(0); 41 | Eigen::MatrixXd data = get_data(dim); 42 | MemoryCollector collector; 43 | for (auto _ : state) { 44 | std::shared_ptr algo = get_algorithm("Neal2", dim); 45 | run(algo, data, &collector); 46 | } 47 |
collector.write_to_file( 48 | get_output_file("Neal2", dim)); 49 | } 50 | 51 | static void BM_Neal3(benchmark::State& state) { 52 | int dim = state.range(0); 53 | Eigen::MatrixXd data = get_data(dim); 54 | MemoryCollector collector; 55 | for (auto _ : state) { 56 | std::shared_ptr algo = get_algorithm("Neal3", dim); 57 | run(algo, data, &collector); 58 | } 59 | collector.write_to_file( 60 | get_output_file("Neal3", dim)); 61 | } 62 | 63 | static void BM_Neal8(benchmark::State& state) { 64 | int dim = state.range(0); 65 | Eigen::MatrixXd data = get_data(dim); 66 | MemoryCollector collector; 67 | for (auto _ : state) { 68 | std::shared_ptr algo = get_algorithm("Neal8", dim); 69 | run(algo, data, &collector); 70 | } 71 | collector.write_to_file( 72 | get_output_file("Neal8", dim)); 73 | } 74 | 75 | BENCHMARK(BM_Neal2)->Arg(1)->Arg(2)->Arg(4)->Arg(8); 76 | BENCHMARK(BM_Neal3)->Arg(1)->Arg(2)->Arg(4)->Arg(8); 77 | BENCHMARK(BM_Neal8)->Arg(1)->Arg(2)->Arg(4)->Arg(8); 78 | -------------------------------------------------------------------------------- /benchmarks/nnw_marg_lpdf.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include "utils.h" 7 | 8 | static void BM_NNWPriorPred(benchmark::State& state) { 9 | int dim = state.range(0); 10 | auto hierarchy = get_multivariate_nnw_hierarchy(dim); 11 | Eigen::VectorXd x = Eigen::VectorXd::Zero(dim); 12 | for (auto _ : state) { 13 | hierarchy->prior_pred_lpdf(x); 14 | } 15 | } 16 | 17 | static void BM_NNWSampleFullCond(benchmark::State& state) { 18 | int dim = state.range(0); 19 | auto hierarchy = get_multivariate_nnw_hierarchy(dim); 20 | Eigen::MatrixXd data = Eigen::MatrixXd::Random(10, dim); 21 | for (auto _ : state) { 22 | hierarchy->initialize(); 23 | for (int i = 0; i < 10; i++) { 24 | hierarchy->add_datum(i, data.row(i)); 25 | } 26 | hierarchy->sample_full_cond(); 27 | } 28 | } 29 | 30 | static void 
BM_NNWConditionalPred(benchmark::State& state) { 31 | int dim = state.range(0); 32 | auto hierarchy = get_multivariate_nnw_hierarchy(dim); 33 | Eigen::MatrixXd data = Eigen::MatrixXd::Random(10, dim); 34 | for (int i = 0; i < 10; i++) { 35 | hierarchy->add_datum(i, data.row(i)); 36 | } 37 | Eigen::VectorXd x = Eigen::VectorXd::Zero(dim); 38 | 39 | for (auto _ : state) { 40 | std::dynamic_pointer_cast(hierarchy) 41 | ->save_posterior_hypers(); 42 | hierarchy->conditional_pred_lpdf(x); 43 | } 44 | } 45 | 46 | BENCHMARK(BM_NNWPriorPred)->RangeMultiplier(2)->Range(2, 2 << 5); 47 | BENCHMARK(BM_NNWSampleFullCond)->RangeMultiplier(2)->Range(2, 2 << 5); 48 | BENCHMARK(BM_NNWConditionalPred)->RangeMultiplier(2)->Range(2, 2 << 5); 49 | -------------------------------------------------------------------------------- /cmake/FindSphinx.cmake: -------------------------------------------------------------------------------- 1 | #Look for an executable called sphinx-build 2 | find_program(SPHINX_EXECUTABLE 3 | NAMES sphinx-build 4 | DOC "Path to sphinx-build executable") 5 | 6 | include(FindPackageHandleStandardArgs) 7 | 8 | #Handle standard arguments to find_package like REQUIRED and QUIET 9 | find_package_handle_standard_args(Sphinx 10 | "Failed to find sphinx-build executable" 11 | SPHINX_EXECUTABLE) 12 | -------------------------------------------------------------------------------- /cmake/ProtobufUtils.cmake: -------------------------------------------------------------------------------- 1 | # CMake function that adds compilation instructions for every .proto file in 2 | # a given FOLDER, passed as input.
3 | 4 | function(compile_protobuf_files) 5 | # Parse input arguments 6 | set(oneValueArgs FOLDER HEADERS SOURCES PYTHON_OUT_PATH) 7 | set(multiValueArgs INCLUDE_PROTO_PATHS) 8 | cmake_parse_arguments(arg "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) 9 | 10 | # Append all paths for protoc 11 | list(APPEND PROTO_DIRS "--proto_path=${arg_FOLDER}") 12 | if(NOT "${arg_INCLUDE_PROTO_PATHS}" STREQUAL "") 13 | foreach(PBPATH IN LISTS arg_INCLUDE_PROTO_PATHS) 14 | list(APPEND PROTO_DIRS "--proto_path=${PBPATH}") 15 | endforeach() 16 | endif() 17 | 18 | # Set --python_out option if PYTHON_OUT_PATH is set 19 | if(NOT "${arg_PYTHON_OUT_PATH}" STREQUAL "") 20 | set(PYTHON_OUT "--python_out=${arg_PYTHON_OUT_PATH}") 21 | endif() 22 | 23 | # Make custom command to compile each ProtoFile in FOLDER_PATH 24 | file(GLOB ProtoFiles "${arg_FOLDER}/*.proto") 25 | set(PROTO_DIR proto) 26 | foreach(PROTO_FILE IN LISTS ProtoFiles) 27 | message(STATUS "protoc proto(cc): ${PROTO_FILE}") 28 | get_filename_component(PROTO_DIR ${PROTO_FILE} DIRECTORY) 29 | get_filename_component(PROTO_NAME ${PROTO_FILE} NAME_WE) 30 | set(PROTO_HDR ${CMAKE_CURRENT_BINARY_DIR}/${PROTO_NAME}.pb.h) 31 | set(PROTO_SRC ${CMAKE_CURRENT_BINARY_DIR}/${PROTO_NAME}.pb.cc) 32 | message(STATUS "protoc hdr: ${PROTO_HDR}") 33 | message(STATUS "protoc src: ${PROTO_SRC}") 34 | add_custom_command( 35 | OUTPUT ${PROTO_SRC} ${PROTO_HDR} 36 | COMMAND ${Protobuf_PROTOC_EXECUTABLE} ${PROTO_DIRS} 37 | "--cpp_out=${PROJECT_BINARY_DIR}" ${PYTHON_OUT} 38 | ${PROTO_FILE} 39 | DEPENDS ${PROTO_FILE} ${Protobuf_PROTOC_EXECUTABLE} 40 | COMMENT "Generate C++ protocol buffer for ${PROTO_FILE}" 41 | VERBATIM) 42 | list(APPEND PROTO_HEADERS ${PROTO_HDR}) 43 | list(APPEND PROTO_SOURCES ${PROTO_SRC}) 44 | endforeach() 45 | SET_SOURCE_FILES_PROPERTIES(${PROTO_SOURCES} ${PROTO_HEADERS} PROPERTIES GENERATED TRUE) 46 | 47 | # Propagate PROTO_HEADERS and PROTO_SOURCES to parent scope 48 | set(${arg_HEADERS} ${PROTO_HEADERS} PARENT_SCOPE) 49 |
set(${arg_SOURCES} ${PROTO_SOURCES} PARENT_SCOPE) 50 | endfunction() 51 | -------------------------------------------------------------------------------- /cmake/math.cmake: -------------------------------------------------------------------------------- 1 | # Fetching bayesmix-dev/math 2 | message(STATUS "") 3 | message(STATUS "Fetching bayesmix-dev/math") 4 | FetchContent_Declare(math 5 | GIT_REPOSITORY "https://github.com/bayesmix-dev/math.git" 6 | GIT_TAG "develop" 7 | ) 8 | FetchContent_MakeAvailable(math) 9 | 10 | # Set TBB_ROOT variable 11 | set(TBB_ROOT ${math_SOURCE_DIR}/lib/tbb) 12 | 13 | # Define make command 14 | if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") 15 | set(MAKE_COMMAND mingw32-make) 16 | else() 17 | set(MAKE_COMMAND make) 18 | endif() 19 | 20 | # Set extra compiler flags for Windows 21 | if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") 22 | file(APPEND ${math_SOURCE_DIR}/make/local "CXXFLAGS+=-Wno-nonnull\n") 23 | file(APPEND ${math_SOURCE_DIR}/make/local "TBB_CXXFLAGS=-U__MSVCRT_VERSION__ -D__MSVCRT_VERSION__=0x0E00\n") 24 | endif() 25 | 26 | # Compile math libraries 27 | message(STATUS "Compiling math libraries ...") 28 | execute_process( 29 | COMMAND ${MAKE_COMMAND} -f ./make/standalone math-libs 30 | RESULT_VARIABLE result 31 | WORKING_DIRECTORY ${math_SOURCE_DIR} 32 | ) 33 | if(result) 34 | message(FATAL_ERROR "Failed to compile math libraries (${result})!") 35 | endif() 36 | 37 | # Add TBB link directory 38 | link_directories(${TBB_ROOT}) 39 | 40 | # In Windows, add TBB_ROOT to PATH variable via batch file if not present 41 | if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") 42 | # Check if adding TBB_ROOT is already present in PATH 43 | file(TO_CMAKE_PATH "$ENV{PATH}" PATH) 44 | string(FIND "${PATH}" "${TBB_ROOT}" tbb_path-LOCATION) 45 | # If not present, add to PATH user environment variable 46 | if(tbb_path-LOCATION EQUAL -1) 47 | execute_process( 48 | COMMAND cmd.exe /C install-tbb.bat 49 | RESULT_VARIABLE result 50 | WORKING_DIRECTORY ${BASEPATH} 
51 | ) 52 | if(result) 53 | message(FATAL_ERROR "Failed to install TBB (${result})!") 54 | endif() 55 | endif() 56 | endif() 57 | -------------------------------------------------------------------------------- /cmake/matplotplusplus.cmake: -------------------------------------------------------------------------------- 1 | # Define patch command to inject 2 | set(matplotplusplus_patch git apply ${BASEPATH}/resources/patches/matplotplusplus.patch) 3 | 4 | # Make matplotplusplus available (+ patch) 5 | message(STATUS "") 6 | message(STATUS "Fetching alandefreitas/matplotplusplus") 7 | FetchContent_Declare(matplotplusplus 8 | GIT_REPOSITORY "https://github.com/alandefreitas/matplotplusplus.git" 9 | GIT_TAG "v1.2.1" 10 | PATCH_COMMAND ${matplotplusplus_patch} 11 | ) 12 | FetchContent_MakeAvailable(matplotplusplus) 13 | -------------------------------------------------------------------------------- /cmake/protobuf.cmake: -------------------------------------------------------------------------------- 1 | include(GNUInstallDirs) 2 | 3 | # Set protobuf options 4 | set(Protobuf_USE_STATIC_LIBS ON) 5 | set(Protobuf_MSVC_STATIC_RUNTIME OFF) 6 | set(protobuf_BUILD_TESTS OFF) 7 | set(protobuf_BUILD_PROTOC_BINARIES ON) 8 | 9 | # Fetch protocolbuffers_protobuf 10 | message(STATUS "") 11 | message(STATUS "Fetching protocolbuffers/protobuf") 12 | FetchContent_Declare(protobuf 13 | DOWNLOAD_EXTRACT_TIMESTAMP TRUE 14 | URL "https://github.com/protocolbuffers/protobuf/archive/refs/tags/v3.19.5.tar.gz" 15 | ) 16 | FetchContent_MakeAvailable(protobuf) 17 | 18 | # Set variables 19 | set(Protobuf_ROOT ${protobuf_SOURCE_DIR}/cmake) 20 | set(Protobuf_DIR ${Protobuf_ROOT}/${CMAKE_INSTALL_LIBDIR}/cmake/protobuf) 21 | 22 | # Configure protobuf 23 | message(STATUS "Setting up protobuf ...") 24 | execute_process( 25 | COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_PROTOC_BINARIES=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -G
"${CMAKE_GENERATOR}" . 26 | RESULT_VARIABLE result 27 | WORKING_DIRECTORY ${Protobuf_ROOT} 28 | ) 29 | if(result) 30 | message(FATAL_ERROR "Failed to configure protobuf (${result})!") 31 | endif() 32 | 33 | # Build protobuf 34 | message(STATUS "Building protobuf ...") 35 | execute_process( 36 | COMMAND ${CMAKE_COMMAND} --build . 37 | RESULT_VARIABLE result 38 | WORKING_DIRECTORY ${Protobuf_ROOT} 39 | ) 40 | if(result) 41 | message(FATAL_ERROR "Failed to build protobuf (${result})!") 42 | endif() 43 | 44 | # Find package in installed folder 45 | find_package(Protobuf REQUIRED HINTS ${Protobuf_DIR}) 46 | 47 | # Include protobuf related information 48 | include(${Protobuf_DIR}/protobuf-config.cmake) 49 | include(${Protobuf_DIR}/protobuf-module.cmake) 50 | include(${Protobuf_DIR}/protobuf-options.cmake) 51 | include(${Protobuf_DIR}/protobuf-targets.cmake) 52 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | # MacOS storage files 2 | .DS_Store 3 | # Make files generated by CMake 4 | make.bat 5 | Makefile 6 | -------------------------------------------------------------------------------- /docs/algorithms.rst: -------------------------------------------------------------------------------- 1 | bayesmix/algorithms 2 | 3 | The ``Algorithm`` class handles other class objects and performs the MCMC simulation. 4 | There are two types of ``Algorithm``: marginal and conditional, each of which can only be used with the matching type of ``Mixing``. 5 | 6 | Algorithms 7 | ========== 8 | .. doxygenclass:: BaseAlgorithm 9 | :project: bayesmix 10 | :members: 11 | .. doxygenclass:: MarginalAlgorithm 12 | :project: bayesmix 13 | :members: 14 | .. doxygenclass:: Neal2Algorithm 15 | :project: bayesmix 16 | :members: 17 | .. doxygenclass:: Neal3Algorithm 18 | :project: bayesmix 19 | :members: 20 | ..
doxygenclass:: Neal8Algorithm 21 | :project: bayesmix 22 | :members: 23 | .. doxygenclass:: SplitAndMergeAlgorithm 24 | :project: bayesmix 25 | :members: 26 | .. doxygenclass:: ConditionalAlgorithm 27 | :project: bayesmix 28 | :members: 29 | .. doxygenclass:: BlockedGibbsAlgorithm 30 | :project: bayesmix 31 | :members: 32 | -------------------------------------------------------------------------------- /docs/collectors.rst: -------------------------------------------------------------------------------- 1 | bayesmix/collectors 2 | 3 | Collectors 4 | ========== 5 | .. doxygenclass:: BaseCollector 6 | :project: bayesmix 7 | :members: 8 | .. doxygenclass:: FileCollector 9 | :project: bayesmix 10 | :members: 11 | .. doxygenclass:: MemoryCollector 12 | :project: bayesmix 13 | :members: 14 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import subprocess 4 | sys.path.insert(0, os.path.abspath('.')) 5 | sys.path.insert(0, os.path.abspath('..')) 6 | sys.path.insert(0, os.path.abspath('../python')) 7 | sys.path.insert(0, os.path.abspath('../python/bayesmixpy')) 8 | 9 | 10 | def configureDoxyfile(input_dir, output_dir): 11 | with open('Doxyfile.in', 'r') as file : 12 | filedata = file.read() 13 | 14 | filedata = filedata.replace('@DOXYGEN_INPUT_DIR@', input_dir) 15 | filedata = filedata.replace('@DOXYGEN_OUTPUT_DIR@', output_dir) 16 | 17 | with open('Doxyfile', 'w') as file: 18 | file.write(filedata) 19 | 20 | # Check if we're running on Read the Docs' servers 21 | read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True' 22 | 23 | breathe_projects = { "bayesmix": "../build/docs/docs/doxygen/xml " } 24 | breathe_default_project = "bayesmix" 25 | 26 | 27 | if read_the_docs_build: 28 | input_dir = '../src' 29 | output_dir = 'build' 30 | configureDoxyfile(input_dir, output_dir) 31 | 
subprocess.call('doxygen', shell=True) 32 | breathe_projects['bayesmix'] = output_dir + '/xml' 33 | 34 | 35 | project = 'bayesmix' 36 | copyright = '2021, Guindani, B. and Beraha, M.' 37 | author = 'Guindani, B. and Beraha, M.' 38 | 39 | # The full version, including alpha/beta/rc tags 40 | release = '0.0.1' 41 | 42 | extensions = [ 43 | 'sphinx.ext.autodoc', 44 | 'sphinx.ext.doctest', 45 | 'sphinx.ext.mathjax', 46 | 'sphinx.ext.napoleon', 47 | 'sphinx.ext.viewcode', 48 | 'sphinx.ext.imgmath', 49 | 'sphinx.ext.todo', 50 | 'breathe', 51 | ] 52 | 53 | 54 | templates_path = ['_templates'] 55 | 56 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 57 | 58 | html_theme = 'haiku' 59 | 60 | highlight_language = 'cpp' 61 | 62 | imgmath_latex = 'latex' 63 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. bayesmix documentation master file, created by 2 | sphinx-quickstart on Sun Jun 27 08:35:53 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | bayesmix: a nonparametric C++ library for mixture models 7 | ======================================================== 8 | 9 | .. image:: ../resources/logo_full.svg 10 | :width: 250px 11 | :alt: bayesmix full logo 12 | 13 | .. image:: 14 | https://readthedocs.org/projects/bayesmix/badge/?version=latest 15 | :target: https://bayesmix.readthedocs.io/en/latest/?badge=latest 16 | :alt: Documentation Status 17 | 18 | ``bayesmix`` is a C++ library for running MCMC simulations in Bayesian mixture models. 19 | It uses the ``Eigen`` library for vector-matrix manipulation and linear algebra, and ``protobuf`` (Protocol Buffers) for communication and storage of structured data. 
20 | 21 | 22 | 23 | Submodules 24 | ========== 25 | 26 | There are currently three submodules to the ``bayesmix`` library, represented by three classes of objects: 27 | 28 | - ``Algorithms`` 29 | - ``Hierarchies`` 30 | - ``Mixings`` 31 | 32 | Further, we employ Protocol buffers for several purposes, including serialization. The list of all protos with their docs is available in the ``protos`` link below. 33 | 34 | .. toctree:: 35 | :maxdepth: 2 36 | :titlesonly: 37 | :caption: API: library submodules 38 | 39 | algorithms 40 | hierarchies 41 | mixings 42 | collectors 43 | protos 44 | utils 45 | 46 | 47 | Tutorials 48 | ========= 49 | 50 | .. toctree:: 51 | :maxdepth: 1 52 | 53 | tutorial 54 | 55 | .. :doc:`tutorial` 56 | 57 | 58 | Python interface 59 | ================ 60 | 61 | .. toctree:: 62 | :maxdepth: 1 63 | 64 | python_interface 65 | 66 | 67 | Indices and tables 68 | ================== 69 | 70 | * :ref:`genindex` 71 | * :ref:`modindex` 72 | * :ref:`search` 73 | -------------------------------------------------------------------------------- /docs/mixings.rst: -------------------------------------------------------------------------------- 1 | bayesmix/mixings 2 | 3 | Mixings 4 | ======= 5 | 6 | In the algorithms of the library, we store a single ``Mixing`` object that represents a prior for the mixing weights for the mixture models and the induced exchangeable partition probability function (EPPF). 7 | There are two types of ``Mixing``: marginal and conditional, each of which can only be used with the matching type of ``Algorithm``. 8 | For both of these types, certain API functions are required. 9 | 10 | 11 | -------------- 12 | Code structure 13 | -------------- 14 | 15 | We employ a Curiously Recurring Template Pattern coupled with an abstract interface, similarly to the ``Hierarchy`` class (see :ref:`here `). 
16 | The code thus consists of: a virtual class defining the API, a template base class that is the base for the CRTP and derived child classes that fully specialize the template arguments. 17 | The class ``AbstractMixing`` defines the API, i.e. all the methods that need to be called from outside of a ``Mixing`` class. 18 | A template class ``BaseMixing`` inherits from ``AbstractMixing`` and implements some of the necessary virtual methods, which need not be implemented by the child classes. 19 | 20 | 21 | ------- 22 | Classes 23 | ------- 24 | 25 | .. doxygenclass:: AbstractMixing 26 | :project: bayesmix 27 | :members: 28 | .. doxygenclass:: BaseMixing 29 | :project: bayesmix 30 | :members: 31 | .. doxygenclass:: DirichletMixing 32 | :project: bayesmix 33 | :members: 34 | .. doxygenclass:: PitYorMixing 35 | :project: bayesmix 36 | :members: 37 | .. doxygenclass:: MixtureFiniteMixing 38 | :project: bayesmix 39 | :members: 40 | .. doxygenclass:: TruncatedSBMixing 41 | :project: bayesmix 42 | :members: 43 | .. doxygenclass:: LogitSBMixing 44 | :project: bayesmix 45 | :members: 46 | -------------------------------------------------------------------------------- /docs/protos.rst: -------------------------------------------------------------------------------- 1 | bayesmix/protos 2 | 3 | .. _protos: 4 | 5 | Protos 6 | ====== 7 | 8 | This library depends on Google's `Protocol Buffers <https://protobuf.dev/>`_, also known as ``protobuf``, which provides a convenient way to define classes that represent structured data. 9 | Special classes henceforth referred to as ``protobuf`` messages, or protos for short, can be defined in ``.proto`` files. A special compiler, ``protoc``, is automatically called by the library to generate C++ and/or Python classes for each message. 10 | The ``protobuf`` runtime library provides fast serialization of messages into bytes, which can be used to save objects to disk or pass serialized objects from one language to another.
11 | 12 | A description of all protos used in ``bayesmix`` follows. 13 | These range from simple enumerator identifiers (enums) and basic data types such as vectors or matrices, to objects representing probability distributions, hyperpriors, states, or hyperparameter values. 14 | Some of these protos are embedded in one another, possibly using the ``oneof`` keyword, which allows the outer proto to flexibly choose and contain one type of object among many different ones. 15 | For instance, this is the case with protos representing hyperpriors, which can have increasing degrees of complexity depending on which model is chosen by the user. 16 | 17 | The use of protos allows easy interface between multiple programming languages, as well as *a posteriori* analysis of MCMC chains. 18 | 19 | .. raw:: html 20 | :file: protos.html 21 | -------------------------------------------------------------------------------- /docs/python_interface.rst: -------------------------------------------------------------------------------- 1 | ========================================== 2 | BayesMixPy: a Python interface to BayesMix 3 | ========================================== 4 | 5 | Installation 6 | ============ 7 | 8 | After you have cloned the bayesmix github directory, navigate to the Python subfolder and install bayesmixpy using pip 9 | 10 | .. code-block:: shell 11 | 12 | cd python 13 | pip3 install -e . 14 | 15 | 16 | Usage 17 | ===== 18 | 19 | `bayesmixpy` provides two functions: `build_bayesmix` and `run_mcmc`. The first one 20 | installs `bayesmix` and its executables for you, while the second one calls the 21 | executable that runs the MCMC sampler from Python. 22 | 23 | Building bayesmix 24 | ----------------- 25 | 26 | To build `bayesmix`, in a Python shell or a notebook write 27 | 28 | .. 
code-block:: python 29 | 30 | from bayesmixpy import build_bayesmix 31 | 32 | n_proc = 4 # number of processors for building in parallel 33 | build_bayesmix(n_proc) 34 | 35 | 36 | This will print out the installation log and, if the installation was successful, the following message 37 | 38 | .. code-block:: shell 39 | 40 | Bayesmix executable is in '/build', 41 | export the environment variable BAYESMIX_EXE=build/run_mcmc 42 | 43 | 44 | Hence, for running the MCMC chain you should export the `BAYESMIX_EXE` environment variable. This can be done once and for all by copying 45 | 46 | .. code-block:: shell 47 | 48 | export BAYESMIX_EXE=build/run_mcmc 49 | 50 | in your .bashrc file (or .zshrc if you are a macOS user), or every time you use bayesmixpy, you can add the following lines on top of your Python script/notebook 51 | 52 | .. code-block:: python 53 | 54 | import os 55 | os.environ["BAYESMIX_EXE"] = "build/run_mcmc" 56 | 57 | from bayesmixpy import run_mcmc 58 | .... 59 | 60 | 61 | Running bayesmix 62 | ---------------- 63 | 64 | To `run_mcmc` users must define the model and the algorithm in some configuration files or text strings. See the notebooks in `python/notebooks/gaussian_mix_uni.ipynb` and `python/notebooks/gaussian_mix_multi.ipynb` for a concrete usage example. 65 | 66 | 67 | 68 | The BayesmixPy Package 69 | ========================= 70 | 71 | 72 | Functions 73 | --------- 74 | 75 | ..
automodule:: bayesmixpy 76 | :members: 77 | :undoc-members: 78 | :show-inheritance: 79 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | breathe 2 | -------------------------------------------------------------------------------- /docs/states.rst: -------------------------------------------------------------------------------- 1 | bayesmix/hierarchies/likelihoods/states 2 | 3 | States 4 | ====== 5 | 6 | ``States`` are classes used to store parameters :math:`\theta_h` of every mixture component. 7 | Their main purpose is to handle serialization and de-serialization of the state. 8 | Moreover, they allow one to go from the constrained to the unconstrained representation of the parameters (and vice versa) and to compute the associated determinant of the Jacobian appearing in the change of density formula. 9 | 10 | 11 | -------------- 12 | Code Structure 13 | -------------- 14 | 15 | All classes must inherit from the ``BaseState`` class 16 | 17 | .. doxygenclass:: State::BaseState 18 | :project: bayesmix 19 | :members: 20 | 21 | Depending on the chosen ``Updater``, the unconstrained representation might not be needed, and the methods ``get_unconstrained()``, ``set_from_unconstrained()`` and ``log_det_jac()`` might never be called. 22 | Therefore, we do not force users to implement them. 23 | Instead, the ``set_from_proto()`` and ``get_as_proto()`` methods are fundamental as they allow the interaction with Google's Protocol Buffers library. 24 | 25 | ------------- 26 | State Classes 27 | ------------- 28 | 29 | .. doxygenclass:: State::UniLS 30 | :project: bayesmix 31 | :members: 32 | 33 | .. doxygenclass:: State::MultiLS 34 | :project: bayesmix 35 | :members: 36 | 37 | ..
doxygenclass:: State::FA 38 | :project: bayesmix 39 | :members: 40 | :protected-members: 41 | -------------------------------------------------------------------------------- /docs/updaters.rst: -------------------------------------------------------------------------------- 1 | bayesmix/hierarchies/updaters 2 | 3 | Updaters 4 | ======== 5 | 6 | An ``Updater`` implements the machinery to sample from the full conditional distribution of a given hierarchy. 7 | 8 | The only operation performed is ``draw``, which samples from the full conditional, either exactly or via Markov chain Monte Carlo. 9 | 10 | .. doxygenclass:: AbstractUpdater 11 | :project: bayesmix 12 | :members: 13 | 14 | -------------- 15 | Code Structure 16 | -------------- 17 | 18 | We distinguish between semi-conjugate updaters and Metropolis-like updaters. 19 | 20 | 21 | Semi Conjugate Updaters 22 | ----------------------- 23 | 24 | A semi-conjugate updater can be used when the full conditional distribution has the same form as the prior. Therefore, to sample from the full conditional, it is sufficient to call the ``draw`` method of the prior, but with an updated set of hyperparameters. 25 | 26 | The class ``SemiConjugateUpdater`` defines the API 27 | 28 | .. doxygenclass:: SemiConjugateUpdater 29 | :project: bayesmix 30 | :members: 31 | 32 | Classes inheriting from this one should only implement the ``compute_posterior_hypers(...)`` member function. 33 | 34 | 35 | Metropolis-like Updaters 36 | ------------------------ 37 | 38 | A Metropolis updater uses the Metropolis-Hastings algorithm (or its variations) to sample from the full conditional density. 39 | 40 | .. doxygenclass:: MetropolisUpdater 41 | :project: bayesmix 42 | :members: 43 | 44 | 45 | Classes inheriting from this one should only implement the ``sample_proposal(...)`` method, which samples from the proposal distribution, and the ``proposal_lpdf`` one, which evaluates the log-probability density function of the proposal.
46 | 47 | --------------- 48 | Updater Classes 49 | --------------- 50 | 51 | .. doxygenclass:: RandomWalkUpdater 52 | :project: bayesmix 53 | :members: 54 | .. doxygenclass:: MalaUpdater 55 | :project: bayesmix 56 | :members: 57 | .. doxygenclass:: NNIGUpdater 58 | :project: bayesmix 59 | :members: 60 | :protected-members: 61 | .. doxygenclass:: NNxIGUpdater 62 | :project: bayesmix 63 | :members: 64 | :protected-members: 65 | .. doxygenclass:: NNWUpdater 66 | :project: bayesmix 67 | :members: 68 | :protected-members: 69 | .. doxygenclass:: MNIGUpdater 70 | :project: bayesmix 71 | :members: 72 | :protected-members: 73 | .. doxygenclass:: FAUpdater 74 | :project: bayesmix 75 | :members: 76 | :protected-members: 77 | -------------------------------------------------------------------------------- /docs/utils.rst: -------------------------------------------------------------------------------- 1 | bayesmix/utils 2 | 3 | Utils 4 | ===== 5 | 6 | Collection of miscellaneous auxiliary tools for the library. 7 | 8 | -------------------- 9 | Clustering utilities 10 | -------------------- 11 | .. doxygenfile:: cluster_utils.h 12 | :project: bayesmix 13 | 14 | ------------------------------ 15 | Distribution-related utilities 16 | ------------------------------ 17 | .. doxygenfile:: distributions.h 18 | :project: bayesmix 19 | 20 | ---------------------------------------------- 21 | ``Eigen`` matrix manipulation utilities 22 | ---------------------------------------------- 23 | .. doxygenfile:: eigen_utils.h 24 | :project: bayesmix 25 | 26 | -------------------------------- 27 | ``Eigen`` input-output utilities 28 | -------------------------------- 29 | .. doxygenfile:: io_utils.h 30 | :project: bayesmix 31 | 32 | ----------------------------------- 33 | ``protobuf`` input-output utilities 34 | ----------------------------------- 35 | .. doxygenfile:: proto_utils.h 36 | :project: bayesmix 37 | 38 | ----------- 39 | RNG wrapper 40 | ----------- 41 | .. 
doxygenfile:: rng.h 42 | :project: bayesmix 43 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.13.0) 2 | project(examples_bayesmix) 3 | 4 | add_executable(run_gamma $ 5 | gamma_hierarchy/run_gamma_gamma.cc 6 | gamma_hierarchy/gammagamma_hierarchy.h 7 | gamma_hierarchy/gamma_likelihood.h 8 | gamma_hierarchy/gamma_prior_model.h 9 | gamma_hierarchy/gammagamma_updater.h 10 | ) 11 | 12 | target_include_directories(run_gamma PUBLIC ${INCLUDE_PATHS}) 13 | target_link_libraries(run_gamma PUBLIC 14 | ${LINK_LIBRARIES}) 15 | target_compile_options(run_gamma PUBLIC ${COMPILE_OPTIONS}) 16 | -------------------------------------------------------------------------------- /examples/fa_hierarchy/in/algo.asciipb: -------------------------------------------------------------------------------- 1 | ##### GENERIC SETTINGS FOR ALL ALGORITHMS ##### 2 | # Algorithm ID string, e.g. "Neal2" 3 | algo_id: "Neal8" 4 | 5 | # RNG initial seed: any nonnegative integer 6 | rng_seed: 20201124 7 | 8 | # Number of iterations of the algorithm 9 | iterations: 1100 10 | 11 | # Number of initial iterations discarded by the algorithm 12 | burnin: 100 13 | 14 | # Number of clusters in which data will be first initialized 15 | # (NOTE: If you wish to initialize one datum per cluster, please write 0.) 16 | # (NOTE: This value is ONLY used for initialization, and it may be overwritten 17 | # by certain mixing objects, such as LogitSBMixing. Please check a mixing's 18 | # initialize() function to know for sure whether or not it will override this 19 | # value.) 20 | init_num_clusters: 3 21 | 22 | 23 | ##### ALGORITHM-SPECIFIC SETTINGS ##### 24 | # Neal8 number of auxiliary blocks 25 | # (NOTE: 3 is the recommended value in most cases, please change it only if you 26 | # know what you're doing.)
27 | neal8_n_aux: 3 28 | -------------------------------------------------------------------------------- /examples/fa_hierarchy/in/dp_gamma.asciipb: -------------------------------------------------------------------------------- 1 | gamma_prior { 2 | totalmass_prior { 3 | shape: 4.0 4 | rate: 2.0 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /examples/fa_hierarchy/in/fa.asciipb: -------------------------------------------------------------------------------- 1 | fixed_values { 2 | #Automatic initialization if size of parameters is 0. Use the syntax below to set hyperparameters manually. 3 | mutilde: { 4 | size:0 5 | data:[] 6 | #size:20 7 | #data:[50.71368, 51.54430, 52.06730, 52.86145, 53.39870, 54.14019, 54.92412, 55.46906, 56.30326, 57.10029, 57.64240, 58.27382, 58.84905, 59.47209, 60.35402, 60.87941, 61.67874, 62.22960, 63.06052, 63.57241] 8 | } 9 | beta: { 10 | size:0 11 | data:[] 12 | #size:20 13 | #data:[35.67803, 31.78040, 31.89471, 32.05035, 27.65942, 29.61652, 24.34434, 25.66590, 24.47986, 25.06988, 24.66126, 29.17324, 23.63442, 23.63596, 24.57538, 22.61197, 25.00478, 28.55595, 25.03113, 25.07533] 14 | } 15 | phi: 0.01 16 | alpha0: 5 17 | q: 5 18 | } 19 | -------------------------------------------------------------------------------- /examples/fa_hierarchy/out/.gitignore: -------------------------------------------------------------------------------- 1 | # Several image formats 2 | *.png 3 | *.svg 4 | *.pdf 5 | *.jpg 6 | *.jpeg 7 | *.eps 8 | # Output files 9 | *.csv 10 | *.recordio 11 | -------------------------------------------------------------------------------- /examples/fa_hierarchy/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | build/run_mcmc \ 4 | --algo-params-file examples/fa_hierarchy/in/algo.asciipb \ 5 | --hier-type FA --hier-args examples/fa_hierarchy/in/fa.asciipb \ 6 | --mix-type DP --mix-args 
examples/fa_hierarchy/in/dp_gamma.asciipb \ 7 | --coll-name examples/fa_hierarchy/out/chains.recordio \ 8 | --data-file examples/fa_hierarchy/in/data.csv \ 9 | --grid-file examples/fa_hierarchy/in/data.csv \ 10 | --dens-file examples/fa_hierarchy/out/density_file.csv \ 11 | --n-cl-file examples/fa_hierarchy/out/numclust.csv \ 12 | --clus-file examples/fa_hierarchy/out/clustering.csv \ 13 | --best-clus-file examples/fa_hierarchy/out/best_clustering.csv 14 | -------------------------------------------------------------------------------- /examples/gamma_hierarchy/gammagamma_hierarchy.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_GAMMA_GAMMA_HIERARCHY_H_ 2 | #define BAYESMIX_HIERARCHIES_GAMMA_GAMMA_HIERARCHY_H_ 3 | 4 | #include "gamma_likelihood.h" 5 | #include "gamma_prior_model.h" 6 | #include "gammagamma_updater.h" 7 | #include "hierarchy_id.pb.h" 8 | #include "src/hierarchies/base_hierarchy.h" 9 | 10 | class GammaGammaHierarchy 11 | : public BaseHierarchy { 13 | public: 14 | GammaGammaHierarchy(double shape_, double rate_alpha_, double rate_beta_) { 15 | auto prior = 16 | std::make_shared(shape_, rate_alpha_, rate_beta_); 17 | set_prior(prior); 18 | }; 19 | ~GammaGammaHierarchy() = default; 20 | 21 | bayesmix::HierarchyId get_id() const override { 22 | return bayesmix::HierarchyId::UNKNOWN_HIERARCHY; 23 | } 24 | 25 | void set_default_updater() { 26 | updater = std::make_shared(); 27 | } 28 | 29 | void initialize_state() override { 30 | // Get hypers 31 | auto hypers = prior->get_hypers(); 32 | // Initialize likelihood state 33 | State::Gamma state; 34 | state.shape = prior->get_shape(); 35 | state.rate = hypers.rate_alpha / hypers.rate_beta; 36 | like->set_state(state); 37 | }; 38 | 39 | double marg_lpdf(ProtoHypersPtr hier_params, 40 | const Eigen::RowVectorXd &datum) const override { 41 | throw( 42 | std::runtime_error("marg_lpdf() not implemented for this hierarchy")); 43 | return 0; 44 | } 
45 | }; 46 | 47 | #endif // BAYESMIX_HIERARCHIES_GAMMA_GAMMA_HIERARCHY_H_ 48 | -------------------------------------------------------------------------------- /examples/gamma_hierarchy/gammagamma_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_GAMMA_GAMMA_UPDATER_H_ 2 | #define BAYESMIX_HIERARCHIES_GAMMA_GAMMA_UPDATER_H_ 3 | 4 | #include "gamma_likelihood.h" 5 | #include "gamma_prior_model.h" 6 | #include "src/hierarchies/updaters/semi_conjugate_updater.h" 7 | 8 | class GammaGammaUpdater 9 | : public SemiConjugateUpdater { 10 | public: 11 | GammaGammaUpdater() = default; 12 | ~GammaGammaUpdater() = default; 13 | 14 | bool is_conjugate() const override { return true; }; 15 | 16 | ProtoHypersPtr compute_posterior_hypers(AbstractLikelihood& like, 17 | AbstractPriorModel& prior) override; 18 | 19 | std::shared_ptr clone() const override; 20 | }; 21 | 22 | /* DEFINITIONS */ 23 | AbstractUpdater::ProtoHypersPtr GammaGammaUpdater::compute_posterior_hypers( 24 | AbstractLikelihood& like, AbstractPriorModel& prior) { 25 | // Likelihood and Prior downcast 26 | auto& likecast = downcast_likelihood(like); 27 | auto& priorcast = downcast_prior(prior); 28 | 29 | // Getting required quantities from likelihood and prior 30 | int card = likecast.get_card(); 31 | double data_sum = likecast.get_data_sum(); 32 | double ndata = likecast.get_ndata(); 33 | double shape = priorcast.get_shape(); 34 | auto hypers = priorcast.get_hypers(); 35 | 36 | // No update possible 37 | if (card == 0) { 38 | return priorcast.get_hypers_proto(); 39 | } 40 | // Compute posterior hyperparameters 41 | double rate_alpha_new = hypers.rate_alpha + shape * ndata; 42 | double rate_beta_new = hypers.rate_beta + data_sum; 43 | 44 | // Proto conversion 45 | ProtoHypers out; 46 | out.mutable_general_state()->mutable_data()->Add(rate_alpha_new); 47 | out.mutable_general_state()->mutable_data()->Add(rate_beta_new); 48 | return 
std::make_shared(out); 49 | } 50 | 51 | std::shared_ptr GammaGammaUpdater::clone() const { 52 | auto out = std::make_shared(static_cast(*this)); 53 | out->clear_hypers(); 54 | return out; 55 | } 56 | 57 | #endif // BAYESMIX_HIERARCHIES_GAMMA_GAMMA_UPDATER_H_ 58 | -------------------------------------------------------------------------------- /examples/gamma_hierarchy/run_gamma_gamma.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "gammagamma_hierarchy.h" 4 | #include "src/includes.h" 5 | 6 | Eigen::MatrixXd simulate_data(const unsigned int ndata) { 7 | Eigen::MatrixXd data(ndata, 1); 8 | auto& rng = bayesmix::Rng::Instance().get(); 9 | for (int i = 0; i < ndata; i++) { 10 | if (stan::math::uniform_rng(0, 1, rng) < 0.5) { 11 | data(i, 0) = stan::math::gamma_rng(1, 5, rng); 12 | } else { 13 | data(i, 0) = stan::math::gamma_rng(1, 0.5, rng); 14 | } 15 | } 16 | return data; 17 | } 18 | 19 | int main() { 20 | auto hier = std::make_shared(1.0, 2.0, 2.0); 21 | 22 | bayesmix::DPPrior mix_prior; 23 | double totalmass = 1.0; 24 | mix_prior.mutable_fixed_value()->set_totalmass(totalmass); 25 | auto mixing = MixingFactory::Instance().create_object("DP"); 26 | mixing->get_mutable_prior()->CopyFrom(mix_prior); 27 | mixing->set_num_components(5); 28 | 29 | auto algo = AlgorithmFactory::Instance().create_object("Neal8"); 30 | MemoryCollector* coll = new MemoryCollector(); 31 | 32 | Eigen::MatrixXd data = simulate_data(50); 33 | algo->set_mixing(mixing); 34 | algo->set_data(data); 35 | algo->set_hierarchy(hier); 36 | 37 | bayesmix::AlgorithmParams params; 38 | params.set_algo_id("Neal8"); 39 | params.set_rng_seed(0); 40 | params.set_burnin(1000); 41 | params.set_iterations(2000); 42 | params.set_init_num_clusters(10); 43 | params.set_neal8_n_aux(1); 44 | 45 | algo->read_params_from_proto(params); 46 | algo->run(coll); 47 | 48 | delete coll; 49 | } 50 | 
-------------------------------------------------------------------------------- /examples/tutorial/2dplot.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | build/plot_mcmc \ 4 | --grid-file resources/datasets/faithful_grid.csv \ 5 | --dens-file resources/2d/density_2d.csv \ 6 | --dens-plot resources/2d/density.png \ 7 | --n-cl-file resources/2d/numclust_2d.csv \ 8 | --n-cl-trace-plot resources/2d/traceplot.png \ 9 | --n-cl-bar-plot resources/2d/nclus_barplot.png 10 | -------------------------------------------------------------------------------- /examples/tutorial/2drun.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | build/run_mcmc \ 4 | --algo-params-file resources/tutorial/algo.asciipb \ 5 | --hier-type NNW --hier-args resources/tutorial/nnw_ngiw.asciipb \ 6 | --mix-type DP --mix-args resources/tutorial/dp_gamma.asciipb \ 7 | --data-file resources/datasets/faithful.csv \ 8 | --grid-file resources/datasets/faithful_grid.csv \ 9 | --coll-name resources/2d/chains_2d.recordio \ 10 | --dens-file resources/2d/density_2d.csv \ 11 | --n-cl-file resources/2d/numclust_2d.csv \ 12 | --clus-file resources/2d/clustering_2d.csv 13 | -------------------------------------------------------------------------------- /examples/tutorial/plot.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | build/plot_mcmc \ 4 | --grid-file resources/tutorial/grid.csv \ 5 | --dens-file resources/tutorial/out/density.csv \ 6 | --dens-plot resources/tutorial/out/density.eps \ 7 | --n-cl-file resources/tutorial/out/numclust.csv \ 8 | --n-cl-trace-plot resources/tutorial/out/traceplot.eps \ 9 | --n-cl-bar-plot resources/tutorial/out/nclus_barplot.png 10 | -------------------------------------------------------------------------------- /examples/tutorial/run.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | build/run_mcmc \ 4 | --algo-params-file resources/tutorial/algo.asciipb \ 5 | --hier-type NNIG --hier-args resources/tutorial/nnig_ngg.asciipb \ 6 | --mix-type DP --mix-args resources/tutorial/dp_gamma.asciipb \ 7 | --coll-name resources/tutorial/out/chains.recordio \ 8 | --data-file resources/tutorial/data.csv \ 9 | --grid-file resources/tutorial/grid.csv \ 10 | --dens-file resources/tutorial/out/density.csv \ 11 | --n-cl-file resources/tutorial/out/numclust.csv \ 12 | --clus-file resources/tutorial/out/clustering.csv \ 13 | --best-clus-file resources/tutorial/out/best_clustering.csv 14 | -------------------------------------------------------------------------------- /install-tbb.bat: -------------------------------------------------------------------------------- 1 | @ECHO off 2 | echo Permanently setting TBB_ROOT to the PATH user environment variable: 3 | 4 | for /F "tokens=2* delims= " %%f IN ('reg query HKCU\Environment /v PATH ^| findstr /i path') do set OLD_SYSTEM_PATH="%%g" 5 | setx Path %~dp0lib\_deps\math-src\lib\tbb;%OLD_SYSTEM_PATH% 6 | 7 | echo Please close this shell and open a new shell. 8 | echo This will make the changes to the PATH variable become active. 
// source: https://github.com/prakhar1989/progress-cpp

#pragma once

#include <chrono>
#include <iostream>

namespace progresscpp {
//! Minimal text progress bar printed to std::cout.
//!
//! The bar is `bar_width` characters wide; completed cells are drawn with
//! `complete_char`, the current position with '>' and the remaining cells
//! with `incomplete_char`. Elapsed time is measured from construction.
class ProgressBar {
 private:
  unsigned int ticks = 0;

  const unsigned int total_ticks;
  const unsigned int bar_width;
  const char complete_char = '=';
  const char incomplete_char = ' ';
  const std::chrono::steady_clock::time_point start_time =
      std::chrono::steady_clock::now();

 public:
  //! @param total       number of ticks that corresponds to 100%
  //! @param width       width of the bar, in characters
  //! @param complete    character used for completed cells
  //! @param incomplete  character used for cells still to be done
  ProgressBar(unsigned int total, unsigned int width, char complete,
              char incomplete)
      : total_ticks{total},
        bar_width{width},
        complete_char{complete},
        incomplete_char{incomplete} {}

  //! Same as above, with the default '=' / ' ' characters.
  ProgressBar(unsigned int total, unsigned int width)
      : total_ticks{total}, bar_width{width} {}

  //! Advances the bar by one tick and returns the new tick count.
  unsigned int operator++() { return ++ticks; }

  //! Draws the bar, the percentage and the elapsed seconds on the current
  //! line (the output ends with '\r', so the next call overwrites it).
  void display() const {
    // An empty job (total_ticks == 0) is reported as complete instead of
    // dividing by zero; over-ticking is capped at 100%.
    float progress =
        (total_ticks == 0) ? 1.0f : static_cast<float>(ticks) / total_ticks;
    if (progress > 1.0f) {
      progress = 1.0f;
    }
    const unsigned int pos = static_cast<unsigned int>(bar_width * progress);

    const std::chrono::steady_clock::time_point now =
        std::chrono::steady_clock::now();
    const auto time_elapsed =
        std::chrono::duration_cast<std::chrono::milliseconds>(now - start_time)
            .count();

    std::cout << "[";

    // Unsigned index: bar_width and pos are unsigned as well.
    for (unsigned int i = 0; i < bar_width; ++i) {
      if (i < pos)
        std::cout << complete_char;
      else if (i == pos)
        std::cout << ">";
      else
        std::cout << incomplete_char;
    }
    std::cout << "] " << int(progress * 100.0) << "% "
              << float(time_elapsed) / 1000.0 << "s\r";
    std::cout.flush();
  }

  //! Draws the bar one last time and moves to the next line.
  void done() const {
    display();
    std::cout << std::endl;
  }
};
}  // namespace progresscpp
-------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: jupyter-nb-clear-output 5 | name: jupyter-nb-clear-output 6 | files: \.ipynb$ 7 | stages: [commit] 8 | language: system 9 | entry: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace 10 | 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v4.0.1 13 | hooks: 14 | - id: check-added-large-files # prevents giant files from being committed. 15 | - id: check-case-conflict # checks for files that would conflict in case-insensitive filesystems. 16 | - id: check-merge-conflict # checks for files that contain merge conflict strings. 17 | - id: check-yaml # checks yaml files for parseable syntax. 18 | - id: detect-private-key # detects the presence of private keys. 19 | - id: end-of-file-fixer # ensures that a file is either empty, or ends with one newline. 20 | - id: fix-byte-order-marker # removes utf-8 byte order marker. 21 | - id: mixed-line-ending # replaces or checks mixed line ending. 22 | - id: requirements-txt-fixer # sorts entries in requirements.txt. 23 | - id: trailing-whitespace # trims trailing whitespace. 
24 | 25 | - repo: https://github.com/pre-commit/mirrors-prettier 26 | rev: v2.4.1 27 | hooks: 28 | - id: prettier 29 | files: \.(js|ts|jsx|tsx|css|less|html|json|markdown|md|yaml|yml)$ 30 | 31 | - repo: https://github.com/pre-commit/mirrors-clang-format 32 | rev: v13.0.0 33 | hooks: 34 | - id: clang-format 35 | -------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .ipynb_checkpoints/ 3 | *.csv 4 | .env 5 | bayesmixpy.egg-info/ 6 | -------------------------------------------------------------------------------- /python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/python/__init__.py -------------------------------------------------------------------------------- /python/bayesmixpy/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['build_bayesmix', 'run_mcmc'] 2 | 3 | from .build_bayesmix import build_bayesmix 4 | from .run import run_mcmc 5 | -------------------------------------------------------------------------------- /python/bayesmixpy/build_bayesmix.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import subprocess 4 | import sys 5 | 6 | from distutils.spawn import find_executable 7 | 8 | from dotenv import set_key 9 | 10 | from .shell_utils import get_env_file, run_shell 11 | 12 | HERE = os.path.dirname(os.path.realpath(__file__)) 13 | path = pathlib.Path(HERE) 14 | BAYESMIX_HOME = os.environ.get("BAYESMIX_HOME", path.resolve().parents[1]) 15 | 16 | py2to3 = find_executable("2to3") 17 | PROTO_DIR = os.path.join(path, "proto/") 18 | 19 | 20 | def set_bayesmix_env(run_path): 21 | env_file = get_env_file() 22 | if not os.path.exists(env_file): 
def build_bayesmix(nproc=1, build_dirname="build"):
    """
    Builds the BayesMix ``run_mcmc`` executable with cmake and make.

    On success the path to the executable is stored under the key
    BAYESMIX_EXE through set_bayesmix_env, and the Python files in PROTO_DIR
    are converted in place with 2to3.

    Parameters
    ----------

    nproc : int
        Number of processes to use for parallel compilation.
    build_dirname : str
        Name of the build directory; it is created inside BAYESMIX_HOME if it
        does not exist yet.

    Returns
    -------

    True when every step succeeded, None when run_shell raised
    subprocess.CalledProcessError during the cmake or make step.
    The interpreter exits with status -1 when 2to3 is not available or its
    conversion fails.
    """
    print("Building the Bayesmix executable")
    build_dir = os.path.join(BAYESMIX_HOME, build_dirname)
    os.makedirs(build_dir, exist_ok=True)
    cmake_cmd = "cmake .. -DDISABLE_BENCHMARKS=TRUE -DDISABLE_TESTS=TRUE " + \
        "-DDISABLE_PLOTS=TRUE -DCMAKE_BUILD_TYPE=Release"
    make_cmd = "make run_mcmc -j{}".format(nproc)

    # Configure first, then compile; both steps share the same failure handling.
    for step_cmd in (cmake_cmd, make_cmd):
        try:
            run_shell(step_cmd, cwd=build_dir)
        except subprocess.CalledProcessError as e:
            print(e)
            print("Some error has occurred while building Bayesmix. The library has not"
                  " been installed!")
            return

    set_bayesmix_env("{0}/{1}".format(build_dir, "run_mcmc"))

    # py2to3 is None when the 2to3 executable is not on PATH; passing None to
    # subprocess.call would otherwise fail with an opaque TypeError.
    if py2to3 is None:
        print("The 2to3 executable was not found on PATH: cannot convert the "
              "files in {0}".format(PROTO_DIR))
        sys.exit(-1)

    two_to_three_command = [
        py2to3, "--output-dir={0}".format(PROTO_DIR), "-W", "-n", PROTO_DIR]
    print("********* CALLING 2to3 ***********")
    print(" ".join(two_to_three_command))
    if subprocess.call(two_to_three_command) != 0:
        sys.exit(-1)

    return True
import os
import subprocess

HERE = os.path.dirname(os.path.realpath(__file__))


def run_shell(cmd, flush_startswith=None, cwd=None):
    """Runs a command, echoing its output line by line.

    The command string is split on whitespace and executed without a shell;
    stderr is merged into stdout and stdin is closed.

    Parameters
    ----------
    cmd : str
        Command line to execute (arguments must not contain whitespace).
    flush_startswith : str, optional
        Lines starting with this prefix are printed preceded by a carriage
        return and without a trailing newline, so consecutive lines of this
        kind overwrite each other.
    cwd : str, optional
        Working directory for the command.

    Raises
    ------
    subprocess.CalledProcessError
        If the command exits with a non-zero status.
    """
    proc = subprocess.Popen(
        cmd.split(),
        bufsize=1,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        env=os.environ,
        universal_newlines=True,
        cwd=cwd)

    # Iterating over the pipe reads until EOF, so nothing that is still
    # buffered when the process exits is lost; leaving the `with` block closes
    # the pipe and waits for the process.
    with proc:
        for raw_line in proc.stdout:
            line = raw_line.strip()
            if flush_startswith and \
                    line.startswith(flush_startswith):
                print("\r{0}".format(line), end=' ', flush=True)
            else:
                print("{0}".format(line))

    # Report failures to the caller instead of silently ignoring them.
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, cmd)


def get_env_file():
    """Returns the path of the `.env` file located next to this module."""
    return os.path.join(HERE, ".env")
-------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools >= 40.9.0", 4 | "wheel", 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | -------------------------------------------------------------------------------- /python/requirements.txt: -------------------------------------------------------------------------------- 1 | 2to3 2 | matplotlib>=2.0.1 3 | numpy>=1.18.4 4 | protobuf==3.19.5 5 | python-dotenv>=0.20.0 6 | -------------------------------------------------------------------------------- /python/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/python/scripts/__init__.py -------------------------------------------------------------------------------- /python/scripts/populate_benchmark_datasets.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | multivariate_dims = [2, 4, 8] 5 | N_BY_CLUS = 10 6 | BASE_PATH = os.path.join("resources", "benchmarks", "datasets") 7 | BASE_CHAIN_PATH = os.path.join("resources", "benchmarks", "chains") 8 | 9 | if __name__ == '__main__': 10 | os.makedirs(BASE_PATH, exist_ok=True) 11 | os.makedirs(BASE_CHAIN_PATH, exist_ok=True) 12 | 13 | np.random.seed(2021) 14 | 15 | univ_y = np.concatenate( 16 | [np.random.normal(loc=-5, size=N_BY_CLUS), 17 | np.random.normal(loc=5, size=N_BY_CLUS)]) 18 | 19 | fname = os.path.join(BASE_PATH, "univariate_gaussian.csv") 20 | np.savetxt(fname, univ_y, delimiter=',') 21 | 22 | for d in multivariate_dims: 23 | multiv_y = np.vstack( 24 | [np.random.normal(loc=-5, size=(N_BY_CLUS, d)), 25 | np.random.normal(loc=5, size=(N_BY_CLUS, d))]) 26 | 27 | fname = os.path.join( 28 | BASE_PATH, 
"multi_gaussian_dim_{0}.csv".format(d)) 29 | np.savetxt(fname, multiv_y, delimiter=',') 30 | -------------------------------------------------------------------------------- /python/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = bayesmixpy 3 | version = 0.0.1 4 | license = Apache 2.0 5 | license_files = LICENSE 6 | author = Mario Beraha 7 | author_email = berahamario@gmail.com 8 | description = BAYESMIXPY: A Python interface to BayesMix. 9 | long_description = file: README.md 10 | long_description_content_type = text/markdown 11 | url = https://github.com/bayesmix-dev/bayesmix/tree/master/python 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | 15 | [options] 16 | packages = find: 17 | python_requires = >=3.6 18 | -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import setuptools 3 | import site 4 | import sys 5 | site.ENABLE_USER_SITE = '--user' in sys.argv[1:] 6 | 7 | __version__ = "0.0.1" 8 | folder = os.path.dirname(__file__) 9 | path = os.path.join(folder, 'requirements.txt') 10 | install_requires = [] 11 | if os.path.exists(path): 12 | with open(path) as fp: 13 | install_requires = [line.strip() for line in fp] 14 | 15 | 16 | setuptools.setup(version=__version__, 17 | install_requires=install_requires) 18 | -------------------------------------------------------------------------------- /python/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/python/tests/__init__.py -------------------------------------------------------------------------------- /python/tests/test_build.py: -------------------------------------------------------------------------------- 1 | from bayesmixpy import 
build_bayesmix 2 | 3 | def test_build(): 4 | success = build_bayesmix() 5 | assert success == True 6 | -------------------------------------------------------------------------------- /resources/.gitignore: -------------------------------------------------------------------------------- 1 | # Resources subfolders 2 | csv 3 | asciipb 4 | -------------------------------------------------------------------------------- /resources/2d/.gitignore: -------------------------------------------------------------------------------- 1 | # Several image formats 2 | *.png 3 | *.svg 4 | *.pdf 5 | *.jpg 6 | *.jpeg 7 | *.eps 8 | # Output files 9 | *.csv 10 | -------------------------------------------------------------------------------- /resources/algo_cond_settings.asciipb: -------------------------------------------------------------------------------- 1 | ##### GENERIC SETTINGS FOR ALL ALGORITHMS ##### 2 | # Algorithm ID string, e.g. "Neal2" 3 | algo_id: "BlockedGibbs" 4 | 5 | # RNG initial seed: any nonnegative integer 6 | rng_seed: 20201124 7 | 8 | # Number of iterations of the algorithm 9 | iterations: 1000 10 | 11 | # Number of initial iterations discarded by the algorithm 12 | burnin: 100 13 | 14 | # Number of clusters in which data will be first initialized 15 | # (NOTE: If you wish to initialize one datum per cluster, please write 0.) 16 | # (NOTE: This value is ONLY used for initialization, and it may be overwritten 17 | # by certain mixing objects, such as LogSBMixing. Please check a mixing's 18 | # initialize() function to know for sure whether or not it will override this 19 | # value.) 20 | init_num_clusters: 3 21 | -------------------------------------------------------------------------------- /resources/algo_marg_settings.asciipb: -------------------------------------------------------------------------------- 1 | ##### GENERIC SETTINGS FOR ALL ALGORITHMS ##### 2 | # Algorithm ID string, e.g. 
"Neal2" 3 | algo_id: "Neal3" 4 | 5 | # RNG initial seed: any nonnegative integer 6 | rng_seed: 20201124 7 | 8 | # Number of iterations of the algorithm 9 | iterations: 1100 10 | 11 | # Number of initial iterations discarded by the algorithm 12 | burnin: 100 13 | 14 | # Number of clusters in which data will be first initialized 15 | # (NOTE: If you wish to initialize one datum per cluster, please write 0.) 16 | # (NOTE: This value is ONLY used for initialization, and it may be overwritten 17 | # by certain mixing objects, such as LogSBMixing. Please check a mixing's 18 | # initialize() function to know for sure whether or not it will override this 19 | # value.) 20 | init_num_clusters: 3 21 | 22 | 23 | ##### ALGORITHM-SPECIFIC SETTINGS ##### 24 | # Neal8 number of auxiliary blocks 25 | # (NOTE: 3 is the recommended value in most cases, please change it only if you 26 | # know what you're doing.) 27 | neal8_n_aux: 3 28 | 29 | ##### SPLIT AND MERGE SETTING ##### 30 | # Split and Merge number of restricted GS scans for each MH step. 31 | splitmerge_n_restr_gs_updates: 5 32 | 33 | # Split and Merge number of MH updates for each iteration 34 | splitmerge_n_mh_updates: 1 35 | 36 | # Split and Merge number of full GS scans for each iteration 37 | splitmerge_n_full_gs_updates: 1 38 | -------------------------------------------------------------------------------- /resources/bash/cleanup_tbb.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Run this script if build_tbb.py gives a "busy file" error at line 45. 
4 | 5 | # Folders from build_tbb.py 6 | # stan_math_lib=lib/math/lib 7 | # tbb_root=lib/math/lib/tbb_2019_U8 8 | # tbb_debug=lib/math/lib/tbb_debug 9 | # tbb_release=lib/math/lib/tbb_release 10 | # tbb_dir=lib/math/lib/tbb 11 | rm -rf lib/math/lib/tbb_debug/ 12 | mv lib/math/lib/tbb_2019_U8/include/ lib/math/lib/tbb 13 | rm -rf lib/math/lib/tbb_2019_U8/ 14 | for name in lib/math/lib/tbb_release/*; do 15 | mv $name lib/math/lib/tbb 16 | done 17 | rm -rf lib/math/lib/tbb_release 18 | -------------------------------------------------------------------------------- /resources/bash/push_containers.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | docker build -f docker/env/Dockerfile --platform linux/x86_64 -t mberaha/bayesmix-env . 3 | docker push mberaha/bayesmix-env 4 | docker build -f docker/base/Dockerfile --platform linux/x86_64 -t mberaha/bayesmix-base . 5 | docker push mberaha/bayesmix-base 6 | -------------------------------------------------------------------------------- /resources/bash/setup_pre_commit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | pip3 install pre-commit 4 | pre-commit install --config pre-commit-config.yaml 5 | -------------------------------------------------------------------------------- /resources/benchmarks/chains/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/resources/benchmarks/chains/__init__.py -------------------------------------------------------------------------------- /resources/benchmarks/default_algo_params.asciipb: -------------------------------------------------------------------------------- 1 | rng_seed: 20210329 2 | 3 | iterations: 11000 4 | 5 | burnin: 100 6 | 7 | init_num_clusters: 5 8 | 9 | neal8_n_aux: 3 10 | 
-------------------------------------------------------------------------------- /resources/datasets/dde_covs_grid.csv: -------------------------------------------------------------------------------- 1 | 95 2 | -------------------------------------------------------------------------------- /resources/datasets/galaxy.csv: -------------------------------------------------------------------------------- 1 | 9.172 2 | 9.35 3 | 9.483 4 | 9.558 5 | 9.775 6 | 10.227 7 | 10.406 8 | 16.084 9 | 16.17 10 | 18.419 11 | 18.552 12 | 18.6 13 | 18.927 14 | 19.052 15 | 19.07 16 | 19.33 17 | 19.343 18 | 19.349 19 | 19.44 20 | 19.473 21 | 19.529 22 | 19.541 23 | 19.547 24 | 19.663 25 | 19.846 26 | 19.856 27 | 19.863 28 | 19.914 29 | 19.918 30 | 19.973 31 | 19.989 32 | 20.166 33 | 20.175 34 | 20.179 35 | 20.196 36 | 20.215 37 | 20.221 38 | 20.415 39 | 20.629 40 | 20.795 41 | 20.821 42 | 20.846 43 | 20.875 44 | 20.986 45 | 21.137 46 | 21.492 47 | 21.701 48 | 21.814 49 | 21.921 50 | 21.96 51 | 22.185 52 | 22.209 53 | 22.242 54 | 22.249 55 | 22.314 56 | 22.374 57 | 22.495 58 | 22.746 59 | 22.747 60 | 22.888 61 | 22.914 62 | 23.206 63 | 23.241 64 | 23.263 65 | 23.484 66 | 23.538 67 | 23.542 68 | 23.666 69 | 23.706 70 | 23.711 71 | 24.129 72 | 24.285 73 | 24.289 74 | 24.366 75 | 24.717 76 | 24.99 77 | 25.633 78 | 26.69 79 | 26.995 80 | 32.065 81 | 32.789 82 | 34.279 83 | -------------------------------------------------------------------------------- /resources/docker/base/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mberaha/bayesmix-env:latest 2 | 3 | # Pull master branch to update bayesmix 4 | RUN git pull 5 | 6 | # Compile test_bayesmix and run_mcmc 7 | RUN cd build \ 8 | && cmake -DDISABLE_PLOTS=ON .. 
\ 9 | && make test_bayesmix \ 10 | && make run_mcmc 11 | 12 | LABEL Name=bayesmix-base Version=0.0.1 13 | -------------------------------------------------------------------------------- /resources/docker/env/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rocker/r-ubuntu:latest 2 | 3 | # Update repo and install required packages 4 | RUN apt-get -y update \ 5 | && apt-get -y upgrade \ 6 | && apt-get -y install ccache cmake g++ git make pkg-config 7 | 8 | # Install required python packages 9 | RUN apt-get -y install python3-pip \ 10 | && python3 -m pip install pytest 11 | 12 | # Install required R packages (also installing protobuf and protoc v. 3.12) 13 | RUN apt-get -y install r-cran-devtools r-cran-testthat r-cran-rprotobuf 14 | 15 | # Clone bayesmix-dev/bayesmix repository in /usr/bayesmix 16 | RUN git clone https://github.com/bayesmix-dev/bayesmix.git /usr/bayesmix 17 | 18 | # Set working directory to /usr/bayesmix 19 | WORKDIR /usr/bayesmix 20 | 21 | # Compile test_bayesmix and run_mcmc 22 | RUN mkdir build && cd build \ 23 | && cmake -DDISABLE_PLOTS=ON .. \ 24 | && make test_bayesmix \ 25 | && make run_mcmc 26 | 27 | LABEL Name=bayesmix-env Version=0.0.1 28 | -------------------------------------------------------------------------------- /resources/docker/test/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mberaha/bayesmix-base:latest 2 | 3 | # Pull master branch to update bayesmix 4 | RUN git pull 5 | 6 | # Set working directory to /usr 7 | WORKDIR /usr 8 | 9 | # Store current version in /usr/bayesmix-update 10 | COPY . 
bayesmix-update 11 | 12 | # Generate and apply patch to update bayesmix 13 | RUN diff -ruN -x 'build' -x '_deps' -x '.git' -x '*_pb2.py' bayesmix/ bayesmix-update/ | patch -d bayesmix -p1 \ 14 | && rm -rf bayesmix-update 15 | 16 | # Set working directory to /usr/bayesmix 17 | WORKDIR /usr/bayesmix 18 | 19 | # Compile test_bayesmix and run_mcmc after applying changes 20 | RUN cd build \ 21 | && cmake -DDISABLE_PLOTS=ON .. \ 22 | && make test_bayesmix \ 23 | && make run_mcmc 24 | 25 | # Install bayesmixpy 26 | RUN cd python && python3 -m pip install -e . 27 | 28 | # Install bayesmixr 29 | RUN cd R && Rscript --vanilla -e "devtools::install('bayesmixr/', quick = T, args = '--clean')" 30 | 31 | LABEL Name=bayesmix-test Version=0.0.1 32 | -------------------------------------------------------------------------------- /resources/patches/matplotplusplus.patch: -------------------------------------------------------------------------------- 1 | diff --git a/source/3rd_party/CMakeLists.txt b/source/3rd_party/CMakeLists.txt 2 | index b5656e1..f89b6d1 100644 3 | --- a/source/3rd_party/CMakeLists.txt 4 | +++ b/source/3rd_party/CMakeLists.txt 5 | @@ -114,16 +114,16 @@ if(FFTW_FOUND) 6 | target_include_directories(cimg INTERFACE ${FFTW_INCLUDE_DIRS}) 7 | endif() 8 | 9 | -if (CMAKE_MODULE_PATH) 10 | - find_package(OpenCV QUIET) 11 | - if (OpenCV_FOUND) 12 | - target_compile_definitions(cimg INTERFACE cimg_use_opencv) 13 | - target_link_libraries(cimg INTERFACE ${OpenCV_LIBRARIES}) 14 | - target_include_directories(cimg INTERFACE ${OpenCV_INCLUDE_DIRS}) 15 | - endif() 16 | -else() 17 | - message("No CMAKE_MODULE_PATH path for OpenCV configured") 18 | -endif() 19 | +# if (CMAKE_MODULE_PATH) 20 | +# find_package(OpenCV QUIET) 21 | +# if (OpenCV_FOUND) 22 | +# target_compile_definitions(cimg INTERFACE cimg_use_opencv) 23 | +# target_link_libraries(cimg INTERFACE ${OpenCV_LIBRARIES}) 24 | +# target_include_directories(cimg INTERFACE ${OpenCV_INCLUDE_DIRS}) 25 | +# endif() 26 | +# else() 27 | +#
message("No CMAKE_MODULE_PATH path for OpenCV configured") 28 | +# endif() 29 | 30 | 31 | if(LIBAVCODEC_FOUND AND LIBAVFORMAT_FOUND AND LIBSWSCALE_FOUND AND LIBAVUTIL_FOUND) 32 | -------------------------------------------------------------------------------- /resources/tutorial/.gitignore: -------------------------------------------------------------------------------- 1 | # Output folders 2 | out/*.csv 3 | .Rhistory 4 | .RData 5 | plots.R 6 | -------------------------------------------------------------------------------- /resources/tutorial/algo.asciipb: -------------------------------------------------------------------------------- 1 | ##### GENERIC SETTINGS FOR ALL ALGORITHMS ##### 2 | # Algorithm ID string, e.g. "Neal2" 3 | algo_id: "Neal2" 4 | 5 | # RNG initial seed: any nonnegative integer 6 | rng_seed: 20201124 7 | 8 | # Number of iterations of the algorithm 9 | iterations: 1100 10 | 11 | # Number of initial iterations discarded by the algorithm 12 | burnin: 100 13 | 14 | # Number of clusters in which data will be first initialized 15 | # (NOTE: If you wish to initialize one datum per cluster, please write 0.) 16 | # (NOTE: This value is ONLY used for initialization, and it may be overwritten 17 | # by certain mixing objects, such as LogSBMixing. Please check a mixing's 18 | # initialize() function to know for sure whether or not it will override this 19 | # value.) 20 | init_num_clusters: 3 21 | 22 | 23 | ##### ALGORITHM-SPECIFIC SETTINGS ##### 24 | # Neal8 number of auxiliary blocks 25 | # (NOTE: 3 is the recommended value in most cases, please change it only if you 26 | # know what you're doing.) 
27 | neal8_n_aux: 3 28 | 29 | splitmerge_n_restr_gs_updates: 5 30 | splitmerge_n_mh_updates: 1 31 | splitmerge_n_full_gs_updates: 1 32 | -------------------------------------------------------------------------------- /resources/tutorial/dp_gamma.asciipb: -------------------------------------------------------------------------------- 1 | gamma_prior { 2 | totalmass_prior { 3 | shape: 4.0 4 | rate: 2.0 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /resources/tutorial/lapnig_fixed.asciipb: -------------------------------------------------------------------------------- 1 | fixed_values { 2 | mean: 0 3 | var: 10 4 | shape: 2 5 | scale: 1 6 | mh_mean_var: 10; 7 | mh_log_scale_var: 1; 8 | } 9 | -------------------------------------------------------------------------------- /resources/tutorial/mfm_fixed.asciipb: -------------------------------------------------------------------------------- 1 | fixed_value { 2 | lambda: 10 3 | gamma: 1 4 | } 5 | -------------------------------------------------------------------------------- /resources/tutorial/nnig_ngg.asciipb: -------------------------------------------------------------------------------- 1 | ngg_prior { 2 | mean_prior { 3 | mean: 5.5 4 | var: 2.25 5 | } 6 | var_scaling_prior { 7 | shape: 0.2 8 | rate: 0.6 9 | } 10 | shape: 1.5 11 | scale_prior { 12 | shape: 4.0 13 | rate: 2.0 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /resources/tutorial/nnw_ngiw.asciipb: -------------------------------------------------------------------------------- 1 | ngiw_prior { 2 | mean_prior { 3 | mean { 4 | size: 2 5 | data: 5.5 6 | data: 5.5 7 | } 8 | var { 9 | rows: 2 10 | cols: 2 11 | data: 0.2 12 | data: 0.0 13 | data: 0.0 14 | data: 0.2 15 | } 16 | } 17 | var_scaling_prior { 18 | shape: 0.2 19 | rate: 0.6 20 | } 21 | deg_free: 5.0 22 | scale_prior { 23 | deg_free: 5.0 24 | scale { 25 | rows: 2 26 | cols: 2 27 | data: 5.0 28 | 
data: 0.0 29 | data: 0.0 30 | data: 5.0 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /resources/tutorial/out/.gitignore: -------------------------------------------------------------------------------- 1 | # Several image formats 2 | *.png 3 | *.svg 4 | *.pdf 5 | *.jpg 6 | *.jpeg 7 | *.eps 8 | # Output files 9 | *.csv 10 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources(bayesmix PUBLIC includes.h) 2 | 3 | add_subdirectory(algorithms) 4 | add_subdirectory(collectors) 5 | add_subdirectory(hierarchies) 6 | add_subdirectory(mixings) 7 | add_subdirectory(runtime) 8 | add_subdirectory(utils) 9 | -------------------------------------------------------------------------------- /src/algorithms/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources(bayesmix 2 | PUBLIC 3 | base_algorithm.h 4 | base_algorithm.cc 5 | blocked_gibbs_algorithm.h 6 | blocked_gibbs_algorithm.cc 7 | conditional_algorithm.h 8 | conditional_algorithm.cc 9 | marginal_algorithm.h 10 | marginal_algorithm.cc 11 | neal2_algorithm.h 12 | neal2_algorithm.cc 13 | neal3_algorithm.h 14 | neal3_algorithm.cc 15 | neal8_algorithm.h 16 | neal8_algorithm.cc 17 | semihdp_sampler.h 18 | semihdp_sampler.cc 19 | split_and_merge_algorithm.cc 20 | split_and_merge_algorithm.h 21 | slice_sampler.h 22 | slice_sampler.cc 23 | ) 24 | -------------------------------------------------------------------------------- /src/algorithms/blocked_gibbs_algorithm.cc: -------------------------------------------------------------------------------- 1 | #include "blocked_gibbs_algorithm.h" 2 | 3 | #include 4 | 5 | #include "hierarchy_id.pb.h" 6 | #include "mixing_id.pb.h" 7 | #include "src/hierarchies/base_hierarchy.h" 8 | #include "src/mixings/base_mixing.h" 9 | 
#include "src/utils/distributions.h" 10 | #include "src/utils/rng.h" 11 | 12 | void BlockedGibbsAlgorithm::print_startup_message() const { 13 | std::string msg = "Running BlockedGibbs algorithm with " + 14 | bayesmix::HierarchyId_Name(unique_values[0]->get_id()) + 15 | " hierarchies, " + 16 | bayesmix::MixingId_Name(mixing->get_id()) + " mixing..."; 17 | std::cout << msg << std::endl; 18 | } 19 | 20 | void BlockedGibbsAlgorithm::sample_allocations() { 21 | auto &rng = bayesmix::Rng::Instance().get(); 22 | unsigned int num_components = mixing->get_num_components(); 23 | for (int i = 0; i < data.rows(); i++) { 24 | // Compute weights 25 | Eigen::VectorXd logprobas = 26 | mixing->get_mixing_weights(true, false, mix_covariates.row(i)); 27 | for (int j = 0; j < num_components; j++) { 28 | logprobas(j) += 29 | unique_values[j]->get_like_lpdf(data.row(i), hier_covariates.row(i)); 30 | } 31 | // Draw a NEW value for datum allocation 32 | unsigned int c_new = 33 | bayesmix::categorical_rng(stan::math::softmax(logprobas), rng, 0); 34 | unsigned int c_old = allocations[i]; 35 | if (c_new != c_old) { 36 | allocations[i] = c_new; 37 | // Remove datum from old cluster, add to new 38 | unique_values[c_old]->remove_datum( 39 | i, data.row(i), update_hierarchy_params(), hier_covariates.row(i)); 40 | unique_values[c_new]->add_datum( 41 | i, data.row(i), update_hierarchy_params(), hier_covariates.row(i)); 42 | } 43 | } 44 | } 45 | 46 | void BlockedGibbsAlgorithm::sample_unique_values() { 47 | for (auto &un : unique_values) { 48 | un->sample_full_cond(!update_hierarchy_params()); 49 | } 50 | } 51 | 52 | void BlockedGibbsAlgorithm::sample_weights() { 53 | mixing->update_state(unique_values, allocations); 54 | } 55 | -------------------------------------------------------------------------------- /src/algorithms/blocked_gibbs_algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_ALGORITHMS_BLOCKED_GIBBS_ALGORITHM_H_ 2 | 
#define BAYESMIX_ALGORITHMS_BLOCKED_GIBBS_ALGORITHM_H_ 3 | 4 | #include "algorithm_id.pb.h" 5 | #include "conditional_algorithm.h" 6 | 7 | //! Template class for the blocked Gibbs sampling algorithm. 8 | 9 | //! This class implements the blocked Gibbs sampling procedure from [1]. 10 | //! 11 | //! [1] Ishwaran, H., & James, L. F. (2001). Gibbs sampling methods for 12 | //! stick-breaking priors. Journal of the American Statistical 13 | //! Association, 96(453), 161-173. 14 | 15 | class BlockedGibbsAlgorithm : public ConditionalAlgorithm { 16 | public: 17 | BlockedGibbsAlgorithm() = default; 18 | ~BlockedGibbsAlgorithm() = default; 19 | 20 | bayesmix::AlgorithmId get_id() const override { 21 | return bayesmix::AlgorithmId::BlockedGibbs; 22 | } 23 | 24 | std::shared_ptr clone() const override { 25 | auto out = std::make_shared(*this); 26 | out->set_mixing(mixing->clone()); 27 | out->set_hierarchy(unique_values[0]->deep_clone()); 28 | return out; 29 | } 30 | 31 | protected: 32 | void print_startup_message() const override; 33 | 34 | void sample_allocations() override; 35 | 36 | void sample_unique_values() override; 37 | 38 | void sample_weights() override; 39 | }; 40 | 41 | #endif // BAYESMIX_ALGORITHMS_BLOCKED_GIBBS_ALGORITHM_H_ 42 | -------------------------------------------------------------------------------- /src/algorithms/conditional_algorithm.cc: -------------------------------------------------------------------------------- 1 | #include "conditional_algorithm.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "algorithm_state.pb.h" 7 | #include "base_algorithm.h" 8 | #include "src/collectors/base_collector.h" 9 | 10 | Eigen::VectorXd ConditionalAlgorithm::lpdf_from_state( 11 | const Eigen::MatrixXd &grid, const Eigen::RowVectorXd &hier_covariate, 12 | const Eigen::RowVectorXd &mix_covariate) { 13 | // Read mixing state 14 | unsigned int n_data = curr_state.cluster_allocs_size(); 15 | unsigned int n_clust = curr_state.cluster_states_size(); 16 |
mixing->set_state_from_proto(curr_state.mixing_state()); 17 | // Initialize estimate containers 18 | Eigen::MatrixXd lpdf_local(grid.rows(), n_clust); 19 | Eigen::VectorXd lpdf_final(grid.rows()); 20 | auto temp_hier = unique_values[0]->clone(); 21 | temp_hier->set_hypers_from_proto(curr_state.hierarchy_hypers()); 22 | 23 | // Loop over grid points 24 | for (size_t i = 0; i < grid.rows(); i++) { 25 | // Get mixing weights for the i-th grid point 26 | Eigen::VectorXd logweights = 27 | mixing->get_mixing_weights(true, false, mix_covariate); 28 | // Loop over clusters 29 | for (size_t j = 0; j < n_clust; j++) { 30 | temp_hier->set_state_from_proto(curr_state.cluster_states(j)); 31 | // Get local, single-point estimate 32 | lpdf_local(i, j) = logweights(j) + 33 | temp_hier->get_like_lpdf(grid.row(i), hier_covariate); 34 | } 35 | // Final estimate for i-th grid point 36 | lpdf_final(i) = stan::math::log_sum_exp(lpdf_local.row(i)); 37 | } 38 | return lpdf_final; 39 | } 40 | -------------------------------------------------------------------------------- /src/algorithms/conditional_algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_ALGORITHMS_CONDITIONAL_ALGORITHM_H_ 2 | #define BAYESMIX_ALGORITHMS_CONDITIONAL_ALGORITHM_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "base_algorithm.h" 8 | #include "src/collectors/base_collector.h" 9 | 10 | /** 11 | * Template class for a conditional sampler deriving from `BaseAlgorithm`. 12 | * 13 | * This template class implements a generic Gibbs sampling conditional 14 | * algorithm as the child of the `BaseAlgorithm` class. 
15 | * A mixture model sampled from a conditional algorithm can be expressed as 16 | * 17 | * \f[ 18 | * x_i \mid c_i, \theta_1, \dots, \theta_k &\sim f(x_i \mid \theta_{c_i}) \\ 19 | * \theta_1, \dots, \theta_k &\sim G_0 \\ 20 | * c_1, \dots, c_n \mid w_1, \dots, w_k &\sim \text{Cat}(w_1, \dots, w_k) \\ 21 | * w_1, \dots, w_k &\sim p(w_1, \dots, w_k) 22 | * \f] 23 | * 24 | * where \f$ f(x \mid \theta_j) \f$ is a density for each value of \f$ \theta_j 25 | * \f$, \f$ c_i \f$ take values in \f$ \{1, \dots, k\} \f$ and \f$ w_1, \dots, 26 | * w_k \f$ are nonnegative weights whose sum is a.s. 1 (i.e. \f$ p(w_1, \dots, 27 | * w_k) \f$ is a probability distribution on the k-1 dimensional unit simplex). 28 | * In this library, each \f$ \theta_j \f$ is represented as a `Hierarchy` 29 | * object (which inherits from `AbstractHierarchy`), that also holds the 30 | * information related to the base measure \f$ G_0 \f$ (see 31 | * `AbstractHierarchy`). The weights \f$ (w_1, \dots, w_k) \f$ are represented 32 | * as a `Mixing` object, which inherits from `AbstractMixing`. 33 | * 34 | * The state of a conditional algorithm consists of the unique values, the 35 | * cluster allocations and the mixture weights. The former two are stored in 36 | * this class, while the weights are stored in the `Mixing` object. 37 | */ 38 | 39 | class ConditionalAlgorithm : public BaseAlgorithm { 40 | public: 41 | ConditionalAlgorithm() = default; 42 | ~ConditionalAlgorithm() = default; 43 | 44 | bool is_conditional() const override { return true; } 45 | 46 | Eigen::VectorXd lpdf_from_state( 47 | const Eigen::MatrixXd &grid, const Eigen::RowVectorXd &hier_covariate, 48 | const Eigen::RowVectorXd &mix_covariate) override; 49 | 50 | protected: 51 | //!
Performs Gibbs sampling sub-step for all component weights 52 | virtual void sample_weights() = 0; 53 | 54 | void step() override { 55 | sample_allocations(); 56 | sample_unique_values(); 57 | update_hierarchy_hypers(); 58 | sample_weights(); 59 | } 60 | }; 61 | 62 | #endif // BAYESMIX_ALGORITHMS_CONDITIONAL_ALGORITHM_H_ 63 | -------------------------------------------------------------------------------- /src/algorithms/load_algorithms.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_ALGORITHMS_LOAD_ALGORITHMS_H_ 2 | #define BAYESMIX_ALGORITHMS_LOAD_ALGORITHMS_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "algorithm_id.pb.h" 8 | #include "base_algorithm.h" 9 | #include "blocked_gibbs_algorithm.h" 10 | #include "neal2_algorithm.h" 11 | #include "neal3_algorithm.h" 12 | #include "neal8_algorithm.h" 13 | #include "slice_sampler.h" 14 | #include "split_and_merge_algorithm.h" 15 | #include "src/runtime/factory.h" 16 | 17 | //! Loads all available `Algorithm` objects into the appropriate factory, so 18 | //! that they are ready to be chosen and used at runtime. 
19 | 20 | template 21 | using Builder = std::function()>; 22 | 23 | using AlgorithmFactory = Factory; 24 | 25 | __attribute__((constructor)) static void load_algorithms() { 26 | AlgorithmFactory &factory = AlgorithmFactory::Instance(); 27 | // Initialize factory builders 28 | Builder Neal2builder = []() { 29 | return std::make_shared(); 30 | }; 31 | Builder Neal3builder = []() { 32 | return std::make_shared(); 33 | }; 34 | Builder Neal8builder = []() { 35 | return std::make_shared(); 36 | }; 37 | Builder BlockedGibbsbuilder = []() { 38 | return std::make_shared(); 39 | }; 40 | Builder SplitAndMergebuilder = []() { 41 | return std::make_shared(); 42 | }; 43 | Builder SliceBuilder = []() { 44 | return std::make_shared(); 45 | }; 46 | 47 | factory.add_builder(Neal2Algorithm().get_id(), Neal2builder); 48 | factory.add_builder(Neal3Algorithm().get_id(), Neal3builder); 49 | factory.add_builder(Neal8Algorithm().get_id(), Neal8builder); 50 | factory.add_builder(BlockedGibbsAlgorithm().get_id(), BlockedGibbsbuilder); 51 | factory.add_builder(SplitAndMergeAlgorithm().get_id(), SplitAndMergebuilder); 52 | factory.add_builder(SliceSampler().get_id(), SliceBuilder); 53 | } 54 | 55 | #endif // BAYESMIX_ALGORITHMS_LOAD_ALGORITHMS_H_ 56 | -------------------------------------------------------------------------------- /src/algorithms/marginal_algorithm.cc: -------------------------------------------------------------------------------- 1 | #include "marginal_algorithm.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "algorithm_state.pb.h" 8 | #include "base_algorithm.h" 9 | #include "src/collectors/base_collector.h" 10 | 11 | void MarginalAlgorithm::remove_singleton(const unsigned int idx) { 12 | // Relabel allocations 13 | for (auto &c : allocations) { 14 | if (c > idx) { 15 | c -= 1; 16 | } 17 | } 18 | // Remove cluster 19 | unique_values.erase(unique_values.begin() + idx); 20 | } 21 | 22 | Eigen::VectorXd MarginalAlgorithm::lpdf_from_state( 23 | const 
Eigen::MatrixXd &grid, const Eigen::RowVectorXd &hier_covariate, 24 | const Eigen::RowVectorXd &mix_covariate) { 25 | // Read mixing state 26 | unsigned int n_data = curr_state.cluster_allocs_size(); 27 | unsigned int n_clust = curr_state.cluster_states_size(); 28 | mixing->set_state_from_proto(curr_state.mixing_state()); 29 | // Initialize estimate containers 30 | Eigen::MatrixXd lpdf_local(grid.rows(), n_clust + 1); 31 | Eigen::VectorXd lpdf_final(grid.rows()); 32 | auto temp_hier = unique_values[0]->clone(); 33 | temp_hier->set_hypers_from_proto(curr_state.hierarchy_hypers()); 34 | 35 | for (size_t j = 0; j < n_clust; j++) { 36 | // Get hierarchy and mass values 37 | temp_hier->set_state_from_proto(curr_state.cluster_states(j)); 38 | double mass_ex = mixing->get_mass_existing_cluster( 39 | n_data, n_clust, true, false, temp_hier, mix_covariate); 40 | // Get local, single-point estimate 41 | lpdf_local.col(j) = 42 | temp_hier->like_lpdf_grid(grid, hier_covariate).array() + mass_ex; 43 | } 44 | double mass_new = mixing->get_mass_new_cluster(n_data, n_clust, true, false, 45 | mix_covariate); 46 | lpdf_local.col(n_clust) = 47 | lpdf_marginal_component(temp_hier, grid, hier_covariate).array() + 48 | mass_new; 49 | // Loop over grid points 50 | for (size_t i = 0; i < grid.rows(); i++) { 51 | // Final estimate for i-th grid point 52 | lpdf_final(i) = stan::math::log_sum_exp(lpdf_local.row(i)); 53 | assert(!isnan(lpdf_final(i))); 54 | } 55 | return lpdf_final; 56 | } 57 | -------------------------------------------------------------------------------- /src/algorithms/neal2_algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_ALGORITHMS_NEAL2_ALGORITHM_H_ 2 | #define BAYESMIX_ALGORITHMS_NEAL2_ALGORITHM_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "algorithm_id.pb.h" 8 | #include "marginal_algorithm.h" 9 | #include "src/hierarchies/base_hierarchy.h" 10 | 11 | //! 
Template class for Neal's algorithm 2 for conjugate hierarchies 12 | 13 | //! This class implements Neal's Gibbs sampling algorithm 2 from Neal (2000) 14 | //! that generates a Markov chain on the clustering of the provided data. 15 | //! 16 | //! This algorithm requires the use of a `ConjugateHierarchy` object. 17 | 18 | class Neal2Algorithm : public MarginalAlgorithm { 19 | public: 20 | Neal2Algorithm() = default; 21 | ~Neal2Algorithm() = default; 22 | 23 | bool requires_conjugate_hierarchy() const override { return true; } 24 | 25 | bayesmix::AlgorithmId get_id() const override { 26 | return bayesmix::AlgorithmId::Neal2; 27 | } 28 | 29 | std::shared_ptr clone() const override { 30 | auto out = std::make_shared(*this); 31 | out->set_mixing(mixing->clone()); 32 | out->set_hierarchy(unique_values[0]->deep_clone()); 33 | return out; 34 | } 35 | 36 | protected: 37 | void print_startup_message() const override; 38 | 39 | void sample_allocations() override; 40 | 41 | void sample_unique_values() override; 42 | 43 | Eigen::VectorXd lpdf_marginal_component( 44 | const std::shared_ptr hier, 45 | const Eigen::MatrixXd &grid, 46 | const Eigen::RowVectorXd &covariate) const override; 47 | 48 | //! Computes prior component of allocation sampling masses for given datum 49 | //! @param data_idx Index of the considered data point 50 | //! @return Allocation weights for the clusters 51 | virtual Eigen::VectorXd get_cluster_prior_mass( 52 | const unsigned int data_idx) const; 53 | 54 | //! Computes likelihood component of alloc. sampling masses for given datum 55 | //! @param data_idx Index of the considered data point 56 | //! 
@return Allocation weights for the clusters 57 | virtual Eigen::VectorXd get_cluster_lpdf(const unsigned int data_idx) const; 58 | }; 59 | 60 | #endif // BAYESMIX_ALGORITHMS_NEAL2_ALGORITHM_H_ 61 | -------------------------------------------------------------------------------- /src/algorithms/neal3_algorithm.cc: -------------------------------------------------------------------------------- 1 | #include "neal3_algorithm.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "hierarchy_id.pb.h" 7 | #include "mixing_id.pb.h" 8 | #include "src/hierarchies/base_hierarchy.h" 9 | 10 | void Neal3Algorithm::print_startup_message() const { 11 | std::string msg = "Running Neal3 algorithm with " + 12 | bayesmix::HierarchyId_Name(unique_values[0]->get_id()) + 13 | " hierarchies, " + 14 | bayesmix::MixingId_Name(mixing->get_id()) + " mixing..."; 15 | std::cout << msg << std::endl; 16 | } 17 | 18 | Eigen::VectorXd Neal3Algorithm::get_cluster_lpdf( 19 | const unsigned int data_idx) const { 20 | unsigned int n_data = data.rows(); 21 | unsigned int n_clust = unique_values.size(); 22 | Eigen::VectorXd loglpdf(n_clust + 1); 23 | for (size_t j = 0; j < n_clust; j++) { 24 | // Probability of being assigned to an already existing cluster 25 | loglpdf(j) = unique_values[j]->conditional_pred_lpdf( 26 | data.row(data_idx), hier_covariates.row(data_idx)); 27 | } 28 | // Probability of being assigned to a newly created cluster 29 | loglpdf(n_clust) = unique_values[0]->prior_pred_lpdf( 30 | data.row(data_idx), hier_covariates.row(data_idx)); 31 | return loglpdf; 32 | } 33 | -------------------------------------------------------------------------------- /src/algorithms/neal3_algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_ALGORITHMS_NEAL3_ALGORITHM_H_ 2 | #define BAYESMIX_ALGORITHMS_NEAL3_ALGORITHM_H_ 3 | 4 | #include "algorithm_id.pb.h" 5 | #include "neal2_algorithm.h" 6 | 7 | //! 
Template class for Neal's algorithm 3 for conjugate hierarchies 8 | 9 | //! This class implements Neal's Gibbs sampling algorithm 3 from Neal (2000) 10 | //! that generates a Markov chain on the clustering of the provided data. 11 | //! 12 | //! This algorithm requires the use of a `ConjugateHierarchy` object. 13 | //! Algorithm 3 from Neal (2000) is almost identical to Algorithm 2, except 14 | //! that conjugacy is further exploited by marginalizing the unique values 15 | //! from the state when updating the cluster allocations, which leads to 16 | //! improved efficiency in terms of effective sample size, but might require 17 | //! longer runtimes. 18 | //! For more information, please refer to the `Neal2Algorithm` class, as well 19 | //! as `BaseAlgorithm` and `MarginalAlgorithm` on which it is based. 20 | 21 | class Neal3Algorithm : public Neal2Algorithm { 22 | public: 23 | Neal3Algorithm() = default; 24 | ~Neal3Algorithm() = default; 25 | 26 | bayesmix::AlgorithmId get_id() const override { 27 | return bayesmix::AlgorithmId::Neal3; 28 | } 29 | 30 | std::shared_ptr clone() const override { 31 | auto out = std::make_shared(*this); 32 | out->set_mixing(mixing->clone()); 33 | out->set_hierarchy(unique_values[0]->deep_clone()); 34 | return out; 35 | } 36 | 37 | protected: 38 | void print_startup_message() const override; 39 | 40 | bool update_hierarchy_params() override { return true; } 41 | 42 | Eigen::VectorXd get_cluster_lpdf(const unsigned int data_idx) const override; 43 | }; 44 | 45 | #endif // BAYESMIX_ALGORITHMS_NEAL3_ALGORITHM_H_ 46 | -------------------------------------------------------------------------------- /src/algorithms/neal8_algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_ALGORITHMS_NEAL8_ALGORITHM_H_ 2 | #define BAYESMIX_ALGORITHMS_NEAL8_ALGORITHM_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "algorithm_id.pb.h" 9 | #include "neal2_algorithm.h" 10 | 11 | //!
Template class for Neal's algorithm 8 for non-conjugate hierarchies 12 | 13 | //! This class implements Neal's Gibbs sampling algorithm 8 from Neal (2000) 14 | //! that generates a Markov chain on the clustering of the provided data. 15 | //! 16 | //! It extends Neal's algorithm 2 to also deal with cases when the 17 | //! kernel/likelihood f(x | phi) is not conjugate to G, thanks to the 18 | //! introduction of additional, auxiliary unique values. 19 | 20 | class Neal8Algorithm : public Neal2Algorithm { 21 | public: 22 | // DESTRUCTOR AND CONSTRUCTORS 23 | Neal8Algorithm() = default; 24 | ~Neal8Algorithm() = default; 25 | 26 | bool requires_conjugate_hierarchy() const override { return false; } 27 | 28 | //! Returns number of auxiliary blocks 29 | unsigned int get_n_aux() const { return n_aux; } 30 | 31 | //! Sets number of auxiliary blocks 32 | void set_n_aux(const unsigned int n_aux_) { 33 | if (n_aux_ == 0) { 34 | throw std::invalid_argument("Number of auxiliary block must be > 0"); 35 | } 36 | n_aux = n_aux_; 37 | } 38 | 39 | bayesmix::AlgorithmId get_id() const override { 40 | return bayesmix::AlgorithmId::Neal8; 41 | } 42 | 43 | void read_params_from_proto( 44 | const bayesmix::AlgorithmParams &params) override; 45 | 46 | std::shared_ptr clone() const override { 47 | auto out = std::make_shared(*this); 48 | out->set_mixing(mixing->clone()); 49 | out->set_hierarchy(unique_values[0]->deep_clone()); 50 | return out; 51 | } 52 | 53 | protected: 54 | void initialize() override; 55 | 56 | void print_startup_message() const override; 57 | 58 | void sample_allocations() override; 59 | 60 | Eigen::VectorXd lpdf_marginal_component( 61 | const std::shared_ptr hier, 62 | const Eigen::MatrixXd &grid, 63 | const Eigen::RowVectorXd &covariate) const override; 64 | 65 | Eigen::VectorXd get_cluster_prior_mass( 66 | const unsigned int data_idx) const override; 67 | 68 | Eigen::VectorXd get_cluster_lpdf(const unsigned int data_idx) const override; 69 | 70 | //!
Number of auxiliary blocks 71 | unsigned int n_aux = 3; 72 | 73 | //! Vector of auxiliary blocks 74 | std::vector> aux_unique_values; 75 | }; 76 | 77 | #endif // BAYESMIX_ALGORITHMS_NEAL8_ALGORITHM_H_ 78 | -------------------------------------------------------------------------------- /src/algorithms/slice_sampler.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_ALGORITHMS_SLICE_SAMPLER_H_ 2 | #define BAYESMIX_ALGORITHMS_SLICE_SAMPLER_H_ 3 | 4 | #include "algorithm_id.pb.h" 5 | #include "conditional_algorithm.h" 6 | #include "src/mixings/truncated_sb_mixing.h" 7 | 8 | //! This class implements the efficient slice sampler from [1]. 9 | //! 10 | //! [1] Kalli, M., Griffin, J. E., & Walker, S. G. (2011). 11 | //! Slice sampling mixture models. Statistics and Computing. 12 | 13 | class SliceSampler : public ConditionalAlgorithm { 14 | public: 15 | SliceSampler() = default; 16 | ~SliceSampler() = default; 17 | 18 | void initialize() override; 19 | 20 | void step() override; 21 | 22 | bayesmix::AlgorithmId get_id() const override { 23 | return bayesmix::AlgorithmId::Slice; 24 | } 25 | 26 | std::shared_ptr clone() const override { 27 | auto out = std::make_shared(*this); 28 | out->set_mixing(mixing->clone()); 29 | out->set_hierarchy(unique_values[0]->deep_clone()); 30 | return out; 31 | } 32 | 33 | void sample_slice(); 34 | 35 | protected: 36 | void print_startup_message() const override; 37 | 38 | void sample_allocations() override; 39 | 40 | void sample_unique_values() override; 41 | 42 | void sample_weights() override; 43 | 44 | Eigen::VectorXd slice_u; 45 | 46 | std::shared_ptr mixing; 47 | }; 48 | 49 | #endif // BAYESMIX_ALGORITHMS_SLICE_SAMPLER_H_ 50 | -------------------------------------------------------------------------------- /src/collectors/.gitignore: -------------------------------------------------------------------------------- 1 | # Protocol Buffers implementation files (version-dependent) 2 |
*.pb.cc 3 | *.pb.h 4 | -------------------------------------------------------------------------------- /src/collectors/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources(bayesmix 2 | PUBLIC 3 | base_collector.h 4 | file_collector.h 5 | file_collector.cc 6 | memory_collector.h 7 | memory_collector.cc 8 | ) 9 | -------------------------------------------------------------------------------- /src/collectors/memory_collector.cc: -------------------------------------------------------------------------------- 1 | #include "memory_collector.h" 2 | 3 | void MemoryCollector::collect(const google::protobuf::Message& state) { 4 | std::string s; 5 | state.SerializeToString(&s); 6 | chain.push_back(s); 7 | size++; 8 | } 9 | 10 | void MemoryCollector::get_state(const unsigned int i, 11 | google::protobuf::Message* out) { 12 | out->ParseFromString(chain[i]); 13 | } 14 | 15 | void MemoryCollector::reset() { curr_iter = 0; } 16 | 17 | bool MemoryCollector::next_state(google::protobuf::Message* const out) { 18 | if (curr_iter == size) { 19 | return false; 20 | } 21 | out->ParseFromString(chain[curr_iter]); 22 | curr_iter++; 23 | return true; 24 | } 25 | -------------------------------------------------------------------------------- /src/hierarchies/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources(bayesmix 2 | PUBLIC 3 | abstract_hierarchy.h 4 | base_hierarchy.h 5 | nnig_hierarchy.h 6 | nnxig_hierarchy.h 7 | nnw_hierarchy.h 8 | lin_reg_uni_hierarchy.h 9 | fa_hierarchy.h 10 | lapnig_hierarchy.h 11 | ) 12 | 13 | add_subdirectory(likelihoods) 14 | add_subdirectory(priors) 15 | add_subdirectory(updaters) 16 | -------------------------------------------------------------------------------- /src/hierarchies/lapnig_hierarchy.h: -------------------------------------------------------------------------------- 1 | #ifndef 
BAYESMIX_HIERARCHIES_LAPNIG_HIERARCHY_H_ 2 | #define BAYESMIX_HIERARCHIES_LAPNIG_HIERARCHY_H_ 3 | 4 | #include "base_hierarchy.h" 5 | #include "hierarchy_id.pb.h" 6 | #include "likelihoods/laplace_likelihood.h" 7 | #include "priors/nxig_prior_model.h" 8 | #include "updaters/mala_updater.h" 9 | 10 | /** 11 | * Laplace Normal-InverseGamma hierarchy for univariate data. 12 | * 13 | * This class represents a hierarchical model where data are distributed 14 | * according to a Laplace likelihood (see the `LaplaceLikelihood` class for 15 | * details). The likelihood parameters have a Normal x InverseGamma centering 16 | * distribution (see the `NxIGPriorModel` class for details). That is: 17 | * 18 | * \f[ 19 | * f(x_i \mid \mu,\sigma^2) &= Laplace(\mu,\sqrt{\sigma^2/2})\\ 20 | * \mu &\sim N(\mu_0,\eta^2) \\ 21 | * \sigma^2 &\sim InvGamma(a, b) 22 | * \f] 23 | * The state is composed of mean and variance (thus the scale for the Laplace 24 | * distribution is \f$ \sqrt{\sigma^2/2} \f$). The state hyperparameters are 25 | * \f$(\mu_0, \eta^2, a, b)\f$, all scalar values. Note that this hierarchy 26 | * is NOT conjugate, thus the marginal distribution is not available in closed 27 | * form. 28 | */ 29 | 30 | class LapNIGHierarchy 31 | : public BaseHierarchy { 33 | public: 34 | LapNIGHierarchy() = default; 35 | ~LapNIGHierarchy() = default; 36 | 37 | //! Returns the Protobuf ID associated to this class 38 | bayesmix::HierarchyId get_id() const override { 39 | return bayesmix::HierarchyId::LapNIG; 40 | } 41 | 42 | //! Sets the default updater algorithm for this hierarchy 43 | void set_default_updater() { updater = std::make_shared(); } 44 | 45 | //!
Initializes state parameters to appropriate values 46 | void initialize_state() override { 47 | // Get hypers 48 | auto hypers = prior->get_hypers(); 49 | // Initialize likelihood state 50 | State::UniLS state; 51 | state.mean = hypers.mean; 52 | state.var = hypers.scale / (hypers.shape + 1); 53 | like->set_state(state); 54 | }; 55 | }; 56 | 57 | #endif // BAYESMIX_HIERARCHIES_LAPNIG_HIERARCHY_H_ 58 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources(bayesmix PUBLIC 2 | likelihood_internal.h 3 | abstract_likelihood.h 4 | base_likelihood.h 5 | uni_norm_likelihood.h 6 | uni_norm_likelihood.cc 7 | multi_norm_likelihood.h 8 | multi_norm_likelihood.cc 9 | uni_lin_reg_likelihood.h 10 | uni_lin_reg_likelihood.cc 11 | laplace_likelihood.h 12 | laplace_likelihood.cc 13 | fa_likelihood.h 14 | fa_likelihood.cc 15 | ) 16 | 17 | add_subdirectory(states) 18 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/fa_likelihood.cc: -------------------------------------------------------------------------------- 1 | #include "fa_likelihood.h" 2 | 3 | #include "src/utils/distributions.h" 4 | 5 | void FALikelihood::clear_summary_statistics() { 6 | data_sum = Eigen::VectorXd::Zero(dim); 7 | } 8 | 9 | double FALikelihood::compute_lpdf(const Eigen::RowVectorXd& datum) const { 10 | return bayesmix::multi_normal_lpdf_woodbury_chol( 11 | datum, state.mu, state.psi_inverse, state.cov_wood, state.cov_logdet); 12 | } 13 | 14 | void FALikelihood::update_sum_stats(const Eigen::RowVectorXd& datum, 15 | bool add) { 16 | if (add) { 17 | data_sum += datum; 18 | } else { 19 | data_sum -= datum; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/fa_likelihood.h: 
-------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_FA_LIKELIHOOD_H_ 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_FA_LIKELIHOOD_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "algorithm_state.pb.h" 12 | #include "base_likelihood.h" 13 | #include "states/includes.h" 14 | 15 | /** 16 | * A gaussian factor analytic likelihood, using the `State::FA` state. 17 | * Represents the model: 18 | * 19 | * \f[ 20 | * \bm{y}_1,\dots,\bm{y}_k \stackrel{\small\mathrm{iid}}{\sim} N_p(\bm{\mu}, 21 | * \Sigma + \Lambda\Lambda^T), \f] 22 | * 23 | * where Lambda is a \f$ p \times d \f$ matrix, usually \f$ d << p \f$ and \f$ 24 | * \Sigma \f$ is a diagonal matrix. Parameters are stored in a `State::FA` 25 | * state. We store as summary statistics the sum of the \f$ \bm{y}_i \f$'s, but 26 | * it is not sufficient for all the updates involved. Therefore, all the 27 | * observations allocated to a cluster are processed when computing the 28 | * cluster lpdf. 
29 | */ 30 | 31 | class FALikelihood : public BaseLikelihood { 32 | public: 33 | FALikelihood() = default; 34 | ~FALikelihood() = default; 35 | bool is_multivariate() const override { return true; }; 36 | bool is_dependent() const override { return false; }; 37 | void clear_summary_statistics() override; 38 | void set_dim(unsigned int dim_) { 39 | dim = dim_; 40 | clear_summary_statistics(); 41 | }; 42 | unsigned int get_dim() const { return dim; }; 43 | Eigen::VectorXd get_data_sum() const { return data_sum; }; 44 | 45 | protected: 46 | double compute_lpdf(const Eigen::RowVectorXd& datum) const override; 47 | void update_sum_stats(const Eigen::RowVectorXd& datum, bool add) override; 48 | 49 | unsigned int dim = 0; // 0 until set_dim() is called 50 | Eigen::VectorXd data_sum; 51 | }; 52 | 53 | #endif // BAYESMIX_HIERARCHIES_LIKELIHOODS_FA_LIKELIHOOD_H_ 54 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/laplace_likelihood.cc: -------------------------------------------------------------------------------- 1 | #include "laplace_likelihood.h" 2 | 3 | double LaplaceLikelihood::compute_lpdf(const Eigen::RowVectorXd &datum) const { 4 | return stan::math::double_exponential_lpdf( 5 | datum(0), state.mean, stan::math::sqrt(state.var / 2.0)); 6 | } 7 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/laplace_likelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_LAPLACE_LIKELIHOOD_H_ 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_LAPLACE_LIKELIHOOD_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "algorithm_state.pb.h" 11 | #include "base_likelihood.h" 12 | #include "states/includes.h" 13 | 14 | /** 15 | * A univariate Laplace likelihood, using the `State::UniLS` state.
Represents 16 | * the model: 17 | * 18 | * \f[ 19 | * y_1,\dots,y_k \mid \mu, \sigma^2 \stackrel{\small\mathrm{iid}}{\sim} 20 | * Laplace(\mu,\sigma^2), \f] 21 | * 22 | * where \f$ \mu \f$ is the mean and center of the distribution 23 | * and \f$ \sigma^2 \f$ is the variance. The scale parameter \f$ \lambda \f$ is 24 | * then \f$ \sqrt{\sigma^2/2} \f$. These parameters are stored in a 25 | * `State::UniLS` state. Since the Laplace likelihood does not have sufficient 26 | * statistics other than the whole sample, the `update_sum_stats()` method does 27 | * nothing. 28 | */ 29 | 30 | class LaplaceLikelihood 31 | : public BaseLikelihood { 32 | public: 33 | LaplaceLikelihood() = default; 34 | ~LaplaceLikelihood() = default; 35 | bool is_multivariate() const override { return false; }; 36 | bool is_dependent() const override { return false; }; 37 | void clear_summary_statistics() override { return; }; /* no-op: this class keeps no summary statistics */ 38 | /* Cluster log-likelihood for an unconstrained parameter vector of size 2: element 0 is the mean, element 1 is mapped to a positive variance with positive_constrain. The Laplace log-density is accumulated over every row of the dataset indexed by cluster_data_idx. */ 39 | template 40 | T cluster_lpdf_from_unconstrained( 41 | const Eigen::Matrix &unconstrained_params) const { 42 | assert(unconstrained_params.size() == 2); 43 | 44 | T mean = unconstrained_params(0); 45 | T var = stan::math::positive_constrain(unconstrained_params(1)); 46 | 47 | T out = 0.; 48 | for (auto it = cluster_data_idx.begin(); it != cluster_data_idx.end(); 49 | ++it) { 50 | out += stan::math::double_exponential_lpdf(dataset_ptr->row(*it), mean, 51 | stan::math::sqrt(var / 2.0)); 52 | } 53 | return out; 54 | } 55 | 56 | protected: 57 | double compute_lpdf(const Eigen::RowVectorXd &datum) const override; 58 | /* Intentionally does nothing (see the class description) */ void update_sum_stats(const Eigen::RowVectorXd &datum, bool add) override { 59 | return; 60 | }; 61 | }; 62 | 63 | #endif // BAYESMIX_HIERARCHIES_LIKELIHOODS_LAPLACE_LIKELIHOOD_H_ 64 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/multi_norm_likelihood.cc: -------------------------------------------------------------------------------- 1 | #include "multi_norm_likelihood.h" 2 | 3 | #include
"src/utils/distributions.h" 4 | #include "src/utils/eigen_utils.h" 5 | #include "src/utils/proto_utils.h" 6 | /* Log-density of one datum, delegated to bayesmix::multi_normal_prec_lpdf with the state's mean, prec_chol and prec_logdet */ 7 | double MultiNormLikelihood::compute_lpdf( 8 | const Eigen::RowVectorXd &datum) const { 9 | return bayesmix::multi_normal_prec_lpdf(datum, state.mean, state.prec_chol, 10 | state.prec_logdet); 11 | } 12 | /* Adds (add == true) or removes one datum's contribution to the summary statistics */ 13 | void MultiNormLikelihood::update_sum_stats(const Eigen::RowVectorXd &datum, 14 | bool add) { 15 | // Check if dim is not defined yet (this usually doesn't happen if the 16 | // hierarchy is initialized); if so, take it from the datum size 17 | if (!dim) set_dim(datum.size()); 18 | // Update the vector sum and the sum of outer products datum^T * datum 19 | if (add) { 20 | data_sum += datum.transpose(); 21 | data_sum_squares += datum.transpose() * datum; 22 | } else { 23 | data_sum -= datum.transpose(); 24 | data_sum_squares -= datum.transpose() * datum; 25 | } 26 | } 27 | /* Resets data_sum to a zero vector of size dim and data_sum_squares to a zero dim x dim matrix */ 28 | void MultiNormLikelihood::clear_summary_statistics() { 29 | data_sum = Eigen::VectorXd::Zero(dim); 30 | data_sum_squares = Eigen::MatrixXd::Zero(dim, dim); 31 | } 32 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/multi_norm_likelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_MULTI_NORM_LIKELIHOOD_H_ 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_MULTI_NORM_LIKELIHOOD_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "algorithm_state.pb.h" 12 | #include "base_likelihood.h" 13 | #include "states/includes.h" 14 | 15 | /** 16 | * A multivariate normal likelihood, using the `State::MultiLS` state. 17 | * Represents the model: 18 | * 19 | * \f[ 20 | * \bm{y}_1,\dots, \bm{y}_k \stackrel{\small\mathrm{iid}}{\sim} 21 | * N_p(\bm{\mu}, \Sigma), \f] 22 | * 23 | * where \f$ (\bm{\mu}, \Sigma) \f$ are stored in a `State::MultiLS` state. 24 | * The sufficient statistics stored are the sum of the \f$ \bm{y}_i \f$'s 25 | * and the sum of \f$ \bm{y}_i^T \bm{y}_i \f$.
26 | */ 27 | 28 | class MultiNormLikelihood 29 | : public BaseLikelihood { 30 | public: 31 | MultiNormLikelihood() = default; 32 | ~MultiNormLikelihood() = default; 33 | bool is_multivariate() const override { return true; }; 34 | bool is_dependent() const override { return false; }; 35 | void clear_summary_statistics() override; 36 | /* Stores the data dimension, then resets the summary statistics to that size */ 37 | void set_dim(unsigned int dim_) { 38 | dim = dim_; 39 | clear_summary_statistics(); 40 | }; 41 | unsigned int get_dim() const { return dim; }; 42 | Eigen::VectorXd get_data_sum() const { return data_sum; }; 43 | Eigen::MatrixXd get_data_sum_squares() const { return data_sum_squares; }; 44 | 45 | protected: 46 | double compute_lpdf(const Eigen::RowVectorXd &datum) const override; 47 | void update_sum_stats(const Eigen::RowVectorXd &datum, bool add) override; 48 | /* Data dimension. Explicitly 0 while unset: update_sum_stats() tests `!dim` to decide whether to call set_dim(), so the value must never be indeterminate */ 49 | unsigned int dim = 0; 50 | Eigen::VectorXd data_sum; 51 | Eigen::MatrixXd data_sum_squares; 52 | }; 53 | 54 | #endif // BAYESMIX_HIERARCHIES_LIKELIHOODS_MULTI_NORM_LIKELIHOOD_H_ 55 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/states/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources(bayesmix PUBLIC 2 | includes.h 3 | base_state.h 4 | uni_ls_state.h 5 | multi_ls_state.h 6 | uni_lin_reg_ls_state.h 7 | fa_state.h 8 | ) 9 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/states/includes.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_STATES_INCLUDES_H_ 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_STATES_INCLUDES_H_ 3 | 4 | #include "fa_state.h" 5 | #include "multi_ls_state.h" 6 | #include "uni_lin_reg_ls_state.h" 7 | #include "uni_ls_state.h" 8 | 9 | #endif // BAYESMIX_HIERARCHIES_LIKELIHOODS_STATES_INCLUDES_H_ 10 | --------------------------------------------------------------------------------
/src/hierarchies/likelihoods/uni_lin_reg_likelihood.cc: -------------------------------------------------------------------------------- 1 | #include "uni_lin_reg_likelihood.h" 2 | 3 | #include "src/utils/eigen_utils.h" 4 | /* Resets the three summary statistics to zero; the vector and matrix are sized by dim */ 5 | void UniLinRegLikelihood::clear_summary_statistics() { 6 | mixed_prod = Eigen::VectorXd::Zero(dim); 7 | data_sum_squares = 0.0; 8 | covar_sum_squares = Eigen::MatrixXd::Zero(dim, dim); 9 | } 10 | /* Normal log-density of datum(0) with mean regression_coeffs . covariate and standard deviation sqrt(state.var) */ 11 | double UniLinRegLikelihood::compute_lpdf( 12 | const Eigen::RowVectorXd &datum, 13 | const Eigen::RowVectorXd &covariate) const { 14 | return stan::math::normal_lpdf( 15 | datum(0), state.regression_coeffs.dot(covariate), sqrt(state.var)); 16 | } 17 | /* Adds (add == true) or removes the contribution of one (datum, covariate) pair to the sums of y^2, x^T x and y * x^T */ 18 | void UniLinRegLikelihood::update_sum_stats(const Eigen::RowVectorXd &datum, 19 | const Eigen::RowVectorXd &covariate, 20 | bool add) { 21 | if (add) { 22 | data_sum_squares += datum(0) * datum(0); 23 | covar_sum_squares += covariate.transpose() * covariate; 24 | mixed_prod += datum(0) * covariate.transpose(); 25 | } else { 26 | data_sum_squares -= datum(0) * datum(0); 27 | covar_sum_squares -= covariate.transpose() * covariate; 28 | mixed_prod -= datum(0) * covariate.transpose(); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/uni_lin_reg_likelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_LIN_REG_LIKELIHOOD_H_ 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_LIN_REG_LIKELIHOOD_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "algorithm_state.pb.h" 11 | #include "base_likelihood.h" 12 | #include "states/includes.h" 13 | 14 | /** 15 | * A scalar linear regression model, using the `State::UniLinRegLS` state.
16 | * Represents the model: 17 | * 18 | * \f[ 19 | * y_i \mid \bm{x}_i, \bm{\beta}, \sigma^2 20 | * \stackrel{\small\mathrm{ind}}{\sim} N(\bm{x}_i^T\bm{\beta},\sigma^2), \f] 21 | * 22 | * where \f$ (\bm{\beta}, \sigma^2) \f$ are stored in a `State::UniLinRegLS` 23 | * state. The sufficient statistics stored are the sum of \f$ y_i^2 \f$, the 24 | * sum of \f$ \bm{x}_i^T \bm{x}_i \f$ and the sum of \f$ y_i \bm{x}_i^T \f$. 25 | */ 26 | 27 | class UniLinRegLikelihood 28 | : public BaseLikelihood { 29 | public: 30 | UniLinRegLikelihood() = default; 31 | ~UniLinRegLikelihood() = default; 32 | bool is_multivariate() const override { return false; }; 33 | bool is_dependent() const override { return true; }; 34 | void clear_summary_statistics() override; 35 | 36 | // Getters and Setters (set_dim also resets the summary statistics) 37 | unsigned int get_dim() const { return dim; }; 38 | void set_dim(unsigned int dim_) { 39 | dim = dim_; 40 | clear_summary_statistics(); 41 | }; 42 | double get_data_sum_squares() const { return data_sum_squares; }; 43 | Eigen::MatrixXd get_covar_sum_squares() const { return covar_sum_squares; }; 44 | Eigen::VectorXd get_mixed_prod() const { return mixed_prod; }; 45 | 46 | protected: 47 | double compute_lpdf(const Eigen::RowVectorXd &datum, 48 | const Eigen::RowVectorXd &covariate) const override; 49 | void update_sum_stats(const Eigen::RowVectorXd &datum, 50 | const Eigen::RowVectorXd &covariate, 51 | bool add) override; 52 | 53 | // Dimension of the coefficients vector (0 until set_dim() is called) 54 | unsigned int dim = 0; 55 | // Represents pieces of y^T y; zero-initialized so that += / -= never act on an indeterminate value 56 | double data_sum_squares = 0.0; 57 | // Represents pieces of X^T X 58 | Eigen::MatrixXd covar_sum_squares; 59 | // Represents pieces of X^T y 60 | Eigen::VectorXd mixed_prod; 61 | }; 62 | 63 | #endif // BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_LIN_REG_LIKELIHOOD_H_ 64 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/uni_norm_likelihood.cc:
-------------------------------------------------------------------------------- 1 | #include "uni_norm_likelihood.h" 2 | /* Normal log-density of datum(0) with mean state.mean and standard deviation sqrt(state.var) */ 3 | double UniNormLikelihood::compute_lpdf(const Eigen::RowVectorXd &datum) const { 4 | return stan::math::normal_lpdf(datum(0), state.mean, sqrt(state.var)); 5 | } 6 | /* Adds (add == true) or removes datum(0) and its square from the running sums */ 7 | void UniNormLikelihood::update_sum_stats(const Eigen::RowVectorXd &datum, 8 | bool add) { 9 | if (add) { 10 | data_sum += datum(0); 11 | data_sum_squares += datum(0) * datum(0); 12 | } else { 13 | data_sum -= datum(0); 14 | data_sum_squares -= datum(0) * datum(0); 15 | } 16 | } 17 | /* Resets both running sums to zero */ 18 | void UniNormLikelihood::clear_summary_statistics() { 19 | data_sum = 0; 20 | data_sum_squares = 0; 21 | } 22 | -------------------------------------------------------------------------------- /src/hierarchies/likelihoods/uni_norm_likelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_NORM_LIKELIHOOD_H_ 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_NORM_LIKELIHOOD_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "algorithm_state.pb.h" 11 | #include "base_likelihood.h" 12 | #include "states/includes.h" 13 | 14 | /** 15 | * A univariate normal likelihood, using the `State::UniLS` state. Represents 16 | * the model: 17 | * 18 | * \f[ 19 | * y_1, \dots, y_k \mid \mu, \sigma^2 \stackrel{\small\mathrm{iid}}{\sim} 20 | * N(\mu, \sigma^2), \f] 21 | * 22 | * where \f$ (\mu, \sigma^2) \f$ are stored in a `State::UniLS` state. 23 | * The sufficient statistics stored are the sum of the \f$ y_i \f$'s and the 24 | * sum of \f$ y_i^2 \f$.
25 | */ 26 | 27 | class UniNormLikelihood 28 | : public BaseLikelihood { 29 | public: 30 | UniNormLikelihood() = default; 31 | ~UniNormLikelihood() = default; 32 | bool is_multivariate() const override { return false; }; 33 | bool is_dependent() const override { return false; }; 34 | void clear_summary_statistics() override; 35 | double get_data_sum() const { return data_sum; }; 36 | double get_data_sum_squares() const { return data_sum_squares; }; 37 | /* Cluster log-likelihood for an unconstrained parameter vector of size 2 (element 0: mean; element 1: mapped to a positive variance with positive_constrain). It is evaluated from data_sum, data_sum_squares and card only, without looping over the data. `card` is not declared in this class; presumably it is the cluster cardinality provided by the base class (TODO confirm). */ 38 | template 39 | T cluster_lpdf_from_unconstrained( 40 | const Eigen::Matrix &unconstrained_params) const { 41 | assert(unconstrained_params.size() == 2); 42 | T mean = unconstrained_params(0); 43 | T var = stan::math::positive_constrain(unconstrained_params(1)); 44 | T out = -(data_sum_squares - 2 * mean * data_sum + card * mean * mean) / 45 | (2 * var); 46 | out -= card * 0.5 * stan::math::log(stan::math::TWO_PI * var); 47 | return out; 48 | } 49 | 50 | protected: 51 | double compute_lpdf(const Eigen::RowVectorXd &datum) const override; 52 | void update_sum_stats(const Eigen::RowVectorXd &datum, bool add) override; 53 | /* Running sum of the data and of the squared data */ 54 | double data_sum = 0; 55 | double data_sum_squares = 0; 56 | }; 57 | 58 | #endif // BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_NORM_LIKELIHOOD_H_ 59 | -------------------------------------------------------------------------------- /src/hierarchies/load_hierarchies.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_LOAD_HIERARCHIES_H_ 2 | #define BAYESMIX_HIERARCHIES_LOAD_HIERARCHIES_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "abstract_hierarchy.h" 8 | #include "fa_hierarchy.h" 9 | #include "hierarchy_id.pb.h" 10 | #include "lapnig_hierarchy.h" 11 | #include "lin_reg_uni_hierarchy.h" 12 | #include "nnig_hierarchy.h" 13 | #include "nnw_hierarchy.h" 14 | #include "nnxig_hierarchy.h" 15 | #include "src/runtime/factory.h" 16 | 17 | //! Loads all available `Hierarchy` objects into the appropriate factory, so 18 | //!
that they are ready to be chosen and used at runtime. 19 | 20 | template 21 | using Builder = std::function()>; 22 | 23 | using HierarchyFactory = Factory; 24 | /* Registers one builder per hierarchy type in the HierarchyFactory singleton. `__attribute__((constructor))` is a GCC/Clang extension asking for the function to run automatically at load time. NOTE(review): the function is `static` in a header, so every translation unit that includes this file gets its own copy; confirm that add_builder() tolerates repeated registration and that non-GCC-compatible compilers are out of scope. */ 25 | __attribute__((constructor)) static void load_hierarchies() { 26 | HierarchyFactory &factory = HierarchyFactory::Instance(); 27 | // Initialize factory builders 28 | Builder NNIGbuilder = []() { 29 | return std::make_shared(); 30 | }; 31 | Builder NNxIGbuilder = []() { 32 | return std::make_shared(); 33 | }; 34 | Builder NNWbuilder = []() { 35 | return std::make_shared(); 36 | }; 37 | Builder LinRegUnibuilder = []() { 38 | return std::make_shared(); 39 | }; 40 | Builder FAbuilder = []() { 41 | return std::make_shared(); 42 | }; 43 | Builder LapNIGbuilder = []() { 44 | return std::make_shared(); 45 | }; 46 | /* Each hierarchy is default-constructed once only to read its ID, which is used as the registration key */ 47 | factory.add_builder(NNIGHierarchy().get_id(), NNIGbuilder); 48 | factory.add_builder(NNxIGHierarchy().get_id(), NNxIGbuilder); 49 | factory.add_builder(NNWHierarchy().get_id(), NNWbuilder); 50 | factory.add_builder(LinRegUniHierarchy().get_id(), LinRegUnibuilder); 51 | factory.add_builder(FAHierarchy().get_id(), FAbuilder); 52 | factory.add_builder(LapNIGHierarchy().get_id(), LapNIGbuilder); 53 | } 54 | 55 | #endif // BAYESMIX_HIERARCHIES_LOAD_HIERARCHIES_H_ 56 | -------------------------------------------------------------------------------- /src/hierarchies/nnxig_hierarchy.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_NNXIG_HIERARCHY_H_ 2 | #define BAYESMIX_HIERARCHIES_NNXIG_HIERARCHY_H_ 3 | 4 | #include "base_hierarchy.h" 5 | #include "hierarchy_id.pb.h" 6 | #include "likelihoods/uni_norm_likelihood.h" 7 | #include "priors/nxig_prior_model.h" 8 | #include "updaters/nnxig_updater.h" 9 | 10 | /** 11 | * Semi-conjugate Normal Normal x InverseGamma hierarchy for univariate data.
12 | * 13 | * This class represents a hierarchical model where data are distributed 14 | * according to a Normal likelihood (see the `UniNormLikelihood` class for 15 | * details). The likelihood parameters have a Normal x InverseGamma centering 16 | * distribution (see the `NxIGPriorModel` class for details). That is: 17 | * 18 | * \f[ 19 | * f(x_i \mid \mu,\sigma^2) &= N(\mu,\sigma^2) \\ 20 | * \mu &\sim N(\mu_0, \eta^2) \\ 21 | * \sigma^2 &\sim InvGamma(a, b) 22 | * \f] 23 | * 24 | * The state is composed of mean and variance. The state hyperparameters are 25 | * \f$ (\mu_0, \eta^2, a, b) \f$, all scalar values. Note that this hierarchy 26 | * is NOT conjugate, meaning that the marginal distribution is not available 27 | * in closed form 28 | */ 29 | 30 | class NNxIGHierarchy 31 | : public BaseHierarchy { 32 | public: 33 | NNxIGHierarchy() = default; 34 | ~NNxIGHierarchy() = default; 35 | 36 | //! Returns the Protobuf ID associated to this class 37 | bayesmix::HierarchyId get_id() const override { 38 | return bayesmix::HierarchyId::NNxIG; 39 | } 40 | 41 | //! Sets the default updater algorithm for this hierarchy (assigns a newly created object to `updater`) 42 | void set_default_updater() { updater = std::make_shared(); } 43 | 44 | //!
Initializes state parameters to appropriate values 45 | void initialize_state() override { 46 | // Get hypers 47 | auto hypers = prior->get_hypers(); 48 | // Initialize likelihood state: mean at the prior mean, variance at scale / (shape + 1), i.e. the mode of an InvGamma(shape, scale) 49 | State::UniLS state; 50 | state.mean = hypers.mean; 51 | state.var = hypers.scale / (hypers.shape + 1); 52 | like->set_state(state); 53 | }; 54 | }; 55 | 56 | #endif // BAYESMIX_HIERARCHIES_NNXIG_HIERARCHY_H_ 57 | -------------------------------------------------------------------------------- /src/hierarchies/priors/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources(bayesmix PUBLIC 2 | prior_model_internal.h 3 | abstract_prior_model.h 4 | base_prior_model.h 5 | hyperparams.h 6 | nig_prior_model.h 7 | nig_prior_model.cc 8 | nxig_prior_model.h 9 | nxig_prior_model.cc 10 | nw_prior_model.h 11 | nw_prior_model.cc 12 | mnig_prior_model.h 13 | mnig_prior_model.cc 14 | fa_prior_model.h 15 | fa_prior_model.cc 16 | ) 17 | -------------------------------------------------------------------------------- /src/hierarchies/priors/fa_prior_model.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_FA_PRIOR_MODEL_H_ 2 | #define BAYESMIX_HIERARCHIES_PRIORS_FA_PRIOR_MODEL_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "base_prior_model.h" 9 | #include "hierarchy_prior.pb.h" 10 | #include "hyperparams.h" 11 | #include "src/utils/rng.h" 12 | 13 | /** 14 | * A prior model for the factor analyzers likelihood, that is 15 | * 16 | * \f[ 17 | * \bm{\mu} &\sim N_p(\tilde{\bm{\mu}}, \psi I) \\ 18 | * \Lambda &\sim DL(\alpha) \\ 19 | * \Sigma &= \mathrm{diag}(\sigma^2_1, \ldots, \sigma^2_p) \\ 20 | * \sigma^2_j &\stackrel{\small\mathrm{iid}}{\sim} InvGamma(a,b) \quad 21 | * j=1,...,p \f] 22 | * 23 | * Where \f$ DL \f$ is the Dirichlet-Laplace distribution. 24 | * See Bhattacharya A., Pati D., Pillai N.S., Dunson D.B. (2015).
25 | * JASA 110(512), 1479–1490 for details. 26 | */ 27 | 28 | class FAPriorModel 29 | : public BasePriorModel { 31 | public: 32 | using AbstractPriorModel::ProtoHypers; 33 | using AbstractPriorModel::ProtoHypersPtr; 34 | 35 | FAPriorModel() = default; 36 | ~FAPriorModel() = default; 37 | 38 | double lpdf(const google::protobuf::Message &state_) override; 39 | 40 | State::FA sample(ProtoHypersPtr hier_hypers = nullptr) override; 41 | 42 | void update_hypers(const std::vector 43 | &states) override; 44 | 45 | void set_hypers_from_proto( 46 | const google::protobuf::Message &hypers_) override; 47 | 48 | unsigned int get_dim() const { return dim; }; 49 | 50 | std::shared_ptr get_hypers_proto() 51 | const override; 52 | 53 | protected: 54 | void initialize_hypers() override; 55 | /* Dimension reported by get_dim(); explicitly 0 until it is assigned elsewhere, so the getter never returns an indeterminate value */ 56 | unsigned int dim = 0; 57 | }; 58 | 59 | #endif // BAYESMIX_HIERARCHIES_PRIORS_FA_PRIOR_MODEL_H_ 60 | -------------------------------------------------------------------------------- /src/hierarchies/priors/hyperparams.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_HYPERPARAMS_H_ 2 | #define BAYESMIX_HIERARCHIES_PRIORS_HYPERPARAMS_H_ 3 | 4 | #include 5 | 6 | namespace Hyperparams { 7 | 8 | struct NIG { 9 | double mean, var_scaling, shape, scale; 10 | }; 11 | 12 | struct NxIG { 13 | double mean, var, shape, scale; 14 | }; 15 | 16 | struct NW { 17 | Eigen::VectorXd mean; 18 | double var_scaling, deg_free; 19 | Eigen::MatrixXd scale, scale_inv, scale_chol; 20 | }; 21 | 22 | struct MNIG { 23 | Eigen::VectorXd mean; 24 | Eigen::MatrixXd var_scaling, var_scaling_inv; 25 | double shape, scale; 26 | }; 27 | 28 | struct FA { 29 | Eigen::VectorXd mutilde, beta; 30 | double phi, alpha0; 31 | unsigned int q; 32 | }; 33 | 34 | } // namespace Hyperparams 35 | 36 | #endif // BAYESMIX_HIERARCHIES_PRIORS_HYPERPARAMS_H_ 37 | --------------------------------------------------------------------------------
/src/hierarchies/priors/mnig_prior_model.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_MNIG_PRIOR_MODEL_H_ 2 | #define BAYESMIX_HIERARCHIES_PRIORS_MNIG_PRIOR_MODEL_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "base_prior_model.h" 9 | #include "hierarchy_prior.pb.h" 10 | #include "hyperparams.h" 11 | #include "src/utils/rng.h" 12 | 13 | /** 14 | * A conjugate prior model for the scalar linear regression likelihood, i.e. 15 | * 16 | * \f[ 17 | * \bm{\beta} \mid \sigma^2 & \sim N_p(\bm{\mu}, \sigma^2 \Lambda^{-1}) \\ 18 | * \sigma^2 & \sim InvGamma(a,b) 19 | * \f] 20 | */ 21 | 22 | class MNIGPriorModel 23 | : public BasePriorModel { 25 | public: 26 | using AbstractPriorModel::ProtoHypers; 27 | using AbstractPriorModel::ProtoHypersPtr; 28 | 29 | MNIGPriorModel() = default; 30 | ~MNIGPriorModel() = default; 31 | 32 | double lpdf(const google::protobuf::Message &state_) override; 33 | 34 | State::UniLinRegLS sample(ProtoHypersPtr hier_hypers = nullptr) override; 35 | 36 | void update_hypers(const std::vector 37 | &states) override; 38 | 39 | void set_hypers_from_proto( 40 | const google::protobuf::Message &hypers_) override; 41 | 42 | unsigned int get_dim() const { return dim; }; 43 | 44 | std::shared_ptr get_hypers_proto() 45 | const override; 46 | 47 | protected: 48 | void initialize_hypers() override; 49 | /* Dimension reported by get_dim(); explicitly 0 until it is assigned elsewhere, so the getter never returns an indeterminate value */ 50 | unsigned int dim = 0; 51 | }; 52 | 53 | #endif // BAYESMIX_HIERARCHIES_PRIORS_MNIG_PRIOR_MODEL_H_ 54 | -------------------------------------------------------------------------------- /src/hierarchies/priors/nig_prior_model.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_NIG_PRIOR_MODEL_H_ 2 | #define BAYESMIX_HIERARCHIES_PRIORS_NIG_PRIOR_MODEL_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "base_prior_model.h" 10 | #include "hierarchy_prior.pb.h" 11 | #include
"hyperparams.h" 12 | #include "src/utils/rng.h" 13 | 14 | /** 15 | * A conjugate prior model for the univariate normal likelihood, that is 16 | * 17 | * \f[ 18 | * \mu \mid \sigma^2 &\sim N(\mu_0, \sigma^2 / \lambda) \\ 19 | * \sigma^2 &\sim InvGamma(a,b) 20 | * \f] 21 | * 22 | * With several possibilities for hyper-priors on \f$ \mu \f$ and \f$ \sigma^2 23 | * \f$. We have considered a normal prior for \f$ \mu_0 \f$ and a 24 | * Normal-Gamma-Gamma for \f$ (\mu_0, a, b) \f$ in addition to fixing prior 25 | * hyperparameters. 26 | */ 27 | 28 | class NIGPriorModel 29 | : public BasePriorModel { 31 | public: 32 | using AbstractPriorModel::ProtoHypers; 33 | using AbstractPriorModel::ProtoHypersPtr; 34 | 35 | NIGPriorModel() = default; 36 | ~NIGPriorModel() = default; 37 | 38 | double lpdf(const google::protobuf::Message &state_) override; 39 | /* Prior log-density at an unconstrained parameter vector: the input is mapped to (mean, var) with State::uni_ls_to_constrained, the normal and inverse-gamma terms are summed using the current hypers, and the log-determinant Jacobian from State::uni_ls_log_det_jac is added */ 40 | template 41 | T lpdf_from_unconstrained( 42 | const Eigen::Matrix &unconstrained_params) const { 43 | Eigen::Matrix constrained_params = 44 | State::uni_ls_to_constrained(unconstrained_params); 45 | T log_det_jac = State::uni_ls_log_det_jac(constrained_params); 46 | T mean = constrained_params(0); 47 | T var = constrained_params(1); 48 | T lpdf = stan::math::normal_lpdf(mean, hypers->mean, 49 | sqrt(var / hypers->var_scaling)) + 50 | stan::math::inv_gamma_lpdf(var, hypers->shape, hypers->scale); 51 | 52 | return lpdf + log_det_jac; 53 | } 54 | 55 | State::UniLS sample(ProtoHypersPtr hier_hypers = nullptr) override; 56 | 57 | void update_hypers(const std::vector 58 | &states) override; 59 | 60 | void set_hypers_from_proto( 61 | const google::protobuf::Message &hypers_) override; 62 | 63 | std::shared_ptr get_hypers_proto() 64 | const override; 65 | 66 | protected: 67 | void initialize_hypers() override; 68 | }; 69 | 70 | #endif // BAYESMIX_HIERARCHIES_PRIORS_NIG_PRIOR_MODEL_H_ 71 | -------------------------------------------------------------------------------- /src/hierarchies/priors/nw_prior_model.h:
-------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_NW_PRIOR_MODEL_H_ 2 | #define BAYESMIX_HIERARCHIES_PRIORS_NW_PRIOR_MODEL_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "base_prior_model.h" 10 | #include "hierarchy_prior.pb.h" 11 | #include "hyperparams.h" 12 | #include "src/utils/rng.h" 13 | 14 | /** 15 | * A conjugate prior model for the multivariate normal likelihood, that is 16 | * 17 | * \f[ 18 | * \bm{\mu} \mid \Sigma &\sim N_p(\bm{\mu}_0, (\Sigma \lambda)^{-1}) \\ 19 | * \Sigma & \sim Wishart(\nu_0, \Psi_0) 20 | * \f] 21 | * 22 | * With some options for hyper-priors on \f$ \bm{\mu} \f$ and \f$ \Sigma \f$. 23 | * We have considered a normal prior for \f$ \bm{\mu}_0 \f$ in addition to 24 | * fixing prior hyperparameters. 25 | */ 26 | 27 | class NWPriorModel 28 | : public BasePriorModel { 30 | public: 31 | NWPriorModel() = default; 32 | ~NWPriorModel() = default; 33 | 34 | double lpdf(const google::protobuf::Message &state_) override; 35 | 36 | State::MultiLS sample(ProtoHypersPtr hier_hypers = nullptr) override; 37 | 38 | void update_hypers(const std::vector 39 | &states) override; 40 | 41 | void set_hypers_from_proto( 42 | const google::protobuf::Message &hypers_) override; 43 | 44 | void write_prec_to_state(const Eigen::MatrixXd &prec_, State::MultiLS *out); 45 | 46 | unsigned int get_dim() const { return dim; }; 47 | 48 | std::shared_ptr get_hypers_proto() 49 | const override; 50 | 51 | protected: 52 | void initialize_hypers() override; 53 | /* Dimension reported by get_dim(); explicitly 0 until it is assigned elsewhere, so the getter never returns an indeterminate value */ 54 | unsigned int dim = 0; 55 | }; 56 | 57 | #endif // BAYESMIX_HIERARCHIES_PRIORS_NW_PRIOR_MODEL_H_ 58 | -------------------------------------------------------------------------------- /src/hierarchies/priors/nxig_prior_model.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_NXIG_PRIOR_MODEL_H_ 2 | #define
BAYESMIX_HIERARCHIES_PRIORS_NXIG_PRIOR_MODEL_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "base_prior_model.h" 10 | #include "hierarchy_prior.pb.h" 11 | #include "hyperparams.h" 12 | #include "src/utils/rng.h" 13 | 14 | /** 15 | * A semi-conjugate prior model for the univariate normal likelihood, that is 16 | * 17 | * \f[ 18 | * \mu & \sim N(\mu_0, \eta^2) \\ 19 | * \sigma^2 & \sim InvGamma(a,b) 20 | * \f] 21 | * 21 | * In this model the prior on \f$ \mu \f$ does not depend on \f$ \sigma^2 \f$. 21 | */ 22 | 23 | class NxIGPriorModel 24 | : public BasePriorModel { 26 | public: 27 | using AbstractPriorModel::ProtoHypers; 28 | using AbstractPriorModel::ProtoHypersPtr; 29 | 30 | NxIGPriorModel() = default; 31 | ~NxIGPriorModel() = default; 32 | /* The overrides below are only declared here; they are presumably defined in nxig_prior_model.cc (TODO confirm) */ 33 | double lpdf(const google::protobuf::Message &state_) override; 34 | 35 | State::UniLS sample(ProtoHypersPtr hier_hypers = nullptr) override; 36 | 37 | void update_hypers(const std::vector 38 | &states) override; 39 | 40 | void set_hypers_from_proto( 41 | const google::protobuf::Message &hypers_) override; 42 | 43 | std::shared_ptr get_hypers_proto() 44 | const override; 45 | 46 | protected: 47 | void initialize_hypers() override; 48 | }; 49 | 50 | #endif // BAYESMIX_HIERARCHIES_PRIORS_NXIG_PRIOR_MODEL_H_ 51 | -------------------------------------------------------------------------------- /src/hierarchies/priors/prior_model_internal.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_PRIOR_MODEL_INTERNAL_H_ 2 | #define BAYESMIX_HIERARCHIES_PRIORS_PRIOR_MODEL_INTERNAL_H_ 3 | 4 | //! These functions exploit SFINAE to manage exception handling in all methods 5 | //! required only if end user wants to rely on Metropolis-like updaters. SFINAE 6 | //! (Substitution Failure Is Not An Error) is a C++ rule that applies during 7 | //! overload resolution of function templates: When substituting the explicitly 8 | //! specified or deduced type for the template parameter fails, the 9 | //!
specialization is discarded from the overload set instead of causing a 10 | //! compile error. This feature is used in template metaprogramming. 11 | 12 | namespace internal { 13 | /* Forwarding overload: only viable when `prior.lpdf_from_unconstrained(...)` is a valid expression (the decltype in the trailing return type removes it otherwise). The unnamed `int` parameter is a dispatch tag; presumably callers pass an int literal so that this overload is preferred over the `double` one whenever it is viable (TODO confirm against callers). NOTE(review): the parameter vector is taken by value. */ 14 | template 15 | auto lpdf_from_unconstrained( 16 | const Prior &prior, 17 | Eigen::Matrix unconstrained_params, int) 18 | -> decltype(prior.template lpdf_from_unconstrained( 19 | unconstrained_params)) { 20 | return prior.template lpdf_from_unconstrained(unconstrained_params); 21 | } 22 | /* Fallback overload (tag type `double`): always throws std::runtime_error. NOTE(review): no include of <stdexcept> is visible in this header; it seems to rely on the including file. */ 23 | template 24 | auto lpdf_from_unconstrained( 25 | const Prior &prior, 26 | Eigen::Matrix unconstrained_params, double) -> T { 27 | throw(std::runtime_error("lpdf_from_unconstrained() not yet implemented")); 28 | } 29 | 30 | } // namespace internal 31 | 32 | #endif // BAYESMIX_HIERARCHIES_PRIORS_PRIOR_MODEL_INTERNAL_H_ 33 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources(bayesmix PUBLIC 2 | abstract_updater.h 3 | semi_conjugate_updater.h 4 | nnig_updater.h 5 | nnig_updater.cc 6 | nnxig_updater.h 7 | nnxig_updater.cc 8 | nnw_updater.h 9 | nnw_updater.cc 10 | mnig_updater.h 11 | mnig_updater.cc 12 | fa_updater.h 13 | fa_updater.cc 14 | metropolis_updater.h 15 | mala_updater.h 16 | random_walk_updater.h 17 | target_lpdf_unconstrained.h 18 | ) 19 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/fa_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_FA_UPDATER_H_ 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_FA_UPDATER_H_ 3 | 4 | #include "abstract_updater.h" 5 | #include "src/hierarchies/likelihoods/fa_likelihood.h" 6 | #include "src/hierarchies/likelihoods/states/includes.h" 7 | #include "src/hierarchies/priors/fa_prior_model.h" 8 | #include "src/hierarchies/priors/hyperparams.h" 9 | #include
"src/utils/proto_utils.h" 10 | 11 | //! Updater specific for the `FAHierarchy`. 12 | //! See Bhattacharya, Anirban, and David B. Dunson. 13 | //! "Sparse Bayesian infinite factor models." Biometrika (2011): 291-306. 14 | //! for further details. 15 | class FAUpdater : public AbstractUpdater { 16 | public: 17 | FAUpdater() = default; 18 | ~FAUpdater() = default; 19 | void draw(AbstractLikelihood& like, AbstractPriorModel& prior, 20 | bool update_params) override; 21 | /* Returns a copy of this updater on which clear_hypers() has been called */ 22 | std::shared_ptr clone() const override { 23 | auto out = 24 | std::make_shared(static_cast(*this)); 25 | out->clear_hypers(); 26 | return out; 27 | } 28 | 29 | protected: /* One sampling step per block of the FA state; each receives the state to modify, the prior hyperparameters and the likelihood */ 30 | void sample_eta(State::FA& state, const Hyperparams::FA& hypers, 31 | const FALikelihood& like); 32 | void sample_mu(State::FA& state, const Hyperparams::FA& hypers, 33 | const FALikelihood& like); 34 | void sample_lambda(State::FA& state, const Hyperparams::FA& hypers, 35 | const FALikelihood& like); 36 | void sample_psi(State::FA& state, const Hyperparams::FA& hypers, 37 | const FALikelihood& like); 38 | }; 39 | 40 | #endif // BAYESMIX_HIERARCHIES_UPDATERS_FA_UPDATER_H_ 41 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/metropolis_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_METROPOLIS_UPDATER_H_ 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_METROPOLIS_UPDATER_H_ 3 | 4 | #include "abstract_updater.h" 5 | #include "target_lpdf_unconstrained.h" 6 | 7 | //! Base class for updaters using a Metropolis-Hastings algorithm 8 | //! 9 | //! This class serves as the base for a CRTP. 10 | //! Children of this class should implement the methods 11 | //! template 12 | //! Eigen::VectorXd sample_proposal(Eigen::VectorXd curr_state, 13 | //! AbstractLikelihood &like, 14 | //! AbstractPriorModel &prior, F 15 | //! &target_lpdf) 16 | //! and 17 | //! template 18 | //!
double proposal_lpdf(Eigen::VectorXd prop_state, 19 | //! Eigen::VectorXd curr_state, 20 | //! AbstractLikelihood &like, 21 | //! AbstractPriorModel &prior, 22 | //! F &target_lpdf) 23 | //! where the template parameter is needed to allow the use of stan's 24 | //! automatic differentiation if the gradient of the full conditional is 25 | //! required. 26 | template 27 | class MetropolisUpdater : public AbstractUpdater { 28 | public: 29 | //! Samples from the full conditional distribution using a 30 | //! Metropolis-Hastings step 31 | void draw(AbstractLikelihood &like, AbstractPriorModel &prior, 32 | bool update_params) override { 33 | target_lpdf_unconstrained target_lpdf(&like, &prior); 34 | Eigen::VectorXd curr_state = like.get_unconstrained_state(); 35 | Eigen::VectorXd prop_state = 36 | static_cast(this)->sample_proposal( 37 | curr_state, like, prior, target_lpdf); 38 | // Log acceptance ratio on the full conditional (likelihood + prior) 39 | double log_arate = target_lpdf(prop_state) - 40 | target_lpdf(curr_state) + 41 | static_cast(this)->proposal_lpdf( 42 | curr_state, prop_state, like, prior, target_lpdf) - 43 | static_cast(this)->proposal_lpdf( 44 | prop_state, curr_state, like, prior, target_lpdf); 45 | 46 | auto &rng = bayesmix::Rng::Instance().get(); 47 | if (std::log(stan::math::uniform_rng(0, 1, rng)) < log_arate) { 48 | like.set_state_from_unconstrained(prop_state); 49 | } 50 | } 51 | }; 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/mnig_updater.cc: -------------------------------------------------------------------------------- 1 | #include "mnig_updater.h" 2 | 3 | AbstractUpdater::ProtoHypersPtr MNIGUpdater::compute_posterior_hypers( 4 | AbstractLikelihood& like, AbstractPriorModel& prior) { 5 | // Likelihood and Prior downcast 6 | auto& likecast = downcast_likelihood(like); 7 | auto& priorcast = downcast_prior(prior); 8 | 9 | // Getting required quantities from likelihood and prior
10 | int card = likecast.get_card(); 11 | unsigned int dim = likecast.get_dim(); 12 | double data_sum_squares = likecast.get_data_sum_squares(); 13 | Eigen::MatrixXd covar_sum_squares = likecast.get_covar_sum_squares(); 14 | Eigen::MatrixXd mixed_prod = likecast.get_mixed_prod(); 15 | auto hypers = priorcast.get_hypers(); 16 | 17 | // No update possible 18 | if (card == 0) { 19 | return priorcast.get_hypers_proto(); 20 | } 21 | 22 | // Compute posterior hyperparameters 23 | Eigen::VectorXd mean; 24 | Eigen::MatrixXd var_scaling, var_scaling_inv; 25 | double shape, scale; 26 | 27 | var_scaling = covar_sum_squares + hypers.var_scaling; 28 | auto llt = var_scaling.llt(); 29 | mean = llt.solve(mixed_prod + hypers.var_scaling * hypers.mean); 30 | shape = hypers.shape + 0.5 * card; 31 | scale = hypers.scale + 32 | 0.5 * (data_sum_squares + 33 | hypers.mean.transpose() * hypers.var_scaling * hypers.mean - 34 | mean.transpose() * var_scaling * mean); 35 | 36 | // Proto conversion 37 | ProtoHypers out; 38 | bayesmix::to_proto(mean, out.mutable_lin_reg_uni_state()->mutable_mean()); 39 | bayesmix::to_proto(var_scaling, 40 | out.mutable_lin_reg_uni_state()->mutable_var_scaling()); 41 | out.mutable_lin_reg_uni_state()->set_shape(shape); 42 | out.mutable_lin_reg_uni_state()->set_scale(scale); 43 | return std::make_shared(out); 44 | } 45 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/mnig_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_MNIG_UPDATER_H_ 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_MNIG_UPDATER_H_ 3 | 4 | #include "semi_conjugate_updater.h" 5 | #include "src/hierarchies/likelihoods/uni_lin_reg_likelihood.h" 6 | #include "src/hierarchies/priors/mnig_prior_model.h" 7 | 8 | /** 9 | * Updater specific for the `UniLinRegLikelihood` used in combination 10 | * with `MNIGPriorModel`, that is the model 11 | * 12 | * \f[ 13 | * y_i 
\mid \bm{\beta}, \sigma^2 &\stackrel{\small\mathrm{iid}}{\sim} 14 | * N(\bm{\beta}^T\bm{x}_i, \sigma^2) \\ 15 | * \bm{\beta} \mid \sigma^2 &\sim N_p(\mu_{0}, \sigma^2 \mathbf{V}^{-1}) \\ 16 | * \sigma^2 &\sim InvGamma(a, b) 17 | * \f] 18 | * 19 | * It exploits the conjugacy of the model to sample the full conditional of 20 | * \f$ (\bm{\beta}, \sigma^2) \f$ by calling `MNIGPriorModel::sample` with 21 | * updated parameters 22 | */ 23 | 24 | class MNIGUpdater 25 | : public SemiConjugateUpdater { 26 | public: 27 | MNIGUpdater() = default; 28 | ~MNIGUpdater() = default; 29 | 30 | bool is_conjugate() const override { return true; }; 31 | 32 | ProtoHypersPtr compute_posterior_hypers(AbstractLikelihood &like, 33 | AbstractPriorModel &prior) override; 34 | 35 | std::shared_ptr clone() const override { 36 | auto out = 37 | std::make_shared(static_cast(*this)); 38 | out->clear_hypers(); 39 | return out; 40 | } 41 | }; 42 | 43 | #endif // BAYESMIX_HIERARCHIES_UPDATERS_MNIG_UPDATER_H_ 44 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/nnig_updater.cc: -------------------------------------------------------------------------------- 1 | #include "nnig_updater.h" 2 | 3 | #include "src/hierarchies/likelihoods/states/includes.h" 4 | #include "src/hierarchies/priors/hyperparams.h" 5 | 6 | AbstractUpdater::ProtoHypersPtr NNIGUpdater::compute_posterior_hypers( 7 | AbstractLikelihood& like, AbstractPriorModel& prior) { 8 | // Likelihood and Prior downcast 9 | auto& likecast = downcast_likelihood(like); 10 | auto& priorcast = downcast_prior(prior); 11 | 12 | // Getting required quantities from likelihood and prior 13 | int card = likecast.get_card(); 14 | double data_sum = likecast.get_data_sum(); 15 | double data_sum_squares = likecast.get_data_sum_squares(); 16 | auto hypers = priorcast.get_hypers(); 17 | 18 | // No update possible 19 | if (card == 0) { 20 | return priorcast.get_hypers_proto(); 21 | } 22 | 23 | // Compute 
posterior hyperparameters 24 | double mean, var_scaling, shape, scale; 25 | double y_bar = data_sum / (1.0 * card); // sample mean 26 | double ss = data_sum_squares - card * y_bar * y_bar; 27 | mean = (hypers.var_scaling * hypers.mean + data_sum) / 28 | (hypers.var_scaling + card); 29 | var_scaling = hypers.var_scaling + card; 30 | shape = hypers.shape + 0.5 * card; 31 | scale = hypers.scale + 0.5 * ss + 32 | 0.5 * hypers.var_scaling * card * (y_bar - hypers.mean) * 33 | (y_bar - hypers.mean) / (card + hypers.var_scaling); 34 | 35 | // Proto conversion 36 | ProtoHypers out; 37 | out.mutable_nnig_state()->set_mean(mean); 38 | out.mutable_nnig_state()->set_var_scaling(var_scaling); 39 | out.mutable_nnig_state()->set_shape(shape); 40 | out.mutable_nnig_state()->set_scale(scale); 41 | return std::make_shared(out); 42 | } 43 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/nnig_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_NNIG_UPDATER_H_ 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_NNIG_UPDATER_H_ 3 | 4 | #include "semi_conjugate_updater.h" 5 | #include "src/hierarchies/likelihoods/uni_norm_likelihood.h" 6 | #include "src/hierarchies/priors/nig_prior_model.h" 7 | 8 | /** 9 | * Updater specific for the `UniNormLikelihood` used in combination 10 | * with `NIGPriorModel`, that is the model 11 | * 12 | * \f[ 13 | * y_i \mid \mu, \sigma^2 &\stackrel{\small\mathrm{iid}}{\sim} N(\mu, 14 | * \sigma^2) \\ 15 | * \mu \mid \sigma^2 &\sim N(\mu_0, \sigma^2 / \lambda) \\ 16 | * \sigma^2 &\sim InvGamma(a, b) 17 | * \f] 18 | * 19 | * It exploits the conjugacy of the model to sample the full conditional of 20 | * \f$ (\mu, \sigma^2) \f$ by calling `NIGPriorModel::sample` with updated 21 | * parameters 22 | */ 23 | 24 | class NNIGUpdater 25 | : public SemiConjugateUpdater { 26 | public: 27 | NNIGUpdater() = default; 28 | ~NNIGUpdater() = 
default; 29 | 30 | bool is_conjugate() const override { return true; }; 31 | 32 | ProtoHypersPtr compute_posterior_hypers(AbstractLikelihood &like, 33 | AbstractPriorModel &prior) override; 34 | 35 | std::shared_ptr clone() const override { 36 | auto out = 37 | std::make_shared(static_cast(*this)); 38 | out->clear_hypers(); 39 | return out; 40 | } 41 | }; 42 | 43 | #endif // BAYESMIX_HIERARCHIES_UPDATERS_NNIG_UPDATER_H_ 44 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/nnw_updater.cc: -------------------------------------------------------------------------------- 1 | #include "nnw_updater.h" 2 | 3 | #include "algorithm_state.pb.h" 4 | #include "src/hierarchies/likelihoods/states/includes.h" 5 | #include "src/hierarchies/priors/hyperparams.h" 6 | #include "src/utils/proto_utils.h" 7 | 8 | AbstractUpdater::ProtoHypersPtr NNWUpdater::compute_posterior_hypers( 9 | AbstractLikelihood& like, AbstractPriorModel& prior) { 10 | // Likelihood and Prior downcast 11 | auto& likecast = downcast_likelihood(like); 12 | auto& priorcast = downcast_prior(prior); 13 | 14 | // Getting required quantities from likelihood and prior 15 | int card = likecast.get_card(); 16 | Eigen::VectorXd data_sum = likecast.get_data_sum(); 17 | Eigen::MatrixXd data_sum_squares = likecast.get_data_sum_squares(); 18 | auto hypers = priorcast.get_hypers(); 19 | 20 | // No update possible 21 | if (card == 0) { 22 | return prior.get_hypers_proto(); 23 | } 24 | 25 | // Compute posterior hyperparameters 26 | Eigen::VectorXd mean; 27 | double var_scaling, deg_free; 28 | Eigen::MatrixXd scale, scale_inv, scale_chol; 29 | var_scaling = hypers.var_scaling + card; 30 | deg_free = hypers.deg_free + card; 31 | Eigen::VectorXd mubar = data_sum.array() / card; // sample mean 32 | mean = (hypers.var_scaling * hypers.mean + card * mubar) / 33 | (hypers.var_scaling + card); 34 | 35 | // Compute tau_n 36 | Eigen::MatrixXd tau_temp = 37 | data_sum_squares - 
card * mubar * mubar.transpose(); 38 | tau_temp += (card * hypers.var_scaling / (card + hypers.var_scaling)) * 39 | (mubar - hypers.mean) * (mubar - hypers.mean).transpose(); 40 | scale_inv = tau_temp + hypers.scale_inv; 41 | scale = stan::math::inverse_spd(scale_inv); 42 | scale_chol = Eigen::LLT(scale).matrixU(); 43 | 44 | // Proto conversion 45 | ProtoHypers out; 46 | bayesmix::to_proto(mean, out.mutable_nnw_state()->mutable_mean()); 47 | out.mutable_nnw_state()->set_var_scaling(var_scaling); 48 | out.mutable_nnw_state()->set_deg_free(deg_free); 49 | bayesmix::to_proto(scale, out.mutable_nnw_state()->mutable_scale()); 50 | bayesmix::to_proto(scale_chol, 51 | out.mutable_nnw_state()->mutable_scale_chol()); 52 | return std::make_shared(out); 53 | } 54 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/nnw_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_NNW_UPDATER_H_ 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_NNW_UPDATER_H_ 3 | 4 | #include "semi_conjugate_updater.h" 5 | #include "src/hierarchies/likelihoods/multi_norm_likelihood.h" 6 | #include "src/hierarchies/priors/nw_prior_model.h" 7 | 8 | /** 9 | * Updater specific for the `MultiNormLikelihood` used in combination 10 | * with `NWPriorModel`, that is the model 11 | * 12 | * \f[ 13 | * y_i \mid \bm{\mu}, \Sigma &\stackrel{\small\mathrm{iid}}{\sim} 14 | * N_d(\bm{\mu}, \Sigma) \\ 15 | * \bm{\mu} \mid \Sigma &\sim N_d(\bm{\mu}_0, \Sigma / \lambda) \\ 16 | * \Sigma^{-1} &\sim Wishart(\nu, \Psi) 17 | * \f] 18 | * 19 | * It exploits the conjugacy of the model to sample the full conditional of 20 | * \f$ (\bm{\mu}, \Sigma) \f$ by calling `NWPriorModel::sample` with updated 21 | * parameters.
22 | */ 23 | 24 | class NNWUpdater 25 | : public SemiConjugateUpdater { 26 | public: 27 | NNWUpdater() = default; 28 | ~NNWUpdater() = default; 29 | 30 | bool is_conjugate() const override { return true; }; 31 | 32 | ProtoHypersPtr compute_posterior_hypers(AbstractLikelihood &like, 33 | AbstractPriorModel &prior) override; 34 | 35 | std::shared_ptr clone() const override { 36 | auto out = 37 | std::make_shared(static_cast(*this)); 38 | out->clear_hypers(); 39 | return out; 40 | } 41 | }; 42 | 43 | #endif // BAYESMIX_HIERARCHIES_UPDATERS_NNW_UPDATER_H_ 44 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/nnxig_updater.cc: -------------------------------------------------------------------------------- 1 | #include "nnxig_updater.h" 2 | 3 | #include "src/hierarchies/likelihoods/states/includes.h" 4 | #include "src/hierarchies/priors/hyperparams.h" 5 | 6 | AbstractUpdater::ProtoHypersPtr NNxIGUpdater::compute_posterior_hypers( 7 | AbstractLikelihood& like, AbstractPriorModel& prior) { 8 | // Likelihood and Prior downcast 9 | auto& likecast = downcast_likelihood(like); 10 | auto& priorcast = downcast_prior(prior); 11 | 12 | // Getting required quantities from likelihood and prior 13 | auto state = likecast.get_state(); 14 | int card = likecast.get_card(); 15 | double data_sum = likecast.get_data_sum(); 16 | double data_sum_squares = likecast.get_data_sum_squares(); 17 | auto hypers = priorcast.get_hypers(); 18 | 19 | // No update possible 20 | if (card == 0) { 21 | return priorcast.get_hypers_proto(); 22 | } 23 | 24 | // Compute posterior hyperparameters 25 | double mean, var, shape, scale; 26 | double var_y = data_sum_squares - 2 * state.mean * data_sum + 27 | card * state.mean * state.mean; 28 | mean = (hypers.var * data_sum + state.var * hypers.mean) / 29 | (card * hypers.var + state.var); 30 | var = (state.var * hypers.var) / (card * hypers.var + state.var); 31 | shape = hypers.shape + 0.5 * card; 32 | 
scale = hypers.scale + 0.5 * var_y; 33 | 34 | // Proto conversion 35 | ProtoHypers out; 36 | out.mutable_nnxig_state()->set_mean(mean); 37 | out.mutable_nnxig_state()->set_var(var); 38 | out.mutable_nnxig_state()->set_shape(shape); 39 | out.mutable_nnxig_state()->set_scale(scale); 40 | return std::make_shared(out); 41 | } 42 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/nnxig_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_NNXIG_UPDATER_H_ 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_NNXIG_UPDATER_H_ 3 | 4 | #include "semi_conjugate_updater.h" 5 | #include "src/hierarchies/likelihoods/uni_norm_likelihood.h" 6 | #include "src/hierarchies/priors/nxig_prior_model.h" 7 | 8 | /** 9 | * Updater specific for the `UniNormLikelihood` used in combination 10 | * with `NxIGPriorModel`, that is the model 11 | * 12 | * \f[ 13 | * y_i \mid \mu, \sigma^2 &\stackrel{\small\mathrm{iid}}{\sim} N(\mu, 14 | * \sigma^2) \\ 15 | * \mu &\sim N(\mu_0, \eta^2) \\ 16 | * \sigma^2 & \sim InvGamma(a,b) 17 | * \f] 18 | * 19 | * It exploits the semi-conjugacy of the model to sample the full conditional 20 | * of \f$ (\mu, \sigma^2) \f$ by calling `NxIGPriorModel::sample` with updated 21 | * parameters 22 | */ 23 | 24 | class NNxIGUpdater 25 | : public SemiConjugateUpdater { 26 | public: 27 | NNxIGUpdater() = default; 28 | ~NNxIGUpdater() = default; 29 | 30 | ProtoHypersPtr compute_posterior_hypers(AbstractLikelihood &like, 31 | AbstractPriorModel &prior) override; 32 | 33 | std::shared_ptr clone() const override { 34 | auto out = std::make_shared( 35 | static_cast(*this)); 36 | out->clear_hypers(); 37 | return out; 38 | } 39 | }; 40 | 41 | #endif // BAYESMIX_HIERARCHIES_UPDATERS_NNXIG_UPDATER_H_ 42 | -------------------------------------------------------------------------------- /src/hierarchies/updaters/target_lpdf_unconstrained.h: 
-------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_SRC_HIERARCHIES_UPDATERS_TARGET_LPDF_UNCONSTRAINED_H_ 2 | #define BAYESMIX_SRC_HIERARCHIES_UPDATERS_TARGET_LPDF_UNCONSTRAINED_H_ 3 | 4 | #include "src/hierarchies/likelihoods/abstract_likelihood.h" 5 | #include "src/hierarchies/priors/abstract_prior_model.h" 6 | 7 | //! Functor that computes the log-full conditional distribution 8 | //! of a specific hierarchy. 9 | //! Used by metropolis-like updaters especially when the gradient 10 | //! of the target_lpdf is required 11 | class target_lpdf_unconstrained { 12 | protected: 13 | AbstractLikelihood* like; 14 | AbstractPriorModel* prior; 15 | 16 | public: 17 | target_lpdf_unconstrained(AbstractLikelihood* like, 18 | AbstractPriorModel* prior) 19 | : like(like), prior(prior) {} 20 | 21 | //! Computes the log-full conditional that is simply the 22 | //! sum of `cluster_lpdf_from_unconstrained` in `AbstractLikelihood` 23 | //! and `lpdf_from_unconstrained` in `AbstractPriorModel` 24 | template 25 | T operator()(const Eigen::Matrix& x) const { 26 | return like->cluster_lpdf_from_unconstrained(x) + 27 | prior->lpdf_from_unconstrained(x); 28 | } 29 | }; 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/includes.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_INCLUDES_H_ 2 | #define BAYESMIX_INCLUDES_H_ 3 | 4 | #include "algorithm_params.pb.h" 5 | #include "algorithms/blocked_gibbs_algorithm.h" 6 | #include "algorithms/load_algorithms.h" 7 | #include "algorithms/neal2_algorithm.h" 8 | #include "algorithms/neal3_algorithm.h" 9 | #include "algorithms/neal8_algorithm.h" 10 | #include "collectors/file_collector.h" 11 | #include "collectors/memory_collector.h" 12 | #include "hierarchies/fa_hierarchy.h" 13 | #include "hierarchies/lapnig_hierarchy.h" 14 | #include "hierarchies/lin_reg_uni_hierarchy.h" 15 | #include
"hierarchies/load_hierarchies.h" 16 | #include "hierarchies/nnig_hierarchy.h" 17 | #include "hierarchies/nnw_hierarchy.h" 18 | #include "hierarchies/nnxig_hierarchy.h" 19 | #include "mixings/dirichlet_mixing.h" 20 | #include "mixings/load_mixings.h" 21 | #include "mixings/logit_sb_mixing.h" 22 | #include "mixings/mixture_finite_mixing.h" 23 | #include "mixings/pityor_mixing.h" 24 | #include "mixings/truncated_sb_mixing.h" 25 | #include "runtime/factory.h" 26 | #include "utils/cluster_utils.h" 27 | #include "utils/eval_like.h" 28 | #include "utils/io_utils.h" 29 | #include "utils/proto_utils.h" 30 | 31 | #endif // BAYESMIX_INCLUDES_H_ 32 | -------------------------------------------------------------------------------- /src/mixings/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources(bayesmix 2 | PUBLIC 3 | base_mixing.h 4 | dirichlet_mixing.h 5 | dirichlet_mixing.cc 6 | logit_sb_mixing.cc 7 | logit_sb_mixing.h 8 | mixture_finite_mixing.cc 9 | mixture_finite_mixing.h 10 | pityor_mixing.h 11 | pityor_mixing.cc 12 | truncated_sb_mixing.h 13 | truncated_sb_mixing.cc 14 | ) 15 | -------------------------------------------------------------------------------- /src/mixings/load_mixings.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_MIXINGS_LOAD_MIXINGS_H_ 2 | #define BAYESMIX_MIXINGS_LOAD_MIXINGS_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "abstract_mixing.h" 8 | #include "dirichlet_mixing.h" 9 | #include "logit_sb_mixing.h" 10 | #include "mixture_finite_mixing.h" 11 | #include "pityor_mixing.h" 12 | #include "src/runtime/factory.h" 13 | #include "truncated_sb_mixing.h" 14 | 15 | //! Loads all available `Mixing` objects into the appropriate factory, so that 16 | //! they are ready to be chosen and used at runtime. 
17 | 18 | template 19 | using Builder = std::function()>; 20 | 21 | using MixingFactory = Factory; 22 | 23 | __attribute__((constructor)) static void load_mixings() { 24 | MixingFactory &factory = MixingFactory::Instance(); 25 | // Initialize factory builders 26 | Builder DPbuilder = []() { 27 | return std::make_shared(); 28 | }; 29 | Builder LogSBbuilder = []() { 30 | return std::make_shared(); 31 | }; 32 | Builder MFMbuilder = []() { 33 | return std::make_shared(); 34 | }; 35 | Builder PYbuilder = []() { 36 | return std::make_shared(); 37 | }; 38 | Builder TruncSBbuilder = []() { 39 | return std::make_shared(); 40 | }; 41 | 42 | factory.add_builder(DirichletMixing().get_id(), DPbuilder); 43 | factory.add_builder(LogitSBMixing().get_id(), LogSBbuilder); 44 | factory.add_builder(MixtureFiniteMixing().get_id(), MFMbuilder); 45 | factory.add_builder(PitYorMixing().get_id(), PYbuilder); 46 | factory.add_builder(TruncatedSBMixing().get_id(), TruncSBbuilder); 47 | } 48 | 49 | #endif // BAYESMIX_MIXINGS_LOAD_MIXINGS_H_ 50 | -------------------------------------------------------------------------------- /src/plots/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14.0) 2 | -------------------------------------------------------------------------------- /src/plots/plot_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_PLOTS_PLOT_UTILS_H_ 2 | #define BAYESMIX_PLOTS_PLOT_UTILS_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | /* 10 | * Converts the support points of a 2d function and associated values 11 | * from the format {(x_i, y_i), z_i} stored in grid and vals respectively, 12 | * to grids over the 2d domain. Used in density_plot_2d. 
13 | */ 14 | std::tuple>, std::vector>, 15 | std::vector>> 16 | to_mesh(const Eigen::MatrixXd &grid, const Eigen::VectorXd &vals); 17 | 18 | void density_plot_1d(const Eigen::MatrixXd &grid, const Eigen::VectorXd &dens, 19 | const std::string &outfile); 20 | 21 | void density_plot_2d(const Eigen::MatrixXd &grid, const Eigen::VectorXd &dens_, 22 | const std::string &outfile, const bool log_scale = true); 23 | 24 | void num_clus_trace(const Eigen::MatrixXd &num_clus_chain, 25 | const std::string &outfile); 26 | 27 | void num_clus_bar(const Eigen::MatrixXd &num_clus_chain_, 28 | const std::string &outfile); 29 | 30 | #endif // BAYESMIX_PLOTS_PLOT_UTILS_H_ 31 | -------------------------------------------------------------------------------- /src/proto/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | -------------------------------------------------------------------------------- /src/proto/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/src/proto/CMakeLists.txt -------------------------------------------------------------------------------- /src/proto/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/src/proto/__init__.py -------------------------------------------------------------------------------- /src/proto/algorithm_id.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package bayesmix; 4 | 5 | /* 6 | * Enum for the different types of algorithms. 7 | * References 8 | * [1] R. M. Neal, Markov Chain Sampling Methods for Dirichlet Process Mixture Models. JCGS(2000) 9 | * [2] H. Ishwaran and L. F. James, Gibbs Sampling Methods for Stick-Breaking Priors. 
JASA(2001) 10 | * [3] S. Jain and R. M. Neal, A Split-Merge Markov Chain Monte Carlo Procedure for the Dirichlet Process Mixture Model. JCGS (2004) 11 | * [4] M. Kalli, J. Griffin and S. G. Walker, Slice sampling mixture models. Stat and Comp. (2011) 12 | */ 13 | enum AlgorithmId { 14 | UNKNOWN_ALGORITHM = 0; 15 | Neal2 = 1; // Neal's Algorithm 2, see [1] 16 | Neal3 = 2; // Neal's Algorithm 3, see [1] 17 | Neal8 = 3; // Neal's Algorithm 8, see [1] 18 | BlockedGibbs = 4; // Ishwaran and James Blocked Gibbs, see [2] 19 | SplitMerge = 5; // Jain and Neal's Split&Merge, see [3]. NOT IMPLEMENTED YET! 20 | Slice = 6; // Slice sampling, see [4]. NOT IMPLEMENTED YET! 21 | } 22 | -------------------------------------------------------------------------------- /src/proto/algorithm_params.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package bayesmix; 4 | 5 | /* 6 | * Parameters used in the BaseAlgorithm class and its children. 7 | */ 8 | message AlgorithmParams { 9 | string algo_id = 1; // Id of the Algorithm. Must match the ones in the AlgorithmId enum 10 | uint32 rng_seed = 2; // Seed for the random number generator 11 | uint32 iterations = 3; // Total number of iterations of the MCMC chain 12 | uint32 burnin = 4; // Number of iterations to discard as burn-in 13 | uint32 init_num_clusters = 5; // Number of clusters to initialize the algorithm. It may be overridden by conditional mixings for which the number of components is fixed (e.g. TruncatedSBMixing). In this case, this value is ignored. 14 | uint32 neal8_n_aux = 6; // Number of auxiliary unique values for the Neal8 algorithm 15 | uint32 splitmerge_n_restr_gs_updates = 7; // Number of restricted GS scans for each MH step. 16 | uint32 splitmerge_n_mh_updates = 8; // Number of MH updates for each iteration of Split and Merge algorithm. 17 | uint32 splitmerge_n_full_gs_updates = 9; // Number of full GS scans for each iteration of Split and Merge algorithm.
18 | } 19 | -------------------------------------------------------------------------------- /src/proto/algorithm_state.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | import "mixing_state.proto"; 4 | import "ls_state.proto"; 5 | import "matrix.proto"; 6 | import "hierarchy_prior.proto"; 7 | import "distribution.proto"; 8 | 9 | package bayesmix; 10 | 11 | 12 | /* 13 | * This message represents the state of a Gibbs sampler for 14 | * a mixture model. All algorithms must be able to handle this 15 | * message, by filling it with the current state of the sampler 16 | * in the `get_state_as_proto` method. 17 | */ 18 | message AlgorithmState { 19 | message ClusterState { 20 | // Represents the state of a single cluster or component of 21 | // the mixture model. The first field is just a `oneof` wrapper 22 | // around the different possible states. 23 | oneof val { 24 | UniLSState uni_ls_state = 1; // State of a univariate location-scale family 25 | MultiLSState multi_ls_state = 2; // State of a multivariate location-scale family 26 | LinRegUniLSState lin_reg_uni_ls_state = 4; // State of a linear regression univariate location-scale family 27 | Vector general_state = 5; // Just a vector of doubles 28 | FAState fa_state = 6; // State of a Mixture of Factor Analysers 29 | 30 | } 31 | int32 cardinality = 3; // How many observations are in this cluster 32 | } 33 | 34 | repeated ClusterState cluster_states = 1; // The state of each cluster 35 | repeated int32 cluster_allocs = 2 [packed = true]; // Vector of allocations into clusters, one for each observation 36 | MixingState mixing_state = 3; // The state of the `Mixing` 37 | int32 iteration_num = 4; // The iteration number 38 | 39 | message HierarchyHypers { 40 | // Current values of the Hyperparameters of the Hierarchy 41 | oneof val { 42 | Vector general_state = 1; 43 | NIGDistribution nnig_state = 2; 44 | NWDistribution nnw_state = 3; 45 |
MultiNormalIGDistribution lin_reg_uni_state = 4; 46 | NxIGDistribution nnxig_state = 5; 47 | FAPriorDistribution fa_state = 7; 48 | } 49 | } 50 | HierarchyHypers hierarchy_hypers = 5; // The current values of the hyperparameters of the hierarchy 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/proto/cpp/.gitignore: -------------------------------------------------------------------------------- 1 | # Protocol Buffers implementation files (version-dependent) 2 | *.pb.cc 3 | *.pb.h 4 | -------------------------------------------------------------------------------- /src/proto/distribution.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | import "matrix.proto"; 4 | 5 | package bayesmix; 6 | 7 | /* 8 | * Parameters defining a univariate normal distribution 9 | */ 10 | message UniNormalDistribution { 11 | double mean = 1; 12 | double var = 2; 13 | } 14 | 15 | /* 16 | * Parameters defining a multivariate normal distribution 17 | */ 18 | message MultiNormalDistribution { 19 | Vector mean = 1; 20 | Matrix var = 2; 21 | } 22 | 23 | /* 24 | * Parameters defining a gamma distribution with density 25 | * f(x) = rate^shape * x^(shape-1) * exp(-rate * x) / Gamma(shape) 26 | */ 27 | message GammaDistribution { 28 | double shape = 1; 29 | double rate = 2; 30 | } 31 | 32 | /* 33 | * Parameters defining an Inverse Wishart distribution 34 | */ 35 | message InvWishartDistribution { 36 | double deg_free = 1; 37 | Matrix scale = 2; 38 | } 39 | 40 | /* 41 | * Parameters defining a beta distribution 42 | */ 43 | message BetaDistribution { 44 | double shape_a = 1; 45 | double shape_b = 2; 46 | } 47 | 48 | /* 49 | * Parameters of a Normal Inverse-Gamma distribution 50 | * with density 51 | * f(x, y) = N(x | mu, y/var_scaling) * IG(y | shape, scale) 52 | */ 53 | message NIGDistribution { 54 | double mean = 1; 55 | double var_scaling = 2; 56 | double shape = 3; 57 | double scale = 4;
58 | } 59 | 60 | /* 61 | * Parameters of a Normal x Inverse-Gamma distribution 62 | * with density 63 | * f(x, y) = N(x | mu, var) * IG(y | shape, scale) 64 | */ 65 | message NxIGDistribution { 66 | double mean = 1; 67 | double var = 2; 68 | double shape = 3; 69 | double scale = 4; 70 | } 71 | 72 | /* 73 | * Parameters of a Normal Wishart distribution 74 | * with density 75 | * f(x, y) = N(x | mu, (y * var_scaling)^{-1}) * W(y | deg_free, scale) 76 | * where x is a vector and y is a precision matrix (spd) 77 | */ 78 | message NWDistribution { 79 | Vector mean = 1; 80 | double var_scaling = 2; 81 | double deg_free = 3; 82 | Matrix scale = 4; 83 | Matrix scale_chol = 5; 84 | } 85 | 86 | 87 | /* 88 | * Parameters for the Normal Inverse Gamma distribution commonly employed in 89 | * linear regression models, with density 90 | * f(beta, var) = N(beta | mean, var * var_scaling^{-1}) * IG(var | shape, scale) 91 | */ 92 | message MultiNormalIGDistribution { 93 | Vector mean = 1; 94 | Matrix var_scaling = 2; 95 | double shape = 3; 96 | double scale = 4; 97 | } 98 | -------------------------------------------------------------------------------- /src/proto/hierarchy_id.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package bayesmix; 4 | 5 | /* 6 | * Enum for the different types of Hierarchy.
7 | */ 8 | enum HierarchyId { 9 | UNKNOWN_HIERARCHY = 0; 10 | NNIG = 1; // Normal - Normal Inverse Gamma 11 | NNW = 2; // Normal - Normal Wishart 12 | LinRegUni = 3; // Linear Regression (univariate response) 13 | LapNIG = 4; // Laplace - Normal Inverse Gamma 14 | FA = 5; // Factor Analysers 15 | NNxIG = 6; // Normal - Normal x Inverse Gamma 16 | PythonHier = 7; // Generic python hierarchy 17 | } 18 | -------------------------------------------------------------------------------- /src/proto/ls_state.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | import "matrix.proto"; 4 | 5 | package bayesmix; 6 | 7 | /* 8 | * Parameters of a univariate location-scale family of distributions. 9 | */ 10 | message UniLSState { 11 | double mean = 1; 12 | double var = 2; 13 | } 14 | 15 | /* 16 | * Parameters of a multivariate location-scale family of distributions, 17 | * parameterized by mean and precision (inverse of variance). For 18 | * convenience, we also store the Cholesky factor of the precision matrix. 19 | */ 20 | message MultiLSState { 21 | Vector mean = 1; 22 | Matrix prec = 2; 23 | Matrix prec_chol = 3; 24 | } 25 | 26 | /* 27 | * Parameters of a univariate linear regression 28 | */ 29 | message LinRegUniLSState { 30 | Vector regression_coeffs = 1; // regression coefficients 31 | double var = 2; // variance of the noise 32 | } 33 | 34 | message FAState { 35 | Vector mu = 1; 36 | Vector psi = 2; 37 | Matrix eta = 3; 38 | Matrix lambda = 4; 39 | } 40 | -------------------------------------------------------------------------------- /src/proto/matrix.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package bayesmix; 4 | 5 | /* 6 | * Message representing a vector of doubles. 
7 | */ 8 | message Vector { 9 | int32 size = 1; // number of elements in the vector 10 | repeated double data = 2 [packed = true]; // vector elements 11 | } 12 | 13 | /* 14 | * Message representing a matrix of doubles. 15 | */ 16 | message Matrix { 17 | int32 rows = 1; // number of rows 18 | int32 cols = 2; // number of columns 19 | repeated double data = 3 [packed = true]; // matrix elements 20 | bool rowmajor = 4; // if true, the data is read in row-major order (an unset bool is false in proto3) 21 | } 22 | -------------------------------------------------------------------------------- /src/proto/mixing_id.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package bayesmix; 4 | 5 | /* 6 | * Enum for the different types of Mixing. 7 | */ 8 | enum MixingId { 9 | UNKNOWN_MIXING = 0; // zero value, i.e. the proto3 default when the field is unset 10 | DP = 1; // Dirichlet Process 11 | PY = 2; // Pitman-Yor Process 12 | LogSB = 3; // Logit Stick-Breaking Process 13 | TruncSB = 4; // Truncated Stick-Breaking Process 14 | MFM = 5; // Mixture of Finite Mixtures 15 | PythonMix = 6; // Generic Python mixing 16 | } 17 | -------------------------------------------------------------------------------- /src/proto/mixing_prior.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | import "distribution.proto"; 4 | import "mixing_state.proto"; 5 | import "matrix.proto"; 6 | 7 | package bayesmix; 8 | 9 | 10 | /* 11 | * Prior for the concentration parameter of a Dirichlet process 12 | */ 13 | message DPPrior { 14 | message GammaPrior { 15 | GammaDistribution totalmass_prior = 1; 16 | } 17 | 18 | oneof totalmass { 19 | DPState fixed_value = 1; // No prior, just a fixed value 20 | GammaPrior gamma_prior = 2; // Gamma prior on the total mass 21 | } 22 | } 23 | /* 24 | * Prior for the Poisson rate and Dirichlet parameters of an MFM (Finite Dirichlet) process. 
25 | * For the moment, we only support fixed values 26 | */ 27 | message MFMPrior { 28 | oneof totalmass { 29 | MFMState fixed_value = 1; // No prior, just a fixed value 30 | } 31 | } 32 | 33 | /* 34 | * Prior for the strength and discount parameters of a Pitman-Yor process. 35 | * For the moment, we only support fixed values 36 | */ 37 | message PYPrior { 38 | oneof totalmass { 39 | PYState fixed_values = 1; // No prior, just fixed values 40 | } 41 | } 42 | 43 | /* 44 | * Definition of the parameters of a Logit Stick-Breaking process. 45 | */ 46 | message LogSBPrior { 47 | oneof coeff { 48 | MultiNormalDistribution normal_prior = 1; // Normal prior on the regression coefficients 49 | } 50 | double step_size = 2; // Step size for the MALA algorithm used for posterior inference (TODO: move?) 51 | uint32 num_components = 3; // Number of components in the process 52 | } 53 | 54 | /* 55 | * Definition of a generic container for the prior parameters to be used in Python 56 | */ 57 | message PythonMixPrior { 58 | oneof prior { 59 | Vector values = 1; 60 | } 61 | } 62 | 63 | /* 64 | * Definition of the parameters of a truncated Stick-Breaking process 65 | */ 66 | message TruncSBPrior { 67 | message BetaPriors { 68 | // General stick-breaking distributions 69 | repeated BetaDistribution beta_distributions = 1; 70 | } 71 | message DPPrior { 72 | // Truncated Dirichlet process 73 | double totalmass = 1; 74 | } 75 | 76 | message PYPrior { 77 | // Truncated Pitman-Yor process 78 | double strength = 1; 79 | double discount = 2; 80 | } 81 | 82 | oneof prior { 83 | BetaPriors beta_priors = 1; // General stick-breaking distributions 84 | DPPrior dp_prior = 2; // Truncated Dirichlet process 85 | PYPrior py_prior = 3; // Truncated Pitman-Yor process 86 | } 87 | uint32 num_components = 4; // Number of components in the process 88 | 89 | // If true, we must use the Slice Sampler, and num_components is used only for 90 | // the initialization 91 | bool infinite_mixture = 5; 92 | } 93 | 
-------------------------------------------------------------------------------- /src/proto/mixing_state.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package bayesmix; 4 | 5 | import "matrix.proto"; 6 | 7 | /* 8 | * State of a Dirichlet process 9 | */ 10 | message DPState { 11 | double totalmass = 1; // the total mass of the DP 12 | } 13 | 14 | /* 15 | * State of a Pitman-Yor process 16 | */ 17 | message PYState { 18 | double strength = 1; 19 | double discount = 2; 20 | } 21 | 22 | /* 23 | * State of a Logit Stick-Breaking process 24 | */ 25 | message LogSBState { 26 | Matrix regression_coeffs = 1; // Num_Components x Num_Features matrix. Each row holds the regression coefficients for a component. 27 | } 28 | 29 | /* 30 | * State of a truncated stick-breaking process. For convenience, we also store the logarithm of the weights 31 | */ 32 | message TruncSBState { 33 | Vector sticks = 1; 34 | Vector logweights = 2; 35 | } 36 | 37 | /* 38 | * State of an MFM (Finite Dirichlet) process 39 | */ 40 | message MFMState { 41 | double lambda = 1; // rate parameter of Poisson prior on number of components of the MFM 42 | double gamma = 2; // parameter of the Dirichlet distribution for the mixing weights 43 | } 44 | 45 | /* 46 | * Wrapper of all possible mixing states into a single oneof 47 | */ 48 | message MixingState { 49 | oneof state { 50 | DPState dp_state = 1; 51 | PYState py_state = 2; 52 | LogSBState log_sb_state = 3; 53 | TruncSBState trunc_sb_state = 4; 54 | MFMState mfm_state = 5; 55 | Vector general_state = 6; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/proto/py/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *pb2.py 3 | -------------------------------------------------------------------------------- /src/proto/py/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/src/proto/py/__init__.py -------------------------------------------------------------------------------- /src/proto/semihdp.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | import "ls_state.proto"; 4 | import "matrix.proto"; 5 | 6 | package bayesmix; 7 | // State of the semi-HDP sampler -- NOTE(review): field semantics below are not documented in this file; confirm against the sampler implementation 8 | message SemiHdpState { 9 | message ClusterState { 10 | oneof val { 11 | UniLSState uni_ls_state = 1; 12 | MultiLSState multi_ls_state = 2; 13 | LinRegUniLSState lin_reg_uni_ls_state = 4; 14 | Vector general_state = 5; 15 | } 16 | int32 cardinality = 3; // field number 3 sits outside the oneof (which uses 1, 2, 4, 5); keep the existing numbers unchanged for wire compatibility 17 | } 18 | 19 | message RestaurantState { 20 | repeated ClusterState theta_stars = 1; 21 | repeated int32 n_by_clus = 2; 22 | repeated int32 table_to_shared = 3; 23 | repeated int32 table_to_idio = 4; 24 | } 25 | 26 | message GroupState { 27 | repeated int32 cluster_allocs = 1; 28 | } 29 | 30 | repeated RestaurantState restaurants = 1; 31 | repeated GroupState groups = 2; 32 | repeated ClusterState taus = 3; 33 | repeated int32 c = 4; 34 | double w = 5; 35 | } 36 | // Parameters of the semi-HDP sampler (presumably hyperparameters and tuning settings -- TODO confirm) 37 | message SemiHdpParams { 38 | message PseudoPriorParams { 39 | double card_weight = 1; 40 | double mean_perturb_sd = 2; 41 | double var_perturb_frac = 3; 42 | } 43 | 44 | message WPriorParams { 45 | double shape1 = 1; 46 | double shape2 = 2; 47 | } 48 | 49 | PseudoPriorParams pseudo_prior = 1; 50 | 51 | double dirichlet_concentration = 2; 52 | 53 | string rest_allocs_update = 3; // One of "full", "metro_base", "metro_dist" 54 | 55 | double totalmass_rest = 4; 56 | 57 | double totalmass_hdp = 5; 58 | 59 | WPriorParams w_prior = 6; 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/runtime/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 
target_sources(bayesmix 2 | PUBLIC 3 | factory.h 4 | ) 5 | -------------------------------------------------------------------------------- /src/utils/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources(bayesmix 2 | PUBLIC 3 | cluster_utils.h 4 | cluster_utils.cc 5 | eigen_utils.h 6 | eigen_utils.cc 7 | eval_like.h 8 | eval_like.cc 9 | distributions.h 10 | distributions.cc 11 | io_utils.h 12 | io_utils.cc 13 | proto_utils.h 14 | proto_utils.cc 15 | rng.h 16 | testing_utils.h 17 | testing_utils.cc 18 | covariates_getter.h 19 | ) 20 | -------------------------------------------------------------------------------- /src/utils/cluster_utils.cc: -------------------------------------------------------------------------------- 1 | #include "cluster_utils.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "lib/progressbar/progressbar.h" 7 | #include "proto_utils.h" 8 | //! Returns an n_data x n_data matrix whose strictly lower-triangular entry (i, j) is the fraction of rows of alloc_chain in which columns i and j hold equal values; the diagonal and the upper triangle are left at zero. 9 | Eigen::MatrixXd bayesmix::posterior_similarity( 10 | const Eigen::MatrixXd &alloc_chain) { 11 | unsigned int n_data = alloc_chain.cols(); 12 | Eigen::MatrixXd mean_diss = Eigen::MatrixXd::Zero(n_data, n_data); 13 | // Loop over pairs (i,j) of data points 14 | for (unsigned int i = 0; i < n_data; i++) { 15 | for (unsigned int j = 0; j < i; j++) { 16 | Eigen::ArrayXd diff = alloc_chain.col(i) - alloc_chain.col(j); 17 | mean_diss(i, j) = (diff == 0).count(); 18 | } 19 | } 20 | return mean_diss / alloc_chain.rows(); 21 | } 22 | //! Returns the row of alloc_chain (as a column vector) whose pairwise equal-allocation indicators have the smallest squared distance from the posterior similarity matrix, summed over the pairs j < i. 23 | Eigen::VectorXi bayesmix::cluster_estimate( 24 | const Eigen::MatrixXi &alloc_chain) { 25 | // Initialize objects 26 | unsigned n_iter = alloc_chain.rows(); 27 | unsigned int n_data = alloc_chain.cols(); 28 | progresscpp::ProgressBar bar(n_iter, 60); 29 | 30 | 31 | // Compute mean 32 | std::cout << "(Computing mean dissimilarity... 
" << std::flush; 33 | Eigen::MatrixXd mean_diss = 34 | bayesmix::posterior_similarity(alloc_chain.cast<double>()); 35 | std::cout << "Done)" << std::endl; 36 | 37 | // Compute Frobenius norm error of all iterations 38 | std::cout << "Computing Frobenius norm error... " << std::endl; 39 | Eigen::VectorXd errors = Eigen::VectorXd::Zero(n_iter); // must start at zero: the sized constructor leaves coefficients uninitialized and they are accumulated with += below 40 | for (unsigned int k = 0; k < n_iter; k++) { 41 | for (unsigned int i = 0; i < n_data; i++) { 42 | for (unsigned int j = 0; j < i; j++) { 43 | double x = (alloc_chain(k, i) == alloc_chain(k, j)); 44 | errors(k) += (x - mean_diss(i, j)) * (x - mean_diss(i, j)); 45 | } 46 | } 47 | // Progress bar 48 | ++bar; 49 | bar.display(); 50 | } 51 | bar.done(); 52 | std::cout << "Done" << std::endl; // Print Ending Message 53 | 54 | // Find iteration with the least error 55 | std::ptrdiff_t ibest; 56 | errors.minCoeff(&ibest); // only the index of the minimum is needed; the value itself is discarded 57 | return alloc_chain.row(ibest).transpose(); 58 | } 59 | -------------------------------------------------------------------------------- /src/utils/cluster_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_UTILS_CLUSTER_UTILS_H_ 2 | #define BAYESMIX_UTILS_CLUSTER_UTILS_H_ 3 | 4 | #include 5 | 6 | //! \file cluster_utils.h 7 | //! The `cluster_utils.h` file includes some utilities for cluster estimation. 8 | //! These functions only use Eigen objects. 9 | 10 | namespace bayesmix { 11 | 12 | //! Computes the posterior similarity matrix of the data 13 | Eigen::MatrixXd posterior_similarity(const Eigen::MatrixXd &alloc_chain); 14 | 15 | //! 
Estimates the clustering structure of the data via LS minimization 16 | Eigen::VectorXi cluster_estimate(const Eigen::MatrixXi &alloc_chain); 17 | 18 | } // namespace bayesmix 19 | 20 | #endif // BAYESMIX_UTILS_CLUSTER_UTILS_H_ 21 | -------------------------------------------------------------------------------- /src/utils/covariates_getter.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_SRC_UTILS_COVARIATES_GETTER_H 2 | #define BAYESMIX_SRC_UTILS_COVARIATES_GETTER_H 3 | 4 | #include 5 | //! Functor mapping an index to a row of a covariates matrix. It stores only a pointer to the matrix passed at construction, so that matrix must outlive this object. 6 | class covariates_getter { 7 | protected: 8 | const Eigen::MatrixXd* covariates; 9 | 10 | public: 11 | covariates_getter(const Eigen::MatrixXd& covariates_) 12 | : covariates(&covariates_){}; 13 | //! Returns an empty row vector if the matrix has no columns, row 0 (regardless of i) if it has exactly one row, and row i otherwise. No bounds check on i is performed here. 14 | Eigen::RowVectorXd operator()(const size_t& i) const { 15 | if (covariates->cols() == 0) { 16 | return Eigen::RowVectorXd(0); 17 | } else if (covariates->rows() == 1) { 18 | return covariates->row(0); 19 | } else { 20 | return covariates->row(i); 21 | } 22 | }; 23 | }; 24 | 25 | #endif // BAYESMIX_SRC_UTILS_COVARIATES_GETTER_H 26 | -------------------------------------------------------------------------------- /src/utils/eigen_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef BAYESMIX_SRC_UTILS_EIGEN_UTILS_H_ 2 | #define BAYESMIX_SRC_UTILS_EIGEN_UTILS_H_ 3 | 4 | #include 5 | #include 6 | 7 | //! @file eigen_utils.h 8 | //! The `eigen_utils.h` file implements a few methods to manipulate groups of 9 | //! matrices, mainly by joining different objects, as well as additional 10 | //! utilities for SPD checking and grid creation. 11 | 12 | namespace bayesmix { 13 | //! Concatenates a vector of Eigen matrices along the rows 14 | //! @param mats The matrices to be concatenated 15 | //! @return The resulting matrix 16 | //! @throw std::invalid_argument if sizes mismatch 17 | Eigen::MatrixXd vstack(const std::vector &mats); 18 | 19 | //! 
Concatenates two matrices by row, modifying the first matrix in-place 20 | //! @throw std::invalid_argument if sizes mismatch 21 | void append_by_row(Eigen::MatrixXd *const a, const Eigen::MatrixXd &b); 22 | 23 | //! Concatenates two matrices by row 24 | //! @param a,b The matrices to be concatenated 25 | //! @return The resulting matrix 26 | //! @throw std::invalid_argument if sizes mismatch 27 | Eigen::MatrixXd append_by_row(const Eigen::MatrixXd &a, 28 | const Eigen::MatrixXd &b); 29 | 30 | //! Creates an Eigen matrix from a collection of rows 31 | //! @tparam Container An std-compatible container implementing `operator[]` 32 | //! @param rows The rows of the matrix 33 | //! @return The resulting matrix 34 | template