├── .clang-format
├── .github
    └── workflows
    │   ├── document_protos.yaml
    │   ├── publish_base_image.yaml
    │   ├── publish_env_image.yaml
    │   └── test.yaml
├── .gitignore
├── .gitmodules
├── .readthedocs.yaml
├── CMakeLists.txt
├── CONTRIBUTING.md
├── INSTALL.md
├── LICENSE
├── R
    ├── .gitignore
    ├── README.md
    ├── bayesmixr
    │   ├── DESCRIPTION
    │   ├── LICENSE
    │   ├── NAMESPACE
    │   ├── R
    │   │   ├── build_bayesmix.R
    │   │   ├── decoder.R
    │   │   ├── run_mcmc.R
    │   │   ├── utils.R
    │   │   └── zzz.R
    │   ├── cleanup
    │   ├── cleanup.win
    │   ├── configure
    │   ├── configure.win
    │   ├── man
    │   │   ├── DecodeVarint32.Rd
    │   │   ├── VarintDecoder.Rd
    │   │   ├── build_bayesmix.Rd
    │   │   ├── import_protobuf_messages.Rd
    │   │   ├── maybe_print_proto_to_file.Rd
    │   │   ├── read_many_proto_from_file.Rd
    │   │   └── run_mcmc.Rd
    │   └── tests
    │   │   ├── testthat.R
    │   │   └── testthat
    │   │       ├── test_build.R
    │   │       └── test_run.R
    └── notebooks
    │   └── gaussian_mix_uni.Rmd
├── README.md
├── benchmarks
    ├── CMakeLists.txt
    ├── eval_lpdf.cc
    ├── lpd_grid.cc
    ├── main.cpp
    ├── mcmc_runs.cc
    └── nnw_marg_lpdf.cc
├── cmake
    ├── FindSphinx.cmake
    ├── ProtobufUtils.cmake
    ├── math.cmake
    ├── matplotplusplus.cmake
    └── protobuf.cmake
├── docs
    ├── .gitignore
    ├── CMakeLists.txt
    ├── Doxyfile.in
    ├── algorithms.rst
    ├── collectors.rst
    ├── conf.py
    ├── hierarchies.rst
    ├── index.rst
    ├── likelihoods.rst
    ├── mixings.rst
    ├── prior_models.rst
    ├── protos.html
    ├── protos.rst
    ├── python_interface.rst
    ├── requirements.txt
    ├── states.rst
    ├── tutorial.rst
    ├── updaters.rst
    └── utils.rst
├── examples
    ├── CMakeLists.txt
    ├── fa_hierarchy
    │   ├── in
    │   │   ├── algo.asciipb
    │   │   ├── data.csv
    │   │   ├── dp_gamma.asciipb
    │   │   └── fa.asciipb
    │   ├── out
    │   │   └── .gitignore
    │   └── run.sh
    ├── gamma_hierarchy
    │   ├── gamma_likelihood.h
    │   ├── gamma_prior_model.h
    │   ├── gammagamma_hierarchy.h
    │   ├── gammagamma_updater.h
    │   └── run_gamma_gamma.cc
    └── tutorial
    │   ├── 2dplot.sh
    │   ├── 2drun.sh
    │   ├── plot.sh
    │   └── run.sh
├── executables
    ├── plot_mcmc.cc
    └── run_mcmc.cc
├── install-tbb.bat
├── lib
    ├── argparse
    │   └── argparse.h
    └── progressbar
    │   └── progressbar.h
├── pre-commit-config.yaml
├── python
    ├── .gitignore
    ├── README.md
    ├── __init__.py
    ├── bayesmixpy
    │   ├── __init__.py
    │   ├── build_bayesmix.py
    │   ├── io_utils.py
    │   ├── proto
    │   │   └── __init__.py
    │   ├── run.py
    │   └── shell_utils.py
    ├── notebooks
    │   ├── gaussian_mix_NNxIG.ipynb
    │   ├── gaussian_mix_multi.ipynb
    │   ├── gaussian_mix_uni.ipynb
    │   └── split_merge_benchmarking.ipynb
    ├── pyproject.toml
    ├── requirements.txt
    ├── scripts
    │   ├── __init__.py
    │   ├── generate_asciipb.py
    │   └── populate_benchmark_datasets.py
    ├── setup.cfg
    ├── setup.py
    └── tests
    │   ├── __init__.py
    │   ├── test_build.py
    │   └── test_run.py
├── resources
    ├── .gitignore
    ├── 2d
    │   └── .gitignore
    ├── algo_cond_settings.asciipb
    ├── algo_marg_settings.asciipb
    ├── bash
    │   ├── cleanup_tbb.sh
    │   ├── push_containers.sh
    │   └── setup_pre_commit.sh
    ├── benchmarks
    │   ├── chains
    │   │   └── __init__.py
    │   └── default_algo_params.asciipb
    ├── datasets
    │   ├── dde.csv
    │   ├── dde_covs.csv
    │   ├── dde_covs_grid.csv
    │   ├── dde_grid.csv
    │   ├── faithful.csv
    │   ├── faithful_grid.csv
    │   ├── galaxy.csv
    │   └── galaxy_grid.csv
    ├── docker
    │   ├── base
    │   │   └── Dockerfile
    │   ├── env
    │   │   └── Dockerfile
    │   └── test
    │   │   └── Dockerfile
    ├── logo_full.svg
    ├── logo_icon.svg
    ├── patches
    │   └── matplotplusplus.patch
    └── tutorial
    │   ├── .gitignore
    │   ├── algo.asciipb
    │   ├── data.csv
    │   ├── dp_gamma.asciipb
    │   ├── grid.csv
    │   ├── lapnig_fixed.asciipb
    │   ├── mfm_fixed.asciipb
    │   ├── nnig_ngg.asciipb
    │   ├── nnw_ngiw.asciipb
    │   └── out
    │       └── .gitignore
├── src
    ├── CMakeLists.txt
    ├── algorithms
    │   ├── CMakeLists.txt
    │   ├── base_algorithm.cc
    │   ├── base_algorithm.h
    │   ├── blocked_gibbs_algorithm.cc
    │   ├── blocked_gibbs_algorithm.h
    │   ├── conditional_algorithm.cc
    │   ├── conditional_algorithm.h
    │   ├── load_algorithms.h
    │   ├── marginal_algorithm.cc
    │   ├── marginal_algorithm.h
    │   ├── neal2_algorithm.cc
    │   ├── neal2_algorithm.h
    │   ├── neal3_algorithm.cc
    │   ├── neal3_algorithm.h
    │   ├── neal8_algorithm.cc
    │   ├── neal8_algorithm.h
    │   ├── semihdp_sampler.cc
    │   ├── semihdp_sampler.h
    │   ├── slice_sampler.cc
    │   ├── slice_sampler.h
    │   ├── split_and_merge_algorithm.cc
    │   └── split_and_merge_algorithm.h
    ├── collectors
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── base_collector.h
    │   ├── file_collector.cc
    │   ├── file_collector.h
    │   ├── memory_collector.cc
    │   └── memory_collector.h
    ├── hierarchies
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── abstract_hierarchy.h
    │   ├── base_hierarchy.h
    │   ├── fa_hierarchy.h
    │   ├── lapnig_hierarchy.h
    │   ├── likelihoods
    │   │   ├── CMakeLists.txt
    │   │   ├── abstract_likelihood.h
    │   │   ├── base_likelihood.h
    │   │   ├── fa_likelihood.cc
    │   │   ├── fa_likelihood.h
    │   │   ├── laplace_likelihood.cc
    │   │   ├── laplace_likelihood.h
    │   │   ├── likelihood_internal.h
    │   │   ├── multi_norm_likelihood.cc
    │   │   ├── multi_norm_likelihood.h
    │   │   ├── states
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── base_state.h
    │   │   │   ├── fa_state.h
    │   │   │   ├── includes.h
    │   │   │   ├── multi_ls_state.h
    │   │   │   ├── uni_lin_reg_ls_state.h
    │   │   │   └── uni_ls_state.h
    │   │   ├── uni_lin_reg_likelihood.cc
    │   │   ├── uni_lin_reg_likelihood.h
    │   │   ├── uni_norm_likelihood.cc
    │   │   └── uni_norm_likelihood.h
    │   ├── lin_reg_uni_hierarchy.h
    │   ├── load_hierarchies.h
    │   ├── nnig_hierarchy.h
    │   ├── nnw_hierarchy.h
    │   ├── nnxig_hierarchy.h
    │   ├── priors
    │   │   ├── CMakeLists.txt
    │   │   ├── abstract_prior_model.h
    │   │   ├── base_prior_model.h
    │   │   ├── fa_prior_model.cc
    │   │   ├── fa_prior_model.h
    │   │   ├── hyperparams.h
    │   │   ├── mnig_prior_model.cc
    │   │   ├── mnig_prior_model.h
    │   │   ├── nig_prior_model.cc
    │   │   ├── nig_prior_model.h
    │   │   ├── nw_prior_model.cc
    │   │   ├── nw_prior_model.h
    │   │   ├── nxig_prior_model.cc
    │   │   ├── nxig_prior_model.h
    │   │   └── prior_model_internal.h
    │   └── updaters
    │   │   ├── CMakeLists.txt
    │   │   ├── abstract_updater.h
    │   │   ├── fa_updater.cc
    │   │   ├── fa_updater.h
    │   │   ├── mala_updater.h
    │   │   ├── metropolis_updater.h
    │   │   ├── mnig_updater.cc
    │   │   ├── mnig_updater.h
    │   │   ├── nnig_updater.cc
    │   │   ├── nnig_updater.h
    │   │   ├── nnw_updater.cc
    │   │   ├── nnw_updater.h
    │   │   ├── nnxig_updater.cc
    │   │   ├── nnxig_updater.h
    │   │   ├── random_walk_updater.h
    │   │   ├── semi_conjugate_updater.h
    │   │   └── target_lpdf_unconstrained.h
    ├── includes.h
    ├── mixings
    │   ├── CMakeLists.txt
    │   ├── abstract_mixing.h
    │   ├── base_mixing.h
    │   ├── dirichlet_mixing.cc
    │   ├── dirichlet_mixing.h
    │   ├── load_mixings.h
    │   ├── logit_sb_mixing.cc
    │   ├── logit_sb_mixing.h
    │   ├── mixture_finite_mixing.cc
    │   ├── mixture_finite_mixing.h
    │   ├── pityor_mixing.cc
    │   ├── pityor_mixing.h
    │   ├── truncated_sb_mixing.cc
    │   └── truncated_sb_mixing.h
    ├── plots
    │   ├── CMakeLists.txt
    │   ├── plot_utils.cc
    │   └── plot_utils.h
    ├── proto
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── __init__.py
    │   ├── algorithm_id.proto
    │   ├── algorithm_params.proto
    │   ├── algorithm_state.proto
    │   ├── cpp
    │   │   └── .gitignore
    │   ├── distribution.proto
    │   ├── hierarchy_id.proto
    │   ├── hierarchy_prior.proto
    │   ├── ls_state.proto
    │   ├── matrix.proto
    │   ├── mixing_id.proto
    │   ├── mixing_prior.proto
    │   ├── mixing_state.proto
    │   ├── py
    │   │   ├── .gitignore
    │   │   └── __init__.py
    │   └── semihdp.proto
    ├── runtime
    │   ├── CMakeLists.txt
    │   └── factory.h
    └── utils
    │   ├── CMakeLists.txt
    │   ├── cluster_utils.cc
    │   ├── cluster_utils.h
    │   ├── covariates_getter.h
    │   ├── distributions.cc
    │   ├── distributions.h
    │   ├── eigen_utils.cc
    │   ├── eigen_utils.h
    │   ├── eval_like.cc
    │   ├── eval_like.h
    │   ├── io_utils.cc
    │   ├── io_utils.h
    │   ├── proto_utils.cc
    │   ├── proto_utils.h
    │   ├── rng.h
    │   ├── testing_utils.cc
    │   └── testing_utils.h
└── test
    ├── CMakeLists.txt
    ├── collectors.cc
    ├── distributions.cc
    ├── eigen_utils.cc
    ├── gradient.cc
    ├── hierarchies.cc
    ├── likelihoods.cc
    ├── logit_sb.cc
    ├── lpdf.cc
    ├── prior_models.cc
    ├── proto_utils.cc
    ├── rng.cc
    ├── runtime.cc
    ├── semi_hdp.cc
    ├── slice_sampler.cc
    └── write_proto.cc


/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: google
2 | ColumnLimit: 79
3 | 


--------------------------------------------------------------------------------
/.github/workflows/document_protos.yaml:
--------------------------------------------------------------------------------
 1 | name: document_protos
 2 | 
 3 | on:
 4 |   pull_request_target:
 5 |     branches: [master]
 6 |     paths:
 7 |       - "**.proto"
 8 | 
 9 |   workflow_dispatch:
10 | 
11 | jobs:
12 |   build:
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |       - uses: actions/checkout@v2
17 | 
18 |       - name: check pwd
19 |         run: echo $(pwd)
20 | 
21 |       - name: build_html
22 |         run: docker run --rm --platform linux/amd64  -v $(pwd)/docs:/out -v $(pwd)/src/proto:/protos pseudomuto/protoc-gen-doc --doc_opt=html,protos.html
23 | 
24 |       - name: Commit changes
25 |         uses: EndBug/add-and-commit@v7
26 |         with:
27 |           author_name: bayesmix-devs
28 |           message: "auto update of docs/protos.html"
29 |           add: "docs/protos.html"
30 | 


--------------------------------------------------------------------------------
/.github/workflows/publish_base_image.yaml:
--------------------------------------------------------------------------------
 1 | name: Publish bayesmix-base image
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - 'master'
 7 |   workflow_dispatch:
 8 | 
 9 | jobs:
10 |   push_to_registry:
11 |     name: Push bayesmix-base Docker image to Docker Hub
12 |     if: github.repository == 'bayesmix-dev/bayesmix'
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - name: Check out the repo
16 |         uses: actions/checkout@v3
17 | 
18 |       - name: Set up Docker Buildx
19 |         uses: docker/setup-buildx-action@v3
20 |       
21 |       - name: Login to Docker Hub
22 |         uses: docker/login-action@v3
23 |         with:
24 |           username: ${{ secrets.MARIO_DOCKERHUB_USERNAME }}
25 |           password: ${{ secrets.MARIO_DOCKERHUB_PASSWORD }}
26 |       
27 |       - name: Build and push bayesmix-base
28 |         uses: docker/build-push-action@v5
29 |         with:
30 |           context: .
31 |           push: true
32 |           file: resources/docker/base/Dockerfile
33 |           tags: mberaha/bayesmix-base:latest
34 | 


--------------------------------------------------------------------------------
/.github/workflows/publish_env_image.yaml:
--------------------------------------------------------------------------------
 1 | name: Publish bayesmix-env image
 2 | 
 3 | on: workflow_dispatch
 4 | 
 5 | jobs:
 6 |   push_to_registry:
 7 |     name: Push bayesmix-env Docker image to Docker Hub
 8 |     if: github.repository == 'bayesmix-dev/bayesmix'
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - name: Check out the repo
12 |         uses: actions/checkout@v3
13 | 
14 |       - name: Set up Docker Buildx
15 |         uses: docker/setup-buildx-action@v3
16 |       
17 |       - name: Login to Docker Hub
18 |         uses: docker/login-action@v3
19 |         with:
20 |           username: ${{ secrets.MARIO_DOCKERHUB_USERNAME }}
21 |           password: ${{ secrets.MARIO_DOCKERHUB_PASSWORD }}
22 |       
23 |       - name: Build and push bayesmix-env
24 |         uses: docker/build-push-action@v5
25 |         with:
26 |           context: .
27 |           push: true
28 |           file: resources/docker/env/Dockerfile
29 |           tags: mberaha/bayesmix-env:latest
30 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yaml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches: [master]
 6 | 
 7 |   workflow_dispatch:
 8 | 
 9 | jobs:
10 |   # This workflow contains a single job called "build"
11 |   build:
12 |     if: github.repository == 'bayesmix-dev/bayesmix' && github.event.pull_request.draft == false
13 |     # The type of runner that the job will run on
14 |     runs-on: ubuntu-latest
15 |     # Steps represent a sequence of tasks that will be executed as part of the job
16 |     steps:
17 |       # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
18 |       - name: Check out the repo
19 |         uses: actions/checkout@v3
20 | 
21 |       - name: build
22 |         run: docker
23 | 
24 |       - name: Build test Docker image
25 |         run: docker build -f resources/docker/test/Dockerfile -t test .
26 | 
27 |       - name: Run C++ tests
28 |         run: docker run test ./build/test/test_bayesmix
29 | 
30 |       - name: Run Python tests
31 |         run: docker run test /bin/bash -c "cd python && pytest"
32 | 
33 |       - name: Run R tests
34 |         run: docker run test Rscript --vanilla -e "testthat::test_package('bayesmixr')"
35 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *pb2.py
 2 | # Build folder
 3 | build/
 4 | # Visual Studio Code folder
 5 | .vscode
 6 | # SFTP configuration file
 7 | sftp-config.json
 8 | # Sublime Text files
 9 | *.sublime-*
10 | # Python compilation files
11 | *.pyc
12 | # File collectors
13 | *.recordio
14 | # PDF output files
15 | *.pdf
16 | # Local files
17 | *.local.*
18 | # MacOS storage files
19 | .DS_Store
20 | .dockerignore
21 | .ipynb_checkpoints/
22 | docs/_build/
23 | resources/benchmarks/datasets
24 | resources/2d
25 | # CLion cache
26 | .idea/
27 | # Build debug folder
28 | cmake-build-debug/
29 | # lib/_deps/ folder
30 | lib/_deps/
31 | # .old folders
32 | test/.old/
33 | src/hierarchies/updaters/.old/
34 | examples/gamma_hierarchy/.old/
35 | # .env file
36 | .env
37 | # R stuff
38 | .Rproj.user
39 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/.gitmodules


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Set the version of Python and other tools you might need
 8 | build:
 9 |   os: ubuntu-22.04
10 |   tools:
11 |     python: "3.11"
12 | 
13 | # Build documentation in the docs/ directory with Sphinx
14 | sphinx:
15 |   configuration: docs/conf.py
16 | 
17 | # We recommend specifying your dependencies to enable reproducible builds:
18 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
19 | python:
20 |   install:
21 |   - requirements: docs/requirements.txt


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2020, bayesmix-dev
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/R/.gitignore:
--------------------------------------------------------------------------------
 1 | # Files
 2 | .Rbuildignore
 3 | *.Rproj
 4 | *.Rhistory
 5 | *.Rdata
 6 | *.nb.html
 7 | 
 8 | # Folders
 9 | build/
10 | .Rproj.user/
11 | 


--------------------------------------------------------------------------------
/R/bayesmixr/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: bayesmixr
 2 | Title: An R interface to BayesMix
 3 | Version: 0.1.3
 4 | Author: Matteo Gianella
 5 | Maintainer: Matteo Gianella <matteo.gianella@polimi.it>
 6 | Description: This package provides a lightweight R interface to the BayesMix C++ library.
 7 | License: BSD_3_clause + file LICENSE
 8 | Encoding: UTF-8
 9 | Roxygen: list(markdown = TRUE)
10 | RoxygenNote: 7.2.3
11 | Suggests:
12 |     devtools (>= 2.4.5),
13 |     testthat (>= 3.1.5)
14 | Config/testthat/edition: 3
15 | Imports:
16 |     bitops (>= 1.0.7),
17 |     RProtoBuf (>= 0.4.20),
18 |     utils (>= 4.3.1),
19 |     withr (>= 2.5.0)
20 | 


--------------------------------------------------------------------------------
/R/bayesmixr/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2020
2 | COPYRIGHT HOLDER: bayesmix-dev
3 | ORGANIZATION: bayesmix
4 | 


--------------------------------------------------------------------------------
/R/bayesmixr/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 | 
3 | export(build_bayesmix)
4 | export(import_protobuf_messages)
5 | export(read_many_proto_from_file)
6 | export(run_mcmc)
7 | 


--------------------------------------------------------------------------------
/R/bayesmixr/R/decoder.R:
--------------------------------------------------------------------------------
 1 | #' Return a decoder for a basic varint value (does not include tag).
 2 | #'
 3 | #' Decoded values will be bitwise-anded with the given mask before being
 4 | #' returned, e.g. to limit them to 32 bits.  The returned decoder does not take
 5 | #' the usual "end" parameter -- the caller is expected to do bounds checking
 6 | #' after the fact (often the caller can defer such checking until later). The
 7 | #' decoder returns a (value, new_pos) pair.
 8 | #'
 9 | #' @keywords internal
10 | VarintDecoder = function(mask, result_type) {
11 |   # mask and result_type are captured by the closure defined below.
12 |   # Decode one varint from `buffer`, starting at index `pos`.
13 |   DecodeVarint <- function(buffer, pos) {
14 |     result = 0  # value accumulated so far
15 |     shift = 0  # bit offset at which the next 7-bit group is placed
16 |     while (TRUE) {
17 |       b = as.numeric(buffer[pos])  # current byte as a number
18 |       result = bitops::bitOr(result, bitops::bitShiftL(bitops::bitAnd(b, 0x7f), shift))
19 |       pos = pos + 1
20 |       if (!bitops::bitAnd(b, 0x80)) {  # high bit clear: this was the last byte
21 |         result <- bitops::bitAnd(result, mask)
22 |         result <- result_type(result)
23 |         return(list(result = result, pos = as.integer(pos)))
24 |       }
25 |       shift <- shift + 7  # the next byte supplies the next 7 bits
26 |       if (shift >= 64) {  # abort on an over-long varint
27 |         stop('Too many bytes when decoding varint.')
28 |       }
29 |     }
30 |   }
31 | 
32 |   # Hand back the closure; it returns list(result, pos), pos being the next index
33 |   return(DecodeVarint)
34 | }
35 | 
36 | #' Use this decoder version for values which must be limited to 32 bits.
37 | #'
38 | #' @keywords internal
39 | DecodeVarint32 = VarintDecoder(2^32 - 1, as.integer)
40 | 


--------------------------------------------------------------------------------
/R/bayesmixr/R/zzz.R:
--------------------------------------------------------------------------------
 1 | # On attach, load the environment variables defined in the package's bayesmixr.Renviron file
 2 | .onAttach <- function(...) {
 3 |   readRenviron(system.file("bayesmixr.Renviron", package = "bayesmixr"))
 4 | }
 5 | 
 6 | # On detach, unset the BAYESMIXR_HOME, BAYESMIX_EXE and TBB_PATH environment variables
 7 | .onDetach <- function(...) {
 8 |   Sys.unsetenv("BAYESMIXR_HOME")
 9 |   Sys.unsetenv("BAYESMIX_EXE")
10 |   Sys.unsetenv("TBB_PATH")
11 | }
12 | 


--------------------------------------------------------------------------------
/R/bayesmixr/cleanup:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | # Clean inst/ directory after install
4 | rm -rf ./inst
5 | 


--------------------------------------------------------------------------------
/R/bayesmixr/cleanup.win:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | # Clean inst/ directory after install
4 | rm -rf ./inst
5 | 


--------------------------------------------------------------------------------
/R/bayesmixr/configure:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | # Set BAYESMIXR_HOME environment variable
4 | mkdir -p ./inst && echo BAYESMIXR_HOME=$PWD > ./inst/bayesmixr.Renviron
5 | 


--------------------------------------------------------------------------------
/R/bayesmixr/configure.win:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | # Set BAYESMIXR_HOME environment variable
4 | mkdir -p ./inst && echo BAYESMIXR_HOME=$(cygpath -m $PWD) > ./inst/bayesmixr.Renviron
5 | 


--------------------------------------------------------------------------------
/R/bayesmixr/man/DecodeVarint32.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/decoder.R
 3 | \name{DecodeVarint32}
 4 | \alias{DecodeVarint32}
 5 | \title{Use this decoder version for values which must be limited to 32 bits.}
 6 | \usage{
 7 | DecodeVarint32(buffer, pos)
 8 | }
 9 | \description{
10 | Use this decoder version for values which must be limited to 32 bits.
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/R/bayesmixr/man/VarintDecoder.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/decoder.R
 3 | \name{VarintDecoder}
 4 | \alias{VarintDecoder}
 5 | \title{Return a decoder for a basic varint value (does not include tag).}
 6 | \usage{
 7 | VarintDecoder(mask, result_type)
 8 | }
 9 | \description{
10 | Decoded values will be bitwise-anded with the given mask before being
11 | returned, e.g. to limit them to 32 bits.  The returned decoder does not take
12 | the usual "end" parameter -- the caller is expected to do bounds checking
13 | after the fact (often the caller can defer such checking until later). The
14 | decoder returns a (value, new_pos) pair.
15 | }
16 | \keyword{internal}
17 | 


--------------------------------------------------------------------------------
/R/bayesmixr/man/build_bayesmix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/build_bayesmix.R
 3 | \name{build_bayesmix}
 4 | \alias{build_bayesmix}
 5 | \title{Builds the BayesMix executable}
 6 | \usage{
 7 | build_bayesmix(
 8 |   nproc = ceiling(parallel::detectCores()/2),
 9 |   build_subdir = "build"
10 | )
11 | }
12 | \arguments{
13 | \item{nproc}{Number of processes to use for parallel compilation. Using the \code{parallel} package,
14 | this parameter defaults to half of the available cores, rounded up (see the \code{\link[parallel]{detectCores}} function)}
15 | 
16 | \item{build_subdir}{Name for the sub-directory of \code{bayesmix/} folder in which configuration and compilation happens.
17 | Default value is \code{build}.}
18 | }
19 | \value{
20 | No output if the build is successful; an error is raised otherwise.
21 | }
22 | \description{
23 | After the build, if no error has occurred, it saves the path into the \code{BAYESMIX_EXE} environment variable.
24 | Such variable is defined only when this package is loaded in the R session.
25 | }
26 | 


--------------------------------------------------------------------------------
/R/bayesmixr/man/import_protobuf_messages.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{import_protobuf_messages}
 4 | \alias{import_protobuf_messages}
 5 | \title{Import Protocol Buffers Descriptors of bayesmix}
 6 | \usage{
 7 | import_protobuf_messages()
 8 | }
 9 | \description{
10 | This utility loads in the workspace the protocol buffers descriptors defined
11 | in the \code{bayesmix} library, via \code{RProtoBuf} package. These
12 | descriptors can be used to handle the MCMC chain output of
13 | \code{\link{run_mcmc}} function.
14 | }
15 | 


--------------------------------------------------------------------------------
/R/bayesmixr/man/maybe_print_proto_to_file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{maybe_print_proto_to_file}
 4 | \alias{maybe_print_proto_to_file}
 5 | \title{Print a protobuf message to file only if input is not a file}
 6 | \usage{
 7 | maybe_print_proto_to_file(maybe_proto, proto_name = NULL, out_dir = NULL)
 8 | }
 9 | \description{
10 | If \code{maybe_proto} is a file, returns the file name. If \code{maybe_proto}
11 | is a string representing a message, prints the message to a file and returns
12 | the file name.
13 | }
14 | \keyword{internal}
15 | 


--------------------------------------------------------------------------------
/R/bayesmixr/man/read_many_proto_from_file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{read_many_proto_from_file}
 4 | \alias{read_many_proto_from_file}
 5 | \title{Read many protobuf messages of the same type from a file}
 6 | \usage{
 7 | read_many_proto_from_file(filename, msg_type)
 8 | }
 9 | \value{
10 | A list of \code{RProtoBuf::Message} of type \code{msg_type}
11 | }
12 | \description{
13 | This function parses the file given by \code{filename} and deserializes all
14 | protobuf messages of type \code{msg_type}. The latter is of type
15 | \code{RProtoBuf::Descriptor}
16 | }
17 | 


--------------------------------------------------------------------------------
/R/bayesmixr/tests/testthat.R:
--------------------------------------------------------------------------------
1 | test_check("bayesmixr")
2 | 


--------------------------------------------------------------------------------
/R/bayesmixr/tests/testthat/test_build.R:
--------------------------------------------------------------------------------
1 | test_that("build_bayesmix() is successful", {
2 |   cat("\n")
3 |   testthat::expect_no_error(build_bayesmix())
4 | })
5 | 


--------------------------------------------------------------------------------
/benchmarks/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.13.0)
 2 | project(benchmark_bayesmix)
 3 | enable_testing()
 4 | 
 5 | add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../lib/math/lib/benchmark_1.5.1 build)
 6 | 
 7 | add_executable(benchmark_bayesmix $<TARGET_OBJECTS:bayesmix>
 8 |   nnw_marg_lpdf.cc
 9 |   mcmc_runs.cc
10 |   eval_lpdf.cc
11 |   lpd_grid.cc
12 |   main.cpp
13 | )
14 | 
15 | target_include_directories(benchmark_bayesmix PUBLIC ${INCLUDE_PATHS})
16 | target_link_libraries(benchmark_bayesmix PUBLIC
17 |   ${LINK_LIBRARIES}  benchmark::benchmark benchmark::benchmark_main)
18 | target_compile_options(benchmark_bayesmix PUBLIC ${COMPILE_OPTIONS})
19 | 


--------------------------------------------------------------------------------
/benchmarks/main.cpp:
--------------------------------------------------------------------------------
1 | #include <benchmark/benchmark.h>
2 | 
3 | BENCHMARK_MAIN();
4 | 


--------------------------------------------------------------------------------
/benchmarks/mcmc_runs.cc:
--------------------------------------------------------------------------------
 1 | #include <benchmark/benchmark.h>
 2 | 
 3 | #include "benchmarks/utils.h"
 4 | #include "src/includes.h"
 5 | 
 6 | void run(std::shared_ptr<BaseAlgorithm>& algorithm,
 7 |          const Eigen::MatrixXd& data, MemoryCollector* collector) {
 8 |   algorithm->set_data(data);  // hand the dataset to the algorithm
 9 |   algorithm->run(collector);  // run it, passing the collector through
10 | }
11 | 
12 | Eigen::MatrixXd get_data(int dim) {  // reads the benchmark dataset for `dim`
13 |   const char delim = ' ';  // delimiter handed to the reader (a space)
14 |   Eigen::MatrixXd out;  // paths below are relative to the working directory
15 |   if (dim == 1) {  // the univariate dataset has its own file name
16 |     out = bayesmix::read_eigen_matrix(
17 |         "../resources/benchmarks/datasets/univariate_gaussian.csv", delim);
18 |   } else {  // otherwise the file name embeds the dimension
19 |     out = bayesmix::read_eigen_matrix(
20 |         "../resources/benchmarks/datasets/multi_gaussian_dim_" +
21 |             std::to_string(dim) + ".csv",
22 |         delim);
23 |   }
24 |   return out;
25 | }
26 | 
27 | // Returns the .recordio path under ../resources/benchmarks/chains/ for the
28 | // chain produced by algorithm `algo_id` on the `dim`-dimensional dataset.
29 | std::string get_output_file(std::string algo_id, int dim) {
30 |   const std::string stem = "../resources/benchmarks/chains/" + algo_id;
31 |   if (dim == 1) {
32 |     return stem + "_univariate_gaussian.recordio";
33 |   }
34 |   // Keep "dim_" and the number adjacent (no blank in the file name), as in
35 |   // the dataset file names built by get_data().
36 |   return stem + "_multi_gaussian_dim_" + std::to_string(dim) + ".recordio";
37 | }
38 | // Times get_algorithm("Neal2", dim) plus run() on the dataset of that dim.
39 | static void BM_Neal2(benchmark::State& state) {
40 |   int dim = state.range(0);  // data dimension is the benchmark argument
41 |   Eigen::MatrixXd data = get_data(dim);
42 |   MemoryCollector collector;  // one collector reused by all timed iterations
43 |   for (auto _ : state) {
44 |     std::shared_ptr<BaseAlgorithm> algo = get_algorithm("Neal2", dim);
45 |     run(algo, data, &collector);
46 |   }
47 |   collector.write_to_file<bayesmix::AlgorithmState>(
48 |       get_output_file("Neal2", dim));
49 | }
50 | // Times get_algorithm("Neal3", dim) plus run() on the dataset of that dim.
51 | static void BM_Neal3(benchmark::State& state) {
52 |   int dim = state.range(0);  // data dimension is the benchmark argument
53 |   Eigen::MatrixXd data = get_data(dim);
54 |   MemoryCollector collector;  // one collector reused by all timed iterations
55 |   for (auto _ : state) {
56 |     std::shared_ptr<BaseAlgorithm> algo = get_algorithm("Neal3", dim);
57 |     run(algo, data, &collector);
58 |   }
59 |   collector.write_to_file<bayesmix::AlgorithmState>(
60 |       get_output_file("Neal3", dim));
61 | }
62 | // Times get_algorithm("Neal8", dim) plus run() on the dataset of that dim.
63 | static void BM_Neal8(benchmark::State& state) {
64 |   int dim = state.range(0);  // data dimension is the benchmark argument
65 |   Eigen::MatrixXd data = get_data(dim);
66 |   MemoryCollector collector;  // one collector reused by all timed iterations
67 |   for (auto _ : state) {
68 |     std::shared_ptr<BaseAlgorithm> algo = get_algorithm("Neal8", dim);
69 |     run(algo, data, &collector);
70 |   }
71 |   collector.write_to_file<bayesmix::AlgorithmState>(
72 |       get_output_file("Neal8", dim));
73 | }
74 | // Register each benchmark with the data dimensions 1, 2, 4 and 8.
75 | BENCHMARK(BM_Neal2)->Arg(1)->Arg(2)->Arg(4)->Arg(8);
76 | BENCHMARK(BM_Neal3)->Arg(1)->Arg(2)->Arg(4)->Arg(8);
77 | BENCHMARK(BM_Neal8)->Arg(1)->Arg(2)->Arg(4)->Arg(8);
78 | 


--------------------------------------------------------------------------------
/benchmarks/nnw_marg_lpdf.cc:
--------------------------------------------------------------------------------
 1 | #include <benchmark/benchmark.h>
 2 | 
 3 | #include <iostream>
 4 | #include <stan/math/rev.hpp>
 5 | 
 6 | #include "utils.h"
 7 | // Times prior_pred_lpdf() at the zero vector, for dimension state.range(0).
 8 | static void BM_NNWPriorPred(benchmark::State& state) {
 9 |   int dim = state.range(0);
10 |   auto hierarchy = get_multivariate_nnw_hierarchy(dim);
11 |   Eigen::VectorXd x = Eigen::VectorXd::Zero(dim);
12 |   for (auto _ : state) {
13 |     hierarchy->prior_pred_lpdf(x);
14 |   }
15 | }
16 | // Times initialize(), adding 10 random data rows and sample_full_cond().
17 | static void BM_NNWSampleFullCond(benchmark::State& state) {
18 |   int dim = state.range(0);
19 |   auto hierarchy = get_multivariate_nnw_hierarchy(dim);
20 |   Eigen::MatrixXd data = Eigen::MatrixXd::Random(10, dim);
21 |   for (auto _ : state) {
22 |     hierarchy->initialize();  // called on every iteration, before the data
23 |     for (int i = 0; i < 10; i++) {
24 |       hierarchy->add_datum(i, data.row(i));
25 |     }
26 |     hierarchy->sample_full_cond();
27 |   }
28 | }
29 | // Times save_posterior_hypers() + conditional_pred_lpdf() at the zero vector.
30 | static void BM_NNWConditionalPred(benchmark::State& state) {
31 |   int dim = state.range(0);
32 |   auto hierarchy = get_multivariate_nnw_hierarchy(dim);
33 |   Eigen::MatrixXd data = Eigen::MatrixXd::Random(10, dim);
34 |   for (int i = 0; i < 10; i++) {  // data are added once, outside the timing
35 |     hierarchy->add_datum(i, data.row(i));
36 |   }
37 |   Eigen::VectorXd x = Eigen::VectorXd::Zero(dim);
38 |   // NOTE(review): the dynamic_pointer_cast result below is used unchecked.
39 |   for (auto _ : state) {
40 |     std::dynamic_pointer_cast<NNWHierarchy>(hierarchy)
41 |         ->save_posterior_hypers();
42 |     hierarchy->conditional_pred_lpdf(x);
43 |   }
44 | }
45 | // Arguments range from 2 to 2 << 5 (= 64) with a range multiplier of 2.
46 | BENCHMARK(BM_NNWPriorPred)->RangeMultiplier(2)->Range(2, 2 << 5);
47 | BENCHMARK(BM_NNWSampleFullCond)->RangeMultiplier(2)->Range(2, 2 << 5);
48 | BENCHMARK(BM_NNWConditionalPred)->RangeMultiplier(2)->Range(2, 2 << 5);
49 | 


--------------------------------------------------------------------------------
/cmake/FindSphinx.cmake:
--------------------------------------------------------------------------------
 1 | # Look for an executable called sphinx-build; the result is cached in SPHINX_EXECUTABLE
 2 | find_program(SPHINX_EXECUTABLE
 3 |              NAMES sphinx-build
 4 |              DOC "Path to sphinx-build executable")
 5 | 
 6 | include(FindPackageHandleStandardArgs)
 7 | 
 8 | # Handle standard arguments to find_package like REQUIRED and QUIET
 9 | find_package_handle_standard_args(Sphinx
10 |                                   "Failed to find sphinx-build executable"
11 |                                   SPHINX_EXECUTABLE)
12 | 


--------------------------------------------------------------------------------
/cmake/ProtobufUtils.cmake:
--------------------------------------------------------------------------------
 1 | # CMake function that adds compilation instructions for every .proto file in
 2 | # a given FOLDER, passed as input. HEADERS and SOURCES name the output lists.
 3 | # Optional: PYTHON_OUT_PATH (one value) and INCLUDE_PROTO_PATHS (multi value).
 4 | function(compile_protobuf_files)
 5 |     # Parse input arguments (the function takes no boolean options)
 6 |     set(oneValueArgs FOLDER HEADERS SOURCES PYTHON_OUT_PATH)
 7 |     set(multiValueArgs INCLUDE_PROTO_PATHS)
 8 |     cmake_parse_arguments(arg "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
 9 | 
10 |     # Append all paths for protoc
11 |     list(APPEND PROTO_DIRS "--proto_path=${arg_FOLDER}")
12 |     if(NOT "${arg_INCLUDE_PROTO_PATHS}" STREQUAL "")
13 |         foreach(PBPATH IN LISTS arg_INCLUDE_PROTO_PATHS)
14 |             list(APPEND PROTO_DIRS "--proto_path=${PBPATH}")
15 |         endforeach()
16 |     endif()
17 | 
18 |     # Set the --python_out option if PYTHON_OUT_PATH is set
19 |     if(NOT "${arg_PYTHON_OUT_PATH}" STREQUAL "")
20 |         set(PYTHON_OUT "--python_out=${arg_PYTHON_OUT_PATH}")
21 |     endif()
22 | 
23 |     # Make a custom command to compile each .proto file found in FOLDER
24 |     file(GLOB ProtoFiles "${arg_FOLDER}/*.proto")
25 |     set(PROTO_DIR proto)
26 |     foreach(PROTO_FILE IN LISTS ProtoFiles)
27 |     message(STATUS "protoc proto(cc): ${PROTO_FILE}")
28 |     get_filename_component(PROTO_DIR ${PROTO_FILE} DIRECTORY)
29 |     get_filename_component(PROTO_NAME ${PROTO_FILE} NAME_WE)
30 |     set(PROTO_HDR ${CMAKE_CURRENT_BINARY_DIR}/${PROTO_NAME}.pb.h)
31 |     set(PROTO_SRC ${CMAKE_CURRENT_BINARY_DIR}/${PROTO_NAME}.pb.cc)
32 |     message(STATUS "protoc hdr: ${PROTO_HDR}")
33 |     message(STATUS "protoc src: ${PROTO_SRC}")
34 |     add_custom_command(
35 |         OUTPUT ${PROTO_SRC} ${PROTO_HDR}
36 |         COMMAND ${Protobuf_PROTOC_EXECUTABLE} ${PROTO_DIRS}
37 |         "--cpp_out=${PROJECT_BINARY_DIR}" ${PYTHON_OUT}
38 |         ${PROTO_FILE}
39 |         DEPENDS ${PROTO_FILE} ${Protobuf_PROTOC_EXECUTABLE}
40 |         COMMENT "Generate C++ protocol buffer for ${PROTO_FILE}"
41 |         VERBATIM)
42 |     list(APPEND PROTO_HEADERS ${PROTO_HDR})
43 |     list(APPEND PROTO_SOURCES ${PROTO_SRC})
44 |     endforeach()
45 |     SET_SOURCE_FILES_PROPERTIES(${PROTO_SOURCES} ${PROTO_HEADERS} PROPERTIES GENERATED TRUE)
46 | 
47 |     # Propagate PROTO_HEADERS and PROTO_SOURCES to the parent scope
48 |     set(${arg_HEADERS} ${PROTO_HEADERS} PARENT_SCOPE)
49 |     set(${arg_SOURCES} ${PROTO_SOURCES} PARENT_SCOPE)
50 | endfunction()
51 | 


--------------------------------------------------------------------------------
/cmake/math.cmake:
--------------------------------------------------------------------------------
 1 | # Fetch bayesmix-dev/math (GIT_TAG "develop") at configure time
 2 | message(STATUS "")
 3 | message(STATUS "Fetching bayesmix-dev/math")
 4 | FetchContent_Declare(math
 5 |   GIT_REPOSITORY "https://github.com/bayesmix-dev/math.git"
 6 |   GIT_TAG "develop"
 7 | )
 8 | FetchContent_MakeAvailable(math)
 9 | 
10 | # Set TBB_ROOT to the lib/tbb folder inside the fetched math sources
11 | set(TBB_ROOT ${math_SOURCE_DIR}/lib/tbb)
12 | 
13 | # Define make command: mingw32-make on Windows, make elsewhere
14 | if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
15 |   set(MAKE_COMMAND mingw32-make)
16 | else()
17 |   set(MAKE_COMMAND make)
18 | endif()
19 | 
20 | # On Windows, append extra compiler flags to make/local (on every configure run)
21 | if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
22 |   file(APPEND ${math_SOURCE_DIR}/make/local "CXXFLAGS+=-Wno-nonnull\n")
23 |   file(APPEND ${math_SOURCE_DIR}/make/local "TBB_CXXFLAGS=-U__MSVCRT_VERSION__ -D__MSVCRT_VERSION__=0x0E00\n")
24 | endif()
25 | 
26 | # Compile math libraries at configure time; abort if the make command fails
27 | message(STATUS "Compiling math libraries ...")
28 | execute_process(
29 |   COMMAND ${MAKE_COMMAND} -f ./make/standalone math-libs
30 |   RESULT_VARIABLE result
31 |   WORKING_DIRECTORY ${math_SOURCE_DIR}
32 | )
33 | if(result)
34 |   message(FATAL_ERROR "Failed to compile math libraries (${result})!")
35 | endif()
36 | 
37 | # Add TBB link directory
38 | link_directories(${TBB_ROOT})
39 | 
40 | # On Windows, run install-tbb.bat from BASEPATH if TBB_ROOT is not in PATH
41 | if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
42 |   # Check whether TBB_ROOT is already present in PATH
43 |   file(TO_CMAKE_PATH "$ENV{PATH}" PATH)
44 |   string(FIND "${PATH}" "${TBB_ROOT}" tbb_path-LOCATION)
45 |   # If not present, run the batch file (presumably adds it to the user PATH)
46 |   if(tbb_path-LOCATION EQUAL -1)
47 |     execute_process(
48 |       COMMAND cmd.exe /C install-tbb.bat
49 |       RESULT_VARIABLE result
50 |       WORKING_DIRECTORY ${BASEPATH}
51 |     )
52 |     if(result)
53 |       message(FATAL_ERROR "Failed to install TBB (${result})!")
54 |     endif()
55 |   endif()
56 | endif()
57 | 


--------------------------------------------------------------------------------
/cmake/matplotplusplus.cmake:
--------------------------------------------------------------------------------
 1 | # Define the patch command passed as PATCH_COMMAND below (BASEPATH is set elsewhere)
 2 | set(matplotplusplus_patch git apply ${BASEPATH}/resources/patches/matplotplusplus.patch)
 3 | 
 4 | # Fetch matplotplusplus v1.2.1, apply the patch and make it available
 5 | message(STATUS "")
 6 | message(STATUS "Fetching alandefreitas/matplotplusplus")
 7 | FetchContent_Declare(matplotplusplus
 8 | 	GIT_REPOSITORY "https://github.com/alandefreitas/matplotplusplus.git"
 9 | 	GIT_TAG "v1.2.1"
10 | 	PATCH_COMMAND ${matplotplusplus_patch}
11 | )
12 | FetchContent_MakeAvailable(matplotplusplus)
13 | 


--------------------------------------------------------------------------------
/cmake/protobuf.cmake:
--------------------------------------------------------------------------------
 1 | include(GNUInstallDirs)
 2 | 
 3 | # Set protobuf options
 4 | set(Protobuf_USE_STATIC_LIBS ON)
 5 | set(Protobuf_MSVC_STATIC_RUNTIME OFF)
 6 | set(protobuf_BUILD_TESTS OFF)
 7 | set(protobuf_BUILD_PROTOC_BINARIES ON)
 8 | 
 9 | # Fetch protocolbuffers/protobuf (release archive v3.19.5)
10 | message(STATUS "")
11 | message(STATUS "Fetching protocolbuffers/protobuf")
12 | FetchContent_Declare(protobuf
13 |   DOWNLOAD_EXTRACT_TIMESTAMP TRUE
14 |   URL "https://github.com/protocolbuffers/protobuf/archive/refs/tags/v3.19.5.tar.gz"
15 | )
16 | FetchContent_MakeAvailable(protobuf)
17 | 
18 | # Set variables: protobuf is configured and built in place, in its cmake/ folder
19 | set(Protobuf_ROOT ${protobuf_SOURCE_DIR}/cmake)
20 | set(Protobuf_DIR ${Protobuf_ROOT}/${CMAKE_INSTALL_LIBDIR}/cmake/protobuf)
21 | 
22 | # Configure protobuf (the sources were already downloaded by FetchContent)
23 | message(STATUS "Setting up protobuf ...")
24 | execute_process(
25 |   COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_PROTOC_BINARIES=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -G "${CMAKE_GENERATOR}" .
26 |   RESULT_VARIABLE result
27 |   WORKING_DIRECTORY ${Protobuf_ROOT}
28 | )
29 | if(result)
30 |   message(FATAL_ERROR "Failed to configure protobuf (${result})!")
31 | endif()
32 | 
33 | # Build protobuf
34 | message(STATUS "Building protobuf ...")
35 | execute_process(
36 |   COMMAND ${CMAKE_COMMAND} --build .
37 |   RESULT_VARIABLE result
38 |   WORKING_DIRECTORY ${Protobuf_ROOT}
39 | )
40 | if(result)
41 |   message(FATAL_ERROR "Failed to build protobuf (${result})!")
42 | endif()
43 | 
44 | # Find the package, using the folder of the generated config files as a hint
45 | find_package(Protobuf REQUIRED HINTS ${Protobuf_DIR})
46 | 
47 | # Include protobuf-related information
48 | include(${Protobuf_DIR}/protobuf-config.cmake)
49 | include(${Protobuf_DIR}/protobuf-module.cmake)
50 | include(${Protobuf_DIR}/protobuf-options.cmake)
51 | include(${Protobuf_DIR}/protobuf-targets.cmake)
52 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | # MacOS storage files
2 | .DS_Store
3 | # Make files generated by CMake
4 | make.bat
5 | Makefile
6 | 


--------------------------------------------------------------------------------
/docs/algorithms.rst:
--------------------------------------------------------------------------------
 1 | bayesmix/algorithms
 2 | 
 3 | The ``Algorithm`` class handles other class objects and performs the MCMC simulation.
 4 | There are two types of ``Algorithm``: marginal and conditional, each of which can only be used with the matching type of ``Mixing``.
 5 | 
 6 | Algorithms
 7 | ==========
 8 | .. doxygenclass:: BaseAlgorithm
 9 |    :project: bayesmix
10 |    :members:
11 | .. doxygenclass:: MarginalAlgorithm
12 |    :project: bayesmix
13 |    :members:
14 | .. doxygenclass:: Neal2Algorithm
15 |    :project: bayesmix
16 |    :members:
17 | .. doxygenclass:: Neal3Algorithm
18 |    :project: bayesmix
19 |    :members:
20 | .. doxygenclass:: Neal8Algorithm
21 |    :project: bayesmix
22 |    :members:
23 | .. doxygenclass:: SplitAndMergeAlgorithm
24 |    :project: bayesmix
25 |    :members:
26 | .. doxygenclass:: ConditionalAlgorithm
27 |    :project: bayesmix
28 |    :members:
29 | .. doxygenclass:: BlockedGibbsAlgorithm
30 |    :project: bayesmix
31 |    :members:
32 | 


--------------------------------------------------------------------------------
/docs/collectors.rst:
--------------------------------------------------------------------------------
 1 | bayesmix/collectors
 2 | 
 3 | Collectors
 4 | ==========
 5 | .. doxygenclass:: BaseCollector
 6 |    :project: bayesmix
 7 |    :members:
 8 | .. doxygenclass:: FileCollector
 9 |    :project: bayesmix
10 |    :members:
11 | .. doxygenclass:: MemoryCollector
12 |    :project: bayesmix
13 |    :members:
14 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import subprocess
 4 | sys.path.insert(0, os.path.abspath('.'))
 5 | sys.path.insert(0, os.path.abspath('..'))
 6 | sys.path.insert(0, os.path.abspath('../python'))
 7 | sys.path.insert(0, os.path.abspath('../python/bayesmixpy'))
 8 | 
 9 | # Writes 'Doxyfile' from 'Doxyfile.in' with the two directory placeholders filled.
10 | def configureDoxyfile(input_dir, output_dir):
11 |     with open('Doxyfile.in', 'r') as file :  # template, read from the cwd
12 |         filedata = file.read()
13 | 
14 |     filedata = filedata.replace('@DOXYGEN_INPUT_DIR@', input_dir)
15 |     filedata = filedata.replace('@DOXYGEN_OUTPUT_DIR@', output_dir)
16 | 
17 |     with open('Doxyfile', 'w') as file:  # overwrites any existing Doxyfile
18 |         file.write(filedata)
19 | 
20 | # Check if we're running on Read the Docs' servers
21 | read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
22 | 
23 | breathe_projects = { "bayesmix": "../build/docs/docs/doxygen/xml " }
24 | breathe_default_project = "bayesmix"
25 | 
26 | 
27 | if read_the_docs_build:
28 |     input_dir = '../src'
29 |     output_dir = 'build'
30 |     configureDoxyfile(input_dir, output_dir)
31 |     subprocess.call('doxygen', shell=True)
32 |     breathe_projects['bayesmix'] = output_dir + '/xml'
33 | 
34 | 
35 | project = 'bayesmix'
36 | copyright = '2021, Guindani, B. and Beraha, M.'
37 | author = 'Guindani, B. and Beraha, M.'
38 | 
39 | # The full version, including alpha/beta/rc tags
40 | release = '0.0.1'
41 | 
42 | extensions = [
43 |     'sphinx.ext.autodoc',
44 |     'sphinx.ext.doctest',
45 |     'sphinx.ext.mathjax',
46 |     'sphinx.ext.napoleon',
47 |     'sphinx.ext.viewcode',
48 |     'sphinx.ext.imgmath',  # NOTE(review): mathjax is enabled too -- confirm the intended renderer
49 |     'sphinx.ext.todo',
50 |     'breathe',  # reads the Doxygen XML configured in breathe_projects
51 | ]
52 | 
53 | 
54 | templates_path = ['_templates']
55 | 
56 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
57 | 
58 | html_theme = 'haiku'
59 | 
60 | highlight_language = 'cpp'  # default language for code blocks
61 | 
62 | imgmath_latex = 'latex'
63 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. bayesmix documentation master file, created by
 2 |    sphinx-quickstart on Sun Jun 27 08:35:53 2021.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | bayesmix: a nonparametric C++ library for mixture models
 7 | ========================================================
 8 | 
 9 | .. image:: ../resources/logo_full.svg
10 |    :width: 250px
11 |    :alt: bayesmix full logo
12 | 
13 | .. image::
14 |    https://readthedocs.org/projects/bayesmix/badge/?version=latest
15 |    :target: https://bayesmix.readthedocs.io/en/latest/?badge=latest
16 |    :alt: Documentation Status
17 | 
18 | ``bayesmix`` is a C++ library for running MCMC simulations in Bayesian mixture models.
19 | It uses the ``Eigen`` library for vector-matrix manipulation and linear algebra, and ``protobuf`` (Protocol Buffers) for communication and storage of structured data.
20 | 
21 | 
22 | 
23 | Submodules
24 | ==========
25 | 
26 | There are currently three submodules to the ``bayesmix`` library, represented by three classes of objects:
27 | 
28 | - ``Algorithms``
29 | - ``Hierarchies``
30 | - ``Mixings``
31 | 
32 | Further, we employ Protocol buffers for several purposes, including serialization. The list of all protos with their docs is available in the ``protos`` link below.
33 | 
34 | .. toctree::
35 |    :maxdepth: 2
36 |    :titlesonly:
37 |    :caption: API: library submodules
38 | 
39 |    algorithms
40 |    hierarchies
41 |    mixings
42 |    collectors
43 |    protos
44 |    utils
45 | 
46 | 
47 | Tutorials
48 | =========
49 | 
50 | .. toctree::
51 |    :maxdepth: 1
52 | 
53 |    tutorial
54 | 
55 | .. :doc:`tutorial`
56 | 
57 | 
58 | Python interface
59 | ================
60 | 
61 | .. toctree::
62 |    :maxdepth: 1
63 | 
64 |    python_interface
65 | 
66 | 
67 | Indices and tables
68 | ==================
69 | 
70 | * :ref:`genindex`
71 | * :ref:`modindex`
72 | * :ref:`search`
73 | 


--------------------------------------------------------------------------------
/docs/mixings.rst:
--------------------------------------------------------------------------------
 1 | bayesmix/mixings
 2 | 
 3 | Mixings
 4 | =======
 5 | 
 6 | In the algorithms of the library, we store a single ``Mixing`` object that represents a prior for the mixing weights for the mixture models and the induced exchangeable partition probability function (EPPF).
 7 | There are two types of ``Mixing``: marginal and conditional, each of which can only be used with the matching type of ``Algorithm``.
 8 | For both of these types, certain API functions are required.
 9 | 
10 | 
11 | --------------
12 | Code structure
13 | --------------
14 | 
15 | We employ a Curiously Recurring Template Pattern coupled with an abstract interface, similarly to the ``Hierarchy`` class (see :ref:`here <hierarchies-crtp>`).
16 | The code thus consists of: a virtual class defining the API, a template base class that is the base for the CRTP, and derived child classes that fully specialize the template arguments.
17 | The class ``AbstractMixing`` defines the API, i.e. all the methods that need to be called from outside of a ``Mixing`` class.
18 | A template class ``BaseMixing`` inherits from ``AbstractMixing`` and implements some of the necessary virtual methods, which need not be implemented by the child classes.
19 | 
20 | 
21 | -------
22 | Classes
23 | -------
24 | 
25 | .. doxygenclass:: AbstractMixing
26 |    :project: bayesmix
27 |    :members:
28 | .. doxygenclass:: BaseMixing
29 |    :project: bayesmix
30 |    :members:
31 | .. doxygenclass:: DirichletMixing
32 |    :project: bayesmix
33 |    :members:
34 | .. doxygenclass:: PitYorMixing
35 |    :project: bayesmix
36 |    :members:
37 | .. doxygenclass:: MixtureFiniteMixing
38 |    :project: bayesmix
39 |    :members:
40 | .. doxygenclass:: TruncatedSBMixing
41 |    :project: bayesmix
42 |    :members:
43 | .. doxygenclass:: LogitSBMixing
44 |    :project: bayesmix
45 |    :members:
46 | 


--------------------------------------------------------------------------------
/docs/protos.rst:
--------------------------------------------------------------------------------
 1 | bayesmix/protos
 2 | 
 3 | .. _protos:
 4 | 
 5 | Protos
 6 | ======
 7 | 
 8 | This library depends on Google's `Protocol Buffers <https://developers.google.com/protocol-buffers>`_, also known as ``protobuf``, which provides a convenient way to define classes that represent structured data.
 9 | Special classes henceforth referred to as ``protobuf`` messages, or protos for short, can be defined in ``.proto`` files. A special compiler, ``protoc``, is automatically called by the library to generate C++ and/or Python classes for each message.
10 | The ``protobuf`` runtime library provides fast serialization of messages into bytes, which can be used to save objects to disk or pass serialized objects from one language to another.
11 | 
12 | A description of all protos used in ``bayesmix`` follows.
13 | These range from simple enumerator identifiers (enums) and basic data types such as vectors or matrices, to objects representing probability distributions, hyperpriors, states, or hyperparameter values.
14 | Some of these protos are embedded in one another, possibly using the ``oneof`` keyword, which allows the outer proto to flexibly choose and contain one type of object among many different ones.
15 | For instance, this is the case with protos representing hyperpriors, which can have increasing degrees of complexity depending on which model is chosen by the user.
16 | 
17 | The use of protos allows easy interface between multiple programming languages, as well as *a posteriori* analysis of MCMC chains.
18 | 
19 | .. raw:: html
20 |     :file: protos.html
21 | 


--------------------------------------------------------------------------------
/docs/python_interface.rst:
--------------------------------------------------------------------------------
 1 | ==========================================
 2 | BayesMixPy: a Python interface to BayesMix
 3 | ==========================================
 4 | 
 5 | Installation
 6 | ============
 7 | 
 8 | After you have cloned the bayesmix github directory, navigate to the Python subfolder and install bayesmixpy using pip
 9 | 
10 | .. code-block:: shell
11 | 
12 |     cd python
13 |     pip3 install -e .
14 | 
15 | 
16 | Usage
17 | =====
18 | 
19 | `bayesmixpy` provides two functions: `build_bayesmix` and `run_mcmc`. The first one
20 | installs `bayesmix` and its executables for you, while the second one calls the
21 | executable that runs the MCMC sampler from Python.
22 | 
23 | Building bayesmix
24 | -----------------
25 | 
26 | To build `bayesmix`, in a Python shell or a notebook write
27 | 
28 | .. code-block:: python
29 | 
30 |   from bayesmixpy import build_bayesmix
31 | 
32 |   n_proc = 4 # number of processors for building in parallel
33 |   build_bayesmix(n_proc)
34 | 
35 | 
36 | This will print out the installation log and, if the installation was successful, the following message:
37 | 
38 | .. code-block:: shell
39 | 
40 |   Bayesmix executable is in '<BAYESMIX_HOME_REPO>/build',
41 |   export the environment variable BAYESMIX_EXE=<BAYESMIX_HOME_REPO>build/run_mcmc
42 | 
43 | 
44 | Hence, for running the MCMC chain you should export the `BAYESMIX_EXE` environment variable. This can be done once and for all by copying
45 | 
46 | .. code-block:: shell
47 | 
48 |   export BAYESMIX_EXE=<BAYESMIX_HOME_REPO>/build/run_mcmc
49 | 
50 | into your .bashrc file (or .zshrc if you are a macOS user). Alternatively, every time you use bayesmixpy, you can add the following lines on top of your Python script/notebook
51 | 
52 | .. code-block:: python
53 | 
54 |   import os
55 |   os.environ["BAYESMIX_EXE"] = "<BAYESMIX_HOME_REPO>/build/run_mcmc"
56 | 
57 |   from bayesmixpy import run_mcmc
58 |   ....
59 | 
60 | 
61 | Running bayesmix
62 | ----------------
63 | 
64 | To use `run_mcmc`, users must define the model and the algorithm in some configuration files or text strings. See the notebooks in `python/notebooks/gaussian_mix_uni.ipynb` and `python/notebooks/gaussian_mix_multi.ipynb` for a concrete usage example.
65 | 
66 | 
67 | 
68 | The BayesmixPy Package
69 | =========================
70 | 
71 | 
72 | Functions
73 | ---------
74 | 
75 | .. automodule:: bayesmixpy
76 |    :members:
77 |    :undoc-members:
78 |    :show-inheritance:
79 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | breathe
2 | 


--------------------------------------------------------------------------------
/docs/states.rst:
--------------------------------------------------------------------------------
 1 | bayesmix/hierarchies/likelihoods/states
 2 | 
 3 | States
 4 | ======
 5 | 
 6 | ``States`` are classes used to store the parameters :math:`\theta_h` of every mixture component.
 7 | Their main purpose is to handle serialization and de-serialization of the state.
 8 | Moreover, they make it possible to go from the constrained to the unconstrained representation of the parameters (and vice versa) and to compute the associated determinant of the Jacobian appearing in the change of density formula.
 9 | 
10 | 
11 | --------------
12 | Code Structure
13 | --------------
14 | 
15 | All classes must inherit from the ``BaseState`` class
16 | 
17 | .. doxygenclass:: State::BaseState
18 |     :project: bayesmix
19 |     :members:
20 | 
21 | Depending on the chosen ``Updater``, the unconstrained representation might not be needed, and the methods ``get_unconstrained()``, ``set_from_unconstrained()`` and ``log_det_jac()`` might never be called.
22 | Therefore, we do not force users to implement them.
23 | In contrast, the ``set_from_proto()`` and ``get_as_proto()`` methods are fundamental, as they allow interaction with Google's Protocol Buffers library.
24 | 
25 | -------------
26 | State Classes
27 | -------------
28 | 
29 | .. doxygenclass:: State::UniLS
30 |     :project: bayesmix
31 |     :members:
32 | 
33 | .. doxygenclass:: State::MultiLS
34 |     :project: bayesmix
35 |     :members:
36 | 
37 | .. doxygenclass:: State::FA
38 |     :project: bayesmix
39 |     :members:
40 |     :protected-members:
41 | 


--------------------------------------------------------------------------------
/docs/updaters.rst:
--------------------------------------------------------------------------------
 1 | bayesmix/hierarchies/updaters
 2 | 
 3 | Updaters
 4 | ========
 5 | 
 6 | An ``Updater`` implements the machinery to provide a sampling from the full conditional distribution of a given hierarchy.
 7 | 
 8 | The only operation performed is ``draw``, which samples from the full conditional, either exactly or via Markov chain Monte Carlo.
 9 | 
10 | .. doxygenclass:: AbstractUpdater
11 |     :project: bayesmix
12 |     :members:
13 | 
14 | --------------
15 | Code Structure
16 | --------------
17 | 
18 | We distinguish between semi-conjugate updaters and the metropolis-like updaters.
19 | 
20 | 
21 | Semi Conjugate Updaters
22 | -----------------------
23 | 
24 | A semi-conjugate updater can be used when the full conditional distribution has the same form as the prior. Therefore, to sample from the full conditional, it is sufficient to call the ``draw`` method of the prior, but with an updated set of hyperparameters.
25 | 
26 | The class ``SemiConjugateUpdater`` defines the API
27 | 
28 | .. doxygenclass:: SemiConjugateUpdater
29 |     :project: bayesmix
30 |     :members:
31 | 
32 | Classes inheriting from this one should only implement the ``compute_posterior_hypers(...)`` member function.
33 | 
34 | 
35 | Metropolis-like Updaters
36 | ------------------------
37 | 
38 | A Metropolis updater uses the Metropolis-Hastings algorithm (or its variations) to sample from the full conditional density.
39 | 
40 | .. doxygenclass:: MetropolisUpdater
41 |     :project: bayesmix
42 |     :members:
43 | 
44 | 
45 | Classes inheriting from this one should only implement the ``sample_proposal(...)`` method, which samples from the proposal distribution, and the ``proposal_lpdf`` one, which evaluates the log-probability density function of the proposal.
46 | 
47 | ---------------
48 | Updater Classes
49 | ---------------
50 | 
51 | .. doxygenclass:: RandomWalkUpdater
52 |     :project: bayesmix
53 |     :members:
54 | .. doxygenclass:: MalaUpdater
55 |     :project: bayesmix
56 |     :members:
57 | .. doxygenclass:: NNIGUpdater
58 |     :project: bayesmix
59 |     :members:
60 |     :protected-members:
61 | .. doxygenclass:: NNxIGUpdater
62 |     :project: bayesmix
63 |     :members:
64 |     :protected-members:
65 | .. doxygenclass:: NNWUpdater
66 |     :project: bayesmix
67 |     :members:
68 |     :protected-members:
69 | .. doxygenclass:: MNIGUpdater
70 |     :project: bayesmix
71 |     :members:
72 |     :protected-members:
73 | .. doxygenclass:: FAUpdater
74 |     :project: bayesmix
75 |     :members:
76 |     :protected-members:
77 | 


--------------------------------------------------------------------------------
/docs/utils.rst:
--------------------------------------------------------------------------------
 1 | bayesmix/utils
 2 | 
 3 | Utils
 4 | =====
 5 | 
 6 | Collection of miscellaneous auxiliary tools for the library.
 7 | 
 8 | --------------------
 9 | Clustering utilities
10 | --------------------
11 | .. doxygenfile:: cluster_utils.h
12 |    :project: bayesmix
13 | 
14 | ------------------------------
15 | Distribution-related utilities
16 | ------------------------------
17 | .. doxygenfile:: distributions.h
18 |    :project: bayesmix
19 | 
20 | ----------------------------------------------
21 | ``Eigen`` matrix manipulation utilities
22 | ----------------------------------------------
23 | .. doxygenfile:: eigen_utils.h
24 |    :project: bayesmix
25 | 
26 | --------------------------------
27 | ``Eigen`` input-output utilities
28 | --------------------------------
29 | .. doxygenfile:: io_utils.h
30 |    :project: bayesmix
31 | 
32 | -----------------------------------
33 | ``protobuf`` input-output utilities
34 | -----------------------------------
35 | .. doxygenfile:: proto_utils.h
36 |    :project: bayesmix
37 | 
38 | -----------
39 | RNG wrapper
40 | -----------
41 | .. doxygenfile:: rng.h
42 |    :project: bayesmix
43 | 


--------------------------------------------------------------------------------
/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.13.0)
 2 | project(examples_bayesmix)
 3 | # Example executable, built from the bayesmix target's object files plus the files below
 4 | add_executable(run_gamma $<TARGET_OBJECTS:bayesmix>
 5 |     gamma_hierarchy/run_gamma_gamma.cc
 6 |     gamma_hierarchy/gammagamma_hierarchy.h
 7 |     gamma_hierarchy/gamma_likelihood.h
 8 |     gamma_hierarchy/gamma_prior_model.h
 9 |     gamma_hierarchy/gammagamma_updater.h
10 | )
11 | # INCLUDE_PATHS, LINK_LIBRARIES and COMPILE_OPTIONS are not set in this file
12 | target_include_directories(run_gamma PUBLIC ${INCLUDE_PATHS})
13 | target_link_libraries(run_gamma PUBLIC
14 |   ${LINK_LIBRARIES})
15 | target_compile_options(run_gamma PUBLIC ${COMPILE_OPTIONS})
16 | 


--------------------------------------------------------------------------------
/examples/fa_hierarchy/in/algo.asciipb:
--------------------------------------------------------------------------------
 1 | ##### GENERIC SETTINGS FOR ALL ALGORITHMS #####
 2 | # Algorithm ID string, e.g. "Neal2"
 3 | algo_id: "Neal8"
 4 | 
 5 | # RNG initial seed: any nonnegative integer
 6 | rng_seed: 20201124
 7 | 
 8 | # Number of iterations of the algorithm
 9 | iterations: 1100
10 | 
11 | # Number of initial iterations discarded by the algorithm
12 | burnin: 100
13 | 
14 | # Number of clusters in which data will be first initialized
15 | # (NOTE: If you wish to initialize one datum per cluster, please write 0.)
16 | # (NOTE: This value is ONLY used for initialization, and it may be overwritten
17 | #  by certain mixing objects, such as LogitSBMixing. Please check a mixing's
18 | #  initialize() function to know for sure whether or not it will override this
19 | #  value.)
20 | init_num_clusters: 3
21 | 
22 | 
23 | ##### ALGORITHM-SPECIFIC SETTINGS #####
24 | # Neal8 number of auxiliary blocks
25 | # (NOTE: 3 is the recommended value in most cases, please change it only if you
26 | #  know what you're doing.)
27 | neal8_n_aux: 3
28 | 


--------------------------------------------------------------------------------
/examples/fa_hierarchy/in/dp_gamma.asciipb:
--------------------------------------------------------------------------------
1 | gamma_prior {
2 |   totalmass_prior {
3 |     shape: 4.0
4 |     rate: 2.0
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/examples/fa_hierarchy/in/fa.asciipb:
--------------------------------------------------------------------------------
 1 | fixed_values {
 2 |   # Parameters whose size is 0 are initialized automatically. To set the hyperparameters manually, use the commented-out syntax below.
 3 |   mutilde: {
 4 |     size:0
 5 |     data:[]
 6 |     #size:20
 7 |     #data:[50.71368, 51.54430, 52.06730, 52.86145, 53.39870, 54.14019, 54.92412, 55.46906, 56.30326, 57.10029, 57.64240, 58.27382, 58.84905, 59.47209, 60.35402, 60.87941, 61.67874, 62.22960, 63.06052, 63.57241]
 8 |     }
 9 |   beta: {
10 |     size:0
11 |     data:[]
12 |     #size:20
13 |     #data:[35.67803, 31.78040, 31.89471, 32.05035, 27.65942, 29.61652, 24.34434, 25.66590, 24.47986, 25.06988, 24.66126, 29.17324, 23.63442, 23.63596, 24.57538, 22.61197, 25.00478, 28.55595, 25.03113, 25.07533]
14 |     }
15 |   phi: 0.01
16 |   alpha0: 5
17 |   q: 5
18 | }
19 | 


--------------------------------------------------------------------------------
/examples/fa_hierarchy/out/.gitignore:
--------------------------------------------------------------------------------
 1 | # Several image formats
 2 | *.png
 3 | *.svg
 4 | *.pdf
 5 | *.jpg
 6 | *.jpeg
 7 | *.eps
 8 | # Output files
 9 | *.csv
10 | *.recordio
11 | 


--------------------------------------------------------------------------------
/examples/fa_hierarchy/run.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 |   build/run_mcmc \
 4 |   --algo-params-file examples/fa_hierarchy/in/algo.asciipb \
 5 |   --hier-type FA --hier-args examples/fa_hierarchy/in/fa.asciipb \
 6 |   --mix-type DP --mix-args examples/fa_hierarchy/in/dp_gamma.asciipb \
 7 |   --coll-name examples/fa_hierarchy/out/chains.recordio \
 8 |   --data-file examples/fa_hierarchy/in/data.csv \
 9 |   --grid-file examples/fa_hierarchy/in/data.csv \
10 |   --dens-file examples/fa_hierarchy/out/density_file.csv \
11 |   --n-cl-file examples/fa_hierarchy/out/numclust.csv \
12 |   --clus-file examples/fa_hierarchy/out/clustering.csv \
13 |   --best-clus-file examples/fa_hierarchy/out/best_clustering.csv
14 | 


--------------------------------------------------------------------------------
/examples/gamma_hierarchy/gammagamma_hierarchy.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_GAMMA_GAMMA_HIERARCHY_H_
 2 | #define BAYESMIX_HIERARCHIES_GAMMA_GAMMA_HIERARCHY_H_
 3 | 
 4 | #include "gamma_likelihood.h"
 5 | #include "gamma_prior_model.h"
 6 | #include "gammagamma_updater.h"
 7 | #include "hierarchy_id.pb.h"
 8 | #include "src/hierarchies/base_hierarchy.h"
 9 | 
10 | class GammaGammaHierarchy
11 |     : public BaseHierarchy<GammaGammaHierarchy, GammaLikelihood,
12 |                            GammaPriorModel> {
13 |  public:
14 |   GammaGammaHierarchy(double shape_, double rate_alpha_, double rate_beta_) {
15 |     auto prior =
16 |         std::make_shared<GammaPriorModel>(shape_, rate_alpha_, rate_beta_);
17 |     set_prior(prior);
18 |   };
19 |   ~GammaGammaHierarchy() = default;
20 | 
21 |   bayesmix::HierarchyId get_id() const override {
22 |     return bayesmix::HierarchyId::UNKNOWN_HIERARCHY;
23 |   }
24 | 
25 |   void set_default_updater() {
26 |     updater = std::make_shared<GammaGammaUpdater>();
27 |   }
28 | 
29 |   void initialize_state() override {
30 |     // Get hypers
31 |     auto hypers = prior->get_hypers();
32 |     // Initialize likelihood state
33 |     State::Gamma state;
34 |     state.shape = prior->get_shape();
35 |     state.rate = hypers.rate_alpha / hypers.rate_beta;
36 |     like->set_state(state);
37 |   };
38 | 
39 |   double marg_lpdf(ProtoHypersPtr hier_params,
40 |                    const Eigen::RowVectorXd &datum) const override {
41 |     throw(
42 |         std::runtime_error("marg_lpdf() not implemented for this hierarchy"));
43 |     return 0;
44 |   }
45 | };
46 | 
47 | #endif  // BAYESMIX_HIERARCHIES_GAMMA_GAMMA_HIERARCHY_H_
48 | 


--------------------------------------------------------------------------------
/examples/gamma_hierarchy/gammagamma_updater.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_GAMMA_GAMMA_UPDATER_H_
 2 | #define BAYESMIX_HIERARCHIES_GAMMA_GAMMA_UPDATER_H_
 3 | 
 4 | #include "gamma_likelihood.h"
 5 | #include "gamma_prior_model.h"
 6 | #include "src/hierarchies/updaters/semi_conjugate_updater.h"
 7 | 
 8 | class GammaGammaUpdater
 9 |     : public SemiConjugateUpdater<GammaLikelihood, GammaPriorModel> {
10 |  public:
11 |   GammaGammaUpdater() = default;
12 |   ~GammaGammaUpdater() = default;
13 | 
14 |   bool is_conjugate() const override { return true; };
15 | 
16 |   ProtoHypersPtr compute_posterior_hypers(AbstractLikelihood& like,
17 |                                           AbstractPriorModel& prior) override;
18 | 
19 |   std::shared_ptr<AbstractUpdater> clone() const override;
20 | };
21 | 
22 | /* DEFINITIONS */
23 | AbstractUpdater::ProtoHypersPtr GammaGammaUpdater::compute_posterior_hypers(
24 |     AbstractLikelihood& like, AbstractPriorModel& prior) {
25 |   // Likelihood and Prior downcast
26 |   auto& likecast = downcast_likelihood(like);
27 |   auto& priorcast = downcast_prior(prior);
28 | 
29 |   // Getting required quantities from likelihood and prior
30 |   int card = likecast.get_card();
31 |   double data_sum = likecast.get_data_sum();
32 |   double ndata = likecast.get_ndata();
33 |   double shape = priorcast.get_shape();
34 |   auto hypers = priorcast.get_hypers();
35 | 
36 |   // No update possible
37 |   if (card == 0) {
38 |     return priorcast.get_hypers_proto();
39 |   }
40 |   // Compute posterior hyperparameters
41 |   double rate_alpha_new = hypers.rate_alpha + shape * ndata;
42 |   double rate_beta_new = hypers.rate_beta + data_sum;
43 | 
44 |   // Proto conversion
45 |   ProtoHypers out;
46 |   out.mutable_general_state()->mutable_data()->Add(rate_alpha_new);
47 |   out.mutable_general_state()->mutable_data()->Add(rate_beta_new);
48 |   return std::make_shared<ProtoHypers>(out);
49 | }
50 | 
51 | std::shared_ptr<AbstractUpdater> GammaGammaUpdater::clone() const {
52 |     auto out = std::make_shared<GammaGammaUpdater>(static_cast<GammaGammaUpdater const &>(*this));
53 |     out->clear_hypers();
54 |     return out;
55 |   }
56 | 
57 | #endif  // BAYESMIX_HIERARCHIES_GAMMA_GAMMA_UPDATER_H_
58 | 


--------------------------------------------------------------------------------
/examples/gamma_hierarchy/run_gamma_gamma.cc:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "gammagamma_hierarchy.h"
 4 | #include "src/includes.h"
 5 | 
 6 | Eigen::MatrixXd simulate_data(const unsigned int ndata) {
 7 |   Eigen::MatrixXd data(ndata, 1);
 8 |   auto& rng = bayesmix::Rng::Instance().get();
 9 |   for (int i = 0; i < ndata; i++) {
10 |     if (stan::math::uniform_rng(0, 1, rng) < 0.5) {
11 |       data(i, 0) = stan::math::gamma_rng(1, 5, rng);
12 |     } else {
13 |       data(i, 0) = stan::math::gamma_rng(1, 0.5, rng);
14 |     }
15 |   }
16 |   return data;
17 | }
18 | 
19 | int main() {
20 |   auto hier = std::make_shared<GammaGammaHierarchy>(1.0, 2.0, 2.0);
21 | 
22 |   bayesmix::DPPrior mix_prior;
23 |   double totalmass = 1.0;
24 |   mix_prior.mutable_fixed_value()->set_totalmass(totalmass);
25 |   auto mixing = MixingFactory::Instance().create_object("DP");
26 |   mixing->get_mutable_prior()->CopyFrom(mix_prior);
27 |   mixing->set_num_components(5);
28 | 
29 |   auto algo = AlgorithmFactory::Instance().create_object("Neal8");
30 |   MemoryCollector* coll = new MemoryCollector();
31 | 
32 |   Eigen::MatrixXd data = simulate_data(50);
33 |   algo->set_mixing(mixing);
34 |   algo->set_data(data);
35 |   algo->set_hierarchy(hier);
36 | 
37 |   bayesmix::AlgorithmParams params;
38 |   params.set_algo_id("Neal8");
39 |   params.set_rng_seed(0);
40 |   params.set_burnin(1000);
41 |   params.set_iterations(2000);
42 |   params.set_init_num_clusters(10);
43 |   params.set_neal8_n_aux(1);
44 | 
45 |   algo->read_params_from_proto(params);
46 |   algo->run(coll);
47 | 
48 |   delete coll;
49 | }
50 | 


--------------------------------------------------------------------------------
/examples/tutorial/2dplot.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | build/plot_mcmc \
 4 |   --grid-file resources/datasets/faithful_grid.csv \
 5 |   --dens-file resources/2d/density_2d.csv \
 6 |   --dens-plot resources/2d/density.png \
 7 |   --n-cl-file resources/2d/numclust_2d.csv \
 8 |   --n-cl-trace-plot resources/2d/traceplot.png \
 9 |   --n-cl-bar-plot  resources/2d/nclus_barplot.png
10 | 


--------------------------------------------------------------------------------
/examples/tutorial/2drun.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | build/run_mcmc \
 4 |   --algo-params-file resources/tutorial/algo.asciipb \
 5 |   --hier-type NNW --hier-args resources/tutorial/nnw_ngiw.asciipb \
 6 |   --mix-type DP --mix-args resources/tutorial/dp_gamma.asciipb \
 7 |   --data-file resources/datasets/faithful.csv \
 8 |   --grid-file resources/datasets/faithful_grid.csv \
 9 |   --coll-name resources/2d/chains_2d.recordio \
10 |   --dens-file resources/2d/density_2d.csv \
11 |   --n-cl-file resources/2d/numclust_2d.csv \
12 |   --clus-file resources/2d/clustering_2d.csv
13 | 


--------------------------------------------------------------------------------
/examples/tutorial/plot.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | build/plot_mcmc \
 4 |   --grid-file resources/tutorial/grid.csv \
 5 |   --dens-file resources/tutorial/out/density.csv \
 6 |   --dens-plot resources/tutorial/out/density.eps \
 7 |   --n-cl-file resources/tutorial/out/numclust.csv \
 8 |   --n-cl-trace-plot resources/tutorial/out/traceplot.eps \
 9 |   --n-cl-bar-plot  resources/tutorial/out/nclus_barplot.png
10 | 


--------------------------------------------------------------------------------
/examples/tutorial/run.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | build/run_mcmc \
 4 |   --algo-params-file resources/tutorial/algo.asciipb \
 5 |   --hier-type NNIG --hier-args resources/tutorial/nnig_ngg.asciipb \
 6 |   --mix-type DP --mix-args resources/tutorial/dp_gamma.asciipb \
 7 |   --coll-name resources/tutorial/out/chains.recordio \
 8 |   --data-file resources/tutorial/data.csv \
 9 |   --grid-file resources/tutorial/grid.csv \
10 |   --dens-file resources/tutorial/out/density.csv \
11 |   --n-cl-file resources/tutorial/out/numclust.csv \
12 |   --clus-file resources/tutorial/out/clustering.csv \
13 |   --best-clus-file resources/tutorial/out/best_clustering.csv
14 | 


--------------------------------------------------------------------------------
/install-tbb.bat:
--------------------------------------------------------------------------------
1 | @ECHO off
2 | echo Permanently setting TBB_ROOT to the PATH user environment variable:
3 | 
4 | for /F "tokens=2* delims= " %%f IN ('reg query HKCU\Environment /v PATH ^| findstr /i path') do set OLD_SYSTEM_PATH="%%g"
5 | setx Path %~dp0lib\_deps\math-src\lib\tbb;%OLD_SYSTEM_PATH%
6 | 
7 | echo Please close this shell and open a new shell.
8 | echo This will make the changes to the PATH variable become active.
9 | 


--------------------------------------------------------------------------------
/lib/progressbar/progressbar.h:
--------------------------------------------------------------------------------
 1 | // source: https://github.com/prakhar1989/progress-cpp
 2 | 
 3 | #pragma once
 4 | 
 5 | #include <chrono>
 6 | #include <iostream>
 7 | 
 8 | namespace progresscpp {
 9 | class ProgressBar {
10 |  private:
11 |   unsigned int ticks = 0;
12 | 
13 |   const unsigned int total_ticks;
14 |   const unsigned int bar_width;
15 |   const char complete_char = '=';
16 |   const char incomplete_char = ' ';
17 |   const std::chrono::steady_clock::time_point start_time =
18 |       std::chrono::steady_clock::now();
19 | 
20 |  public:
21 |   ProgressBar(unsigned int total, unsigned int width, char complete,
22 |               char incomplete)
23 |       : total_ticks{total},
24 |         bar_width{width},
25 |         complete_char{complete},
26 |         incomplete_char{incomplete} {}
27 | 
28 |   ProgressBar(unsigned int total, unsigned int width)
29 |       : total_ticks{total}, bar_width{width} {}
30 | 
31 |   unsigned int operator++() { return ++ticks; }
32 | 
33 |   void display() const {
34 |     float progress = (float)ticks / total_ticks;
35 |     int pos = (int)(bar_width * progress);
36 | 
37 |     std::chrono::steady_clock::time_point now =
38 |         std::chrono::steady_clock::now();
39 |     auto time_elapsed =
40 |         std::chrono::duration_cast<std::chrono::milliseconds>(now - start_time)
41 |             .count();
42 | 
43 |     std::cout << "[";
44 | 
45 |     for (int i = 0; i < bar_width; ++i) {
46 |       if (i < pos)
47 |         std::cout << complete_char;
48 |       else if (i == pos)
49 |         std::cout << ">";
50 |       else
51 |         std::cout << incomplete_char;
52 |     }
53 |     std::cout << "] " << int(progress * 100.0) << "% "
54 |               << float(time_elapsed) / 1000.0 << "s\r";
55 |     std::cout.flush();
56 |   }
57 | 
58 |   void done() const {
59 |     display();
60 |     std::cout << std::endl;
61 |   }
62 | };
63 | }  // namespace progresscpp
64 | 


--------------------------------------------------------------------------------
/pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: local
 3 |     hooks:
 4 |       - id: jupyter-nb-clear-output
 5 |         name: jupyter-nb-clear-output
 6 |         files: \.ipynb$
 7 |         stages: [commit]
 8 |         language: system
 9 |         entry: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace
10 | 
11 |   - repo: https://github.com/pre-commit/pre-commit-hooks
12 |     rev: v4.0.1
13 |     hooks:
14 |       - id: check-added-large-files # prevents giant files from being committed.
15 |       - id: check-case-conflict # checks for files that would conflict in case-insensitive filesystems.
16 |       - id: check-merge-conflict # checks for files that contain merge conflict strings.
17 |       - id: check-yaml # checks yaml files for parseable syntax.
18 |       - id: detect-private-key # detects the presence of private keys.
19 |       - id: end-of-file-fixer # ensures that a file is either empty, or ends with one newline.
20 |       - id: fix-byte-order-marker # removes utf-8 byte order marker.
21 |       - id: mixed-line-ending # replaces or checks mixed line ending.
22 |       - id: requirements-txt-fixer # sorts entries in requirements.txt.
23 |       - id: trailing-whitespace # trims trailing whitespace.
24 | 
25 |   - repo: https://github.com/pre-commit/mirrors-prettier
26 |     rev: v2.4.1
27 |     hooks:
28 |       - id: prettier
29 |         files: \.(js|ts|jsx|tsx|css|less|html|json|markdown|md|yaml|yml)$
30 | 
31 |   - repo: https://github.com/pre-commit/mirrors-clang-format
32 |     rev: v13.0.0
33 |     hooks:
34 |       - id: clang-format
35 | 


--------------------------------------------------------------------------------
/python/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .ipynb_checkpoints/
3 | *.csv
4 | .env
5 | bayesmixpy.egg-info/
6 | 


--------------------------------------------------------------------------------
/python/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/python/__init__.py


--------------------------------------------------------------------------------
/python/bayesmixpy/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['build_bayesmix', 'run_mcmc']
2 | 
3 | from .build_bayesmix import build_bayesmix
4 | from .run import run_mcmc
5 | 


--------------------------------------------------------------------------------
/python/bayesmixpy/build_bayesmix.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pathlib
 3 | import subprocess
 4 | import sys
 5 | 
 6 | from distutils.spawn import find_executable
 7 | 
 8 | from dotenv import set_key
 9 | 
10 | from .shell_utils import get_env_file, run_shell
11 | 
12 | HERE = os.path.dirname(os.path.realpath(__file__))
13 | path = pathlib.Path(HERE)
14 | BAYESMIX_HOME = os.environ.get("BAYESMIX_HOME", path.resolve().parents[1])
15 | 
16 | py2to3 = find_executable("2to3")
17 | PROTO_DIR = os.path.join(path, "proto/")
18 | 
19 | 
def set_bayesmix_env(run_path):
    """Store `run_path` under the key BAYESMIX_EXE in the package env file.

    The env file (as located by `get_env_file`) is created empty first if it
    does not exist yet.
    """
    dotenv_path = get_env_file()
    if not os.path.exists(dotenv_path):
        # Opening in append mode creates the file without touching content
        with open(dotenv_path, mode='a'):
            pass

    set_key(dotenv_path, "BAYESMIX_EXE", run_path)
26 | 
27 | 
def build_bayesmix(nproc=1, build_dirname="build"):
    """
    Builds the BayesMix executable by running cmake and then
    `make run_mcmc` inside the build directory. If both steps succeed, the
    path to the `run_mcmc` executable is stored under the key BAYESMIX_EXE
    through `set_bayesmix_env`, and 2to3 is run in place over the `proto`
    directory of this package.

    Parameters
    ----------

    nproc : int
        Number of processes to use for parallel compilation.
    build_dirname : str
        Name of the build directory, created inside BAYESMIX_HOME if it does
        not exist.

    Returns
    -------

    True if every step succeeded; None if cmake or make failed, or if the
    2to3 executable could not be found. If 2to3 runs but fails, the
    interpreter exits with status -1.
    """
    print("Building the Bayesmix executable")
    build_dir = os.path.join(BAYESMIX_HOME, build_dirname)
    os.makedirs(build_dir, exist_ok=True)
    cmake_cmd = "cmake .. -DDISABLE_BENCHMARKS=TRUE -DDISABLE_TESTS=TRUE " + \
        "-DDISABLE_PLOTS=TRUE -DCMAKE_BUILD_TYPE=Release"
    make_cmd = "make run_mcmc -j{}".format(nproc)

    # Configure, then compile; stop at the first step that fails.
    for step_cmd in (cmake_cmd, make_cmd):
        try:
            run_shell(step_cmd, cwd=build_dir)
        except subprocess.CalledProcessError as e:
            print(e)
            print("Some error has occurred while building Bayesmix. The library has not"
                  " been installed!")
            return

    set_bayesmix_env("{0}/{1}".format(build_dir, "run_mcmc"))

    # py2to3 is None when the executable was not found; without this check
    # the " ".join below would fail with an unhelpful TypeError.
    if py2to3 is None:
        print("The '2to3' executable was not found: the files in {0} could "
              "not be converted.".format(PROTO_DIR))
        return

    two_to_three_command = [
            py2to3, "--output-dir={0}".format(PROTO_DIR), "-W", "-n", PROTO_DIR]
    print("********* CALLING 2to3 ***********")
    print(" ".join(two_to_three_command))
    if subprocess.call(two_to_three_command) != 0:
        sys.exit(-1)

    return True
72 | 
73 | if __name__ == '__main__':
74 |     build_bayesmix(4)
75 | 


--------------------------------------------------------------------------------
/python/bayesmixpy/io_utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | from google.protobuf.internal.decoder import _DecodeVarint32
 4 | 
 5 | 
 6 | def _is_file(a: str):
 7 |     out = False
 8 |     try:
 9 |         p = Path(a)
10 |         out = p.exists() and p.is_file()
11 |     except Exception as e:
12 |         out = False
13 |     return out
14 | 
def maybe_print_proto_to_file(maybe_proto: str,
                         proto_name: str = None,
                         out_dir: str = None):
    """Return the name of a file holding the given message.

    When `maybe_proto` already names an existing file it is returned as is.
    Otherwise it is taken to be the text of a message: it is printed to
    `<out_dir>/<proto_name>.asciipb` and that file name is returned.
    """
    if not _is_file(maybe_proto):
        target = os.path.join(out_dir, proto_name + ".asciipb")
        with open(target, "w") as handle:
            print(maybe_proto, file=handle)
        return target

    return maybe_proto
31 | 
def read_many_protos_from_file(filename, MsgType):
    """Read a sequence of length-prefixed messages from a binary file.

    Each record in the file is a varint-encoded length followed by that many
    bytes of serialized message. Reading is best-effort: it stops silently at
    the first record that is truncated or cannot be parsed, and the messages
    read so far are returned.

    Parameters
    ----------
    filename : str
        Path of the file to read.
    MsgType : type
        Message class; it is instantiated without arguments and filled with
        `ParseFromString` for every record.

    Returns
    -------
    list
        The parsed messages, in file order.
    """
    with open(filename, "rb") as fp:
        buf = fp.read()

    out = []
    n = 0
    while n < len(buf):
        msg_len, start = _DecodeVarint32(buf, n)
        end = start + msg_len
        # A record announcing more bytes than remain is truncated. Slicing
        # would silently shorten it and the remainder might still parse,
        # yielding a partial message, so stop here instead.
        if end > len(buf):
            break
        try:
            msg = MsgType()
            msg.ParseFromString(buf[start:end])
            out.append(msg)
        except Exception:
            # Best effort: keep what was read before the corrupt record
            break
        n = end

    return out
51 | 


--------------------------------------------------------------------------------
/python/bayesmixpy/proto/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/python/bayesmixpy/proto/__init__.py


--------------------------------------------------------------------------------
/python/bayesmixpy/shell_utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import subprocess
 3 | 
 4 | HERE = os.path.dirname(os.path.realpath(__file__))
 5 | 
 6 | 
 7 | def run_shell(cmd, flush_startswith=None, cwd=None):
 8 |     proc = subprocess.Popen(
 9 |             cmd.split(),
10 |             bufsize=1,
11 |             stdin=subprocess.DEVNULL,
12 |             stdout=subprocess.PIPE,
13 |             stderr=subprocess.STDOUT,
14 |             env=os.environ,
15 |             universal_newlines=True,
16 |             cwd=cwd)
17 | 
18 |     while proc.poll() is None:
19 |         if proc.stdout is not None:
20 |             line = proc.stdout.readline()
21 |             line = line.strip()
22 |             if flush_startswith and \
23 |                     line.startswith(flush_startswith):
24 |                 print("\r{0}".format(line), end=' ', flush=True)
25 |             else:
26 |                 print("{0}".format(line))
27 | 
28 | 
def get_env_file():
    """Return the path of the `.env` file located next to this module."""
    env_path = os.path.join(HERE, ".env")
    return env_path
31 | 


--------------------------------------------------------------------------------
/python/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |   "setuptools >= 40.9.0",
4 |   "wheel",
5 | ]
6 | build-backend = "setuptools.build_meta"
7 | 


--------------------------------------------------------------------------------
/python/requirements.txt:
--------------------------------------------------------------------------------
1 | 2to3
2 | matplotlib>=2.0.1
3 | numpy>=1.18.4
4 | protobuf==3.19.5
5 | python-dotenv>=0.20.0
6 | 


--------------------------------------------------------------------------------
/python/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/python/scripts/__init__.py


--------------------------------------------------------------------------------
/python/scripts/populate_benchmark_datasets.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import os
 3 | 
 4 | multivariate_dims = [2, 4, 8]
 5 | N_BY_CLUS = 10
 6 | BASE_PATH = os.path.join("resources", "benchmarks", "datasets")
 7 | BASE_CHAIN_PATH = os.path.join("resources", "benchmarks", "chains")
 8 | 
 9 | if __name__ == '__main__':
10 |     os.makedirs(BASE_PATH, exist_ok=True)
11 |     os.makedirs(BASE_CHAIN_PATH, exist_ok=True)
12 | 
13 |     np.random.seed(2021)
14 | 
15 |     univ_y = np.concatenate(
16 |         [np.random.normal(loc=-5, size=N_BY_CLUS),
17 |          np.random.normal(loc=5, size=N_BY_CLUS)])
18 | 
19 |     fname = os.path.join(BASE_PATH, "univariate_gaussian.csv")
20 |     np.savetxt(fname, univ_y, delimiter=',')
21 | 
22 |     for d in multivariate_dims:
23 |         multiv_y = np.vstack(
24 |             [np.random.normal(loc=-5, size=(N_BY_CLUS, d)),
25 |              np.random.normal(loc=5, size=(N_BY_CLUS, d))])
26 | 
27 |         fname = os.path.join(
28 |             BASE_PATH, "multi_gaussian_dim_{0}.csv".format(d))
29 |         np.savetxt(fname, multiv_y, delimiter=',')
30 | 


--------------------------------------------------------------------------------
/python/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name                          = bayesmixpy
 3 | version                       = 0.0.1
 4 | license                       = Apache 2.0
 5 | license_files                 = LICENSE
 6 | author                        = Mario Beraha
 7 | author_email                  = berahamario@gmail.com
 8 | description                   = BAYESMIXPY: A Python interface to BayesMix.
 9 | long_description              = file: README.md
10 | long_description_content_type = text/markdown
11 | url                           = https://github.com/bayesmix-dev/bayesmix/tree/master/python
12 | classifiers =
13 |     Programming Language :: Python :: 3
14 | 
15 | [options]
16 | packages = find:
17 | python_requires = >=3.6
18 | 


--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import setuptools
 3 | import site
 4 | import sys
 5 | site.ENABLE_USER_SITE = '--user' in sys.argv[1:]
 6 | 
 7 | __version__ = "0.0.1"
 8 | folder = os.path.dirname(__file__)
 9 | path = os.path.join(folder, 'requirements.txt')
10 | install_requires = []
11 | if os.path.exists(path):
12 |   with open(path) as fp:
13 |     install_requires = [line.strip() for line in fp]
14 | 
15 | 
16 | setuptools.setup(version=__version__,
17 |                  install_requires=install_requires)
18 | 


--------------------------------------------------------------------------------
/python/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/python/tests/__init__.py


--------------------------------------------------------------------------------
/python/tests/test_build.py:
--------------------------------------------------------------------------------
1 | from bayesmixpy import build_bayesmix
2 | 
3 | def test_build():
4 |     success = build_bayesmix()
5 |     assert success == True
6 | 


--------------------------------------------------------------------------------
/resources/.gitignore:
--------------------------------------------------------------------------------
1 | # Resources subfolders
2 | csv
3 | asciipb
4 | 


--------------------------------------------------------------------------------
/resources/2d/.gitignore:
--------------------------------------------------------------------------------
 1 | # Several image formats
 2 | *.png
 3 | *.svg
 4 | *.pdf
 5 | *.jpg
 6 | *.jpeg
 7 | *.eps
 8 | # Output files
 9 | *.csv
10 | 


--------------------------------------------------------------------------------
/resources/algo_cond_settings.asciipb:
--------------------------------------------------------------------------------
 1 | ##### GENERIC SETTINGS FOR ALL ALGORITHMS #####
 2 | # Algorithm ID string, e.g. "Neal2"
 3 | algo_id: "BlockedGibbs"
 4 | 
 5 | # RNG initial seed: any nonnegative integer
 6 | rng_seed: 20201124
 7 | 
 8 | # Number of iterations of the algorithm
 9 | iterations: 1000
10 | 
11 | # Number of initial iterations discarded by the algorithm
12 | burnin: 100
13 | 
14 | # Number of clusters into which the data are partitioned at initialization
15 | # (NOTE: If you wish to initialize one datum per cluster, please write 0.)
16 | # (NOTE: This value is ONLY used for initialization, and it may be overwritten
17 | #  by certain mixing objects, such as LogSBMixing. Please check a mixing's
18 | #  initialize() function to know for sure whether or not it will override this
19 | #  value.)
20 | init_num_clusters: 3
21 | 


--------------------------------------------------------------------------------
/resources/algo_marg_settings.asciipb:
--------------------------------------------------------------------------------
 1 | ##### GENERIC SETTINGS FOR ALL ALGORITHMS #####
 2 | # Algorithm ID string, e.g. "Neal2"
 3 | algo_id: "Neal3"
 4 | 
 5 | # RNG initial seed: any nonnegative integer
 6 | rng_seed: 20201124
 7 | 
 8 | # Number of iterations of the algorithm
 9 | iterations: 1100
10 | 
11 | # Number of initial iterations discarded by the algorithm
12 | burnin: 100
13 | 
14 | # Number of clusters into which the data are partitioned at initialization
15 | # (NOTE: If you wish to initialize one datum per cluster, please write 0.)
16 | # (NOTE: This value is ONLY used for initialization, and it may be overwritten
17 | #  by certain mixing objects, such as LogSBMixing. Please check a mixing's
18 | #  initialize() function to know for sure whether or not it will override this
19 | #  value.)
20 | init_num_clusters: 3
21 | 
22 | 
23 | ##### ALGORITHM-SPECIFIC SETTINGS #####
24 | # Neal8 number of auxiliary blocks
25 | # (NOTE: 3 is the recommended value in most cases, please change it only if you
26 | #  know what you're doing.)
27 | neal8_n_aux: 3
28 | 
29 | ##### SPLIT AND MERGE SETTING #####
30 | # Split and Merge number of restricted GS scans for each MH step.
31 | splitmerge_n_restr_gs_updates: 5
32 | 
33 | # Split and Merge number of MH updates for each iteration
34 | splitmerge_n_mh_updates: 1
35 | 
36 | # Split and Merge number of full GS scans for each iteration
37 | splitmerge_n_full_gs_updates: 1
38 | 


--------------------------------------------------------------------------------
/resources/bash/cleanup_tbb.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Run this script if build_tbb.py gives a "busy file" error at line 45.
 4 | 
 5 | # Folders from build_tbb.py
 6 | # stan_math_lib=lib/math/lib
 7 | # tbb_root=lib/math/lib/tbb_2019_U8
 8 | # tbb_debug=lib/math/lib/tbb_debug
 9 | # tbb_release=lib/math/lib/tbb_release
10 | # tbb_dir=lib/math/lib/tbb
11 | rm -rf lib/math/lib/tbb_debug/
12 | mv lib/math/lib/tbb_2019_U8/include/ lib/math/lib/tbb
13 | rm -rf lib/math/lib/tbb_2019_U8/
# Move every entry of the release build folder into the final tbb folder.
for name in lib/math/lib/tbb_release/*; do
  # An unmatched glob is left as the literal pattern: skip it rather than
  # letting mv fail on a non-existent path.
  [ -e "$name" ] || continue
  # Quoted so that names containing spaces or glob characters are moved intact.
  mv "$name" lib/math/lib/tbb
done
17 | rm -rf lib/math/lib/tbb_release
18 | 


--------------------------------------------------------------------------------
/resources/bash/push_containers.sh:
--------------------------------------------------------------------------------
#! /bin/sh
# Builds the bayesmix-env and bayesmix-base images for linux/x86_64 and pushes
# each one right after it is built.

# Stop at the first failing command: without this, a failed `docker build`
# would still be followed by a `docker push` of whatever image already
# carries the tag locally.
set -e

docker build -f docker/env/Dockerfile --platform linux/x86_64 -t mberaha/bayesmix-env .
docker push mberaha/bayesmix-env
docker build -f docker/base/Dockerfile --platform linux/x86_64 -t mberaha/bayesmix-base .
docker push mberaha/bayesmix-base
6 | 


--------------------------------------------------------------------------------
/resources/bash/setup_pre_commit.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | pip3 install pre-commit
4 | pre-commit install --config pre-commit-config.yaml
5 | 


--------------------------------------------------------------------------------
/resources/benchmarks/chains/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/resources/benchmarks/chains/__init__.py


--------------------------------------------------------------------------------
/resources/benchmarks/default_algo_params.asciipb:
--------------------------------------------------------------------------------
 1 | rng_seed: 20210329
 2 | 
 3 | iterations: 11000
 4 | 
 5 | burnin: 100
 6 | 
 7 | init_num_clusters: 5
 8 | 
 9 | neal8_n_aux: 3
10 | 


--------------------------------------------------------------------------------
/resources/datasets/dde_covs_grid.csv:
--------------------------------------------------------------------------------
1 | 95
2 | 


--------------------------------------------------------------------------------
/resources/datasets/galaxy.csv:
--------------------------------------------------------------------------------
 1 | 9.172
 2 | 9.35
 3 | 9.483
 4 | 9.558
 5 | 9.775
 6 | 10.227
 7 | 10.406
 8 | 16.084
 9 | 16.17
10 | 18.419
11 | 18.552
12 | 18.6
13 | 18.927
14 | 19.052
15 | 19.07
16 | 19.33
17 | 19.343
18 | 19.349
19 | 19.44
20 | 19.473
21 | 19.529
22 | 19.541
23 | 19.547
24 | 19.663
25 | 19.846
26 | 19.856
27 | 19.863
28 | 19.914
29 | 19.918
30 | 19.973
31 | 19.989
32 | 20.166
33 | 20.175
34 | 20.179
35 | 20.196
36 | 20.215
37 | 20.221
38 | 20.415
39 | 20.629
40 | 20.795
41 | 20.821
42 | 20.846
43 | 20.875
44 | 20.986
45 | 21.137
46 | 21.492
47 | 21.701
48 | 21.814
49 | 21.921
50 | 21.96
51 | 22.185
52 | 22.209
53 | 22.242
54 | 22.249
55 | 22.314
56 | 22.374
57 | 22.495
58 | 22.746
59 | 22.747
60 | 22.888
61 | 22.914
62 | 23.206
63 | 23.241
64 | 23.263
65 | 23.484
66 | 23.538
67 | 23.542
68 | 23.666
69 | 23.706
70 | 23.711
71 | 24.129
72 | 24.285
73 | 24.289
74 | 24.366
75 | 24.717
76 | 24.99
77 | 25.633
78 | 26.69
79 | 26.995
80 | 32.065
81 | 32.789
82 | 34.279
83 | 


--------------------------------------------------------------------------------
/resources/docker/base/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM mberaha/bayesmix-env:latest
 2 | 
 3 | # Pull master branch to update bayesmix
 4 | RUN git pull
 5 | 
 6 | # Compile test_bayesmix and run_mcmc
 7 | RUN cd build \
 8 |     && cmake -DDISABLE_PLOTS=ON .. \
 9 |     && make test_bayesmix \
10 |     && make run_mcmc
11 | 
12 | LABEL Name=bayesmix-base Version=0.0.1
13 | 


--------------------------------------------------------------------------------
/resources/docker/env/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rocker/r-ubuntu:latest
 2 | 
 3 | # Update repo and install required packages
 4 | RUN apt-get -y update \
 5 |     && apt-get -y upgrade \
 6 |     && apt-get -y install ccache cmake g++ git make pkg-config
 7 | 
 8 | # Install required python packages
 9 | RUN apt-get -y install python3-pip \
10 |     && python3 -m pip install pytest
11 | 
12 | # Install required R packages (also installing protobuf and protoc v. 3.12)
13 | RUN apt-get -y install r-cran-devtools r-cran-testthat r-cran-rprotobuf
14 | 
15 | # Clone bayesmix-dev/bayesmix repository in /usr/bayesmix
16 | RUN git clone https://github.com/bayesmix-dev/bayesmix.git /usr/bayesmix
17 | 
18 | # Set working directory to /usr/bayesmix
19 | WORKDIR /usr/bayesmix
20 | 
21 | # Compile test_bayesmix and run_mcmc
22 | RUN mkdir build && cd build \
23 |     && cmake -DDISABLE_PLOTS=ON .. \
24 |     && make test_bayesmix \
25 |     && make run_mcmc
26 | 
27 | LABEL Name=bayesmix-env Version=0.0.1
28 | 


--------------------------------------------------------------------------------
/resources/docker/test/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM mberaha/bayesmix-base:latest
 2 | 
 3 | # Pull master branch to update bayesmix
 4 | RUN git pull
 5 | 
 6 | # Set working directory to /usr
 7 | WORKDIR /usr
 8 | 
 9 | # Store current version in /usr/bayesmix-update
10 | COPY . bayesmix-update
11 | 
12 | # Generate and apply patch to update bayesmix
13 | RUN diff -ruN -x 'build' -x '_deps' -x '.git' -x '*_pb2.py' bayesmix/ bayesmix-update/ | patch -d bayesmix -p1 \
14 |     && rm -rf bayesmix-update
15 | 
16 | # Set working directory to /usr/bayesmix
17 | WORKDIR /usr/bayesmix
18 | 
19 | # Compile test_bayesmix and run_mcmc after applying changes
20 | RUN cd build \
21 |     && cmake -DDISABLE_PLOTS=ON .. \
22 |     && make test_bayesmix \
23 |     && make run_mcmc
24 | 
25 | # Install bayesmixpy
26 | RUN cd python && python3 -m pip install -e .
27 | 
28 | # Install bayesmixr
29 | RUN cd R && Rscript --vanilla -e "devtools::install('bayesmixr/', quick = T, args = '--clean')"
30 | 
31 | LABEL Name=bayesmix-test Version=0.0.1
32 | 


--------------------------------------------------------------------------------
/resources/patches/matplotplusplus.patch:
--------------------------------------------------------------------------------
 1 | diff --git a/source/3rd_party/CMakeLists.txt b/source/3rd_party/CMakeLists.txt
 2 | index b5656e1..f89b6d1 100644
 3 | --- a/source/3rd_party/CMakeLists.txt
 4 | +++ b/source/3rd_party/CMakeLists.txt
 5 | @@ -114,16 +114,16 @@ if(FFTW_FOUND)
 6 |    target_include_directories(cimg INTERFACE ${FFTW_INCLUDE_DIRS})
 7 |  endif()
 8 | 
 9 | -if (CMAKE_MODULE_PATH)
10 | -  find_package(OpenCV QUIET)
11 | -  if (OpenCV_FOUND)
12 | -    target_compile_definitions(cimg INTERFACE cimg_use_opencv)
13 | -    target_link_libraries(cimg INTERFACE ${OpenCV_LIBRARIES})
14 | -    target_include_directories(cimg INTERFACE ${OpenCV_INCLUDE_DIRS})
15 | -  endif()
16 | -else()
17 | -  message("No CMAKE_MODULE_PATH path for OpenCV configured")
18 | -endif()
19 | +# if (CMAKE_MODULE_PATH)
20 | +#   find_package(OpenCV QUIET)
21 | +#   if (OpenCV_FOUND)
22 | +#     target_compile_definitions(cimg INTERFACE cimg_use_opencv)
23 | +#     target_link_libraries(cimg INTERFACE ${OpenCV_LIBRARIES})
24 | +#     target_include_directories(cimg INTERFACE ${OpenCV_INCLUDE_DIRS})
25 | +#   endif()
26 | +# else()
27 | +#   message("No CMAKE_MODULE_PATH path for OpenCV configured")
28 | +# endif()
29 | 
30 | 
31 |  if(LIBAVCODEC_FOUND AND LIBAVFORMAT_FOUND AND LIBSWSCALE_FOUND AND LIBAVUTIL_FOUND)
32 | 


--------------------------------------------------------------------------------
/resources/tutorial/.gitignore:
--------------------------------------------------------------------------------
1 | # Output folders
2 | out/*.csv
3 | .Rhistory
4 | .RData
5 | plots.R
6 | 


--------------------------------------------------------------------------------
/resources/tutorial/algo.asciipb:
--------------------------------------------------------------------------------
 1 | ##### GENERIC SETTINGS FOR ALL ALGORITHMS #####
 2 | # Algorithm ID string, e.g. "Neal2"
 3 | algo_id: "Neal2"
 4 | 
 5 | # RNG initial seed: any nonnegative integer
 6 | rng_seed: 20201124
 7 | 
 8 | # Number of iterations of the algorithm
 9 | iterations: 1100
10 | 
11 | # Number of initial iterations discarded by the algorithm
12 | burnin: 100
13 | 
14 | # Number of clusters in which data will be first initialized
15 | # (NOTE: If you wish to initialize one datum per cluster, please write 0.)
16 | # (NOTE: This value is ONLY used for initialization, and it may be overwritten
17 | #  by certain mixing objects, such as LogSBMixing. Please check a mixing's
18 | #  initialize() function to know for sure whether or not it will override this
19 | #  value.)
20 | init_num_clusters: 3
21 | 
22 | 
23 | ##### ALGORITHM-SPECIFIC SETTINGS #####
24 | # Neal8 number of auxiliary blocks
25 | # (NOTE: 3 is the recommended value in most cases, please change it only if you
26 | #  know what you're doing.)
27 | neal8_n_aux: 3
28 | 
29 | splitmerge_n_restr_gs_updates: 5
30 | splitmerge_n_mh_updates: 1
31 | splitmerge_n_full_gs_updates: 1
32 | 


--------------------------------------------------------------------------------
/resources/tutorial/dp_gamma.asciipb:
--------------------------------------------------------------------------------
1 | gamma_prior {
2 |   totalmass_prior {
3 |     shape: 4.0
4 |     rate: 2.0
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/resources/tutorial/lapnig_fixed.asciipb:
--------------------------------------------------------------------------------
1 | fixed_values {
2 |   mean: 0
3 |   var: 10
4 |   shape: 2
5 |   scale: 1
6 |   mh_mean_var: 10;
7 |   mh_log_scale_var: 1;
8 | }
9 | 


--------------------------------------------------------------------------------
/resources/tutorial/mfm_fixed.asciipb:
--------------------------------------------------------------------------------
1 | fixed_value {
2 |     lambda: 10
3 |     gamma: 1
4 | }
5 | 


--------------------------------------------------------------------------------
/resources/tutorial/nnig_ngg.asciipb:
--------------------------------------------------------------------------------
 1 | ngg_prior {
 2 |   mean_prior {
 3 |     mean: 5.5
 4 |     var: 2.25
 5 |   }
 6 |   var_scaling_prior {
 7 |     shape: 0.2
 8 |     rate: 0.6
 9 |   }
10 |   shape: 1.5
11 |   scale_prior {
12 |     shape: 4.0
13 |     rate: 2.0
14 |   }
15 | }
16 | 


--------------------------------------------------------------------------------
/resources/tutorial/nnw_ngiw.asciipb:
--------------------------------------------------------------------------------
 1 | ngiw_prior {
 2 |   mean_prior {
 3 |     mean {
 4 |       size: 2
 5 |       data: 5.5
 6 |       data: 5.5
 7 |     }
 8 |     var {
 9 |       rows: 2
10 |       cols: 2
11 |       data: 0.2
12 |       data: 0.0
13 |       data: 0.0
14 |       data: 0.2
15 |     }
16 |   }
17 |   var_scaling_prior {
18 |     shape: 0.2
19 |     rate: 0.6
20 |   }
21 |   deg_free: 5.0
22 |   scale_prior {
23 |     deg_free: 5.0
24 |     scale {
25 |       rows: 2
26 |       cols: 2
27 |       data: 5.0
28 |       data: 0.0
29 |       data: 0.0
30 |       data: 5.0
31 |     }
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/resources/tutorial/out/.gitignore:
--------------------------------------------------------------------------------
 1 | # Several image formats
 2 | *.png
 3 | *.svg
 4 | *.pdf
 5 | *.jpg
 6 | *.jpeg
 7 | *.eps
 8 | # Output files
 9 | *.csv
10 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | target_sources(bayesmix PUBLIC includes.h)
2 | 
3 | add_subdirectory(algorithms)
4 | add_subdirectory(collectors)
5 | add_subdirectory(hierarchies)
6 | add_subdirectory(mixings)
7 | add_subdirectory(runtime)
8 | add_subdirectory(utils)
9 | 


--------------------------------------------------------------------------------
/src/algorithms/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | target_sources(bayesmix
 2 |   PUBLIC
 3 |     base_algorithm.h
 4 |     base_algorithm.cc
 5 |     blocked_gibbs_algorithm.h
 6 |     blocked_gibbs_algorithm.cc
 7 |     conditional_algorithm.h
 8 |     conditional_algorithm.cc
 9 |     marginal_algorithm.h
10 |     marginal_algorithm.cc
11 |     neal2_algorithm.h
12 |     neal2_algorithm.cc
13 |     neal3_algorithm.h
14 |     neal3_algorithm.cc
15 |     neal8_algorithm.h
16 |     neal8_algorithm.cc
17 |     semihdp_sampler.h
18 |     semihdp_sampler.cc
19 |     split_and_merge_algorithm.cc
20 |     split_and_merge_algorithm.h
21 |     slice_sampler.h
22 |     slice_sampler.cc
23 | )
24 | 


--------------------------------------------------------------------------------
/src/algorithms/blocked_gibbs_algorithm.cc:
--------------------------------------------------------------------------------
 1 | #include "blocked_gibbs_algorithm.h"
 2 | 
 3 | #include <stan/math/prim/fun.hpp>
 4 | 
 5 | #include "hierarchy_id.pb.h"
 6 | #include "mixing_id.pb.h"
 7 | #include "src/hierarchies/base_hierarchy.h"
 8 | #include "src/mixings/base_mixing.h"
 9 | #include "src/utils/distributions.h"
10 | #include "src/utils/rng.h"
11 | 
12 | void BlockedGibbsAlgorithm::print_startup_message() const {
13 |   std::string msg = "Running BlockedGibbs algorithm with " +
14 |                     bayesmix::HierarchyId_Name(unique_values[0]->get_id()) +
15 |                     " hierarchies, " +
16 |                     bayesmix::MixingId_Name(mixing->get_id()) + " mixing...";
17 |   std::cout << msg << std::endl;
18 | }
19 | 
20 | void BlockedGibbsAlgorithm::sample_allocations() {
21 |   auto &rng = bayesmix::Rng::Instance().get();
22 |   unsigned int num_components = mixing->get_num_components();
23 |   for (int i = 0; i < data.rows(); i++) {
24 |     // Compute weights
25 |     Eigen::VectorXd logprobas =
26 |         mixing->get_mixing_weights(true, false, mix_covariates.row(i));
27 |     for (int j = 0; j < num_components; j++) {
28 |       logprobas(j) +=
29 |           unique_values[j]->get_like_lpdf(data.row(i), hier_covariates.row(i));
30 |     }
31 |     // Draw a NEW value for datum allocation
32 |     unsigned int c_new =
33 |         bayesmix::categorical_rng(stan::math::softmax(logprobas), rng, 0);
34 |     unsigned int c_old = allocations[i];
35 |     if (c_new != c_old) {
36 |       allocations[i] = c_new;
37 |       // Remove datum from old cluster, add to new
38 |       unique_values[c_old]->remove_datum(
39 |           i, data.row(i), update_hierarchy_params(), hier_covariates.row(i));
40 |       unique_values[c_new]->add_datum(
41 |           i, data.row(i), update_hierarchy_params(), hier_covariates.row(i));
42 |     }
43 |   }
44 | }
45 | 
46 | void BlockedGibbsAlgorithm::sample_unique_values() {
47 |   for (auto &un : unique_values) {
48 |     un->sample_full_cond(!update_hierarchy_params());
49 |   }
50 | }
51 | 
//! Updates the component weights by delegating to the mixing object, which
//! receives the current unique values and allocations.
void BlockedGibbsAlgorithm::sample_weights() {
  mixing->update_state(unique_values, allocations);
}
55 | 


--------------------------------------------------------------------------------
/src/algorithms/blocked_gibbs_algorithm.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_ALGORITHMS_BLOCKED_GIBBS_ALGORITHM_H_
 2 | #define BAYESMIX_ALGORITHMS_BLOCKED_GIBBS_ALGORITHM_H_
 3 | 
 4 | #include "algorithm_id.pb.h"
 5 | #include "conditional_algorithm.h"
 6 | 
//! Class for the blocked Gibbs sampling algorithm.

//! This class implements the blocked Gibbs sampling procedure from [1].
//!
//! [1] Ishwaran, H., & James, L. F. (2001). Gibbs sampling methods for
//! stick-breaking priors. Journal of the American Statistical
//! Association, 96(453), 161-173.

class BlockedGibbsAlgorithm : public ConditionalAlgorithm {
 public:
  BlockedGibbsAlgorithm() = default;
  ~BlockedGibbsAlgorithm() = default;

  //! Returns the protobuf ID identifying this algorithm
  bayesmix::AlgorithmId get_id() const override {
    return bayesmix::AlgorithmId::BlockedGibbs;
  }

  //! Returns a copy of this algorithm. The copy is given its own clone of
  //! the mixing and a deep clone of the first hierarchy in `unique_values`,
  //! which therefore must not be empty when this is called.
  std::shared_ptr<BaseAlgorithm> clone() const override {
    auto out = std::make_shared<BlockedGibbsAlgorithm>(*this);
    out->set_mixing(mixing->clone());
    out->set_hierarchy(unique_values[0]->deep_clone());
    return out;
  }

 protected:
  //! Prints a message naming the algorithm, hierarchy and mixing in use
  void print_startup_message() const override;

  //! Resamples the cluster allocation of every datum
  void sample_allocations() override;

  //! Resamples the state of every hierarchy in `unique_values`
  void sample_unique_values() override;

  //! Updates the state of the mixing from unique values and allocations
  void sample_weights() override;
};
40 | 
41 | #endif  // BAYESMIX_ALGORITHMS_BLOCKED_GIBBS_ALGORITHM_H_
42 | 


--------------------------------------------------------------------------------
/src/algorithms/conditional_algorithm.cc:
--------------------------------------------------------------------------------
 1 | #include "conditional_algorithm.h"
 2 | 
 3 | #include <stan/math/prim/fun.hpp>
 4 | #include <stan/math/rev.hpp>
 5 | 
 6 | #include "algorithm_state.pb.h"
 7 | #include "base_algorithm.h"
 8 | #include "src/collectors/base_collector.h"
 9 | 
10 | Eigen::VectorXd ConditionalAlgorithm::lpdf_from_state(
11 |     const Eigen::MatrixXd &grid, const Eigen::RowVectorXd &hier_covariate,
12 |     const Eigen::RowVectorXd &mix_covariate) {
13 |   // Read mixing state
14 |   unsigned int n_data = curr_state.cluster_allocs_size();
15 |   unsigned int n_clust = curr_state.cluster_states_size();
16 |   mixing->set_state_from_proto(curr_state.mixing_state());
17 |   // Initialize estimate containers
18 |   Eigen::MatrixXd lpdf_local(grid.rows(), n_clust);
19 |   Eigen::VectorXd lpdf_final(grid.rows());
20 |   auto temp_hier = unique_values[0]->clone();
21 |   temp_hier->set_hypers_from_proto(curr_state.hierarchy_hypers());
22 | 
23 |   // Loop over grid points
24 |   for (size_t i = 0; i < grid.rows(); i++) {
25 |     // Get mixing weights for the i-th grid point
26 |     Eigen::VectorXd logweights =
27 |         mixing->get_mixing_weights(true, false, mix_covariate);
28 |     // Loop over clusters
29 |     for (size_t j = 0; j < n_clust; j++) {
30 |       temp_hier->set_state_from_proto(curr_state.cluster_states(j));
31 |       // Get local, single-point estimate
32 |       lpdf_local(i, j) = logweights(j) +
33 |                          temp_hier->get_like_lpdf(grid.row(i), hier_covariate);
34 |     }
35 |     // Final estimate for i-th grid point
36 |     lpdf_final(i) = stan::math::log_sum_exp(lpdf_local.row(i));
37 |   }
38 |   return lpdf_final;
39 | }
40 | 


--------------------------------------------------------------------------------
/src/algorithms/conditional_algorithm.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_ALGORITHMS_CONDITIONAL_ALGORITHM_H_
 2 | #define BAYESMIX_ALGORITHMS_CONDITIONAL_ALGORITHM_H_
 3 | 
 4 | #include <memory>
 5 | #include <stan/math/rev.hpp>
 6 | 
 7 | #include "base_algorithm.h"
 8 | #include "src/collectors/base_collector.h"
 9 | 
/**
 * Abstract class for a conditional sampler deriving from `BaseAlgorithm`.
 *
 * This class implements a generic Gibbs sampling conditional
 * algorithm as the child of the `BaseAlgorithm` class.
 * A mixture model sampled from a conditional algorithm can be expressed as
 *
 * \f[
 *    x_i \mid c_i, \theta_1, \dots, \theta_k &\sim f(x_i \mid \theta_{c_i}) \\
 *    \theta_1, \dots, \theta_k &\sim G_0 \\
 *    c_1, \dots, c_n \mid w_1, \dots, w_k &\sim \text{Cat}(w_1, \dots, w_k) \\
 *    w_1, \dots, w_k &\sim p(w_1, \dots, w_k)
 * \f]
 *
 * where \f$ f(x \mid \theta_j) \f$ is a density for each value of \f$ \theta_j
 * \f$, \f$ c_i \f$ take values in \f$ \{1, \dots, k\} \f$ and \f$ w_1, \dots,
 * w_k \f$ are nonnegative weights whose sum is a.s. 1 (i.e. \f$ p(w_1, \dots,
 * w_k) \f$ is a probability distribution on the k-1 dimensional unit simplex).
 * In this library, each \f$ \theta_j \f$ is represented as an `Hierarchy`
 * object (which inherits from `AbstractHierarchy`), that also holds the
 * information related to the base measure \f$ G_0 \f$ (see
 * `AbstractHierarchy`). The weights \f$ (w_1, \dots, w_k) \f$ are represented
 * as a `Mixing` object, which inherits from `AbstractMixing`.
 *
 * The state of a conditional algorithm consists of the unique values, the
 * cluster allocations and the mixture weights. The former two are stored in
 * this class, while the weights are stored in the `Mixing` object.
 */

class ConditionalAlgorithm : public BaseAlgorithm {
 public:
  ConditionalAlgorithm() = default;
  ~ConditionalAlgorithm() = default;

  //! Always true: every class deriving from this one is conditional
  bool is_conditional() const override { return true; }

  //! Evaluates the log-density of the current state on the rows of `grid`,
  //! using the given hierarchy and mixing covariates
  Eigen::VectorXd lpdf_from_state(
      const Eigen::MatrixXd &grid, const Eigen::RowVectorXd &hier_covariate,
      const Eigen::RowVectorXd &mix_covariate) override;

 protected:
  //! Performs Gibbs sampling sub-step for all component weights
  virtual void sample_weights() = 0;

  //! Performs one iteration, in this order: allocations, unique values,
  //! hierarchy hyperparameters, and finally the component weights
  void step() override {
    sample_allocations();
    sample_unique_values();
    update_hierarchy_hypers();
    sample_weights();
  }
};
61 | 
62 | #endif  // BAYESMIX_ALGORITHMS_CONDITIONAL_ALGORITHM_H_
63 | 


--------------------------------------------------------------------------------
/src/algorithms/load_algorithms.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_ALGORITHMS_LOAD_ALGORITHMS_H_
 2 | #define BAYESMIX_ALGORITHMS_LOAD_ALGORITHMS_H_
 3 | 
 4 | #include <functional>
 5 | #include <memory>
 6 | 
 7 | #include "algorithm_id.pb.h"
 8 | #include "base_algorithm.h"
 9 | #include "blocked_gibbs_algorithm.h"
10 | #include "neal2_algorithm.h"
11 | #include "neal3_algorithm.h"
12 | #include "neal8_algorithm.h"
13 | #include "slice_sampler.h"
14 | #include "split_and_merge_algorithm.h"
15 | #include "src/runtime/factory.h"
16 | 
17 | //! Loads all available `Algorithm` objects into the appropriate factory, so
18 | //! that they are ready to be chosen and used at runtime.
19 | 
20 | template <class AbstractProduct>
21 | using Builder = std::function<std::shared_ptr<AbstractProduct>()>;
22 | 
23 | using AlgorithmFactory = Factory<bayesmix::AlgorithmId, BaseAlgorithm>;
24 | 
25 | __attribute__((constructor)) static void load_algorithms() {
26 |   AlgorithmFactory &factory = AlgorithmFactory::Instance();
27 |   // Initialize factory builders
28 |   Builder<BaseAlgorithm> Neal2builder = []() {
29 |     return std::make_shared<Neal2Algorithm>();
30 |   };
31 |   Builder<BaseAlgorithm> Neal3builder = []() {
32 |     return std::make_shared<Neal3Algorithm>();
33 |   };
34 |   Builder<BaseAlgorithm> Neal8builder = []() {
35 |     return std::make_shared<Neal8Algorithm>();
36 |   };
37 |   Builder<BaseAlgorithm> BlockedGibbsbuilder = []() {
38 |     return std::make_shared<BlockedGibbsAlgorithm>();
39 |   };
40 |   Builder<BaseAlgorithm> SplitAndMergebuilder = []() {
41 |     return std::make_shared<SplitAndMergeAlgorithm>();
42 |   };
43 |   Builder<BaseAlgorithm> SliceBuilder = []() {
44 |     return std::make_shared<SliceSampler>();
45 |   };
46 | 
47 |   factory.add_builder(Neal2Algorithm().get_id(), Neal2builder);
48 |   factory.add_builder(Neal3Algorithm().get_id(), Neal3builder);
49 |   factory.add_builder(Neal8Algorithm().get_id(), Neal8builder);
50 |   factory.add_builder(BlockedGibbsAlgorithm().get_id(), BlockedGibbsbuilder);
51 |   factory.add_builder(SplitAndMergeAlgorithm().get_id(), SplitAndMergebuilder);
52 |   factory.add_builder(SliceSampler().get_id(), SliceBuilder);
53 | }
54 | 
55 | #endif  // BAYESMIX_ALGORITHMS_LOAD_ALGORITHMS_H_
56 | 


--------------------------------------------------------------------------------
/src/algorithms/marginal_algorithm.cc:
--------------------------------------------------------------------------------
#include "marginal_algorithm.h"

#include <cassert>
#include <cmath>
#include <stan/math/prim/fun.hpp>
#include <stan/math/rev.hpp>

#include "algorithm_state.pb.h"
#include "base_algorithm.h"
#include "src/collectors/base_collector.h"
10 | 
11 | void MarginalAlgorithm::remove_singleton(const unsigned int idx) {
12 |   // Relabel allocations
13 |   for (auto &c : allocations) {
14 |     if (c > idx) {
15 |       c -= 1;
16 |     }
17 |   }
18 |   // Remove cluster
19 |   unique_values.erase(unique_values.begin() + idx);
20 | }
21 | 
22 | Eigen::VectorXd MarginalAlgorithm::lpdf_from_state(
23 |     const Eigen::MatrixXd &grid, const Eigen::RowVectorXd &hier_covariate,
24 |     const Eigen::RowVectorXd &mix_covariate) {
25 |   // Read mixing state
26 |   unsigned int n_data = curr_state.cluster_allocs_size();
27 |   unsigned int n_clust = curr_state.cluster_states_size();
28 |   mixing->set_state_from_proto(curr_state.mixing_state());
29 |   // Initialize estimate containers
30 |   Eigen::MatrixXd lpdf_local(grid.rows(), n_clust + 1);
31 |   Eigen::VectorXd lpdf_final(grid.rows());
32 |   auto temp_hier = unique_values[0]->clone();
33 |   temp_hier->set_hypers_from_proto(curr_state.hierarchy_hypers());
34 | 
35 |   for (size_t j = 0; j < n_clust; j++) {
36 |     // Get hierarchy and mass values
37 |     temp_hier->set_state_from_proto(curr_state.cluster_states(j));
38 |     double mass_ex = mixing->get_mass_existing_cluster(
39 |         n_data, n_clust, true, false, temp_hier, mix_covariate);
40 |     // Get local, single-point estimate
41 |     lpdf_local.col(j) =
42 |         temp_hier->like_lpdf_grid(grid, hier_covariate).array() + mass_ex;
43 |   }
44 |   double mass_new = mixing->get_mass_new_cluster(n_data, n_clust, true, false,
45 |                                                  mix_covariate);
46 |   lpdf_local.col(n_clust) =
47 |       lpdf_marginal_component(temp_hier, grid, hier_covariate).array() +
48 |       mass_new;
49 |   // Loop over grid points
50 |   for (size_t i = 0; i < grid.rows(); i++) {
51 |     // Final estimate for i-th grid point
52 |     lpdf_final(i) = stan::math::log_sum_exp(lpdf_local.row(i));
53 |     assert(!isnan(lpdf_final(i)));
54 |   }
55 |   return lpdf_final;
56 | }
57 | 


--------------------------------------------------------------------------------
/src/algorithms/neal2_algorithm.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_ALGORITHMS_NEAL2_ALGORITHM_H_
 2 | #define BAYESMIX_ALGORITHMS_NEAL2_ALGORITHM_H_
 3 | 
 4 | #include <memory>
 5 | #include <stan/math/rev.hpp>
 6 | 
 7 | #include "algorithm_id.pb.h"
 8 | #include "marginal_algorithm.h"
 9 | #include "src/hierarchies/base_hierarchy.h"
10 | 
//! Class for Neal's algorithm 2 for conjugate hierarchies

//! This class implements Neal's Gibbs sampling algorithm 2 from Neal (2000)
//! that generates a Markov chain on the clustering of the provided data.
//!
//! This algorithm requires the use of a `ConjugateHierarchy` object.

class Neal2Algorithm : public MarginalAlgorithm {
 public:
  Neal2Algorithm() = default;
  ~Neal2Algorithm() = default;

  //! Always true for this class; derived classes may override it
  bool requires_conjugate_hierarchy() const override { return true; }

  //! Returns the protobuf ID identifying this algorithm
  bayesmix::AlgorithmId get_id() const override {
    return bayesmix::AlgorithmId::Neal2;
  }

  //! Returns a copy of this algorithm. The copy is given its own clone of
  //! the mixing and a deep clone of the first hierarchy in `unique_values`,
  //! which therefore must not be empty when this is called.
  std::shared_ptr<BaseAlgorithm> clone() const override {
    auto out = std::make_shared<Neal2Algorithm>(*this);
    out->set_mixing(mixing->clone());
    out->set_hierarchy(unique_values[0]->deep_clone());
    return out;
  }

 protected:
  //! Prints a message naming the algorithm, hierarchy and mixing in use
  void print_startup_message() const override;

  //! Resamples the cluster allocation of every datum
  void sample_allocations() override;

  //! Resamples the state of every hierarchy in `unique_values`
  void sample_unique_values() override;

  //! Evaluates the marginal component of the density on a grid
  //! @param hier      Hierarchy used for the evaluation
  //! @param grid      Points of evaluation, one per row
  //! @param covariate Covariate passed on to the hierarchy
  //! @return          One value per row of `grid`
  Eigen::VectorXd lpdf_marginal_component(
      const std::shared_ptr<AbstractHierarchy> hier,
      const Eigen::MatrixXd &grid,
      const Eigen::RowVectorXd &covariate) const override;

  //! Computes prior component of allocation sampling masses for given datum
  //! @param data_idx Index of the considered data point
  //! @return         Allocation weights for the clusters
  virtual Eigen::VectorXd get_cluster_prior_mass(
      const unsigned int data_idx) const;

  //! Computes likelihood component of alloc. sampling masses for given datum
  //! @param data_idx Index of the considered data point
  //! @return         Allocation weights for the clusters
  virtual Eigen::VectorXd get_cluster_lpdf(const unsigned int data_idx) const;
};
59 | 
60 | #endif  // BAYESMIX_ALGORITHMS_NEAL2_ALGORITHM_H_
61 | 


--------------------------------------------------------------------------------
/src/algorithms/neal3_algorithm.cc:
--------------------------------------------------------------------------------
 1 | #include "neal3_algorithm.h"
 2 | 
 3 | #include <memory>
 4 | #include <stan/math/rev.hpp>
 5 | 
 6 | #include "hierarchy_id.pb.h"
 7 | #include "mixing_id.pb.h"
 8 | #include "src/hierarchies/base_hierarchy.h"
 9 | 
10 | void Neal3Algorithm::print_startup_message() const {
11 |   std::string msg = "Running Neal3 algorithm with " +
12 |                     bayesmix::HierarchyId_Name(unique_values[0]->get_id()) +
13 |                     " hierarchies, " +
14 |                     bayesmix::MixingId_Name(mixing->get_id()) + " mixing...";
15 |   std::cout << msg << std::endl;
16 | }
17 | 
18 | Eigen::VectorXd Neal3Algorithm::get_cluster_lpdf(
19 |     const unsigned int data_idx) const {
20 |   unsigned int n_data = data.rows();
21 |   unsigned int n_clust = unique_values.size();
22 |   Eigen::VectorXd loglpdf(n_clust + 1);
23 |   for (size_t j = 0; j < n_clust; j++) {
24 |     // Probability of being assigned to an already existing cluster
25 |     loglpdf(j) = unique_values[j]->conditional_pred_lpdf(
26 |         data.row(data_idx), hier_covariates.row(data_idx));
27 |   }
28 |   // Probability of being assigned to a newly created cluster
29 |   loglpdf(n_clust) = unique_values[0]->prior_pred_lpdf(
30 |       data.row(data_idx), hier_covariates.row(data_idx));
31 |   return loglpdf;
32 | }
33 | 


--------------------------------------------------------------------------------
/src/algorithms/neal3_algorithm.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_ALGORITHMS_NEAL3_ALGORITHM_H_
 2 | #define BAYESMIX_ALGORITHMS_NEAL3_ALGORITHM_H_
 3 | 
 4 | #include "algorithm_id.pb.h"
 5 | #include "neal2_algorithm.h"
 6 | 
//! Class for Neal's algorithm 3 for conjugate hierarchies

//! This class implements Neal's Gibbs sampling algorithm 3 from Neal (2000)
//! that generates a Markov chain on the clustering of the provided data.
//!
//! This algorithm requires the use of a `ConjugateHierarchy` object.
//! Algorithm 3 from Neal (2000) is almost identical to Algorithm 2, except
//! that conjugacy is further exploited by marginalizing the unique values
//! from the state when updating the cluster allocations, which leads to
//! improved efficiency in terms of effective sample size, but might require
//! longer runtimes.
//! For more information, please refer to the `Neal2Algorithm` class, as well
//! as `BaseAlgorithm` and `MarginalAlgorithm` on which it is based.

class Neal3Algorithm : public Neal2Algorithm {
 public:
  Neal3Algorithm() = default;
  ~Neal3Algorithm() = default;

  //! Returns the protobuf ID identifying this algorithm
  bayesmix::AlgorithmId get_id() const override {
    return bayesmix::AlgorithmId::Neal3;
  }

  //! Returns a copy of this algorithm. The copy is given its own clone of
  //! the mixing and a deep clone of the first hierarchy in `unique_values`,
  //! which therefore must not be empty when this is called.
  std::shared_ptr<BaseAlgorithm> clone() const override {
    auto out = std::make_shared<Neal3Algorithm>(*this);
    out->set_mixing(mixing->clone());
    out->set_hierarchy(unique_values[0]->deep_clone());
    return out;
  }

 protected:
  //! Prints a message naming the algorithm, hierarchy and mixing in use
  void print_startup_message() const override;

  //! Always true for this algorithm
  bool update_hierarchy_params() override { return true; }

  //! Computes likelihood component of alloc. sampling masses for given datum
  //! @param data_idx Index of the considered data point
  //! @return         Allocation weights for the clusters
  Eigen::VectorXd get_cluster_lpdf(const unsigned int data_idx) const override;
};
44 | 
45 | #endif  // BAYESMIX_ALGORITHMS_NEAL3_ALGORITHM_H_
46 | 


--------------------------------------------------------------------------------
/src/algorithms/neal8_algorithm.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_ALGORITHMS_NEAL8_ALGORITHM_H_
 2 | #define BAYESMIX_ALGORITHMS_NEAL8_ALGORITHM_H_
 3 | 
 4 | #include <memory>
 5 | #include <stan/math/rev.hpp>
 6 | #include <vector>
 7 | 
 8 | #include "algorithm_id.pb.h"
 9 | #include "neal2_algorithm.h"
10 | 
11 | //! Class for Neal's algorithm 8, which also supports non-conjugate hierarchies
12 | 
13 | //! This class implements Neal's Gibbs sampling algorithm 8 from Neal (2000)
14 | //! that generates a Markov chain on the clustering of the provided data.
15 | //!
16 | //! It extends Neal's algorithm 2 to also deal with cases when the
17 | //! kernel/likelihood f(x | phi) is not conjugate to G, thanks to the
18 | //! introduction of additional, auxiliary unique values.
19 | 
20 | class Neal8Algorithm : public Neal2Algorithm {
21 |  public:
22 |   // DESTRUCTOR AND CONSTRUCTORS
23 |   Neal8Algorithm() = default;
24 |   ~Neal8Algorithm() = default;
25 |   //! Reports that this algorithm does not require a conjugate hierarchy
26 |   bool requires_conjugate_hierarchy() const override { return false; }
27 | 
28 |   //! Returns number of auxiliary blocks
29 |   unsigned int get_n_aux() const { return n_aux; }
30 | 
31 |   //! Sets number of auxiliary blocks; throws std::invalid_argument if it is 0
32 |   void set_n_aux(const unsigned int n_aux_) {
33 |     if (n_aux_ == 0) {
34 |       throw std::invalid_argument("Number of auxiliary block must be > 0");
35 |     }
36 |     n_aux = n_aux_;
37 |   }
38 |   //! Returns the Protobuf ID associated to this algorithm
39 |   bayesmix::AlgorithmId get_id() const override {
40 |     return bayesmix::AlgorithmId::Neal8;
41 |   }
42 |   //! Takes algorithm parameters from a Protobuf message (defined in the .cc file)
43 |   void read_params_from_proto(
44 |       const bayesmix::AlgorithmParams &params) override;
45 |   //! Copy-constructs a new algorithm and hands it its own clones of the mixing and hierarchy
46 |   std::shared_ptr<BaseAlgorithm> clone() const override {
47 |     auto out = std::make_shared<Neal8Algorithm>(*this);
48 |     out->set_mixing(mixing->clone());  // dereferences `mixing`: it must be set before cloning
49 |     out->set_hierarchy(unique_values[0]->deep_clone());  // NOTE(review): assumes unique_values is non-empty - confirm
50 |     return out;
51 |   }
52 | 
53 |  protected:
54 |   void initialize() override;
55 | 
56 |   void print_startup_message() const override;
57 | 
58 |   void sample_allocations() override;
59 |   //! Returns one value per row of `grid` for the given hierarchy and covariate - presumably a marginal lpdf; see the .cc file
60 |   Eigen::VectorXd lpdf_marginal_component(
61 |       const std::shared_ptr<AbstractHierarchy> hier,
62 |       const Eigen::MatrixXd &grid,
63 |       const Eigen::RowVectorXd &covariate) const override;
64 |   //! Returns a vector of prior masses for the datum at index `data_idx` (defined in the .cc file)
65 |   Eigen::VectorXd get_cluster_prior_mass(
66 |       const unsigned int data_idx) const override;
67 |   //! Returns a vector of lpdf values for the datum at index `data_idx` (defined in the .cc file)
68 |   Eigen::VectorXd get_cluster_lpdf(const unsigned int data_idx) const override;
69 | 
70 |   //! Number of auxiliary blocks (defaults to 3; set_n_aux() rejects 0)
71 |   unsigned int n_aux = 3;
72 | 
73 |   //! Vector of auxiliary blocks
74 |   std::vector<std::shared_ptr<AbstractHierarchy>> aux_unique_values;
75 | };
76 | 
77 | #endif  // BAYESMIX_ALGORITHMS_NEAL8_ALGORITHM_H_
78 | 


--------------------------------------------------------------------------------
/src/algorithms/slice_sampler.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_ALGORITHMS_SLICE_SAMPLER_H_
 2 | #define BAYESMIX_ALGORITHMS_SLICE_SAMPLER_H_
 3 | 
 4 | #include "algorithm_id.pb.h"
 5 | #include "conditional_algorithm.h"
 6 | #include "src/mixings/truncated_sb_mixing.h"
 7 | 
 8 | //! This class implements the efficient slice sampler from [1].
 9 | //!
10 | //! [1] Kalli, M., Griffin, J. E., & Walker, S. G. (2011).
11 | //!     Slice sampling mixture models. Statistics and Computing.
12 | 
13 | class SliceSampler : public ConditionalAlgorithm {
14 |  public:
15 |   SliceSampler() = default;
16 |   ~SliceSampler() = default;
17 |   //! Overrides the base-class initialization (defined in the .cc file)
18 |   void initialize() override;
19 |   //! Overrides the base-class step (defined in the .cc file)
20 |   void step() override;
21 |   //! Returns the Protobuf ID associated to this algorithm
22 |   bayesmix::AlgorithmId get_id() const override {
23 |     return bayesmix::AlgorithmId::Slice;
24 |   }
25 |   //! Copy-constructs a new sampler and hands it its own clones of the mixing and hierarchy
26 |   std::shared_ptr<BaseAlgorithm> clone() const override {
27 |     auto out = std::make_shared<SliceSampler>(*this);
28 |     out->set_mixing(mixing->clone());  // `mixing` here is the TruncatedSBMixing member declared below
29 |     out->set_hierarchy(unique_values[0]->deep_clone());  // NOTE(review): assumes unique_values is non-empty - confirm
30 |     return out;
31 |   }
32 |   //! Public, non-virtual sampling step specific to this class (defined in the .cc file)
33 |   void sample_slice();
34 | 
35 |  protected:
36 |   void print_startup_message() const override;
37 | 
38 |   void sample_allocations() override;
39 | 
40 |   void sample_unique_values() override;
41 | 
42 |   void sample_weights() override;
43 |   //! Presumably the slice variables updated by sample_slice() - TODO confirm
44 |   Eigen::VectorXd slice_u;
45 |   //! NOTE(review): clone() goes through set_mixing(), which suggests a base class also stores a mixing; if so this member hides it - confirm both stay in sync
46 |   std::shared_ptr<TruncatedSBMixing> mixing;
47 | };
48 | 
49 | #endif  // BAYESMIX_ALGORITHMS_SLICE_SAMPLER_H_
50 | 


--------------------------------------------------------------------------------
/src/collectors/.gitignore:
--------------------------------------------------------------------------------
1 | # Protocol Buffers implementation files (version-dependent)
2 | *.pb.cc
3 | *.pb.h
4 | 


--------------------------------------------------------------------------------
/src/collectors/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | target_sources(bayesmix
2 |   PUBLIC
3 |     base_collector.h
4 |     file_collector.h
5 |     file_collector.cc
6 |     memory_collector.h
7 |     memory_collector.cc
8 | )
9 | 


--------------------------------------------------------------------------------
/src/collectors/memory_collector.cc:
--------------------------------------------------------------------------------
 1 | #include "memory_collector.h"
 2 | 
 3 | void MemoryCollector::collect(const google::protobuf::Message& state) {  // Stores one state in serialized form
 4 |   std::string s;
 5 |   state.SerializeToString(&s);  // return value (success flag) is not checked
 6 |   chain.push_back(s);
 7 |   size++;  // kept in step with the number of stored strings
 8 | }
 9 | 
10 | void MemoryCollector::get_state(const unsigned int i,  // Parses the i-th stored string into `out`
11 |                                 google::protobuf::Message* out) {
12 |   out->ParseFromString(chain[i]);  // no bounds check on `i`; the parse result is not checked
13 | }
14 | 
15 | void MemoryCollector::reset() { curr_iter = 0; }  // Rewinds the cursor read by next_state(); `chain` and `size` are left untouched
16 | 
17 | bool MemoryCollector::next_state(google::protobuf::Message* const out) {  // Parses the state at the cursor into `out`, then advances
18 |   if (curr_iter == size) {  // cursor is past the last stored state
19 |     return false;  // `out` is left unmodified in this case
20 |   }
21 |   out->ParseFromString(chain[curr_iter]);  // parse result is not checked
22 |   curr_iter++;
23 |   return true;
24 | }
25 | 


--------------------------------------------------------------------------------
/src/hierarchies/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | target_sources(bayesmix
 2 |   PUBLIC
 3 |     abstract_hierarchy.h
 4 |     base_hierarchy.h
 5 |     nnig_hierarchy.h
 6 |     nnxig_hierarchy.h
 7 |     nnw_hierarchy.h
 8 |     lin_reg_uni_hierarchy.h
 9 |     fa_hierarchy.h
10 |     lapnig_hierarchy.h
11 | )
12 | 
13 | add_subdirectory(likelihoods)
14 | add_subdirectory(priors)
15 | add_subdirectory(updaters)
16 | 


--------------------------------------------------------------------------------
/src/hierarchies/lapnig_hierarchy.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_LAPNIG_HIERARCHY_H_
 2 | #define BAYESMIX_HIERARCHIES_LAPNIG_HIERARCHY_H_
 3 | 
 4 | #include "base_hierarchy.h"
 5 | #include "hierarchy_id.pb.h"
 6 | #include "likelihoods/laplace_likelihood.h"
 7 | #include "priors/nxig_prior_model.h"
 8 | #include "updaters/mala_updater.h"
 9 | 
10 | /**
11 |  * Laplace Normal-InverseGamma hierarchy for univariate data.
12 |  *
13 |  * This class represents a hierarchical model where data are distributed
14 |  * according to a Laplace likelihood (see the `LaplaceLikelihood` class for
15 |  * details). The likelihood parameters have a Normal x InverseGamma centering
16 |  * distribution (see the `NxIGPriorModel` class for details). That is:
17 |  *
18 |  * \f[
19 |  *    f(x_i \mid \mu,\sigma^2) &= Laplace(\mu,\sqrt{\sigma^2/2})\\
20 |  *    \mu &\sim N(\mu_0,\eta^2) \\
21 |  *    \sigma^2 &\sim InvGamma(a, b)
22 |  * \f]
23 |  * The state is composed of mean and variance (thus the scale for the Laplace
24 |  * distribution is \f$ \sqrt{\sigma^2/2} \f$). The state hyperparameters are
25 |  * \f$(\mu_0, \eta^2, a, b)\f$, all scalar values. Note that this hierarchy
26 |  * is NOT conjugate, thus the marginal distribution is not available in closed
27 |  * form.
28 |  */
29 | 
30 | class LapNIGHierarchy
31 |     : public BaseHierarchy<LapNIGHierarchy, LaplaceLikelihood,
32 |                            NxIGPriorModel> {
33 |  public:
34 |   LapNIGHierarchy() = default;
35 |   ~LapNIGHierarchy() = default;
36 | 
37 |   //! Returns the Protobuf ID associated to this class
38 |   bayesmix::HierarchyId get_id() const override {
39 |     return bayesmix::HierarchyId::LapNIG;
40 |   }
41 | 
42 |   //! Sets the default updater algorithm for this hierarchy (a MalaUpdater)
43 |   void set_default_updater() { updater = std::make_shared<MalaUpdater>(); }
44 | 
45 |   //! Initializes state parameters to appropriate values
46 |   void initialize_state() override {
47 |     // Get hypers
48 |     auto hypers = prior->get_hypers();
49 |     // Initialize likelihood state
50 |     State::UniLS state;
51 |     state.mean = hypers.mean;  // start the mean at the prior's `mean` hyperparameter
52 |     state.var = hypers.scale / (hypers.shape + 1);  // presumably the mode of an InvGamma(shape, scale) prior - TODO confirm
53 |     like->set_state(state);
54 |   };
55 | };
56 | 
57 | #endif  // BAYESMIX_HIERARCHIES_LAPNIG_HIERARCHY_H_
58 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | target_sources(bayesmix PUBLIC
 2 |     likelihood_internal.h
 3 |     abstract_likelihood.h
 4 |     base_likelihood.h
 5 |     uni_norm_likelihood.h
 6 |     uni_norm_likelihood.cc
 7 |     multi_norm_likelihood.h
 8 |     multi_norm_likelihood.cc
 9 |     uni_lin_reg_likelihood.h
10 |     uni_lin_reg_likelihood.cc
11 |     laplace_likelihood.h
12 |     laplace_likelihood.cc
13 |     fa_likelihood.h
14 |     fa_likelihood.cc
15 | )
16 | 
17 | add_subdirectory(states)
18 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/fa_likelihood.cc:
--------------------------------------------------------------------------------
 1 | #include "fa_likelihood.h"
 2 | 
 3 | #include "src/utils/distributions.h"
 4 | 
 5 | void FALikelihood::clear_summary_statistics() {  // Resets the running data sum
 6 |   data_sum = Eigen::VectorXd::Zero(dim);  // also resizes data_sum to the current `dim`
 7 | }
 8 | 
 9 | double FALikelihood::compute_lpdf(const Eigen::RowVectorXd& datum) const {  // lpdf of one datum under the current state
10 |   return bayesmix::multi_normal_lpdf_woodbury_chol(  // helper from src/utils/distributions.h
11 |       datum, state.mu, state.psi_inverse, state.cov_wood, state.cov_logdet);
12 | }
13 | 
14 | void FALikelihood::update_sum_stats(const Eigen::RowVectorXd& datum,  // Adds or removes one datum from the running sum
15 |                                     bool add) {
16 |   if (add) {
17 |     data_sum += datum;  // row vector into column vector: relies on Eigen's implicit vector transposition
18 |   } else {
19 |     data_sum -= datum;
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/fa_likelihood.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_FA_LIKELIHOOD_H_
 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_FA_LIKELIHOOD_H_
 3 | 
 4 | #include <google/protobuf/stubs/casts.h>
 5 | 
 6 | #include <memory>
 7 | #include <stan/math/prim.hpp>
 8 | #include <stan/math/rev.hpp>
 9 | #include <vector>
10 | 
11 | #include "algorithm_state.pb.h"
12 | #include "base_likelihood.h"
13 | #include "states/includes.h"
14 | 
15 | /**
16 |  * A gaussian factor analytic likelihood, using the `State::FA` state.
17 |  * Represents the model:
18 |  *
19 |  * \f[
20 |  *    \bm{y}_1,\dots,\bm{y}_k \stackrel{\small\mathrm{iid}}{\sim} N_p(\bm{\mu},
21 |  * \Sigma + \Lambda\Lambda^T), \f]
22 |  *
23 |  * where Lambda is a \f$ p \times d \f$ matrix, usually \f$ d << p \f$ and \f$
24 |  * \Sigma \f$ is a diagonal matrix. Parameters are stored in a `State::FA`
25 |  * state. We store as summary statistics the sum of the \f$ \bm{y}_i \f$'s, but
26 |  * it is not sufficient for all the updates involved. Therefore, all the
27 |  * observations allocated to a cluster are processed when computing the
28 |  * cluster lpdf.
29 |  */
30 | 
31 | class FALikelihood : public BaseLikelihood<FALikelihood, State::FA> {
32 |  public:
33 |   FALikelihood() = default;
34 |   ~FALikelihood() = default;
35 |   bool is_multivariate() const override { return true; };
36 |   bool is_dependent() const override { return false; };
37 |   void clear_summary_statistics() override;  // zeroes data_sum at the current dimension
38 |   void set_dim(unsigned int dim_) {  // stores the dimension and resets the summary statistics to match it
39 |     dim = dim_;
40 |     clear_summary_statistics();
41 |   };
42 |   unsigned int get_dim() const { return dim; };  // 0 until set_dim() has been called
43 |   Eigen::VectorXd get_data_sum() const { return data_sum; };  // returns a copy of the running sum
44 | 
45 |  protected:
46 |   double compute_lpdf(const Eigen::RowVectorXd& datum) const override;
47 |   void update_sum_stats(const Eigen::RowVectorXd& datum, bool add) override;
48 |   // Zero-initialized so that get_dim() never reads an indeterminate value
49 |   unsigned int dim = 0;
50 |   Eigen::VectorXd data_sum;  // sum of the data added via update_sum_stats()
51 | };
52 | 
53 | #endif  // BAYESMIX_HIERARCHIES_LIKELIHOODS_FA_LIKELIHOOD_H_
54 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/laplace_likelihood.cc:
--------------------------------------------------------------------------------
1 | #include "laplace_likelihood.h"
2 | 
3 | double LaplaceLikelihood::compute_lpdf(const Eigen::RowVectorXd &datum) const {  // lpdf of one scalar datum
4 |   return stan::math::double_exponential_lpdf(
5 |       datum(0), state.mean, stan::math::sqrt(state.var / 2.0));  // only datum(0) is read; scale = sqrt(var / 2)
6 | }
7 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/laplace_likelihood.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_LAPLACE_LIKELIHOOD_H_
 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_LAPLACE_LIKELIHOOD_H_
 3 | 
 4 | #include <google/protobuf/stubs/casts.h>
 5 | 
 6 | #include <memory>
 7 | #include <stan/math/rev.hpp>
 8 | #include <vector>
 9 | 
10 | #include "algorithm_state.pb.h"
11 | #include "base_likelihood.h"
12 | #include "states/includes.h"
13 | 
14 | /**
15 |  * A univariate Laplace likelihood, using the `State::UniLS` state. Represents
16 |  * the model:
17 |  *
18 |  * \f[
19 |  *    y_1,\dots,y_k \mid \mu, \sigma^2 \stackrel{\small\mathrm{iid}}{\sim}
20 |  * Laplace(\mu,\sigma^2), \f]
21 |  *
22 |  * where \f$ \mu \f$ is the mean and center of the distribution
23 |  * and \f$ \sigma^2 \f$ is the variance. The scale parameter \f$ \lambda \f$ is
24 |  * then \f$ \sqrt{\sigma^2/2} \f$. These parameters are stored in a
25 |  * `State::UniLS` state. Since the Laplace likelihood does not have sufficient
26 |  * statistics other than the whole sample, the `update_sum_stats()` method does
27 |  * nothing.
28 |  */
29 | 
30 | class LaplaceLikelihood
31 |     : public BaseLikelihood<LaplaceLikelihood, State::UniLS> {
32 |  public:
33 |   LaplaceLikelihood() = default;
34 |   ~LaplaceLikelihood() = default;
35 |   bool is_multivariate() const override { return false; };
36 |   bool is_dependent() const override { return false; };
37 |   void clear_summary_statistics() override { return; };  // no summary statistics are stored
38 |   //! Sums the Laplace lpdf over the rows listed in cluster_data_idx, with mean = params(0) and var = positive_constrain(params(1))
39 |   template <typename T>
40 |   T cluster_lpdf_from_unconstrained(
41 |       const Eigen::Matrix<T, Eigen::Dynamic, 1> &unconstrained_params) const {
42 |     assert(unconstrained_params.size() == 2);
43 | 
44 |     T mean = unconstrained_params(0);
45 |     T var = stan::math::positive_constrain(unconstrained_params(1));
46 | 
47 |     T out = 0.;
48 |     for (auto it = cluster_data_idx.begin(); it != cluster_data_idx.end();
49 |          ++it) {
50 |       out += stan::math::double_exponential_lpdf(dataset_ptr->row(*it), mean,  // whole row is passed; presumably one column wide - TODO confirm
51 |                                                  stan::math::sqrt(var / 2.0));  // same scale as compute_lpdf: sqrt(var / 2)
52 |     }
53 |     return out;
54 |   }
55 | 
56 |  protected:
57 |   double compute_lpdf(const Eigen::RowVectorXd &datum) const override;
58 |   void update_sum_stats(const Eigen::RowVectorXd &datum, bool add) override {  // intentionally a no-op
59 |     return;
60 |   };
61 | };
62 | 
63 | #endif  // BAYESMIX_HIERARCHIES_LIKELIHOODS_LAPLACE_LIKELIHOOD_H_
64 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/multi_norm_likelihood.cc:
--------------------------------------------------------------------------------
 1 | #include "multi_norm_likelihood.h"
 2 | 
 3 | #include "src/utils/distributions.h"
 4 | #include "src/utils/eigen_utils.h"
 5 | #include "src/utils/proto_utils.h"
 6 | 
 7 | double MultiNormLikelihood::compute_lpdf(  // lpdf of one datum under the current state
 8 |     const Eigen::RowVectorXd &datum) const {
 9 |   return bayesmix::multi_normal_prec_lpdf(datum, state.mean, state.prec_chol,  // helper from src/utils/distributions.h
10 |                                           state.prec_logdet);
11 | }
12 | 
13 | void MultiNormLikelihood::update_sum_stats(const Eigen::RowVectorXd &datum,  // Adds or removes one datum from the running statistics
14 |                                            bool add) {
15 |   // Check if dim is not defined yet (this usually doesn't happen if the
16 |   // hierarchy is initialized)
17 |   if (!dim) set_dim(datum.size());  // relies on `dim` being 0 before the first set_dim(); set_dim() also zeroes the statistics
18 |   // Updates
19 |   if (add) {
20 |     data_sum += datum.transpose();
21 |     data_sum_squares += datum.transpose() * datum;  // outer product: a dim x dim matrix
22 |   } else {
23 |     data_sum -= datum.transpose();
24 |     data_sum_squares -= datum.transpose() * datum;
25 |   }
26 | }
27 | 
28 | void MultiNormLikelihood::clear_summary_statistics() {  // Resets both running statistics to zero
29 |   data_sum = Eigen::VectorXd::Zero(dim);  // also resizes to the current `dim`
30 |   data_sum_squares = Eigen::MatrixXd::Zero(dim, dim);
31 | }
32 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/multi_norm_likelihood.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_MULTI_NORM_LIKELIHOOD_H_
 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_MULTI_NORM_LIKELIHOOD_H_
 3 | 
 4 | #include <google/protobuf/stubs/casts.h>
 5 | 
 6 | #include <memory>
 7 | #include <stan/math/prim.hpp>
 8 | #include <stan/math/rev.hpp>
 9 | #include <vector>
10 | 
11 | #include "algorithm_state.pb.h"
12 | #include "base_likelihood.h"
13 | #include "states/includes.h"
14 | 
15 | /**
16 |  * A multivariate normal likelihood, using the `State::MultiLS` state.
17 |  * Represents the model:
18 |  *
19 |  * \f[
20 |  *    \bm{y}_1,\dots, \bm{y}_k \stackrel{\small\mathrm{iid}}{\sim}
21 |  * N_p(\bm{\mu}, \Sigma), \f]
22 |  *
23 |  * where \f$ (\bm{\mu}, \Sigma) \f$ are stored in a `State::MultiLS` state.
24 |  * The sufficient statistics stored are the sum of the \f$ \bm{y}_i \f$'s
25 |  * and the sum of \f$ \bm{y}_i^T \bm{y}_i \f$.
26 |  */
27 | 
28 | class MultiNormLikelihood
29 |     : public BaseLikelihood<MultiNormLikelihood, State::MultiLS> {
30 |  public:
31 |   MultiNormLikelihood() = default;
32 |   ~MultiNormLikelihood() = default;
33 |   bool is_multivariate() const override { return true; };
34 |   bool is_dependent() const override { return false; };
35 |   void clear_summary_statistics() override;  // zeroes both statistics at the current dimension
36 |   //! Stores the dimension and resets the summary statistics to match it
37 |   void set_dim(unsigned int dim_) {
38 |     dim = dim_;
39 |     clear_summary_statistics();
40 |   };
41 |   unsigned int get_dim() const { return dim; };  // 0 until set_dim() has been called
42 |   Eigen::VectorXd get_data_sum() const { return data_sum; };  // returns a copy
43 |   Eigen::MatrixXd get_data_sum_squares() const { return data_sum_squares; };  // returns a copy
44 | 
45 |  protected:
46 |   double compute_lpdf(const Eigen::RowVectorXd &datum) const override;
47 |   void update_sum_stats(const Eigen::RowVectorXd &datum, bool add) override;
48 |   // Zero-initialized: update_sum_stats() tests `!dim` to detect an unset dimension
49 |   unsigned int dim = 0;
50 |   Eigen::VectorXd data_sum;  // running sum of the data
51 |   Eigen::MatrixXd data_sum_squares;  // running sum of the outer products of the data
52 | };
53 | 
54 | #endif  // BAYESMIX_HIERARCHIES_LIKELIHOODS_MULTI_NORM_LIKELIHOOD_H_
55 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/states/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | target_sources(bayesmix PUBLIC
2 |     includes.h
3 |     base_state.h
4 |     uni_ls_state.h
5 |     multi_ls_state.h
6 |     uni_lin_reg_ls_state.h
7 |     fa_state.h
8 | )
9 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/states/includes.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_STATES_INCLUDES_H_
 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_STATES_INCLUDES_H_
 3 | 
 4 | #include "fa_state.h"
 5 | #include "multi_ls_state.h"
 6 | #include "uni_lin_reg_ls_state.h"
 7 | #include "uni_ls_state.h"
 8 | 
 9 | #endif  // BAYESMIX_HIERARCHIES_LIKELIHOODS_STATES_INCLUDES_H_
10 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/uni_lin_reg_likelihood.cc:
--------------------------------------------------------------------------------
 1 | #include "uni_lin_reg_likelihood.h"
 2 | 
 3 | #include "src/utils/eigen_utils.h"
 4 | 
 5 | void UniLinRegLikelihood::clear_summary_statistics() {  // Resets all three running statistics to zero
 6 |   mixed_prod = Eigen::VectorXd::Zero(dim);  // also resizes to the current `dim`
 7 |   data_sum_squares = 0.0;
 8 |   covar_sum_squares = Eigen::MatrixXd::Zero(dim, dim);
 9 | }
10 | 
11 | double UniLinRegLikelihood::compute_lpdf(  // Normal lpdf of one response given its covariate
12 |     const Eigen::RowVectorXd &datum,
13 |     const Eigen::RowVectorXd &covariate) const {
14 |   return stan::math::normal_lpdf(
15 |       datum(0), state.regression_coeffs.dot(covariate), sqrt(state.var));  // mean = coeffs . covariate, sd = sqrt(var); only datum(0) is read
16 | }
17 | 
18 | void UniLinRegLikelihood::update_sum_stats(const Eigen::RowVectorXd &datum,  // Adds or removes one (datum, covariate) pair from the statistics
19 |                                            const Eigen::RowVectorXd &covariate,
20 |                                            bool add) {  // NOTE(review): no dimension check here; presumably set_dim() must be called first - confirm
21 |   if (add) {
22 |     data_sum_squares += datum(0) * datum(0);
23 |     covar_sum_squares += covariate.transpose() * covariate;  // outer product of the covariate with itself
24 |     mixed_prod += datum(0) * covariate.transpose();
25 |   } else {
26 |     data_sum_squares -= datum(0) * datum(0);
27 |     covar_sum_squares -= covariate.transpose() * covariate;
28 |     mixed_prod -= datum(0) * covariate.transpose();
29 |   }
30 | }
31 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/uni_lin_reg_likelihood.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_LIN_REG_LIKELIHOOD_H_
 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_LIN_REG_LIKELIHOOD_H_
 3 | 
 4 | #include <google/protobuf/stubs/casts.h>
 5 | 
 6 | #include <memory>
 7 | #include <stan/math/rev.hpp>
 8 | #include <vector>
 9 | 
10 | #include "algorithm_state.pb.h"
11 | #include "base_likelihood.h"
12 | #include "states/includes.h"
13 | 
14 | /**
15 |  * A scalar linear regression model, using the `State::UniLinRegLS` state.
16 |  * Represents the model:
17 |  *
18 |  * \f[
19 |  *    y_i \mid \bm{x}_i, \bm{\beta}, \sigma^2
20 |  * \stackrel{\small\mathrm{ind}}{\sim} N(\bm{x}_i^T\bm{\beta},\sigma^2), \f]
21 |  *
22 |  * where \f$ (\bm{\beta}, \sigma^2) \f$ are stored in a `State::UniLinRegLS`
23 |  * state. The sufficient statistics stored are the sum of \f$ y_i^2 \f$, the
24 |  * sum of \f$ \bm{x}_i^T \bm{x}_i \f$ and the sum of \f$ y_i \bm{x}_i^T \f$.
25 |  */
26 | 
27 | class UniLinRegLikelihood
28 |     : public BaseLikelihood<UniLinRegLikelihood, State::UniLinRegLS> {
29 |  public:
30 |   UniLinRegLikelihood() = default;
31 |   ~UniLinRegLikelihood() = default;
32 |   bool is_multivariate() const override { return false; };
33 |   bool is_dependent() const override { return true; };
34 |   void clear_summary_statistics() override;  // zeroes all three statistics at the current dimension
35 | 
36 |   // Getters and Setters
37 |   unsigned int get_dim() const { return dim; };  // 0 until set_dim() has been called
38 |   void set_dim(unsigned int dim_) {  // stores the dimension and resets the summary statistics to match it
39 |     dim = dim_;
40 |     clear_summary_statistics();
41 |   };
42 |   double get_data_sum_squares() const { return data_sum_squares; };
43 |   Eigen::MatrixXd get_covar_sum_squares() const { return covar_sum_squares; };  // returns a copy
44 |   Eigen::VectorXd get_mixed_prod() const { return mixed_prod; };  // returns a copy
45 | 
46 |  protected:
47 |   double compute_lpdf(const Eigen::RowVectorXd &datum,
48 |                       const Eigen::RowVectorXd &covariate) const override;
49 |   void update_sum_stats(const Eigen::RowVectorXd &datum,
50 |                         const Eigen::RowVectorXd &covariate,
51 |                         bool add) override;
52 | 
53 |   // Dimension of the coefficients vector (zero-initialized so the getter is always well-defined)
54 |   unsigned int dim = 0;
55 |   // Represents pieces of y^t y (zero-initialized so the getter is always well-defined)
56 |   double data_sum_squares = 0.0;
57 |   // Represents pieces of X^T X
58 |   Eigen::MatrixXd covar_sum_squares;
59 |   // Represents pieces of X^t y
60 |   Eigen::VectorXd mixed_prod;
61 | };
62 | 
63 | #endif  // BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_LIN_REG_LIKELIHOOD_H_
64 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/uni_norm_likelihood.cc:
--------------------------------------------------------------------------------
 1 | #include "uni_norm_likelihood.h"
 2 | 
 3 | double UniNormLikelihood::compute_lpdf(const Eigen::RowVectorXd &datum) const {  // Normal lpdf of one scalar datum
 4 |   return stan::math::normal_lpdf(datum(0), state.mean, sqrt(state.var));  // only datum(0) is read; sd = sqrt(var)
 5 | }
 6 | 
 7 | void UniNormLikelihood::update_sum_stats(const Eigen::RowVectorXd &datum,  // Adds or removes one datum from the running statistics
 8 |                                          bool add) {
 9 |   if (add) {
10 |     data_sum += datum(0);  // only the first entry of `datum` is used
11 |     data_sum_squares += datum(0) * datum(0);
12 |   } else {
13 |     data_sum -= datum(0);
14 |     data_sum_squares -= datum(0) * datum(0);
15 |   }
16 | }
17 | 
18 | void UniNormLikelihood::clear_summary_statistics() {  // Resets both running statistics to zero
19 |   data_sum = 0;
20 |   data_sum_squares = 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/src/hierarchies/likelihoods/uni_norm_likelihood.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_NORM_LIKELIHOOD_H_
 2 | #define BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_NORM_LIKELIHOOD_H_
 3 | 
 4 | #include <google/protobuf/stubs/casts.h>
 5 | 
 6 | #include <memory>
 7 | #include <stan/math/rev.hpp>
 8 | #include <vector>
 9 | 
10 | #include "algorithm_state.pb.h"
11 | #include "base_likelihood.h"
12 | #include "states/includes.h"
13 | 
14 | /**
15 |  * A univariate normal likelihood, using the `State::UniLS` state. Represents
16 |  * the model:
17 |  *
18 |  * \f[
19 |  *    y_1, \dots, y_k \mid \mu, \sigma^2 \stackrel{\small\mathrm{iid}}{\sim}
20 |  * N(\mu, \sigma^2), \f]
21 |  *
22 |  * where \f$ (\mu, \sigma^2) \f$ are stored in a `State::UniLS` state.
23 |  * The sufficient statistics stored are the sum of the \f$ y_i \f$'s and the
24 |  * sum of \f$ y_i^2 \f$.
25 |  */
26 | 
27 | class UniNormLikelihood
28 |     : public BaseLikelihood<UniNormLikelihood, State::UniLS> {
29 |  public:
30 |   UniNormLikelihood() = default;
31 |   ~UniNormLikelihood() = default;
32 |   bool is_multivariate() const override { return false; };
33 |   bool is_dependent() const override { return false; };
34 |   void clear_summary_statistics() override;  // zeroes data_sum and data_sum_squares
35 |   double get_data_sum() const { return data_sum; };
36 |   double get_data_sum_squares() const { return data_sum_squares; };
37 |   //! Evaluates the normal log-likelihood from the stored statistics, with mean = params(0) and var = positive_constrain(params(1))
38 |   template <typename T>
39 |   T cluster_lpdf_from_unconstrained(
40 |       const Eigen::Matrix<T, Eigen::Dynamic, 1> &unconstrained_params) const {
41 |     assert(unconstrained_params.size() == 2);
42 |     T mean = unconstrained_params(0);
43 |     T var = stan::math::positive_constrain(unconstrained_params(1));
44 |     T out = -(data_sum_squares - 2 * mean * data_sum + card * mean * mean) /  // expanded sum of squared deviations; `card` is presumably the cluster size - TODO confirm
45 |             (2 * var);
46 |     out -= card * 0.5 * stan::math::log(stan::math::TWO_PI * var);  // normalizing term, scaled by `card`
47 |     return out;
48 |   }
49 | 
50 |  protected:
51 |   double compute_lpdf(const Eigen::RowVectorXd &datum) const override;
52 |   void update_sum_stats(const Eigen::RowVectorXd &datum, bool add) override;
53 |   // Running statistics maintained by update_sum_stats()
54 |   double data_sum = 0;
55 |   double data_sum_squares = 0;
56 | };
57 | 
58 | #endif  // BAYESMIX_HIERARCHIES_LIKELIHOODS_UNI_NORM_LIKELIHOOD_H_
59 | 


--------------------------------------------------------------------------------
/src/hierarchies/load_hierarchies.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_LOAD_HIERARCHIES_H_
 2 | #define BAYESMIX_HIERARCHIES_LOAD_HIERARCHIES_H_
 3 | 
 4 | #include <functional>
 5 | #include <memory>
 6 | 
 7 | #include "abstract_hierarchy.h"
 8 | #include "fa_hierarchy.h"
 9 | #include "hierarchy_id.pb.h"
10 | #include "lapnig_hierarchy.h"
11 | #include "lin_reg_uni_hierarchy.h"
12 | #include "nnig_hierarchy.h"
13 | #include "nnw_hierarchy.h"
14 | #include "nnxig_hierarchy.h"
15 | #include "src/runtime/factory.h"
16 | 
17 | //! Loads all available `Hierarchy` objects into the appropriate factory, so
18 | //! that they are ready to be chosen and used at runtime.
19 | 
20 | template <class AbstractProduct>
21 | using Builder = std::function<std::shared_ptr<AbstractProduct>()>;
22 | 
23 | using HierarchyFactory = Factory<bayesmix::HierarchyId, AbstractHierarchy>;
24 | 
25 | __attribute__((constructor)) static void load_hierarchies() {  // constructor attribute (GCC/Clang extension): runs automatically before main()
26 |   HierarchyFactory &factory = HierarchyFactory::Instance();  // NOTE(review): `static` in a header gives every including translation unit its own copy - confirm the factory tolerates repeated registration
27 |   // Initialize factory builders
28 |   Builder<AbstractHierarchy> NNIGbuilder = []() {
29 |     return std::make_shared<NNIGHierarchy>();
30 |   };
31 |   Builder<AbstractHierarchy> NNxIGbuilder = []() {
32 |     return std::make_shared<NNxIGHierarchy>();
33 |   };
34 |   Builder<AbstractHierarchy> NNWbuilder = []() {
35 |     return std::make_shared<NNWHierarchy>();
36 |   };
37 |   Builder<AbstractHierarchy> LinRegUnibuilder = []() {
38 |     return std::make_shared<LinRegUniHierarchy>();
39 |   };
40 |   Builder<AbstractHierarchy> FAbuilder = []() {
41 |     return std::make_shared<FAHierarchy>();
42 |   };
43 |   Builder<AbstractHierarchy> LapNIGbuilder = []() {
44 |     return std::make_shared<LapNIGHierarchy>();
45 |   };
46 |   // Register each builder under the ID reported by a temporary instance of its hierarchy class
47 |   factory.add_builder(NNIGHierarchy().get_id(), NNIGbuilder);
48 |   factory.add_builder(NNxIGHierarchy().get_id(), NNxIGbuilder);
49 |   factory.add_builder(NNWHierarchy().get_id(), NNWbuilder);
50 |   factory.add_builder(LinRegUniHierarchy().get_id(), LinRegUnibuilder);
51 |   factory.add_builder(FAHierarchy().get_id(), FAbuilder);
52 |   factory.add_builder(LapNIGHierarchy().get_id(), LapNIGbuilder);
53 | }
54 | 
55 | #endif  // BAYESMIX_HIERARCHIES_LOAD_HIERARCHIES_H_
56 | 


--------------------------------------------------------------------------------
/src/hierarchies/nnxig_hierarchy.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_NNXIG_HIERARCHY_H_
 2 | #define BAYESMIX_HIERARCHIES_NNXIG_HIERARCHY_H_
 3 | 
 4 | #include "base_hierarchy.h"
 5 | #include "hierarchy_id.pb.h"
 6 | #include "likelihoods/uni_norm_likelihood.h"
 7 | #include "priors/nxig_prior_model.h"
 8 | #include "updaters/nnxig_updater.h"
 9 | 
10 | /**
11 |  * Semi-conjugate Normal Normal x InverseGamma hierarchy for univariate data.
12 |  *
13 |  * This class represents a hierarchical model where data are distributed
14 |  * according to a Normal likelihood (see the `UniNormLikelihood` class for
15 |  * details). The likelihood parameters have a Normal x InverseGamma centering
16 |  * distribution (see the `NxIGPriorModel` class for details). That is:
17 |  *
18 |  * \f[
19 |  *    f(x_i \mid \mu,\sigma^2) &= N(\mu,\sigma^2) \\
20 |  *    \mu &\sim N(\mu_0, \eta^2) \\
21 |  *    \sigma^2 &\sim InvGamma(a, b)
22 |  * \f]
23 |  *
24 |  * The state is composed of mean and variance. The state hyperparameters are
25 |  * \f$ (\mu_0, \eta^2, a, b) \f$, all scalar values. Note that this hierarchy
26 |  * is NOT conjugate, meaning that the marginal distribution is not available
27 |  * in closed form.
28 |  */
29 | 
30 | class NNxIGHierarchy
31 |     : public BaseHierarchy<NNxIGHierarchy, UniNormLikelihood, NxIGPriorModel> {
32 |  public:
33 |   NNxIGHierarchy() = default;
34 |   ~NNxIGHierarchy() = default;
35 | 
36 |   //! Returns the Protobuf ID associated to this class
37 |   bayesmix::HierarchyId get_id() const override {
38 |     return bayesmix::HierarchyId::NNxIG;
39 |   }
40 | 
41 |   //! Sets the default updater algorithm for this hierarchy (an NNxIGUpdater)
42 |   void set_default_updater() { updater = std::make_shared<NNxIGUpdater>(); }
43 | 
44 |   //! Initializes state parameters to appropriate values
45 |   void initialize_state() override {
46 |     // Get hypers
47 |     auto hypers = prior->get_hypers();
48 |     // Initialize likelihood state
49 |     State::UniLS state;
50 |     state.mean = hypers.mean;  // start the mean at the prior's `mean` hyperparameter
51 |     state.var = hypers.scale / (hypers.shape + 1);  // presumably the mode of an InvGamma(shape, scale) prior - TODO confirm
52 |     like->set_state(state);
53 |   };
54 | };
55 | 
56 | #endif  // BAYESMIX_HIERARCHIES_NNXIG_HIERARCHY_H_
57 | 


--------------------------------------------------------------------------------
/src/hierarchies/priors/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | target_sources(bayesmix PUBLIC  # prior-model sources of target `bayesmix`
 2 |     prior_model_internal.h  # SFINAE helpers for lpdf_from_unconstrained()
 3 |     abstract_prior_model.h
 4 |     base_prior_model.h
 5 |     hyperparams.h  # Hyperparams::{NIG, NxIG, NW, MNIG, FA} structs
 6 |     nig_prior_model.h
 7 |     nig_prior_model.cc
 8 |     nxig_prior_model.h
 9 |     nxig_prior_model.cc
10 |     nw_prior_model.h
11 |     nw_prior_model.cc
12 |     mnig_prior_model.h
13 |     mnig_prior_model.cc
14 |     fa_prior_model.h
15 |     fa_prior_model.cc
16 | )
17 | 


--------------------------------------------------------------------------------
/src/hierarchies/priors/fa_prior_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_FA_PRIOR_MODEL_H_
 2 | #define BAYESMIX_HIERARCHIES_PRIORS_FA_PRIOR_MODEL_H_
 3 | 
 4 | #include <memory>
 5 | #include <stan/math/rev.hpp>
 6 | #include <vector>
 7 | 
 8 | #include "base_prior_model.h"
 9 | #include "hierarchy_prior.pb.h"
10 | #include "hyperparams.h"
11 | #include "src/utils/rng.h"
12 | 
13 | /**
14 |  * A prior model for the factor analyzers likelihood, that is
15 |  *
16 |  * \f[
17 |  *    \bm{\mu} &\sim N_p(\tilde{\bm{\mu}}, \psi I) \\
18 |  *    \Lambda &\sim DL(\alpha) \\
19 |  *    \Sigma &= \mathrm{diag}(\sigma^2_1, \ldots, \sigma^2_p) \\
20 |  *    \sigma^2_j &\stackrel{\small\mathrm{iid}}{\sim} InvGamma(a,b)  \quad
21 |  * j=1,...,p \f]
22 |  *
23 |  * Where \f$ DL \f$ is the Dirichlet-Laplace distribution.
24 |  * See Bhattacharya A., Pati D., Pillai N.S., Dunson D.B. (2015).
25 |  * JASA 110(512), 1479–1490 for details.
26 |  */
27 | 
28 | class FAPriorModel
29 |     : public BasePriorModel<FAPriorModel, State::FA, Hyperparams::FA,
30 |                             bayesmix::FAPrior> {
31 |  public:
32 |   using AbstractPriorModel::ProtoHypers;
33 |   using AbstractPriorModel::ProtoHypersPtr;
34 | 
35 |   FAPriorModel() = default;
36 |   ~FAPriorModel() = default;
37 | 
38 |   double lpdf(const google::protobuf::Message &state_) override;
39 | 
40 |   State::FA sample(ProtoHypersPtr hier_hypers = nullptr) override;
41 | 
42 |   void update_hypers(const std::vector<bayesmix::AlgorithmState::ClusterState>
43 |                          &states) override;
44 | 
45 |   void set_hypers_from_proto(
46 |       const google::protobuf::Message &hypers_) override;
47 |   //! Returns `dim`; this is 0 until the member is assigned
48 |   unsigned int get_dim() const { return dim; }
49 | 
50 |   std::shared_ptr<bayesmix::AlgorithmState::HierarchyHypers> get_hypers_proto()
51 |       const override;
52 | 
53 |  protected:
54 |   void initialize_hypers() override;
55 |   //! Zero-initialized so get_dim() never reads an indeterminate value
56 |   unsigned int dim = 0;
57 | };
58 | 
59 | #endif  // BAYESMIX_HIERARCHIES_PRIORS_FA_PRIOR_MODEL_H_
60 | 


--------------------------------------------------------------------------------
/src/hierarchies/priors/hyperparams.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_HYPERPARAMS_H_
 2 | #define BAYESMIX_HIERARCHIES_PRIORS_HYPERPARAMS_H_
 3 | 
 4 | #include <stan/math/rev.hpp>
 5 | 
 6 | namespace Hyperparams {  // hyperparameter containers of the prior models
 7 | 
 8 | struct NIG {  // hyperparameter type of NIGPriorModel
 9 |   double mean, var_scaling, shape, scale;
10 | };
11 | 
12 | struct NxIG {  // hyperparameter type of NxIGPriorModel
13 |   double mean, var, shape, scale;
14 | };
15 | 
16 | struct NW {  // hyperparameter type of NWPriorModel
17 |   Eigen::VectorXd mean;
18 |   double var_scaling, deg_free;
19 |   Eigen::MatrixXd scale, scale_inv, scale_chol;  // NNWUpdater fills all 3
20 | };
21 | 
22 | struct MNIG {  // hyperparameter type of MNIGPriorModel
23 |   Eigen::VectorXd mean;
24 |   Eigen::MatrixXd var_scaling, var_scaling_inv;
25 |   double shape, scale;
26 | };
27 | 
28 | struct FA {  // hyperparameter type of FAPriorModel
29 |   Eigen::VectorXd mutilde, beta;
30 |   double phi, alpha0;
31 |   unsigned int q;
32 | };
33 | 
34 | }  // namespace Hyperparams
35 | 
36 | #endif  // BAYESMIX_HIERARCHIES_PRIORS_HYPERPARAMS_H_
37 | 


--------------------------------------------------------------------------------
/src/hierarchies/priors/mnig_prior_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_MNIG_PRIOR_MODEL_H_
 2 | #define BAYESMIX_HIERARCHIES_PRIORS_MNIG_PRIOR_MODEL_H_
 3 | 
 4 | #include <memory>
 5 | #include <stan/math/rev.hpp>
 6 | #include <vector>
 7 | 
 8 | #include "base_prior_model.h"
 9 | #include "hierarchy_prior.pb.h"
10 | #include "hyperparams.h"
11 | #include "src/utils/rng.h"
12 | 
13 | /**
14 |  * A conjugate prior model for the scalar linear regression likelihood, i.e.
15 |  *
16 |  * \f[
17 |  *      \bm{\beta} \mid \sigma^2 & \sim N_p(\bm{\mu}, \sigma^2 \Lambda^{-1}) \\
18 |  *      \sigma^2 & \sim InvGamma(a,b)
19 |  * \f]
20 |  */
21 | 
22 | class MNIGPriorModel
23 |     : public BasePriorModel<MNIGPriorModel, State::UniLinRegLS,
24 |                             Hyperparams::MNIG, bayesmix::LinRegUniPrior> {
25 |  public:
26 |   using AbstractPriorModel::ProtoHypers;
27 |   using AbstractPriorModel::ProtoHypersPtr;
28 | 
29 |   MNIGPriorModel() = default;
30 |   ~MNIGPriorModel() = default;
31 | 
32 |   double lpdf(const google::protobuf::Message &state_) override;
33 | 
34 |   State::UniLinRegLS sample(ProtoHypersPtr hier_hypers = nullptr) override;
35 | 
36 |   void update_hypers(const std::vector<bayesmix::AlgorithmState::ClusterState>
37 |                          &states) override;
38 | 
39 |   void set_hypers_from_proto(
40 |       const google::protobuf::Message &hypers_) override;
41 |   //! Returns `dim`; this is 0 until the member is assigned
42 |   unsigned int get_dim() const { return dim; }
43 | 
44 |   std::shared_ptr<bayesmix::AlgorithmState::HierarchyHypers> get_hypers_proto()
45 |       const override;
46 | 
47 |  protected:
48 |   void initialize_hypers() override;
49 |   //! Zero-initialized so get_dim() never reads an indeterminate value
50 |   unsigned int dim = 0;
51 | };
52 | 
53 | #endif  // BAYESMIX_HIERARCHIES_PRIORS_MNIG_PRIOR_MODEL_H_
54 | 


--------------------------------------------------------------------------------
/src/hierarchies/priors/nig_prior_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_NIG_PRIOR_MODEL_H_
 2 | #define BAYESMIX_HIERARCHIES_PRIORS_NIG_PRIOR_MODEL_H_
 3 | 
 4 | #include <memory>
 5 | #include <stan/math/prim.hpp>
 6 | #include <stan/math/rev.hpp>
 7 | #include <vector>
 8 | 
 9 | #include "base_prior_model.h"
10 | #include "hierarchy_prior.pb.h"
11 | #include "hyperparams.h"
12 | #include "src/utils/rng.h"
13 | 
14 | /**
15 |  * A conjugate prior model for the univariate normal likelihood, that is
16 |  *
17 |  * \f[
18 |  *      \mu \mid \sigma^2 &\sim N(\mu_0, \sigma^2 / \lambda) \\
19 |  *      \sigma^2 &\sim InvGamma(a,b)
20 |  * \f]
21 |  *
22 |  * With several possibilities for hyper-priors on \f$ \mu \f$ and
23 |  * \f$ \sigma^2 \f$. We have considered a normal prior for \f$ \mu_0 \f$ and
24 |  * a Normal-Gamma-Gamma for \f$ (\mu_0, a, b) \f$ in addition to fixing prior
25 |  * hyperparameters.
26 |  */
27 | 
28 | class NIGPriorModel
29 |     : public BasePriorModel<NIGPriorModel, State::UniLS, Hyperparams::NIG,
30 |                             bayesmix::NNIGPrior> {
31 |  public:
32 |   using AbstractPriorModel::ProtoHypers;
33 |   using AbstractPriorModel::ProtoHypersPtr;
34 | 
35 |   NIGPriorModel() = default;
36 |   ~NIGPriorModel() = default;
37 | 
38 |   double lpdf(const google::protobuf::Message &state_) override;
39 |   //! Log-density of the prior in unconstrained coordinates, Jacobian included
40 |   template <typename T>
41 |   T lpdf_from_unconstrained(
42 |       const Eigen::Matrix<T, Eigen::Dynamic, 1> &unconstrained_params) const {
43 |     // theta = (mean, var) mapped back to the constrained scale
44 |     Eigen::Matrix<T, Eigen::Dynamic, 1> theta =
45 |         State::uni_ls_to_constrained(unconstrained_params);
46 |     T log_jac = State::uni_ls_log_det_jac(theta);
47 |     T mu = theta(0), sigma2 = theta(1);
48 |     T log_normal = stan::math::normal_lpdf(mu, hypers->mean,
49 |                                            sqrt(sigma2 / hypers->var_scaling));
50 |     T log_inv_gamma =
51 |         stan::math::inv_gamma_lpdf(sigma2, hypers->shape, hypers->scale);
52 |     return (log_normal + log_inv_gamma) + log_jac;
53 |   }
54 | 
55 |   State::UniLS sample(ProtoHypersPtr hier_hypers = nullptr) override;
56 | 
57 |   void update_hypers(const std::vector<bayesmix::AlgorithmState::ClusterState>
58 |                          &states) override;
59 | 
60 |   void set_hypers_from_proto(
61 |       const google::protobuf::Message &hypers_) override;
62 | 
63 |   std::shared_ptr<bayesmix::AlgorithmState::HierarchyHypers> get_hypers_proto()
64 |       const override;
65 | 
66 |  protected:
67 |   void initialize_hypers() override;
68 | };
69 | 
70 | #endif  // BAYESMIX_HIERARCHIES_PRIORS_NIG_PRIOR_MODEL_H_
71 | 


--------------------------------------------------------------------------------
/src/hierarchies/priors/nw_prior_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_NW_PRIOR_MODEL_H_
 2 | #define BAYESMIX_HIERARCHIES_PRIORS_NW_PRIOR_MODEL_H_
 3 | 
 4 | #include <memory>
 5 | #include <stan/math/prim.hpp>
 6 | #include <stan/math/rev.hpp>
 7 | #include <vector>
 8 | 
 9 | #include "base_prior_model.h"
10 | #include "hierarchy_prior.pb.h"
11 | #include "hyperparams.h"
12 | #include "src/utils/rng.h"
13 | 
14 | /**
15 |  * A conjugate prior model for the multivariate normal likelihood, that is
16 |  *
17 |  * \f[
18 |  *      \bm{\mu} \mid \Sigma &\sim N_p(\bm{\mu}_0, (\Sigma \lambda)^{-1}) \\
19 |  *      \Sigma & \sim Wishart(\nu_0, \Psi_0)
20 |  * \f]
21 |  *
22 |  * With some options for hyper-priors on \f$ \bm{\mu} \f$ and \f$ \Sigma \f$.
23 |  * We have considered a normal prior for \f$ \bm{\mu}_0 \f$ in addition to
24 |  * fixing prior hyperparameters
25 |  */
26 | 
27 | class NWPriorModel
28 |     : public BasePriorModel<NWPriorModel, State::MultiLS, Hyperparams::NW,
29 |                             bayesmix::NNWPrior> {
30 |  public:
31 |   NWPriorModel() = default;
32 |   ~NWPriorModel() = default;
33 | 
34 |   double lpdf(const google::protobuf::Message &state_) override;
35 | 
36 |   State::MultiLS sample(ProtoHypersPtr hier_hypers = nullptr) override;
37 | 
38 |   void update_hypers(const std::vector<bayesmix::AlgorithmState::ClusterState>
39 |                          &states) override;
40 | 
41 |   void set_hypers_from_proto(
42 |       const google::protobuf::Message &hypers_) override;
43 | 
44 |   void write_prec_to_state(const Eigen::MatrixXd &prec_, State::MultiLS *out);
45 |   //! Returns `dim`; this is 0 until the member is assigned
46 |   unsigned int get_dim() const { return dim; }
47 | 
48 |   std::shared_ptr<bayesmix::AlgorithmState::HierarchyHypers> get_hypers_proto()
49 |       const override;
50 | 
51 |  protected:
52 |   void initialize_hypers() override;
53 |   //! Zero-initialized so get_dim() never reads an indeterminate value
54 |   unsigned int dim = 0;
55 | };
56 | 
57 | #endif  // BAYESMIX_HIERARCHIES_PRIORS_NW_PRIOR_MODEL_H_
58 | 


--------------------------------------------------------------------------------
/src/hierarchies/priors/nxig_prior_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_NXIG_PRIOR_MODEL_H_
 2 | #define BAYESMIX_HIERARCHIES_PRIORS_NXIG_PRIOR_MODEL_H_
 3 | 
 4 | #include <memory>
 5 | #include <stan/math/prim.hpp>
 6 | #include <stan/math/rev.hpp>
 7 | #include <vector>
 8 | 
 9 | #include "base_prior_model.h"
10 | #include "hierarchy_prior.pb.h"
11 | #include "hyperparams.h"
12 | #include "src/utils/rng.h"
13 | 
14 | /**
15 |  * A semi-conjugate prior model for the univariate normal likelihood, that is
16 |  *
17 |  * \f[
18 |  *      \mu & \sim N(\mu_0, \eta^2) \\
19 |  *      \sigma^2 & \sim InvGamma(a,b)
20 |  * \f]
21 |  */
22 | 
23 | class NxIGPriorModel
24 |     : public BasePriorModel<NxIGPriorModel, State::UniLS, Hyperparams::NxIG,
25 |                             bayesmix::NNxIGPrior> {
26 |  public:
27 |   using AbstractPriorModel::ProtoHypers;
28 |   using AbstractPriorModel::ProtoHypersPtr;
29 | 
30 |   NxIGPriorModel() = default;
31 |   ~NxIGPriorModel() = default;
32 |   //! All overrides below are only declared here (no inline bodies)
33 |   double lpdf(const google::protobuf::Message &state_) override;
34 |   //! Returns a `State::UniLS`; `hier_hypers` defaults to nullptr
35 |   State::UniLS sample(ProtoHypersPtr hier_hypers = nullptr) override;
36 | 
37 |   void update_hypers(const std::vector<bayesmix::AlgorithmState::ClusterState>
38 |                          &states) override;
39 | 
40 |   void set_hypers_from_proto(
41 |       const google::protobuf::Message &hypers_) override;
42 |   //! Hyperparameters exported as an AlgorithmState::HierarchyHypers message
43 |   std::shared_ptr<bayesmix::AlgorithmState::HierarchyHypers> get_hypers_proto()
44 |       const override;
45 | 
46 |  protected:
47 |   void initialize_hypers() override;
48 | };
49 | 
50 | #endif  // BAYESMIX_HIERARCHIES_PRIORS_NXIG_PRIOR_MODEL_H_
51 | 


--------------------------------------------------------------------------------
/src/hierarchies/priors/prior_model_internal.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_PRIORS_PRIOR_MODEL_INTERNAL_H_
 2 | #define BAYESMIX_HIERARCHIES_PRIORS_PRIOR_MODEL_INTERNAL_H_
 3 | 
 4 | //! These functions exploit SFINAE to manage exception handling in all methods
 5 | //! required only if end user wants to rely on Metropolis-like updaters. SFINAE
 6 | //! (Substitution Failure Is Not An Error) is a C++ rule that applies during
 7 | //! overload resolution of function templates: When substituting the explicitly
 8 | //! specified or deduced type for the template parameter fails, the
 9 | //! specialization is discarded from the overload set instead of causing a
10 | //! compile error. This feature is used in template metaprogramming.
11 | 
12 | namespace internal {
13 | //! Forwards to prior.lpdf_from_unconstrained<T>(); SFINAE-removed if absent
14 | template <class Prior, typename T>
15 | auto lpdf_from_unconstrained(
16 |     const Prior &prior,
17 |     Eigen::Matrix<T, Eigen::Dynamic, 1> unconstrained_params, int)
18 |     -> decltype(prior.template lpdf_from_unconstrained<T>(
19 |         unconstrained_params)) {
20 |   return prior.template lpdf_from_unconstrained<T>(unconstrained_params);
21 | }
22 | //! Fallback (`double` tag, worse match for an int argument): always throws
23 | template <class Prior, typename T>
24 | auto lpdf_from_unconstrained(
25 |     const Prior &prior,
26 |     Eigen::Matrix<T, Eigen::Dynamic, 1> unconstrained_params, double) -> T {
27 |   throw(std::runtime_error("lpdf_from_unconstrained() not yet implemented"));
28 | }
29 | // NOTE(review): no #include for Eigen/<stdexcept> here -- relies on includer
30 | }  // namespace internal
31 | 
32 | #endif  // BAYESMIX_HIERARCHIES_PRIORS_PRIOR_MODEL_INTERNAL_H_
33 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | target_sources(bayesmix PUBLIC  # updater sources of target `bayesmix`
 2 |     abstract_updater.h
 3 |     semi_conjugate_updater.h  # base of the NNIG / NNW / MNIG updaters
 4 |     nnig_updater.h
 5 |     nnig_updater.cc
 6 |     nnxig_updater.h
 7 |     nnxig_updater.cc
 8 |     nnw_updater.h
 9 |     nnw_updater.cc
10 |     mnig_updater.h
11 |     mnig_updater.cc
12 |     fa_updater.h
13 |     fa_updater.cc
14 |     metropolis_updater.h  # CRTP base for Metropolis-Hastings updaters
15 |     mala_updater.h
16 |     random_walk_updater.h
17 |     target_lpdf_unconstrained.h  # included by metropolis_updater.h
18 | )
19 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/fa_updater.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_FA_UPDATER_H_
 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_FA_UPDATER_H_
 3 | 
 4 | #include "abstract_updater.h"
 5 | #include "src/hierarchies/likelihoods/fa_likelihood.h"
 6 | #include "src/hierarchies/likelihoods/states/includes.h"
 7 | #include "src/hierarchies/priors/fa_prior_model.h"
 8 | #include "src/hierarchies/priors/hyperparams.h"
 9 | #include "src/utils/proto_utils.h"
10 | 
11 | //! Updater specific for the `FAHierarchy`.
12 | //! See  Bhattacharya, Anirban, and David B. Dunson.
13 | //! "Sparse Bayesian infinite factor models." Biometrika (2011): 291-306.
14 | //! for further details
15 | class FAUpdater : public AbstractUpdater {
16 |  public:
17 |   FAUpdater() = default;
18 |   ~FAUpdater() = default;
19 |   void draw(AbstractLikelihood& like, AbstractPriorModel& prior,
20 |             bool update_params) override;
21 | 
22 |   //! Copy of this updater on which clear_hypers() has been called
23 |   std::shared_ptr<AbstractUpdater> clone() const override {
24 |     auto copy = std::make_shared<FAUpdater>(*this);
25 |     copy->clear_hypers();
26 |     return copy;
27 |   }
28 | 
29 |  protected:
30 |   void sample_eta(State::FA& state, const Hyperparams::FA& hypers,
31 |                   const FALikelihood& like);
32 |   void sample_mu(State::FA& state, const Hyperparams::FA& hypers,
33 |                  const FALikelihood& like);
34 |   void sample_lambda(State::FA& state, const Hyperparams::FA& hypers,
35 |                      const FALikelihood& like);
36 |   void sample_psi(State::FA& state, const Hyperparams::FA& hypers,
37 |                   const FALikelihood& like);
38 | };
39 | 
40 | #endif  // BAYESMIX_HIERARCHIES_UPDATERS_FA_UPDATER_H_
41 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/metropolis_updater.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_METROPOLIS_UPDATER_H_
 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_METROPOLIS_UPDATER_H_
 3 | 
 4 | #include "abstract_updater.h"
 5 | #include "target_lpdf_unconstrained.h"
 6 | 
 7 | //! Base class for updaters using a Metropolis-Hastings algorithm
 8 | //!
 9 | //! This class serves as the base for a CRTP.
10 | //! Children of this class should implement the methods
11 | //!     template <typename F>
12 | //!     Eigen::VectorXd sample_proposal(Eigen::VectorXd curr_state,
13 | //!                                     AbstractLikelihood &like,
14 | //!                                     AbstractPriorModel &prior, F
15 | //!                                     &target_lpdf)
16 | //! and
17 | //!     template <typename F>
18 | //!     double proposal_lpdf(Eigen::VectorXd prop_state,
19 | //!                          Eigen::VectorXd curr_state,
20 | //!                          AbstractLikelihood &like,
21 | //!                          AbstractPriorModel &prior,
22 | //!                          F &target_lpdf)
23 | //! where the template parameter is needed to allow the use of stan's
24 | //! automatic differentiation if the gradient of the full conditional is
25 | //! required.
26 | template <class DerivedUpdater>
27 | class MetropolisUpdater : public AbstractUpdater {
28 |  public:
29 |   //! Performs one Metropolis-Hastings step on the unconstrained state of
30 |   //! `like`; `update_params` is not read by this implementation
31 |   void draw(AbstractLikelihood &like, AbstractPriorModel &prior,
32 |             bool update_params) override {
33 |     target_lpdf_unconstrained target_lpdf(&like, &prior);
34 |     Eigen::VectorXd curr_state = like.get_unconstrained_state();
35 |     Eigen::VectorXd prop_state =
36 |         static_cast<DerivedUpdater *>(this)->sample_proposal(
37 |             curr_state, like, prior, target_lpdf);
38 |     // NOTE(review): ratio uses likelihood terms only, no prior term -- confirm
39 |     double log_arate = like.cluster_lpdf_from_unconstrained(prop_state) -
40 |                        like.cluster_lpdf_from_unconstrained(curr_state) +
41 |                        static_cast<DerivedUpdater *>(this)->proposal_lpdf(
42 |                            curr_state, prop_state, like, prior, target_lpdf) -
43 |                        static_cast<DerivedUpdater *>(this)->proposal_lpdf(
44 |                            prop_state, curr_state, like, prior, target_lpdf);
45 |     // Accept the proposal when log(U) < log_arate, U ~ Uniform(0, 1)
46 |     auto &rng = bayesmix::Rng::Instance().get();
47 |     if (std::log(stan::math::uniform_rng(0, 1, rng)) < log_arate) {
48 |       like.set_state_from_unconstrained(prop_state);
49 |     }
50 |   }
51 | };
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/mnig_updater.cc:
--------------------------------------------------------------------------------
 1 | #include "mnig_updater.h"
 2 | 
 3 | AbstractUpdater::ProtoHypersPtr MNIGUpdater::compute_posterior_hypers(
 4 |     AbstractLikelihood& like, AbstractPriorModel& prior) {
 5 |   // Downcast, then compute the hypers of the full conditional (as a proto)
 6 |   auto& likecast = downcast_likelihood(like);
 7 |   auto& priorcast = downcast_prior(prior);
 8 | 
 9 |   // Empty cluster: return the prior hypers before copying any statistic
10 |   int card = likecast.get_card();
11 |   if (card == 0) {
12 |     return priorcast.get_hypers_proto();
13 |   }
14 | 
15 |   // Sufficient statistics of the cluster and prior hyperparameters
16 |   double data_sum_squares = likecast.get_data_sum_squares();
17 |   Eigen::MatrixXd covar_sum_squares = likecast.get_covar_sum_squares();
18 |   Eigen::MatrixXd mixed_prod = likecast.get_mixed_prod();
19 |   auto hypers = priorcast.get_hypers();
20 | 
21 |   // Compute posterior hyperparameters
22 |   Eigen::VectorXd mean;
23 |   Eigen::MatrixXd var_scaling;
24 |   double shape, scale;
25 | 
26 |   var_scaling = covar_sum_squares + hypers.var_scaling;
27 |   // The Cholesky factorization of var_scaling is used to solve for the mean
28 |   auto llt = var_scaling.llt();
29 |   mean = llt.solve(mixed_prod + hypers.var_scaling * hypers.mean);
30 |   shape = hypers.shape + 0.5 * card;
31 |   scale = hypers.scale +
32 |           0.5 * (data_sum_squares +
33 |                  hypers.mean.transpose() * hypers.var_scaling * hypers.mean -
34 |                  mean.transpose() * var_scaling * mean);
35 | 
36 |   // Proto conversion
37 |   ProtoHypers out;
38 |   bayesmix::to_proto(mean, out.mutable_lin_reg_uni_state()->mutable_mean());
39 |   bayesmix::to_proto(var_scaling,
40 |                      out.mutable_lin_reg_uni_state()->mutable_var_scaling());
41 |   out.mutable_lin_reg_uni_state()->set_shape(shape);
42 |   out.mutable_lin_reg_uni_state()->set_scale(scale);
43 |   return std::make_shared<ProtoHypers>(out);
44 | }
45 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/mnig_updater.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_MNIG_UPDATER_H_
 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_MNIG_UPDATER_H_
 3 | 
 4 | #include "semi_conjugate_updater.h"
 5 | #include "src/hierarchies/likelihoods/uni_lin_reg_likelihood.h"
 6 | #include "src/hierarchies/priors/mnig_prior_model.h"
 7 | 
 8 | /**
 9 |  * Updater specific for the `UniLinRegLikelihood` used in combination
10 |  * with `MNIGPriorModel`, that is the model
11 |  *
12 |  * \f[
13 |  *    y_i \mid \bm{\beta}, \sigma^2 &\stackrel{\small\mathrm{iid}}{\sim}
14 |  * N(\bm{\beta}^T\bm{x}_i, \sigma^2) \\
15 |  *  \bm{\beta} \mid \sigma^2 &\sim N_p(\mu_{0}, \sigma^2 \mathbf{V}^{-1}) \\
16 |  *    \sigma^2 &\sim InvGamma(a, b)
17 |  * \f]
18 |  *
19 |  * It exploits the conjugacy of the model to sample the full conditional of
20 |  * \f$ (\bm{\beta}, \sigma^2) \f$ by calling `MNIGPriorModel::sample` with
21 |  * updated parameters
22 |  */
23 | 
24 | class MNIGUpdater
25 |     : public SemiConjugateUpdater<UniLinRegLikelihood, MNIGPriorModel> {
26 |  public:
27 |   MNIGUpdater() = default;
28 |   ~MNIGUpdater() = default;
29 |   //! Always true for this updater
30 |   bool is_conjugate() const override { return true; }
31 |   //! Posterior hyperparameters; implemented in mnig_updater.cc
32 |   ProtoHypersPtr compute_posterior_hypers(AbstractLikelihood &like,
33 |                                           AbstractPriorModel &prior) override;
34 | 
35 |   //! Copy of this updater on which clear_hypers() has been called
36 |   std::shared_ptr<AbstractUpdater> clone() const override {
37 |     auto copy = std::make_shared<MNIGUpdater>(*this);
38 |     copy->clear_hypers();
39 |     return copy;
40 |   }
41 | };
42 | 
43 | #endif  // BAYESMIX_HIERARCHIES_UPDATERS_MNIG_UPDATER_H_
44 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/nnig_updater.cc:
--------------------------------------------------------------------------------
 1 | #include "nnig_updater.h"
 2 | 
 3 | #include "src/hierarchies/likelihoods/states/includes.h"
 4 | #include "src/hierarchies/priors/hyperparams.h"
 5 | 
 6 | AbstractUpdater::ProtoHypersPtr NNIGUpdater::compute_posterior_hypers(
 7 |     AbstractLikelihood& like, AbstractPriorModel& prior) {
 8 |   // Work on the concrete likelihood / prior types
 9 |   auto& nnig_like = downcast_likelihood(like);
10 |   auto& nig_prior = downcast_prior(prior);
11 | 
12 |   // Cluster cardinality, sufficient statistics and prior hyperparameters
13 |   const int n = nnig_like.get_card();
14 |   const double sum_y = nnig_like.get_data_sum();
15 |   const double sum_y2 = nnig_like.get_data_sum_squares();
16 |   const auto prior_hp = nig_prior.get_hypers();
17 | 
18 |   // With no data the posterior hypers are just the prior ones
19 |   if (n == 0) {
20 |     return nig_prior.get_hypers_proto();
21 |   }
22 | 
23 |   // Posterior hyperparameters given the cluster's data
24 |   const double y_bar = sum_y / (1.0 * n);  // sample mean
25 |   const double ss = sum_y2 - n * y_bar * y_bar;
26 |   const double post_mean = (prior_hp.var_scaling * prior_hp.mean + sum_y) /
27 |                            (prior_hp.var_scaling + n);
28 |   const double post_var_scaling = prior_hp.var_scaling + n;
29 |   const double post_shape = prior_hp.shape + 0.5 * n;
30 |   const double post_scale = prior_hp.scale + 0.5 * ss +
31 |       0.5 * prior_hp.var_scaling * n * (y_bar - prior_hp.mean) *
32 |           (y_bar - prior_hp.mean) / (n + prior_hp.var_scaling);
33 | 
34 |   // Pack the result into the protobuf message
35 |   ProtoHypers out;
36 |   auto* nnig_state = out.mutable_nnig_state();
37 |   nnig_state->set_mean(post_mean);
38 |   nnig_state->set_var_scaling(post_var_scaling);
39 |   nnig_state->set_shape(post_shape);
40 |   nnig_state->set_scale(post_scale);
41 |   return std::make_shared<ProtoHypers>(out);
42 | }
43 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/nnig_updater.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_NNIG_UPDATER_H_
 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_NNIG_UPDATER_H_
 3 | 
 4 | #include "semi_conjugate_updater.h"
 5 | #include "src/hierarchies/likelihoods/uni_norm_likelihood.h"
 6 | #include "src/hierarchies/priors/nig_prior_model.h"
 7 | 
 8 | /**
 9 |  * Updater specific for the `UniNormLikelihood` used in combination
10 |  * with `NIGPriorModel`, that is the model
11 |  *
12 |  * \f[
13 |  *      y_i \mid \mu, \sigma^2 &\stackrel{\small\mathrm{iid}}{\sim} N(\mu,
14 |  * \sigma^2) \\
15 |  *      \mu \mid \sigma^2 &\sim N(\mu_0, \sigma^2 / \lambda) \\
16 |  *      \sigma^2 &\sim InvGamma(a, b)
17 |  * \f]
18 |  *
19 |  * It exploits the conjugacy of the model to sample the full conditional of
20 |  * \f$ (\mu, \sigma^2) \f$ by calling `NIGPriorModel::sample` with updated
21 |  * parameters
22 |  */
23 | 
24 | class NNIGUpdater
25 |     : public SemiConjugateUpdater<UniNormLikelihood, NIGPriorModel> {
26 |  public:
27 |   NNIGUpdater() = default;
28 |   ~NNIGUpdater() = default;
29 |   //! Always true for this updater
30 |   bool is_conjugate() const override { return true; }
31 |   //! Posterior hyperparameters; implemented in nnig_updater.cc
32 |   ProtoHypersPtr compute_posterior_hypers(AbstractLikelihood &like,
33 |                                           AbstractPriorModel &prior) override;
34 | 
35 |   //! Copy of this updater on which clear_hypers() has been called
36 |   std::shared_ptr<AbstractUpdater> clone() const override {
37 |     auto copy = std::make_shared<NNIGUpdater>(*this);
38 |     copy->clear_hypers();
39 |     return copy;
40 |   }
41 | };
42 | 
43 | #endif  // BAYESMIX_HIERARCHIES_UPDATERS_NNIG_UPDATER_H_
44 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/nnw_updater.cc:
--------------------------------------------------------------------------------
 1 | #include "nnw_updater.h"
 2 | 
 3 | #include "algorithm_state.pb.h"
 4 | #include "src/hierarchies/likelihoods/states/includes.h"
 5 | #include "src/hierarchies/priors/hyperparams.h"
 6 | #include "src/utils/proto_utils.h"
 7 | 
 8 | AbstractUpdater::ProtoHypersPtr NNWUpdater::compute_posterior_hypers(
 9 |     AbstractLikelihood& like, AbstractPriorModel& prior) {
10 |   // Likelihood and Prior downcast
11 |   auto& likecast = downcast_likelihood(like);
12 |   auto& priorcast = downcast_prior(prior);
13 | 
14 |   // Empty cluster: return the prior hypers before copying any statistic
15 |   int card = likecast.get_card();
16 |   if (card == 0) {
17 |     return priorcast.get_hypers_proto();
18 |   }
19 | 
20 |   // Sufficient statistics of the cluster and prior hyperparameters
21 |   Eigen::VectorXd data_sum = likecast.get_data_sum();
22 |   Eigen::MatrixXd data_sum_squares = likecast.get_data_sum_squares();
23 |   auto hypers = priorcast.get_hypers();
24 | 
25 |   // Compute posterior hyperparameters
26 |   Eigen::VectorXd mean;
27 |   double var_scaling, deg_free;
28 |   Eigen::MatrixXd scale, scale_inv, scale_chol;
29 |   var_scaling = hypers.var_scaling + card;
30 |   deg_free = hypers.deg_free + card;
31 |   Eigen::VectorXd mubar = data_sum.array() / card;  // sample mean
32 |   mean = (hypers.var_scaling * hypers.mean + card * mubar) /
33 |          (hypers.var_scaling + card);
34 | 
35 |   // Inverse scale: centered scatter + mean-shift term + prior scale_inv
36 |   Eigen::MatrixXd tau_temp =
37 |       data_sum_squares - card * mubar * mubar.transpose();
38 |   tau_temp += (card * hypers.var_scaling / (card + hypers.var_scaling)) *
39 |               (mubar - hypers.mean) * (mubar - hypers.mean).transpose();
40 |   scale_inv = tau_temp + hypers.scale_inv;
41 |   scale = stan::math::inverse_spd(scale_inv);
42 |   scale_chol = Eigen::LLT<Eigen::MatrixXd>(scale).matrixU();
43 | 
44 |   // Proto conversion (scale_inv itself is not written to the message)
45 |   ProtoHypers out;
46 |   bayesmix::to_proto(mean, out.mutable_nnw_state()->mutable_mean());
47 |   out.mutable_nnw_state()->set_var_scaling(var_scaling);
48 |   out.mutable_nnw_state()->set_deg_free(deg_free);
49 |   bayesmix::to_proto(scale, out.mutable_nnw_state()->mutable_scale());
50 |   bayesmix::to_proto(scale_chol,
51 |                      out.mutable_nnw_state()->mutable_scale_chol());
52 |   return std::make_shared<ProtoHypers>(out);
53 | }
54 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/nnw_updater.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_NNW_UPDATER_H_
 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_NNW_UPDATER_H_
 3 | 
 4 | #include "semi_conjugate_updater.h"
 5 | #include "src/hierarchies/likelihoods/multi_norm_likelihood.h"
 6 | #include "src/hierarchies/priors/nw_prior_model.h"
 7 | 
 8 | /**
 9 |  * Updater specific for the `MultiNormLikelihood` used in combination
10 |  * with `NWPriorModel`, that is the model
11 |  *
12 |  * \f[
13 |  *      y_i \mid \bm{\mu}, \Sigma &\stackrel{\small\mathrm{iid}}{\sim}
14 |  * N_d(\bm{\mu}, \Sigma) \\
15 |  *      \bm{\mu} \mid \Sigma &\sim N_d(\bm{\mu}_0, \Sigma / \lambda) \\
16 |  *      \Sigma^{-1} &\sim Wishart(\nu, \Psi)
17 |  * \f]
18 |  *
19 |  * It exploits the conjugacy of the model to sample the full conditional of
20 |  * \f$ (\bm{\mu}, \Sigma) \f$ by calling `NWPriorModel::sample` with updated
21 |  * parameters.
22 |  */
23 | 
24 | class NNWUpdater
25 |     : public SemiConjugateUpdater<MultiNormLikelihood, NWPriorModel> {
26 |  public:
27 |   NNWUpdater() = default;
28 |   ~NNWUpdater() = default;
29 | 
30 |   bool is_conjugate() const override { return true; }
31 | 
32 |   ProtoHypersPtr compute_posterior_hypers(AbstractLikelihood &like,
33 |                                           AbstractPriorModel &prior) override;
34 | 
35 |   //! Returns a copy of this updater on which `clear_hypers()` has been called
36 |   std::shared_ptr<AbstractUpdater> clone() const override {
37 |     auto copy = std::make_shared<NNWUpdater>(*this);
38 |     copy->clear_hypers();
39 |     return copy;
40 |   }
41 | };
42 | 
43 | #endif  // BAYESMIX_HIERARCHIES_UPDATERS_NNW_UPDATER_H_
44 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/nnxig_updater.cc:
--------------------------------------------------------------------------------
 1 | #include "nnxig_updater.h"
 2 | 
 3 | #include "src/hierarchies/likelihoods/states/includes.h"
 4 | #include "src/hierarchies/priors/hyperparams.h"
 5 | 
 6 | AbstractUpdater::ProtoHypersPtr NNxIGUpdater::compute_posterior_hypers(
 7 |     AbstractLikelihood& like, AbstractPriorModel& prior) {
 8 |   // Recover the concrete likelihood and prior types
 9 |   auto& likecast = downcast_likelihood(like);
10 |   auto& priorcast = downcast_prior(prior);
11 | 
12 |   // Current state, data summaries and prior hyperparameters
13 |   auto cur = likecast.get_state();
14 |   int n = likecast.get_card();
15 |   double sum_y = likecast.get_data_sum();
16 |   double sum_y2 = likecast.get_data_sum_squares();
17 |   auto prior_hp = priorcast.get_hypers();
18 |   // Empty cluster: the prior hyperparameters are returned unchanged
19 |   if (n == 0) {
20 |     return priorcast.get_hypers_proto();
21 |   }
22 | 
23 |   // Posterior hyperparameters (sq_dev presumably sum_i (y_i - cur.mean)^2)
24 |   const double sq_dev =
25 |       sum_y2 - 2 * cur.mean * sum_y + n * cur.mean * cur.mean;
26 |   const double denom = n * prior_hp.var + cur.var;
27 |   const double post_mean =
28 |       (prior_hp.var * sum_y + cur.var * prior_hp.mean) / denom;
29 |   const double post_var = (cur.var * prior_hp.var) / denom;
30 |   const double post_shape = prior_hp.shape + 0.5 * n;
31 |   const double post_scale = prior_hp.scale + 0.5 * sq_dev;
32 | 
33 |   // Proto conversion
34 |   ProtoHypers out;
35 |   auto* nnxig = out.mutable_nnxig_state();
36 |   nnxig->set_mean(post_mean);
37 |   nnxig->set_var(post_var);
38 |   nnxig->set_shape(post_shape);
39 |   nnxig->set_scale(post_scale);
40 |   return std::make_shared<ProtoHypers>(out);
41 | }
42 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/nnxig_updater.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_HIERARCHIES_UPDATERS_NNXIG_UPDATER_H_
 2 | #define BAYESMIX_HIERARCHIES_UPDATERS_NNXIG_UPDATER_H_
 3 | 
 4 | #include "semi_conjugate_updater.h"
 5 | #include "src/hierarchies/likelihoods/uni_norm_likelihood.h"
 6 | #include "src/hierarchies/priors/nxig_prior_model.h"
 7 | 
 8 | /**
 9 |  * Updater specific for the `UniNormLikelihood` used in combination
10 |  * with `NxIGPriorModel`, that is the model
11 |  *
12 |  * \f[
13 |  *      y_i \mid \mu, \sigma^2 &\stackrel{\small\mathrm{iid}}{\sim} N(\mu,
14 |  * \sigma^2) \\
15 |  *      \mu &\sim N(\mu_0, \eta^2) \\
16 |  *      \sigma^2 & \sim InvGamma(a,b)
17 |  * \f]
18 |  *
19 |  * It exploits the semi-conjugacy of the model to sample the full conditional
20 |  * of \f$ (\mu, \sigma^2) \f$ by calling `NxIGPriorModel::sample` with updated
21 |  * parameters
22 |  */
23 | 
24 | class NNxIGUpdater
25 |     : public SemiConjugateUpdater<UniNormLikelihood, NxIGPriorModel> {
26 |  public:
27 |   NNxIGUpdater() = default;
28 |   ~NNxIGUpdater() = default;
29 | 
30 |   ProtoHypersPtr compute_posterior_hypers(AbstractLikelihood &like,
31 |                                           AbstractPriorModel &prior) override;
32 | 
33 |   //! Returns a copy of this updater on which `clear_hypers()` has been called
34 |   std::shared_ptr<AbstractUpdater> clone() const override {
35 |     auto copy = std::make_shared<NNxIGUpdater>(*this);
36 |     copy->clear_hypers();
37 |     return copy;
38 |   }
39 | };
40 | 
41 | #endif  // BAYESMIX_HIERARCHIES_UPDATERS_NNXIG_UPDATER_H_
42 | 


--------------------------------------------------------------------------------
/src/hierarchies/updaters/target_lpdf_unconstrained.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_SRC_HIERARCHIES_UPDATERS_TARGET_LPDF_UNCONSTRAINED_H_
 2 | #define BAYESMIX_SRC_HIERARCHIES_UPDATERS_TARGET_LPDF_UNCONSTRAINED_H_
 3 | 
 4 | #include "src/hierarchies/likelihoods/abstract_likelihood.h"
 5 | #include "src/hierarchies/priors/abstract_prior_model.h"
 6 | 
 7 | //! Functor that computes the log-full conditional distribution
 8 | //! of a specific hierarchy.
 9 | //! Used by Metropolis-like updaters, especially when the gradient
10 | //! of the target lpdf is required.
11 | class target_lpdf_unconstrained {
12 |  protected:
13 |   AbstractLikelihood* like;  //!< likelihood term; not deleted by this class
14 |   AbstractPriorModel* prior;  //!< prior term; not deleted by this class
15 | 
16 |  public:
17 |   target_lpdf_unconstrained(AbstractLikelihood* like,
18 |                             AbstractPriorModel* prior)
19 |       : like(like), prior(prior) {}
20 | 
21 |   //! Computes the log-full conditional at `x`, that is simply the
22 |   //! sum of `cluster_lpdf_from_unconstrained` in `AbstractLikelihood`
23 |   //! and `lpdf_from_unconstrained` in `AbstractPriorModel`
24 |   template <typename T>
25 |   T operator()(const Eigen::Matrix<T, Eigen::Dynamic, 1>& x) const {
26 |     return like->cluster_lpdf_from_unconstrained(x) +
27 |            prior->lpdf_from_unconstrained(x);
28 |   }
29 | };
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/src/includes.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_INCLUDES_H_
 2 | #define BAYESMIX_INCLUDES_H_
 3 | 
 4 | #include "algorithm_params.pb.h"
 5 | #include "algorithms/blocked_gibbs_algorithm.h"
 6 | #include "algorithms/load_algorithms.h"
 7 | #include "algorithms/neal2_algorithm.h"
 8 | #include "algorithms/neal3_algorithm.h"
 9 | #include "algorithms/neal8_algorithm.h"
10 | #include "collectors/file_collector.h"
11 | #include "collectors/memory_collector.h"
12 | #include "hierarchies/fa_hierarchy.h"
13 | #include "hierarchies/lapnig_hierarchy.h"
14 | #include "hierarchies/lin_reg_uni_hierarchy.h"
15 | #include "hierarchies/load_hierarchies.h"
16 | #include "hierarchies/nnig_hierarchy.h"
17 | #include "hierarchies/nnw_hierarchy.h"
18 | #include "hierarchies/nnxig_hierarchy.h"
19 | #include "mixings/dirichlet_mixing.h"
20 | #include "mixings/load_mixings.h"
21 | #include "mixings/logit_sb_mixing.h"
22 | #include "mixings/mixture_finite_mixing.h"
23 | #include "mixings/pityor_mixing.h"
24 | #include "mixings/truncated_sb_mixing.h"
25 | #include "runtime/factory.h"
26 | #include "utils/cluster_utils.h"
27 | #include "utils/eval_like.h"
28 | #include "utils/io_utils.h"
29 | #include "utils/proto_utils.h"
30 | 
31 | #endif  // BAYESMIX_INCLUDES_H_
32 | 


--------------------------------------------------------------------------------
/src/mixings/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | target_sources(bayesmix
 2 |   PUBLIC
 3 |     base_mixing.h
 4 |     dirichlet_mixing.h
 5 |     dirichlet_mixing.cc
 6 |     logit_sb_mixing.cc
 7 |     logit_sb_mixing.h
 8 |     mixture_finite_mixing.cc
 9 |     mixture_finite_mixing.h
10 |     pityor_mixing.h
11 |     pityor_mixing.cc
12 |     truncated_sb_mixing.h
13 |     truncated_sb_mixing.cc
14 | )
15 | 


--------------------------------------------------------------------------------
/src/mixings/load_mixings.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_MIXINGS_LOAD_MIXINGS_H_
 2 | #define BAYESMIX_MIXINGS_LOAD_MIXINGS_H_
 3 | 
 4 | #include <functional>
 5 | #include <memory>
 6 | 
 7 | #include "abstract_mixing.h"
 8 | #include "dirichlet_mixing.h"
 9 | #include "logit_sb_mixing.h"
10 | #include "mixture_finite_mixing.h"
11 | #include "pityor_mixing.h"
12 | #include "src/runtime/factory.h"
13 | #include "truncated_sb_mixing.h"
14 | 
15 | //! Loads all available `Mixing` objects into the appropriate factory, so that
16 | //! they are ready to be chosen and used at runtime.
17 | 
18 | template <class AbstractProduct>
19 | using Builder = std::function<std::shared_ptr<AbstractProduct>()>;
20 | 
21 | using MixingFactory = Factory<bayesmix::MixingId, AbstractMixing>;
22 | 
23 | __attribute__((constructor)) static void load_mixings() {
24 |   MixingFactory &factory = MixingFactory::Instance();
25 |   // Each mixing is registered under the id of a default-constructed
26 |   // instance, together with a builder that default-constructs it
27 |   Builder<AbstractMixing> dp_builder = []() {
28 |     return std::make_shared<DirichletMixing>();
29 |   };
30 |   factory.add_builder(DirichletMixing().get_id(), dp_builder);
31 |   Builder<AbstractMixing> logsb_builder = []() {
32 |     return std::make_shared<LogitSBMixing>();
33 |   };
34 |   factory.add_builder(LogitSBMixing().get_id(), logsb_builder);
35 |   Builder<AbstractMixing> mfm_builder = []() {
36 |     return std::make_shared<MixtureFiniteMixing>();
37 |   };
38 |   factory.add_builder(MixtureFiniteMixing().get_id(), mfm_builder);
39 |   Builder<AbstractMixing> py_builder = []() {
40 |     return std::make_shared<PitYorMixing>();
41 |   };
42 |   factory.add_builder(PitYorMixing().get_id(), py_builder);
43 |   Builder<AbstractMixing> truncsb_builder = []() {
44 |     return std::make_shared<TruncatedSBMixing>();
45 |   };
46 |   factory.add_builder(TruncatedSBMixing().get_id(), truncsb_builder);
47 | }
48 | 
49 | #endif  // BAYESMIX_MIXINGS_LOAD_MIXINGS_H_
50 | 


--------------------------------------------------------------------------------
/src/plots/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.14.0)
2 | 


--------------------------------------------------------------------------------
/src/plots/plot_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_PLOTS_PLOT_UTILS_H_
 2 | #define BAYESMIX_PLOTS_PLOT_UTILS_H_
 3 | 
 4 | #include <matplot/matplot.h>
 5 | 
 6 | #include <numeric>
 7 | #include <stan/math/rev.hpp>
 8 | 
 9 | /*
10 |  * Converts the support points of a 2d function and associated values
11 |  * from the format {(x_i, y_i), z_i} stored in grid and vals respectively,
12 |  * to grids over the 2d domain. Used in density_plot_2d.
13 |  */
14 | std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>,
15 |            std::vector<std::vector<double>>>
16 | to_mesh(const Eigen::MatrixXd &grid, const Eigen::VectorXd &vals);
17 | 
18 | void density_plot_1d(const Eigen::MatrixXd &grid, const Eigen::VectorXd &dens,
19 |                      const std::string &outfile);
20 | 
21 | void density_plot_2d(const Eigen::MatrixXd &grid, const Eigen::VectorXd &dens_,
22 |                      const std::string &outfile, const bool log_scale = true);
23 | 
24 | void num_clus_trace(const Eigen::MatrixXd &num_clus_chain,
25 |                     const std::string &outfile);
26 | 
27 | void num_clus_bar(const Eigen::MatrixXd &num_clus_chain_,
28 |                   const std::string &outfile);
29 | 
30 | #endif  // BAYESMIX_PLOTS_PLOT_UTILS_H_
31 | 


--------------------------------------------------------------------------------
/src/proto/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | 


--------------------------------------------------------------------------------
/src/proto/CMakeLists.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/src/proto/CMakeLists.txt


--------------------------------------------------------------------------------
/src/proto/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/src/proto/__init__.py


--------------------------------------------------------------------------------
/src/proto/algorithm_id.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package bayesmix;
 4 | 
 5 | /*
 6 |  * Enum for the different types of algorithms.
 7 |  * References
 8 |  * [1] R. M. Neal, Markov Chain Sampling Methods for  Dirichlet Process Mixture Models. JCGS(2000)
 9 |  * [2] H. Ishwaran and L. F. James, Gibbs Sampling Methods for Stick-Breaking Priors. JASA(2001)
10 |  * [3] S. Jain and R. M. Neal,  A Split-Merge Markov Chain Monte Carlo Procedure for the Dirichlet Process Mixture Model. JCGS (2004)
11 |  * [4] M. Kalli, J. Griffin and S. G. Walker, Slice sampling mixture models. Stat and Comp. (2011)
12 |  */
13 | enum AlgorithmId {
14 |     UNKNOWN_ALGORITHM = 0;
15 |     Neal2 = 1; // Neal's Algorithm 2, see [1]
16 |     Neal3 = 2; // Neal's Algorithm 3, see [1]
17 |     Neal8 = 3; // Neal's Algorithm 8, see [1]
18 |     BlockedGibbs = 4; // Ishwaran and James Blocked Gibbs, see [2]
19 |     SplitMerge = 5; // Jain and Neal's Split&Merge, see [3]. NOT IMPLEMENTED YET!
20 |     Slice = 6; // Slice sampling, see [4].  NOT IMPLEMENTED YET!
21 | }
22 | 


--------------------------------------------------------------------------------
/src/proto/algorithm_params.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package bayesmix;
 4 | 
 5 | /*
 6 |  * Parameters used in the BaseAlgorithm class and its children.
 7 |  */
 8 | message AlgorithmParams {
 9 |   string algo_id = 1; // Id of the Algorithm. Must match the ones in the AlgorithmId enum
10 |   uint32 rng_seed = 2; // Seed for the random number generator
11 |   uint32 iterations = 3; // Total number of iterations of the MCMC chain
12 |   uint32 burnin = 4; // Number of iterations to discard as burn-in
13 |   uint32 init_num_clusters = 5; // Number of clusters to initialize the algorithm. It may be overridden by conditional mixings for which the number of components is fixed (e.g. TruncatedSBMixing). In this case, this value is ignored.
14 |   uint32 neal8_n_aux = 6; // Number of auxiliary unique values for the Neal8 algorithm
15 |   uint32 splitmerge_n_restr_gs_updates = 7; // Number of restricted GS scans for each MH step.
16 |   uint32 splitmerge_n_mh_updates = 8; // Number of MH updates for each iteration of Split and Merge algorithm.
17 |   uint32 splitmerge_n_full_gs_updates = 9; // Number of full GS scans for each iteration of Split and Merge algorithm.
18 | }
19 | 


--------------------------------------------------------------------------------
/src/proto/algorithm_state.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | import "mixing_state.proto";
 4 | import "ls_state.proto";
 5 | import "matrix.proto";
 6 | import "hierarchy_prior.proto";
 7 | import "distribution.proto";
 8 | 
 9 | package bayesmix;
10 | 
11 | 
12 | /*
13 |  * This message represents the state of a Gibbs sampler for
14 |  * a mixture model. All algorithms must be able to handle this
15 |  * message, by filling it with the current state of the sampler
16 |  * in the `get_state_as_proto` method.
17 |  */
18 | message AlgorithmState {
19 |   message ClusterState {
20 |     // Represents the state of a single cluster or component of
21 |     // the mixture model. The first field is just a `oneof` wrapper
22 |     // around the different possible states.
23 |     oneof val {
24 |       UniLSState uni_ls_state = 1; // State of a univariate location-scale family
25 |       MultiLSState multi_ls_state = 2; // State of a multivariate location-scale family
26 |       LinRegUniLSState lin_reg_uni_ls_state = 4; // State of a linear regression univariate location-scale family
27 |       Vector general_state = 5; // Just a vector of doubles
28 |       FAState fa_state = 6; // State of a Mixture of Factor Analysers
29 | 
30 |     }
31 |     int32 cardinality = 3; // How many observations are in this cluster
32 |   }
33 | 
34 |   repeated ClusterState cluster_states = 1; // The state of each cluster
35 |   repeated int32 cluster_allocs = 2 [packed = true]; // Vector of allocations into clusters, one for each observation
36 |   MixingState mixing_state = 3; // The state of the `Mixing`
37 |   int32 iteration_num = 4; // The iteration number
38 | 
39 |   message HierarchyHypers {
40 |     // Current values of the Hyperparameters of the Hierarchy
41 |     oneof val {
42 |       Vector general_state = 1;
43 |       NIGDistribution nnig_state = 2;
44 |       NWDistribution nnw_state = 3;
45 |       MultiNormalIGDistribution lin_reg_uni_state = 4;
46 |       NxIGDistribution nnxig_state = 5;
47 |       FAPriorDistribution fa_state = 7;
48 |     }
49 |   }
50 |   HierarchyHypers hierarchy_hypers = 5; // The current values of the hyperparameters of the hierarchy
51 | 
52 | }
53 | 


--------------------------------------------------------------------------------
/src/proto/cpp/.gitignore:
--------------------------------------------------------------------------------
1 | # Protocol Buffers implementation files (version-dependent)
2 | *.pb.cc
3 | *.pb.h
4 | 


--------------------------------------------------------------------------------
/src/proto/distribution.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | import "matrix.proto";
 4 | 
 5 | package bayesmix;
 6 | 
 7 | /*
 8 |  * Parameters defining a univariate normal distribution
 9 |  */
10 | message UniNormalDistribution {
11 |   double mean = 1;
12 |   double var = 2;
13 | }
14 | 
15 | /*
16 |  * Parameters defining a multivariate normal distribution
17 |  */
18 | message MultiNormalDistribution {
19 |   Vector mean = 1;
20 |   Matrix var = 2;
21 | }
22 | 
23 | /*
24 |  * Parameters defining a gamma distribution with density
25 |  * f(x) = x^(shape-1) * exp(-rate * x) / Gamma(shape)
26 |  */
27 | message GammaDistribution {
28 |   double shape = 1;
29 |   double rate = 2;
30 | }
31 | 
32 | /*
33 |  * Parameters defining an Inverse Wishart distribution
34 |  */
35 | message InvWishartDistribution {
36 |   double deg_free = 1;
37 |   Matrix scale = 2;
38 | }
39 | 
40 | /*
41 |  * Parameters defining a beta distribution
42 |  */
43 | message BetaDistribution {
44 |   double shape_a = 1;
45 |   double shape_b = 2;
46 | }
47 | 
48 | /*
49 |  * Parameters of a  Normal Inverse-Gamma distribution
50 |  * with density
51 |  * f(x, y) = N(x | mu, y/var_scaling) * IG(y | shape, scale)
52 |  */
53 | message NIGDistribution {
54 |   double mean = 1;
55 |   double var_scaling = 2;
56 |   double shape = 3;
57 |   double scale = 4;
58 | }
59 | 
60 | /*
61 |  * Parameters of a  Normal x Inverse-Gamma distribution
62 |  * with density
63 |  * f(x, y) = N(x | mu, var) * IG(y | shape, scale)
64 |  */
65 | message NxIGDistribution {
66 |   double mean = 1;
67 |   double var = 2;
68 |   double shape = 3;
69 |   double scale = 4;
70 | }
71 | 
72 | /*
73 |  * Parameters of a Normal Wishart distribution
74 |  * with density
75 |  * f(x, y) = N(x | mu, (y * var_scaling)^{-1}) * W(y | deg_free, scale)
76 |  * where x is a vector and y is a matrix (spd)
77 |  */
78 | message NWDistribution {
79 |   Vector mean = 1;
80 |   double var_scaling = 2;
81 |   double deg_free = 3;
82 |   Matrix scale = 4;
83 |   Matrix scale_chol = 5;
84 | }
85 | 
86 | 
87 | /*
88 |  * Parameters for the Normal Inverse Gamma distribution commonly employed in
89 |  * linear regression models, with density
90 |  * f(beta, var) = N(beta | mean, var * var_scaling^{-1}) * IG(var | shape, scale)
91 |  */
92 | message MultiNormalIGDistribution {
93 |   Vector mean = 1;
94 |   Matrix var_scaling = 2;
95 |   double shape = 3;
96 |   double scale = 4;
97 | }
98 | 


--------------------------------------------------------------------------------
/src/proto/hierarchy_id.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package bayesmix;
 4 | 
 5 | /*
 6 |  * Enum for the different types of Hierarchy.
 7 |  */
 8 | enum HierarchyId {
 9 |     UNKNOWN_HIERARCHY = 0;
10 |     NNIG = 1; // Normal - Normal Inverse Gamma
11 |     NNW = 2; // Normal - Normal Wishart
12 |     LinRegUni = 3; // Linear Regression (univariate response)
13 |     LapNIG = 4; // Laplace - Normal Inverse Gamma
14 |     FA = 5; // Factor Analysers
15 |     NNxIG = 6; // Normal - Normal x Inverse Gamma
16 |     PythonHier = 7; // Generic python hierarchy
17 | }
18 | 


--------------------------------------------------------------------------------
/src/proto/ls_state.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | import "matrix.proto";
 4 | 
 5 | package bayesmix;
 6 | 
 7 | /*
 8 |  * Parameters of a univariate location-scale family of distributions.
 9 |  */
10 | message UniLSState {
11 |   double mean = 1;
12 |   double var = 2;
13 | }
14 | 
15 | /*
16 |  * Parameters of a multivariate location-scale family of distributions,
17 |  * parameterized by mean and precision (inverse of variance). For
18 |  * convenience, we also store the Cholesky factor of the precision matrix.
19 |  */
20 | message MultiLSState {
21 |   Vector mean = 1;
22 |   Matrix prec = 2;
23 |   Matrix prec_chol = 3;
24 | }
25 | 
26 | /*
27 |  * Parameters of a univariate linear regression
28 |  */
29 | message LinRegUniLSState {
30 |   Vector regression_coeffs = 1; // regression coefficients
31 |   double var = 2; // variance of the noise
32 | }
33 | 
34 | message FAState {
35 |   Vector mu = 1;
36 |   Vector psi = 2;
37 |   Matrix eta = 3;
38 |   Matrix lambda = 4;
39 | }
40 | 


--------------------------------------------------------------------------------
/src/proto/matrix.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package bayesmix;
 4 | 
 5 | /*
 6 |  * Message representing a vector of doubles.
 7 |  */
 8 | message Vector {
 9 |   int32 size = 1; // number of elements in the vector
10 |   repeated double data = 2 [packed = true]; // vector elements
11 | }
12 | 
13 | /*
14 |  * Message representing a matrix of doubles.
15 |  */
16 | message Matrix {
17 |   int32 rows = 1; // number of rows
18 |   int32 cols = 2; // number of columns
19 |   repeated double data = 3 [packed = true]; // matrix elements
20 |   bool rowmajor = 4; // if true, the data is read in row-major order
21 | }
22 | 


--------------------------------------------------------------------------------
/src/proto/mixing_id.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package bayesmix;
 4 | 
 5 | /*
 6 |  * Enum for the different types of Mixing.
 7 |  */
 8 | enum MixingId {
 9 |     UNKNOWN_MIXING = 0;
10 |     DP = 1; // Dirichlet Process
11 |     PY = 2; // Pitman-Yor Process
12 |     LogSB = 3; // Logit Stick-Breaking Process
13 |     TruncSB = 4; // Truncated Stick-Breaking Process
14 |     MFM = 5; // Mixture of finite mixtures
15 |     PythonMix = 6; // Generic python mixing
16 | }
17 | 


--------------------------------------------------------------------------------
/src/proto/mixing_prior.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | import "distribution.proto";
 4 | import "mixing_state.proto";
 5 | import "matrix.proto";
 6 | 
 7 | package bayesmix;
 8 | 
 9 | 
10 | /*
11 |  * Prior for the concentration parameter of a Dirichlet process
12 |  */
13 | message DPPrior {
14 |   message GammaPrior {
15 |     GammaDistribution totalmass_prior = 1;
16 |   }
17 | 
18 |   oneof totalmass {
19 |     DPState fixed_value = 1; // No prior, just a fixed value
20 |     GammaPrior gamma_prior = 2; // Gamma prior on the total mass
21 |   }
22 | }
23 | /*
24 |  * Prior for the Poisson rate and Dirichlet parameters of a MFM (Finite Dirichlet) process.
25 |  * For the moment, we only support fixed values
26 |  */
27 | message MFMPrior {
28 |   oneof totalmass {
29 |     MFMState fixed_value = 1; // No prior, just a fixed value
30 |   }
31 | }
32 | 
33 | /*
34 |  * Prior for the strength and discount parameters of a Pitman-Yor process.
35 |  * For the moment, we only support fixed values
36 |  */
37 | message PYPrior {
38 |   oneof totalmass {
39 |     PYState fixed_values = 1;
40 |   }
41 | }
42 | 
43 | /*
44 |  * Definition of the parameters of a Logit-Stick Breaking process.
45 |  */
46 | message LogSBPrior {
47 |   oneof coeff {
48 |     MultiNormalDistribution normal_prior = 1; // Normal prior on the regression coefficients
49 |   }
50 |   double step_size = 2;  // Step size for the MALA algorithm used for posterior inference (TODO: move?)
51 |   uint32 num_components = 3; // Number of components in the process
52 | }
53 | 
54 | /*
55 |  * Definition of a generic container for the prior parameters to be used in Python
56 |  */
57 | message PythonMixPrior {
58 |   oneof prior {
59 |     Vector values = 1;
60 |   }
61 | }
62 | 
63 | /*
64 |  * Definition of the parameters of a truncated Stick-Breaking process
65 |  */
66 | message TruncSBPrior {
67 |   message BetaPriors {
68 |     // General stick-breaking distributions
69 |     repeated BetaDistribution beta_distributions = 1;
70 |   }
71 |   message DPPrior {
72 |     // Truncated Dirichlet process
73 |     double totalmass = 1;
74 |   }
75 | 
76 |   message PYPrior {
77 |     // Truncated Pitman-Yor process
78 |     double strength = 1;
79 |     double discount = 2;
80 |   }
81 | 
82 |   oneof prior {
83 |     BetaPriors beta_priors = 1; // General stick-breaking distributions
84 |     DPPrior dp_prior = 2; // Truncated Dirichlet process
85 |     PYPrior py_prior = 3; // Truncated Pitman-Yor process
86 |   }
87 |   uint32 num_components = 4; // Number of components in the process
88 | 
89 |   // If true we must use the Slice Sampler, and num_components is used only for
90 |   // the initialization
91 |   bool infinite_mixture = 5;
92 | }
93 | 


--------------------------------------------------------------------------------
/src/proto/mixing_state.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package bayesmix;
 4 | 
 5 | import "matrix.proto";
 6 | 
 7 | /*
 8 |  * State of a Dirichlet process
 9 |  */
10 | message DPState {
11 |   double totalmass = 1; // the total mass of the DP
12 | }
13 | 
14 | /*
15 |  * State of a Pitman-Yor process
16 |  */
17 | message PYState {
18 |   double strength = 1;
19 |   double discount = 2;
20 | }
21 | 
22 | /*
23 |  * State of a Logit-Stick Breaking process
24 |  */
25 | message LogSBState {
26 |   Matrix regression_coeffs = 1; // Num_Components x Num_Features matrix. Each row is the regression coefficients for a component.
27 | }
28 | 
29 | /*
30 |  * State of a truncated stick-breaking process. For convenience, we also store the logarithm of the weights
31 |  */
32 | message TruncSBState {
33 |   Vector sticks = 1;
34 |   Vector logweights = 2;
35 | }
36 | 
37 | /*
38 |  * State of a MFM (Finite Dirichlet) process
39 |  */
40 | message MFMState {
41 |   double lambda = 1; // rate parameter of Poisson prior on number of components of the MFM
42 |   double gamma = 2; // parameter of the dirichlet distribution for the mixing weights
43 | }
44 | 
45 | /*
46 |  * Wrapper of all possible mixing states into a single oneof
47 |  */
48 | message MixingState {
49 |   oneof state {
50 |     DPState dp_state = 1;
51 |     PYState py_state = 2;
52 |     LogSBState log_sb_state = 3;
53 |     TruncSBState trunc_sb_state = 4;
54 |     MFMState mfm_state = 5;
55 |     Vector general_state = 6;
56 |   }
57 | }
58 | 


--------------------------------------------------------------------------------
/src/proto/py/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *pb2.py
3 | 


--------------------------------------------------------------------------------
/src/proto/py/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayesmix-dev/bayesmix/78f6634c23130e922fb07e05793562e3fc5655e4/src/proto/py/__init__.py


--------------------------------------------------------------------------------
/src/proto/semihdp.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | import "ls_state.proto";
 4 | import "matrix.proto";
 5 | 
 6 | package bayesmix;
 7 | 
 8 | message SemiHdpState {
 9 |   message ClusterState {
10 |     oneof val {
11 |       UniLSState uni_ls_state = 1;
12 |       MultiLSState multi_ls_state = 2;
13 |       LinRegUniLSState lin_reg_uni_ls_state = 4;
14 |       Vector general_state = 5;
15 |     }
16 |     int32 cardinality = 3;
17 |   }
18 | 
19 |   message RestaurantState {
20 |       repeated ClusterState theta_stars = 1;
21 |       repeated int32 n_by_clus = 2;
22 |       repeated int32 table_to_shared = 3;
23 |       repeated int32 table_to_idio = 4;
24 |   }
25 | 
26 |   message GroupState {
27 |       repeated int32 cluster_allocs = 1;
28 |   }
29 | 
30 |   repeated RestaurantState restaurants = 1;
31 |   repeated GroupState groups = 2;
32 |   repeated ClusterState taus = 3;
33 |   repeated int32 c = 4;
34 |   double w = 5;
35 | }
36 | 
37 | message SemiHdpParams {
38 |   message PseudoPriorParams {
39 |     double card_weight = 1;
40 |     double mean_perturb_sd = 2;
41 |     double var_perturb_frac = 3;
42 |   }
43 | 
44 |   message WPriorParams {
45 |     double shape1 = 1;
46 |     double shape2 = 2;
47 |   }
48 | 
49 |   PseudoPriorParams pseudo_prior = 1;
50 | 
51 |   double dirichlet_concentration = 2;
52 | 
53 |   string rest_allocs_update = 3; // Either "full", "metro_base", "metro_dist"
54 | 
55 |   double totalmass_rest = 4;
56 | 
57 |   double totalmass_hdp = 5;
58 | 
59 |   WPriorParams w_prior = 6;
60 | 
61 | }
62 | 


--------------------------------------------------------------------------------
/src/runtime/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | target_sources(bayesmix
2 |   PUBLIC
3 |     factory.h
4 | )
5 | 


--------------------------------------------------------------------------------
/src/utils/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | target_sources(bayesmix
 2 |   PUBLIC
 3 |     cluster_utils.h
 4 |     cluster_utils.cc
 5 |     eigen_utils.h
 6 |     eigen_utils.cc
 7 |     eval_like.h
 8 |     eval_like.cc
 9 |     distributions.h
10 |     distributions.cc
11 |     io_utils.h
12 |     io_utils.cc
13 |     proto_utils.h
14 |     proto_utils.cc
15 |     rng.h
16 |     testing_utils.h
17 |     testing_utils.cc
18 |     covariates_getter.h
19 | )
20 | 


--------------------------------------------------------------------------------
/src/utils/cluster_utils.cc:
--------------------------------------------------------------------------------
 1 | #include "cluster_utils.h"
 2 | 
 3 | #include <Eigen/SparseCore>
 4 | #include <stan/math/rev.hpp>
 5 | 
 6 | #include "lib/progressbar/progressbar.h"
 7 | #include "proto_utils.h"
 8 | 
 9 | Eigen::MatrixXd bayesmix::posterior_similarity(
10 |     const Eigen::MatrixXd &alloc_chain) {
11 |   const unsigned int n_obs = alloc_chain.cols();
12 |   Eigen::MatrixXd counts = Eigen::MatrixXd::Zero(n_obs, n_obs);
13 |   // Fill the strictly lower triangle: rows in which columns r and c agree
14 |   for (unsigned int r = 1; r < n_obs; ++r) {
15 |     for (unsigned int c = 0; c < r; ++c) {
16 |       Eigen::ArrayXd delta = alloc_chain.col(r) - alloc_chain.col(c);
17 |       counts(r, c) = (delta == 0).count();
18 |     }
19 |   }
20 |   return counts / alloc_chain.rows();
21 | }
22 | 
23 | Eigen::VectorXi bayesmix::cluster_estimate(
24 |     const Eigen::MatrixXi &alloc_chain) {
25 |   // Chain layout: one row per MCMC iteration, one column per datum
26 |   const int n_iter = alloc_chain.rows();
27 |   const int n_data = alloc_chain.cols();
28 |   if (n_iter == 0) return Eigen::VectorXi(0);  // no iteration to choose from
29 |   progresscpp::ProgressBar bar(n_iter, 60);
30 | 
31 |   // Average co-clustering matrix (only its strictly lower triangle is used)
32 |   std::cout << "(Computing mean dissimilarity... " << std::flush;
33 |   Eigen::MatrixXd mean_diss =
34 |       bayesmix::posterior_similarity(alloc_chain.cast<double>());
35 |   std::cout << "Done)" << std::endl;
36 | 
37 |   // Squared Frobenius distance of each iteration's co-clustering from the mean
38 |   std::cout << "Computing Frobenius norm error... " << std::endl;
39 |   Eigen::VectorXd errors = Eigen::VectorXd::Zero(n_iter);  // accumulated below
40 |   for (int k = 0; k < n_iter; k++) {
41 |     for (int i = 0; i < n_data; i++) {
42 |       for (int j = 0; j < i; j++) {
43 |         const double x = (alloc_chain(k, i) == alloc_chain(k, j));
44 |         errors(k) += (x - mean_diss(i, j)) * (x - mean_diss(i, j));
45 |       }
46 |     }
47 |     // Progress bar
48 |     ++bar;
49 |     bar.display();
50 |   }
51 |   bar.done();
52 |   std::cout << "Done" << std::endl;  // Print Ending Message
53 | 
54 |   // Return the visited partition with the least error (minCoeff sets ibest)
55 |   std::ptrdiff_t ibest = 0;
56 |   errors.minCoeff(&ibest);
57 |   return alloc_chain.row(ibest).transpose();
58 | }
59 | 


--------------------------------------------------------------------------------
/src/utils/cluster_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_UTILS_CLUSTER_UTILS_H_
 2 | #define BAYESMIX_UTILS_CLUSTER_UTILS_H_
 3 | 
 4 | #include <stan/math/rev.hpp>
 5 | 
 6 | //! \file cluster_utils.h
 7 | //! The `cluster_utils.h` file includes some utilities for cluster estimation.
 8 | //! These functions only use Eigen objects.
 9 | 
10 | namespace bayesmix {
11 | 
12 | //! Computes the posterior similarity matrix of the data
13 | Eigen::MatrixXd posterior_similarity(const Eigen::MatrixXd &alloc_chain);
14 | 
15 | //! Estimates the clustering structure of the data via LS minimization
16 | Eigen::VectorXi cluster_estimate(const Eigen::MatrixXi &alloc_chain);
17 | 
18 | }  // namespace bayesmix
19 | 
20 | #endif  // BAYESMIX_UTILS_CLUSTER_UTILS_H_
21 | 


--------------------------------------------------------------------------------
/src/utils/covariates_getter.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_SRC_UTILS_COVARIATES_GETTER_H
 2 | #define BAYESMIX_SRC_UTILS_COVARIATES_GETTER_H
 3 | 
 4 | #include <Eigen/Dense>
 5 | 
 6 | class covariates_getter {  // Row accessor over a covariate matrix it does not own
 7 |  protected:
 8 |   const Eigen::MatrixXd* covariates;  // address of the caller's matrix
 9 | 
10 |  public:
11 |   //! Stores only the address of `covariates_`, which must outlive this object
12 |   covariates_getter(const Eigen::MatrixXd& covariates_)
13 |       : covariates(&covariates_) {}
14 | 
15 |   //! No columns: empty row. Exactly one row: that row for any i. Else: row i
16 |   Eigen::RowVectorXd operator()(const size_t& i) const {
17 |     const Eigen::MatrixXd& table = *covariates;
18 |     if (table.cols() == 0) return Eigen::RowVectorXd(0);
19 |     // A single-row matrix is shared by every index
20 |     const size_t picked = (table.rows() == 1) ? 0 : i;
21 |     return table.row(picked);
22 |   }
23 | };
24 | 
25 | #endif  // BAYESMIX_SRC_UTILS_COVARIATES_GETTER_H
26 | 


--------------------------------------------------------------------------------
/src/utils/eigen_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_SRC_UTILS_EIGEN_UTILS_H_
 2 | #define BAYESMIX_SRC_UTILS_EIGEN_UTILS_H_
 3 | 
 4 | #include <stan/math/rev.hpp>
 5 | #include <vector>
 6 | 
 7 | //! @file eigen_utils.h
 8 | //! The `eigen_utils.h` file implements a few methods to manipulate groups of
 9 | //! matrices, mainly by joining different objects, as well as additional
10 | //! utilities for SPD checking and grid creation.
11 | 
12 | namespace bayesmix {
13 | //! Concatenates a vector of Eigen matrices along the rows
14 | //! @param mats The matrices to be concatenated
15 | //! @return     The resulting matrix
16 | //! @throw      std::invalid_argument if sizes mismatch
17 | Eigen::MatrixXd vstack(const std::vector<Eigen::MatrixXd> &mats);
18 | 
19 | //! Concatenates two matrices by row, modifying the first matrix in-place
20 | //! @throw std::invalid_argument if sizes mismatch
21 | void append_by_row(Eigen::MatrixXd *const a, const Eigen::MatrixXd &b);
22 | 
23 | //! Concatenates two matrices by row
24 | //! @param a,b The matrices to be concatenated
25 | //! @return    The resulting matrix
26 | //! @throw     std::invalid_argument if sizes mismatch
27 | Eigen::MatrixXd append_by_row(const Eigen::MatrixXd &a,
28 |                               const Eigen::MatrixXd &b);
29 | 
30 | //! Creates an Eigen matrix from a collection of rows
31 | //! @tparam Container  An std-compatible container implementing `operator[]`
32 | //! @param rows        The rows of the matrix, all assumed to have equal size
33 | //! @return            The resulting matrix (0 x 0 when `rows` is empty)
34 | template <template <typename...> class Container>
35 | Eigen::MatrixXd stack_vectors(const Container<Eigen::VectorXd> &rows) {
36 |   const int nrows = rows.size();
37 |   const int ncols = (nrows > 0) ? rows[0].size() : 0;  // rows[0] needs nrows > 0
38 |   Eigen::MatrixXd out(nrows, ncols);
39 |   for (int i = 0; i < nrows; i++) out.row(i) = rows[i].transpose();
40 |   return out;
41 | }
42 | 
43 | //! Checks whether the matrix is symmetric and positive semi-definite
44 | void check_spd(const Eigen::MatrixXd &mat);
45 | 
46 | //! Creates a 2d grid over rectangle [x1, x2] x [y1, y2], with nx * ny points
47 | //! @param x1, x2, y1, y2  Bounds for the rectangle
48 | //! @param nx, ny          Number of points created along the x, y directions
49 | //! @return                The resulting grid
50 | Eigen::MatrixXd get_2d_grid(const double x1, const double x2, const int nx,
51 |                             const double y1, const double y2, const int ny);
52 | }  // namespace bayesmix
53 | 
54 | #endif  // BAYESMIX_SRC_UTILS_EIGEN_UTILS_H_
55 | 


--------------------------------------------------------------------------------
/src/utils/eval_like.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_UTILS_EVAL_LIKE_H_
 2 | #define BAYESMIX_UTILS_EVAL_LIKE_H_
 3 | 
 4 | #include "src/includes.h"
 5 | #include "src/utils/eigen_utils.h"
 6 | 
 7 | namespace bayesmix {
 8 | 
 9 | //! Evaluates the (mixture) likelihood for all the states of the MCMC chain,
10 | //! in parallel.
11 | //! @param algo a shared_ptr to the algorithm used for MCMC sampling
12 | //! @param collector a pointer to the collector containing the chain
13 | //! @param low_memory if false, the whole chain will be loaded into the memory.
14 | //!        This leads to a speedup which ranges between 20-50% but increases
15 | //!        significantly the memory required.
16 | //!        If true, we load the chain by deserializing chunks of size
17 | //!        `chunk_size`, and process the states within each chunk in parallel.
18 | //! @param njobs used only if low_memory=false, the number of parallel jobs
19 | //! @param chunk_size see `low_memory`.
20 | Eigen::MatrixXd eval_lpdf_parallel(
21 |     const std::shared_ptr<BaseAlgorithm> algo, BaseCollector *const collector,
22 |     const Eigen::MatrixXd &grid,
23 |     const Eigen::RowVectorXd &hier_covariate = Eigen::RowVectorXd(0),
24 |     const Eigen::RowVectorXd &mix_covariate = Eigen::RowVectorXd(0),
25 |     const bool low_memory = false, const int njobs = 4,
26 |     const int chunk_size = 100);
27 | 
28 | namespace internal {
29 | 
30 | template <typename T>  // Deals the elements of x round-robin into num_slices buckets
31 | std::vector<std::vector<T>> gen_even_slices(const std::vector<T> &x,
32 |                                             const int num_slices) {
33 |   std::vector<std::vector<T>> buckets(num_slices);
34 |   int pos = 0;  // index of the element being placed
35 |   for (const T &item : x) buckets[pos++ % num_slices].push_back(item);
36 |   // Element k ended up in bucket k % num_slices, original order preserved
37 |   return buckets;
38 | }
39 | 
40 | Eigen::MatrixXd eval_lpdf_parallel_lowmemory(
41 |     const std::shared_ptr<BaseAlgorithm> algo, BaseCollector *const collector,
42 |     const Eigen::MatrixXd &grid,
43 |     const Eigen::RowVectorXd &hier_covariate = Eigen::RowVectorXd(0),
44 |     const Eigen::RowVectorXd &mix_covariate = Eigen::RowVectorXd(0),
45 |     const int chunk_size = 100);
46 | 
47 | Eigen::MatrixXd eval_lpdf_parallel_fullmemory(
48 |     const std::shared_ptr<BaseAlgorithm> algo, BaseCollector *const collector,
49 |     const Eigen::MatrixXd &grid,
50 |     const Eigen::RowVectorXd &hier_covariate = Eigen::RowVectorXd(0),
51 |     const Eigen::RowVectorXd &mix_covariate = Eigen::RowVectorXd(0),
52 |     const int njobs = 4);
53 | }  // namespace internal
54 | 
55 | }  // namespace bayesmix
56 | 
57 | #endif
58 | 


--------------------------------------------------------------------------------
/src/utils/io_utils.cc:
--------------------------------------------------------------------------------
 1 | #include "io_utils.h"
 2 | 
 3 | #include <fstream>
 4 | #include <iostream>
 5 | #include <stan/math/rev.hpp>
 6 | 
 7 | bool bayesmix::check_file_is_writeable(const std::string &filename) {
 8 |   // Probe in append mode: a plain ofstream open would truncate an existing
 9 |   // file, destroying its content just to answer "can I write here?"
10 |   std::ofstream probe(filename, std::ios::out | std::ios::app);
11 |   if (!probe.is_open()) {
12 |     // Never returns false: failure is reported by throwing
13 |     throw std::invalid_argument("Cannot write to " + filename);
14 |   }
15 |   return true;  // `probe` closes itself when it goes out of scope
16 | }
17 | 
18 | Eigen::MatrixXd bayesmix::read_eigen_matrix(const std::string &filename,
19 |                                             const char delim /* = ','*/) {
20 |   // Initialize objects
21 |   int rows = 0, cols = 0;
22 |   std::ifstream filestream(filename);
23 |   if (!filestream.is_open()) {
24 |     std::string err = "File " + filename + " does not exist";
25 |     throw std::invalid_argument(err);
26 |   }
27 | 
28 |   // First pass: count the lines; the first line alone fixes the column count
29 |   std::string line, entry;
30 |   while (getline(filestream, line, '\n')) {
31 |     rows++;
32 |     if (rows == 1) {
33 |       std::stringstream linestream(line);
34 |       while (getline(linestream, entry, delim)) {
35 |         cols++;
36 |       }
37 |     }
38 |   }
39 | 
40 |   // Reset file stream to the beginning of the file
41 |   filestream.clear();
42 |   filestream.seekg(0, std::ios::beg);
43 | 
44 |   // Second pass: parse every entry; cells a short line leaves out stay 0
45 |   Eigen::MatrixXd mat = Eigen::MatrixXd::Zero(rows, cols);
46 |   int i = 0;
47 |   while (getline(filestream, line, '\n')) {
48 |     int j = 0;
49 |     std::stringstream linestream(line);
50 |     while (getline(linestream, entry, delim)) {
51 |       if (j >= cols) throw std::invalid_argument("Ragged rows in " + filename);
52 |       mat(i, j) = std::stod(entry);  // stod, not stof: keep double precision
53 |       j++;
54 |     }
55 |     i++;
56 |   }
57 | 
58 |   filestream.close();
59 |   return mat;
60 | }
61 | 
62 | void bayesmix::write_matrix_to_file(const Eigen::MatrixXd &mat,
63 |                                     const std::string &filename,
64 |                                     const char delim /*= ','*/) {
65 |   // One text line per matrix row, entries separated by `delim`, no padding
66 |   const std::string coeff_sep(1, delim);
67 |   const Eigen::IOFormat layout(Eigen::StreamPrecision, Eigen::DontAlignCols,
68 |                                coeff_sep, "\n");
69 |   std::ofstream sink(filename.c_str());
70 |   sink << mat.format(layout);
71 | }
72 | 


--------------------------------------------------------------------------------
/src/utils/io_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_UTILS_IO_UTILS_H_
 2 | #define BAYESMIX_UTILS_IO_UTILS_H_
 3 | 
 4 | #include <stan/math/rev.hpp>
 5 | 
 6 | //! @file io_utils.h
 7 | //! The `io_utils.h` file implements basic input-output utilities for Eigen
 8 | //! matrices from and to text files.
 9 | 
10 | namespace bayesmix {
11 | //! Checks that the file can be opened for writing; throws otherwise
12 | bool check_file_is_writeable(const std::string &filename);
13 | 
14 | //! Returns an Eigen matrix read from a `delim`-separated text file
15 | Eigen::MatrixXd read_eigen_matrix(const std::string &filename,
16 |                                   const char delim = ',');
17 | 
18 | //! Writes the given Eigen matrix to a `delim`-separated text file
19 | void write_matrix_to_file(const Eigen::MatrixXd &mat,
20 |                           const std::string &filename, const char delim = ',');
21 | }  // namespace bayesmix
22 | 
23 | #endif  // BAYESMIX_UTILS_IO_UTILS_H_
24 | 


--------------------------------------------------------------------------------
/src/utils/proto_utils.cc:
--------------------------------------------------------------------------------
 1 | #include "proto_utils.h"
 2 | 
 3 | #include <google/protobuf/io/zero_copy_stream_impl.h>
 4 | #include <google/protobuf/text_format.h>
 5 | 
 6 | #include <fstream>
 7 | #include <stan/math/rev.hpp>
 8 | 
 9 | #include "matrix.pb.h"
10 | 
11 | void bayesmix::to_proto(const Eigen::VectorXd &vec,
12 |                         bayesmix::Vector *const out) {
13 |   *out->mutable_data() = {vec.data(), vec.data() + vec.size()};  // copy entries
14 |   out->set_size(vec.size());  // length is stored alongside the data
15 | }
16 | 
17 | void bayesmix::to_proto(const Eigen::MatrixXd &mat,
18 |                         bayesmix::Matrix *const out) {
19 |   *out->mutable_data() = {mat.data(), mat.data() + mat.size()};
20 |   out->set_rowmajor(false);  // MatrixXd's buffer is laid out column by column
21 |   out->set_rows(mat.rows());
22 |   out->set_cols(mat.cols());
23 | }
24 | 
25 | Eigen::VectorXd bayesmix::to_eigen(const bayesmix::Vector &vec) {
26 |   // The declared `size` field, not the length of `data`, drives the copy
27 |   const int len = vec.size();
28 |   if (len <= 0) {
29 |     return Eigen::VectorXd();
30 |   }
31 |   const double *first = &(vec.data())[0];
32 |   return Eigen::Map<const Eigen::VectorXd>(first, len);
33 | }
34 | 
35 | Eigen::MatrixXd bayesmix::to_eigen(const bayesmix::Matrix &mat) {
36 |   const int nrow = mat.rows();
37 |   const int ncol = mat.cols();
38 |   Eigen::MatrixXd out;  // stays 0 x 0 unless both dimensions are positive
39 |   if (nrow > 0 && ncol > 0) {  // logical AND (was a bitwise `&` on two bools)
40 |     const double *p = &(mat.data())[0];
41 |     if (mat.rowmajor()) {
42 |       using RowMajorXd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic,
43 |                                        Eigen::RowMajor>;
44 |       out = Eigen::Map<const RowMajorXd>(p, nrow, ncol);
45 |     } else {
46 |       out = Eigen::Map<const Eigen::MatrixXd>(p, nrow, ncol);
47 |     }
48 |   }
49 |   return out;
50 | }
51 | 
52 | void bayesmix::read_proto_from_file(const std::string &filename,
53 |                                     google::protobuf::Message *const out) {
54 |   std::ifstream ifs(filename);
55 |   google::protobuf::io::IstreamInputStream iis(&ifs);
56 |   const bool parsed = google::protobuf::TextFormat::Parse(&iis, out);
57 |   if (!ifs.is_open() || !parsed) {  // reported on stdout, never thrown
58 |     std::cout << "Error in read_proto_from_file: cannot "
59 |               << (ifs.is_open() ? "parse " : "open ") << filename << std::endl;
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/src/utils/proto_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_UTILS_PROTO_UTILS_H_
 2 | #define BAYESMIX_UTILS_PROTO_UTILS_H_
 3 | 
 4 | #include <stan/math/rev.hpp>
 5 | 
 6 | #include "matrix.pb.h"
 7 | 
 8 | //! @file proto_utils.h
 9 | //! The `proto_utils.h` file implements a few useful functions to manipulate
10 | //! Protobuf objects. For instance, this library implements its own version of
11 | //! vectors and matrices, and the functions implemented here convert from these
12 | //! types to the Eigen ones and viceversa. One can also read a Protobuf from a
13 | //! text file. This is mostly useful for algorithm configuration files.
14 | 
15 | namespace bayesmix {
16 | 
17 | //! Writes an Eigen vector to a bayesmix::Vector Protobuf object by pointer
18 | void to_proto(const Eigen::VectorXd &vec, bayesmix::Vector *const out);
19 | 
20 | //! Writes an Eigen matrix to a bayesmix::Matrix Protobuf object by pointer
21 | void to_proto(const Eigen::MatrixXd &mat, bayesmix::Matrix *const out);
22 | 
23 | //! Converts a bayesmix::Vector Protobuf object into an Eigen vector
24 | Eigen::VectorXd to_eigen(const bayesmix::Vector &vec);
25 | 
26 | //! Converts a bayesmix::Matrix Protobuf object into an Eigen matrix
27 | Eigen::MatrixXd to_eigen(const bayesmix::Matrix &mat);
28 | 
29 | //! Reads a text-format Protobuf message from a given file into `out`
30 | void read_proto_from_file(const std::string &filename,
31 |                           google::protobuf::Message *const out);
32 | 
33 | }  // namespace bayesmix
34 | 
35 | #endif  // BAYESMIX_UTILS_PROTO_UTILS_H_
36 | 


--------------------------------------------------------------------------------
/src/utils/rng.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_UTILS_RNG_H_
 2 | #define BAYESMIX_UTILS_RNG_H_
 3 | 
 4 | #include <random>
 5 | 
 6 | //! @file rng.h
 7 | //! The `rng.h` file defines a simple Random Number Generation class wrapper.
 8 | //! This class wraps the C++ standard RNG object and allows the use of any RNG
 9 | //! seed. It is implemented as a singleton, so that every object used in the
10 | //! library has access to the same exact RNG engine.
11 | //! This is needed to ensure that the rng stream is well defined and that every
12 | //! random number generation causes an update in the rng state.
13 | //! The main drawback is that this design does not allow for efficient
14 | //! parallelization, as calls to the Rng::Instance() from different threads
15 | //! could cause data races. A preferred solution would be to define the Rng to
16 | //! be thread-local if omp-parallelism over several cores is desired, see:
17 | //! https://stackoverflow.com/q/64937761
18 | 
19 | namespace bayesmix {
20 | class Rng {
21 |  public:
22 |   //! Gives access to the unique instance, built on first use
23 |   static Rng &Instance() {
24 |     static Rng unique_instance;
25 |     return unique_instance;
26 |   }
27 | 
28 |   //! Exposes the wrapped Mersenne Twister engine by reference
29 |   std::mt19937 &get() { return mt; }
30 | 
31 |   //! Re-seeds the wrapped engine with `seed_val`
32 |   void seed(const int seed_val) { mt.seed(seed_val); }
33 | 
34 |  private:
35 |   //! Copying is disabled and construction is private: use Instance()
36 |   Rng(Rng const &) = delete;
37 |   Rng &operator=(Rng const &) = delete;
38 |   Rng(const int seed_val = 20201103) : mt(seed_val) {}
39 |   ~Rng() = default;
40 |   //! The engine itself, seeded with 20201103 until seed() is called
41 |   std::mt19937 mt;
42 | };
43 | }  // namespace bayesmix
44 | 
45 | #endif  // BAYESMIX_UTILS_RNG_H_
46 | 


--------------------------------------------------------------------------------
/src/utils/testing_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAYESMIX_BENCHMARKS_UTILS_H
 2 | #define BAYESMIX_BENCHMARKS_UTILS_H
 3 | 
 4 | #include "src/includes.h"
 5 | 
 6 | std::shared_ptr<AbstractHierarchy> get_multivariate_nnw_hierarchy(int dim);  // defined outside this header
 7 | 
 8 | std::shared_ptr<AbstractHierarchy> get_univariate_nnig_hierarchy();  // defined outside this header
 9 | 
10 | std::shared_ptr<AbstractMixing> get_dirichlet_mixing();  // defined outside this header
11 | 
12 | std::shared_ptr<BaseAlgorithm> get_algorithm(const std::string& id, int dim);  // e.g. get_algorithm("Neal3", 2)
13 | 
14 | Eigen::MatrixXd get_spd_matrix(int dim);  // presumably a dim x dim SPD matrix -- TODO confirm
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.13.0)
 2 | project(test_bayesmix)
 3 | enable_testing()
 4 | 
 5 | find_package(PkgConfig REQUIRED)
 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")  # append; do not discard inherited flags
 7 | 
 8 | include(FetchContent)  # provides FetchContent_Declare / FetchContent_MakeAvailable
 9 | FetchContent_Declare(
10 |   googletest
11 |   GIT_REPOSITORY https://github.com/google/googletest.git
12 |   GIT_TAG        9c332145b71c36a5bad9688312c79184f98601ff # release-1.13
13 | )
14 | # For Windows: Prevent overriding the parent project's compiler/linker settings
15 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
16 | FetchContent_MakeAvailable(googletest)  # downloads (if needed) and adds the gtest targets
17 | 
18 | add_executable(test_bayesmix $<TARGET_OBJECTS:bayesmix>  # reuses the object files of the bayesmix target
19 |   write_proto.cc
20 |   proto_utils.cc
21 |   likelihoods.cc
22 |   prior_models.cc
23 |   hierarchies.cc
24 |   lpdf.cc
25 |   eigen_utils.cc
26 |   distributions.cc
27 |   semi_hdp.cc
28 |   collectors.cc
29 |   runtime.cc
30 |   rng.cc
31 |   logit_sb.cc
32 |   gradient.cc
33 |   slice_sampler.cc
34 | )
35 | 
36 | target_include_directories(test_bayesmix PUBLIC ${INCLUDE_PATHS})  # INCLUDE_PATHS is set outside this file
37 | target_link_libraries(test_bayesmix PUBLIC
38 |   ${LINK_LIBRARIES} gtest_main gtest  # gtest_main supplies main()
39 | )
40 | target_compile_options(test_bayesmix PUBLIC ${COMPILE_OPTIONS})  # COMPILE_OPTIONS is set outside this file
41 | 
42 | add_test(NAME runtest COMMAND test_bayesmix)  # one CTest entry runs the whole gtest binary
43 | 


--------------------------------------------------------------------------------
/test/collectors.cc:
--------------------------------------------------------------------------------
 1 | #include <gtest/gtest.h>
 2 | 
 3 | #include <stan/math/rev.hpp>
 4 | #include <vector>
 5 | 
 6 | #include "matrix.pb.h"
 7 | #include "src/collectors/file_collector.h"
 8 | #include "src/collectors/memory_collector.h"
 9 | #include "src/utils/proto_utils.h"
10 | 
11 | TEST(collectors, memory) {
12 |   // Store five constant 3-vectors (values 0..4) in an in-memory collector
13 |   MemoryCollector coll;
14 |   coll.start_collecting();
15 |   std::vector<Eigen::VectorXd> chain(5);
16 |   for (int step = 0; step < 5; ++step) {
17 |     chain[step] = Eigen::VectorXd::Ones(3) * step;
18 |     bayesmix::Vector state;
19 |     to_proto(chain[step], &state);
20 |     coll.collect(state);
21 |   }
22 |   coll.finish_collecting();
23 | 
24 |   // Read the states back in order; `n_read` counts the successful reads
25 |   int n_read = 0;
26 |   while (true) {
27 |     bayesmix::Vector state;
28 |     if (!coll.get_next_state(&state)) {
29 |       break;
30 |     }
31 |     ASSERT_EQ(state.size(), 3);
32 |     ASSERT_EQ(state.data(0), n_read);
33 |     ++n_read;
34 |   }
35 | 
36 |   // The last state read must be the last one stored
37 |   const int last = n_read - 1;
38 |   ASSERT_EQ(chain[last](0), chain[4][0]);
39 | }
40 | 
41 | TEST(collectors, file_writing) {
42 |   // Smoke test: collecting five 3-vectors into a file must not fail
43 |   FileCollector coll("test.recordio");
44 |   coll.start_collecting();
45 |   for (int step = 0; step < 5; ++step) {
46 |     const Eigen::VectorXd value = Eigen::VectorXd::Ones(3) * step;
47 |     bayesmix::Vector state;
48 |     to_proto(value, &state);
49 |     coll.collect(state);
50 |   }
51 |   coll.finish_collecting();
52 | }
53 | 
54 | TEST(collectors, file_reading) {
55 |   // Write five constant 3-vectors (values 0..4) to disk...
56 |   FileCollector coll("test.recordio");
57 |   coll.start_collecting();
58 |   std::vector<Eigen::VectorXd> chain(5);
59 |   for (int step = 0; step < 5; ++step) {
60 |     chain[step] = Eigen::VectorXd::Ones(3) * step;
61 |     bayesmix::Vector state;
62 |     to_proto(chain[step], &state);
63 |     coll.collect(state);
64 |   }
65 |   coll.finish_collecting();
66 | 
67 |   // ...then read them back through a second collector on the same file
68 |   FileCollector coll2("test.recordio");
69 |   int n_read = 0;
70 |   while (true) {
71 |     bayesmix::Vector state;
72 |     if (!coll2.get_next_state(&state)) {
73 |       break;
74 |     }
75 |     ASSERT_EQ(state.size(), 3);
76 |     ASSERT_EQ(state.data(0), n_read);
77 |     ++n_read;
78 |   }
79 |   // The last state read must be the last one stored
80 |   const int last = n_read - 1;
81 |   ASSERT_EQ(chain[last](0), chain[4][0]);
82 | }
83 | 


--------------------------------------------------------------------------------
/test/gradient.cc:
--------------------------------------------------------------------------------
 1 | #include <gtest/gtest.h>
 2 | 
 3 | #include <stan/math/rev.hpp>
 4 | 
 5 | class fbase {  // Abstract interface with a single pure virtual `lpdf`
 6 |  public:
 7 |   virtual double lpdf(const Eigen::VectorXd& x) = 0;  // NOTE(review): no virtual destructor is declared
 8 | };
 9 | 
10 | class f1 : public fbase {  // lpdf(x) = 0.5 * y * ||x||^2
11 |  protected:
12 |   double y = 0.0;  // scale factor; initialised so `f1()` is never indeterminate
13 | 
14 |  public:
15 |   f1() = default;
16 |   f1(double y) : y(y) {}
17 | 
18 |   template <typename T>  // generic scalar, so autodiff types can flow through
19 |   T lpdf(const Eigen::Matrix<T, Eigen::Dynamic, 1>& x) const {
20 |     return 0.5 * x.squaredNorm() * y;
21 |   }
22 | 
23 |   double lpdf(const Eigen::Matrix<double, Eigen::Dynamic, 1>& x) override {
24 |     return this->lpdf<double>(x);  // plain-double entry point required by fbase
25 |   }
26 | };
27 | 
28 | template <class F>  // F must offer an `lpdf(x)` callable on a const object
29 | struct target_lpdf {
30 |   F f;  // wrapped object, exposed so the caller can assign it
31 | 
32 |   template <typename T>
33 |   T operator()(const Eigen::Matrix<T, Eigen::Dynamic, 1>& x) const {
34 |     return f.lpdf(x);  // forwards x unchanged to the wrapped object
35 |   }
36 | };
37 | 
38 | TEST(gradient, quadratic_function) {
39 |   // f(x) = 0.5 * 5 * ||x||^2, so the exact gradient is 5 * x
40 |   target_lpdf<f1> target_function;
41 |   target_function.f = f1(5.0);
42 |   Eigen::VectorXd point(5);
43 |   point << 1.0, 2.0, 3.0, 4.0, 5.0;
44 |   double value;
45 |   Eigen::VectorXd grad;
46 |   stan::math::gradient(target_function, point, value, grad);
47 |   for (int k = 0; k < 5; ++k) {
48 |     ASSERT_DOUBLE_EQ(grad(k), 5 * point(k));
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/test/proto_utils.cc:
--------------------------------------------------------------------------------
 1 | #include "src/utils/proto_utils.h"
 2 | 
 3 | #include <gtest/gtest.h>
 4 | 
 5 | #include <stan/math/rev.hpp>
 6 | 
 7 | #include "matrix.pb.h"
 8 | 
 9 | TEST(to_proto, vector) {
10 |   // Input (1, 100, 1, 1, 1): size and leading entries must survive the copy
11 |   Eigen::VectorXd source(5);
12 |   source << 1.0, 100.0, 1.0, 1.0, 1.0;
13 |   bayesmix::Vector encoded;
14 |   bayesmix::to_proto(source, &encoded);
15 | 
16 |   ASSERT_EQ(encoded.size(), 5);
17 |   ASSERT_EQ(encoded.data(0), 1.0);
18 |   ASSERT_EQ(encoded.data(1), 100.0);
19 | }
20 | 
21 | TEST(to_proto, matrix) {
22 |   // First pass: the 5 x 5 identity
23 |   Eigen::MatrixXd source = Eigen::MatrixXd::Identity(5, 5);
24 |   bayesmix::Matrix encoded;
25 |   bayesmix::to_proto(source, &encoded);
26 | 
27 |   ASSERT_EQ(encoded.rows(), 5);
28 |   ASSERT_EQ(encoded.cols(), 5);
29 |   ASSERT_EQ(encoded.data(0), 1.0);
30 |   ASSERT_EQ(encoded.data(1), 0.0);
31 |   // Entry (0, 1) must land at flat index 5: the buffer goes column by column
32 |   source(0, 1) = 100.0;
33 |   bayesmix::to_proto(source, &encoded);
34 |   ASSERT_EQ(encoded.data(1), 0.0);
35 |   ASSERT_EQ(encoded.data(5), 100.0);
36 | }
37 | 
38 | TEST(to_eigen, vector) {
39 |   // Round trip Eigen -> proto -> Eigen, compared through size and sum only
40 |   Eigen::VectorXd source = Eigen::VectorXd::Ones(5);
41 |   source(1) = 100.0;
42 | 
43 |   bayesmix::Vector encoded;
44 |   bayesmix::to_proto(source, &encoded);
45 |   const Eigen::VectorXd decoded = bayesmix::to_eigen(encoded);
46 | 
47 |   ASSERT_EQ(decoded.size(), source.size());
48 |   ASSERT_EQ(decoded.sum(), source.sum());
49 | }
50 | 
51 | TEST(to_eigen, matrix_colmajor) {
52 |   // Round trip with the layout flag left exactly as to_proto wrote it
53 |   Eigen::MatrixXd source = Eigen::MatrixXd::Identity(5, 5);
54 |   source(0, 1) = 100.0;
55 | 
56 |   bayesmix::Matrix encoded;
57 |   bayesmix::to_proto(source, &encoded);
58 |   const Eigen::MatrixXd decoded = bayesmix::to_eigen(encoded);
59 | 
60 |   ASSERT_EQ(decoded.rows(), 5);
61 |   ASSERT_EQ(decoded.cols(), 5);
62 |   ASSERT_EQ(decoded(0, 0), 1.0);
63 |   ASSERT_EQ(decoded(1, 0), 0.0);
64 |   ASSERT_EQ(decoded(0, 1), 100.0);
65 | }
66 | 
67 | TEST(to_eigen, matrix_rowmajor) {
68 |   // Same buffer, but flagged row-major: entry (0, 1) must no longer be 100
69 |   Eigen::MatrixXd source = Eigen::MatrixXd::Identity(5, 5);
70 |   source(0, 1) = 100.0;
71 | 
72 |   bayesmix::Matrix encoded;
73 |   bayesmix::to_proto(source, &encoded);
74 |   encoded.set_rowmajor(true);
75 |   const Eigen::MatrixXd decoded = bayesmix::to_eigen(encoded);
76 | 
77 |   ASSERT_EQ(decoded.rows(), 5);
78 |   ASSERT_EQ(decoded.cols(), 5);
79 |   ASSERT_EQ(decoded(0, 0), 1.0);
80 |   ASSERT_FALSE(decoded(0, 1) == 100.0);
81 | }
82 | 


--------------------------------------------------------------------------------
/test/runtime.cc:
--------------------------------------------------------------------------------
 1 | // Checks that all the combinations of Algorithms, Mixings and Hierarchies
 2 | // can be combined into a sampleable model.
 3 | 
 4 | #include <gtest/gtest.h>
 5 | 
 6 | #include "src/includes.h"
 7 | #include "src/utils/testing_utils.h"
 8 | 
 9 | TEST(can_build, allmodels) {
10 |   auto &factory_algo = AlgorithmFactory::Instance();
11 |   auto &factory_hier = HierarchyFactory::Instance();
12 |   auto &factory_mixing = MixingFactory::Instance();
13 | 
14 |   for (auto &algo_id : factory_algo.list_of_known_builders()) {
15 |     auto algo = factory_algo.create_object(algo_id);
16 |     for (auto &mix_id : factory_mixing.list_of_known_builders()) {
17 |       auto mix = factory_mixing.create_object(mix_id);
18 |       algo->set_mixing(mix);
19 |       for (auto &hier_id : factory_hier.list_of_known_builders()) {
20 |         auto hier = factory_hier.create_object(hier_id);
21 |         // Skip only non-conjugate hierarchies on algorithms that require
22 |         // conjugacy (logical operators; the original used a bitwise `&`)
23 |         const bool needs_conj = algo->requires_conjugate_hierarchy();
24 |         if (!needs_conj || hier->is_conjugate()) algo->set_hierarchy(hier);
25 |       }
26 |     }
27 |   }
28 | }
29 | 
30 | TEST(clone, algorithm) {
31 |   // After resampling the original's first cluster, the two states must differ
32 |   std::shared_ptr<BaseAlgorithm> algo = get_algorithm("Neal3", 2);
33 |   std::shared_ptr<BaseAlgorithm> algo_clone = algo->clone();
34 |   algo->get_unique_values()[0]->sample_prior();
35 |   auto dump = [](const std::shared_ptr<BaseAlgorithm> &a) {
36 |     return a->get_unique_values()[0]->get_state_proto()->DebugString();
37 |   };
38 |   ASSERT_FALSE(dump(algo) == dump(algo_clone));
39 | }
40 | 


--------------------------------------------------------------------------------
/test/slice_sampler.cc:
--------------------------------------------------------------------------------
 1 | #include "src/algorithms/slice_sampler.h"
 2 | 
 3 | #include <gtest/gtest.h>
 4 | 
 5 | #include <memory>
 6 | #include <stan/math/rev.hpp>
 7 | #include <vector>
 8 | 
 9 | #include "semihdp.pb.h"
10 | #include "src/includes.h"
11 | #include "src/utils/eigen_utils.h"
12 | 
13 | class SliceSamplerTest : public SliceSampler, public ::testing::Test {
14 |  public:
15 |   std::shared_ptr<AbstractHierarchy> get_hierarchy() {  // NNIG with fixed prior values
16 |     auto hier = std::make_shared<NNIGHierarchy>();
17 |     bayesmix::NNIGPrior hier_prior;
18 |     hier_prior.mutable_fixed_values()->set_mean(0.0);
19 |     hier_prior.mutable_fixed_values()->set_var_scaling(0.1);
20 |     hier_prior.mutable_fixed_values()->set_shape(2.0);
21 |     hier_prior.mutable_fixed_values()->set_scale(2.0);
22 |     hier->get_mutable_prior()->CopyFrom(hier_prior);
23 |     hier->initialize();
24 |     return hier;
25 |   }
26 | 
27 |   std::shared_ptr<TruncatedSBMixing> get_mixing(int num_components) {  // total mass fixed at 2.0
28 |     auto mix = std::make_shared<TruncatedSBMixing>();
29 |     bayesmix::TruncSBPrior prior;
30 |     prior.mutable_dp_prior()->set_totalmass(2.0);
31 |     prior.set_num_components(num_components);
32 |     mix->get_mutable_prior()->CopyFrom(prior);
33 |     mix->initialize();
34 |     return mix;
35 |   }
36 | 
37 |   void setup(int num_components = 10) {  // not gtest's SetUp(): each test calls it explicitly
38 |     Eigen::MatrixXd data = Eigen::MatrixXd::Ones(30, 1);
39 |     auto hier = get_hierarchy();
40 |     auto mix = get_mixing(num_components);
41 |     bayesmix::AlgorithmParams algo_proto;
42 |     bayesmix::read_proto_from_file(  // path is relative to the working directory
43 |         "../resources/benchmarks/default_algo_params.asciipb", &algo_proto);
44 |     // The fixture itself is the sampler: configure *this, not a local copy
45 |     read_params_from_proto(algo_proto);
46 |     set_mixing(mix);
47 |     set_hierarchy(hier);
48 |     set_data(data);
49 |   }
50 | };
51 | 
52 | TEST_F(SliceSamplerTest, initialize) {  // Smoke test for setup() followed by initialize()
53 |   setup();
54 |   initialize();
55 |   ASSERT_TRUE(true);  // always passes; reaching this line is the actual check
56 | }
57 | 
58 | TEST_F(SliceSamplerTest, sample_weights) {
59 |   // With 3 components: first two weights positive, their total at most 1
60 |   setup(3);
61 |   initialize();
62 |   sample_slice();
63 |   sample_weights();
64 |   const Eigen::VectorXd w = mixing->get_mixing_weights(false, false);
65 |   for (int k = 0; k < 2; ++k) ASSERT_GT(w(k), 0);
66 |   ASSERT_LE(w.sum(), 1.0);
67 | }
68 | 


--------------------------------------------------------------------------------
/test/write_proto.cc:
--------------------------------------------------------------------------------
 1 | #include <gtest/gtest.h>
 2 | 
 3 | #include "algorithm_state.pb.h"
 4 | #include "ls_state.pb.h"
 5 | #include "src/hierarchies/nnig_hierarchy.h"
 6 | #include "src/utils/proto_utils.h"
 7 | 
 8 | TEST(set_state, uni_ls) {
 9 |   // Build a UniLSState with mean 5 and variance 1
10 |   const double mean = 5;
11 |   const double var = 1.0;
12 |   bayesmix::UniLSState ls_state;
13 |   ls_state.set_mean(mean);
14 |   ls_state.set_var(var);
15 |   ASSERT_EQ(ls_state.mean(), mean);
16 | 
17 |   // Wrap it in a ClusterState and load it into an NNIG hierarchy
18 |   bayesmix::AlgorithmState::ClusterState wrapped;
19 |   wrapped.mutable_uni_ls_state()->CopyFrom(ls_state);
20 |   NNIGHierarchy hierarchy;
21 |   hierarchy.set_state_from_proto(wrapped);
22 | 
23 |   ASSERT_EQ(ls_state.mean(), hierarchy.get_state().mean);
24 | }
25 | 
26 | TEST(write_proto, uni_ls) {
27 |   // Load mean 5 / variance 1 into an NNIG hierarchy through a ClusterState
28 |   const double mean = 5;
29 |   const double var = 1.0;
30 |   bayesmix::UniLSState ls_state;
31 |   ls_state.set_mean(mean);
32 |   ls_state.set_var(var);
33 | 
34 |   bayesmix::AlgorithmState::ClusterState loaded;
35 |   loaded.mutable_uni_ls_state()->CopyFrom(ls_state);
36 |   NNIGHierarchy hierarchy;
37 |   hierarchy.set_state_from_proto(loaded);
38 | 
39 |   // Write the state back out into a fresh AlgorithmState entry
40 |   bayesmix::AlgorithmState out;
41 |   bayesmix::AlgorithmState::ClusterState* written = out.add_cluster_states();
42 |   hierarchy.write_state_to_proto(written);
43 | 
44 |   // Both values must come back unchanged
45 |   ASSERT_EQ(mean, written->uni_ls_state().mean());
46 |   ASSERT_EQ(var, written->uni_ls_state().var());
47 | }
48 | 


--------------------------------------------------------------------------------