├── .gitignore
├── .travis.yml
├── HACKING
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── check.sh
├── conda
    ├── meta.yaml
    └── upload.sh
├── docker
    ├── README
    ├── ubuntu1604
    └── ubuntu1604-int
├── images
    ├── linear_regression.png
    ├── one_view.png
    ├── smc.gif
    └── spherical_gaussian.png
├── pythenv.sh
├── setup.py
├── src
    ├── __init__.py
    ├── cgpm.py
    ├── crosscat
    │   ├── README.md
    │   ├── __init__.py
    │   ├── engine.py
    │   ├── loomcat.py
    │   ├── lovecat.py
    │   ├── sampling.py
    │   ├── state.py
    │   └── statedoc.py
    ├── dummy
    │   ├── README
    │   ├── __init__.py
    │   ├── barebones.py
    │   ├── fourway.py
    │   ├── piecewise.py
    │   ├── trollnormal.py
    │   └── twoway.py
    ├── factor
    │   ├── README
    │   ├── __init__.py
    │   └── factor.py
    ├── kde
    │   ├── README
    │   ├── __init__.py
    │   └── mvkde.py
    ├── knn
    │   ├── README
    │   ├── __init__.py
    │   └── mvknn.py
    ├── mixtures
    │   ├── README
    │   ├── __init__.py
    │   ├── dim.py
    │   ├── relevance.py
    │   └── view.py
    ├── network
    │   ├── README
    │   ├── __init__.py
    │   ├── helpers.py
    │   └── importance.py
    ├── primitives
    │   ├── README
    │   ├── __init__.py
    │   ├── bernoulli.py
    │   ├── beta.py
    │   ├── categorical.py
    │   ├── crp.py
    │   ├── distribution.py
    │   ├── exponential.py
    │   ├── geometric.py
    │   ├── lognormal.py
    │   ├── normal.py
    │   ├── normal_trunc.py
    │   ├── poisson.py
    │   └── vonmises.py
    ├── regressions
    │   ├── README
    │   ├── __init__.py
    │   ├── forest.py
    │   ├── linreg.py
    │   └── ols.py
    ├── uncorrelated
    │   ├── README
    │   ├── __init__.py
    │   ├── diamond.py
    │   ├── directed.py
    │   ├── dots.py
    │   ├── linear.py
    │   ├── parabola.py
    │   ├── ring.py
    │   ├── sin.py
    │   ├── undirected.py
    │   ├── uniformx.py
    │   └── xcross.py
    ├── utils
    │   ├── __init__.py
    │   ├── config.py
    │   ├── data.py
    │   ├── entropy_estimators.py
    │   ├── general.py
    │   ├── mvnormal.py
    │   ├── parallel_map.py
    │   ├── plots.py
    │   ├── render.py
    │   ├── sampling.py
    │   ├── test.py
    │   ├── timer.py
    │   └── validation.py
    └── venturescript
    │   ├── __init__.py
    │   ├── vscgpm.py
    │   └── vsinline.py
└── tests
    ├── __init__.py
    ├── conftest.py
    ├── disabled_test_loomcat.py
    ├── disabled_test_render_utils.py
    ├── disabled_test_simulate_univariate.py
    ├── disabled_test_uncorrelated_simulate.py
    ├── disabled_test_vsinline_determinism.py
    ├── graphical
        ├── animals.py
        ├── depprob_id.py
        ├── dpmm_nignormal.py
        ├── one_view.py
        ├── recover.py
        ├── resources
        │   └── satellites.csv
        ├── slice.py
        ├── two_views.py
        └── zero_corr.py
    ├── hacks.py
    ├── markers.py
    ├── stochastic.py
    ├── test_add_state.py
    ├── test_bernoulli.py
    ├── test_check_env_debug.py
    ├── test_cmi.py
    ├── test_cmi_partition.py
    ├── test_constr_crp.py
    ├── test_crp.py
    ├── test_dependence_constraints.py
    ├── test_dependence_probability.py
    ├── test_diagnostics.py
    ├── test_direct_network.py
    ├── test_engine_alter.py
    ├── test_engine_dimensions.py
    ├── test_engine_seed.py
    ├── test_exponentials_transition_hypers.py
    ├── test_factor_analysis.py
    ├── test_force_cell.py
    ├── test_forest.py
    ├── test_get_vqe.py
    ├── test_gpmcc_simple_composite.py
    ├── test_importance_helpers.py
    ├── test_impossible_evidence.py
    ├── test_incorporate_dim.py
    ├── test_incorporate_row.py
    ├── test_iter_counter.py
    ├── test_linreg.py
    ├── test_linreg_mixture.py
    ├── test_logpdf_score.py
    ├── test_logsumexp.py
    ├── test_lovecat.py
    ├── test_lw_rf.py
    ├── test_mvkde.py
    ├── test_mvknn.py
    ├── test_normal_categorical.py
    ├── test_ols.py
    ├── test_piecewise.py
    ├── test_populate_evidence.py
    ├── test_relevance.py
    ├── test_row_similarity.py
    ├── test_serialize.py
    ├── test_simulate_many_decorator.py
    ├── test_state_initialize.py
    ├── test_stochastic.py
    ├── test_teh_murphy.py
    ├── test_type_check.py
    ├── test_update_cctype.py
    ├── test_view_logpdf_cluster.py
    ├── test_vscgpm.py
    └── test_vsinline.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | bin/
12 | build/
13 | develop-eggs/
14 | dist/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | 
25 | # Installer logs
26 | pip-log.txt
27 | pip-delete-this-directory.txt
28 | 
29 | # Unit test / coverage reports
30 | htmlcov/
31 | .tox/
32 | .coverage
33 | .cache
34 | nosetests.xml
35 | coverage.xml
36 | 
37 | # Translations
38 | *.mo
39 | 
40 | # Mr Developer
41 | .mr.developer.cfg
42 | .project
43 | .pydevproject
44 | 
45 | # Rope
46 | .ropeproject
47 | 
48 | # Django stuff:
49 | *.log
50 | *.pot
51 | 
52 | # Sphinx documentation
53 | docs/_build/
54 | 
55 | # gpmcc artifacts
56 | *.engine*
57 | *.bak*
58 | 
59 | # numpy binaries
60 | *.npy
61 | 
62 | # latex artifacts
63 | *.aux
64 | *.bbl
65 | *.blg
66 | *.fdb_latexmk
67 | *.fls
68 | *.synctex.gz
69 | *_minted-*
70 | *.pdf
71 | 
72 | # specific directories
73 | examples/malawi/resources/
74 | examples/satellites/splits/
75 | examples/swallows/resources/
76 | examples/swallows/splits/
77 | examples/swallows/univariate.py
78 | experiments/resources/
79 | experiments/*/resources/
80 | cgpm
81 | src/debug
82 | src/venturescript/resources
83 | notes/gpm/vs/vsgpm.pdf
84 | tests/resources
85 | /src/version.py
86 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | dist: trusty
 3 | env:
 4 |   global:
 5 |     # get all the branches referencing this commit
 6 |     - REAL_BRANCH=$(git ls-remote origin | sed -n "\|$TRAVIS_COMMIT\s\+refs/heads/|{s///p}")
 7 | 
 8 | python:
 9 |   - 2.7
10 | install:
11 |   - wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh
12 |   - bash miniconda.sh -b -p $HOME/miniconda
13 |   - export PATH="$HOME/miniconda/bin:$PATH"
14 |   - hash -r
15 |   - conda config --set always_yes yes --set changeps1 no
16 |   - conda update -q conda
17 |   - conda install -q conda=4.6.14 conda-build=3.13.0
18 | before_script:
19 |   - gem install travis
20 | script:
21 |   - export CONDA_PACKAGE_VERSION="${TRAVIS_TAG:-$(date +%Y.%m.%d)}"
22 |   # remove leading v from tags if they exist
23 |   - CONDA_PACKAGE_VERSION="$(sed s/^v// <<<$CONDA_PACKAGE_VERSION)"
24 |   - conda build . -c probcomp -c anaconda
25 | after_success:
26 |   - bash conda/upload.sh
27 | 


--------------------------------------------------------------------------------
/HACKING:
--------------------------------------------------------------------------------
 1 | Notes For cgpm Hackers
 2 | 
 3 | * Branches
 4 | 
 5 | Should be [year][month][day]-[username]-[branch-description...], for example a
 6 | well named branch is 20160621-fsaad-update-hacking.
 7 | 
 8 | Please do not delete branches, history should be maintained.
 9 | 
10 | * Python coding style
11 | 
12 | Generally follow PEP 8, 80-char max, with these exceptions:
13 | 
14 | - New line, instead of alignment, for continuation lines. For function
15 | definitions, no new line and use eight spaces.
16 | 
17 |     Example: Yes
18 |     model = cgpm.crosscat.state.State(
19 |         X, outputs=[1,2,3,4], inputs=None,
20 |         cctypes=['normal', 'bernoulli', 'poisson', 'lognormal'],
21 |         rng=np.random.RandomState(0))
22 | 
23 |     Example: No
24 |     model = cgpm.crosscat.state.State(X, outputs=[1,2,3], inputs=None,
25 |                                       cctypes=['normal', 'bernoulli', 'poisson',
26 |                                         'lognormal'],
27 |                                       rng=np.random.RandomState(0))
28 | 
29 |     Example: Yes, Preferred
30 |     def generate_mh_sample(
31 |             x, logpdf_target, jump_std, D, num_samples=1,
32 |             num_burn=1, num_chains=7, num_lag=1, rng=None):
33 |         ...body...
34 | 
35 |     Example: Yes
36 |     def generate_mh_sample(x, logpdf_target, jump_std, D, num_samples=1,
37 |             num_burn=1, num_chains=7, num_lag=1, rng=None):
38 |         ...body...
39 | 
40 |     Example: No
41 |     def generate_mh_sample(x, logpdf_target, jump_std, D, num_samples=1,
42 |                            num_burn=1, num_chains=7, num_lag=1, rng=None):
43 |         ...body...
44 | 
45 | - Generally use single-quoted strings, except use """ for doc strings.
46 | 
47 | * Python imports
48 | 
49 | Should be organized as follows:
50 | 
51 |     [standard library imports]
52 |     - blank line -
53 |     [third-party library imports]
54 |     - blank line -
55 |     [sister projects library imports]
56 |     - blank line -
57 |     [cgpm module imports]
58 | 
59 | Each import block should be organized alphabetically: first all unqualified
60 | imports (import baz), then all named imports (from foo import nix).
61 | For example
62 | 
63 | import math
64 | import multiprocessing as mp
65 | 
66 | from array import ArrayType
67 | from struct import pack
68 | 
69 | import numpy as np
70 | 
71 | from scipy.misc import logsumexp
72 | from scipy.stats import geom
73 | from scipy.stats import norm
74 | 
75 | import bayeslite.core
76 | import bayeslite.math_util
77 | 
78 | from cgpm.crosscat.state import State
79 | from cgpm.utils import general as gu
80 | 
81 | * Testing
82 | 
83 | The tip of every branch merged into master __must__ pass ./check.sh, and be
84 | consistent with the code conventions here. New functionality must always be
85 | associated with a test -- fixing bugs should preferably include a test as well
86 | (less strict).
87 | 
88 | * Entropy
89 | 
90 | Please never, ever use global random state. Every source of random bits must
91 | be managed explicitly.
92 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include LICENSE.txt
 2 | include README.md
 3 | include check.sh
 4 | include pythenv.sh
 5 | include tests/graphical/animals.py
 6 | include tests/graphical/depprob_id.py
 7 | include tests/graphical/dpmm_nignormal.py
 8 | include tests/graphical/one_view.py
 9 | include tests/graphical/recover.py
10 | include tests/graphical/resources/satellites.csv
11 | include tests/graphical/slice.py
12 | include tests/graphical/two_views.py
13 | include tests/graphical/zero_corr.py
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # cgpm
  2 | 
  3 | [![Build Status](https://travis-ci.org/probcomp/cgpm.svg?branch=master)](https://travis-ci.org/probcomp/cgpm)
  4 | 
  5 | The aim of this project is to provide a unified probabilistic programming
  6 | framework to express different models and techniques from statistics, machine
  7 | learning and non-parametric Bayes. It serves as the primary modeling and
  8 | inference runtime system for [bayeslite](https://github.com/probcomp/bayeslite),
  9 | an open-source implementation of BayesDB.
 10 | 
 11 | Composable generative population models (CGPM) are a computational abstraction
 12 | for probabilistic objects. They provide an interface that explicitly
 13 | differentiates between the _sampler_ of a random variable from its conditional
 14 | distribution and the _assessor_ of its conditional density. By encapsulating
 15 | models as probabilistic programs that implement CGPMs, complex models can be
 16 | built as compositions of sub-CGPMs, and queried in a model-independent way
 17 | using the Bayesian Query Language.
 18 | 
 19 | ## Installing
 20 | 
 21 | ### Conda
 22 | 
 23 | The easiest way to install cgpm is to use the
 24 | [package](https://anaconda.org/probcomp/cgpm) on Anaconda Cloud.
 25 | Please follow [these instructions](https://github.com/probcomp/iventure/blob/master/docs/conda.md).
 26 | 
 27 | ### Manual Build
 28 | 
 29 | `cgpm` targets Ubuntu 14.04 and 16.04. The package can be installed by cloning
 30 | this repository and following these instructions. It is _highly recommended_ to
 31 | install `cgpm` inside of a virtualenv which was created using the
 32 | `--system-site-packages` flag.
 33 | 
 34 | 1. Install dependencies from `apt`, [listed here](https://github.com/probcomp/cgpm/blob/71fe62790f466e9dd2149d0f527c584cce19e70f/docker/ubuntu1604#L4-L14).
 35 | 
 36 | 2. Retrieve and build the source.
 37 | 
 38 |     ```
 39 |     % git clone git@github.com:probcomp/cgpm
 40 |     % cd cgpm
 41 |     % pip install --no-deps .
 42 |     ```
 43 | 
 44 | 3. Verify the installation.
 45 | 
 46 |     ```
 47 |     % python -c 'import cgpm'
 48 |     % cd cgpm && ./check.sh
 49 |     ```
 50 | 
 51 | ## Publications
 52 | 
 53 | CGPMs, and their integration as a runtime system for
 54 | [BayesDB](http://probcomp.csail.mit.edu/bayesdb/), are described in the following
 55 | technical report:
 56 | 
 57 | - __Probabilistic Data Analysis with Probabilistic Programming__.
 58 | Saad, F., and Mansinghka, V. [_arXiv preprint, arXiv:1608.05347_](https://arxiv.org/abs/1608.05347), 2017.
 59 | 
 60 | Applications of using cgpm and bayeslite for data analysis tasks can be further
 61 | found in:
 62 | 
 63 | - __Probabilistic Search for Structured Data via Probabilistic Programming and Nonparametric Bayes__.
 64 | Saad, F., Casarsa, L., and Mansinghka, V. [_arXiv preprint, arXiv:1704.01087_](https://arxiv.org/abs/1704.01087), 2017.
 65 | 
 66 | - __Detecting Dependencies in Sparse, Multivariate Databases Using Probabilistic Programming and Non-parametric Bayes__.
 67 | Saad, F., and Mansinghka, V. [_Artificial Intelligence and Statistics (AISTATS)_](http://proceedings.mlr.press/v54/saad17a.html), 2017.
 68 | 
 69 | - __A Probabilistic Programming Approach to Probabilistic Data Analysis__.
 70 | Saad, F., and Mansinghka, V. [_Advances in Neural Information Processing Systems (NIPS)_](https://papers.nips.cc/paper/6060-a-probabilistic-programming-approach-to-probabilistic-data-analysis.html), 2016.
 71 | 
 72 | 
 73 | ## Tests
 74 | 
 75 | Running `./check.sh` will run a subset of the tests that are considered complete
 76 | and stable. To launch the full test suite, including continuous integration
 77 | tests, run `py.test` in the root directory. There are more tests in the `tests/`
 78 | directory, but those that do not start with `test_` or do start with `disabled_`
 79 | are not considered ready. The tip of every branch merged into master __must__
 80 | pass `./check.sh`, and be consistent with the code conventions outlined in
 81 | [HACKING](HACKING).
 82 | 
 83 | To run the full test suite, use `./check.sh --integration tests/`. Note that the
 84 | full integration test suite requires installing the C++
 85 | [crosscat](https://github.com/probcomp/crosscat) backend.
 86 | 
 87 | ## License
 88 | 
 89 | Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 90 | 
 91 | Licensed under the Apache License, Version 2.0 (the "License");
 92 | you may not use this file except in compliance with the License.
 93 | You may obtain a copy of the License at
 94 | 
 95 |    http://www.apache.org/licenses/LICENSE-2.0
 96 | 
 97 | Unless required by applicable law or agreed to in writing, software
 98 | distributed under the License is distributed on an "AS IS" BASIS,
 99 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
100 | See the License for the specific language governing permissions and
101 | limitations under the License.
102 | 


--------------------------------------------------------------------------------
/check.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | set -Ceu
 4 | 
 5 | : ${PYTHON:=python}
 6 | 
 7 | root=`cd -- "$(dirname -- "$0")" && pwd`
 8 | 
 9 | (
10 |     set -Ceu
11 |     cd -- "${root}"
12 |     rm -rf build
13 |     "$PYTHON" setup.py build
14 |     if [ $# -eq 0 ]; then
15 |         # By default, when running all tests, skip tests that have
16 |         # been marked for continuous integration by using __ci_ in
17 |         # their names.  (git grep __ci_ to find these.)
18 |         ./pythenv.sh "$PYTHON" -m pytest --pyargs cgpm -k "not __ci_"
19 |     else
20 |         # If args are specified, run all tests, including continuous
21 |         # integration tests, for the selected components.
22 |         ./pythenv.sh "$PYTHON" -m pytest "$@"
23 |     fi
24 | )
25 | 


--------------------------------------------------------------------------------
/conda/meta.yaml:
--------------------------------------------------------------------------------
 1 | package:
 2 |   name: cgpm
 3 |   version: {{ CONDA_PACKAGE_VERSION }}
 4 | 
 5 | source:
 6 |   path: ../
 7 | 
 8 | build:
 9 |   script: python setup.py install
10 | 
11 | requirements:
12 |   build:
13 |     - git
14 |     - matplotlib 1.5.*
15 |     - numpy 1.11.*
16 |     - python 2.7.*
17 |   run:
18 |     - nomkl
19 |     - matplotlib 1.5.*
20 |     - numpy 1.11.*
21 |     - pandas 0.18.*
22 |     - python 2.7.*
23 |     - scikit-learn 0.17.*
24 |     - scipy 0.17.*
25 |     - statsmodels 0.6.*
26 | 
27 | test:
28 |   requires:
29 |     - matplotlib 1.5.*
30 |     - pytest 2.8.*
31 |     - python 2.7.*
32 |   imports:
33 |     - cgpm
34 |   commands:
35 |     - python -m pytest --pyargs cgpm
36 | 
37 | about:
38 |   home: https://github.com/probcomp/cgpm
39 |   license: Apache
40 |   license_file: LICENSE.txt
41 | 


--------------------------------------------------------------------------------
/conda/upload.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -ev
 3 | 
 4 | # fyi, the logic below is necessary due to the fact that on a tagged build, TRAVIS_BRANCH and TRAVIS_TAG are the same
 5 | # in the case of a tagged build, use the REAL_BRANCH env var defined in travis.yml
 6 | if [ -n "${TRAVIS_TAG}" ]; then
 7 |   conda install anaconda-client
 8 |   # if tag didn't come from master, add the "dev" label
 9 |   if [ ${REAL_BRANCH} = "master" ]; then
10 |     anaconda -t ${CONDA_UPLOAD_TOKEN} upload -u ${CONDA_USER} ~/miniconda/conda-bld/linux-64/cgpm-*.tar.bz2 --force
11 |   else
12 |     anaconda -t ${CONDA_UPLOAD_TOKEN} upload -u ${CONDA_USER} -l dev ~/miniconda/conda-bld/linux-64/cgpm-*.tar.bz2 --force
13 |   fi
14 | elif [ ${TRAVIS_BRANCH} = "master" ]; then
15 |   if [ ${TRAVIS_EVENT_TYPE} = "cron" ]; then
16 |     # don't build package for nightly cron.. this is just for test stability info
17 |     exit 0
18 |   else
19 |     conda install anaconda-client
20 |     anaconda -t ${CONDA_UPLOAD_TOKEN} upload -u ${CONDA_USER} -l edge ~/miniconda/conda-bld/linux-64/cgpm-*.tar.bz2 --force
21 |     # trigger a downstream bayeslite build using the edge package
22 |     curl -LO https://raw.github.com/stephanmg/travis-dependent-builds/master/trigger.sh
23 |     bash trigger.sh probcomp bayeslite master $TRAVIS_ACCESS_TOKEN | grep -v TOKEN
24 |   fi
25 | else
26 |   exit 0
27 | fi
28 | 


--------------------------------------------------------------------------------
/docker/README:
--------------------------------------------------------------------------------
1 | This directory contains Dockerfiles that build and install cgpm.
2 | 
3 | Note that the file ubuntu1604-int is used as part of an integration test on the
4 | probcomp Jenkins test server at https://probcomp-4.csail.mit.edu. This file is
5 | not appropriate for being run in isolation.
6 | 


--------------------------------------------------------------------------------
/docker/ubuntu1604:
--------------------------------------------------------------------------------
 1 | FROM            ubuntu:16.04
 2 | MAINTAINER      MIT Probabilistic Computing Project
 3 | 
 4 | RUN             apt-get update -qq \
 5 |                 && apt-get install -qq -y \
 6 |                     git \
 7 |                     python-matplotlib \
 8 |                     python-numpy \
 9 |                     python-pandas \
10 |                     python-pytest \
11 |                     python-scipy \
12 |                     python-sklearn \
13 |                     python-statsmodels
14 | 
15 | ADD             . /cgpm
16 | WORKDIR         /cgpm
17 | RUN             ./check.sh tests
18 | RUN             python setup.py sdist
19 | RUN             python setup.py bdist
20 | 


--------------------------------------------------------------------------------
/docker/ubuntu1604-int:
--------------------------------------------------------------------------------
 1 | # Integration tests.
 2 | 
 3 | FROM            ubuntu:16.04
 4 | MAINTAINER      MIT Probabilistic Computing Project
 5 | 
 6 | RUN             apt-get update -qq \
 7 |                 && apt-get upgrade -qq \
 8 |                 && apt-get install -y \
 9 |                     build-essential \
10 |                     ccache \
11 |                     git \
12 |                     libboost-all-dev \
13 |                     libgsl0-dev \
14 |                     python-flask \
15 |                     python-jsonschema \
16 |                     python-matplotlib \
17 |                     python-nose \
18 |                     python-nose-testconfig \
19 |                     python-numpy \
20 |                     python-pandas \
21 |                     python-pexpect \
22 |                     python-pytest \
23 |                     python-requests \
24 |                     python-scipy \
25 |                     python-six \
26 |                     python-sklearn \
27 |                     python-statsmodels
28 | 
29 | ADD             . /cgpm
30 | WORKDIR         /cgpm
31 | RUN \
32 |                 ./docker/deps/bayeslite-apsw/pythenv.sh \
33 |                 ./docker/deps/bayeslite/pythenv.sh \
34 |                 ./docker/deps/crosscat/pythenv.sh \
35 |                 ./docker/deps/venture/pythenv.sh \
36 |                 ./check.sh --integration tests
37 | RUN             python setup.py sdist
38 | RUN             python setup.py bdist
39 | 


--------------------------------------------------------------------------------
/images/linear_regression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/probcomp/cgpm/56a481829448bddc9cdfebd42f65023287d5b7c7/images/linear_regression.png


--------------------------------------------------------------------------------
/images/one_view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/probcomp/cgpm/56a481829448bddc9cdfebd42f65023287d5b7c7/images/one_view.png


--------------------------------------------------------------------------------
/images/smc.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/probcomp/cgpm/56a481829448bddc9cdfebd42f65023287d5b7c7/images/smc.gif


--------------------------------------------------------------------------------
/images/spherical_gaussian.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/probcomp/cgpm/56a481829448bddc9cdfebd42f65023287d5b7c7/images/spherical_gaussian.png


--------------------------------------------------------------------------------
/pythenv.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | set -Ceu
 4 | 
 5 | : ${PYTHON:=python}
 6 | root=`cd -- "$(dirname -- "$0")" && pwd`
 7 | platform=`"${PYTHON}" -c 'import distutils.util as u; print u.get_platform()'`
 8 | version=`"${PYTHON}" -c 'import sys; print sys.version[0:3]'`
 9 | 
10 | # The lib directory varies depending on
11 | #
12 | # (a) whether there are extension modules (here, no); and
13 | # (b) whether some Debian maintainer decided to patch the local Python
14 | # to behave as though there were.
15 | #
16 | # But there's no obvious way to just ask distutils what the name will
17 | # be.  There's no harm in naming a pathname that doesn't exist, other
18 | # than a handful of microseconds of runtime, so we'll add both.
19 | libdir="${root}/build/lib"
20 | plat_libdir="${libdir}.${platform}-${version}"
21 | export PYTHONPATH="${libdir}:${plat_libdir}${PYTHONPATH:+:${PYTHONPATH}}"
22 | 
23 | bindir="${root}/build/scripts-${version}"
24 | export PATH="${bindir}${PATH:+:${PATH}}"
25 | 
26 | exec "$@"
27 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from .version import __version__
18 | 


--------------------------------------------------------------------------------
/src/crosscat/README.md:
--------------------------------------------------------------------------------
 1 | # gpmcc
 2 | 
 3 | Implementation of [crosscat](http://probcomp.csail.mit.edu/crosscat/) from
 4 | the lens of generative population models (GPMs). The goal is to express the
 5 | hierarchical generative process that defines crosscat as a composition of
 6 | modules that follow the GPM interface.
 7 | 
 8 | ## Research Goals
 9 | 
10 | gpmcc aims to implement all the features that exist in current crosscat
11 | implementations, as well as new features not available in vanilla crosscat.
12 | Key ideas on the development roadmap are:
13 | 
14 | - Interface that permits key constructs of the Metamodeling Language (MML),
15 |   such as:
16 |   - Suggesting column dependencies.
17 |   - Suggesting row dependencies, with respect to a subset of columns.
18 |   - Sequential incorporate/unincorporate of (partial) observations
19 |     interleaved with analysis.
20 |   - Targeted analysis over the crosscat inference kernels.
21 |   - Column-specific datatype constraints (`REAL`, `POSITIVE`,
22 |   `IN-RANGE(min,max)`, `CATEGORICAL`, `ORDINAL`, etc).
23 | 
24 | - Sequential Monte Carlo (SMC) implementation of the posterior inference
25 |   algorithm described in [Mansinghka, et
26 |   al.](http://arxiv.org/pdf/1512.01272.pdf) Section 2.4, as opposed to
27 |   observe-all then Gibbs forever.
28 | 
29 | - Interface for the Bayesian Query Language (BQL) and
30 |   [bayeslite](https://github.com/probcomp/bayeslite) integration, with new
31 |   BQL additions such as:
32 |   - Conditional mutual information.
33 |   - KL-divergence of predictive distribution against synthetic GPMs.
34 |   - Marginal likelihood estimates of real datasets.
35 | 
36 | - Interface for foreign GPMs that are jointly analyzed with crosscat.
37 |   Current implementations only allow foreign GPMs to be composed at query,
38 |   not analysis, time.
39 | 
40 | - Subsampling, where each model is responsible for a subset of data from an
41 |   overlapping partition of the overall dataset.
42 | 
43 | - Multiprocessing for analysis. Distributed?
44 | 
45 | - Several DistributionGpms for the different MML data-types, not just
46 |   Normal and Multinomial.
47 | 
48 | ## Static Example
49 | 
50 | The simplest example is creating a synthetic dataset where each variable is a
51 | mixture of one of the available DistributionGpms. Inference is run using an
52 | extended implementation of CrossCat from the lens of compositions of composable
53 | generative population models.
54 | 
55 | ```
56 | $ python -i tests/graphical/one_view.py
57 | ```
58 | 
59 | A plot similar to ![images/one_view.png](images/one_view.png) should appear.
60 | 
61 | ## Interactive Example (Experimental)
62 | 
63 | Single-particle SMC in a DP Mixture with Normal-InverseGamma base measure and
64 | normal observations can be run interactively:
65 | 
66 | ```
67 | $ python -i tests/graphical/dpmm_nignormal.py
68 | ```
69 | 
70 | Click on the graph to produce observations and watch the Gibbs kernel cycle
71 | through the hypothesis space:
72 | 
73 | ![images/smc.gif](images/smc.gif)
74 | 
75 | The values printed in the console after each click are estimates of the
76 | marginal-log-likelihood of observations, based on the single particle
77 | weight. The following output
78 | 
79 | ```
80 | Observation 8.000000: 0.209677
81 | [-8.0740236375201153]
82 | ```
83 | 
84 | means the eighth observation is 0.209677, and the estimated marginal
85 | log-likelihood is -8.0740236375201153.
86 | 


--------------------------------------------------------------------------------
/src/crosscat/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/crosscat/sampling.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | '''
 18 | This module contains implementations of simulate and logpdf specialized to
 19 | cgpm.crosscat, avoiding overhead of recursive implementations using the
 20 | importance network on the sub-cgpms that comprise cgpm.crosscat.State.
 21 | '''
 22 | 
 23 | from itertools import chain
 24 | 
 25 | import numpy as np
 26 | 
 27 | from cgpm.primitives.crp import Crp
 28 | 
 29 | from cgpm.utils.general import log_normalize
 30 | from cgpm.utils.general import log_pflip
 31 | from cgpm.utils.general import logsumexp
 32 | from cgpm.utils.general import merged
 33 | 
 34 | from cgpm.utils.validation import partition_query_evidence
 35 | 
 36 | 
 37 | def state_logpdf(state, rowid, targets, constraints=None):
 38 |     targets_lookup, constraints_lookup = partition_query_evidence(
 39 |         state.Zv(), targets, constraints)
 40 |     logps = (
 41 |         view_logpdf(
 42 |             view=state.views[v],
 43 |             rowid=rowid,
 44 |             targets=targets_lookup[v],
 45 |             constraints=constraints_lookup.get(v, {})
 46 |         )
 47 |         for v in targets_lookup
 48 |     )
 49 |     return sum(logps)
 50 | 
 51 | 
 52 | def state_simulate(state, rowid, targets, constraints=None, N=None):
 53 |     targets_lookup, constraints_lookup = partition_query_evidence(
 54 |         state.Zv(), targets, constraints)
 55 |     N_sim = N if N is not None else 1
 56 |     draws = (
 57 |         view_simulate(
 58 |             view=state.views[v],
 59 |             rowid=rowid,
 60 |             targets=targets_lookup[v],
 61 |             constraints=constraints_lookup.get(v, {}),
 62 |             N=N_sim
 63 |         )
 64 |         for v in targets_lookup
 65 |     )
 66 |     samples = [merged(*l) for l in zip(*draws)]
 67 |     return samples if N is not None else samples[0]
 68 | 
 69 | 
 70 | def view_logpdf(view, rowid, targets, constraints):
 71 |     if not view.hypothetical(rowid):
 72 |         return _logpdf_row(view, targets, view.Zr(rowid))
 73 |     Nk = view.Nk()
 74 |     N_rows = len(view.Zr())
 75 |     K = view.crp.clusters[0].gibbs_tables(-1)
 76 |     lp_crp = [Crp.calc_predictive_logp(k, N_rows, Nk, view.alpha()) for k in K]
 77 |     lp_constraints = [_logpdf_row(view, constraints, k) for k in K]
 78 |     if all(np.isinf(lp_constraints)):
 79 |         raise ValueError('Zero density constraints: %s' % (constraints,))
 80 |     lp_cluster = log_normalize(np.add(lp_crp, lp_constraints))
 81 |     lp_targets = [_logpdf_row(view, targets, k) for k in K]
 82 |     return logsumexp(np.add(lp_cluster, lp_targets))
 83 | 
 84 | 
 85 | def view_simulate(view, rowid, targets, constraints, N):
 86 |     if not view.hypothetical(rowid):
 87 |         return _simulate_row(view, targets, view.Zr(rowid), N)
 88 |     Nk = view.Nk()
 89 |     N_rows = len(view.Zr())
 90 |     K = view.crp.clusters[0].gibbs_tables(-1)
 91 |     lp_crp = [Crp.calc_predictive_logp(k, N_rows, Nk, view.alpha()) for k in K]
 92 |     lp_constraints = [_logpdf_row(view, constraints, k) for k in K]
 93 |     if all(np.isinf(lp_constraints)):
 94 |         raise ValueError('Zero density constraints: %s' % (constraints,))
 95 |     lp_cluster = np.add(lp_crp, lp_constraints)
 96 |     ks = log_pflip(lp_cluster, array=K, size=N, rng=view.rng)
 97 |     counts = {k:n for k,n in enumerate(np.bincount(ks)) if n > 0}
 98 |     samples = (_simulate_row(view, targets, k, counts[k]) for k in counts)
 99 |     return chain.from_iterable(samples)
100 | 
101 | 
def _logpdf_row(view, targets, cluster):
    """Return joint density of the targets in a fixed cluster.

    `targets` maps a column c to a value x.  Each pair is scored on its own by
    `view.dims[c].logpdf`, with the cluster supplied as the input keyed by
    `view.outputs[0]`, and the per-column terms are summed.  An empty
    `targets` yields 0.
    """
    # dict.items() exists on both Python 2 and Python 3, whereas iteritems()
    # is Python 2 only.
    return sum(
        view.dims[c].logpdf(None, {c: x}, None, {view.outputs[0]: cluster})
        for c, x in targets.items()
    )
108 | 
109 | 
def _simulate_row(view, targets, cluster, N):
    """Return samples of the targets in a fixed cluster.

    Every column c in `targets` is simulated on its own by
    `view.dims[c].simulate`, with the cluster supplied as the input keyed by
    `view.outputs[0]` and N passed through.  The i-th results of all columns
    are combined with `merged`; the combined samples are returned lazily as a
    generator.
    """
    per_column = [
        view.dims[c].simulate(None, [c], None, {view.outputs[0]: cluster}, N)
        for c in targets
    ]
    return (merged(*parts) for parts in zip(*per_column))
117 | 


--------------------------------------------------------------------------------
/src/dummy/README:
--------------------------------------------------------------------------------
1 | These CGPMs are dummies, used mainly for test and exploratory purposes.
2 | 
3 | 


--------------------------------------------------------------------------------
/src/dummy/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/dummy/barebones.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """A barebones cgpm only outputs, inputs, serialization, and not much else."""
18 | 
19 | from cgpm.cgpm import CGpm
20 | 
class BareBonesCGpm(CGpm):
    """Minimal cgpm: stores outputs and inputs and supports serialization.

    incorporate, unincorporate and transition do nothing; logpdf is always 0
    and simulate assigns the value 1 to every output.
    """

    def __init__(self, outputs, inputs, distargs=None, rng=None):
        # distargs and rng are accepted for signature compatibility but are
        # not stored or used.
        self.outputs = outputs
        self.inputs = inputs

    def incorporate(self, rowid, observation, inputs=None):
        return

    def unincorporate(self, rowid):
        return

    def logpdf(self, rowid, targets, constraints=None, inputs=None):
        return 0

    def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
        """Return {output: 1} for every output, or a list of N such dicts."""
        if N is None:
            return {i: 1 for i in self.outputs}
        # Build an independent dict per sample: `[result] * N` would repeat a
        # reference to one shared dict, so mutating a sample would change all.
        return [{i: 1 for i in self.outputs} for _ in range(N)]

    def transition(self, **kwargs):
        return

    def to_metadata(self):
        metadata = dict()
        metadata['outputs'] = self.outputs
        metadata['inputs'] = self.inputs
        metadata['factory'] = ('cgpm.dummy.barebones', 'BareBonesCGpm')
        return metadata

    @classmethod
    def from_metadata(cls, metadata, rng=None):
        # rng is ignored; the class holds no random state.
        return cls(metadata['outputs'], metadata['inputs'])
53 | 


--------------------------------------------------------------------------------
/src/dummy/fourway.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import time
 18 | 
 19 | import numpy as np
 20 | 
 21 | from cgpm.cgpm import CGpm
 22 | from cgpm.utils import general as gu
 23 | 
 24 | 
 25 | class FourWay(CGpm):
 26 |     """Outputs categorical(4) (quadrant indicator) on R2 valued input."""
 27 | 
 28 |     def __init__(self, outputs, inputs, distargs=None, rng=None):
 29 |         if rng is None:
 30 |             rng = gu.gen_rng(1)
 31 |         self.rng = rng
 32 |         self.probabilities =[
 33 |             [.70, .10, .05, .05],
 34 |             [.10, .80, .10, .10],
 35 |             [.10, .15, .65, .10],
 36 |             [.10, .05, .10, .75],
 37 |         ]
 38 |         assert len(outputs) == 1
 39 |         assert len(inputs) == 2
 40 |         self.outputs = list(outputs)
 41 |         self.inputs = list(inputs)
 42 | 
 43 |     def incorporate(self, rowid, observation, inputs=None):
 44 |         return
 45 | 
 46 |     def unincorporate(self, rowid):
 47 |         return
 48 | 
 49 |     @gu.simulate_many
 50 |     def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
 51 |         regime = self.lookup_quadrant(
 52 |             inputs[self.inputs[0]],
 53 |             inputs[self.inputs[1]],
 54 |         )
 55 |         x = gu.pflip(self.probabilities[regime], rng=self.rng)
 56 |         return {self.outputs[0]: x}
 57 | 
 58 |     def logpdf(self, rowid, targets, constraints=None, inputs=None):
 59 |         x = targets[self.outputs[0]]
 60 |         if not (0 <= x <= 3):
 61 |             return -float('inf')
 62 |         regime = self.lookup_quadrant(
 63 |             inputs[self.inputs[0]],
 64 |             inputs[self.inputs[1]],
 65 |         )
 66 |         return np.log(self.probabilities[regime][x])
 67 | 
 68 |     def transition(self, N=None, S=None):
 69 |         time.sleep(.1)
 70 | 
 71 |     @staticmethod
 72 |     def lookup_quadrant(y0, y1):
 73 |         if y0 >= 0 and y1 >= 0: return 0
 74 |         if y0 <= 0 and y1 >= 0: return 1
 75 |         if y0 >= 0 and y1 <= 0: return 2
 76 |         if y0 <= 0 and y1 <= 0: return 3
 77 |         raise ValueError('Invalid value: %s' % str((y0, y1)))
 78 | 
 79 |     @staticmethod
 80 |     def retrieve_y_for_x(x):
 81 |         if x == 0: return [2, 2]
 82 |         if x == 1: return [-2, 2]
 83 |         if x == 2: return [2, -2]
 84 |         if x == 3: return [-2, -2]
 85 |         raise ValueError('Invalid value: %s' % str(x))
 86 | 
 87 |     def to_metadata(self):
 88 |         metadata = dict()
 89 |         metadata['outputs'] = self.outputs
 90 |         metadata['inputs'] = self.inputs
 91 |         metadata['factory'] = ('cgpm.dummy.fourway', 'FourWay')
 92 |         return metadata
 93 | 
 94 |     @classmethod
 95 |     def from_metadata(cls, metadata, rng=None):
 96 |         if rng is None: rng = gu.gen_rng(0)
 97 |         return cls(
 98 |             outputs=metadata['outputs'],
 99 |             inputs=metadata['inputs'],
100 |             rng=rng)
101 | 


--------------------------------------------------------------------------------
/src/dummy/trollnormal.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import time
18 | 
19 | import numpy as np
20 | 
21 | from cgpm.cgpm import CGpm
22 | from cgpm.utils import general as gu
23 | 
24 | 
class TrollNormal(CGpm):
    """Normal output whose location and scale are read from two inputs.

    The location is the first input; the scale is the absolute value of the
    second input plus one.  Incorporated rowids are tracked but the
    observations themselves are not stored.
    """

    def __init__(self, outputs, inputs, rng=None, distargs=None):
        if rng is None:
            rng = gu.gen_rng(1)
        self.rng = rng
        assert len(outputs) == 1
        assert len(inputs) == 2
        self.outputs = list(outputs)
        self.inputs = list(inputs)
        self.rowids = set([])

    def incorporate(self, rowid, observation, inputs=None):
        assert rowid not in self.rowids
        self.rowids.add(rowid)

    def unincorporate(self, rowid):
        assert rowid in self.rowids
        self.rowids.remove(rowid)

    @gu.simulate_many
    def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
        x = self.rng.normal(
            self._retrieve_location(inputs),
            self._retrieve_scale(inputs),
        )
        return {self.outputs[0]: x}

    def logpdf(self, rowid, targets, constraints=None, inputs=None):
        return self._gaussian_log_pdf(
            targets[self.outputs[0]],
            self._retrieve_location(inputs),
            self._retrieve_scale(inputs),
        )

    def _gaussian_log_pdf(self, x, mu, s):
        """Return the log density of x under Normal(mu, s), s the std dev."""
        normalizing_constant = -(np.log(2 * np.pi) / 2) - np.log(s)
        # 2.0 forces true division: with integer x, mu and s, Python 2 would
        # otherwise floor the quadratic term.
        return normalizing_constant - ((x - mu)**2 / (2.0 * s**2))

    def transition(self, N=None, S=None):
        time.sleep(.1)

    def _retrieve_location(self, inputs):
        return inputs[self.inputs[0]]

    def _retrieve_scale(self, inputs):
        # abs(.) + 1 keeps the scale at least 1 for any real input.
        return abs(inputs[self.inputs[1]]) + 1

    def to_metadata(self):
        metadata = dict()
        metadata['outputs'] = self.outputs
        metadata['inputs'] = self.inputs
        metadata['factory'] = ('cgpm.dummy.trollnormal', 'TrollNormal')
        return metadata

    @classmethod
    def from_metadata(cls, metadata, rng=None):
        if rng is None: rng = gu.gen_rng(0)
        return cls(
            outputs=metadata['outputs'],
            inputs=metadata['inputs'],
            rng=rng)
86 | 


--------------------------------------------------------------------------------
/src/dummy/twoway.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import time
18 | 
19 | import numpy as np
20 | 
21 | from cgpm.cgpm import CGpm
22 | from cgpm.utils import general as gu
23 | 
24 | 
class TwoWay(CGpm):
    """Generates {0,1} output on {0,1} valued input with given CPT."""

    def __init__(self, outputs, inputs, distargs=None, rng=None):
        if rng is None:
            rng = gu.gen_rng(1)
        self.rng = rng
        # probabilities[y][x] is the probability of output x given input y.
        self.probabilities =[
            [.9, .1],
            [.3, .7],
        ]
        assert len(outputs) == 1
        assert len(inputs) == 1
        self.outputs = list(outputs)
        self.inputs = list(inputs)

    def incorporate(self, rowid, observation, inputs=None):
        return

    def unincorporate(self, rowid):
        return

    def _retrieve_input(self, inputs):
        """Return the validated input as an int usable as a list index."""
        y = inputs[self.inputs[0]]
        assert int(y) == float(y)
        assert y in [0, 1]
        # The checks above accept integral floats (0.0, 1.0), which cannot
        # index a list directly.
        return int(y)

    @gu.simulate_many
    def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
        y = self._retrieve_input(inputs)
        x = gu.pflip(self.probabilities[y], rng=self.rng)
        return {self.outputs[0]: x}

    def logpdf(self, rowid, targets, constraints=None, inputs=None):
        """Return log P(x | y) from the CPT; -inf when x is not 0 or 1."""
        y = self._retrieve_input(inputs)
        x = targets[self.outputs[0]]
        if x not in [0, 1]:
            return -float('inf')
        # int(x) lets an integral float target index the table row.
        return np.log(self.probabilities[y][int(x)])

    def transition(self, N=None, S=None):
        # S is accepted (and ignored) for parity with the other dummy cgpms.
        time.sleep(.1)

    @staticmethod
    def retrieve_y_for_x(x):
        if x == 0:
            return 0
        if x == 1:
            return 1
        raise ValueError('Invalid value: %s' % str(x))

    def to_metadata(self):
        metadata = dict()
        metadata['outputs'] = self.outputs
        metadata['inputs'] = self.inputs
        metadata['factory'] = ('cgpm.dummy.twoway', 'TwoWay')
        return metadata

    @classmethod
    def from_metadata(cls, metadata, rng=None):
        if rng is None:
            rng = gu.gen_rng(0)
        return cls(
            outputs=metadata['outputs'],
            inputs=metadata['inputs'],
            rng=rng,
        )
89 | 


--------------------------------------------------------------------------------
/src/factor/README:
--------------------------------------------------------------------------------
1 | These CGPMs learn low dimensional, continuous latent variable representations.
2 | 


--------------------------------------------------------------------------------
/src/factor/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/kde/README:
--------------------------------------------------------------------------------
1 | These CGPMs learn the joint density on a high-dimensional variable using KDE.
2 | 


--------------------------------------------------------------------------------
/src/kde/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/knn/README:
--------------------------------------------------------------------------------
1 | This CGPM builds ad-hoc machine learning models on a per-query basis using KNN.
2 | 


--------------------------------------------------------------------------------
/src/knn/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/mixtures/README:
--------------------------------------------------------------------------------
1 | These CGPMs represent mixtures of CGPMs, using crosscat-like terminology.
2 | 
3 | 


--------------------------------------------------------------------------------
/src/mixtures/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/network/README:
--------------------------------------------------------------------------------
1 | Importance sampling for inference in a composite network of CGPMs.
2 | 


--------------------------------------------------------------------------------
/src/network/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/network/helpers.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import itertools as it
 18 | 
 19 | import numpy as np
 20 | from scipy.sparse.csgraph import connected_components
 21 | 
 22 | 
 23 | def validate_cgpms(cgpms):
 24 |     ot = [set(c.outputs) for c in cgpms]
 25 |     if not all(s for s in ot):
 26 |         raise ValueError('No output for a cgpm: %s' % ot)
 27 |     if any(set.intersection(a,b) for a,b in it.combinations(ot, 2)):
 28 |         raise ValueError('Duplicate outputs for cgpms: %s' % ot)
 29 |     return cgpms
 30 | 
 31 | 
 32 | def retrieve_variable_to_cgpm(cgpms):
 33 |     """Return map of variable v to its index i in the list of cgpms."""
 34 |     return {v:i for i, c in enumerate(cgpms) for v in c.outputs}
 35 | 
 36 | 
 37 | def retrieve_adjacency_list(cgpms, v_to_c):
 38 |     """Return map of cgpm index to list of indexes of its parent cgpms."""
 39 |     return {
 40 |         i: list(set(v_to_c[p] for p in c.inputs if p in v_to_c))
 41 |         for i, c in enumerate(cgpms)
 42 |     }
 43 | 
 44 | def retrieve_adjacency_matrix(cgpms, v_to_c):
 45 |     """Return a directed adjacency matrix of cgpms."""
 46 |     adjacency_list = retrieve_adjacency_list(cgpms, v_to_c)
 47 |     adjacency_matrix = np.zeros((len(adjacency_list), len(adjacency_list)))
 48 |     for i in adjacency_list:
 49 |         adjacency_matrix[i, adjacency_list[i]] = 1
 50 |     return adjacency_matrix.T
 51 | 
 52 | def retrieve_extraneous_inputs(cgpms, v_to_c):
 53 |     """Return list of inputs that are not the output of any cgpm."""
 54 |     extraneous = [[i for i in c.inputs if i not in v_to_c] for c in cgpms]
 55 |     return list(it.chain.from_iterable(extraneous))
 56 | 
 57 | 
 58 | def retrieve_ancestors(cgpms, q):
 59 |     """Return list of all variables that are ancestors of q (duplicates)."""
 60 |     v_to_c = retrieve_variable_to_cgpm(cgpms)
 61 |     if q not in v_to_c:
 62 |         raise ValueError('Invalid node: %s, %s' % (q, v_to_c))
 63 |     def ancestors(v):
 64 |         parents = cgpms[v_to_c[v]].inputs if v in v_to_c else []
 65 |         parent_ancestors = [ancestors(v) for v in parents]
 66 |         return list(it.chain.from_iterable(parent_ancestors)) + parents
 67 |     return ancestors(q)
 68 | 
 69 | def retrieve_descendents(cgpms, q):
 70 |     """Return list of all variables that are descends of q (duplicates)."""
 71 |     v_to_c = retrieve_variable_to_cgpm(cgpms)
 72 |     if q not in v_to_c:
 73 |         raise ValueError('Invalid node: %s, %s' % (q, v_to_c))
 74 |     def descendents(v):
 75 |         children = list(it.chain.from_iterable(
 76 |             [c.outputs for c in cgpms if v in c.inputs]))
 77 |         children_descendents = [descendents(c) for c in children]
 78 |         return list(it.chain.from_iterable(children_descendents)) + children
 79 |     return descendents(q)
 80 | 
 81 | 
 82 | def retrieve_weakly_connected_components(cgpms):
 83 |     v_to_c = retrieve_variable_to_cgpm(cgpms)
 84 |     adjacency = retrieve_adjacency_matrix(cgpms, v_to_c)
 85 |     n_components, labels = connected_components(
 86 |         adjacency, directed=True, connection='weak', return_labels=True)
 87 |     return labels
 88 | 
 89 | 
 90 | def topological_sort(graph):
 91 |     """Topologically sort a directed graph represented as an adjacency list.
 92 | 
 93 |     Assumes that edges are incoming, ie (10: [8,7]) means 8->10 and 7->10.
 94 | 
 95 |     Parameters
 96 |     ----------
 97 |     graph : list or dict
 98 |         Adjacency list or dict representing the graph, for example:
 99 |             graph_l = [(10, [8, 7]), (5, [8, 7, 9, 10, 11, 13, 15])]
100 |             graph_d = {10: [8, 7], 5: [8, 7, 9, 10, 11, 13, 15]}
101 | 
102 |     Returns
103 |     -------
104 |     graph_sorted : list
105 |         An adjacency list, where order of nodes is in topological order.
106 |     """
107 |     graph_sorted = []
108 |     graph = dict(graph)
109 |     while graph:
110 |         cyclic = True
111 |         for node, edges in graph.items():
112 |             if all(e not in graph for e in edges):
113 |                 cyclic = False
114 |                 del graph[node]
115 |                 graph_sorted.append(node)
116 |         if cyclic:
117 |             raise ValueError('Cyclic dependency occurred in topological_sort.')
118 |     return graph_sorted
119 | 
120 | 
def retrieve_required_inputs(cgpms, topo, targets, constraints, extraneous):
    """Return list of input addresses required to answer query.

    Walks the cgpm indexes in `topo` in reverse.  Whenever a cgpm has an
    output that is required so far or is constrained, all of its inputs become
    required too.  The result is the required variables that are not already
    in `targets`, `constraints` or `extraneous`; its order is the iteration
    order of a set.
    """
    needed = set(targets)
    for index in reversed(topo):
        cgpm = cgpms[index]
        relevant = any(
            o in needed or o in constraints for o in cgpm.outputs)
        if relevant:
            needed.update(cgpm.inputs)
    known = [targets, constraints, extraneous]
    return [v for v in needed if not any(v in group for group in known)]
133 | 


--------------------------------------------------------------------------------
/src/primitives/README:
--------------------------------------------------------------------------------
1 | These CGPMs are univariate probability distributions from exponential family.
2 | 
3 | 


--------------------------------------------------------------------------------
/src/primitives/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/primitives/bernoulli.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | from math import log
 18 | 
 19 | from scipy.special import betaln
 20 | 
 21 | from cgpm.primitives.distribution import DistributionGpm
 22 | from cgpm.utils import general as gu
 23 | 
 24 | 
 25 | class Bernoulli(DistributionGpm):
 26 |     """Bernoulli distribution with beta prior on bias theta.
 27 | 
 28 |     theta ~ Beta(alpha, beta)
 29 |     x ~ Bernoulli(theta)
 30 |     """
 31 | 
 32 |     def __init__(self, outputs, inputs, hypers=None, params=None,
 33 |             distargs=None, rng=None):
 34 |         DistributionGpm.__init__(
 35 |             self, outputs, inputs, hypers, params, distargs, rng)
 36 |         # Sufficent statistics.
 37 |         self.N = 0
 38 |         self.x_sum = 0
 39 |         # Hyperparameters.
 40 |         if hypers is None: hypers = {}
 41 |         self.alpha = hypers.get('alpha', 1.)
 42 |         self.beta = hypers.get('beta', 1.)
 43 |         assert self.alpha > 0
 44 |         assert self.beta > 0
 45 | 
 46 |     def incorporate(self, rowid, observation, inputs=None):
 47 |         DistributionGpm.incorporate(self, rowid, observation, inputs)
 48 |         x = observation[self.outputs[0]]
 49 |         if x not in [0, 1]:
 50 |             raise ValueError('Invalid Bernoulli: %s' % str(x))
 51 |         self.N += 1
 52 |         self.x_sum += x
 53 |         self.data[rowid] = x
 54 | 
 55 |     def unincorporate(self, rowid):
 56 |         x = self.data.pop(rowid)
 57 |         self.N -= 1
 58 |         self.x_sum -= x
 59 | 
 60 |     def logpdf(self, rowid, targets, constraints=None, inputs=None):
 61 |         DistributionGpm.logpdf(self, rowid, targets, constraints, inputs)
 62 |         x = targets[self.outputs[0]]
 63 |         if x not in [0, 1]:
 64 |             return -float('inf')
 65 |         return Bernoulli.calc_predictive_logp(
 66 |             x, self.N, self.x_sum, self.alpha, self.beta)
 67 | 
 68 |     @gu.simulate_many
 69 |     def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
 70 |         DistributionGpm.simulate(self, rowid, targets, constraints, inputs, N)
 71 |         if rowid in self.data:
 72 |             return {self.outputs[0]: self.data[rowid]}
 73 |         p0 = Bernoulli.calc_predictive_logp(
 74 |             0, self.N, self.x_sum, self.alpha, self.beta)
 75 |         p1 = Bernoulli.calc_predictive_logp(
 76 |             1, self.N, self.x_sum, self.alpha, self.beta)
 77 |         x = gu.log_pflip([p0, p1], rng=self.rng)
 78 |         return {self.outputs[0]: x}
 79 | 
 80 |     def logpdf_score(self):
 81 |         return Bernoulli.calc_logpdf_marginal(
 82 |             self.N, self.x_sum, self.alpha, self.beta)
 83 | 
 84 |     ##################
 85 |     # NON-GPM METHOD #
 86 |     ##################
 87 | 
 88 |     def transition_params(self):
 89 |         return
 90 | 
 91 |     def set_hypers(self, hypers):
 92 |         assert hypers['alpha'] > 0
 93 |         assert hypers['beta'] > 0
 94 |         self.alpha = hypers['alpha']
 95 |         self.beta = hypers['beta']
 96 | 
 97 |     def get_hypers(self):
 98 |         return {'alpha': self.alpha, 'beta': self.beta}
 99 | 
100 |     def get_params(self):
101 |         return {}
102 | 
103 |     def get_suffstats(self):
104 |         return {'N':self.N, 'x_sum':self.x_sum}
105 | 
106 |     def get_distargs(self):
107 |         return {'k': 2}
108 | 
109 |     @staticmethod
110 |     def construct_hyper_grids(X, n_grid=30):
111 |         grids = dict()
112 |         grids['alpha'] = gu.log_linspace(1., float(len(X)), n_grid)
113 |         grids['beta'] = gu.log_linspace(1., float(len(X)),n_grid)
114 |         return grids
115 | 
116 |     @staticmethod
117 |     def name():
118 |         return 'bernoulli'
119 | 
120 |     @staticmethod
121 |     def is_collapsed():
122 |         return True
123 | 
124 |     @staticmethod
125 |     def is_continuous():
126 |         return False
127 | 
128 |     @staticmethod
129 |     def is_conditional():
130 |         return False
131 | 
132 |     @staticmethod
133 |     def is_numeric():
134 |         return False
135 | 
136 |     ##################
137 |     # HELPER METHODS #
138 |     ##################
139 | 
140 |     @staticmethod
141 |     def calc_predictive_logp(x, N, x_sum, alpha, beta):
142 |         log_denom = log(N + alpha + beta)
143 |         if x == 1:
144 |             return log(x_sum + alpha) - log_denom
145 |         else:
146 |             return log(N - x_sum + beta) - log_denom
147 | 
148 |     @staticmethod
149 |     def calc_logpdf_marginal(N, x_sum, alpha, beta):
150 |         return betaln(x_sum + alpha, N - x_sum + beta) - betaln(alpha, beta)
151 | 


--------------------------------------------------------------------------------
/src/primitives/categorical.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | from math import log
 18 | 
 19 | import numpy as np
 20 | 
 21 | from scipy.special import gammaln
 22 | 
 23 | from cgpm.primitives.distribution import DistributionGpm
 24 | from cgpm.utils import general as gu
 25 | 
 26 | 
 27 | class Categorical(DistributionGpm):
 28 |     """Categorical distribution with symmetric dirichlet prior on
 29 |     category weight vector v.
 30 | 
 31 |     k := distarg
 32 |     v ~ Symmetric-Dirichlet(alpha/k)
 33 |     x ~ Categorical(v)
 34 |     http://www.cs.berkeley.edu/~stephentu/writeups/dirichlet-conjugate-prior.pdf
 35 |     """
 36 | 
 37 |     def __init__(self, outputs, inputs, hypers=None, params=None,
 38 |             distargs=None, rng=None):
 39 |         DistributionGpm.__init__(
 40 |             self, outputs, inputs, hypers, params, distargs, rng)
 41 |         # Distargs.
 42 |         k = distargs.get('k', None)
 43 |         if k is None:
 44 |             raise ValueError('Categorical requires distarg `k`.')
 45 |         self.k = int(k)
 46 |         # Sufficient statistics.
 47 |         self.N = 0
 48 |         self.counts = np.zeros(self.k)
 49 |         # Hyperparameters.
 50 |         if hypers is None: hypers = {}
 51 |         self.alpha = hypers.get('alpha', 1.)
 52 | 
 53 |     def incorporate(self, rowid, observation, inputs=None):
 54 |         DistributionGpm.incorporate(self, rowid, observation, inputs)
 55 |         x = observation[self.outputs[0]]
 56 |         if not (x % 1 == 0 and 0 <= x < self.k):
 57 |             raise ValueError('Invalid Categorical(%d): %s' % (self.k, x))
 58 |         x = int(x)
 59 |         self.N += 1
 60 |         self.counts[x] += 1
 61 |         self.data[rowid] = x
 62 | 
 63 |     def unincorporate(self, rowid):
 64 |         x = self.data.pop(rowid)
 65 |         self.N -= 1
 66 |         self.counts[x] -= 1
 67 | 
 68 |     def logpdf(self, rowid, targets, constraints=None, inputs=None):
 69 |         DistributionGpm.logpdf(self, rowid, targets, constraints, inputs)
 70 |         x = targets[self.outputs[0]]
 71 |         if not (x % 1 == 0 and 0 <= x < self.k):
 72 |             return -float('inf')
 73 |         return Categorical.calc_predictive_logp(
 74 |             int(x), self.N, self.counts, self.alpha)
 75 | 
 76 |     @gu.simulate_many
 77 |     def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
 78 |         DistributionGpm.simulate(self, rowid, targets, constraints, inputs, N)
 79 |         if rowid in self.data:
 80 |             return {self.outputs[0]: self.data[rowid]}
 81 |         x = gu.pflip(self.counts + self.alpha, rng=self.rng)
 82 |         return {self.outputs[0]: x}
 83 | 
 84 |     def logpdf_score(self):
 85 |         return Categorical.calc_logpdf_marginal(self.N, self.counts, self.alpha)
 86 | 
 87 |     ##################
 88 |     # NON-GPM METHOD #
 89 |     ##################
 90 | 
 91 |     def transition_params(self):
 92 |         return
 93 | 
 94 |     def set_hypers(self, hypers):
 95 |         assert hypers['alpha'] > 0
 96 |         self.alpha = hypers['alpha']
 97 | 
 98 |     def get_hypers(self):
 99 |         return {'alpha': self.alpha}
100 | 
101 |     def get_params(self):
102 |         return {}
103 | 
104 |     def get_suffstats(self):
105 |         return {'N' : self.N, 'counts' : list(self.counts)}
106 | 
107 |     def get_distargs(self):
108 |         return {'k': self.k}
109 | 
110 |     @staticmethod
111 |     def construct_hyper_grids(X, n_grid=30):
112 |         grids = dict()
113 |         grids['alpha'] = gu.log_linspace(1., float(len(X)), n_grid)
114 |         return grids
115 | 
116 |     @staticmethod
117 |     def name():
118 |         return 'categorical'
119 | 
120 |     @staticmethod
121 |     def is_collapsed():
122 |         return True
123 | 
124 |     @staticmethod
125 |     def is_continuous():
126 |         return False
127 | 
128 |     @staticmethod
129 |     def is_conditional():
130 |         return False
131 | 
132 |     @staticmethod
133 |     def is_numeric():
134 |         return False
135 | 
136 |     ##################
137 |     # HELPER METHODS #
138 |     ##################
139 | 
140 |     @staticmethod
141 |     def validate(x, K):
142 |         return int(x) == float(x) and 0 <= x < K
143 | 
144 |     @staticmethod
145 |     def calc_predictive_logp(x, N, counts, alpha):
146 |         numer = log(alpha + counts[x])
147 |         denom = log(np.sum(counts) + alpha * len(counts))
148 |         return numer - denom
149 | 
150 |     @staticmethod
151 |     def calc_logpdf_marginal(N, counts, alpha):
152 |         K = len(counts)
153 |         A = K * alpha
154 |         lg = sum(gammaln(counts[k] + alpha) for k in xrange(K))
155 |         return gammaln(A) - gammaln(A+N) + lg - K * gammaln(alpha)
156 | 


--------------------------------------------------------------------------------
/src/regressions/README:
--------------------------------------------------------------------------------
1 | These CGPMs are discriminative, i.e. numeric- or symbolic-valued regression.
2 | 
3 | 


--------------------------------------------------------------------------------
/src/regressions/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/uncorrelated/README:
--------------------------------------------------------------------------------
1 | These CGPMs are zero-correlation bivariate functions on the R2 plane.
2 | 
3 | 


--------------------------------------------------------------------------------
/src/uncorrelated/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/uncorrelated/diamond.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from cgpm.cgpm import CGpm
18 | from cgpm.network.importance import ImportanceNetwork
19 | from cgpm.uncorrelated.directed import DirectedXyGpm
20 | from cgpm.uncorrelated.uniformx import UniformX
21 | from cgpm.utils import general as gu
22 | 
23 | 
class DiamondY(CGpm):
    """Conditional GPM for Y given X near the edges of the diamond |x|+|y|=1.

    For a given x, one of the two diamond edges above/below x is picked with
    a uniform coin flip, shifted towards the other edge by a U(0, noise)
    amount, and clamped so that it never crosses the other edge.
    """

    def __init__(self, outputs=None, inputs=None, noise=None, rng=None):
        if rng is None:
            rng = gu.gen_rng(1)
        if outputs is None:
            outputs = [0]
        if inputs is None:
            inputs = [1]
        if noise is None:
            noise = .1
        self.rng = rng
        self.outputs = outputs
        self.inputs = inputs
        self.noise = noise

    @gu.simulate_many
    def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
        """Simulate y given the input x.

        Raises ValueError only when x is not comparable with 0 (e.g. NaN).
        """
        assert targets == self.outputs
        # Wrap keys() in list(): on Python 3 a dict view never equals a list.
        assert list(inputs.keys()) == self.inputs
        x = inputs[self.inputs[0]]
        slope = self.rng.rand()
        noise = self.rng.uniform(high=self.noise)
        perturb_upper = slope < .5
        # The left and right halves agree at x == 0 (edges at y = +/-1), and
        # slope == .5 is treated like slope > .5, so neither boundary raises.
        if x <= 0:
            if perturb_upper:
                y = max(-x-1, x+1 - noise)
            else:
                y = min(x+1, -x-1 + noise)
        elif x > 0:
            if perturb_upper:
                y = min(-x+1, x-1 + noise)
            else:
                y = max(x-1, -x+1 - noise)
        else:
            raise ValueError('Invalid input for DiamondY: %s' % (x,))
        return {self.outputs[0]: y}

    def logpdf(self, rowid, targets, constraints=None, inputs=None):
        raise NotImplementedError
60 | 
61 | 
class Diamond(DirectedXyGpm):
    """Directed pair (X, Y): X is a UniformX on [-1, 1], Y is a DiamondY of X.

    The two components are composed through an ImportanceNetwork.
    """

    def __init__(self, outputs=None, inputs=None, noise=None, rng=None):
        DirectedXyGpm.__init__(
            self, outputs=outputs, inputs=inputs, noise=noise, rng=rng)
        x_out = self.outputs[0]
        y_out = self.outputs[1]
        # The x variable feeds the y variable as its sole input.
        self.x = UniformX(outputs=[x_out], low=-1, high=1)
        self.y = DiamondY(outputs=[y_out], inputs=[x_out], noise=noise)
        components = [self.x, self.y]
        self.network = ImportanceNetwork(components, rng=self.rng)
75 | 


--------------------------------------------------------------------------------
/src/uncorrelated/directed.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from cgpm.cgpm import CGpm
18 | from cgpm.utils.general import gen_rng
19 | 
20 | 
class DirectedXyGpm(CGpm):
    """Interface directed two-dimensional GPMs over the R2 plane.

    Subclasses are expected to assign `self.network`; until they do,
    `logpdf` and `simulate` raise ValueError.
    """

    def __init__(self, outputs=None, inputs=None, noise=None, acc=1, rng=None):
        """Initialize the Gpm with given noise parameter.

        Parameters
        ----------
        outputs : list, optional
            Output variable indexes; defaults to [0, 1].
        noise : float, optional
            Value in (0,1) indicating the noise level of the distribution;
            defaults to .1.
        acc : optional
            Stored on the instance as `self.acc`; defaults to 1.
        rng : np.random.RandomState, optional.
            Source of entropy; defaults to gen_rng(0).
        """
        if type(self) is DirectedXyGpm:
            raise Exception('Cannot directly instantiate DirectedXyGpm.')
        self.rng = gen_rng(0) if rng is None else rng
        self.outputs = [0, 1] if outputs is None else outputs
        self.noise = .1 if noise is None else noise
        self.acc = acc
        # Override the network in subclass.
        self.network = None

    def logpdf(self, rowid, targets, constraints=None, inputs=None):
        """Delegate the density query to `self.network`."""
        network = self.network
        if network is None:
            raise ValueError('self.network not defined by %s' % (type(self),))
        # NOTE(review): `constraints` is not forwarded and `inputs` is passed
        # as the third positional argument -- confirm against the network's
        # logpdf signature.
        return network.logpdf(rowid, targets, inputs)

    def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
        """Delegate the simulation query to `self.network`."""
        network = self.network
        if network is None:
            raise ValueError('self.network not defined by %s' % (type(self),))
        # NOTE(review): `constraints` is not forwarded; `inputs` and `N` are
        # passed as the third and fourth positional arguments -- confirm
        # against the network's simulate signature.
        return network.simulate(rowid, targets, inputs, N)
58 | 


--------------------------------------------------------------------------------
/src/uncorrelated/dots.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import numpy as np
18 | 
19 | from scipy.stats import norm
20 | 
21 | from cgpm.uncorrelated.undirected import UnDirectedXyGpm
22 | from cgpm.utils import general as gu
23 | 
24 | 
class Dots(UnDirectedXyGpm):
    """(X,Y) ~ Four Dots.

    An equal-weight mixture of four normal clusters centered at the points
    zip(mx, my), each coordinate having standard deviation `self.noise`.
    """

    mx = [ -1, 1, -1, 1]
    my = [ -1, -1, 1, 1]

    def simulate_joint(self):
        """Pick one of the four centers uniformly and sample around it."""
        which = self.rng.randint(4)
        center_x, center_y = self.mx[which], self.my[which]
        x = self.rng.normal(loc=center_x, scale=self.noise)
        y = self.rng.normal(loc=center_y, scale=self.noise)
        return [x, y]

    def simulate_conditional(self, z):
        """Return the first coordinate of a fresh joint sample; z is unused."""
        sample = self.simulate_joint()
        return sample[0]

    def logpdf_joint(self, x, y):
        """Log density of (x, y) under the four-component mixture."""
        log_weight = np.log(.25)
        terms = []
        for cx, cy in zip(self.mx, self.my):
            term = (log_weight
                + norm.logpdf(x, loc=cx, scale=self.noise)
                + norm.logpdf(y, loc=cy, scale=self.noise))
            terms.append(term)
        return gu.logsumexp(terms)

    def logpdf_marginal(self, z):
        """Log density of one coordinate: a mixture over the distinct mx."""
        log_weight = np.log(.5)
        terms = []
        for center in set(self.mx):
            terms.append(
                log_weight + norm.logpdf(z, loc=center, scale=self.noise))
        return gu.logsumexp(terms)

    def logpdf_conditional(self, w, z):
        """Return the marginal log density of z; w is unused."""
        return self.logpdf_marginal(z)
53 | 


--------------------------------------------------------------------------------
/src/uncorrelated/linear.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import numpy as np
18 | 
19 | from scipy.stats import norm
20 | 
21 | from cgpm.uncorrelated.undirected import UnDirectedXyGpm
22 | from cgpm.utils import mvnormal as multivariate_normal
23 | 
24 | 
class Linear(UnDirectedXyGpm):
    """(X,Y) ~ zero-mean bivariate normal, unit variances, covariance 1-noise."""

    def simulate_joint(self):
        """Draw one (x, y) sample from the bivariate normal."""
        rho = 1-self.noise
        mean = [0,0]
        cov = [[1,rho],[rho,1]]
        return self.rng.multivariate_normal(mean, cov)

    def simulate_conditional(self, z):
        """Draw the other coordinate given that one coordinate equals z."""
        loc = self.conditional_mean(z)
        scale = np.sqrt(self.conditional_variance(z))
        return self.rng.normal(loc=loc, scale=scale)

    def logpdf_joint(self, x, y):
        """Log density of the pair (x, y)."""
        rho = 1-self.noise
        point = np.array([x,y])
        mean = np.array([0,0])
        cov = np.array([[1,rho],[rho,1]])
        return multivariate_normal.logpdf(point, mean, cov)

    def logpdf_marginal(self, z):
        """Log density of a single coordinate under a normal with scale 1."""
        return norm.logpdf(z, scale=1)

    def logpdf_conditional(self, w, z):
        """Log density of w given that the other coordinate equals z."""
        loc = self.conditional_mean(z)
        scale = np.sqrt(self.conditional_variance(z))
        return norm.logpdf(w, loc=loc, scale=scale)

    def conditional_mean(self, z):
        """Conditional mean (1-noise)*z."""
        rho = 1-self.noise
        return rho*z

    def conditional_variance(self, z):
        """Conditional variance 1-(1-noise)**2; it does not depend on z."""
        rho = 1-self.noise
        return (1-rho**2)

    def mutual_information(self):
        """Return -.5 * log(1 - cov**2), where cov = 1-noise."""
        rho = 1-self.noise
        return -.5 * np.log(1-rho**2)
58 | 


--------------------------------------------------------------------------------
/src/uncorrelated/parabola.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import numpy as np
18 | 
19 | from scipy.misc import logsumexp
20 | from scipy.stats import uniform
21 | 
22 | from cgpm.cgpm import CGpm
23 | from cgpm.network.importance import ImportanceNetwork
24 | from cgpm.uncorrelated.directed import DirectedXyGpm
25 | from cgpm.uncorrelated.uniformx import UniformX
26 | from cgpm.utils import general as gu
27 | 
28 | 
class ParabolaY(CGpm):
    """Conditional GPM for Y given X on a noisy two-branch parabola.

    With a uniform coin flip, Y = X^2 + e or Y = -(X^2 + e), where
    e ~ U(-noise, noise).
    """

    def __init__(self, outputs=None, inputs=None, noise=None, rng=None):
        if rng is None:
            rng = gu.gen_rng(1)
        if outputs is None:
            outputs = [0]
        if inputs is None:
            inputs = [1]
        if noise is None:
            noise = .1
        self.rng = rng
        self.outputs = outputs
        self.inputs = inputs
        self.noise = noise
        # Frozen U(-noise, noise) distribution used by logpdf.
        self.uniform = uniform(loc=-self.noise, scale=2*self.noise)

    @gu.simulate_many
    def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
        """Simulate y given the input x; constraints are not supported."""
        assert targets == self.outputs
        # Wrap keys() in list(): on Python 3 a dict view never equals a list.
        assert list(inputs.keys()) == self.inputs
        assert not constraints
        x = inputs[self.inputs[0]]
        u = self.rng.rand()
        noise = self.rng.uniform(low=-self.noise, high=self.noise)
        if u < .5:
            y = x**2 + noise
        else:
            y = -(x**2 + noise)
        return {self.outputs[0]: y}

    def logpdf(self, rowid, targets, constraints=None, inputs=None):
        """Log density of y given x as an equal mixture of both branches."""
        # Wrap keys() in list(): on Python 3 a dict view never equals a list.
        assert list(targets.keys()) == self.outputs
        assert list(inputs.keys()) == self.inputs
        assert not constraints
        x = inputs[self.inputs[0]]
        y = targets[self.outputs[0]]
        return logsumexp([
            np.log(.5)+self.uniform.logpdf(y-x**2),
            np.log(.5)+self.uniform.logpdf(-y-x**2)
        ])
69 | 
70 | 
class Parabola(DirectedXyGpm):
    """Y = (+/- w.p .5) (X^2 + U(-noise,noise)), with X a UniformX on [-1, 1].

    The two components are composed through an ImportanceNetwork.
    """

    def __init__(self, outputs=None, inputs=None, noise=None, rng=None):
        DirectedXyGpm.__init__(
            self, outputs=outputs, inputs=inputs, noise=noise, rng=rng)
        x_out = self.outputs[0]
        y_out = self.outputs[1]
        # The x variable feeds the y variable as its sole input.
        self.x = UniformX(outputs=[x_out], low=-1, high=1)
        self.y = ParabolaY(outputs=[y_out], inputs=[x_out], noise=noise)
        components = [self.x, self.y]
        self.network = ImportanceNetwork(components, rng=self.rng)
84 | 


--------------------------------------------------------------------------------
/src/uncorrelated/ring.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import numpy as np
18 | 
19 | from cgpm.uncorrelated.undirected import UnDirectedXyGpm
20 | 
21 | 
class Ring(UnDirectedXyGpm):
    """sqrt(X**2 + Y**2) + noise = 1 """

    def simulate_joint(self):
        """Sample a point at a uniform angle and radius in [1-noise, 1]."""
        # Draw the angle first, then the radius.
        theta = self.rng.uniform(0., 2.*np.pi)
        radius = self.rng.uniform(1.-self.noise, 1.)
        return [np.cos(theta)*radius, np.sin(theta)*radius]
31 | 


--------------------------------------------------------------------------------
/src/uncorrelated/undirected.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from cgpm.cgpm import CGpm
18 | from cgpm.utils import general as gu
19 | 
20 | 
class UnDirectedXyGpm(CGpm):
    """Interface undirected two-dimensional GPMs over the R2 plane.

    Subclasses implement the internal simulators and assessors declared at
    the bottom of the class; `logpdf` and `simulate` dispatch to them based
    on the number of targets and on whether constraints are given.
    """

    def __init__(self, outputs=None, inputs=None, noise=None, rng=None):
        if rng is None:
            rng = gu.gen_rng(0)
        if outputs is None:
            outputs = [0, 1]
        if noise is None:
            noise = .1
        self.rng = rng
        self.outputs = outputs
        self.inputs = []
        self.noise = noise

    def logpdf(self, rowid, targets, constraints=None, inputs=None):
        """Dispatch to the joint, marginal, or conditional log density.

        Without constraints, two targets give the joint density and a single
        target the marginal. With constraints, exactly one target and one
        constraint are required and the conditional density is returned.
        """
        assert not inputs
        if not constraints:
            if len(targets) == 2:
                # Look the values up by output index so that x and y do not
                # depend on the iteration order of the targets dict.
                x = targets[self.outputs[0]]
                y = targets[self.outputs[1]]
                return self.logpdf_joint(x, y)
            # list(...) keeps the indexing valid for Python 3 dict views.
            z = list(targets.values())[0]
            return self.logpdf_marginal(z)
        assert len(constraints) == len(targets) == 1
        z = list(constraints.values())[0]
        w = list(targets.values())[0]
        return self.logpdf_conditional(w, z)

    @gu.simulate_many
    def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
        """Dispatch to the joint or conditional simulator.

        Without constraints a joint sample is drawn and restricted to the
        requested targets; otherwise exactly one target and one constraint
        are required.
        """
        assert not inputs
        if not constraints:
            sample = self.simulate_joint()
            return {q: sample[self.outputs.index(q)] for q in targets}
        assert len(constraints) == len(targets) == 1
        # list(...) keeps the indexing valid for Python 3 dict views.
        z = list(constraints.values())[0]
        return {targets[0]: self.simulate_conditional(z)}

    # Internal simulators and assessors.

    def simulate_joint(self):
        raise NotImplementedError

    def simulate_conditional(self, z):
        raise NotImplementedError

    def logpdf_marginal(self, z):
        raise NotImplementedError

    def logpdf_joint(self, x, y):
        raise NotImplementedError

    def logpdf_conditional(self, w, z):
        raise NotImplementedError

    def mutual_information(self):
        raise NotImplementedError
80 | 


--------------------------------------------------------------------------------
/src/uncorrelated/uniformx.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from scipy.stats import uniform
18 | 
19 | from cgpm.cgpm import CGpm
20 | from cgpm.utils import general as gu
21 | 
22 | 
class UniformX(CGpm):
    """Single-output GPM with X ~ Uniform(low, high) and no inputs."""

    def __init__(self, outputs=None, inputs=None, low=0, high=1, rng=None):
        assert not inputs
        if rng is None:
            rng = gu.gen_rng(0)
        if outputs is None:
            outputs = [0]
        self.rng = rng
        self.low = low
        self.high = high
        self.outputs = outputs
        self.inputs = []
        # Frozen distribution on [low, high] used by logpdf.
        self.uniform = uniform(loc=self.low, scale=self.high-self.low)

    @gu.simulate_many
    def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
        """Draw x uniformly from [low, high); constraints are not supported."""
        assert not constraints
        assert targets == self.outputs
        x = self.rng.uniform(low=self.low, high=self.high)
        return {self.outputs[0]: x}

    def logpdf(self, rowid, targets, constraints=None, inputs=None):
        """Return the uniform log density of the targeted value."""
        assert not constraints
        assert not inputs
        # Wrap keys() in list(): on Python 3 a dict view never equals a list.
        assert list(targets.keys()) == self.outputs
        x = targets[self.outputs[0]]
        return self.uniform.logpdf(x)
51 | 


--------------------------------------------------------------------------------
/src/uncorrelated/xcross.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import numpy as np
18 | 
19 | from cgpm.uncorrelated.undirected import UnDirectedXyGpm
20 | from cgpm.utils import general as gu
21 | from cgpm.utils import mvnormal as multivariate_normal
22 | 
23 | 
class XCross(UnDirectedXyGpm):
    """Equal mixture of two bivariate normals forming an X shape.

    Both components have zero mean and unit variances; their off-diagonal
    covariance is +(1 - noise) for one arm and -(1 - noise) for the other.
    """

    def simulate_joint(self):
        """Draw one (x, y) pair from an arm chosen by a fair coin flip."""
        heads = self.rng.rand() < .5
        if heads:
            off_diagonal = 1-self.noise
        else:
            off_diagonal = -1+self.noise
        cov = np.array([[1, off_diagonal], [off_diagonal, 1]])
        return self.rng.multivariate_normal([0,0], cov=cov)

    def logpdf_joint(self, x, y):
        """Return the log density of (x, y) under the two-arm mixture."""
        point = np.array([x, y])
        mean = np.array([0, 0])
        weighted = []
        # Positive arm first, then the negative arm, each with weight .5.
        for off_diagonal in (1 - self.noise, -1 + self.noise):
            sigma = np.array([[1, off_diagonal], [off_diagonal, 1]])
            weighted.append(
                np.log(.5)+multivariate_normal.logpdf(point, mean, sigma))
        return gu.logsumexp(weighted)
43 | 


--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/src/utils/config.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import os
 18 | import re
 19 | 
 20 | from datetime import datetime
 21 | 
 22 | import importlib
 23 | 
 24 | 
# Maps each cctype name to the (module path, class name) pair that
# cctype_class imports on demand.
cctype_class_lookup = {
    'bernoulli'         : ('cgpm.primitives.bernoulli', 'Bernoulli'),
    'beta'              : ('cgpm.primitives.beta', 'Beta'),
    'categorical'       : ('cgpm.primitives.categorical', 'Categorical'),
    'crp'               : ('cgpm.primitives.crp', 'Crp'),
    'exponential'       : ('cgpm.primitives.exponential', 'Exponential'),
    'geometric'         : ('cgpm.primitives.geometric', 'Geometric'),
    'linear_regression' : ('cgpm.regressions.linreg', 'LinearRegression'),
    'lognormal'         : ('cgpm.primitives.lognormal', 'Lognormal'),
    'normal'            : ('cgpm.primitives.normal', 'Normal'),
    'normal_trunc'      : ('cgpm.primitives.normal_trunc', 'NormalTrunc'),
    'poisson'           : ('cgpm.primitives.poisson', 'Poisson'),
    'random_forest'     : ('cgpm.regressions.forest', 'RandomForest'),
    'vonmises'          : ('cgpm.primitives.vonmises', 'Vonmises'),
}

# Maps cctype names to Loom stattype names. The cctypes missing from this
# table (linear_regression, random_forest) make loom_stattype raise
# ValueError.
# https://github.com/posterior/loom/blob/master/doc/using.md#input-format
cctype_loom_lookup = {
    'bernoulli'         : 'boolean',
    'beta'              : 'real',
    'categorical'       : 'categorical',
    'crp'               : 'unbounded_categorical',
    'exponential'       : 'real',
    'geometric'         : 'real',
    'lognormal'         : 'real',
    'normal'            : 'real',
    'normal_trunc'      : 'real',
    'poisson'           : 'count',
    'vonmises'          : 'real',
}
 55 | 
 56 | def timestamp():
 57 |     return datetime.now().strftime('%Y%m%d-%H%M%S')
 58 | 
 59 | def colors():
 60 |     """Returns a list of colors."""
 61 |     return ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'brown',
 62 |         'black', 'pink']
 63 | 
 64 | def cctype_class(cctype):
 65 |     """Return class object for initializing a named GPM (default normal)."""
 66 |     if not cctype:
 67 |         raise ValueError('Specify a cctype!')
 68 |     modulename, classname = cctype_class_lookup[cctype]
 69 |     mod = importlib.import_module(modulename)
 70 |     return getattr(mod, classname)
 71 | 
 72 | def loom_stattype(cctype, distargs):
 73 |     # XXX Loom categorical is only up to 256 values; otherwise we need
 74 |     # unbounded_categorical (aka crp).
 75 |     if cctype == 'categorical' and distargs['k'] > 256:
 76 |         cctype = 'crp'
 77 |     try:
 78 |         return cctype_loom_lookup[cctype]
 79 |     except KeyError:
 80 |         raise ValueError(
 81 |             'Cannot convert cgpm type to loom type: %s' % (cctype,))
 82 | 
def valid_cctype(dist):
    """Return True if dist is a key of cctype_class_lookup.

    The table covers the primitive distributions as well as the
    linear_regression and random_forest entries.
    """
    return dist in cctype_class_lookup
 86 | 
 87 | def all_cctypes():
 88 |     """Returns a list of all known DistributionGpm."""
 89 |     return cctype_class_lookup.keys()
 90 | 
 91 | def parse_distargs(dists):
 92 |     """Parses a list of cctypes, where distargs are in parenthesis.
 93 |     >>> Input ['normal','categorical(k=8)','beta'].
 94 |     >>> Output ['normal','categorical','beta'], [None, {'k':8}, None].
 95 |     """
 96 |     cctypes, distargs = [], []
 97 |     for cctype in dists:
 98 |         keywords = re.search('\(.*\)', cctype)
 99 |         if keywords is not None:
100 |             keywords = keywords.group(0).replace('(','').\
101 |                 replace(')','')
102 |             temp = {}
103 |             for subpair in keywords.split(','):
104 |                 key, val = subpair.split('=')
105 |                 temp[key] = float(val)
106 |             keywords = temp
107 |             cctype = cctype[:cctype.index('(')]
108 |         cctypes.append(cctype)
109 |         distargs.append(keywords)
110 |     return cctypes, distargs
111 | 
def check_env_debug():
    """Return the GPMCCDEBUG environment setting.

    Returns False when the variable is unset, otherwise int() of its
    value (so a non-integer value raises ValueError).
    """
    raw = os.environ.get('GPMCCDEBUG', None)
    if raw is None:
        return False
    return int(raw)
115 | 


--------------------------------------------------------------------------------
/src/utils/parallel_map.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2016 MIT Probabilistic Computing Project.
  2 | #
  3 | # This file is part of Venture.
  4 | #
  5 | # Venture is free software: you can redistribute it and/or modify
  6 | # it under the terms of the GNU General Public License as published by
  7 | # the Free Software Foundation, either version 3 of the License, or
  8 | # (at your option) any later version.
  9 | #
 10 | # Venture is distributed in the hope that it will be useful,
 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | # GNU General Public License for more details.
 14 | #
 15 | # You should have received a copy of the GNU General Public License
 16 | # along with Venture.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | import cPickle as pickle
 19 | import os
 20 | import struct
 21 | import traceback
 22 | 
 23 | from multiprocessing import Process
 24 | from multiprocessing import Pipe
 25 | from multiprocessing import cpu_count
 26 | 
 27 | 
 28 | def le32enc(n):
 29 |     return struct.pack('<I', n)
 30 | 
 31 | def le32dec(s):
 32 |     return struct.unpack('<I', s)[0]
 33 | 
 34 | 
 35 | # Not using multiprocessing.pool because it is not clear how to get it
 36 | # to share data from the parent to the child process.
def parallel_map(f, l, parallelism=None):
    """Apply f to every element of l using worker processes.

    f           -- callable applied to each item.
    l           -- indexable sequence of inputs (len(l) and l[i] are used).
    parallelism -- number of worker processes; defaults to cpu_count().

    Returns a list fl with fl[i] == f(l[i]).

    Raises RuntimeError('Subprocess failed: ...') carrying the formatted
    traceback when f raises in a worker or its result cannot be pickled.

    Each worker has its own input pipe (item indices; None means stop) and
    its own output pipe ((index, ok, value) tuples). All workers share one
    os.pipe on which they write their child number, so the parent knows
    which output pipe to read next. Workers reach f and l through the
    closure rather than through a pipe -- presumably this relies on the
    child process inheriting the parent's memory; confirm for non-fork
    start methods.
    """

    ncpu = cpu_count() if parallelism is None else parallelism

    # Per-process action: grab an input from the input queue, compute,
    # toss the output in the output queue.
    def process_input(childno, inq_rd, outq_wr, retq_wr):
        while True:
            i = inq_rd.recv()
            if i is None:
                break
            x = l[i]
            try:
                ok, fx = True, f(x)
            except Exception as e:
                ok, fx = False, traceback.format_exc()
            # Announce which child has a result before sending the result.
            os.write(retq_wr, le32enc(childno))
            try:
                outq_wr.send((i, ok, fx))
            except pickle.PicklingError:
                # Report an unpicklable result as a failure of item i.
                outq_wr.send((i, False, traceback.format_exc()))

    # Store one result in fl and decrement the outstanding-work counter;
    # ctr is a one-element list so the update is visible to the caller.
    def process_output(fl, ctr, output):
        (i, ok, fx) = output
        if not ok:
            raise RuntimeError('Subprocess failed: %s' % (fx,))
        fl[i] = fx
        ctr[0] -= 1

    # Create the queues and worker processes.
    retq_rd, retq_wr = os.pipe()
    inq = [Pipe(duplex=False) for _ in xrange(ncpu)]
    outq = [Pipe(duplex=False) for _ in xrange(ncpu)]
    process = [
        Process(target=process_input, args=(j, inq[j][0], outq[j][1], retq_wr))
        for j in xrange(ncpu)
    ]

    # Prepare to bail by terminating all the worker processes.
    try:

        # Start the worker processes.
        for p in process:
            p.start()

        # Queue up the tasks one by one.  If the input queue is full,
        # process an output item to free up a worker process and try
        # again.
        n = len(l)
        fl = [None] * n
        ctr = [n]
        iterator = iter(xrange(n))
        # Seed each worker with one index; the shared iterator keeps its
        # position so the next loop continues with the remaining indices.
        for j, i in zip(xrange(ncpu), iterator):
            inq[j][1].send(i)
        for i in iterator:
            j = le32dec(os.read(retq_rd, 4))
            process_output(fl, ctr, outq[j][0].recv())
            inq[j][1].send(i)

        # Process all the remaining output items.
        while 0 < ctr[0]:
            j = le32dec(os.read(retq_rd, 4))
            process_output(fl, ctr, outq[j][0].recv())

        # Cancel all the worker processes.
        for _inq_rd, inq_wr in inq:
            inq_wr.send(None)

        # Wait for all the worker processes to complete.
        for p in process:
            p.join()

    # NOTE(review): only Exception subclasses are caught here, so e.g.
    # KeyboardInterrupt skips the terminate step -- confirm that is intended.
    except Exception as e:           # paranoia
        # Terminate all subprocesses immediately and reraise.
        for p in process:
            if p.is_alive():
                p.terminate()
        raise

    finally:
        # Always release the parent's ends of every pipe.
        os.close(retq_rd)
        os.close(retq_wr)
        for inq_rd, inq_wr in inq:
            inq_rd.close()
            inq_wr.close()
        for outq_rd, outq_wr in outq:
            outq_rd.close()
            outq_wr.close()

    return fl
127 | 


--------------------------------------------------------------------------------
/src/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import time
18 | 
class Timer(object):
    """Context manager that records wall-clock time spent in its block.

    `start` is set on entry; `end` and `interval` (end - start, from
    time.time()) are set on exit. Exceptions are not suppressed.
    """

    def __enter__(self):
        """Record the start time and return the timer itself."""
        self.start = time.time()
        return self

    def __exit__(self, *args):
        """Record the end time and the elapsed interval."""
        finished = time.time()
        self.end = finished
        self.interval = finished - self.start
27 | 
28 | 
def progress(percentage, stream):
    """Write a 30-character progress bar for `percentage` to `stream`.

    percentage -- fraction of work completed (0.5 renders as 50.00%).
    stream     -- object with write() and flush(); the line starts with a
                  carriage return so successive calls overwrite each other.
    """
    blank = ' ' * 30
    fill = int(percentage * len(blank))
    bar = ''.join(['[', '=' * fill, blank[fill:], ']'])
    stream.write('\r{} {:1.2f}%'.format(bar, 100 * percentage))
    stream.flush()
35 | 


--------------------------------------------------------------------------------
/src/venturescript/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/probcomp/cgpm/56a481829448bddc9cdfebd42f65023287d5b7c7/tests/__init__.py


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Set the matplotlib back end to a headless one before anything tries
18 | # to use matplotlib and get some other back end.
19 | import matplotlib
20 | matplotlib.use('pdf')
21 | 
def pytest_addoption(parser):
    """Register the --integration command-line flag (off by default)."""
    settings = {
        'action': 'store_true',
        'default': False,
        'help': 'run integration tests',
    }
    parser.addoption('--integration', **settings)
29 | 


--------------------------------------------------------------------------------
/tests/disabled_test_uncorrelated_simulate.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | '''
18 | Tests simulate from the joint distribution of the XY CGPMs in directory
19 | uncorrelated/. The suite is disabled because currently only plots of the
20 | synthetic data are generated, and it is yet to be determined how to test the
21 | plots.
22 | 
23 | Moreover a wider set of test cases for _conditional_ simulate are required.
24 | '''
25 | 
26 | import itertools
27 | import pytest
28 | 
29 | import matplotlib.pyplot as plt
30 | import numpy as np
31 | 
32 | from cgpm.uncorrelated.diamond import Diamond
33 | from cgpm.uncorrelated.dots import Dots
34 | from cgpm.uncorrelated.linear import Linear
35 | from cgpm.uncorrelated.parabola import Parabola
36 | from cgpm.uncorrelated.ring import Ring
37 | from cgpm.uncorrelated.sin import Sin
38 | from cgpm.uncorrelated.xcross import XCross
39 | 
40 | from cgpm.utils.general import gen_rng
41 | 
42 | 
# NOTE: NUM_SAMPLES is not referenced anywhere in this file;
# simulate_dataset hard-codes the same value as its default size.
NUM_SAMPLES = 200
# Noise levels crossed with every simulator by the parametrized test.
NOISES = [.95, .85, .75, .65, .55, .45, .35, .25, .15, 0.10, .05, .01]

# Distribution name -> CGPM class constructed by simulate_dataset.
simulators = {
    'diamond': Diamond,
    'dots': Dots,
    'linear': Linear,
    'parabola': Parabola,
    'ring': Ring,
    'sin': Sin,
    'xcross': XCross,
    }


# Distribution name -> (x-axis limits, y-axis limits) used by plot_synthetic.
simulator_limits = {
    'diamond': ([-1.5, 1.5],[-1.5, 1.5]),
    'dots': ([-2.5, 2.5],[-2.5, 2.5]),
    'linear': ([-3, 3],[-3, 3]),
    'parabola': ([-1.5, 1.5],[-1.5, 1.5]),
    'ring': ([-1.5, 1.5],[-1.5, 1.5]),
    'sin': ([-5, 5],[-2, 2]),
    'xcross': ([-3, 3],[-3, 3]),
    }
66 | 
67 | 
68 | # --------------------------------------------------------------------------
69 | # Inference.
70 | 
def simulate_dataset(dist, noise, size=200):
    """Simulate `size` joint samples from the named simulator.

    dist  -- key into `simulators`.
    noise -- noise level passed to the simulator's constructor.

    Returns an array with one row per sample, holding the values of
    outputs 2 and 4. logpdf is also invoked on each sample purely as a
    crash test; a NotImplementedError from it is ignored.
    """
    rng = gen_rng(0)
    model = simulators[dist](outputs=[2,4], noise=noise, rng=rng)
    samples = model.simulate(-1, [2, 4], N=size)
    try: # XXX Crash test only!
        for sample in samples:
            model.logpdf(-1, sample)
    except NotImplementedError:
        pass
    pairs = [(sample[2], sample[4]) for sample in samples]
    return np.asarray(pairs)
81 | 
def plot_synthetic(dist, noise, size=1000):
    """Scatter-plot a simulated dataset and save it under /tmp.

    The figure is written to /tmp/synth_<dist>_<noise>.png and all
    matplotlib figures are closed afterwards.
    """
    fig, ax = plt.subplots()
    data = simulate_dataset(dist, noise, size=size)
    xs, ys = data[:,0], data[:,1]
    ax.set_title('%s Distribution (Noise %1.2f)' % (dist, noise))
    ax.scatter(xs, ys, color='k', alpha=.5, label='True Distribution')
    ax.set_xlabel('x1')
    ax.set_ylabel('x2')
    ax.grid()
    limits = simulator_limits[dist]
    ax.set_xlim(limits[0])
    ax.set_ylim(limits[1])
    target = '/tmp/synth_%s_%1.2f.png' % (dist, noise)
    fig.savefig(target)
    plt.close('all')
94 | 
@pytest.mark.parametrize('dist, noise', itertools.product(simulators, NOISES))
def test_dist_noise__ci_(dist, noise):
    """Crash test: render the synthetic plot for each (dist, noise) pair.

    Nothing is asserted about the output; the test passes when
    plot_synthetic completes without raising.
    """
    plot_synthetic(dist, noise)
98 | 


--------------------------------------------------------------------------------
/tests/disabled_test_vsinline_determinism.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #   Copyright (c) 2010-2016, MIT Probabilistic Computing Project
 4 | #
 5 | #   Licensed under the Apache License, Version 2.0 (the "License");
 6 | #   you may not use this file except in compliance with the License.
 7 | #   You may obtain a copy of the License at
 8 | #
 9 | #       http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | #   Unless required by applicable law or agreed to in writing, software
12 | #   distributed under the License is distributed on an "AS IS" BASIS,
13 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | #   See the License for the specific language governing permissions and
15 | #   limitations under the License.
16 | 
17 | import matplotlib.pyplot as plt
18 | import numpy as np
19 | 
20 | from cgpm.utils import general as gu
21 | from cgpm.venturescript.vsinline import InlineVsCGpm
22 | 
# The CGPM for X with seed 4.
vsx_s4 = InlineVsCGpm([0], [],
    expression='(lambda () (uniform_continuous -4.71 4.71))',
    rng=gu.gen_rng(4))

# The CGPM for Y with seed 4.
vsy_s4 = InlineVsCGpm([1], [0],
    expression='''(lambda (x) (if (> (cos x) 0) (uniform_continuous (- (cos x) .5) (cos x)) (uniform_continuous (cos x) (+ (cos x) .5))))''',
    rng=gu.gen_rng(4))

# The CGPM for Y with seed 5.
vsy_s5 = InlineVsCGpm([1], [0],
    expression='''(lambda (x) (if (> (cos x) 0) (uniform_continuous (- (cos x) .5) (cos x)) (uniform_continuous (cos x) (+ (cos x) .5))))''',
    rng=gu.gen_rng(5))

# Simulate the uniform x from vsx.
samples_x4 = vsx_s4.simulate(0, [0], N=200)

# Simulate Y from each of the seed 4 and 5.
# NOTE(review): each x sample dict is passed as the third positional
# argument; confirm against InlineVsCGpm.simulate that this slot is the
# one intended for the input value of x.
samples_y4 = [vsy_s4.simulate(0, [1], sx) for sx in samples_x4]
samples_y5 = [vsy_s5.simulate(0, [1], sx) for sx in samples_x4]

# Convert all samples from dictionaries to lists.
xs4 = [s[0] for s in samples_x4]
ys4 = [s[1] for s in samples_y4]
ys5 = [s[1] for s in samples_y5]

# Compute the noise at each data point for both sample sets.
errors1 = np.cos(xs4)-ys4
errors2 = np.cos(xs4)-ys5

# Plot the joint query.
fig, ax = plt.subplots()
# Blue: y sampled with the same seed (4) as x. The original note states that
# no noise shows up around cos(x) in this case -- presumably an artifact of
# the shared seed; TODO confirm.
ax.scatter(xs4, ys4, color='blue', alpha=.4)
# Red: y sampled with seed 5; per the original note, noise is visible here.
ax.scatter(xs4, ys5, color='red', alpha=.4)
ax.set_xlim([-1.5*np.pi, 1.5*np.pi])
ax.set_ylim([-1.75, 1.75])
# Draw a short tick at the bottom of the axes for every sampled x.
for x in xs4:
    ax.vlines(x, -1.75, -1.65, linewidth=.5)
ax.grid()

# Plot the errors.
fig, ax = plt.subplots()
ax.scatter(xs4, errors1, color='blue', alpha=.4)
ax.scatter(xs4, errors2, color='red', alpha=.4)
ax.set_xlabel('value of x')
ax.set_ylabel('error of cos(x) - y')
ax.grid()
71 | 


--------------------------------------------------------------------------------
/tests/graphical/animals.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import os
18 | 
19 | import matplotlib.pyplot as plt
20 | import numpy as np
21 | import pandas as pd
22 | import pytest
23 | 
24 | from cgpm.crosscat.engine import Engine
25 | from cgpm.utils import general as gu
26 | from cgpm.utils import plots as pu
27 | from cgpm.utils import test as tu
28 | from cgpm.utils import render as ru
29 | 
30 | 
# Loaded at import time; the path is relative to the current working
# directory. The first CSV column is used as the row index.
animals = pd.read_csv('resources/animals/animals.csv', index_col=0)
animal_values = animals.values
animal_names = animals.index.values
animal_features = animals.columns.values
35 | 
36 | 
37 | # XXX This function should be parametrized better!
def launch_analysis():
    """Fit a 64-state engine to the animals table and plot dependencies.

    Every column is modeled as a two-valued categorical. After 900
    transitions the engine is pickled to resources/animals/animals.engine,
    loaded back from that file, and its pairwise dependence probabilities
    are drawn as a clustermap.
    """
    engine = Engine(
        animals.values.astype(float),
        num_states=64,
        cctypes=['categorical']*len(animals.values[0]),
        distargs=[{'k':2}]*len(animals.values[0]),
        rng=gu.gen_rng(7))

    engine.transition(N=900)
    with open('resources/animals/animals.engine', 'w') as f:
        engine.to_pickle(f)

    # Reload through a context manager so the read handle is closed instead
    # of being left open for the garbage collector.
    with open('resources/animals/animals.engine', 'r') as f:
        engine = Engine.from_pickle(f)
    D = engine.dependence_probability_pairwise()
    pu.plot_clustermap(D)
53 | 
54 | 
def render_states_to_disk(filepath, prefix):
    """Render every state of a pickled engine to '<prefix>-<i>'.

    filepath -- location of the pickled Engine, passed to from_pickle.
    prefix   -- prefix of each savefile; the state index is appended.
    """
    engine = Engine.from_pickle(filepath)
    for i in range(engine.num_states()):
        # Parenthesized single-argument form prints identically on Python 2
        # and is also valid syntax on Python 3.
        print('\r%d' % (i,))
        savefile = '%s-%d' % (prefix, i)
        state = engine.get_state(i)
        ru.viz_state(
            state, row_names=animal_names,
            col_names=animal_features, savefile=savefile)
64 | 
65 | 
def compare_dependence_heatmap():
    """Plot the dependence heatmaps of two pickled engines side by side.

    The row ordering of the first engine's clustermap is applied to both
    axes of both heatmaps so the two panels are directly comparable.
    """
    engine_paths = (
        'resources/animals/animals.engine',
        'resources/animals/animals-lovecat.engine',
    )
    engines = [Engine.from_pickle(path) for path in engine_paths]
    matrices = [e.dependence_probability_pairwise() for e in engines]

    clustermap = pu.plot_clustermap(matrices[0])
    ordering = clustermap.dendrogram_row.reordered_ind

    _fig, axes = plt.subplots(nrows=1, ncols=2)
    for panel in (0, 1):
        pu.plot_heatmap(
            matrices[panel], xordering=ordering, yordering=ordering,
            ax=axes[panel])
79 | 


--------------------------------------------------------------------------------
/tests/graphical/depprob_id.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import matplotlib.pyplot as plt
18 | import numpy as np
19 | import seaborn as sns
20 | 
21 | from cgpm.crosscat.engine import Engine
22 | from cgpm.utils import config as cu
23 | 
# Seed numpy's global generator so the synthetic data below is reproducible.
np.random.seed(0)

N_ROWS = 300
N_STATES = 12
N_ITERS = 100

# One categorical id column with as many categories as rows, then eight
# normal columns. parse_distargs converts the k value with float().
cctypes = ['categorical(k={})'.format(N_ROWS)] + ['normal']*8
cctypes, distargs = cu.parse_distargs(cctypes)
column_names = ['id'] + ['one cluster']*4 + ['four cluster']*4

# id column.
X = np.zeros((N_ROWS, 9))
X[:,0] = np.arange(N_ROWS)

# Four columns of one cluster from the standard normal.
X[:,1:5] = np.random.randn(N_ROWS, 4)

# Four columns of four clusters with unit variance; the cluster index Z is
# drawn from {0,1,2,3} and scaled by 4, so the means are in {0,4,8,12}.
Z = np.random.randint(4, size=(N_ROWS))
X[:,5:] = 4*np.reshape(np.repeat(Z,4), (len(Z),4)) + np.random.randn(N_ROWS, 4)

# Inference.
engine = Engine(
    X, cctypes=cctypes, distargs=distargs, num_states=N_STATES)
engine.transition(N=N_ITERS)

# Dependence probability.
D = engine.dependence_probability_pairwise()
zmat = sns.clustermap(D, yticklabels=column_names, xticklabels=column_names)
plt.setp(zmat.ax_heatmap.get_yticklabels(), rotation=0)
plt.setp(zmat.ax_heatmap.get_xticklabels(), rotation=90)
plt.show()
56 | 


--------------------------------------------------------------------------------
/tests/graphical/dpmm_nignormal.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #   Licensed under the Apache License, Version 2.0 (the "License");
 4 | #   you may not use this file except in compliance with the License.
 5 | #   You may obtain a copy of the License at
 6 | #
 7 | #       http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #   Unless required by applicable law or agreed to in writing, software
10 | #   distributed under the License is distributed on an "AS IS" BASIS,
11 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #   See the License for the specific language governing permissions and
13 | #   limitations under the License.
14 | 
15 | import numpy as np
16 | import matplotlib.pyplot as plt
17 | 
18 | from cgpm.crosscat.state import State
19 | 
def observe_datum(x):
    """Incorporate x as a new row, then keep redrawing the fitted density.

    Uses the module-level `state` and `ax`. The refresh loop has no exit
    condition, so this function does not return normally.
    """
    global state
    state.incorporate(rowid=state.n_rows(), observation={0:x})
    state.transition_dim_grids()
    # Parenthesized single-argument form prints identically on Python 2
    # and is also valid syntax on Python 3.
    print('Observation %d: %f' % (state.n_rows(), x))
    while True:
        state.transition_view_rows()
        state.transition_dim_hypers()
        state.transition_view_alphas()
        ax.clear()
        state.dim_for(0).plot_dist(
            state.X[0], Y=np.linspace(0.01,0.99,200), ax=ax)
        ax.grid()
        # Pause between redraws.
        plt.pause(.8)
34 | 
def on_click(event):
    """Pass the x coordinate of a button-1 click inside an axes on.

    Clicks with any other button, or outside every axes, are ignored.
    """
    if event.button != 1:
        return
    if event.inaxes is None:
        return
    observe_datum(event.xdata)
39 | 
# Create state.
# A single normal column seeded with one observation; `state` is the
# module global that observe_datum updates.
initial_point = .8
state = State([[initial_point]], cctypes=['normal'])

# Activate plotter.
# `ax` is the module global that observe_datum redraws on each refresh.
fig, ax = plt.subplots()
ax.grid()
plt.connect('button_press_event', on_click)
plt.show()
49 | 


--------------------------------------------------------------------------------
/tests/graphical/one_view.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import numpy as np
 18 | import pytest
 19 | 
 20 | from cgpm.crosscat.state import State
 21 | from cgpm.utils import config as cu
 22 | from cgpm.utils import general as gu
 23 | from cgpm.utils import general as gu
 24 | from cgpm.utils import test as tu
 25 | 
 26 | # Set up the data generation: one column per distribution type listed.
 27 | cctypes, distargs = cu.parse_distargs(
 28 |     ['normal',
 29 |     'poisson',
 30 |     'bernoulli',
 31 |     'categorical(k=4)',
 32 |     'lognormal',
 33 |     'exponential',
 34 |     'beta',
 35 |     'geometric',
 36 |     'vonmises'])
 37 | 
 38 | T, Zv, Zc = tu.gen_data_table(  # seeded, so the table is reproducible
 39 |     200, [1], [[.25, .25, .5]], cctypes, distargs,
 40 |     [.95]*len(cctypes), rng=gu.gen_rng(10))
 41 | 
 42 | state = State(T.T, cctypes=cctypes, distargs=distargs, rng=gu.gen_rng(312))
 43 | state.transition(N=10, progress=1)  # runs at import time (module level)
 44 | 
 45 | def test_crash_simulate_joint(state):  # rowid -1, all nine columns jointly
 46 |     state.simulate(-1, [0, 1, 2, 3, 4, 5, 6, 7, 8], N=10)
 47 | 
 48 | def test_crash_logpdf_joint(state):  # rowid -1, one value per column
 49 |     state.logpdf(-1, {0:1, 1:2, 2:1, 3:3, 4:1, 5:10, 6:.4, 7:2, 8:1.8})
 50 | 
 51 | def test_crash_simulate_conditional(state):  # condition on columns 0, 2, 3
 52 |     state.simulate(-1, [1, 4, 5, 6, 7, 8], {0:1, 2:1, 3:3}, None, 10)
 53 | 
 54 | def test_crash_logpdf_conditional(state):  # condition on columns 0, 2, 3
 55 |     state.logpdf(
 56 |         -1, {1:2, 4:1, 5:10, 6:.4, 7:2, 8:1.8}, {0:1, 2:1, 3:3})
 57 | 
 58 | def test_crash_simulate_joint_observed(state):  # rowid 1; must not raise
 59 |     state.simulate(1, [0, 1, 2, 3, 4, 5, 6, 7, 8], None, None, 10)
 60 | 
 61 | def test_crash_logpdf_joint_observed(state):  # rowid 1: expects ValueError
 62 |     with pytest.raises(ValueError):
 63 |         state.logpdf(1, {0:1, 1:2, 2:1, 3:3, 4:1, 5:10, 6:.4, 7:2, 8:1.8})
 64 | 
 65 | def test_crash_simulate_conditional_observed(state):  # expects ValueError
 66 |     with pytest.raises(ValueError):
 67 |         state.simulate(1, [1, 4, 5, 6, 7, 8], {0:1, 2:1, 3:3}, None, 10)
 68 | 
 69 | def test_crash_logpdf_conditional_observed(state):  # expects ValueError
 70 |     with pytest.raises(ValueError):
 71 |         state.logpdf(
 72 |             1, {1:2, 4:1, 5:10, 6:.4, 7:2, 8:1.8}, {0:1, 2:1, 3:3})
 73 | 
 74 | # Plot the state produced by the transitions above.
 75 | state.plot()
 76 | 
 77 | # Run the crash checks directly against the module-level state.
 78 | test_crash_simulate_joint(state)
 79 | test_crash_logpdf_joint(state)
 80 | test_crash_simulate_conditional(state)
 81 | test_crash_logpdf_conditional(state)
 82 | test_crash_simulate_joint_observed(state)
 83 | test_crash_logpdf_joint_observed(state)
 84 | test_crash_simulate_conditional_observed(state)
 85 | test_crash_logpdf_conditional_observed(state)
 86 | 
 87 | # Chain rule on the first state: logp(x0, x1) == logp(x0 | x1) + logp(x1).
 88 | joint = state.logpdf(-1, {0:1, 1:2})
 89 | chain = state.logpdf(-1, {0:1}, {1:2}) + state.logpdf(-1, {1:2})
 90 | assert np.allclose(joint, chain)
 91 | 
 92 | if False:  # disabled branch: nothing below ever executes
 93 |     state2 = State(T.T, cctypes=cctypes, distargs=distargs, rng=gu.gen_rng(12))
 94 |     state2.transition(N=10, progress=1)
 95 | 
 96 |     # Joint and chain rule terms for state 2 (computed but never compared).
 97 |     state2.logpdf(-1, {0:1, 1:2})
 98 |     state2.logpdf(-1, {0:1}, {1:2}) + state2.logpdf(-1, {1:2})
 99 | 
100 |     # Take the Monte Carlo average of the conditional.
101 |     mc_conditional = np.log(.5) + gu.logsumexp([
102 |         state.logpdf(-1, {0:1}, {1:2}),
103 |         state2.logpdf(-1, {0:1}, {1:2})
104 |     ])
105 | 
106 |     # Take the Monte Carlo average of the joint.
107 |     mc_joint = np.log(.5) + gu.logsumexp([
108 |         state.logpdf(-1, {0:1, 1:2}),
109 |         state2.logpdf(-1, {0:1, 1:2})
110 |     ])
111 | 
112 |     # Take the Monte Carlo average of the marginal.
113 |     mc_marginal = np.log(.5) + gu.logsumexp([
114 |         state.logpdf(-1, {1:2}),
115 |         state2.logpdf(-1, {1:2})
116 |     ])
117 | 


--------------------------------------------------------------------------------
/tests/graphical/recover.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import numpy as np
18 | import matplotlib.pyplot as plt
19 | 
20 | from cgpm.utils import test as tu
21 | from cgpm.utils import sampling as su
22 | from cgpm.crosscat.engine import Engine
23 | 
24 | shapes = ['x', 'sin', 'ring', 'dots']  # order in which run_test plots them
25 | gen_function = {  # shape name -> generator, called as gen(n_rows)
26 |     'sin'   : tu.gen_sine_wave,
27 |     'x'     : tu.gen_x,
28 |     'ring'  : tu.gen_ring,
29 |     'dots'  : tu.gen_four_dots
30 | }
31 | 
32 | cctypes = ['normal', 'normal']  # both columns are modeled as normal
33 | distargs = [None, None]
34 | 
35 | 
36 | def run_test(args):
37 |     n_rows = args["num_rows"]
38 |     n_iters = args["num_iters"]
39 |     n_chains = args["num_chains"]
40 | 
41 |     n_per_chain = int(float(n_rows)/n_chains)
42 | 
43 |     fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(16,9))
44 |     axes = axes.ravel()
45 |     k = 0
46 |     for shape in shapes:
47 |         print "Shape: %s" % shape
48 |         T_o = np.asarray(gen_function[shape](n_rows))
49 |         T_i = []
50 | 
51 |         engine = Engine(
52 |             T_o.T, cctypes=cctypes, distargs=distargs, num_states=n_chains)
53 |         engine.transition(N=n_iters)
54 | 
55 |         for chain in xrange(n_chains):
56 |             state = engine.get_state(chain)
57 |             print "chain %i of %i" % (chain+1, n_chains)
58 |             T_i.extend(state.simulate(-1, [0,1], N=n_per_chain))
59 | 
60 |         T_i = np.array(T_i)
61 | 
62 |         ax = axes[k]
63 |         ax.scatter( T_o[0], T_o[1], color='blue', edgecolor='none' )
64 |         ax.set_xlabel("X")
65 |         ax.set_ylabel("Y")
66 |         ax.set_title("%s original" % shape)
67 | 
68 |         ax = axes[k+4]
69 |         ax.scatter( T_i[:,0], T_i[:,1], color='red', edgecolor='none' )
70 |         ax.set_xlabel("X")
71 |         ax.set_ylabel("Y")
72 |         ax.set_xlim(ax.get_xlim())
73 |         ax.set_ylim(ax.get_ylim())
74 |         ax.set_title("%s simulated" % shape)
75 | 
76 |         k += 1
77 | 
78 |     print "Done."
79 |     return fig
80 | 
81 | if __name__ == "__main__":
82 |     args = dict(num_rows=1000, num_iters=5000, num_chains=6)
83 |     fig = run_test(args)
84 |     fig.savefig('recover.png')
85 | 


--------------------------------------------------------------------------------
/tests/graphical/slice.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import math
18 | 
19 | import numpy as np
20 | 
21 | from matplotlib import pyplot as plt
22 | from scipy.integrate import trapz
23 | 
24 | from cgpm.utils import general as gu
25 | from cgpm.utils import sampling as su
26 | 
27 | 
28 | def main(num_samples, burn, lag, w):
29 |     alpha = 1.0
30 |     N = 25
31 |     Z = gu.simulate_crp(N, alpha)
32 |     K = max(Z) + 1
33 | 
34 |     # CRP with gamma prior.
35 |     log_pdf_fun = lambda alpha : gu.logp_crp_unorm(N, K, alpha) - alpha
36 |     proposal_fun = lambda : np.random.gamma(1.0, 1.0)
37 |     D = (0, float('Inf'))
38 | 
39 |     samples = su.slice_sample(proposal_fun, log_pdf_fun, D,
40 |         num_samples=num_samples, burn=burn, lag=lag, w=w)
41 | 
42 |     minval = min(samples)
43 |     maxval = max(samples)
44 |     xvals = np.linspace(minval, maxval, 100)
45 |     yvals = np.array([math.exp(log_pdf_fun(x)) for x in xvals])
46 |     yvals /= trapz(xvals, yvals)
47 | 
48 |     fig, ax = plt.subplots(2,1)
49 | 
50 |     ax[0].hist(samples, 50, normed=True)
51 | 
52 |     ax[1].hist(samples, 100, normed=True)
53 |     ax[1].plot(xvals,-yvals, c='red', lw=3, alpha=.8)
54 |     ax[1].set_xlim(ax[0].get_xlim())
55 |     ax[1].set_ylim(ax[0].get_ylim())
56 |     plt.show()
57 | 
58 | if __name__ == '__main__':
59 |     import argparse
60 | 
61 |     parser = argparse.ArgumentParser()
62 |     parser.add_argument('--num_samples', default=100, type=int)
63 |     parser.add_argument('--burn', default=10, type=int)
64 |     parser.add_argument('--lag', default=5, type=int)
65 |     parser.add_argument('--w', default=1.0, type=float)
66 | 
67 |     args = parser.parse_args()
68 | 
69 |     num_samples = args.num_samples
70 |     burn = args.burn
71 |     lag = args.lag
72 |     w = args.w
73 | 
74 |     main(num_samples, burn, lag, w)
75 | 


--------------------------------------------------------------------------------
/tests/hacks.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """pytest hacks"""
18 | 
19 | import pytest
20 | 
21 | 
22 | # XXX https://github.com/pytest-dev/pytest/issues/2338
23 | def skip(reason):
24 |     if pytest.__version__ >= '3':
25 |         raise pytest.skip.Exception(reason, allow_module_level=True)
26 |     else:
27 |         pytest.skip(reason)
28 | 


--------------------------------------------------------------------------------
/tests/markers.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import pytest
18 | 
19 | integration = pytest.mark.skipif(not pytest.config.getoption('--integration'),
20 |     reason='specify --integration to run integration tests')  # opt-in marker
21 | 


--------------------------------------------------------------------------------
/tests/stochastic.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #   Copyright (c) 2010-2016, MIT Probabilistic Computing Project
 4 | #
 5 | #   Licensed under the Apache License, Version 2.0 (the "License");
 6 | #   you may not use this file except in compliance with the License.
 7 | #   You may obtain a copy of the License at
 8 | #
 9 | #       http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | #   Unless required by applicable law or agreed to in writing, software
12 | #   distributed under the License is distributed on an "AS IS" BASIS,
13 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | #   See the License for the specific language governing permissions and
15 | #   limitations under the License.
16 | 
17 | import os
18 | import sys
19 | 
20 | class StochasticError(Exception):
21 |     def __init__(self, seed, exctype, excvalue):
22 |         self.seed = seed
23 |         self.exctype = exctype
24 |         self.excvalue = excvalue
25 |     def __str__(self):
26 |         hexseed = self.seed.encode('hex')
27 |         if hasattr(self.exctype, '__name__'):
28 |             typename = self.exctype.__name__
29 |         else:
30 |             typename = repr(self.exctype)
31 |         return '[seed %s]\n%s: %s' % (hexseed, typename, self.excvalue)
32 | 
33 | def stochastic(max_runs, min_passes):
34 |     assert 0 < max_runs
35 |     assert min_passes <= max_runs
36 |     def wrap(f):
37 |         def f_(seed=None):
38 |             if seed is not None:
39 |                 return f(seed)
40 |             npasses = 0
41 |             last_seed = None
42 |             last_exc_info = None
43 |             for i in xrange(max_runs):
44 |                 seed = os.urandom(32)
45 |                 try:
46 |                     value = f(seed)
47 |                 except:
48 |                     last_seed = seed
49 |                     last_exc_info = sys.exc_info()
50 |                 else:
51 |                     npasses += 1
52 |                     if min_passes <= npasses:
53 |                         return value
54 |             t, v, tb = last_exc_info
55 |             raise StochasticError, StochasticError(last_seed, t, v), tb
56 |         return f_
57 |     return wrap
58 | 


--------------------------------------------------------------------------------
/tests/test_add_state.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """Test suite targeting cgpm.crosscat.engine.add_state."""
18 | 
19 | import pytest
20 | 
21 | from cgpm.crosscat.engine import Engine
22 | from cgpm.dummy.twoway import TwoWay
23 | from cgpm.utils import general as gu
24 | 
25 | 
26 | def get_engine():
27 |     X = [[0.123, 1, 0], [1.12, 0, 1], [1.1, 1, 2]]
28 |     rng = gu.gen_rng(1)
29 |     return Engine(
30 |         X,
31 |         outputs=[8,7,9],
32 |         num_states=4,
33 |         cctypes=['normal', 'bernoulli', 'categorical'],
34 |         distargs=[None, None, {'k': 3}],
35 |         rng=rng
36 |     )
37 | 
38 | 
39 | def test_engine_add_state_basic():
40 |     engine = get_engine()
41 |     initial_num_states = engine.num_states()
42 |     engine.add_state()
43 |     assert engine.num_states() == initial_num_states + 1
44 |     engine.add_state(count=2)
45 |     assert engine.num_states() == initial_num_states + 3
46 |     engine.transition(N=3)
47 |     engine.drop_state(6)
48 |     assert engine.num_states() == initial_num_states + 2
49 | 
50 | 
51 | def test_engine_add_state_custom():
52 |     # Add a state with a specified view and row partition.
53 |     engine = get_engine()
54 |     engine.add_state(count=2, Zv={7:0, 8:1, 9:0}, Zrv={0: [0,1,1], 1: [1,1,1]})
55 |     new_state = engine.get_state(engine.num_states()-1)
56 |     assert new_state.Zv() == {7:0, 8:1, 9:0}
57 |     assert new_state.views[0].Zr(0) == 0
58 |     assert new_state.views[0].Zr(1) == 1
59 |     assert new_state.views[0].Zr(2) == 1
60 |     assert new_state.views[1].Zr(0) == 1
61 |     assert new_state.views[1].Zr(1) == 1
62 |     assert new_state.views[1].Zr(2) == 1
63 | 
64 | 
65 | def test_engine_add_state_kwarg_errors():
66 |     engine = get_engine()
67 |     with pytest.raises(ValueError):
68 |         # Cannot specify new dataset.
69 |         engine.add_state(X=[[0,1]])
70 |     with pytest.raises(ValueError):
71 |         # Cannot specify new outputs.
72 |         engine.add_state(outputs=[1,2])
73 |     with pytest.raises(ValueError):
74 |         # Cannot specify new cctypes.
75 |         engine.add_state(cctypes=['normal', 'normal'])
76 |     with pytest.raises(ValueError):
77 |         # Cannot specify new distargs.
78 |         engine.add_state(distargs=[None, None, {'k' : 3}])
79 |     with pytest.raises(ValueError):
80 |         # Cannot specify all together.
81 |         engine.add_state(X=[[0,1]], outputs=[1,2], cctypes=['normal', 'normal'])
82 | 
83 | 
84 | def test_engine_add_state_composite_errors():
85 |     # XXX Add a Github ticket to support this feature. User should provide all
86 |     # the composite cgpms to match the count of initialized models.
87 |     engine = get_engine()
88 |     engine.compose_cgpm([
89 |         TwoWay(outputs=[4], inputs=[7]) for _i in xrange(engine.num_states())
90 |     ])
91 |     with pytest.raises(ValueError):
92 |         engine.add_state()
93 | 


--------------------------------------------------------------------------------
/tests/test_bernoulli.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import pytest
18 | import numpy as np
19 | 
20 | from cgpm.crosscat.engine import Engine
21 | from cgpm.utils import general as gu
22 | 
23 | 
24 | DATA_NUM_0 = 100
25 | DATA_NUM_1 = 200
26 | NUM_SIM = 10000
27 | NUM_ITER = 5
28 | 
29 | 
30 | def test_bernoulli():
31 |     # Switch for multiprocess (0 is faster).
32 |     multiprocess = 0
33 | 
34 |     # Create categorical data of DATA_NUM_0 zeros and DATA_NUM_1 ones.
35 |     data = np.transpose(np.array([[0] * DATA_NUM_0 + [1] * DATA_NUM_1]))
36 | 
37 |     # Run a single chain for a few iterations.
38 |     engine = Engine(
39 |         data, cctypes=['categorical'], distargs=[{'k': 2}],
40 |         rng=gu.gen_rng(0), multiprocess=0)
41 |     engine.transition(NUM_ITER, multiprocess=multiprocess)
42 | 
43 |     # Simulate from hypothetical row and compute the proportion of ones.
44 |     sample = engine.simulate(-1, [0], N=NUM_SIM, multiprocess=multiprocess)[0]
45 |     sum_b = sum(s[0] for s in sample)
46 |     observed_prob_of_1 = (float(sum_b) / float(NUM_SIM))
47 |     true_prob_of_1 = float(DATA_NUM_1) / float(DATA_NUM_0 + DATA_NUM_1)
48 |     # Check 1% relative match.
49 |     assert np.allclose(true_prob_of_1, observed_prob_of_1, rtol=.1)
50 | 
51 |     # Simulate from observed row as a crash test.
52 |     sample = engine.simulate(1, [0], N=1, multiprocess=multiprocess)
53 | 
54 |     # Ensure normalized unobserved probabilities.
55 |     p0_uob = engine.logpdf(-1, {0:0}, multiprocess=multiprocess)[0]
56 |     p1_uob = engine.logpdf(-1, {0:1}, multiprocess=multiprocess)[0]
57 |     assert np.allclose(gu.logsumexp([p0_uob, p1_uob]), 0)
58 | 
59 |     # A logpdf query constraining an observed returns an error.
60 |     with pytest.raises(ValueError):
61 |         engine.logpdf(1, {0:0}, multiprocess=multiprocess)
62 |     with pytest.raises(ValueError):
63 |         engine.logpdf(1, {0:1}, multiprocess=multiprocess)
64 | 


--------------------------------------------------------------------------------
/tests/test_check_env_debug.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | import os
19 | import pytest
20 | 
21 | from cgpm.utils.config import check_env_debug
22 | 
23 | token = 'GPMCCDEBUG'
24 | 
25 | def test_debug_none():
26 |     if token in os.environ:
27 |         del os.environ[token]
28 |     assert not check_env_debug()
29 | 
30 | def test_debug_false():  # '0' must read as debugging disabled
31 |     os.environ[token] = '0'  # NOTE(review): never restored after the test
32 |     assert not check_env_debug()
33 | 
34 | def test_debug_true():  # '1' must read as debugging enabled
35 |     os.environ[token] = '1'  # NOTE(review): stays set for later tests
36 |     assert check_env_debug()
37 | 


--------------------------------------------------------------------------------
/tests/test_cmi_partition.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | 
 18 | import numpy as np
 19 | import pytest
 20 | 
 21 | from cgpm.crosscat.engine import DummyCgpm
 22 | from cgpm.crosscat.state import State
 23 | from cgpm.utils.general import gen_rng
 24 | 
 25 | 
 26 | def retrieve_state():
 27 |     X = np.eye(7)
 28 |     cctypes = ['normal'] * 7
 29 |     return State(
 30 |         X,
 31 |         outputs=[10,11,12,13,14,15,16],
 32 |         Zv={10:0, 11:0, 12:1, 13:2, 14:2, 15:2, 16:0},
 33 |         cctypes=cctypes,
 34 |         rng=gen_rng(2),
 35 |     )
 36 | 
 37 | 
 38 | def test_partition_mutual_information_query():  # checks query partitioning
 39 |     state = retrieve_state()
 40 | 
 41 |     def check_expected_partitions(query, expected):  # order-insensitive match
 42 |         blocks = state._partition_mutual_information_query(*query)
 43 |         assert len(blocks) == len(expected)
 44 |         for b in blocks:
 45 |             assert b in expected
 46 | 
 47 |     check_expected_partitions(
 48 |         query=([10], [11], {}),
 49 |         expected=[
 50 |             ([10], [11], {}),
 51 |     ])
 52 |     check_expected_partitions(
 53 |         query=([10,16], [11], {}),
 54 |         expected=[
 55 |             ([10,16], [11], {}),
 56 |     ])
 57 |     check_expected_partitions(
 58 |         query=([10,16], [11], {12:None}),
 59 |         expected=[
 60 |             ([10,16], [11], {}),
 61 |             ([], [], {12:None}),
 62 |     ])
 63 |     check_expected_partitions(
 64 |         query=([10,16], [11, 14], {12:None}),
 65 |         expected=[
 66 |             ([10,16], [11], {}),
 67 |             ([], [14], {}),
 68 |             ([], [], {12:None}),
 69 |     ])
 70 |     check_expected_partitions(
 71 |         query=([15, 16], [11, 14, 13], {12:None, 13:2, 10:-12}),
 72 |         expected=[
 73 |             ([15], [14,13], {13:2}),
 74 |             ([16], [11], {10:-12}),
 75 |             ([], [], {12:None}),
 76 |     ])
 77 |     check_expected_partitions(
 78 |         query=([15, 16], [11, 14, 13], {12:None, 13:2}),
 79 |         expected=[
 80 |             ([15], [14,13], {13:2}),
 81 |             ([16], [11], {}),
 82 |             ([], [], {12:None}),
 83 |     ])
 84 |     check_expected_partitions(
 85 |         query=([15, 16], [15, 16], {12:None, 13:2}),
 86 |         expected=[
 87 |             ([15], [15], {13:2}),
 88 |             ([16], [16], {}),
 89 |             ([], [], {12:None}),
 90 |     ])
 91 |     check_expected_partitions(
 92 |         query=([13, 14], [14, 13], {}),
 93 |         expected=[
 94 |             ([13, 14], [14,13], {}),
 95 |     ])
 96 | 
 97 |     # Connect variable 12 with variables in view 0.
 98 |     state.compose_cgpm(DummyCgpm(outputs=[100, 102], inputs=[10, 12]))
 99 | 
100 |     check_expected_partitions(
101 |         query=([15, 16], [11, 14, 13], {12:None, 13:2}),
102 |         expected=[
103 |             ([15], [14,13], {13:2}),
104 |             ([16], [11], {12:None}),
105 |     ])
106 |     check_expected_partitions(  # NOTE(review): repeats the previous check
107 |         query=([15, 16], [11, 14, 13], {12:None, 13:2}),
108 |         expected=[
109 |             ([15], [14,13], {13:2}),
110 |             ([16], [11], {12:None}),
111 |     ])
112 |     check_expected_partitions(
113 |         query=([15, 100, 16], [11, 14, 13], {102: -12, 12:None, 13:2}),
114 |         expected=[
115 |             ([15], [14,13], {13:2}),
116 |             ([100, 16], [11], {12:None, 102: -12}),
117 |     ])
118 | 
119 |     # Connect variables in view 0 with variables in view 2.
120 |     state.compose_cgpm(DummyCgpm(outputs=[200, 202], inputs=[100, 12]))
121 |     state.compose_cgpm(DummyCgpm(outputs=[300], inputs=[200, 13]))
122 | 
123 |     check_expected_partitions(
124 |         query=([15, 16], [11, 14, 13], {12:None, 13:2, 10:-12}),
125 |         expected=[
126 |             ([15, 16], [11, 14, 13], {12:None, 13:2, 10:-12})
127 |         ])
128 |     check_expected_partitions(
129 |         query=([15, 16], [11, 14, 13], {12:None, 13:2}),
130 |         expected=[
131 |             ([15, 16], [11, 14, 13], {12:None, 13:2})
132 |     ])
133 |     check_expected_partitions(
134 |         query=([300], [202], {13:2}),
135 |         expected=[
136 |             ([300], [202], {13:2})
137 |     ])
138 | 


--------------------------------------------------------------------------------
/tests/test_dependence_constraints.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import itertools
 18 | import pytest
 19 | 
 20 | import numpy as np
 21 | 
 22 | from cgpm.crosscat.state import State
 23 | from cgpm.utils import general as gu
 24 | from cgpm.utils import validation as vu
 25 | 
 26 | from markers import integration
 27 | 
 28 | 
 29 | def test_naive_bayes_independence():
 30 |     rng = gu.gen_rng(1)
 31 |     D = rng.normal(size=(10,1))
 32 |     T = np.repeat(D, 10, axis=1)
 33 |     Ci = list(itertools.combinations(range(10), 2))
 34 |     state = State(T, cctypes=['normal']*10, Ci=Ci, rng=rng)
 35 |     state.transition(N=10, progress=0)
 36 |     vu.validate_crp_constrained_partition(state.Zv(), [], Ci, {}, {})
 37 | 
 38 | 
 39 | def test_complex_independent_relationships():
 40 |     rng = gu.gen_rng(1)
 41 |     D = rng.normal(size=(10,1))
 42 |     T = np.repeat(D, 10, axis=1)
 43 |     Ci = [(2,8), (0,3)]
 44 |     state = State(T, cctypes=['normal']*10, Ci=Ci, rng=rng)
 45 |     state.transition(N=10, progress=0)
 46 |     vu.validate_crp_constrained_partition(state.Zv(), [], Ci, {}, {})
 47 | 
 48 | 
 49 | CIs = [[], [(2,8), (0,3)]]
 50 | @pytest.mark.parametrize('Ci', CIs)
 51 | def test_simple_dependence_constraint(Ci):
 52 |     rng = gu.gen_rng(1)
 53 |     D = rng.normal(size=(10,1))
 54 |     T = np.repeat(D, 10, axis=1)
 55 |     Cd = [(2,0), (8,3)]
 56 |     state = State(T, cctypes=['normal']*10, Ci=Ci, Cd=Cd, rng=rng)
 57 |     with pytest.raises(ValueError):
 58 |         # Cannot transition columns with dependencies.
 59 |         state.transition(N=10, kernels=['columns'], progress=0)
 60 |     state.transition(
 61 |         N=10,
 62 |         kernels=['rows', 'alpha', 'column_hypers', 'alpha', 'view_alphas'],
 63 |         progress=False)
 64 |     vu.validate_crp_constrained_partition(state.Zv(), Cd, Ci, {}, {})
 65 | 
 66 | 
 67 | def test_zero_based_outputs():
 68 |     """Constraints must have zero-based output variables for now."""
 69 |     rng = gu.gen_rng(1)
 70 |     D = rng.normal(size=(10,1))
 71 |     T = np.repeat(D, 10, axis=1)
 72 |     outputs = range(10, 20)
 73 |     with pytest.raises(ValueError):
 74 |         State(T, outputs=range(10,20), cctypes=['normal']*10,
 75 |             Cd=[(2,0)], rng=rng)
 76 |     with pytest.raises(ValueError):
 77 |         State(T, outputs=range(10,20), cctypes=['normal']*10,
 78 |             Ci=[(2,0)], rng=gu.gen_rng(0))
 79 | 
 80 | @integration
 81 | def test_naive_bayes_independence_lovecat():
 82 |     rng = gu.gen_rng(1)
 83 |     D = rng.normal(size=(10,1))
 84 |     T = np.repeat(D, 10, axis=1)
 85 |     Ci = list(itertools.combinations(range(10), 2))
 86 |     state = State(T, cctypes=['normal']*10, Ci=Ci, rng=gu.gen_rng(0))
 87 |     state.transition(N=10, progress=0)
 88 |     vu.validate_crp_constrained_partition(state.Zv(), [], Ci, {}, {})
 89 |     state.transition_lovecat(N=100, progress=0)
 90 |     vu.validate_crp_constrained_partition(state.Zv(), [], Ci, {}, {})
 91 | 
 92 | 
 93 | @integration
 94 | def test_complex_independent_relationships_lovecat():
 95 |     rng = gu.gen_rng(1)
 96 |     D = rng.normal(size=(10,1))
 97 |     T = np.repeat(D, 10, axis=1)
 98 |     Ci = [(2,8), (0,3)]
 99 |     Cd = [(2,3), (0,8)]
100 |     state = State(T, cctypes=['normal']*10, Ci=Ci, Cd=Cd, rng=gu.gen_rng(0))
101 |     state.transition_lovecat(N=1000, progress=1)
102 |     vu.validate_crp_constrained_partition(state.Zv(), Cd, Ci, {}, {})
103 | 
@integration
def test_independence_inference_quality_lovecat():
    """Lovecat should split eight columns into the two groups of clones.

    Columns 0-3 are copies of one column of normal draws (loc=0); columns 4-7
    are copies of a second column made of 25 draws at loc=10 followed by 25
    draws at loc=20. The same expectation is checked from two different
    starting partitions.
    """
    rng = gu.gen_rng(584)
    base_1 = rng.normal(loc=0, size=(50,1))
    base_2 = np.concatenate((
        rng.normal(loc=10, size=(25,1)),
        rng.normal(loc=20, size=(25,1)),
    ))
    data = np.column_stack((
        np.repeat(base_1, 4, axis=1),
        np.repeat(base_2, 4, axis=1),
    ))

    def check_recovered_views(state):
        # Columns 0-3 share a view, columns 4-7 share a view, and the two
        # views are distinct.
        for col in range(4):
            assert state.Zv(col) == state.Zv(0)
        for col in range(4, 8):
            assert state.Zv(col) == state.Zv(4)
        assert state.Zv(0) != state.Zv(4)

    # Start with every column in a single view.
    single_view = dict.fromkeys(range(8), 0)
    state = State(
        data, Zv=single_view, cctypes=['normal']*8, rng=gu.gen_rng(10))
    state.transition_lovecat(N=100, progress=1)
    check_recovered_views(state)

    # Get lovecat to merge the dependent columns into one view: start from
    # four views of two columns each, with each pair declared dependent.
    dependences = [(0,1), (2,3), (4,5), (6,7)]
    paired_views = {col: col // 2 for col in range(8)}
    state = State(
        data, Zv=paired_views, cctypes=['normal']*8, Cd=dependences,
        rng=gu.gen_rng(1))
    state.transition_lovecat(N=100, progress=1)
    check_recovered_views(state)
137 | 


--------------------------------------------------------------------------------
/tests/test_diagnostics.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import time
18 | 
19 | from cgpm.crosscat.engine import Engine
20 | from cgpm.utils import general as gu
21 | from cgpm.utils import test as tu
22 | 
23 | from markers import integration
24 | 
25 | 
def retrieve_normal_dataset():
    """Return the data table generated for two 20-row normal columns.

    `tu.gen_data_table` returns three values; only the first (the data) is
    handed back to the caller.
    """
    spec = dict(
        n_rows=20,
        view_weights=None,
        cluster_weights=[[.2,.2,.2,.4],[.2,.8],],
        cctypes=['normal', 'normal'],
        distargs=[None]*2,
        separation=[0.95]*2,
        view_partition=[0,1],
        rng=gu.gen_rng(12),
    )
    data, _Zv, _Zc = tu.gen_data_table(**spec)
    return data
37 | 
38 | 
@integration
def test_simple_diagnostics():
    """Check how many diagnostic entries each kind of transition records.

    After every call to the engine, the length of each diagnostic series
    (everything except the 'iterations' entry) is compared, for all states,
    against the expected number of checkpoints.
    """
    def trace_lengths():
        # Length of every diagnostic series in every state, skipping the
        # 'iterations' entry.
        return [
            len(series)
            for state in engine.states
            for name, series in state.diagnostics.iteritems()
            if name != 'iterations'
        ]

    D = retrieve_normal_dataset()
    engine = Engine(
        D.T, cctypes=['normal']*len(D), num_states=4, rng=gu.gen_rng(12),)

    # 20 iterations checkpointed every 2 -> 10 entries.
    engine.transition(N=20, checkpoint=2)
    assert all(n == 10 for n in trace_lengths())

    # 7 more iterations checkpointed every 2 -> 3 further entries.
    engine.transition(N=7, checkpoint=2)
    assert all(n == 13 for n in trace_lengths())

    # 7 lovecat iterations checkpointed every 3 -> 2 further entries.
    engine.transition_lovecat(N=7, checkpoint=3)
    assert all(n == 15 for n in trace_lengths())

    # No checkpoint argument: nothing is recorded.
    engine.transition(S=0.5)
    assert all(n == 15 for n in trace_lengths())

    # Timed run with a checkpoint: some entries are recorded.
    engine.transition(S=0.5, checkpoint=1)
    assert all(n > 15 for n in trace_lengths())

    # Timed lovecat analysis with diagnostics and a very large N; the
    # assertion expects the S=1 budget, not N, to bound the run (original
    # note: "due to oddness of diagnostic tracing in lovecat").
    start = time.time()
    engine.transition_lovecat(N=20000, S=1, checkpoint=1)
    elapsed = time.time() - start
    assert 1 < elapsed < 3
76 | 


--------------------------------------------------------------------------------
/tests/test_engine_alter.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import numpy as np
18 | import pytest
19 | 
20 | from cgpm.crosscat.engine import Engine
21 | from cgpm.utils import config as cu
22 | from cgpm.utils import general as gu
23 | from cgpm.utils import test as tu
24 | 
25 | 
# Shared fixture builder for the alteration tests.
def get_engine():
    """Build a six-state Engine over six mixed-type columns.

    The generated table has 20 rows; five cells are blanked with nan before
    the engine is created, and the engine is transitioned twice before being
    returned.
    """
    cctypes, distargs = cu.parse_distargs([
        'normal', 'poisson', 'bernoulli', 'lognormal', 'beta', 'vonmises'])
    T, _Zv, _Zc = tu.gen_data_table(
        20, [1], [[.25, .25, .5]], cctypes, distargs,
        [.95]*len(cctypes), rng=gu.gen_rng(0))
    T = T.T
    # Make some nan cells for evidence.
    for rowid, col in [(5,0), (5,1), (5,2), (5,3), (8,4)]:
        T[rowid, col] = np.nan
    engine = Engine(
        T, cctypes=cctypes, distargs=distargs, num_states=6,
        rng=gu.gen_rng(0))
    engine.transition(N=2)
    return engine
52 | 
53 | 
def test_simple_alterations():
    """Engine.alter must apply the functions to the selected states only.

    Two alteration functions scale entries of `state.outputs`. They are
    applied to states 0 and 3; those states must show both changes while all
    other states keep their initial outputs.
    """
    engine = get_engine()

    # Snapshot (rather than alias) the initial outputs, so the expectation
    # below stays valid even if an alteration mutates the list in place.
    out_initial = list(engine.states[0].outputs)

    # Indexes of outputs to alter.
    out_f = 0
    out_g = 3

    def alteration_f(state):
        state.outputs[out_f] *= 13
        return state

    def alteration_g(state):
        state.outputs[out_g] *= 12
        return state

    # Single source of truth for which states are altered: used both for the
    # alter call and for the per-state expectation below.
    statenos = [0,3]
    engine.alter((alteration_f, alteration_g), statenos)

    out_expected = list(out_initial)
    out_expected[out_f] *= 13
    out_expected[out_g] *= 12

    for s in range(engine.num_states()):
        expected = out_expected if s in statenos else out_initial
        assert engine.states[s].outputs == expected
84 | 


--------------------------------------------------------------------------------
/tests/test_engine_seed.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from cgpm.crosscat.engine import Engine
18 | from cgpm.utils import general as gu
19 | 
def test_engine_simulate_no_repeat():
    """Generate 3 samples from 2 states 10 times, and ensure uniqueness.

    Only element [0] of each `engine.simulate` result is inspected
    (presumably the first state's samples); the ten batches of values for
    variable 0 must be pairwise distinct as sets.
    """
    engine = Engine(
        X=[[1]], cctypes=['normal'], num_states=2, rng=gu.gen_rng(1))
    batches = []
    for _ in xrange(10):
        first_result = engine.simulate(None, [0], N=3)[0]
        batches.append([sample[0] for sample in first_result])
    distinct = {frozenset(batch) for batch in batches}
    assert len(distinct) == len(batches)
30 | 


--------------------------------------------------------------------------------
/tests/test_exponentials_transition_hypers.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import pytest
18 | 
19 | import numpy as np
20 | 
21 | from cgpm.utils import config as cu
22 | from cgpm.utils import general as gu
23 | from cgpm.utils import test as tu
24 | 
25 | 
# (name, distargs) pairs used to parametrize test_transition_hypers; distargs
# is None for every type except categorical.
cctypes = [
    ('normal', None), ('categorical', {'k':4}),
    ('lognormal', None), ('poisson', None),
    ('bernoulli', None), ('exponential', None),
    ('geometric', None), ('vonmises', None),
]
36 | 
37 | 
@pytest.mark.parametrize('cctype', cctypes)
def test_transition_hypers(cctype):
    """Hyperparameters must move after each round of hyper transitions.

    A model of the given type receives 25 observations, runs three hyper
    transitions, then receives 25 more observations and runs three more. After
    each round at least one hyperparameter must differ from its earlier value.
    """
    name, arg = cctype
    model = cu.cctype_class(name)(
        outputs=[0], inputs=None, distargs=arg, rng=gu.gen_rng(10))
    D, _Zv, _Zc = tu.gen_data_table(
        50, [1], [[.33, .33, .34]], [name], [arg], [.8], rng=gu.gen_rng(1))
    batch = np.ravel(D)[:25]

    hypers_previous = model.get_hypers()
    for rowid, x in enumerate(batch):
        model.incorporate(rowid, {0:x}, None)
    model.transition_hypers(N=3)
    hypers_new = model.get_hypers()
    assert any(
        not np.allclose(hypers_new[hyper], hypers_previous[hyper])
        for hyper in hypers_new)

    # NOTE(review): the second round re-uses the first 25 values (under
    # rowids 25..49) rather than the remaining 25 generated values -- confirm
    # whether [25:] was intended.
    for rowid, x in enumerate(batch):
        model.incorporate(rowid+25, {0:x}, None)
    model.transition_hypers(N=3)
    hypers_newer = model.get_hypers()
    assert any(
        not np.allclose(hypers_new[hyper], hypers_newer[hyper])
        for hyper in hypers_newer)

    # In general inference should improve the log score (check disabled):
    # logpdf_score = model.logpdf_score()
    # model.transition_hypers(N=200)
    # assert model.logpdf_score() > logpdf_score
67 | 
68 | 


--------------------------------------------------------------------------------
/tests/test_force_cell.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import pytest
18 | 
19 | import numpy as np
20 | 
21 | from cgpm.crosscat.state import State
22 | from cgpm.utils import general as gu
23 | 
24 | 
# 5x5 table shared by the force_cell tests; np.nan marks a missing cell.
# Rows 2 and 4 are identical, and row 3 only observes its first column.
X = [
    [ 1, np.nan,      2,     -1, np.nan],
    [ 1,      3,      2,     -1,     -5],
    [18,     -7,     -2,     11,    -12],
    [ 1, np.nan, np.nan, np.nan, np.nan],
    [18,     -7,     -2,     11,    -12],
]
30 | 
31 | 
def get_state():
    """Return a fresh State over X: columns 0-2 in view 0, columns 3-4 in view 1."""
    view_assignment = {0:0, 1:0, 2:0, 3:1, 4:1}
    return State(
        X, outputs=range(5), cctypes=['normal']*5, Zv=view_assignment,
        rng=gu.gen_rng(0))
40 | 
41 | 
def test_invalid_nonnan_cell():
    """Forcing a cell that already holds a value must raise ValueError."""
    state = get_state()
    # Cell (rowid 0, output 0) is observed in X (value 1), i.e. not nan.
    rowid, cell = 0, {0: 1}
    with pytest.raises(ValueError):
        state.force_cell(rowid, cell)
47 | 
def test_invalid_variable():
    """Forcing an output the state does not have must raise KeyError."""
    state = get_state()
    # The state's outputs are 0..4, so output variable 10 does not exist.
    rowid, cell = 0, {10: 1}
    with pytest.raises(KeyError):
        state.force_cell(rowid, cell)
53 | 
def test_invalid_rowid():
    """Forcing a cell of a rowid that is not in the table must raise ValueError."""
    state = get_state()
    # Cannot force a non-incorporated rowid: 10 is beyond the five rows of X,
    # and None names no row at all.
    for rowid in (10, None):
        with pytest.raises(ValueError):
            state.force_cell(rowid, {0: 1})
61 | 
def test_force_cell_valid():
    """Force a missing cell and confirm the component's statistics update.

    Cell (0, 1) is nan in X. After forcing it to 1.5 the component that row 0
    belongs to must have N, sum_x and sum_x_sq incremented accordingly, and
    forcing the same cell again must be rejected.
    """
    state = get_state()
    rowid, dim, value = 0, 1, 1.5
    # Retrieve the normal component model holding cell (rowid, dim).
    cluster = state.view_for(dim).Zr(rowid)
    component = state.dim_for(dim).clusters[cluster]
    # Sufficient statistics before forcing.
    n_before = component.N
    sum_before = component.sum_x
    sum_sq_before = component.sum_x_sq
    # Force!
    state.force_cell(rowid, {dim: value})
    # Each statistic must have grown by exactly the forced observation.
    assert component.N == n_before + 1
    assert np.allclose(component.sum_x, sum_before + value)
    assert np.allclose(component.sum_x_sq, sum_sq_before + value**2)
    # The cell is no longer missing, so it cannot be forced again.
    with pytest.raises(ValueError):
        state.force_cell(rowid, {dim: 1})
    # Inference must still run after a forced cell.
    state.transition(N=1)
    # Force two missing cells of row 3 (columns 1 and 2) in a single call.
    state.force_cell(3, {1: -7, 2:-2})
    state.transition(N=1)
87 | 


--------------------------------------------------------------------------------
/tests/test_get_vqe.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from cgpm.utils import validation as vu
18 | 
19 | 
def test_partition_query_evidence_dict():
    """Partition a dict query and dict evidence by view and check every entry.

    Zv assigns variables 0-2 to view 0, 3-5 to view 1, 6-8 to view 2 and
    variable 9 to view 3.
    """
    Zv = [0,0,0,1,1,1,2,2,2,3]
    query = {9:101, 1:1, 4:2, 5:7, 7:0}
    evidence = {2:4, 3:1, 9:1, 6:-1, 0:0}
    queries, evidences = vu.partition_query_evidence(Zv, query, evidence)

    # Expected contents per view (position in the list == view index).
    # Queries: view 0 has 1, view 1 has 2, views 2 and 3 have 1 each.
    expected_queries = [{1:1}, {4:2, 5:7}, {7:0}, {9:101}]
    # Evidence: view 0 has 2, views 1, 2 and 3 have 1 each.
    expected_evidences = [{0:0, 2:4}, {3:1}, {6:-1}, {9:1}]

    checks = ((queries, expected_queries), (evidences, expected_evidences))
    for actual, expected in checks:
        # All 4 views are represented.
        assert len(actual) == len(expected)
        for view, contents in enumerate(expected):
            assert len(actual[view]) == len(contents)
            for variable, value in contents.items():
                assert actual[view][variable] == value
58 | 
59 | 
def test_partition_query_evidence_list():
    """Partition a list query and dict evidence by view and check every entry.

    Same view layout as the dict variant, but the query is a plain list of
    variables, so only membership (not values) is checked for queries.
    """
    Zv = [0,0,0,1,1,1,2,2,2,3]
    query = [9, 1, 4, 5, 7]
    evidence = {2:-4, 3:-1, 9:-1, 6:1, 0:100}
    queries, evidences = vu.partition_query_evidence(Zv, query, evidence)

    # Expected query variables per view (position in the list == view index):
    # view 0 has 1, view 1 has 2, views 2 and 3 have 1 each.
    expected_queries = [[1], [4, 5], [7], [9]]
    # Expected evidence per view: view 0 has 2, the others 1 each.
    expected_evidences = [{0:100, 2:-4}, {3:-1}, {6:1}, {9:-1}]

    # All 4 views have query.
    assert len(queries) == len(expected_queries)
    for view, variables in enumerate(expected_queries):
        assert len(queries[view]) == len(variables)
        for variable in variables:
            assert variable in queries[view]

    # Views 0,1,2,3 have evidence.
    assert len(evidences) == len(expected_evidences)
    for view, contents in enumerate(expected_evidences):
        assert len(evidences[view]) == len(contents)
        for variable, value in contents.items():
            assert evidences[view][variable] == value
98 | 


--------------------------------------------------------------------------------
/tests/test_impossible_evidence.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """This test suite ensures that simulate and logpdf with zero-density evidence
18 | raises a ValueError."""
19 | 
20 | import pytest
21 | 
22 | from cgpm.crosscat.engine import State
23 | from cgpm.utils import config as cu
24 | from cgpm.utils import general as gu
25 | from cgpm.utils import test as tu
26 | 
27 | 
@pytest.fixture(scope='module')
def state():
    """Module-scoped State over six mixed-type columns, all placed in view 0.

    Column order: 0 normal, 1 poisson, 2 bernoulli, 3 lognormal, 4 beta,
    5 vonmises. The generated table has 30 rows.
    """
    # Set up the data generation
    type_names = [
        'normal', 'poisson', 'bernoulli', 'lognormal', 'beta', 'vonmises']
    cctypes, distargs = cu.parse_distargs(type_names)
    T, _Zv, _Zc = tu.gen_data_table(
        30, [1], [[.25, .25, .5]], cctypes, distargs,
        [.95]*len(cctypes), rng=gu.gen_rng(0))
    single_view = dict.fromkeys(range(len(cctypes)), 0)
    return State(
        T.T, cctypes=cctypes, distargs=distargs, Zv=single_view,
        rng=gu.gen_rng(0))
50 | 
51 | 
def test_impossible_simulate_evidence(state):
    """simulate must raise ValueError when the evidence is impossible."""
    cases = [
        # Variable 2 is binary-valued, so .8 is impossible.
        ([0,1], {2:.8}),
        # Variable 3 is lognormal so -1 impossible.
        ([4], {3:-1}),
        # Same impossible lognormal evidence, this time querying variable 5.
        # NOTE(review): the original comment here read "Variable 4 is beta so
        # 1.1 impossible", but no beta evidence is passed -- confirm whether
        # evidence {4:1.1} was intended.
        ([5], {3:-1}),
    ]
    for query, evidence in cases:
        with pytest.raises(ValueError):
            state.simulate(-1, query, evidence)
62 | 
63 | 
def test_impossible_logpdf_evidence(state):
    """logpdf must raise ValueError when the evidence is impossible."""
    cases = [
        # Variable 2 is binary-valued, so .8 is impossible.
        ({0:-1}, {2:.8}),
        # Variable 3 is lognormal so -1 impossible.
        ({1:1}, {3:-1}),
        # The evidence is again the impossible lognormal value; the query
        # (beta variable 4 at 1.1) differs from the previous case.
        # NOTE(review): the original comment read "Variable 4 is beta so 1.1
        # impossible", but 1.1 is in the query, not the evidence -- confirm
        # whether evidence {4:1.1} was intended.
        ({4:1.1}, {3:-1}),
    ]
    for query, evidence in cases:
        with pytest.raises(ValueError):
            state.logpdf(-1, query, evidence)
74 | 
75 | 
def test_valid_logpdf_query(state):
    """Crash test: logpdf accepts these queries without raising."""
    # Zero density logpdf is fine: .8 is not a possible value for the binary
    # variable 2, but here it is the query rather than the evidence.
    binary_query, poisson_evidence = {2:.8}, {1:1.}
    state.logpdf(-1, binary_query, poisson_evidence)
    # Query on variable 5 with no evidence argument at all.
    state.logpdf(-1, {5:18})
80 | 
81 | 


--------------------------------------------------------------------------------
/tests/test_incorporate_row.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import pytest
 18 | 
 19 | import numpy as np
 20 | 
 21 | from cgpm.crosscat.state import State
 22 | from cgpm.utils import general as gu
 23 | 
 24 | 
 25 | X = [[1,     np.nan,     2,         -1,         np.nan  ],
 26 |      [1,     3,          2,         -1,         -5      ],
 27 |      [18,    -7,         -2,        11,         -12     ],
 28 |      [1,     np.nan,     np.nan,    np.nan,     np.nan  ],
 29 |      [18,    -7,         -2,        11,         -12     ]]
 30 | 
 31 | 
 32 | def get_state():
 33 |     return State(
 34 |         X,
 35 |         outputs=range(5),
 36 |         cctypes=['normal']*5,
 37 |         Zv={0:0, 1:0, 2:0, 3:1, 4:1},
 38 |         rng=gu.gen_rng(0),
 39 |     )
 40 | 
 41 | 
 42 | def test_invalid_evidence_keys():
 43 |     state = get_state()
 44 |     # Non-existent view -3.
 45 |     with pytest.raises(ValueError):
 46 |         state.incorporate(
 47 |             state.n_rows(),
 48 |             {0:0, 1:1, 2:2, 3:3, 4:4, state.crp_id_view+2:0}
 49 |         )
 50 | 
 51 | 
 52 | def test_invalid_evidence():
 53 |     state = get_state()
 54 |     # Evidence is disabled since State has no inputs.
 55 |     with pytest.raises(Exception):
 56 |         state.incorporate(state.n_rows(), {0:0, 1:1, 2:2, 3:3, 4:4}, {12:1})
 57 | 
 58 | 
 59 | def test_invalid_cluster():
 60 |     state = get_state()
 61 |     # Should crash with None.
 62 |     with pytest.raises(Exception):
 63 |         state.incorporate(
 64 |             state.n_rows(),
 65 |             {0:0, 1:1, 2:2, 3:3, 4:4, state.views[0].outputs[0]:None})
 66 | 
 67 | 
 68 | def test_invalid_query_nan():
 69 |     state = get_state()
 70 |     # Not allowed to incorporate nan.
 71 |     with pytest.raises(ValueError):
 72 |         state.incorporate(state.n_rows(), {0:np.nan, 1:1, 2:2, 3:3, 4:4})
 73 | 
 74 | 
 75 | def test_invalid_rowid():
 76 |     state = get_state()
 77 |     # Non-contiguous rowids disabled.
 78 |     for rowid in range(state.n_rows()):
 79 |         with pytest.raises(ValueError):
 80 |             state.incorporate(rowid, {0:2})
 81 | 
 82 | def test_incorporate_valid():
 83 |     state = get_state()
 84 |     # Incorporate row into cluster 0 for all views.
 85 |     previous = np.asarray([state.views[v].Nk(0) for v in [0,1]])
 86 |     state.incorporate(
 87 |         state.n_rows(),
 88 |         {0:0, 1:1, 2:2, 3:3, 4:4, state.views[0].outputs[0]:0,
 89 |             state.views[1].outputs[0]:0}
 90 |     )
 91 |     assert [state.views[v].Nk(0) for v in [0,1]] == list(previous+1)
 92 |     # Incorporate row into cluster 0 for view 1 with some missing values.
 93 |     previous = state.views[1].Nk(0)
 94 |     state.incorporate(state.n_rows(), {0:0, 2:2, state.views[1].outputs[0]:0})
 95 |     assert state.views[1].Nk(0) == previous+1
 96 |     state.transition(N=2)
 97 |     # Hypothetical cluster 100.
 98 |     view = state.views[state.views.keys()[0]]
 99 |     state.incorporate(
100 |         state.n_rows(),
101 |         {0:0, 1:1, 2:2, 3:3, 4:4, view.outputs[0]:100})
102 | 
103 | 
def test_unincorporate():
    """Rows can only be removed last-first, and the final row cannot go."""
    state = get_state()
    n_rows = state.n_rows()
    # XXX Must remove the last rowid only at each invocation: every rowid
    # except the last one is refused.
    for rowid in range(n_rows-1):
        with pytest.raises(ValueError):
            state.unincorporate(rowid)
    # Remove rowids starting from n_rows-1 down to 1.
    for rowid in range(n_rows-1, 0, -1):
        state.unincorporate(rowid)
    assert state.n_rows() == 1
    # Cannot unincorporate the final rowid.
    with pytest.raises(ValueError):
        state.unincorporate(0)
    state.transition(N=2)
120 | 
121 | 
def test_incorporate_session():
    """Incorporate into new clusters, incorporate freely, then unincorporate."""
    rng = gu.gen_rng(4)
    state = State(
        X, cctypes=['normal']*5, Zv={0:0, 1:0, 2:1, 3:1, 4:2}, rng=rng)
    view_ids = [0,1,2]

    def cluster_counts():
        # Number of entries reported by Nk() for each of the three views.
        return [len(state.views[v].Nk()) for v in view_ids]

    # Incorporate row into a singleton cluster for all views: each view's
    # current cluster count is used as the index of a brand-new cluster.
    previous = cluster_counts()
    data = {i: rng.normal() for i in xrange(5)}
    clusters = {state.views[v].outputs[0]: previous[v] for v in view_ids}
    state.incorporate(state.n_rows(), gu.merged(data, clusters))
    assert cluster_counts() == [count+1 for count in previous]

    # Incorporate row without specifying clusters, and some missing values
    # (only columns 0 and 1 are given).
    data = {i: rng.normal() for i in xrange(2)}
    state.incorporate(state.n_rows(), data)
    state.transition(N=3)

    # Remove the incorporated rowid.
    state.unincorporate(state.n_rows()-1)
    state.transition(N=3)
144 | 


--------------------------------------------------------------------------------
/tests/test_iter_counter.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import time
18 | 
19 | import pytest
20 | 
21 | from cgpm.crosscat.engine import Engine
22 | from cgpm.crosscat.state import State
23 | from cgpm.dummy.fourway import FourWay
24 | from cgpm.dummy.twoway import TwoWay
25 | from cgpm.utils import general as gu
26 | 
27 | 
def test_all_kernels():
    """After transition(N=5) every recorded kernel count must equal 5."""
    X = gu.gen_rng(0).normal(size=(5,5))
    state = State(X, cctypes=['normal']*5)
    state.transition(N=5)
    counts = state.to_metadata()['diagnostics']['iterations']
    for count in counts.values():
        assert count == 5
35 | 
def test_individual_kernels():
    """Iteration counts accumulate separately for each named kernel."""
    X = gu.gen_rng(0).normal(size=(5,5))
    state = State(X, cctypes=['normal']*5)

    def metadata_counts():
        return state.to_metadata()['diagnostics']['iterations']

    # Three iterations of two kernels; read straight from state.diagnostics.
    state.transition(N=3, kernels=['alpha', 'rows'])
    expected = {'alpha':3, 'rows':3}
    check_expected_counts(state.diagnostics['iterations'], expected)

    # Five iterations of two other kernels; earlier counts are unchanged.
    state.transition(N=5, kernels=['view_alphas', 'column_params'])
    expected.update({'view_alphas':5, 'column_params':5})
    check_expected_counts(metadata_counts(), expected)

    # One more iteration: two kernels go from 5 to 6, a new one starts at 1.
    state.transition(
        N=1, kernels=['view_alphas', 'column_params', 'column_hypers'])
    expected.update(
        {'view_alphas':6, 'column_params':6, 'column_hypers':1})
    check_expected_counts(metadata_counts(), expected)
54 | 
55 | 
def test_transition_foreign():
    """Iteration counters for composed (foreign) cgpms, plus engine checks.

    Each composed cgpm is tracked under the key 'foreign-<token>', where the
    token is the value returned by compose_cgpm.
    """
    rng = gu.gen_rng(0)
    X = rng.normal(size=(5,5))
    state = State(X, cctypes=['normal']*5)

    def check_counts(expected):
        check_expected_counts(state.diagnostics['iterations'], expected)

    # First foreign cgpm: five iterations on its column only.
    token_a = state.compose_cgpm(FourWay(outputs=[12], inputs=[0,1], rng=rng))
    key_a = 'foreign-%s' % token_a
    state.transition_foreign(cols=[12], N=5)
    check_counts({key_a: 5})

    # Second foreign cgpm: one iteration; the first counter is untouched.
    token_b = state.compose_cgpm(TwoWay(outputs=[22], inputs=[2], rng=rng))
    key_b = 'foreign-%s' % token_b
    state.transition_foreign(cols=[22], N=1)
    check_counts({key_a: 5, key_b: 1})

    # No cols given: both foreign cgpms advance by three.
    state.transition_foreign(N=3)
    check_counts({key_a: 8, key_b: 4})

    # A timed run must last at least the requested S=2.
    start = time.time()
    state.transition_foreign(S=2)
    elapsed = time.time() - start
    assert elapsed >= 2

    # Crash test for engine to transition everyone.
    engine = Engine(X, cctypes=['normal']*5)
    engine.compose_cgpm([FourWay(outputs=[12], inputs=[0,1], rng=rng)])
    engine.transition(N=4)
    # Cannot use transition with a foreign variable.
    with pytest.raises(ValueError):
        engine.transition(N=4, cols=state.outputs+[12], multiprocess=False)
    # Cannot use transition_foreign with a Crosscat variable.
    with pytest.raises(ValueError):
        engine.transition_foreign(cols=[0, 12], N=4, multiprocess=False)
    engine.transition_foreign(cols=[12], N=4)
94 | 
95 | 
def check_expected_counts(actual, expected):
    """Assert that every kernel in `expected` has that exact count in `actual`.

    Keys that appear only in `actual` are ignored. A kernel listed in
    `expected` but absent from `actual` raises KeyError.

    Uses `items()` rather than the Python-2-only `iteritems()`, so the helper
    behaves the same on Python 2 and 3.
    """
    for kernel, count in expected.items():
        observed = actual[kernel]
        # Name the kernel in the failure so a mismatch is easy to locate.
        assert count == observed, (
            'kernel %r: expected %r iterations, got %r'
            % (kernel, count, observed))
99 | 


--------------------------------------------------------------------------------
/tests/test_linreg_mixture.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | import matplotlib.cm as cm
19 | import matplotlib.pyplot as plt
20 | import numpy as np
21 | 
22 | from cgpm.crosscat.state import State
23 | from cgpm.utils import general as gu
24 | 
25 | 
def _compute_y(x):
    """Return a noisy response for `x` from one of two linear regimes.

    Values of `x` above 5 use slope 5 with noise scale 1; all other values
    use slope -2 with noise scale .5. The noise is drawn from the
    module-level `rng`.
    """
    noise = [.5, 1]
    slopes = [-2, 5]
    # Cast the comparison to a plain int before indexing the lists: when x is
    # a numpy scalar the comparison yields a numpy boolean, which newer numpy
    # releases may not accept as a list index.
    model = int(x > 5)
    return slopes[model] * x + rng.normal(scale=noise[model])
31 | 
32 | 
# Fixed-seed synthetic dataset: 50 values of x drawn uniformly between 0 and
# 10, each paired with the response produced by _compute_y.
rng = gu.gen_rng(1)
X = rng.uniform(low=0, high=10, size=50)
# Build Y eagerly; on Python 3 map() returns a lazy iterator which
# np.column_stack cannot stack.
Y = [_compute_y(x) for x in X]
D = np.column_stack((X, Y))
37 | 
38 | 
def replace_key(d, a, b):
    """Rename key `a` of dict `d` to `b` in place and return `d`.

    Raises KeyError (leaving `d` untouched) if `a` is not in `d`. When
    `a == b` the mapping is left with the same contents.
    """
    # Pop first, then assign: assigning d[b] and deleting d[a] afterwards
    # would drop the entry altogether when a == b.
    d[b] = d.pop(a)
    return d
43 | 
44 | 
def generate_gaussian_samples():
    """Simulate 100 samples from a State with two normal columns.

    Each returned sample holds the simulated values for columns 0 and 1,
    plus the value simulated for the view's first output stored under key -1.
    """
    column_partition = {0: 0, 1: 0}
    state = State(
        D, cctypes=['normal', 'normal'], Zv=column_partition,
        rng=gu.gen_rng(0))
    view = state.view_for(1)
    state.transition(
        S=15, kernels=['rows', 'column_params', 'column_hypers'])
    first_output = view.outputs[0]
    draws = view.simulate(-1, [0, 1, first_output], N=100)
    # Re-key the view's first output as -1 in every sample.
    return [replace_key(draw, first_output, -1) for draw in draws]
52 | 
53 | 
def generate_regression_samples():
    """Simulate 100 samples after switching column 1 to linear regression.

    Each returned sample holds the simulated values for columns 0 and 1,
    plus the value simulated for the view's first output stored under key -1.
    """
    column_partition = {0: 0, 1: 0}
    state = State(
        D, cctypes=['normal', 'normal'], Zv=column_partition,
        rng=gu.gen_rng(4))
    view = state.view_for(1)
    # The state only reports itself as composite once the cctype is updated.
    assert not state._composite
    state.update_cctype(1, 'linear_regression')
    assert state._composite
    state.transition(
        S=30, kernels=['rows', 'column_params', 'column_hypers'])
    first_output = view.outputs[0]
    draws = view.simulate(-1, [0, 1, first_output], N=100)
    # Re-key the view's first output as -1 in every sample.
    return [replace_key(draw, first_output, -1) for draw in draws]
64 | 
def plot_samples(samples, title):
    """Scatter the dataset D in black and overlay `samples`, using one color
    per distinct value stored under key -1 of the samples."""
    _, ax = plt.subplots()
    clusters = {sample[-1] for sample in samples}
    palette = iter(cm.gist_rainbow(np.linspace(0, 1, len(clusters) + 2)))
    ax.scatter(D[:, 0], D[:, 1], color='k', label='Data')
    for index, cluster in enumerate(clusters):
        members = [s for s in samples if s[-1] == cluster]
        xs = tuple(s[0] for s in members)
        ys = tuple(s[1] for s in members)
        ax.scatter(
            xs, ys, alpha=.5, color=next(palette),
            label='Simulated (cluster %d)' % index)
    ax.set_title(title)
    ax.legend(framealpha=0, loc='upper left')
    ax.grid()
79 | 
80 | 
def test_regression_plot_crash__ci_():
    """Crash test: generate samples from both models, then plot each set."""
    plots = [
        (generate_gaussian_samples(), 'Model: Mixture of 2D Gaussians'),
        (generate_regression_samples(), 'Model: Mixture of Linear Regression'),
    ]
    for samples, title in plots:
        plot_samples(samples, title)
    # plt.close('all')
87 | 


--------------------------------------------------------------------------------
/tests/test_logpdf_score.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | import numpy as np
19 | 
20 | from cgpm.crosscat.engine import Engine
21 | from cgpm.utils.general import gen_rng
22 | 
def test_logpdf_score_crash():
    """Compare logpdf_score with logpdf_likelihood on a one-column engine,
    before and after running transitions."""
    rng = gen_rng(8)
    # T = rng.choice([0,1], p=[.3,.7], size=250).reshape(-1,1)
    T = rng.normal(size=30).reshape(-1, 1)
    engine = Engine(T, cctypes=['normal'], rng=rng, num_states=4)

    # Before inference: every state's score is below its likelihood.
    likelihood_before = np.array(engine.logpdf_likelihood())
    score_before = np.array(engine.logpdf_score())
    assert np.all(score_before < likelihood_before)
    # assert np.all(logpdf_likelihood_initial < logpdf_score_initial)

    engine.transition(N=100)
    engine.transition(kernels=['column_hypers', 'view_alphas'], N=10)

    # After inference: same ordering, and the best score has improved.
    likelihood_after = np.asarray(engine.logpdf_likelihood())
    score_after = np.asarray(engine.logpdf_score())
    assert np.all(score_after < likelihood_after)
    assert np.max(score_before) < np.max(score_after)
38 | 


--------------------------------------------------------------------------------
/tests/test_logsumexp.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #   Copyright (c) 2010-2016, MIT Probabilistic Computing Project
 4 | #
 5 | #   Licensed under the Apache License, Version 2.0 (the "License");
 6 | #   you may not use this file except in compliance with the License.
 7 | #   You may obtain a copy of the License at
 8 | #
 9 | #       http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | #   Unless required by applicable law or agreed to in writing, software
12 | #   distributed under the License is distributed on an "AS IS" BASIS,
13 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | #   See the License for the specific language governing permissions and
15 | #   limitations under the License.
16 | 
17 | import math
18 | import pytest
19 | 
20 | from cgpm.utils import general as gu
21 | 
def relerr(expected, actual):
    """Return ``abs((actual - expected)/expected)``, the error of `actual`
    relative to `expected`."""
    deviation = actual - expected
    return abs(deviation / expected)
25 | 
def test_logsumexp():
    """Check gu.logsumexp on large, empty, infinite and nan inputs."""
    pos_inf = float('inf')
    neg_inf = -pos_inf
    not_a_number = float('nan')
    # The naive log-of-sum-of-exponentials overflows on this input ...
    with pytest.raises(OverflowError):
        math.log(sum(math.exp(v) for v in range(1000)))
    # ... while logsumexp is expected to return an accurate answer.
    assert relerr(999.4586751453871, gu.logsumexp(range(1000))) < 1e-15
    assert gu.logsumexp([]) == neg_inf
    assert gu.logsumexp([-1000.]) == -1000.
    assert gu.logsumexp([-1000., -1000.]) == -1000. + math.log(2.)
    assert relerr(math.log(2.), gu.logsumexp([0., 0.])) < 1e-15
    # Inputs containing infinities with an exact expected result.
    exact_cases = [
        ([neg_inf, 1], 1),
        ([neg_inf, neg_inf], neg_inf),
        ([pos_inf, pos_inf], pos_inf),
    ]
    for terms, expected in exact_cases:
        assert gu.logsumexp(terms) == expected
    # Inputs whose result must be nan.
    nan_cases = [
        [neg_inf, pos_inf],
        [not_a_number, pos_inf],
        [not_a_number, -3],
    ]
    for terms in nan_cases:
        assert math.isnan(gu.logsumexp(terms))
42 | 
def test_logmeanexp():
    """Check gu.logmeanexp on large, empty, infinite and nan inputs."""
    pos_inf = float('inf')
    neg_inf = -pos_inf
    not_a_number = float('nan')
    assert gu.logmeanexp([]) == neg_inf
    assert relerr(992.550919866405, gu.logmeanexp(range(1000))) < 1e-15
    assert gu.logmeanexp([-1000., -1000.]) == -1000.
    two_term_mean = math.log(0.5 * (1 + math.exp(-1.)))
    assert relerr(two_term_mean, gu.logmeanexp([0., -1.])) < 1e-15
    assert relerr(math.log(0.5), gu.logmeanexp([0., -1000.])) < 1e-15
    # A -inf term contributes nothing to the sum but still counts in the
    # mean, regardless of its position.
    halved = -3 - math.log(2.)
    for terms in ([neg_inf, -3], [-3, neg_inf]):
        assert relerr(halved, gu.logmeanexp(terms)) < 1e-15
    # A +inf term dominates.
    for terms in ([pos_inf, -3], [-3, pos_inf], [neg_inf, 0, pos_inf]):
        assert gu.logmeanexp(terms) == pos_inf
    # Inputs whose result must be nan.
    nan_cases = [
        [not_a_number, pos_inf],
        [not_a_number, -3],
        [not_a_number],
    ]
    for terms in nan_cases:
        assert math.isnan(gu.logmeanexp(terms))
61 | 


--------------------------------------------------------------------------------
/tests/test_lw_rf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | """Crash and sanity tests for queries using likelihood weighting inference
 18 | with a RandomForest component model. Not an inference quality test suite."""
 19 | 
 20 | import pytest
 21 | 
 22 | import numpy as np
 23 | 
 24 | from cgpm.crosscat.state import State
 25 | from cgpm.utils import config as cu
 26 | from cgpm.utils import general as gu
 27 | from cgpm.utils import test as tu
 28 | 
 29 | 
 30 | @pytest.fixture(scope='module')
 31 | def state():
 32 |     cctypes, distargs = cu.parse_distargs([
 33 |         'categorical(k=5)',
 34 |         'normal',
 35 |         'poisson',
 36 |         'bernoulli'
 37 |     ])
 38 |     T, Zv, Zc = tu.gen_data_table(50, [1], [[.33, .33, .34]], cctypes, distargs,
 39 |         [.95]*len(cctypes), rng=gu.gen_rng(0))
 40 |     s = State(T.T, cctypes=cctypes, distargs=distargs,
 41 |         Zv={i:0 for i in xrange(len(cctypes))}, rng=gu.gen_rng(0))
 42 |     s.update_cctype(0, 'random_forest', distargs={'k':5})
 43 |     # XXX Uncomment me for a bug!
 44 |     # state.update_cctype(1, 'linear_regression')
 45 |     kernels = ['rows','view_alphas','alpha','column_params','column_hypers']
 46 |     s.transition(N=1, kernels=kernels)
 47 |     return s
 48 | 
 49 | 
 50 | def test_simulate_unconditional__ci_(state):
 51 |     for rowid in [-1, 1]:
 52 |         samples = state.simulate(rowid, [0], N=2)
 53 |         check_entries_in_list(samples, range(5))
 54 | 
 55 | 
 56 | def test_simulate_conditional__ci_(state):
 57 |     samples = state.simulate(
 58 |         -1, [0], {1:-1, 2:1, 3:1}, None, 2)
 59 |     check_entries_in_list(samples, range(5))
 60 |     samples = state.simulate(-1, [0, 2, 3], None, None, N=2)
 61 |     check_entries_in_list(samples, range(5))
 62 |     samples = state.simulate(1, [0, 2, 3], None, None, 2)
 63 |     check_entries_in_list(samples, range(5))
 64 | 
 65 | 
 66 | def test_logpdf_unconditional__ci_(state):
 67 |     for k in xrange(5):
 68 |         assert state.logpdf(None, {0: k}) < 0
 69 | 
 70 | 
 71 | def test_logpdf_deterministic__ci_(state):
 72 |     # Ensure logpdf estimation deterministic when all parents in constraints.
 73 |     for k in xrange(5):
 74 |         lp1 = state.logpdf(-1, {0:k, 3:0}, {1:1, 2:1})
 75 |         lp2 = state.logpdf(-1, {0:k, 3:0}, {1:1, 2:1})
 76 |         assert np.allclose(lp1, lp2)
 77 |     # Observed cell already has parents in constraints
 78 |     # Currently, logpdf for a non-nan observed cell is not possible.
 79 |     for k in xrange(5):
 80 |         with pytest.raises(ValueError):
 81 |             lp1 = state.logpdf(1, {0:k, 3:0})
 82 |         with pytest.raises(ValueError):
 83 |             lp2 = state.logpdf(1, {0:k, 3:0})
 84 |         assert np.allclose(lp1, lp2)
 85 | 
 86 | 
 87 | def test_logpdf_impute__ci_(state):
 88 |     # Ensure logpdf estimation nondeterministic when all parents in constraints.
 89 |     # In practice, since the Random Forest discretizes its input, is quite
 90 |     # likely that different importance sampling estimates return the same
 91 |     # probability even when the parent nodes have different values.
 92 |     for k in xrange(5):
 93 |         lp1 = state.logpdf(-1, {0:k}, {1:1})
 94 |         lp2 = state.logpdf(-1, {0:k}, {1:1})
 95 |         print lp1, lp2
 96 |     # Observed cell already has parents in constraints.
 97 |     for k in xrange(5):
 98 |         lp1 = state.logpdf(-1, {1:1, 2:2}, {0:k})
 99 |         lp2 = state.logpdf(-1, {1:1, 2:2}, {0:k})
100 |         print lp1, lp2
101 | 
102 | 
def check_entries_in_list(entries, allowed):
    """Assert that item 0 of every entry is a member of `allowed`."""
    for sample in entries:
        first_value = sample[0]
        assert first_value in allowed
106 | 


--------------------------------------------------------------------------------
/tests/test_normal_categorical.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | """This graphical test trains a gpmcc state on a bivariate population [X, Z].
 18 | X (called the data) is a cctype from DistributionGpm. Z is a categorical
 19 | variable that is a function of the latent cluster of each row
 20 | (called the indicator).
 21 | 
 22 | The three simulations are:
 23 |     - Joint Z,X.
 24 |     - Data conditioned on the indicator X|Z.
 25 |     - Indicator conditioned on the data Z|X.
 26 | 
 27 | Simulations are compared to synthetic data at indicator subpopulations.
 28 | """
 29 | 
 30 | import pytest
 31 | 
 32 | import matplotlib.pyplot as plt
 33 | import numpy as np
 34 | 
 35 | from scipy.stats import ks_2samp
 36 | 
 37 | from cgpm.crosscat.engine import Engine
 38 | from cgpm.utils import general as gu
 39 | from cgpm.utils import test as tu
 40 | 
 41 | 
 42 | N_SAMPLES = 250
 43 | 
 44 | T, Zv, Zc = tu.gen_data_table(
 45 |     N_SAMPLES, [1], [[.3, .5, .2]], ['normal'], [None], [.95],
 46 |     rng=gu.gen_rng(0))
 47 | 
 48 | DATA = np.zeros((N_SAMPLES, 2))
 49 | DATA[:,0] = T[0]
 50 | 
 51 | INDICATORS = [0, 1, 2, 3, 4, 5]
 52 | 
 53 | counts = {0:0, 1:0, 2:0}
 54 | for i in xrange(N_SAMPLES):
 55 |     k = Zc[0][i]
 56 |     DATA[i,1] = 2*INDICATORS[k] + counts[k] % 2
 57 |     counts[k] += 1
 58 | 
 59 | 
 60 | @pytest.fixture(scope='module')
 61 | def state():
 62 |     # Create an engine.
 63 |     engine = Engine(
 64 |         DATA, cctypes=['normal', 'categorical'], distargs=[None, {'k':6}],
 65 |         num_states=4, rng=gu.gen_rng(212))
 66 |     engine.transition(N=15)
 67 |     marginals = engine.logpdf_score()
 68 |     ranking = np.argsort(marginals)[::-1]
 69 |     return engine.get_state(ranking[0])
 70 | 
 71 | 
 72 | def test_joint(state):
 73 |     # Simulate from the joint distribution of (x,z).
 74 |     joint_samples = state.simulate(-1, [0,1], N=N_SAMPLES)
 75 |     _, ax = plt.subplots()
 76 |     ax.set_title('Joint Simulation')
 77 |     for t in INDICATORS:
 78 |         # Plot original data.
 79 |         data_subpop = DATA[DATA[:,1] == t]
 80 |         ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
 81 |         # Plot simulated data for indicator t.
 82 |         samples_subpop = [j[0] for j in joint_samples if j[1] == t]
 83 |         ax.scatter(
 84 |             np.add([t]*len(samples_subpop), .25), samples_subpop,
 85 |             color=gu.colors[t])
 86 |         # KS test.
 87 |         pvalue = ks_2samp(data_subpop[:,0], samples_subpop)[1]
 88 |         assert .05 < pvalue
 89 |     ax.set_xlabel('Indicator')
 90 |     ax.set_ylabel('x')
 91 |     ax.grid()
 92 | 
 93 | 
 94 | def test_conditional_indicator(state):
 95 |     # Simulate from the conditional X|Z
 96 |     _, ax = plt.subplots()
 97 |     ax.set_title('Conditional Simulation Of Data X Given Indicator Z')
 98 |     for t in INDICATORS:
 99 |         # Plot original data.
100 |         data_subpop = DATA[DATA[:,1] == t]
101 |         ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
102 |         # Plot simulated data.
103 |         samples_subpop = [s[0] for s in
104 |             state.simulate(-1, [0], {1:t}, None, len(data_subpop))]
105 |         ax.scatter(
106 |             np.repeat(t, len(data_subpop)) + .25,
107 |             samples_subpop, color=gu.colors[t])
108 |         # KS test.
109 |         pvalue = ks_2samp(data_subpop[:,0], samples_subpop)[1]
110 |         assert .01 < pvalue
111 |     ax.set_xlabel('Indicator')
112 |     ax.set_ylabel('x')
113 |     ax.grid()
114 | 
115 | 
def test_conditional_real(state):
    """Simulate the indicator Z given a representative x for each indicator
    and check most draws land on the true indicator or its paired one."""
    # Simulate from the conditional Z|X
    fig, axes = plt.subplots(2,3)
    fig.suptitle('Conditional Simulation Of Indicator Z Given Data X')
    # Compute representative data sample for each indicator.
    means = [np.mean(DATA[DATA[:,1]==t], axis=0)[0] for t in INDICATORS]
    for mean, indicator, ax in zip(means, INDICATORS, axes.ravel('F')):
        samples_subpop = [s[1] for s in
            state.simulate(-1, [1], {0:mean}, None, N_SAMPLES)]
        ax.hist(samples_subpop, color='g', alpha=.4)
        ax.set_title('True Indicator %d' % indicator)
        ax.set_xlabel('Simulated Indicator')
        ax.set_xticks(INDICATORS)
        ax.set_ylabel('Frequency')
        ax.set_ylim([0, ax.get_ylim()[1]+10])
        ax.grid()
        # Check that the simulated indicator agrees with true indicator.
        # Indicators come in pairs (0,1), (2,3), (4,5); either member of the
        # true indicator's pair counts as agreement.
        true_ind_a = indicator
        true_ind_b = indicator-1  if indicator % 2 else indicator+1
        # minlength guarantees a bin for every indicator, so the lookup
        # below cannot go out of bounds when the largest indicators were
        # never simulated.
        counts = np.bincount(samples_subpop, minlength=len(INDICATORS))
        frac = sum(counts[[true_ind_a, true_ind_b]])/float(sum(counts))
        assert .8 < frac
138 | 


--------------------------------------------------------------------------------
/tests/test_ols.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import importlib
 18 | import pytest
 19 | 
 20 | from math import log
 21 | 
 22 | import numpy as np
 23 | 
 24 | from cgpm.regressions.ols import OrdinaryLeastSquares
 25 | from cgpm.utils import config as cu
 26 | from cgpm.utils import general as gu
 27 | from cgpm.utils import test as tu
 28 | 
 29 | 
 30 | cctypes, distargs = cu.parse_distargs([
 31 |     'normal',
 32 |     'categorical(k=3)',
 33 |     'poisson',
 34 |     'bernoulli',
 35 |     'lognormal',
 36 |     'exponential',
 37 |     'geometric',
 38 |     'vonmises',
 39 |     'normal'])
 40 | 
 41 | T, Zv, Zc = tu.gen_data_table(
 42 |     100, [1], [[.33, .33, .34]], cctypes, distargs,
 43 |     [.2]*len(cctypes), rng=gu.gen_rng(0))
 44 | 
 45 | D = T.T
 46 | OLS_DISTARGS = {
 47 |     'inputs': {
 48 |         'stattypes': cctypes[1:],
 49 |         'statargs':
 50 |             [{'k': 3}] + [None] + [{'k': 2}] + [None, None, None, None, None]
 51 |     }
 52 | }
 53 | OLS_OUTPUTS = [0]
 54 | OLS_INPUTS = range(1, len(cctypes))
 55 | 
 56 | 
 57 | def test_integration():
 58 |     ols = OrdinaryLeastSquares(
 59 |         outputs=OLS_OUTPUTS,
 60 |         inputs=OLS_INPUTS,
 61 |         distargs=OLS_DISTARGS,
 62 |         rng=gu.gen_rng(0)
 63 |     )
 64 |     # Incorporate first 20 rows.
 65 |     for rowid, row in enumerate(D[:20]):
 66 |         observation = {0: row[0]}
 67 |         inputs = {i: row[i] for i in ols.inputs}
 68 |         ols.incorporate(rowid, observation, inputs)
 69 |     # Unincorporating row 20 should raise.
 70 |     with pytest.raises(ValueError):
 71 |         ols.unincorporate(20)
 72 |     # Unincorporate all rows.
 73 |     for rowid in xrange(20):
 74 |         ols.unincorporate(rowid)
 75 |     # Unincorporating row 0 should raise.
 76 |     with pytest.raises(ValueError):
 77 |         ols.unincorporate(0)
 78 |     # Incorporating with wrong covariate dimensions should raise.
 79 |     with pytest.raises(ValueError):
 80 |         observation = {0: D[0,0]}
 81 |         inputs = {i: v for (i, v) in enumerate(D[0])}
 82 |         ols.incorporate(0, observation, inputs)
 83 |     # Incorporating with None output value should raise.
 84 |     with pytest.raises(ValueError):
 85 |         observation = {0: None}
 86 |         inputs = {i: D[0,i] for i in ols.inputs}
 87 |         ols.incorporate(0, observation, inputs)
 88 |     # Incorporating with nan inputs value should raise.
 89 |     with pytest.raises(ValueError):
 90 |         observation = {0: 100}
 91 |         inputs = {i: D[0,i] for i in ols.inputs}
 92 |         inputs[inputs.keys()[0]] = np.nan
 93 |         ols.incorporate(0, observation, inputs)
 94 |     # Incorporate some more rows.
 95 |     for rowid, row in enumerate(D[:10]):
 96 |         observation = {0: row[0]}
 97 |         inputs = {i: row[i] for i in ols.inputs}
 98 |         ols.incorporate(rowid, observation, inputs)
 99 | 
100 |     # Run a transition.
101 |     ols.transition()
102 |     assert ols.noise > 0
103 | 
104 |     # Invalid categorical inputs 5 for categorical(k=3).
105 |     targets = {OLS_OUTPUTS[0]: 2}
106 |     inputs = dict(zip(OLS_INPUTS, [5, 5, 0, 1.4, 7, 4, 2, -2]))
107 |     with pytest.raises(ValueError):
108 |         ols.logpdf(-1, targets, None, inputs)
109 |     with pytest.raises(ValueError):
110 |         ols.simulate(-1, OLS_OUTPUTS, None, inputs)
111 | 
112 |     # Invalid categorical inputs 2 for bernoulli.
113 |     targets = {OLS_OUTPUTS[0]: 2}
114 |     inputs = dict(zip(OLS_INPUTS, [5, 5, 2, 1.4, 7, 4, 2, -2]))
115 |     with pytest.raises(ValueError):
116 |         ols.logpdf(-1, targets, None, inputs)
117 |     with pytest.raises(ValueError):
118 |         ols.simulate(-1, OLS_OUTPUTS, None, inputs)
119 | 
120 |     # Do a logpdf computation.
121 |     targets = {OLS_OUTPUTS[0]: 2}
122 |     inputs = dict(zip(OLS_INPUTS, [2, 5, 0, 1.4, 7, 4, 2, -2]))
123 |     logp_old = ols.logpdf(-1, targets, None, inputs)
124 |     assert logp_old < 0
125 |     ols.simulate(-1, OLS_OUTPUTS, None, inputs)
126 | 
127 |     # Now serialize and deserialize, and check if logp_old is the same.
128 |     metadata = ols.to_metadata()
129 |     builder = getattr(
130 |         importlib.import_module(metadata['factory'][0]),
131 |         metadata['factory'][1])
132 |     ols2 = builder.from_metadata(metadata, rng=gu.gen_rng(1))
133 | 
134 |     assert ols2.noise == ols.noise
135 |     logp_new = ols2.logpdf(-1, targets, None, inputs)
136 |     assert np.allclose(logp_new, logp_old)
137 |     ols2.simulate(-1, OLS_OUTPUTS, None, inputs)
138 | 


--------------------------------------------------------------------------------
/tests/test_piecewise.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import numpy as np
18 | 
19 | from cgpm.dummy.piecewise import PieceWise
20 | from cgpm.utils.general import logsumexp
21 | 
22 | 
def test_piecewise_logpdf():
    """Crash-test simulate and logpdf on PieceWise for several query shapes,
    and check that the probabilities of z sum to one."""
    pw = PieceWise([0,1], [2], sigma=1, flip=.8)

    def total_logp_z(x=None):
        # logsumexp of logpdf(z=0) and logpdf(z=1), optionally given x.
        logps = []
        for z in (0, 1):
            constraints = None if x is None else {0: x}
            logps.append(pw.logpdf(None, {1: z}, constraints, {2: 1}))
        return logsumexp(logps)

    # Joint of x and z.
    pw.simulate(None, [0,1], None, {2:1})
    pw.logpdf(None, {0:1.5, 1:0}, None, {2:1})

    # Marginal of x.
    pw.simulate(None, [0], None, {2:1})
    pw.logpdf(None, {0:1.5}, None, {2:1})

    # Marginal of z: both outcomes together have probability one.
    pw.simulate(None, [1], None, {2:1})
    assert np.allclose(total_logp_z(), 0)

    # z given x: both outcomes together have probability one.
    pw.simulate(None, [1], {0:1.5}, {2:1})
    assert np.allclose(total_logp_z(x=1.5), 0)

    # x given z.
    pw.simulate(None, [0], {1:0}, {2:1})
    pw.logpdf(None, {0:1.5}, {1:0}, {2:1})
52 | 


--------------------------------------------------------------------------------
/tests/test_populate_evidence.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import numpy as np
 18 | import pytest
 19 | 
 20 | from cgpm.mixtures.view import View
 21 | from cgpm.crosscat.state import State
 22 | 
 23 | """Test suite for View._populate_constraints.
 24 | 
 25 | Ensures that View._populate_constraints correctly retrieves values from the
 26 | dataset.
 27 | """
 28 | 
 29 | 
 30 | # ------------------------------------------------------------------------------
 31 | # Tests for cgpm.mixtures.view.View
 32 | 
 33 | def retrieve_view():
 34 |     X = np.asarray([
 35 |         [1,    np.nan,        2,      -1,      np.nan],
 36 |         [1,         3,        2,      -1,          -5],
 37 |         [1,    np.nan,   np.nan,  np.nan,      np.nan],
 38 |     ])
 39 |     outputs = [0,1,2,3,4]
 40 |     return View(
 41 |         {c: X[:,c].tolist() for c in outputs},
 42 |         outputs=[-1] + outputs,
 43 |         cctypes=['normal']*5,
 44 |         Zr=[0,1,2]
 45 |     )
 46 | 
 47 | 
 48 | def test_view_hypothetical_unchanged():
 49 |     view = retrieve_view()
 50 | 
 51 |     rowid = -1
 52 |     targets1 = {3:-1}
 53 |     constraints1 = {1:1, 2:2}
 54 |     constraints2 = view._populate_constraints(rowid, targets1, constraints1)
 55 |     assert constraints1 == constraints2
 56 | 
 57 | 
 58 | def test_view_only_rowid_to_populate():
 59 |     view = retrieve_view()
 60 | 
 61 |     # Can targets X[2,0] for simulate.
 62 |     rowid = 2
 63 |     targets1 = [0]
 64 |     constraints1 = {}
 65 |     constraints2 = view._populate_constraints(rowid, targets1, constraints1)
 66 |     assert constraints2 == {-1: view.Zr(rowid)}
 67 | 
 68 | 
 69 | def test_view_constrain_cluster():
 70 |     view = retrieve_view()
 71 | 
 72 |     # Cannot constrain cluster assignment of observed rowid.
 73 |     rowid = 1
 74 |     targets1 = {-1: 2}
 75 |     constraints1 = {}
 76 |     with pytest.raises(ValueError):
 77 |         view._populate_constraints(rowid, targets1, constraints1)
 78 | 
 79 | 
 80 | def test_view_values_to_populate():
 81 |     view = retrieve_view()
 82 | 
 83 |     rowid = 0
 84 |     targets1 = [1]
 85 |     constraints1 = {4:2}
 86 |     constraints2 = view._populate_constraints(rowid, targets1, constraints1)
 87 |     assert constraints2 == {0:1, 2:2, 3:-1, 4:2, -1: view.Zr(rowid)}
 88 | 
 89 |     rowid = 0
 90 |     targets1 = {1:1}
 91 |     constraints1 = {4:2}
 92 |     constraints2 = view._populate_constraints(rowid, targets1, constraints1)
 93 |     assert constraints2 == {2:2, 0:1, 3:-1, 4:2, -1: view.Zr(rowid)}
 94 | 
 95 | 
 96 | # ------------------------------------------------------------------------------
 97 | # Tests for cgpm.crosscat.state.State
 98 | 
 99 | def retrieve_state():
100 |     X = np.asarray([
101 |         [1,    np.nan,        2,      -1,      np.nan],
102 |         [1,         3,        2,      -1,          -5],
103 |         [1,    np.nan,   np.nan,  np.nan,      np.nan],
104 |     ])
105 |     outputs = [0,1,2,3,4]
106 |     return State(
107 |         X,
108 |         outputs=outputs,
109 |         cctypes=['normal']*5,
110 |         Zv={0:0, 1:0, 2:0, 3:0, 4:0},
111 |         Zrv={0:[0,1,2]}
112 |     )
113 | 
def test_state_constrain_logpdf():
    """Validating a logpdf query on X[2,0] raises ValueError."""
    state = retrieve_state()
    # X[2,0] cannot be a target for logpdf.
    with pytest.raises(ValueError):
        state._validate_cgpm_query(2, {0: 2}, {})
122 | 
def test_state_constrain_errors():
    """Each of several queries against row 1 fails validation with
    ValueError."""
    state = retrieve_state()
    rowid = 1
    # (targets, constraints) pairs that must all be rejected.
    rejected_queries = [
        ({1: 1, 4: 1}, {}),
        ({1: 3}, {4: -5}),
        ({0: 1, 1: 3}, {}),
    ]
    for targets, constraints in rejected_queries:
        with pytest.raises(ValueError):
            state._validate_cgpm_query(rowid, targets, constraints)
143 | 


--------------------------------------------------------------------------------
/tests/test_row_similarity.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import pytest
18 | 
@pytest.mark.xfail(strict=True, reason='Stub: test not implemented yet.')
def test_row_similarity_basic():
    """Placeholder that always raises; marked as a strict expected failure
    until a real test is written."""
    message = 'Implement me!'
    raise ValueError(message)
22 | 


--------------------------------------------------------------------------------
/tests/test_simulate_many_decorator.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from cgpm.utils import general as gu
18 | 
19 | class DummyCgpm():
20 |     """Stub whose simulate returns its own arguments, to test simulate_many."""
21 |     @gu.simulate_many
22 |     def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
23 |         return (self, rowid, targets, constraints, inputs, N)
24 | 
25 | def test_simulate_many_kwarg_none():
26 |     """N is None as a kwarg."""
27 |     gpm = DummyCgpm()
28 |     assert gpm.simulate(None, [1,2]) \
29 |         == (gpm, None, [1,2], None, None, None)
30 |     assert gpm.simulate(None, [1,2], inputs={4:1}) \
31 |         == (gpm, None, [1,2], None, {4:1}, None)
32 |     assert  gpm.simulate(4, [1,2], {3:2}) \
33 |         == (gpm, 4, [1,2], {3:2}, None, None)
34 |     assert gpm.simulate(None, [1,2], {4:2}, inputs={5:2}) \
35 |         == (gpm, None, [1,2], {4:2}, {5:2}, None)
36 |     assert gpm.simulate(None, [1,2], {5:2}) \
37 |         == (gpm, None, [1,2], {5:2}, None, None)
38 |     assert gpm.simulate(2, [1,2], constraints={4:2}, inputs={5:2}) \
39 |         == (gpm, 2, [1,2], {4:2}, {5:2}, None)
40 | 
41 | def test_simulate_many_kwarg_not_none():
42 |     """N is not None, used as a named parameter."""
43 |     gpm = DummyCgpm()
44 |     assert gpm.simulate(None, [1,2], N=10) \
45 |         == [(gpm, None, [1,2], None, None, 10)] * 10
46 |     assert gpm.simulate(9, [1,2], inputs=None, N=7) \
47 |         == [(gpm, 9, [1,2], None, None, 7)] * 7
48 |     assert gpm.simulate(None, [1,2], {2:1}, N=10) \
49 |         == [(gpm, None, [1,2], {2:1}, None, 10)] * 10
50 |     assert gpm.simulate(None, [1,2], constraints={4:2}, inputs={5:2}, N=1) \
51 |         == [(gpm, None, [1,2], {4:2}, {5:2}, 1)]
52 | 
53 | def test_simulate_many_positional():
54 |     # N is positional.
55 |     gpm = DummyCgpm()
56 |     assert gpm.simulate(77, [1,2], None, None, 10) \
57 |         == [(gpm, 77, [1,2], None, None, 10)] * 10
58 |     assert gpm.simulate(None, [1,2], None, {4:1}, 7) \
59 |         == [(gpm, None, [1,2], None, {4:1}, 7)] * 7
60 |     assert gpm.simulate(None, [1,2], {5:2}, None, 1) \
61 |         == [(gpm, None, [1,2], {5:2}, None, 1)]
62 |     assert gpm.simulate(None, [1,2], None, {3:1}, None) \
63 |         == (gpm, None, [1,2], None, {3:1}, None)
64 | 


--------------------------------------------------------------------------------
/tests/test_state_initialize.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from cgpm.crosscat.state import State
18 | from cgpm.utils import general as gu
19 | 
20 | def test_Zv_without_Zrv():
21 |     rng = gu.gen_rng(2)
22 |     D = rng.normal(size=(10,4))
23 | 
24 |     state = State(
25 |         D,
26 |         outputs=[3,2,1,0,],
27 |         cctypes=['normal']*D.shape[1],
28 |         Zv={3:0, 2:1, 1:2, 0:4},
29 |         rng=rng,
30 |     )
31 | 


--------------------------------------------------------------------------------
/tests/test_stochastic.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #   Copyright (c) 2010-2016, MIT Probabilistic Computing Project
 4 | #
 5 | #   Licensed under the Apache License, Version 2.0 (the "License");
 6 | #   you may not use this file except in compliance with the License.
 7 | #   You may obtain a copy of the License at
 8 | #
 9 | #       http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | #   Unless required by applicable law or agreed to in writing, software
12 | #   distributed under the License is distributed on an "AS IS" BASIS,
13 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | #   See the License for the specific language governing permissions and
15 | #   limitations under the License.
16 | 
17 | import pytest
18 | 
19 | from stochastic import StochasticError
20 | from stochastic import stochastic
21 | 
22 | class Quagga(Exception):
23 |     pass
24 | 
25 | @stochastic(max_runs=1, min_passes=1)
26 | def _test_fail(seed):  # Helper: raises Quagga on every run.
27 |     raise Quagga
28 | 
29 | @stochastic(max_runs=1, min_passes=1)
30 | def _test_pass(_seed):
31 |     pass
32 | 
33 | passthenfail_counter = 0
34 | @stochastic(max_runs=2, min_passes=1)
35 | def _test_passthenfail(seed):
36 |     global passthenfail_counter
37 |     passthenfail_counter += 1
38 |     passthenfail_counter %= 2
39 |     if passthenfail_counter == 0:
40 |         raise Quagga
41 | 
42 | failthenpass_counter = 0
43 | @stochastic(max_runs=2, min_passes=1)
44 | def _test_failthenpass(seed):
45 |     global failthenpass_counter
46 |     failthenpass_counter += 1
47 |     failthenpass_counter %= 2
48 |     if failthenpass_counter == 1:
49 |         raise Quagga
50 | 
51 | @stochastic(max_runs=2, min_passes=1)
52 | def _test_failthenfail(seed):  # Helper: raises Quagga on both allowed runs.
53 |     raise Quagga
54 | 
55 | @stochastic(max_runs=1, min_passes=1)
56 | def test_stochastic(seed):
57 |     with pytest.raises(StochasticError):
58 |         _test_fail()
59 |     try:
60 |         _test_fail()
61 |     except StochasticError as e:
62 |         assert isinstance(e.excvalue, Quagga)
63 |     with pytest.raises(Quagga):
64 |         _test_fail(seed)
65 |     _test_pass()
66 |     _test_pass(seed)
67 |     _test_passthenfail()
68 |     with pytest.raises(Quagga):
69 |         _test_passthenfail(seed)
70 |     _test_failthenpass()
71 |     with pytest.raises(Quagga):
72 |         _test_failthenpass(seed)
73 |     with pytest.raises(StochasticError):
74 |         _test_failthenfail()
75 |     with pytest.raises(Quagga):
76 |         _test_failthenfail(seed)
77 | 


--------------------------------------------------------------------------------
/tests/test_teh_murphy.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | """The class cgpm.primitives.normal.Normal uses derivations from both
 18 | 
 19 |     http://www.stats.ox.ac.uk/~teh/research/notes/GaussianInverseGamma.pdf
 20 |     https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf (Section 3)
 21 | 
 22 | The two sources use different parameterizations. This suite ensures that the
 23 | conversions of these parameterizations performed in Normal produce consistent
 24 | numerical results between the two sources.
 25 | 
 26 | In particular, truncating the Normal requires computing the logcdf to normalize
 27 | the probability, which is a Student T derived in Murphy but not Teh.
 28 | """
 29 | 
 30 | import itertools as it
 31 | 
 32 | import numpy as np
 33 | from numpy import log, pi, sqrt
 34 | from scipy.special import gammaln
 35 | from scipy.stats import t
 36 | 
 37 | from cgpm.primitives.normal import Normal
 38 | from cgpm.utils.general import gen_rng
 39 | 
 40 | 
 41 | # Prepare some functions for use in the test.
 42 | def teh_posterior(m, r, s, nu, x):
 43 |     """Eq 10 Teh."""
 44 |     N = len(x)
 45 |     sum_x = np.sum(x)
 46 |     sum_x_sq = np.sum(x**2)
 47 |     return Normal.posterior_hypers(N, sum_x, sum_x_sq, m, r, s, nu)
 48 | 
 49 | 
 50 | def murphy_posterior(a, b, k, mu, x):
 51 |     """Eqs 85 to 89 Murphy."""
 52 |     n = len(x)
 53 |     xbar = np.mean(x)
 54 |     kn = k + n
 55 |     an = a + n/2.
 56 |     mun = (k*mu+n*xbar)/(k+n)
 57 |     bn = b + .5*np.sum((x-xbar)**2) + k*n*(xbar-mu)**2 / (2*(k+n))
 58 |     return an, bn, kn, mun
 59 | 
 60 | 
 61 | def murphy_posterior_predictive(an1, an, bn1, bn, kn1, kn):
 62 |     """Eq 99 Murphy, in logs; (an1, bn1, kn1) are hypers after the new point."""
 63 |     return gammaln(an1) - gammaln(an) + an*log(bn) - \
 64 |         (an1)*log(bn1) + 1/2.*(log(kn) - log(kn1)) - 1/2.*log(2*pi)
 65 | 
 66 | 
 67 | # Test suite.
 68 | 
 69 | def test_agreement():
 70 |     # Hyperparmaeters in Teh notation.
 71 |     all_m = map(float, (1., 7., .43, 1.2))
 72 |     all_r = map(float, (2., 18., 3., 11.))
 73 |     all_s = map(float, (2., 6., 15., 55.))
 74 |     all_nu = map(float, (4., .6, 14., 8.))
 75 | 
 76 |     # Dataset
 77 |     rng = gen_rng(0)
 78 |     x1 = rng.normal(10, 3, size=100)
 79 |     x2 = rng.normal(-3, 7, size=100)
 80 | 
 81 |     for (m, r, s, nu), x in \
 82 |             it.product(zip(all_m, all_r, all_s, all_nu), [x1,x2]):
 83 |         # Murphy hypers in terms of Teh.
 84 |         a = nu/2.
 85 |         b = s/2.
 86 |         k = r
 87 |         mu = m
 88 | 
 89 |         # Test equality of posterior hypers.
 90 |         mn, rn, sn, nun = teh_posterior(m, r, s, nu, x)
 91 |         an, bn, kn, mun = murphy_posterior(a, b, k, mu, x)
 92 |         assert np.allclose(an, nun/2, atol=1e-5)
 93 |         assert np.allclose(bn, sn/2, atol=1e-5)
 94 |         assert np.allclose(kn, rn, atol=1e-5)
 95 |         assert np.allclose(mun, mn, atol=1e-5)
 96 | 
 97 |         # Test posterior predictive agree with each other, and Student T.
 98 |         for xtest in np.linspace(1.1, 80.8, 14.1):
 99 |             # Murphy exact, Eq 99.
100 |             an1, bn1, kn1, mun1 = murphy_posterior(
101 |                 a, b, k, mu, np.append(x, xtest))
102 |             logprob_murphy = murphy_posterior_predictive(
103 |                 an1, an, bn1, bn, kn1, kn)
104 | 
105 |             # Student T Murphy, Eq 100.
106 |             scalesq = bn*(kn+1)/(an*kn)
107 |             logprob_t_murphy = t.logpdf(
108 |                 xtest, 2*an, loc=mun, scale=sqrt(scalesq))
109 | 
110 |             # Teh exact using Murphy Eq 99.
111 |             mn1, rn1, sn1, nun1 = teh_posterior(
112 |                 m, r, s, nu, np.append(x, xtest))
113 |             logprob_teh = murphy_posterior_predictive(
114 |                 nun1/2., nun/2, sn1/2., sn/2, rn1, rn)
115 | 
116 |             # Posterior predictive from Normal DistributionGpm.
117 |             logprob_nignormal = Normal.calc_predictive_logp(
118 |                 xtest, len(x), sum(x), np.sum(x**2), m, r, s, nu)
119 | 
120 |             # Student T Teh using Murphy Eq 100.
121 |             scalesq = sn/2.*(rn+1)/(nun/2.*rn)
122 |             logprob_t_teh = t.logpdf(
123 |                 xtest, 2*nun/2., loc=mn, scale=sqrt(scalesq))
124 | 
125 |             # Aggregate all values and test their equality.
126 |             values = [logprob_murphy, logprob_teh, logprob_t_murphy,
127 |                 logprob_t_teh, logprob_nignormal]
128 |             for v in values:
129 |                 assert np.allclose(v, values[0], atol=1e-2)
130 | 


--------------------------------------------------------------------------------
/tests/test_type_check.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | 
  9 | #    http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | from collections import namedtuple
 18 | 
 19 | import pytest
 20 | 
 21 | from cgpm.utils import config as cu
 22 | 
 23 | # A Case bundles constructor arguments and sample values for one cctype.
 24 | Case = namedtuple(
 25 |     'Case', ['outputs', 'inputs', 'distargs', 'good', 'bad'])
 26 | # Sample values per cctype: `good` must be accepted, `bad` must be rejected.
 27 | cases = {
 28 |     'bernoulli' : Case(
 29 |         outputs=[0],
 30 |         inputs=None,
 31 |         distargs=None,
 32 |         good=[0, 1.],
 33 |         bad=[-1, .5, 3]),
 34 | 
 35 |     'beta' : Case(
 36 |         outputs=[0],
 37 |         inputs=None,
 38 |         distargs=None,
 39 |         good=[.3, .1, .9, 0.001, .9999],
 40 |         bad=[-1, 1.02, 21]),
 41 | 
 42 |     'categorical' : Case(
 43 |         outputs=[0],
 44 |         inputs=None,
 45 |         distargs={'k': 4},
 46 |         good=[0., 1, 2, 3.],
 47 |         bad=[-1, 2.5, 4]),
 48 | 
 49 |     'exponential' : Case(
 50 |         outputs=[0],
 51 |         inputs=None,
 52 |         distargs=None,
 53 |         good=[0, 1, 2, 3],
 54 |         bad=[-1, -2.5]),
 55 | 
 56 |     'geometric' : Case(
 57 |         outputs=[0],
 58 |         inputs=None,
 59 |         distargs=None,
 60 |         good=[0, 2, 12],
 61 |         bad=[-1, .5, -4]),
 62 | 
 63 |     'lognormal' : Case(
 64 |         outputs=[0],
 65 |         inputs=None,
 66 |         distargs=None,
 67 |         good=[1, 2, 3],
 68 |         bad=[-12, -0.01, 0]),
 69 | 
 70 |     'normal' : Case(
 71 |         outputs=[0],
 72 |         inputs=None,
 73 |         distargs=None,
 74 |         good=[-1, 0, 10],
 75 |         bad=[]),  # No rejected values are listed for the normal.
 76 | 
 77 |     'normal_trunc' : Case(
 78 |         outputs=[0],
 79 |         inputs=None,
 80 |         distargs={'l': -1, 'h': 10},
 81 |         good=[0, 4, 9],
 82 |         bad=[44, -1.02]),
 83 | 
 84 |     'poisson' : Case(
 85 |         outputs=[0],
 86 |         inputs=None,
 87 |         distargs=None,
 88 |         good=[0, 5, 11],
 89 |         bad=[-1, .5, -4]),
 90 | 
 91 |     'random_forest' : Case(
 92 |         outputs=[0],
 93 |         inputs=[1, 2],
 94 |         distargs={'k': 2, 'inputs':{'stattypes': [1,2]}},
 95 |         good=[(0, {1:1, 2:2}), (1, {1:0, 2:2})],  # (output value, inputs)
 96 |         bad=[(-1, {1:1, 2:2}), (0, {0:1, 2:2}), (0, {1: 3})]),
 97 | 
 98 |     'vonmises' : Case(
 99 |         outputs=[0],
100 |         inputs=None,
101 |         distargs=None,
102 |         good=[0.1, 3.14, 6.2],
103 |         bad=[-1, 7, 12]),
104 | 
105 |     'linear_regression' : Case(
106 |         outputs=[0],
107 |         inputs=[1, 2],
108 |         distargs={
109 |             'inputs': {
110 |                 'stattypes': ['normal', 'bernoulli'],
111 |                 'statargs': [None, {'k':2}]}},
112 |         good=[(0, {1:1, 2:0})],  # (output value, inputs)
113 |         bad=[(0, {0:1, 1:1, 2:0})]),
114 | }
115 | 
116 | 
117 | def get_observation_inputs(t):
118 |     # Assumes that the output is always column id 0.
119 |     return ({0: t[0]}, t[1]) if isinstance(t, tuple) else ({0: t}, None)
120 | 
121 | 
122 | @pytest.mark.parametrize('cctype', cases.keys())
123 | def test_distributions(cctype):
124 |     case = cases[cctype]
125 |     assert_distribution(
126 |         cctype, case.outputs, case.inputs, case.distargs, case.good, case.bad)
127 | 
128 | 
129 | def assert_distribution(cctype, outputs, inputs, distargs, good, bad):
130 |     model = cu.cctype_class(cctype)(outputs, inputs, distargs=distargs)
131 |     for rowid, g in enumerate(good):
132 |         assert_good(model, rowid, g)
133 |     for rowid, b in enumerate(bad):
134 |         assert_bad(model, rowid, b)
135 | 
136 | 
137 | def assert_good(model, rowid, g):
138 |     observation, inputs = get_observation_inputs(g)
139 |     model.incorporate(rowid, observation, inputs)
140 |     model.unincorporate(rowid)
141 |     assert model.logpdf(-1, observation, None, inputs) != -float('inf')
142 | 
143 | 
144 | def assert_bad(model, rowid, b):
145 |     observation, inputs = get_observation_inputs(b)
146 |     with pytest.raises(Exception):
147 |         model.incorporate(rowid, observation, inputs)
148 |     with pytest.raises(Exception):
149 |         model.unincorporate(rowid)
150 |     try: # GPM return negative infinity for invalid input.
151 |         assert model.logpdf(-1, observation, None, inputs) == -float('inf')
152 |     except Exception: # Conditional GPM throws error on wrong input variables.
153 |         assert True
154 | 


--------------------------------------------------------------------------------
/tests/test_view_logpdf_cluster.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Copyright (c) 2015-2016 MIT Probabilistic Computing Project
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | 
 9 | #    http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import pytest
18 | 
19 | import numpy as np
20 | 
21 | from cgpm.mixtures.view import View
22 | from cgpm.utils import general as gu
23 | 
24 | 
25 | def retrieve_view():
26 |     data = np.asarray([
27 |         [1.1,   -2.1,    0],  # rowid=0
28 |         [2.,      .1,    0],  # rowid=1
29 |         [1.5, np.nan,   .5],  # rowid=2
30 |         [4.7,    7.4,   .5],  # rowid=3
31 |         [5.2,    9.6,   np.nan],  # rowid=4
32 |     ])
33 | 
34 |     outputs = [0,1,2,]
35 | 
36 |     return View(
37 |         {c: data[:,i].tolist() for i, c in enumerate(outputs)},
38 |         outputs=[1000] + outputs,
39 |         alpha=2.,
40 |         cctypes=['normal'] * len(outputs),
41 |         Zr=[0,0,0,1,1,]
42 |     )
43 | 
44 | 
45 | def test_crp_prior_logpdf():
46 |     view = retrieve_view()
47 |     crp_normalizer = view.alpha() + 5.
48 |     cluster_logps = np.log(np.asarray([
49 |         3 / crp_normalizer,
50 |         2 / crp_normalizer,
51 |         view.alpha() / crp_normalizer
52 |     ]))
53 |     # Test the crp probabilities agree for a hypothetical row.
54 |     for k in [0,1,2]:
55 |         expected_logpdf = cluster_logps[k]
56 |         crp_logpdf = view.crp.clusters[0].logpdf(None, {view.outputs[0]: k})
57 |         assert np.allclose(expected_logpdf, crp_logpdf)
58 |         view_logpdf = view.logpdf(None, {view.outputs[0]: k})
59 |         assert np.allclose(view_logpdf, crp_logpdf)
60 | 
61 | 
62 | def test_crp_posterior_logpdf():
63 |     view = retrieve_view()
64 |     fresh_row = {0:2, 1:3, 2:.5}
65 |     logps = [
66 |         view.logpdf(None, {view.outputs[0]: k}, fresh_row)
67 |         for k in [0,1,2]
68 |     ]
69 |     assert np.allclose(gu.logsumexp(logps), 0)
70 | 
71 | 
72 | def test_logpdf_observed_nan():
73 |     """For row 2, whose column 1 is nan, View.logpdf matches the Dim's logpdf."""
74 |     view = retrieve_view()
75 |     logp_view = view.logpdf(2, {1:1})
76 |     logp_dim = view.dims[1].logpdf(2, {1:1}, None, {view.outputs[0]: view.Zr(2)})
77 |     assert np.allclose(logp_view, logp_dim)
78 | 
79 | def test_logpdf_chain():
80 |     """Chain rule: log p(z) + log p(x | z) equals log p(x, z)."""
81 |     view = retrieve_view()
82 |     logp_cluster = view.logpdf(None, {view.outputs[0]: 0})
83 |     logp_data = view.logpdf(None, {1:1, 2:0}, {view.outputs[0]: 0})
84 |     logp_joint = view.logpdf(None, {1:1, 2:0, view.outputs[0]: 0})
85 |     assert np.allclose(logp_cluster+logp_data, logp_joint)
86 | 
87 | def test_logpdf_bayes():
88 |     """Bayes rule: log p(z, x1 | x2) equals log p(z, x1, x2) - log p(x2)."""
89 |     view = retrieve_view()
90 |     logp_posterior = view.logpdf(None, {view.outputs[0]: 0, 1:1}, {2:0})
91 |     logp_evidence = view.logpdf(None, {2:0})
92 |     logp_joint = view.logpdf(None, {1:1, 2:0, view.outputs[0]: 0})
93 |     assert np.allclose(logp_joint - logp_evidence, logp_posterior)


--------------------------------------------------------------------------------
/tests/test_vsinline.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | #   Copyright (c) 2010-2016, MIT Probabilistic Computing Project
  4 | #
  5 | #   Licensed under the Apache License, Version 2.0 (the "License");
  6 | #   you may not use this file except in compliance with the License.
  7 | #   You may obtain a copy of the License at
  8 | #
  9 | #       http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | #   Unless required by applicable law or agreed to in writing, software
 12 | #   distributed under the License is distributed on an "AS IS" BASIS,
 13 | #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | #   See the License for the specific language governing permissions and
 15 | #   limitations under the License.
 16 | 
 17 | import hacks
 18 | import pytest
 19 | # NOTE(review): the pytest.config global was removed in pytest 5.0 -- confirm.
 20 | if not pytest.config.getoption('--integration'):
 21 |     hacks.skip('specify --integration to run integration tests')
 22 | import matplotlib.pyplot as plt
 23 | import numpy as np
 24 | import pytest
 25 | 
 26 | from cgpm.venturescript.vsinline import InlineVsCGpm
 27 | from cgpm.utils import general as gu
 28 | 
 29 | 
 30 | def test_input_matches_args():
 31 |     """The expression's argument list must have one name per input."""
 32 |     InlineVsCGpm([0], [], expression='() ~> {normal(0, 1)}')
 33 |     InlineVsCGpm([0], [], expression='( ) ~> {normal(0, 1)}')
 34 |     InlineVsCGpm([0], [], expression='()   ~> {normal(0, 1)}')
 35 |     InlineVsCGpm([0], [], expression=' ( ) ~> {normal(0, 1)}')
 36 |     InlineVsCGpm([0], [1], expression='(a) ~> {normal(0, 1)}')
 37 |     InlineVsCGpm([0], [1], expression='(a ) ~> {normal(0, 1)}')
 38 |     InlineVsCGpm([0], [1], expression=' ( a ) ~> {normal(0, 1)}')
 39 |     InlineVsCGpm([0], [1], expression='( ab) ~> {normal(0, 1)}')
 40 |     InlineVsCGpm([0], [1,2], expression='(a, b) ~> {normal(0, 1)}')
 41 |     InlineVsCGpm([0], [1,2], expression='( a, b ) ~> {normal(0, 1)}')
 42 |     InlineVsCGpm([0], [1,2], expression='(a, b ) ~> {normal(0, 1)}')
 43 |     InlineVsCGpm([0], [1,2,3], expression='(a, b, bc) ~> {2}')
 44 |     # Argument lists whose length differs from the inputs must be rejected.
 45 |     with pytest.raises(Exception):
 46 |         InlineVsCGpm([0], [], expression='(a) ~> {normal(0,1)}')
 47 |     with pytest.raises(Exception):
 48 |         InlineVsCGpm([0], [1], expression='(a, b) ~> {normal(0,1)}')
 49 |     with pytest.raises(Exception):
 50 |         InlineVsCGpm([0], [4], expression='(a, b , c) ~> {normal(0,1)}')
 51 |     with pytest.raises(Exception):
 52 |         InlineVsCGpm([0], [1,2], expression='(a) ~> {normal(0,1)}')
 53 | 
 54 | def test_simulate_uniform():
 55 |     vs = InlineVsCGpm([0], [],
 56 |         expression='() ~> {uniform(low: -4.71, high: 4.71)}',
 57 |         rng=gu.gen_rng(10))
 58 | 
 59 |     lp = vs.logpdf(0, {0:0})
 60 |     for x in np.linspace(-4.70, 4.70, 100):
 61 |         assert np.allclose(vs.logpdf(0, {0:x}), lp)
 62 |     assert np.isinf(vs.logpdf(0, {0:12}))
 63 | 
 64 |     samples = vs.simulate(0, [0], None, None, N=200)
 65 |     extracted = [s[0] for s in samples]
 66 |     fig, ax = plt.subplots()
 67 |     ax.hist(extracted)
 68 | 
 69 | 
 70 | def test_simulate_noisy_cos():
 71 |     """Sample x uniformly, then y given x around cos(x), and plot both."""
 72 |     vs_x = InlineVsCGpm([0], [],
 73 |         expression='() ~> {uniform(low: -4.71, high: 4.71)}',
 74 |         rng=gu.gen_rng(10))
 75 | 
 76 |     vs_y = InlineVsCGpm([1], [0],
 77 |         expression="""
 78 |         (x) ~>
 79 |             {if (cos(x) > 0)
 80 |                 {uniform(low: cos(x) - .5, high: cos(x))}
 81 |             else
 82 |                 {uniform(low: cos(x), high: cos(x) + .5)}}""",
 83 |         rng=gu.gen_rng(12))
 84 | 
 85 |     samples_x = vs_x.simulate(0, [0], None, None, N=200)
 86 |     samples_y = [vs_y.simulate(0, [1], None, sx) for sx in samples_x]
 87 | 
 88 |     # Plot the joint query.
 89 |     fig, ax = plt.subplots()
 90 |     xs = [s[0] for s in samples_x]
 91 |     ys = [s[1] for s in samples_y]
 92 | 
 93 |     # Scatter the dots.
 94 |     ax.scatter(xs, ys, color='blue', alpha=.4)
 95 |     ax.set_xlim([-1.5*np.pi, 1.5*np.pi])
 96 |     ax.set_ylim([-1.75, 1.75])
 97 |     for x in xs:  # Short tick along the bottom edge for each sampled x.
 98 |         ax.vlines(x, -1.75, -1.65, linewidth=.5)
 99 |     ax.grid()
100 | 
101 |     # Plot the density from y=0 to y=2 for x = 0 (exp of the logpdf values).
102 |     fig, ax = plt.subplots()
103 |     logpdfs = np.exp([
104 |         vs_y.logpdf(0, {1:y}, None, {0:0})
105 |         for y in np.linspace(0,2,50)
106 |     ])
107 |     ax.plot(np.linspace(0, 2, 50), logpdfs)
108 | 


--------------------------------------------------------------------------------