├── .ctags ├── .gitignore ├── .travis.yml ├── LICENSE-MIT ├── MANIFEST.in ├── README.md ├── examples ├── EM_demo.py ├── __init__.py ├── animation.py ├── demo.py ├── factor_analysis.py ├── meanfield_steps.py └── robust_regression.py ├── images ├── best-model.png ├── data.png └── model-vlb-vs-iteration.png ├── pybasicbayes ├── __init__.py ├── abstractions.py ├── distributions │ ├── __init__.py │ ├── binomial.py │ ├── gaussian.py │ ├── geometric.py │ ├── meta.py │ ├── multinomial.py │ ├── negativebinomial.py │ ├── poisson.py │ ├── regression.py │ └── uniform.py ├── models │ ├── __init__.py │ ├── factor_analysis.py │ ├── mixture.py │ └── parallel_mixture.py ├── testing │ ├── .gitignore │ ├── __init__.py │ └── mixins.py └── util │ ├── .ctags │ ├── .gitignore │ ├── __init__.py │ ├── cstats.pyx │ ├── cyutil.py │ ├── general.py │ ├── plot.py │ ├── profiling.py │ ├── stats.py │ ├── testing.py │ └── text.py ├── setup.py └── tests ├── test_categorical.py ├── test_gammadirichlet.py ├── test_gaussian.py ├── test_geometric.py ├── test_negbin.py ├── test_poisson.py └── test_regression.py /.ctags: -------------------------------------------------------------------------------- 1 | --exclude=util 2 | --python-kinds=-i 3 | --recurse=yes 4 | --exclude=.git 5 | --exclude=.pyc 6 | --exclude=.md 7 | --exclude=DS_Store 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | /tags 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "2.7" 5 | notifications: 6 | email: false 7 | before_install: 8 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 9 | - bash miniconda.sh -b -p $HOME/miniconda 10 | - export PATH="$HOME/miniconda/bin:$PATH" 11 | - conda update --yes conda 12 | - conda install --yes python=$TRAVIS_PYTHON_VERSION pip numpy scipy matplotlib cython nose future 13 | install: 14 | - python setup.py build_ext --inplace 15 | script: nosetests tests -A 'not slow and not random' 16 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Matthew James Johnson, 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include pybasicbayes *.pyx *.c *.cpp *.h 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/mattjj/pybasicbayes.svg?branch=master)](https://travis-ci.org/mattjj/pybasicbayes) 2 | 3 | This library provides objects that model probability distributions and the 4 | related operations that are common in generative Bayesian modeling and Bayesian 5 | inference, including Gibbs sampling and variational mean field algorithms. The 6 | file `abstractions.py` describes the queries a distribution must support to be 7 | used in each algorithm, as well as an API for models, which compose the 8 | distribution objects. 9 | 10 | ## Example ## 11 | 12 | The file `models.py` shows how to construct mixture models building on the 13 | distribution objects in this library. For example, to generate data from a 14 | Gaussian mixture model, we might set some hyperparameters, construct a 15 | `Mixture` object, and then ask it to randomly generate some data from the 16 | prior: 17 | 18 | ```python 19 | import numpy as np 20 | from pybasicbayes import models, distributions 21 | 22 | # hyperparameters 23 | alpha_0=5.0 24 | obs_hypparams = dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.05,nu_0=5) 25 | 26 | # create the model 27 | priormodel = models.Mixture(alpha_0=alpha_0, 28 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 29 | 30 | # generate some data 31 | data = priormodel.rvs(400) 32 | 33 | # delete the model 34 | del priormodel 35 | ``` 36 | 37 | If we throw away the prior model at the end, we're left just with the data, 38 | which look like this: 39 | 40 | ```python 41 | from matplotlib import pyplot as plt 42 | plt.figure() 43 | plt.plot(data[:,0],data[:,1],'kx') 44 | plt.title('data') 45 | ``` 46 | 47 | ![randomly generated mixture model data](https://raw.githubusercontent.com/mattjj/pybasicbayes/master/images/data.png) 48 | 49 | Imagine we loaded these data from some measurements file and we wanted to fit a 50 | mixture model to it. We can create a new `Mixture` and run inference to get a 51 | representation of the posterior distribution over mixture models conditioned on 52 | observing these data: 53 | 54 | ```python 55 | posteriormodel = models.Mixture(alpha_0=alpha_0, 56 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 57 | 58 | posteriormodel.add_data(data) 59 | ``` 60 | 61 | Since pybasicbayes implements both Gibbs sampling and variational mean field 62 | inference algorithms, we can use both together in a hybrid algorithm. 
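In the loop below, each of five passes runs 100 Gibbs sweeps to explore the posterior and then 100 mean field updates to lock onto a nearby mode, recording the variational lower bounds so the best run can be picked out afterwards: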
63 | 64 | ```python 65 | import copy 66 | from pybasicbayes.util.text import progprint_xrange 67 | 68 | allscores = [] # variational lower bounds on the marginal data log likelihood 69 | allmodels = [] 70 | for superitr in range(5): 71 | # Gibbs sampling to wander around the posterior 72 | print 'Gibbs Sampling' 73 | for itr in progprint_xrange(100): 74 | posteriormodel.resample_model() 75 | 76 | # mean field to lock onto a mode 77 | print 'Mean Field' 78 | scores = [posteriormodel.meanfield_coordinate_descent_step() 79 | for itr in progprint_xrange(100)] 80 | 81 | allscores.append(scores) 82 | allmodels.append(copy.deepcopy(posteriormodel)) 83 | 84 | import operator 85 | models_and_scores = sorted([(m,s[-1]) for m,s 86 | in zip(allmodels,allscores)],key=operator.itemgetter(1),reverse=True) 87 | ``` 88 | 89 | Now we can plot the score trajectories: 90 | 91 | ```python 92 | plt.figure() 93 | for scores in allscores: 94 | plt.plot(scores) 95 | plt.title('model vlb scores vs iteration') 96 | ``` 97 | 98 | ![model vlb scores vs iteration](https://raw.githubusercontent.com/mattjj/pybasicbayes/master/images/model-vlb-vs-iteration.png) 99 | 100 | And show the point estimate of the best model by calling the convenient `Mixture.plot()`: 101 | 102 | ```python 103 | models_and_scores[0][0].plot() 104 | plt.title('best model') 105 | ``` 106 | 107 | ![best fit model and data](https://raw.githubusercontent.com/mattjj/pybasicbayes/master/images/best-model.png) 108 | 109 | Since these are Bayesian methods, we have much more than just a point estimate 110 | for plotting: we have fit entire distributions, so we can query any confidence 111 | or marginal that we need. 112 | 113 | See the file `demo.py` for the code for this demo. 114 | 115 | ## Authors ## 116 | 117 | [Matt Johnson](https://github.com/mattjj), [Alex Wiltschko](https://github.com/alexbw), [Yarden Katz](https://github.com/yarden), [Nick Foti](https://github.com/nfoti), and [Scott Linderman](https://github.com/slinderman). 118 | 119 | -------------------------------------------------------------------------------- /examples/EM_demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import range 3 | import numpy as np 4 | np.seterr(invalid='raise') 5 | from matplotlib import pyplot as plt 6 | import copy 7 | 8 | import pybasicbayes 9 | from pybasicbayes import models, distributions 10 | from pybasicbayes.util.text import progprint_xrange 11 | 12 | # EM is really terrible! Here's a demo of how to do it on really easy data 13 | 14 | ### generate and plot the data 15 | 16 | alpha_0=100. 
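# large concentration => near-uniform mixture weights, so the generating model uses all 6 components fairly evenly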
17 | obs_hypparams=dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.05,nu_0=5) 18 | 19 | priormodel = models.Mixture(alpha_0=alpha_0, 20 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(6)]) 21 | 22 | data = priormodel.rvs(200) 23 | 24 | del priormodel 25 | 26 | 27 | plt.figure() 28 | plt.plot(data[:,0],data[:,1],'kx') 29 | plt.title('data') 30 | 31 | 32 | min_num_components, max_num_components = (1,12) 33 | num_tries_each = 5 34 | 35 | ### search over models using BIC as a model selection criterion 36 | 37 | BICs = [] 38 | examplemodels = [] 39 | for idx, num_components in enumerate(progprint_xrange(min_num_components,max_num_components+1)): 40 | theseBICs = [] 41 | for i in range(num_tries_each): 42 | fitmodel = models.Mixture( 43 | alpha_0=10000, # used for random initialization Gibbs sampling, big means use all components 44 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(num_components)]) 45 | 46 | fitmodel.add_data(data) 47 | 48 | # use Gibbs sampling for initialization 49 | for itr in range(100): 50 | fitmodel.resample_model() 51 | 52 | # use EM to fit a model 53 | for itr in range(50): 54 | fitmodel.EM_step() 55 | 56 | theseBICs.append(fitmodel.BIC()) 57 | 58 | examplemodels.append(copy.deepcopy(fitmodel)) 59 | BICs.append(theseBICs) 60 | 61 | plt.figure() 62 | plt.errorbar( 63 | x=np.arange(min_num_components,max_num_components+1), 64 | y=[np.mean(x) for x in BICs], 65 | yerr=[np.std(x) for x in BICs] 66 | ) 67 | plt.xlabel('num components') 68 | plt.ylabel('BIC') 69 | 70 | plt.figure() 71 | examplemodels[np.argmin([np.mean(x) for x in BICs])].plot() 72 | plt.title('a decent model') 73 | 74 | plt.show() 75 | 76 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattjj/pybasicbayes/61f65ad6c781288605ec5f7347efcc5dbd73c4fc/examples/__init__.py -------------------------------------------------------------------------------- /examples/animation.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from builtins import range 4 | import numpy as np 5 | import numpy.random as npr 6 | from matplotlib import pyplot as plt 7 | plt.ion() 8 | 9 | from pybasicbayes import models, distributions 10 | 11 | 12 | ############### 13 | # load data # 14 | ############### 15 | 16 | data = np.loadtxt('data.txt') 17 | 18 | plt.figure() 19 | plt.plot(data[:,0],data[:,1],'kx') 20 | plt.title('data') 21 | 22 | ################## 23 | # set up model # 24 | ################## 25 | 26 | npr.seed(0) 27 | 28 | alpha_0 = 5. 29 | obs_hypparams = dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.05,nu_0=5) 30 | 31 | model = models.Mixture( 32 | alpha_0=alpha_0, 33 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)] 34 | ) 35 | 36 | model.add_data(data) 37 | 38 | ############## 39 | # animate! # 40 | ############## 41 | 42 | ## movie 43 | # try: 44 | # from moviepy.video.io.bindings import mplfig_to_npimage 45 | # from moviepy.editor import VideoClip 46 | # except: 47 | # print "No moviepy found. Quitting..." 
48 | # import sys 49 | # sys.exit(1) 50 | 51 | # fig = plt.figure() 52 | # model.plot(draw=False) 53 | # plt.axis([-8,5,-2,6]) 54 | 55 | # def make_frame_mpl(t): 56 | # if (t // 2) % 2: 57 | # model.meanfield_coordinate_descent_step() 58 | # else: 59 | # model.resample_model() 60 | # model.plot(update=True,draw=False) 61 | # return mplfig_to_npimage(fig) 62 | 63 | # animation = VideoClip(make_frame_mpl, duration=12) 64 | # animation.write_videofile('gibbs.mp4',fps=50) 65 | 66 | 67 | 68 | 69 | import itertools, sys, json 70 | for i in itertools.count(): 71 | model.resample_model() 72 | if i % 3 == 0: 73 | print(json.dumps(model.to_json_dict())) 74 | sys.stdout.flush() 75 | 76 | -------------------------------------------------------------------------------- /examples/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from builtins import zip 4 | from builtins import range 5 | import numpy as np 6 | np.seterr(invalid='raise') 7 | from matplotlib import pyplot as plt 8 | import copy 9 | 10 | import pybasicbayes 11 | from pybasicbayes import models, distributions 12 | from pybasicbayes.util.text import progprint_xrange 13 | 14 | alpha_0=5.0 15 | obs_hypparams=dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.05,nu_0=5) 16 | 17 | priormodel = models.Mixture(alpha_0=alpha_0, 18 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 19 | 20 | data, _ = priormodel.generate(500) 21 | 22 | plt.figure() 23 | priormodel.plot() 24 | plt.title('true model') 25 | 26 | del priormodel 27 | 28 | plt.figure() 29 | plt.plot(data[:,0],data[:,1],'kx') 30 | plt.title('data') 31 | 32 | posteriormodel = models.Mixture(alpha_0=alpha_0, 33 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 34 | 35 | posteriormodel.add_data(data) 36 | 37 | allscores = [] 38 | allmodels = [] 39 | for superitr in range(5): 40 | # Gibbs sampling to wander around the posterior 41 | print('Gibbs Sampling') 42 | for itr in progprint_xrange(100): 43 | posteriormodel.resample_model() 44 | 45 | # mean field to lock onto a mode 46 | print('Mean Field') 47 | scores = [posteriormodel.meanfield_coordinate_descent_step() 48 | for itr in progprint_xrange(100)] 49 | 50 | allscores.append(scores) 51 | allmodels.append(copy.deepcopy(posteriormodel)) 52 | 53 | plt.figure() 54 | for scores in allscores: 55 | plt.plot(scores) 56 | plt.title('model vlb scores vs iteration') 57 | 58 | import operator 59 | models_and_scores = sorted([(m,s[-1]) for m,s 60 | in zip(allmodels,allscores)],key=operator.itemgetter(1),reverse=True) 61 | 62 | plt.figure() 63 | models_and_scores[0][0].plot() 64 | plt.title('best model') 65 | 66 | plt.show() 67 | -------------------------------------------------------------------------------- /examples/factor_analysis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.random.seed(1) 3 | 4 | from pybasicbayes.util.text import progprint_xrange 5 | import matplotlib.pyplot as plt 6 | from matplotlib.cm import get_cmap 7 | 8 | import pybasicbayes.models.factor_analysis 9 | from pybasicbayes.models.factor_analysis import FactorAnalysis 10 | 11 | N = 2000 12 | D_obs = 20 13 | D_latent = 2 14 | 15 | def principal_angle(A,B): 16 | """ 17 | Find the principal angle between two subspaces 18 | spanned by columns of A and B 19 | """ 20 | from numpy.linalg import qr, svd 21 | qA, _ = qr(A) 22 | qB, _ = qr(B) 23 | 
U,S,V = svd(qA.T.dot(qB)) 24 | return np.arccos(min(S.min(), 1.0)) 25 | 26 | def generate_synth_data(): 27 | 28 | # Create a true model and sample from it 29 | mask = np.random.rand(N,D_obs) < 0.9 30 | true_model = FactorAnalysis(D_obs, D_latent) 31 | X, Z_true = true_model.generate(N=N, mask=mask, keep=True) 32 | return true_model, X, Z_true, mask 33 | 34 | 35 | def plot_results(lls, angles, Ztrue, Zinf): 36 | # Plot log probabilities 37 | plt.figure() 38 | plt.plot(lls) 39 | plt.ylabel("Log Likelihood") 40 | plt.xlabel("Iteration") 41 | 42 | plt.figure() 43 | plt.plot(np.array(angles) / np.pi * 180.) 44 | plt.ylabel("Principal Angle") 45 | plt.xlabel("Iteration") 46 | 47 | # Plot locations, color by angle 48 | N = Ztrue.shape[0] 49 | inds_to_plot = np.random.randint(0, N, min(N, 500)) 50 | th = np.arctan2(Ztrue[:,1], Ztrue[:,0]) 51 | nperm = np.argsort(np.argsort(th)) 52 | cmap = get_cmap("jet") 53 | 54 | plt.figure() 55 | plt.subplot(121) 56 | for n in inds_to_plot: 57 | plt.plot(Ztrue[n,0], Ztrue[n,1], 'o', markerfacecolor=cmap(nperm[n] / float(N)), markeredgecolor="none") 58 | plt.title("True Embedding") 59 | plt.xlim(-4,4) 60 | plt.ylim(-4,4) 61 | 62 | plt.subplot(122) 63 | for n in inds_to_plot: 64 | plt.plot(Zinf[n,0], Zinf[n,1], 'o', markerfacecolor=cmap(nperm[n] / float(N)), markeredgecolor="none") 65 | plt.title("Inferred Embedding") 66 | plt.xlim(-4,4) 67 | plt.ylim(-4,4) 68 | 69 | plt.show() 70 | 71 | def gibbs_example(true_model, X, Z_true, mask): 72 | # Fit a test model 73 | model = FactorAnalysis( 74 | D_obs, D_latent, 75 | # W=true_model.W, sigmasq=true_model.sigmasq 76 | ) 77 | inf_data = model.add_data(X, mask=mask) 78 | model.set_empirical_mean() 79 | 80 | lps = [] 81 | angles = [] 82 | N_iters = 100 83 | for _ in progprint_xrange(N_iters): 84 | model.resample_model() 85 | lps.append(model.log_likelihood()) 86 | angles.append(principal_angle(true_model.W, model.W)) 87 | 88 | plot_results(lps, angles, Z_true, inf_data.Z) 89 | 90 | def em_example(true_model, X, Z_true, mask): 91 | # Fit a test model 92 | model = FactorAnalysis( 93 | D_obs, D_latent, 94 | # W=true_model.W, sigmasq=true_model.sigmasq 95 | ) 96 | inf_data = model.add_data(X, mask=mask) 97 | model.set_empirical_mean() 98 | 99 | lps = [] 100 | angles = [] 101 | N_iters = 100 102 | for _ in progprint_xrange(N_iters): 103 | model.EM_step() 104 | lps.append(model.log_likelihood()) 105 | angles.append(principal_angle(true_model.W, model.W)) 106 | 107 | plot_results(lps, angles, Z_true, inf_data.E_Z) 108 | 109 | def meanfield_example(true_model, X, Z_true, mask): 110 | # Fit a test model 111 | model = FactorAnalysis( 112 | D_obs, D_latent, 113 | # W=true_model.W, sigmasq=true_model.sigmasq 114 | ) 115 | inf_data = model.add_data(X, mask=mask) 116 | model.set_empirical_mean() 117 | 118 | lps = [] 119 | angles = [] 120 | N_iters = 100 121 | for _ in progprint_xrange(N_iters): 122 | model.meanfield_coordinate_descent_step() 123 | lps.append(model.expected_log_likelihood()) 124 | E_W, _, _, _ = model.regression.mf_expectations 125 | angles.append(principal_angle(true_model.W, E_W)) 126 | 127 | plot_results(lps, angles, Z_true, inf_data.Z) 128 | 129 | def svi_example(true_model, X, Z_true, mask): 130 | # Fit a test model 131 | model = FactorAnalysis( 132 | D_obs, D_latent, 133 | # W=true_model.W, sigmasq=true_model.sigmasq 134 | ) 135 | 136 | # Add the data in minibatches 137 | N = X.shape[0] 138 | minibatchsize = 200 139 | prob = minibatchsize / float(N) 140 | 141 | lps = [] 142 | angles = [] 143 | N_iters = 100 144 | 
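# SVI step size schedule: stepsize[t] = (t + delay)**(-forgetting_rate), a slowly decaying sequence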
delay = 10.0 145 | forgetting_rate = 0.75 146 | stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate) 147 | for itr in progprint_xrange(N_iters): 148 | minibatch = np.random.permutation(N)[:minibatchsize] 149 | X_mb, mask_mb = X[minibatch], mask[minibatch] 150 | lps.append(model.meanfield_sgdstep(X_mb, prob, stepsize[itr], masks=mask_mb)) 151 | E_W, _, _, _ = model.regression.mf_expectations 152 | angles.append(principal_angle(true_model.W, E_W)) 153 | 154 | # Compute the expected states for the first minibatch of data 155 | model.add_data(X, mask) 156 | statesobj = model.data_list.pop() 157 | statesobj.meanfieldupdate() 158 | Z_inf = statesobj.E_Z 159 | plot_results(lps, angles, Z_true, Z_inf) 160 | 161 | if __name__ == "__main__": 162 | true_model, X, Z_true, mask = generate_synth_data() 163 | gibbs_example(true_model, X, Z_true, mask) 164 | em_example(true_model, X, Z_true, mask) 165 | meanfield_example(true_model, X, Z_true, mask) 166 | svi_example(true_model, X, Z_true, mask) 167 | -------------------------------------------------------------------------------- /examples/meanfield_steps.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import input 3 | from builtins import range 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | plt.interactive(True) 7 | 8 | from pybasicbayes import models, distributions 9 | 10 | GENERATE_DATA = True 11 | 12 | ########################### 13 | # generate or load data # 14 | ########################### 15 | 16 | alpha_0=5.0 17 | obs_hypparams=dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.05,nu_0=5) 18 | 19 | priormodel = models.Mixture(alpha_0=alpha_0, 20 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 21 | data, _ = priormodel.generate(100) 22 | del priormodel 23 | 24 | plt.figure() 25 | plt.plot(data[:,0],data[:,1],'kx') 26 | plt.title('data') 27 | 28 | input() # pause for effect 29 | 30 | ############### 31 | # inference # 32 | ############### 33 | 34 | posteriormodel = models.Mixture(alpha_0=alpha_0, 35 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 36 | 37 | posteriormodel.add_data(data) 38 | 39 | vlbs = [] 40 | plt.figure(2,figsize=(8,6)) 41 | posteriormodel.plot() 42 | plt.figure(3,figsize=(8,6)) 43 | while True: 44 | if input().lower() == 'break': # pause at each iteration 45 | break 46 | 47 | vlb = posteriormodel.meanfield_coordinate_descent_step() 48 | 49 | plt.figure(2) 50 | plt.clf() 51 | posteriormodel.plot() 52 | 53 | plt.figure(3) 54 | plt.clf() 55 | vlbs.append(vlb) 56 | plt.plot(vlbs) 57 | 58 | -------------------------------------------------------------------------------- /examples/robust_regression.py: -------------------------------------------------------------------------------- 1 | # Demo of a robust regression model with multivariate-t distributed noise 2 | 3 | import numpy as np 4 | import numpy.random as npr 5 | np.random.seed(0) 6 | 7 | import matplotlib.pyplot as plt 8 | import seaborn as sns 9 | sns.set_style("white") 10 | 11 | from pybasicbayes.util.text import progprint_xrange 12 | from pybasicbayes.distributions import Regression, RobustRegression 13 | 14 | D_out = 1 15 | D_in = 2 16 | N = 100 17 | 18 | # Make a regression model and simulate data 19 | A = npr.randn(D_out, D_in) 20 | b = npr.randn(D_out) 21 | Sigma = 0.1 * np.eye(D_out) 22 | 23 | true_reg = Regression(A=np.column_stack((A, b)), sigma=Sigma, affine=True) 24 | X = npr.randn(N, D_in) 25 
| y = true_reg.rvs(x=X, return_xy=False) 26 | 27 | # Corrupt a fraction of the data 28 | inds = npr.rand(N) < 0.1 29 | y[inds] = 3 * npr.randn(inds.sum(), D_out) 30 | 31 | # Make a test regression and fit it 32 | std_reg = Regression(nu_0=D_out + 2, 33 | S_0=np.eye(D_out), 34 | M_0=np.zeros((D_out, D_in+1)), 35 | K_0=np.eye(D_in+1), 36 | affine=True) 37 | 38 | robust_reg = RobustRegression(nu_0=D_out+2, 39 | S_0=np.eye(D_out), 40 | M_0=np.zeros((D_out, D_in+1)), 41 | K_0=np.eye(D_in+1), 42 | affine=True) 43 | 44 | def _collect(r): 45 | ll = r.log_likelihood((X, y))[~inds].sum() 46 | err = ((y - r.predict(X))**2).sum(1) 47 | mse = np.mean(err[~inds]) 48 | return r.A.copy(), ll, mse 49 | 50 | def _update(r): 51 | r.resample([(X,y)]) 52 | return _collect(r) 53 | 54 | # Fit the standard regression 55 | smpls = [_collect(std_reg)] 56 | for _ in progprint_xrange(100): 57 | smpls.append(_update(std_reg)) 58 | smpls = zip(*smpls) 59 | std_As, std_lls, std_mses = tuple(map(np.array, smpls)) 60 | 61 | # Fit the robust regression 62 | smpls = [_collect(robust_reg)] 63 | for _ in progprint_xrange(100): 64 | smpls.append(_update(robust_reg)) 65 | smpls = zip(*smpls) 66 | robust_As, robust_lls, robust_mses = tuple(map(np.array, smpls)) 67 | 68 | 69 | # Plot the inferred regression function 70 | plt.figure(figsize=(8, 4)) 71 | xlim = (-3, 3) 72 | ylim = abs(y).max() 73 | npts = 50 74 | x1, x2 = np.meshgrid(np.linspace(*xlim, npts), np.linspace(*xlim, npts)) 75 | 76 | plt.subplot(131) 77 | mu = true_reg.predict(np.column_stack((x1.ravel(), x2.ravel()))) 78 | plt.imshow(mu.reshape((npts, npts)), 79 | cmap="RdBu", vmin=-ylim, vmax=ylim, 80 | alpha=0.8, 81 | extent=xlim + tuple(reversed(xlim))) 82 | plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray') 83 | plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1) 84 | plt.xlim(xlim) 85 | plt.ylim(xlim) 86 | plt.title("True") 87 | 88 | plt.subplot(132) 89 | mu = std_reg.predict(np.column_stack((x1.ravel(), x2.ravel()))) 90 | plt.imshow(mu.reshape((npts, npts)), 91 | cmap="RdBu", vmin=-ylim, vmax=ylim, 92 | alpha=0.8, 93 | extent=xlim + tuple(reversed(xlim))) 94 | plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray') 95 | plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1) 96 | plt.xlim(xlim) 97 | plt.ylim(xlim) 98 | plt.title("Standard Regression") 99 | 100 | plt.subplot(133) 101 | mu = robust_reg.predict(np.column_stack((x1.ravel(), x2.ravel()))) 102 | plt.imshow(mu.reshape((npts, npts)), 103 | cmap="RdBu", vmin=-ylim, vmax=ylim, 104 | alpha=0.8, 105 | extent=xlim + tuple(reversed(xlim))) 106 | plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray') 107 | plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1) 108 | plt.xlim(xlim) 109 | plt.ylim(xlim) 110 | plt.title("Robust Regression") 111 | 112 | 113 | print("True A: {}".format(true_reg.A)) 114 | print("Std A: {}".format(std_As.mean(0))) 115 | print("Robust A: {}".format(robust_As.mean(0))) 116 | 117 | # Plot the log likelihoods and mean squared errors 118 | plt.figure(figsize=(8, 4)) 119 | plt.subplot(121) 120 | plt.plot(std_lls) 121 | plt.plot(robust_lls) 122 | plt.xlabel("Iteration") 123 | plt.ylabel("Log Likelihood") 124 | 125 | plt.subplot(122) 126 | plt.plot(std_mses, 
label="Standard") 127 | plt.plot(robust_mses, label="Robust") 128 | plt.legend(loc="upper right") 129 | plt.xlabel("Iteration") 130 | plt.ylabel("Mean Squared Error") 131 | 132 | plt.show() 133 | -------------------------------------------------------------------------------- /images/best-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattjj/pybasicbayes/61f65ad6c781288605ec5f7347efcc5dbd73c4fc/images/best-model.png -------------------------------------------------------------------------------- /images/data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattjj/pybasicbayes/61f65ad6c781288605ec5f7347efcc5dbd73c4fc/images/data.png -------------------------------------------------------------------------------- /images/model-vlb-vs-iteration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattjj/pybasicbayes/61f65ad6c781288605ec5f7347efcc5dbd73c4fc/images/model-vlb-vs-iteration.png -------------------------------------------------------------------------------- /pybasicbayes/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import abstractions, distributions, models, util 3 | -------------------------------------------------------------------------------- /pybasicbayes/abstractions.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from builtins import object 4 | import abc 5 | import numpy as np 6 | import copy 7 | 8 | import pybasicbayes 9 | from pybasicbayes.util.stats import combinedata 10 | from pybasicbayes.util.text import progprint_xrange 11 | from future.utils import with_metaclass 12 | 13 | # NOTE: data is always a (possibly masked) np.ndarray or list of (possibly 14 | # masked) np.ndarrays. 15 | 16 | # TODO figure out a data abstraction 17 | # TODO make an exponential family abc to reduce boilerplate 18 | 19 | ################ 20 | # Base class # 21 | ################ 22 | 23 | class Distribution(with_metaclass(abc.ABCMeta, object)): 24 | @abc.abstractmethod 25 | def rvs(self,size=[]): 26 | 'random variates (samples)' 27 | pass 28 | 29 | @abc.abstractmethod 30 | def log_likelihood(self,x): 31 | ''' 32 | log likelihood (either log probability mass function or log probability 33 | density function) of x, which has the same type as the output of rvs() 34 | ''' 35 | pass 36 | 37 | class BayesianDistribution(with_metaclass(abc.ABCMeta, Distribution)): 38 | def empirical_bayes(self,data): 39 | ''' 40 | (optional) set hyperparameters via empirical bayes 41 | e.g. 
treat argument as a pseudo-dataset for exponential family 42 | ''' 43 | raise NotImplementedError 44 | 45 | ######################################################### 46 | # Algorithm interfaces for inference in distributions # 47 | ######################################################### 48 | 49 | class GibbsSampling(with_metaclass(abc.ABCMeta, BayesianDistribution)): 50 | @abc.abstractmethod 51 | def resample(self,data=[]): 52 | pass 53 | 54 | def copy_sample(self): 55 | ''' 56 | return an object copy suitable for making lists of posterior samples 57 | (override this method to prevent copying shared structures into each sample) 58 | ''' 59 | return copy.deepcopy(self) 60 | 61 | def resample_and_copy(self): 62 | self.resample() 63 | return self.copy_sample() 64 | 65 | class MeanField(with_metaclass(abc.ABCMeta, BayesianDistribution)): 66 | @abc.abstractmethod 67 | def expected_log_likelihood(self,x): 68 | pass 69 | 70 | @abc.abstractmethod 71 | def meanfieldupdate(self,data,weights): 72 | pass 73 | 74 | def get_vlb(self): 75 | raise NotImplementedError 76 | 77 | class MeanFieldSVI(with_metaclass(abc.ABCMeta, BayesianDistribution)): 78 | @abc.abstractmethod 79 | def meanfield_sgdstep(self,expected_suff_stats,prob,stepsize): 80 | pass 81 | 82 | class Collapsed(with_metaclass(abc.ABCMeta, BayesianDistribution)): 83 | @abc.abstractmethod 84 | def log_marginal_likelihood(self,data): 85 | pass 86 | 87 | def log_predictive(self,newdata,olddata): 88 | return self.log_marginal_likelihood(combinedata((newdata,olddata))) \ 89 | - self.log_marginal_likelihood(olddata) 90 | 91 | def predictive(self,*args,**kwargs): 92 | return np.exp(self.log_predictive(*args,**kwargs)) 93 | 94 | class MaxLikelihood(with_metaclass(abc.ABCMeta, Distribution)): 95 | @abc.abstractmethod 96 | def max_likelihood(self,data,weights=None): 97 | ''' 98 | sets the parameters set to their maximum likelihood values given the 99 | (weighted) data 100 | ''' 101 | pass 102 | 103 | @property 104 | def num_parameters(self): 105 | raise NotImplementedError 106 | 107 | class MAP(with_metaclass(abc.ABCMeta, BayesianDistribution)): 108 | @abc.abstractmethod 109 | def MAP(self,data,weights=None): 110 | ''' 111 | sets the parameters to their MAP values given the (weighted) data 112 | analogous to max_likelihood but includes hyperparameters 113 | ''' 114 | pass 115 | 116 | class Tempering(BayesianDistribution): 117 | @abc.abstractmethod 118 | def log_likelihood(self,data,temperature=1.): 119 | pass 120 | 121 | @abc.abstractmethod 122 | def resample(self,data,temperature=1.): 123 | pass 124 | 125 | def energy(self,data): 126 | return -self.log_likelihood(data,temperature=1.) 127 | 128 | ############ 129 | # Models # 130 | ############ 131 | 132 | # a "model" is differentiated from a "distribution" in this code by latent state 133 | # over data: a model attaches a latent variable (like a label or state sequence) 134 | # to data, and so it 'holds onto' data. Hence the add_data method. 135 | 136 | class Model(with_metaclass(abc.ABCMeta, object)): 137 | @abc.abstractmethod 138 | def add_data(self,data): 139 | pass 140 | 141 | @abc.abstractmethod 142 | def generate(self,keep=True,**kwargs): 143 | ''' 144 | Like a distribution's rvs, but this also fills in latent state over 145 | data and keeps references to the data. 
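Implementations should return an (observations, latent_variables) pair; rvs() below returns only the observations.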
146 | ''' 147 | pass 148 | 149 | def rvs(self,*args,**kwargs): 150 | return self.generate(*args,keep=False,**kwargs)[0] # 0th component is data, not latent stuff 151 | 152 | ################################################## 153 | # Algorithm interfaces for inference in models # 154 | ################################################## 155 | 156 | class ModelGibbsSampling(with_metaclass(abc.ABCMeta, Model)): 157 | @abc.abstractmethod 158 | def resample_model(self): # TODO niter? 159 | pass 160 | 161 | def copy_sample(self): 162 | ''' 163 | return an object copy suitable for making lists of posterior samples 164 | (override this method to prevent copying shared structures into each sample) 165 | ''' 166 | return copy.deepcopy(self) 167 | 168 | def resample_and_copy(self): 169 | self.resample_model() 170 | return self.copy_sample() 171 | 172 | class ModelMeanField(with_metaclass(abc.ABCMeta, Model)): 173 | @abc.abstractmethod 174 | def meanfield_coordinate_descent_step(self): 175 | # returns variational lower bound after update, if available 176 | pass 177 | 178 | def meanfield_coordinate_descent(self,tol=1e-1,maxiter=250,progprint=False,**kwargs): 179 | # NOTE: doesn't re-initialize! 180 | scores = [] 181 | step_iterator = range(maxiter) if not progprint else progprint_xrange(maxiter) 182 | for itr in step_iterator: 183 | scores.append(self.meanfield_coordinate_descent_step(**kwargs)) 184 | if scores[-1] is not None and len(scores) > 1: 185 | if np.abs(scores[-1]-scores[-2]) < tol: 186 | return scores 187 | print('WARNING: meanfield_coordinate_descent hit maxiter of %d' % maxiter) 188 | return scores 189 | 190 | class ModelMeanFieldSVI(with_metaclass(abc.ABCMeta, Model)): 191 | @abc.abstractmethod 192 | def meanfield_sgdstep(self,minibatch,prob,stepsize): 193 | pass 194 | 195 | class _EMBase(with_metaclass(abc.ABCMeta, Model)): 196 | @abc.abstractmethod 197 | def log_likelihood(self): 198 | # returns a log likelihood number on attached data 199 | pass 200 | 201 | def _EM_fit(self,method,tol=1e-1,maxiter=100,progprint=False): 202 | # NOTE: doesn't re-initialize! 
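# take steps of `method` until the attached-data log likelihood improves by less than tol or maxiter is reached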
203 | likes = [] 204 | step_iterator = range(maxiter) if not progprint else progprint_xrange(maxiter) 205 | for itr in step_iterator: 206 | method() 207 | likes.append(self.log_likelihood()) 208 | if len(likes) > 1: 209 | if likes[-1]-likes[-2] < tol: 210 | return likes 211 | elif likes[-1] < likes[-2]: 212 | # probably oscillation, do one more 213 | method() 214 | likes.append(self.log_likelihood()) 215 | return likes 216 | print('WARNING: EM_fit reached maxiter of %d' % maxiter) 217 | return likes 218 | 219 | class ModelEM(with_metaclass(abc.ABCMeta, _EMBase)): 220 | def EM_fit(self,tol=1e-1,maxiter=100): 221 | return self._EM_fit(self.EM_step,tol=tol,maxiter=maxiter) 222 | 223 | @abc.abstractmethod 224 | def EM_step(self): 225 | pass 226 | 227 | class ModelMAPEM(with_metaclass(abc.ABCMeta, _EMBase)): 228 | def MAP_EM_fit(self,tol=1e-1,maxiter=100): 229 | return self._EM_fit(self.MAP_EM_step,tol=tol,maxiter=maxiter) 230 | 231 | @abc.abstractmethod 232 | def MAP_EM_step(self): 233 | pass 234 | 235 | class ModelParallelTempering(with_metaclass(abc.ABCMeta, Model)): 236 | @abc.abstractproperty 237 | def temperature(self): 238 | pass 239 | 240 | @abc.abstractproperty 241 | def energy(self): 242 | pass 243 | 244 | @abc.abstractmethod 245 | def swap_sample_with(self,other): 246 | pass 247 | 248 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .meta import * 3 | 4 | from .regression import * 5 | from .gaussian import * 6 | from .uniform import * 7 | 8 | from .binomial import * 9 | from .multinomial import * 10 | from .negativebinomial import * 11 | from .geometric import * 12 | from .poisson import * 13 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/binomial.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | __all__ = ['Binomial'] 4 | 5 | import numpy as np 6 | import scipy.stats as stats 7 | import scipy.special as special 8 | from warnings import warn 9 | 10 | from pybasicbayes.abstractions import GibbsSampling, MeanField, \ 11 | MeanFieldSVI 12 | 13 | 14 | class Binomial(GibbsSampling, MeanField, MeanFieldSVI): 15 | ''' 16 | Models a Binomial likelihood and a Beta prior: 17 | 18 | p ~ Beta(alpha_0, beta_0) 19 | x | p ~ Binom(p,n) 20 | 21 | where p is the success probability, alpha_0-1 is the prior number of 22 | successes, beta_0-1 is the prior number of failures. 23 | 24 | A special case of Multinomial where N is fixed and each observation counts 25 | the number of successes and is in {0,1,...,N}. 
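The Beta prior is conjugate: after observing counts x_1,...,x_N (N observations with n trials each), the posterior is Beta(alpha_0 + sum_i x_i, beta_0 + N*n - sum_i x_i).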
26 | ''' 27 | def __init__(self,alpha_0,beta_0,alpha_mf=None,beta_mf=None,p=None,n=None): 28 | warn('this class is untested!') 29 | assert n is not None 30 | 31 | self.n = n 32 | self.alpha_0 = alpha_0 33 | self.beta_0 = beta_0 34 | 35 | self.alpha_mf = alpha_mf if alpha_mf is not None else alpha_0 36 | self.beta_mf = beta_mf if beta_mf is not None else beta_0 37 | 38 | if p is not None: 39 | self.p = p 40 | else: 41 | self.resample() 42 | 43 | def log_likelihood(self,x): 44 | return stats.binom.pmf(x,self.n,self.p) 45 | 46 | def rvs(self,size=None): 47 | return stats.binom.pmf(self.n,self.p,size=size) 48 | 49 | @property 50 | def natural_hypparam(self): 51 | return np.array([self.alpha_0 - 1, self.beta_0 - 1]) 52 | 53 | @natural_hypparam.setter 54 | def natural_hypparam(self,natparam): 55 | self.alpha_0, self.beta_0 = natparam + 1 56 | 57 | def _get_statistics(self,data): 58 | if isinstance(data,np.ndarray): 59 | data = data.ravel() 60 | tot = data.sum() 61 | return np.array([tot, self.n*data.shape[0] - tot]) 62 | else: 63 | return sum( 64 | (self._get_statistics(d) for d in data), 65 | self._empty_statistics()) 66 | 67 | def _get_weighted_statistics(self,data,weights): 68 | if isinstance(data,np.ndarray): 69 | data, weights = data.ravel(), weights.ravel() 70 | tot = weights.dot(data) 71 | return np.array([tot, self.n*weights.sum() - tot]) 72 | else: 73 | return sum( 74 | (self._get_weighted_statistics(d,w) for d,w in zip(data,weights)), 75 | self._empty_statistics()) 76 | 77 | def _empty_statistics(self): 78 | return np.zeros(2) 79 | 80 | ### Gibbs 81 | 82 | def resample(self,data=[]): 83 | alpha_n, beta_n = self.natural_hypparam + self._get_statistics(data) + 1 84 | self.p = np.random.beta(alpha_n,beta_n) 85 | 86 | # use Gibbs to initialize mean field 87 | self.alpha_mf = self.p * (self.alpha_0 + self.beta_0) 88 | self.beta_mf = (1-self.p) * (self.alpha_0 + self.beta_0) 89 | 90 | ### Mean field and SVI 91 | 92 | def meanfieldupdate(self,data,weights): 93 | self.mf_natural_hypparam = \ 94 | self.natural_hypparam + self._get_weighted_statistics(data,weights) 95 | 96 | # use mean field to initialize Gibbs 97 | self.p = self.alpha_mf / (self.alpha_mf + self.beta_mf) 98 | 99 | def meanfield_sgdstep(self,data,weights,minibatchprob,stepsize): 100 | self.mf_natural_hypparam = \ 101 | (1-stepsize) * self.mf_natural_hypparam + stepsize * ( 102 | self.natural_hypparam 103 | + 1./minibatchprob * self._get_weighted_statistics(data,weights)) 104 | 105 | @property 106 | def mf_natural_hypparam(self): 107 | return np.array([self.alpha_mf - 1, self.beta_mf - 1]) 108 | 109 | @mf_natural_hypparam.setter 110 | def mf_natural_hypparam(self,natparam): 111 | self.alpha_mf, self.beta_mf = natparam + 1 112 | 113 | def expected_log_likelihood(self,x): 114 | n = self.n 115 | Elnp, Eln1mp = self._mf_expected_statistics() 116 | return special.gammaln(n+1) - special.gammaln(x+1) - special.gammaln(n-x+1) \ 117 | + x*Elnp + (n-x)*Eln1mp 118 | 119 | def _mf_expected_statistics(self): 120 | return special.digamma([self.alpha_mf, self.beta_mf]) \ 121 | - special.digamma(self.alpha_mf + self.beta_mf) 122 | 123 | def get_vlb(self): 124 | Elnp, Eln1mp = self._mf_expected_statistics() 125 | return (self.alpha_0 - self.alpha_mf)*Elnp \ 126 | + (self.beta_0 - self.beta_mf)*Eln1mp \ 127 | - (self._log_partition_function(self.alpha_0, self.beta_0) 128 | - self._log_partition_function(self.alpha_mf,self.beta_mf)) 129 | 130 | def _log_partition_function(self,alpha,beta): 131 | return special.betaln(alpha,beta) 132 | 
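A minimal usage sketch for the `Binomial` class above (an illustrative, hypothetical snippet rather than one of the package's examples; the counts are made up):

```python
import numpy as np
from pybasicbayes.distributions import Binomial

# Beta(1, 1) prior over the success probability of Binomial(n=10) observations
b = Binomial(alpha_0=1., beta_0=1., n=10)

counts = np.array([3, 4, 2, 5])  # hypothetical numbers of successes out of n=10 trials each
b.resample(counts)               # Gibbs step: p ~ Beta(alpha_0 + sum(x), beta_0 + N*n - sum(x))
print(b.p)                       # a posterior sample of the success probability
```

The mean field (`meanfieldupdate`) and SVI (`meanfield_sgdstep`) updates above consume the same sufficient statistics, just in weighted form.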
-------------------------------------------------------------------------------- /pybasicbayes/distributions/geometric.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | __all__ = ['Geometric'] 4 | 5 | import numpy as np 6 | import scipy.stats as stats 7 | import scipy.special as special 8 | from warnings import warn 9 | 10 | from pybasicbayes.abstractions import GibbsSampling, MeanField, \ 11 | Collapsed, MaxLikelihood 12 | 13 | 14 | class Geometric(GibbsSampling, MeanField, Collapsed, MaxLikelihood): 15 | ''' 16 | Geometric distribution with a conjugate beta prior. 17 | The support is {1,2,3,...}. 18 | 19 | Hyperparameters: 20 | alpha_0, beta_0 21 | 22 | Parameter is the success probability: 23 | p 24 | ''' 25 | def __init__(self,alpha_0=None,beta_0=None,p=None): 26 | self.p = p 27 | 28 | self.alpha_0 = self.mf_alpha_0 = alpha_0 29 | self.beta_0 = self.mf_beta_0 = beta_0 30 | 31 | if p is None and not any(_ is None for _ in (alpha_0,beta_0)): 32 | self.resample() # intialize from prior 33 | 34 | @property 35 | def params(self): 36 | return dict(p=self.p) 37 | 38 | @property 39 | def hypparams(self): 40 | return dict(alpha_0=self.alpha_0,beta_0=self.beta_0) 41 | 42 | def _posterior_hypparams(self,n,tot): 43 | return self.alpha_0 + n, self.beta_0 + tot 44 | 45 | def log_likelihood(self,x): 46 | x = np.array(x,ndmin=1) 47 | raw = np.empty(x.shape) 48 | raw[x>0] = (x[x>0]-1.)*np.log(1.-self.p) + np.log(self.p) 49 | raw[x<1] = -np.inf 50 | return raw if isinstance(x,np.ndarray) else raw[0] 51 | 52 | def log_sf(self,x): 53 | return stats.geom.logsf(x,self.p) 54 | 55 | def pmf(self,x): 56 | return stats.geom.pmf(x,self.p) 57 | 58 | def rvs(self,size=None): 59 | return np.random.geometric(self.p,size=size) 60 | 61 | def _get_statistics(self,data): 62 | if isinstance(data,np.ndarray): 63 | n = data.shape[0] 64 | tot = data.sum() - n 65 | elif isinstance(data,list): 66 | n = sum(d.shape[0] for d in data) 67 | tot = sum(d.sum() for d in data) - n 68 | else: 69 | assert np.isscalar(data) 70 | n = 1 71 | tot = data-1 72 | return n, tot 73 | 74 | def _get_weighted_statistics(self,data,weights): 75 | if isinstance(data,np.ndarray): 76 | n = weights.sum() 77 | tot = weights.dot(data) - n 78 | elif isinstance(data,list): 79 | n = sum(w.sum() for w in weights) 80 | tot = sum(w.dot(d) for w,d in zip(weights,data)) - n 81 | else: 82 | assert np.isscalar(data) and np.isscalar(weights) 83 | n = weights 84 | tot = weights*data - 1 85 | 86 | return n, tot 87 | 88 | ### Gibbs sampling 89 | 90 | def resample(self,data=[]): 91 | self.p = np.random.beta(*self._posterior_hypparams(*self._get_statistics(data))) 92 | 93 | # initialize mean field 94 | self.alpha_mf = self.p*(self.alpha_0+self.beta_0) 95 | self.beta_mf = (1-self.p)*(self.alpha_0+self.beta_0) 96 | 97 | return self 98 | 99 | ### mean field 100 | 101 | def meanfieldupdate(self,data,weights,stats=None): 102 | warn('untested') 103 | n, tot = self._get_weighted_statistics(data,weights) if stats is None else stats 104 | self.alpha_mf = self.alpha_0 + n 105 | self.beta_mf = self.beta_0 + tot 106 | 107 | # initialize Gibbs 108 | self.p = self.alpha_mf / (self.alpha_mf + self.beta_mf) 109 | 110 | def get_vlb(self): 111 | warn('untested') 112 | Elnp, Eln1mp = self._expected_statistics(self.alpha_mf,self.beta_mf) 113 | return (self.alpha_0 - self.alpha_mf)*Elnp \ 114 | + (self.beta_0 - self.beta_mf)*Eln1mp \ 115 | - 
(self._log_partition_function(self.alpha_0,self.beta_0) 116 | - self._log_partition_function(self.alpha_mf,self.beta_mf)) 117 | 118 | def expected_log_likelihood(self,x): 119 | warn('untested') 120 | Elnp, Eln1mp = self._expected_statistics(self.alpha_mf,self.beta_mf) 121 | return (x-1)*Eln1mp + Elnp 122 | 123 | def _expected_statistics(self,alpha,beta): 124 | warn('untested') 125 | Elnp = special.digamma(alpha) - special.digamma(alpha+beta) 126 | Eln1mp = special.digamma(beta) - special.digamma(alpha+beta) 127 | return Elnp, Eln1mp 128 | 129 | ### Max likelihood 130 | 131 | def max_likelihood(self,data,weights=None): 132 | if weights is None: 133 | n, tot = self._get_statistics(data) 134 | else: 135 | n, tot = self._get_weighted_statistics(data,weights) 136 | 137 | self.p = n/tot 138 | return self 139 | 140 | ### Collapsed 141 | 142 | def log_marginal_likelihood(self,data): 143 | return self._log_partition_function(*self._posterior_hypparams(*self._get_statistics(data))) \ 144 | - self._log_partition_function(self.alpha_0,self.beta_0) 145 | 146 | def _log_partition_function(self,alpha,beta): 147 | return special.betaln(alpha,beta) 148 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/meta.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | from builtins import range 4 | __all__ = ['_FixedParamsMixin', 'ProductDistribution'] 5 | 6 | import numpy as np 7 | 8 | from pybasicbayes.abstractions import Distribution, \ 9 | GibbsSampling, MeanField, MeanFieldSVI, MaxLikelihood 10 | from pybasicbayes.util.stats import atleast_2d 11 | 12 | 13 | class _FixedParamsMixin(Distribution): 14 | @property 15 | def num_parameters(self): 16 | return 0 17 | 18 | def resample(self, *args, **kwargs): 19 | return self 20 | 21 | def meanfieldupdate(self, *args, **kwargs): 22 | return self 23 | 24 | def get_vlb(self): 25 | return 0.
26 | 27 | def copy_sample(self): 28 | return self 29 | 30 | 31 | class ProductDistribution( 32 | GibbsSampling, MeanField, MeanFieldSVI, MaxLikelihood): 33 | def __init__(self, distns, slices=None): 34 | self._distns = distns 35 | self._slices = slices if slices is not None else \ 36 | [slice(i, i+1) for i in range(len(distns))] 37 | 38 | @property 39 | def params(self): 40 | return {idx:distn.params for idx, distn in enumerate(self._distns)} 41 | 42 | @property 43 | def hypparams(self): 44 | return {idx:distn.hypparams for idx, distn in enumerate(self._distns)} 45 | 46 | @property 47 | def num_parameters(self): 48 | return sum(d.num_parameters for d in self._distns) 49 | 50 | def rvs(self,size=[]): 51 | return np.concatenate( 52 | [atleast_2d(distn.rvs(size=size)) 53 | for distn in self._distns],axis=-1) 54 | 55 | def log_likelihood(self,x): 56 | return sum( 57 | distn.log_likelihood(x[...,sl]) 58 | for distn,sl in zip(self._distns,self._slices)) 59 | 60 | ### Gibbs 61 | 62 | def resample(self,data=[]): 63 | assert isinstance(data,(np.ndarray,list)) 64 | if isinstance(data,np.ndarray): 65 | for distn,sl in zip(self._distns,self._slices): 66 | distn.resample(data[...,sl]) 67 | else: 68 | for distn,sl in zip(self._distns,self._slices): 69 | distn.resample([d[...,sl] for d in data]) 70 | return self 71 | 72 | ### Max likelihood 73 | 74 | def max_likelihood(self,data,weights=None): 75 | assert isinstance(data,(np.ndarray,list)) 76 | if isinstance(data,np.ndarray): 77 | for distn,sl in zip(self._distns,self._slices): 78 | distn.max_likelihood(data[...,sl],weights=weights) 79 | else: 80 | for distn,sl in zip(self._distns,self._slices): 81 | distn.max_likelihood([d[...,sl] for d in data],weights=weights) 82 | return self 83 | 84 | ### Mean field 85 | 86 | def get_vlb(self): 87 | return sum(distn.get_vlb() for distn in self._distns) 88 | 89 | def expected_log_likelihood(self,x): 90 | return np.sum( 91 | [distn.expected_log_likelihood(x[...,sl]) 92 | for distn,sl in zip(self._distns,self._slices)], axis=0).ravel() 93 | 94 | def meanfieldupdate(self,data,weights,**kwargs): 95 | assert isinstance(data,(np.ndarray,list)) 96 | if isinstance(data,np.ndarray): 97 | for distn,sl in zip(self._distns,self._slices): 98 | distn.meanfieldupdate(data[...,sl],weights) 99 | else: 100 | for distn,sl in zip(self._distns,self._slices): 101 | distn.meanfieldupdate( 102 | [d[...,sl] for d in data],weights=weights) 103 | return self 104 | 105 | def _resample_from_mf(self): 106 | for distn in self._distns: 107 | distn._resample_from_mf() 108 | 109 | ### SVI 110 | 111 | def meanfield_sgdstep(self,data,weights,prob,stepsize): 112 | assert isinstance(data,(np.ndarray,list)) 113 | if isinstance(data,np.ndarray): 114 | for distn,sl in zip(self._distns,self._slices): 115 | distn.meanfield_sgdstep( 116 | data[...,sl],weights,prob,stepsize) 117 | else: 118 | for distn,sl in zip(self._distns,self._slices): 119 | distn.meanfield_sgdstep( 120 | [d[...,sl] for d in data],weights,prob,stepsize) 121 | return self 122 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/multinomial.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | from builtins import map 4 | from builtins import range 5 | __all__ = ['Categorical', 'CategoricalAndConcentration', 'Multinomial', 6 | 'MultinomialAndConcentration', 'GammaCompoundDirichlet', 'CRP'] 7 | 8 | import numpy as np 9 | from warnings 
import warn 10 | import scipy.stats as stats 11 | import scipy.special as special 12 | 13 | from pybasicbayes.abstractions import \ 14 | GibbsSampling, MeanField, MeanFieldSVI, MaxLikelihood, MAP 15 | 16 | from pybasicbayes.util.stats import sample_discrete 17 | 18 | try: 19 | from pybasicbayes.util.cstats import sample_crp_tablecounts 20 | except ImportError: 21 | warn('using slow sample_crp_tablecounts') 22 | from pybasicbayes.util.stats import sample_crp_tablecounts 23 | 24 | 25 | class Categorical(GibbsSampling, MeanField, MeanFieldSVI, MaxLikelihood, MAP): 26 | ''' 27 | This class represents a categorical distribution over labels, where the 28 | parameter is weights and the prior is a Dirichlet distribution. 29 | For example, if K == 3, then five samples may look like 30 | [0,1,0,2,1] 31 | Each entry is the label of a sample, like the outcome of die rolls. In other 32 | words, generated data or data passed to log_likelihood are indices, not 33 | indicator variables! (But when 'weighted data' is passed, like in mean 34 | field or weighted max likelihood, the weights are over indicator 35 | variables...) 36 | 37 | This class can be used as a weak limit approximation for a DP, particularly by 38 | calling __init__ with alpha_0 and K arguments, in which case the prior will be 39 | a symmetric Dirichlet with K components and parameter alpha_0/K; K is then the 40 | weak limit approximation parameter. 41 | 42 | Hyperparaemters: 43 | alphav_0 (vector) OR alpha_0 (scalar) and K 44 | 45 | Parameters: 46 | weights, a vector encoding a finite pmf 47 | ''' 48 | def __init__(self,weights=None,alpha_0=None,K=None,alphav_0=None,alpha_mf=None): 49 | self.K = K 50 | self.alpha_0 = alpha_0 51 | self.alphav_0 = alphav_0 52 | 53 | self._alpha_mf = alpha_mf if alpha_mf is not None else self.alphav_0 54 | 55 | self.weights = weights 56 | 57 | if weights is None and self.alphav_0 is not None: 58 | self.resample() # intialize from prior 59 | 60 | def _get_alpha_0(self): 61 | return self._alpha_0 62 | 63 | def _set_alpha_0(self,alpha_0): 64 | self._alpha_0 = alpha_0 65 | if not any(_ is None for _ in (self.K, self._alpha_0)): 66 | self.alphav_0 = np.repeat(self._alpha_0/self.K,self.K) 67 | 68 | alpha_0 = property(_get_alpha_0,_set_alpha_0) 69 | 70 | def _get_alphav_0(self): 71 | return self._alphav_0 if hasattr(self,'_alphav_0') else None 72 | 73 | def _set_alphav_0(self,alphav_0): 74 | if alphav_0 is not None: 75 | self._alphav_0 = alphav_0 76 | self.K = len(alphav_0) 77 | 78 | alphav_0 = property(_get_alphav_0,_set_alphav_0) 79 | 80 | @property 81 | def params(self): 82 | return dict(weights=self.weights) 83 | 84 | @property 85 | def hypparams(self): 86 | return dict(alphav_0=self.alphav_0) 87 | 88 | @property 89 | def num_parameters(self): 90 | return len(self.weights) 91 | 92 | def rvs(self,size=None): 93 | return sample_discrete(self.weights,size) 94 | 95 | def log_likelihood(self,x): 96 | out = np.zeros_like(x, dtype=np.double) 97 | nanidx = np.isnan(x) 98 | err = np.seterr(divide='ignore') 99 | out[~nanidx] = np.log(self.weights)[list(x[~nanidx])] # log(0) can happen, no warning 100 | np.seterr(**err) 101 | return out 102 | 103 | ### Gibbs sampling 104 | 105 | def resample(self,data=[],counts=None): 106 | counts = self._get_statistics(data) if counts is None else counts 107 | self.weights = np.random.dirichlet(self.alphav_0 + counts) 108 | np.clip(self.weights, np.spacing(1.), np.inf, out=self.weights) 109 | # NOTE: next line is so we can use Gibbs sampling to initialize mean field 110 | self._alpha_mf 
= self.weights * self.alphav_0.sum() 111 | assert (self._alpha_mf >= 0.).all() 112 | return self 113 | 114 | def _get_statistics(self,data,K=None): 115 | K = K if K else self.K 116 | if isinstance(data,np.ndarray) or \ 117 | (isinstance(data,list) and len(data) > 0 118 | and not isinstance(data[0],(np.ndarray,list))): 119 | counts = np.bincount(data,minlength=K) 120 | else: 121 | counts = sum(np.bincount(d,minlength=K) for d in data) 122 | return counts 123 | 124 | def _get_weighted_statistics(self,data,weights): 125 | if isinstance(weights,np.ndarray): 126 | assert weights.ndim in (1,2) 127 | if data is None or weights.ndim == 2: 128 | # when weights is 2D or data is None, the weights are expected 129 | # indicators and data is just a placeholder; nominally data 130 | # should be np.arange(K)[na,:].repeat(N,axis=0) 131 | counts = np.atleast_2d(weights).sum(0) 132 | else: 133 | # when weights is 1D, data is indices and we do a weighted 134 | # bincount 135 | counts = np.bincount(data,weights,minlength=self.K) 136 | else: 137 | if len(weights) == 0: 138 | counts = np.zeros(self.K,dtype=int) 139 | else: 140 | data = data if data else [None]*len(weights) 141 | counts = sum(self._get_weighted_statistics(d,w) 142 | for d, w in zip(data,weights)) 143 | return counts 144 | 145 | ### Mean Field 146 | 147 | def meanfieldupdate(self,data,weights): 148 | # update 149 | self._alpha_mf = self.alphav_0 + self._get_weighted_statistics(data,weights) 150 | self.weights = self._alpha_mf / self._alpha_mf.sum() # for plotting 151 | assert (self._alpha_mf > 0.).all() 152 | return self 153 | 154 | def get_vlb(self): 155 | # return avg energy plus entropy, our contribution to the vlb 156 | # see Eq. 10.66 in Bishop 157 | logpitilde = self.expected_log_likelihood() # default is on np.arange(self.K) 158 | q_entropy = -1* ( 159 | (logpitilde*(self._alpha_mf-1)).sum() 160 | + special.gammaln(self._alpha_mf.sum()) - special.gammaln(self._alpha_mf).sum()) 161 | p_avgengy = special.gammaln(self.alphav_0.sum()) - special.gammaln(self.alphav_0).sum() \ 162 | + ((self.alphav_0-1)*logpitilde).sum() 163 | 164 | return p_avgengy + q_entropy 165 | 166 | def expected_log_likelihood(self,x=None): 167 | # usually called when np.all(x == np.arange(self.K)) 168 | x = x if x is not None else slice(None) 169 | return special.digamma(self._alpha_mf[x]) - special.digamma(self._alpha_mf.sum()) 170 | 171 | ### Mean Field SGD 172 | 173 | def meanfield_sgdstep(self,data,weights,prob,stepsize): 174 | self._alpha_mf = \ 175 | (1-stepsize) * self._alpha_mf + stepsize * ( 176 | self.alphav_0 177 | + 1./prob * self._get_weighted_statistics(data,weights)) 178 | self.weights = self._alpha_mf / self._alpha_mf.sum() # for plotting 179 | return self 180 | 181 | def _resample_from_mf(self): 182 | self.weights = np.random.dirichlet(self._alpha_mf) 183 | 184 | ### Max likelihood 185 | 186 | def max_likelihood(self,data,weights=None): 187 | if weights is None: 188 | counts = self._get_statistics(data) 189 | else: 190 | counts = self._get_weighted_statistics(data,weights) 191 | self.weights = counts/counts.sum() 192 | return self 193 | 194 | def MAP(self,data,weights=None): 195 | if weights is None: 196 | counts = self._get_statistics(data) 197 | else: 198 | counts = self._get_weighted_statistics(data,weights) 199 | counts += self.alphav_0 200 | self.weights = counts/counts.sum() 201 | return self 202 | 203 | 204 | class CategoricalAndConcentration(Categorical): 205 | ''' 206 | Categorical with resampling of the symmetric Dirichlet concentration 207 | 
parameter. 208 | 209 | concentration ~ Gamma(a_0,b_0) 210 | 211 | The Dirichlet prior over pi is then 212 | 213 | pi ~ Dir(concentration/K) 214 | ''' 215 | def __init__(self,a_0,b_0,K,alpha_0=None,weights=None): 216 | self.alpha_0_obj = GammaCompoundDirichlet(a_0=a_0,b_0=b_0,K=K,concentration=alpha_0) 217 | super(CategoricalAndConcentration,self).__init__(alpha_0=self.alpha_0, 218 | K=K,weights=weights) 219 | 220 | def _get_alpha_0(self): 221 | return self.alpha_0_obj.concentration 222 | 223 | def _set_alpha_0(self,alpha_0): 224 | self.alpha_0_obj.concentration = alpha_0 225 | self.alphav_0 = np.repeat(alpha_0/self.K,self.K) 226 | 227 | alpha_0 = property(_get_alpha_0, _set_alpha_0) 228 | 229 | @property 230 | def params(self): 231 | return dict(alpha_0=self.alpha_0,weights=self.weights) 232 | 233 | @property 234 | def hypparams(self): 235 | return dict(a_0=self.a_0,b_0=self.b_0,K=self.K) 236 | 237 | def resample(self,data=[]): 238 | counts = self._get_statistics(data,self.K) 239 | self.alpha_0_obj.resample(counts) 240 | self.alpha_0 = self.alpha_0 # for the effect on alphav_0 241 | return super(CategoricalAndConcentration,self).resample(data) 242 | 243 | def resample_just_weights(self,data=[]): 244 | return super(CategoricalAndConcentration,self).resample(data) 245 | 246 | def meanfieldupdate(self,*args,**kwargs): # TODO 247 | warn('MeanField not implemented for %s; concentration parameter will stay fixed') 248 | return super(CategoricalAndConcentration,self).meanfieldupdate(*args,**kwargs) 249 | 250 | def max_likelihood(self,*args,**kwargs): 251 | raise NotImplementedError 252 | 253 | 254 | class Multinomial(Categorical): 255 | ''' 256 | Like Categorical but the data are counts, so _get_statistics is overridden 257 | (though _get_weighted_statistics can stay the same!). log_likelihood also 258 | changes since, just like for the binomial special case, we sum over all 259 | possible orderings. 260 | 261 | For example, if K == 3, then a sample with n=5 might be 262 | array([2,2,1]) 263 | 264 | A Poisson process conditioned on the number of points emitted. 265 | ''' 266 | def __init__(self,weights=None,alpha_0=None,K=None,alphav_0=None,alpha_mf=None, 267 | N=1): 268 | self.N = N 269 | super(Multinomial, self).__init__(weights,alpha_0,K,alphav_0,alpha_mf) 270 | 271 | def log_likelihood(self,x): 272 | assert isinstance(x,np.ndarray) and x.ndim == 2 and x.shape[1] == self.K 273 | return np.where(x,x*np.log(self.weights),0.).sum(1) \ 274 | + special.gammaln(x.sum(1)+1) - special.gammaln(x+1).sum(1) 275 | 276 | def rvs(self,size=None,N=None): 277 | N = N if N else self.N 278 | return np.random.multinomial(N, self.weights, size=size) 279 | 280 | def _get_statistics(self,data,K=None): 281 | K = K if K else self.K 282 | if isinstance(data,np.ndarray): 283 | return np.atleast_2d(data).sum(0) 284 | else: 285 | if len(data) == 0: 286 | return np.zeros(K,dtype=int) 287 | return np.concatenate(data).sum(0) 288 | 289 | def expected_log_likelihood(self,x=None): 290 | if x is not None and (not x.ndim == 2 or not np.all(x == np.eye(x.shape[0]))): 291 | raise NotImplementedError # TODO nontrivial expected log likelihood 292 | return super(Multinomial,self).expected_log_likelihood() 293 | 294 | 295 | class MultinomialAndConcentration(CategoricalAndConcentration,Multinomial): 296 | pass 297 | 298 | 299 | class CRP(GibbsSampling): 300 | ''' 301 | concentration ~ Gamma(a_0,b_0) [b_0 is inverse scale, inverse of numpy scale arg] 302 | rvs ~ CRP(concentration) 303 | 304 | This class models CRPs. 
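Under a CRP with concentration alpha, the (n+1)-th customer joins an existing table that already seats c_k customers with probability proportional to c_k, and starts a new table with probability proportional to alpha; the rvs method below samples seatings by exactly this rule.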
The parameter is the concentration parameter (proportional 305 | to probability of starting a new table given some number of customers in the 306 | restaurant), which has a Gamma prior. 307 | ''' 308 | 309 | def __init__(self,a_0,b_0,concentration=None): 310 | self.a_0 = a_0 311 | self.b_0 = b_0 312 | 313 | if concentration is not None: 314 | self.concentration = concentration 315 | else: 316 | self.resample(niter=1) 317 | 318 | @property 319 | def params(self): 320 | return dict(concentration=self.concentration) 321 | 322 | @property 323 | def hypparams(self): 324 | return dict(a_0=self.a_0,b_0=self.b_0) 325 | 326 | def rvs(self,customer_counts): 327 | # could replace this with one of the faster C versions I have lying 328 | # around, but at least the Python version is clearer 329 | assert isinstance(customer_counts,list) or isinstance(customer_counts,int) 330 | if isinstance(customer_counts,int): 331 | customer_counts = [customer_counts] 332 | 333 | restaurants = [] 334 | for num in customer_counts: 335 | # a CRP with num customers 336 | tables = [] 337 | for c in range(num): 338 | newidx = sample_discrete(np.array(tables + [self.concentration])) 339 | if newidx == len(tables): 340 | tables += [1] 341 | else: 342 | tables[newidx] += 1 343 | 344 | restaurants.append(tables) 345 | 346 | return restaurants if len(restaurants) > 1 else restaurants[0] 347 | 348 | def log_likelihood(self,restaurants): 349 | assert isinstance(restaurants,list) and len(restaurants) > 0 350 | if not isinstance(restaurants[0],list): restaurants=[restaurants] 351 | 352 | likes = [] 353 | for counts in restaurants: 354 | counts = np.array([c for c in counts if c > 0]) # remove zero counts b/c of gammaln 355 | K = len(counts) # number of tables 356 | N = sum(counts) # number of customers 357 | likes.append(K*np.log(self.concentration) + np.sum(special.gammaln(counts)) + 358 | special.gammaln(self.concentration) - 359 | special.gammaln(N+self.concentration)) 360 | 361 | return np.asarray(likes) if len(likes) > 1 else likes[0] 362 | 363 | def resample(self,data=[],niter=50): 364 | for itr in range(niter): 365 | a_n, b_n = self._posterior_hypparams(*self._get_statistics(data)) 366 | self.concentration = np.random.gamma(a_n,scale=1./b_n) 367 | 368 | def _posterior_hypparams(self,sample_numbers,total_num_distinct): 369 | # NOTE: this is a stochastic function: it samples auxiliary variables 370 | if total_num_distinct > 0: 371 | sample_numbers = np.array(sample_numbers) 372 | sample_numbers = sample_numbers[sample_numbers > 0] 373 | 374 | wvec = np.random.beta(self.concentration+1,sample_numbers) 375 | svec = np.array(stats.bernoulli.rvs(sample_numbers/(sample_numbers+self.concentration))) 376 | return self.a_0 + total_num_distinct-svec.sum(), (self.b_0 - np.log(wvec).sum()) 377 | else: 378 | return self.a_0, self.b_0 379 | return self 380 | 381 | def _get_statistics(self,data): 382 | assert isinstance(data,list) 383 | if len(data) == 0: 384 | sample_numbers = 0 385 | total_num_distinct = 0 386 | else: 387 | if isinstance(data[0],list): 388 | sample_numbers = np.array(list(map(sum,data))) 389 | total_num_distinct = sum(map(len,data)) 390 | else: 391 | sample_numbers = np.array(sum(data)) 392 | total_num_distinct = len(data) 393 | 394 | return sample_numbers, total_num_distinct 395 | 396 | 397 | class GammaCompoundDirichlet(CRP): 398 | # TODO this class is a bit ugly 399 | ''' 400 | Implements a Gamma(a_0,b_0) prior over finite dirichlet concentration 401 | parameter. 
The concentration is scaled according to the weak-limit sequence. 402 | 403 | For each set of counts i, the model is 404 | concentration ~ Gamma(a_0,b_0) 405 | pi_i ~ Dir(concentration/K) 406 | data_i ~ Multinomial(pi_i) 407 | 408 | K is a free parameter in that with big enough K (relative to the size of the 409 | sampled data) everything starts to act like a DP; K is just the size of the 410 | mesh projection. 411 | ''' 412 | def __init__(self,K,a_0,b_0,concentration=None): 413 | self.K = K 414 | super(GammaCompoundDirichlet,self).__init__(a_0=a_0,b_0=b_0, 415 | concentration=concentration) 416 | 417 | @property 418 | def params(self): 419 | return dict(concentration=self.concentration) 420 | 421 | @property 422 | def hypparams(self): 423 | return dict(a_0=self.a_0,b_0=self.b_0,K=self.K) 424 | 425 | def rvs(self, sample_counts=None, size=None): 426 | if sample_counts is None: 427 | sample_counts = size 428 | if isinstance(sample_counts,int): 429 | sample_counts = [sample_counts] 430 | out = np.empty((len(sample_counts),self.K),dtype=int) 431 | for idx,c in enumerate(sample_counts): 432 | out[idx] = np.random.multinomial(c, 433 | np.random.dirichlet(np.repeat(self.concentration/self.K,self.K))) 434 | return out if out.shape[0] > 1 else out[0] 435 | 436 | def resample(self,data=[],niter=50,weighted_cols=None): 437 | if weighted_cols is not None: 438 | self.weighted_cols = weighted_cols 439 | else: 440 | self.weighted_cols = np.ones(self.K) 441 | 442 | # all this is to check if data is empty 443 | if isinstance(data,np.ndarray): 444 | size = data.sum() 445 | elif isinstance(data,list): 446 | size = sum(d.sum() for d in data) 447 | else: 448 | assert data == 0 449 | size = 0 450 | 451 | if size > 0: 452 | return super(GammaCompoundDirichlet,self).resample(data,niter=niter) 453 | else: 454 | return super(GammaCompoundDirichlet,self).resample(data,niter=1) 455 | 456 | def _get_statistics(self,data): 457 | # NOTE: this is a stochastic function: it samples auxiliary variables 458 | counts = np.array(data,ndmin=2,order='C') 459 | 460 | # sample m's, which sample an inverse of the weak limit projection 461 | if counts.sum() == 0: 462 | return 0, 0 463 | else: 464 | m = sample_crp_tablecounts(self.concentration,counts,self.weighted_cols) 465 | return counts.sum(1), m.sum() 466 | 467 | def _get_statistics_python(self,data): 468 | counts = np.array(data,ndmin=2) 469 | 470 | # sample m's 471 | if counts.sum() == 0: 472 | return 0, 0 473 | else: 474 | m = 0 475 | for (i,j), n in np.ndenumerate(counts): 476 | m += (np.random.rand(n) < self.concentration*self.K*self.weighted_cols[j] \ 477 | / (np.arange(n)+self.concentration*self.K*self.weighted_cols[j])).sum() 478 | return counts.sum(1), m 479 | 480 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/negativebinomial.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | from builtins import range 4 | from builtins import object 5 | __all__ = [ 6 | 'NegativeBinomial', 'NegativeBinomialFixedR', 'NegativeBinomialIntegerR2', 7 | 'NegativeBinomialIntegerR', 'NegativeBinomialFixedRVariant', 8 | 'NegativeBinomialIntegerRVariant', 9 | 'NegativeBinomialIntegerR2Variant'] 10 | 11 | import numpy as np 12 | from numpy import newaxis as na 13 | import scipy.special as special 14 | from scipy.special import logsumexp 15 | from warnings import warn 16 | 17 | from
pybasicbayes.abstractions import Distribution, GibbsSampling, \ 18 | MeanField, MeanFieldSVI, MaxLikelihood 19 | from pybasicbayes.util.stats import getdatasize, flattendata, \ 20 | sample_discrete_from_log, sample_discrete, atleast_2d 21 | 22 | try: 23 | from pybasicbayes.util.cstats import sample_crp_tablecounts 24 | except ImportError: 25 | warn('using slow sample_crp_tablecounts') 26 | from pybasicbayes.util.stats import sample_crp_tablecounts 27 | 28 | 29 | class _NegativeBinomialBase(Distribution): 30 | ''' 31 | Negative Binomial distribution with a conjugate beta prior on p and a 32 | separate gamma prior on r. The parameter r does not need to be an integer. 33 | If r is an integer, then x ~ NegBin(r,p) is the same as 34 | x = np.random.geometric(1-p,size=r).sum() - r 35 | where r is subtracted to make the geometric support be {0,1,2,...} 36 | Mean is r*p/(1-p), var is r*p/(1-p)**2 37 | 38 | Uses the data augmentation sampling method from Zhou et al. ICML 2012 39 | 40 | NOTE: the support is {0,1,2,...}. 41 | 42 | Hyperparameters: 43 | k_0, theta_0: r ~ Gamma(k, theta) 44 | or r = np.random.gamma(k,theta) 45 | alpha_0, beta_0: p ~ Beta(alpha,beta) 46 | or p = np.random.beta(alpha,beta) 47 | 48 | Parameters: 49 | r 50 | p 51 | ''' 52 | def __init__(self,r=None,p=None,k_0=None,theta_0=None,alpha_0=None,beta_0=None): 53 | self.r = r 54 | self.p = p 55 | 56 | self.k_0 = k_0 57 | self.theta_0 = theta_0 58 | self.alpha_0 = alpha_0 59 | self.beta_0 = beta_0 60 | 61 | if r is p is None and not any(_ is None for _ in (k_0,theta_0,alpha_0,beta_0)): 62 | self.resample() # initialize from prior 63 | 64 | @property 65 | def params(self): 66 | return dict(r=self.r,p=self.p) 67 | 68 | @property 69 | def hypparams(self): 70 | return dict(k_0=self.k_0,theta_0=self.theta_0, 71 | alpha_0=self.alpha_0,beta_0=self.beta_0) 72 | 73 | def log_likelihood(self,x,r=None,p=None): 74 | r = r if r is not None else self.r 75 | p = p if p is not None else self.p 76 | x = np.array(x,ndmin=1) 77 | 78 | if self.p > 0: 79 | xnn = x[x >= 0] 80 | raw = np.empty(x.shape) 81 | raw[x>=0] = special.gammaln(r + xnn) - special.gammaln(r) \ 82 | - special.gammaln(xnn+1) + r*np.log(1-p) + xnn*np.log(p) 83 | raw[x<0] = -np.inf 84 | return raw if isinstance(x,np.ndarray) else raw[0] 85 | else: 86 | raw = np.log(np.zeros(x.shape)) 87 | raw[x == 0] = 0. 88 | return raw if isinstance(x,np.ndarray) else raw[0] 89 | 90 | def log_sf(self,x): 91 | scalar = not isinstance(x,np.ndarray) 92 | x = np.atleast_1d(x) 93 | errs = np.seterr(divide='ignore') 94 | ret = np.log(special.betainc(x+1,self.r,self.p)) 95 | np.seterr(**errs) 96 | ret[x < 0] = np.log(1.)
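# NOTE: scipy.special.betainc(a,b,z) is the regularized incomplete beta I_z(a,b), so the line above computes log P(X > x) = log I_p(x+1, r), the standard negative binomial survival-function identity; values x < 0 lie below the support and get survival probability 1, and the seterr guard silences the log(0) warning when the survival probability underflows to zero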
97 | if scalar: 98 | return ret[0] 99 | else: 100 | return ret 101 | 102 | def rvs(self,size=None): 103 | return np.random.poisson(np.random.gamma(self.r,self.p/(1-self.p),size=size)) 104 | 105 | class NegativeBinomial(_NegativeBinomialBase, GibbsSampling): 106 | def resample(self,data=[],niter=20): 107 | if getdatasize(data) == 0: 108 | self.p = np.random.beta(self.alpha_0,self.beta_0) 109 | self.r = np.random.gamma(self.k_0,self.theta_0) 110 | else: 111 | data = atleast_2d(flattendata(data)) 112 | N = len(data) 113 | for itr in range(niter): 114 | ### resample r 115 | msum = sample_crp_tablecounts(self.r,data).sum() 116 | self.r = np.random.gamma(self.k_0 + msum, 1/(1/self.theta_0 - N*np.log(1-self.p))) 117 | ### resample p 118 | self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r) 119 | return self 120 | 121 | def resample_python(self,data=[],niter=20): 122 | if getdatasize(data) == 0: 123 | self.p = np.random.beta(self.alpha_0,self.beta_0) 124 | self.r = np.random.gamma(self.k_0,self.theta_0) 125 | else: 126 | data = flattendata(data) 127 | N = len(data) 128 | for itr in range(niter): 129 | ### resample r 130 | msum = 0. 131 | for n in data: 132 | msum += (np.random.rand(n) < self.r/(np.arange(n)+self.r)).sum() 133 | self.r = np.random.gamma(self.k_0 + msum, 1/(1/self.theta_0 - N*np.log(1-self.p))) 134 | ### resample p 135 | self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r) 136 | return self 137 | 138 | ### OLD unused alternatives 139 | 140 | def resample_logseriesaug(self,data=[],niter=20): 141 | # an alternative algorithm, kind of opaque and no advantages... 142 | if getdatasize(data) == 0: 143 | self.p = np.random.beta(self.alpha_0,self.beta_0) 144 | self.r = np.random.gamma(self.k_0,self.theta_0) 145 | else: 146 | data = flattendata(data) 147 | N = data.shape[0] 148 | logF = self.logF 149 | L_i = np.zeros(N) 150 | data_nz = data[data > 0] 151 | for itr in range(niter): 152 | logR = np.arange(1,logF.shape[1]+1)*np.log(self.r) + logF 153 | L_i[data > 0] = sample_discrete_from_log(logR[data_nz-1,:data_nz.max()],axis=1)+1 154 | self.r = np.random.gamma(self.k_0 + L_i.sum(), 1/(1/self.theta_0 - np.log(1-self.p)*N)) 155 | self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r) 156 | return self 157 | 158 | @classmethod 159 | def _set_up_logF(cls): 160 | if not hasattr(cls,'logF'): 161 | # actually indexes logF[0,0] to correspond to log(F(1,1)) in Zhou 162 | # paper, but keeps track of that alignment with the other code! 163 | # especially arange(1,...), only using nonzero data and shifting it 164 | SIZE = 500 165 | 166 | logF = -np.inf * np.ones((SIZE,SIZE)) 167 | logF[0,0] = 0. 
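# each row is built from the previous one: convolving with [0, m, 1] implements the Stirling-number-like recurrence F(m+1,j) = m*F(m,j) + F(m,j-1) used in the Zhou et al. augmentation, carried out in log space with max-subtraction for numerical stability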
168 | for m in range(1,logF.shape[0]): 169 | prevrow = np.exp(logF[m-1] - logF[m-1].max()) 170 | logF[m] = np.log(np.convolve(prevrow,[0,m,1],'same')) + logF[m-1].max() 171 | cls.logF = logF 172 | 173 | 174 | class NegativeBinomialFixedR(_NegativeBinomialBase, GibbsSampling, MeanField, MeanFieldSVI, MaxLikelihood): 175 | def __init__(self,r=None,p=None,alpha_0=None,beta_0=None,alpha_mf=None,beta_mf=None): 176 | self.p = p 177 | 178 | self.r = r 179 | 180 | self.alpha_0 = alpha_0 181 | self.beta_0 = beta_0 182 | 183 | if p is None and not any(_ is None for _ in (alpha_0,beta_0)): 184 | self.resample() # intialize from prior 185 | 186 | if not any(_ is None for _ in (alpha_mf,beta_mf)): 187 | self.alpha_mf = alpha_mf 188 | self.beta_mf = beta_mf 189 | 190 | @property 191 | def hypparams(self): 192 | return dict(alpha_0=self.alpha_0,beta_0=self.beta_0) 193 | 194 | @property 195 | def natural_hypparam(self): 196 | return np.array([self.alpha_0,self.beta_0]) - 1 197 | 198 | @natural_hypparam.setter 199 | def natural_hypparam(self,natparam): 200 | self.alpha_0, self.beta_0 = natparam + 1 201 | 202 | ### Mean Field 203 | 204 | def _resample_from_mf(self): 205 | self.p = np.random.beta(self.alpha_mf,self.beta_mf) 206 | return self 207 | 208 | def meanfieldupdate(self,data,weights): 209 | self.alpha_mf, self.beta_mf = \ 210 | self._posterior_hypparams(*self._get_weighted_statistics(data,weights)) 211 | self.p = self.alpha_mf / (self.alpha_mf + self.beta_mf) 212 | 213 | def meanfield_sgdstep(self,data,weights,prob,stepsize): 214 | alpha_new, beta_new = \ 215 | self._posterior_hypparams(*( 216 | 1./prob * self._get_weighted_statistics(data,weights))) 217 | self.alpha_mf = (1-stepsize)*self.alpha_mf + stepsize*alpha_new 218 | self.beta_mf = (1-stepsize)*self.beta_mf + stepsize*beta_new 219 | self.p = self.alpha_mf / (self.alpha_mf + self.beta_mf) 220 | 221 | def get_vlb(self): 222 | Elnp, Eln1mp = self._mf_expected_statistics() 223 | p_avgengy = (self.alpha_0-1)*Elnp + (self.beta_0-1)*Eln1mp \ 224 | - (special.gammaln(self.alpha_0) + special.gammaln(self.beta_0) 225 | - special.gammaln(self.alpha_0 + self.beta_0)) 226 | q_entropy = special.betaln(self.alpha_mf,self.beta_mf) \ 227 | - (self.alpha_mf-1)*special.digamma(self.alpha_mf) \ 228 | - (self.beta_mf-1)*special.digamma(self.beta_mf) \ 229 | + (self.alpha_mf+self.beta_mf-2)*special.digamma(self.alpha_mf+self.beta_mf) 230 | return p_avgengy + q_entropy 231 | 232 | def _mf_expected_statistics(self): 233 | Elnp, Eln1mp = special.digamma([self.alpha_mf,self.beta_mf]) \ 234 | - special.digamma(self.alpha_mf + self.beta_mf) 235 | return Elnp, Eln1mp 236 | 237 | def expected_log_likelihood(self,x): 238 | Elnp, Eln1mp = self._mf_expected_statistics() 239 | x = np.atleast_1d(x) 240 | errs = np.seterr(invalid='ignore') 241 | out = x*Elnp + self.r*Eln1mp + self._log_base_measure(x,self.r) 242 | np.seterr(**errs) 243 | out[np.isnan(out)] = -np.inf 244 | return out if out.shape[0] > 1 else out[0] 245 | 246 | @staticmethod 247 | def _log_base_measure(x,r): 248 | return special.gammaln(x+r) - special.gammaln(x+1) - special.gammaln(r) 249 | 250 | ### Gibbs 251 | 252 | def resample(self,data=[]): 253 | self.p = np.random.beta(*self._posterior_hypparams(*self._get_statistics(data))) 254 | # set mean field params to something reasonable for initialization 255 | fakedata = self.rvs(10) 256 | self.alpha_mf, self.beta_mf = self._posterior_hypparams(*self._get_statistics(fakedata)) 257 | 258 | ### Max likelihood 259 | 260 | def max_likelihood(self,data,weights=None): 
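# with r held fixed, the ML estimate follows from the mean r*p/(1-p): writing xbar = tot/n gives p_hat = xbar/(r + xbar)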
261 | if weights is None: 262 | n, tot = self._get_statistics(data) 263 | else: 264 | n, tot = self._get_weighted_statistics(data,weights) 265 | 266 | self.p = (tot/n) / (self.r + tot/n) 267 | return self 268 | 269 | ### Statistics and posterior hypparams 270 | 271 | def _get_statistics(self,data): 272 | if getdatasize(data) == 0: 273 | n, tot = 0, 0 274 | elif isinstance(data,np.ndarray): 275 | assert np.all(data >= 0) 276 | data = np.atleast_1d(data) 277 | n, tot = data.shape[0], data.sum() 278 | elif isinstance(data,list): 279 | assert all(np.all(d >= 0) for d in data) 280 | n = sum(d.shape[0] for d in data) 281 | tot = sum(d.sum() for d in data) 282 | else: 283 | assert np.isscalar(data) 284 | n = 1 285 | tot = data 286 | 287 | return np.array([n, tot]) 288 | 289 | def _get_weighted_statistics(self,data,weights): 290 | if isinstance(weights,np.ndarray): 291 | assert np.all(data >= 0) and data.ndim == 1 292 | n, tot = weights.sum(), weights.dot(data) 293 | else: 294 | assert all(np.all(d >= 0) for d in data) 295 | n = sum(w.sum() for w in weights) 296 | tot = sum(w.dot(d) for d,w in zip(data,weights)) 297 | 298 | return np.array([n, tot]) 299 | 300 | def _posterior_hypparams(self,n,tot): 301 | return np.array([self.alpha_0 + tot, self.beta_0 + n*self.r]) 302 | 303 | class NegativeBinomialIntegerR2(_NegativeBinomialBase,MeanField,MeanFieldSVI,GibbsSampling): 304 | # NOTE: this class should replace NegativeBinomialFixedR completely... 305 | _fixedr_class = NegativeBinomialFixedR 306 | 307 | def __init__(self,alpha_0=None,beta_0=None,alphas_0=None,betas_0=None, 308 | r_support=None,r_probs=None,r_discrete_distn=None, 309 | r=None,ps=None): 310 | 311 | assert (r_discrete_distn is not None) ^ (r_support is not None and r_probs is not None) 312 | if r_discrete_distn is not None: 313 | r_support, = np.where(r_discrete_distn) 314 | r_probs = r_discrete_distn[r_support] 315 | r_support += 1 316 | self.r_support = np.asarray(r_support) 317 | self.rho_0 = self.rho_mf = np.log(r_probs) 318 | 319 | assert (alpha_0 is not None and beta_0 is not None) \ 320 | ^ (alphas_0 is not None and betas_0 is not None) 321 | alphas_0 = alphas_0 if alphas_0 is not None else [alpha_0]*len(r_support) 322 | betas_0 = betas_0 if betas_0 is not None else [beta_0]*len(r_support) 323 | ps = ps if ps is not None else [None]*len(r_support) 324 | self._fixedr_distns = \ 325 | [self._fixedr_class(r=r,p=p,alpha_0=alpha_0,beta_0=beta_0) 326 | for r,p,alpha_0,beta_0 in zip(r_support,ps,alphas_0,betas_0)] 327 | 328 | # for init 329 | self.ridx = sample_discrete(r_probs) 330 | self.r = r_support[self.ridx] 331 | 332 | def __repr__(self): 333 | return 'NB(r=%d,p=%0.3f)' % (self.r,self.p) 334 | 335 | @property 336 | def alphas_0(self): 337 | return np.array([d.alpha_0 for d in self._fixedr_distns]) \ 338 | if len(self._fixedr_distns) > 0 else None 339 | 340 | @property 341 | def betas_0(self): 342 | return np.array([d.beta_0 for d in self._fixedr_distns]) \ 343 | if len(self._fixedr_distns) > 0 else None 344 | 345 | @property 346 | def p(self): 347 | return self._fixedr_distns[self.ridx].p 348 | 349 | @p.setter 350 | def p(self,val): 351 | self._fixedr_distns[self.ridx].p = val 352 | 353 | def _resample_from_mf(self): 354 | self._resample_r_from_mf() 355 | self._resample_p_from_mf() 356 | 357 | def _resample_r_from_mf(self): 358 | lognorm = logsumexp(self.rho_mf) 359 | self.ridx = sample_discrete(np.exp(self.rho_mf - lognorm)) 360 | self.r = self.r_support[self.ridx] 361 | 362 | def _resample_p_from_mf(self): 363 | d = 
self._fixedr_distns[self.ridx] 364 | self.p = np.random.beta(d.alpha_mf,d.beta_mf) 365 | 366 | def get_vlb(self): 367 | return self._r_vlb() + sum(np.exp(rho)*d.get_vlb() 368 | for rho,d in zip(self.rho_mf,self._fixedr_distns)) 369 | 370 | def _r_vlb(self): 371 | return np.exp(self.rho_mf).dot(self.rho_0) \ 372 | - np.exp(self.rho_mf).dot(self.rho_mf) 373 | 374 | def meanfieldupdate(self,data,weights): 375 | for d in self._fixedr_distns: 376 | d.meanfieldupdate(data,weights) 377 | self._update_rho_mf(data,weights) 378 | # everything below here is for plotting 379 | ridx = self.rho_mf.argmax() 380 | d = self._fixedr_distns[ridx] 381 | self.r = d.r 382 | self.p = d.alpha_mf / (d.alpha_mf + d.beta_mf) 383 | 384 | def _update_rho_mf(self,data,weights): 385 | self.rho_mf = self.rho_0.copy() 386 | for idx, d in enumerate(self._fixedr_distns): 387 | n, tot = d._get_weighted_statistics(data,weights) 388 | Elnp, Eln1mp = d._mf_expected_statistics() 389 | self.rho_mf[idx] += (d.alpha_0-1+tot)*Elnp + (d.beta_0-1+n*d.r)*Eln1mp 390 | if isinstance(data,np.ndarray): 391 | self.rho_mf[idx] += weights.dot(d._log_base_measure(data,d.r)) 392 | else: 393 | self.rho_mf[idx] += sum(w.dot(d._log_base_measure(dt,d.r)) 394 | for dt,w in zip(data,weights)) 395 | 396 | def expected_log_likelihood(self,x): 397 | lognorm = logsumexp(self.rho_mf) 398 | return sum(np.exp(rho-lognorm)*d.expected_log_likelihood(x) 399 | for rho,d in zip(self.rho_mf,self._fixedr_distns)) 400 | 401 | def meanfield_sgdstep(self,data,weights,prob,stepsize): 402 | rho_mf_orig = self.rho_mf.copy() 403 | if isinstance(data,np.ndarray): 404 | self._update_rho_mf(data,prob*weights) 405 | else: 406 | self._update_rho_mf(data,[w*prob for w in weights]) 407 | rho_mf_new = self.rho_mf 408 | 409 | for d in self._fixedr_distns: 410 | d.meanfield_sgdstep(data,weights,prob,stepsize) 411 | 412 | self.rho_mf = (1-stepsize)*rho_mf_orig + stepsize*rho_mf_new 413 | 414 | # for plotting 415 | ridx = self.rho_mf.argmax() 416 | d = self._fixedr_distns[ridx] 417 | self.r = d.r 418 | self.p = d.alpha_mf / (d.alpha_mf + d.beta_mf) 419 | 420 | def resample(self,data=[]): 421 | self._resample_r(data) # marginalizes out p values 422 | self._resample_p(data) # resample p given sampled r 423 | return self 424 | 425 | def _resample_r(self,data): 426 | self.ridx = sample_discrete( 427 | self._posterior_hypparams(self._get_statistics(data))) 428 | self.r = self.r_support[self.ridx] 429 | return self 430 | 431 | def _resample_p(self,data): 432 | self._fixedr_distns[self.ridx].resample(data) 433 | return self 434 | 435 | def _get_statistics(self,data=[]): 436 | n, tot = self._fixedr_distns[0]._get_statistics(data) 437 | if n > 0: 438 | data = flattendata(data) 439 | alphas_n, betas_n = self.alphas_0 + tot, self.betas_0 + self.r_support*n 440 | log_marg_likelihoods = \ 441 | special.betaln(alphas_n, betas_n) \ 442 | - special.betaln(self.alphas_0, self.betas_0) \ 443 | + (special.gammaln(data[:,na]+self.r_support) 444 | - special.gammaln(data[:,na]+1) \ 445 | - special.gammaln(self.r_support)).sum(0) 446 | else: 447 | log_marg_likelihoods = np.zeros_like(self.r_support) 448 | return log_marg_likelihoods 449 | 450 | def _posterior_hypparams(self,log_marg_likelihoods): 451 | log_posterior_discrete = self.rho_0 + log_marg_likelihoods 452 | return np.exp(log_posterior_discrete - log_posterior_discrete.max()) 453 | 454 | class NegativeBinomialIntegerR(NegativeBinomialFixedR, GibbsSampling, MaxLikelihood): 455 | ''' 456 | Nonconjugate Discrete+Beta prior 457 | 
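For example, r_discrete_distn=np.ones(4)/4. (an illustrative choice) places a uniform prior on r in {1,2,3,4}.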
r_discrete_distribution is an array where index i is p(r=i+1) 458 | ''' 459 | def __init__(self,r_discrete_distn=None,r_support=None, 460 | alpha_0=None,beta_0=None,r=None,p=None): 461 | self.r_support = r_support 462 | self.r_discrete_distn = r_discrete_distn 463 | self.alpha_0 = alpha_0 464 | self.beta_0 = beta_0 465 | self.r = r 466 | self.p = p 467 | 468 | if r is p is None \ 469 | and not any(_ is None for _ in (r_discrete_distn,alpha_0,beta_0)): 470 | self.resample() # intialize from prior 471 | 472 | @property 473 | def hypparams(self): 474 | return dict(r_discrete_distn=self.r_discrete_distn, 475 | alpha_0=self.alpha_0,beta_0=self.beta_0) 476 | 477 | def get_r_discrete_distn(self): 478 | return self._r_discrete_distn 479 | 480 | def set_r_discrete_distn(self,r_discrete_distn): 481 | if r_discrete_distn is not None: 482 | r_discrete_distn = np.asarray(r_discrete_distn,dtype=np.float) 483 | r_support, = np.where(r_discrete_distn) 484 | r_probs = r_discrete_distn[r_support] 485 | r_probs /= r_probs.sum() 486 | r_support += 1 # r_probs[0] corresponds to r=1 487 | 488 | self.r_support = r_support 489 | self.r_probs = r_probs 490 | self._r_discrete_distn = r_discrete_distn 491 | 492 | r_discrete_distn = property(get_r_discrete_distn,set_r_discrete_distn) 493 | 494 | def rvs(self,size=None): 495 | out = np.random.geometric(1-self.p,size=size)-1 496 | for i in range(self.r-1): 497 | out += np.random.geometric(1-self.p,size=size)-1 498 | return out 499 | 500 | def resample(self,data=[]): 501 | alpha_n, betas_n, posterior_discrete = self._posterior_hypparams( 502 | *self._get_statistics(data)) 503 | 504 | r_idx = sample_discrete(posterior_discrete) 505 | self.r = self.r_support[r_idx] 506 | self.p = np.random.beta(alpha_n, betas_n[r_idx]) 507 | 508 | # NOTE: this class has a conjugate prior even though it's not in the 509 | # exponential family, so I wrote _get_statistics and _get_weighted_statistics 510 | # (which integrate out p) for the resample() and meanfield_update() methods, 511 | # though these aren't statistics in the exponential family sense 512 | 513 | def _get_statistics(self,data): 514 | # NOTE: since this isn't really in exponential family, this method needs 515 | # to look at hyperparameters. 
form posterior hyperparameters for the p 516 | # parameters here so we can integrate them out and get the r statistics 517 | n, tot = super(NegativeBinomialIntegerR,self)._get_statistics(data) 518 | if n > 0: 519 | alpha_n, betas_n = self.alpha_0 + tot, self.beta_0 + self.r_support*n 520 | data = flattendata(data) 521 | log_marg_likelihoods = \ 522 | special.betaln(alpha_n, betas_n) \ 523 | - special.betaln(self.alpha_0, self.beta_0) \ 524 | + (special.gammaln(data[:,na]+self.r_support) 525 | - special.gammaln(data[:,na]+1) \ 526 | - special.gammaln(self.r_support)).sum(0) 527 | else: 528 | log_marg_likelihoods = np.zeros_like(self.r_support) 529 | 530 | return n, tot, log_marg_likelihoods 531 | 532 | def _get_weighted_statistics(self,data,weights): 533 | n, tot = super(NegativeBinomialIntegerR,self)._get_weighted_statistics(data,weights) 534 | if n > 0: 535 | alpha_n, betas_n = self.alpha_0 + tot, self.beta_0 + self.r_support*n 536 | data, weights = flattendata(data), flattendata(weights) 537 | log_marg_likelihoods = \ 538 | special.betaln(alpha_n, betas_n) \ 539 | - special.betaln(self.alpha_0, self.beta_0) \ 540 | + (special.gammaln(data[:,na]+self.r_support) 541 | - special.gammaln(data[:,na]+1) \ 542 | - special.gammaln(self.r_support)).dot(weights) 543 | else: 544 | log_marg_likelihoods = np.zeros_like(self.r_support) 545 | 546 | return n, tot, log_marg_likelihoods 547 | 548 | def _posterior_hypparams(self,n,tot,log_marg_likelihoods): 549 | alpha_n = self.alpha_0 + tot 550 | betas_n = self.beta_0 + n*self.r_support 551 | log_posterior_discrete = np.log(self.r_probs) + log_marg_likelihoods 552 | posterior_discrete = np.exp(log_posterior_discrete - log_posterior_discrete.max()) 553 | return alpha_n, betas_n, posterior_discrete 554 | 555 | def max_likelihood(self,data,weights=None,stats=None): 556 | if stats is not None: 557 | n, tot = stats 558 | elif weights is None: 559 | n, tot = super(NegativeBinomialIntegerR,self)._get_statistics(data) 560 | else: 561 | n, tot = super(NegativeBinomialIntegerR,self)._get_weighted_statistics(data,weights) 562 | 563 | if n > 1: 564 | rs = self.r_support 565 | ps = self._max_likelihood_ps(n,tot,rs) 566 | 567 | # TODO TODO this isn't right for weighted data: do weighted sums 568 | if isinstance(data,np.ndarray): 569 | likelihoods = np.array([self.log_likelihood(data,r=r,p=p).sum() 570 | for r,p in zip(rs,ps)]) 571 | else: 572 | likelihoods = np.array([sum(self.log_likelihood(d,r=r,p=p).sum() 573 | for d in data) for r,p in zip(rs,ps)]) 574 | 575 | argmax = likelihoods.argmax() 576 | self.r = self.r_support[argmax] 577 | self.p = ps[argmax] 578 | return self 579 | 580 | def _log_base_measure(self,data): 581 | return [(special.gammaln(r+data) - special.gammaln(r) - special.gammaln(data+1)).sum() 582 | for r in self.r_support] 583 | 584 | def _max_likelihood_ps(self,n,tot,rs): 585 | ps = (tot/n) / (rs + tot/n) 586 | assert (ps >= 0).all() 587 | return ps 588 | 589 | class _StartAtRMixin(object): 590 | def log_likelihood(self,x,**kwargs): 591 | r = kwargs['r'] if 'r' in kwargs else self.r 592 | return super(_StartAtRMixin,self).log_likelihood(x-r,**kwargs) 593 | 594 | def log_sf(self,x,**kwargs): 595 | return super(_StartAtRMixin,self).log_sf(x-self.r,**kwargs) 596 | 597 | def expected_log_likelihood(self,x,**kwargs): 598 | r = kwargs['r'] if 'r' in kwargs else self.r 599 | return super(_StartAtRMixin,self).expected_log_likelihood(x-r,**kwargs) 600 | 601 | def rvs(self,size=[]): 602 | return super(_StartAtRMixin,self).rvs(size)+self.r 603 | 604 | class 
NegativeBinomialFixedRVariant(_StartAtRMixin,NegativeBinomialFixedR): 605 | def _get_statistics(self,data): 606 | n, tot = super(NegativeBinomialFixedRVariant,self)._get_statistics(data) 607 | n, tot = n, tot-n*self.r 608 | assert tot >= 0 609 | return np.array([n, tot]) 610 | 611 | def _get_weighted_statistics(self,data,weights): 612 | n, tot = super(NegativeBinomialFixedRVariant,self)._get_weighted_statistics(data,weights) 613 | n, tot = n, tot-n*self.r 614 | assert tot >= 0 615 | return np.array([n, tot]) 616 | 617 | class NegativeBinomialIntegerRVariant(NegativeBinomialIntegerR): 618 | def resample(self,data=[]): 619 | n, alpha_n, posterior_discrete, r_support = self._posterior_hypparams( 620 | *self._get_statistics(data)) # NOTE: pass out r_support b/c feasible subset 621 | self.r = r_support[sample_discrete(posterior_discrete)] 622 | self.p = np.random.beta(alpha_n - n*self.r, self.beta_0 + n*self.r) 623 | 624 | def _get_statistics(self,data): 625 | n = getdatasize(data) 626 | if n > 0: 627 | data = flattendata(data) 628 | feasible = self.r_support <= data.min() 629 | assert np.any(feasible) 630 | r_support = self.r_support[feasible] 631 | normalizers = (special.gammaln(data[:,na]) - special.gammaln(data[:,na]-r_support+1) 632 | - special.gammaln(r_support)).sum(0) 633 | return n, data.sum(), normalizers, feasible 634 | else: 635 | return n, None, None, None 636 | 637 | def _posterior_hypparams(self,n,tot,normalizers,feasible): 638 | if n == 0: 639 | return n, self.alpha_0, self.r_probs, self.r_support 640 | else: 641 | r_probs = self.r_probs[feasible] 642 | r_support = self.r_support[feasible] 643 | log_marg_likelihoods = special.betaln(self.alpha_0 + tot - n*r_support, 644 | self.beta_0 + r_support*n) \ 645 | - special.betaln(self.alpha_0, self.beta_0) \ 646 | + normalizers 647 | log_marg_probs = np.log(r_probs) + log_marg_likelihoods 648 | log_marg_probs -= log_marg_probs.max() 649 | marg_probs = np.exp(log_marg_probs) 650 | 651 | return n, self.alpha_0 + tot, marg_probs, r_support 652 | 653 | def _max_likelihood_ps(self,n,tot,rs): 654 | ps = 1-(rs*n)/tot 655 | assert (ps >= 0).all() 656 | return ps 657 | 658 | def rvs(self,size=[]): 659 | return super(NegativeBinomialIntegerRVariant,self).rvs(size) + self.r 660 | 661 | class NegativeBinomialIntegerR2Variant(NegativeBinomialIntegerR2): 662 | _fixedr_class = NegativeBinomialFixedRVariant 663 | 664 | def _update_rho_mf(self,data,weights): 665 | self.rho_mf = self.rho_0.copy() 666 | for idx, d in enumerate(self._fixedr_distns): 667 | n, tot = d._get_weighted_statistics(data,weights) 668 | Elnp, Eln1mp = d._mf_expected_statistics() 669 | self.rho_mf[idx] += (d.alpha_0-1+tot)*Elnp + (d.beta_0-1+n*d.r)*Eln1mp 670 | self.rho_mf_temp = self.rho_mf.copy() 671 | 672 | # NOTE: this method only needs to override parent in the base measure 673 | # part, i.e. 
data -> data-r 674 | if isinstance(data,np.ndarray): 675 | self.rho_mf[idx] += weights.dot(d._log_base_measure(data-d.r,d.r)) 676 | else: 677 | self.rho_mf[idx] += sum(w.dot(d._log_base_measure(dt-d.r,d.r)) 678 | for dt,w in zip(data,weights)) 679 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/poisson.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | __all__ = ['Poisson'] 4 | import numpy as np 5 | import scipy.stats as stats 6 | import scipy.special as special 7 | 8 | from pybasicbayes.abstractions import GibbsSampling, Collapsed, \ 9 | MaxLikelihood, MeanField, MeanFieldSVI 10 | 11 | 12 | class Poisson(GibbsSampling, Collapsed, MaxLikelihood, MeanField, MeanFieldSVI): 13 | ''' 14 | Poisson distribution with a conjugate Gamma prior. 15 | 16 | NOTE: the support is {0,1,2,...} 17 | 18 | Hyperparameters (following Wikipedia's notation): 19 | alpha_0, beta_0 20 | 21 | Parameter is the mean/variance parameter: 22 | lmbda 23 | ''' 24 | def __init__(self,lmbda=None,alpha_0=None,beta_0=None,mf_alpha_0=None,mf_beta_0=None): 25 | self.lmbda = lmbda 26 | 27 | self.alpha_0 = alpha_0 28 | self.beta_0 = beta_0 29 | self.mf_alpha_0 = mf_alpha_0 if mf_alpha_0 is not None else alpha_0 30 | self.mf_beta_0 = mf_beta_0 if mf_beta_0 is not None else beta_0 31 | 32 | if lmbda is None and not any(_ is None for _ in (alpha_0,beta_0)): 33 | self.resample() # intialize from prior 34 | 35 | @property 36 | def params(self): 37 | return dict(lmbda=self.lmbda) 38 | 39 | @property 40 | def hypparams(self): 41 | return dict(alpha_0=self.alpha_0,beta_0=self.beta_0) 42 | 43 | def log_sf(self,x): 44 | return stats.poisson.logsf(x,self.lmbda) 45 | 46 | def _posterior_hypparams(self,n,tot): 47 | return self.alpha_0 + tot, self.beta_0 + n 48 | 49 | def rvs(self,size=None): 50 | return np.random.poisson(self.lmbda,size=size) 51 | 52 | def log_likelihood(self,x): 53 | lmbda = self.lmbda 54 | x = np.array(x,ndmin=1) 55 | raw = np.empty(x.shape) 56 | raw[x>=0] = -lmbda + x[x>=0]*np.log(lmbda) - special.gammaln(x[x>=0]+1) 57 | raw[x<0] = -np.inf 58 | return raw if isinstance(x,np.ndarray) else raw[0] 59 | 60 | def _get_statistics(self,data): 61 | if isinstance(data,np.ndarray): 62 | n = data.shape[0] 63 | tot = data.sum() 64 | elif isinstance(data,list): 65 | n = sum(d.shape[0] for d in data) 66 | tot = sum(d.sum() for d in data) 67 | else: 68 | assert np.isscalar(data) 69 | n = 1 70 | tot = data 71 | 72 | return n, tot 73 | 74 | def _get_weighted_statistics(self,data,weights): 75 | if isinstance(data,np.ndarray): 76 | n = weights.sum() 77 | tot = weights.dot(data) 78 | elif isinstance(data,list): 79 | n = sum(w.sum() for w in weights) 80 | tot = sum(w.dot(d) for w,d in zip(weights,data)) 81 | else: 82 | assert np.isscalar(data) and np.isscalar(weights) 83 | n = weights 84 | tot = weights*data 85 | 86 | return np.array([n, tot]) 87 | 88 | ### Gibbs Sampling 89 | 90 | def resample(self,data=[],stats=None): 91 | stats = self._get_statistics(data) if stats is None else stats 92 | alpha_n, beta_n = self._posterior_hypparams(*stats) 93 | self.lmbda = np.random.gamma(alpha_n,1/beta_n) 94 | 95 | # next line is for mean field initialization 96 | self.mf_alpha_0, self.mf_beta_0 = self.lmbda * self.beta_0, self.beta_0 97 | 98 | return self 99 | 100 | ### Mean Field 101 | 102 | def _resample_from_mf(self): 103 | mf_alpha_0, mf_beta_0 = 
self._natural_to_standard(self.mf_natural_hypparam) 104 | self.lmbda = np.random.gamma(mf_alpha_0, 1./mf_beta_0) 105 | 106 | def meanfieldupdate(self,data,weights): 107 | self.mf_natural_hypparam = \ 108 | self.natural_hypparam + self._get_weighted_statistics(data,weights) 109 | self.lmbda = self.mf_alpha_0 / self.mf_beta_0 110 | 111 | def meanfield_sgdstep(self,data,weights,prob,stepsize): 112 | self.mf_natural_hypparam = \ 113 | (1-stepsize) * self.mf_natural_hypparam + stepsize * ( 114 | self.natural_hypparam 115 | + 1./prob * self._get_weighted_statistics(data,weights)) 116 | 117 | def get_vlb(self): 118 | return (self.natural_hypparam - self.mf_natural_hypparam).dot(self._mf_expected_statistics) \ 119 | - (self._log_partition_fn(self.alpha_0,self.beta_0) 120 | - self._log_partition_fn(self.mf_alpha_0,self.mf_beta_0)) 121 | 122 | def expected_log_likelihood(self,x): 123 | Emlmbda, Elnlmbda = self._mf_expected_statistics 124 | return -special.gammaln(x+1) + Elnlmbda * x + Emlmbda 125 | 126 | @property 127 | def _mf_expected_statistics(self): 128 | alpha, beta = self.mf_alpha_0, self.mf_beta_0 129 | return np.array([-alpha/beta, special.digamma(alpha) - np.log(beta)]) 130 | 131 | 132 | @property 133 | def natural_hypparam(self): 134 | return self._standard_to_natural(self.alpha_0,self.beta_0) 135 | 136 | @property 137 | def mf_natural_hypparam(self): 138 | return self._standard_to_natural(self.mf_alpha_0,self.mf_beta_0) 139 | 140 | @mf_natural_hypparam.setter 141 | def mf_natural_hypparam(self,natparam): 142 | self.mf_alpha_0, self.mf_beta_0 = self._natural_to_standard(natparam) 143 | 144 | 145 | def _standard_to_natural(self,alpha,beta): 146 | return np.array([beta, alpha-1]) 147 | 148 | def _natural_to_standard(self,natparam): 149 | return natparam[1]+1, natparam[0] 150 | 151 | ### Collapsed 152 | 153 | def log_marginal_likelihood(self,data): 154 | return self._log_partition_fn(*self._posterior_hypparams(*self._get_statistics(data))) \ 155 | - self._log_partition_fn(self.alpha_0,self.beta_0) \ 156 | - self._get_sum_of_gammas(data) 157 | 158 | def _log_partition_fn(self,alpha,beta): 159 | return special.gammaln(alpha) - alpha * np.log(beta) 160 | 161 | def _get_sum_of_gammas(self,data): 162 | if isinstance(data,np.ndarray): 163 | return special.gammaln(data+1).sum() 164 | elif isinstance(data,list): 165 | return sum(special.gammaln(d+1).sum() for d in data) 166 | else: 167 | assert isinstance(data,int) 168 | return special.gammaln(data+1) 169 | 170 | ### Max likelihood 171 | 172 | def max_likelihood(self,data,weights=None): 173 | if weights is None: 174 | n, tot = self._get_statistics(data) 175 | else: 176 | n, tot = self._get_weighted_statistics(data,weights) 177 | 178 | if n > 1e-2: 179 | self.lmbda = tot/n 180 | assert self.lmbda > 0 181 | else: 182 | self.broken = True 183 | self.lmbda = 999999 184 | 185 | return self 186 | 187 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/uniform.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import map 3 | from builtins import range 4 | __all__ = ['UniformOneSided', 'Uniform'] 5 | 6 | import numpy as np 7 | 8 | from pybasicbayes.abstractions import GibbsSampling 9 | from pybasicbayes.util.stats import sample_pareto 10 | from pybasicbayes.util.general import any_none 11 | 12 | 13 | class UniformOneSided(GibbsSampling): 14 | ''' 15 | Models a uniform distribution over [low,high] for a parameter 
high. 16 | Low is a fixed hyperparameter (hence "OneSided"). See the Uniform class for 17 | the two-sided version. 18 | 19 | Likelihood is x ~ U[low,high] 20 | Prior is high ~ Pareto(x_m,alpha) following Wikipedia's notation 21 | 22 | Hyperparameters: 23 | x_m, alpha, low 24 | 25 | Parameters: 26 | high 27 | ''' 28 | def __init__(self,high=None,x_m=None,alpha=None,low=0.): 29 | self.high = high 30 | 31 | self.x_m = x_m 32 | self.alpha = alpha 33 | self.low = low 34 | 35 | have_hypers = x_m is not None and alpha is not None 36 | if high is None and have_hypers: 37 | self.resample() # intialize from prior 38 | 39 | @property 40 | def params(self): 41 | return {'high':self.high} 42 | 43 | @property 44 | def hypparams(self): 45 | return dict(x_m=self.x_m,alpha=self.alpha,low=self.low) 46 | 47 | def log_likelihood(self,x): 48 | x = np.atleast_1d(x) 49 | raw = np.where( 50 | (self.low <= x) & (x < self.high), 51 | -np.log(self.high - self.low),-np.inf) 52 | return raw if isinstance(x,np.ndarray) else raw[0] 53 | 54 | def rvs(self,size=[]): 55 | return np.random.uniform(low=self.low,high=self.high,size=size) 56 | 57 | def resample(self,data=[]): 58 | self.high = sample_pareto( 59 | *self._posterior_hypparams(*self._get_statistics(data))) 60 | return self 61 | 62 | def _get_statistics(self,data): 63 | if isinstance(data,np.ndarray): 64 | n = data.shape[0] 65 | datamax = data.max() 66 | else: 67 | n = sum(d.shape[0] for d in data) 68 | datamax = \ 69 | max(d.max() for d in data) if n > 0 else -np.inf 70 | return n, datamax 71 | 72 | def _posterior_hypparams(self,n,datamax): 73 | return max(datamax,self.x_m), n + self.alpha 74 | 75 | 76 | class Uniform(UniformOneSided): 77 | ''' 78 | Models a uniform distribution over [low,high] for parameters low and high. 79 | The prior is non-conjugate (though it's conditionally conjugate over one 80 | parameter at a time). 
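Resampling therefore alternates the two conditional updates: high given low uses the one-sided update directly, and low given high reuses the same update after transforming the data with _flip_data (see resample below).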
81 | 82 | Likelihood is x ~ U[low,high] 83 | Prior is -low ~ Pareto(x_m_low,alpha_low)-2*x_m_low 84 | high ~ Pareto(x_m_high,alpha_high) 85 | 86 | Hyperparameters: 87 | x_m_low, alpha_low 88 | x_m_high, alpha_high 89 | 90 | Parameters: 91 | low, high 92 | ''' 93 | def __init__( 94 | self,low=None,high=None, 95 | x_m_low=None,alpha_low=None,x_m_high=None,alpha_high=None): 96 | self.low = low 97 | self.high = high 98 | 99 | self.x_m_low = x_m_low 100 | self.alpha_low = alpha_low 101 | self.x_m_high = x_m_high 102 | self.alpha_high = alpha_high 103 | 104 | have_hypers = not any_none(x_m_low,alpha_low,x_m_high,alpha_high) 105 | if low is high is None and have_hypers: 106 | self.resample() # initialize from prior 107 | 108 | @property 109 | def params(self): 110 | return dict(low=self.low,high=self.high) 111 | 112 | @property 113 | def hypparams(self): 114 | return dict( 115 | x_m_low=self.x_m_low,alpha_low=self.alpha_low, 116 | x_m_high=self.x_m_high,alpha_high=self.alpha_high) 117 | 118 | def resample(self,data=[],niter=5): 119 | if len(data) == 0: 120 | self.low = -sample_pareto(-self.x_m_low,self.alpha_low) 121 | self.high = sample_pareto(self.x_m_high,self.alpha_high) 122 | else: 123 | for itr in range(niter): 124 | # resample high, fixing low 125 | self.x_m, self.alpha = self.x_m_high, self.alpha_high 126 | super(Uniform,self).resample(data) 127 | # tricky: flip data and resample 'high' again 128 | self.x_m, self.alpha = -self.x_m_low, self.alpha_low 129 | self.low, self.high = self.high, self.low 130 | super(Uniform,self).resample(self._flip_data(data)) 131 | self.low, self.high = self.x_m_low - self.high, self.low 132 | 133 | def _flip_data(self,data): 134 | if isinstance(data,np.ndarray): 135 | return self.x_m_low - data 136 | else: 137 | return list(map(self._flip_data,data)) 138 | -------------------------------------------------------------------------------- /pybasicbayes/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .mixture import Labels, CRPLabels, Mixture, MixtureDistribution, CollapsedMixture, CRPMixture 2 | from .factor_analysis import FactorAnalysis -------------------------------------------------------------------------------- /pybasicbayes/models/factor_analysis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Probabilistic factor analysis to perform dimensionality reduction on mouse images. 3 | With the probabilistic approach, we can handle missing data in the images. 4 | Technically this holds for missing at random data, but we can try it 5 | out on images where we treat cable pixels as missing, even though they 6 | won't be random. This should give us a model-based way to fill in pixels, 7 | and hopefully a more robust way to estimate principle components for modeling. 
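Concretely, the model fit below is z ~ N(0, I) and x | z ~ N(W z + mean, diag(sigmasq)), so marginally x ~ N(mean, W W^T + diag(sigmasq)).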
8 | """ 9 | import abc 10 | import numpy as np 11 | 12 | from pybasicbayes.abstractions import Model, \ 13 | ModelGibbsSampling, ModelMeanField, ModelMeanFieldSVI, ModelEM 14 | from pybasicbayes.util.stats import sample_gaussian 15 | from pybasicbayes.util.general import objarray 16 | 17 | from pybasicbayes.distributions import DiagonalRegression 18 | 19 | from pybasicbayes.util.profiling import line_profiled 20 | PROFILING = True 21 | 22 | class FactorAnalysisStates(object): 23 | """ 24 | Wrapper for the latent states of a factor analysis model 25 | """ 26 | def __init__(self, model, data, mask=None, **kwargs): 27 | self.model = model 28 | self.X = data 29 | if mask is None: 30 | mask = np.ones_like(data, dtype=bool) 31 | self.mask = mask 32 | assert data.shape == mask.shape and mask.dtype == bool 33 | assert self.X.shape[1] == self.D_obs 34 | 35 | # Initialize latent states 36 | self.N = self.X.shape[0] 37 | self.Z = np.random.randn(self.N, self.D_latent) 38 | 39 | @property 40 | def D_obs(self): 41 | return self.model.D_obs 42 | 43 | @property 44 | def D_latent(self): 45 | return self.model.D_latent 46 | 47 | @property 48 | def W(self): 49 | return self.model.W 50 | 51 | @property 52 | def mean(self): 53 | return self.model.mean 54 | 55 | @property 56 | def sigmasq(self): 57 | return self.model.sigmasq 58 | 59 | @property 60 | def regression(self): 61 | return self.model.regression 62 | 63 | def log_likelihood(self): 64 | # mu = np.dot(self.Z, self.W.T) 65 | # return -0.5 * np.sum(((self.X - mu) * self.mask) ** 2 / self.sigmasq) 66 | 67 | # Compute the marginal likelihood, integrating out z 68 | mu_x = self.mean 69 | Sigma_x = self.W.dot(self.W.T) + np.diag(self.sigmasq) 70 | 71 | from scipy.stats import multivariate_normal 72 | if not np.all(self.mask): 73 | # Find the patterns of missing dta 74 | missing_patterns = np.unique(self.mask, axis=0) 75 | 76 | # Evaluate the likelihood for each missing pattern 77 | lls = np.zeros(self.N) 78 | for pat in missing_patterns: 79 | inds = np.all(self.mask == pat, axis=1) 80 | lls[inds] = \ 81 | multivariate_normal(mu_x[pat], Sigma_x[np.ix_(pat, pat)])\ 82 | .logpdf(self.X[np.ix_(inds, pat)]) 83 | 84 | else: 85 | lls = multivariate_normal(mu_x, Sigma_x).logpdf(self.X) 86 | 87 | return lls 88 | 89 | ## Gibbs 90 | def resample(self): 91 | W, sigmasq = self.W, self.sigmasq 92 | J0 = np.eye(self.D_latent) 93 | h0 = np.zeros(self.D_latent) 94 | 95 | # Sample each latent embedding 96 | for n in range(self.N): 97 | Jobs = self.mask[n] / sigmasq 98 | Jpost = J0 + (W * Jobs[:, None]).T.dot(W) 99 | hpost = h0 + ((self.X[n] - self.mean) * Jobs).dot(W) 100 | self.Z[n] = sample_gaussian(J=Jpost, h=hpost) 101 | 102 | ## Mean field 103 | def E_step(self): 104 | W = self.W 105 | WWT = np.array([np.outer(wd,wd) for wd in W]) 106 | sigmasq_inv = 1./self.sigmasq 107 | self._meanfieldupdate(W, WWT, sigmasq_inv) 108 | 109 | # Copy over the expected states to Z 110 | self.Z = self.E_Z 111 | 112 | def meanfieldupdate(self): 113 | E_W, E_WWT, E_sigmasq_inv, _ = self.regression.mf_expectations 114 | self._meanfieldupdate(E_W, E_WWT, E_sigmasq_inv) 115 | 116 | # Copy over the expected states to Z 117 | self.Z = self.E_Z 118 | 119 | def _meanfieldupdate(self, E_W, E_WWT, E_sigmasq_inv): 120 | N, D_obs, D_lat = self.N, self.D_obs, self.D_latent 121 | E_WWT_vec = E_WWT.reshape(D_obs, -1) 122 | 123 | J0 = np.eye(D_lat) 124 | h0 = np.zeros(D_lat) 125 | 126 | # Get expectations for the latent embedding of these datapoints 127 | self.E_Z = np.zeros((N, D_lat)) 128 | self.E_ZZT = 
np.zeros((N, D_lat, D_lat)) 129 | 130 | for n in range(N): 131 | Jobs = self.mask[n] * E_sigmasq_inv 132 | # Faster than Jpost = J0 + np.sum(E_WWT * Jobs[:,None,None], axis=0) 133 | Jpost = J0 + (np.dot(Jobs, E_WWT_vec)).reshape((D_lat, D_lat)) 134 | hpost = h0 + ((self.X[n] - self.mean) * Jobs).dot(E_W) 135 | 136 | # Get the expectations for this set of indices 137 | Sigma_post = np.linalg.inv(Jpost) 138 | self.E_Z[n] = Sigma_post.dot(hpost) 139 | self.E_ZZT[n] = Sigma_post + np.outer(self.E_Z[n], self.E_Z[n]) 140 | 141 | self._set_expected_stats() 142 | 143 | def _set_expected_stats(self): 144 | D_lat = self.D_latent 145 | Xc = self.X - self.mean 146 | E_Xsq = np.sum(Xc**2 * self.mask, axis=0) 147 | E_XZT = (Xc * self.mask).T.dot(self.E_Z) 148 | E_ZZT_vec = self.E_ZZT.reshape((self.E_ZZT.shape[0], D_lat ** 2)) 149 | E_ZZT = np.array([np.dot(self.mask[:, d], E_ZZT_vec).reshape((D_lat, D_lat)) 150 | for d in range(self.D_obs)]) 151 | n = np.sum(self.mask, axis=0) 152 | 153 | self.E_emission_stats = objarray([E_Xsq, E_XZT, E_ZZT, n]) 154 | 155 | def resample_from_mf(self): 156 | for n in range(self.N): 157 | mu_n = self.E_Z[n] 158 | Sigma_n = self.E_ZZT[n] - np.outer(mu_n, mu_n) 159 | self.Z[n] = sample_gaussian(mu=mu_n, Sigma=Sigma_n) 160 | 161 | def expected_log_likelihood(self): 162 | E_W, E_WWT, E_sigmasq_inv, E_log_sigmasq = self.regression.mf_expectations 163 | E_Xsq, E_XZT, E_ZZT, n = self.E_emission_stats 164 | 165 | ll = -0.5 * np.log(2 * np.pi) - 0.5 * np.sum(E_log_sigmasq * self.mask) 166 | ll += -0.5 * np.sum(E_Xsq * E_sigmasq_inv) 167 | ll += -0.5 * np.sum(-2 * E_XZT * E_W * E_sigmasq_inv[:,None]) 168 | ll += -0.5 * np.sum(E_WWT * E_ZZT * E_sigmasq_inv[:,None,None]) 169 | return ll 170 | 171 | 172 | class _FactorAnalysisBase(Model): 173 | __metaclass__ = abc.ABCMeta 174 | _states_class = FactorAnalysisStates 175 | 176 | def __init__(self, D_obs, D_latent, 177 | W=None, sigmasq=None, 178 | sigmasq_W_0=1.0, mu_W_0=0.0, 179 | alpha_0=3.0, beta_0=2.0): 180 | 181 | self.D_obs, self.D_latent = D_obs, D_latent 182 | 183 | # The weights and variances are encapsulated in a DiagonalRegression class 184 | self.regression = \ 185 | DiagonalRegression( 186 | self.D_obs, self.D_latent, 187 | mu_0=mu_W_0 * np.ones(self.D_latent), 188 | Sigma_0=sigmasq_W_0 * np.eye(self.D_latent), 189 | alpha_0=alpha_0, beta_0=beta_0, 190 | A=W, sigmasq=sigmasq) 191 | 192 | # Handle the mean separately since DiagonalRegression doesn't support affine :-/ 193 | self.mean = np.zeros(D_obs) 194 | 195 | self.data_list = [] 196 | 197 | @property 198 | def W(self): 199 | return self.regression.A 200 | 201 | @property 202 | def sigmasq(self): 203 | return self.regression.sigmasq_flat 204 | 205 | def set_empirical_mean(self): 206 | self.mean = np.zeros(self.D_obs) 207 | for n in range(self.D_obs): 208 | self.mean[n] = np.concatenate([d.X[d.mask[:,n] == 1, n] for d in self.data_list]).mean() 209 | 210 | def add_data(self, data, mask=None, **kwargs): 211 | self.data_list.append(self._states_class(self, data, mask=mask, **kwargs)) 212 | return self.data_list[-1] 213 | 214 | def generate(self, keep=True, N=1, mask=None, **kwargs): 215 | # Sample from the factor analysis model 216 | W, sigmasq = self.W, self.sigmasq 217 | Z = np.random.randn(N, self.D_latent) 218 | X = self.mean + np.dot(Z, W.T) + np.sqrt(sigmasq) * np.random.randn(N, self.D_obs) 219 | 220 | data = self._states_class(self, X, mask=mask, **kwargs) 221 | data.Z = Z 222 | if keep: 223 | self.data_list.append(data) 224 | return data.X, data.Z 225 | 226 | def 
_log_likelihoods(self, x, mask=None, **kwargs): 227 | self.add_data(x, mask=mask, **kwargs) 228 | states = self.data_list.pop() 229 | return states.log_likelihood() 230 | 231 | def log_likelihood(self): 232 | return sum([d.log_likelihood().sum() for d in self.data_list]) 233 | 234 | def log_probability(self): 235 | lp = 0 236 | 237 | # Prior 238 | # lp += (-self.alpha_0-1) * np.log(self.sigmasq) - self.beta_0 / self.sigmasq 239 | lp += -0.5 * np.sum(self.W**2) 240 | lp += -0.5 * np.sum(self.Z**2) 241 | lp += self.log_likelihood() 242 | return lp 243 | 244 | 245 | class _FactorAnalysisGibbs(_FactorAnalysisBase, ModelGibbsSampling): 246 | __metaclass__ = abc.ABCMeta 247 | 248 | def resample_model(self): 249 | for data in self.data_list: 250 | data.resample() 251 | 252 | Zs = np.vstack([d.Z for d in self.data_list]) 253 | Xs = np.vstack([d.X for d in self.data_list]) 254 | mask = np.vstack([d.mask for d in self.data_list]) 255 | self.regression.resample((Zs, Xs), mask=mask) 256 | 257 | 258 | class _FactorAnalysisEM(_FactorAnalysisBase, ModelEM): 259 | 260 | def _null_stats(self): 261 | return objarray( 262 | [np.zeros(self.D_obs), 263 | np.zeros((self.D_obs, self.D_latent)), 264 | np.zeros((self.D_obs, self.D_latent, self.D_latent)), 265 | np.zeros(self.D_obs)]) 266 | 267 | def EM_step(self): 268 | for data in self.data_list: 269 | data.E_step() 270 | 271 | stats = self._null_stats() + sum([d.E_emission_stats for d in self.data_list]) 272 | self.regression.max_likelihood(data=None, weights=None, stats=stats) 273 | assert np.all(np.isfinite(self.sigmasq )) 274 | 275 | 276 | class _FactorAnalysisMeanField(_FactorAnalysisBase, ModelMeanField, ModelMeanFieldSVI): 277 | __metaclass__ = abc.ABCMeta 278 | 279 | def _null_stats(self): 280 | return objarray( 281 | [np.zeros(self.D_obs), 282 | np.zeros((self.D_obs, self.D_latent)), 283 | np.zeros((self.D_obs, self.D_latent, self.D_latent)), 284 | np.zeros(self.D_obs)]) 285 | 286 | def meanfield_coordinate_descent_step(self): 287 | for data in self.data_list: 288 | data.meanfieldupdate() 289 | 290 | stats = self._null_stats() + sum([d.E_emission_stats for d in self.data_list]) 291 | self.regression.meanfieldupdate(stats=stats) 292 | 293 | def meanfield_sgdstep(self, minibatch, prob, stepsize, masks=None): 294 | assert stepsize > 0 and stepsize <= 1 295 | 296 | states_list = self._get_mb_states_list(minibatch, masks) 297 | for s in states_list: 298 | s.meanfieldupdate() 299 | 300 | # Compute the sufficient statistics of the latent parameters 301 | self.regression.meanfield_sgdstep( 302 | data=None, weights=None, prob=prob, stepsize=stepsize, 303 | stats=(sum(s.E_emission_stats for s in states_list))) 304 | 305 | # Compute the expected log likelihood for this minibatch 306 | return sum([s.expected_log_likelihood() for s in states_list]) 307 | 308 | def _get_mb_states_list(self, minibatch, masks): 309 | minibatch = minibatch if isinstance(minibatch, list) else [minibatch] 310 | masks = [None] * len(minibatch) if masks is None else \ 311 | (masks if isinstance(masks, list) else [masks]) 312 | 313 | def get_states(data, mask): 314 | self.add_data(data, mask=mask) 315 | return self.data_list.pop() 316 | 317 | return [get_states(data, mask) for data, mask in zip(minibatch, masks)] 318 | 319 | def resample_from_mf(self): 320 | for data in self.data_list: 321 | data.resample_from_mf() 322 | self.regression.resample_from_mf() 323 | 324 | def expected_log_likelihood(self): 325 | ell = 0 326 | for data in self.data_list: 327 | ell += data.expected_log_likelihood() 
328 | return ell 329 | 330 | def initialize_meanfield(self): 331 | self.regression._initialize_mean_field() 332 | 333 | 334 | class FactorAnalysis(_FactorAnalysisGibbs, _FactorAnalysisEM, _FactorAnalysisMeanField): 335 | pass 336 | 337 | -------------------------------------------------------------------------------- /pybasicbayes/models/parallel_mixture.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | model = None 5 | labels_list = None 6 | 7 | def _get_sampled_labels(idx): 8 | model.add_data(model.labels_list[idx].data,initialize_from_prior=False) 9 | l = model.labels_list.pop() 10 | return l.z, l._normalizer 11 | 12 | def _get_sampled_component_params(idx): 13 | model.components[idx].resample([l.data[l.z == idx] for l in labels_list]) 14 | return model.components[idx].parameters 15 | 16 | -------------------------------------------------------------------------------- /pybasicbayes/testing/.gitignore: -------------------------------------------------------------------------------- 1 | /figures/* 2 | -------------------------------------------------------------------------------- /pybasicbayes/testing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattjj/pybasicbayes/61f65ad6c781288605ec5f7347efcc5dbd73c4fc/pybasicbayes/testing/__init__.py -------------------------------------------------------------------------------- /pybasicbayes/testing/mixins.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | from builtins import range 4 | from builtins import object 5 | import numpy as np 6 | import abc, os 7 | 8 | from nose.plugins.attrib import attr 9 | 10 | import pybasicbayes 11 | from pybasicbayes.util import testing 12 | from future.utils import with_metaclass 13 | 14 | class DistributionTester(with_metaclass(abc.ABCMeta, object)): 15 | @abc.abstractproperty 16 | def distribution_class(self): 17 | pass 18 | 19 | @abc.abstractproperty 20 | def hyperparameter_settings(self): 21 | pass 22 | 23 | class BasicTester(DistributionTester): 24 | @property 25 | def basic_data_size(self): 26 | return 1000 27 | 28 | def loglike_lists_tests(self): 29 | for setting_idx, hypparam_dict in enumerate(self.hyperparameter_settings): 30 | yield self.check_loglike_lists, setting_idx, hypparam_dict 31 | 32 | def check_loglike_lists(self,setting_idx,hypparam_dict): 33 | dist = self.distribution_class(**hypparam_dict) 34 | data = dist.rvs(size=self.basic_data_size) 35 | 36 | l1 = dist.log_likelihood(data).sum() 37 | l2 = sum(dist.log_likelihood(d) for d in np.array_split(data,self.basic_data_size)) 38 | 39 | assert np.isclose(l1,l2) 40 | 41 | def stats_lists_tests(self): 42 | for setting_idx, hypparam_dict in enumerate(self.hyperparameter_settings): 43 | yield self.check_stats_lists, setting_idx, hypparam_dict 44 | 45 | def check_stats_lists(self,setting_idx,hypparam_dict): 46 | dist = self.distribution_class(**hypparam_dict) 47 | data = dist.rvs(size=self.basic_data_size) 48 | 49 | if hasattr(dist,'_get_statistics'): 50 | s1 = dist._get_statistics(data) 51 | s2 = dist._get_statistics([d for d in np.array_split(data,self.basic_data_size)]) 52 | 53 | self._check_stats(s1,s2) 54 | 55 | def _check_stats(self,s1,s2): 56 | if isinstance(s1,np.ndarray): 57 | if s1.dtype == np.object: 58 | assert all(np.allclose(t1,t2) for t1, t2 in 
zip(s1,s2)) 59 | else: 60 | assert np.allclose(s1,s2) 61 | elif isinstance(s1,tuple): 62 | assert all(np.allclose(ss1,ss2) for ss1,ss2 in zip(s1,s2)) 63 | 64 | def missing_data_tests(self): 65 | for setting_idx, hypparam_dict in enumerate(self.hyperparameter_settings): 66 | yield self.check_missing_data_stats, setting_idx, hypparam_dict 67 | 68 | def check_missing_data_stats(self,setting_idx,hypparam_dict): 69 | dist = self.distribution_class(**hypparam_dict) 70 | data = dist.rvs(size=self.basic_data_size) 71 | 72 | if isinstance(data,np.ndarray): 73 | data[np.random.randint(2,size=data.shape[0]) == 1] = np.nan 74 | 75 | s1 = dist._get_statistics(data) 76 | s2 = dist._get_statistics(data[~np.isnan(data).any(1)]) 77 | 78 | self._check_stats(s1,s2) 79 | 80 | class BigDataGibbsTester(with_metaclass(abc.ABCMeta, DistributionTester)): 81 | @abc.abstractmethod 82 | def params_close(self,distn1,distn2): 83 | pass 84 | 85 | @property 86 | def big_data_size(self): 87 | return 20000 88 | 89 | @property 90 | def big_data_repeats_per_setting(self): 91 | return 1 92 | 93 | @property 94 | def big_data_hyperparameter_settings(self): 95 | return self.hyperparameter_settings 96 | 97 | @attr('random') 98 | def big_data_Gibbs_tests(self): 99 | for setting_idx, hypparam_dict in enumerate(self.big_data_hyperparameter_settings): 100 | for i in range(self.big_data_repeats_per_setting): 101 | yield self.check_big_data_Gibbs, setting_idx, hypparam_dict 102 | 103 | def check_big_data_Gibbs(self,setting_idx,hypparam_dict): 104 | d1 = self.distribution_class(**hypparam_dict) 105 | d2 = self.distribution_class(**hypparam_dict) 106 | 107 | data = d1.rvs(size=self.big_data_size) 108 | d2.resample(data) 109 | 110 | assert self.params_close(d1,d2) 111 | 112 | class MaxLikelihoodTester(with_metaclass(abc.ABCMeta, DistributionTester)): 113 | @abc.abstractmethod 114 | def params_close(self,distn1,distn2): 115 | pass 116 | 117 | 118 | @property 119 | def big_data_size(self): 120 | return 20000 121 | 122 | @property 123 | def big_data_repeats_per_setting(self): 124 | return 1 125 | 126 | @property 127 | def big_data_hyperparameter_settings(self): 128 | return self.hyperparameter_settings 129 | 130 | 131 | def maxlike_tests(self): 132 | for setting_idx, hypparam_dict in enumerate(self.big_data_hyperparameter_settings): 133 | for i in range(self.big_data_repeats_per_setting): 134 | yield self.check_maxlike, setting_idx, hypparam_dict 135 | 136 | def check_maxlike(self,setting_idx,hypparam_dict): 137 | d1 = self.distribution_class(**hypparam_dict) 138 | d2 = self.distribution_class(**hypparam_dict) 139 | 140 | data = d1.rvs(size=self.big_data_size) 141 | d2.max_likelihood(data) 142 | 143 | assert self.params_close(d1,d2) 144 | 145 | class GewekeGibbsTester(with_metaclass(abc.ABCMeta, DistributionTester)): 146 | @abc.abstractmethod 147 | def geweke_statistics(self,distn,data): 148 | pass 149 | 150 | 151 | @property 152 | def geweke_nsamples(self): 153 | return 30000 154 | 155 | @property 156 | def geweke_data_size(self): 157 | return 1 # NOTE: more data usually means slower mixing 158 | 159 | @property 160 | def geweke_ntrials(self): 161 | return 3 162 | 163 | @property 164 | def geweke_pval(self): 165 | return 0.05 166 | 167 | @property 168 | def geweke_hyperparameter_settings(self): 169 | return self.hyperparameter_settings 170 | 171 | def geweke_numerical_slice(self,distn,setting_idx): 172 | return slice(None) 173 | 174 | @property 175 | def resample_kwargs(self): 176 | return {} 177 | 178 | @property 179 | def 
geweke_resample_kwargs(self): 180 | return self.resample_kwargs 181 | 182 | @property 183 | def geweke_num_statistic_fails_to_tolerate(self): 184 | return 1 185 | 186 | 187 | @attr('slow', 'random') 188 | def geweke_tests(self): 189 | for setting_idx, hypparam_dict in enumerate(self.geweke_hyperparameter_settings): 190 | yield self.check_geweke, setting_idx, hypparam_dict 191 | 192 | def geweke_figure_filepath(self,setting_idx): 193 | return os.path.join(os.path.dirname(__file__),'figures', 194 | self.__class__.__name__,'setting_%d.pdf' % setting_idx) 195 | 196 | def check_geweke(self,setting_idx,hypparam_dict): 197 | import os 198 | from matplotlib import pyplot as plt 199 | plt.ioff() 200 | fig = plt.figure() 201 | figpath = self.geweke_figure_filepath(setting_idx) 202 | mkdir(os.path.dirname(figpath)) 203 | 204 | nsamples, data_size, ntrials = self.geweke_nsamples, \ 205 | self.geweke_data_size, self.geweke_ntrials 206 | 207 | d = self.distribution_class(**hypparam_dict) 208 | sample_dim = np.atleast_1d(self.geweke_statistics(d,d.rvs(size=10))).shape[0] 209 | 210 | num_statistic_fails = 0 211 | for trial in range(ntrials): 212 | # collect forward-generated statistics 213 | forward_statistics = np.squeeze(np.empty((nsamples,sample_dim))) 214 | for i in range(nsamples): 215 | d = self.distribution_class(**hypparam_dict) 216 | data = d.rvs(size=data_size) 217 | forward_statistics[i] = self.geweke_statistics(d,data) 218 | 219 | # collect gibbs-generated statistics 220 | gibbs_statistics = np.squeeze(np.empty((nsamples,sample_dim))) 221 | d = self.distribution_class(**hypparam_dict) 222 | data = d.rvs(size=data_size) 223 | for i in range(nsamples): 224 | d.resample(data,**self.geweke_resample_kwargs) 225 | data = d.rvs(size=data_size) 226 | gibbs_statistics[i] = self.geweke_statistics(d,data) 227 | 228 | testing.populations_eq_quantile_plot(forward_statistics,gibbs_statistics,fig=fig) 229 | try: 230 | sl = self.geweke_numerical_slice(d,setting_idx) 231 | testing.assert_populations_eq_moments( 232 | forward_statistics[...,sl],gibbs_statistics[...,sl], 233 | pval=self.geweke_pval) 234 | except AssertionError: 235 | datapath = os.path.join(os.path.dirname(__file__),'figures', 236 | self.__class__.__name__,'setting_%d_trial_%d.npz' % (setting_idx,trial)) 237 | np.savez(datapath,fwd=forward_statistics,gibbs=gibbs_statistics) 238 | example_violating_means = forward_statistics.mean(0), gibbs_statistics.mean(0) 239 | num_statistic_fails += 1 240 | 241 | plt.savefig(figpath) 242 | 243 | assert num_statistic_fails <= self.geweke_num_statistic_fails_to_tolerate, \ 244 | 'Geweke MAY have failed, check FIGURES in %s (e.g. 
%s vs %s)' \ 245 | % ((os.path.dirname(figpath),) + example_violating_means) 246 | 247 | 248 | ########## 249 | # misc # 250 | ########## 251 | 252 | def mkdir(path): 253 | # from 254 | # http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python 255 | import errno 256 | try: 257 | os.makedirs(path) 258 | except OSError as exc: 259 | if exc.errno == errno.EEXIST and os.path.isdir(path): 260 | pass 261 | else: raise 262 | 263 | -------------------------------------------------------------------------------- /pybasicbayes/util/.ctags: -------------------------------------------------------------------------------- 1 | --exclude=deps 2 | --exclude=basic/pybasicbayes 3 | --exclude=util 4 | --python-kinds=-i 5 | --recurse=yes 6 | --exclude=.git 7 | --exclude=.pyc 8 | --exclude=.md 9 | --exclude=DS_Store 10 | -------------------------------------------------------------------------------- /pybasicbayes/util/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | /tags 4 | /build 5 | *.c 6 | /*.so 7 | -------------------------------------------------------------------------------- /pybasicbayes/util/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | __all__ = ['general','plot','stats','text'] 3 | from . import general, plot, stats, text 4 | -------------------------------------------------------------------------------- /pybasicbayes/util/cstats.pyx: -------------------------------------------------------------------------------- 1 | # distutils: extra_compile_args = -O3 -w 2 | # cython: boundscheck = False, wraparound = False, cdivision = True 3 | 4 | import numpy as np 5 | cimport numpy as np 6 | 7 | from libc.stdint cimport int32_t 8 | from cython cimport floating, integral 9 | 10 | from cython.parallel import prange 11 | 12 | cdef inline int32_t csample_discrete_normalized(floating[::1] distn, floating u): 13 | cdef int i 14 | cdef int N = distn.shape[0] 15 | cdef floating tot = u 16 | 17 | for i in range(N): 18 | tot -= distn[i] 19 | if tot < 0: 20 | break 21 | 22 | return i 23 | 24 | def sample_markov( 25 | int T, 26 | np.ndarray[floating, ndim=2, mode="c"] trans_matrix, 27 | np.ndarray[floating, ndim=1, mode="c"] init_state_distn 28 | ): 29 | cdef int32_t[::1] out = np.empty(T,dtype=np.int32) 30 | cdef floating[:,::1] A = trans_matrix / trans_matrix.sum(1)[:,None] 31 | cdef floating[::1] pi = init_state_distn / init_state_distn.sum() 32 | 33 | cdef floating[::1] randseq 34 | if floating is double: 35 | randseq = np.random.random(T).astype(np.double) 36 | else: 37 | randseq = np.random.random(T).astype(np.float) 38 | 39 | cdef int t 40 | out[0] = csample_discrete_normalized(pi,randseq[0]) 41 | for t in range(1,T): 42 | out[t] = csample_discrete_normalized(A[out[t-1]],randseq[t]) 43 | 44 | return np.asarray(out) 45 | 46 | def sample_crp_tablecounts( 47 | floating concentration, 48 | integral[:,:] customers, 49 | colweights = None, 50 | ): 51 | cdef integral[:,::1] _customers = np.require(customers, requirements='C') 52 | cdef integral[:,::1] m = np.zeros_like(_customers) 53 | cdef floating[::1] _colweights = np.require(colweights, requirements='C') \ 54 | if colweights is not None else np.ones(customers.shape[1]) 55 | cdef int i, j, k 56 | cdef integral tot = np.sum(_customers) 57 | 58 | cdef floating[::1] randseq 59 | if floating is double: 60 | randseq = np.random.random(tot).astype(np.double) 61 | else: 62 | randseq = 
np.random.random(tot).astype(np.float) 63 | 64 | tmp = np.empty_like(_customers) 65 | tmp[0,0] = 0 66 | tmp.flat[1:] = np.cumsum(np.ravel(customers)[:_customers.size-1],dtype=tmp.dtype) 67 | cdef integral[:,::1] starts = tmp 68 | 69 | with nogil: 70 | for i in prange(_customers.shape[0]): 71 | for j in range(_customers.shape[1]): 72 | for k in range(_customers[i,j]): 73 | m[i,j] += randseq[starts[i,j]+k] \ 74 | < (concentration * _colweights[j]) / (k+concentration*_colweights[j]) 75 | 76 | return np.asarray(m) 77 | 78 | -------------------------------------------------------------------------------- /pybasicbayes/util/cyutil.py: -------------------------------------------------------------------------------- 1 | from builtins import map 2 | from builtins import str 3 | import Cython.Build 4 | from Cython.Build.Dependencies import * 5 | 6 | # NOTE: mostly a copy of cython's create_extension_list except for the lines 7 | # surrounded by "begin matt added" / "end matt added" 8 | def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=False, language=None, 9 | exclude_failures=False): 10 | if not isinstance(patterns, (list, tuple)): 11 | patterns = [patterns] 12 | explicit_modules = set([m.name for m in patterns if isinstance(m, Extension)]) 13 | seen = set() 14 | deps = create_dependency_tree(ctx, quiet=quiet) 15 | to_exclude = set() 16 | if not isinstance(exclude, list): 17 | exclude = [exclude] 18 | for pattern in exclude: 19 | to_exclude.update(list(map(os.path.abspath, extended_iglob(pattern)))) 20 | 21 | module_list = [] 22 | for pattern in patterns: 23 | if isinstance(pattern, str): 24 | filepattern = pattern 25 | template = None 26 | name = '*' 27 | base = None 28 | exn_type = Extension 29 | ext_language = language 30 | elif isinstance(pattern, Extension): 31 | for filepattern in pattern.sources: 32 | if os.path.splitext(filepattern)[1] in ('.py', '.pyx'): 33 | break 34 | else: 35 | # ignore non-cython modules 36 | module_list.append(pattern) 37 | continue 38 | template = pattern 39 | name = template.name 40 | base = DistutilsInfo(exn=template) 41 | exn_type = template.__class__ 42 | ext_language = None # do not override whatever the Extension says 43 | else: 44 | raise TypeError(pattern) 45 | 46 | for file in extended_iglob(filepattern): 47 | if os.path.abspath(file) in to_exclude: 48 | continue 49 | pkg = deps.package(file) 50 | if '*' in name: 51 | module_name = deps.fully_qualified_name(file) 52 | if module_name in explicit_modules: 53 | continue 54 | else: 55 | module_name = name 56 | 57 | if module_name not in seen: 58 | try: 59 | kwds = deps.distutils_info(file, aliases, base).values 60 | except Exception: 61 | if exclude_failures: 62 | continue 63 | raise 64 | if base is not None: 65 | for key, value in list(base.values.items()): 66 | if key not in kwds: 67 | kwds[key] = value 68 | 69 | sources = [file] 70 | if template is not None: 71 | sources += [m for m in template.sources if m != filepattern] 72 | if 'sources' in kwds: 73 | # allow users to add .c files etc. 74 | for source in kwds['sources']: 75 | source = encode_filename_in_py2(source) 76 | if source not in sources: 77 | sources.append(source) 78 | del kwds['sources'] 79 | if 'depends' in kwds: 80 | depends = resolve_depends(kwds['depends'], (kwds.get('include_dirs') or []) + [find_root_package_dir(file)]) 81 | if template is not None: 82 | # Always include everything from the template. 
83 | depends = list(set(template.depends).union(set(depends))) 84 | kwds['depends'] = depends 85 | 86 | if ext_language and 'language' not in kwds: 87 | kwds['language'] = ext_language 88 | 89 | # NOTE: begin matt added 90 | if 'name' in kwds: 91 | module_name = str(kwds['name']) 92 | del kwds['name'] 93 | else: 94 | module_name = os.path.splitext(file)[0].replace('/','.') 95 | # NOTE: end matt added 96 | module_list.append(exn_type( 97 | name=module_name, 98 | sources=sources, 99 | **kwds)) 100 | m = module_list[-1] 101 | seen.add(name) 102 | return module_list 103 | 104 | true_cythonize = Cython.Build.cythonize 105 | true_create_extension_list = Cython.Build.Dependencies.create_extension_list 106 | 107 | def cythonize(*args,**kwargs): 108 | Cython.Build.Dependencies.create_extension_list = create_extension_list 109 | out = true_cythonize(*args,**kwargs) 110 | Cython.Build.Dependencies.create_extension_list = true_create_extension_list 111 | return out 112 | 113 | -------------------------------------------------------------------------------- /pybasicbayes/util/general.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from future import standard_library 3 | standard_library.install_aliases() 4 | from builtins import next 5 | from builtins import zip 6 | from builtins import range 7 | import sys 8 | import numpy as np 9 | from numpy.lib.stride_tricks import as_strided as ast 10 | import scipy.linalg 11 | import scipy.linalg.lapack as lapack 12 | import copy, collections, os, shutil, hashlib 13 | from contextlib import closing 14 | from itertools import chain, count 15 | from functools import reduce 16 | from urllib.request import urlopen # py2.7 covered by standard_library.install_aliases() 17 | 18 | 19 | def blockarray(*args,**kwargs): 20 | return np.array(np.bmat(*args,**kwargs),copy=False) 21 | 22 | def interleave(*iterables): 23 | return list(chain.from_iterable(zip(*iterables))) 24 | 25 | def joindicts(dicts): 26 | # stuff on right clobbers stuff on left 27 | return reduce(lambda x,y: dict(x,**y), dicts, {}) 28 | 29 | def one_vs_all(stuff): 30 | stuffset = set(stuff) 31 | for thing in stuff: 32 | yield thing, stuffset - set([thing]) 33 | 34 | def rle(stateseq): 35 | pos, = np.where(np.diff(stateseq) != 0) 36 | pos = np.concatenate(([0],pos+1,[len(stateseq)])) 37 | return stateseq[pos[:-1]], np.diff(pos) 38 | 39 | def irle(vals,lens): 40 | out = np.empty(np.sum(lens)) 41 | for v,l,start in zip(vals,lens,np.concatenate(((0,),np.cumsum(lens)[:-1]))): 42 | out[start:start+l] = v 43 | return out 44 | 45 | def ibincount(counts): 46 | 'returns an array a such that counts = np.bincount(a)' 47 | return np.repeat(np.arange(counts.shape[0]),counts) 48 | 49 | def cumsum(v,strict=False): 50 | if not strict: 51 | return np.cumsum(v,axis=0) 52 | else: 53 | out = np.zeros_like(v) 54 | out[1:] = np.cumsum(v[:-1],axis=0) 55 | return out 56 | 57 | def rcumsum(v,strict=False): 58 | if not strict: 59 | return np.cumsum(v[::-1],axis=0)[::-1] 60 | else: 61 | out = np.zeros_like(v) 62 | out[:-1] = np.cumsum(v[-1:0:-1],axis=0)[::-1] 63 | return out 64 | 65 | def delta_like(v,i): 66 | out = np.zeros_like(v) 67 | out[i] = 1 68 | return out 69 | 70 | def deepcopy(obj): 71 | return copy.deepcopy(obj) 72 | 73 | def nice_indices(arr): 74 | ''' 75 | takes an array like [1,1,5,5,5,999,1,1] 76 | and maps to something like [0,0,1,1,1,2,0,0] 77 | modifies original in place as well as returns a ref 78 | ''' 79 | # surprisingly, this is slower for 
very small (and very large) inputs: 80 | # u,f,i = np.unique(arr,return_index=True,return_inverse=True) 81 | # arr[:] = np.arange(u.shape[0])[np.argsort(f)][i] 82 | ids = collections.defaultdict(count().__next__) 83 | for idx,x in enumerate(arr): 84 | arr[idx] = ids[x] 85 | return arr 86 | 87 | def ndargmax(arr): 88 | return np.unravel_index(np.argmax(np.ravel(arr)),arr.shape) 89 | 90 | def match_by_overlap(a,b): 91 | assert a.ndim == b.ndim == 1 and a.shape[0] == b.shape[0] 92 | ais, bjs = list(set(a)), list(set(b)) 93 | scores = np.zeros((len(ais),len(bjs))) 94 | for i,ai in enumerate(ais): 95 | for j,bj in enumerate(bjs): 96 | scores[i,j] = np.dot(np.array(a==ai,dtype=np.float),b==bj) 97 | 98 | flip = len(bjs) > len(ais) 99 | 100 | if flip: 101 | ais, bjs = bjs, ais 102 | scores = scores.T 103 | 104 | matching = [] 105 | while scores.size > 0: 106 | i,j = ndargmax(scores) 107 | matching.append((ais[i],bjs[j])) 108 | scores = np.delete(np.delete(scores,i,0),j,1) 109 | ais = np.delete(ais,i) 110 | bjs = np.delete(bjs,j) 111 | 112 | return matching if not flip else [(x,y) for y,x in matching] 113 | 114 | def hamming_error(a,b): 115 | return (a!=b).sum() 116 | 117 | def scoreatpercentile(data,per,axis=0): 118 | 'like the function in scipy.stats but with an axis argument and works on arrays' 119 | a = np.sort(data,axis=axis) 120 | idx = per/100. * (data.shape[axis]-1) 121 | 122 | if (idx % 1 == 0): 123 | return a[[slice(None) if ii != axis else idx for ii in range(a.ndim)]] 124 | else: 125 | lowerweight = 1-(idx % 1) 126 | upperweight = (idx % 1) 127 | idx = int(np.floor(idx)) 128 | return lowerweight * a[[slice(None) if ii != axis else idx for ii in range(a.ndim)]] \ 129 | + upperweight * a[[slice(None) if ii != axis else idx+1 for ii in range(a.ndim)]] 130 | 131 | def stateseq_hamming_error(sampledstates,truestates): 132 | sampledstates = np.array(sampledstates,ndmin=2).copy() 133 | 134 | errors = np.zeros(sampledstates.shape[0]) 135 | for idx,s in enumerate(sampledstates): 136 | # match labels by maximum overlap 137 | matching = match_by_overlap(s,truestates) 138 | s2 = s.copy() 139 | for i,j in matching: 140 | s2[s==i] = j 141 | errors[idx] = hamming_error(s2,truestates) 142 | 143 | return errors if errors.shape[0] > 1 else errors[0] 144 | 145 | def _sieve(stream): 146 | # just for fun; doesn't work over a few hundred 147 | val = next(stream) 148 | yield val 149 | for x in [x for x in _sieve(stream) if x % val != 0]: 150 | yield x 151 | 152 | def primes(): 153 | return _sieve(count(2)) 154 | 155 | def top_eigenvector(A,niter=1000,force_iteration=False): 156 | ''' 157 | assuming the LEFT invariant subspace of A corresponding to the LEFT 158 | eigenvalue of largest modulus has geometric multiplicity of 1 (trivial 159 | Jordan block), returns the vector at the intersection of that eigenspace and 160 | the simplex 161 | 162 | A should probably be a ROW-stochastic matrix 163 | 164 | probably uses power iteration 165 | ''' 166 | n = A.shape[0] 167 | np.seterr(invalid='raise',divide='raise') 168 | if n <= 25 and not force_iteration: 169 | x = np.repeat(1./n,n) 170 | x = np.linalg.matrix_power(A.T,niter).dot(x) 171 | x /= x.sum() 172 | return x 173 | else: 174 | x1 = np.repeat(1./n,n) 175 | x2 = x1.copy() 176 | for itr in range(niter): 177 | np.dot(A.T,x1,out=x2) 178 | x2 /= x2.sum() 179 | x1,x2 = x2,x1 180 | if np.linalg.norm(x1-x2) < 1e-8: 181 | break 182 | return x1 183 | 184 | def engine_global_namespace(f): 185 | # see IPython.parallel.util.interactive; it's copied here so as to avoid 186 | 
# extra imports/dependences elsewhere, and to provide a slightly clearer 187 | # name 188 | f.__module__ = '__main__' 189 | return f 190 | 191 | def block_view(a,block_shape): 192 | shape = (a.shape[0]/block_shape[0],a.shape[1]/block_shape[1]) + block_shape 193 | strides = (a.strides[0]*block_shape[0],a.strides[1]*block_shape[1]) + a.strides 194 | return ast(a,shape=shape,strides=strides) 195 | 196 | def AR_striding(data,nlags): 197 | data = np.asarray(data) 198 | if not data.flags.c_contiguous: 199 | data = data.copy(order='C') 200 | if data.ndim == 1: 201 | data = np.reshape(data,(-1,1)) 202 | sz = data.dtype.itemsize 203 | return ast( 204 | data, 205 | shape=(data.shape[0]-nlags,data.shape[1]*(nlags+1)), 206 | strides=(data.shape[1]*sz,sz)) 207 | 208 | def count_transitions(stateseq,minlength=None): 209 | if minlength is None: 210 | minlength = stateseq.max() + 1 211 | out = np.zeros((minlength,minlength),dtype=np.int32) 212 | for a,b in zip(stateseq[:-1],stateseq[1:]): 213 | out[a,b] += 1 214 | return out 215 | 216 | ### SGD 217 | 218 | def sgd_steps(tau,kappa): 219 | assert 0.5 < kappa <= 1 and tau >= 0 220 | for t in count(1): 221 | yield (t+tau)**(-kappa) 222 | 223 | def hold_out(datalist,frac): 224 | N = len(datalist) 225 | perm = np.random.permutation(N) 226 | split = int(np.ceil(frac * N)) 227 | return [datalist[i] for i in perm[split:]], [datalist[i] for i in perm[:split]] 228 | 229 | def sgd_passes(tau,kappa,datalist,minibatchsize=1,npasses=1): 230 | N = len(datalist) 231 | 232 | for superitr in range(npasses): 233 | if minibatchsize == 1: 234 | perm = np.random.permutation(N) 235 | for idx, rho_t in zip(perm,sgd_steps(tau,kappa)): 236 | yield datalist[idx], rho_t 237 | else: 238 | minibatch_indices = np.array_split(np.random.permutation(N),N/minibatchsize) 239 | for indices, rho_t in zip(minibatch_indices,sgd_steps(tau,kappa)): 240 | yield [datalist[idx] for idx in indices], rho_t 241 | 242 | def sgd_sampling(tau,kappa,datalist,minibatchsize=1): 243 | N = len(datalist) 244 | if minibatchsize == 1: 245 | for rho_t in sgd_steps(tau,kappa): 246 | minibatch_index = np.random.choice(N) 247 | yield datalist[minibatch_index], rho_t 248 | else: 249 | for rho_t in sgd_steps(tau,kappa): 250 | minibatch_indices = np.random.choice(N,size=minibatchsize,replace=False) 251 | yield [datalist[idx] for idx in minibatch_indices], rho_t 252 | 253 | # TODO should probably eliminate this function 254 | def minibatchsize(lst): 255 | return float(sum(d.shape[0] for d in lst)) 256 | 257 | ### misc 258 | 259 | def random_subset(lst,sz): 260 | perm = np.random.permutation(len(lst)) 261 | return [lst[perm[idx]] for idx in range(sz)] 262 | 263 | def get_file(remote_url,local_path): 264 | if not os.path.isfile(local_path): 265 | with closing(urlopen(remote_url)) as remotefile: 266 | with open(local_path,'wb') as localfile: 267 | shutil.copyfileobj(remotefile,localfile) 268 | 269 | def list_split(lst,num): 270 | assert num > 0 271 | return [lst[start::num] for start in range(num)] 272 | 273 | def ndarrayhash(v): 274 | assert isinstance(v,np.ndarray) 275 | return hashlib.sha1(v).hexdigest() 276 | 277 | ### numerical linear algebra 278 | 279 | def inv_psd(A, return_chol=False): 280 | L = np.linalg.cholesky(A) 281 | Ainv = lapack.dpotri(L, lower=True)[0] 282 | copy_lower_to_upper(Ainv) 283 | # if not np.allclose(Ainv, np.linalg.inv(A), rtol=1e-5, atol=1e-5): 284 | # import ipdb; ipdb.set_trace() 285 | if return_chol: 286 | return Ainv, L 287 | else: 288 | return Ainv 289 | 290 | def 
solve_psd(A,b,chol=None,lower=True,overwrite_b=False,overwrite_A=False): 291 | if chol is None: 292 | return lapack.dposv(A,b,overwrite_b=overwrite_b,overwrite_a=overwrite_A)[1] 293 | else: 294 | return lapack.dpotrs(chol,b,lower,overwrite_b)[0] 295 | 296 | def copy_lower_to_upper(A): 297 | A += np.tril(A,k=-1).T 298 | 299 | 300 | # NOTE: existing numpy object array construction acts a bit weird, e.g. 301 | # np.array([randn(3,4),randn(3,5)]) vs np.array([randn(3,4),randn(5,3)]) 302 | # this wrapper class is just meant to ensure that when ndarrays of objects are 303 | # constructed the construction doesn't "recurse" as in the first example 304 | class ObjArray(np.ndarray): 305 | def __new__(cls,lst): 306 | if isinstance(lst,(np.ndarray,float,int)): 307 | return lst 308 | else: 309 | return np.ndarray.__new__(cls,len(lst),dtype=np.object) 310 | 311 | def __init__(self,lst): 312 | if not isinstance(lst,(np.ndarray,float,int)): 313 | for i, elt in enumerate(lst): 314 | self[i] = self.__class__(elt) 315 | 316 | # Here's an alternative to ObjArray: just construct an obj array from a list 317 | def objarray(lst): 318 | a = np.empty(len(lst), dtype=object) 319 | for i,o in enumerate(lst): 320 | a[i] = o 321 | return a 322 | 323 | def all_none(*args): 324 | return all(_ is None for _ in args) 325 | 326 | def any_none(*args): 327 | return any(_ is None for _ in args) 328 | 329 | -------------------------------------------------------------------------------- /pybasicbayes/util/plot.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import range 3 | import numpy as np 4 | from matplotlib import pyplot as plt 5 | 6 | def plot_gaussian_2D(mu, lmbda, color='b', centermarker=True,label='',alpha=1.,ax=None,artists=None): 7 | ''' 8 | Plots mean and cov ellipsoid into current axes. Must be 2D. lmbda is a covariance matrix. 9 | ''' 10 | assert len(mu) == 2 11 | ax = ax if ax else plt.gca() 12 | 13 | # TODO if update alpha=0. and our previous alpha is 0., we don't need to 14 | # dirty the artist 15 | 16 | t = np.hstack([np.arange(0,2*np.pi,0.01),0]) 17 | circle = np.vstack([np.sin(t),np.cos(t)]) 18 | ellipse = np.dot(np.linalg.cholesky(lmbda),circle) 19 | 20 | if artists is None: 21 | point = ax.scatter([mu[0]],[mu[1]],marker='D',color=color,s=4,alpha=alpha) \ 22 | if centermarker else None 23 | line, = ax.plot(ellipse[0,:] + mu[0], ellipse[1,:] + mu[1],linestyle='-', 24 | linewidth=2,color=color,label=label,alpha=alpha) 25 | else: 26 | line, point = artists 27 | if centermarker: 28 | point.set_offsets(np.atleast_2d(mu)) 29 | point.set_alpha(alpha) 30 | point.set_color(color) 31 | line.set_xdata(ellipse[0,:] + mu[0]) 32 | line.set_ydata(ellipse[1,:] + mu[1]) 33 | line.set_alpha(alpha) 34 | line.set_color(color) 35 | 36 | return (line, point) if point else (line,) 37 | 38 | 39 | def plot_gaussian_projection(mu, lmbda, vecs, **kwargs): 40 | ''' 41 | Plots a ndim gaussian projected onto 2D vecs, where vecs is a matrix whose two columns 42 | are the subset of some orthonomral basis (e.g. from PCA on samples). 
43 | ''' 44 | return plot_gaussian_2D(project_data(mu,vecs),project_ellipsoid(lmbda,vecs),**kwargs) 45 | 46 | 47 | def pca_project_data(data,num_components=2): 48 | # convenience combination of the next two functions 49 | return project_data(data,pca(data,num_components=num_components)) 50 | 51 | 52 | def pca(data,num_components=2): 53 | U,s,Vh = np.linalg.svd(data - np.mean(data,axis=0)) 54 | return Vh.T[:,:num_components] 55 | 56 | 57 | def project_data(data,vecs): 58 | return np.dot(data,vecs.T) 59 | 60 | 61 | def project_ellipsoid(ellipsoid,vecs): 62 | # vecs is a matrix whose columns are a subset of an orthonormal basis 63 | # ellipsoid is a pos def matrix 64 | return np.dot(vecs,np.dot(ellipsoid,vecs.T)) 65 | 66 | 67 | def subplot_gridsize(num): 68 | return sorted(min([(x,int(np.ceil(num/x))) for x in range(1,int(np.floor(np.sqrt(num)))+1)],key=sum)) 69 | -------------------------------------------------------------------------------- /pybasicbayes/util/profiling.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from future import standard_library 4 | standard_library.install_aliases() 5 | import numpy as np 6 | import sys, io, inspect, os, functools, time, collections 7 | 8 | ### use @timed for really basic timing 9 | 10 | _timings = collections.defaultdict(list) 11 | 12 | def timed(func): 13 | @functools.wraps(func) 14 | def wrapped(*args,**kwargs): 15 | tic = time.time() 16 | out = func(*args,**kwargs) 17 | _timings[func].append(time.time() - tic) 18 | return out 19 | return wrapped 20 | 21 | def show_timings(stream=None): 22 | if stream is None: 23 | stream = sys.stdout 24 | if len(_timings) > 0: 25 | results = [(inspect.getsourcefile(f),f.__name__, 26 | len(vals),np.sum(vals),np.mean(vals),np.std(vals)) 27 | for f, vals in _timings.items()] 28 | filename_lens = max(len(filename) for filename, _, _, _, _, _ in results) 29 | name_lens = max(len(name) for _, name, _, _, _, _ in results) 30 | 31 | fmt = '{:>%d} {:>%d} {:>10} {:>10} {:>10} {:>10}' % (filename_lens, name_lens) 32 | print(fmt.format('file','name','ncalls','tottime','avg time','std dev'), file=stream) 33 | 34 | fmt = '{:>%d} {:>%d} {:>10} {:>10.3} {:>10.3} {:>10.3}' % (filename_lens, name_lens) 35 | print('\n'.join(fmt.format(*tup) for tup in sorted(results)), file=stream) 36 | 37 | ### use @line_profiled for a thin wrapper around line_profiler 38 | 39 | try: 40 | import line_profiler 41 | _prof = line_profiler.LineProfiler() 42 | 43 | def line_profiled(func): 44 | mod = inspect.getmodule(func) 45 | if 'PROFILING' in os.environ or (hasattr(mod,'PROFILING') and mod.PROFILING): 46 | return _prof(func) 47 | return func 48 | 49 | def show_line_stats(stream=None): 50 | _prof.print_stats(stream=stream) 51 | except ImportError: 52 | line_profiled = lambda x: x 53 | 54 | -------------------------------------------------------------------------------- /pybasicbayes/util/stats.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import absolute_import 3 | from builtins import range 4 | import numpy as np 5 | from numpy.random import random 6 | na = np.newaxis 7 | import scipy.stats as stats 8 | import scipy.special as special 9 | import scipy.linalg 10 | from scipy.special import logsumexp 11 | from numpy.core.umath_tests import inner1d 12 | 13 | from .general import any_none, blockarray 14 | 15 | ### data abstraction 16 | 17 | 
# the data type is ndarrays OR lists of ndarrays 18 | # type Data = ndarray | [ndarray] 19 | 20 | def atleast_2d(data): 21 | # NOTE: can't use np.atleast_2d because if it's 1D we want axis 1 to be the 22 | # singleton and axis 0 to be the sequence index 23 | if data.ndim == 1: 24 | return data.reshape((-1,1)) 25 | return data 26 | 27 | def mask_data(data): 28 | return np.ma.masked_array( 29 | np.nan_to_num(data),np.isnan(data),fill_value=0.,hard_mask=True) 30 | 31 | def gi(data): 32 | out = (np.isnan(atleast_2d(data)).sum(1) == 0).ravel() 33 | return out if len(out) != 0 else None 34 | 35 | def getdatasize(data): 36 | if isinstance(data,np.ma.masked_array): 37 | return data.shape[0] - data.mask.reshape((data.shape[0],-1))[:,0].sum() 38 | elif isinstance(data,np.ndarray): 39 | if len(data) == 0: 40 | return 0 41 | return data[gi(data)].shape[0] 42 | elif isinstance(data,list): 43 | return sum(getdatasize(d) for d in data) 44 | else: 45 | # handle unboxed case for convenience 46 | assert isinstance(data,int) or isinstance(data,float) 47 | return 1 48 | 49 | def getdatadimension(data): 50 | if isinstance(data,np.ndarray): 51 | assert data.ndim > 1 52 | return data.shape[1] 53 | elif isinstance(data,list): 54 | assert len(data) > 0 55 | return getdatadimension(data[0]) 56 | else: 57 | # handle unboxed case for convenience 58 | assert isinstance(data,int) or isinstance(data,float) 59 | return 1 60 | 61 | def combinedata(datas): 62 | ret = [] 63 | for data in datas: 64 | if isinstance(data,np.ma.masked_array): 65 | ret.append(np.ma.compress_rows(data)) 66 | if isinstance(data,np.ndarray): 67 | ret.append(data) 68 | elif isinstance(data,list): 69 | ret.extend(combinedata(data)) 70 | else: 71 | # handle unboxed case for convenience 72 | assert isinstance(data,int) or isinstance(data,float) 73 | ret.append(np.atleast_1d(data)) 74 | return ret 75 | 76 | def flattendata(data): 77 | # data is either an array (possibly a maskedarray) or a list of arrays 78 | if isinstance(data,np.ndarray): 79 | return data 80 | elif isinstance(data,list) or isinstance(data,tuple): 81 | if any(isinstance(d,np.ma.MaskedArray) for d in data): 82 | return np.concatenate([np.ma.compress_rows(d) for d in data]) 83 | else: 84 | return np.concatenate(data) 85 | else: 86 | # handle unboxed case for convenience 87 | assert isinstance(data,int) or isinstance(data,float) 88 | return np.atleast_1d(data) 89 | 90 | ### misc 91 | def update_param(oldv, newv, stepsize): 92 | return oldv * (1 - stepsize) + newv * stepsize 93 | 94 | 95 | def cov(a): 96 | # return np.cov(a,rowvar=0,bias=1) 97 | mu = a.mean(0) 98 | if isinstance(a,np.ma.MaskedArray): 99 | return np.ma.dot(a.T,a)/a.count(0)[0] - np.ma.outer(mu,mu) 100 | else: 101 | return a.T.dot(a)/a.shape[0] - np.outer(mu,mu) 102 | 103 | def normal_cdf(x, mu=0.0, sigma=1.0): 104 | z = (x - mu) / sigma 105 | return 0.5 * special.erfc(-z / np.sqrt(2)) 106 | 107 | 108 | ### Sampling functions 109 | 110 | def sample_gaussian(mu=None,Sigma=None,J=None,h=None): 111 | mean_params = mu is not None and Sigma is not None 112 | info_params = J is not None and h is not None 113 | assert mean_params or info_params 114 | 115 | if not any_none(mu,Sigma): 116 | return np.random.multivariate_normal(mu,Sigma) 117 | else: 118 | from scipy.linalg.lapack import dpotrs 119 | L = np.linalg.cholesky(J) 120 | x = np.random.randn(h.shape[0]) 121 | return scipy.linalg.solve_triangular(L,x,lower=True,trans='T') \ 122 | + dpotrs(L,h,lower=True)[0] 123 | 124 | def sample_truncated_gaussian(mu=0, sigma=1, lb=-np.Inf, 
ub=np.Inf): 125 | """ 126 | Sample a truncated normal with the specified params. This 127 | is not the most stable way but it works as long as the 128 | truncation region is not too far from the mean. 129 | """ 130 | # Broadcast arrays to be of the same shape 131 | mu, sigma, lb, ub = np.broadcast_arrays(mu, sigma, lb, ub) 132 | shp = mu.shape 133 | if np.allclose(sigma, 0.0): 134 | return mu 135 | 136 | cdflb = normal_cdf(lb, mu, sigma) 137 | cdfub = normal_cdf(ub, mu, sigma) 138 | 139 | # Sample uniformly from the CDF 140 | cdfsamples = cdflb + np.random.rand(*shp) * (cdfub-cdflb) 141 | 142 | # Clip the CDF samples so that we can invert them 143 | cdfsamples = np.clip(cdfsamples, 1e-15, 1-1e-15) 144 | zs = -np.sqrt(2) * special.erfcinv(2 * cdfsamples) 145 | 146 | # Transform the standard normal samples 147 | xs = sigma * zs + mu 148 | xs = np.clip(xs, lb, ub) 149 | 150 | return xs 151 | 152 | def sample_discrete(distn,size=[],dtype=np.int32): 153 | 'samples from a one-dimensional finite pmf' 154 | distn = np.atleast_1d(distn) 155 | assert (distn >=0).all() and distn.ndim == 1 156 | if (0 == distn).all(): 157 | return np.random.randint(distn.shape[0],size=size) 158 | cumvals = np.cumsum(distn) 159 | return np.sum(np.array(random(size))[...,na] * cumvals[-1] > cumvals, axis=-1,dtype=dtype) 160 | 161 | def sample_discrete_from_log(p_log,return_lognorms=False,axis=0,dtype=np.int32): 162 | 'samples log probability array along specified axis' 163 | lognorms = logsumexp(p_log,axis=axis) 164 | cumvals = np.exp(p_log - np.expand_dims(lognorms,axis)).cumsum(axis) 165 | thesize = np.array(p_log.shape) 166 | thesize[axis] = 1 167 | randvals = random(size=thesize) * \ 168 | np.reshape(cumvals[[slice(None) if i is not axis else -1 169 | for i in range(p_log.ndim)]],thesize) 170 | samples = np.sum(randvals > cumvals,axis=axis,dtype=dtype) 171 | if return_lognorms: 172 | return samples, lognorms 173 | else: 174 | return samples 175 | 176 | def sample_markov(T,trans_matrix,init_state_distn): 177 | out = np.empty(T,dtype=np.int32) 178 | out[0] = sample_discrete(init_state_distn) 179 | for t in range(1,T): 180 | out[t] = sample_discrete(trans_matrix[out[t-1]]) 181 | return out 182 | 183 | def sample_invgamma(alpha, beta): 184 | return 1./np.random.gamma(alpha, 1./beta) 185 | 186 | def niw_expectedstats(nu, S, m, kappa): 187 | D = m.shape[0] 188 | 189 | # TODO speed this up with cholesky of S 190 | E_J = nu * np.linalg.inv(S) 191 | E_h = nu * np.linalg.solve(S,m) 192 | E_muJmuT = D/kappa + m.dot(E_h) 193 | E_logdetSigmainv = special.digamma((nu-np.arange(D))/2.).sum() \ 194 | + D*np.log(2.) - np.linalg.slogdet(S)[1] 195 | 196 | return E_J, E_h, E_muJmuT, E_logdetSigmainv 197 | 198 | 199 | def sample_niw(mu,lmbda,kappa,nu): 200 | ''' 201 | Returns a sample from the normal/inverse-wishart distribution, conjugate 202 | prior for (simultaneously) unknown mean and unknown covariance in a 203 | Gaussian likelihood model. Returns covariance. 204 | ''' 205 | # code is based on Matlab's method 206 | # reference: p. 87 in Gelman's Bayesian Data Analysis 207 | assert nu > lmbda.shape[0] and kappa > 0 208 | 209 | # first sample Sigma ~ IW(lmbda,nu) 210 | lmbda = sample_invwishart(lmbda,nu) 211 | # then sample mu | Lambda ~ N(mu, Lambda/kappa) 212 | mu = np.random.multivariate_normal(mu,lmbda / kappa) 213 | 214 | return mu, lmbda 215 | 216 | def sample_invwishart(S,nu): 217 | # TODO make a version that returns the cholesky 218 | # TODO allow passing in chol/cholinv of matrix parameter lmbda 219 | # TODO lowmem! memoize! 
dchud (eigen?) 220 | n = S.shape[0] 221 | chol = np.linalg.cholesky(S) 222 | 223 | if (nu <= 81+n) and (nu == np.round(nu)): 224 | x = np.random.randn(int(nu),n) 225 | else: 226 | x = np.diag(np.sqrt(np.atleast_1d(stats.chi2.rvs(nu-np.arange(n))))) 227 | x[np.triu_indices_from(x,1)] = np.random.randn(n*(n-1)//2) 228 | R = np.linalg.qr(x,'r') 229 | T = scipy.linalg.solve_triangular(R.T,chol.T,lower=True).T 230 | return np.dot(T,T.T) 231 | 232 | def sample_wishart(sigma, nu): 233 | n = sigma.shape[0] 234 | chol = np.linalg.cholesky(sigma) 235 | 236 | # use matlab's heuristic for choosing between the two different sampling schemes 237 | if (nu <= 81+n) and (nu == round(nu)): 238 | # direct 239 | X = np.dot(chol,np.random.normal(size=(n,nu))) 240 | else: 241 | A = np.diag(np.sqrt(np.random.chisquare(nu - np.arange(n)))) 242 | A[np.tri(n,k=-1,dtype=bool)] = np.random.normal(size=(n*(n-1)/2.)) 243 | X = np.dot(chol,A) 244 | 245 | return np.dot(X,X.T) 246 | 247 | def sample_mn(M, U=None, Uinv=None, V=None, Vinv=None): 248 | assert (U is None) ^ (Uinv is None) 249 | assert (V is None) ^ (Vinv is None) 250 | 251 | G = np.random.normal(size=M.shape) 252 | 253 | if U is not None: 254 | G = np.dot(np.linalg.cholesky(U),G) 255 | else: 256 | G = np.linalg.solve(np.linalg.cholesky(Uinv).T,G) 257 | 258 | if V is not None: 259 | G = np.dot(G,np.linalg.cholesky(V).T) 260 | else: 261 | G = np.linalg.solve(np.linalg.cholesky(Vinv).T,G.T).T 262 | 263 | return M + G 264 | 265 | def sample_mniw(nu, S, M, K=None, Kinv=None): 266 | assert (K is None) ^ (Kinv is None) 267 | Sigma = sample_invwishart(S,nu) 268 | if K is not None: 269 | return sample_mn(M=M,U=Sigma,V=K), Sigma 270 | else: 271 | return sample_mn(M=M,U=Sigma,Vinv=Kinv), Sigma 272 | 273 | def mniw_expectedstats(nu, S, M, K=None, Kinv=None): 274 | # NOTE: could speed this up with chol factorizing S, not re-solving 275 | assert (K is None) ^ (Kinv is None) 276 | m = M.shape[0] 277 | K = K if K is not None else np.linalg.inv(Kinv) 278 | 279 | E_Sigmainv = nu*np.linalg.inv(S) 280 | E_Sigmainv_A = nu*np.linalg.solve(S,M) 281 | E_AT_Sigmainv_A = m*K + nu*M.T.dot(np.linalg.solve(S,M)) 282 | E_logdetSigmainv = special.digamma((nu-np.arange(m))/2.).sum() \ 283 | + m*np.log(2) - np.linalg.slogdet(S)[1] 284 | 285 | return E_Sigmainv, E_Sigmainv_A, E_AT_Sigmainv_A, E_logdetSigmainv 286 | 287 | def mniw_log_partitionfunction(nu, S, M, K): 288 | n = M.shape[0] 289 | return n*nu/2*np.log(2) + special.multigammaln(nu/2., n) \ 290 | - nu/2*np.linalg.slogdet(S)[1] - n/2*np.linalg.slogdet(K)[1] 291 | 292 | def sample_pareto(x_m,alpha): 293 | return x_m + np.random.pareto(alpha) 294 | 295 | def sample_crp_tablecounts(concentration,customers,colweights): 296 | m = np.zeros_like(customers) 297 | tot = customers.sum() 298 | randseq = np.random.random(tot) 299 | 300 | starts = np.empty_like(customers) 301 | starts[0,0] = 0 302 | starts.flat[1:] = np.cumsum(np.ravel(customers)[:customers.size-1]) 303 | 304 | for (i,j), n in np.ndenumerate(customers): 305 | w = colweights[j] 306 | for k in range(n): 307 | m[i,j] += randseq[starts[i,j]+k] \ 308 | < (concentration * w) / (k + concentration * w) 309 | 310 | return m 311 | 312 | ### Entropy 313 | def invwishart_entropy(sigma,nu,chol=None): 314 | D = sigma.shape[0] 315 | chol = np.linalg.cholesky(sigma) if chol is None else chol 316 | Elogdetlmbda = special.digamma((nu-np.arange(D))/2).sum() + D*np.log(2) - 2*np.log(chol.diagonal()).sum() 317 | return invwishart_log_partitionfunction(sigma,nu,chol)-(nu-D-1)/2*Elogdetlmbda + nu*D/2 
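# NOTE: a minimal usage sketch for the sampling and entropy helpers above, assuming a
# 2-dimensional NIW prior; the hyperparameter values are illustrative, chosen so that
# nu_0 > D and kappa_0 > 0 as these functions require:
#
#   import numpy as np
#   from pybasicbayes.util.stats import sample_niw, sample_invwishart, invwishart_entropy
#
#   mu_0, lmbda_0, kappa_0, nu_0 = np.zeros(2), np.eye(2), 0.05, 5.
#   mu, sigma = sample_niw(mu_0, lmbda_0, kappa_0, nu_0)  # joint draw of (mean, covariance)
#   sigma_iw = sample_invwishart(lmbda_0, nu_0)           # covariance-only draw from IW(lmbda_0, nu_0)
#   h = invwishart_entropy(lmbda_0, nu_0)                 # entropy of that inverse-Wishart prior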
318 | 319 | def invwishart_log_partitionfunction(sigma,nu,chol=None): 320 | # In Bishop B.79 notation, this is -log B(W, nu), where W = sigma^{-1} 321 | D = sigma.shape[0] 322 | chol = np.linalg.cholesky(sigma) if chol is None else chol 323 | return -1*(nu*np.log(chol.diagonal()).sum() - (nu*D/2*np.log(2) + D*(D-1)/4*np.log(np.pi) \ 324 | + special.gammaln((nu-np.arange(D))/2).sum())) 325 | 326 | ### Predictive 327 | 328 | def multivariate_t_loglik(y,nu,mu,lmbda): 329 | # returns the log value 330 | d = len(mu) 331 | yc = np.array(y-mu,ndmin=2) 332 | L = np.linalg.cholesky(lmbda) 333 | ys = scipy.linalg.solve_triangular(L,yc.T,overwrite_b=True,lower=True) 334 | return scipy.special.gammaln((nu+d)/2.) - scipy.special.gammaln(nu/2.) \ 335 | - (d/2.)*np.log(nu*np.pi) - np.log(L.diagonal()).sum() \ 336 | - (nu+d)/2.*np.log1p(1./nu*inner1d(ys.T,ys.T)) 337 | 338 | def beta_predictive(priorcounts,newcounts): 339 | prior_nsuc, prior_nfail = priorcounts 340 | nsuc, nfail = newcounts 341 | 342 | numer = scipy.special.gammaln(np.array([nsuc+prior_nsuc, 343 | nfail+prior_nfail, prior_nsuc+prior_nfail])).sum() 344 | denom = scipy.special.gammaln(np.array([prior_nsuc, prior_nfail, 345 | prior_nsuc+prior_nfail+nsuc+nfail])).sum() 346 | return numer - denom 347 | 348 | ### Statistical tests 349 | 350 | def two_sample_t_statistic(pop1, pop2): 351 | pop1, pop2 = (flattendata(p) for p in (pop1, pop2)) 352 | t = (pop1.mean(0) - pop2.mean(0)) / np.sqrt(pop1.var(0)/pop1.shape[0] + pop2.var(0)/pop2.shape[0]) 353 | p = 2*stats.t.sf(np.abs(t),np.minimum(pop1.shape[0],pop2.shape[0])) 354 | return t,p 355 | 356 | def f_statistic(pop1, pop2): # TODO test 357 | pop1, pop2 = (flattendata(p) for p in (pop1, pop2)) 358 | var1, var2 = pop1.var(0), pop2.var(0) 359 | n1, n2 = np.where(var1 >= var2, pop1.shape[0], pop2.shape[0]), \ 360 | np.where(var1 >= var2, pop2.shape[0], pop1.shape[0]) 361 | var1, var2 = np.maximum(var1,var2), np.minimum(var1,var2) 362 | f = var1 / var2 363 | p = stats.f.sf(f,n1,n2) 364 | return f,p 365 | 366 | -------------------------------------------------------------------------------- /pybasicbayes/util/testing.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import absolute_import 3 | from builtins import zip 4 | import numpy as np 5 | from numpy import newaxis as na 6 | 7 | from . import stats, general 8 | 9 | ######################### 10 | # statistical testing # 11 | ######################### 12 | 13 | ### graphical 14 | 15 | def populations_eq_quantile_plot(pop1, pop2, fig=None, percentilecutoff=5): 16 | import matplotlib.pyplot as plt 17 | 18 | pop1, pop2 = stats.flattendata(pop1), stats.flattendata(pop2) 19 | assert pop1.ndim == pop2.ndim == 1 or \ 20 | (pop1.ndim == pop2.ndim == 2 and pop1.shape[1] == pop2.shape[1]), \ 21 | 'populations must have consistent dimensions' 22 | D = pop1.shape[1] if pop1.ndim == 2 else 1 23 | 24 | # we want to have the same number of samples 25 | n1, n2 = pop1.shape[0], pop2.shape[0] 26 | if n1 != n2: 27 | # subsample, since interpolation is dangerous 28 | if n1 < n2: 29 | pop1, pop2 = pop2, pop1 30 | np.random.shuffle(pop1) 31 | pop1 = pop1[:pop2.shape[0]] 32 | 33 | def plot_1d_scaled_quantiles(p1,p2,plot_midline=True): 34 | 35 | # scaled quantiles so that multiple calls line up 36 | p1.sort(), p2.sort() # NOTE: destructive! 
but that's cool 37 | xmin,xmax = general.scoreatpercentile(p1,percentilecutoff), \ 38 | general.scoreatpercentile(p1,100-percentilecutoff) 39 | ymin,ymax = general.scoreatpercentile(p2,percentilecutoff), \ 40 | general.scoreatpercentile(p2,100-percentilecutoff) 41 | plt.plot((p1-xmin)/(xmax-xmin),(p2-ymin)/(ymax-ymin)) 42 | 43 | if plot_midline: 44 | plt.plot((0,1),(0,1),'k--') 45 | plt.axis((0,1,0,1)) 46 | 47 | if D == 1: 48 | if fig is None: 49 | plt.figure() 50 | plot_1d_scaled_quantiles(pop1,pop2) 51 | else: 52 | if fig is None: 53 | fig = plt.figure() 54 | 55 | if not hasattr(fig,'_quantile_test_projs'): 56 | firsttime = True 57 | randprojs = np.random.randn(D,D) 58 | randprojs /= np.sqrt(np.sum(randprojs**2,axis=1))[:,na] 59 | projs = np.vstack((np.eye(D),randprojs)) 60 | fig._quantile_test_projs = projs 61 | else: 62 | firsttime = False 63 | projs = fig._quantile_test_projs 64 | 65 | ims1, ims2 = pop1.dot(projs.T), pop2.dot(projs.T) 66 | for i, (im1, im2) in enumerate(zip(ims1.T,ims2.T)): 67 | plt.subplot(2,D,i+1) 68 | plot_1d_scaled_quantiles(im1,im2,plot_midline=firsttime) 69 | 70 | ### numerical 71 | 72 | # NOTE: a random numerical test should be repeated at the OUTERMOST loop (with 73 | # exception catching) to see if its failures exceed the number expected 74 | # according to the specified pvalue (tests could be repeated via sample 75 | # bootstrapping inside the test, but that doesn't work reliably and random tests 76 | # should have no problem generating new randomness!) 77 | 78 | def assert_populations_eq(pop1, pop2): 79 | assert_populations_eq_moments(pop1,pop2) and \ 80 | assert_populations_eq_komolgorofsmirnov(pop1,pop2) 81 | 82 | def assert_populations_eq_moments(pop1, pop2, **kwargs): 83 | # just first two moments implemented; others are hard to estimate anyway! 
84 | assert_populations_eq_means(pop1,pop2,**kwargs) and \ 85 | assert_populations_eq_variances(pop1,pop2,**kwargs) 86 | 87 | def assert_populations_eq_means(pop1, pop2, pval=0.05, msg=None): 88 | _,p = stats.two_sample_t_statistic(pop1,pop2) 89 | if np.any(p < pval): 90 | raise AssertionError(msg or "population means might be different at %0.3f" % pval) 91 | 92 | def assert_populations_eq_variances(pop1, pop2, pval=0.05, msg=None): 93 | _,p = stats.f_statistic(pop1, pop2) 94 | if np.any(p < pval): 95 | raise AssertionError(msg or "population variances might be different at %0.3f" % pval) 96 | 97 | def assert_populations_eq_komolgorofsmirnov(pop1, pop2, msg=None): 98 | raise NotImplementedError # TODO 99 | 100 | -------------------------------------------------------------------------------- /pybasicbayes/util/text.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | import numpy as np 4 | import sys, time 5 | 6 | # time.clock() is cpu time of current process 7 | # time.time() is wall time 8 | 9 | # TODO there are probably better progress bar libraries I could use 10 | 11 | round = (lambda x: lambda y: int(x(y)))(round) 12 | 13 | # NOTE: datetime.timedelta.__str__ doesn't allow formatting the number of digits 14 | def sec2str(seconds): 15 | hours, rem = divmod(seconds,3600) 16 | minutes, seconds = divmod(rem,60) 17 | if hours > 0: 18 | return '%02d:%02d:%02d' % (hours,minutes,round(seconds)) 19 | elif minutes > 0: 20 | return '%02d:%02d' % (minutes,round(seconds)) 21 | else: 22 | return '%0.2f' % seconds 23 | 24 | def progprint_xrange(*args,**kwargs): 25 | xr = range(*args) 26 | return progprint(xr,total=len(xr),**kwargs) 27 | 28 | def progprint(iterator,total=None,perline=25,show_times=True): 29 | times = [] 30 | idx = 0 31 | if total is not None: 32 | numdigits = len('%d' % total) 33 | for thing in iterator: 34 | prev_time = time.time() 35 | yield thing 36 | times.append(time.time() - prev_time) 37 | sys.stdout.write('.') 38 | if (idx+1) % perline == 0: 39 | if show_times: 40 | avgtime = np.mean(times) 41 | if total is not None: 42 | eta = sec2str(avgtime*(total-(idx+1))) 43 | sys.stdout.write(( 44 | ' [ %%%dd/%%%dd, %%7.2fsec avg, ETA %%s ]\n' 45 | % (numdigits,numdigits)) % (idx+1,total,avgtime,eta)) 46 | else: 47 | sys.stdout.write(' [ %d done, %7.2fsec avg ]\n' % (idx+1,avgtime)) 48 | else: 49 | if total is not None: 50 | sys.stdout.write((' [ %%%dd/%%%dd ]\n' % (numdigits,numdigits) ) % (idx+1,total)) 51 | else: 52 | sys.stdout.write(' [ %d ]\n' % (idx+1)) 53 | idx += 1 54 | sys.stdout.flush() 55 | print('') 56 | if show_times and len(times) > 0: 57 | total = sec2str(seconds=np.sum(times)) 58 | print('%7.2fsec avg, %s total\n' % (np.mean(times),total)) 59 | 60 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from setuptools.command.build_ext import build_ext as _build_ext 3 | from setuptools.command.sdist import sdist as _sdist 4 | from distutils.errors import CompileError 5 | from warnings import warn 6 | import os.path 7 | 8 | try: 9 | from Cython.Distutils import build_ext as _build_ext 10 | except ImportError: 11 | use_cython = False 12 | else: 13 | use_cython = True 14 | 15 | class build_ext(_build_ext): 16 | # see http://stackoverflow.com/q/19919905 for explanation 17 | def finalize_options(self): 
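        # numpy is imported here at build time, rather than at the top of setup.py, so that
        # setup_requires=['numpy'] can make it available first; its headers are then added to
        # the extension include path (see the stackoverflow link above).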
18 | _build_ext.finalize_options(self) 19 | __builtins__.__NUMPY_SETUP__ = False 20 | import numpy as np 21 | self.include_dirs.append(np.get_include()) 22 | 23 | # if extension modules fail to build, keep going anyway 24 | def run(self): 25 | try: 26 | _build_ext.run(self) 27 | except CompileError: 28 | warn('Failed to build extension modules') 29 | 30 | class sdist(_sdist): 31 | def run(self): 32 | try: 33 | from Cython.Build import cythonize 34 | cythonize(os.path.join('pybasicbayes','**','*.pyx')) 35 | except: 36 | warn('Failed to generate extension files from Cython sources') 37 | finally: 38 | _sdist.run(self) 39 | 40 | ext_modules=[ 41 | Extension( 42 | 'pybasicbayes.util.cstats', ['pybasicbayes/util/cstats.c'], 43 | extra_compile_args=['-O3','-w']), 44 | ] 45 | 46 | if use_cython: 47 | from Cython.Build import cythonize 48 | try: 49 | ext_modules = cythonize(os.path.join('pybasicbayes','**','*.pyx')) 50 | except: 51 | warn('Failed to generate extension module code from Cython files') 52 | 53 | setup(name='pybasicbayes', 54 | version='0.2.4', 55 | description="Basic utilities for Bayesian inference", 56 | author='Matthew James Johnson', 57 | author_email='mattjj@csail.mit.edu', 58 | url="http://github.com/mattjj/pybasicbayes", 59 | packages=[ 60 | 'pybasicbayes', 'pybasicbayes.distributions', 61 | 'pybasicbayes.util', 'pybasicbayes.testing', 'pybasicbayes.models'], 62 | platforms='ALL', 63 | keywords=[ 64 | 'bayesian', 'inference', 'mcmc', 'variational inference', 65 | 'mean field', 'vb'], 66 | install_requires=["numpy", "scipy", "matplotlib", "nose", "future"], 67 | setup_requires=['numpy'], 68 | classifiers=[ 69 | 'Intended Audience :: Science/Research', 70 | 'Programming Language :: Python', 71 | ], 72 | ext_modules=ext_modules, 73 | cmdclass={'build_ext': build_ext, 'sdist': sdist}) 74 | -------------------------------------------------------------------------------- /tests/test_categorical.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, \ 8 | GewekeGibbsTester 9 | 10 | 11 | @attr('categorical') 12 | class TestCategorical(BigDataGibbsTester,GewekeGibbsTester): 13 | @property 14 | def distribution_class(self): 15 | return distributions.Categorical 16 | 17 | @property 18 | def hyperparameter_settings(self): 19 | return (dict(alpha_0=5.,K=5),) 20 | 21 | @property 22 | def big_data_size(self): 23 | return 20000 24 | 25 | def params_close(self,d1,d2): 26 | return np.allclose(d1.weights,d2.weights,atol=0.05) 27 | 28 | def geweke_statistics(self,d,data): 29 | return d.weights 30 | 31 | @property 32 | def geweke_pval(self): 33 | return 0.05 34 | 35 | 36 | @attr('categorical_concentration') 37 | class TestCategorical(GewekeGibbsTester): 38 | #class TestCategorical(BigDataGibbsTester,GewekeGibbsTester): 39 | @property 40 | def distribution_class(self): 41 | return distributions.CategoricalAndConcentration 42 | 43 | @property 44 | def hyperparameter_settings(self): 45 | return (dict(a_0=5., b_0=5.0, K=5),) 46 | 47 | @property 48 | def big_data_size(self): 49 | return 20000 50 | 51 | def params_close(self,d1,d2): 52 | return np.allclose(d1.weights,d2.weights,atol=0.05) and \ 53 | np.allclose(d1.alpha_0,d2.alpha_0,atol=0.05) 54 | 55 | def geweke_statistics(self,d,data): 56 | #return np.concatenate((d.weights, [d.alpha_0])) 57 | 
return np.array(d.alpha_0) 58 | 59 | @property 60 | def geweke_nsamples(self): 61 | return 3000 62 | 63 | @property 64 | def geweke_data_size(self): 65 | return 1 # NOTE: more data usually means slower mixing 66 | 67 | @property 68 | def geweke_ntrials(self): 69 | return 1 70 | 71 | @property 72 | def geweke_pval(self): 73 | return 0.05 74 | -------------------------------------------------------------------------------- /tests/test_gammadirichlet.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import range 3 | from builtins import object 4 | import numpy as np 5 | 6 | from nose.plugins.attrib import attr 7 | 8 | import pybasicbayes.distributions as distributions 9 | from pybasicbayes.testing.mixins import GewekeGibbsTester 10 | 11 | 12 | @attr('GammaCompoundDirichlet', 'slow') 13 | class TestDirichletCompoundGamma(GewekeGibbsTester): 14 | def test_weaklimit(self): 15 | a = distributions.CRP(10,1) 16 | b = distributions.GammaCompoundDirichlet(1000,10,1) 17 | 18 | a.concentration = b.concentration = 10. 19 | 20 | from matplotlib import pyplot as plt 21 | 22 | plt.figure() 23 | crp_counts = np.zeros(10) 24 | gcd_counts = np.zeros(10) 25 | for itr in range(500): 26 | crp_rvs = np.sort(a.rvs(25))[::-1][:10] 27 | crp_counts[:len(crp_rvs)] += crp_rvs 28 | gcd_counts += np.sort(b.rvs(25))[::-1][:10] 29 | 30 | plt.plot(crp_counts/200,gcd_counts/200,'bx-') 31 | plt.xlim(0,10) 32 | plt.ylim(0,10) 33 | 34 | import os 35 | from pybasicbayes.testing.mixins import mkdir 36 | figpath = os.path.join( 37 | os.path.dirname(__file__),'figures', 38 | self.__class__.__name__,'weaklimittest.pdf') 39 | mkdir(os.path.dirname(figpath)) 40 | plt.savefig(figpath) 41 | 42 | @property 43 | def distribution_class(self): 44 | return distributions.GammaCompoundDirichlet 45 | 46 | @property 47 | def hyperparameter_settings(self): 48 | return (dict(K=1000, a_0=10, b_0=1),) 49 | 50 | def geweke_statistics(self, d, data): 51 | return np.array([d.concentration]) 52 | 53 | @property 54 | def geweke_resample_kwargs(self): 55 | return dict(niter=1) 56 | -------------------------------------------------------------------------------- /tests/test_gaussian.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, \ 8 | GewekeGibbsTester, BasicTester 9 | 10 | 11 | @attr('gaussian') 12 | class TestGaussian(BigDataGibbsTester,GewekeGibbsTester): 13 | @property 14 | def distribution_class(self): 15 | return distributions.Gaussian 16 | 17 | @property 18 | def hyperparameter_settings(self): 19 | return (dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=1.,nu_0=4.),) 20 | 21 | def params_close(self,d1,d2): 22 | return np.linalg.norm(d1.mu-d2.mu) < 0.1 and np.linalg.norm(d1.sigma-d2.sigma) < 0.1 23 | 24 | def geweke_statistics(self,d,data): 25 | return np.concatenate((d.mu,np.diag(d.sigma))) 26 | 27 | @property 28 | def geweke_nsamples(self): 29 | return 50000 30 | 31 | @property 32 | def geweke_data_size(self): 33 | return 1 34 | 35 | @property 36 | def geweke_pval(self): 37 | return 0.05 38 | 39 | def geweke_numerical_slice(self,d,setting_idx): 40 | return slice(0,d.mu.shape[0]) 41 | 42 | ### class-specific 43 | 44 | def test_empirical_bayes(self): 45 | data = np.random.randn(50,2) 46 | 
distributions.Gaussian().empirical_bayes(data).hypparams 47 | 48 | 49 | @attr('diagonalgaussian') 50 | class TestDiagonalGaussian(BigDataGibbsTester,GewekeGibbsTester,BasicTester): 51 | @property 52 | def distribution_class(self): 53 | return distributions.DiagonalGaussian 54 | 55 | @property 56 | def hyperparameter_settings(self): 57 | return (dict(mu_0=np.zeros(2),nus_0=7,alphas_0=np.r_[5.,10.],betas_0=np.r_[1.,4.]),) 58 | 59 | def params_close(self,d1,d2): 60 | return np.linalg.norm(d1.mu-d2.mu) < 0.1 and np.linalg.norm(d1.sigmas-d2.sigmas) < 0.25 61 | 62 | def geweke_statistics(self,d,data): 63 | return np.concatenate((d.mu,d.sigmas)) 64 | 65 | @property 66 | def geweke_nsamples(self): 67 | return 50000 68 | 69 | @property 70 | def geweke_data_size(self): 71 | return 2 72 | 73 | @property 74 | def geweke_pval(self): 75 | return 0.05 76 | 77 | def geweke_numerical_slice(self,d,setting_idx): 78 | return slice(0,d.mu.shape[0]) 79 | 80 | ### class-specific 81 | 82 | def test_log_likelihood(self): 83 | data = np.random.randn(1000,100) 84 | 85 | mu = np.random.randn(100) 86 | sigmas = np.random.uniform(1,2,size=100) 87 | 88 | d = distributions.DiagonalGaussian(mu=mu,sigmas=sigmas) 89 | pdf1 = d.log_likelihood(data) 90 | 91 | import scipy.stats as stats 92 | pdf2 = stats.norm.logpdf(data,loc=mu,scale=np.sqrt(sigmas)).sum(1) 93 | 94 | assert np.allclose(pdf1,pdf2) 95 | 96 | def test_log_likelihood2(self): 97 | data = np.random.randn(1000,600) 98 | 99 | mu = np.random.randn(600) 100 | sigmas = np.random.uniform(1,2,size=600) 101 | 102 | d = distributions.DiagonalGaussian(mu=mu,sigmas=sigmas) 103 | pdf1 = d.log_likelihood(data) 104 | 105 | import scipy.stats as stats 106 | pdf2 = stats.norm.logpdf(data,loc=mu,scale=np.sqrt(sigmas)).sum(1) 107 | 108 | assert np.allclose(pdf1,pdf2) 109 | 110 | 111 | @attr('diagonalgaussiannonconj') 112 | class TestDiagonalGaussianNonconjNIG(BigDataGibbsTester,GewekeGibbsTester,BasicTester): 113 | @property 114 | def distribution_class(self): 115 | return distributions.DiagonalGaussianNonconjNIG 116 | 117 | @property 118 | def hyperparameter_settings(self): 119 | return ( 120 | dict(mu_0=np.zeros(2),sigmas_0=np.ones(2),alpha_0=np.ones(2),beta_0=np.ones(2)), 121 | dict(mu_0=np.zeros(600),sigmas_0=np.ones(600),alpha_0=np.ones(600),beta_0=np.ones(600)), 122 | ) 123 | 124 | def params_close(self,d1,d2): 125 | return np.linalg.norm(d1.mu - d2.mu) < 0.25*np.sqrt(d1.mu.shape[0]) \ 126 | and np.linalg.norm(d1.sigmas-d2.sigmas) < 0.5*d1.sigmas.shape[0] 127 | 128 | def geweke_statistics(self,d,data): 129 | return np.concatenate((d.mu,d.sigmas)) 130 | 131 | @property 132 | def geweke_nsamples(self): 133 | return 5000 134 | 135 | @property 136 | def geweke_data_size(self): 137 | return 2 138 | 139 | @property 140 | def geweke_pval(self): 141 | return 0.05 142 | 143 | def geweke_numerical_slice(self,d,setting_idx): 144 | return slice(0,d.mu.shape[0]) 145 | 146 | ### class-specific 147 | 148 | def test_log_likelihood(self): 149 | data = np.random.randn(1000,100) 150 | 151 | mu = np.random.randn(100) 152 | sigmas = np.random.uniform(1,2,size=100) 153 | 154 | d = distributions.DiagonalGaussian(mu=mu,sigmas=sigmas) 155 | pdf1 = d.log_likelihood(data) 156 | 157 | import scipy.stats as stats 158 | pdf2 = stats.norm.logpdf(data,loc=mu,scale=np.sqrt(sigmas)).sum(1) 159 | 160 | assert np.allclose(pdf1,pdf2) 161 | 162 | def test_log_likelihood2(self): 163 | data = np.random.randn(1000,600) 164 | 165 | mu = np.random.randn(600) 166 | sigmas = np.random.uniform(1,2,size=600) 167 | 168 | d 
= distributions.DiagonalGaussian(mu=mu,sigmas=sigmas) 169 | pdf1 = d.log_likelihood(data) 170 | 171 | import scipy.stats as stats 172 | pdf2 = stats.norm.logpdf(data,loc=mu,scale=np.sqrt(sigmas)).sum(1) 173 | 174 | assert np.allclose(pdf1,pdf2) 175 | 176 | 177 | @attr('gaussianfixedmean') 178 | class TestGaussianFixedMean(BigDataGibbsTester,GewekeGibbsTester): 179 | @property 180 | def distribution_class(self): 181 | return distributions.GaussianFixedMean 182 | 183 | @property 184 | def hyperparameter_settings(self): 185 | return (dict(mu=np.array([1.,2.,3.]),nu_0=5,lmbda_0=np.diag([3.,2.,1.])),) 186 | 187 | def params_close(self,d1,d2): 188 | return np.linalg.norm(d1.sigma - d2.sigma) < 0.25 189 | 190 | def geweke_statistics(self,d,data): 191 | return np.diag(d.sigma) 192 | 193 | @property 194 | def geweke_nsamples(self): 195 | return 25000 196 | 197 | @property 198 | def geweke_data_size(self): 199 | return 5 200 | 201 | @property 202 | def geweke_pval(self): 203 | return 0.05 204 | 205 | 206 | @attr('gaussianfixedcov') 207 | class TestGaussianFixedCov(BigDataGibbsTester,GewekeGibbsTester): 208 | @property 209 | def distribution_class(self): 210 | return distributions.GaussianFixedCov 211 | 212 | @property 213 | def hyperparameter_settings(self): 214 | return (dict(sigma=np.diag([3.,2.,1.]),mu_0=np.array([1.,2.,3.]),sigma_0=np.eye(3)),) 215 | 216 | def params_close(self,d1,d2): 217 | return np.linalg.norm(d1.mu-d2.mu) < 0.1 218 | 219 | def geweke_statistics(self,d,data): 220 | return d.mu 221 | 222 | @property 223 | def geweke_nsamples(self): 224 | return 25000 225 | 226 | @property 227 | def geweke_data_size(self): 228 | return 5 229 | 230 | @property 231 | def geweke_pval(self): 232 | return 0.05 233 | 234 | 235 | @attr('gaussiannonconj') 236 | class TestGaussianNonConj(BigDataGibbsTester,GewekeGibbsTester): 237 | @property 238 | def distribution_class(self): 239 | return distributions.GaussianNonConj 240 | 241 | @property 242 | def hyperparameter_settings(self): 243 | return (dict(mu_0=np.zeros(2),mu_lmbda_0=2*np.eye(2),nu_0=5,sigma_lmbda_0=np.eye(2)),) 244 | 245 | def params_close(self,d1,d2): 246 | return np.linalg.norm(d1.mu-d2.mu) < 0.1 and np.linalg.norm(d1.sigma-d2.sigma) < 0.25 247 | 248 | def geweke_statistics(self,d,data): 249 | return np.concatenate((d.mu,np.diag(d.sigma))) 250 | 251 | @property 252 | def geweke_nsamples(self): 253 | return 30000 254 | 255 | @property 256 | def geweke_data_size(self): 257 | return 1 258 | 259 | @property 260 | def geweke_pval(self): 261 | return 0.05 262 | 263 | def geweke_numerical_slice(self,d,setting_idx): 264 | return slice(0,d.mu.shape[0]) 265 | 266 | @property 267 | def resample_kwargs(self): 268 | return dict(niter=10) 269 | 270 | 271 | @attr('scalargaussiannix') 272 | class TestScalarGaussianNIX(BigDataGibbsTester,GewekeGibbsTester): 273 | @property 274 | def distribution_class(self): 275 | return distributions.ScalarGaussianNIX 276 | 277 | @property 278 | def hyperparameter_settings(self): 279 | return (dict(mu_0=2.7,kappa_0=2.,sigmasq_0=4.,nu_0=2),) 280 | 281 | def params_close(self,d1,d2): 282 | return np.abs(d1.mu-d2.mu) < 0.5 and np.abs(d2.sigmasq - d2.sigmasq) < 0.5 283 | 284 | def geweke_statistics(self,d,data): 285 | return np.array((d.mu,d.sigmasq)) 286 | 287 | @property 288 | def geweke_nsamples(self): 289 | return 30000 290 | 291 | @property 292 | def geweke_data_size(self): 293 | return 2 294 | 295 | @property 296 | def geweke_pval(self): 297 | return 0.05 298 | 299 | def geweke_numerical_slice(self,d,setting_idx): 300 | 
return slice(0,1) 301 | 302 | 303 | @attr('scalargaussiannonconjnix') 304 | class TestScalarGaussianNonconjNIX(BigDataGibbsTester,GewekeGibbsTester): 305 | @property 306 | def distribution_class(self): 307 | return distributions.ScalarGaussianNonconjNIX 308 | 309 | @property 310 | def hyperparameter_settings(self): 311 | return (dict(mu_0=2.7,tausq_0=4.,sigmasq_0=2.,nu_0=2),) 312 | 313 | def params_close(self,d1,d2): 314 | return np.abs(d1.mu-d2.mu) < 0.1 and np.abs(d2.sigmasq - d2.sigmasq) < 0.25 315 | 316 | def geweke_statistics(self,d,data): 317 | return np.array((d.mu,d.sigmasq)) 318 | 319 | @property 320 | def geweke_nsamples(self): 321 | return 30000 322 | 323 | @property 324 | def geweke_data_size(self): 325 | return 2 326 | 327 | @property 328 | def geweke_pval(self): 329 | return 0.05 330 | 331 | def geweke_numerical_slice(self,d,setting_idx): 332 | return slice(0,1) 333 | -------------------------------------------------------------------------------- /tests/test_geometric.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, \ 8 | GewekeGibbsTester 9 | 10 | 11 | @attr('geometric') 12 | class TestGeometric(BigDataGibbsTester,GewekeGibbsTester): 13 | @property 14 | def distribution_class(self): 15 | return distributions.Geometric 16 | 17 | @property 18 | def hyperparameter_settings(self): 19 | return (dict(alpha_0=2,beta_0=20),dict(alpha_0=5,beta_0=5)) 20 | 21 | def params_close(self,d1,d2): 22 | return np.allclose(d1.p,d2.p,rtol=0.05) 23 | 24 | def geweke_statistics(self,d,data): 25 | return d.p 26 | 27 | @property 28 | def geweke_pval(self): 29 | return 0.5 30 | -------------------------------------------------------------------------------- /tests/test_negbin.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, \ 8 | GewekeGibbsTester 9 | 10 | 11 | @attr('negbinfixedr') 12 | class TestNegativeBinomialFixedR(BigDataGibbsTester,GewekeGibbsTester): 13 | @property 14 | def distribution_class(self): 15 | return distributions.NegativeBinomialFixedR 16 | 17 | @property 18 | def hyperparameter_settings(self): 19 | return (dict(r=5,alpha_0=1,beta_0=9),) 20 | 21 | def params_close(self,d1,d2): 22 | return np.allclose(d1.p,d2.p,rtol=0.1) 23 | 24 | def geweke_statistics(self,d,data): 25 | return d.p 26 | 27 | 28 | @attr('negbinintr') 29 | class TestNegativeBinomialIntegerR(BigDataGibbsTester,GewekeGibbsTester): 30 | @property 31 | def distribution_class(self): 32 | return distributions.NegativeBinomialIntegerR 33 | 34 | @property 35 | def hyperparameter_settings(self): 36 | return (dict(r_discrete_distn=np.r_[0.,0,0,1,1,1],alpha_0=5,beta_0=5),) 37 | 38 | def params_close(self,d1,d2): 39 | # since it's easy to be off by 1 in r and still look like the same 40 | # distribution, best just to check moment parameters 41 | def mean(d): 42 | return d.r*d.p/(1.-d.p) 43 | def var(d): 44 | return mean(d)/(1.-d.p) 45 | return np.allclose(mean(d1),mean(d2),rtol=0.1) and np.allclose(var(d1),var(d2),rtol=0.1) 46 | 47 | def geweke_statistics(self,d,data): 48 | return d.p 49 | 50 | @property 51 | 
def geweke_pval(self): 52 | return 0.005 # since the statistic is on (0,1), it's really sensitive? 53 | 54 | 55 | @attr('negbinintr2') 56 | class TestNegativeBinomialIntegerR2(BigDataGibbsTester,GewekeGibbsTester): 57 | @property 58 | def distribution_class(self): 59 | return distributions.NegativeBinomialIntegerR2 60 | 61 | @property 62 | def hyperparameter_settings(self): 63 | return (dict(r_discrete_distn=np.r_[0.,0,0,1,1,1],alpha_0=5,beta_0=5),) 64 | 65 | def params_close(self,d1,d2): 66 | # since it's easy to be off by 1 in r and still look like the same 67 | # distribution, best just to check moment parameters 68 | def mean(d): 69 | return d.r*d.p/(1.-d.p) 70 | def var(d): 71 | return mean(d)/(1.-d.p) 72 | return np.allclose(mean(d1),mean(d2),rtol=0.1) and np.allclose(var(d1),var(d2),rtol=0.1) 73 | 74 | def geweke_statistics(self,d,data): 75 | return d.p 76 | 77 | @property 78 | def geweke_pval(self): 79 | return 0.005 # since the statistic is on (0,1), it's really sensitive? 80 | 81 | 82 | @attr('negbinintrvariant') 83 | class TestNegativeBinomialIntegerRVariant(TestNegativeBinomialIntegerR): 84 | @property 85 | def distribution_class(self): 86 | return distributions.NegativeBinomialIntegerRVariant 87 | -------------------------------------------------------------------------------- /tests/test_poisson.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, \ 8 | GewekeGibbsTester 9 | 10 | 11 | @attr('poisson') 12 | class TestPoisson(BigDataGibbsTester,GewekeGibbsTester): 13 | @property 14 | def distribution_class(self): 15 | return distributions.Poisson 16 | 17 | @property 18 | def hyperparameter_settings(self): 19 | return (dict(alpha_0=30,beta_0=3),) 20 | 21 | def params_close(self,d1,d2): 22 | return np.allclose(d1.lmbda,d2.lmbda,rtol=0.05) 23 | 24 | def geweke_statistics(self,d,data): 25 | return d.lmbda 26 | -------------------------------------------------------------------------------- /tests/test_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, MaxLikelihoodTester, \ 8 | GewekeGibbsTester, BasicTester 9 | 10 | 11 | @attr('regression') 12 | class TestRegression( 13 | BasicTester,BigDataGibbsTester, 14 | MaxLikelihoodTester,GewekeGibbsTester): 15 | @property 16 | def distribution_class(self): 17 | return distributions.Regression 18 | 19 | @property 20 | def hyperparameter_settings(self): 21 | return (dict(nu_0=3,S_0=np.eye(1),M_0=np.zeros((1,2)),K_0=np.eye(2)), 22 | dict(nu_0=5,S_0=np.eye(2),M_0=np.zeros((2,4)),K_0=2*np.eye(4)), 23 | dict(nu_0=5,S_0=np.eye(2),M_0=np.zeros((2,5)),K_0=2*np.eye(5),affine=True),) 24 | 25 | def params_close(self,d1,d2): 26 | return np.linalg.norm(d1.A-d2.A) < 0.1 and np.linalg.norm(d1.sigma-d2.sigma) < 0.1 27 | 28 | @property 29 | def big_data_size(self): 30 | return 80000 31 | 32 | def geweke_statistics(self,d,data): 33 | return np.concatenate((d.A.flatten(),np.diag(d.sigma))) 34 | 35 | def geweke_numerical_slice(self,d,setting_idx): 36 | return slice(0,d.A.flatten().shape[0]) 37 | 38 | @property 39 | def geweke_ntrials(self): 
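        # Each Geweke trial runs a long chain that alternates resampling the
        # regression parameters and regenerating the data, so a single trial
        # keeps this slow test manageable.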
40 | return 1 # because it's slow 41 | 42 | @property 43 | def geweke_num_statistic_fails_to_tolerate(self): 44 | return 0 45 | 46 | ### class-specific 47 | 48 | def test_affine_loglike(self): 49 | A = np.random.randn(2,3) 50 | b = np.random.randn(2) 51 | sigma = np.random.randn(2,2); sigma = sigma.dot(sigma.T) 52 | data = np.random.randn(25,5) 53 | 54 | d1 = self.distribution_class(A=np.hstack((A,b[:,None])),sigma=sigma,affine=True) 55 | d2 = self.distribution_class(A=A,sigma=sigma) 56 | 57 | likes1 = d1.log_likelihood(data) 58 | data[:,-2:] -= b 59 | likes2 = d2.log_likelihood(data) 60 | 61 | assert np.allclose(likes1,likes2) 62 | 63 | def test_loglike_against_gaussian(self): 64 | mu = np.random.randn(3) 65 | A = mu[:,None] 66 | sigma = np.random.randn(3,3); sigma = sigma.dot(sigma.T) 67 | 68 | data = np.random.randn(25,mu.shape[0]) 69 | 70 | d1 = distributions.Gaussian(mu=mu,sigma=sigma) 71 | likes1 = d1.log_likelihood(data) 72 | 73 | d2 = self.distribution_class(A=A,sigma=sigma) 74 | likes2 = d2.log_likelihood(np.hstack((np.ones((data.shape[0],1)),data))) 75 | 76 | assert np.allclose(likes1,likes2) 77 | 78 | @attr('regressionnonconj') 79 | class TestRegressionNonconj(BasicTester,BigDataGibbsTester,GewekeGibbsTester): 80 | @property 81 | def distribution_class(self): 82 | return distributions.RegressionNonconj 83 | 84 | @property 85 | def hyperparameter_settings(self): 86 | def make_hyps(m,n): 87 | return dict(nu_0=m+1, S_0=m*np.eye(m), 88 | M_0=np.zeros((m,n)), Sigma_0=np.eye(m*n)) 89 | return [make_hyps(m,n) for m, n in [(2,3), (3,2)]] 90 | 91 | def params_close(self,d1,d2): 92 | return np.linalg.norm(d1.A-d2.A) < 0.5 and np.linalg.norm(d1.sigma-d2.sigma) < 0.5 93 | 94 | def geweke_statistics(self,d,data): 95 | return np.concatenate((d.A.flatten(),np.diag(d.sigma))) 96 | 97 | def geweke_numerical_slices(self,d,setting_idx): 98 | return slice(0,d.A.flatten().shape[0]) 99 | 100 | @property 101 | def geweke_ntrials(self): 102 | return 1 # because it's slow 103 | 104 | @property 105 | def geweke_num_statistic_fails_to_tolerate(self): 106 | return 0 107 | 108 | @property 109 | def geweke_resample_kwargs(self): 110 | return dict(niter=2) 111 | --------------------------------------------------------------------------------
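The regression tests above fit `distributions.Regression` to large synthetic datasets and check that the posterior samples land near the generating parameters. The same pattern works outside the test harness; the following is a minimal sketch, assuming the `resample(data)` Gibbs interface from `abstractions.py` and the `[x, y]` column layout that `test_regression.py` passes to `log_likelihood`:

```python
import numpy as np
from pybasicbayes import distributions

# Generating model: scalar y = A_true x + Gaussian noise, with x in R^2.
A_true = np.array([[1.0, -2.0]])
sigma_true = np.array([[0.1]])

# Stack covariates and outputs column-wise as [x, y], the layout the
# regression tests use.
n = 5000
x = np.random.randn(n, 2)
y = x.dot(A_true.T) + np.sqrt(sigma_true[0, 0]) * np.random.randn(n, 1)
data = np.hstack((x, y))

# Matrix-normal inverse-Wishart prior, mirroring the first setting in
# TestRegression.hyperparameter_settings.
d = distributions.Regression(
    nu_0=3, S_0=np.eye(1), M_0=np.zeros((1, 2)), K_0=np.eye(2))

# A few Gibbs sweeps; with this much data the sampled (A, sigma) should sit
# close to (A_true, sigma_true), which is what params_close checks.
for _ in range(25):
    d.resample(data)

print(d.A)
print(d.sigma)
```

Because the prior is conjugate, each `resample` call should draw directly from the posterior given `data`; the loop is only there to mimic how the Gibbs testers exercise the distribution.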