├── .ctags ├── .gitignore ├── .travis.yml ├── LICENSE-MIT ├── MANIFEST.in ├── README.md ├── examples ├── EM_demo.py ├── __init__.py ├── animation.py ├── demo.py ├── factor_analysis.py ├── meanfield_steps.py └── robust_regression.py ├── images ├── best-model.png ├── data.png └── model-vlb-vs-iteration.png ├── pybasicbayes ├── __init__.py ├── abstractions.py ├── distributions │ ├── __init__.py │ ├── binomial.py │ ├── gaussian.py │ ├── geometric.py │ ├── meta.py │ ├── multinomial.py │ ├── negativebinomial.py │ ├── poisson.py │ ├── regression.py │ └── uniform.py ├── models │ ├── __init__.py │ ├── factor_analysis.py │ ├── mixture.py │ └── parallel_mixture.py ├── testing │ ├── .gitignore │ ├── __init__.py │ └── mixins.py └── util │ ├── .ctags │ ├── .gitignore │ ├── __init__.py │ ├── cstats.pyx │ ├── cyutil.py │ ├── general.py │ ├── plot.py │ ├── profiling.py │ ├── stats.py │ ├── testing.py │ └── text.py ├── setup.py └── tests ├── test_categorical.py ├── test_gammadirichlet.py ├── test_gaussian.py ├── test_geometric.py ├── test_negbin.py ├── test_poisson.py └── test_regression.py /.ctags: -------------------------------------------------------------------------------- 1 | --exclude=util 2 | --python-kinds=-i 3 | --recurse=yes 4 | --exclude=.git 5 | --exclude=.pyc 6 | --exclude=.md 7 | --exclude=DS_Store 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | /tags 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "2.7" 5 | notifications: 6 | email: false 7 | before_install: 8 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 9 | - bash miniconda.sh -b -p $HOME/miniconda 10 | - export PATH="$HOME/miniconda/bin:$PATH" 11 | - conda update --yes conda 12 | - conda install --yes python=$TRAVIS_PYTHON_VERSION pip numpy scipy matplotlib cython nose future 13 | install: 14 | - python setup.py build_ext --inplace 15 | script: nosetests tests -A 'not slow and not random' 16 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Matthew James Johnson, 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include pybasicbayes *.pyx *.c *.cpp *.h 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/mattjj/pybasicbayes.svg?branch=master)](https://travis-ci.org/mattjj/pybasicbayes) 2 | 3 | This library provides objects that model probability distributions and the 4 | related operations that are common in generative Bayesian modeling and Bayesian 5 | inference, including Gibbs sampling and variational mean field algorithms. The 6 | file `abstractions.py` describes the queries a distribution must support to be 7 | used in each algorithm, as well as an API for models, which compose the 8 | distribution objects. 9 | 10 | ## Example ## 11 | 12 | The file `models.py` shows how to construct mixture models building on the 13 | distribution objects in this library. For example, to generate data from a 14 | Gaussian mixture model, we might set some hyperparameters, construct a 15 | `Mixture` object, and then ask it to randomly generate some data from the 16 | prior: 17 | 18 | ```python 19 | import numpy as np 20 | from pybasicbayes import models, distributions 21 | 22 | # hyperparameters 23 | alpha_0=5.0 24 | obs_hypparams = dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.05,nu_0=5) 25 | 26 | # create the model 27 | priormodel = models.Mixture(alpha_0=alpha_0, 28 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 29 | 30 | # generate some data 31 | data = priormodel.rvs(400) 32 | 33 | # delete the model 34 | del priormodel 35 | ``` 36 | 37 | If we throw away the prior model at the end, we're left just with the data, 38 | which look like this: 39 | 40 | ```python 41 | from matplotlib import pyplot as plt 42 | plt.figure() 43 | plt.plot(data[:,0],data[:,1],'kx') 44 | plt.title('data') 45 | ``` 46 | 47 | ![randomly generated mixture model data](https://raw.githubusercontent.com/mattjj/pybasicbayes/master/images/data.png) 48 | 49 | Imagine we loaded these data from some measurements file and we wanted to fit a 50 | mixture model to it. We can create a new `Mixture` and run inference to get a 51 | representation of the posterior distribution over mixture models conditioned on 52 | observing these data: 53 | 54 | ```python 55 | posteriormodel = models.Mixture(alpha_0=alpha_0, 56 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 57 | 58 | posteriormodel.add_data(data) 59 | ``` 60 | 61 | Since pybasicbayes implements both Gibbs sampling and variational mean field 62 | inference algorithms, we can use both together in a hybrid algorithm. 
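In the loop below, each of five passes runs 100 Gibbs sweeps to explore the posterior and then 100 mean field updates to lock onto a nearby mode, recording the variational lower bounds so the best run can be picked out afterwards: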
63 | 64 | ```python 65 | import copy 66 | from pybasicbayes.util.text import progprint_xrange 67 | 68 | allscores = [] # variational lower bounds on the marginal data log likelihood 69 | allmodels = [] 70 | for superitr in range(5): 71 | # Gibbs sampling to wander around the posterior 72 | print 'Gibbs Sampling' 73 | for itr in progprint_xrange(100): 74 | posteriormodel.resample_model() 75 | 76 | # mean field to lock onto a mode 77 | print 'Mean Field' 78 | scores = [posteriormodel.meanfield_coordinate_descent_step() 79 | for itr in progprint_xrange(100)] 80 | 81 | allscores.append(scores) 82 | allmodels.append(copy.deepcopy(posteriormodel)) 83 | 84 | import operator 85 | models_and_scores = sorted([(m,s[-1]) for m,s 86 | in zip(allmodels,allscores)],key=operator.itemgetter(1),reverse=True) 87 | ``` 88 | 89 | Now we can plot the score trajectories: 90 | 91 | ```python 92 | plt.figure() 93 | for scores in allscores: 94 | plt.plot(scores) 95 | plt.title('model vlb scores vs iteration') 96 | ``` 97 | 98 | ![model vlb scores vs iteration](https://raw.githubusercontent.com/mattjj/pybasicbayes/master/images/model-vlb-vs-iteration.png) 99 | 100 | And show the point estimate of the best model by calling the convenient `Mixture.plot()`: 101 | 102 | ```python 103 | models_and_scores[0][0].plot() 104 | plt.title('best model') 105 | ``` 106 | 107 | ![best fit model and data](https://raw.githubusercontent.com/mattjj/pybasicbayes/master/images/best-model.png) 108 | 109 | Since these are Bayesian methods, we have much more than just a point estimate 110 | for plotting: we have fit entire distributions, so we can query any confidence 111 | or marginal that we need. 112 | 113 | See the file `demo.py` for the code for this demo. 114 | 115 | ## Authors ## 116 | 117 | [Matt Johnson](https://github.com/mattjj), [Alex Wiltschko](https://github.com/alexbw), [Yarden Katz](https://github.com/yarden), [Nick Foti](https://github.com/nfoti), and [Scott Linderman](https://github.com/slinderman). 118 | 119 | -------------------------------------------------------------------------------- /examples/EM_demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import range 3 | import numpy as np 4 | np.seterr(invalid='raise') 5 | from matplotlib import pyplot as plt 6 | import copy 7 | 8 | import pybasicbayes 9 | from pybasicbayes import models, distributions 10 | from pybasicbayes.util.text import progprint_xrange 11 | 12 | # EM is really terrible! Here's a demo of how to do it on really easy data 13 | 14 | ### generate and plot the data 15 | 16 | alpha_0=100. 
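# large concentration => near-uniform mixture weights, so the generating model uses all 6 components fairly evenly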
17 | obs_hypparams=dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.05,nu_0=5) 18 | 19 | priormodel = models.Mixture(alpha_0=alpha_0, 20 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(6)]) 21 | 22 | data = priormodel.rvs(200) 23 | 24 | del priormodel 25 | 26 | 27 | plt.figure() 28 | plt.plot(data[:,0],data[:,1],'kx') 29 | plt.title('data') 30 | 31 | 32 | min_num_components, max_num_components = (1,12) 33 | num_tries_each = 5 34 | 35 | ### search over models using BIC as a model selection criterion 36 | 37 | BICs = [] 38 | examplemodels = [] 39 | for idx, num_components in enumerate(progprint_xrange(min_num_components,max_num_components+1)): 40 | theseBICs = [] 41 | for i in range(num_tries_each): 42 | fitmodel = models.Mixture( 43 | alpha_0=10000, # used for random initialization Gibbs sampling, big means use all components 44 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(num_components)]) 45 | 46 | fitmodel.add_data(data) 47 | 48 | # use Gibbs sampling for initialization 49 | for itr in range(100): 50 | fitmodel.resample_model() 51 | 52 | # use EM to fit a model 53 | for itr in range(50): 54 | fitmodel.EM_step() 55 | 56 | theseBICs.append(fitmodel.BIC()) 57 | 58 | examplemodels.append(copy.deepcopy(fitmodel)) 59 | BICs.append(theseBICs) 60 | 61 | plt.figure() 62 | plt.errorbar( 63 | x=np.arange(min_num_components,max_num_components+1), 64 | y=[np.mean(x) for x in BICs], 65 | yerr=[np.std(x) for x in BICs] 66 | ) 67 | plt.xlabel('num components') 68 | plt.ylabel('BIC') 69 | 70 | plt.figure() 71 | examplemodels[np.argmin([np.mean(x) for x in BICs])].plot() 72 | plt.title('a decent model') 73 | 74 | plt.show() 75 | 76 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattjj/pybasicbayes/61f65ad6c781288605ec5f7347efcc5dbd73c4fc/examples/__init__.py -------------------------------------------------------------------------------- /examples/animation.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from builtins import range 4 | import numpy as np 5 | import numpy.random as npr 6 | from matplotlib import pyplot as plt 7 | plt.ion() 8 | 9 | from pybasicbayes import models, distributions 10 | 11 | 12 | ############### 13 | # load data # 14 | ############### 15 | 16 | data = np.loadtxt('data.txt') 17 | 18 | plt.figure() 19 | plt.plot(data[:,0],data[:,1],'kx') 20 | plt.title('data') 21 | 22 | ################## 23 | # set up model # 24 | ################## 25 | 26 | npr.seed(0) 27 | 28 | alpha_0 = 5. 29 | obs_hypparams = dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.05,nu_0=5) 30 | 31 | model = models.Mixture( 32 | alpha_0=alpha_0, 33 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)] 34 | ) 35 | 36 | model.add_data(data) 37 | 38 | ############## 39 | # animate! # 40 | ############## 41 | 42 | ## movie 43 | # try: 44 | # from moviepy.video.io.bindings import mplfig_to_npimage 45 | # from moviepy.editor import VideoClip 46 | # except: 47 | # print "No moviepy found. Quitting..." 
48 | # import sys 49 | # sys.exit(1) 50 | 51 | # fig = plt.figure() 52 | # model.plot(draw=False) 53 | # plt.axis([-8,5,-2,6]) 54 | 55 | # def make_frame_mpl(t): 56 | # if (t // 2) % 2: 57 | # model.meanfield_coordinate_descent_step() 58 | # else: 59 | # model.resample_model() 60 | # model.plot(update=True,draw=False) 61 | # return mplfig_to_npimage(fig) 62 | 63 | # animation = VideoClip(make_frame_mpl, duration=12) 64 | # animation.write_videofile('gibbs.mp4',fps=50) 65 | 66 | 67 | 68 | 69 | import itertools, sys, json 70 | for i in itertools.count(): 71 | model.resample_model() 72 | if i % 3 == 0: 73 | print(json.dumps(model.to_json_dict())) 74 | sys.stdout.flush() 75 | 76 | -------------------------------------------------------------------------------- /examples/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from builtins import zip 4 | from builtins import range 5 | import numpy as np 6 | np.seterr(invalid='raise') 7 | from matplotlib import pyplot as plt 8 | import copy 9 | 10 | import pybasicbayes 11 | from pybasicbayes import models, distributions 12 | from pybasicbayes.util.text import progprint_xrange 13 | 14 | alpha_0=5.0 15 | obs_hypparams=dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.05,nu_0=5) 16 | 17 | priormodel = models.Mixture(alpha_0=alpha_0, 18 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 19 | 20 | data, _ = priormodel.generate(500) 21 | 22 | plt.figure() 23 | priormodel.plot() 24 | plt.title('true model') 25 | 26 | del priormodel 27 | 28 | plt.figure() 29 | plt.plot(data[:,0],data[:,1],'kx') 30 | plt.title('data') 31 | 32 | posteriormodel = models.Mixture(alpha_0=alpha_0, 33 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 34 | 35 | posteriormodel.add_data(data) 36 | 37 | allscores = [] 38 | allmodels = [] 39 | for superitr in range(5): 40 | # Gibbs sampling to wander around the posterior 41 | print('Gibbs Sampling') 42 | for itr in progprint_xrange(100): 43 | posteriormodel.resample_model() 44 | 45 | # mean field to lock onto a mode 46 | print('Mean Field') 47 | scores = [posteriormodel.meanfield_coordinate_descent_step() 48 | for itr in progprint_xrange(100)] 49 | 50 | allscores.append(scores) 51 | allmodels.append(copy.deepcopy(posteriormodel)) 52 | 53 | plt.figure() 54 | for scores in allscores: 55 | plt.plot(scores) 56 | plt.title('model vlb scores vs iteration') 57 | 58 | import operator 59 | models_and_scores = sorted([(m,s[-1]) for m,s 60 | in zip(allmodels,allscores)],key=operator.itemgetter(1),reverse=True) 61 | 62 | plt.figure() 63 | models_and_scores[0][0].plot() 64 | plt.title('best model') 65 | 66 | plt.show() 67 | -------------------------------------------------------------------------------- /examples/factor_analysis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.random.seed(1) 3 | 4 | from pybasicbayes.util.text import progprint_xrange 5 | import matplotlib.pyplot as plt 6 | from matplotlib.cm import get_cmap 7 | 8 | import pybasicbayes.models.factor_analysis 9 | from pybasicbayes.models.factor_analysis import FactorAnalysis 10 | 11 | N = 2000 12 | D_obs = 20 13 | D_latent = 2 14 | 15 | def principal_angle(A,B): 16 | """ 17 | Find the principal angle between two subspaces 18 | spanned by columns of A and B 19 | """ 20 | from numpy.linalg import qr, svd 21 | qA, _ = qr(A) 22 | qB, _ = qr(B) 23 | 
U,S,V = svd(qA.T.dot(qB)) 24 | return np.arccos(min(S.min(), 1.0)) 25 | 26 | def generate_synth_data(): 27 | 28 | # Create a true model and sample from it 29 | mask = np.random.rand(N,D_obs) < 0.9 30 | true_model = FactorAnalysis(D_obs, D_latent) 31 | X, Z_true = true_model.generate(N=N, mask=mask, keep=True) 32 | return true_model, X, Z_true, mask 33 | 34 | 35 | def plot_results(lls, angles, Ztrue, Zinf): 36 | # Plot log probabilities 37 | plt.figure() 38 | plt.plot(lls) 39 | plt.ylabel("Log Likelihood") 40 | plt.xlabel("Iteration") 41 | 42 | plt.figure() 43 | plt.plot(np.array(angles) / np.pi * 180.) 44 | plt.ylabel("Principal Angle") 45 | plt.xlabel("Iteration") 46 | 47 | # Plot locations, color by angle 48 | N = Ztrue.shape[0] 49 | inds_to_plot = np.random.randint(0, N, min(N, 500)) 50 | th = np.arctan2(Ztrue[:,1], Ztrue[:,0]) 51 | nperm = np.argsort(np.argsort(th)) 52 | cmap = get_cmap("jet") 53 | 54 | plt.figure() 55 | plt.subplot(121) 56 | for n in inds_to_plot: 57 | plt.plot(Ztrue[n,0], Ztrue[n,1], 'o', markerfacecolor=cmap(nperm[n] / float(N)), markeredgecolor="none") 58 | plt.title("True Embedding") 59 | plt.xlim(-4,4) 60 | plt.ylim(-4,4) 61 | 62 | plt.subplot(122) 63 | for n in inds_to_plot: 64 | plt.plot(Zinf[n,0], Zinf[n,1], 'o', markerfacecolor=cmap(nperm[n] / float(N)), markeredgecolor="none") 65 | plt.title("Inferred Embedding") 66 | plt.xlim(-4,4) 67 | plt.ylim(-4,4) 68 | 69 | plt.show() 70 | 71 | def gibbs_example(true_model, X, Z_true, mask): 72 | # Fit a test model 73 | model = FactorAnalysis( 74 | D_obs, D_latent, 75 | # W=true_model.W, sigmasq=true_model.sigmasq 76 | ) 77 | inf_data = model.add_data(X, mask=mask) 78 | model.set_empirical_mean() 79 | 80 | lps = [] 81 | angles = [] 82 | N_iters = 100 83 | for _ in progprint_xrange(N_iters): 84 | model.resample_model() 85 | lps.append(model.log_likelihood()) 86 | angles.append(principal_angle(true_model.W, model.W)) 87 | 88 | plot_results(lps, angles, Z_true, inf_data.Z) 89 | 90 | def em_example(true_model, X, Z_true, mask): 91 | # Fit a test model 92 | model = FactorAnalysis( 93 | D_obs, D_latent, 94 | # W=true_model.W, sigmasq=true_model.sigmasq 95 | ) 96 | inf_data = model.add_data(X, mask=mask) 97 | model.set_empirical_mean() 98 | 99 | lps = [] 100 | angles = [] 101 | N_iters = 100 102 | for _ in progprint_xrange(N_iters): 103 | model.EM_step() 104 | lps.append(model.log_likelihood()) 105 | angles.append(principal_angle(true_model.W, model.W)) 106 | 107 | plot_results(lps, angles, Z_true, inf_data.E_Z) 108 | 109 | def meanfield_example(true_model, X, Z_true, mask): 110 | # Fit a test model 111 | model = FactorAnalysis( 112 | D_obs, D_latent, 113 | # W=true_model.W, sigmasq=true_model.sigmasq 114 | ) 115 | inf_data = model.add_data(X, mask=mask) 116 | model.set_empirical_mean() 117 | 118 | lps = [] 119 | angles = [] 120 | N_iters = 100 121 | for _ in progprint_xrange(N_iters): 122 | model.meanfield_coordinate_descent_step() 123 | lps.append(model.expected_log_likelihood()) 124 | E_W, _, _, _ = model.regression.mf_expectations 125 | angles.append(principal_angle(true_model.W, E_W)) 126 | 127 | plot_results(lps, angles, Z_true, inf_data.Z) 128 | 129 | def svi_example(true_model, X, Z_true, mask): 130 | # Fit a test model 131 | model = FactorAnalysis( 132 | D_obs, D_latent, 133 | # W=true_model.W, sigmasq=true_model.sigmasq 134 | ) 135 | 136 | # Add the data in minibatches 137 | N = X.shape[0] 138 | minibatchsize = 200 139 | prob = minibatchsize / float(N) 140 | 141 | lps = [] 142 | angles = [] 143 | N_iters = 100 144 | 
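# SVI step size schedule: stepsize[t] = (t + delay)**(-forgetting_rate), a slowly decaying sequence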
delay = 10.0 145 | forgetting_rate = 0.75 146 | stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate) 147 | for itr in progprint_xrange(N_iters): 148 | minibatch = np.random.permutation(N)[:minibatchsize] 149 | X_mb, mask_mb = X[minibatch], mask[minibatch] 150 | lps.append(model.meanfield_sgdstep(X_mb, prob, stepsize[itr], masks=mask_mb)) 151 | E_W, _, _, _ = model.regression.mf_expectations 152 | angles.append(principal_angle(true_model.W, E_W)) 153 | 154 | # Compute the expected states for the first minibatch of data 155 | model.add_data(X, mask) 156 | statesobj = model.data_list.pop() 157 | statesobj.meanfieldupdate() 158 | Z_inf = statesobj.E_Z 159 | plot_results(lps, angles, Z_true, Z_inf) 160 | 161 | if __name__ == "__main__": 162 | true_model, X, Z_true, mask = generate_synth_data() 163 | gibbs_example(true_model, X, Z_true, mask) 164 | em_example(true_model, X, Z_true, mask) 165 | meanfield_example(true_model, X, Z_true, mask) 166 | svi_example(true_model, X, Z_true, mask) 167 | -------------------------------------------------------------------------------- /examples/meanfield_steps.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import input 3 | from builtins import range 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | plt.interactive(True) 7 | 8 | from pybasicbayes import models, distributions 9 | 10 | GENERATE_DATA = True 11 | 12 | ########################### 13 | # generate or load data # 14 | ########################### 15 | 16 | alpha_0=5.0 17 | obs_hypparams=dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.05,nu_0=5) 18 | 19 | priormodel = models.Mixture(alpha_0=alpha_0, 20 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 21 | data, _ = priormodel.generate(100) 22 | del priormodel 23 | 24 | plt.figure() 25 | plt.plot(data[:,0],data[:,1],'kx') 26 | plt.title('data') 27 | 28 | input() # pause for effect 29 | 30 | ############### 31 | # inference # 32 | ############### 33 | 34 | posteriormodel = models.Mixture(alpha_0=alpha_0, 35 | components=[distributions.Gaussian(**obs_hypparams) for itr in range(30)]) 36 | 37 | posteriormodel.add_data(data) 38 | 39 | vlbs = [] 40 | plt.figure(2,figsize=(8,6)) 41 | posteriormodel.plot() 42 | plt.figure(3,figsize=(8,6)) 43 | while True: 44 | if input().lower() == 'break': # pause at each iteration 45 | break 46 | 47 | vlb = posteriormodel.meanfield_coordinate_descent_step() 48 | 49 | plt.figure(2) 50 | plt.clf() 51 | posteriormodel.plot() 52 | 53 | plt.figure(3) 54 | plt.clf() 55 | vlbs.append(vlb) 56 | plt.plot(vlbs) 57 | 58 | -------------------------------------------------------------------------------- /examples/robust_regression.py: -------------------------------------------------------------------------------- 1 | # Demo of a robust regression model with multivariate-t distributed noise 2 | 3 | import numpy as np 4 | import numpy.random as npr 5 | np.random.seed(0) 6 | 7 | import matplotlib.pyplot as plt 8 | import seaborn as sns 9 | sns.set_style("white") 10 | 11 | from pybasicbayes.util.text import progprint_xrange 12 | from pybasicbayes.distributions import Regression, RobustRegression 13 | 14 | D_out = 1 15 | D_in = 2 16 | N = 100 17 | 18 | # Make a regression model and simulate data 19 | A = npr.randn(D_out, D_in) 20 | b = npr.randn(D_out) 21 | Sigma = 0.1 * np.eye(D_out) 22 | 23 | true_reg = Regression(A=np.column_stack((A, b)), sigma=Sigma, affine=True) 24 | X = npr.randn(N, D_in) 25 
| y = true_reg.rvs(x=X, return_xy=False) 26 | 27 | # Corrupt a fraction of the data 28 | inds = npr.rand(N) < 0.1 29 | y[inds] = 3 * npr.randn(inds.sum(), D_out) 30 | 31 | # Make a test regression and fit it 32 | std_reg = Regression(nu_0=D_out + 2, 33 | S_0=np.eye(D_out), 34 | M_0=np.zeros((D_out, D_in+1)), 35 | K_0=np.eye(D_in+1), 36 | affine=True) 37 | 38 | robust_reg = RobustRegression(nu_0=D_out+2, 39 | S_0=np.eye(D_out), 40 | M_0=np.zeros((D_out, D_in+1)), 41 | K_0=np.eye(D_in+1), 42 | affine=True) 43 | 44 | def _collect(r): 45 | ll = r.log_likelihood((X, y))[~inds].sum() 46 | err = ((y - r.predict(X))**2).sum(1) 47 | mse = np.mean(err[~inds]) 48 | return r.A.copy(), ll, mse 49 | 50 | def _update(r): 51 | r.resample([(X,y)]) 52 | return _collect(r) 53 | 54 | # Fit the standard regression 55 | smpls = [_collect(std_reg)] 56 | for _ in progprint_xrange(100): 57 | smpls.append(_update(std_reg)) 58 | smpls = zip(*smpls) 59 | std_As, std_lls, std_mses = tuple(map(np.array, smpls)) 60 | 61 | # Fit the robust regression 62 | smpls = [_collect(robust_reg)] 63 | for _ in progprint_xrange(100): 64 | smpls.append(_update(robust_reg)) 65 | smpls = zip(*smpls) 66 | robust_As, robust_lls, robust_mses = tuple(map(np.array, smpls)) 67 | 68 | 69 | # Plot the inferred regression function 70 | plt.figure(figsize=(8, 4)) 71 | xlim = (-3, 3) 72 | ylim = abs(y).max() 73 | npts = 50 74 | x1, x2 = np.meshgrid(np.linspace(*xlim, npts), np.linspace(*xlim, npts)) 75 | 76 | plt.subplot(131) 77 | mu = true_reg.predict(np.column_stack((x1.ravel(), x2.ravel()))) 78 | plt.imshow(mu.reshape((npts, npts)), 79 | cmap="RdBu", vmin=-ylim, vmax=ylim, 80 | alpha=0.8, 81 | extent=xlim + tuple(reversed(xlim))) 82 | plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray') 83 | plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1) 84 | plt.xlim(xlim) 85 | plt.ylim(xlim) 86 | plt.title("True") 87 | 88 | plt.subplot(132) 89 | mu = std_reg.predict(np.column_stack((x1.ravel(), x2.ravel()))) 90 | plt.imshow(mu.reshape((npts, npts)), 91 | cmap="RdBu", vmin=-ylim, vmax=ylim, 92 | alpha=0.8, 93 | extent=xlim + tuple(reversed(xlim))) 94 | plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray') 95 | plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1) 96 | plt.xlim(xlim) 97 | plt.ylim(xlim) 98 | plt.title("Standard Regression") 99 | 100 | plt.subplot(133) 101 | mu = robust_reg.predict(np.column_stack((x1.ravel(), x2.ravel()))) 102 | plt.imshow(mu.reshape((npts, npts)), 103 | cmap="RdBu", vmin=-ylim, vmax=ylim, 104 | alpha=0.8, 105 | extent=xlim + tuple(reversed(xlim))) 106 | plt.scatter(X[~inds,0], X[~inds,1], c=y[~inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='gray') 107 | plt.scatter(X[inds,0], X[inds,1], c=y[inds, 0], cmap="RdBu", vmin=-ylim, vmax=ylim, edgecolors='k', linewidths=1) 108 | plt.xlim(xlim) 109 | plt.ylim(xlim) 110 | plt.title("Robust Regression") 111 | 112 | 113 | print("True A: {}".format(true_reg.A)) 114 | print("Std A: {}".format(std_As.mean(0))) 115 | print("Robust A: {}".format(robust_As.mean(0))) 116 | 117 | # Plot the log likelihoods and mean squared errors 118 | plt.figure(figsize=(8, 4)) 119 | plt.subplot(121) 120 | plt.plot(std_lls) 121 | plt.plot(robust_lls) 122 | plt.xlabel("Iteration") 123 | plt.ylabel("Log Likelihood") 124 | 125 | plt.subplot(122) 126 | plt.plot(std_mses, 
label="Standard") 127 | plt.plot(robust_mses, label="Robust") 128 | plt.legend(loc="upper right") 129 | plt.xlabel("Iteration") 130 | plt.ylabel("Mean Squared Error") 131 | 132 | plt.show() 133 | -------------------------------------------------------------------------------- /images/best-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattjj/pybasicbayes/61f65ad6c781288605ec5f7347efcc5dbd73c4fc/images/best-model.png -------------------------------------------------------------------------------- /images/data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattjj/pybasicbayes/61f65ad6c781288605ec5f7347efcc5dbd73c4fc/images/data.png -------------------------------------------------------------------------------- /images/model-vlb-vs-iteration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattjj/pybasicbayes/61f65ad6c781288605ec5f7347efcc5dbd73c4fc/images/model-vlb-vs-iteration.png -------------------------------------------------------------------------------- /pybasicbayes/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import abstractions, distributions, models, util 3 | -------------------------------------------------------------------------------- /pybasicbayes/abstractions.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from builtins import object 4 | import abc 5 | import numpy as np 6 | import copy 7 | 8 | import pybasicbayes 9 | from pybasicbayes.util.stats import combinedata 10 | from pybasicbayes.util.text import progprint_xrange 11 | from future.utils import with_metaclass 12 | 13 | # NOTE: data is always a (possibly masked) np.ndarray or list of (possibly 14 | # masked) np.ndarrays. 15 | 16 | # TODO figure out a data abstraction 17 | # TODO make an exponential family abc to reduce boilerplate 18 | 19 | ################ 20 | # Base class # 21 | ################ 22 | 23 | class Distribution(with_metaclass(abc.ABCMeta, object)): 24 | @abc.abstractmethod 25 | def rvs(self,size=[]): 26 | 'random variates (samples)' 27 | pass 28 | 29 | @abc.abstractmethod 30 | def log_likelihood(self,x): 31 | ''' 32 | log likelihood (either log probability mass function or log probability 33 | density function) of x, which has the same type as the output of rvs() 34 | ''' 35 | pass 36 | 37 | class BayesianDistribution(with_metaclass(abc.ABCMeta, Distribution)): 38 | def empirical_bayes(self,data): 39 | ''' 40 | (optional) set hyperparameters via empirical bayes 41 | e.g. 
treat argument as a pseudo-dataset for exponential family 42 | ''' 43 | raise NotImplementedError 44 | 45 | ######################################################### 46 | # Algorithm interfaces for inference in distributions # 47 | ######################################################### 48 | 49 | class GibbsSampling(with_metaclass(abc.ABCMeta, BayesianDistribution)): 50 | @abc.abstractmethod 51 | def resample(self,data=[]): 52 | pass 53 | 54 | def copy_sample(self): 55 | ''' 56 | return an object copy suitable for making lists of posterior samples 57 | (override this method to prevent copying shared structures into each sample) 58 | ''' 59 | return copy.deepcopy(self) 60 | 61 | def resample_and_copy(self): 62 | self.resample() 63 | return self.copy_sample() 64 | 65 | class MeanField(with_metaclass(abc.ABCMeta, BayesianDistribution)): 66 | @abc.abstractmethod 67 | def expected_log_likelihood(self,x): 68 | pass 69 | 70 | @abc.abstractmethod 71 | def meanfieldupdate(self,data,weights): 72 | pass 73 | 74 | def get_vlb(self): 75 | raise NotImplementedError 76 | 77 | class MeanFieldSVI(with_metaclass(abc.ABCMeta, BayesianDistribution)): 78 | @abc.abstractmethod 79 | def meanfield_sgdstep(self,expected_suff_stats,prob,stepsize): 80 | pass 81 | 82 | class Collapsed(with_metaclass(abc.ABCMeta, BayesianDistribution)): 83 | @abc.abstractmethod 84 | def log_marginal_likelihood(self,data): 85 | pass 86 | 87 | def log_predictive(self,newdata,olddata): 88 | return self.log_marginal_likelihood(combinedata((newdata,olddata))) \ 89 | - self.log_marginal_likelihood(olddata) 90 | 91 | def predictive(self,*args,**kwargs): 92 | return np.exp(self.log_predictive(*args,**kwargs)) 93 | 94 | class MaxLikelihood(with_metaclass(abc.ABCMeta, Distribution)): 95 | @abc.abstractmethod 96 | def max_likelihood(self,data,weights=None): 97 | ''' 98 | sets the parameters set to their maximum likelihood values given the 99 | (weighted) data 100 | ''' 101 | pass 102 | 103 | @property 104 | def num_parameters(self): 105 | raise NotImplementedError 106 | 107 | class MAP(with_metaclass(abc.ABCMeta, BayesianDistribution)): 108 | @abc.abstractmethod 109 | def MAP(self,data,weights=None): 110 | ''' 111 | sets the parameters to their MAP values given the (weighted) data 112 | analogous to max_likelihood but includes hyperparameters 113 | ''' 114 | pass 115 | 116 | class Tempering(BayesianDistribution): 117 | @abc.abstractmethod 118 | def log_likelihood(self,data,temperature=1.): 119 | pass 120 | 121 | @abc.abstractmethod 122 | def resample(self,data,temperature=1.): 123 | pass 124 | 125 | def energy(self,data): 126 | return -self.log_likelihood(data,temperature=1.) 127 | 128 | ############ 129 | # Models # 130 | ############ 131 | 132 | # a "model" is differentiated from a "distribution" in this code by latent state 133 | # over data: a model attaches a latent variable (like a label or state sequence) 134 | # to data, and so it 'holds onto' data. Hence the add_data method. 135 | 136 | class Model(with_metaclass(abc.ABCMeta, object)): 137 | @abc.abstractmethod 138 | def add_data(self,data): 139 | pass 140 | 141 | @abc.abstractmethod 142 | def generate(self,keep=True,**kwargs): 143 | ''' 144 | Like a distribution's rvs, but this also fills in latent state over 145 | data and keeps references to the data. 
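Implementations should return an (observations, latent_variables) pair; rvs() below returns only the observations.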
146 | ''' 147 | pass 148 | 149 | def rvs(self,*args,**kwargs): 150 | return self.generate(*args,keep=False,**kwargs)[0] # 0th component is data, not latent stuff 151 | 152 | ################################################## 153 | # Algorithm interfaces for inference in models # 154 | ################################################## 155 | 156 | class ModelGibbsSampling(with_metaclass(abc.ABCMeta, Model)): 157 | @abc.abstractmethod 158 | def resample_model(self): # TODO niter? 159 | pass 160 | 161 | def copy_sample(self): 162 | ''' 163 | return an object copy suitable for making lists of posterior samples 164 | (override this method to prevent copying shared structures into each sample) 165 | ''' 166 | return copy.deepcopy(self) 167 | 168 | def resample_and_copy(self): 169 | self.resample_model() 170 | return self.copy_sample() 171 | 172 | class ModelMeanField(with_metaclass(abc.ABCMeta, Model)): 173 | @abc.abstractmethod 174 | def meanfield_coordinate_descent_step(self): 175 | # returns variational lower bound after update, if available 176 | pass 177 | 178 | def meanfield_coordinate_descent(self,tol=1e-1,maxiter=250,progprint=False,**kwargs): 179 | # NOTE: doesn't re-initialize! 180 | scores = [] 181 | step_iterator = range(maxiter) if not progprint else progprint_xrange(maxiter) 182 | for itr in step_iterator: 183 | scores.append(self.meanfield_coordinate_descent_step(**kwargs)) 184 | if scores[-1] is not None and len(scores) > 1: 185 | if np.abs(scores[-1]-scores[-2]) < tol: 186 | return scores 187 | print('WARNING: meanfield_coordinate_descent hit maxiter of %d' % maxiter) 188 | return scores 189 | 190 | class ModelMeanFieldSVI(with_metaclass(abc.ABCMeta, Model)): 191 | @abc.abstractmethod 192 | def meanfield_sgdstep(self,minibatch,prob,stepsize): 193 | pass 194 | 195 | class _EMBase(with_metaclass(abc.ABCMeta, Model)): 196 | @abc.abstractmethod 197 | def log_likelihood(self): 198 | # returns a log likelihood number on attached data 199 | pass 200 | 201 | def _EM_fit(self,method,tol=1e-1,maxiter=100,progprint=False): 202 | # NOTE: doesn't re-initialize! 
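# take steps of `method` until the attached-data log likelihood improves by less than tol or maxiter is reached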
203 | likes = [] 204 | step_iterator = range(maxiter) if not progprint else progprint_xrange(maxiter) 205 | for itr in step_iterator: 206 | method() 207 | likes.append(self.log_likelihood()) 208 | if len(likes) > 1: 209 | if likes[-1]-likes[-2] < tol: 210 | return likes 211 | elif likes[-1] < likes[-2]: 212 | # probably oscillation, do one more 213 | method() 214 | likes.append(self.log_likelihood()) 215 | return likes 216 | print('WARNING: EM_fit reached maxiter of %d' % maxiter) 217 | return likes 218 | 219 | class ModelEM(with_metaclass(abc.ABCMeta, _EMBase)): 220 | def EM_fit(self,tol=1e-1,maxiter=100): 221 | return self._EM_fit(self.EM_step,tol=tol,maxiter=maxiter) 222 | 223 | @abc.abstractmethod 224 | def EM_step(self): 225 | pass 226 | 227 | class ModelMAPEM(with_metaclass(abc.ABCMeta, _EMBase)): 228 | def MAP_EM_fit(self,tol=1e-1,maxiter=100): 229 | return self._EM_fit(self.MAP_EM_step,tol=tol,maxiter=maxiter) 230 | 231 | @abc.abstractmethod 232 | def MAP_EM_step(self): 233 | pass 234 | 235 | class ModelParallelTempering(with_metaclass(abc.ABCMeta, Model)): 236 | @abc.abstractproperty 237 | def temperature(self): 238 | pass 239 | 240 | @abc.abstractproperty 241 | def energy(self): 242 | pass 243 | 244 | @abc.abstractmethod 245 | def swap_sample_with(self,other): 246 | pass 247 | 248 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .meta import * 3 | 4 | from .regression import * 5 | from .gaussian import * 6 | from .uniform import * 7 | 8 | from .binomial import * 9 | from .multinomial import * 10 | from .negativebinomial import * 11 | from .geometric import * 12 | from .poisson import * 13 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/binomial.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | __all__ = ['Binomial'] 4 | 5 | import numpy as np 6 | import scipy.stats as stats 7 | import scipy.special as special 8 | from warnings import warn 9 | 10 | from pybasicbayes.abstractions import GibbsSampling, MeanField, \ 11 | MeanFieldSVI 12 | 13 | 14 | class Binomial(GibbsSampling, MeanField, MeanFieldSVI): 15 | ''' 16 | Models a Binomial likelihood and a Beta prior: 17 | 18 | p ~ Beta(alpha_0, beta_0) 19 | x | p ~ Binom(p,n) 20 | 21 | where p is the success probability, alpha_0-1 is the prior number of 22 | successes, beta_0-1 is the prior number of failures. 23 | 24 | A special case of Multinomial where N is fixed and each observation counts 25 | the number of successes and is in {0,1,...,N}. 
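The Beta prior is conjugate: after observing counts x_1,...,x_N (N observations with n trials each), the posterior is Beta(alpha_0 + sum_i x_i, beta_0 + N*n - sum_i x_i).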
26 | ''' 27 | def __init__(self,alpha_0,beta_0,alpha_mf=None,beta_mf=None,p=None,n=None): 28 | warn('this class is untested!') 29 | assert n is not None 30 | 31 | self.n = n 32 | self.alpha_0 = alpha_0 33 | self.beta_0 = beta_0 34 | 35 | self.alpha_mf = alpha_mf if alpha_mf is not None else alpha_0 36 | self.beta_mf = beta_mf if beta_mf is not None else beta_0 37 | 38 | if p is not None: 39 | self.p = p 40 | else: 41 | self.resample() 42 | 43 | def log_likelihood(self,x): 44 | return stats.binom.pmf(x,self.n,self.p) 45 | 46 | def rvs(self,size=None): 47 | return stats.binom.pmf(self.n,self.p,size=size) 48 | 49 | @property 50 | def natural_hypparam(self): 51 | return np.array([self.alpha_0 - 1, self.beta_0 - 1]) 52 | 53 | @natural_hypparam.setter 54 | def natural_hypparam(self,natparam): 55 | self.alpha_0, self.beta_0 = natparam + 1 56 | 57 | def _get_statistics(self,data): 58 | if isinstance(data,np.ndarray): 59 | data = data.ravel() 60 | tot = data.sum() 61 | return np.array([tot, self.n*data.shape[0] - tot]) 62 | else: 63 | return sum( 64 | (self._get_statistics(d) for d in data), 65 | self._empty_statistics()) 66 | 67 | def _get_weighted_statistics(self,data,weights): 68 | if isinstance(data,np.ndarray): 69 | data, weights = data.ravel(), weights.ravel() 70 | tot = weights.dot(data) 71 | return np.array([tot, self.n*weights.sum() - tot]) 72 | else: 73 | return sum( 74 | (self._get_weighted_statistics(d,w) for d,w in zip(data,weights)), 75 | self._empty_statistics()) 76 | 77 | def _empty_statistics(self): 78 | return np.zeros(2) 79 | 80 | ### Gibbs 81 | 82 | def resample(self,data=[]): 83 | alpha_n, beta_n = self.natural_hypparam + self._get_statistics(data) + 1 84 | self.p = np.random.beta(alpha_n,beta_n) 85 | 86 | # use Gibbs to initialize mean field 87 | self.alpha_mf = self.p * (self.alpha_0 + self.beta_0) 88 | self.beta_mf = (1-self.p) * (self.alpha_0 + self.beta_0) 89 | 90 | ### Mean field and SVI 91 | 92 | def meanfieldupdate(self,data,weights): 93 | self.mf_natural_hypparam = \ 94 | self.natural_hypparam + self._get_weighted_statistics(data,weights) 95 | 96 | # use mean field to initialize Gibbs 97 | self.p = self.alpha_mf / (self.alpha_mf + self.beta_mf) 98 | 99 | def meanfield_sgdstep(self,data,weights,minibatchprob,stepsize): 100 | self.mf_natural_hypparam = \ 101 | (1-stepsize) * self.mf_natural_hypparam + stepsize * ( 102 | self.natural_hypparam 103 | + 1./minibatchprob * self._get_weighted_statistics(data,weights)) 104 | 105 | @property 106 | def mf_natural_hypparam(self): 107 | return np.array([self.alpha_mf - 1, self.beta_mf - 1]) 108 | 109 | @mf_natural_hypparam.setter 110 | def mf_natural_hypparam(self,natparam): 111 | self.alpha_mf, self.beta_mf = natparam + 1 112 | 113 | def expected_log_likelihood(self,x): 114 | n = self.n 115 | Elnp, Eln1mp = self._mf_expected_statistics() 116 | return special.gammaln(n+1) - special.gammaln(x+1) - special.gammaln(n-x+1) \ 117 | + x*Elnp + (n-x)*Eln1mp 118 | 119 | def _mf_expected_statistics(self): 120 | return special.digamma([self.alpha_mf, self.beta_mf]) \ 121 | - special.digamma(self.alpha_mf + self.beta_mf) 122 | 123 | def get_vlb(self): 124 | Elnp, Eln1mp = self._mf_expected_statistics() 125 | return (self.alpha_0 - self.alpha_mf)*Elnp \ 126 | + (self.beta_0 - self.beta_mf)*Eln1mp \ 127 | - (self._log_partition_function(self.alpha_0, self.beta_0) 128 | - self._log_partition_function(self.alpha_mf,self.beta_mf)) 129 | 130 | def _log_partition_function(self,alpha,beta): 131 | return special.betaln(alpha,beta) 132 | 
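A minimal usage sketch for the `Binomial` class above (an illustrative, hypothetical snippet rather than one of the package's examples; the counts are made up):

```python
import numpy as np
from pybasicbayes.distributions import Binomial

# Beta(1, 1) prior over the success probability of Binomial(n=10) observations
b = Binomial(alpha_0=1., beta_0=1., n=10)

counts = np.array([3, 4, 2, 5])  # hypothetical numbers of successes out of n=10 trials each
b.resample(counts)               # Gibbs step: p ~ Beta(alpha_0 + sum(x), beta_0 + N*n - sum(x))
print(b.p)                       # a posterior sample of the success probability
```

The mean field (`meanfieldupdate`) and SVI (`meanfield_sgdstep`) updates above consume the same sufficient statistics, just in weighted form.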
-------------------------------------------------------------------------------- /pybasicbayes/distributions/geometric.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | __all__ = ['Geometric'] 4 | 5 | import numpy as np 6 | import scipy.stats as stats 7 | import scipy.special as special 8 | from warnings import warn 9 | 10 | from pybasicbayes.abstractions import GibbsSampling, MeanField, \ 11 | Collapsed, MaxLikelihood 12 | 13 | 14 | class Geometric(GibbsSampling, MeanField, Collapsed, MaxLikelihood): 15 | ''' 16 | Geometric distribution with a conjugate beta prior. 17 | The support is {1,2,3,...}. 18 | 19 | Hyperparameters: 20 | alpha_0, beta_0 21 | 22 | Parameter is the success probability: 23 | p 24 | ''' 25 | def __init__(self,alpha_0=None,beta_0=None,p=None): 26 | self.p = p 27 | 28 | self.alpha_0 = self.mf_alpha_0 = alpha_0 29 | self.beta_0 = self.mf_beta_0 = beta_0 30 | 31 | if p is None and not any(_ is None for _ in (alpha_0,beta_0)): 32 | self.resample() # intialize from prior 33 | 34 | @property 35 | def params(self): 36 | return dict(p=self.p) 37 | 38 | @property 39 | def hypparams(self): 40 | return dict(alpha_0=self.alpha_0,beta_0=self.beta_0) 41 | 42 | def _posterior_hypparams(self,n,tot): 43 | return self.alpha_0 + n, self.beta_0 + tot 44 | 45 | def log_likelihood(self,x): 46 | x = np.array(x,ndmin=1) 47 | raw = np.empty(x.shape) 48 | raw[x>0] = (x[x>0]-1.)*np.log(1.-self.p) + np.log(self.p) 49 | raw[x<1] = -np.inf 50 | return raw if isinstance(x,np.ndarray) else raw[0] 51 | 52 | def log_sf(self,x): 53 | return stats.geom.logsf(x,self.p) 54 | 55 | def pmf(self,x): 56 | return stats.geom.pmf(x,self.p) 57 | 58 | def rvs(self,size=None): 59 | return np.random.geometric(self.p,size=size) 60 | 61 | def _get_statistics(self,data): 62 | if isinstance(data,np.ndarray): 63 | n = data.shape[0] 64 | tot = data.sum() - n 65 | elif isinstance(data,list): 66 | n = sum(d.shape[0] for d in data) 67 | tot = sum(d.sum() for d in data) - n 68 | else: 69 | assert np.isscalar(data) 70 | n = 1 71 | tot = data-1 72 | return n, tot 73 | 74 | def _get_weighted_statistics(self,data,weights): 75 | if isinstance(data,np.ndarray): 76 | n = weights.sum() 77 | tot = weights.dot(data) - n 78 | elif isinstance(data,list): 79 | n = sum(w.sum() for w in weights) 80 | tot = sum(w.dot(d) for w,d in zip(weights,data)) - n 81 | else: 82 | assert np.isscalar(data) and np.isscalar(weights) 83 | n = weights 84 | tot = weights*data - 1 85 | 86 | return n, tot 87 | 88 | ### Gibbs sampling 89 | 90 | def resample(self,data=[]): 91 | self.p = np.random.beta(*self._posterior_hypparams(*self._get_statistics(data))) 92 | 93 | # initialize mean field 94 | self.alpha_mf = self.p*(self.alpha_0+self.beta_0) 95 | self.beta_mf = (1-self.p)*(self.alpha_0+self.beta_0) 96 | 97 | return self 98 | 99 | ### mean field 100 | 101 | def meanfieldupdate(self,data,weights,stats=None): 102 | warn('untested') 103 | n, tot = self._get_weighted_statistics(data,weights) if stats is None else stats 104 | self.alpha_mf = self.alpha_0 + n 105 | self.beta_mf = self.beta_0 + tot 106 | 107 | # initialize Gibbs 108 | self.p = self.alpha_mf / (self.alpha_mf + self.beta_mf) 109 | 110 | def get_vlb(self): 111 | warn('untested') 112 | Elnp, Eln1mp = self._expected_statistics(self.alpha_mf,self.beta_mf) 113 | return (self.alpha_0 - self.alpha_mf)*Elnp \ 114 | + (self.beta_0 - self.beta_mf)*Eln1mp \ 115 | - 
(self._log_partition_function(self.alpha_0,self.beta_0) 116 | - self._log_partition_function(self.alpha_mf,self.beta_mf)) 117 | 118 | def expected_log_likelihood(self,x): 119 | warn('untested') 120 | Elnp, Eln1mp = self._expected_statistics(self.alpha_mf,self.beta_mf) 121 | return (x-1)*Eln1mp + Elnp 122 | 123 | def _expected_statistics(self,alpha,beta): 124 | warn('untested') 125 | Elnp = special.digamma(alpha) - special.digamma(alpha+beta) 126 | Eln1mp = special.digamma(beta) - special.digamma(alpha+beta) 127 | return Elnp, Eln1mp 128 | 129 | ### Max likelihood 130 | 131 | def max_likelihood(self,data,weights=None): 132 | if weights is None: 133 | n, tot = self._get_statistics(data) 134 | else: 135 | n, tot = self._get_weighted_statistics(data,weights) 136 | 137 | self.p = n/tot 138 | return self 139 | 140 | ### Collapsed 141 | 142 | def log_marginal_likelihood(self,data): 143 | return self._log_partition_function(*self._posterior_hypparams(*self._get_statistics(data))) \ 144 | - self._log_partition_function(self.alpha_0,self.beta_0) 145 | 146 | def _log_partition_function(self,alpha,beta): 147 | return special.betaln(alpha,beta) 148 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/meta.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | from builtins import range 4 | __all__ = ['_FixedParamsMixin', 'ProductDistribution'] 5 | 6 | import numpy as np 7 | 8 | from pybasicbayes.abstractions import Distribution, \ 9 | GibbsSampling, MeanField, MeanFieldSVI, MaxLikelihood 10 | from pybasicbayes.util.stats import atleast_2d 11 | 12 | 13 | class _FixedParamsMixin(Distribution): 14 | @property 15 | def num_parameters(self): 16 | return 0 17 | 18 | def resample(self, *args, **kwargs): 19 | return self 20 | 21 | def meanfieldupdate(self, *args, **kwargs): 22 | return self 23 | 24 | def get_vlb(self): 25 | return 0.
26 | 27 | def copy_sample(self): 28 | return self 29 | 30 | 31 | class ProductDistribution( 32 | GibbsSampling, MeanField, MeanFieldSVI, MaxLikelihood): 33 | def __init__(self, distns, slices=None): 34 | self._distns = distns 35 | self._slices = slices if slices is not None else \ 36 | [slice(i, i+1) for i in range(len(distns))] 37 | 38 | @property 39 | def params(self): 40 | return {idx:distn.params for idx, distn in enumerate(self._distns)} 41 | 42 | @property 43 | def hypparams(self): 44 | return {idx:distn.hypparams for idx, distn in enumerate(self._distns)} 45 | 46 | @property 47 | def num_parameters(self): 48 | return sum(d.num_parameters for d in self._distns) 49 | 50 | def rvs(self,size=[]): 51 | return np.concatenate( 52 | [atleast_2d(distn.rvs(size=size)) 53 | for distn in self._distns],axis=-1) 54 | 55 | def log_likelihood(self,x): 56 | return sum( 57 | distn.log_likelihood(x[...,sl]) 58 | for distn,sl in zip(self._distns,self._slices)) 59 | 60 | ### Gibbs 61 | 62 | def resample(self,data=[]): 63 | assert isinstance(data,(np.ndarray,list)) 64 | if isinstance(data,np.ndarray): 65 | for distn,sl in zip(self._distns,self._slices): 66 | distn.resample(data[...,sl]) 67 | else: 68 | for distn,sl in zip(self._distns,self._slices): 69 | distn.resample([d[...,sl] for d in data]) 70 | return self 71 | 72 | ### Max likelihood 73 | 74 | def max_likelihood(self,data,weights=None): 75 | assert isinstance(data,(np.ndarray,list)) 76 | if isinstance(data,np.ndarray): 77 | for distn,sl in zip(self._distns,self._slices): 78 | distn.max_likelihood(data[...,sl],weights=weights) 79 | else: 80 | for distn,sl in zip(self._distns,self._slices): 81 | distn.max_likelihood([d[...,sl] for d in data],weights=weights) 82 | return self 83 | 84 | ### Mean field 85 | 86 | def get_vlb(self): 87 | return sum(distn.get_vlb() for distn in self._distns) 88 | 89 | def expected_log_likelihood(self,x): 90 | return np.sum( 91 | [distn.expected_log_likelihood(x[...,sl]) 92 | for distn,sl in zip(self._distns,self._slices)], axis=0).ravel() 93 | 94 | def meanfieldupdate(self,data,weights,**kwargs): 95 | assert isinstance(data,(np.ndarray,list)) 96 | if isinstance(data,np.ndarray): 97 | for distn,sl in zip(self._distns,self._slices): 98 | distn.meanfieldupdate(data[...,sl],weights) 99 | else: 100 | for distn,sl in zip(self._distns,self._slices): 101 | distn.meanfieldupdate( 102 | [d[...,sl] for d in data],weights=weights) 103 | return self 104 | 105 | def _resample_from_mf(self): 106 | for distn in self._distns: 107 | distn._resample_from_mf() 108 | 109 | ### SVI 110 | 111 | def meanfield_sgdstep(self,data,weights,prob,stepsize): 112 | assert isinstance(data,(np.ndarray,list)) 113 | if isinstance(data,np.ndarray): 114 | for distn,sl in zip(self._distns,self._slices): 115 | distn.meanfield_sgdstep( 116 | data[...,sl],weights,prob,stepsize) 117 | else: 118 | for distn,sl in zip(self._distns,self._slices): 119 | distn.meanfield_sgdstep( 120 | [d[...,sl] for d in data],weights,prob,stepsize) 121 | return self 122 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/multinomial.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | from builtins import map 4 | from builtins import range 5 | __all__ = ['Categorical', 'CategoricalAndConcentration', 'Multinomial', 6 | 'MultinomialAndConcentration', 'GammaCompoundDirichlet', 'CRP'] 7 | 8 | import numpy as np 9 | from warnings 
import warn 10 | import scipy.stats as stats 11 | import scipy.special as special 12 | 13 | from pybasicbayes.abstractions import \ 14 | GibbsSampling, MeanField, MeanFieldSVI, MaxLikelihood, MAP 15 | 16 | from pybasicbayes.util.stats import sample_discrete 17 | 18 | try: 19 | from pybasicbayes.util.cstats import sample_crp_tablecounts 20 | except ImportError: 21 | warn('using slow sample_crp_tablecounts') 22 | from pybasicbayes.util.stats import sample_crp_tablecounts 23 | 24 | 25 | class Categorical(GibbsSampling, MeanField, MeanFieldSVI, MaxLikelihood, MAP): 26 | ''' 27 | This class represents a categorical distribution over labels, where the 28 | parameter is weights and the prior is a Dirichlet distribution. 29 | For example, if K == 3, then five samples may look like 30 | [0,1,0,2,1] 31 | Each entry is the label of a sample, like the outcome of die rolls. In other 32 | words, generated data or data passed to log_likelihood are indices, not 33 | indicator variables! (But when 'weighted data' is passed, like in mean 34 | field or weighted max likelihood, the weights are over indicator 35 | variables...) 36 | 37 | This class can be used as a weak limit approximation for a DP, particularly by 38 | calling __init__ with alpha_0 and K arguments, in which case the prior will be 39 | a symmetric Dirichlet with K components and parameter alpha_0/K; K is then the 40 | weak limit approximation parameter. 41 | 42 | Hyperparaemters: 43 | alphav_0 (vector) OR alpha_0 (scalar) and K 44 | 45 | Parameters: 46 | weights, a vector encoding a finite pmf 47 | ''' 48 | def __init__(self,weights=None,alpha_0=None,K=None,alphav_0=None,alpha_mf=None): 49 | self.K = K 50 | self.alpha_0 = alpha_0 51 | self.alphav_0 = alphav_0 52 | 53 | self._alpha_mf = alpha_mf if alpha_mf is not None else self.alphav_0 54 | 55 | self.weights = weights 56 | 57 | if weights is None and self.alphav_0 is not None: 58 | self.resample() # intialize from prior 59 | 60 | def _get_alpha_0(self): 61 | return self._alpha_0 62 | 63 | def _set_alpha_0(self,alpha_0): 64 | self._alpha_0 = alpha_0 65 | if not any(_ is None for _ in (self.K, self._alpha_0)): 66 | self.alphav_0 = np.repeat(self._alpha_0/self.K,self.K) 67 | 68 | alpha_0 = property(_get_alpha_0,_set_alpha_0) 69 | 70 | def _get_alphav_0(self): 71 | return self._alphav_0 if hasattr(self,'_alphav_0') else None 72 | 73 | def _set_alphav_0(self,alphav_0): 74 | if alphav_0 is not None: 75 | self._alphav_0 = alphav_0 76 | self.K = len(alphav_0) 77 | 78 | alphav_0 = property(_get_alphav_0,_set_alphav_0) 79 | 80 | @property 81 | def params(self): 82 | return dict(weights=self.weights) 83 | 84 | @property 85 | def hypparams(self): 86 | return dict(alphav_0=self.alphav_0) 87 | 88 | @property 89 | def num_parameters(self): 90 | return len(self.weights) 91 | 92 | def rvs(self,size=None): 93 | return sample_discrete(self.weights,size) 94 | 95 | def log_likelihood(self,x): 96 | out = np.zeros_like(x, dtype=np.double) 97 | nanidx = np.isnan(x) 98 | err = np.seterr(divide='ignore') 99 | out[~nanidx] = np.log(self.weights)[list(x[~nanidx])] # log(0) can happen, no warning 100 | np.seterr(**err) 101 | return out 102 | 103 | ### Gibbs sampling 104 | 105 | def resample(self,data=[],counts=None): 106 | counts = self._get_statistics(data) if counts is None else counts 107 | self.weights = np.random.dirichlet(self.alphav_0 + counts) 108 | np.clip(self.weights, np.spacing(1.), np.inf, out=self.weights) 109 | # NOTE: next line is so we can use Gibbs sampling to initialize mean field 110 | self._alpha_mf 
= self.weights * self.alphav_0.sum() 111 | assert (self._alpha_mf >= 0.).all() 112 | return self 113 | 114 | def _get_statistics(self,data,K=None): 115 | K = K if K else self.K 116 | if isinstance(data,np.ndarray) or \ 117 | (isinstance(data,list) and len(data) > 0 118 | and not isinstance(data[0],(np.ndarray,list))): 119 | counts = np.bincount(data,minlength=K) 120 | else: 121 | counts = sum(np.bincount(d,minlength=K) for d in data) 122 | return counts 123 | 124 | def _get_weighted_statistics(self,data,weights): 125 | if isinstance(weights,np.ndarray): 126 | assert weights.ndim in (1,2) 127 | if data is None or weights.ndim == 2: 128 | # when weights is 2D or data is None, the weights are expected 129 | # indicators and data is just a placeholder; nominally data 130 | # should be np.arange(K)[na,:].repeat(N,axis=0) 131 | counts = np.atleast_2d(weights).sum(0) 132 | else: 133 | # when weights is 1D, data is indices and we do a weighted 134 | # bincount 135 | counts = np.bincount(data,weights,minlength=self.K) 136 | else: 137 | if len(weights) == 0: 138 | counts = np.zeros(self.K,dtype=int) 139 | else: 140 | data = data if data else [None]*len(weights) 141 | counts = sum(self._get_weighted_statistics(d,w) 142 | for d, w in zip(data,weights)) 143 | return counts 144 | 145 | ### Mean Field 146 | 147 | def meanfieldupdate(self,data,weights): 148 | # update 149 | self._alpha_mf = self.alphav_0 + self._get_weighted_statistics(data,weights) 150 | self.weights = self._alpha_mf / self._alpha_mf.sum() # for plotting 151 | assert (self._alpha_mf > 0.).all() 152 | return self 153 | 154 | def get_vlb(self): 155 | # return avg energy plus entropy, our contribution to the vlb 156 | # see Eq. 10.66 in Bishop 157 | logpitilde = self.expected_log_likelihood() # default is on np.arange(self.K) 158 | q_entropy = -1* ( 159 | (logpitilde*(self._alpha_mf-1)).sum() 160 | + special.gammaln(self._alpha_mf.sum()) - special.gammaln(self._alpha_mf).sum()) 161 | p_avgengy = special.gammaln(self.alphav_0.sum()) - special.gammaln(self.alphav_0).sum() \ 162 | + ((self.alphav_0-1)*logpitilde).sum() 163 | 164 | return p_avgengy + q_entropy 165 | 166 | def expected_log_likelihood(self,x=None): 167 | # usually called when np.all(x == np.arange(self.K)) 168 | x = x if x is not None else slice(None) 169 | return special.digamma(self._alpha_mf[x]) - special.digamma(self._alpha_mf.sum()) 170 | 171 | ### Mean Field SGD 172 | 173 | def meanfield_sgdstep(self,data,weights,prob,stepsize): 174 | self._alpha_mf = \ 175 | (1-stepsize) * self._alpha_mf + stepsize * ( 176 | self.alphav_0 177 | + 1./prob * self._get_weighted_statistics(data,weights)) 178 | self.weights = self._alpha_mf / self._alpha_mf.sum() # for plotting 179 | return self 180 | 181 | def _resample_from_mf(self): 182 | self.weights = np.random.dirichlet(self._alpha_mf) 183 | 184 | ### Max likelihood 185 | 186 | def max_likelihood(self,data,weights=None): 187 | if weights is None: 188 | counts = self._get_statistics(data) 189 | else: 190 | counts = self._get_weighted_statistics(data,weights) 191 | self.weights = counts/counts.sum() 192 | return self 193 | 194 | def MAP(self,data,weights=None): 195 | if weights is None: 196 | counts = self._get_statistics(data) 197 | else: 198 | counts = self._get_weighted_statistics(data,weights) 199 | counts += self.alphav_0 200 | self.weights = counts/counts.sum() 201 | return self 202 | 203 | 204 | class CategoricalAndConcentration(Categorical): 205 | ''' 206 | Categorical with resampling of the symmetric Dirichlet concentration 207 | 
parameter. 208 | 209 | concentration ~ Gamma(a_0,b_0) 210 | 211 | The Dirichlet prior over pi is then 212 | 213 | pi ~ Dir(concentration/K) 214 | ''' 215 | def __init__(self,a_0,b_0,K,alpha_0=None,weights=None): 216 | self.alpha_0_obj = GammaCompoundDirichlet(a_0=a_0,b_0=b_0,K=K,concentration=alpha_0) 217 | super(CategoricalAndConcentration,self).__init__(alpha_0=self.alpha_0, 218 | K=K,weights=weights) 219 | 220 | def _get_alpha_0(self): 221 | return self.alpha_0_obj.concentration 222 | 223 | def _set_alpha_0(self,alpha_0): 224 | self.alpha_0_obj.concentration = alpha_0 225 | self.alphav_0 = np.repeat(alpha_0/self.K,self.K) 226 | 227 | alpha_0 = property(_get_alpha_0, _set_alpha_0) 228 | 229 | @property 230 | def params(self): 231 | return dict(alpha_0=self.alpha_0,weights=self.weights) 232 | 233 | @property 234 | def hypparams(self): 235 | return dict(a_0=self.a_0,b_0=self.b_0,K=self.K) 236 | 237 | def resample(self,data=[]): 238 | counts = self._get_statistics(data,self.K) 239 | self.alpha_0_obj.resample(counts) 240 | self.alpha_0 = self.alpha_0 # for the effect on alphav_0 241 | return super(CategoricalAndConcentration,self).resample(data) 242 | 243 | def resample_just_weights(self,data=[]): 244 | return super(CategoricalAndConcentration,self).resample(data) 245 | 246 | def meanfieldupdate(self,*args,**kwargs): # TODO 247 | warn('MeanField not implemented for %s; concentration parameter will stay fixed') 248 | return super(CategoricalAndConcentration,self).meanfieldupdate(*args,**kwargs) 249 | 250 | def max_likelihood(self,*args,**kwargs): 251 | raise NotImplementedError 252 | 253 | 254 | class Multinomial(Categorical): 255 | ''' 256 | Like Categorical but the data are counts, so _get_statistics is overridden 257 | (though _get_weighted_statistics can stay the same!). log_likelihood also 258 | changes since, just like for the binomial special case, we sum over all 259 | possible orderings. 260 | 261 | For example, if K == 3, then a sample with n=5 might be 262 | array([2,2,1]) 263 | 264 | A Poisson process conditioned on the number of points emitted. 265 | ''' 266 | def __init__(self,weights=None,alpha_0=None,K=None,alphav_0=None,alpha_mf=None, 267 | N=1): 268 | self.N = N 269 | super(Multinomial, self).__init__(weights,alpha_0,K,alphav_0,alpha_mf) 270 | 271 | def log_likelihood(self,x): 272 | assert isinstance(x,np.ndarray) and x.ndim == 2 and x.shape[1] == self.K 273 | return np.where(x,x*np.log(self.weights),0.).sum(1) \ 274 | + special.gammaln(x.sum(1)+1) - special.gammaln(x+1).sum(1) 275 | 276 | def rvs(self,size=None,N=None): 277 | N = N if N else self.N 278 | return np.random.multinomial(N, self.weights, size=size) 279 | 280 | def _get_statistics(self,data,K=None): 281 | K = K if K else self.K 282 | if isinstance(data,np.ndarray): 283 | return np.atleast_2d(data).sum(0) 284 | else: 285 | if len(data) == 0: 286 | return np.zeros(K,dtype=int) 287 | return np.concatenate(data).sum(0) 288 | 289 | def expected_log_likelihood(self,x=None): 290 | if x is not None and (not x.ndim == 2 or not np.all(x == np.eye(x.shape[0]))): 291 | raise NotImplementedError # TODO nontrivial expected log likelihood 292 | return super(Multinomial,self).expected_log_likelihood() 293 | 294 | 295 | class MultinomialAndConcentration(CategoricalAndConcentration,Multinomial): 296 | pass 297 | 298 | 299 | class CRP(GibbsSampling): 300 | ''' 301 | concentration ~ Gamma(a_0,b_0) [b_0 is inverse scale, inverse of numpy scale arg] 302 | rvs ~ CRP(concentration) 303 | 304 | This class models CRPs. 
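Under a CRP with concentration alpha, the (n+1)-th customer joins an existing table that already seats c_k customers with probability proportional to c_k, and starts a new table with probability proportional to alpha; the rvs method below samples seatings by exactly this rule.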
The parameter is the concentration parameter (proportional 305 | to probability of starting a new table given some number of customers in the 306 | restaurant), which has a Gamma prior. 307 | ''' 308 | 309 | def __init__(self,a_0,b_0,concentration=None): 310 | self.a_0 = a_0 311 | self.b_0 = b_0 312 | 313 | if concentration is not None: 314 | self.concentration = concentration 315 | else: 316 | self.resample(niter=1) 317 | 318 | @property 319 | def params(self): 320 | return dict(concentration=self.concentration) 321 | 322 | @property 323 | def hypparams(self): 324 | return dict(a_0=self.a_0,b_0=self.b_0) 325 | 326 | def rvs(self,customer_counts): 327 | # could replace this with one of the faster C versions I have lying 328 | # around, but at least the Python version is clearer 329 | assert isinstance(customer_counts,list) or isinstance(customer_counts,int) 330 | if isinstance(customer_counts,int): 331 | customer_counts = [customer_counts] 332 | 333 | restaurants = [] 334 | for num in customer_counts: 335 | # a CRP with num customers 336 | tables = [] 337 | for c in range(num): 338 | newidx = sample_discrete(np.array(tables + [self.concentration])) 339 | if newidx == len(tables): 340 | tables += [1] 341 | else: 342 | tables[newidx] += 1 343 | 344 | restaurants.append(tables) 345 | 346 | return restaurants if len(restaurants) > 1 else restaurants[0] 347 | 348 | def log_likelihood(self,restaurants): 349 | assert isinstance(restaurants,list) and len(restaurants) > 0 350 | if not isinstance(restaurants[0],list): restaurants=[restaurants] 351 | 352 | likes = [] 353 | for counts in restaurants: 354 | counts = np.array([c for c in counts if c > 0]) # remove zero counts b/c of gammaln 355 | K = len(counts) # number of tables 356 | N = sum(counts) # number of customers 357 | likes.append(K*np.log(self.concentration) + np.sum(special.gammaln(counts)) + 358 | special.gammaln(self.concentration) - 359 | special.gammaln(N+self.concentration)) 360 | 361 | return np.asarray(likes) if len(likes) > 1 else likes[0] 362 | 363 | def resample(self,data=[],niter=50): 364 | for itr in range(niter): 365 | a_n, b_n = self._posterior_hypparams(*self._get_statistics(data)) 366 | self.concentration = np.random.gamma(a_n,scale=1./b_n) 367 | 368 | def _posterior_hypparams(self,sample_numbers,total_num_distinct): 369 | # NOTE: this is a stochastic function: it samples auxiliary variables 370 | if total_num_distinct > 0: 371 | sample_numbers = np.array(sample_numbers) 372 | sample_numbers = sample_numbers[sample_numbers > 0] 373 | 374 | wvec = np.random.beta(self.concentration+1,sample_numbers) 375 | svec = np.array(stats.bernoulli.rvs(sample_numbers/(sample_numbers+self.concentration))) 376 | return self.a_0 + total_num_distinct-svec.sum(), (self.b_0 - np.log(wvec).sum()) 377 | else: 378 | return self.a_0, self.b_0 379 | return self 380 | 381 | def _get_statistics(self,data): 382 | assert isinstance(data,list) 383 | if len(data) == 0: 384 | sample_numbers = 0 385 | total_num_distinct = 0 386 | else: 387 | if isinstance(data[0],list): 388 | sample_numbers = np.array(list(map(sum,data))) 389 | total_num_distinct = sum(map(len,data)) 390 | else: 391 | sample_numbers = np.array(sum(data)) 392 | total_num_distinct = len(data) 393 | 394 | return sample_numbers, total_num_distinct 395 | 396 | 397 | class GammaCompoundDirichlet(CRP): 398 | # TODO this class is a bit ugly 399 | ''' 400 | Implements a Gamma(a_0,b_0) prior over finite dirichlet concentration 401 | parameter. 
The concentration is scaled according to the weak-limit sequence. 402 | 403 | For each set of counts i, the model is 404 | concentration ~ Gamma(a_0,b_0) 405 | pi_i ~ Dir(concentration/K) 406 | data_i ~ Multinomial(pi_i) 407 | 408 | K is a free parameter in that with big enough K (relative to the size of the 409 | sampled data) everything starts to act like a DP; K is just the size of the 410 | mesh projection. 411 | ''' 412 | def __init__(self,K,a_0,b_0,concentration=None): 413 | self.K = K 414 | super(GammaCompoundDirichlet,self).__init__(a_0=a_0,b_0=b_0, 415 | concentration=concentration) 416 | 417 | @property 418 | def params(self): 419 | return dict(concentration=self.concentration) 420 | 421 | @property 422 | def hypparams(self): 423 | return dict(a_0=self.a_0,b_0=self.b_0,K=self.K) 424 | 425 | def rvs(self, sample_counts=None, size=None): 426 | if sample_counts is None: 427 | sample_counts = size 428 | if isinstance(sample_counts,int): 429 | sample_counts = [sample_counts] 430 | out = np.empty((len(sample_counts),self.K),dtype=int) 431 | for idx,c in enumerate(sample_counts): 432 | out[idx] = np.random.multinomial(c, 433 | np.random.dirichlet(np.repeat(self.concentration/self.K,self.K))) 434 | return out if out.shape[0] > 1 else out[0] 435 | 436 | def resample(self,data=[],niter=50,weighted_cols=None): 437 | if weighted_cols is not None: 438 | self.weighted_cols = weighted_cols 439 | else: 440 | self.weighted_cols = np.ones(self.K) 441 | 442 | # all this is to check if data is empty 443 | if isinstance(data,np.ndarray): 444 | size = data.sum() 445 | elif isinstance(data,list): 446 | size = sum(d.sum() for d in data) 447 | else: 448 | assert data == 0 449 | size = 0 450 | 451 | if size > 0: 452 | return super(GammaCompoundDirichlet,self).resample(data,niter=niter) 453 | else: 454 | return super(GammaCompoundDirichlet,self).resample(data,niter=1) 455 | 456 | def _get_statistics(self,data): 457 | # NOTE: this is a stochastic function: it samples auxiliary variables 458 | counts = np.array(data,ndmin=2,order='C') 459 | 460 | # sample m's, which sample an inverse of the weak limit projection 461 | if counts.sum() == 0: 462 | return 0, 0 463 | else: 464 | m = sample_crp_tablecounts(self.concentration,counts,self.weighted_cols) 465 | return counts.sum(1), m.sum() 466 | 467 | def _get_statistics_python(self,data): 468 | counts = np.array(data,ndmin=2) 469 | 470 | # sample m's 471 | if counts.sum() == 0: 472 | return 0, 0 473 | else: 474 | m = 0 475 | for (i,j), n in np.ndenumerate(counts): 476 | m += (np.random.rand(n) < self.concentration*self.K*self.weighted_cols[j] \ 477 | / (np.arange(n)+self.concentration*self.K*self.weighted_cols[j])).sum() 478 | return counts.sum(1), m 479 | 480 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/negativebinomial.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | from builtins import range 4 | from builtins import object 5 | __all__ = [ 6 | 'NegativeBinomial', 'NegativeBinomialFixedR', 'NegativeBinomialIntegerR2', 7 | 'NegativeBinomialIntegerR', 'NegativeBinomialFixedRVariant', 8 | 'NegativeBinomialIntegerRVariant', 9 | 'NegativeBinomialIntegerR2Variant'] 10 | 11 | import numpy as np 12 | from numpy import newaxis as na 13 | import scipy.special as special 14 | from scipy.special import logsumexp 15 | from warnings import warn 16 | 17 | from
pybasicbayes.abstractions import Distribution, GibbsSampling, \ 18 | MeanField, MeanFieldSVI, MaxLikelihood 19 | from pybasicbayes.util.stats import getdatasize, flattendata, \ 20 | sample_discrete_from_log, sample_discrete, atleast_2d 21 | 22 | try: 23 | from pybasicbayes.util.cstats import sample_crp_tablecounts 24 | except ImportError: 25 | warn('using slow sample_crp_tablecounts') 26 | from pybasicbayes.util.stats import sample_crp_tablecounts 27 | 28 | 29 | class _NegativeBinomialBase(Distribution): 30 | ''' 31 | Negative Binomial distribution with a conjugate beta prior on p and a 32 | separate gamma prior on r. The parameter r does not need to be an integer. 33 | If r is an integer, then x ~ NegBin(r,p) is the same as 34 | x = np.random.geometric(1-p,size=r).sum() - r 35 | where r is subtracted to make the geometric support be {0,1,2,...} 36 | Mean is r*p/(1-p), var is r*p/(1-p)**2 37 | 38 | Uses the data augmentation sampling method from Zhou et al. ICML 2012 39 | 40 | NOTE: the support is {0,1,2,...}. 41 | 42 | Hyperparameters: 43 | k_0, theta_0: r ~ Gamma(k, theta) 44 | or r = np.random.gamma(k,theta) 45 | alpha_0, beta_0: p ~ Beta(alpha,beta) 46 | or p = np.random.beta(alpha,beta) 47 | 48 | Parameters: 49 | r 50 | p 51 | ''' 52 | def __init__(self,r=None,p=None,k_0=None,theta_0=None,alpha_0=None,beta_0=None): 53 | self.r = r 54 | self.p = p 55 | 56 | self.k_0 = k_0 57 | self.theta_0 = theta_0 58 | self.alpha_0 = alpha_0 59 | self.beta_0 = beta_0 60 | 61 | if r is p is None and not any(_ is None for _ in (k_0,theta_0,alpha_0,beta_0)): 62 | self.resample() # initialize from prior 63 | 64 | @property 65 | def params(self): 66 | return dict(r=self.r,p=self.p) 67 | 68 | @property 69 | def hypparams(self): 70 | return dict(k_0=self.k_0,theta_0=self.theta_0, 71 | alpha_0=self.alpha_0,beta_0=self.beta_0) 72 | 73 | def log_likelihood(self,x,r=None,p=None): 74 | r = r if r is not None else self.r 75 | p = p if p is not None else self.p 76 | x = np.array(x,ndmin=1) 77 | 78 | if self.p > 0: 79 | xnn = x[x >= 0] 80 | raw = np.empty(x.shape) 81 | raw[x>=0] = special.gammaln(r + xnn) - special.gammaln(r) \ 82 | - special.gammaln(xnn+1) + r*np.log(1-p) + xnn*np.log(p) 83 | raw[x<0] = -np.inf 84 | return raw if isinstance(x,np.ndarray) else raw[0] 85 | else: 86 | raw = np.log(np.zeros(x.shape)) 87 | raw[x == 0] = 0. 88 | return raw if isinstance(x,np.ndarray) else raw[0] 89 | 90 | def log_sf(self,x): 91 | scalar = not isinstance(x,np.ndarray) 92 | x = np.atleast_1d(x) 93 | errs = np.seterr(divide='ignore') 94 | ret = np.log(special.betainc(x+1,self.r,self.p)) 95 | np.seterr(**errs) 96 | ret[x < 0] = np.log(1.)
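# NOTE: scipy.special.betainc(a,b,z) is the regularized incomplete beta I_z(a,b), so the line above computes log P(X > x) = log I_p(x+1, r), the standard negative binomial survival-function identity; values x < 0 lie below the support and get survival probability 1, and the seterr guard silences the log(0) warning when the survival probability underflows to zero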
97 | if scalar: 98 | return ret[0] 99 | else: 100 | return ret 101 | 102 | def rvs(self,size=None): 103 | return np.random.poisson(np.random.gamma(self.r,self.p/(1-self.p),size=size)) 104 | 105 | class NegativeBinomial(_NegativeBinomialBase, GibbsSampling): 106 | def resample(self,data=[],niter=20): 107 | if getdatasize(data) == 0: 108 | self.p = np.random.beta(self.alpha_0,self.beta_0) 109 | self.r = np.random.gamma(self.k_0,self.theta_0) 110 | else: 111 | data = atleast_2d(flattendata(data)) 112 | N = len(data) 113 | for itr in range(niter): 114 | ### resample r 115 | msum = sample_crp_tablecounts(self.r,data).sum() 116 | self.r = np.random.gamma(self.k_0 + msum, 1/(1/self.theta_0 - N*np.log(1-self.p))) 117 | ### resample p 118 | self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r) 119 | return self 120 | 121 | def resample_python(self,data=[],niter=20): 122 | if getdatasize(data) == 0: 123 | self.p = np.random.beta(self.alpha_0,self.beta_0) 124 | self.r = np.random.gamma(self.k_0,self.theta_0) 125 | else: 126 | data = flattendata(data) 127 | N = len(data) 128 | for itr in range(niter): 129 | ### resample r 130 | msum = 0. 131 | for n in data: 132 | msum += (np.random.rand(n) < self.r/(np.arange(n)+self.r)).sum() 133 | self.r = np.random.gamma(self.k_0 + msum, 1/(1/self.theta_0 - N*np.log(1-self.p))) 134 | ### resample p 135 | self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r) 136 | return self 137 | 138 | ### OLD unused alternatives 139 | 140 | def resample_logseriesaug(self,data=[],niter=20): 141 | # an alternative algorithm, kind of opaque and no advantages... 142 | if getdatasize(data) == 0: 143 | self.p = np.random.beta(self.alpha_0,self.beta_0) 144 | self.r = np.random.gamma(self.k_0,self.theta_0) 145 | else: 146 | data = flattendata(data) 147 | N = data.shape[0] 148 | logF = self.logF 149 | L_i = np.zeros(N) 150 | data_nz = data[data > 0] 151 | for itr in range(niter): 152 | logR = np.arange(1,logF.shape[1]+1)*np.log(self.r) + logF 153 | L_i[data > 0] = sample_discrete_from_log(logR[data_nz-1,:data_nz.max()],axis=1)+1 154 | self.r = np.random.gamma(self.k_0 + L_i.sum(), 1/(1/self.theta_0 - np.log(1-self.p)*N)) 155 | self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r) 156 | return self 157 | 158 | @classmethod 159 | def _set_up_logF(cls): 160 | if not hasattr(cls,'logF'): 161 | # actually indexes logF[0,0] to correspond to log(F(1,1)) in Zhou 162 | # paper, but keeps track of that alignment with the other code! 163 | # especially arange(1,...), only using nonzero data and shifting it 164 | SIZE = 500 165 | 166 | logF = -np.inf * np.ones((SIZE,SIZE)) 167 | logF[0,0] = 0. 
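# each row is built from the previous one: convolving with [0, m, 1] implements the Stirling-number-like recurrence F(m+1,j) = m*F(m,j) + F(m,j-1) used in the Zhou et al. augmentation, carried out in log space with max-subtraction for numerical stability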
168 | for m in range(1,logF.shape[0]): 169 | prevrow = np.exp(logF[m-1] - logF[m-1].max()) 170 | logF[m] = np.log(np.convolve(prevrow,[0,m,1],'same')) + logF[m-1].max() 171 | cls.logF = logF 172 | 173 | 174 | class NegativeBinomialFixedR(_NegativeBinomialBase, GibbsSampling, MeanField, MeanFieldSVI, MaxLikelihood): 175 | def __init__(self,r=None,p=None,alpha_0=None,beta_0=None,alpha_mf=None,beta_mf=None): 176 | self.p = p 177 | 178 | self.r = r 179 | 180 | self.alpha_0 = alpha_0 181 | self.beta_0 = beta_0 182 | 183 | if p is None and not any(_ is None for _ in (alpha_0,beta_0)): 184 | self.resample() # intialize from prior 185 | 186 | if not any(_ is None for _ in (alpha_mf,beta_mf)): 187 | self.alpha_mf = alpha_mf 188 | self.beta_mf = beta_mf 189 | 190 | @property 191 | def hypparams(self): 192 | return dict(alpha_0=self.alpha_0,beta_0=self.beta_0) 193 | 194 | @property 195 | def natural_hypparam(self): 196 | return np.array([self.alpha_0,self.beta_0]) - 1 197 | 198 | @natural_hypparam.setter 199 | def natural_hypparam(self,natparam): 200 | self.alpha_0, self.beta_0 = natparam + 1 201 | 202 | ### Mean Field 203 | 204 | def _resample_from_mf(self): 205 | self.p = np.random.beta(self.alpha_mf,self.beta_mf) 206 | return self 207 | 208 | def meanfieldupdate(self,data,weights): 209 | self.alpha_mf, self.beta_mf = \ 210 | self._posterior_hypparams(*self._get_weighted_statistics(data,weights)) 211 | self.p = self.alpha_mf / (self.alpha_mf + self.beta_mf) 212 | 213 | def meanfield_sgdstep(self,data,weights,prob,stepsize): 214 | alpha_new, beta_new = \ 215 | self._posterior_hypparams(*( 216 | 1./prob * self._get_weighted_statistics(data,weights))) 217 | self.alpha_mf = (1-stepsize)*self.alpha_mf + stepsize*alpha_new 218 | self.beta_mf = (1-stepsize)*self.beta_mf + stepsize*beta_new 219 | self.p = self.alpha_mf / (self.alpha_mf + self.beta_mf) 220 | 221 | def get_vlb(self): 222 | Elnp, Eln1mp = self._mf_expected_statistics() 223 | p_avgengy = (self.alpha_0-1)*Elnp + (self.beta_0-1)*Eln1mp \ 224 | - (special.gammaln(self.alpha_0) + special.gammaln(self.beta_0) 225 | - special.gammaln(self.alpha_0 + self.beta_0)) 226 | q_entropy = special.betaln(self.alpha_mf,self.beta_mf) \ 227 | - (self.alpha_mf-1)*special.digamma(self.alpha_mf) \ 228 | - (self.beta_mf-1)*special.digamma(self.beta_mf) \ 229 | + (self.alpha_mf+self.beta_mf-2)*special.digamma(self.alpha_mf+self.beta_mf) 230 | return p_avgengy + q_entropy 231 | 232 | def _mf_expected_statistics(self): 233 | Elnp, Eln1mp = special.digamma([self.alpha_mf,self.beta_mf]) \ 234 | - special.digamma(self.alpha_mf + self.beta_mf) 235 | return Elnp, Eln1mp 236 | 237 | def expected_log_likelihood(self,x): 238 | Elnp, Eln1mp = self._mf_expected_statistics() 239 | x = np.atleast_1d(x) 240 | errs = np.seterr(invalid='ignore') 241 | out = x*Elnp + self.r*Eln1mp + self._log_base_measure(x,self.r) 242 | np.seterr(**errs) 243 | out[np.isnan(out)] = -np.inf 244 | return out if out.shape[0] > 1 else out[0] 245 | 246 | @staticmethod 247 | def _log_base_measure(x,r): 248 | return special.gammaln(x+r) - special.gammaln(x+1) - special.gammaln(r) 249 | 250 | ### Gibbs 251 | 252 | def resample(self,data=[]): 253 | self.p = np.random.beta(*self._posterior_hypparams(*self._get_statistics(data))) 254 | # set mean field params to something reasonable for initialization 255 | fakedata = self.rvs(10) 256 | self.alpha_mf, self.beta_mf = self._posterior_hypparams(*self._get_statistics(fakedata)) 257 | 258 | ### Max likelihood 259 | 260 | def max_likelihood(self,data,weights=None): 
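# with r held fixed, the ML estimate follows from the mean r*p/(1-p): writing xbar = tot/n gives p_hat = xbar/(r + xbar)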
261 | if weights is None: 262 | n, tot = self._get_statistics(data) 263 | else: 264 | n, tot = self._get_weighted_statistics(data,weights) 265 | 266 | self.p = (tot/n) / (self.r + tot/n) 267 | return self 268 | 269 | ### Statistics and posterior hypparams 270 | 271 | def _get_statistics(self,data): 272 | if getdatasize(data) == 0: 273 | n, tot = 0, 0 274 | elif isinstance(data,np.ndarray): 275 | assert np.all(data >= 0) 276 | data = np.atleast_1d(data) 277 | n, tot = data.shape[0], data.sum() 278 | elif isinstance(data,list): 279 | assert all(np.all(d >= 0) for d in data) 280 | n = sum(d.shape[0] for d in data) 281 | tot = sum(d.sum() for d in data) 282 | else: 283 | assert np.isscalar(data) 284 | n = 1 285 | tot = data 286 | 287 | return np.array([n, tot]) 288 | 289 | def _get_weighted_statistics(self,data,weights): 290 | if isinstance(weights,np.ndarray): 291 | assert np.all(data >= 0) and data.ndim == 1 292 | n, tot = weights.sum(), weights.dot(data) 293 | else: 294 | assert all(np.all(d >= 0) for d in data) 295 | n = sum(w.sum() for w in weights) 296 | tot = sum(w.dot(d) for d,w in zip(data,weights)) 297 | 298 | return np.array([n, tot]) 299 | 300 | def _posterior_hypparams(self,n,tot): 301 | return np.array([self.alpha_0 + tot, self.beta_0 + n*self.r]) 302 | 303 | class NegativeBinomialIntegerR2(_NegativeBinomialBase,MeanField,MeanFieldSVI,GibbsSampling): 304 | # NOTE: this class should replace NegativeBinomialFixedR completely... 305 | _fixedr_class = NegativeBinomialFixedR 306 | 307 | def __init__(self,alpha_0=None,beta_0=None,alphas_0=None,betas_0=None, 308 | r_support=None,r_probs=None,r_discrete_distn=None, 309 | r=None,ps=None): 310 | 311 | assert (r_discrete_distn is not None) ^ (r_support is not None and r_probs is not None) 312 | if r_discrete_distn is not None: 313 | r_support, = np.where(r_discrete_distn) 314 | r_probs = r_discrete_distn[r_support] 315 | r_support += 1 316 | self.r_support = np.asarray(r_support) 317 | self.rho_0 = self.rho_mf = np.log(r_probs) 318 | 319 | assert (alpha_0 is not None and beta_0 is not None) \ 320 | ^ (alphas_0 is not None and betas_0 is not None) 321 | alphas_0 = alphas_0 if alphas_0 is not None else [alpha_0]*len(r_support) 322 | betas_0 = betas_0 if betas_0 is not None else [beta_0]*len(r_support) 323 | ps = ps if ps is not None else [None]*len(r_support) 324 | self._fixedr_distns = \ 325 | [self._fixedr_class(r=r,p=p,alpha_0=alpha_0,beta_0=beta_0) 326 | for r,p,alpha_0,beta_0 in zip(r_support,ps,alphas_0,betas_0)] 327 | 328 | # for init 329 | self.ridx = sample_discrete(r_probs) 330 | self.r = r_support[self.ridx] 331 | 332 | def __repr__(self): 333 | return 'NB(r=%d,p=%0.3f)' % (self.r,self.p) 334 | 335 | @property 336 | def alphas_0(self): 337 | return np.array([d.alpha_0 for d in self._fixedr_distns]) \ 338 | if len(self._fixedr_distns) > 0 else None 339 | 340 | @property 341 | def betas_0(self): 342 | return np.array([d.beta_0 for d in self._fixedr_distns]) \ 343 | if len(self._fixedr_distns) > 0 else None 344 | 345 | @property 346 | def p(self): 347 | return self._fixedr_distns[self.ridx].p 348 | 349 | @p.setter 350 | def p(self,val): 351 | self._fixedr_distns[self.ridx].p = val 352 | 353 | def _resample_from_mf(self): 354 | self._resample_r_from_mf() 355 | self._resample_p_from_mf() 356 | 357 | def _resample_r_from_mf(self): 358 | lognorm = logsumexp(self.rho_mf) 359 | self.ridx = sample_discrete(np.exp(self.rho_mf - lognorm)) 360 | self.r = self.r_support[self.ridx] 361 | 362 | def _resample_p_from_mf(self): 363 | d = 
self._fixedr_distns[self.ridx] 364 | self.p = np.random.beta(d.alpha_mf,d.beta_mf) 365 | 366 | def get_vlb(self): 367 | return self._r_vlb() + sum(np.exp(rho)*d.get_vlb() 368 | for rho,d in zip(self.rho_mf,self._fixedr_distns)) 369 | 370 | def _r_vlb(self): 371 | return np.exp(self.rho_mf).dot(self.rho_0) \ 372 | - np.exp(self.rho_mf).dot(self.rho_mf) 373 | 374 | def meanfieldupdate(self,data,weights): 375 | for d in self._fixedr_distns: 376 | d.meanfieldupdate(data,weights) 377 | self._update_rho_mf(data,weights) 378 | # everything below here is for plotting 379 | ridx = self.rho_mf.argmax() 380 | d = self._fixedr_distns[ridx] 381 | self.r = d.r 382 | self.p = d.alpha_mf / (d.alpha_mf + d.beta_mf) 383 | 384 | def _update_rho_mf(self,data,weights): 385 | self.rho_mf = self.rho_0.copy() 386 | for idx, d in enumerate(self._fixedr_distns): 387 | n, tot = d._get_weighted_statistics(data,weights) 388 | Elnp, Eln1mp = d._mf_expected_statistics() 389 | self.rho_mf[idx] += (d.alpha_0-1+tot)*Elnp + (d.beta_0-1+n*d.r)*Eln1mp 390 | if isinstance(data,np.ndarray): 391 | self.rho_mf[idx] += weights.dot(d._log_base_measure(data,d.r)) 392 | else: 393 | self.rho_mf[idx] += sum(w.dot(d._log_base_measure(dt,d.r)) 394 | for dt,w in zip(data,weights)) 395 | 396 | def expected_log_likelihood(self,x): 397 | lognorm = logsumexp(self.rho_mf) 398 | return sum(np.exp(rho-lognorm)*d.expected_log_likelihood(x) 399 | for rho,d in zip(self.rho_mf,self._fixedr_distns)) 400 | 401 | def meanfield_sgdstep(self,data,weights,prob,stepsize): 402 | rho_mf_orig = self.rho_mf.copy() 403 | if isinstance(data,np.ndarray): 404 | self._update_rho_mf(data,prob*weights) 405 | else: 406 | self._update_rho_mf(data,[w*prob for w in weights]) 407 | rho_mf_new = self.rho_mf 408 | 409 | for d in self._fixedr_distns: 410 | d.meanfield_sgdstep(data,weights,prob,stepsize) 411 | 412 | self.rho_mf = (1-stepsize)*rho_mf_orig + stepsize*rho_mf_new 413 | 414 | # for plotting 415 | ridx = self.rho_mf.argmax() 416 | d = self._fixedr_distns[ridx] 417 | self.r = d.r 418 | self.p = d.alpha_mf / (d.alpha_mf + d.beta_mf) 419 | 420 | def resample(self,data=[]): 421 | self._resample_r(data) # marginalizes out p values 422 | self._resample_p(data) # resample p given sampled r 423 | return self 424 | 425 | def _resample_r(self,data): 426 | self.ridx = sample_discrete( 427 | self._posterior_hypparams(self._get_statistics(data))) 428 | self.r = self.r_support[self.ridx] 429 | return self 430 | 431 | def _resample_p(self,data): 432 | self._fixedr_distns[self.ridx].resample(data) 433 | return self 434 | 435 | def _get_statistics(self,data=[]): 436 | n, tot = self._fixedr_distns[0]._get_statistics(data) 437 | if n > 0: 438 | data = flattendata(data) 439 | alphas_n, betas_n = self.alphas_0 + tot, self.betas_0 + self.r_support*n 440 | log_marg_likelihoods = \ 441 | special.betaln(alphas_n, betas_n) \ 442 | - special.betaln(self.alphas_0, self.betas_0) \ 443 | + (special.gammaln(data[:,na]+self.r_support) 444 | - special.gammaln(data[:,na]+1) \ 445 | - special.gammaln(self.r_support)).sum(0) 446 | else: 447 | log_marg_likelihoods = np.zeros_like(self.r_support) 448 | return log_marg_likelihoods 449 | 450 | def _posterior_hypparams(self,log_marg_likelihoods): 451 | log_posterior_discrete = self.rho_0 + log_marg_likelihoods 452 | return np.exp(log_posterior_discrete - log_posterior_discrete.max()) 453 | 454 | class NegativeBinomialIntegerR(NegativeBinomialFixedR, GibbsSampling, MaxLikelihood): 455 | ''' 456 | Nonconjugate Discrete+Beta prior 457 | 
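For example, r_discrete_distn=np.ones(4)/4. (an illustrative choice) places a uniform prior on r in {1,2,3,4}.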
r_discrete_distribution is an array where index i is p(r=i+1) 458 | ''' 459 | def __init__(self,r_discrete_distn=None,r_support=None, 460 | alpha_0=None,beta_0=None,r=None,p=None): 461 | self.r_support = r_support 462 | self.r_discrete_distn = r_discrete_distn 463 | self.alpha_0 = alpha_0 464 | self.beta_0 = beta_0 465 | self.r = r 466 | self.p = p 467 | 468 | if r is p is None \ 469 | and not any(_ is None for _ in (r_discrete_distn,alpha_0,beta_0)): 470 | self.resample() # intialize from prior 471 | 472 | @property 473 | def hypparams(self): 474 | return dict(r_discrete_distn=self.r_discrete_distn, 475 | alpha_0=self.alpha_0,beta_0=self.beta_0) 476 | 477 | def get_r_discrete_distn(self): 478 | return self._r_discrete_distn 479 | 480 | def set_r_discrete_distn(self,r_discrete_distn): 481 | if r_discrete_distn is not None: 482 | r_discrete_distn = np.asarray(r_discrete_distn,dtype=np.float) 483 | r_support, = np.where(r_discrete_distn) 484 | r_probs = r_discrete_distn[r_support] 485 | r_probs /= r_probs.sum() 486 | r_support += 1 # r_probs[0] corresponds to r=1 487 | 488 | self.r_support = r_support 489 | self.r_probs = r_probs 490 | self._r_discrete_distn = r_discrete_distn 491 | 492 | r_discrete_distn = property(get_r_discrete_distn,set_r_discrete_distn) 493 | 494 | def rvs(self,size=None): 495 | out = np.random.geometric(1-self.p,size=size)-1 496 | for i in range(self.r-1): 497 | out += np.random.geometric(1-self.p,size=size)-1 498 | return out 499 | 500 | def resample(self,data=[]): 501 | alpha_n, betas_n, posterior_discrete = self._posterior_hypparams( 502 | *self._get_statistics(data)) 503 | 504 | r_idx = sample_discrete(posterior_discrete) 505 | self.r = self.r_support[r_idx] 506 | self.p = np.random.beta(alpha_n, betas_n[r_idx]) 507 | 508 | # NOTE: this class has a conjugate prior even though it's not in the 509 | # exponential family, so I wrote _get_statistics and _get_weighted_statistics 510 | # (which integrate out p) for the resample() and meanfield_update() methods, 511 | # though these aren't statistics in the exponential family sense 512 | 513 | def _get_statistics(self,data): 514 | # NOTE: since this isn't really in exponential family, this method needs 515 | # to look at hyperparameters. 
form posterior hyperparameters for the p 516 | # parameters here so we can integrate them out and get the r statistics 517 | n, tot = super(NegativeBinomialIntegerR,self)._get_statistics(data) 518 | if n > 0: 519 | alpha_n, betas_n = self.alpha_0 + tot, self.beta_0 + self.r_support*n 520 | data = flattendata(data) 521 | log_marg_likelihoods = \ 522 | special.betaln(alpha_n, betas_n) \ 523 | - special.betaln(self.alpha_0, self.beta_0) \ 524 | + (special.gammaln(data[:,na]+self.r_support) 525 | - special.gammaln(data[:,na]+1) \ 526 | - special.gammaln(self.r_support)).sum(0) 527 | else: 528 | log_marg_likelihoods = np.zeros_like(self.r_support) 529 | 530 | return n, tot, log_marg_likelihoods 531 | 532 | def _get_weighted_statistics(self,data,weights): 533 | n, tot = super(NegativeBinomialIntegerR,self)._get_weighted_statistics(data,weights) 534 | if n > 0: 535 | alpha_n, betas_n = self.alpha_0 + tot, self.beta_0 + self.r_support*n 536 | data, weights = flattendata(data), flattendata(weights) 537 | log_marg_likelihoods = \ 538 | special.betaln(alpha_n, betas_n) \ 539 | - special.betaln(self.alpha_0, self.beta_0) \ 540 | + (special.gammaln(data[:,na]+self.r_support) 541 | - special.gammaln(data[:,na]+1) \ 542 | - special.gammaln(self.r_support)).dot(weights) 543 | else: 544 | log_marg_likelihoods = np.zeros_like(self.r_support) 545 | 546 | return n, tot, log_marg_likelihoods 547 | 548 | def _posterior_hypparams(self,n,tot,log_marg_likelihoods): 549 | alpha_n = self.alpha_0 + tot 550 | betas_n = self.beta_0 + n*self.r_support 551 | log_posterior_discrete = np.log(self.r_probs) + log_marg_likelihoods 552 | posterior_discrete = np.exp(log_posterior_discrete - log_posterior_discrete.max()) 553 | return alpha_n, betas_n, posterior_discrete 554 | 555 | def max_likelihood(self,data,weights=None,stats=None): 556 | if stats is not None: 557 | n, tot = stats 558 | elif weights is None: 559 | n, tot = super(NegativeBinomialIntegerR,self)._get_statistics(data) 560 | else: 561 | n, tot = super(NegativeBinomialIntegerR,self)._get_weighted_statistics(data,weights) 562 | 563 | if n > 1: 564 | rs = self.r_support 565 | ps = self._max_likelihood_ps(n,tot,rs) 566 | 567 | # TODO TODO this isn't right for weighted data: do weighted sums 568 | if isinstance(data,np.ndarray): 569 | likelihoods = np.array([self.log_likelihood(data,r=r,p=p).sum() 570 | for r,p in zip(rs,ps)]) 571 | else: 572 | likelihoods = np.array([sum(self.log_likelihood(d,r=r,p=p).sum() 573 | for d in data) for r,p in zip(rs,ps)]) 574 | 575 | argmax = likelihoods.argmax() 576 | self.r = self.r_support[argmax] 577 | self.p = ps[argmax] 578 | return self 579 | 580 | def _log_base_measure(self,data): 581 | return [(special.gammaln(r+data) - special.gammaln(r) - special.gammaln(data+1)).sum() 582 | for r in self.r_support] 583 | 584 | def _max_likelihood_ps(self,n,tot,rs): 585 | ps = (tot/n) / (rs + tot/n) 586 | assert (ps >= 0).all() 587 | return ps 588 | 589 | class _StartAtRMixin(object): 590 | def log_likelihood(self,x,**kwargs): 591 | r = kwargs['r'] if 'r' in kwargs else self.r 592 | return super(_StartAtRMixin,self).log_likelihood(x-r,**kwargs) 593 | 594 | def log_sf(self,x,**kwargs): 595 | return super(_StartAtRMixin,self).log_sf(x-self.r,**kwargs) 596 | 597 | def expected_log_likelihood(self,x,**kwargs): 598 | r = kwargs['r'] if 'r' in kwargs else self.r 599 | return super(_StartAtRMixin,self).expected_log_likelihood(x-r,**kwargs) 600 | 601 | def rvs(self,size=[]): 602 | return super(_StartAtRMixin,self).rvs(size)+self.r 603 | 604 | class 
NegativeBinomialFixedRVariant(_StartAtRMixin,NegativeBinomialFixedR): 605 | def _get_statistics(self,data): 606 | n, tot = super(NegativeBinomialFixedRVariant,self)._get_statistics(data) 607 | n, tot = n, tot-n*self.r 608 | assert tot >= 0 609 | return np.array([n, tot]) 610 | 611 | def _get_weighted_statistics(self,data,weights): 612 | n, tot = super(NegativeBinomialFixedRVariant,self)._get_weighted_statistics(data,weights) 613 | n, tot = n, tot-n*self.r 614 | assert tot >= 0 615 | return np.array([n, tot]) 616 | 617 | class NegativeBinomialIntegerRVariant(NegativeBinomialIntegerR): 618 | def resample(self,data=[]): 619 | n, alpha_n, posterior_discrete, r_support = self._posterior_hypparams( 620 | *self._get_statistics(data)) # NOTE: pass out r_support b/c feasible subset 621 | self.r = r_support[sample_discrete(posterior_discrete)] 622 | self.p = np.random.beta(alpha_n - n*self.r, self.beta_0 + n*self.r) 623 | 624 | def _get_statistics(self,data): 625 | n = getdatasize(data) 626 | if n > 0: 627 | data = flattendata(data) 628 | feasible = self.r_support <= data.min() 629 | assert np.any(feasible) 630 | r_support = self.r_support[feasible] 631 | normalizers = (special.gammaln(data[:,na]) - special.gammaln(data[:,na]-r_support+1) 632 | - special.gammaln(r_support)).sum(0) 633 | return n, data.sum(), normalizers, feasible 634 | else: 635 | return n, None, None, None 636 | 637 | def _posterior_hypparams(self,n,tot,normalizers,feasible): 638 | if n == 0: 639 | return n, self.alpha_0, self.r_probs, self.r_support 640 | else: 641 | r_probs = self.r_probs[feasible] 642 | r_support = self.r_support[feasible] 643 | log_marg_likelihoods = special.betaln(self.alpha_0 + tot - n*r_support, 644 | self.beta_0 + r_support*n) \ 645 | - special.betaln(self.alpha_0, self.beta_0) \ 646 | + normalizers 647 | log_marg_probs = np.log(r_probs) + log_marg_likelihoods 648 | log_marg_probs -= log_marg_probs.max() 649 | marg_probs = np.exp(log_marg_probs) 650 | 651 | return n, self.alpha_0 + tot, marg_probs, r_support 652 | 653 | def _max_likelihood_ps(self,n,tot,rs): 654 | ps = 1-(rs*n)/tot 655 | assert (ps >= 0).all() 656 | return ps 657 | 658 | def rvs(self,size=[]): 659 | return super(NegativeBinomialIntegerRVariant,self).rvs(size) + self.r 660 | 661 | class NegativeBinomialIntegerR2Variant(NegativeBinomialIntegerR2): 662 | _fixedr_class = NegativeBinomialFixedRVariant 663 | 664 | def _update_rho_mf(self,data,weights): 665 | self.rho_mf = self.rho_0.copy() 666 | for idx, d in enumerate(self._fixedr_distns): 667 | n, tot = d._get_weighted_statistics(data,weights) 668 | Elnp, Eln1mp = d._mf_expected_statistics() 669 | self.rho_mf[idx] += (d.alpha_0-1+tot)*Elnp + (d.beta_0-1+n*d.r)*Eln1mp 670 | self.rho_mf_temp = self.rho_mf.copy() 671 | 672 | # NOTE: this method only needs to override parent in the base measure 673 | # part, i.e. 
data -> data-r 674 | if isinstance(data,np.ndarray): 675 | self.rho_mf[idx] += weights.dot(d._log_base_measure(data-d.r,d.r)) 676 | else: 677 | self.rho_mf[idx] += sum(w.dot(d._log_base_measure(dt-d.r,d.r)) 678 | for dt,w in zip(data,weights)) 679 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/poisson.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | __all__ = ['Poisson'] 4 | import numpy as np 5 | import scipy.stats as stats 6 | import scipy.special as special 7 | 8 | from pybasicbayes.abstractions import GibbsSampling, Collapsed, \ 9 | MaxLikelihood, MeanField, MeanFieldSVI 10 | 11 | 12 | class Poisson(GibbsSampling, Collapsed, MaxLikelihood, MeanField, MeanFieldSVI): 13 | ''' 14 | Poisson distribution with a conjugate Gamma prior. 15 | 16 | NOTE: the support is {0,1,2,...} 17 | 18 | Hyperparameters (following Wikipedia's notation): 19 | alpha_0, beta_0 20 | 21 | Parameter is the mean/variance parameter: 22 | lmbda 23 | ''' 24 | def __init__(self,lmbda=None,alpha_0=None,beta_0=None,mf_alpha_0=None,mf_beta_0=None): 25 | self.lmbda = lmbda 26 | 27 | self.alpha_0 = alpha_0 28 | self.beta_0 = beta_0 29 | self.mf_alpha_0 = mf_alpha_0 if mf_alpha_0 is not None else alpha_0 30 | self.mf_beta_0 = mf_beta_0 if mf_beta_0 is not None else beta_0 31 | 32 | if lmbda is None and not any(_ is None for _ in (alpha_0,beta_0)): 33 | self.resample() # intialize from prior 34 | 35 | @property 36 | def params(self): 37 | return dict(lmbda=self.lmbda) 38 | 39 | @property 40 | def hypparams(self): 41 | return dict(alpha_0=self.alpha_0,beta_0=self.beta_0) 42 | 43 | def log_sf(self,x): 44 | return stats.poisson.logsf(x,self.lmbda) 45 | 46 | def _posterior_hypparams(self,n,tot): 47 | return self.alpha_0 + tot, self.beta_0 + n 48 | 49 | def rvs(self,size=None): 50 | return np.random.poisson(self.lmbda,size=size) 51 | 52 | def log_likelihood(self,x): 53 | lmbda = self.lmbda 54 | x = np.array(x,ndmin=1) 55 | raw = np.empty(x.shape) 56 | raw[x>=0] = -lmbda + x[x>=0]*np.log(lmbda) - special.gammaln(x[x>=0]+1) 57 | raw[x<0] = -np.inf 58 | return raw if isinstance(x,np.ndarray) else raw[0] 59 | 60 | def _get_statistics(self,data): 61 | if isinstance(data,np.ndarray): 62 | n = data.shape[0] 63 | tot = data.sum() 64 | elif isinstance(data,list): 65 | n = sum(d.shape[0] for d in data) 66 | tot = sum(d.sum() for d in data) 67 | else: 68 | assert np.isscalar(data) 69 | n = 1 70 | tot = data 71 | 72 | return n, tot 73 | 74 | def _get_weighted_statistics(self,data,weights): 75 | if isinstance(data,np.ndarray): 76 | n = weights.sum() 77 | tot = weights.dot(data) 78 | elif isinstance(data,list): 79 | n = sum(w.sum() for w in weights) 80 | tot = sum(w.dot(d) for w,d in zip(weights,data)) 81 | else: 82 | assert np.isscalar(data) and np.isscalar(weights) 83 | n = weights 84 | tot = weights*data 85 | 86 | return np.array([n, tot]) 87 | 88 | ### Gibbs Sampling 89 | 90 | def resample(self,data=[],stats=None): 91 | stats = self._get_statistics(data) if stats is None else stats 92 | alpha_n, beta_n = self._posterior_hypparams(*stats) 93 | self.lmbda = np.random.gamma(alpha_n,1/beta_n) 94 | 95 | # next line is for mean field initialization 96 | self.mf_alpha_0, self.mf_beta_0 = self.lmbda * self.beta_0, self.beta_0 97 | 98 | return self 99 | 100 | ### Mean Field 101 | 102 | def _resample_from_mf(self): 103 | mf_alpha_0, mf_beta_0 = 
self._natural_to_standard(self.mf_natural_hypparam) 104 | self.lmbda = np.random.gamma(mf_alpha_0, 1./mf_beta_0) 105 | 106 | def meanfieldupdate(self,data,weights): 107 | self.mf_natural_hypparam = \ 108 | self.natural_hypparam + self._get_weighted_statistics(data,weights) 109 | self.lmbda = self.mf_alpha_0 / self.mf_beta_0 110 | 111 | def meanfield_sgdstep(self,data,weights,prob,stepsize): 112 | self.mf_natural_hypparam = \ 113 | (1-stepsize) * self.mf_natural_hypparam + stepsize * ( 114 | self.natural_hypparam 115 | + 1./prob * self._get_weighted_statistics(data,weights)) 116 | 117 | def get_vlb(self): 118 | return (self.natural_hypparam - self.mf_natural_hypparam).dot(self._mf_expected_statistics) \ 119 | - (self._log_partition_fn(self.alpha_0,self.beta_0) 120 | - self._log_partition_fn(self.mf_alpha_0,self.mf_beta_0)) 121 | 122 | def expected_log_likelihood(self,x): 123 | Emlmbda, Elnlmbda = self._mf_expected_statistics 124 | return -special.gammaln(x+1) + Elnlmbda * x + Emlmbda 125 | 126 | @property 127 | def _mf_expected_statistics(self): 128 | alpha, beta = self.mf_alpha_0, self.mf_beta_0 129 | return np.array([-alpha/beta, special.digamma(alpha) - np.log(beta)]) 130 | 131 | 132 | @property 133 | def natural_hypparam(self): 134 | return self._standard_to_natural(self.alpha_0,self.beta_0) 135 | 136 | @property 137 | def mf_natural_hypparam(self): 138 | return self._standard_to_natural(self.mf_alpha_0,self.mf_beta_0) 139 | 140 | @mf_natural_hypparam.setter 141 | def mf_natural_hypparam(self,natparam): 142 | self.mf_alpha_0, self.mf_beta_0 = self._natural_to_standard(natparam) 143 | 144 | 145 | def _standard_to_natural(self,alpha,beta): 146 | return np.array([beta, alpha-1]) 147 | 148 | def _natural_to_standard(self,natparam): 149 | return natparam[1]+1, natparam[0] 150 | 151 | ### Collapsed 152 | 153 | def log_marginal_likelihood(self,data): 154 | return self._log_partition_fn(*self._posterior_hypparams(*self._get_statistics(data))) \ 155 | - self._log_partition_fn(self.alpha_0,self.beta_0) \ 156 | - self._get_sum_of_gammas(data) 157 | 158 | def _log_partition_fn(self,alpha,beta): 159 | return special.gammaln(alpha) - alpha * np.log(beta) 160 | 161 | def _get_sum_of_gammas(self,data): 162 | if isinstance(data,np.ndarray): 163 | return special.gammaln(data+1).sum() 164 | elif isinstance(data,list): 165 | return sum(special.gammaln(d+1).sum() for d in data) 166 | else: 167 | assert isinstance(data,int) 168 | return special.gammaln(data+1) 169 | 170 | ### Max likelihood 171 | 172 | def max_likelihood(self,data,weights=None): 173 | if weights is None: 174 | n, tot = self._get_statistics(data) 175 | else: 176 | n, tot = self._get_weighted_statistics(data,weights) 177 | 178 | if n > 1e-2: 179 | self.lmbda = tot/n 180 | assert self.lmbda > 0 181 | else: 182 | self.broken = True 183 | self.lmbda = 999999 184 | 185 | return self 186 | 187 | -------------------------------------------------------------------------------- /pybasicbayes/distributions/uniform.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import map 3 | from builtins import range 4 | __all__ = ['UniformOneSided', 'Uniform'] 5 | 6 | import numpy as np 7 | 8 | from pybasicbayes.abstractions import GibbsSampling 9 | from pybasicbayes.util.stats import sample_pareto 10 | from pybasicbayes.util.general import any_none 11 | 12 | 13 | class UniformOneSided(GibbsSampling): 14 | ''' 15 | Models a uniform distribution over [low,high] for a parameter 
high. 16 | Low is a fixed hyperparameter (hence "OneSided"). See the Uniform class for 17 | the two-sided version. 18 | 19 | Likelihood is x ~ U[low,high] 20 | Prior is high ~ Pareto(x_m,alpha) following Wikipedia's notation 21 | 22 | Hyperparameters: 23 | x_m, alpha, low 24 | 25 | Parameters: 26 | high 27 | ''' 28 | def __init__(self,high=None,x_m=None,alpha=None,low=0.): 29 | self.high = high 30 | 31 | self.x_m = x_m 32 | self.alpha = alpha 33 | self.low = low 34 | 35 | have_hypers = x_m is not None and alpha is not None 36 | if high is None and have_hypers: 37 | self.resample() # intialize from prior 38 | 39 | @property 40 | def params(self): 41 | return {'high':self.high} 42 | 43 | @property 44 | def hypparams(self): 45 | return dict(x_m=self.x_m,alpha=self.alpha,low=self.low) 46 | 47 | def log_likelihood(self,x): 48 | x = np.atleast_1d(x) 49 | raw = np.where( 50 | (self.low <= x) & (x < self.high), 51 | -np.log(self.high - self.low),-np.inf) 52 | return raw if isinstance(x,np.ndarray) else raw[0] 53 | 54 | def rvs(self,size=[]): 55 | return np.random.uniform(low=self.low,high=self.high,size=size) 56 | 57 | def resample(self,data=[]): 58 | self.high = sample_pareto( 59 | *self._posterior_hypparams(*self._get_statistics(data))) 60 | return self 61 | 62 | def _get_statistics(self,data): 63 | if isinstance(data,np.ndarray): 64 | n = data.shape[0] 65 | datamax = data.max() 66 | else: 67 | n = sum(d.shape[0] for d in data) 68 | datamax = \ 69 | max(d.max() for d in data) if n > 0 else -np.inf 70 | return n, datamax 71 | 72 | def _posterior_hypparams(self,n,datamax): 73 | return max(datamax,self.x_m), n + self.alpha 74 | 75 | 76 | class Uniform(UniformOneSided): 77 | ''' 78 | Models a uniform distribution over [low,high] for parameters low and high. 79 | The prior is non-conjugate (though it's conditionally conjugate over one 80 | parameter at a time). 
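Resampling therefore alternates the two conditional updates: high given low uses the one-sided update directly, and low given high reuses the same update after transforming the data with _flip_data (see resample below).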
81 | 82 | Likelihood is x ~ U[low,high] 83 | Prior is -low ~ Pareto(x_m_low,alpha_low)-2*x_m_low 84 | high ~ Pareto(x_m_high,alpha_high) 85 | 86 | Hyperparameters: 87 | x_m_low, alpha_low 88 | x_m_high, alpha_high 89 | 90 | Parameters: 91 | low, high 92 | ''' 93 | def __init__( 94 | self,low=None,high=None, 95 | x_m_low=None,alpha_low=None,x_m_high=None,alpha_high=None): 96 | self.low = low 97 | self.high = high 98 | 99 | self.x_m_low = x_m_low 100 | self.alpha_low = alpha_low 101 | self.x_m_high = x_m_high 102 | self.alpha_high = alpha_high 103 | 104 | have_hypers = not any_none(x_m_low,alpha_low,x_m_high,alpha_high) 105 | if low is high is None and have_hypers: 106 | self.resample() # initialize from prior 107 | 108 | @property 109 | def params(self): 110 | return dict(low=self.low,high=self.high) 111 | 112 | @property 113 | def hypparams(self): 114 | return dict( 115 | x_m_low=self.x_m_low,alpha_low=self.alpha_low, 116 | x_m_high=self.x_m_high,alpha_high=self.alpha_high) 117 | 118 | def resample(self,data=[],niter=5): 119 | if len(data) == 0: 120 | self.low = -sample_pareto(-self.x_m_low,self.alpha_low) 121 | self.high = sample_pareto(self.x_m_high,self.alpha_high) 122 | else: 123 | for itr in range(niter): 124 | # resample high, fixing low 125 | self.x_m, self.alpha = self.x_m_high, self.alpha_high 126 | super(Uniform,self).resample(data) 127 | # tricky: flip data and resample 'high' again 128 | self.x_m, self.alpha = -self.x_m_low, self.alpha_low 129 | self.low, self.high = self.high, self.low 130 | super(Uniform,self).resample(self._flip_data(data)) 131 | self.low, self.high = self.x_m_low - self.high, self.low 132 | 133 | def _flip_data(self,data): 134 | if isinstance(data,np.ndarray): 135 | return self.x_m_low - data 136 | else: 137 | return list(map(self._flip_data,data)) 138 | -------------------------------------------------------------------------------- /pybasicbayes/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .mixture import Labels, CRPLabels, Mixture, MixtureDistribution, CollapsedMixture, CRPMixture 2 | from .factor_analysis import FactorAnalysis -------------------------------------------------------------------------------- /pybasicbayes/models/factor_analysis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Probabilistic factor analysis to perform dimensionality reduction on mouse images. 3 | With the probabilistic approach, we can handle missing data in the images. 4 | Technically this holds for missing at random data, but we can try it 5 | out on images where we treat cable pixels as missing, even though they 6 | won't be random. This should give us a model-based way to fill in pixels, 7 | and hopefully a more robust way to estimate principle components for modeling. 
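Concretely, the model fit below is z ~ N(0, I) and x | z ~ N(W z + mean, diag(sigmasq)), so marginally x ~ N(mean, W W^T + diag(sigmasq)).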
8 | """ 9 | import abc 10 | import numpy as np 11 | 12 | from pybasicbayes.abstractions import Model, \ 13 | ModelGibbsSampling, ModelMeanField, ModelMeanFieldSVI, ModelEM 14 | from pybasicbayes.util.stats import sample_gaussian 15 | from pybasicbayes.util.general import objarray 16 | 17 | from pybasicbayes.distributions import DiagonalRegression 18 | 19 | from pybasicbayes.util.profiling import line_profiled 20 | PROFILING = True 21 | 22 | class FactorAnalysisStates(object): 23 | """ 24 | Wrapper for the latent states of a factor analysis model 25 | """ 26 | def __init__(self, model, data, mask=None, **kwargs): 27 | self.model = model 28 | self.X = data 29 | if mask is None: 30 | mask = np.ones_like(data, dtype=bool) 31 | self.mask = mask 32 | assert data.shape == mask.shape and mask.dtype == bool 33 | assert self.X.shape[1] == self.D_obs 34 | 35 | # Initialize latent states 36 | self.N = self.X.shape[0] 37 | self.Z = np.random.randn(self.N, self.D_latent) 38 | 39 | @property 40 | def D_obs(self): 41 | return self.model.D_obs 42 | 43 | @property 44 | def D_latent(self): 45 | return self.model.D_latent 46 | 47 | @property 48 | def W(self): 49 | return self.model.W 50 | 51 | @property 52 | def mean(self): 53 | return self.model.mean 54 | 55 | @property 56 | def sigmasq(self): 57 | return self.model.sigmasq 58 | 59 | @property 60 | def regression(self): 61 | return self.model.regression 62 | 63 | def log_likelihood(self): 64 | # mu = np.dot(self.Z, self.W.T) 65 | # return -0.5 * np.sum(((self.X - mu) * self.mask) ** 2 / self.sigmasq) 66 | 67 | # Compute the marginal likelihood, integrating out z 68 | mu_x = self.mean 69 | Sigma_x = self.W.dot(self.W.T) + np.diag(self.sigmasq) 70 | 71 | from scipy.stats import multivariate_normal 72 | if not np.all(self.mask): 73 | # Find the patterns of missing dta 74 | missing_patterns = np.unique(self.mask, axis=0) 75 | 76 | # Evaluate the likelihood for each missing pattern 77 | lls = np.zeros(self.N) 78 | for pat in missing_patterns: 79 | inds = np.all(self.mask == pat, axis=1) 80 | lls[inds] = \ 81 | multivariate_normal(mu_x[pat], Sigma_x[np.ix_(pat, pat)])\ 82 | .logpdf(self.X[np.ix_(inds, pat)]) 83 | 84 | else: 85 | lls = multivariate_normal(mu_x, Sigma_x).logpdf(self.X) 86 | 87 | return lls 88 | 89 | ## Gibbs 90 | def resample(self): 91 | W, sigmasq = self.W, self.sigmasq 92 | J0 = np.eye(self.D_latent) 93 | h0 = np.zeros(self.D_latent) 94 | 95 | # Sample each latent embedding 96 | for n in range(self.N): 97 | Jobs = self.mask[n] / sigmasq 98 | Jpost = J0 + (W * Jobs[:, None]).T.dot(W) 99 | hpost = h0 + ((self.X[n] - self.mean) * Jobs).dot(W) 100 | self.Z[n] = sample_gaussian(J=Jpost, h=hpost) 101 | 102 | ## Mean field 103 | def E_step(self): 104 | W = self.W 105 | WWT = np.array([np.outer(wd,wd) for wd in W]) 106 | sigmasq_inv = 1./self.sigmasq 107 | self._meanfieldupdate(W, WWT, sigmasq_inv) 108 | 109 | # Copy over the expected states to Z 110 | self.Z = self.E_Z 111 | 112 | def meanfieldupdate(self): 113 | E_W, E_WWT, E_sigmasq_inv, _ = self.regression.mf_expectations 114 | self._meanfieldupdate(E_W, E_WWT, E_sigmasq_inv) 115 | 116 | # Copy over the expected states to Z 117 | self.Z = self.E_Z 118 | 119 | def _meanfieldupdate(self, E_W, E_WWT, E_sigmasq_inv): 120 | N, D_obs, D_lat = self.N, self.D_obs, self.D_latent 121 | E_WWT_vec = E_WWT.reshape(D_obs, -1) 122 | 123 | J0 = np.eye(D_lat) 124 | h0 = np.zeros(D_lat) 125 | 126 | # Get expectations for the latent embedding of these datapoints 127 | self.E_Z = np.zeros((N, D_lat)) 128 | self.E_ZZT = 
np.zeros((N, D_lat, D_lat)) 129 | 130 | for n in range(N): 131 | Jobs = self.mask[n] * E_sigmasq_inv 132 | # Faster than Jpost = J0 + np.sum(E_WWT * Jobs[:,None,None], axis=0) 133 | Jpost = J0 + (np.dot(Jobs, E_WWT_vec)).reshape((D_lat, D_lat)) 134 | hpost = h0 + ((self.X[n] - self.mean) * Jobs).dot(E_W) 135 | 136 | # Get the expectations for this set of indices 137 | Sigma_post = np.linalg.inv(Jpost) 138 | self.E_Z[n] = Sigma_post.dot(hpost) 139 | self.E_ZZT[n] = Sigma_post + np.outer(self.E_Z[n], self.E_Z[n]) 140 | 141 | self._set_expected_stats() 142 | 143 | def _set_expected_stats(self): 144 | D_lat = self.D_latent 145 | Xc = self.X - self.mean 146 | E_Xsq = np.sum(Xc**2 * self.mask, axis=0) 147 | E_XZT = (Xc * self.mask).T.dot(self.E_Z) 148 | E_ZZT_vec = self.E_ZZT.reshape((self.E_ZZT.shape[0], D_lat ** 2)) 149 | E_ZZT = np.array([np.dot(self.mask[:, d], E_ZZT_vec).reshape((D_lat, D_lat)) 150 | for d in range(self.D_obs)]) 151 | n = np.sum(self.mask, axis=0) 152 | 153 | self.E_emission_stats = objarray([E_Xsq, E_XZT, E_ZZT, n]) 154 | 155 | def resample_from_mf(self): 156 | for n in range(self.N): 157 | mu_n = self.E_Z[n] 158 | Sigma_n = self.E_ZZT[n] - np.outer(mu_n, mu_n) 159 | self.Z[n] = sample_gaussian(mu=mu_n, Sigma=Sigma_n) 160 | 161 | def expected_log_likelihood(self): 162 | E_W, E_WWT, E_sigmasq_inv, E_log_sigmasq = self.regression.mf_expectations 163 | E_Xsq, E_XZT, E_ZZT, n = self.E_emission_stats 164 | 165 | ll = -0.5 * np.log(2 * np.pi) - 0.5 * np.sum(E_log_sigmasq * self.mask) 166 | ll += -0.5 * np.sum(E_Xsq * E_sigmasq_inv) 167 | ll += -0.5 * np.sum(-2 * E_XZT * E_W * E_sigmasq_inv[:,None]) 168 | ll += -0.5 * np.sum(E_WWT * E_ZZT * E_sigmasq_inv[:,None,None]) 169 | return ll 170 | 171 | 172 | class _FactorAnalysisBase(Model): 173 | __metaclass__ = abc.ABCMeta 174 | _states_class = FactorAnalysisStates 175 | 176 | def __init__(self, D_obs, D_latent, 177 | W=None, sigmasq=None, 178 | sigmasq_W_0=1.0, mu_W_0=0.0, 179 | alpha_0=3.0, beta_0=2.0): 180 | 181 | self.D_obs, self.D_latent = D_obs, D_latent 182 | 183 | # The weights and variances are encapsulated in a DiagonalRegression class 184 | self.regression = \ 185 | DiagonalRegression( 186 | self.D_obs, self.D_latent, 187 | mu_0=mu_W_0 * np.ones(self.D_latent), 188 | Sigma_0=sigmasq_W_0 * np.eye(self.D_latent), 189 | alpha_0=alpha_0, beta_0=beta_0, 190 | A=W, sigmasq=sigmasq) 191 | 192 | # Handle the mean separately since DiagonalRegression doesn't support affine :-/ 193 | self.mean = np.zeros(D_obs) 194 | 195 | self.data_list = [] 196 | 197 | @property 198 | def W(self): 199 | return self.regression.A 200 | 201 | @property 202 | def sigmasq(self): 203 | return self.regression.sigmasq_flat 204 | 205 | def set_empirical_mean(self): 206 | self.mean = np.zeros(self.D_obs) 207 | for n in range(self.D_obs): 208 | self.mean[n] = np.concatenate([d.X[d.mask[:,n] == 1, n] for d in self.data_list]).mean() 209 | 210 | def add_data(self, data, mask=None, **kwargs): 211 | self.data_list.append(self._states_class(self, data, mask=mask, **kwargs)) 212 | return self.data_list[-1] 213 | 214 | def generate(self, keep=True, N=1, mask=None, **kwargs): 215 | # Sample from the factor analysis model 216 | W, sigmasq = self.W, self.sigmasq 217 | Z = np.random.randn(N, self.D_latent) 218 | X = self.mean + np.dot(Z, W.T) + np.sqrt(sigmasq) * np.random.randn(N, self.D_obs) 219 | 220 | data = self._states_class(self, X, mask=mask, **kwargs) 221 | data.Z = Z 222 | if keep: 223 | self.data_list.append(data) 224 | return data.X, data.Z 225 | 226 | def 
_log_likelihoods(self, x, mask=None, **kwargs): 227 | self.add_data(x, mask=mask, **kwargs) 228 | states = self.data_list.pop() 229 | return states.log_likelihood() 230 | 231 | def log_likelihood(self): 232 | return sum([d.log_likelihood().sum() for d in self.data_list]) 233 | 234 | def log_probability(self): 235 | lp = 0 236 | 237 | # Prior 238 | # lp += (-self.alpha_0-1) * np.log(self.sigmasq) - self.beta_0 / self.sigmasq 239 | lp += -0.5 * np.sum(self.W**2) 240 | lp += -0.5 * np.sum(self.Z**2) 241 | lp += self.log_likelihood() 242 | return lp 243 | 244 | 245 | class _FactorAnalysisGibbs(_FactorAnalysisBase, ModelGibbsSampling): 246 | __metaclass__ = abc.ABCMeta 247 | 248 | def resample_model(self): 249 | for data in self.data_list: 250 | data.resample() 251 | 252 | Zs = np.vstack([d.Z for d in self.data_list]) 253 | Xs = np.vstack([d.X for d in self.data_list]) 254 | mask = np.vstack([d.mask for d in self.data_list]) 255 | self.regression.resample((Zs, Xs), mask=mask) 256 | 257 | 258 | class _FactorAnalysisEM(_FactorAnalysisBase, ModelEM): 259 | 260 | def _null_stats(self): 261 | return objarray( 262 | [np.zeros(self.D_obs), 263 | np.zeros((self.D_obs, self.D_latent)), 264 | np.zeros((self.D_obs, self.D_latent, self.D_latent)), 265 | np.zeros(self.D_obs)]) 266 | 267 | def EM_step(self): 268 | for data in self.data_list: 269 | data.E_step() 270 | 271 | stats = self._null_stats() + sum([d.E_emission_stats for d in self.data_list]) 272 | self.regression.max_likelihood(data=None, weights=None, stats=stats) 273 | assert np.all(np.isfinite(self.sigmasq )) 274 | 275 | 276 | class _FactorAnalysisMeanField(_FactorAnalysisBase, ModelMeanField, ModelMeanFieldSVI): 277 | __metaclass__ = abc.ABCMeta 278 | 279 | def _null_stats(self): 280 | return objarray( 281 | [np.zeros(self.D_obs), 282 | np.zeros((self.D_obs, self.D_latent)), 283 | np.zeros((self.D_obs, self.D_latent, self.D_latent)), 284 | np.zeros(self.D_obs)]) 285 | 286 | def meanfield_coordinate_descent_step(self): 287 | for data in self.data_list: 288 | data.meanfieldupdate() 289 | 290 | stats = self._null_stats() + sum([d.E_emission_stats for d in self.data_list]) 291 | self.regression.meanfieldupdate(stats=stats) 292 | 293 | def meanfield_sgdstep(self, minibatch, prob, stepsize, masks=None): 294 | assert stepsize > 0 and stepsize <= 1 295 | 296 | states_list = self._get_mb_states_list(minibatch, masks) 297 | for s in states_list: 298 | s.meanfieldupdate() 299 | 300 | # Compute the sufficient statistics of the latent parameters 301 | self.regression.meanfield_sgdstep( 302 | data=None, weights=None, prob=prob, stepsize=stepsize, 303 | stats=(sum(s.E_emission_stats for s in states_list))) 304 | 305 | # Compute the expected log likelihood for this minibatch 306 | return sum([s.expected_log_likelihood() for s in states_list]) 307 | 308 | def _get_mb_states_list(self, minibatch, masks): 309 | minibatch = minibatch if isinstance(minibatch, list) else [minibatch] 310 | masks = [None] * len(minibatch) if masks is None else \ 311 | (masks if isinstance(masks, list) else [masks]) 312 | 313 | def get_states(data, mask): 314 | self.add_data(data, mask=mask) 315 | return self.data_list.pop() 316 | 317 | return [get_states(data, mask) for data, mask in zip(minibatch, masks)] 318 | 319 | def resample_from_mf(self): 320 | for data in self.data_list: 321 | data.resample_from_mf() 322 | self.regression.resample_from_mf() 323 | 324 | def expected_log_likelihood(self): 325 | ell = 0 326 | for data in self.data_list: 327 | ell += data.expected_log_likelihood() 
328 | return ell 329 | 330 | def initialize_meanfield(self): 331 | self.regression._initialize_mean_field() 332 | 333 | 334 | class FactorAnalysis(_FactorAnalysisGibbs, _FactorAnalysisEM, _FactorAnalysisMeanField): 335 | pass 336 | 337 | -------------------------------------------------------------------------------- /pybasicbayes/models/parallel_mixture.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | model = None 5 | labels_list = None 6 | 7 | def _get_sampled_labels(idx): 8 | model.add_data(model.labels_list[idx].data,initialize_from_prior=False) 9 | l = model.labels_list.pop() 10 | return l.z, l._normalizer 11 | 12 | def _get_sampled_component_params(idx): 13 | model.components[idx].resample([l.data[l.z == idx] for l in labels_list]) 14 | return model.components[idx].parameters 15 | 16 | -------------------------------------------------------------------------------- /pybasicbayes/testing/.gitignore: -------------------------------------------------------------------------------- 1 | /figures/* 2 | -------------------------------------------------------------------------------- /pybasicbayes/testing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattjj/pybasicbayes/61f65ad6c781288605ec5f7347efcc5dbd73c4fc/pybasicbayes/testing/__init__.py -------------------------------------------------------------------------------- /pybasicbayes/testing/mixins.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import zip 3 | from builtins import range 4 | from builtins import object 5 | import numpy as np 6 | import abc, os 7 | 8 | from nose.plugins.attrib import attr 9 | 10 | import pybasicbayes 11 | from pybasicbayes.util import testing 12 | from future.utils import with_metaclass 13 | 14 | class DistributionTester(with_metaclass(abc.ABCMeta, object)): 15 | @abc.abstractproperty 16 | def distribution_class(self): 17 | pass 18 | 19 | @abc.abstractproperty 20 | def hyperparameter_settings(self): 21 | pass 22 | 23 | class BasicTester(DistributionTester): 24 | @property 25 | def basic_data_size(self): 26 | return 1000 27 | 28 | def loglike_lists_tests(self): 29 | for setting_idx, hypparam_dict in enumerate(self.hyperparameter_settings): 30 | yield self.check_loglike_lists, setting_idx, hypparam_dict 31 | 32 | def check_loglike_lists(self,setting_idx,hypparam_dict): 33 | dist = self.distribution_class(**hypparam_dict) 34 | data = dist.rvs(size=self.basic_data_size) 35 | 36 | l1 = dist.log_likelihood(data).sum() 37 | l2 = sum(dist.log_likelihood(d) for d in np.array_split(data,self.basic_data_size)) 38 | 39 | assert np.isclose(l1,l2) 40 | 41 | def stats_lists_tests(self): 42 | for setting_idx, hypparam_dict in enumerate(self.hyperparameter_settings): 43 | yield self.check_stats_lists, setting_idx, hypparam_dict 44 | 45 | def check_stats_lists(self,setting_idx,hypparam_dict): 46 | dist = self.distribution_class(**hypparam_dict) 47 | data = dist.rvs(size=self.basic_data_size) 48 | 49 | if hasattr(dist,'_get_statistics'): 50 | s1 = dist._get_statistics(data) 51 | s2 = dist._get_statistics([d for d in np.array_split(data,self.basic_data_size)]) 52 | 53 | self._check_stats(s1,s2) 54 | 55 | def _check_stats(self,s1,s2): 56 | if isinstance(s1,np.ndarray): 57 | if s1.dtype == np.object: 58 | assert all(np.allclose(t1,t2) for t1, t2 in 
zip(s1,s2)) 59 | else: 60 | assert np.allclose(s1,s2) 61 | elif isinstance(s1,tuple): 62 | assert all(np.allclose(ss1,ss2) for ss1,ss2 in zip(s1,s2)) 63 | 64 | def missing_data_tests(self): 65 | for setting_idx, hypparam_dict in enumerate(self.hyperparameter_settings): 66 | yield self.check_missing_data_stats, setting_idx, hypparam_dict 67 | 68 | def check_missing_data_stats(self,setting_idx,hypparam_dict): 69 | dist = self.distribution_class(**hypparam_dict) 70 | data = dist.rvs(size=self.basic_data_size) 71 | 72 | if isinstance(data,np.ndarray): 73 | data[np.random.randint(2,size=data.shape[0]) == 1] = np.nan 74 | 75 | s1 = dist._get_statistics(data) 76 | s2 = dist._get_statistics(data[~np.isnan(data).any(1)]) 77 | 78 | self._check_stats(s1,s2) 79 | 80 | class BigDataGibbsTester(with_metaclass(abc.ABCMeta, DistributionTester)): 81 | @abc.abstractmethod 82 | def params_close(self,distn1,distn2): 83 | pass 84 | 85 | @property 86 | def big_data_size(self): 87 | return 20000 88 | 89 | @property 90 | def big_data_repeats_per_setting(self): 91 | return 1 92 | 93 | @property 94 | def big_data_hyperparameter_settings(self): 95 | return self.hyperparameter_settings 96 | 97 | @attr('random') 98 | def big_data_Gibbs_tests(self): 99 | for setting_idx, hypparam_dict in enumerate(self.big_data_hyperparameter_settings): 100 | for i in range(self.big_data_repeats_per_setting): 101 | yield self.check_big_data_Gibbs, setting_idx, hypparam_dict 102 | 103 | def check_big_data_Gibbs(self,setting_idx,hypparam_dict): 104 | d1 = self.distribution_class(**hypparam_dict) 105 | d2 = self.distribution_class(**hypparam_dict) 106 | 107 | data = d1.rvs(size=self.big_data_size) 108 | d2.resample(data) 109 | 110 | assert self.params_close(d1,d2) 111 | 112 | class MaxLikelihoodTester(with_metaclass(abc.ABCMeta, DistributionTester)): 113 | @abc.abstractmethod 114 | def params_close(self,distn1,distn2): 115 | pass 116 | 117 | 118 | @property 119 | def big_data_size(self): 120 | return 20000 121 | 122 | @property 123 | def big_data_repeats_per_setting(self): 124 | return 1 125 | 126 | @property 127 | def big_data_hyperparameter_settings(self): 128 | return self.hyperparameter_settings 129 | 130 | 131 | def maxlike_tests(self): 132 | for setting_idx, hypparam_dict in enumerate(self.big_data_hyperparameter_settings): 133 | for i in range(self.big_data_repeats_per_setting): 134 | yield self.check_maxlike, setting_idx, hypparam_dict 135 | 136 | def check_maxlike(self,setting_idx,hypparam_dict): 137 | d1 = self.distribution_class(**hypparam_dict) 138 | d2 = self.distribution_class(**hypparam_dict) 139 | 140 | data = d1.rvs(size=self.big_data_size) 141 | d2.max_likelihood(data) 142 | 143 | assert self.params_close(d1,d2) 144 | 145 | class GewekeGibbsTester(with_metaclass(abc.ABCMeta, DistributionTester)): 146 | @abc.abstractmethod 147 | def geweke_statistics(self,distn,data): 148 | pass 149 | 150 | 151 | @property 152 | def geweke_nsamples(self): 153 | return 30000 154 | 155 | @property 156 | def geweke_data_size(self): 157 | return 1 # NOTE: more data usually means slower mixing 158 | 159 | @property 160 | def geweke_ntrials(self): 161 | return 3 162 | 163 | @property 164 | def geweke_pval(self): 165 | return 0.05 166 | 167 | @property 168 | def geweke_hyperparameter_settings(self): 169 | return self.hyperparameter_settings 170 | 171 | def geweke_numerical_slice(self,distn,setting_idx): 172 | return slice(None) 173 | 174 | @property 175 | def resample_kwargs(self): 176 | return {} 177 | 178 | @property 179 | def 
geweke_resample_kwargs(self): 180 | return self.resample_kwargs 181 | 182 | @property 183 | def geweke_num_statistic_fails_to_tolerate(self): 184 | return 1 185 | 186 | 187 | @attr('slow', 'random') 188 | def geweke_tests(self): 189 | for setting_idx, hypparam_dict in enumerate(self.geweke_hyperparameter_settings): 190 | yield self.check_geweke, setting_idx, hypparam_dict 191 | 192 | def geweke_figure_filepath(self,setting_idx): 193 | return os.path.join(os.path.dirname(__file__),'figures', 194 | self.__class__.__name__,'setting_%d.pdf' % setting_idx) 195 | 196 | def check_geweke(self,setting_idx,hypparam_dict): 197 | import os 198 | from matplotlib import pyplot as plt 199 | plt.ioff() 200 | fig = plt.figure() 201 | figpath = self.geweke_figure_filepath(setting_idx) 202 | mkdir(os.path.dirname(figpath)) 203 | 204 | nsamples, data_size, ntrials = self.geweke_nsamples, \ 205 | self.geweke_data_size, self.geweke_ntrials 206 | 207 | d = self.distribution_class(**hypparam_dict) 208 | sample_dim = np.atleast_1d(self.geweke_statistics(d,d.rvs(size=10))).shape[0] 209 | 210 | num_statistic_fails = 0 211 | for trial in range(ntrials): 212 | # collect forward-generated statistics 213 | forward_statistics = np.squeeze(np.empty((nsamples,sample_dim))) 214 | for i in range(nsamples): 215 | d = self.distribution_class(**hypparam_dict) 216 | data = d.rvs(size=data_size) 217 | forward_statistics[i] = self.geweke_statistics(d,data) 218 | 219 | # collect gibbs-generated statistics 220 | gibbs_statistics = np.squeeze(np.empty((nsamples,sample_dim))) 221 | d = self.distribution_class(**hypparam_dict) 222 | data = d.rvs(size=data_size) 223 | for i in range(nsamples): 224 | d.resample(data,**self.geweke_resample_kwargs) 225 | data = d.rvs(size=data_size) 226 | gibbs_statistics[i] = self.geweke_statistics(d,data) 227 | 228 | testing.populations_eq_quantile_plot(forward_statistics,gibbs_statistics,fig=fig) 229 | try: 230 | sl = self.geweke_numerical_slice(d,setting_idx) 231 | testing.assert_populations_eq_moments( 232 | forward_statistics[...,sl],gibbs_statistics[...,sl], 233 | pval=self.geweke_pval) 234 | except AssertionError: 235 | datapath = os.path.join(os.path.dirname(__file__),'figures', 236 | self.__class__.__name__,'setting_%d_trial_%d.npz' % (setting_idx,trial)) 237 | np.savez(datapath,fwd=forward_statistics,gibbs=gibbs_statistics) 238 | example_violating_means = forward_statistics.mean(0), gibbs_statistics.mean(0) 239 | num_statistic_fails += 1 240 | 241 | plt.savefig(figpath) 242 | 243 | assert num_statistic_fails <= self.geweke_num_statistic_fails_to_tolerate, \ 244 | 'Geweke MAY have failed, check FIGURES in %s (e.g. 
%s vs %s)' \ 245 | % ((os.path.dirname(figpath),) + example_violating_means) 246 | 247 | 248 | ########## 249 | # misc # 250 | ########## 251 | 252 | def mkdir(path): 253 | # from 254 | # http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python 255 | import errno 256 | try: 257 | os.makedirs(path) 258 | except OSError as exc: 259 | if exc.errno == errno.EEXIST and os.path.isdir(path): 260 | pass 261 | else: raise 262 | 263 | -------------------------------------------------------------------------------- /pybasicbayes/util/.ctags: -------------------------------------------------------------------------------- 1 | --exclude=deps 2 | --exclude=basic/pybasicbayes 3 | --exclude=util 4 | --python-kinds=-i 5 | --recurse=yes 6 | --exclude=.git 7 | --exclude=.pyc 8 | --exclude=.md 9 | --exclude=DS_Store 10 | -------------------------------------------------------------------------------- /pybasicbayes/util/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | /tags 4 | /build 5 | *.c 6 | /*.so 7 | -------------------------------------------------------------------------------- /pybasicbayes/util/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | __all__ = ['general','plot','stats','text'] 3 | from . import general, plot, stats, text 4 | -------------------------------------------------------------------------------- /pybasicbayes/util/cstats.pyx: -------------------------------------------------------------------------------- 1 | # distutils: extra_compile_args = -O3 -w 2 | # cython: boundscheck = False, wraparound = False, cdivision = True 3 | 4 | import numpy as np 5 | cimport numpy as np 6 | 7 | from libc.stdint cimport int32_t 8 | from cython cimport floating, integral 9 | 10 | from cython.parallel import prange 11 | 12 | cdef inline int32_t csample_discrete_normalized(floating[::1] distn, floating u): 13 | cdef int i 14 | cdef int N = distn.shape[0] 15 | cdef floating tot = u 16 | 17 | for i in range(N): 18 | tot -= distn[i] 19 | if tot < 0: 20 | break 21 | 22 | return i 23 | 24 | def sample_markov( 25 | int T, 26 | np.ndarray[floating, ndim=2, mode="c"] trans_matrix, 27 | np.ndarray[floating, ndim=1, mode="c"] init_state_distn 28 | ): 29 | cdef int32_t[::1] out = np.empty(T,dtype=np.int32) 30 | cdef floating[:,::1] A = trans_matrix / trans_matrix.sum(1)[:,None] 31 | cdef floating[::1] pi = init_state_distn / init_state_distn.sum() 32 | 33 | cdef floating[::1] randseq 34 | if floating is double: 35 | randseq = np.random.random(T).astype(np.double) 36 | else: 37 | randseq = np.random.random(T).astype(np.float) 38 | 39 | cdef int t 40 | out[0] = csample_discrete_normalized(pi,randseq[0]) 41 | for t in range(1,T): 42 | out[t] = csample_discrete_normalized(A[out[t-1]],randseq[t]) 43 | 44 | return np.asarray(out) 45 | 46 | def sample_crp_tablecounts( 47 | floating concentration, 48 | integral[:,:] customers, 49 | colweights = None, 50 | ): 51 | cdef integral[:,::1] _customers = np.require(customers, requirements='C') 52 | cdef integral[:,::1] m = np.zeros_like(_customers) 53 | cdef floating[::1] _colweights = np.require(colweights, requirements='C') \ 54 | if colweights is not None else np.ones(customers.shape[1]) 55 | cdef int i, j, k 56 | cdef integral tot = np.sum(_customers) 57 | 58 | cdef floating[::1] randseq 59 | if floating is double: 60 | randseq = np.random.random(tot).astype(np.double) 61 | else: 62 | randseq = 
np.random.random(tot).astype(np.float) 63 | 64 | tmp = np.empty_like(_customers) 65 | tmp[0,0] = 0 66 | tmp.flat[1:] = np.cumsum(np.ravel(customers)[:_customers.size-1],dtype=tmp.dtype) 67 | cdef integral[:,::1] starts = tmp 68 | 69 | with nogil: 70 | for i in prange(_customers.shape[0]): 71 | for j in range(_customers.shape[1]): 72 | for k in range(_customers[i,j]): 73 | m[i,j] += randseq[starts[i,j]+k] \ 74 | < (concentration * _colweights[j]) / (k+concentration*_colweights[j]) 75 | 76 | return np.asarray(m) 77 | 78 | -------------------------------------------------------------------------------- /pybasicbayes/util/cyutil.py: -------------------------------------------------------------------------------- 1 | from builtins import map 2 | from builtins import str 3 | import Cython.Build 4 | from Cython.Build.Dependencies import * 5 | 6 | # NOTE: mostly a copy of cython's create_extension_list except for the lines 7 | # surrounded by "begin matt added" / "end matt added" 8 | def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=False, language=None, 9 | exclude_failures=False): 10 | if not isinstance(patterns, (list, tuple)): 11 | patterns = [patterns] 12 | explicit_modules = set([m.name for m in patterns if isinstance(m, Extension)]) 13 | seen = set() 14 | deps = create_dependency_tree(ctx, quiet=quiet) 15 | to_exclude = set() 16 | if not isinstance(exclude, list): 17 | exclude = [exclude] 18 | for pattern in exclude: 19 | to_exclude.update(list(map(os.path.abspath, extended_iglob(pattern)))) 20 | 21 | module_list = [] 22 | for pattern in patterns: 23 | if isinstance(pattern, str): 24 | filepattern = pattern 25 | template = None 26 | name = '*' 27 | base = None 28 | exn_type = Extension 29 | ext_language = language 30 | elif isinstance(pattern, Extension): 31 | for filepattern in pattern.sources: 32 | if os.path.splitext(filepattern)[1] in ('.py', '.pyx'): 33 | break 34 | else: 35 | # ignore non-cython modules 36 | module_list.append(pattern) 37 | continue 38 | template = pattern 39 | name = template.name 40 | base = DistutilsInfo(exn=template) 41 | exn_type = template.__class__ 42 | ext_language = None # do not override whatever the Extension says 43 | else: 44 | raise TypeError(pattern) 45 | 46 | for file in extended_iglob(filepattern): 47 | if os.path.abspath(file) in to_exclude: 48 | continue 49 | pkg = deps.package(file) 50 | if '*' in name: 51 | module_name = deps.fully_qualified_name(file) 52 | if module_name in explicit_modules: 53 | continue 54 | else: 55 | module_name = name 56 | 57 | if module_name not in seen: 58 | try: 59 | kwds = deps.distutils_info(file, aliases, base).values 60 | except Exception: 61 | if exclude_failures: 62 | continue 63 | raise 64 | if base is not None: 65 | for key, value in list(base.values.items()): 66 | if key not in kwds: 67 | kwds[key] = value 68 | 69 | sources = [file] 70 | if template is not None: 71 | sources += [m for m in template.sources if m != filepattern] 72 | if 'sources' in kwds: 73 | # allow users to add .c files etc. 74 | for source in kwds['sources']: 75 | source = encode_filename_in_py2(source) 76 | if source not in sources: 77 | sources.append(source) 78 | del kwds['sources'] 79 | if 'depends' in kwds: 80 | depends = resolve_depends(kwds['depends'], (kwds.get('include_dirs') or []) + [find_root_package_dir(file)]) 81 | if template is not None: 82 | # Always include everything from the template. 
83 | depends = list(set(template.depends).union(set(depends))) 84 | kwds['depends'] = depends 85 | 86 | if ext_language and 'language' not in kwds: 87 | kwds['language'] = ext_language 88 | 89 | # NOTE: begin matt added 90 | if 'name' in kwds: 91 | module_name = str(kwds['name']) 92 | del kwds['name'] 93 | else: 94 | module_name = os.path.splitext(file)[0].replace('/','.') 95 | # NOTE: end matt added 96 | module_list.append(exn_type( 97 | name=module_name, 98 | sources=sources, 99 | **kwds)) 100 | m = module_list[-1] 101 | seen.add(name) 102 | return module_list 103 | 104 | true_cythonize = Cython.Build.cythonize 105 | true_create_extension_list = Cython.Build.Dependencies.create_extension_list 106 | 107 | def cythonize(*args,**kwargs): 108 | Cython.Build.Dependencies.create_extension_list = create_extension_list 109 | out = true_cythonize(*args,**kwargs) 110 | Cython.Build.Dependencies.create_extension_list = true_create_extension_list 111 | return out 112 | 113 | -------------------------------------------------------------------------------- /pybasicbayes/util/general.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from future import standard_library 3 | standard_library.install_aliases() 4 | from builtins import next 5 | from builtins import zip 6 | from builtins import range 7 | import sys 8 | import numpy as np 9 | from numpy.lib.stride_tricks import as_strided as ast 10 | import scipy.linalg 11 | import scipy.linalg.lapack as lapack 12 | import copy, collections, os, shutil, hashlib 13 | from contextlib import closing 14 | from itertools import chain, count 15 | from functools import reduce 16 | from urllib.request import urlopen # py2.7 covered by standard_library.install_aliases() 17 | 18 | 19 | def blockarray(*args,**kwargs): 20 | return np.array(np.bmat(*args,**kwargs),copy=False) 21 | 22 | def interleave(*iterables): 23 | return list(chain.from_iterable(zip(*iterables))) 24 | 25 | def joindicts(dicts): 26 | # stuff on right clobbers stuff on left 27 | return reduce(lambda x,y: dict(x,**y), dicts, {}) 28 | 29 | def one_vs_all(stuff): 30 | stuffset = set(stuff) 31 | for thing in stuff: 32 | yield thing, stuffset - set([thing]) 33 | 34 | def rle(stateseq): 35 | pos, = np.where(np.diff(stateseq) != 0) 36 | pos = np.concatenate(([0],pos+1,[len(stateseq)])) 37 | return stateseq[pos[:-1]], np.diff(pos) 38 | 39 | def irle(vals,lens): 40 | out = np.empty(np.sum(lens)) 41 | for v,l,start in zip(vals,lens,np.concatenate(((0,),np.cumsum(lens)[:-1]))): 42 | out[start:start+l] = v 43 | return out 44 | 45 | def ibincount(counts): 46 | 'returns an array a such that counts = np.bincount(a)' 47 | return np.repeat(np.arange(counts.shape[0]),counts) 48 | 49 | def cumsum(v,strict=False): 50 | if not strict: 51 | return np.cumsum(v,axis=0) 52 | else: 53 | out = np.zeros_like(v) 54 | out[1:] = np.cumsum(v[:-1],axis=0) 55 | return out 56 | 57 | def rcumsum(v,strict=False): 58 | if not strict: 59 | return np.cumsum(v[::-1],axis=0)[::-1] 60 | else: 61 | out = np.zeros_like(v) 62 | out[:-1] = np.cumsum(v[-1:0:-1],axis=0)[::-1] 63 | return out 64 | 65 | def delta_like(v,i): 66 | out = np.zeros_like(v) 67 | out[i] = 1 68 | return out 69 | 70 | def deepcopy(obj): 71 | return copy.deepcopy(obj) 72 | 73 | def nice_indices(arr): 74 | ''' 75 | takes an array like [1,1,5,5,5,999,1,1] 76 | and maps to something like [0,0,1,1,1,2,0,0] 77 | modifies original in place as well as returns a ref 78 | ''' 79 | # surprisingly, this is slower for 
very small (and very large) inputs: 80 | # u,f,i = np.unique(arr,return_index=True,return_inverse=True) 81 | # arr[:] = np.arange(u.shape[0])[np.argsort(f)][i] 82 | ids = collections.defaultdict(count().__next__) 83 | for idx,x in enumerate(arr): 84 | arr[idx] = ids[x] 85 | return arr 86 | 87 | def ndargmax(arr): 88 | return np.unravel_index(np.argmax(np.ravel(arr)),arr.shape) 89 | 90 | def match_by_overlap(a,b): 91 | assert a.ndim == b.ndim == 1 and a.shape[0] == b.shape[0] 92 | ais, bjs = list(set(a)), list(set(b)) 93 | scores = np.zeros((len(ais),len(bjs))) 94 | for i,ai in enumerate(ais): 95 | for j,bj in enumerate(bjs): 96 | scores[i,j] = np.dot(np.array(a==ai,dtype=np.float),b==bj) 97 | 98 | flip = len(bjs) > len(ais) 99 | 100 | if flip: 101 | ais, bjs = bjs, ais 102 | scores = scores.T 103 | 104 | matching = [] 105 | while scores.size > 0: 106 | i,j = ndargmax(scores) 107 | matching.append((ais[i],bjs[j])) 108 | scores = np.delete(np.delete(scores,i,0),j,1) 109 | ais = np.delete(ais,i) 110 | bjs = np.delete(bjs,j) 111 | 112 | return matching if not flip else [(x,y) for y,x in matching] 113 | 114 | def hamming_error(a,b): 115 | return (a!=b).sum() 116 | 117 | def scoreatpercentile(data,per,axis=0): 118 | 'like the function in scipy.stats but with an axis argument and works on arrays' 119 | a = np.sort(data,axis=axis) 120 | idx = per/100. * (data.shape[axis]-1) 121 | 122 | if (idx % 1 == 0): 123 | return a[[slice(None) if ii != axis else idx for ii in range(a.ndim)]] 124 | else: 125 | lowerweight = 1-(idx % 1) 126 | upperweight = (idx % 1) 127 | idx = int(np.floor(idx)) 128 | return lowerweight * a[[slice(None) if ii != axis else idx for ii in range(a.ndim)]] \ 129 | + upperweight * a[[slice(None) if ii != axis else idx+1 for ii in range(a.ndim)]] 130 | 131 | def stateseq_hamming_error(sampledstates,truestates): 132 | sampledstates = np.array(sampledstates,ndmin=2).copy() 133 | 134 | errors = np.zeros(sampledstates.shape[0]) 135 | for idx,s in enumerate(sampledstates): 136 | # match labels by maximum overlap 137 | matching = match_by_overlap(s,truestates) 138 | s2 = s.copy() 139 | for i,j in matching: 140 | s2[s==i] = j 141 | errors[idx] = hamming_error(s2,truestates) 142 | 143 | return errors if errors.shape[0] > 1 else errors[0] 144 | 145 | def _sieve(stream): 146 | # just for fun; doesn't work over a few hundred 147 | val = next(stream) 148 | yield val 149 | for x in [x for x in _sieve(stream) if x % val != 0]: 150 | yield x 151 | 152 | def primes(): 153 | return _sieve(count(2)) 154 | 155 | def top_eigenvector(A,niter=1000,force_iteration=False): 156 | ''' 157 | assuming the LEFT invariant subspace of A corresponding to the LEFT 158 | eigenvalue of largest modulus has geometric multiplicity of 1 (trivial 159 | Jordan block), returns the vector at the intersection of that eigenspace and 160 | the simplex 161 | 162 | A should probably be a ROW-stochastic matrix 163 | 164 | probably uses power iteration 165 | ''' 166 | n = A.shape[0] 167 | np.seterr(invalid='raise',divide='raise') 168 | if n <= 25 and not force_iteration: 169 | x = np.repeat(1./n,n) 170 | x = np.linalg.matrix_power(A.T,niter).dot(x) 171 | x /= x.sum() 172 | return x 173 | else: 174 | x1 = np.repeat(1./n,n) 175 | x2 = x1.copy() 176 | for itr in range(niter): 177 | np.dot(A.T,x1,out=x2) 178 | x2 /= x2.sum() 179 | x1,x2 = x2,x1 180 | if np.linalg.norm(x1-x2) < 1e-8: 181 | break 182 | return x1 183 | 184 | def engine_global_namespace(f): 185 | # see IPython.parallel.util.interactive; it's copied here so as to avoid 186 | 
# extra imports/dependences elsewhere, and to provide a slightly clearer 187 | # name 188 | f.__module__ = '__main__' 189 | return f 190 | 191 | def block_view(a,block_shape): 192 | shape = (a.shape[0]/block_shape[0],a.shape[1]/block_shape[1]) + block_shape 193 | strides = (a.strides[0]*block_shape[0],a.strides[1]*block_shape[1]) + a.strides 194 | return ast(a,shape=shape,strides=strides) 195 | 196 | def AR_striding(data,nlags): 197 | data = np.asarray(data) 198 | if not data.flags.c_contiguous: 199 | data = data.copy(order='C') 200 | if data.ndim == 1: 201 | data = np.reshape(data,(-1,1)) 202 | sz = data.dtype.itemsize 203 | return ast( 204 | data, 205 | shape=(data.shape[0]-nlags,data.shape[1]*(nlags+1)), 206 | strides=(data.shape[1]*sz,sz)) 207 | 208 | def count_transitions(stateseq,minlength=None): 209 | if minlength is None: 210 | minlength = stateseq.max() + 1 211 | out = np.zeros((minlength,minlength),dtype=np.int32) 212 | for a,b in zip(stateseq[:-1],stateseq[1:]): 213 | out[a,b] += 1 214 | return out 215 | 216 | ### SGD 217 | 218 | def sgd_steps(tau,kappa): 219 | assert 0.5 < kappa <= 1 and tau >= 0 220 | for t in count(1): 221 | yield (t+tau)**(-kappa) 222 | 223 | def hold_out(datalist,frac): 224 | N = len(datalist) 225 | perm = np.random.permutation(N) 226 | split = int(np.ceil(frac * N)) 227 | return [datalist[i] for i in perm[split:]], [datalist[i] for i in perm[:split]] 228 | 229 | def sgd_passes(tau,kappa,datalist,minibatchsize=1,npasses=1): 230 | N = len(datalist) 231 | 232 | for superitr in range(npasses): 233 | if minibatchsize == 1: 234 | perm = np.random.permutation(N) 235 | for idx, rho_t in zip(perm,sgd_steps(tau,kappa)): 236 | yield datalist[idx], rho_t 237 | else: 238 | minibatch_indices = np.array_split(np.random.permutation(N),N/minibatchsize) 239 | for indices, rho_t in zip(minibatch_indices,sgd_steps(tau,kappa)): 240 | yield [datalist[idx] for idx in indices], rho_t 241 | 242 | def sgd_sampling(tau,kappa,datalist,minibatchsize=1): 243 | N = len(datalist) 244 | if minibatchsize == 1: 245 | for rho_t in sgd_steps(tau,kappa): 246 | minibatch_index = np.random.choice(N) 247 | yield datalist[minibatch_index], rho_t 248 | else: 249 | for rho_t in sgd_steps(tau,kappa): 250 | minibatch_indices = np.random.choice(N,size=minibatchsize,replace=False) 251 | yield [datalist[idx] for idx in minibatch_indices], rho_t 252 | 253 | # TODO should probably eliminate this function 254 | def minibatchsize(lst): 255 | return float(sum(d.shape[0] for d in lst)) 256 | 257 | ### misc 258 | 259 | def random_subset(lst,sz): 260 | perm = np.random.permutation(len(lst)) 261 | return [lst[perm[idx]] for idx in range(sz)] 262 | 263 | def get_file(remote_url,local_path): 264 | if not os.path.isfile(local_path): 265 | with closing(urlopen(remote_url)) as remotefile: 266 | with open(local_path,'wb') as localfile: 267 | shutil.copyfileobj(remotefile,localfile) 268 | 269 | def list_split(lst,num): 270 | assert num > 0 271 | return [lst[start::num] for start in range(num)] 272 | 273 | def ndarrayhash(v): 274 | assert isinstance(v,np.ndarray) 275 | return hashlib.sha1(v).hexdigest() 276 | 277 | ### numerical linear algebra 278 | 279 | def inv_psd(A, return_chol=False): 280 | L = np.linalg.cholesky(A) 281 | Ainv = lapack.dpotri(L, lower=True)[0] 282 | copy_lower_to_upper(Ainv) 283 | # if not np.allclose(Ainv, np.linalg.inv(A), rtol=1e-5, atol=1e-5): 284 | # import ipdb; ipdb.set_trace() 285 | if return_chol: 286 | return Ainv, L 287 | else: 288 | return Ainv 289 | 290 | def 
solve_psd(A,b,chol=None,lower=True,overwrite_b=False,overwrite_A=False): 291 | if chol is None: 292 | return lapack.dposv(A,b,overwrite_b=overwrite_b,overwrite_a=overwrite_A)[1] 293 | else: 294 | return lapack.dpotrs(chol,b,lower,overwrite_b)[0] 295 | 296 | def copy_lower_to_upper(A): 297 | A += np.tril(A,k=-1).T 298 | 299 | 300 | # NOTE: existing numpy object array construction acts a bit weird, e.g. 301 | # np.array([randn(3,4),randn(3,5)]) vs np.array([randn(3,4),randn(5,3)]) 302 | # this wrapper class is just meant to ensure that when ndarrays of objects are 303 | # constructed the construction doesn't "recurse" as in the first example 304 | class ObjArray(np.ndarray): 305 | def __new__(cls,lst): 306 | if isinstance(lst,(np.ndarray,float,int)): 307 | return lst 308 | else: 309 | return np.ndarray.__new__(cls,len(lst),dtype=np.object) 310 | 311 | def __init__(self,lst): 312 | if not isinstance(lst,(np.ndarray,float,int)): 313 | for i, elt in enumerate(lst): 314 | self[i] = self.__class__(elt) 315 | 316 | # Here's an alternative to ObjArray: just construct an obj array from a list 317 | def objarray(lst): 318 | a = np.empty(len(lst), dtype=object) 319 | for i,o in enumerate(lst): 320 | a[i] = o 321 | return a 322 | 323 | def all_none(*args): 324 | return all(_ is None for _ in args) 325 | 326 | def any_none(*args): 327 | return any(_ is None for _ in args) 328 | 329 | -------------------------------------------------------------------------------- /pybasicbayes/util/plot.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import range 3 | import numpy as np 4 | from matplotlib import pyplot as plt 5 | 6 | def plot_gaussian_2D(mu, lmbda, color='b', centermarker=True,label='',alpha=1.,ax=None,artists=None): 7 | ''' 8 | Plots mean and cov ellipsoid into current axes. Must be 2D. lmbda is a covariance matrix. 9 | ''' 10 | assert len(mu) == 2 11 | ax = ax if ax else plt.gca() 12 | 13 | # TODO if update alpha=0. and our previous alpha is 0., we don't need to 14 | # dirty the artist 15 | 16 | t = np.hstack([np.arange(0,2*np.pi,0.01),0]) 17 | circle = np.vstack([np.sin(t),np.cos(t)]) 18 | ellipse = np.dot(np.linalg.cholesky(lmbda),circle) 19 | 20 | if artists is None: 21 | point = ax.scatter([mu[0]],[mu[1]],marker='D',color=color,s=4,alpha=alpha) \ 22 | if centermarker else None 23 | line, = ax.plot(ellipse[0,:] + mu[0], ellipse[1,:] + mu[1],linestyle='-', 24 | linewidth=2,color=color,label=label,alpha=alpha) 25 | else: 26 | line, point = artists 27 | if centermarker: 28 | point.set_offsets(np.atleast_2d(mu)) 29 | point.set_alpha(alpha) 30 | point.set_color(color) 31 | line.set_xdata(ellipse[0,:] + mu[0]) 32 | line.set_ydata(ellipse[1,:] + mu[1]) 33 | line.set_alpha(alpha) 34 | line.set_color(color) 35 | 36 | return (line, point) if point else (line,) 37 | 38 | 39 | def plot_gaussian_projection(mu, lmbda, vecs, **kwargs): 40 | ''' 41 | Plots a ndim gaussian projected onto 2D vecs, where vecs is a matrix whose two columns 42 | are the subset of some orthonomral basis (e.g. from PCA on samples). 
43 | ''' 44 | return plot_gaussian_2D(project_data(mu,vecs),project_ellipsoid(lmbda,vecs),**kwargs) 45 | 46 | 47 | def pca_project_data(data,num_components=2): 48 | # convenience combination of the next two functions 49 | return project_data(data,pca(data,num_components=num_components)) 50 | 51 | 52 | def pca(data,num_components=2): 53 | U,s,Vh = np.linalg.svd(data - np.mean(data,axis=0)) 54 | return Vh.T[:,:num_components] 55 | 56 | 57 | def project_data(data,vecs): 58 | return np.dot(data,vecs.T) 59 | 60 | 61 | def project_ellipsoid(ellipsoid,vecs): 62 | # vecs is a matrix whose columns are a subset of an orthonormal basis 63 | # ellipsoid is a pos def matrix 64 | return np.dot(vecs,np.dot(ellipsoid,vecs.T)) 65 | 66 | 67 | def subplot_gridsize(num): 68 | return sorted(min([(x,int(np.ceil(num/x))) for x in range(1,int(np.floor(np.sqrt(num)))+1)],key=sum)) 69 | -------------------------------------------------------------------------------- /pybasicbayes/util/profiling.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from future import standard_library 4 | standard_library.install_aliases() 5 | import numpy as np 6 | import sys, io, inspect, os, functools, time, collections 7 | 8 | ### use @timed for really basic timing 9 | 10 | _timings = collections.defaultdict(list) 11 | 12 | def timed(func): 13 | @functools.wraps(func) 14 | def wrapped(*args,**kwargs): 15 | tic = time.time() 16 | out = func(*args,**kwargs) 17 | _timings[func].append(time.time() - tic) 18 | return out 19 | return wrapped 20 | 21 | def show_timings(stream=None): 22 | if stream is None: 23 | stream = sys.stdout 24 | if len(_timings) > 0: 25 | results = [(inspect.getsourcefile(f),f.__name__, 26 | len(vals),np.sum(vals),np.mean(vals),np.std(vals)) 27 | for f, vals in _timings.items()] 28 | filename_lens = max(len(filename) for filename, _, _, _, _, _ in results) 29 | name_lens = max(len(name) for _, name, _, _, _, _ in results) 30 | 31 | fmt = '{:>%d} {:>%d} {:>10} {:>10} {:>10} {:>10}' % (filename_lens, name_lens) 32 | print(fmt.format('file','name','ncalls','tottime','avg time','std dev'), file=stream) 33 | 34 | fmt = '{:>%d} {:>%d} {:>10} {:>10.3} {:>10.3} {:>10.3}' % (filename_lens, name_lens) 35 | print('\n'.join(fmt.format(*tup) for tup in sorted(results)), file=stream) 36 | 37 | ### use @line_profiled for a thin wrapper around line_profiler 38 | 39 | try: 40 | import line_profiler 41 | _prof = line_profiler.LineProfiler() 42 | 43 | def line_profiled(func): 44 | mod = inspect.getmodule(func) 45 | if 'PROFILING' in os.environ or (hasattr(mod,'PROFILING') and mod.PROFILING): 46 | return _prof(func) 47 | return func 48 | 49 | def show_line_stats(stream=None): 50 | _prof.print_stats(stream=stream) 51 | except ImportError: 52 | line_profiled = lambda x: x 53 | 54 | -------------------------------------------------------------------------------- /pybasicbayes/util/stats.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import absolute_import 3 | from builtins import range 4 | import numpy as np 5 | from numpy.random import random 6 | na = np.newaxis 7 | import scipy.stats as stats 8 | import scipy.special as special 9 | import scipy.linalg 10 | from scipy.special import logsumexp 11 | from numpy.core.umath_tests import inner1d 12 | 13 | from .general import any_none, blockarray 14 | 15 | ### data abstraction 16 | 17 | 
# the data type is ndarrays OR lists of ndarrays 18 | # type Data = ndarray | [ndarray] 19 | 20 | def atleast_2d(data): 21 | # NOTE: can't use np.atleast_2d because if it's 1D we want axis 1 to be the 22 | # singleton and axis 0 to be the sequence index 23 | if data.ndim == 1: 24 | return data.reshape((-1,1)) 25 | return data 26 | 27 | def mask_data(data): 28 | return np.ma.masked_array( 29 | np.nan_to_num(data),np.isnan(data),fill_value=0.,hard_mask=True) 30 | 31 | def gi(data): 32 | out = (np.isnan(atleast_2d(data)).sum(1) == 0).ravel() 33 | return out if len(out) != 0 else None 34 | 35 | def getdatasize(data): 36 | if isinstance(data,np.ma.masked_array): 37 | return data.shape[0] - data.mask.reshape((data.shape[0],-1))[:,0].sum() 38 | elif isinstance(data,np.ndarray): 39 | if len(data) == 0: 40 | return 0 41 | return data[gi(data)].shape[0] 42 | elif isinstance(data,list): 43 | return sum(getdatasize(d) for d in data) 44 | else: 45 | # handle unboxed case for convenience 46 | assert isinstance(data,int) or isinstance(data,float) 47 | return 1 48 | 49 | def getdatadimension(data): 50 | if isinstance(data,np.ndarray): 51 | assert data.ndim > 1 52 | return data.shape[1] 53 | elif isinstance(data,list): 54 | assert len(data) > 0 55 | return getdatadimension(data[0]) 56 | else: 57 | # handle unboxed case for convenience 58 | assert isinstance(data,int) or isinstance(data,float) 59 | return 1 60 | 61 | def combinedata(datas): 62 | ret = [] 63 | for data in datas: 64 | if isinstance(data,np.ma.masked_array): 65 | ret.append(np.ma.compress_rows(data)) 66 | if isinstance(data,np.ndarray): 67 | ret.append(data) 68 | elif isinstance(data,list): 69 | ret.extend(combinedata(data)) 70 | else: 71 | # handle unboxed case for convenience 72 | assert isinstance(data,int) or isinstance(data,float) 73 | ret.append(np.atleast_1d(data)) 74 | return ret 75 | 76 | def flattendata(data): 77 | # data is either an array (possibly a maskedarray) or a list of arrays 78 | if isinstance(data,np.ndarray): 79 | return data 80 | elif isinstance(data,list) or isinstance(data,tuple): 81 | if any(isinstance(d,np.ma.MaskedArray) for d in data): 82 | return np.concatenate([np.ma.compress_rows(d) for d in data]) 83 | else: 84 | return np.concatenate(data) 85 | else: 86 | # handle unboxed case for convenience 87 | assert isinstance(data,int) or isinstance(data,float) 88 | return np.atleast_1d(data) 89 | 90 | ### misc 91 | def update_param(oldv, newv, stepsize): 92 | return oldv * (1 - stepsize) + newv * stepsize 93 | 94 | 95 | def cov(a): 96 | # return np.cov(a,rowvar=0,bias=1) 97 | mu = a.mean(0) 98 | if isinstance(a,np.ma.MaskedArray): 99 | return np.ma.dot(a.T,a)/a.count(0)[0] - np.ma.outer(mu,mu) 100 | else: 101 | return a.T.dot(a)/a.shape[0] - np.outer(mu,mu) 102 | 103 | def normal_cdf(x, mu=0.0, sigma=1.0): 104 | z = (x - mu) / sigma 105 | return 0.5 * special.erfc(-z / np.sqrt(2)) 106 | 107 | 108 | ### Sampling functions 109 | 110 | def sample_gaussian(mu=None,Sigma=None,J=None,h=None): 111 | mean_params = mu is not None and Sigma is not None 112 | info_params = J is not None and h is not None 113 | assert mean_params or info_params 114 | 115 | if not any_none(mu,Sigma): 116 | return np.random.multivariate_normal(mu,Sigma) 117 | else: 118 | from scipy.linalg.lapack import dpotrs 119 | L = np.linalg.cholesky(J) 120 | x = np.random.randn(h.shape[0]) 121 | return scipy.linalg.solve_triangular(L,x,lower=True,trans='T') \ 122 | + dpotrs(L,h,lower=True)[0] 123 | 124 | def sample_truncated_gaussian(mu=0, sigma=1, lb=-np.Inf, 
ub=np.Inf): 125 | """ 126 | Sample a truncated normal with the specified params. This 127 | is not the most stable way but it works as long as the 128 | truncation region is not too far from the mean. 129 | """ 130 | # Broadcast arrays to be of the same shape 131 | mu, sigma, lb, ub = np.broadcast_arrays(mu, sigma, lb, ub) 132 | shp = mu.shape 133 | if np.allclose(sigma, 0.0): 134 | return mu 135 | 136 | cdflb = normal_cdf(lb, mu, sigma) 137 | cdfub = normal_cdf(ub, mu, sigma) 138 | 139 | # Sample uniformly from the CDF 140 | cdfsamples = cdflb + np.random.rand(*shp) * (cdfub-cdflb) 141 | 142 | # Clip the CDF samples so that we can invert them 143 | cdfsamples = np.clip(cdfsamples, 1e-15, 1-1e-15) 144 | zs = -np.sqrt(2) * special.erfcinv(2 * cdfsamples) 145 | 146 | # Transform the standard normal samples 147 | xs = sigma * zs + mu 148 | xs = np.clip(xs, lb, ub) 149 | 150 | return xs 151 | 152 | def sample_discrete(distn,size=[],dtype=np.int32): 153 | 'samples from a one-dimensional finite pmf' 154 | distn = np.atleast_1d(distn) 155 | assert (distn >=0).all() and distn.ndim == 1 156 | if (0 == distn).all(): 157 | return np.random.randint(distn.shape[0],size=size) 158 | cumvals = np.cumsum(distn) 159 | return np.sum(np.array(random(size))[...,na] * cumvals[-1] > cumvals, axis=-1,dtype=dtype) 160 | 161 | def sample_discrete_from_log(p_log,return_lognorms=False,axis=0,dtype=np.int32): 162 | 'samples log probability array along specified axis' 163 | lognorms = logsumexp(p_log,axis=axis) 164 | cumvals = np.exp(p_log - np.expand_dims(lognorms,axis)).cumsum(axis) 165 | thesize = np.array(p_log.shape) 166 | thesize[axis] = 1 167 | randvals = random(size=thesize) * \ 168 | np.reshape(cumvals[[slice(None) if i is not axis else -1 169 | for i in range(p_log.ndim)]],thesize) 170 | samples = np.sum(randvals > cumvals,axis=axis,dtype=dtype) 171 | if return_lognorms: 172 | return samples, lognorms 173 | else: 174 | return samples 175 | 176 | def sample_markov(T,trans_matrix,init_state_distn): 177 | out = np.empty(T,dtype=np.int32) 178 | out[0] = sample_discrete(init_state_distn) 179 | for t in range(1,T): 180 | out[t] = sample_discrete(trans_matrix[out[t-1]]) 181 | return out 182 | 183 | def sample_invgamma(alpha, beta): 184 | return 1./np.random.gamma(alpha, 1./beta) 185 | 186 | def niw_expectedstats(nu, S, m, kappa): 187 | D = m.shape[0] 188 | 189 | # TODO speed this up with cholesky of S 190 | E_J = nu * np.linalg.inv(S) 191 | E_h = nu * np.linalg.solve(S,m) 192 | E_muJmuT = D/kappa + m.dot(E_h) 193 | E_logdetSigmainv = special.digamma((nu-np.arange(D))/2.).sum() \ 194 | + D*np.log(2.) - np.linalg.slogdet(S)[1] 195 | 196 | return E_J, E_h, E_muJmuT, E_logdetSigmainv 197 | 198 | 199 | def sample_niw(mu,lmbda,kappa,nu): 200 | ''' 201 | Returns a sample from the normal/inverse-wishart distribution, conjugate 202 | prior for (simultaneously) unknown mean and unknown covariance in a 203 | Gaussian likelihood model. Returns covariance. 204 | ''' 205 | # code is based on Matlab's method 206 | # reference: p. 87 in Gelman's Bayesian Data Analysis 207 | assert nu > lmbda.shape[0] and kappa > 0 208 | 209 | # first sample Sigma ~ IW(lmbda,nu) 210 | lmbda = sample_invwishart(lmbda,nu) 211 | # then sample mu | Lambda ~ N(mu, Lambda/kappa) 212 | mu = np.random.multivariate_normal(mu,lmbda / kappa) 213 | 214 | return mu, lmbda 215 | 216 | def sample_invwishart(S,nu): 217 | # TODO make a version that returns the cholesky 218 | # TODO allow passing in chol/cholinv of matrix parameter lmbda 219 | # TODO lowmem! memoize! 
dchud (eigen?) 220 | n = S.shape[0] 221 | chol = np.linalg.cholesky(S) 222 | 223 | if (nu <= 81+n) and (nu == np.round(nu)): 224 | x = np.random.randn(int(nu),n) 225 | else: 226 | x = np.diag(np.sqrt(np.atleast_1d(stats.chi2.rvs(nu-np.arange(n))))) 227 | x[np.triu_indices_from(x,1)] = np.random.randn(n*(n-1)//2) 228 | R = np.linalg.qr(x,'r') 229 | T = scipy.linalg.solve_triangular(R.T,chol.T,lower=True).T 230 | return np.dot(T,T.T) 231 | 232 | def sample_wishart(sigma, nu): 233 | n = sigma.shape[0] 234 | chol = np.linalg.cholesky(sigma) 235 | 236 | # use matlab's heuristic for choosing between the two different sampling schemes 237 | if (nu <= 81+n) and (nu == round(nu)): 238 | # direct 239 | X = np.dot(chol,np.random.normal(size=(n,nu))) 240 | else: 241 | A = np.diag(np.sqrt(np.random.chisquare(nu - np.arange(n)))) 242 | A[np.tri(n,k=-1,dtype=bool)] = np.random.normal(size=(n*(n-1)/2.)) 243 | X = np.dot(chol,A) 244 | 245 | return np.dot(X,X.T) 246 | 247 | def sample_mn(M, U=None, Uinv=None, V=None, Vinv=None): 248 | assert (U is None) ^ (Uinv is None) 249 | assert (V is None) ^ (Vinv is None) 250 | 251 | G = np.random.normal(size=M.shape) 252 | 253 | if U is not None: 254 | G = np.dot(np.linalg.cholesky(U),G) 255 | else: 256 | G = np.linalg.solve(np.linalg.cholesky(Uinv).T,G) 257 | 258 | if V is not None: 259 | G = np.dot(G,np.linalg.cholesky(V).T) 260 | else: 261 | G = np.linalg.solve(np.linalg.cholesky(Vinv).T,G.T).T 262 | 263 | return M + G 264 | 265 | def sample_mniw(nu, S, M, K=None, Kinv=None): 266 | assert (K is None) ^ (Kinv is None) 267 | Sigma = sample_invwishart(S,nu) 268 | if K is not None: 269 | return sample_mn(M=M,U=Sigma,V=K), Sigma 270 | else: 271 | return sample_mn(M=M,U=Sigma,Vinv=Kinv), Sigma 272 | 273 | def mniw_expectedstats(nu, S, M, K=None, Kinv=None): 274 | # NOTE: could speed this up with chol factorizing S, not re-solving 275 | assert (K is None) ^ (Kinv is None) 276 | m = M.shape[0] 277 | K = K if K is not None else np.linalg.inv(Kinv) 278 | 279 | E_Sigmainv = nu*np.linalg.inv(S) 280 | E_Sigmainv_A = nu*np.linalg.solve(S,M) 281 | E_AT_Sigmainv_A = m*K + nu*M.T.dot(np.linalg.solve(S,M)) 282 | E_logdetSigmainv = special.digamma((nu-np.arange(m))/2.).sum() \ 283 | + m*np.log(2) - np.linalg.slogdet(S)[1] 284 | 285 | return E_Sigmainv, E_Sigmainv_A, E_AT_Sigmainv_A, E_logdetSigmainv 286 | 287 | def mniw_log_partitionfunction(nu, S, M, K): 288 | n = M.shape[0] 289 | return n*nu/2*np.log(2) + special.multigammaln(nu/2., n) \ 290 | - nu/2*np.linalg.slogdet(S)[1] - n/2*np.linalg.slogdet(K)[1] 291 | 292 | def sample_pareto(x_m,alpha): 293 | return x_m + np.random.pareto(alpha) 294 | 295 | def sample_crp_tablecounts(concentration,customers,colweights): 296 | m = np.zeros_like(customers) 297 | tot = customers.sum() 298 | randseq = np.random.random(tot) 299 | 300 | starts = np.empty_like(customers) 301 | starts[0,0] = 0 302 | starts.flat[1:] = np.cumsum(np.ravel(customers)[:customers.size-1]) 303 | 304 | for (i,j), n in np.ndenumerate(customers): 305 | w = colweights[j] 306 | for k in range(n): 307 | m[i,j] += randseq[starts[i,j]+k] \ 308 | < (concentration * w) / (k + concentration * w) 309 | 310 | return m 311 | 312 | ### Entropy 313 | def invwishart_entropy(sigma,nu,chol=None): 314 | D = sigma.shape[0] 315 | chol = np.linalg.cholesky(sigma) if chol is None else chol 316 | Elogdetlmbda = special.digamma((nu-np.arange(D))/2).sum() + D*np.log(2) - 2*np.log(chol.diagonal()).sum() 317 | return invwishart_log_partitionfunction(sigma,nu,chol)-(nu-D-1)/2*Elogdetlmbda + nu*D/2 
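# NOTE: a minimal usage sketch for the sampling and entropy helpers above, assuming a
# 2-dimensional NIW prior; the hyperparameter values are illustrative, chosen so that
# nu_0 > D and kappa_0 > 0 as these functions require:
#
#   import numpy as np
#   from pybasicbayes.util.stats import sample_niw, sample_invwishart, invwishart_entropy
#
#   mu_0, lmbda_0, kappa_0, nu_0 = np.zeros(2), np.eye(2), 0.05, 5.
#   mu, sigma = sample_niw(mu_0, lmbda_0, kappa_0, nu_0)  # joint draw of (mean, covariance)
#   sigma_iw = sample_invwishart(lmbda_0, nu_0)           # covariance-only draw from IW(lmbda_0, nu_0)
#   h = invwishart_entropy(lmbda_0, nu_0)                 # entropy of that inverse-Wishart prior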
318 | 319 | def invwishart_log_partitionfunction(sigma,nu,chol=None): 320 | # In Bishop B.79 notation, this is -log B(W, nu), where W = sigma^{-1} 321 | D = sigma.shape[0] 322 | chol = np.linalg.cholesky(sigma) if chol is None else chol 323 | return -1*(nu*np.log(chol.diagonal()).sum() - (nu*D/2*np.log(2) + D*(D-1)/4*np.log(np.pi) \ 324 | + special.gammaln((nu-np.arange(D))/2).sum())) 325 | 326 | ### Predictive 327 | 328 | def multivariate_t_loglik(y,nu,mu,lmbda): 329 | # returns the log value 330 | d = len(mu) 331 | yc = np.array(y-mu,ndmin=2) 332 | L = np.linalg.cholesky(lmbda) 333 | ys = scipy.linalg.solve_triangular(L,yc.T,overwrite_b=True,lower=True) 334 | return scipy.special.gammaln((nu+d)/2.) - scipy.special.gammaln(nu/2.) \ 335 | - (d/2.)*np.log(nu*np.pi) - np.log(L.diagonal()).sum() \ 336 | - (nu+d)/2.*np.log1p(1./nu*inner1d(ys.T,ys.T)) 337 | 338 | def beta_predictive(priorcounts,newcounts): 339 | prior_nsuc, prior_nfail = priorcounts 340 | nsuc, nfail = newcounts 341 | 342 | numer = scipy.special.gammaln(np.array([nsuc+prior_nsuc, 343 | nfail+prior_nfail, prior_nsuc+prior_nfail])).sum() 344 | denom = scipy.special.gammaln(np.array([prior_nsuc, prior_nfail, 345 | prior_nsuc+prior_nfail+nsuc+nfail])).sum() 346 | return numer - denom 347 | 348 | ### Statistical tests 349 | 350 | def two_sample_t_statistic(pop1, pop2): 351 | pop1, pop2 = (flattendata(p) for p in (pop1, pop2)) 352 | t = (pop1.mean(0) - pop2.mean(0)) / np.sqrt(pop1.var(0)/pop1.shape[0] + pop2.var(0)/pop2.shape[0]) 353 | p = 2*stats.t.sf(np.abs(t),np.minimum(pop1.shape[0],pop2.shape[0])) 354 | return t,p 355 | 356 | def f_statistic(pop1, pop2): # TODO test 357 | pop1, pop2 = (flattendata(p) for p in (pop1, pop2)) 358 | var1, var2 = pop1.var(0), pop2.var(0) 359 | n1, n2 = np.where(var1 >= var2, pop1.shape[0], pop2.shape[0]), \ 360 | np.where(var1 >= var2, pop2.shape[0], pop1.shape[0]) 361 | var1, var2 = np.maximum(var1,var2), np.minimum(var1,var2) 362 | f = var1 / var2 363 | p = stats.f.sf(f,n1,n2) 364 | return f,p 365 | 366 | -------------------------------------------------------------------------------- /pybasicbayes/util/testing.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import absolute_import 3 | from builtins import zip 4 | import numpy as np 5 | from numpy import newaxis as na 6 | 7 | from . import stats, general 8 | 9 | ######################### 10 | # statistical testing # 11 | ######################### 12 | 13 | ### graphical 14 | 15 | def populations_eq_quantile_plot(pop1, pop2, fig=None, percentilecutoff=5): 16 | import matplotlib.pyplot as plt 17 | 18 | pop1, pop2 = stats.flattendata(pop1), stats.flattendata(pop2) 19 | assert pop1.ndim == pop2.ndim == 1 or \ 20 | (pop1.ndim == pop2.ndim == 2 and pop1.shape[1] == pop2.shape[1]), \ 21 | 'populations must have consistent dimensions' 22 | D = pop1.shape[1] if pop1.ndim == 2 else 1 23 | 24 | # we want to have the same number of samples 25 | n1, n2 = pop1.shape[0], pop2.shape[0] 26 | if n1 != n2: 27 | # subsample, since interpolation is dangerous 28 | if n1 < n2: 29 | pop1, pop2 = pop2, pop1 30 | np.random.shuffle(pop1) 31 | pop1 = pop1[:pop2.shape[0]] 32 | 33 | def plot_1d_scaled_quantiles(p1,p2,plot_midline=True): 34 | 35 | # scaled quantiles so that multiple calls line up 36 | p1.sort(), p2.sort() # NOTE: destructive! 
but that's cool 37 | xmin,xmax = general.scoreatpercentile(p1,percentilecutoff), \ 38 | general.scoreatpercentile(p1,100-percentilecutoff) 39 | ymin,ymax = general.scoreatpercentile(p2,percentilecutoff), \ 40 | general.scoreatpercentile(p2,100-percentilecutoff) 41 | plt.plot((p1-xmin)/(xmax-xmin),(p2-ymin)/(ymax-ymin)) 42 | 43 | if plot_midline: 44 | plt.plot((0,1),(0,1),'k--') 45 | plt.axis((0,1,0,1)) 46 | 47 | if D == 1: 48 | if fig is None: 49 | plt.figure() 50 | plot_1d_scaled_quantiles(pop1,pop2) 51 | else: 52 | if fig is None: 53 | fig = plt.figure() 54 | 55 | if not hasattr(fig,'_quantile_test_projs'): 56 | firsttime = True 57 | randprojs = np.random.randn(D,D) 58 | randprojs /= np.sqrt(np.sum(randprojs**2,axis=1))[:,na] 59 | projs = np.vstack((np.eye(D),randprojs)) 60 | fig._quantile_test_projs = projs 61 | else: 62 | firsttime = False 63 | projs = fig._quantile_test_projs 64 | 65 | ims1, ims2 = pop1.dot(projs.T), pop2.dot(projs.T) 66 | for i, (im1, im2) in enumerate(zip(ims1.T,ims2.T)): 67 | plt.subplot(2,D,i+1) 68 | plot_1d_scaled_quantiles(im1,im2,plot_midline=firsttime) 69 | 70 | ### numerical 71 | 72 | # NOTE: a random numerical test should be repeated at the OUTERMOST loop (with 73 | # exception catching) to see if its failures exceed the number expected 74 | # according to the specified pvalue (tests could be repeated via sample 75 | # bootstrapping inside the test, but that doesn't work reliably and random tests 76 | # should have no problem generating new randomness!) 77 | 78 | def assert_populations_eq(pop1, pop2): 79 | assert_populations_eq_moments(pop1,pop2) and \ 80 | assert_populations_eq_komolgorofsmirnov(pop1,pop2) 81 | 82 | def assert_populations_eq_moments(pop1, pop2, **kwargs): 83 | # just first two moments implemented; others are hard to estimate anyway! 
84 | assert_populations_eq_means(pop1,pop2,**kwargs) and \ 85 | assert_populations_eq_variances(pop1,pop2,**kwargs) 86 | 87 | def assert_populations_eq_means(pop1, pop2, pval=0.05, msg=None): 88 | _,p = stats.two_sample_t_statistic(pop1,pop2) 89 | if np.any(p < pval): 90 | raise AssertionError(msg or "population means might be different at %0.3f" % pval) 91 | 92 | def assert_populations_eq_variances(pop1, pop2, pval=0.05, msg=None): 93 | _,p = stats.f_statistic(pop1, pop2) 94 | if np.any(p < pval): 95 | raise AssertionError(msg or "population variances might be different at %0.3f" % pval) 96 | 97 | def assert_populations_eq_komolgorofsmirnov(pop1, pop2, msg=None): 98 | raise NotImplementedError # TODO 99 | 100 | -------------------------------------------------------------------------------- /pybasicbayes/util/text.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | import numpy as np 4 | import sys, time 5 | 6 | # time.clock() is cpu time of current process 7 | # time.time() is wall time 8 | 9 | # TODO there are probably better progress bar libraries I could use 10 | 11 | round = (lambda x: lambda y: int(x(y)))(round) 12 | 13 | # NOTE: datetime.timedelta.__str__ doesn't allow formatting the number of digits 14 | def sec2str(seconds): 15 | hours, rem = divmod(seconds,3600) 16 | minutes, seconds = divmod(rem,60) 17 | if hours > 0: 18 | return '%02d:%02d:%02d' % (hours,minutes,round(seconds)) 19 | elif minutes > 0: 20 | return '%02d:%02d' % (minutes,round(seconds)) 21 | else: 22 | return '%0.2f' % seconds 23 | 24 | def progprint_xrange(*args,**kwargs): 25 | xr = range(*args) 26 | return progprint(xr,total=len(xr),**kwargs) 27 | 28 | def progprint(iterator,total=None,perline=25,show_times=True): 29 | times = [] 30 | idx = 0 31 | if total is not None: 32 | numdigits = len('%d' % total) 33 | for thing in iterator: 34 | prev_time = time.time() 35 | yield thing 36 | times.append(time.time() - prev_time) 37 | sys.stdout.write('.') 38 | if (idx+1) % perline == 0: 39 | if show_times: 40 | avgtime = np.mean(times) 41 | if total is not None: 42 | eta = sec2str(avgtime*(total-(idx+1))) 43 | sys.stdout.write(( 44 | ' [ %%%dd/%%%dd, %%7.2fsec avg, ETA %%s ]\n' 45 | % (numdigits,numdigits)) % (idx+1,total,avgtime,eta)) 46 | else: 47 | sys.stdout.write(' [ %d done, %7.2fsec avg ]\n' % (idx+1,avgtime)) 48 | else: 49 | if total is not None: 50 | sys.stdout.write((' [ %%%dd/%%%dd ]\n' % (numdigits,numdigits) ) % (idx+1,total)) 51 | else: 52 | sys.stdout.write(' [ %d ]\n' % (idx+1)) 53 | idx += 1 54 | sys.stdout.flush() 55 | print('') 56 | if show_times and len(times) > 0: 57 | total = sec2str(seconds=np.sum(times)) 58 | print('%7.2fsec avg, %s total\n' % (np.mean(times),total)) 59 | 60 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from setuptools.command.build_ext import build_ext as _build_ext 3 | from setuptools.command.sdist import sdist as _sdist 4 | from distutils.errors import CompileError 5 | from warnings import warn 6 | import os.path 7 | 8 | try: 9 | from Cython.Distutils import build_ext as _build_ext 10 | except ImportError: 11 | use_cython = False 12 | else: 13 | use_cython = True 14 | 15 | class build_ext(_build_ext): 16 | # see http://stackoverflow.com/q/19919905 for explanation 17 | def finalize_options(self): 
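        # numpy is imported here at build time, rather than at the top of setup.py, so that
        # setup_requires=['numpy'] can make it available first; its headers are then added to
        # the extension include path (see the stackoverflow link above).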
18 | _build_ext.finalize_options(self) 19 | __builtins__.__NUMPY_SETUP__ = False 20 | import numpy as np 21 | self.include_dirs.append(np.get_include()) 22 | 23 | # if extension modules fail to build, keep going anyway 24 | def run(self): 25 | try: 26 | _build_ext.run(self) 27 | except CompileError: 28 | warn('Failed to build extension modules') 29 | 30 | class sdist(_sdist): 31 | def run(self): 32 | try: 33 | from Cython.Build import cythonize 34 | cythonize(os.path.join('pybasicbayes','**','*.pyx')) 35 | except: 36 | warn('Failed to generate extension files from Cython sources') 37 | finally: 38 | _sdist.run(self) 39 | 40 | ext_modules=[ 41 | Extension( 42 | 'pybasicbayes.util.cstats', ['pybasicbayes/util/cstats.c'], 43 | extra_compile_args=['-O3','-w']), 44 | ] 45 | 46 | if use_cython: 47 | from Cython.Build import cythonize 48 | try: 49 | ext_modules = cythonize(os.path.join('pybasicbayes','**','*.pyx')) 50 | except: 51 | warn('Failed to generate extension module code from Cython files') 52 | 53 | setup(name='pybasicbayes', 54 | version='0.2.4', 55 | description="Basic utilities for Bayesian inference", 56 | author='Matthew James Johnson', 57 | author_email='mattjj@csail.mit.edu', 58 | url="http://github.com/mattjj/pybasicbayes", 59 | packages=[ 60 | 'pybasicbayes', 'pybasicbayes.distributions', 61 | 'pybasicbayes.util', 'pybasicbayes.testing', 'pybasicbayes.models'], 62 | platforms='ALL', 63 | keywords=[ 64 | 'bayesian', 'inference', 'mcmc', 'variational inference', 65 | 'mean field', 'vb'], 66 | install_requires=["numpy", "scipy", "matplotlib", "nose", "future"], 67 | setup_requires=['numpy'], 68 | classifiers=[ 69 | 'Intended Audience :: Science/Research', 70 | 'Programming Language :: Python', 71 | ], 72 | ext_modules=ext_modules, 73 | cmdclass={'build_ext': build_ext, 'sdist': sdist}) 74 | -------------------------------------------------------------------------------- /tests/test_categorical.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, \ 8 | GewekeGibbsTester 9 | 10 | 11 | @attr('categorical') 12 | class TestCategorical(BigDataGibbsTester,GewekeGibbsTester): 13 | @property 14 | def distribution_class(self): 15 | return distributions.Categorical 16 | 17 | @property 18 | def hyperparameter_settings(self): 19 | return (dict(alpha_0=5.,K=5),) 20 | 21 | @property 22 | def big_data_size(self): 23 | return 20000 24 | 25 | def params_close(self,d1,d2): 26 | return np.allclose(d1.weights,d2.weights,atol=0.05) 27 | 28 | def geweke_statistics(self,d,data): 29 | return d.weights 30 | 31 | @property 32 | def geweke_pval(self): 33 | return 0.05 34 | 35 | 36 | @attr('categorical_concentration') 37 | class TestCategorical(GewekeGibbsTester): 38 | #class TestCategorical(BigDataGibbsTester,GewekeGibbsTester): 39 | @property 40 | def distribution_class(self): 41 | return distributions.CategoricalAndConcentration 42 | 43 | @property 44 | def hyperparameter_settings(self): 45 | return (dict(a_0=5., b_0=5.0, K=5),) 46 | 47 | @property 48 | def big_data_size(self): 49 | return 20000 50 | 51 | def params_close(self,d1,d2): 52 | return np.allclose(d1.weights,d2.weights,atol=0.05) and \ 53 | np.allclose(d1.alpha_0,d2.alpha_0,atol=0.05) 54 | 55 | def geweke_statistics(self,d,data): 56 | #return np.concatenate((d.weights, [d.alpha_0])) 57 | 
return np.array(d.alpha_0) 58 | 59 | @property 60 | def geweke_nsamples(self): 61 | return 3000 62 | 63 | @property 64 | def geweke_data_size(self): 65 | return 1 # NOTE: more data usually means slower mixing 66 | 67 | @property 68 | def geweke_ntrials(self): 69 | return 1 70 | 71 | @property 72 | def geweke_pval(self): 73 | return 0.05 74 | -------------------------------------------------------------------------------- /tests/test_gammadirichlet.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from builtins import range 3 | from builtins import object 4 | import numpy as np 5 | 6 | from nose.plugins.attrib import attr 7 | 8 | import pybasicbayes.distributions as distributions 9 | from pybasicbayes.testing.mixins import GewekeGibbsTester 10 | 11 | 12 | @attr('GammaCompoundDirichlet', 'slow') 13 | class TestDirichletCompoundGamma(GewekeGibbsTester): 14 | def test_weaklimit(self): 15 | a = distributions.CRP(10,1) 16 | b = distributions.GammaCompoundDirichlet(1000,10,1) 17 | 18 | a.concentration = b.concentration = 10. 19 | 20 | from matplotlib import pyplot as plt 21 | 22 | plt.figure() 23 | crp_counts = np.zeros(10) 24 | gcd_counts = np.zeros(10) 25 | for itr in range(500): 26 | crp_rvs = np.sort(a.rvs(25))[::-1][:10] 27 | crp_counts[:len(crp_rvs)] += crp_rvs 28 | gcd_counts += np.sort(b.rvs(25))[::-1][:10] 29 | 30 | plt.plot(crp_counts/200,gcd_counts/200,'bx-') 31 | plt.xlim(0,10) 32 | plt.ylim(0,10) 33 | 34 | import os 35 | from pybasicbayes.testing.mixins import mkdir 36 | figpath = os.path.join( 37 | os.path.dirname(__file__),'figures', 38 | self.__class__.__name__,'weaklimittest.pdf') 39 | mkdir(os.path.dirname(figpath)) 40 | plt.savefig(figpath) 41 | 42 | @property 43 | def distribution_class(self): 44 | return distributions.GammaCompoundDirichlet 45 | 46 | @property 47 | def hyperparameter_settings(self): 48 | return (dict(K=1000, a_0=10, b_0=1),) 49 | 50 | def geweke_statistics(self, d, data): 51 | return np.array([d.concentration]) 52 | 53 | @property 54 | def geweke_resample_kwargs(self): 55 | return dict(niter=1) 56 | -------------------------------------------------------------------------------- /tests/test_gaussian.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, \ 8 | GewekeGibbsTester, BasicTester 9 | 10 | 11 | @attr('gaussian') 12 | class TestGaussian(BigDataGibbsTester,GewekeGibbsTester): 13 | @property 14 | def distribution_class(self): 15 | return distributions.Gaussian 16 | 17 | @property 18 | def hyperparameter_settings(self): 19 | return (dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=1.,nu_0=4.),) 20 | 21 | def params_close(self,d1,d2): 22 | return np.linalg.norm(d1.mu-d2.mu) < 0.1 and np.linalg.norm(d1.sigma-d2.sigma) < 0.1 23 | 24 | def geweke_statistics(self,d,data): 25 | return np.concatenate((d.mu,np.diag(d.sigma))) 26 | 27 | @property 28 | def geweke_nsamples(self): 29 | return 50000 30 | 31 | @property 32 | def geweke_data_size(self): 33 | return 1 34 | 35 | @property 36 | def geweke_pval(self): 37 | return 0.05 38 | 39 | def geweke_numerical_slice(self,d,setting_idx): 40 | return slice(0,d.mu.shape[0]) 41 | 42 | ### class-specific 43 | 44 | def test_empirical_bayes(self): 45 | data = np.random.randn(50,2) 46 | 
distributions.Gaussian().empirical_bayes(data).hypparams 47 | 48 | 49 | @attr('diagonalgaussian') 50 | class TestDiagonalGaussian(BigDataGibbsTester,GewekeGibbsTester,BasicTester): 51 | @property 52 | def distribution_class(self): 53 | return distributions.DiagonalGaussian 54 | 55 | @property 56 | def hyperparameter_settings(self): 57 | return (dict(mu_0=np.zeros(2),nus_0=7,alphas_0=np.r_[5.,10.],betas_0=np.r_[1.,4.]),) 58 | 59 | def params_close(self,d1,d2): 60 | return np.linalg.norm(d1.mu-d2.mu) < 0.1 and np.linalg.norm(d1.sigmas-d2.sigmas) < 0.25 61 | 62 | def geweke_statistics(self,d,data): 63 | return np.concatenate((d.mu,d.sigmas)) 64 | 65 | @property 66 | def geweke_nsamples(self): 67 | return 50000 68 | 69 | @property 70 | def geweke_data_size(self): 71 | return 2 72 | 73 | @property 74 | def geweke_pval(self): 75 | return 0.05 76 | 77 | def geweke_numerical_slice(self,d,setting_idx): 78 | return slice(0,d.mu.shape[0]) 79 | 80 | ### class-specific 81 | 82 | def test_log_likelihood(self): 83 | data = np.random.randn(1000,100) 84 | 85 | mu = np.random.randn(100) 86 | sigmas = np.random.uniform(1,2,size=100) 87 | 88 | d = distributions.DiagonalGaussian(mu=mu,sigmas=sigmas) 89 | pdf1 = d.log_likelihood(data) 90 | 91 | import scipy.stats as stats 92 | pdf2 = stats.norm.logpdf(data,loc=mu,scale=np.sqrt(sigmas)).sum(1) 93 | 94 | assert np.allclose(pdf1,pdf2) 95 | 96 | def test_log_likelihood2(self): 97 | data = np.random.randn(1000,600) 98 | 99 | mu = np.random.randn(600) 100 | sigmas = np.random.uniform(1,2,size=600) 101 | 102 | d = distributions.DiagonalGaussian(mu=mu,sigmas=sigmas) 103 | pdf1 = d.log_likelihood(data) 104 | 105 | import scipy.stats as stats 106 | pdf2 = stats.norm.logpdf(data,loc=mu,scale=np.sqrt(sigmas)).sum(1) 107 | 108 | assert np.allclose(pdf1,pdf2) 109 | 110 | 111 | @attr('diagonalgaussiannonconj') 112 | class TestDiagonalGaussianNonconjNIG(BigDataGibbsTester,GewekeGibbsTester,BasicTester): 113 | @property 114 | def distribution_class(self): 115 | return distributions.DiagonalGaussianNonconjNIG 116 | 117 | @property 118 | def hyperparameter_settings(self): 119 | return ( 120 | dict(mu_0=np.zeros(2),sigmas_0=np.ones(2),alpha_0=np.ones(2),beta_0=np.ones(2)), 121 | dict(mu_0=np.zeros(600),sigmas_0=np.ones(600),alpha_0=np.ones(600),beta_0=np.ones(600)), 122 | ) 123 | 124 | def params_close(self,d1,d2): 125 | return np.linalg.norm(d1.mu - d2.mu) < 0.25*np.sqrt(d1.mu.shape[0]) \ 126 | and np.linalg.norm(d1.sigmas-d2.sigmas) < 0.5*d1.sigmas.shape[0] 127 | 128 | def geweke_statistics(self,d,data): 129 | return np.concatenate((d.mu,d.sigmas)) 130 | 131 | @property 132 | def geweke_nsamples(self): 133 | return 5000 134 | 135 | @property 136 | def geweke_data_size(self): 137 | return 2 138 | 139 | @property 140 | def geweke_pval(self): 141 | return 0.05 142 | 143 | def geweke_numerical_slice(self,d,setting_idx): 144 | return slice(0,d.mu.shape[0]) 145 | 146 | ### class-specific 147 | 148 | def test_log_likelihood(self): 149 | data = np.random.randn(1000,100) 150 | 151 | mu = np.random.randn(100) 152 | sigmas = np.random.uniform(1,2,size=100) 153 | 154 | d = distributions.DiagonalGaussian(mu=mu,sigmas=sigmas) 155 | pdf1 = d.log_likelihood(data) 156 | 157 | import scipy.stats as stats 158 | pdf2 = stats.norm.logpdf(data,loc=mu,scale=np.sqrt(sigmas)).sum(1) 159 | 160 | assert np.allclose(pdf1,pdf2) 161 | 162 | def test_log_likelihood2(self): 163 | data = np.random.randn(1000,600) 164 | 165 | mu = np.random.randn(600) 166 | sigmas = np.random.uniform(1,2,size=600) 167 | 168 | d 
= distributions.DiagonalGaussian(mu=mu,sigmas=sigmas) 169 | pdf1 = d.log_likelihood(data) 170 | 171 | import scipy.stats as stats 172 | pdf2 = stats.norm.logpdf(data,loc=mu,scale=np.sqrt(sigmas)).sum(1) 173 | 174 | assert np.allclose(pdf1,pdf2) 175 | 176 | 177 | @attr('gaussianfixedmean') 178 | class TestGaussianFixedMean(BigDataGibbsTester,GewekeGibbsTester): 179 | @property 180 | def distribution_class(self): 181 | return distributions.GaussianFixedMean 182 | 183 | @property 184 | def hyperparameter_settings(self): 185 | return (dict(mu=np.array([1.,2.,3.]),nu_0=5,lmbda_0=np.diag([3.,2.,1.])),) 186 | 187 | def params_close(self,d1,d2): 188 | return np.linalg.norm(d1.sigma - d2.sigma) < 0.25 189 | 190 | def geweke_statistics(self,d,data): 191 | return np.diag(d.sigma) 192 | 193 | @property 194 | def geweke_nsamples(self): 195 | return 25000 196 | 197 | @property 198 | def geweke_data_size(self): 199 | return 5 200 | 201 | @property 202 | def geweke_pval(self): 203 | return 0.05 204 | 205 | 206 | @attr('gaussianfixedcov') 207 | class TestGaussianFixedCov(BigDataGibbsTester,GewekeGibbsTester): 208 | @property 209 | def distribution_class(self): 210 | return distributions.GaussianFixedCov 211 | 212 | @property 213 | def hyperparameter_settings(self): 214 | return (dict(sigma=np.diag([3.,2.,1.]),mu_0=np.array([1.,2.,3.]),sigma_0=np.eye(3)),) 215 | 216 | def params_close(self,d1,d2): 217 | return np.linalg.norm(d1.mu-d2.mu) < 0.1 218 | 219 | def geweke_statistics(self,d,data): 220 | return d.mu 221 | 222 | @property 223 | def geweke_nsamples(self): 224 | return 25000 225 | 226 | @property 227 | def geweke_data_size(self): 228 | return 5 229 | 230 | @property 231 | def geweke_pval(self): 232 | return 0.05 233 | 234 | 235 | @attr('gaussiannonconj') 236 | class TestGaussianNonConj(BigDataGibbsTester,GewekeGibbsTester): 237 | @property 238 | def distribution_class(self): 239 | return distributions.GaussianNonConj 240 | 241 | @property 242 | def hyperparameter_settings(self): 243 | return (dict(mu_0=np.zeros(2),mu_lmbda_0=2*np.eye(2),nu_0=5,sigma_lmbda_0=np.eye(2)),) 244 | 245 | def params_close(self,d1,d2): 246 | return np.linalg.norm(d1.mu-d2.mu) < 0.1 and np.linalg.norm(d1.sigma-d2.sigma) < 0.25 247 | 248 | def geweke_statistics(self,d,data): 249 | return np.concatenate((d.mu,np.diag(d.sigma))) 250 | 251 | @property 252 | def geweke_nsamples(self): 253 | return 30000 254 | 255 | @property 256 | def geweke_data_size(self): 257 | return 1 258 | 259 | @property 260 | def geweke_pval(self): 261 | return 0.05 262 | 263 | def geweke_numerical_slice(self,d,setting_idx): 264 | return slice(0,d.mu.shape[0]) 265 | 266 | @property 267 | def resample_kwargs(self): 268 | return dict(niter=10) 269 | 270 | 271 | @attr('scalargaussiannix') 272 | class TestScalarGaussianNIX(BigDataGibbsTester,GewekeGibbsTester): 273 | @property 274 | def distribution_class(self): 275 | return distributions.ScalarGaussianNIX 276 | 277 | @property 278 | def hyperparameter_settings(self): 279 | return (dict(mu_0=2.7,kappa_0=2.,sigmasq_0=4.,nu_0=2),) 280 | 281 | def params_close(self,d1,d2): 282 | return np.abs(d1.mu-d2.mu) < 0.5 and np.abs(d2.sigmasq - d2.sigmasq) < 0.5 283 | 284 | def geweke_statistics(self,d,data): 285 | return np.array((d.mu,d.sigmasq)) 286 | 287 | @property 288 | def geweke_nsamples(self): 289 | return 30000 290 | 291 | @property 292 | def geweke_data_size(self): 293 | return 2 294 | 295 | @property 296 | def geweke_pval(self): 297 | return 0.05 298 | 299 | def geweke_numerical_slice(self,d,setting_idx): 300 | 
return slice(0,1) 301 | 302 | 303 | @attr('scalargaussiannonconjnix') 304 | class TestScalarGaussianNonconjNIX(BigDataGibbsTester,GewekeGibbsTester): 305 | @property 306 | def distribution_class(self): 307 | return distributions.ScalarGaussianNonconjNIX 308 | 309 | @property 310 | def hyperparameter_settings(self): 311 | return (dict(mu_0=2.7,tausq_0=4.,sigmasq_0=2.,nu_0=2),) 312 | 313 | def params_close(self,d1,d2): 314 | return np.abs(d1.mu-d2.mu) < 0.1 and np.abs(d2.sigmasq - d2.sigmasq) < 0.25 315 | 316 | def geweke_statistics(self,d,data): 317 | return np.array((d.mu,d.sigmasq)) 318 | 319 | @property 320 | def geweke_nsamples(self): 321 | return 30000 322 | 323 | @property 324 | def geweke_data_size(self): 325 | return 2 326 | 327 | @property 328 | def geweke_pval(self): 329 | return 0.05 330 | 331 | def geweke_numerical_slice(self,d,setting_idx): 332 | return slice(0,1) 333 | -------------------------------------------------------------------------------- /tests/test_geometric.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, \ 8 | GewekeGibbsTester 9 | 10 | 11 | @attr('geometric') 12 | class TestGeometric(BigDataGibbsTester,GewekeGibbsTester): 13 | @property 14 | def distribution_class(self): 15 | return distributions.Geometric 16 | 17 | @property 18 | def hyperparameter_settings(self): 19 | return (dict(alpha_0=2,beta_0=20),dict(alpha_0=5,beta_0=5)) 20 | 21 | def params_close(self,d1,d2): 22 | return np.allclose(d1.p,d2.p,rtol=0.05) 23 | 24 | def geweke_statistics(self,d,data): 25 | return d.p 26 | 27 | @property 28 | def geweke_pval(self): 29 | return 0.5 30 | -------------------------------------------------------------------------------- /tests/test_negbin.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, \ 8 | GewekeGibbsTester 9 | 10 | 11 | @attr('negbinfixedr') 12 | class TestNegativeBinomialFixedR(BigDataGibbsTester,GewekeGibbsTester): 13 | @property 14 | def distribution_class(self): 15 | return distributions.NegativeBinomialFixedR 16 | 17 | @property 18 | def hyperparameter_settings(self): 19 | return (dict(r=5,alpha_0=1,beta_0=9),) 20 | 21 | def params_close(self,d1,d2): 22 | return np.allclose(d1.p,d2.p,rtol=0.1) 23 | 24 | def geweke_statistics(self,d,data): 25 | return d.p 26 | 27 | 28 | @attr('negbinintr') 29 | class TestNegativeBinomialIntegerR(BigDataGibbsTester,GewekeGibbsTester): 30 | @property 31 | def distribution_class(self): 32 | return distributions.NegativeBinomialIntegerR 33 | 34 | @property 35 | def hyperparameter_settings(self): 36 | return (dict(r_discrete_distn=np.r_[0.,0,0,1,1,1],alpha_0=5,beta_0=5),) 37 | 38 | def params_close(self,d1,d2): 39 | # since it's easy to be off by 1 in r and still look like the same 40 | # distribution, best just to check moment parameters 41 | def mean(d): 42 | return d.r*d.p/(1.-d.p) 43 | def var(d): 44 | return mean(d)/(1.-d.p) 45 | return np.allclose(mean(d1),mean(d2),rtol=0.1) and np.allclose(var(d1),var(d2),rtol=0.1) 46 | 47 | def geweke_statistics(self,d,data): 48 | return d.p 49 | 50 | @property 51 | 
def geweke_pval(self): 52 | return 0.005 # since the statistic is on (0,1), it's really sensitive? 53 | 54 | 55 | @attr('negbinintr2') 56 | class TestNegativeBinomialIntegerR2(BigDataGibbsTester,GewekeGibbsTester): 57 | @property 58 | def distribution_class(self): 59 | return distributions.NegativeBinomialIntegerR2 60 | 61 | @property 62 | def hyperparameter_settings(self): 63 | return (dict(r_discrete_distn=np.r_[0.,0,0,1,1,1],alpha_0=5,beta_0=5),) 64 | 65 | def params_close(self,d1,d2): 66 | # since it's easy to be off by 1 in r and still look like the same 67 | # distribution, best just to check moment parameters 68 | def mean(d): 69 | return d.r*d.p/(1.-d.p) 70 | def var(d): 71 | return mean(d)/(1.-d.p) 72 | return np.allclose(mean(d1),mean(d2),rtol=0.1) and np.allclose(var(d1),var(d2),rtol=0.1) 73 | 74 | def geweke_statistics(self,d,data): 75 | return d.p 76 | 77 | @property 78 | def geweke_pval(self): 79 | return 0.005 # since the statistic is on (0,1), it's really sensitive? 80 | 81 | 82 | @attr('negbinintrvariant') 83 | class TestNegativeBinomialIntegerRVariant(TestNegativeBinomialIntegerR): 84 | @property 85 | def distribution_class(self): 86 | return distributions.NegativeBinomialIntegerRVariant 87 | -------------------------------------------------------------------------------- /tests/test_poisson.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, \ 8 | GewekeGibbsTester 9 | 10 | 11 | @attr('poisson') 12 | class TestPoisson(BigDataGibbsTester,GewekeGibbsTester): 13 | @property 14 | def distribution_class(self): 15 | return distributions.Poisson 16 | 17 | @property 18 | def hyperparameter_settings(self): 19 | return (dict(alpha_0=30,beta_0=3),) 20 | 21 | def params_close(self,d1,d2): 22 | return np.allclose(d1.lmbda,d2.lmbda,rtol=0.05) 23 | 24 | def geweke_statistics(self,d,data): 25 | return d.lmbda 26 | -------------------------------------------------------------------------------- /tests/test_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from nose.plugins.attrib import attr 5 | 6 | import pybasicbayes.distributions as distributions 7 | from pybasicbayes.testing.mixins import BigDataGibbsTester, MaxLikelihoodTester, \ 8 | GewekeGibbsTester, BasicTester 9 | 10 | 11 | @attr('regression') 12 | class TestRegression( 13 | BasicTester,BigDataGibbsTester, 14 | MaxLikelihoodTester,GewekeGibbsTester): 15 | @property 16 | def distribution_class(self): 17 | return distributions.Regression 18 | 19 | @property 20 | def hyperparameter_settings(self): 21 | return (dict(nu_0=3,S_0=np.eye(1),M_0=np.zeros((1,2)),K_0=np.eye(2)), 22 | dict(nu_0=5,S_0=np.eye(2),M_0=np.zeros((2,4)),K_0=2*np.eye(4)), 23 | dict(nu_0=5,S_0=np.eye(2),M_0=np.zeros((2,5)),K_0=2*np.eye(5),affine=True),) 24 | 25 | def params_close(self,d1,d2): 26 | return np.linalg.norm(d1.A-d2.A) < 0.1 and np.linalg.norm(d1.sigma-d2.sigma) < 0.1 27 | 28 | @property 29 | def big_data_size(self): 30 | return 80000 31 | 32 | def geweke_statistics(self,d,data): 33 | return np.concatenate((d.A.flatten(),np.diag(d.sigma))) 34 | 35 | def geweke_numerical_slice(self,d,setting_idx): 36 | return slice(0,d.A.flatten().shape[0]) 37 | 38 | @property 39 | def geweke_ntrials(self): 
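        # Each Geweke trial runs a long chain that alternates resampling the
        # regression parameters and regenerating the data, so a single trial
        # keeps this slow test manageable.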
40 | return 1 # because it's slow 41 | 42 | @property 43 | def geweke_num_statistic_fails_to_tolerate(self): 44 | return 0 45 | 46 | ### class-specific 47 | 48 | def test_affine_loglike(self): 49 | A = np.random.randn(2,3) 50 | b = np.random.randn(2) 51 | sigma = np.random.randn(2,2); sigma = sigma.dot(sigma.T) 52 | data = np.random.randn(25,5) 53 | 54 | d1 = self.distribution_class(A=np.hstack((A,b[:,None])),sigma=sigma,affine=True) 55 | d2 = self.distribution_class(A=A,sigma=sigma) 56 | 57 | likes1 = d1.log_likelihood(data) 58 | data[:,-2:] -= b 59 | likes2 = d2.log_likelihood(data) 60 | 61 | assert np.allclose(likes1,likes2) 62 | 63 | def test_loglike_against_gaussian(self): 64 | mu = np.random.randn(3) 65 | A = mu[:,None] 66 | sigma = np.random.randn(3,3); sigma = sigma.dot(sigma.T) 67 | 68 | data = np.random.randn(25,mu.shape[0]) 69 | 70 | d1 = distributions.Gaussian(mu=mu,sigma=sigma) 71 | likes1 = d1.log_likelihood(data) 72 | 73 | d2 = self.distribution_class(A=A,sigma=sigma) 74 | likes2 = d2.log_likelihood(np.hstack((np.ones((data.shape[0],1)),data))) 75 | 76 | assert np.allclose(likes1,likes2) 77 | 78 | @attr('regressionnonconj') 79 | class TestRegressionNonconj(BasicTester,BigDataGibbsTester,GewekeGibbsTester): 80 | @property 81 | def distribution_class(self): 82 | return distributions.RegressionNonconj 83 | 84 | @property 85 | def hyperparameter_settings(self): 86 | def make_hyps(m,n): 87 | return dict(nu_0=m+1, S_0=m*np.eye(m), 88 | M_0=np.zeros((m,n)), Sigma_0=np.eye(m*n)) 89 | return [make_hyps(m,n) for m, n in [(2,3), (3,2)]] 90 | 91 | def params_close(self,d1,d2): 92 | return np.linalg.norm(d1.A-d2.A) < 0.5 and np.linalg.norm(d1.sigma-d2.sigma) < 0.5 93 | 94 | def geweke_statistics(self,d,data): 95 | return np.concatenate((d.A.flatten(),np.diag(d.sigma))) 96 | 97 | def geweke_numerical_slices(self,d,setting_idx): 98 | return slice(0,d.A.flatten().shape[0]) 99 | 100 | @property 101 | def geweke_ntrials(self): 102 | return 1 # because it's slow 103 | 104 | @property 105 | def geweke_num_statistic_fails_to_tolerate(self): 106 | return 0 107 | 108 | @property 109 | def geweke_resample_kwargs(self): 110 | return dict(niter=2) 111 | --------------------------------------------------------------------------------
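The regression tests above fit `distributions.Regression` to large synthetic datasets and check that the posterior samples land near the generating parameters. The same pattern works outside the test harness; the following is a minimal sketch, assuming the `resample(data)` Gibbs interface from `abstractions.py` and the `[x, y]` column layout that `test_regression.py` passes to `log_likelihood`:

```python
import numpy as np
from pybasicbayes import distributions

# Generating model: scalar y = A_true x + Gaussian noise, with x in R^2.
A_true = np.array([[1.0, -2.0]])
sigma_true = np.array([[0.1]])

# Stack covariates and outputs column-wise as [x, y], the layout the
# regression tests use.
n = 5000
x = np.random.randn(n, 2)
y = x.dot(A_true.T) + np.sqrt(sigma_true[0, 0]) * np.random.randn(n, 1)
data = np.hstack((x, y))

# Matrix-normal inverse-Wishart prior, mirroring the first setting in
# TestRegression.hyperparameter_settings.
d = distributions.Regression(
    nu_0=3, S_0=np.eye(1), M_0=np.zeros((1, 2)), K_0=np.eye(2))

# A few Gibbs sweeps; with this much data the sampled (A, sigma) should sit
# close to (A_true, sigma_true), which is what params_close checks.
for _ in range(25):
    d.resample(data)

print(d.A)
print(d.sigma)
```

Because the prior is conjugate, each `resample` call should draw directly from the posterior given `data`; the loop is only there to mimic how the Gibbs testers exercise the distribution.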