├── LICENSE ├── README.md ├── data ├── organ.wav └── timeseries.jld ├── docker ├── Dockerfile ├── README.md └── add_packages.jl ├── pdf ├── seigo.pdf └── seigo_v4.pdf └── src ├── BayesNeuralNet.jl ├── DimensionalityReduction.jl ├── GaussianMixtureModel.jl ├── LogisticRegression.jl ├── NMF.jl ├── PoissonHMM.jl ├── PoissonMixtureModel.jl ├── demo_BayesNeuralNet.jl ├── demo_DimensionalityReduction.jl ├── demo_GaussianMixtureModel.jl ├── demo_LogisticRegression.jl ├── demo_NMF.jl ├── demo_PoissonHMM.jl ├── demo_PoissonMixtureModel.jl ├── demo_PolynomialRegression.jl ├── demo_Simple2DGauss.jl ├── demo_SimpleFitting.jl └── demo_nonconjugate.jl /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Sammy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BayesBook 2 | 3 | 「機械学習スタートアップシリーズ ベイズ推論による機械学習入門」のソースコードをアップしています。 4 | * http://www.kspub.co.jp/book/detail/1538320.html 5 | * 正誤表(第1~3刷まで) https://github.com/sammy-suyama/BayesBook/blob/master/pdf/seigo.pdf 6 | * 正誤表(第4刷まで) https://github.com/sammy-suyama/BayesBook/blob/master/pdf/seigo_v4.pdf 7 | 8 | ソースコードはJuliaで書かれています。(推奨Vesion:0.6.0) 9 | * The Julia Language: http://julialang.org/ 10 | * Julia Documentation: http://docs.julialang.org/ 11 | 12 | グラフの描画やテストデータのダウンロードに一部Pythonライブラリを利用しています。 13 | * Python: https://www.python.org/ 14 | * Matplotlib: https://matplotlib.org/ 15 | * scikit-learn: http://scikit-learn.org/ 16 | 17 | 上記の環境構築が煩わしい場合にはDockerfileも用意しています. 
18 | * Docker: https://docs.docker.com/ 19 | -------------------------------------------------------------------------------- /data/organ.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sammy-suyama/BayesBook/61cb7ee0f1df348ef502aa183f23c7bc9753d02a/data/organ.wav -------------------------------------------------------------------------------- /data/timeseries.jld: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sammy-suyama/BayesBook/61cb7ee0f1df348ef502aa183f23c7bc9753d02a/data/timeseries.jld -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:latest 2 | 3 | # Update 4 | RUN apt-get update 5 | 6 | # Install matplotlib 7 | RUN pip3 install matplotlib scipy scikit-learn notebook 8 | 9 | # Install libraries 10 | RUN apt-get install -y sudo hdf5-tools libzmq3 11 | 12 | # Install julia 0.6.0 13 | RUN wget https://julialang-s3.julialang.org/bin/linux/x64/0.6/julia-0.6.0-linux-x86_64.tar.gz && \ 14 | tar -xzf julia-0.6.0-linux-x86_64.tar.gz && \ 15 | ln -s /julia-903644385b/bin/julia /usr/local/bin/julia 16 | 17 | # Set the working directory to /work 18 | WORKDIR /work 19 | 20 | # Add julia packages 21 | ADD add_packages.jl /work 22 | RUN julia add_packages.jl 23 | 24 | # Download source codes 25 | RUN git clone https://github.com/sammy-suyama/BayesBook.git 26 | 27 | # Make port 8888 available to the world outside this container 28 | EXPOSE 8888 29 | 30 | # Start jupyter notebook 31 | CMD jupyter notebook --allow-root --port=8888 --ip=0.0.0.0 32 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # DockerからJupyter notebookを実行する 2 | 3 | JuliaやPythonの実行環境構築が煩わしい場合は、Dockerを使ってデモスクリプトをJupyter notebook上で動作させることができます。 4 | Dockerのインストールに関しては公式サイトを参考ください。 5 | * https://docs.docker.com/engine/installation/ 6 | 7 | `Dockerfile`の置いてあるディレクトリで、イメージを作成・実行します。 8 | 9 | $ docker build -t bayesbook . 
10 | $ docker run -p 8888:8888 bayesbook 11 | 12 | -------------------------------------------------------------------------------- /docker/add_packages.jl: -------------------------------------------------------------------------------- 1 | 2 | Pkg.update() 3 | Pkg.add("PyPlot") 4 | Pkg.add("StatsFuns") 5 | Pkg.add("SpecialFunctions") 6 | Pkg.add("Distributions") 7 | Pkg.add("PDMats") 8 | Pkg.add("ProgressMeter") 9 | Pkg.add("DataFrames") 10 | Pkg.add("HDF5") 11 | Pkg.add("JLD") 12 | Pkg.add("IJulia") 13 | -------------------------------------------------------------------------------- /pdf/seigo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sammy-suyama/BayesBook/61cb7ee0f1df348ef502aa183f23c7bc9753d02a/pdf/seigo.pdf -------------------------------------------------------------------------------- /pdf/seigo_v4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sammy-suyama/BayesBook/61cb7ee0f1df348ef502aa183f23c7bc9753d02a/pdf/seigo_v4.pdf -------------------------------------------------------------------------------- /src/BayesNeuralNet.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Variational inference for Bayesian neural network 3 | """ 4 | module BayesNeuralNet 5 | using Distributions 6 | 7 | export sample_data_from_prior, sample_data_from_posterior 8 | export VI 9 | 10 | function sigmoid(x) 11 | return 1.0 / (1.0 + exp.(-x[1])) 12 | end 13 | 14 | function rho2sig(rho) 15 | return log.(1 + exp.(rho)) 16 | end 17 | 18 | function compute_df_dmu(mu, rho, W) 19 | return (W - mu) ./ rho2sig(rho).^2 20 | end 21 | 22 | function compute_df_drho(Y, X, mu, rho, W) 23 | return -0.5*((W - mu).^2 - rho2sig(rho).^2) .* compute_dprec_drho(rho) 24 | end 25 | 26 | function compute_dprec_drho(rho) 27 | return 2 * rho2sig(rho) .^ (-3) .* (1 ./ (1+exp.(rho))).^2 .* (1 ./ (1+exp.(-rho))) 28 | end 29 | 30 | function compute_df_dw(Y, X, sigma2_y, sigma2_w, mu1, rho1, W1, mu2, rho2, W2) 31 | M, N = size(X) 32 | Y_err1 = zeros(size(W1)) # MxK 33 | Y_err2 = zeros(size(W2)) # KxD 34 | 35 | for n in 1 : N 36 | Z = tanh.(W1'*X[:,n]) # Kx1 37 | Y_est = W2'*Z 38 | # 2nd unit, Dx1 39 | delta2 = Y_est - Y[n] 40 | 41 | # 1st unit, KxD 42 | delta1 = diagm(1 - Z.^2) * W2 * delta2 43 | 44 | Y_err1 += X[:,n] * delta1' 45 | Y_err2 += Z * delta2' 46 | end 47 | df_dw1 = W1/sigma2_w + (mu1 - W1) ./ rho2sig(rho1).^2 + Y_err1 / sigma2_y 48 | df_dw2 = W2/sigma2_w + (mu2 - W2) ./ rho2sig(rho2).^2 + Y_err2 / sigma2_y 49 | return df_dw1, df_dw2 50 | end 51 | 52 | """ 53 | Sample data given prior and inputs. 54 | """ 55 | function sample_data_from_prior(X, sigma2_w, sigma2_y, D, K) 56 | M, N = size(X) 57 | 58 | W1 = sqrt(sigma2_w) * randn(M, K) 59 | W2 = sqrt(sigma2_w) * randn(K, D) 60 | 61 | # sample function 62 | Y = [W2'* tanh.(W1'X[:,n]) for n in 1 : N] 63 | 64 | # sample data 65 | Y_obs = [W2'* tanh.(W1'X[:,n]) + sqrt(sigma2_y)*randn(D) for n in 1 : N] 66 | 67 | return Y_obs, Y, W1, W2 68 | end 69 | 70 | """ 71 | Sample data given posterior and inputs. 
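Weights are drawn with the reparameterization `W = mu + log.(1 + exp.(rho)) .* eps`, where `eps` is standard normal, so the softplus of `rho` plays the role of the posterior standard deviation. Returns the noiseless function values `Y_est` and the noisy observations `Y_obs` for the inputs `X`.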
72 | """ 73 | function sample_data_from_posterior(X, mu1, rho1, mu2, rho2, sigma2_y, D) 74 | N = size(X, 2) 75 | ep1 = randn(size(mu1)) 76 | W1_tmp = mu1 + log.(1 + exp.(rho1)) .* ep1 77 | ep2 = randn(size(mu2)) 78 | W2_tmp = mu2 + log.(1 + exp.(rho2)) .* ep2 79 | Y_est = [W2_tmp'* tanh.(W1_tmp'X[:,n]) for n in 1 : N] 80 | Y_obs = [W2_tmp'* tanh.(W1_tmp'X[:,n]) + sqrt(sigma2_y)*randn(D) for n in 1 : N] 81 | return Y_est, Y_obs 82 | end 83 | 84 | """ 85 | Compute variational parameters. 86 | """ 87 | function VI(Y, X, sigma2_w, sigma2_y, K, alpha, max_iter) 88 | M, N = size(X) 89 | D = length(Y[1]) 90 | 91 | # initialize 92 | mu1 = randn(M, K) 93 | rho1 = randn(M, K) 94 | mu2 = randn(K, D) 95 | rho2 = randn(K, D) 96 | 97 | for i in 1 : max_iter 98 | # sample 99 | ep1 = randn(size(mu1)) 100 | W1_tmp = mu1 + log.(1 + exp.(rho1)) .* ep1 101 | ep2 = randn(size(mu2)) 102 | W2_tmp = mu2 + log.(1 + exp.(rho2)) .* ep2 103 | 104 | # calc error 105 | df_dw1, df_dw2 = compute_df_dw(Y, X, sigma2_y, sigma2_w, mu1, rho1, W1_tmp, mu2, rho2, W2_tmp) 106 | 107 | # 1st unit 108 | df_dmu1 = compute_df_dmu(mu1, rho1, W1_tmp) 109 | df_drho1 = compute_df_drho(Y, X, mu1, rho1, W1_tmp) 110 | d_mu1 = df_dw1 + df_dmu1 111 | d_rho1 = df_dw1 .* (ep1 ./ (1+exp.(-rho1))) + df_drho1 112 | mu1 = mu1 - alpha * d_mu1 113 | rho1 = rho1 - alpha * d_rho1 114 | 115 | # 2nd unit 116 | df_dmu2 = compute_df_dmu(mu2, rho2, W2_tmp) 117 | df_drho2 = compute_df_drho(Y, X, mu2, rho2, W2_tmp) 118 | d_mu2 = df_dw2 + df_dmu2 119 | d_rho2 = df_dw2 .* (ep2 ./ (1+exp.(-rho2))) + df_drho2 120 | mu2 = mu2 - alpha * d_mu2 121 | rho2 = rho2 - alpha * d_rho2 122 | end 123 | return mu1, rho1, mu2, rho2 124 | end 125 | 126 | end 127 | 128 | -------------------------------------------------------------------------------- /src/DimensionalityReduction.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Variational inference for Bayesian DimensionalityReduction 3 | """ 4 | module DimensionalityReduction 5 | 6 | using Distributions 7 | #using ProgressMeter 8 | 9 | export DRModel 10 | export sample_data, VI 11 | 12 | #################### 13 | ## Types 14 | struct DRModel 15 | D::Int 16 | M::Int 17 | sigma2_y::Float64 18 | m_W::Array{Float64, 2} # MxD 19 | Sigma_W::Array{Float64, 3} # MxMxD 20 | m_mu::Array{Float64, 1} # D 21 | Sigma_mu::Array{Float64, 2} # DxD 22 | end 23 | 24 | #################### 25 | ## functions 26 | function sqsum(mat::Array{Float64}, idx::Int) 27 | return squeeze(sum(mat, idx), idx) 28 | end 29 | 30 | """ 31 | Sample data given hyperparameters. 
32 | """ 33 | function sample_data(N::Int, model::DRModel) 34 | D = model.D 35 | M = model.M 36 | W = zeros(M, D) 37 | mu = zeros(D) 38 | for d in 1 : D 39 | W[:,d] = rand(MvNormal(model.m_W[:,d], model.Sigma_W[:,:,d])) 40 | end 41 | mu = rand(MvNormal(model.m_mu, model.Sigma_mu)) 42 | 43 | Y = zeros(D, N) 44 | X = randn(M, N) 45 | for n in 1 : N 46 | Y[:,n] = rand(MvNormal(W'*X[:,n] + mu, model.sigma2_y*eye(D))) 47 | end 48 | return Y, X, W, mu 49 | end 50 | 51 | function init(Y::Array{Float64, 2}, prior::DRModel) 52 | M = prior.M 53 | D, N = size(Y) 54 | X = randn(M, N) 55 | XX = zeros(M, M, N) 56 | for n in 1 : N 57 | XX[:,:,n] = X[:,n]*X[:,n]' + eye(M) 58 | end 59 | return X, XX 60 | end 61 | 62 | function update_W(Y::Array{Float64, 2}, prior::DRModel, posterior::DRModel, 63 | X::Array{Float64, 2}, XX::Array{Float64, 3}) 64 | D = prior.D 65 | M = prior.M 66 | N = size(Y, 2) 67 | m_W = zeros(M, D) 68 | Sigma_W = zeros(M, M, D) 69 | mu = posterior.m_mu 70 | for d in 1 : D 71 | Sigma_W[:,:,d] = inv(inv(prior.sigma2_y)*sqsum(XX, 3) + inv(prior.Sigma_W[:,:,d])) 72 | m_W[:,d] = Sigma_W[:,:,d]*(inv(prior.sigma2_y)*X*(Y[[d],:] - mu[d]*ones(1, N))' 73 | + inv(prior.Sigma_W[:,:,d])*prior.m_W[:,d]) 74 | end 75 | return DRModel(D, M, prior.sigma2_y, m_W, Sigma_W, posterior.m_mu, posterior.Sigma_mu) 76 | end 77 | 78 | function update_mu(Y::Array{Float64, 2}, prior::DRModel, posterior::DRModel, 79 | X::Array{Float64, 2}, XX::Array{Float64, 3}) 80 | N = size(Y, 2) 81 | D = prior.D 82 | M = prior.M 83 | W = posterior.m_W 84 | Sigma_mu = inv(N*inv(prior.sigma2_y)*eye(D) + inv(prior.Sigma_mu)) 85 | m_mu = Sigma_mu*(inv(prior.sigma2_y)*sqsum(Y - W'*X, 2) + inv(prior.Sigma_mu)*prior.m_mu) 86 | return DRModel(D, M, prior.sigma2_y, posterior.m_W, posterior.Sigma_W, m_mu, Sigma_mu) 87 | end 88 | 89 | function update_X(Y::Array{Float64, 2}, posterior::DRModel) 90 | D, N = size(Y) 91 | M = posterior.M 92 | 93 | W = posterior.m_W 94 | WW = zeros(M, M, D) 95 | for d in 1 : D 96 | WW[:,:,d] = W[:,d]*W[:,d]' + posterior.Sigma_W[:,:,d] 97 | end 98 | mu = posterior.m_mu 99 | X = zeros(M, N) 100 | XX = zeros(M, M, N) 101 | for n in 1 : N 102 | Sigma = inv(inv(posterior.sigma2_y)*sqsum(WW, 3) + eye(M)) 103 | X[:,n] = inv(posterior.sigma2_y)*Sigma*W*(Y[:,n] - mu) 104 | XX[:,:,n] = X[:,n] * X[:,n]' + Sigma 105 | end 106 | return X, XX 107 | end 108 | 109 | function interpolate(mask::BitArray{2}, X::Array{Float64, 2}, posterior::DRModel) 110 | Y_est = posterior.m_W'*X + repmat(posterior.m_mu, 1, size(X, 2)) 111 | return return Y_est[mask] 112 | end 113 | 114 | """ 115 | Compute variational posterior distributions. 
116 | """ 117 | function VI(Y::Array{Float64, 2}, prior::DRModel, max_iter::Int) 118 | X, XX = init(Y, prior) 119 | mask = isnan.(Y) 120 | sum_nan = sum(mask) 121 | posterior = deepcopy(prior) 122 | 123 | #progress = Progress(max_iter) 124 | for iter in 1 : max_iter 125 | # progress 126 | #next!(progress) 127 | 128 | # Interpolate 129 | if sum_nan > 0 130 | Y[mask] = interpolate(mask, X, posterior) 131 | end 132 | 133 | # M-step 134 | posterior = update_W(Y, prior, posterior, X, XX) 135 | posterior = update_mu(Y, prior, posterior, X, XX) 136 | 137 | # E-step 138 | X, XX = update_X(Y, posterior) 139 | end 140 | 141 | return posterior, X 142 | end 143 | 144 | end 145 | -------------------------------------------------------------------------------- /src/GaussianMixtureModel.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Bayesian Gaussian Mixture Model 3 | """ 4 | module GaussianMixtureModel 5 | using StatsFuns.logsumexp, SpecialFunctions.digamma 6 | using Distributions 7 | using PDMats 8 | 9 | export GW, BGMM, Gauss, GMM 10 | export sample_GMM, sample_data, winner_takes_all 11 | export learn_GS, learn_CGS, learn_VI 12 | 13 | #################### 14 | ## Types 15 | struct GW 16 | # Parameters of Gauss Wisahrt distribution 17 | beta::Float64 18 | m::Vector{Float64} 19 | nu::Float64 20 | W::Matrix{Float64} 21 | end 22 | 23 | struct BGMM 24 | # Parameters of Bayesian Gaussian Mixture Model 25 | D::Int 26 | K::Int 27 | alpha::Vector{Float64} 28 | cmp::Vector{GW} 29 | end 30 | 31 | struct Gauss 32 | # Parameters of Gauss Distribution 33 | mu::Vector{Float64} 34 | Lambda::Matrix{Float64} 35 | end 36 | 37 | struct GMM 38 | # Parameters of Gauss Mixture Model 39 | D::Int 40 | K::Int 41 | phi::Vector{Float64} 42 | cmp::Vector{Gauss} 43 | end 44 | 45 | #################### 46 | ## Common functions 47 | """ 48 | Sample a GMM given hyperparameters. 49 | """ 50 | function sample_GMM(bgmm::BGMM) 51 | cmp = Vector{Gauss}() 52 | for c in bgmm.cmp 53 | Lambda = rand(Wishart(c.nu, PDMats.PDMat(Symmetric(c.W)))) 54 | mu = rand(MvNormal(c.m, PDMats.PDMat(Symmetric(inv(c.beta*Lambda))))) 55 | push!(cmp, Gauss(mu, Lambda)) 56 | end 57 | phi = rand(Dirichlet(bgmm.alpha)) 58 | return GMM(bgmm.D, bgmm.K, phi, cmp) 59 | end 60 | 61 | """ 62 | Sample data from a specific GMM model. 
63 | """ 64 | function sample_data(gmm::GMM, N::Int) 65 | X = zeros(gmm.D, N) 66 | S = categorical_sample(gmm.phi, N) 67 | for n in 1 : N 68 | k = indmax(S[:, n]) 69 | X[:,n] = rand(MvNormal(gmm.cmp[k].mu, PDMats.PDMat(Symmetric(inv(gmm.cmp[k].Lambda))))) 70 | end 71 | return X, S 72 | end 73 | 74 | categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1] 75 | function categorical_sample(p::Vector{Float64}, N::Int) 76 | K = length(p) 77 | S = zeros(K, N) 78 | S_tmp = rand(Categorical(p), N) 79 | for k in 1 : K 80 | S[k,find(S_tmp.==k)] = 1 81 | end 82 | return S 83 | end 84 | 85 | function sumdigamma(nu, D) 86 | ret = 0.0 87 | for d in 1 : D 88 | ret += digamma.(0.5*(nu + 1 - d)) 89 | end 90 | return ret 91 | end 92 | 93 | function init_S(X::Matrix{Float64}, bgmm::BGMM) 94 | N = size(X, 2) 95 | K = bgmm.K 96 | S = categorical_sample(ones(K)/K, N) 97 | return S 98 | end 99 | 100 | function calc_ELBO(X::Array{Float64, 2}, pri::BGMM, pos::BGMM) 101 | function logCw(nu, W) 102 | D = size(W, 1) 103 | return -0.5*nu*logdet(W) - 0.5*nu*D*log.(2) - 0.25*D*(D-1)*log.(pi) - sum([lgamma.(0.5*(nu+1-d)) for d in 1 : D]) 104 | end 105 | 106 | ln_expt_S = update_S(pos, X) 107 | expt_S = exp.(ln_expt_S) 108 | K, N = size(expt_S) 109 | D = size(X, 1) 110 | 111 | expt_ln_lkh = 0 112 | for k in 1 : K 113 | expt_Lambda = pos.cmp[k].nu * pos.cmp[k].W 114 | expt_Lambda_mu = pos.cmp[k].nu * pos.cmp[k].W * pos.cmp[k].m 115 | expt_mu_Lambda_mu = (pos.cmp[k].nu * pos.cmp[k].m' * pos.cmp[k].W * pos.cmp[k].m)[1] + D/pos.cmp[k].beta 116 | expt_ln_Lambda = sumdigamma(pos.cmp[k].nu, D) + D*log.(2) + logdet(pos.cmp[k].W) 117 | expt_ln_pi = digamma.(pos.alpha) - digamma.(sum(pos.alpha)) 118 | for n in 1 : N 119 | # 120 | expt_ln_lkh += -0.5 * expt_S[k,n]*(trace(X[:,n]*X[:,n]'*expt_Lambda) 121 | - 2*(X[:,n]'*expt_Lambda_mu)[1] 122 | + expt_mu_Lambda_mu 123 | - expt_ln_Lambda 124 | + D * log.(2*pi) 125 | ) 126 | # 127 | expt_ln_lkh += expt_S[k,n]*expt_ln_pi[k] 128 | end 129 | end 130 | # - 131 | expt_ln_lkh -= sum(expt_S.*ln_expt_S) 132 | 133 | KL_mu_Lambda = [(0.5*D*(log.(pos.cmp[k].beta) - log.(pri.cmp[k].beta) + pri.cmp[k].beta/pos.cmp[k].beta - pos.cmp[k].nu - 1) 134 | + 0.5*(pos.cmp[k].nu-pri.cmp[k].nu)*(sumdigamma(pos.cmp[k].nu, D) + D*log.(2) + logdet(pos.cmp[k].W)) 135 | + logCw(pos.cmp[k].nu, pos.cmp[k].W) - logCw(pri.cmp[k].nu, pri.cmp[k].W) 136 | + 0.5*pos.cmp[k].nu*trace((pri.cmp[k].beta*(pos.cmp[k].m-pri.cmp[k].m)*(pos.cmp[k].m-pri.cmp[k].m)' 137 | +inv(pri.cmp[k].W))*pos.cmp[k].W)) for k in 1 : K] 138 | 139 | KL_pi = (lgamma.(sum(pos.alpha)) - lgamma.(sum(pri.alpha)) 140 | - sum(lgamma.(pos.alpha)) + sum(lgamma.(pri.alpha)) 141 | + (pos.alpha - pri.alpha)' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha))) 142 | )[1] 143 | 144 | VB = expt_ln_lkh - (sum(KL_mu_Lambda) + KL_pi) 145 | return VB 146 | end 147 | 148 | function add_stats(bgmm::BGMM, X::Matrix{Float64}, S::Matrix{Float64}) 149 | D = bgmm.D 150 | K = bgmm.K 151 | sum_S = sum(S, 2) 152 | alpha = [bgmm.alpha[k] + sum_S[k] for k in 1 : K] 153 | cmp = Vector{GW}() 154 | 155 | XS = X*S'; 156 | for k in 1 : K 157 | beta = bgmm.cmp[k].beta + sum_S[k] 158 | m = (1.0/beta)*(vec(X*S[[k],:]') + bgmm.cmp[k].beta*bgmm.cmp[k].m) 159 | nu = bgmm.cmp[k].nu + sum_S[k] 160 | W = inv(X*diagm(S[k,:])*X' 161 | - beta*m*m' 162 | + bgmm.cmp[k].beta*bgmm.cmp[k].m*bgmm.cmp[k].m' 163 | + inv(bgmm.cmp[k].W)) 164 | 165 | push!(cmp, GW(beta, m, nu, W)) 166 | end 167 | return BGMM(D, K, alpha, cmp) 168 | end 169 | 170 | remove_stats(bgmm::BGMM, X::Matrix{Float64}, 
S::Matrix{Float64}) = add_stats(bgmm, X, -S) 171 | 172 | #################### 173 | ## used for Variational Inference 174 | function update_S(bgmm::BGMM, X::Matrix{Float64}) 175 | D, N = size(X) 176 | K = bgmm.K 177 | ln_S = zeros(K, N) 178 | tmp = zeros(K) 179 | 180 | tmp = NaN * zeros(K) 181 | sum_digamma_tmp = digamma.(sum(bgmm.alpha)) 182 | for k in 1 : K 183 | tmp[k] = -0.5*(bgmm.cmp[k].nu*trace(bgmm.cmp[k].m*bgmm.cmp[k].m'*bgmm.cmp[k].W) 184 | + D*(1.0/bgmm.cmp[k].beta) 185 | - (sumdigamma(bgmm.cmp[k].nu, D) + logdet(bgmm.cmp[k].W))) 186 | tmp[k] += digamma.(bgmm.alpha[k]) - sum_digamma_tmp 187 | end 188 | for n in 1 : N 189 | tmp_ln_pi = NaN * zeros(K) 190 | for k in 1 : K 191 | tmp_ln_pi[k] = tmp[k] -0.5*bgmm.cmp[k].nu*trace((X[:,n]*X[:,n]' - 2*bgmm.cmp[k].m*X[:,n]')*bgmm.cmp[k].W) 192 | end 193 | ln_S[:,n] = tmp_ln_pi - logsumexp(tmp_ln_pi) 194 | end 195 | return ln_S 196 | end 197 | 198 | """ 199 | Pick single states having a max probability. 200 | """ 201 | function winner_takes_all(S::Matrix{Float64}) 202 | S_ret = zeros(size(S)) 203 | for n in 1 : size(S_ret, 2) 204 | idx = indmax(S[:,n]) 205 | S_ret[idx,n] = 1 206 | end 207 | return S_ret 208 | end 209 | 210 | #################### 211 | ## used for Gibbs Sampling 212 | function sample_S_GS(gmm::GMM, X::Matrix{Float64}) 213 | D, N = size(X) 214 | K = gmm.K 215 | S = zeros(K, N) 216 | 217 | tmp = [0.5*logdet(gmm.cmp[k].Lambda) + log.(gmm.phi[k]) for k in 1 : K] 218 | 219 | for n in 1 : N 220 | tmp_ln_phi = [-0.5*trace(gmm.cmp[k].Lambda*(X[:,n] - gmm.cmp[k].mu)*(X[:,n] - gmm.cmp[k].mu)') + tmp[k] for k in 1 : K] 221 | tmp_ln_phi = tmp_ln_phi - logsumexp(tmp_ln_phi) 222 | S[:,n] = categorical_sample(exp.(tmp_ln_phi)) 223 | end 224 | 225 | return S 226 | end 227 | 228 | #################### 229 | ## used for Collapsed Gibbs Sampling 230 | function calc_ln_ST(Xn::Vector{Float64}, gw::GW) 231 | # TODO; need to check value? 232 | D = size(Xn, 1) 233 | W = ((1 - D + gw.nu)*gw.beta / (1 + gw.beta)) * gw.W 234 | #ln_lkh = logpdf(MvTDist(1 - D + gw.nu, gw.m, (gw.nu/(gw.nu - 2))*inv(W)), Xn) 235 | ln_lkh = logpdf(MvTDist(1 - D + gw.nu, gw.m, PDMats.PDMat(Symmetric(inv(W)))), Xn) 236 | return sum(ln_lkh) 237 | end 238 | 239 | function sample_Sn(Xn::Vector{Float64}, bgmm::BGMM) 240 | ln_tmp = [(calc_ln_ST(Xn, bgmm.cmp[k]) + log.(bgmm.alpha[k])) for k in 1 : bgmm.K] 241 | ln_tmp = ln_tmp - logsumexp(ln_tmp) 242 | Sn = categorical_sample(exp.(ln_tmp)) 243 | return Sn 244 | end 245 | 246 | function sample_S_CGS(S::Matrix{Float64}, X::Matrix{Float64}, bgmm::BGMM) 247 | D, N = size(X) 248 | K = size(S, 1) 249 | for n in randperm(N) 250 | # remove 251 | bgmm = remove_stats(bgmm, X[:,[n]], S[:,[n]]) 252 | # sample 253 | S[:,n] = sample_Sn(X[:,n], bgmm) 254 | # insert 255 | bgmm = add_stats(bgmm, X[:,[n]], S[:,[n]]) 256 | end 257 | return S, bgmm 258 | end 259 | 260 | #################### 261 | ## Algorithm main 262 | """ 263 | Compute posterior distributions via variational inference. 
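Iterates the E-step (responsibilities via `update_S`) and the M-step (posterior hyperparameters via `add_stats`), tracking the ELBO at every iteration. Returns hard assignments `S`, the posterior `BGMM`, and the ELBO trace.

A minimal usage sketch for 2-dimensional data `X` with hypothetical hyperparameters (mirroring `demo_GaussianMixtureModel.jl`):

    K = 4
    cmp = [GW(0.1, zeros(2), 3.0, eye(2)) for _ in 1 : K]
    prior = BGMM(2, K, 100.0 * ones(K), cmp)
    S_est, posterior, VB = learn_VI(X, prior, 100)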
264 | """ 265 | function learn_VI(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int) 266 | # initialisation 267 | expt_S = init_S(X, prior_bgmm) 268 | bgmm = add_stats(prior_bgmm, X, expt_S) 269 | VB = NaN * zeros(max_iter) 270 | 271 | # inference 272 | for i in 1 : max_iter 273 | # E-step 274 | expt_S = exp.(update_S(bgmm, X)) 275 | # M-step 276 | bgmm = add_stats(prior_bgmm, X, expt_S) 277 | # calc VB 278 | VB[i] = calc_ELBO(X, prior_bgmm, bgmm) 279 | end 280 | 281 | # assign binary values 282 | S = winner_takes_all(expt_S) 283 | return S, bgmm, VB 284 | end 285 | 286 | """ 287 | Compute posterior distributions via Gibbs sampling. 288 | """ 289 | function learn_GS(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int) 290 | # initialisation 291 | S = init_S(X, prior_bgmm) 292 | bgmm = add_stats(prior_bgmm, X, S) 293 | VB = NaN * zeros(max_iter) 294 | 295 | # inference 296 | for i in 1 : max_iter 297 | # sample parameters 298 | gmm = sample_GMM(bgmm) 299 | # sample latent variables 300 | S = sample_S_GS(gmm, X) 301 | # update current model 302 | bgmm = add_stats(prior_bgmm, X, S) 303 | # calc VB 304 | VB[i] = calc_ELBO(X, prior_bgmm, bgmm) 305 | end 306 | 307 | return S, bgmm, VB 308 | end 309 | 310 | """ 311 | Compute posterior distributions via collapsed Gibbs sampling. 312 | """ 313 | function learn_CGS(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int) 314 | # initialisation 315 | S = init_S(X, prior_bgmm) 316 | bgmm = add_stats(prior_bgmm, X, S) 317 | VB = NaN * zeros(max_iter) 318 | 319 | # inference 320 | for i in 1 : max_iter 321 | # directly sample S 322 | S, bgmm = sample_S_CGS(S, X, bgmm) 323 | # calc VB 324 | VB[i] = calc_ELBO(X, prior_bgmm, bgmm) 325 | end 326 | 327 | return S, bgmm, VB 328 | end 329 | 330 | end 331 | -------------------------------------------------------------------------------- /src/LogisticRegression.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Variational inference for Bayesian logistic regression. 3 | """ 4 | module LogisticRegression 5 | using Distributions 6 | 7 | export sigmoid, sample_data, VI 8 | 9 | function sigmoid(x) 10 | return 1.0 / (1.0 + exp.(-x[1])) 11 | end 12 | 13 | function bern_sample(mu) 14 | i = rand(Bernoulli(mu)) 15 | val = zeros(2) 16 | val[i+1] = 1 17 | return val 18 | end 19 | 20 | """ 21 | Sample data & parameter given covariance Sigma_w and inputs X. 22 | """ 23 | function sample_data(X, Sigma_w) 24 | N = size(X, 2) 25 | M = size(Sigma_w, 1) 26 | 27 | # sample parameters 28 | W = rand(MvNormal(zeros(M), Sigma_w)) 29 | 30 | # sample data 31 | Y = [rand(Bernoulli(sigmoid(W'*X[:, n]))) for n in 1 : N] 32 | return Y, W 33 | end 34 | 35 | """ 36 | Compute variational parameters. 
37 | """ 38 | function VI(Y, X, M, Sigma_w, alpha, max_iter) 39 | function rho2sig(rho) 40 | return log.(1 + exp.(rho)) 41 | end 42 | 43 | function compute_df_dw(Y, X, Sigma_w, mu, rho, W) 44 | M, N = size(X) 45 | term1 = (mu - W) ./ rho2sig(rho).^2 46 | term2 = inv(Sigma_w)*W 47 | term3 = 0 48 | for n in 1 : N 49 | term3 += -(Y[n] - sigmoid(W'*X[:,n])) * X[:,n] 50 | end 51 | return term1 + term2 + term3 52 | end 53 | 54 | function compute_df_dmu(mu, rho, W) 55 | return (W - mu) ./ rho2sig(rho).^2 56 | end 57 | 58 | function compute_df_drho(Y, X, Sigma_w, mu, rho, W) 59 | return -0.5*((W - mu).^2 - rho2sig(rho).^2) .* compute_dprec_drho(rho) 60 | end 61 | 62 | function compute_dprec_drho(rho) 63 | return 2 * rho2sig(rho) .^ (-3) .* (1 ./ (1+exp.(rho))).^2 .* (1 ./ (1+exp.(-rho))) 64 | end 65 | 66 | # diag gaussian for approximate posterior 67 | mu = randn(M) 68 | rho = randn(M) # sigma = log.(1 + exp.(rho)) 69 | 70 | for i in 1 : max_iter 71 | # sample epsilon 72 | ep = rand(M) 73 | W_tmp = mu + log.(1 + exp.(rho)) .* ep 74 | 75 | # calculate gradient 76 | df_dw = compute_df_dw(Y, X, Sigma_w, mu, rho, W_tmp) 77 | df_dmu = compute_df_dmu(mu, rho, W_tmp) 78 | df_drho = compute_df_drho(Y, X, Sigma_w, mu, rho, W_tmp) 79 | d_mu = df_dw + df_dmu 80 | d_rho = df_dw .* (ep ./ (1+exp.(-rho))) + df_drho 81 | 82 | # update variational parameters 83 | mu = mu - alpha * d_mu 84 | rho = rho - alpha * d_rho 85 | end 86 | return mu, rho 87 | end 88 | 89 | end 90 | -------------------------------------------------------------------------------- /src/NMF.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Variational inference for Bayesian NMF 3 | """ 4 | module NMF 5 | using Distributions 6 | using StatsFuns.logsumexp, SpecialFunctions.digamma 7 | 8 | export NMFModel 9 | export sample_data, VI 10 | 11 | #################### 12 | ## Types 13 | struct NMFModel 14 | a_t::Array{Float64, 2} # D x K 15 | b_t::Array{Float64, 2} # D x L 16 | a_v::Float64 # 1 dim 17 | b_v::Float64 # 1 dim 18 | end 19 | 20 | function sqsum(mat::Array, idx) 21 | return squeeze(sum(mat, idx), idx) 22 | end 23 | 24 | #################### 25 | ## functions 26 | function init(X::Array{Int64, 2}, model::NMFModel) 27 | D, N = size(X) 28 | K = size(model.a_t, 2) 29 | S = zeros(D, K, N) 30 | A_t = rand(D, K) 31 | B_t = rand(D, K) 32 | A_v = rand(K, N) 33 | B_v = rand(K, N) 34 | for d in 1 : D 35 | for k in 1 : K 36 | for n in 1 : N 37 | S[d,k,n] = X[d,n] * A_t[d,k] * B_t[d,k] * A_v[k,n] * B_v[k,n] 38 | end 39 | end 40 | end 41 | return S, A_t, B_t, A_v, B_v 42 | end 43 | 44 | function update_S(X::Array{Int64, 2}, A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, A_v::Array{Float64, 2}, B_v::Array{Float64, 2}) 45 | D, K = size(A_t) 46 | N = size(A_v, 2) 47 | S = zeros(D, K, N) 48 | for d in 1 : D 49 | for n in 1 : N 50 | # K dim 51 | ln_P = (digamma.(A_t[d,:]) + log.(B_t[d,:]) 52 | + digamma.(A_v[:,n]) + log.(B_v[:,n]) 53 | ) 54 | ln_P = ln_P - logsumexp(ln_P) 55 | S[d,:,n] = X[d,n] * exp.(ln_P) 56 | end 57 | end 58 | return S 59 | end 60 | 61 | function update_T(S::Array{Float64, 3}, A_v::Array{Float64, 2}, B_v::Array{Float64, 2}, model::NMFModel) 62 | D, K, N = size(S) 63 | a_t = model.a_t # DxK 64 | b_t = model.b_t # DxK 65 | A_t = a_t + sqsum(S, 3) 66 | B_t = (a_t ./ b_t + repmat(sqsum(A_v.*B_v, 2)', D, 1)).^(-1) 67 | return A_t, B_t 68 | end 69 | 70 | function update_V(S::Array{Float64, 3}, A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, model::NMFModel) 71 | a_v = model.a_v 72 | b_v = model.b_v 
73 | D, K, N = size(S) 74 | A_v = a_v + sqsum(S, 1) 75 | B_v = (a_v / b_v + repmat(sqsum(A_t.*B_t, 1), 1, N)).^(-1) 76 | return A_v, B_v 77 | end 78 | 79 | """ 80 | Sample data given hyperparameters. 81 | """ 82 | function sample_data(N::Int, model::NMFModel) 83 | # TODO; check b or 1/b ? 84 | D, K = size(model.a_t) 85 | 86 | T = zeros(D, K) 87 | for d in 1 : D 88 | for k in 1 : K 89 | T[d,k] = rand(Gamma(model.a_t[d,k], 1.0/model.b_t[d,k])) # TODO: check 90 | end 91 | end 92 | 93 | V = reshape(rand(Gamma(model.a_v, 1.0/model.b_v), K*N) , K, N) # TODO: check 94 | 95 | S = zeros(D, K, N) 96 | for d in 1 : D 97 | for k in 1 : K 98 | for n in 1 : N 99 | S[d,k,n] = T[d,k] * V[k,n] 100 | end 101 | end 102 | end 103 | #X = sqsum(S, 2) + 0.0 # zero noise 104 | X = sqsum(S, 2) 105 | return X, T, S, V 106 | end 107 | 108 | function update_model(A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, model::NMFModel) 109 | return NMFModel(A_t, B_t, model.a_v, model.b_v) 110 | end 111 | 112 | """ 113 | Compute variational posterior distributions. 114 | """ 115 | function VI(X::Array{Int64, 2}, model::NMFModel, max_iter::Int) 116 | K = size(model.a_t, 2) 117 | D, N = size(X) 118 | S, A_t, B_t, A_v, B_v = init(X, model) 119 | for iter in 1 : max_iter 120 | # latent 121 | S = update_S(X, A_t, B_t, A_v, B_v) 122 | A_v, B_v = update_V(S, A_t, B_t, model) 123 | 124 | # param 125 | A_t, B_t = update_T(S, A_v, B_v, model) 126 | end 127 | 128 | return update_model(A_t, B_t, model), S, A_t.*B_t, A_v.*B_v 129 | end 130 | 131 | end 132 | -------------------------------------------------------------------------------- /src/PoissonHMM.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Bayesian 1dim Poisson Hidden Markov Model 3 | """ 4 | module PoissonHMM 5 | using StatsFuns.logsumexp, SpecialFunctions.digamma 6 | using Distributions 7 | 8 | export Gam, GHMM, Poi, HMM 9 | export sample_HMM, sample_data, winner_takes_all 10 | export learn_VI 11 | 12 | #################### 13 | ## Types 14 | struct Gam 15 | # Parameters of Gamma distribution 16 | # 1dim 17 | a::Float64 18 | b::Float64 19 | end 20 | 21 | struct BHMM 22 | # Parameters of Bayesian Bernoulli Mixture Model 23 | K::Int 24 | alpha_phi::Vector{Float64} 25 | alpha_A::Matrix{Float64} 26 | cmp::Vector{Gam} 27 | end 28 | 29 | struct Poi 30 | # Parameters of Poisson Distribution 31 | # 1 dim 32 | lambda::Float64 33 | end 34 | 35 | struct HMM 36 | # Parameters of Bernoulli Mixture Model 37 | K::Int 38 | phi::Vector{Float64} 39 | A::Matrix{Float64} 40 | cmp::Vector{Poi} 41 | end 42 | 43 | #################### 44 | ## Common functions 45 | """ 46 | Sample an HMM from prior 47 | """ 48 | function sample_HMM(bhmm::BHMM) 49 | cmp = Vector{Poi}() 50 | for c in bhmm.cmp 51 | lambda = rand(Gamma(c.a, 1.0/c.b)) 52 | push!(cmp, Poi(lambda)) 53 | end 54 | phi = rand(Dirichlet(bhmm.alpha_phi)) 55 | A = zeros(size(bhmm.alpha_A)) 56 | for k in 1 : bhmm.K 57 | A[:,k] = rand(Dirichlet(bhmm.alpha_A[:,k])) 58 | end 59 | return HMM(bhmm.K, phi, A, cmp) 60 | end 61 | 62 | """ 63 | Sample data from a specific Poisson HMM 64 | """ 65 | function sample_data(hmm::HMM, N::Int) 66 | X = zeros(N) 67 | Z = zeros(hmm.K, N) 68 | 69 | # sample (n=1) 70 | Z[:,1] = categorical_sample(hmm.phi) 71 | k = indmax(Z[:, 1]) 72 | X[1] = rand(Poisson(hmm.cmp[k].lambda)) 73 | 74 | # sample (n>1) 75 | for n in 2 : N 76 | Z[:,n] = categorical_sample(hmm.A[:,k]) 77 | k = indmax(Z[:, n]) 78 | X[n] = rand(Poisson(hmm.cmp[k].lambda)) 79 | end 80 | return X, Z 81 | end 82 | 
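# A minimal usage sketch (hypothetical hyperparameters): draw a 2-state Poisson HMM
# from the prior and generate a length-100 sequence.
#
#   prior = BHMM(2, ones(2), ones(2, 2), [Gam(1.0, 0.01), Gam(1.0, 0.01)])
#   hmm = sample_HMM(prior)
#   X, Z = sample_data(hmm, 100)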
83 | categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1] 84 | function categorical_sample(p::Vector{Float64}, N::Int) 85 | K = length(p) 86 | S = zeros(K, N) 87 | S_tmp = rand(Categorical(p), N) 88 | for k in 1 : K 89 | S[k,find(S_tmp.==k)] = 1 90 | end 91 | return S 92 | end 93 | 94 | function init_Z(X::Vector{Float64}, bhmm::BHMM) 95 | N = size(X, 1) 96 | K = bhmm.K 97 | 98 | Z = rand(Dirichlet(ones(K)/K), N) 99 | ZZ = [zeros(K,K) for _ in 1 : N - 1] 100 | for n in 1 : N - 1 101 | ZZ[n] = Z[:,n+1] * Z[:,n]' 102 | end 103 | 104 | return Z, ZZ 105 | end 106 | 107 | """ 108 | Not implemented yet. 109 | """ 110 | function calc_ELBO(X::Matrix{Float64}, pri::BHMM, pos::BHMM) 111 | end 112 | 113 | function add_stats(bhmm::BHMM, X::Vector{Float64}, 114 | Z::Matrix{Float64}, ZZ::Vector{Matrix{Float64}}) 115 | K = bhmm.K 116 | sum_Z = sum(Z, 2) 117 | alpha_phi = [bhmm.alpha_phi[k] + Z[k,1] for k in 1 : K] 118 | alpha_A = bhmm.alpha_A + sum(ZZ) 119 | cmp = Vector{Gam}() 120 | 121 | ZX = Z*X # (KxN) x (Nx1) = Kx1 122 | for k in 1 : K 123 | a = bhmm.cmp[k].a + ZX[k] 124 | b = bhmm.cmp[k].b + sum_Z[k] 125 | push!(cmp, Gam(a, b)) 126 | end 127 | return BHMM(K, alpha_phi, alpha_A, cmp) 128 | end 129 | 130 | remove_stats(bhmm::BHMM, X::Vector{Float64}, Z::Matrix{Float64}) = add_stats(bhmm, X, -Z) 131 | 132 | #################### 133 | ## used for Variational Inference 134 | function update_Z(bhmm::BHMM, X::Vector{Float64}, Z::Matrix{Float64}) 135 | N = size(X, 1) 136 | K = bhmm.K 137 | ln_expt_Z = zeros(K, N) 138 | 139 | ln_lkh = zeros(K, N) 140 | for k in 1 : K 141 | ln_lambda = digamma.(bhmm.cmp[k].a) - log.(bhmm.cmp[k].b) 142 | lambda = bhmm.cmp[k].a / bhmm.cmp[k].b 143 | for n in 1 : N 144 | ln_lkh[k,n] = X[n]'*(ln_lambda) - lambda 145 | end 146 | end 147 | 148 | expt_ln_A = zeros(size(bhmm.alpha_A)) 149 | for k in 1 : K 150 | expt_ln_A[:,k] = digamma.(bhmm.alpha_A[:,k]) - digamma.(sum(bhmm.alpha_A[:,k])) 151 | end 152 | 153 | # copy 154 | ln_expt_Z = log.(Z) 155 | 156 | # n = 1 157 | ln_expt_Z[:,1] = (digamma.(bhmm.alpha_phi) - digamma.(sum(bhmm.alpha_phi)) 158 | + expt_ln_A' * exp.(ln_expt_Z[:,2]) 159 | + ln_lkh[:,1] 160 | ) 161 | ln_expt_Z[:,1] = ln_expt_Z[:,1] - logsumexp(ln_expt_Z[:,1]) 162 | 163 | # 2 <= n <= N - 1 164 | for n in 2 : N - 1 165 | ln_expt_Z[:,n] =( expt_ln_A * exp.(ln_expt_Z[:,n-1]) 166 | + expt_ln_A' * exp.(ln_expt_Z[:,n+1]) 167 | + ln_lkh[:,n] 168 | ) 169 | ln_expt_Z[:,n] = ln_expt_Z[:,n] - logsumexp(ln_expt_Z[:,n]) 170 | end 171 | 172 | # n = N 173 | ln_expt_Z[:,N] =( expt_ln_A * exp.(ln_expt_Z[:,N-1]) 174 | + ln_lkh[:,N] 175 | ) 176 | ln_expt_Z[:,N] = ln_expt_Z[:,N] - logsumexp(ln_expt_Z[:,N]) 177 | 178 | # calc output 179 | Z_ret = exp.(ln_expt_Z) 180 | ZZ_ret = [zeros(K,K) for _ in 1 : N - 1] 181 | for n in 1 : N - 1 182 | ZZ_ret[n] = Z_ret[:,n+1] * Z_ret[:,n]' 183 | end 184 | return Z_ret, ZZ_ret 185 | end 186 | 187 | """ 188 | Pick single states having a max probability. 
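Returns a matrix of the same size as `Z` with exactly one 1 per column, placed at the row with the largest posterior probability.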
189 | """ 190 | function winner_takes_all(Z::Matrix{Float64}) 191 | Z_ret = zeros(size(Z)) 192 | for n in 1 : size(Z_ret, 2) 193 | idx = indmax(Z[:,n]) 194 | Z_ret[idx,n] = 1 195 | end 196 | return Z_ret 197 | end 198 | 199 | function logmatprod(ln_A::Array{Float64}, ln_B::Array{Float64}) 200 | I = size(ln_A, 1) 201 | J = size(ln_B, 2) 202 | ln_C = zeros(I, J) 203 | for i in 1 : I 204 | for j in 1 : J 205 | ln_C[i, j] = logsumexp(ln_A[i, :] + ln_B[:, j]) 206 | end 207 | end 208 | return ln_C 209 | end 210 | 211 | function update_Z_fb(bhmm::BHMM, X::Vector{Float64}) 212 | K = bhmm.K 213 | N = length(X) 214 | 215 | # calc likelihood 216 | ln_lik = zeros(K, N) 217 | for k in 1 : K 218 | ln_lambda = digamma.(bhmm.cmp[k].a) - log.(bhmm.cmp[k].b) 219 | lambda = bhmm.cmp[k].a / bhmm.cmp[k].b 220 | for n in 1 : N 221 | ln_lik[k,n] =X[n]'*(ln_lambda) - lambda 222 | end 223 | end 224 | expt_ln_phi = digamma.(bhmm.alpha_phi) - digamma.(sum(bhmm.alpha_phi)) 225 | expt_ln_A = zeros(K,K) 226 | for k in 1 : K 227 | expt_ln_A[:,k] = digamma.(bhmm.alpha_A[:,k]) - digamma.(sum(bhmm.alpha_A[:,k])) 228 | end 229 | 230 | Z, ZZ = fb_alg(ln_lik, expt_ln_phi, expt_ln_A) 231 | 232 | # different notation 233 | ZZ_ret = [ZZ[:,:,n] for n in 1:size(ZZ, 3)] 234 | 235 | return Z, ZZ_ret 236 | end 237 | 238 | function fb_alg(ln_lik::Matrix{Float64}, ln_phi::Vector{Float64}, ln_A::Matrix{Float64}) 239 | K, T = size(ln_lik) 240 | ln_Z = zeros(K, T) 241 | ln_ZZ = zeros(K, K, T) 242 | ln_alpha = zeros(K, T) 243 | ln_beta = zeros(K, T) 244 | ln_st = zeros(T) 245 | 246 | for t in 1 : T 247 | if t == 1 248 | ln_alpha[:, 1] = ln_phi + ln_lik[:, 1] 249 | else 250 | ln_alpha[:, t] = logmatprod(ln_A, ln_alpha[:, t-1]) + ln_lik[:, t] 251 | end 252 | ln_st[t] = logsumexp(ln_alpha[:, t]) 253 | ln_alpha[:,t] = ln_alpha[:,t] - ln_st[t] 254 | end 255 | 256 | for t in T-1 : -1 : 1 257 | ln_beta[:, t] = logmatprod(ln_A', ln_beta[:, t+1] + ln_lik[:,t+1]) 258 | ln_beta[:, t] = ln_beta[:, t] - ln_st[t+1] 259 | end 260 | 261 | ln_Z = ln_alpha + ln_beta 262 | for t in 1 : T 263 | if t < T 264 | ln_ZZ[:,:,t] = (repmat(ln_alpha[:, t]', K, 1) + ln_A 265 | + repmat(ln_lik[:, t+1] + ln_beta[:,t+1], 1, K)) 266 | ln_ZZ[:,:,t] = ln_ZZ[:,:,t] - ln_st[t+1] 267 | end 268 | end 269 | return exp.(ln_Z), exp.(ln_ZZ) 270 | end 271 | 272 | """ 273 | Compute approximate posterior distributions via variational inference. 
274 | """ 275 | function learn_VI(X::Vector{Float64}, prior_bhmm::BHMM, max_iter::Int) 276 | # initialisation 277 | expt_Z, expt_ZZ = init_Z(X, prior_bhmm) 278 | bhmm = add_stats(prior_bhmm, X, expt_Z, expt_ZZ) 279 | VB = NaN * zeros(max_iter) 280 | 281 | # inference 282 | for i in 1 : max_iter 283 | # E-step 284 | #expt_Z, expt_ZZ = update_Z(bhmm, X, expt_Z) 285 | expt_Z, expt_ZZ = update_Z_fb(bhmm, X) 286 | # M-step 287 | bhmm = add_stats(prior_bhmm, X, expt_Z, expt_ZZ) 288 | end 289 | 290 | return expt_Z, bhmm 291 | end 292 | end 293 | -------------------------------------------------------------------------------- /src/PoissonMixtureModel.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Bayesian Poisson Mixture Model 3 | """ 4 | module PoissonMixtureModel 5 | using StatsFuns.logsumexp, SpecialFunctions.digamma 6 | using Distributions 7 | 8 | export Gam, BPMM, Poi, PMM 9 | export sample_PMM, sample_data, winner_takes_all 10 | export learn_GS, learn_CGS, learn_VI 11 | 12 | #################### 13 | ## Types 14 | struct Gam 15 | # Parameters of Gamma distribution 16 | a::Vector{Float64} 17 | b::Float64 18 | end 19 | 20 | struct BPMM 21 | # Parameters of Bayesian Poisson Mixture Model 22 | D::Int 23 | K::Int 24 | alpha::Vector{Float64} 25 | cmp::Vector{Gam} 26 | end 27 | 28 | struct Poi 29 | # Parameters of Poisson Distribution 30 | lambda::Vector{Float64} 31 | end 32 | 33 | struct PMM 34 | # Parameters of Poisson Mixture Model 35 | D::Int 36 | K::Int 37 | phi::Vector{Float64} 38 | cmp::Vector{Poi} 39 | end 40 | 41 | #################### 42 | ## Common functions 43 | """ 44 | Sample a PMM given hyperparameters. 45 | """ 46 | function sample_PMM(bpmm::BPMM) 47 | cmp = Vector{Poi}() 48 | for c in bpmm.cmp 49 | lambda = Vector{Float64}() 50 | for d in 1 : bpmm.D 51 | push!(lambda, rand(Gamma(c.a[d], 1.0/c.b))) 52 | end 53 | push!(cmp, Poi(lambda)) 54 | end 55 | phi = rand(Dirichlet(bpmm.alpha)) 56 | return PMM(bpmm.D, bpmm.K, phi, cmp) 57 | end 58 | 59 | """ 60 | Sample data from a specific PMM model. 
61 | """ 62 | function sample_data(pmm::PMM, N::Int) 63 | X = zeros(pmm.D, N) 64 | S = categorical_sample(pmm.phi, N) 65 | for n in 1 : N 66 | k = indmax(S[:, n]) 67 | for d in 1 : pmm.D 68 | X[d,n] = rand(Poisson(pmm.cmp[k].lambda[d])) 69 | end 70 | end 71 | return X, S 72 | end 73 | 74 | categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1] 75 | function categorical_sample(p::Vector{Float64}, N::Int) 76 | K = length(p) 77 | S = zeros(K, N) 78 | S_tmp = rand(Categorical(p), N) 79 | for k in 1 : K 80 | S[k,find(S_tmp.==k)] = 1 81 | end 82 | return S 83 | end 84 | 85 | function init_S(X::Matrix{Float64}, bpmm::BPMM) 86 | N = size(X, 2) 87 | K = bpmm.K 88 | S = categorical_sample(ones(K)/K, N) 89 | return S 90 | end 91 | 92 | function calc_ELBO(X::Matrix{Float64}, pri::BPMM, pos::BPMM) 93 | ln_expt_S = update_S(pos, X) 94 | expt_S = exp.(ln_expt_S) 95 | K, N = size(expt_S) 96 | D = size(X, 1) 97 | 98 | expt_ln_lambda = zeros(D, K) 99 | expt_lambda = zeros(D, K) 100 | expt_ln_lkh = 0 101 | for k in 1 : K 102 | expt_ln_lambda[:,k] = digamma.(pos.cmp[k].a) - log.(pos.cmp[k].b) 103 | expt_lambda[:,k] = pos.cmp[k].a / pos.cmp[k].b 104 | for n in 1 : N 105 | expt_ln_lkh += expt_S[k,n] * (X[:, n]' * expt_ln_lambda[:,k] 106 | - sum(expt_lambda[:,k]) - sum(lgamma.(X[:,n]+1)))[1] 107 | end 108 | end 109 | 110 | expt_ln_pS = sum(expt_S' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha)))) 111 | expt_ln_qS = sum(expt_S .* ln_expt_S) 112 | 113 | KL_lambda = 0 114 | for k in 1 : K 115 | KL_lambda += (sum(pos.cmp[k].a)*log.(pos.cmp[k].b) - sum(pri.cmp[k].a)*log.(pri.cmp[k].b) 116 | - sum(lgamma.(pos.cmp[k].a)) + sum(lgamma.(pri.cmp[k].a)) 117 | + (pos.cmp[k].a - pri.cmp[k].a)' * expt_ln_lambda[:,k] 118 | + (pri.cmp[k].b - pos.cmp[k].b) * sum(expt_lambda[:,k]) 119 | )[1] 120 | end 121 | KL_pi = (lgamma.(sum(pos.alpha)) - lgamma.(sum(pri.alpha)) 122 | - sum(lgamma.(pos.alpha)) + sum(lgamma.(pri.alpha)) 123 | + (pos.alpha - pri.alpha)' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha))) 124 | )[1] 125 | 126 | VB = expt_ln_lkh + expt_ln_pS - expt_ln_qS - (KL_lambda + KL_pi) 127 | return VB 128 | end 129 | 130 | function add_stats(bpmm::BPMM, X::Matrix{Float64}, S::Matrix{Float64}) 131 | D = bpmm.D 132 | K = bpmm.K 133 | sum_S = sum(S, 2) 134 | alpha = [bpmm.alpha[k] + sum_S[k] for k in 1 : K] 135 | cmp = Vector{Gam}() 136 | 137 | XS = X*S'; 138 | for k in 1 : K 139 | a = [(bpmm.cmp[k].a[d] + XS[d,k])::Float64 for d in 1 : D] 140 | b = bpmm.cmp[k].b + sum_S[k] 141 | push!(cmp, Gam(a, b)) 142 | end 143 | return BPMM(D, K, alpha, cmp) 144 | end 145 | 146 | remove_stats(bpmm::BPMM, X::Matrix{Float64}, S::Matrix{Float64}) = add_stats(bpmm, X, -S) 147 | 148 | #################### 149 | ## used for Variational Inference 150 | function update_S(bpmm::BPMM, X::Matrix{Float64}) 151 | D, N = size(X) 152 | K = bpmm.K 153 | ln_expt_S = zeros(K, N) 154 | tmp = zeros(K) 155 | 156 | sum_digamma_tmp = digamma.(sum(bpmm.alpha)) 157 | for k in 1 : K 158 | tmp[k] = - sum(bpmm.cmp[k].a) / bpmm.cmp[k].b 159 | tmp[k] += digamma.(bpmm.alpha[k]) - sum_digamma_tmp 160 | end 161 | ln_lambda_X = [X'*(digamma.(bpmm.cmp[k].a) - log.(bpmm.cmp[k].b)) for k in 1 : K] 162 | for n in 1 : N 163 | tmp_ln_pi = [tmp[k] + ln_lambda_X[k][n] for k in 1 : K] 164 | ln_expt_S[:,n] = tmp_ln_pi - logsumexp(tmp_ln_pi) 165 | end 166 | return ln_expt_S 167 | end 168 | 169 | """ 170 | Pick single states having a max probability. 
171 | """ 172 | function winner_takes_all(S::Matrix{Float64}) 173 | S_ret = zeros(size(S)) 174 | for n in 1 : size(S_ret, 2) 175 | idx = indmax(S[:,n]) 176 | S_ret[idx,n] = 1 177 | end 178 | return S_ret 179 | end 180 | 181 | #################### 182 | ## used for Gibbs Sampling 183 | function sample_S_GS(pmm::PMM, X::Matrix{Float64}) 184 | D, N = size(X) 185 | K = pmm.K 186 | S = zeros(K, N) 187 | 188 | tmp = [-sum(pmm.cmp[k].lambda) + log.(pmm.phi[k]) for k in 1 : K] 189 | ln_lambda_X = [X'*log.(pmm.cmp[k].lambda) for k in 1 : K] 190 | for n in 1 : N 191 | tmp_ln_phi = [(tmp[k] + ln_lambda_X[k][n])::Float64 for k in 1 : K] 192 | tmp_ln_phi = tmp_ln_phi - logsumexp(tmp_ln_phi) 193 | S[:,n] = categorical_sample(exp.(tmp_ln_phi)) 194 | end 195 | return S 196 | end 197 | 198 | #################### 199 | ## used for Collapsed Gibbs Sampling 200 | function calc_ln_NB(Xn::Vector{Float64}, gam::Gam) 201 | ln_lkh = [(gam.a[d]*log.(gam.b) 202 | - lgamma.(gam.a[d]) 203 | + lgamma.(Xn[d] + gam.a[d]) 204 | - (Xn[d] + gam.a[d])*log.(gam.b + 1) 205 | )::Float64 for d in 1 : size(Xn, 1)] 206 | return sum(ln_lkh) 207 | end 208 | 209 | function sample_Sn(Xn::Vector{Float64}, bpmm::BPMM) 210 | ln_tmp = [(calc_ln_NB(Xn, bpmm.cmp[k]) + log.(bpmm.alpha[k])) for k in 1 : bpmm.K] 211 | ln_tmp = ln_tmp - logsumexp(ln_tmp) 212 | Sn = categorical_sample(exp.(ln_tmp)) 213 | return Sn 214 | end 215 | 216 | function sample_S_CGS(S::Matrix{Float64}, X::Matrix{Float64}, bpmm::BPMM) 217 | D, N = size(X) 218 | K = size(S, 1) 219 | for n in randperm(N) 220 | # remove 221 | bpmm = remove_stats(bpmm, X[:,[n]], S[:,[n]]) 222 | # sample 223 | S[:,n] = sample_Sn(X[:,n], bpmm) 224 | # insert 225 | bpmm = add_stats(bpmm, X[:,[n]], S[:,[n]]) 226 | end 227 | return S, bpmm 228 | end 229 | 230 | #################### 231 | ## Algorithm main 232 | """ 233 | Compute posterior distribution via variational inference. 234 | """ 235 | function learn_VI(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int) 236 | # initialisation 237 | expt_S = init_S(X, prior_bpmm) 238 | bpmm = add_stats(prior_bpmm, X, expt_S) 239 | VB = NaN * zeros(max_iter) 240 | 241 | # inference 242 | for i in 1 : max_iter 243 | # E-step 244 | expt_S = exp.(update_S(bpmm, X)) 245 | # M-step 246 | bpmm = add_stats(prior_bpmm, X, expt_S) 247 | # calc VB 248 | VB[i] = calc_ELBO(X, prior_bpmm, bpmm) 249 | end 250 | 251 | return expt_S, bpmm, VB 252 | end 253 | 254 | """ 255 | Compute posterior distribution via Gibbs sampling. 256 | """ 257 | function learn_GS(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int) 258 | # initialisation 259 | S = init_S(X, prior_bpmm) 260 | bpmm = add_stats(prior_bpmm, X, S) 261 | VB = NaN * zeros(max_iter) 262 | 263 | # inference 264 | for i in 1 : max_iter 265 | # sample parameters 266 | pmm = sample_PMM(bpmm) 267 | # sample latent variables 268 | S = sample_S_GS(pmm, X) 269 | # update current model 270 | bpmm = add_stats(prior_bpmm, X, S) 271 | # calc VB 272 | VB[i] = calc_ELBO(X, prior_bpmm, bpmm) 273 | end 274 | 275 | return S, bpmm, VB 276 | end 277 | 278 | """ 279 | Compute posterior distribution via collapsed Gibbs sampling. 
280 | """ 281 | function learn_CGS(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int) 282 | # initialisation 283 | S = init_S(X, prior_bpmm) 284 | bpmm = add_stats(prior_bpmm, X, S) 285 | VB = NaN * zeros(max_iter) 286 | 287 | # inference 288 | for i in 1 : max_iter 289 | # directly sample S 290 | S, bpmm = sample_S_CGS(S, X, bpmm) 291 | # calc VB 292 | VB[i] = calc_ELBO(X, prior_bpmm, bpmm) 293 | end 294 | 295 | return S, bpmm, VB 296 | end 297 | 298 | end 299 | -------------------------------------------------------------------------------- /src/demo_BayesNeuralNet.jl: -------------------------------------------------------------------------------- 1 | #################################### 2 | ## Demo script for Bayesian neural network. 3 | 4 | using PyPlot, PyCall 5 | 6 | push!(LOAD_PATH, ".") 7 | import BayesNeuralNet 8 | 9 | """ 10 | Sample neural nets from prior. 11 | """ 12 | function sample_test() 13 | # model parameters 14 | D = 1 # output 15 | K = 3 # hidden 16 | M = 2 # input 17 | sigma2_w = 10.0 18 | sigma2_y = 0.1 19 | 20 | xmin = -5 21 | xmax = 5 22 | N_lin = 1000 23 | X_lin = ones(M, N_lin) 24 | X_lin[1,:] = linspace(xmin, xmax, N_lin) 25 | X_lin[2,:] = 1 # bias 26 | 27 | # visualize 28 | num_samples = 5 29 | figure("Function samples") 30 | clf() 31 | for i in 1 : num_samples 32 | _, Y_true, _, _ = BayesNeuralNet.sample_data_from_prior(X_lin, sigma2_w, sigma2_y, D, K) 33 | plot(X_lin[1,:], Y_true) 34 | xlim([xmin, xmax]) 35 | end 36 | ratey = (ylim()[2] - ylim()[1]) * 0.1 37 | ratex = (xlim()[2] - xlim()[1]) * 0.1 38 | text(xlim()[1] + ratex, ylim()[2] - ratey, @sprintf("K=%d", K), fontsize=18) 39 | show() 40 | end 41 | 42 | """ 43 | Run a test script of variational inference for Bayesian neural net. 44 | """ 45 | function test() 46 | ################# 47 | # prepara data 48 | 49 | # data size 50 | D = 1 # output 51 | M = 2 # input 52 | 53 | # function setting 54 | xmin = -2 55 | xmax = 4 56 | N_lin = 1000 57 | X_lin = ones(M, N_lin) 58 | X_lin[1,:] = linspace(xmin, xmax, N_lin) 59 | X_lin[2,:] = 1 # bias 60 | 61 | # training data 62 | N = 50 # data size 63 | X = 2*rand(M, N) - 0.0 # input 64 | X[2,:] = 1.0 # bias 65 | Y = 0.5*sin.(2*pi * X[1,:]/3) + 0.05 * randn(N) 66 | 67 | # model parameters 68 | K = 5 69 | sigma2_w = 10.0 70 | sigma2_y = 0.01 71 | 72 | ################ 73 | # inference 74 | alpha = 1.0e-5 75 | max_iter = 100000 76 | mu1, rho1, mu2, rho2 = BayesNeuralNet.VI(Y, X, sigma2_w, sigma2_y, K, alpha, max_iter) 77 | Y_mean = [mu2'* tanh.(mu1'X_lin[:,n]) for n in 1 : N_lin] 78 | 79 | ################ 80 | # visualize 81 | figure("result") 82 | clf() 83 | Y_list = [] 84 | num_samples = 100 85 | for i in 1 : num_samples 86 | Y_est, _ = BayesNeuralNet.sample_data_from_posterior(X_lin, mu1, rho1, mu2, rho2, sigma2_y, D) 87 | push!(Y_list, Y_est) 88 | plot(X_lin[1,:], Y_est, "-c", alpha=0.25) 89 | end 90 | plot(X[1,:], Y, "ok") 91 | plot(X_lin[1,:], Y_mean, "b-") 92 | xlim([xmin, xmax]) 93 | xlabel("x") 94 | ylabel("y") 95 | show() 96 | end 97 | 98 | #sample_test() 99 | test() 100 | -------------------------------------------------------------------------------- /src/demo_DimensionalityReduction.jl: -------------------------------------------------------------------------------- 1 | ################################### 2 | ## Demo script for Bayesian Dimensionality Reduction 3 | 4 | using PyPlot, PyCall 5 | @pyimport sklearn.datasets as datasets 6 | 7 | push!(LOAD_PATH,".") 8 | import DimensionalityReduction 9 | 10 | function load_facedata(skip::Int) 11 | face = 
datasets.fetch_olivetti_faces() 12 | Y_raw = face["images"] 13 | N, S_raw, _ = size(Y_raw) 14 | 15 | L = round(Int, S_raw / skip) 16 | Y_tmp = Y_raw[:,1:skip:end, 1:skip:end] 17 | Y = convert(Array{Float64, 2}, reshape(Y_tmp, N, size(Y_tmp,2)*size(Y_tmp,3))') 18 | D = size(Y, 1) 19 | 20 | return Y, D, L 21 | end 22 | 23 | function visualize(Y::Array{Float64,2}, L::Int) 24 | D, N = size(Y) 25 | base = round(Int, sqrt(N)) 26 | v = round(Int, (L*ceil(N / base))) 27 | h = L * base 28 | pic = zeros(v, h) 29 | 30 | for n in 1 : N 31 | i = round(Int, (L*ceil(n / base))) 32 | idx1 = i - L + 1 : i 33 | idx2 = L*mod(n-1, base)+1 : L*(mod(n-1, base) + 1) 34 | pic[idx1,idx2] = reshape(Y[:,n], L, L) 35 | end 36 | imshow(pic, cmap=ColorMap("gray")) 37 | end 38 | 39 | function visualize(Y::Array{Float64,2}, L::Int, mask::BitArray{2}) 40 | # for missing 41 | D, N = size(Y) 42 | base = round(Int, sqrt(N)) 43 | v = round(Int, (L*ceil(N / base))) 44 | h = L * base 45 | pic = zeros(v, h, 3) 46 | 47 | Y_3dim = zeros(D, N, 3) 48 | for i in 1 : 3 49 | if i == 2 50 | Y_tmp = deepcopy(Y) 51 | Y_tmp[mask] = 1 52 | Y_3dim[:,:,i] = Y_tmp 53 | else 54 | Y_tmp = deepcopy(Y) 55 | Y_tmp[mask] = 0 56 | Y_3dim[:,:,i] = Y_tmp 57 | end 58 | end 59 | 60 | for n in 1 : N 61 | i = round(Int, (L*ceil(n / base))) 62 | idx1 = i - L + 1 : i 63 | idx2 = L*mod(n-1, base)+1 : L*(mod(n-1, base) + 1) 64 | for i in 1 : 3 65 | pic[idx1,idx2,i] = reshape(Y_3dim[:,n,i], L, L) 66 | end 67 | end 68 | imshow(pic, cmap=ColorMap("gray")) 69 | end 70 | 71 | """ 72 | Run a demo script of missing data interpolation for face dataset. 73 | """ 74 | function test_face_missing() 75 | # load data 76 | skip = 2 77 | Y, D, L = load_facedata(skip) 78 | 79 | # mask 80 | missing_rate = 0.50 81 | mask = rand(size(Y)) .< missing_rate 82 | Y_obs = deepcopy(Y) 83 | Y_obs[mask] = NaN 84 | 85 | # known parames 86 | M = 16 87 | sigma2_y = 0.001 88 | Sigma_W = zeros(M,M,D) 89 | Sigma_mu = 1.0 * eye(D) 90 | for d in 1 : D 91 | Sigma_W[:,:,d] = 0.1 * eye(M) 92 | end 93 | prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu) 94 | 95 | # learn & generate 96 | max_iter = 100 97 | posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter) 98 | Y_est = posterior.m_W'*X_est + repmat(posterior.m_mu, 1, size(X_est, 2)) 99 | Y_itp = deepcopy(Y_obs) 100 | Y_itp[mask] = Y_est[mask] 101 | 102 | #visualize 103 | N_show = 4^2 104 | 105 | figure("Observation") 106 | clf() 107 | visualize(Y_obs[:,1:N_show], L, mask[:,1:N_show]) 108 | title("Observation") 109 | 110 | #figure("Estimation") 111 | #clf() 112 | #visualize(Y_est[:,1:N_show], L) 113 | #title("Estimation") 114 | 115 | figure("Interpolation") 116 | clf() 117 | visualize(Y_itp[:,1:N_show], L) 118 | title("Interpolation") 119 | 120 | figure("Truth") 121 | clf() 122 | visualize(Y[:,1:N_show], L) 123 | title("Truth") 124 | show() 125 | end 126 | 127 | """ 128 | Run a dimensionality reduction demo using Iris dataset. 
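Compresses the 4-dimensional Iris measurements to 2- and 3-dimensional latent spaces via variational inference and scatter-plots the latent coordinates coloured by species.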
129 | """ 130 | function test_iris() 131 | ################## 132 | # load data 133 | iris = datasets.load_iris() 134 | Y_obs = iris["data"]' 135 | label_list = [iris["target_names"][elem+1] for elem in iris["target"]] 136 | D, N = size(Y_obs) 137 | 138 | ################## 139 | # 2D compression 140 | 141 | # model 142 | M = 2 143 | sigma2_y = 0.001 144 | Sigma_W = zeros(M,M,D) 145 | Sigma_mu = 1.0 * eye(D) 146 | for d in 1 : D 147 | Sigma_W[:,:,d] = 0.1 * eye(M) 148 | end 149 | prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu) 150 | 151 | # learn & generate 152 | max_iter = 100 153 | posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter) 154 | 155 | # visualize 156 | figure("2D plot") 157 | clf() 158 | scatter(X_est[1,1:50], X_est[2,1:50], color="r") 159 | scatter(X_est[1,51:100], X_est[2,51:100], color="g") 160 | scatter(X_est[1,101:end], X_est[2,101:end], color="b") 161 | xlabel("\$x_1\$", fontsize=20) 162 | ylabel("\$x_2\$", fontsize=20) 163 | legend([label_list[1], label_list[51], label_list[101]], fontsize=16) 164 | 165 | ################## 166 | # 3D compression 167 | 168 | # model 169 | M = 3 170 | sigma2_y = 0.001 171 | Sigma_W = zeros(M,M,D) 172 | Sigma_mu = 1.0 * eye(D) 173 | for d in 1 : D 174 | Sigma_W[:,:,d] = 0.1 * eye(M) 175 | end 176 | prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu) 177 | 178 | # learn & generate 179 | max_iter = 100 180 | posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter) 181 | 182 | # visualize 183 | figure("3D plot") 184 | clf() 185 | scatter3D(X_est[1,1:50], X_est[2,1:50], X_est[3,1:50], c="r") 186 | scatter3D(X_est[1,51:100], X_est[2,51:100], X_est[3,51:100], c="g") 187 | scatter3D(X_est[1,101:end], X_est[2,101:end], X_est[3,101:end], c="b") 188 | legend([label_list[1], label_list[51], label_list[101]], fontsize=16) 189 | xlabel("\$x_1\$", fontsize=20) 190 | ylabel("\$x_2\$", fontsize=20) 191 | zlabel("\$x_3\$", fontsize=20) 192 | show() 193 | end 194 | 195 | #test_face_missing() 196 | test_iris() 197 | -------------------------------------------------------------------------------- /src/demo_GaussianMixtureModel.jl: -------------------------------------------------------------------------------- 1 | ################################### 2 | ## Example code 3 | ## for Bayesian Gaussin Mixture Model 4 | 5 | using PyPlot, PyCall 6 | push!(LOAD_PATH,".") 7 | import GaussianMixtureModel 8 | 9 | """ 10 | Visualize data & estimation in 2D space. 11 | """ 12 | function visualize_2D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}, text) 13 | cmp = get_cmap("jet") 14 | 15 | K1 = size(S, 1) 16 | K2 = size(S_est, 1) 17 | col1 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K1)] 18 | col2 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K2)] 19 | 20 | f, (ax1, ax2) = subplots(1,2,num=text) 21 | f[:clf]() 22 | f, (ax1, ax2) = subplots(1,2,num=text) 23 | 24 | for k in 1 : K1 25 | ax1[:scatter](X[1, S[k,:].==1], X[2, S[k,:].==1], color=col1[k]) 26 | end 27 | ax1[:set_title]("truth") 28 | 29 | for k in 1 : K2 30 | ax2[:scatter](X[1, S_est[k,:].==1], X[2, S_est[k,:].==1], color=col2[k]) 31 | end 32 | 33 | ax2[:set_title]("estimation") 34 | end 35 | 36 | """ 37 | Run a test script for 2D data clustering. 
38 | """ 39 | function test_2D() 40 | ## set model 41 | D = 2 # data dimension 42 | K = 4 # number of mixture components 43 | alpha = 100.0 * ones(K) 44 | beta = 0.1 45 | m = zeros(D) 46 | nu = D + 1.0 47 | W = eye(D) 48 | cmp = [GaussianMixtureModel.GW(beta, m, nu, W) for _ in 1 : K] 49 | bgmm = GaussianMixtureModel.BGMM(D, K, alpha, cmp) 50 | 51 | ## generate data 52 | N = 300 53 | gmm = GaussianMixtureModel.sample_GMM(bgmm) 54 | X, S = GaussianMixtureModel.sample_data(gmm, N) 55 | 56 | ## inference 57 | max_iter = 100 58 | tic() 59 | S_est, post_bgmm, VB = GaussianMixtureModel.learn_VI(X, bgmm, max_iter) 60 | #S_est, post_bgmm, VB = GaussianMixtureModel.learn_GS(X, bgmm, max_iter) 61 | #S_est, post_bgmm, VB = GaussianMixtureModel.learn_CGS(X, bgmm, max_iter) 62 | toc() 63 | 64 | ## plot 65 | visualize_2D(X, S, GaussianMixtureModel.winner_takes_all(S_est), "2D plot") 66 | 67 | # VB check 68 | figure("ELBO") 69 | clf() 70 | plot(VB) 71 | ylabel("ELBO") 72 | xlabel("iterations") 73 | show() 74 | end 75 | 76 | test_2D() 77 | -------------------------------------------------------------------------------- /src/demo_LogisticRegression.jl: -------------------------------------------------------------------------------- 1 | ##################################### 2 | ## Bayesian logistic regression demo 3 | 4 | using PyPlot, PyCall 5 | using Distributions 6 | push!(LOAD_PATH, ".") 7 | import LogisticRegression 8 | 9 | """ 10 | Visualize prediction via surface (only for 2D inputs.) 11 | """ 12 | function visualize_surface(mu, rho, X, Y, text) 13 | N = 100 14 | R = 100 15 | xmin = minimum(X[1,:]) 16 | xmax = maximum(X[1,:]) 17 | ymin = minimum(X[2,:]) 18 | ymax = maximum(X[2,:]) 19 | lx = xmax - xmin 20 | ly = ymax - ymin 21 | xmin = xmin - 0.25 * lx 22 | xmax = xmax + 0.25 * lx 23 | ymin = ymin - 0.25 * ly 24 | ymax = ymax + 0.25 * ly 25 | 26 | x1 = linspace(xmin,xmax,R) 27 | x2 = linspace(ymin,ymax,R) 28 | x1grid = repmat(x1, 1, R) 29 | x2grid = repmat(x2', R, 1) 30 | val = [x1grid[:] x2grid[:]]' 31 | 32 | z_list = [] 33 | sigma = log.(1 + exp.(rho)) 34 | for n in 1 : N 35 | W = rand(MvNormal(mu, diagm(sigma.^2))) 36 | z_tmp = [LogisticRegression.sigmoid(W'*val[:,i]) for i in 1 : size(val, 2)] 37 | push!(z_list, z_tmp) 38 | end 39 | z = mean(z_list) 40 | zgrid = reshape(z, R, R) 41 | 42 | # 3D plot 43 | figure("surface") 44 | clf() 45 | plot_surface(x1grid, x2grid, zgrid, alpha=0.5) 46 | scatter3D(X[1,Y.==1], X[2,Y.==1], Y[Y.==1]+0.01, c="r", depthshade=true) 47 | scatter3D(X[1,Y.==0], X[2,Y.==0], Y[Y.==0], c="b", depthshade=true) 48 | xlim([xmin, xmax]) 49 | ylim([ymin, ymax]) 50 | zlim([0, 1]) 51 | title(text) 52 | end 53 | 54 | """ 55 | Visualize prediction via contour (only for 2D inputs.) 
56 | """ 57 | function visualize_contour(mu, rho, X, Y) 58 | N = 100 59 | R = 100 60 | xmin = 2*minimum(X[1,:]) 61 | xmax = 2*maximum(X[1,:]) 62 | ymin = minimum(X[2,:]) 63 | ymax = maximum(X[2,:]) 64 | 65 | x1 = linspace(xmin,xmax,R) 66 | x2 = linspace(ymin,ymax,R) 67 | x1grid = repmat(x1, 1, R) 68 | x2grid = repmat(x2', R, 1) 69 | val = [x1grid[:] x2grid[:]]' 70 | 71 | z_list = [] 72 | W_list = [] 73 | sigma = log.(1 + exp.(rho)) 74 | for n in 1 : N 75 | W = rand(MvNormal(mu, diagm(sigma.^2))) 76 | z_tmp = [LogisticRegression.sigmoid(W'*val[:,i]) for i in 1 : size(val, 2)] 77 | push!(W_list, W) 78 | push!(z_list, z_tmp) 79 | end 80 | z = mean(z_list) 81 | zgrid = reshape(z, R, R) 82 | 83 | # precition 84 | figure("contour") 85 | clf() 86 | contour(x1grid, x2grid, zgrid, alpha=0.5, cmap=get_cmap("bwr")) 87 | scatter(X[1,Y.==1], X[2,Y.==1], c="r") 88 | scatter(X[1,Y.==0], X[2,Y.==0], c="b") 89 | xlim([xmin, xmax]) 90 | ylim([ymin, ymax]) 91 | title("prediction") 92 | 93 | # parameter samples 94 | figure("samples") 95 | clf() 96 | for n in 1 : 10 97 | draw_line(W_list[n], xmin, xmax) 98 | end 99 | scatter(X[1,Y.==1]', X[2,Y.==1]', c="r") 100 | scatter(X[1,Y.==0]', X[2,Y.==0]', c="b") 101 | xlim([xmin, xmax]) 102 | ylim([ymin, ymax]) 103 | title("parameter samples") 104 | end 105 | 106 | function draw_line(W, xmin, xmax) 107 | y1 = - xmin*W[1]/W[2] 108 | y2 = - xmax*W[1]/W[2] 109 | plot([xmin, xmax], [y1, y2], c="k") 110 | end 111 | 112 | 113 | ######################## 114 | # create model 115 | 116 | M = 2 # input dimension 117 | Sigma_w = 100.0 * eye(M) # prior on W 118 | 119 | ######################## 120 | # create toy-data using prior model 121 | 122 | N = 50 # num of data points 123 | X = 2 * rand(M, N) - 1.0 # input values 124 | 125 | # sample observation Y 126 | Y, _ = LogisticRegression.sample_data(X, Sigma_w) 127 | 128 | ######################## 129 | # inference 130 | alpha = 1.0e-4 # learning rate 131 | max_iter = 100000 # VI maximum iterations 132 | 133 | # learn variational parameters (mu & rho) 134 | mu, rho = LogisticRegression.VI(Y, X, M, Sigma_w, alpha, max_iter) 135 | 136 | ######################## 137 | # visualize (only for M=2) 138 | visualize_surface(mu, rho, X, Y, "prediction") 139 | visualize_contour(mu, rho, X, Y) 140 | show() 141 | -------------------------------------------------------------------------------- /src/demo_NMF.jl: -------------------------------------------------------------------------------- 1 | ############################## 2 | ## Audio decomposition demo using NMF 3 | 4 | using PyPlot, PyCall 5 | using DataFrames 6 | using Distributions 7 | 8 | push!(LOAD_PATH, ".") 9 | import NMF 10 | @pyimport scipy.io.wavfile as wf 11 | 12 | 13 | # load data 14 | wavfile = "../data/organ.wav" 15 | fs, data = wf.read(wavfile) 16 | 17 | figure("data") 18 | clf() 19 | Pxx, freqs, t, pl = specgram(data[10000:318000,2], Fs=fs, NFFT=256, noverlap=0) 20 | xlabel("time [sec]") 21 | ylabel("frequency [Hz]") 22 | ylim([0,22000]) 23 | 24 | # model 25 | D, N = size(Pxx) 26 | K = 2 27 | a_t = 1.0 28 | b_t = 1.0 29 | a_v = 1.0 30 | b_v = 100.0 31 | prior = NMF.NMFModel(a_t*ones(D,K), b_t*ones(D, K), a_v, b_v) 32 | 33 | # inference 34 | max_iter = 100 35 | posterior, S_est, T_est, V_est = NMF.VI(Int64.(round.(Pxx)), prior, max_iter) 36 | X = T_est * V_est 37 | 38 | # visualize 39 | figure("T") 40 | clf() 41 | for k in 1 : K 42 | subplot(K,1,k) 43 | plot(T_est[:,k], linewidth=1.0) 44 | xlim([0, D]) 45 | ylim([0, ylim()[2]]) 46 | end 47 | 48 | figure("V") 49 | clf() 50 | for 
k in 1 : K
51 | subplot(K,1,k)
52 | plot(V_est[k,:], linewidth=1.0)
53 | xlim([0,N])
54 | ylim([0, ylim()[2]])
55 | end
56 | show()
57 | --------------------------------------------------------------------------------
/src/demo_PoissonHMM.jl:
--------------------------------------------------------------------------------
1 | ###################################
2 | ## Example code
3 | ## for Bayesian Poisson HMM
4 |
5 | using PyPlot, PyCall
6 | using HDF5, JLD
7 | @pyimport matplotlib.gridspec as gspec
8 |
9 | push!(LOAD_PATH,".")
10 | import PoissonHMM
11 | import PoissonMixtureModel
12 |
13 | """
14 | Simple comparison between HMM and mixture model.
15 | """
16 | function test_comparison()
17 | #########################
18 | ## load data
19 | file_name = "../data/timeseries.jld"
20 | X = load(file_name)["obs"]
21 | N = length(X)
22 |
23 | #########################
24 | ## Poisson HMM
25 |
26 | ## set model
27 | K = 2 # number of hidden states
28 | alpha_phi = 10.0 * ones(K)
29 | alpha_A = 100.0 * eye(K) + 1.0*ones(K, K)
30 | cmp = [PoissonHMM.Gam(1.0, 0.01), PoissonHMM.Gam(1.0, 0.01)]
31 | bhmm = PoissonHMM.BHMM(K, alpha_phi, alpha_A, cmp)
32 |
33 | ## inference
34 | max_iter = 100
35 | tic()
36 | Z_est_hmm, post_bhmm = PoissonHMM.learn_VI(X, bhmm, max_iter)
37 | toc()
38 |
39 | #########################
40 | ## Poisson Mixture Model
41 |
42 | ## set model
43 | K = 2 # number of mixture components
44 | alpha_phi = 10.0 * ones(K)
45 | cmp = [PoissonMixtureModel.Gam([1.0], 0.01), PoissonMixtureModel.Gam([1.0], 0.01)]
46 | bpmm = PoissonMixtureModel.BPMM(1, K, alpha_phi, cmp)
47 |
48 | ## inference
49 | max_iter = 100
50 | tic()
51 | Z_est_pmm, post_bpmm = PoissonMixtureModel.learn_VI(reshape(X, 1, N), bpmm, max_iter)
52 | toc()
53 |
54 | #########################
55 | ## Compare results
56 | figure("Hidden Markov Model vs Mixture Model")
57 | subplot(3,1,1);plot(X);ylabel("data")
58 | subplot(3,1,2);fill_between(1:N, reshape(Z_est_hmm[1,:]', N), zeros(N));ylim([0.0, 1.0]);ylabel("S (PHMM)")
59 | subplot(3,1,3);fill_between(1:N, reshape(Z_est_pmm[1,:]', N), zeros(N));ylim([0.0, 1.0]);ylabel("S (PMM)")
60 | show()
61 | end
62 |
63 | test_comparison()
64 | --------------------------------------------------------------------------------
/src/demo_PoissonMixtureModel.jl:
--------------------------------------------------------------------------------
1 | ###################################
2 | ## Example code
3 | ## for Bayesian Poisson Mixture Model
4 |
5 | push!(LOAD_PATH,".")
6 | using PyPlot, PyCall
7 | import PoissonMixtureModel
8 |
9 | """
10 | Visualize data & estimation in 2D space.
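`S` and `S_est` are K-by-N one-hot assignment matrices (one row per mixture component); each point is colored according to the row that equals 1. In `test_2D` below, the soft responsibilities returned by inference are first hardened with `winner_takes_all` before being passed in.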
11 | """ 12 | function visualize_2D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}, text) 13 | cmp = get_cmap("jet") 14 | 15 | K1 = size(S, 1) 16 | K2 = size(S_est, 1) 17 | col1 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K1)] 18 | col2 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K2)] 19 | 20 | f, (ax1, ax2) = subplots(1,2,num=text) 21 | f[:clf]() 22 | f, (ax1, ax2) = subplots(1,2,num=text) 23 | 24 | for k in 1 : K1 25 | ax1[:scatter](X[1, S[k,:].==1], X[2, S[k,:].==1], color=col1[k]) 26 | end 27 | ax1[:set_title]("truth") 28 | 29 | for k in 1 : K2 30 | ax2[:scatter](X[1, S_est[k,:].==1], X[2, S_est[k,:].==1], color=col2[k]) 31 | end 32 | 33 | ax2[:set_title]("estimation") 34 | end 35 | 36 | function draw_hist(ax, X, S, label) 37 | counts, bins, patches = ax[:hist](X', 20) 38 | for i in 1 : length(patches) 39 | if counts[i] > 0 40 | S_tmp = S[:,bins[i] .<= X[1,:] .<= bins[i+1]] 41 | S_sum = sum(S_tmp, 2) / sum(S_tmp) 42 | patches[i][:set_facecolor]((S_sum[1], 0, S_sum[2])) 43 | end 44 | end 45 | ax[:set_title](label) 46 | end 47 | 48 | """ 49 | Visualize data & estimation using 1D histogram. 50 | """ 51 | function visualize_1D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}) 52 | # separated figures 53 | f1, ax1 = subplots(1,1,num="observation") 54 | f2, ax2 = subplots(1,1,num="estimation") 55 | f1[:clf]() 56 | f2[:clf]() 57 | _, ax1 = subplots(1,1,num="observation") 58 | _, ax2 = subplots(1,1,num="estimation") 59 | ax1[:hist](X', 20) 60 | ax1[:set_title]("observation") 61 | draw_hist(ax2, X, S_est, "estimation") 62 | end 63 | 64 | """ 65 | Run a test script for 1D data clustering. 66 | """ 67 | function test_1D() 68 | ## set model 69 | D = 1 # data dimension, must be 1. 70 | K = 2 # number of mixture components, must be 2. 71 | alpha = 100.0 * ones(K) 72 | cmp = [PoissonMixtureModel.Gam(1.0*ones(D), 0.01) for i in 1 : K] 73 | bpmm = PoissonMixtureModel.BPMM(D, K, alpha, cmp) 74 | 75 | ## generate data 76 | N = 1000 77 | pmm = PoissonMixtureModel.sample_PMM(bpmm) 78 | X, S = PoissonMixtureModel.sample_data(pmm, N) 79 | 80 | ## inference 81 | max_iter = 100 82 | tic() 83 | S_est, post_bpmm, VB = PoissonMixtureModel.learn_VI(X, bpmm, max_iter) 84 | #S_est, post_bpmm, VB = PoissonMixtureModel.learn_GS(X, bpmm, max_iter) 85 | #S_est, post_bpmm, VB = PoissonMixtureModel.learn_CGS(X, bpmm, max_iter) 86 | toc() 87 | 88 | ## plot 89 | visualize_1D(X, S, S_est) 90 | 91 | figure("ELBO") 92 | clf() 93 | plot(VB) 94 | ylabel("ELBO") 95 | xlabel("iterations") 96 | show() 97 | end 98 | 99 | """ 100 | Run a test script for 2D data clustering. 101 | """ 102 | function test_2D() 103 | ## set model 104 | D = 2 # data dimension, must be 2. 
105 | K = 8 # number of mixture components 106 | #K = 5 107 | 108 | alpha = 100.0 * ones(K) 109 | cmp = [PoissonMixtureModel.Gam(1.0*ones(D), 0.01) for i in 1 : K] 110 | bpmm = PoissonMixtureModel.BPMM(D, K, alpha, cmp) 111 | 112 | ## generate data 113 | N = 300 114 | pmm = PoissonMixtureModel.sample_PMM(bpmm) 115 | X, S = PoissonMixtureModel.sample_data(pmm, N) 116 | 117 | ## inference 118 | max_iter = 100 119 | tic() 120 | S_est, post_bpmm, VB = PoissonMixtureModel.learn_VI(X, bpmm, max_iter) 121 | #S_est, post_bpmm, VB = PoissonMixtureModel.learn_GS(X, bpmm, max_iter) 122 | #S_est, post_bpmm, VB = PoissonMixtureModel.learn_CGS(X, bpmm, max_iter) 123 | toc() 124 | 125 | ## plot 126 | visualize_2D(X, S, PoissonMixtureModel.winner_takes_all(S_est), "2D plot") 127 | 128 | # VB check 129 | figure("ELBO") 130 | clf() 131 | plot(VB) 132 | ylabel("ELBO") 133 | xlabel("iterations") 134 | show() 135 | end 136 | 137 | test_1D() 138 | #test_2D() 139 | 140 | -------------------------------------------------------------------------------- /src/demo_PolynomialRegression.jl: -------------------------------------------------------------------------------- 1 | ################################# 2 | ## Bayesian model selection demo 3 | ## for polynomial regression 4 | 5 | using PyPlot, PyCall 6 | using Distributions 7 | 8 | function poly(X_raw, M) 9 | N = size(X_raw, 1) 10 | X = zeros(M, N) 11 | for m in 0 : M - 1 12 | X[m+1,:] = X_raw.^m 13 | end 14 | return X 15 | end 16 | 17 | function learn_bayes(X_raw, Y, M, sig2_y, Sig_w, X_lin) 18 | X = poly(X_raw, M) 19 | N = size(X_raw, 1) 20 | 21 | # calc posterior 22 | Sig_w_h = inv(X*inv(sig2_y*eye(N))*X' + inv(Sig_w)) 23 | mu_w_h = Sig_w_h * (X * inv(sig2_y * eye(N)) * Y) 24 | 25 | # calc predictive 26 | X_test = poly(X_lin, M) 27 | Y_est = (mu_w_h'*X_test)' 28 | sig2_y_prd = sig2_y + diag(X_test'Sig_w_h*X_test) 29 | 30 | # calc evidence 31 | evidence = -0.5*(sum(Y)*inv(sig2_y) +N*log.(sig2_y) + N*log.(2*pi) 32 | + logdet(Sig_w) 33 | - (mu_w_h'*inv(Sig_w_h)*mu_w_h)[1] - logdet(Sig_w_h) 34 | ) 35 | return Y_est, sqrt.(sig2_y_prd), evidence 36 | end 37 | 38 | function test() 39 | # linspace 40 | X_lin = linspace(-1, 7, 200) 41 | 42 | # generate data 43 | N = 10 44 | sig2_y = 0.1 45 | X = 2*pi*rand(N) 46 | Y_true = [sin.(x) for x in X_lin] 47 | Y_obs = [sin.(x) + sig2_y * randn() for x in X] 48 | 49 | dims = [1, 2, 3, 4, 5, 10] 50 | 51 | # learning via Bayes 52 | sig2_w = 1.0 53 | Y_bayes = [learn_bayes(X, Y_obs, m, sig2_y, sig2_w*eye(m), X_lin) for m in dims] 54 | 55 | ############# 56 | # compute evidences 57 | evidence = [learn_bayes(X, Y_obs, m, sig2_y, sig2_w*eye(m), X_lin)[3] for m in dims] 58 | figure("evidence") 59 | clf() 60 | plot(1:length(dims), evidence) 61 | xticks(1:length(dims),dims) 62 | ylabel(("\$\\ln p(\\bf{Y}|\\bf{X})\$"), fontsize=20) 63 | xlabel(("\$M\$"), fontsize=20) 64 | 65 | ############# 66 | # visualize 67 | x_min = X_lin[1] 68 | x_max = X_lin[end] 69 | y_min = -4 70 | y_max = 4 71 | 72 | figure("prediction") 73 | clf() 74 | for k in 1 : 6 75 | subplot(230 + k) 76 | plot(X_lin, Y_bayes[k][1]) 77 | plot(X_lin, Y_bayes[k][1] + Y_bayes[k][2], "c--") 78 | plot(X_lin, Y_bayes[k][1] - Y_bayes[k][2], "c--") 79 | plot(X, Y_obs, "ko") 80 | xlim([x_min, x_max]) 81 | ylim([y_min, y_max]) 82 | text(x_max - 2.5, y_max - 1, @sprintf("M=%d", dims[k])) 83 | end 84 | show() 85 | end 86 | 87 | test() 88 | 89 | -------------------------------------------------------------------------------- /src/demo_Simple2DGauss.jl: 
--------------------------------------------------------------------------------
1 | ###################################
2 | ## Simple VI & GS for 2D Gaussian
3 |
4 | using PyPlot
5 | using Distributions
6 |
7 | function calc_KL(mu1, lambda1, mu2, lambda2)
8 | D = size(mu1, 1)
9 | px_lnqx = 0.5 * logdet(lambda2) - 0.5 * ((mu1 - mu2)' * lambda2 * (mu1 - mu2) + trace(lambda2 * inv(lambda1)))
10 | px_lnpx = 0.5 * logdet(lambda1) - 0.5 * D
11 | KL = - (px_lnqx - px_lnpx)
12 | return KL[1]
13 | end
14 |
15 | function plot_results(result, truth)
16 | N = size(result, 1)
17 | H = Int(ceil(sqrt(N)))
18 | W = Int(ceil(N / H))
19 | for i in 1 : H
20 | for j in 1 : W
21 | n = (i - 1) * W + j
22 | if n <= N
23 | subplot(H, W, n)
24 | title("$n of $N")
25 | plot_gaussian(truth[1], truth[2], "b", "\$p(z)\$")
26 | plot_gaussian(result[n][1], result[n][2], "r", "\$q(z)\$")
27 | end
28 | end
29 | end
30 | end
31 |
32 | function plot_lines(X)
33 | D, N = size(X)
34 | X_d = zeros(D, 2*N + 1)
35 | X_d[:,1] = X[:,1]
36 | for i in 1 : N
37 | X_d[1, 2*i - 1] = X[1, i]
38 | X_d[1, 2*i] = X[1, i]
39 | X_d[2, 2*i] = X[2, i]
40 | X_d[2, 2*i + 1] = X[2, i]
41 | end
42 | plot(X[1,:], X[2,:], "oy")
43 | plot(X_d[1,1:2*N], X_d[2,1:2*N], "--y")
44 | end
45 |
46 | function plot_gaussian(Mu, Sigma, col, label)
47 | res = 100
48 | plot(Mu[1], Mu[2], "x", color=col)
49 |
50 | F = eigfact(Sigma)
51 | vec = F.vectors
52 | val = F.values
53 | dw = 2*pi/res
54 | w = dw * (0 : res)
55 |
56 | c = 1.0
57 | a = sqrt(c*val[1])
58 | b = sqrt(c*val[2])
59 | P1 = a*cos.(w)
60 | P2 = b*sin.(w)
61 | P = Mu .+ vec'*vcat(P1', P2')
62 | plot(P[1, :], P[2, :], "-", color=col, label=label)
63 | end
64 |
65 | """
66 | Variational inference for 2D Gauss.
67 | """
68 | function main_VI()
69 | ## create true distribution
70 | D = 2 # dimension
71 | theta = 2.0*pi/12 # tilt
72 | A = reshape([cos.(theta), -sin.(theta),
73 | sin.(theta), cos.(theta)],
74 | 2, 2)
75 | mu = [0.0, 0.0]
76 | lambda = inv(A * inv(reshape([1,0,0,10], 2, 2)) * A')
77 |
78 | ## initialize
79 | #mu_h = randn(D)
80 | mu_h = [-0.5, 0.3]
81 | lambda_h = zeros(D,D)
82 |
83 | ## main iteration
84 | max_iter = 10
85 | KL = NaN * Array{Float64, 1}(max_iter)
86 | result = Array{Any, 1}(max_iter)
87 | for i in 1 : max_iter
88 | ## update
89 | mu_h[1] = mu[1] - inv(lambda[1,1])*lambda[1,2] * (mu_h[2] - mu[2])
90 |
91 | lambda_h[1,1] = lambda[1,1]
92 | mu_h[2] = mu[2] - inv(lambda[2,2])*lambda[2,1] * (mu_h[1] - mu[1])
93 | lambda_h[2,2] = lambda[2,2]
94 |
95 | ## calculate KL divergence
96 | KL[i] = calc_KL(mu_h, lambda_h, mu, lambda)
97 |
98 | ## store the results
99 | result[i] = [deepcopy(mu_h), deepcopy(inv(lambda_h))]
100 | end
101 |
102 | ## visualize results
103 | figure("result per iteration (VI)")
104 | clf()
105 | plot_results(result, (mu, inv(lambda)))
106 |
107 | figure("result (VI)")
108 | clf()
109 | plot_gaussian(mu, inv(lambda), "b", "\$p(\\bf{z})\$")
110 | plot_gaussian(result[end][1], result[end][2], "r", "\$q(\\bf{z})\$")
111 | xlabel("\$z_1\$", fontsize=20)
112 | ylabel("\$z_2\$", fontsize=20)
113 | legend(fontsize=16)
114 |
115 | figure("KL divergence (VI)")
116 | clf()
117 | plot(1:max_iter, KL)
118 | ylabel("KL divergence", fontsize=16)
119 | xlabel("iteration", fontsize=16)
120 | show()
121 | end
122 |
123 | """
124 | Gibbs sampling for 2D Gauss.
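Each iteration samples the two coordinates alternately from their exact full conditionals under the target N(mu, inv(lambda)); for a bivariate Gaussian parameterized by its precision matrix these are

    z1 | z2 ~ N(mu[1] - inv(lambda[1,1]) * lambda[1,2] * (z2 - mu[2]), inv(lambda[1,1]))
    z2 | z1 ~ N(mu[2] - inv(lambda[2,2]) * lambda[2,1] * (z1 - mu[1]), inv(lambda[2,2]))

which is what the update lines in the loop implement. The KL divergence between a Gaussian fit to the samples collected so far and the target is tracked as a convergence check.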
125 | """ 126 | function main_GS() 127 | ## creat truth distribution 128 | D = 2 # dimension 129 | theta = 2.0*pi/12 # tilt 130 | A = reshape([cos.(theta), -sin.(theta), 131 | sin.(theta), cos.(theta)], 132 | 2, 2) 133 | mu = [0.0, 0.0] 134 | #lambda = inv(A * inv(reshape([1,0,0,10], 2, 2)) * A') 135 | lambda = inv(A * inv(reshape([1,0,0,100], 2, 2)) * A') 136 | 137 | ## initialize 138 | #max_iter = 1000 139 | max_iter = 50 140 | X = randn(D, max_iter) 141 | mu_h = randn(D) 142 | 143 | ## main iteration 144 | KL = NaN * Array{Float64, 1}(max_iter) 145 | for i in 2 : max_iter 146 | ## update 147 | mu_h[1] = mu[1] - inv(lambda[1,1])*lambda[1,2] * (X[2,i-1] - mu[2]) 148 | X[1, i] = rand(Normal(mu_h[1], sqrt(inv(lambda[1,1])))) 149 | 150 | mu_h[2] = mu[2] - inv(lambda[2,2])*lambda[2,1] * (X[1,i] - mu[1]) 151 | X[2, i] = rand(Normal(mu_h[2], sqrt(inv(lambda[2,2])))) 152 | 153 | if i > D 154 | KL[i] = calc_KL(mean(X[:,1:i], 2), inv(cov(X[:,1:i], 2)), mu, lambda) 155 | end 156 | end 157 | 158 | ## visualize results 159 | expt_mu = mean(X, 2) 160 | expt_Sigma = cov(X, 2) 161 | 162 | figure("samples (GS)") 163 | clf() 164 | plot_lines(X) 165 | plot_gaussian(mu, inv(lambda), "b", "\$p(\\bf{z})\$") 166 | plot_gaussian(expt_mu, expt_Sigma, "r", "\$q(\\bf{z})\$") 167 | xlabel("\$z_1\$", fontsize=20) 168 | ylabel("\$z_2\$", fontsize=20) 169 | legend(fontsize=16) 170 | 171 | figure("KL divergence (GS)") 172 | clf() 173 | plot(1:max_iter, KL) 174 | ylabel("KL divergence", fontsize=16) 175 | xlabel("sample size", fontsize=16) 176 | show() 177 | end 178 | 179 | main_VI() 180 | main_GS() 181 | -------------------------------------------------------------------------------- /src/demo_SimpleFitting.jl: -------------------------------------------------------------------------------- 1 | ##################################### 2 | ## Simple function fitting demo 3 | 4 | using PyPlot, PyCall 5 | using Distributions 6 | 7 | # true param 8 | W = Array([1.0, 0.0, 1.0]) 9 | 10 | # generate data 11 | sigma = 0.5 12 | N = 20 13 | X = linspace(-0.4,2.4,N) 14 | Y = [W[1] + W[2]*x + W[3]*x^2 + sigma*randn() for x in X] 15 | X_min = minimum(X) 16 | X_max = maximum(X) 17 | 18 | # regression1 19 | X_all = linspace(X_min, X_max, 100) 20 | W1 = sum(Y.*X) / sum(X.^2) 21 | Y1 = [W1*x for x in X_all] 22 | 23 | # regression2 24 | X2 = zeros(3, N) 25 | X2[1,:] = 1 26 | X2[2,:] = X 27 | X2[3,:] = X.^2 28 | W2 = inv(X2*X2') * X2*Y 29 | Y2 = [W2[1] + W2[2]*x + W2[3]*x^2 for x in X_all] 30 | 31 | # show data 32 | figure() 33 | plot(X_all, Y1, "b-") 34 | plot(X_all, Y2, "g-") 35 | plot(X, Y, "ko") 36 | legend(["model1","model2","data"], loc="upper left", fontsize=16) 37 | xlabel("\$x\$", fontsize=20) 38 | ylabel("\$y\$", fontsize=20) 39 | show() 40 | -------------------------------------------------------------------------------- /src/demo_nonconjugate.jl: -------------------------------------------------------------------------------- 1 | 2 | using PyPlot, PyCall 3 | using Distributions 4 | import StatsFuns.logsumexp 5 | PyDict(matplotlib["rcParams"])["mathtext.fontset"] = "cm" 6 | PyDict(matplotlib["rcParams"])["mathtext.rm"] = "serif" 7 | PyDict(matplotlib["rcParams"])["lines.linewidth"] = 1.5 8 | PyDict(matplotlib["rcParams"])["font.family"] = "TakaoPGothic" 9 | 10 | function expt(a, b, sigma, Y, X, N_s) 11 | S = rand(Gamma(a, 1.0/b), N_s) 12 | C = mean([exp(sum(logpdf.(Normal(s, sigma), Y))) for s in S]) 13 | curve = [exp(sum(logpdf.(Normal(mu, sigma), Y))) * pdf(Gamma(a, 1.0/b), mu) for mu in X] 14 | m = 
mean([s*exp(sum(logpdf.(Normal(s, sigma), Y)))/C for s in S]) 15 | v = mean([(s-m)^2 * exp(sum(logpdf.(Normal(s, sigma), Y)))/C for s in S]) 16 | return curve/C, m, v 17 | end 18 | 19 | X = linspace(-5, 10, 1000) 20 | 21 | a = 2.0 22 | b = 2.0 23 | mu = 1.0 24 | sigma=1.0 25 | 26 | # data 27 | N = 10 28 | Y = rand(Normal(mu, sigma), N) 29 | 30 | # calc posterior 31 | N_s = 100000 32 | posterior, m, v = expt(a, b, sigma, Y, X, N_s) 33 | 34 | a_h = m^2 / v 35 | b_h = m / v 36 | 37 | figure() 38 | plot(X, pdf(Normal(mu,sigma), X)) 39 | plot(X, pdf(Gamma(a,1.0/b), X)) 40 | plot(X, posterior) 41 | plot(X, pdf(Gamma(a_h,1.0/b_h), X)) 42 | plot(Y, 0.02*ones(N), "o") 43 | legend(["generator", "prior", "posterior", "approx", "samples"]) 44 | #legend(["データ生成分布", "事前分布", "事後分布", "近似分布", "データ"], fontsize=12) 45 | xlim([-3, 6]) 46 | ylim([0, 1.8]) 47 | 48 | 49 | --------------------------------------------------------------------------------