├── LICENSE ├── README.md ├── data ├── organ.wav └── timeseries.jld ├── docker ├── Dockerfile ├── README.md └── add_packages.jl ├── pdf ├── seigo.pdf └── seigo_v4.pdf └── src ├── BayesNeuralNet.jl ├── DimensionalityReduction.jl ├── GaussianMixtureModel.jl ├── LogisticRegression.jl ├── NMF.jl ├── PoissonHMM.jl ├── PoissonMixtureModel.jl ├── demo_BayesNeuralNet.jl ├── demo_DimensionalityReduction.jl ├── demo_GaussianMixtureModel.jl ├── demo_LogisticRegression.jl ├── demo_NMF.jl ├── demo_PoissonHMM.jl ├── demo_PoissonMixtureModel.jl ├── demo_PolynomialRegression.jl ├── demo_Simple2DGauss.jl ├── demo_SimpleFitting.jl └── demo_nonconjugate.jl /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Sammy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BayesBook 2 | 3 | 「機械学習スタートアップシリーズ ベイズ推論による機械学習入門」のソースコードをアップしています。 4 | * http://www.kspub.co.jp/book/detail/1538320.html 5 | * 正誤表(第1~3刷まで) https://github.com/sammy-suyama/BayesBook/blob/master/pdf/seigo.pdf 6 | * 正誤表(第4刷まで) https://github.com/sammy-suyama/BayesBook/blob/master/pdf/seigo_v4.pdf 7 | 8 | ソースコードはJuliaで書かれています。(推奨Vesion:0.6.0) 9 | * The Julia Language: http://julialang.org/ 10 | * Julia Documentation: http://docs.julialang.org/ 11 | 12 | グラフの描画やテストデータのダウンロードに一部Pythonライブラリを利用しています。 13 | * Python: https://www.python.org/ 14 | * Matplotlib: https://matplotlib.org/ 15 | * scikit-learn: http://scikit-learn.org/ 16 | 17 | 上記の環境構築が煩わしい場合にはDockerfileも用意しています. 
18 | * Docker: https://docs.docker.com/ 19 | -------------------------------------------------------------------------------- /data/organ.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sammy-suyama/BayesBook/61cb7ee0f1df348ef502aa183f23c7bc9753d02a/data/organ.wav -------------------------------------------------------------------------------- /data/timeseries.jld: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sammy-suyama/BayesBook/61cb7ee0f1df348ef502aa183f23c7bc9753d02a/data/timeseries.jld -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:latest 2 | 3 | # Update 4 | RUN apt-get update 5 | 6 | # Install matplotlib 7 | RUN pip3 install matplotlib scipy scikit-learn notebook 8 | 9 | # Install libraries 10 | RUN apt-get install -y sudo hdf5-tools libzmq3 11 | 12 | # Install julia 0.6.0 13 | RUN wget https://julialang-s3.julialang.org/bin/linux/x64/0.6/julia-0.6.0-linux-x86_64.tar.gz && \ 14 | tar -xzf julia-0.6.0-linux-x86_64.tar.gz && \ 15 | ln -s /julia-903644385b/bin/julia /usr/local/bin/julia 16 | 17 | # Set the working directory to /work 18 | WORKDIR /work 19 | 20 | # Add julia packages 21 | ADD add_packages.jl /work 22 | RUN julia add_packages.jl 23 | 24 | # Download source codes 25 | RUN git clone https://github.com/sammy-suyama/BayesBook.git 26 | 27 | # Make port 8888 available to the world outside this container 28 | EXPOSE 8888 29 | 30 | # Start jupyter notebook 31 | CMD jupyter notebook --allow-root --port=8888 --ip=0.0.0.0 32 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # DockerからJupyter notebookを実行する 2 | 3 | JuliaやPythonの実行環境構築が煩わしい場合は、Dockerを使ってデモスクリプトをJupyter notebook上で動作させることができます。 4 | Dockerのインストールに関しては公式サイトを参考ください。 5 | * https://docs.docker.com/engine/installation/ 6 | 7 | `Dockerfile`の置いてあるディレクトリで、イメージを作成・実行します。 8 | 9 | $ docker build -t bayesbook . 
10 | $ docker run -p 8888:8888 bayesbook 11 | 12 | -------------------------------------------------------------------------------- /docker/add_packages.jl: -------------------------------------------------------------------------------- 1 | 2 | Pkg.update() 3 | Pkg.add("PyPlot") 4 | Pkg.add("StatsFuns") 5 | Pkg.add("SpecialFunctions") 6 | Pkg.add("Distributions") 7 | Pkg.add("PDMats") 8 | Pkg.add("ProgressMeter") 9 | Pkg.add("DataFrames") 10 | Pkg.add("HDF5") 11 | Pkg.add("JLD") 12 | Pkg.add("IJulia") 13 | -------------------------------------------------------------------------------- /pdf/seigo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sammy-suyama/BayesBook/61cb7ee0f1df348ef502aa183f23c7bc9753d02a/pdf/seigo.pdf -------------------------------------------------------------------------------- /pdf/seigo_v4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sammy-suyama/BayesBook/61cb7ee0f1df348ef502aa183f23c7bc9753d02a/pdf/seigo_v4.pdf -------------------------------------------------------------------------------- /src/BayesNeuralNet.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Variational inference for Bayesian neural network 3 | """ 4 | module BayesNeuralNet 5 | using Distributions 6 | 7 | export sample_data_from_prior, sample_data_from_posterior 8 | export VI 9 | 10 | function sigmoid(x) 11 | return 1.0 / (1.0 + exp.(-x[1])) 12 | end 13 | 14 | function rho2sig(rho) 15 | return log.(1 + exp.(rho)) 16 | end 17 | 18 | function compute_df_dmu(mu, rho, W) 19 | return (W - mu) ./ rho2sig(rho).^2 20 | end 21 | 22 | function compute_df_drho(Y, X, mu, rho, W) 23 | return -0.5*((W - mu).^2 - rho2sig(rho).^2) .* compute_dprec_drho(rho) 24 | end 25 | 26 | function compute_dprec_drho(rho) 27 | return 2 * rho2sig(rho) .^ (-3) .* (1 ./ (1+exp.(rho))).^2 .* (1 ./ (1+exp.(-rho))) 28 | end 29 | 30 | function compute_df_dw(Y, X, sigma2_y, sigma2_w, mu1, rho1, W1, mu2, rho2, W2) 31 | M, N = size(X) 32 | Y_err1 = zeros(size(W1)) # MxK 33 | Y_err2 = zeros(size(W2)) # KxD 34 | 35 | for n in 1 : N 36 | Z = tanh.(W1'*X[:,n]) # Kx1 37 | Y_est = W2'*Z 38 | # 2nd unit, Dx1 39 | delta2 = Y_est - Y[n] 40 | 41 | # 1st unit, KxD 42 | delta1 = diagm(1 - Z.^2) * W2 * delta2 43 | 44 | Y_err1 += X[:,n] * delta1' 45 | Y_err2 += Z * delta2' 46 | end 47 | df_dw1 = W1/sigma2_w + (mu1 - W1) ./ rho2sig(rho1).^2 + Y_err1 / sigma2_y 48 | df_dw2 = W2/sigma2_w + (mu2 - W2) ./ rho2sig(rho2).^2 + Y_err2 / sigma2_y 49 | return df_dw1, df_dw2 50 | end 51 | 52 | """ 53 | Sample data given prior and inputs. 54 | """ 55 | function sample_data_from_prior(X, sigma2_w, sigma2_y, D, K) 56 | M, N = size(X) 57 | 58 | W1 = sqrt(sigma2_w) * randn(M, K) 59 | W2 = sqrt(sigma2_w) * randn(K, D) 60 | 61 | # sample function 62 | Y = [W2'* tanh.(W1'X[:,n]) for n in 1 : N] 63 | 64 | # sample data 65 | Y_obs = [W2'* tanh.(W1'X[:,n]) + sqrt(sigma2_y)*randn(D) for n in 1 : N] 66 | 67 | return Y_obs, Y, W1, W2 68 | end 69 | 70 | """ 71 | Sample data given posterior and inputs. 
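Weights are drawn with the reparameterization `W = mu + log.(1 + exp.(rho)) .* eps`, where `eps` is standard normal, so the softplus of `rho` plays the role of the posterior standard deviation. Returns the noiseless function values `Y_est` and the noisy observations `Y_obs` for the inputs `X`.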
72 | """ 73 | function sample_data_from_posterior(X, mu1, rho1, mu2, rho2, sigma2_y, D) 74 | N = size(X, 2) 75 | ep1 = randn(size(mu1)) 76 | W1_tmp = mu1 + log.(1 + exp.(rho1)) .* ep1 77 | ep2 = randn(size(mu2)) 78 | W2_tmp = mu2 + log.(1 + exp.(rho2)) .* ep2 79 | Y_est = [W2_tmp'* tanh.(W1_tmp'X[:,n]) for n in 1 : N] 80 | Y_obs = [W2_tmp'* tanh.(W1_tmp'X[:,n]) + sqrt(sigma2_y)*randn(D) for n in 1 : N] 81 | return Y_est, Y_obs 82 | end 83 | 84 | """ 85 | Compute variational parameters. 86 | """ 87 | function VI(Y, X, sigma2_w, sigma2_y, K, alpha, max_iter) 88 | M, N = size(X) 89 | D = length(Y[1]) 90 | 91 | # initialize 92 | mu1 = randn(M, K) 93 | rho1 = randn(M, K) 94 | mu2 = randn(K, D) 95 | rho2 = randn(K, D) 96 | 97 | for i in 1 : max_iter 98 | # sample 99 | ep1 = randn(size(mu1)) 100 | W1_tmp = mu1 + log.(1 + exp.(rho1)) .* ep1 101 | ep2 = randn(size(mu2)) 102 | W2_tmp = mu2 + log.(1 + exp.(rho2)) .* ep2 103 | 104 | # calc error 105 | df_dw1, df_dw2 = compute_df_dw(Y, X, sigma2_y, sigma2_w, mu1, rho1, W1_tmp, mu2, rho2, W2_tmp) 106 | 107 | # 1st unit 108 | df_dmu1 = compute_df_dmu(mu1, rho1, W1_tmp) 109 | df_drho1 = compute_df_drho(Y, X, mu1, rho1, W1_tmp) 110 | d_mu1 = df_dw1 + df_dmu1 111 | d_rho1 = df_dw1 .* (ep1 ./ (1+exp.(-rho1))) + df_drho1 112 | mu1 = mu1 - alpha * d_mu1 113 | rho1 = rho1 - alpha * d_rho1 114 | 115 | # 2nd unit 116 | df_dmu2 = compute_df_dmu(mu2, rho2, W2_tmp) 117 | df_drho2 = compute_df_drho(Y, X, mu2, rho2, W2_tmp) 118 | d_mu2 = df_dw2 + df_dmu2 119 | d_rho2 = df_dw2 .* (ep2 ./ (1+exp.(-rho2))) + df_drho2 120 | mu2 = mu2 - alpha * d_mu2 121 | rho2 = rho2 - alpha * d_rho2 122 | end 123 | return mu1, rho1, mu2, rho2 124 | end 125 | 126 | end 127 | 128 | -------------------------------------------------------------------------------- /src/DimensionalityReduction.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Variational inference for Bayesian DimensionalityReduction 3 | """ 4 | module DimensionalityReduction 5 | 6 | using Distributions 7 | #using ProgressMeter 8 | 9 | export DRModel 10 | export sample_data, VI 11 | 12 | #################### 13 | ## Types 14 | struct DRModel 15 | D::Int 16 | M::Int 17 | sigma2_y::Float64 18 | m_W::Array{Float64, 2} # MxD 19 | Sigma_W::Array{Float64, 3} # MxMxD 20 | m_mu::Array{Float64, 1} # D 21 | Sigma_mu::Array{Float64, 2} # DxD 22 | end 23 | 24 | #################### 25 | ## functions 26 | function sqsum(mat::Array{Float64}, idx::Int) 27 | return squeeze(sum(mat, idx), idx) 28 | end 29 | 30 | """ 31 | Sample data given hyperparameters. 
32 | """ 33 | function sample_data(N::Int, model::DRModel) 34 | D = model.D 35 | M = model.M 36 | W = zeros(M, D) 37 | mu = zeros(D) 38 | for d in 1 : D 39 | W[:,d] = rand(MvNormal(model.m_W[:,d], model.Sigma_W[:,:,d])) 40 | end 41 | mu = rand(MvNormal(model.m_mu, model.Sigma_mu)) 42 | 43 | Y = zeros(D, N) 44 | X = randn(M, N) 45 | for n in 1 : N 46 | Y[:,n] = rand(MvNormal(W'*X[:,n] + mu, model.sigma2_y*eye(D))) 47 | end 48 | return Y, X, W, mu 49 | end 50 | 51 | function init(Y::Array{Float64, 2}, prior::DRModel) 52 | M = prior.M 53 | D, N = size(Y) 54 | X = randn(M, N) 55 | XX = zeros(M, M, N) 56 | for n in 1 : N 57 | XX[:,:,n] = X[:,n]*X[:,n]' + eye(M) 58 | end 59 | return X, XX 60 | end 61 | 62 | function update_W(Y::Array{Float64, 2}, prior::DRModel, posterior::DRModel, 63 | X::Array{Float64, 2}, XX::Array{Float64, 3}) 64 | D = prior.D 65 | M = prior.M 66 | N = size(Y, 2) 67 | m_W = zeros(M, D) 68 | Sigma_W = zeros(M, M, D) 69 | mu = posterior.m_mu 70 | for d in 1 : D 71 | Sigma_W[:,:,d] = inv(inv(prior.sigma2_y)*sqsum(XX, 3) + inv(prior.Sigma_W[:,:,d])) 72 | m_W[:,d] = Sigma_W[:,:,d]*(inv(prior.sigma2_y)*X*(Y[[d],:] - mu[d]*ones(1, N))' 73 | + inv(prior.Sigma_W[:,:,d])*prior.m_W[:,d]) 74 | end 75 | return DRModel(D, M, prior.sigma2_y, m_W, Sigma_W, posterior.m_mu, posterior.Sigma_mu) 76 | end 77 | 78 | function update_mu(Y::Array{Float64, 2}, prior::DRModel, posterior::DRModel, 79 | X::Array{Float64, 2}, XX::Array{Float64, 3}) 80 | N = size(Y, 2) 81 | D = prior.D 82 | M = prior.M 83 | W = posterior.m_W 84 | Sigma_mu = inv(N*inv(prior.sigma2_y)*eye(D) + inv(prior.Sigma_mu)) 85 | m_mu = Sigma_mu*(inv(prior.sigma2_y)*sqsum(Y - W'*X, 2) + inv(prior.Sigma_mu)*prior.m_mu) 86 | return DRModel(D, M, prior.sigma2_y, posterior.m_W, posterior.Sigma_W, m_mu, Sigma_mu) 87 | end 88 | 89 | function update_X(Y::Array{Float64, 2}, posterior::DRModel) 90 | D, N = size(Y) 91 | M = posterior.M 92 | 93 | W = posterior.m_W 94 | WW = zeros(M, M, D) 95 | for d in 1 : D 96 | WW[:,:,d] = W[:,d]*W[:,d]' + posterior.Sigma_W[:,:,d] 97 | end 98 | mu = posterior.m_mu 99 | X = zeros(M, N) 100 | XX = zeros(M, M, N) 101 | for n in 1 : N 102 | Sigma = inv(inv(posterior.sigma2_y)*sqsum(WW, 3) + eye(M)) 103 | X[:,n] = inv(posterior.sigma2_y)*Sigma*W*(Y[:,n] - mu) 104 | XX[:,:,n] = X[:,n] * X[:,n]' + Sigma 105 | end 106 | return X, XX 107 | end 108 | 109 | function interpolate(mask::BitArray{2}, X::Array{Float64, 2}, posterior::DRModel) 110 | Y_est = posterior.m_W'*X + repmat(posterior.m_mu, 1, size(X, 2)) 111 | return return Y_est[mask] 112 | end 113 | 114 | """ 115 | Compute variational posterior distributions. 
116 | """ 117 | function VI(Y::Array{Float64, 2}, prior::DRModel, max_iter::Int) 118 | X, XX = init(Y, prior) 119 | mask = isnan.(Y) 120 | sum_nan = sum(mask) 121 | posterior = deepcopy(prior) 122 | 123 | #progress = Progress(max_iter) 124 | for iter in 1 : max_iter 125 | # progress 126 | #next!(progress) 127 | 128 | # Interpolate 129 | if sum_nan > 0 130 | Y[mask] = interpolate(mask, X, posterior) 131 | end 132 | 133 | # M-step 134 | posterior = update_W(Y, prior, posterior, X, XX) 135 | posterior = update_mu(Y, prior, posterior, X, XX) 136 | 137 | # E-step 138 | X, XX = update_X(Y, posterior) 139 | end 140 | 141 | return posterior, X 142 | end 143 | 144 | end 145 | -------------------------------------------------------------------------------- /src/GaussianMixtureModel.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Bayesian Gaussian Mixture Model 3 | """ 4 | module GaussianMixtureModel 5 | using StatsFuns.logsumexp, SpecialFunctions.digamma 6 | using Distributions 7 | using PDMats 8 | 9 | export GW, BGMM, Gauss, GMM 10 | export sample_GMM, sample_data, winner_takes_all 11 | export learn_GS, learn_CGS, learn_VI 12 | 13 | #################### 14 | ## Types 15 | struct GW 16 | # Parameters of Gauss Wisahrt distribution 17 | beta::Float64 18 | m::Vector{Float64} 19 | nu::Float64 20 | W::Matrix{Float64} 21 | end 22 | 23 | struct BGMM 24 | # Parameters of Bayesian Gaussian Mixture Model 25 | D::Int 26 | K::Int 27 | alpha::Vector{Float64} 28 | cmp::Vector{GW} 29 | end 30 | 31 | struct Gauss 32 | # Parameters of Gauss Distribution 33 | mu::Vector{Float64} 34 | Lambda::Matrix{Float64} 35 | end 36 | 37 | struct GMM 38 | # Parameters of Gauss Mixture Model 39 | D::Int 40 | K::Int 41 | phi::Vector{Float64} 42 | cmp::Vector{Gauss} 43 | end 44 | 45 | #################### 46 | ## Common functions 47 | """ 48 | Sample a GMM given hyperparameters. 49 | """ 50 | function sample_GMM(bgmm::BGMM) 51 | cmp = Vector{Gauss}() 52 | for c in bgmm.cmp 53 | Lambda = rand(Wishart(c.nu, PDMats.PDMat(Symmetric(c.W)))) 54 | mu = rand(MvNormal(c.m, PDMats.PDMat(Symmetric(inv(c.beta*Lambda))))) 55 | push!(cmp, Gauss(mu, Lambda)) 56 | end 57 | phi = rand(Dirichlet(bgmm.alpha)) 58 | return GMM(bgmm.D, bgmm.K, phi, cmp) 59 | end 60 | 61 | """ 62 | Sample data from a specific GMM model. 
63 | """ 64 | function sample_data(gmm::GMM, N::Int) 65 | X = zeros(gmm.D, N) 66 | S = categorical_sample(gmm.phi, N) 67 | for n in 1 : N 68 | k = indmax(S[:, n]) 69 | X[:,n] = rand(MvNormal(gmm.cmp[k].mu, PDMats.PDMat(Symmetric(inv(gmm.cmp[k].Lambda))))) 70 | end 71 | return X, S 72 | end 73 | 74 | categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1] 75 | function categorical_sample(p::Vector{Float64}, N::Int) 76 | K = length(p) 77 | S = zeros(K, N) 78 | S_tmp = rand(Categorical(p), N) 79 | for k in 1 : K 80 | S[k,find(S_tmp.==k)] = 1 81 | end 82 | return S 83 | end 84 | 85 | function sumdigamma(nu, D) 86 | ret = 0.0 87 | for d in 1 : D 88 | ret += digamma.(0.5*(nu + 1 - d)) 89 | end 90 | return ret 91 | end 92 | 93 | function init_S(X::Matrix{Float64}, bgmm::BGMM) 94 | N = size(X, 2) 95 | K = bgmm.K 96 | S = categorical_sample(ones(K)/K, N) 97 | return S 98 | end 99 | 100 | function calc_ELBO(X::Array{Float64, 2}, pri::BGMM, pos::BGMM) 101 | function logCw(nu, W) 102 | D = size(W, 1) 103 | return -0.5*nu*logdet(W) - 0.5*nu*D*log.(2) - 0.25*D*(D-1)*log.(pi) - sum([lgamma.(0.5*(nu+1-d)) for d in 1 : D]) 104 | end 105 | 106 | ln_expt_S = update_S(pos, X) 107 | expt_S = exp.(ln_expt_S) 108 | K, N = size(expt_S) 109 | D = size(X, 1) 110 | 111 | expt_ln_lkh = 0 112 | for k in 1 : K 113 | expt_Lambda = pos.cmp[k].nu * pos.cmp[k].W 114 | expt_Lambda_mu = pos.cmp[k].nu * pos.cmp[k].W * pos.cmp[k].m 115 | expt_mu_Lambda_mu = (pos.cmp[k].nu * pos.cmp[k].m' * pos.cmp[k].W * pos.cmp[k].m)[1] + D/pos.cmp[k].beta 116 | expt_ln_Lambda = sumdigamma(pos.cmp[k].nu, D) + D*log.(2) + logdet(pos.cmp[k].W) 117 | expt_ln_pi = digamma.(pos.alpha) - digamma.(sum(pos.alpha)) 118 | for n in 1 : N 119 | # 120 | expt_ln_lkh += -0.5 * expt_S[k,n]*(trace(X[:,n]*X[:,n]'*expt_Lambda) 121 | - 2*(X[:,n]'*expt_Lambda_mu)[1] 122 | + expt_mu_Lambda_mu 123 | - expt_ln_Lambda 124 | + D * log.(2*pi) 125 | ) 126 | # 127 | expt_ln_lkh += expt_S[k,n]*expt_ln_pi[k] 128 | end 129 | end 130 | # - 131 | expt_ln_lkh -= sum(expt_S.*ln_expt_S) 132 | 133 | KL_mu_Lambda = [(0.5*D*(log.(pos.cmp[k].beta) - log.(pri.cmp[k].beta) + pri.cmp[k].beta/pos.cmp[k].beta - pos.cmp[k].nu - 1) 134 | + 0.5*(pos.cmp[k].nu-pri.cmp[k].nu)*(sumdigamma(pos.cmp[k].nu, D) + D*log.(2) + logdet(pos.cmp[k].W)) 135 | + logCw(pos.cmp[k].nu, pos.cmp[k].W) - logCw(pri.cmp[k].nu, pri.cmp[k].W) 136 | + 0.5*pos.cmp[k].nu*trace((pri.cmp[k].beta*(pos.cmp[k].m-pri.cmp[k].m)*(pos.cmp[k].m-pri.cmp[k].m)' 137 | +inv(pri.cmp[k].W))*pos.cmp[k].W)) for k in 1 : K] 138 | 139 | KL_pi = (lgamma.(sum(pos.alpha)) - lgamma.(sum(pri.alpha)) 140 | - sum(lgamma.(pos.alpha)) + sum(lgamma.(pri.alpha)) 141 | + (pos.alpha - pri.alpha)' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha))) 142 | )[1] 143 | 144 | VB = expt_ln_lkh - (sum(KL_mu_Lambda) + KL_pi) 145 | return VB 146 | end 147 | 148 | function add_stats(bgmm::BGMM, X::Matrix{Float64}, S::Matrix{Float64}) 149 | D = bgmm.D 150 | K = bgmm.K 151 | sum_S = sum(S, 2) 152 | alpha = [bgmm.alpha[k] + sum_S[k] for k in 1 : K] 153 | cmp = Vector{GW}() 154 | 155 | XS = X*S'; 156 | for k in 1 : K 157 | beta = bgmm.cmp[k].beta + sum_S[k] 158 | m = (1.0/beta)*(vec(X*S[[k],:]') + bgmm.cmp[k].beta*bgmm.cmp[k].m) 159 | nu = bgmm.cmp[k].nu + sum_S[k] 160 | W = inv(X*diagm(S[k,:])*X' 161 | - beta*m*m' 162 | + bgmm.cmp[k].beta*bgmm.cmp[k].m*bgmm.cmp[k].m' 163 | + inv(bgmm.cmp[k].W)) 164 | 165 | push!(cmp, GW(beta, m, nu, W)) 166 | end 167 | return BGMM(D, K, alpha, cmp) 168 | end 169 | 170 | remove_stats(bgmm::BGMM, X::Matrix{Float64}, 
S::Matrix{Float64}) = add_stats(bgmm, X, -S) 171 | 172 | #################### 173 | ## used for Variational Inference 174 | function update_S(bgmm::BGMM, X::Matrix{Float64}) 175 | D, N = size(X) 176 | K = bgmm.K 177 | ln_S = zeros(K, N) 178 | tmp = zeros(K) 179 | 180 | tmp = NaN * zeros(K) 181 | sum_digamma_tmp = digamma.(sum(bgmm.alpha)) 182 | for k in 1 : K 183 | tmp[k] = -0.5*(bgmm.cmp[k].nu*trace(bgmm.cmp[k].m*bgmm.cmp[k].m'*bgmm.cmp[k].W) 184 | + D*(1.0/bgmm.cmp[k].beta) 185 | - (sumdigamma(bgmm.cmp[k].nu, D) + logdet(bgmm.cmp[k].W))) 186 | tmp[k] += digamma.(bgmm.alpha[k]) - sum_digamma_tmp 187 | end 188 | for n in 1 : N 189 | tmp_ln_pi = NaN * zeros(K) 190 | for k in 1 : K 191 | tmp_ln_pi[k] = tmp[k] -0.5*bgmm.cmp[k].nu*trace((X[:,n]*X[:,n]' - 2*bgmm.cmp[k].m*X[:,n]')*bgmm.cmp[k].W) 192 | end 193 | ln_S[:,n] = tmp_ln_pi - logsumexp(tmp_ln_pi) 194 | end 195 | return ln_S 196 | end 197 | 198 | """ 199 | Pick single states having a max probability. 200 | """ 201 | function winner_takes_all(S::Matrix{Float64}) 202 | S_ret = zeros(size(S)) 203 | for n in 1 : size(S_ret, 2) 204 | idx = indmax(S[:,n]) 205 | S_ret[idx,n] = 1 206 | end 207 | return S_ret 208 | end 209 | 210 | #################### 211 | ## used for Gibbs Sampling 212 | function sample_S_GS(gmm::GMM, X::Matrix{Float64}) 213 | D, N = size(X) 214 | K = gmm.K 215 | S = zeros(K, N) 216 | 217 | tmp = [0.5*logdet(gmm.cmp[k].Lambda) + log.(gmm.phi[k]) for k in 1 : K] 218 | 219 | for n in 1 : N 220 | tmp_ln_phi = [-0.5*trace(gmm.cmp[k].Lambda*(X[:,n] - gmm.cmp[k].mu)*(X[:,n] - gmm.cmp[k].mu)') + tmp[k] for k in 1 : K] 221 | tmp_ln_phi = tmp_ln_phi - logsumexp(tmp_ln_phi) 222 | S[:,n] = categorical_sample(exp.(tmp_ln_phi)) 223 | end 224 | 225 | return S 226 | end 227 | 228 | #################### 229 | ## used for Collapsed Gibbs Sampling 230 | function calc_ln_ST(Xn::Vector{Float64}, gw::GW) 231 | # TODO; need to check value? 232 | D = size(Xn, 1) 233 | W = ((1 - D + gw.nu)*gw.beta / (1 + gw.beta)) * gw.W 234 | #ln_lkh = logpdf(MvTDist(1 - D + gw.nu, gw.m, (gw.nu/(gw.nu - 2))*inv(W)), Xn) 235 | ln_lkh = logpdf(MvTDist(1 - D + gw.nu, gw.m, PDMats.PDMat(Symmetric(inv(W)))), Xn) 236 | return sum(ln_lkh) 237 | end 238 | 239 | function sample_Sn(Xn::Vector{Float64}, bgmm::BGMM) 240 | ln_tmp = [(calc_ln_ST(Xn, bgmm.cmp[k]) + log.(bgmm.alpha[k])) for k in 1 : bgmm.K] 241 | ln_tmp = ln_tmp - logsumexp(ln_tmp) 242 | Sn = categorical_sample(exp.(ln_tmp)) 243 | return Sn 244 | end 245 | 246 | function sample_S_CGS(S::Matrix{Float64}, X::Matrix{Float64}, bgmm::BGMM) 247 | D, N = size(X) 248 | K = size(S, 1) 249 | for n in randperm(N) 250 | # remove 251 | bgmm = remove_stats(bgmm, X[:,[n]], S[:,[n]]) 252 | # sample 253 | S[:,n] = sample_Sn(X[:,n], bgmm) 254 | # insert 255 | bgmm = add_stats(bgmm, X[:,[n]], S[:,[n]]) 256 | end 257 | return S, bgmm 258 | end 259 | 260 | #################### 261 | ## Algorithm main 262 | """ 263 | Compute posterior distributions via variational inference. 
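Iterates the E-step (responsibilities via `update_S`) and the M-step (posterior hyperparameters via `add_stats`), tracking the ELBO at every iteration. Returns hard assignments `S`, the posterior `BGMM`, and the ELBO trace.

A minimal usage sketch for 2-dimensional data `X` with hypothetical hyperparameters (mirroring `demo_GaussianMixtureModel.jl`):

    K = 4
    cmp = [GW(0.1, zeros(2), 3.0, eye(2)) for _ in 1 : K]
    prior = BGMM(2, K, 100.0 * ones(K), cmp)
    S_est, posterior, VB = learn_VI(X, prior, 100)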
264 | """ 265 | function learn_VI(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int) 266 | # initialisation 267 | expt_S = init_S(X, prior_bgmm) 268 | bgmm = add_stats(prior_bgmm, X, expt_S) 269 | VB = NaN * zeros(max_iter) 270 | 271 | # inference 272 | for i in 1 : max_iter 273 | # E-step 274 | expt_S = exp.(update_S(bgmm, X)) 275 | # M-step 276 | bgmm = add_stats(prior_bgmm, X, expt_S) 277 | # calc VB 278 | VB[i] = calc_ELBO(X, prior_bgmm, bgmm) 279 | end 280 | 281 | # assign binary values 282 | S = winner_takes_all(expt_S) 283 | return S, bgmm, VB 284 | end 285 | 286 | """ 287 | Compute posterior distributions via Gibbs sampling. 288 | """ 289 | function learn_GS(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int) 290 | # initialisation 291 | S = init_S(X, prior_bgmm) 292 | bgmm = add_stats(prior_bgmm, X, S) 293 | VB = NaN * zeros(max_iter) 294 | 295 | # inference 296 | for i in 1 : max_iter 297 | # sample parameters 298 | gmm = sample_GMM(bgmm) 299 | # sample latent variables 300 | S = sample_S_GS(gmm, X) 301 | # update current model 302 | bgmm = add_stats(prior_bgmm, X, S) 303 | # calc VB 304 | VB[i] = calc_ELBO(X, prior_bgmm, bgmm) 305 | end 306 | 307 | return S, bgmm, VB 308 | end 309 | 310 | """ 311 | Compute posterior distributions via collapsed Gibbs sampling. 312 | """ 313 | function learn_CGS(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int) 314 | # initialisation 315 | S = init_S(X, prior_bgmm) 316 | bgmm = add_stats(prior_bgmm, X, S) 317 | VB = NaN * zeros(max_iter) 318 | 319 | # inference 320 | for i in 1 : max_iter 321 | # directly sample S 322 | S, bgmm = sample_S_CGS(S, X, bgmm) 323 | # calc VB 324 | VB[i] = calc_ELBO(X, prior_bgmm, bgmm) 325 | end 326 | 327 | return S, bgmm, VB 328 | end 329 | 330 | end 331 | -------------------------------------------------------------------------------- /src/LogisticRegression.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Variational inference for Bayesian logistic regression. 3 | """ 4 | module LogisticRegression 5 | using Distributions 6 | 7 | export sigmoid, sample_data, VI 8 | 9 | function sigmoid(x) 10 | return 1.0 / (1.0 + exp.(-x[1])) 11 | end 12 | 13 | function bern_sample(mu) 14 | i = rand(Bernoulli(mu)) 15 | val = zeros(2) 16 | val[i+1] = 1 17 | return val 18 | end 19 | 20 | """ 21 | Sample data & parameter given covariance Sigma_w and inputs X. 22 | """ 23 | function sample_data(X, Sigma_w) 24 | N = size(X, 2) 25 | M = size(Sigma_w, 1) 26 | 27 | # sample parameters 28 | W = rand(MvNormal(zeros(M), Sigma_w)) 29 | 30 | # sample data 31 | Y = [rand(Bernoulli(sigmoid(W'*X[:, n]))) for n in 1 : N] 32 | return Y, W 33 | end 34 | 35 | """ 36 | Compute variational parameters. 
37 | """ 38 | function VI(Y, X, M, Sigma_w, alpha, max_iter) 39 | function rho2sig(rho) 40 | return log.(1 + exp.(rho)) 41 | end 42 | 43 | function compute_df_dw(Y, X, Sigma_w, mu, rho, W) 44 | M, N = size(X) 45 | term1 = (mu - W) ./ rho2sig(rho).^2 46 | term2 = inv(Sigma_w)*W 47 | term3 = 0 48 | for n in 1 : N 49 | term3 += -(Y[n] - sigmoid(W'*X[:,n])) * X[:,n] 50 | end 51 | return term1 + term2 + term3 52 | end 53 | 54 | function compute_df_dmu(mu, rho, W) 55 | return (W - mu) ./ rho2sig(rho).^2 56 | end 57 | 58 | function compute_df_drho(Y, X, Sigma_w, mu, rho, W) 59 | return -0.5*((W - mu).^2 - rho2sig(rho).^2) .* compute_dprec_drho(rho) 60 | end 61 | 62 | function compute_dprec_drho(rho) 63 | return 2 * rho2sig(rho) .^ (-3) .* (1 ./ (1+exp.(rho))).^2 .* (1 ./ (1+exp.(-rho))) 64 | end 65 | 66 | # diag gaussian for approximate posterior 67 | mu = randn(M) 68 | rho = randn(M) # sigma = log.(1 + exp.(rho)) 69 | 70 | for i in 1 : max_iter 71 | # sample epsilon 72 | ep = rand(M) 73 | W_tmp = mu + log.(1 + exp.(rho)) .* ep 74 | 75 | # calculate gradient 76 | df_dw = compute_df_dw(Y, X, Sigma_w, mu, rho, W_tmp) 77 | df_dmu = compute_df_dmu(mu, rho, W_tmp) 78 | df_drho = compute_df_drho(Y, X, Sigma_w, mu, rho, W_tmp) 79 | d_mu = df_dw + df_dmu 80 | d_rho = df_dw .* (ep ./ (1+exp.(-rho))) + df_drho 81 | 82 | # update variational parameters 83 | mu = mu - alpha * d_mu 84 | rho = rho - alpha * d_rho 85 | end 86 | return mu, rho 87 | end 88 | 89 | end 90 | -------------------------------------------------------------------------------- /src/NMF.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Variational inference for Bayesian NMF 3 | """ 4 | module NMF 5 | using Distributions 6 | using StatsFuns.logsumexp, SpecialFunctions.digamma 7 | 8 | export NMFModel 9 | export sample_data, VI 10 | 11 | #################### 12 | ## Types 13 | struct NMFModel 14 | a_t::Array{Float64, 2} # D x K 15 | b_t::Array{Float64, 2} # D x L 16 | a_v::Float64 # 1 dim 17 | b_v::Float64 # 1 dim 18 | end 19 | 20 | function sqsum(mat::Array, idx) 21 | return squeeze(sum(mat, idx), idx) 22 | end 23 | 24 | #################### 25 | ## functions 26 | function init(X::Array{Int64, 2}, model::NMFModel) 27 | D, N = size(X) 28 | K = size(model.a_t, 2) 29 | S = zeros(D, K, N) 30 | A_t = rand(D, K) 31 | B_t = rand(D, K) 32 | A_v = rand(K, N) 33 | B_v = rand(K, N) 34 | for d in 1 : D 35 | for k in 1 : K 36 | for n in 1 : N 37 | S[d,k,n] = X[d,n] * A_t[d,k] * B_t[d,k] * A_v[k,n] * B_v[k,n] 38 | end 39 | end 40 | end 41 | return S, A_t, B_t, A_v, B_v 42 | end 43 | 44 | function update_S(X::Array{Int64, 2}, A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, A_v::Array{Float64, 2}, B_v::Array{Float64, 2}) 45 | D, K = size(A_t) 46 | N = size(A_v, 2) 47 | S = zeros(D, K, N) 48 | for d in 1 : D 49 | for n in 1 : N 50 | # K dim 51 | ln_P = (digamma.(A_t[d,:]) + log.(B_t[d,:]) 52 | + digamma.(A_v[:,n]) + log.(B_v[:,n]) 53 | ) 54 | ln_P = ln_P - logsumexp(ln_P) 55 | S[d,:,n] = X[d,n] * exp.(ln_P) 56 | end 57 | end 58 | return S 59 | end 60 | 61 | function update_T(S::Array{Float64, 3}, A_v::Array{Float64, 2}, B_v::Array{Float64, 2}, model::NMFModel) 62 | D, K, N = size(S) 63 | a_t = model.a_t # DxK 64 | b_t = model.b_t # DxK 65 | A_t = a_t + sqsum(S, 3) 66 | B_t = (a_t ./ b_t + repmat(sqsum(A_v.*B_v, 2)', D, 1)).^(-1) 67 | return A_t, B_t 68 | end 69 | 70 | function update_V(S::Array{Float64, 3}, A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, model::NMFModel) 71 | a_v = model.a_v 72 | b_v = model.b_v 
73 | D, K, N = size(S) 74 | A_v = a_v + sqsum(S, 1) 75 | B_v = (a_v / b_v + repmat(sqsum(A_t.*B_t, 1), 1, N)).^(-1) 76 | return A_v, B_v 77 | end 78 | 79 | """ 80 | Sample data given hyperparameters. 81 | """ 82 | function sample_data(N::Int, model::NMFModel) 83 | # TODO; check b or 1/b ? 84 | D, K = size(model.a_t) 85 | 86 | T = zeros(D, K) 87 | for d in 1 : D 88 | for k in 1 : K 89 | T[d,k] = rand(Gamma(model.a_t[d,k], 1.0/model.b_t[d,k])) # TODO: check 90 | end 91 | end 92 | 93 | V = reshape(rand(Gamma(model.a_v, 1.0/model.b_v), K*N) , K, N) # TODO: check 94 | 95 | S = zeros(D, K, N) 96 | for d in 1 : D 97 | for k in 1 : K 98 | for n in 1 : N 99 | S[d,k,n] = T[d,k] * V[k,n] 100 | end 101 | end 102 | end 103 | #X = sqsum(S, 2) + 0.0 # zero noise 104 | X = sqsum(S, 2) 105 | return X, T, S, V 106 | end 107 | 108 | function update_model(A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, model::NMFModel) 109 | return NMFModel(A_t, B_t, model.a_v, model.b_v) 110 | end 111 | 112 | """ 113 | Compute variational posterior distributions. 114 | """ 115 | function VI(X::Array{Int64, 2}, model::NMFModel, max_iter::Int) 116 | K = size(model.a_t, 2) 117 | D, N = size(X) 118 | S, A_t, B_t, A_v, B_v = init(X, model) 119 | for iter in 1 : max_iter 120 | # latent 121 | S = update_S(X, A_t, B_t, A_v, B_v) 122 | A_v, B_v = update_V(S, A_t, B_t, model) 123 | 124 | # param 125 | A_t, B_t = update_T(S, A_v, B_v, model) 126 | end 127 | 128 | return update_model(A_t, B_t, model), S, A_t.*B_t, A_v.*B_v 129 | end 130 | 131 | end 132 | -------------------------------------------------------------------------------- /src/PoissonHMM.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Bayesian 1dim Poisson Hidden Markov Model 3 | """ 4 | module PoissonHMM 5 | using StatsFuns.logsumexp, SpecialFunctions.digamma 6 | using Distributions 7 | 8 | export Gam, GHMM, Poi, HMM 9 | export sample_HMM, sample_data, winner_takes_all 10 | export learn_VI 11 | 12 | #################### 13 | ## Types 14 | struct Gam 15 | # Parameters of Gamma distribution 16 | # 1dim 17 | a::Float64 18 | b::Float64 19 | end 20 | 21 | struct BHMM 22 | # Parameters of Bayesian Bernoulli Mixture Model 23 | K::Int 24 | alpha_phi::Vector{Float64} 25 | alpha_A::Matrix{Float64} 26 | cmp::Vector{Gam} 27 | end 28 | 29 | struct Poi 30 | # Parameters of Poisson Distribution 31 | # 1 dim 32 | lambda::Float64 33 | end 34 | 35 | struct HMM 36 | # Parameters of Bernoulli Mixture Model 37 | K::Int 38 | phi::Vector{Float64} 39 | A::Matrix{Float64} 40 | cmp::Vector{Poi} 41 | end 42 | 43 | #################### 44 | ## Common functions 45 | """ 46 | Sample an HMM from prior 47 | """ 48 | function sample_HMM(bhmm::BHMM) 49 | cmp = Vector{Poi}() 50 | for c in bhmm.cmp 51 | lambda = rand(Gamma(c.a, 1.0/c.b)) 52 | push!(cmp, Poi(lambda)) 53 | end 54 | phi = rand(Dirichlet(bhmm.alpha_phi)) 55 | A = zeros(size(bhmm.alpha_A)) 56 | for k in 1 : bhmm.K 57 | A[:,k] = rand(Dirichlet(bhmm.alpha_A[:,k])) 58 | end 59 | return HMM(bhmm.K, phi, A, cmp) 60 | end 61 | 62 | """ 63 | Sample data from a specific Poisson HMM 64 | """ 65 | function sample_data(hmm::HMM, N::Int) 66 | X = zeros(N) 67 | Z = zeros(hmm.K, N) 68 | 69 | # sample (n=1) 70 | Z[:,1] = categorical_sample(hmm.phi) 71 | k = indmax(Z[:, 1]) 72 | X[1] = rand(Poisson(hmm.cmp[k].lambda)) 73 | 74 | # sample (n>1) 75 | for n in 2 : N 76 | Z[:,n] = categorical_sample(hmm.A[:,k]) 77 | k = indmax(Z[:, n]) 78 | X[n] = rand(Poisson(hmm.cmp[k].lambda)) 79 | end 80 | return X, Z 81 | end 82 | 
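# A minimal usage sketch (hypothetical hyperparameters): draw a 2-state Poisson HMM
# from the prior and generate a length-100 sequence.
#
#   prior = BHMM(2, ones(2), ones(2, 2), [Gam(1.0, 0.01), Gam(1.0, 0.01)])
#   hmm = sample_HMM(prior)
#   X, Z = sample_data(hmm, 100)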
83 | categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1] 84 | function categorical_sample(p::Vector{Float64}, N::Int) 85 | K = length(p) 86 | S = zeros(K, N) 87 | S_tmp = rand(Categorical(p), N) 88 | for k in 1 : K 89 | S[k,find(S_tmp.==k)] = 1 90 | end 91 | return S 92 | end 93 | 94 | function init_Z(X::Vector{Float64}, bhmm::BHMM) 95 | N = size(X, 1) 96 | K = bhmm.K 97 | 98 | Z = rand(Dirichlet(ones(K)/K), N) 99 | ZZ = [zeros(K,K) for _ in 1 : N - 1] 100 | for n in 1 : N - 1 101 | ZZ[n] = Z[:,n+1] * Z[:,n]' 102 | end 103 | 104 | return Z, ZZ 105 | end 106 | 107 | """ 108 | Not implemented yet. 109 | """ 110 | function calc_ELBO(X::Matrix{Float64}, pri::BHMM, pos::BHMM) 111 | end 112 | 113 | function add_stats(bhmm::BHMM, X::Vector{Float64}, 114 | Z::Matrix{Float64}, ZZ::Vector{Matrix{Float64}}) 115 | K = bhmm.K 116 | sum_Z = sum(Z, 2) 117 | alpha_phi = [bhmm.alpha_phi[k] + Z[k,1] for k in 1 : K] 118 | alpha_A = bhmm.alpha_A + sum(ZZ) 119 | cmp = Vector{Gam}() 120 | 121 | ZX = Z*X # (KxN) x (Nx1) = Kx1 122 | for k in 1 : K 123 | a = bhmm.cmp[k].a + ZX[k] 124 | b = bhmm.cmp[k].b + sum_Z[k] 125 | push!(cmp, Gam(a, b)) 126 | end 127 | return BHMM(K, alpha_phi, alpha_A, cmp) 128 | end 129 | 130 | remove_stats(bhmm::BHMM, X::Vector{Float64}, Z::Matrix{Float64}) = add_stats(bhmm, X, -Z) 131 | 132 | #################### 133 | ## used for Variational Inference 134 | function update_Z(bhmm::BHMM, X::Vector{Float64}, Z::Matrix{Float64}) 135 | N = size(X, 1) 136 | K = bhmm.K 137 | ln_expt_Z = zeros(K, N) 138 | 139 | ln_lkh = zeros(K, N) 140 | for k in 1 : K 141 | ln_lambda = digamma.(bhmm.cmp[k].a) - log.(bhmm.cmp[k].b) 142 | lambda = bhmm.cmp[k].a / bhmm.cmp[k].b 143 | for n in 1 : N 144 | ln_lkh[k,n] = X[n]'*(ln_lambda) - lambda 145 | end 146 | end 147 | 148 | expt_ln_A = zeros(size(bhmm.alpha_A)) 149 | for k in 1 : K 150 | expt_ln_A[:,k] = digamma.(bhmm.alpha_A[:,k]) - digamma.(sum(bhmm.alpha_A[:,k])) 151 | end 152 | 153 | # copy 154 | ln_expt_Z = log.(Z) 155 | 156 | # n = 1 157 | ln_expt_Z[:,1] = (digamma.(bhmm.alpha_phi) - digamma.(sum(bhmm.alpha_phi)) 158 | + expt_ln_A' * exp.(ln_expt_Z[:,2]) 159 | + ln_lkh[:,1] 160 | ) 161 | ln_expt_Z[:,1] = ln_expt_Z[:,1] - logsumexp(ln_expt_Z[:,1]) 162 | 163 | # 2 <= n <= N - 1 164 | for n in 2 : N - 1 165 | ln_expt_Z[:,n] =( expt_ln_A * exp.(ln_expt_Z[:,n-1]) 166 | + expt_ln_A' * exp.(ln_expt_Z[:,n+1]) 167 | + ln_lkh[:,n] 168 | ) 169 | ln_expt_Z[:,n] = ln_expt_Z[:,n] - logsumexp(ln_expt_Z[:,n]) 170 | end 171 | 172 | # n = N 173 | ln_expt_Z[:,N] =( expt_ln_A * exp.(ln_expt_Z[:,N-1]) 174 | + ln_lkh[:,N] 175 | ) 176 | ln_expt_Z[:,N] = ln_expt_Z[:,N] - logsumexp(ln_expt_Z[:,N]) 177 | 178 | # calc output 179 | Z_ret = exp.(ln_expt_Z) 180 | ZZ_ret = [zeros(K,K) for _ in 1 : N - 1] 181 | for n in 1 : N - 1 182 | ZZ_ret[n] = Z_ret[:,n+1] * Z_ret[:,n]' 183 | end 184 | return Z_ret, ZZ_ret 185 | end 186 | 187 | """ 188 | Pick single states having a max probability. 
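Returns a matrix of the same size as `Z` with exactly one 1 per column, placed at the row with the largest posterior probability.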
189 | """ 190 | function winner_takes_all(Z::Matrix{Float64}) 191 | Z_ret = zeros(size(Z)) 192 | for n in 1 : size(Z_ret, 2) 193 | idx = indmax(Z[:,n]) 194 | Z_ret[idx,n] = 1 195 | end 196 | return Z_ret 197 | end 198 | 199 | function logmatprod(ln_A::Array{Float64}, ln_B::Array{Float64}) 200 | I = size(ln_A, 1) 201 | J = size(ln_B, 2) 202 | ln_C = zeros(I, J) 203 | for i in 1 : I 204 | for j in 1 : J 205 | ln_C[i, j] = logsumexp(ln_A[i, :] + ln_B[:, j]) 206 | end 207 | end 208 | return ln_C 209 | end 210 | 211 | function update_Z_fb(bhmm::BHMM, X::Vector{Float64}) 212 | K = bhmm.K 213 | N = length(X) 214 | 215 | # calc likelihood 216 | ln_lik = zeros(K, N) 217 | for k in 1 : K 218 | ln_lambda = digamma.(bhmm.cmp[k].a) - log.(bhmm.cmp[k].b) 219 | lambda = bhmm.cmp[k].a / bhmm.cmp[k].b 220 | for n in 1 : N 221 | ln_lik[k,n] =X[n]'*(ln_lambda) - lambda 222 | end 223 | end 224 | expt_ln_phi = digamma.(bhmm.alpha_phi) - digamma.(sum(bhmm.alpha_phi)) 225 | expt_ln_A = zeros(K,K) 226 | for k in 1 : K 227 | expt_ln_A[:,k] = digamma.(bhmm.alpha_A[:,k]) - digamma.(sum(bhmm.alpha_A[:,k])) 228 | end 229 | 230 | Z, ZZ = fb_alg(ln_lik, expt_ln_phi, expt_ln_A) 231 | 232 | # different notation 233 | ZZ_ret = [ZZ[:,:,n] for n in 1:size(ZZ, 3)] 234 | 235 | return Z, ZZ_ret 236 | end 237 | 238 | function fb_alg(ln_lik::Matrix{Float64}, ln_phi::Vector{Float64}, ln_A::Matrix{Float64}) 239 | K, T = size(ln_lik) 240 | ln_Z = zeros(K, T) 241 | ln_ZZ = zeros(K, K, T) 242 | ln_alpha = zeros(K, T) 243 | ln_beta = zeros(K, T) 244 | ln_st = zeros(T) 245 | 246 | for t in 1 : T 247 | if t == 1 248 | ln_alpha[:, 1] = ln_phi + ln_lik[:, 1] 249 | else 250 | ln_alpha[:, t] = logmatprod(ln_A, ln_alpha[:, t-1]) + ln_lik[:, t] 251 | end 252 | ln_st[t] = logsumexp(ln_alpha[:, t]) 253 | ln_alpha[:,t] = ln_alpha[:,t] - ln_st[t] 254 | end 255 | 256 | for t in T-1 : -1 : 1 257 | ln_beta[:, t] = logmatprod(ln_A', ln_beta[:, t+1] + ln_lik[:,t+1]) 258 | ln_beta[:, t] = ln_beta[:, t] - ln_st[t+1] 259 | end 260 | 261 | ln_Z = ln_alpha + ln_beta 262 | for t in 1 : T 263 | if t < T 264 | ln_ZZ[:,:,t] = (repmat(ln_alpha[:, t]', K, 1) + ln_A 265 | + repmat(ln_lik[:, t+1] + ln_beta[:,t+1], 1, K)) 266 | ln_ZZ[:,:,t] = ln_ZZ[:,:,t] - ln_st[t+1] 267 | end 268 | end 269 | return exp.(ln_Z), exp.(ln_ZZ) 270 | end 271 | 272 | """ 273 | Compute approximate posterior distributions via variational inference. 
274 | """ 275 | function learn_VI(X::Vector{Float64}, prior_bhmm::BHMM, max_iter::Int) 276 | # initialisation 277 | expt_Z, expt_ZZ = init_Z(X, prior_bhmm) 278 | bhmm = add_stats(prior_bhmm, X, expt_Z, expt_ZZ) 279 | VB = NaN * zeros(max_iter) 280 | 281 | # inference 282 | for i in 1 : max_iter 283 | # E-step 284 | #expt_Z, expt_ZZ = update_Z(bhmm, X, expt_Z) 285 | expt_Z, expt_ZZ = update_Z_fb(bhmm, X) 286 | # M-step 287 | bhmm = add_stats(prior_bhmm, X, expt_Z, expt_ZZ) 288 | end 289 | 290 | return expt_Z, bhmm 291 | end 292 | end 293 | -------------------------------------------------------------------------------- /src/PoissonMixtureModel.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Bayesian Poisson Mixture Model 3 | """ 4 | module PoissonMixtureModel 5 | using StatsFuns.logsumexp, SpecialFunctions.digamma 6 | using Distributions 7 | 8 | export Gam, BPMM, Poi, PMM 9 | export sample_PMM, sample_data, winner_takes_all 10 | export learn_GS, learn_CGS, learn_VI 11 | 12 | #################### 13 | ## Types 14 | struct Gam 15 | # Parameters of Gamma distribution 16 | a::Vector{Float64} 17 | b::Float64 18 | end 19 | 20 | struct BPMM 21 | # Parameters of Bayesian Poisson Mixture Model 22 | D::Int 23 | K::Int 24 | alpha::Vector{Float64} 25 | cmp::Vector{Gam} 26 | end 27 | 28 | struct Poi 29 | # Parameters of Poisson Distribution 30 | lambda::Vector{Float64} 31 | end 32 | 33 | struct PMM 34 | # Parameters of Poisson Mixture Model 35 | D::Int 36 | K::Int 37 | phi::Vector{Float64} 38 | cmp::Vector{Poi} 39 | end 40 | 41 | #################### 42 | ## Common functions 43 | """ 44 | Sample a PMM given hyperparameters. 45 | """ 46 | function sample_PMM(bpmm::BPMM) 47 | cmp = Vector{Poi}() 48 | for c in bpmm.cmp 49 | lambda = Vector{Float64}() 50 | for d in 1 : bpmm.D 51 | push!(lambda, rand(Gamma(c.a[d], 1.0/c.b))) 52 | end 53 | push!(cmp, Poi(lambda)) 54 | end 55 | phi = rand(Dirichlet(bpmm.alpha)) 56 | return PMM(bpmm.D, bpmm.K, phi, cmp) 57 | end 58 | 59 | """ 60 | Sample data from a specific PMM model. 
61 | """ 62 | function sample_data(pmm::PMM, N::Int) 63 | X = zeros(pmm.D, N) 64 | S = categorical_sample(pmm.phi, N) 65 | for n in 1 : N 66 | k = indmax(S[:, n]) 67 | for d in 1 : pmm.D 68 | X[d,n] = rand(Poisson(pmm.cmp[k].lambda[d])) 69 | end 70 | end 71 | return X, S 72 | end 73 | 74 | categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1] 75 | function categorical_sample(p::Vector{Float64}, N::Int) 76 | K = length(p) 77 | S = zeros(K, N) 78 | S_tmp = rand(Categorical(p), N) 79 | for k in 1 : K 80 | S[k,find(S_tmp.==k)] = 1 81 | end 82 | return S 83 | end 84 | 85 | function init_S(X::Matrix{Float64}, bpmm::BPMM) 86 | N = size(X, 2) 87 | K = bpmm.K 88 | S = categorical_sample(ones(K)/K, N) 89 | return S 90 | end 91 | 92 | function calc_ELBO(X::Matrix{Float64}, pri::BPMM, pos::BPMM) 93 | ln_expt_S = update_S(pos, X) 94 | expt_S = exp.(ln_expt_S) 95 | K, N = size(expt_S) 96 | D = size(X, 1) 97 | 98 | expt_ln_lambda = zeros(D, K) 99 | expt_lambda = zeros(D, K) 100 | expt_ln_lkh = 0 101 | for k in 1 : K 102 | expt_ln_lambda[:,k] = digamma.(pos.cmp[k].a) - log.(pos.cmp[k].b) 103 | expt_lambda[:,k] = pos.cmp[k].a / pos.cmp[k].b 104 | for n in 1 : N 105 | expt_ln_lkh += expt_S[k,n] * (X[:, n]' * expt_ln_lambda[:,k] 106 | - sum(expt_lambda[:,k]) - sum(lgamma.(X[:,n]+1)))[1] 107 | end 108 | end 109 | 110 | expt_ln_pS = sum(expt_S' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha)))) 111 | expt_ln_qS = sum(expt_S .* ln_expt_S) 112 | 113 | KL_lambda = 0 114 | for k in 1 : K 115 | KL_lambda += (sum(pos.cmp[k].a)*log.(pos.cmp[k].b) - sum(pri.cmp[k].a)*log.(pri.cmp[k].b) 116 | - sum(lgamma.(pos.cmp[k].a)) + sum(lgamma.(pri.cmp[k].a)) 117 | + (pos.cmp[k].a - pri.cmp[k].a)' * expt_ln_lambda[:,k] 118 | + (pri.cmp[k].b - pos.cmp[k].b) * sum(expt_lambda[:,k]) 119 | )[1] 120 | end 121 | KL_pi = (lgamma.(sum(pos.alpha)) - lgamma.(sum(pri.alpha)) 122 | - sum(lgamma.(pos.alpha)) + sum(lgamma.(pri.alpha)) 123 | + (pos.alpha - pri.alpha)' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha))) 124 | )[1] 125 | 126 | VB = expt_ln_lkh + expt_ln_pS - expt_ln_qS - (KL_lambda + KL_pi) 127 | return VB 128 | end 129 | 130 | function add_stats(bpmm::BPMM, X::Matrix{Float64}, S::Matrix{Float64}) 131 | D = bpmm.D 132 | K = bpmm.K 133 | sum_S = sum(S, 2) 134 | alpha = [bpmm.alpha[k] + sum_S[k] for k in 1 : K] 135 | cmp = Vector{Gam}() 136 | 137 | XS = X*S'; 138 | for k in 1 : K 139 | a = [(bpmm.cmp[k].a[d] + XS[d,k])::Float64 for d in 1 : D] 140 | b = bpmm.cmp[k].b + sum_S[k] 141 | push!(cmp, Gam(a, b)) 142 | end 143 | return BPMM(D, K, alpha, cmp) 144 | end 145 | 146 | remove_stats(bpmm::BPMM, X::Matrix{Float64}, S::Matrix{Float64}) = add_stats(bpmm, X, -S) 147 | 148 | #################### 149 | ## used for Variational Inference 150 | function update_S(bpmm::BPMM, X::Matrix{Float64}) 151 | D, N = size(X) 152 | K = bpmm.K 153 | ln_expt_S = zeros(K, N) 154 | tmp = zeros(K) 155 | 156 | sum_digamma_tmp = digamma.(sum(bpmm.alpha)) 157 | for k in 1 : K 158 | tmp[k] = - sum(bpmm.cmp[k].a) / bpmm.cmp[k].b 159 | tmp[k] += digamma.(bpmm.alpha[k]) - sum_digamma_tmp 160 | end 161 | ln_lambda_X = [X'*(digamma.(bpmm.cmp[k].a) - log.(bpmm.cmp[k].b)) for k in 1 : K] 162 | for n in 1 : N 163 | tmp_ln_pi = [tmp[k] + ln_lambda_X[k][n] for k in 1 : K] 164 | ln_expt_S[:,n] = tmp_ln_pi - logsumexp(tmp_ln_pi) 165 | end 166 | return ln_expt_S 167 | end 168 | 169 | """ 170 | Pick single states having a max probability. 
171 | """ 172 | function winner_takes_all(S::Matrix{Float64}) 173 | S_ret = zeros(size(S)) 174 | for n in 1 : size(S_ret, 2) 175 | idx = indmax(S[:,n]) 176 | S_ret[idx,n] = 1 177 | end 178 | return S_ret 179 | end 180 | 181 | #################### 182 | ## used for Gibbs Sampling 183 | function sample_S_GS(pmm::PMM, X::Matrix{Float64}) 184 | D, N = size(X) 185 | K = pmm.K 186 | S = zeros(K, N) 187 | 188 | tmp = [-sum(pmm.cmp[k].lambda) + log.(pmm.phi[k]) for k in 1 : K] 189 | ln_lambda_X = [X'*log.(pmm.cmp[k].lambda) for k in 1 : K] 190 | for n in 1 : N 191 | tmp_ln_phi = [(tmp[k] + ln_lambda_X[k][n])::Float64 for k in 1 : K] 192 | tmp_ln_phi = tmp_ln_phi - logsumexp(tmp_ln_phi) 193 | S[:,n] = categorical_sample(exp.(tmp_ln_phi)) 194 | end 195 | return S 196 | end 197 | 198 | #################### 199 | ## used for Collapsed Gibbs Sampling 200 | function calc_ln_NB(Xn::Vector{Float64}, gam::Gam) 201 | ln_lkh = [(gam.a[d]*log.(gam.b) 202 | - lgamma.(gam.a[d]) 203 | + lgamma.(Xn[d] + gam.a[d]) 204 | - (Xn[d] + gam.a[d])*log.(gam.b + 1) 205 | )::Float64 for d in 1 : size(Xn, 1)] 206 | return sum(ln_lkh) 207 | end 208 | 209 | function sample_Sn(Xn::Vector{Float64}, bpmm::BPMM) 210 | ln_tmp = [(calc_ln_NB(Xn, bpmm.cmp[k]) + log.(bpmm.alpha[k])) for k in 1 : bpmm.K] 211 | ln_tmp = ln_tmp - logsumexp(ln_tmp) 212 | Sn = categorical_sample(exp.(ln_tmp)) 213 | return Sn 214 | end 215 | 216 | function sample_S_CGS(S::Matrix{Float64}, X::Matrix{Float64}, bpmm::BPMM) 217 | D, N = size(X) 218 | K = size(S, 1) 219 | for n in randperm(N) 220 | # remove 221 | bpmm = remove_stats(bpmm, X[:,[n]], S[:,[n]]) 222 | # sample 223 | S[:,n] = sample_Sn(X[:,n], bpmm) 224 | # insert 225 | bpmm = add_stats(bpmm, X[:,[n]], S[:,[n]]) 226 | end 227 | return S, bpmm 228 | end 229 | 230 | #################### 231 | ## Algorithm main 232 | """ 233 | Compute posterior distribution via variational inference. 234 | """ 235 | function learn_VI(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int) 236 | # initialisation 237 | expt_S = init_S(X, prior_bpmm) 238 | bpmm = add_stats(prior_bpmm, X, expt_S) 239 | VB = NaN * zeros(max_iter) 240 | 241 | # inference 242 | for i in 1 : max_iter 243 | # E-step 244 | expt_S = exp.(update_S(bpmm, X)) 245 | # M-step 246 | bpmm = add_stats(prior_bpmm, X, expt_S) 247 | # calc VB 248 | VB[i] = calc_ELBO(X, prior_bpmm, bpmm) 249 | end 250 | 251 | return expt_S, bpmm, VB 252 | end 253 | 254 | """ 255 | Compute posterior distribution via Gibbs sampling. 256 | """ 257 | function learn_GS(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int) 258 | # initialisation 259 | S = init_S(X, prior_bpmm) 260 | bpmm = add_stats(prior_bpmm, X, S) 261 | VB = NaN * zeros(max_iter) 262 | 263 | # inference 264 | for i in 1 : max_iter 265 | # sample parameters 266 | pmm = sample_PMM(bpmm) 267 | # sample latent variables 268 | S = sample_S_GS(pmm, X) 269 | # update current model 270 | bpmm = add_stats(prior_bpmm, X, S) 271 | # calc VB 272 | VB[i] = calc_ELBO(X, prior_bpmm, bpmm) 273 | end 274 | 275 | return S, bpmm, VB 276 | end 277 | 278 | """ 279 | Compute posterior distribution via collapsed Gibbs sampling. 
280 | """ 281 | function learn_CGS(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int) 282 | # initialisation 283 | S = init_S(X, prior_bpmm) 284 | bpmm = add_stats(prior_bpmm, X, S) 285 | VB = NaN * zeros(max_iter) 286 | 287 | # inference 288 | for i in 1 : max_iter 289 | # directly sample S 290 | S, bpmm = sample_S_CGS(S, X, bpmm) 291 | # calc VB 292 | VB[i] = calc_ELBO(X, prior_bpmm, bpmm) 293 | end 294 | 295 | return S, bpmm, VB 296 | end 297 | 298 | end 299 | -------------------------------------------------------------------------------- /src/demo_BayesNeuralNet.jl: -------------------------------------------------------------------------------- 1 | #################################### 2 | ## Demo script for Bayesian neural network. 3 | 4 | using PyPlot, PyCall 5 | 6 | push!(LOAD_PATH, ".") 7 | import BayesNeuralNet 8 | 9 | """ 10 | Sample neural nets from prior. 11 | """ 12 | function sample_test() 13 | # model parameters 14 | D = 1 # output 15 | K = 3 # hidden 16 | M = 2 # input 17 | sigma2_w = 10.0 18 | sigma2_y = 0.1 19 | 20 | xmin = -5 21 | xmax = 5 22 | N_lin = 1000 23 | X_lin = ones(M, N_lin) 24 | X_lin[1,:] = linspace(xmin, xmax, N_lin) 25 | X_lin[2,:] = 1 # bias 26 | 27 | # visualize 28 | num_samples = 5 29 | figure("Function samples") 30 | clf() 31 | for i in 1 : num_samples 32 | _, Y_true, _, _ = BayesNeuralNet.sample_data_from_prior(X_lin, sigma2_w, sigma2_y, D, K) 33 | plot(X_lin[1,:], Y_true) 34 | xlim([xmin, xmax]) 35 | end 36 | ratey = (ylim()[2] - ylim()[1]) * 0.1 37 | ratex = (xlim()[2] - xlim()[1]) * 0.1 38 | text(xlim()[1] + ratex, ylim()[2] - ratey, @sprintf("K=%d", K), fontsize=18) 39 | show() 40 | end 41 | 42 | """ 43 | Run a test script of variational inference for Bayesian neural net. 44 | """ 45 | function test() 46 | ################# 47 | # prepara data 48 | 49 | # data size 50 | D = 1 # output 51 | M = 2 # input 52 | 53 | # function setting 54 | xmin = -2 55 | xmax = 4 56 | N_lin = 1000 57 | X_lin = ones(M, N_lin) 58 | X_lin[1,:] = linspace(xmin, xmax, N_lin) 59 | X_lin[2,:] = 1 # bias 60 | 61 | # training data 62 | N = 50 # data size 63 | X = 2*rand(M, N) - 0.0 # input 64 | X[2,:] = 1.0 # bias 65 | Y = 0.5*sin.(2*pi * X[1,:]/3) + 0.05 * randn(N) 66 | 67 | # model parameters 68 | K = 5 69 | sigma2_w = 10.0 70 | sigma2_y = 0.01 71 | 72 | ################ 73 | # inference 74 | alpha = 1.0e-5 75 | max_iter = 100000 76 | mu1, rho1, mu2, rho2 = BayesNeuralNet.VI(Y, X, sigma2_w, sigma2_y, K, alpha, max_iter) 77 | Y_mean = [mu2'* tanh.(mu1'X_lin[:,n]) for n in 1 : N_lin] 78 | 79 | ################ 80 | # visualize 81 | figure("result") 82 | clf() 83 | Y_list = [] 84 | num_samples = 100 85 | for i in 1 : num_samples 86 | Y_est, _ = BayesNeuralNet.sample_data_from_posterior(X_lin, mu1, rho1, mu2, rho2, sigma2_y, D) 87 | push!(Y_list, Y_est) 88 | plot(X_lin[1,:], Y_est, "-c", alpha=0.25) 89 | end 90 | plot(X[1,:], Y, "ok") 91 | plot(X_lin[1,:], Y_mean, "b-") 92 | xlim([xmin, xmax]) 93 | xlabel("x") 94 | ylabel("y") 95 | show() 96 | end 97 | 98 | #sample_test() 99 | test() 100 | -------------------------------------------------------------------------------- /src/demo_DimensionalityReduction.jl: -------------------------------------------------------------------------------- 1 | ################################### 2 | ## Demo script for Bayesian Dimensionality Reduction 3 | 4 | using PyPlot, PyCall 5 | @pyimport sklearn.datasets as datasets 6 | 7 | push!(LOAD_PATH,".") 8 | import DimensionalityReduction 9 | 10 | function load_facedata(skip::Int) 11 | face = 
datasets.fetch_olivetti_faces() 12 | Y_raw = face["images"] 13 | N, S_raw, _ = size(Y_raw) 14 | 15 | L = round(Int, S_raw / skip) 16 | Y_tmp = Y_raw[:,1:skip:end, 1:skip:end] 17 | Y = convert(Array{Float64, 2}, reshape(Y_tmp, N, size(Y_tmp,2)*size(Y_tmp,3))') 18 | D = size(Y, 1) 19 | 20 | return Y, D, L 21 | end 22 | 23 | function visualize(Y::Array{Float64,2}, L::Int) 24 | D, N = size(Y) 25 | base = round(Int, sqrt(N)) 26 | v = round(Int, (L*ceil(N / base))) 27 | h = L * base 28 | pic = zeros(v, h) 29 | 30 | for n in 1 : N 31 | i = round(Int, (L*ceil(n / base))) 32 | idx1 = i - L + 1 : i 33 | idx2 = L*mod(n-1, base)+1 : L*(mod(n-1, base) + 1) 34 | pic[idx1,idx2] = reshape(Y[:,n], L, L) 35 | end 36 | imshow(pic, cmap=ColorMap("gray")) 37 | end 38 | 39 | function visualize(Y::Array{Float64,2}, L::Int, mask::BitArray{2}) 40 | # for missing 41 | D, N = size(Y) 42 | base = round(Int, sqrt(N)) 43 | v = round(Int, (L*ceil(N / base))) 44 | h = L * base 45 | pic = zeros(v, h, 3) 46 | 47 | Y_3dim = zeros(D, N, 3) 48 | for i in 1 : 3 49 | if i == 2 50 | Y_tmp = deepcopy(Y) 51 | Y_tmp[mask] = 1 52 | Y_3dim[:,:,i] = Y_tmp 53 | else 54 | Y_tmp = deepcopy(Y) 55 | Y_tmp[mask] = 0 56 | Y_3dim[:,:,i] = Y_tmp 57 | end 58 | end 59 | 60 | for n in 1 : N 61 | i = round(Int, (L*ceil(n / base))) 62 | idx1 = i - L + 1 : i 63 | idx2 = L*mod(n-1, base)+1 : L*(mod(n-1, base) + 1) 64 | for i in 1 : 3 65 | pic[idx1,idx2,i] = reshape(Y_3dim[:,n,i], L, L) 66 | end 67 | end 68 | imshow(pic, cmap=ColorMap("gray")) 69 | end 70 | 71 | """ 72 | Run a demo script of missing data interpolation for face dataset. 73 | """ 74 | function test_face_missing() 75 | # load data 76 | skip = 2 77 | Y, D, L = load_facedata(skip) 78 | 79 | # mask 80 | missing_rate = 0.50 81 | mask = rand(size(Y)) .< missing_rate 82 | Y_obs = deepcopy(Y) 83 | Y_obs[mask] = NaN 84 | 85 | # known parames 86 | M = 16 87 | sigma2_y = 0.001 88 | Sigma_W = zeros(M,M,D) 89 | Sigma_mu = 1.0 * eye(D) 90 | for d in 1 : D 91 | Sigma_W[:,:,d] = 0.1 * eye(M) 92 | end 93 | prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu) 94 | 95 | # learn & generate 96 | max_iter = 100 97 | posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter) 98 | Y_est = posterior.m_W'*X_est + repmat(posterior.m_mu, 1, size(X_est, 2)) 99 | Y_itp = deepcopy(Y_obs) 100 | Y_itp[mask] = Y_est[mask] 101 | 102 | #visualize 103 | N_show = 4^2 104 | 105 | figure("Observation") 106 | clf() 107 | visualize(Y_obs[:,1:N_show], L, mask[:,1:N_show]) 108 | title("Observation") 109 | 110 | #figure("Estimation") 111 | #clf() 112 | #visualize(Y_est[:,1:N_show], L) 113 | #title("Estimation") 114 | 115 | figure("Interpolation") 116 | clf() 117 | visualize(Y_itp[:,1:N_show], L) 118 | title("Interpolation") 119 | 120 | figure("Truth") 121 | clf() 122 | visualize(Y[:,1:N_show], L) 123 | title("Truth") 124 | show() 125 | end 126 | 127 | """ 128 | Run a dimensionality reduction demo using Iris dataset. 
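Compresses the 4-dimensional Iris measurements to 2- and 3-dimensional latent spaces via variational inference and scatter-plots the latent coordinates coloured by species.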
129 | """ 130 | function test_iris() 131 | ################## 132 | # load data 133 | iris = datasets.load_iris() 134 | Y_obs = iris["data"]' 135 | label_list = [iris["target_names"][elem+1] for elem in iris["target"]] 136 | D, N = size(Y_obs) 137 | 138 | ################## 139 | # 2D compression 140 | 141 | # model 142 | M = 2 143 | sigma2_y = 0.001 144 | Sigma_W = zeros(M,M,D) 145 | Sigma_mu = 1.0 * eye(D) 146 | for d in 1 : D 147 | Sigma_W[:,:,d] = 0.1 * eye(M) 148 | end 149 | prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu) 150 | 151 | # learn & generate 152 | max_iter = 100 153 | posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter) 154 | 155 | # visualize 156 | figure("2D plot") 157 | clf() 158 | scatter(X_est[1,1:50], X_est[2,1:50], color="r") 159 | scatter(X_est[1,51:100], X_est[2,51:100], color="g") 160 | scatter(X_est[1,101:end], X_est[2,101:end], color="b") 161 | xlabel("\$x_1\$", fontsize=20) 162 | ylabel("\$x_2\$", fontsize=20) 163 | legend([label_list[1], label_list[51], label_list[101]], fontsize=16) 164 | 165 | ################## 166 | # 3D compression 167 | 168 | # model 169 | M = 3 170 | sigma2_y = 0.001 171 | Sigma_W = zeros(M,M,D) 172 | Sigma_mu = 1.0 * eye(D) 173 | for d in 1 : D 174 | Sigma_W[:,:,d] = 0.1 * eye(M) 175 | end 176 | prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu) 177 | 178 | # learn & generate 179 | max_iter = 100 180 | posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter) 181 | 182 | # visualize 183 | figure("3D plot") 184 | clf() 185 | scatter3D(X_est[1,1:50], X_est[2,1:50], X_est[3,1:50], c="r") 186 | scatter3D(X_est[1,51:100], X_est[2,51:100], X_est[3,51:100], c="g") 187 | scatter3D(X_est[1,101:end], X_est[2,101:end], X_est[3,101:end], c="b") 188 | legend([label_list[1], label_list[51], label_list[101]], fontsize=16) 189 | xlabel("\$x_1\$", fontsize=20) 190 | ylabel("\$x_2\$", fontsize=20) 191 | zlabel("\$x_3\$", fontsize=20) 192 | show() 193 | end 194 | 195 | #test_face_missing() 196 | test_iris() 197 | -------------------------------------------------------------------------------- /src/demo_GaussianMixtureModel.jl: -------------------------------------------------------------------------------- 1 | ################################### 2 | ## Example code 3 | ## for Bayesian Gaussin Mixture Model 4 | 5 | using PyPlot, PyCall 6 | push!(LOAD_PATH,".") 7 | import GaussianMixtureModel 8 | 9 | """ 10 | Visualize data & estimation in 2D space. 11 | """ 12 | function visualize_2D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}, text) 13 | cmp = get_cmap("jet") 14 | 15 | K1 = size(S, 1) 16 | K2 = size(S_est, 1) 17 | col1 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K1)] 18 | col2 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K2)] 19 | 20 | f, (ax1, ax2) = subplots(1,2,num=text) 21 | f[:clf]() 22 | f, (ax1, ax2) = subplots(1,2,num=text) 23 | 24 | for k in 1 : K1 25 | ax1[:scatter](X[1, S[k,:].==1], X[2, S[k,:].==1], color=col1[k]) 26 | end 27 | ax1[:set_title]("truth") 28 | 29 | for k in 1 : K2 30 | ax2[:scatter](X[1, S_est[k,:].==1], X[2, S_est[k,:].==1], color=col2[k]) 31 | end 32 | 33 | ax2[:set_title]("estimation") 34 | end 35 | 36 | """ 37 | Run a test script for 2D data clustering. 
38 | """ 39 | function test_2D() 40 | ## set model 41 | D = 2 # data dimension 42 | K = 4 # number of mixture components 43 | alpha = 100.0 * ones(K) 44 | beta = 0.1 45 | m = zeros(D) 46 | nu = D + 1.0 47 | W = eye(D) 48 | cmp = [GaussianMixtureModel.GW(beta, m, nu, W) for _ in 1 : K] 49 | bgmm = GaussianMixtureModel.BGMM(D, K, alpha, cmp) 50 | 51 | ## generate data 52 | N = 300 53 | gmm = GaussianMixtureModel.sample_GMM(bgmm) 54 | X, S = GaussianMixtureModel.sample_data(gmm, N) 55 | 56 | ## inference 57 | max_iter = 100 58 | tic() 59 | S_est, post_bgmm, VB = GaussianMixtureModel.learn_VI(X, bgmm, max_iter) 60 | #S_est, post_bgmm, VB = GaussianMixtureModel.learn_GS(X, bgmm, max_iter) 61 | #S_est, post_bgmm, VB = GaussianMixtureModel.learn_CGS(X, bgmm, max_iter) 62 | toc() 63 | 64 | ## plot 65 | visualize_2D(X, S, GaussianMixtureModel.winner_takes_all(S_est), "2D plot") 66 | 67 | # VB check 68 | figure("ELBO") 69 | clf() 70 | plot(VB) 71 | ylabel("ELBO") 72 | xlabel("iterations") 73 | show() 74 | end 75 | 76 | test_2D() 77 | -------------------------------------------------------------------------------- /src/demo_LogisticRegression.jl: -------------------------------------------------------------------------------- 1 | ##################################### 2 | ## Bayesian logistic regression demo 3 | 4 | using PyPlot, PyCall 5 | using Distributions 6 | push!(LOAD_PATH, ".") 7 | import LogisticRegression 8 | 9 | """ 10 | Visualize prediction via surface (only for 2D inputs.) 11 | """ 12 | function visualize_surface(mu, rho, X, Y, text) 13 | N = 100 14 | R = 100 15 | xmin = minimum(X[1,:]) 16 | xmax = maximum(X[1,:]) 17 | ymin = minimum(X[2,:]) 18 | ymax = maximum(X[2,:]) 19 | lx = xmax - xmin 20 | ly = ymax - ymin 21 | xmin = xmin - 0.25 * lx 22 | xmax = xmax + 0.25 * lx 23 | ymin = ymin - 0.25 * ly 24 | ymax = ymax + 0.25 * ly 25 | 26 | x1 = linspace(xmin,xmax,R) 27 | x2 = linspace(ymin,ymax,R) 28 | x1grid = repmat(x1, 1, R) 29 | x2grid = repmat(x2', R, 1) 30 | val = [x1grid[:] x2grid[:]]' 31 | 32 | z_list = [] 33 | sigma = log.(1 + exp.(rho)) 34 | for n in 1 : N 35 | W = rand(MvNormal(mu, diagm(sigma.^2))) 36 | z_tmp = [LogisticRegression.sigmoid(W'*val[:,i]) for i in 1 : size(val, 2)] 37 | push!(z_list, z_tmp) 38 | end 39 | z = mean(z_list) 40 | zgrid = reshape(z, R, R) 41 | 42 | # 3D plot 43 | figure("surface") 44 | clf() 45 | plot_surface(x1grid, x2grid, zgrid, alpha=0.5) 46 | scatter3D(X[1,Y.==1], X[2,Y.==1], Y[Y.==1]+0.01, c="r", depthshade=true) 47 | scatter3D(X[1,Y.==0], X[2,Y.==0], Y[Y.==0], c="b", depthshade=true) 48 | xlim([xmin, xmax]) 49 | ylim([ymin, ymax]) 50 | zlim([0, 1]) 51 | title(text) 52 | end 53 | 54 | """ 55 | Visualize prediction via contour (only for 2D inputs.) 
56 | """ 57 | function visualize_contour(mu, rho, X, Y) 58 | N = 100 59 | R = 100 60 | xmin = 2*minimum(X[1,:]) 61 | xmax = 2*maximum(X[1,:]) 62 | ymin = minimum(X[2,:]) 63 | ymax = maximum(X[2,:]) 64 | 65 | x1 = linspace(xmin,xmax,R) 66 | x2 = linspace(ymin,ymax,R) 67 | x1grid = repmat(x1, 1, R) 68 | x2grid = repmat(x2', R, 1) 69 | val = [x1grid[:] x2grid[:]]' 70 | 71 | z_list = [] 72 | W_list = [] 73 | sigma = log.(1 + exp.(rho)) 74 | for n in 1 : N 75 | W = rand(MvNormal(mu, diagm(sigma.^2))) 76 | z_tmp = [LogisticRegression.sigmoid(W'*val[:,i]) for i in 1 : size(val, 2)] 77 | push!(W_list, W) 78 | push!(z_list, z_tmp) 79 | end 80 | z = mean(z_list) 81 | zgrid = reshape(z, R, R) 82 | 83 | # precition 84 | figure("contour") 85 | clf() 86 | contour(x1grid, x2grid, zgrid, alpha=0.5, cmap=get_cmap("bwr")) 87 | scatter(X[1,Y.==1], X[2,Y.==1], c="r") 88 | scatter(X[1,Y.==0], X[2,Y.==0], c="b") 89 | xlim([xmin, xmax]) 90 | ylim([ymin, ymax]) 91 | title("prediction") 92 | 93 | # parameter samples 94 | figure("samples") 95 | clf() 96 | for n in 1 : 10 97 | draw_line(W_list[n], xmin, xmax) 98 | end 99 | scatter(X[1,Y.==1]', X[2,Y.==1]', c="r") 100 | scatter(X[1,Y.==0]', X[2,Y.==0]', c="b") 101 | xlim([xmin, xmax]) 102 | ylim([ymin, ymax]) 103 | title("parameter samples") 104 | end 105 | 106 | function draw_line(W, xmin, xmax) 107 | y1 = - xmin*W[1]/W[2] 108 | y2 = - xmax*W[1]/W[2] 109 | plot([xmin, xmax], [y1, y2], c="k") 110 | end 111 | 112 | 113 | ######################## 114 | # create model 115 | 116 | M = 2 # input dimension 117 | Sigma_w = 100.0 * eye(M) # prior on W 118 | 119 | ######################## 120 | # create toy-data using prior model 121 | 122 | N = 50 # num of data points 123 | X = 2 * rand(M, N) - 1.0 # input values 124 | 125 | # sample observation Y 126 | Y, _ = LogisticRegression.sample_data(X, Sigma_w) 127 | 128 | ######################## 129 | # inference 130 | alpha = 1.0e-4 # learning rate 131 | max_iter = 100000 # VI maximum iterations 132 | 133 | # learn variational parameters (mu & rho) 134 | mu, rho = LogisticRegression.VI(Y, X, M, Sigma_w, alpha, max_iter) 135 | 136 | ######################## 137 | # visualize (only for M=2) 138 | visualize_surface(mu, rho, X, Y, "prediction") 139 | visualize_contour(mu, rho, X, Y) 140 | show() 141 | -------------------------------------------------------------------------------- /src/demo_NMF.jl: -------------------------------------------------------------------------------- 1 | ############################## 2 | ## Audio decomposition demo using NMF 3 | 4 | using PyPlot, PyCall 5 | using DataFrames 6 | using Distributions 7 | 8 | push!(LOAD_PATH, ".") 9 | import NMF 10 | @pyimport scipy.io.wavfile as wf 11 | 12 | 13 | # load data 14 | wavfile = "../data/organ.wav" 15 | fs, data = wf.read(wavfile) 16 | 17 | figure("data") 18 | clf() 19 | Pxx, freqs, t, pl = specgram(data[10000:318000,2], Fs=fs, NFFT=256, noverlap=0) 20 | xlabel("time [sec]") 21 | ylabel("frequency [Hz]") 22 | ylim([0,22000]) 23 | 24 | # model 25 | D, N = size(Pxx) 26 | K = 2 27 | a_t = 1.0 28 | b_t = 1.0 29 | a_v = 1.0 30 | b_v = 100.0 31 | prior = NMF.NMFModel(a_t*ones(D,K), b_t*ones(D, K), a_v, b_v) 32 | 33 | # inference 34 | max_iter = 100 35 | posterior, S_est, T_est, V_est = NMF.VI(Int64.(round.(Pxx)), prior, max_iter) 36 | X = T_est * V_est 37 | 38 | # visualize 39 | figure("T") 40 | clf() 41 | for k in 1 : K 42 | subplot(K,1,k) 43 | plot(T_est[:,k], linewidth=1.0) 44 | xlim([0, D]) 45 | ylim([0, ylim()[2]]) 46 | end 47 | 48 | figure("V") 49 | clf() 50 | for 
k in 1 : K
51 | subplot(K,1,k)
52 | plot(V_est[k,:], linewidth=1.0)
53 | xlim([0,N])
54 | ylim([0, ylim()[2]])
55 | end
56 | show()
57 | --------------------------------------------------------------------------------
/src/demo_PoissonHMM.jl:
--------------------------------------------------------------------------------
1 | ###################################
2 | ## Example code
3 | ## for Bayesian Poisson HMM
4 |
5 | using PyPlot, PyCall
6 | using HDF5, JLD
7 | @pyimport matplotlib.gridspec as gspec
8 |
9 | push!(LOAD_PATH,".")
10 | import PoissonHMM
11 | import PoissonMixtureModel
12 |
13 | """
14 | Simple comparison between HMM and mixture model.
15 | """
16 | function test_comparison()
17 | #########################
18 | ## load data
19 | file_name = "../data/timeseries.jld"
20 | X = load(file_name)["obs"]
21 | N = length(X)
22 |
23 | #########################
24 | ## Poisson HMM
25 |
26 | ## set model
27 | K = 2 # number of hidden states
28 | alpha_phi = 10.0 * ones(K)
29 | alpha_A = 100.0 * eye(K) + 1.0*ones(K, K)
30 | cmp = [PoissonHMM.Gam(1.0, 0.01), PoissonHMM.Gam(1.0, 0.01)]
31 | bhmm = PoissonHMM.BHMM(K, alpha_phi, alpha_A, cmp)
32 |
33 | ## inference
34 | max_iter = 100
35 | tic()
36 | Z_est_hmm, post_bhmm = PoissonHMM.learn_VI(X, bhmm, max_iter)
37 | toc()
38 |
39 | #########################
40 | ## Poisson Mixture Model
41 |
42 | ## set model
43 | K = 2 # number of mixture components
44 | alpha_phi = 10.0 * ones(K)
45 | cmp = [PoissonMixtureModel.Gam([1.0], 0.01), PoissonMixtureModel.Gam([1.0], 0.01)]
46 | bpmm = PoissonMixtureModel.BPMM(1, K, alpha_phi, cmp)
47 |
48 | ## inference
49 | max_iter = 100
50 | tic()
51 | Z_est_pmm, post_bpmm = PoissonMixtureModel.learn_VI(reshape(X, 1, N), bpmm, max_iter)
52 | toc()
53 |
54 | #########################
55 | ## Compare results
56 | figure("Hidden Markov Model vs Mixture Model")
57 | subplot(3,1,1);plot(X);ylabel("data")
58 | subplot(3,1,2);fill_between(1:N, reshape(Z_est_hmm[1,:]', N), zeros(N));ylim([0.0, 1.0]);ylabel("S (PHMM)")
59 | subplot(3,1,3);fill_between(1:N, reshape(Z_est_pmm[1,:]', N), zeros(N));ylim([0.0, 1.0]);ylabel("S (PMM)")
60 | show()
61 | end
62 |
63 | test_comparison()
64 | --------------------------------------------------------------------------------
/src/demo_PoissonMixtureModel.jl:
--------------------------------------------------------------------------------
1 | ###################################
2 | ## Example code
3 | ## for Bayesian Poisson Mixture Model
4 |
5 | push!(LOAD_PATH,".")
6 | using PyPlot, PyCall
7 | import PoissonMixtureModel
8 |
9 | """
10 | Visualize data & estimation in 2D space.
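`S` and `S_est` are K-by-N one-hot assignment matrices (one row per mixture component); each point is colored according to the row that equals 1. In `test_2D` below, the soft responsibilities returned by inference are first hardened with `winner_takes_all` before being passed in.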
11 | """ 12 | function visualize_2D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}, text) 13 | cmp = get_cmap("jet") 14 | 15 | K1 = size(S, 1) 16 | K2 = size(S_est, 1) 17 | col1 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K1)] 18 | col2 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K2)] 19 | 20 | f, (ax1, ax2) = subplots(1,2,num=text) 21 | f[:clf]() 22 | f, (ax1, ax2) = subplots(1,2,num=text) 23 | 24 | for k in 1 : K1 25 | ax1[:scatter](X[1, S[k,:].==1], X[2, S[k,:].==1], color=col1[k]) 26 | end 27 | ax1[:set_title]("truth") 28 | 29 | for k in 1 : K2 30 | ax2[:scatter](X[1, S_est[k,:].==1], X[2, S_est[k,:].==1], color=col2[k]) 31 | end 32 | 33 | ax2[:set_title]("estimation") 34 | end 35 | 36 | function draw_hist(ax, X, S, label) 37 | counts, bins, patches = ax[:hist](X', 20) 38 | for i in 1 : length(patches) 39 | if counts[i] > 0 40 | S_tmp = S[:,bins[i] .<= X[1,:] .<= bins[i+1]] 41 | S_sum = sum(S_tmp, 2) / sum(S_tmp) 42 | patches[i][:set_facecolor]((S_sum[1], 0, S_sum[2])) 43 | end 44 | end 45 | ax[:set_title](label) 46 | end 47 | 48 | """ 49 | Visualize data & estimation using 1D histogram. 50 | """ 51 | function visualize_1D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}) 52 | # separated figures 53 | f1, ax1 = subplots(1,1,num="observation") 54 | f2, ax2 = subplots(1,1,num="estimation") 55 | f1[:clf]() 56 | f2[:clf]() 57 | _, ax1 = subplots(1,1,num="observation") 58 | _, ax2 = subplots(1,1,num="estimation") 59 | ax1[:hist](X', 20) 60 | ax1[:set_title]("observation") 61 | draw_hist(ax2, X, S_est, "estimation") 62 | end 63 | 64 | """ 65 | Run a test script for 1D data clustering. 66 | """ 67 | function test_1D() 68 | ## set model 69 | D = 1 # data dimension, must be 1. 70 | K = 2 # number of mixture components, must be 2. 71 | alpha = 100.0 * ones(K) 72 | cmp = [PoissonMixtureModel.Gam(1.0*ones(D), 0.01) for i in 1 : K] 73 | bpmm = PoissonMixtureModel.BPMM(D, K, alpha, cmp) 74 | 75 | ## generate data 76 | N = 1000 77 | pmm = PoissonMixtureModel.sample_PMM(bpmm) 78 | X, S = PoissonMixtureModel.sample_data(pmm, N) 79 | 80 | ## inference 81 | max_iter = 100 82 | tic() 83 | S_est, post_bpmm, VB = PoissonMixtureModel.learn_VI(X, bpmm, max_iter) 84 | #S_est, post_bpmm, VB = PoissonMixtureModel.learn_GS(X, bpmm, max_iter) 85 | #S_est, post_bpmm, VB = PoissonMixtureModel.learn_CGS(X, bpmm, max_iter) 86 | toc() 87 | 88 | ## plot 89 | visualize_1D(X, S, S_est) 90 | 91 | figure("ELBO") 92 | clf() 93 | plot(VB) 94 | ylabel("ELBO") 95 | xlabel("iterations") 96 | show() 97 | end 98 | 99 | """ 100 | Run a test script for 2D data clustering. 101 | """ 102 | function test_2D() 103 | ## set model 104 | D = 2 # data dimension, must be 2. 
105 | K = 8 # number of mixture components 106 | #K = 5 107 | 108 | alpha = 100.0 * ones(K) 109 | cmp = [PoissonMixtureModel.Gam(1.0*ones(D), 0.01) for i in 1 : K] 110 | bpmm = PoissonMixtureModel.BPMM(D, K, alpha, cmp) 111 | 112 | ## generate data 113 | N = 300 114 | pmm = PoissonMixtureModel.sample_PMM(bpmm) 115 | X, S = PoissonMixtureModel.sample_data(pmm, N) 116 | 117 | ## inference 118 | max_iter = 100 119 | tic() 120 | S_est, post_bpmm, VB = PoissonMixtureModel.learn_VI(X, bpmm, max_iter) 121 | #S_est, post_bpmm, VB = PoissonMixtureModel.learn_GS(X, bpmm, max_iter) 122 | #S_est, post_bpmm, VB = PoissonMixtureModel.learn_CGS(X, bpmm, max_iter) 123 | toc() 124 | 125 | ## plot 126 | visualize_2D(X, S, PoissonMixtureModel.winner_takes_all(S_est), "2D plot") 127 | 128 | # VB check 129 | figure("ELBO") 130 | clf() 131 | plot(VB) 132 | ylabel("ELBO") 133 | xlabel("iterations") 134 | show() 135 | end 136 | 137 | test_1D() 138 | #test_2D() 139 | 140 | -------------------------------------------------------------------------------- /src/demo_PolynomialRegression.jl: -------------------------------------------------------------------------------- 1 | ################################# 2 | ## Bayesian model selection demo 3 | ## for polynomial regression 4 | 5 | using PyPlot, PyCall 6 | using Distributions 7 | 8 | function poly(X_raw, M) 9 | N = size(X_raw, 1) 10 | X = zeros(M, N) 11 | for m in 0 : M - 1 12 | X[m+1,:] = X_raw.^m 13 | end 14 | return X 15 | end 16 | 17 | function learn_bayes(X_raw, Y, M, sig2_y, Sig_w, X_lin) 18 | X = poly(X_raw, M) 19 | N = size(X_raw, 1) 20 | 21 | # calc posterior 22 | Sig_w_h = inv(X*inv(sig2_y*eye(N))*X' + inv(Sig_w)) 23 | mu_w_h = Sig_w_h * (X * inv(sig2_y * eye(N)) * Y) 24 | 25 | # calc predictive 26 | X_test = poly(X_lin, M) 27 | Y_est = (mu_w_h'*X_test)' 28 | sig2_y_prd = sig2_y + diag(X_test'Sig_w_h*X_test) 29 | 30 | # calc evidence 31 | evidence = -0.5*(sum(Y)*inv(sig2_y) +N*log.(sig2_y) + N*log.(2*pi) 32 | + logdet(Sig_w) 33 | - (mu_w_h'*inv(Sig_w_h)*mu_w_h)[1] - logdet(Sig_w_h) 34 | ) 35 | return Y_est, sqrt.(sig2_y_prd), evidence 36 | end 37 | 38 | function test() 39 | # linspace 40 | X_lin = linspace(-1, 7, 200) 41 | 42 | # generate data 43 | N = 10 44 | sig2_y = 0.1 45 | X = 2*pi*rand(N) 46 | Y_true = [sin.(x) for x in X_lin] 47 | Y_obs = [sin.(x) + sig2_y * randn() for x in X] 48 | 49 | dims = [1, 2, 3, 4, 5, 10] 50 | 51 | # learning via Bayes 52 | sig2_w = 1.0 53 | Y_bayes = [learn_bayes(X, Y_obs, m, sig2_y, sig2_w*eye(m), X_lin) for m in dims] 54 | 55 | ############# 56 | # compute evidences 57 | evidence = [learn_bayes(X, Y_obs, m, sig2_y, sig2_w*eye(m), X_lin)[3] for m in dims] 58 | figure("evidence") 59 | clf() 60 | plot(1:length(dims), evidence) 61 | xticks(1:length(dims),dims) 62 | ylabel(("\$\\ln p(\\bf{Y}|\\bf{X})\$"), fontsize=20) 63 | xlabel(("\$M\$"), fontsize=20) 64 | 65 | ############# 66 | # visualize 67 | x_min = X_lin[1] 68 | x_max = X_lin[end] 69 | y_min = -4 70 | y_max = 4 71 | 72 | figure("prediction") 73 | clf() 74 | for k in 1 : 6 75 | subplot(230 + k) 76 | plot(X_lin, Y_bayes[k][1]) 77 | plot(X_lin, Y_bayes[k][1] + Y_bayes[k][2], "c--") 78 | plot(X_lin, Y_bayes[k][1] - Y_bayes[k][2], "c--") 79 | plot(X, Y_obs, "ko") 80 | xlim([x_min, x_max]) 81 | ylim([y_min, y_max]) 82 | text(x_max - 2.5, y_max - 1, @sprintf("M=%d", dims[k])) 83 | end 84 | show() 85 | end 86 | 87 | test() 88 | 89 | -------------------------------------------------------------------------------- /src/demo_Simple2DGauss.jl: 
--------------------------------------------------------------------------------
1 | ###################################
2 | ## Simple VI & GS for 2D Gaussian
3 |
4 | using PyPlot
5 | using Distributions
6 |
7 | function calc_KL(mu1, lambda1, mu2, lambda2)
8 | D = size(mu1, 1)
9 | px_lnqx = 0.5 * logdet(lambda2) - 0.5 * ((mu1 - mu2)' * lambda2 * (mu1 - mu2) + trace(lambda2 * inv(lambda1)))
10 | px_lnpx = 0.5 * logdet(lambda1) - 0.5 * D
11 | KL = - (px_lnqx - px_lnpx)
12 | return KL[1]
13 | end
14 |
15 | function plot_results(result, truth)
16 | N = size(result, 1)
17 | H = Int(ceil(sqrt(N)))
18 | W = Int(ceil(N / H))
19 | for i in 1 : H
20 | for j in 1 : W
21 | n = (i - 1) * W + j
22 | if n <= N
23 | subplot(H, W, n)
24 | title("$n of $N")
25 | plot_gaussian(truth[1], truth[2], "b", "\$p(z)\$")
26 | plot_gaussian(result[n][1], result[n][2], "r", "\$q(z)\$")
27 | end
28 | end
29 | end
30 | end
31 |
32 | function plot_lines(X)
33 | D, N = size(X)
34 | X_d = zeros(D, 2*N + 1)
35 | X_d[:,1] = X[:,1]
36 | for i in 1 : N
37 | X_d[1, 2*i - 1] = X[1, i]
38 | X_d[1, 2*i] = X[1, i]
39 | X_d[2, 2*i] = X[2, i]
40 | X_d[2, 2*i + 1] = X[2, i]
41 | end
42 | plot(X[1,:], X[2,:], "oy")
43 | plot(X_d[1,1:2*N], X_d[2,1:2*N], "--y")
44 | end
45 |
46 | function plot_gaussian(Mu, Sigma, col, label)
47 | res = 100
48 | plot(Mu[1], Mu[2], "x", color=col)
49 |
50 | F = eigfact(Sigma)
51 | vec = F.vectors
52 | val = F.values
53 | dw = 2*pi/res
54 | w = dw * (0 : res)
55 |
56 | c = 1.0
57 | a = sqrt(c*val[1])
58 | b = sqrt(c*val[2])
59 | P1 = a*cos.(w)
60 | P2 = b*sin.(w)
61 | P = Mu .+ vec'*vcat(P1', P2')
62 | plot(P[1, :], P[2, :], "-", color=col, label=label)
63 | end
64 |
65 | """
66 | Variational inference for 2D Gauss.
67 | """
68 | function main_VI()
69 | ## create true distribution
70 | D = 2 # dimension
71 | theta = 2.0*pi/12 # tilt
72 | A = reshape([cos.(theta), -sin.(theta),
73 | sin.(theta), cos.(theta)],
74 | 2, 2)
75 | mu = [0.0, 0.0]
76 | lambda = inv(A * inv(reshape([1,0,0,10], 2, 2)) * A')
77 |
78 | ## initialize
79 | #mu_h = randn(D)
80 | mu_h = [-0.5, 0.3]
81 | lambda_h = zeros(D,D)
82 |
83 | ## main iteration
84 | max_iter = 10
85 | KL = NaN * Array{Float64, 1}(max_iter)
86 | result = Array{Any, 1}(max_iter)
87 | for i in 1 : max_iter
88 | ## update
89 | mu_h[1] = mu[1] - inv(lambda[1,1])*lambda[1,2] * (mu_h[2] - mu[2])
90 |
91 | lambda_h[1,1] = lambda[1,1]
92 | mu_h[2] = mu[2] - inv(lambda[2,2])*lambda[2,1] * (mu_h[1] - mu[1])
93 | lambda_h[2,2] = lambda[2,2]
94 |
95 | ## calculate KL divergence
96 | KL[i] = calc_KL(mu_h, lambda_h, mu, lambda)
97 |
98 | ## store the results
99 | result[i] = [deepcopy(mu_h), deepcopy(inv(lambda_h))]
100 | end
101 |
102 | ## visualize results
103 | figure("result per iteration (VI)")
104 | clf()
105 | plot_results(result, (mu, inv(lambda)))
106 |
107 | figure("result (VI)")
108 | clf()
109 | plot_gaussian(mu, inv(lambda), "b", "\$p(\\bf{z})\$")
110 | plot_gaussian(result[end][1], result[end][2], "r", "\$q(\\bf{z})\$")
111 | xlabel("\$z_1\$", fontsize=20)
112 | ylabel("\$z_2\$", fontsize=20)
113 | legend(fontsize=16)
114 |
115 | figure("KL divergence (VI)")
116 | clf()
117 | plot(1:max_iter, KL)
118 | ylabel("KL divergence", fontsize=16)
119 | xlabel("iteration", fontsize=16)
120 | show()
121 | end
122 |
123 | """
124 | Gibbs sampling for 2D Gauss.
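Each iteration samples the two coordinates alternately from their exact full conditionals under the target N(mu, inv(lambda)); for a bivariate Gaussian parameterized by its precision matrix these are

    z1 | z2 ~ N(mu[1] - inv(lambda[1,1]) * lambda[1,2] * (z2 - mu[2]), inv(lambda[1,1]))
    z2 | z1 ~ N(mu[2] - inv(lambda[2,2]) * lambda[2,1] * (z1 - mu[1]), inv(lambda[2,2]))

which is what the update lines in the loop implement. The KL divergence between a Gaussian fit to the samples collected so far and the target is tracked as a convergence check.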
125 | """ 126 | function main_GS() 127 | ## creat truth distribution 128 | D = 2 # dimension 129 | theta = 2.0*pi/12 # tilt 130 | A = reshape([cos.(theta), -sin.(theta), 131 | sin.(theta), cos.(theta)], 132 | 2, 2) 133 | mu = [0.0, 0.0] 134 | #lambda = inv(A * inv(reshape([1,0,0,10], 2, 2)) * A') 135 | lambda = inv(A * inv(reshape([1,0,0,100], 2, 2)) * A') 136 | 137 | ## initialize 138 | #max_iter = 1000 139 | max_iter = 50 140 | X = randn(D, max_iter) 141 | mu_h = randn(D) 142 | 143 | ## main iteration 144 | KL = NaN * Array{Float64, 1}(max_iter) 145 | for i in 2 : max_iter 146 | ## update 147 | mu_h[1] = mu[1] - inv(lambda[1,1])*lambda[1,2] * (X[2,i-1] - mu[2]) 148 | X[1, i] = rand(Normal(mu_h[1], sqrt(inv(lambda[1,1])))) 149 | 150 | mu_h[2] = mu[2] - inv(lambda[2,2])*lambda[2,1] * (X[1,i] - mu[1]) 151 | X[2, i] = rand(Normal(mu_h[2], sqrt(inv(lambda[2,2])))) 152 | 153 | if i > D 154 | KL[i] = calc_KL(mean(X[:,1:i], 2), inv(cov(X[:,1:i], 2)), mu, lambda) 155 | end 156 | end 157 | 158 | ## visualize results 159 | expt_mu = mean(X, 2) 160 | expt_Sigma = cov(X, 2) 161 | 162 | figure("samples (GS)") 163 | clf() 164 | plot_lines(X) 165 | plot_gaussian(mu, inv(lambda), "b", "\$p(\\bf{z})\$") 166 | plot_gaussian(expt_mu, expt_Sigma, "r", "\$q(\\bf{z})\$") 167 | xlabel("\$z_1\$", fontsize=20) 168 | ylabel("\$z_2\$", fontsize=20) 169 | legend(fontsize=16) 170 | 171 | figure("KL divergence (GS)") 172 | clf() 173 | plot(1:max_iter, KL) 174 | ylabel("KL divergence", fontsize=16) 175 | xlabel("sample size", fontsize=16) 176 | show() 177 | end 178 | 179 | main_VI() 180 | main_GS() 181 | -------------------------------------------------------------------------------- /src/demo_SimpleFitting.jl: -------------------------------------------------------------------------------- 1 | ##################################### 2 | ## Simple function fitting demo 3 | 4 | using PyPlot, PyCall 5 | using Distributions 6 | 7 | # true param 8 | W = Array([1.0, 0.0, 1.0]) 9 | 10 | # generate data 11 | sigma = 0.5 12 | N = 20 13 | X = linspace(-0.4,2.4,N) 14 | Y = [W[1] + W[2]*x + W[3]*x^2 + sigma*randn() for x in X] 15 | X_min = minimum(X) 16 | X_max = maximum(X) 17 | 18 | # regression1 19 | X_all = linspace(X_min, X_max, 100) 20 | W1 = sum(Y.*X) / sum(X.^2) 21 | Y1 = [W1*x for x in X_all] 22 | 23 | # regression2 24 | X2 = zeros(3, N) 25 | X2[1,:] = 1 26 | X2[2,:] = X 27 | X2[3,:] = X.^2 28 | W2 = inv(X2*X2') * X2*Y 29 | Y2 = [W2[1] + W2[2]*x + W2[3]*x^2 for x in X_all] 30 | 31 | # show data 32 | figure() 33 | plot(X_all, Y1, "b-") 34 | plot(X_all, Y2, "g-") 35 | plot(X, Y, "ko") 36 | legend(["model1","model2","data"], loc="upper left", fontsize=16) 37 | xlabel("\$x\$", fontsize=20) 38 | ylabel("\$y\$", fontsize=20) 39 | show() 40 | -------------------------------------------------------------------------------- /src/demo_nonconjugate.jl: -------------------------------------------------------------------------------- 1 | 2 | using PyPlot, PyCall 3 | using Distributions 4 | import StatsFuns.logsumexp 5 | PyDict(matplotlib["rcParams"])["mathtext.fontset"] = "cm" 6 | PyDict(matplotlib["rcParams"])["mathtext.rm"] = "serif" 7 | PyDict(matplotlib["rcParams"])["lines.linewidth"] = 1.5 8 | PyDict(matplotlib["rcParams"])["font.family"] = "TakaoPGothic" 9 | 10 | function expt(a, b, sigma, Y, X, N_s) 11 | S = rand(Gamma(a, 1.0/b), N_s) 12 | C = mean([exp(sum(logpdf.(Normal(s, sigma), Y))) for s in S]) 13 | curve = [exp(sum(logpdf.(Normal(mu, sigma), Y))) * pdf(Gamma(a, 1.0/b), mu) for mu in X] 14 | m = 
mean([s*exp(sum(logpdf.(Normal(s, sigma), Y)))/C for s in S]) 15 | v = mean([(s-m)^2 * exp(sum(logpdf.(Normal(s, sigma), Y)))/C for s in S]) 16 | return curve/C, m, v 17 | end 18 | 19 | X = linspace(-5, 10, 1000) 20 | 21 | a = 2.0 22 | b = 2.0 23 | mu = 1.0 24 | sigma=1.0 25 | 26 | # data 27 | N = 10 28 | Y = rand(Normal(mu, sigma), N) 29 | 30 | # calc posterior 31 | N_s = 100000 32 | posterior, m, v = expt(a, b, sigma, Y, X, N_s) 33 | 34 | a_h = m^2 / v 35 | b_h = m / v 36 | 37 | figure() 38 | plot(X, pdf(Normal(mu,sigma), X)) 39 | plot(X, pdf(Gamma(a,1.0/b), X)) 40 | plot(X, posterior) 41 | plot(X, pdf(Gamma(a_h,1.0/b_h), X)) 42 | plot(Y, 0.02*ones(N), "o") 43 | legend(["generator", "prior", "posterior", "approx", "samples"]) 44 | #legend(["データ生成分布", "事前分布", "事後分布", "近似分布", "データ"], fontsize=12) 45 | xlim([-3, 6]) 46 | ylim([0, 1.8]) 47 | 48 | 49 | --------------------------------------------------------------------------------