├── .gitignore
├── LICENSE
├── README.md
├── acq
│   ├── acqeig_vbmc.m
│   ├── acqf_vbmc.m
│   ├── acqflog_vbmc.m
│   ├── acqfsn2_vbmc.m
│   ├── acqimiqr_vbmc.m
│   ├── acqus_vbmc.m
│   ├── acqviqr_vbmc.m
│   └── acqwrapper_vbmc.m
├── docs
│   ├── README.txt
│   ├── vbmc-demo-2.gif
│   ├── vbmc-demo.gif
│   ├── vbmc-demo.png
│   ├── vbmc2020-demo.gif
│   └── walkthrough.md
├── ent
│   ├── entlb_vbmc.m
│   ├── entmc_vbmc.m
│   └── entub_vbmc.m
├── gplite
│   ├── LICENSE
│   ├── README.md
│   ├── gplite_clean.m
│   ├── gplite_covfun.m
│   ├── gplite_demo.m
│   ├── gplite_fmin.m
│   ├── gplite_hypprior.m
│   ├── gplite_intmeanfun.m
│   ├── gplite_meanfun.m
│   ├── gplite_nlZ.m
│   ├── gplite_noisefun.m
│   ├── gplite_plot.m
│   ├── gplite_post.m
│   ├── gplite_pred.m
│   ├── gplite_qpred.m
│   ├── gplite_quad.m
│   ├── gplite_rnd.m
│   ├── gplite_sample.m
│   ├── gplite_test.m
│   ├── gplite_train.m
│   ├── outwarp_negpow.m
│   ├── outwarp_negpowc1.m
│   ├── outwarp_negscaledpow.m
│   ├── outwarp_test.m
│   └── private
│       ├── derivcheck.m
│       ├── eissample_lite.m
│       ├── fminfill.m
│       ├── gplite_core.m
│       ├── quantile1.m
│       ├── slicesamplebnd.m
│       └── sq_dist.m
├── install.m
├── lpostfun.m
├── misc
│   ├── best_vbmc.m
│   ├── boundscheck_vbmc.m
│   ├── check_quadcoefficients_vbmc.m
│   ├── evaloption_vbmc.m
│   ├── fess_vbmc.m
│   ├── finalboost_vbmc.m
│   ├── funlogger_vbmc.m
│   ├── get_GPTrainOptions.m
│   ├── get_traindata_vbmc.m
│   ├── get_vptheta.m
│   ├── gethpd_vbmc.m
│   ├── gplogjoint.m
│   ├── gplogjoint_weights.m
│   ├── gpreupdate.m
│   ├── gpsample_vbmc.m
│   ├── gptrain_vbmc.m
│   ├── initdesign_vbmc.m
│   ├── intkernel.m
│   ├── negelcbo_vbmc.m
│   ├── noiseshaping_vbmc.m
│   ├── proposal_vbmc.m
│   ├── real2int_vbmc.m
│   ├── rescale_params.m
│   ├── setupoptions_vbmc.m
│   ├── setupvars_vbmc.m
│   ├── testpdf.m
│   ├── vbinit_vbmc.m
│   ├── vbmc_gphyp.m
│   ├── vpbndloss.m
│   ├── vpbounds.m
│   ├── vpoptimize_vbmc.m
│   ├── vpoptimizeweights_vbmc.m
│   ├── vpsample_vbmc.m
│   ├── vpsieve_vbmc.m
│   ├── vptrain2real.m
│   ├── warp_gpandvp_vbmc.m
│   └── warp_input_vbmc.m
├── private
│   ├── acqhedge_vbmc.m
│   ├── activeimportancesampling_vbmc.m
│   ├── activesample_vbmc.m
│   ├── recompute_lcbmax.m
│   ├── updateK.m
│   ├── vbmc_demo2d.m
│   ├── vbmc_iterplot.m
│   ├── vbmc_output.m
│   ├── vbmc_plot2d.m
│   ├── vbmc_termination.m
│   └── vbmc_warmup.m
├── rosenbrock_test.m
├── shared
│   ├── kde1d.m
│   ├── msmoothboxlogpdf.m
│   ├── msmoothboxpdf.m
│   ├── msmoothboxrnd.m
│   ├── msplinetrapezlogpdf.m
│   ├── msplinetrapezpdf.m
│   ├── msplinetrapezrnd.m
│   ├── mtrapezlogpdf.m
│   ├── mtrapezpdf.m
│   ├── mtrapezrnd.m
│   ├── munifboxlogpdf.m
│   ├── munifboxpdf.m
│   ├── munifboxrnd.m
│   ├── mvnkl.m
│   ├── qtrapz.m
│   ├── warpvars_vbmc.m
│   └── warpvars_vbmc_test.m
├── test
│   ├── runtest_vbmc.m
│   └── test_pdfs_vbmc.m
├── utils
│   ├── cmaes_modded.m
│   ├── cornerplot.m
│   ├── covcma.m
│   ├── eissample_lite.m
│   ├── evalbool.m
│   ├── fastkmeans.m
│   ├── fminadam.m
│   ├── fminfill.m
│   ├── ibslike.m
│   ├── kde2d.m
│   ├── malasample_vbmc.m
│   ├── psycho_gen.m
│   ├── quantile1.m
│   ├── slicelite.m
│   ├── slicesample_vbmc.m
│   ├── slicesamplebnd.m
│   ├── softbndloss.m
│   ├── sq_dist.m
│   └── unscent_warp.m
├── vbmc.m
├── vbmc_diagnostics.m
├── vbmc_examples.m
├── vbmc_isavp.m
├── vbmc_kldiv.m
├── vbmc_mode.m
├── vbmc_moments.m
├── vbmc_mtv.m
├── vbmc_pdf.m
├── vbmc_plot.m
├── vbmc_power.m
└── vbmc_rnd.m
/.gitignore:
--------------------------------------------------------------------------------
1 | *.mexw64
2 | *.mexa64
3 | *.mexmaci64
4 | *.asv
5 | *.m~
6 | .DS_Store
7 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2022, Luigi Acerbi
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/acq/acqeig_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqeig_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
2 | %ACQEIG_VBMC Expected information gain (EIG) acquisition function.
3 |
4 | if isempty(Xs)
5 | % Return acquisition function info struct
6 | acq.compute_varlogjoint = true;
7 | return;
8 | end
9 |
10 | % Xs is in *transformed* coordinates
11 | Ns = numel(gp.post);
12 |
13 | % Estimate observation noise at test points from nearest neighbor
14 | [~,pos] = min(sq_dist(bsxfun(@rdivide,Xs,optimState.gplengthscale),gp.X_rescaled),[],2);
15 | sn2 = gp.sn2new(pos);
16 |
17 | intK = intkernel(Xs,vp,gp,0);
18 | ys2 = fs2 + sn2; % Predictive variance at test points
19 |
20 | rho2 = bsxfun(@rdivide,intK.^2,optimState.varlogjoint_samples.*ys2);
21 | acq = 0.5*sum(log(max(realmin,1 - min(1,rho2))),2)/Ns;
22 |
23 | end
24 |
25 |
26 | %SQ_DIST Compute matrix of all pairwise squared distances between two sets
27 | % of vectors, stored in the rows of the two matrices, a (of size n-by-D)
28 | % and b (of size m-by-D).
29 | function C = sq_dist(a,b)
30 |
31 | n = size(a,1);
32 | m = size(b,1);
33 | mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1);
34 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu);
35 | C = bsxfun(@plus,sum(a.*a,2),bsxfun(@minus,sum(b.*b,2)',2*a*b'));
36 | C = max(C,0);
37 |
38 | end
--------------------------------------------------------------------------------
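
The local sq_dist helper above (duplicated in several acquisition files) shifts both point sets by a shared weighted centroid before expanding the quadratic form, which reduces cancellation error. A quick numerical sanity check against a direct computation (pdist2 requires the Statistics and Machine Learning Toolbox):

    a = randn(5,3); b = randn(7,3);
    n = size(a,1); m = size(b,1);
    mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1);  % shared centroid, as in the helper
    a0 = bsxfun(@minus,a,mu); b0 = bsxfun(@minus,b,mu);
    C = max(bsxfun(@plus,sum(a0.*a0,2),bsxfun(@minus,sum(b0.*b0,2)',2*(a0*b0'))),0);
    max(max(abs(C - pdist2(a,b).^2)))                % ~1e-15: agrees up to round-off
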
/acq/acqf_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqf_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
2 | %ACQF_VBMC Acquisition fcn. for prospective uncertainty search.
3 |
4 | % Xs is in *transformed* coordinates
5 |
6 | % Probability density of variational posterior at test points
7 | p = max(vbmc_pdf(vp,Xs,0),realmin);
8 |
9 | % Prospective uncertainty search
10 | z = optimState.ymax;
11 | acq = -vtot .* exp(fbar-z) .* p;
12 |
13 | end
--------------------------------------------------------------------------------
/acq/acqflog_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqflog_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
2 | %ACQFLOG_VBMC Acquisition fcn. for prospective uncertainty search (log-valued).
3 |
4 | % Xs is in *transformed* coordinates
5 |
6 | if isempty(Xs)
7 | % Return acquisition function info struct
8 | acq.compute_varlogjoint = false;
9 | acq.log_flag = true;
10 | return;
11 | end
12 |
13 | % Probability density of variational posterior at test points
14 | p = max(vbmc_pdf(vp,Xs,0),realmin);
15 |
16 | % Log prospective uncertainty search
17 | z = optimState.ymax;
18 | acq = -(log(vtot) + fbar-z + log(p));
19 |
20 | end
--------------------------------------------------------------------------------
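
Note the relation between the two files above: acqflog_vbmc returns -log(-acqf_vbmc), the negative log of the (negated) prospective uncertainty search criterion, which stays well-behaved when the product vtot*exp(fbar-z)*p underflows. A scalar sanity check of the identity:

    vtot = 0.5; fbar = -2; z = 1; p = 0.1;        % illustrative stand-in values
    acq    = -vtot .* exp(fbar-z) .* p;           % as in acqf_vbmc
    acqlog = -(log(vtot) + fbar - z + log(p));    % as in acqflog_vbmc
    abs(acqlog - (-log(-acq)))                    % ~0 up to round-off
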
/acq/acqfsn2_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqfsn2_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
2 | %ACQFSN2_VBMC Acquisition fcn. for noisy prospective uncertainty search.
3 |
4 | % Xs is in *transformed* coordinates
5 |
6 | % Probability density of variational posterior at test points
7 | p = max(vbmc_pdf(vp,Xs,0),realmin);
8 |
9 | % Estimate observation noise at test points from nearest neighbor
10 | [~,pos] = min(sq_dist(bsxfun(@rdivide,Xs,optimState.gplengthscale),gp.X_rescaled),[],2);
11 | sn2 = gp.sn2new(pos);
12 |
13 | z = optimState.ymax;
14 |
15 | % Prospective uncertainty search corrected for noisy observations
16 | acq = -vtot.*(1 - sn2./(vtot+sn2)) .* exp(fbar-z) .* p;
17 |
18 | end
19 |
20 |
21 | %SQ_DIST Compute matrix of all pairwise squared distances between two sets
22 | % of vectors, stored in the rows of the two matrices, a (of size n-by-D)
23 | % and b (of size m-by-D).
24 | function C = sq_dist(a,b)
25 |
26 | n = size(a,1);
27 | m = size(b,1);
28 | mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1);
29 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu);
30 | C = bsxfun(@plus,sum(a.*a,2),bsxfun(@minus,sum(b.*b,2)',2*a*b'));
31 | C = max(C,0);
32 |
33 | end
--------------------------------------------------------------------------------
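
The noise correction above can be read as follows: vtot.*(1 - sn2./(vtot+sn2)) equals vtot.^2./(vtot+sn2), so the acquisition reduces to the noiseless acqf_vbmc as sn2 goes to zero and vanishes when sn2 dominates vtot. A one-line check of the algebra:

    vtot = rand; sn2 = rand;
    abs(vtot*(1 - sn2/(vtot+sn2)) - vtot^2/(vtot+sn2))   % ~0: the two forms agree
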
/acq/acqimiqr_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqimiqr_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
 2 | %ACQIMIQR_VBMC Integrated median interquantile range acquisition function.
3 |
4 | u = 0.6745; % norminv(0.75)
5 |
6 | if isempty(Xs)
7 | % Return acquisition function info struct
8 | acq.importance_sampling = true;
9 | acq.importance_sampling_vp = false;
10 | acq.log_flag = true;
11 | return;
12 | elseif ischar(Xs)
13 | switch lower(Xs)
14 | case 'islogf1'
15 | % Importance sampling log base proposal (shared part)
16 | acq = fmu;
17 | case 'islogf2'
18 | % Importance sampling log base proposal (added part)
19 | % (Full log base proposal is fixed + added)
20 | fs = sqrt(fs2);
21 | acq = u*fs + log1p(-exp(-2*u*fs));
22 | case 'islogf'
23 | % Importance sampling log base proposal distribution
24 | fs = sqrt(fs2);
25 | acq = fmu + u*fs + log1p(-exp(-2*u*fs));
26 | end
27 | return;
28 | end
29 |
30 | % Different importance sampling inputs for different GP hyperparameters?
31 | multipleinputs_flag = size(optimState.ActiveImportanceSampling.Xa,3) > 1;
32 |
33 | % Xs is in *transformed* coordinates
34 |
35 | [Nx,D] = size(Xs);
36 | Ns = size(fmu,2);
37 | Na = size(optimState.ActiveImportanceSampling.Xa,1);
38 |
39 | % Estimate observation noise at test points from nearest neighbor
40 | [~,pos] = min(sq_dist(bsxfun(@rdivide,Xs,optimState.gplengthscale),gp.X_rescaled),[],2);
41 | sn2 = gp.sn2new(pos);
42 | % sn2 = min(sn2,1e4);
43 | ys2 = fs2 + sn2; % Predictive variance at test points
44 |
45 | if multipleinputs_flag
46 | Xa = zeros(Na,D);
47 | else
48 | Xa = optimState.ActiveImportanceSampling.Xa;
49 | end
50 | acq = zeros(Nx,Ns);
51 |
52 | %% Compute integrated acquisition function via importance sampling
53 |
54 | for s = 1:Ns
55 | hyp = gp.post(s).hyp;
56 | L = gp.post(s).L;
57 | Lchol = gp.post(s).Lchol;
58 | sn2_eff = 1/gp.post(s).sW(1)^2;
59 |
60 | if multipleinputs_flag
61 | Xa(:,:) = optimState.ActiveImportanceSampling.Xa(:,:,s);
62 | end
63 |
64 | % Compute cross-kernel matrix Ks_mat
65 | if gp.covfun(1) == 1 % Hard-coded SE-ard for speed
66 | ell = exp(hyp(1:D))';
67 | sf2 = exp(2*hyp(D+1));
68 | Ks_mat = sq_dist(gp.X*diag(1./ell),Xs*diag(1./ell));
69 | Ks_mat = sf2 * exp(-Ks_mat/2);
70 |
71 | Ka_mat = sq_dist(Xa*diag(1./ell),Xs*diag(1./ell));
72 | Ka_mat = sf2 * exp(-Ka_mat/2);
73 |
74 | %Kax_mat = sq_dist(Xa*diag(1./ell),gp.X*diag(1./ell));
75 | %Kax_mat = sf2 * exp(-Kax_mat/2);
76 | Kax_mat(:,:) = optimState.ActiveImportanceSampling.Kax_mat(:,:,s);
77 | else
78 | error('Other covariance functions not supported yet.');
79 | end
80 |
81 | if Lchol
82 | C = Ka_mat' - Ks_mat'*(L\(L'\Kax_mat'))/sn2_eff;
83 | else
84 | C = Ka_mat' + Ks_mat'*(L*Kax_mat');
85 | end
86 |
87 | tau2 = bsxfun(@rdivide,C.^2,ys2(:,s));
88 | s_pred = sqrt(max(bsxfun(@minus,optimState.ActiveImportanceSampling.fs2a(:,s)',tau2),0));
89 |
90 | lnw = optimState.ActiveImportanceSampling.lnw(s,:);
91 |
92 | zz = bsxfun(@plus,lnw,u*s_pred + log1p(-exp(-2*u*s_pred)));
93 | lnmax = max(zz,[],2);
94 | acq(:,s) = log(sum(exp(bsxfun(@minus,zz,lnmax)),2)) + lnmax;
95 | end
96 |
97 | if Ns > 1
98 | M = max(acq,[],2);
99 | acq = M + log(sum(exp(bsxfun(@minus,acq,M)),2)/Ns);
100 | end
101 |
102 | end
103 |
104 |
105 | %SQ_DIST Compute matrix of all pairwise squared distances between two sets
106 | % of vectors, stored in the rows of the two matrices, a (of size n-by-D)
107 | % and b (of size m-by-D).
108 | function C = sq_dist(a,b)
109 |
110 | n = size(a,1);
111 | m = size(b,1);
112 | mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1);
113 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu);
114 | C = bsxfun(@plus,sum(a.*a,2),bsxfun(@minus,sum(b.*b,2)',2*a*b'));
115 | C = max(C,0);
116 |
117 | end
--------------------------------------------------------------------------------
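
The final block above averages the per-sample acquisitions in log space using the standard log-sum-exp shift, which matters because log acquisition values can be far below log(realmin). A standalone sketch of the trick:

    a = [-1050 -1000 -1002];                  % per-sample log acquisition values
    M = max(a);
    M + log(sum(exp(a - M))/numel(a))         % stable log-average: about -1000.97
    log(mean(exp(a)))                         % naive version underflows to -Inf
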
/acq/acqus_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqus_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
2 | %ACQUS_VBMC Acquisition fcn via vanilla uncertainty sampling.
3 |
4 | % Xs is in *transformed* coordinates
5 |
6 | % Probability density of variational posterior at test points
7 | p = max(vbmc_pdf(vp,Xs,0),realmin);
8 |
9 | % Uncertainty search
10 | acq = -vtot .* p.^2;
11 |
12 | end
--------------------------------------------------------------------------------
/acq/acqviqr_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqviqr_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
 2 | %ACQVIQR_VBMC Variational integrated median interquantile range acquisition function.
3 |
4 | u = 0.6745; % norminv(0.75)
5 |
6 | if isempty(Xs)
7 | % Return acquisition function info struct
8 | acq.importance_sampling = true;
9 | acq.importance_sampling_vp = false;
10 | acq.variational_importance_sampling = true;
11 | acq.log_flag = true;
12 | return;
13 | elseif ischar(Xs)
14 | switch lower(Xs)
15 | case 'islogf1'
16 | % Importance sampling log base proposal (shared part)
17 | %Ns = size(fs2,2);
18 | %acq = repmat(vp,[1,Ns]);
19 | acq = zeros(size(fs2));
20 | case 'islogf2'
21 | % Importance sampling log base proposal (added part)
22 | % (Full log base proposal is fixed + added)
23 | fs = sqrt(fs2);
24 | acq = u*fs + log1p(-exp(-2*u*fs));
25 | case 'islogf'
26 | % Importance sampling log base proposal distribution
27 | fs = sqrt(fs2);
28 | acq = vp + u*fs + log1p(-exp(-2*u*fs));
29 | end
30 | return;
31 | end
32 |
33 | % Xs is in *transformed* coordinates
34 |
35 | [Nx,D] = size(Xs);
36 | Ns = size(fmu,2);
37 |
38 | % Estimate observation noise at test points from nearest neighbor
39 | [~,pos] = min(sq_dist(bsxfun(@rdivide,Xs,optimState.gplengthscale),gp.X_rescaled),[],2);
40 | sn2 = gp.sn2new(pos);
41 | % sn2 = min(sn2,1e4);
42 | ys2 = fs2 + sn2; % Predictive variance at test points
43 |
44 | Xa = optimState.ActiveImportanceSampling.Xa;
45 | acq = zeros(Nx,Ns);
46 |
47 | %% Compute integrated acquisition function via importance sampling
48 |
49 | % Integrated mean function being used?
50 | integrated_meanfun = isfield(gp,'intmeanfun') && gp.intmeanfun > 0;
51 |
52 | if integrated_meanfun
53 | % Evaluate basis functions
54 | plus_idx = gp.intmeanfun_var > 0;
55 | Ha = optimState.ActiveImportanceSampling.Ha;
56 | Hs = gplite_intmeanfun(Xs,gp.intmeanfun);
57 | Hs = Hs(plus_idx,:);
58 | end
59 |
60 | for s = 1:Ns
61 | hyp = gp.post(s).hyp;
62 | %L = gp.post(s).L;
63 | Lchol = gp.post(s).Lchol;
64 | %sn2_eff = 1/gp.post(s).sW(1)^2;
65 |
66 | % Compute cross-kernel matrix Ks_mat
67 | if gp.covfun(1) == 1 % Hard-coded SE-ard for speed
68 | ell = exp(hyp(1:D))';
69 | sf2 = exp(2*hyp(D+1));
70 | Xs_ell = bsxfun(@rdivide,Xs,ell);
71 |
72 | Ks_mat = sq_dist(bsxfun(@rdivide,gp.X,ell),Xs_ell);
73 | Ks_mat = sf2 * exp(-Ks_mat/2);
74 |
75 | Ka_mat = sq_dist(Xs_ell,bsxfun(@rdivide,Xa,ell));
76 | Ka_mat = sf2 * exp(-Ka_mat/2);
77 |
78 | %Kax_mat = sq_dist(Xa*diag(1./ell),gp.X*diag(1./ell));
79 | %Kax_mat = sf2 * exp(-Kax_mat/2);
80 | %Kax_mat(:,:) = optimState.ActiveImportanceSampling.Kax_mat(:,:,s);
81 | Ctmp_mat(:,:) = optimState.ActiveImportanceSampling.Ctmp_mat(:,:,s);
82 | else
83 | error('Other covariance functions not supported yet.');
84 | end
85 |
86 | if Lchol
87 | % C = Ka_mat - Ks_mat'*(L\(L'\Kax_mat'))/sn2_eff;
88 | C = Ka_mat - Ks_mat'*Ctmp_mat;
89 | else
90 | % C = Ka_mat + Ks_mat'*(L*Kax_mat');
91 | C = Ka_mat + Ks_mat'*Ctmp_mat;
92 | end
93 |
94 | if integrated_meanfun
95 | HKinv = gp.post(s).intmean.HKinv(plus_idx,:);
96 | Tplusinv = gp.post(s).intmean.Tplusinv;
97 | C = C + (Hs' - Ks_mat'*HKinv')*(Tplusinv*Ha) + (Ks_mat'*HKinv' - Hs')*(Tplusinv*(HKinv*Kax_mat'));
98 | end
99 |
100 | tau2 = bsxfun(@rdivide,C.^2,ys2(:,s));
101 | s_pred = sqrt(max(bsxfun(@minus,optimState.ActiveImportanceSampling.fs2a(:,s)',tau2),0));
102 |
103 | % lnw is zeros (VIQR uses simple Monte Carlo, no importance sampling)
104 | % lnw = optimState.ActiveImportanceSampling.lnw(s,:);
105 | % zz = bsxfun(@plus,lnw,u*s_pred + log1p(-exp(-2*u*s_pred)));
106 | zz = u*s_pred + log1p(-exp(-2*u*s_pred));
107 | lnmax = max(zz,[],2);
108 | acq(:,s) = log(sum(exp(bsxfun(@minus,zz,lnmax)),2)) + lnmax;
109 | end
110 |
111 | if Ns > 1
112 | M = max(acq,[],2);
113 | acq = M + log(sum(exp(bsxfun(@minus,acq,M)),2)/Ns);
114 | end
115 |
116 | end
117 |
118 |
119 | %SQ_DIST Compute matrix of all pairwise squared distances between two sets
120 | % of vectors, stored in the rows of the two matrices, a (of size n-by-D)
121 | % and b (of size m-by-D).
122 | function C = sq_dist(a,b)
123 |
124 | n = size(a,1);
125 | m = size(b,1);
126 | mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1);
127 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu);
128 | C = bsxfun(@plus,sum(a.*a,2),bsxfun(@minus,sum(b.*b,2)',2*a*b'));
129 | C = max(C,0);
130 |
131 | end
--------------------------------------------------------------------------------
/acq/acqwrapper_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqwrapper_vbmc(Xs,vp,gp,optimState,transpose_flag,acqFun,acqInfo)
2 | %ACQWRAPPER_VBMC Wrapper for all acquisition functions.
3 |
4 | % Transposed input (useful for CMAES)
5 | if transpose_flag; Xs = Xs'; end
6 |
7 | % Map integer inputs
8 | Xs = real2int_vbmc(Xs,vp.trinfo,optimState.integervars);
9 |
10 | %% Compute GP posterior predictive mean and variance
11 |
12 | if isfield(vp,'delta') && ~isempty(vp.delta) && any(vp.delta > 0)
13 | % Quadrature mean and variance for each hyperparameter sample
14 | [fmu,fs2] = gplite_quad(gp,Xs,vp.delta',1);
15 | else
16 | % GP mean and variance for each hyperparameter sample
17 | [~,~,fmu,fs2] = gplite_pred(gp,Xs,[],[],1,0);
18 | end
19 |
20 | % Compute total variance
21 | Ns = size(fmu,2);
22 | fbar = sum(fmu,2)/Ns; % Mean across samples
23 | vbar = sum(fs2,2)/Ns; % Average variance across samples
24 | if Ns > 1
25 | vf = sum(bsxfun(@minus,fmu,fbar).^2,2)/(Ns-1);
26 | else
27 | vf = 0;
28 | end % Sample variance
29 | vtot = vf + vbar; % Total variance
30 |
31 | %% Compute acquisition function
32 | acq = acqFun(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot);
33 |
34 | %% Regularization: penalize points where GP uncertainty is below threshold
35 | if optimState.VarianceRegularizedAcqFcn
36 | TolVar = optimState.TolGPVar; % Try not to go below this variance
37 | idx = vtot < TolVar;
38 |
39 | if any(idx)
40 | if isfield(acqInfo,'log_flag') && acqInfo.log_flag
41 | acq(idx) = acq(idx) + TolVar./vtot(idx) - 1;
42 | else
43 | acq(idx) = acq(idx) .* exp(-(TolVar./vtot(idx)-1));
44 | end
45 | end
46 | end
47 | acq = max(acq,-realmax);
48 |
49 | %% Hard bound checking: discard points too close to bounds
50 | X_orig = warpvars_vbmc(Xs,'i',vp.trinfo);
51 | idx = any(bsxfun(@lt,X_orig,optimState.LBeps_orig),2) | any(bsxfun(@gt,X_orig,optimState.UBeps_orig),2);
52 | acq(idx) = Inf;
53 |
54 | % Transposed output
55 | if transpose_flag; acq = acq'; end
56 |
57 | end
--------------------------------------------------------------------------------
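
The total variance vtot computed in the wrapper is the law-of-total-variance decomposition across GP hyperparameter samples: the average predictive variance plus the sample variance of the predictive means. A small numeric illustration with made-up values:

    fmu = [1.0 1.2 0.8]; fs2 = [0.3 0.4 0.2];  % one test point, Ns = 3 hyp samples
    fbar = mean(fmu);                          % mean across samples
    vbar = mean(fs2);                          % average variance across samples
    vf   = var(fmu);                           % sample variance of the means
    vtot = vf + vbar                           % total variance, as in the wrapper
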
/docs/README.txt:
--------------------------------------------------------------------------------
1 | Variational Bayesian Monte Carlo (VBMC) documentation
2 | ==============================================================================================
3 |
4 | For a description of the usage of VBMC, type
5 |
6 | > help vbmc
7 |
8 | in the MATLAB shell.
9 |
10 | You can also look at the 'vbmc_examples.m' script for a tutorial with commented examples.
11 |
12 | For any other question, clarification, or troubleshooting, check out:
13 |
14 | - the VBMC page: https://github.com/acerbilab/vbmc
15 | - the online FAQ: https://github.com/acerbilab/vbmc/wiki
16 |
17 | ==============================================================================================
18 |
19 | The algorithm is described in the following references:
20 |
21 | 1) Acerbi, L. (2018). "Variational Bayesian Monte Carlo". In Advances in Neural Information
22 | Processing Systems 31 (NeurIPS 2018), pp. 8213-8223.
23 | 2) Acerbi, L. (2020). "Variational Bayesian Monte Carlo with Noisy Likelihoods". In Advances
24 | in Neural Information Processing Systems 33 (NeurIPS 2020), pp. 8211-8222.
25 |
--------------------------------------------------------------------------------
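
A minimal usage sketch of the interface described above (the call signature follows 'help vbmc'; the toy target and bounds below are illustrative):

    % Run VBMC on a 2-D toy target: a normalized Gaussian log density
    D = 2;
    fun = @(x) -0.5*sum(x.^2,2) - 0.5*D*log(2*pi);   % log joint (here log Z = 0)
    x0  = zeros(1,D);                                % starting point
    LB  = -Inf(1,D);    UB  = Inf(1,D);              % hard bounds (unconstrained)
    PLB = -3*ones(1,D); PUB = 3*ones(1,D);           % plausible bounds

    [vp,elbo,elbo_sd] = vbmc(fun,x0,LB,UB,PLB,PUB);  % variational posterior + ELBO

    Xs = vbmc_rnd(vp,1e5);                           % samples from the posterior
    fprintf('ELBO: %.3f +/- %.3f (true log Z = 0)\n',elbo,elbo_sd);
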
/docs/vbmc-demo-2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/docs/vbmc-demo-2.gif
--------------------------------------------------------------------------------
/docs/vbmc-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/docs/vbmc-demo.gif
--------------------------------------------------------------------------------
/docs/vbmc-demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/docs/vbmc-demo.png
--------------------------------------------------------------------------------
/docs/vbmc2020-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/docs/vbmc2020-demo.gif
--------------------------------------------------------------------------------
/ent/entmc_vbmc.m:
--------------------------------------------------------------------------------
1 | function [H,dH] = entmc_vbmc(vp,Ns,grad_flags,jacobian_flag)
2 | %ENTMC_VBMC Monte Carlo estimate of entropy of variational posterior
3 |
4 | if nargin < 2 || isempty(Ns); Ns = 10; end
5 | % Check if gradient computation is required
6 | if nargout < 2 % No 2nd output, no gradients
7 | grad_flags = false;
8 | elseif nargin < 3 || isempty(grad_flags) % By default compute all gradients
9 | grad_flags = true;
10 | end
11 | if isscalar(grad_flags); grad_flags = ones(1,4)*grad_flags; end
12 |
13 | % By default assume variational parameters were transformed (before the call)
14 | if nargin < 4 || isempty(jacobian_flag); jacobian_flag = true; end
15 |
16 | D = vp.D; % Number of dimensions
17 | K = vp.K; % Number of components
18 | mu(:,:) = vp.mu;
19 | sigma(1,:) = vp.sigma;
20 | lambda(:,1) = vp.lambda(:);
21 | w(1,:) = vp.w;
22 |
23 | % Check which gradients are computed
24 | if grad_flags(1); mu_grad = zeros(D,K); else, mu_grad = []; end
25 | if grad_flags(2); sigma_grad = zeros(K,1); else, sigma_grad = []; end
26 | if grad_flags(3); lambda_grad = zeros(D,1); else, lambda_grad = []; end
27 | if grad_flags(4); w_grad = zeros(K,1); else, w_grad = []; end
28 |
29 | % Reshape in 4-D to allow massive vectorization
30 | mu_4 = zeros(D,1,1,K);
31 | mu_4(:,1,1,:) = reshape(mu,[D,1,1,K]);
32 | sigma_4(1,1,1,:) = sigma;
33 | w_4(1,1,1,:) = w;
34 |
35 | sigmalambda = bsxfun(@times, sigma_4, lambda);
36 | nconst = 1/(2*pi)^(D/2)/prod(lambda);
37 |
38 | lambda_t = vp.lambda(:)'; % LAMBDA is a row vector
39 | mu_t(:,:) = vp.mu'; % MU transposed
40 | nf = 1/(2*pi)^(D/2)/prod(lambda); % Common normalization factor
41 |
42 | H = 0;
43 |
44 | % Make sure Ns is even
45 | Ns = ceil(Ns/2)*2;
46 | epsilon = zeros(D,1,Ns);
47 |
48 | % Loop over mixture components for generating samples
49 | for j = 1:K
50 |
51 | % Draw Monte Carlo samples from the j-th component
52 | % epsilon = randn(D,1,Ns);
53 | epsilon(:,1,1:Ns/2) = randn(D,1,Ns/2); % Antithetic sampling
54 | epsilon(:,1,Ns/2+1:end) = -epsilon(:,1,1:Ns/2);
55 | xi = bsxfun(@plus, bsxfun(@times, bsxfun(@times, epsilon, lambda), sigma(j)), mu_4(:,1,1,j));
56 |
57 | Xs = reshape(xi,[D,Ns])';
58 |
59 | % Compute pdf -- this block is equivalent to: ys = vbmc_pdf(vp,Xs,0);
60 | ys = zeros(Ns,1);
61 | for k = 1:K
62 | d2 = sum(bsxfun(@rdivide,bsxfun(@minus,Xs,mu_t(k,:)),sigma(k)*lambda_t).^2,2);
63 | nn = w(k)*nf/sigma(k)^D*exp(-0.5*d2);
64 | ys = ys + nn;
65 | end
66 |
67 | H = H - w(j)*sum(log(ys))/Ns;
68 |
69 | % Compute gradient via reparameterization trick
70 | if any(grad_flags)
71 | % Full mixture (for sample from the j-th component)
72 | norm_jl = bsxfun(@times, nconst./(sigma_4.^D), exp(-0.5*sum(bsxfun(@rdivide, bsxfun(@minus, xi, mu_4), sigmalambda).^2,1)));
73 | q_j = sum(bsxfun(@times,w_4,norm_jl),4);
74 |
75 | % Compute sum for gradient wrt mu
76 | % lsum = sum(bsxfun(@times,bsxfun(@rdivide, bsxfun(@minus, xi, mu_4), sigmalambda.^2), norm_jl),4);
77 | lsum = sum(bsxfun(@times, ...
78 | bsxfun(@rdivide, bsxfun(@minus, xi, mu_4), sigmalambda.^2),...
79 | bsxfun(@times,norm_jl,w_4)),4);
80 |
81 | if grad_flags(1)
82 | mu_grad(:,j) = w(j)*sum(bsxfun(@rdivide, lsum, q_j),3) / Ns;
83 | end
84 |
85 | if grad_flags(2)
86 | % Compute sum for gradient wrt sigma
87 | isum = sum(bsxfun(@times,lsum,bsxfun(@times, epsilon, lambda)),1);
88 | sigma_grad(j) = w(j) * sum(bsxfun(@rdivide, isum, q_j),3) / Ns;
89 | end
90 |
91 | if grad_flags(3)
92 | % Should be dividing by LAMBDA, see below
93 | lambda_grad = lambda_grad + sum(bsxfun(@times, lsum, bsxfun(@rdivide, w(j)*sigma(j)*epsilon,q_j)),3) / Ns;
94 | end
95 |
96 | if grad_flags(4)
97 | w_grad(j) = w_grad(j) - sum(log(q_j))/Ns;
98 | % w_grad(:) = w_grad(:) - w(j)*sum(norm_jl(1,1,:,j)./q_j)/Ns;
99 | % Fix by Chengkun Li
100 | w_grad(:) = w_grad(:) - w(j)*squeeze(sum(norm_jl(1,1,:,:)./q_j,3))/Ns;
101 | end
102 |
103 | end
104 | end
105 |
106 | if grad_flags(3)
107 | lambda_grad = bsxfun(@times,lambda_grad,lambda); % Reparameterization
108 | end
109 |
110 | if nargout > 1
111 | % Correct for standard log reparameterization of SIGMA
112 | if jacobian_flag && grad_flags(2)
113 | sigma_grad = bsxfun(@times,sigma_grad, sigma(:));
114 | end
115 | % Correct if NOT using standard log reparameterization of LAMBDA
116 | if ~jacobian_flag && grad_flags(3)
117 | lambda_grad = bsxfun(@rdivide,lambda_grad, lambda(:));
118 | end
119 | % Correct for standard softmax reparameterization of W
120 | if jacobian_flag && grad_flags(4)
121 | eta_sum = sum(exp(vp.eta));
122 | J_w = bsxfun(@times,-exp(vp.eta)',exp(vp.eta)/eta_sum^2) + diag(exp(vp.eta)/eta_sum);
123 | w_grad = J_w*w_grad;
124 | end
125 | dH = [mu_grad(:); sigma_grad(:); lambda_grad(:); w_grad(:)];
126 | end
127 |
128 | end
--------------------------------------------------------------------------------
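
The estimator above draws antithetic pairs (epsilon, -epsilon): any integrand component that is odd around the component mean cancels exactly, which typically reduces Monte Carlo variance at no extra cost. A standalone sketch of the trick for an expectation under a standard normal:

    f = @(x) x + x.^3 + cos(x);            % E[f(X)] = E[cos(X)] = exp(-0.5)
    Ns = 1e4;
    e = randn(Ns/2,1);
    est_anti  = mean(f([e; -e]));          % antithetic: odd terms cancel exactly
    est_plain = mean(f(randn(Ns,1)));      % plain Monte Carlo, higher variance
    [est_anti, est_plain, exp(-0.5)]
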
/ent/entub_vbmc.m:
--------------------------------------------------------------------------------
1 | function [H,dH] = entub_vbmc(vp,grad_flags,jacobian_flag)
2 | %ENTUB_VBMC Entropy upper bound for variational posterior
3 |
4 | % Uses entropy upper bound of multivariate normal approximation
5 |
6 | % Check if gradient computation is required
7 | if nargout < 2 % No 2nd output, no gradients
8 | grad_flags = false;
9 | elseif nargin < 2 || isempty(grad_flags) % By default compute all gradients
10 | grad_flags = true;
11 | end
12 | if isscalar(grad_flags); grad_flags = ones(1,4)*grad_flags; end
13 |
14 | % By default assume variational parameters were transformed (before the call)
15 | if nargin < 3 || isempty(jacobian_flag); jacobian_flag = true; end
16 |
17 | D = vp.D; % Number of dimensions
18 | K = vp.K; % Number of components
19 | mu(:,:) = vp.mu;
20 | sigma(1,:) = vp.sigma;
21 | lambda(:,1) = vp.lambda(:);
22 | w(1,:) = vp.w;
23 |
24 | % Check which gradients are computed
25 | if grad_flags(1); mu_grad = zeros(D,K); dS_mu = zeros(D,D,K); else, mu_grad = []; end
26 | if grad_flags(2); sigma_grad = zeros(K,1); else, sigma_grad = []; end
27 | if grad_flags(3); lambda_grad = zeros(D,1); else, lambda_grad = []; end
28 | if grad_flags(4); w_grad = zeros(K,1); dS_w = zeros(D,D,K); else, w_grad = []; end
29 |
30 | if K == 1
31 | % Entropy of single component, uses exact expression
32 | H = 0.5*D*(1 + log(2*pi)) + D*sum(log(sigma)) + sum(log(lambda));
33 |
34 | if grad_flags(2)
35 | sigma_grad(:) = D./sigma(:);
36 | end
37 |
38 | if grad_flags(3)
39 | % Should be dividing by LAMBDA, see below
40 | lambda_grad(:) = ones(D,1); % 1./lambda(:);
41 | end
42 |
43 | if grad_flags(4)
44 | w_grad = 0;
45 | end
46 | else
47 |
48 | Mu = sum(bsxfun(@times,vp.w,vp.mu),2);
49 | Sigma = zeros(D,D);
50 | delta_mu = bsxfun(@minus,mu,Mu);
51 | for k = 1:K
52 | S_k = diag((lambda*sigma(k)).^2) + delta_mu(:,k)*delta_mu(:,k)';
53 | Sigma = Sigma + w(k)*S_k;
54 | if grad_flags(4); dS_w(:,:,k) = S_k; end
55 | end
56 | L = chol(Sigma);
57 |
58 | H = 0.5*D*(log(2*pi) + 1) + sum(log(diag(L)));
59 |
60 | if any(grad_flags)
61 | invK = L\(L'\eye(D));
62 |
63 | if grad_flags(1)
64 | for k = 1:K
65 | mu_grad((1:D)+(k-1)*D) = 0.5*w(k).*(sum(bsxfun(@times,invK,delta_mu(:,k)'),2) + sum(bsxfun(@times,invK,delta_mu(:,k)),1)');
66 | end
67 | end
68 |
69 | if grad_flags(2)
70 | Q = sum(sum(invK.*diag(lambda.^2)));
71 | sigma_grad(:) = Q*(w.*sigma);
72 | end
73 |
74 | if grad_flags(3)
75 | lambda_grad(:) = diag(invK).*lambda.^2*sum(w.*(sigma.^2));
76 | end
77 |
78 | if grad_flags(4)
79 | for k = 1:K
80 | w_grad(k) = 0.5*sum(sum(invK.*dS_w(:,:,k)));
81 | end
82 | end
83 | end
84 | end
85 |
86 | if nargout > 1
87 | % Correct for standard log reparameterization of SIGMA
88 | if jacobian_flag && grad_flags(2)
89 | sigma_grad = bsxfun(@times,sigma_grad, sigma(:));
90 | end
91 | % Correct if NOT using standard log reparameterization of LAMBDA
92 | if ~jacobian_flag && grad_flags(3)
93 | lambda_grad = bsxfun(@rdivide,lambda_grad, lambda(:));
94 | end
95 | % Correct for standard softmax reparameterization of W
96 | if jacobian_flag && grad_flags(4)
97 | eta_sum = sum(exp(vp.eta));
98 | J_w = bsxfun(@times,-exp(vp.eta)',exp(vp.eta)/eta_sum^2) + diag(exp(vp.eta)/eta_sum);
99 | w_grad = J_w*w_grad;
100 | end
101 | dH = [mu_grad(:); sigma_grad(:); lambda_grad(:); w_grad(:)];
102 | end
103 |
104 | end
--------------------------------------------------------------------------------
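
The bound implemented above is the entropy of the moment-matched Gaussian: H(q) <= 0.5*D*(1+log(2*pi)) + 0.5*log(det(Sigma)), where Sigma is the full covariance of the mixture; for K = 1 it coincides with the exact expression in the first branch. A sketch that matches the mixture moments and evaluates the bound:

    D = 2; K = 3;
    mu = randn(D,K); sigma = 0.5 + rand(1,K);    % mixture parameters
    lambda = ones(D,1); w = ones(1,K)/K;
    Mu = mu*w';                                  % mixture mean
    Sigma = zeros(D);
    for k = 1:K                                  % mixture covariance
        dmu = mu(:,k) - Mu;
        Sigma = Sigma + w(k)*(diag((lambda*sigma(k)).^2) + dmu*dmu');
    end
    Hub = 0.5*D*(1+log(2*pi)) + 0.5*log(det(Sigma))   % entropy upper bound
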
/gplite/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Luigi Acerbi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/gplite/README.md:
--------------------------------------------------------------------------------
1 | # gplite
2 | Lite Gaussian process regression toolbox
3 |
--------------------------------------------------------------------------------
/gplite/gplite_clean.m:
--------------------------------------------------------------------------------
1 | function gp = gplite_clean(gp)
2 | %GPLITE_CLEAN Remove auxiliary info from lite GP struct (less memory usage).
3 | % GP = GPLITE_CLEAN(GP) removes auxiliary computational structs from
4 | % the GP. These can be reconstructed via a call to GPLITE_POST.
5 | %
6 | % See also GPLITE_POST.
7 |
8 | if ~isempty(gp) && isfield(gp,'post')
9 | copyfields = {'hyp'};
10 | emptyfields = {'alpha','sW','L','sn2_mult','Lchol'};
11 | checkfields = {'intmean'};
12 | for ff = copyfields; post0.(ff{:}) = []; end
13 | for ff = emptyfields; post0.(ff{:}) = []; end
14 | for ff = checkfields
15 | if isfield(gp.post(1),ff{:}); post0.(ff{:}) = []; end
16 | end
17 |
18 | for iG = 1:numel(gp)
19 | Ns = numel(gp(iG).post);
20 | postnew = post0;
21 | for iS = 1:Ns
22 | post_tmp = post0;
23 | for ff = copyfields
24 | post_tmp.(ff{:}) = gp(iG).post(iS).(ff{:});
25 | end
26 | postnew(iS) = post_tmp;
27 | end
28 | gp(iG).post = postnew;
29 | end
30 | end
--------------------------------------------------------------------------------
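
The intended pattern is to strip the heavy posterior factors before storing a GP and rebuild them on demand. A sketch of the round trip, assuming gp is a GP struct trained with gplite_train:

    gp_small = gplite_clean(gp);       % drop alpha, sW, L, sn2_mult, Lchol
    gp_full  = gplite_post(gp_small);  % recompute the auxiliary posterior factors
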
/gplite/gplite_covfun.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/gplite/gplite_covfun.m
--------------------------------------------------------------------------------
/gplite/gplite_demo.m:
--------------------------------------------------------------------------------
1 | %GPLITE_DEMO Demo script with example usage for the GPLITE toolbox.
2 |
3 | % Create example data in 1D
4 | N = 31;
5 | X = linspace(-5,5,N)';
6 | s2 = 0.00*0.1*exp(0.5*X);
7 | y = sin(X) + sqrt(s2).*randn(size(X));
8 | y(y<0) = -abs(3*y(y<0)).^2;
9 | s2 = [];
10 |
11 | %idx = N+1:N+3;
12 | %X(idx) = linspace(6,7,numel(idx))';
13 | %s2(idx) = 1e-4;
14 | %y(idx(randperm(numel(idx)))) = -linspace(1000,1001,numel(idx))';
15 |
16 | hyp0 = []; % Starting hyperparameter vector for optimization
17 | Ns = 10; % Number of hyperparameter samples
18 | covfun = [3 3]; % GP covariance function
19 | meanfun = 4; % GP mean function
20 | noisefun = [1 0 0]; % Constant plus user-provided noise
21 | hprior = []; % Prior over hyperparameters
22 | options = []; % Additional options
23 |
24 | % Output warping function
25 | outwarpfun = @outwarp_negpow;
26 | %outwarpfun = [];
27 | options.OutwarpFun = outwarpfun;
28 |
29 | % Set prior over noise hyperparameters
30 | gp = gplite_post([],X,y,covfun,meanfun,noisefun,s2,[],outwarpfun);
31 | hprior = gplite_hypprior(gp);
32 |
33 | hprior.mu(gp.Ncov+1) = log(1e-3);
34 | hprior.sigma(gp.Ncov+1) = 0.5;
35 |
36 | if gp.Nnoise > 1
37 | hprior.LB(gp.Ncov+2) = log(5);
38 | hprior.mu(gp.Ncov+2) = log(10);
39 | hprior.sigma(gp.Ncov+2) = 0.01;
40 |
41 | hprior.mu(gp.Ncov+3) = log(0.3);
42 | hprior.sigma(gp.Ncov+3) = 0.01;
43 | hprior.df(gp.Ncov+3) = Inf;
44 | end
45 |
46 | if ~isempty(outwarpfun)
47 | hprior.mu(gp.Ncov+gp.Nnoise+gp.Nmean+2) = 0;
48 | hprior.sigma(gp.Ncov+gp.Nnoise+gp.Nmean+2) = 1;
49 | hprior.mu(gp.Ncov+gp.Nnoise+gp.Nmean+3) = 0;
50 | hprior.sigma(gp.Ncov+gp.Nnoise+gp.Nmean+3) = 1;
51 | end
52 |
53 | % Train GP on data
54 | [gp,hyp,output] = gplite_train(hyp0,Ns,X,y,covfun,meanfun,noisefun,s2,hprior,options);
55 |
56 | hyp % Hyperparameter samples
57 |
58 | xstar = linspace(-15,15,200)'; % Test points
59 |
60 | % Compute GP posterior predictive mean and variance at test points
61 | [ymu,ys2,fmu,fs2] = gplite_pred(gp,xstar);
62 |
63 | % Plot data and GP prediction
64 | close all;
65 | figure(1); hold on;
66 | gplite_plot(gp);
--------------------------------------------------------------------------------
/gplite/gplite_fmin.m:
--------------------------------------------------------------------------------
1 | function [x,fval,gp] = gplite_fmin(gp,x0,maxflag)
2 | %GPLITE_FMIN Find global minimum (or maximum) of GP.
3 |
4 | if nargin < 2; x0 = 0; end
5 | if nargin < 3 || isempty(maxflag); maxflag = 0; end
6 |
7 | MaxBnd = 10;
8 | hpd_frac = 0.5;
9 | D = size(gp.X,2);
10 | N0 = size(x0,1);
11 | Nstarts = max(3,N0);
12 |
13 | diam = max(gp.X) - min(gp.X);
14 | LB = min(gp.X) - MaxBnd*diam;
15 | UB = max(gp.X) + MaxBnd*diam;
16 |
17 | % First, train GP
18 | if ~isfield(gp,'post') || isempty(gp.post)
19 | % How many samples for the GP?
20 | if isfield(gp,'Ns') && ~isempty(gp.Ns); Ns_gp = gp.Ns; else; Ns_gp = 0; end
21 | options.Nopts = 1; % Do only one optimization
22 | gp = gplite_train(...
23 | [],Ns_gp,gp.X,gp.y,gp.covfun,gp.meanfun,gp.noisefun,[],[],options);
24 | end
25 |
26 | % Start from the min (or max) of the training data
27 | if maxflag
28 | [~,ord] = sort(gp.y,'descend');
29 | else
30 | [~,ord] = sort(gp.y,'ascend');
31 | end
32 |
33 | % Take best for sure
34 | X = gp.X(ord,:);
35 | x0 = [x0; X(1,:)];
36 | X(1,:) = [];
37 |
38 | if Nstarts > N0+1
39 | Nx = size(X,1);
40 | N_hpd = ceil(Nx*hpd_frac);
41 | idx = randperm(N_hpd,min(Nstarts-N0,N_hpd));
42 | x0 = [x0; X(idx,:)];
43 | end
44 |
45 | N0 = size(x0,1);
46 | x = zeros(N0,D);
47 | f = zeros(N0,1);
48 | opts = optimoptions('fmincon','GradObj','off','Display','off');
49 | for i = 1:N0
50 | [x(i,:),f(i)] = fmincon(@(x) optfun(x,gp,maxflag),x0(i,:),[],[],[],[],LB,UB,[],opts);
51 | end
52 |
53 | [fval,idx] = min(f);
54 | x = x(idx,:);
55 |
56 | if maxflag; fval = -fval; end
57 |
58 | end
59 |
60 | function [f,df] = optfun(x,gp,maxflag)
61 |
62 | if nargout > 1
63 | [f,df] = gplite_pred(gp,x);
64 | else
65 | f = gplite_pred(gp,x);
66 | end
67 |
68 | if maxflag % Want to find maximum, swap sign
69 | f = -f;
70 | if nargout > 1; df = -df; end
71 | end
72 |
73 | end
--------------------------------------------------------------------------------
/gplite/gplite_hypprior.m:
--------------------------------------------------------------------------------
1 | function [lp,dlp] = gplite_hypprior(hyp,hprior)
2 | %GPLITE_HYPPRIOR Log priors for hyperparameters of lite GP regression.
3 |
4 | if isstruct(hyp)
5 | % Return an empty hyperprior struct
6 | if isfield(hyp,'Noutwarp'); Noutwarp = hyp.Noutwarp; else; Noutwarp = 0; end
7 | Nhyp = hyp.Ncov + hyp.Nnoise + hyp.Nmean + Noutwarp;
8 | hprior.mu = NaN(Nhyp,1);
9 | hprior.sigma = NaN(Nhyp,1);
10 | hprior.df = NaN(Nhyp,1);
11 | hprior.LB = NaN(Nhyp,1);
12 | hprior.UB = NaN(Nhyp,1);
13 | lp = hprior; dlp = [];
14 | else
15 |
16 | compute_grad = nargout > 1; % Compute gradient if required
17 |
18 | [Nhyp,Ns] = size(hyp); % Hyperparameters and samples
19 | if Ns > 1
20 | error('gplite_hypprior:nosampling', ...
21 | 'Hyperparameter log priors are available only for one-sample hyperparameter inputs.');
22 | end
23 |
24 | lp = 0;
25 | if compute_grad; dlp = zeros(Nhyp,1); end
26 |
27 | mu = hprior.mu(:);
28 | sigma = abs(hprior.sigma(:));
29 | if ~isfield(hprior,'df') || isempty(hprior.df) % Degrees of freedom
30 | df = 7*ones(Nhyp,1); % ~ from Gelman et al. (2009)
31 | else
32 | df = hprior.df(:);
33 | end
34 |
35 | uidx = ~isfinite(mu) | ~isfinite(sigma); % Uniform
36 | gidx = ~uidx & (df == 0 | ~isfinite(df)) & isfinite(sigma); % Gaussian
37 | tidx = ~uidx & df > 0 & isfinite(df); % Student's t
38 |
39 | % Quadratic form
40 | z2 = zeros(Nhyp,1);
41 | z2(gidx | tidx) = ((hyp(gidx | tidx) - mu(gidx | tidx))./sigma(gidx | tidx)).^2;
42 |
43 | % Gaussian prior
44 | if any(gidx)
45 | lp = lp -0.5*sum(log(2*pi*sigma(gidx).^2) + z2(gidx));
46 | if compute_grad
47 | dlp(gidx) = -(hyp(gidx) - mu(gidx))./sigma(gidx).^2;
48 | end
49 | end
50 |
51 | % Student's t prior
52 | if any(tidx)
53 | lp = lp + sum(gammaln(0.5*(df(tidx)+1)) - gammaln(0.5*df(tidx)) - 0.5*log(pi*df(tidx)) ...
54 | - log(sigma(tidx)) - 0.5*(df(tidx)+1).*log1p(z2(tidx)./df(tidx)));
55 | if compute_grad
56 | dlp(tidx) = -(df(tidx)+1)./df(tidx)./(1+z2(tidx)./df(tidx)).*(hyp(tidx) - mu(tidx))./sigma(tidx).^2;
57 | end
58 | end
59 | end
--------------------------------------------------------------------------------
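
From the branching above: a hyperparameter gets a flat prior if its mu or sigma is non-finite, a Gaussian prior if df is 0 or non-finite (with finite sigma), and a Student's t prior if df is finite and positive. A sketch that builds an empty template and assigns one prior of each type (the 3-hyperparameter setup is hypothetical):

    hyp0.Ncov = 2; hyp0.Nnoise = 1; hyp0.Nmean = 0;  % 3 hyperparameters in total
    hprior = gplite_hypprior(hyp0);                  % empty all-NaN template

    hprior.mu(1) = 0; hprior.sigma(1) = 1; hprior.df(1) = Inf;  % Gaussian
    hprior.mu(2) = 0; hprior.sigma(2) = 2; hprior.df(2) = 7;    % Student's t
                                                                % (3rd stays flat)
    hyp = [0.5; -0.3; 0.1];                          % one hyperparameter sample
    [lp,dlp] = gplite_hypprior(hyp,hprior)           % log prior and its gradient
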
/gplite/gplite_intmeanfun.m:
--------------------------------------------------------------------------------
1 | function H = gplite_intmeanfun(X,intmeanfun,y,extras)
2 | %GPLITE_INTMEANFUN Integrated mean function for lite Gaussian Process regression.
 3 | %   H = GPLITE_INTMEANFUN(X,INTMEANFUN) computes the design matrix H of
 4 | %   basis functions for the integrated mean function INTMEANFUN,
 5 | %   evaluated at test points X. With an integrated mean function, the
 6 | %   GP mean is a linear combination of basis functions whose
 7 | %   coefficients are marginalized over analytically (see, e.g.,
 8 | %   Rasmussen and Williams, 2006, Sec. 2.7). H is a NB-by-N matrix,
 9 | %   where NB is the number of basis functions and N the number of test
10 | %   points. INTMEANFUN can be a scalar or a character array specifying
11 | %   the integrated mean function, as follows:
12 | %
13 | %   INTMEANFUN               MEAN FUNCTION TYPE       BASIS FUNCTIONS
14 | %   1 or 'const'             constant                 1
15 | %   2 or 'linear'            linear                   1+D
16 | %   3 or 'quadratic'         quadratic (diagonal)     1+2*D
17 | %   4 or 'fullquadratic'     quadratic (full)         1+2*D+D*(D-1)/2
18 | %
19 | %   The basis functions are ordered as: the constant term; the linear
20 | %   terms X(:,d); the squared terms X(:,d).^2; and, for the full
21 | %   quadratic mean function, the cross terms X(:,d).*X(:,d') with
22 | %   d < d'.
23 | %
24 | %   See also GPLITE_COVFUN, GPLITE_MEANFUN, GPLITE_NOISEFUN.
38 |
39 | [N,D] = size(X); % Number of training points and dimension
40 |
41 | switch intmeanfun
42 | case {1,'1','const'}
43 | intmeanfun = 1;
44 | Nb = 1;
45 | case {2,'2','linear'}
46 | intmeanfun = 2;
47 | Nb = 1 + D;
48 | case {3,'3','quadratic'}
49 | intmeanfun = 3;
50 | Nb = 1 + 2*D;
51 | case {4,'4','full','fullquad','fullquadratic'}
52 | intmeanfun = 4;
53 | Nb = 1 + 2*D + D*(D-1)/2;
54 | otherwise
55 | if isnumeric(intmeanfun); intmeanfun = num2str(intmeanfun); end
56 | error('gplite_intmeanfun:UnknownMeanFun',...
57 | ['Unknown integrated mean function identifier: [' intmeanfun '].']);
58 | end
59 |
60 | H = zeros(Nb,N);
61 |
62 | if intmeanfun >= 1
63 | H(1,:) = 1;
64 | end
65 | if intmeanfun >= 2
66 | H(2:D+1,:) = X';
67 | end
68 | if intmeanfun >= 3
69 | H(D+2:2*D+1,:) = X'.^2;
70 | end
71 | if intmeanfun >= 4
72 | idx = 0;
73 | for d = 1:D-1
74 | H(1+2*D+idx+(1:D-d),:) = bsxfun(@times,X(:,d)',X(:,d+1:D)');
75 | idx = idx + D-d;
76 | end
77 | end
78 |
79 | end
80 |
81 |
82 |
--------------------------------------------------------------------------------
/gplite/gplite_nlZ.m:
--------------------------------------------------------------------------------
1 | function [nlZ,dnlZ,post,K_mat,Q] = gplite_nlZ(hyp,gp,hprior)
2 | %GPLITE_NLZ Negative log marginal likelihood for lite GP regression.
 3 | %   [NLZ,DNLZ] = GPLITE_NLZ(HYP,GP) computes the negative log marginal
 4 | %   likelihood NLZ and its gradient DNLZ for hyperparameter vector HYP.
 5 | %   HYP is a column vector (see below). GP is a GPLITE struct.
 6 | %
 7 | %   [NLZ,DNLZ] = GPLITE_NLZ(HYP,GP,HPRIOR) uses prior over hyperparameters
 8 | %   defined by the struct HPRIOR. HPRIOR has fields HPRIOR.mu, HPRIOR.sigma
 9 | %   and HPRIOR.df which contain vectors representing, respectively, the mean,
10 | %   standard deviation and degrees of freedom of the prior for each
11 | %   hyperparameter. Priors are generally represented by Student's t distributions.
12 | %   Set HPRIOR.df(i) = 0 or Inf to have instead a Gaussian prior for the i-th
13 | %   hyperparameter. Set HPRIOR.sigma(i) = Inf to have a (non-normalized)
14 | %   flat prior over the i-th hyperparameter. Priors are defined in
15 | %   transformed hyperparameter space (i.e., log space for positive-only
16 | %   hyperparameters).
17 | %
18 | %   [NLZ,DNLZ,POST] = GPLITE_NLZ(...) also returns a POST structure
19 | %   associated with the provided hyperparameters.
20 | %
21 | %   [NLZ,DNLZ,POST,K_MAT] = GPLITE_NLZ(...) also returns the computed
22 | %   kernel matrix K_MAT.
23 | %
24 | %   [NLZ,DNLZ,POST,K_MAT,Q] = GPLITE_NLZ(...) also returns the computed
25 | %   auxiliary matrix Q used for computing derivatives.
26 |
27 | if nargin < 3; hprior = []; end
28 |
29 | [Nhyp,Ns] = size(hyp); % Hyperparameters and samples
30 | compute_grad = nargout > 1; % Compute gradient if required
31 |
32 | Ncov = gp.Ncov;
33 | Nnoise = gp.Nnoise;
34 | Nmean = gp.Nmean;
35 | if isfield(gp,'Noutwarp'); Noutwarp = gp.Noutwarp; else; Noutwarp = 0; end
36 |
37 | if Nhyp ~= (Ncov+Nnoise+Nmean+Noutwarp)
38 | error('gplite_nlZ:dimmismatch','Number of hyperparameters mismatched with dimension of training inputs.');
39 | end
40 | if compute_grad && Ns > 1
41 | error('gplite_nlZ:NoSampling', ...
42 | 'Computation of the log marginal likelihood is available only for one-sample hyperparameter inputs.');
43 | end
44 |
45 | switch nargout
46 | case {1,2}
47 | [nlZ,dnlZ] = gplite_core(hyp,gp,1,compute_grad);
48 | case 3
49 | [nlZ,dnlZ,post] = gplite_core(hyp,gp,1,compute_grad);
50 | case 4
51 | [nlZ,dnlZ,post,K_mat] = gplite_core(hyp,gp,1,compute_grad);
52 | case 5
53 | [nlZ,dnlZ,post,K_mat,Q] = gplite_core(hyp,gp,1,compute_grad);
54 | end
55 |
56 | % Compute hyperparameter prior if specified
57 | if ~isempty(hprior)
58 | if compute_grad
59 | [P,dP] = gplite_hypprior(hyp,hprior);
60 | nlZ = nlZ - P;
61 | dnlZ = dnlZ - dP;
62 | else
63 | P = gplite_hypprior(hyp,hprior);
64 | nlZ = nlZ - P;
65 | end
66 | end
67 |
68 | end
--------------------------------------------------------------------------------
/gplite/gplite_qpred.m:
--------------------------------------------------------------------------------
1 | function y = gplite_qpred(gp,p,type,Xstar,ystar,s2star)
2 | %GPLITE_QPRED Quantile prediction for lite Gaussian Processes regression.
3 |
4 | if nargin < 5; ystar = []; end
5 | if nargin < 6; s2star = []; end
6 |
7 | Ns = numel(gp.post); % Hyperparameter samples
8 | Nstar = size(Xstar,1); % Number of test inputs
9 |
10 | nx = 10;
11 | xx = norminv(linspace(0.5/nx,1-0.5/nx,nx));
12 |
13 | switch lower(type(1))
14 | case 'y'; obs_flag = true;
15 | case 'f'; obs_flag = false;
16 | otherwise
17 | error('gplite_qpred:unknowntype', ...
18 |             'Quantile prediction TYPE should be ''y'' for predicted observations or ''f'' for predicted latent function.');
19 | end
20 |
21 | % Output warping function
22 | outwarp_flag = isfield(gp,'outwarpfun') && ~isempty(gp.outwarpfun);
23 | if outwarp_flag
24 | Noutwarp = gp.Noutwarp;
25 | fmu_prewarp = zeros(Nstar,Ns);
26 | else
27 | Noutwarp = 0;
28 | end
29 |
30 | % Get GP prediction (observed or latent), by hyperparameter sample
31 | if obs_flag
32 | [gmu,gs2] = gplite_pred(gp,Xstar,ystar,s2star,1,1);
33 | else
34 | [~,~,gmu,gs2] = gplite_pred(gp,Xstar,ystar,s2star,1,1);
35 | end
36 |
37 | y = zeros(Nstar,Ns*nx);
38 |
39 | for s = 1:Ns
40 | grid = bsxfun(@plus,gmu(:,s),bsxfun(@times,sqrt(gs2(:,s)),xx));
41 | if outwarp_flag
42 | hyp = gp.post(s).hyp;
43 | hyp_outwarp = hyp(gp.Ncov+gp.Nnoise+gp.Nmean+1:gp.Ncov+gp.Nnoise+gp.Nmean+Noutwarp);
44 | grid = gp.outwarpfun(hyp_outwarp,grid,'inv');
45 | end
46 | y(:,(1:nx)+(s-1)*nx) = grid;
47 | end
48 |
49 | y = quantile(y,p,2);
50 |
51 |
--------------------------------------------------------------------------------
/gplite/gplite_quad.m:
--------------------------------------------------------------------------------
1 | function [F,varF] = gplite_quad(gp,mu,sigma,ssflag)
2 | %GPLITE_QUAD Bayesian quadrature for given Gaussian process.
3 |
4 | if nargin < 4 || isempty(ssflag); ssflag = false; end
5 |
6 | compute_var = nargout > 1; % Compute variance of the integral?
7 |
8 | [N,D] = size(gp.X); % Number of training points and dimension
9 | Ns = numel(gp.post); % Hyperparameter samples
10 |
11 | % Number of GP hyperparameters
12 | Ncov = gp.Ncov;
13 | Nnoise = gp.Nnoise;
14 | Nmean = gp.Nmean;
15 |
16 | if all(gp.meanfun ~= [0 1 4 6 8])
17 | error('gplite_quad:UnsupportedMeanFun', ...
18 |         'Bayesian quadrature currently only supports zero, constant, negative quadratic, squared exponential, or negative quadratic plus squared exponential mean functions.');
19 | end
20 |
21 | if gp.covfun ~= 1
22 | error('gplite_quad:UnsupportedCovFun', ...
23 | 'Bayesian quadrature only supports the squared exponential kernel.');
24 | end
25 |
26 | Nstar = size(mu,1);
27 | if size(sigma,1) == 1; sigma = repmat(sigma,[Nstar,1]); end
28 |
29 | % Which mean function is being used?
30 | quadratic_meanfun = gp.meanfun == 4;
31 | sqexp_meanfun = gp.meanfun == 6;
32 | quadsqexp_meanfun = gp.meanfun == 8;
33 |
34 | F = zeros(Nstar,Ns);
35 | if compute_var; varF = zeros(Nstar,Ns); end
36 |
37 | % Loop over hyperparameter samples
38 | for s = 1:Ns
39 | hyp = gp.post(s).hyp;
40 |
41 | % Extract GP hyperparameters from HYP
42 | ell(1,:) = exp(hyp(1:D));
43 | ln_sf2 = 2*hyp(D+1);
44 | sum_lnell = sum(hyp(1:D));
45 |
46 | % GP mean function hyperparameters
47 | if gp.meanfun > 0; m0 = hyp(Ncov+Nnoise+1); else; m0 = 0; end
48 | if quadratic_meanfun || sqexp_meanfun || quadsqexp_meanfun
49 | xm(1,:) = hyp(Ncov+Nnoise+1+(1:D));
50 | omega(1,:) = exp(hyp(Ncov+Nnoise+D+1+(1:D)));
51 | if sqexp_meanfun
52 | h = exp(hyp(Ncov+Nnoise+2*D+2));
53 | end
54 | end
55 | if quadsqexp_meanfun
56 | xm_se(1,:) = hyp(Ncov+Nnoise+2*D+1+(1:D));
57 | omega_se(1,:) = exp(hyp(Ncov+Nnoise+3*D+1+(1:D)));
58 | h_se = hyp(Ncov+Nnoise+4*D+2);
59 | end
60 |
61 | % GP posterior parameters
62 | alpha = gp.post(s).alpha;
63 | L = gp.post(s).L;
64 | Lchol = gp.post(s).Lchol;
65 |
66 | sn2 = exp(2*hyp(Ncov+1));
67 | sn2_eff = sn2*gp.post(s).sn2_mult;
68 |
69 | % Compute posterior mean of the integral
70 | tau = sqrt(bsxfun(@plus,sigma.^2,ell.^2));
71 | lnnf = ln_sf2 + sum_lnell - sum(log(tau),2); % Covariance normalization factor
72 | sumdelta2 = zeros(Nstar,N);
73 | for i = 1:D
74 | sumdelta2 = sumdelta2 + bsxfun(@rdivide,bsxfun(@minus, mu(:,i), gp.X(:,i)'),tau(:,i)).^2;
75 | end
76 | z = exp(bsxfun(@minus,lnnf,0.5*sumdelta2));
77 | F(:,s) = z*alpha + m0;
78 |
79 | if quadratic_meanfun || quadsqexp_meanfun
80 | nu_k = -0.5*sum(1./omega.^2 .* ...
81 | bsxfun(@plus,mu.^2 + sigma.^2 - bsxfun(@times,2*mu,xm), xm.^2),2);
82 | F(:,s) = F(:,s) + nu_k;
83 | elseif sqexp_meanfun
84 | tau2_mfun = bsxfun(@plus,sigma.^2,omega.^2);
85 | s2 = (bsxfun(@minus,mu,xm).^2)./tau2_mfun;
86 | nu_se = h*prod(bsxfun(@rdivide,omega,sqrt(tau2_mfun)),2).*exp(-0.5*sum(s2,2));
87 | F(:,s) = F(:,s) + nu_se;
88 | end
89 | if quadsqexp_meanfun
90 | tau2_mfun = bsxfun(@plus,sigma.^2,omega_se.^2);
91 | s2 = (bsxfun(@minus,mu,xm_se).^2)./tau2_mfun;
92 | nu_se = h_se*prod(bsxfun(@rdivide,omega_se,sqrt(tau2_mfun)),2).*exp(-0.5*sum(s2,2));
93 | F(:,s) = F(:,s) + nu_se;
94 | end
95 |
96 | % Compute posterior variance of the integral
97 | if compute_var
98 | tau_kk = sqrt(bsxfun(@plus,2*sigma.^2,ell.^2));
99 | nf_kk = exp(ln_sf2 + sum_lnell - sum(log(tau_kk),2));
100 | if Lchol
101 | invKzk = (L\(L'\z'))/sn2_eff;
102 | else
103 | invKzk = -L*z';
104 | end
105 | J_kk = nf_kk - sum(z.*invKzk',2);
106 | varF(:,s) = max(eps,J_kk); % Correct for numerical error
107 | end
108 |
109 | end
110 |
111 | % Unless predictions for samples are requested separately, average over samples
112 | if Ns > 1 && ~ssflag
113 | Fbar = sum(F,2)/Ns;
114 | if compute_var
115 | varFss = sum((F - Fbar).^2,2)/(Ns-1); % Estimated variance of the samples
116 | varF = sum(varF,2)/Ns + varFss;
117 | end
118 | F = Fbar;
119 | end
120 |
--------------------------------------------------------------------------------
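
The posterior mean of the integral relies on the closed-form Gaussian-times-SE-kernel integral encoded by lnnf and sumdelta2 above: for each training point x_i, z_i = sf2 * prod_d(ell_d/sqrt(sigma_d^2+ell_d^2)) * exp(-0.5*sum_d (mu_d-x_id)^2/(sigma_d^2+ell_d^2)). A 1-D numerical check of the identity (variable names local to the sketch):

    ell = 0.7; sf2 = 1.3; xi = 0.4;      % SE kernel hyperparameters, training point
    mu = -0.2; sigma = 0.5;              % Gaussian integration measure N(mu,sigma^2)
    k = @(x) sf2*exp(-0.5*((x-xi)/ell).^2);                   % SE kernel slice
    p = @(x) exp(-0.5*((x-mu)/sigma).^2)/(sigma*sqrt(2*pi));  % Gaussian pdf
    z_closed = sf2*ell/sqrt(sigma^2+ell^2) * exp(-0.5*(mu-xi)^2/(sigma^2+ell^2));
    z_numint = integral(@(x) k(x).*p(x),-20,20);
    abs(z_closed - z_numint)             % ~1e-16: closed form matches quadrature
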
/gplite/gplite_rnd.m:
--------------------------------------------------------------------------------
1 | function [Fstar,Ystar] = gplite_rnd(gp,Xstar,nowarpflag)
2 | %GPLITE_RND Draw a random function from Gaussian process.
3 | % FSTAR = GPLITE_RND(GP,XSTAR) draws a random function from GP, evaluated
4 | % at XSTAR.
5 | %
6 | % [FSTAR,YSTAR] = GPLITE_RND(GP,XSTAR) adds observation noise to the
7 | % drawn function.
8 | %
9 | % See also GPLITE_POST, GPLITE_PRED.
10 |
11 | if nargin < 3 || isempty(nowarpflag); nowarpflag = false; end
12 |
13 | [N,D] = size(gp.X); % Number of training points and dimension
14 | Ns = numel(gp.post); % Hyperparameter samples
15 | Nstar = size(Xstar,1); % Number of test inputs
16 |
17 | Ncov = gp.Ncov;
18 | Nnoise = gp.Nnoise;
19 | Nmean = gp.Nmean;
20 |
21 | % Draw from hyperparameter samples
22 | s = randi(Ns);
23 |
24 | hyp = gp.post(s).hyp;
25 |
26 | alpha = gp.post(s).alpha;
27 | L = gp.post(s).L;
28 | Lchol = gp.post(s).Lchol;
29 | sW = gp.post(s).sW;
30 |
31 | % Compute GP mean function at test points
32 | hyp_mean = hyp(Ncov+Nnoise+1:Ncov+Nnoise+Nmean);
33 | mstar = gplite_meanfun(hyp_mean,Xstar,gp.meanfun,[],gp.meanfun_extras);
34 |
35 | % Compute kernel matrix
36 | hyp_cov = hyp(1:Ncov);
37 | Kstar_mat = gplite_covfun(hyp_cov,Xstar,gp.covfun);
38 |
39 | if ~isempty(gp.y)
40 | % Compute cross-kernel matrix Ks_mat
41 | Ks_mat = gplite_covfun(hyp_cov,gp.X,gp.covfun,Xstar);
42 |
43 | fmu = mstar + Ks_mat'*alpha; % Conditional mean
44 |
45 | if Lchol
46 | V = L'\(repmat(sW,[1,Nstar]).*Ks_mat);
47 | C = Kstar_mat - V'*V; % predictive variances
48 | else
49 | LKs = L*Ks_mat;
50 | C = Kstar_mat + Ks_mat'*LKs;
51 | end
52 | else
53 | fmu = mstar; % No data, draw from prior
54 | C = Kstar_mat + eps*eye(Nstar);
55 | end
56 |
57 | C = (C + C')/2; % Enforce symmetry if lost due to numerical errors
58 |
59 | % Draw random function
60 |     T = robustchol(C); % CHOL can fail for near-singular C; this is more stable
61 | Fstar = T' * randn(size(T,1),1) + fmu;
62 |
63 | % Add observation noise
64 | if nargout > 1
65 | % Get observation noise hyperparameters and evaluate noise at test points
66 | hyp_noise = hyp(Ncov+1:Ncov+Nnoise);
67 | sn2 = gplite_noisefun(hyp_noise,Xstar,gp.noisefun);
68 | sn2_mult = gp.post(s).sn2_mult;
69 | if isempty(sn2_mult); sn2_mult = 1; end
70 | Ystar = Fstar + sqrt(sn2*sn2_mult).*randn(size(fmu));
71 | end
72 |
73 | % Apply output warping to map back to observation space
74 | if ~isempty(gp.outwarpfun) && ~nowarpflag
75 | Noutwarp = gp.outwarpfun('info');
76 | hyp = gp.post(s).hyp;
77 | hyp_outwarp = hyp(Ncov+Nnoise+Nmean+1:Ncov+Nnoise+Nmean+Noutwarp);
78 | Fstar = gp.outwarpfun(hyp_outwarp,Fstar,'inv');
79 | if nargout > 1
80 | Ystar = gp.outwarpfun(hyp_outwarp,Ystar,'inv');
81 | end
82 | end
83 |
84 | end
85 |
86 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
87 |
88 | function [T,p] = robustchol(Sigma)
89 | %ROBUSTCHOL Cholesky-like decomposition for covariance matrix.
90 |
91 | [n,m] = size(Sigma); % Should be square
92 | [T,p] = chol(Sigma);
93 |
94 | if p > 0
95 | [U,D] = eig((Sigma+Sigma')/2);
96 |
97 | [~,maxidx] = max(abs(U),[],1);
98 | negidx = (U(maxidx + (0:n:(m-1)*n)) < 0);
99 | U(:,negidx) = -U(:,negidx);
100 |
101 | D = diag(D);
102 | tol = eps(max(D)) * length(D);
103 | t = (abs(D) > tol);
104 | D = D(t);
105 | p = sum(D<0); % negative eigenvalues
106 |
107 | if p == 0
108 | T = diag(sqrt(D)) * U(:,t)';
109 | else
110 | T = zeros(0,'like',Sigma);
111 | end
112 | end
113 |
114 |
115 | end
116 |
--------------------------------------------------------------------------------
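
The robustchol fallback handles covariance matrices that are positive semidefinite only up to round-off: when chol fails, it rebuilds a factor from the eigendecomposition, dropping near-zero eigenvalues. A sketch of the same logic on a deliberately rank-deficient covariance:

    v = [1;2;3]; C = v*v';              % rank-1 PSD covariance: chol must fail
    [~,p] = chol(C); p > 0              % true: C is not positive definite
    [U,E] = eig((C+C')/2);
    d = diag(E);
    t = abs(d) > eps(max(d))*numel(d);  % keep eigenvalues above tolerance
    T = diag(sqrt(d(t)))*U(:,t)';       % factor such that T'*T = C
    norm(T'*T - C)                      % ~1e-14: valid Cholesky-like factor
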
/gplite/gplite_sample.m:
--------------------------------------------------------------------------------
1 | function [Xs,gp] = gplite_sample(gp,Ns,x0,method,logprior,beta,VarThresh,proppdf,proprnd,bounds)
2 | %GPLITE_SAMPLE Draw random samples from log pdf represented by GP.
3 |
4 | if nargin < 3; x0 = []; end
5 | if nargin < 4 || isempty(method); method = 'slicesample'; end
6 | if nargin < 5 || isempty(logprior); logprior = []; end
7 | if nargin < 6 || isempty(beta); beta = 0; end
8 | if nargin < 7 || isempty(VarThresh); VarThresh = Inf; end
9 | if nargin < 8 || isempty(proppdf); proppdf = []; end
10 | if nargin < 9 || isempty(proprnd); proprnd = []; end
11 | if nargin < 10; bounds = []; end
12 |
13 | D = size(gp.X,2);
14 |
15 | widths = std(gp.X,[],1);
16 | if isempty(bounds)
17 | MaxBnd = 10;
18 | diam = max(gp.X) - min(gp.X);
19 | LB = min(gp.X) - MaxBnd*diam;
20 | UB = max(gp.X) + MaxBnd*diam;
21 | else
22 | LB = bounds(1,:);
23 | UB = bounds(2,:);
24 | end
25 |
26 | % First, train GP
27 | if ~isfield(gp,'post') || isempty(gp.post)
28 | % How many samples for the GP?
29 | if isfield(gp,'Ns') && ~isempty(gp.Ns)
30 | Ns_gp = gp.Ns;
31 | else
32 | Ns_gp = 0;
33 | end
34 | if isfield(gp,'Nopts') && ~isempty(gp.Nopts)
35 | options.Nopts = gp.Nopts;
36 | else
37 | options.Nopts = 1; % Do only one optimization
38 | end
39 | if isfield(gp,'s2'); s2 = gp.s2; else; s2 = []; end
40 | gp = gplite_train(...
41 | [],Ns_gp,gp.X,gp.y,gp.covfun,gp.meanfun,gp.noisefun,s2,[],options);
42 | end
43 |
44 | % Recompute posterior auxiliary info if needed
45 | if ~isfield(gp.post(1),'alpha') || isempty(gp.post(1).alpha)
46 | gp = gplite_post(gp);
47 | end
48 |
49 | logpfun = @(x) log_gpfun(gp,x,beta,VarThresh);
50 |
51 | switch method
52 | case {'slicesample','slicesamplebnd'}
53 | sampleopts.Burnin = ceil(Ns/10);
54 | sampleopts.Thin = 1;
55 | sampleopts.Display = 'off';
56 | sampleopts.Diagnostics = false;
57 | sampleopts.LogPrior = logprior;
58 | sampleopts.MetropolisPdf = proppdf;
59 | sampleopts.MetropolisRnd = proprnd;
60 |
61 | if isempty(x0)
62 | [~,idx0] = max(gp.y);
63 | x0 = gp.X(idx0,:);
64 | else
65 | x0 = x0(1,:);
66 | end
67 | Xs = slicesamplebnd(logpfun, ...
68 | x0,Ns,widths,LB,UB,sampleopts);
69 |
70 | case 'parallel'
71 | sampleopts.Burnin = ceil(Ns/5);
72 | sampleopts.Thin = 1;
73 | sampleopts.Display = 'off';
74 | sampleopts.Diagnostics = false;
75 | sampleopts.VarTransform = false;
76 | sampleopts.InversionSample = false;
77 | sampleopts.FitGMM = false;
78 |
79 | if ~isempty(logprior)
80 | logPfuns = {logprior,logpfun};
81 | else
82 | logPfuns = logpfun;
83 | end
84 |
85 | % sampleopts.TransitionOperators = {'transSliceSampleRD'};
86 |
87 | W = 2*(D+1);
88 | if isempty(x0)
89 | % Take starting points from high posterior density region
90 | hpd_frac = 0.25;
91 | N = numel(gp.y);
92 | N_hpd = min(N,max(W,round(hpd_frac*N)));
93 | if isempty(logprior)
94 | [~,ord] = sort(gp.y,'descend');
95 | else
96 | dy = logprior(gp.X);
97 | [~,ord] = sort(gp.y + dy,'descend');
98 | end
99 | X_hpd = gp.X(ord(1:N_hpd),:);
100 | x0 = X_hpd(randperm(N_hpd,min(W,N_hpd)),:);
101 | end
102 | x0 = bsxfun(@min,bsxfun(@max,x0,LB),UB);
103 | Xs = eissample_lite(logPfuns,x0,Ns,W,widths,LB,UB,sampleopts);
104 | end
105 |
106 | end
107 |
108 | %--------------------------------------------------------------------------
109 | function y = log_gpfun(gp,x,beta,VarThresh)
110 |
111 | if (VarThresh == 0 || ~isfinite(VarThresh)) && beta == 0
112 | y = gplite_pred(gp,x);
113 | else
114 | [y,s2] = gplite_pred(gp,x);
115 | y(s2 >= VarThresh) = y(s2 >= VarThresh) - (s2(s2 >= VarThresh) - VarThresh);
116 | y = y - beta*sqrt(s2);
117 | end
118 |
119 | end
--------------------------------------------------------------------------------
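
A hedged usage sketch for GPLITE_SAMPLE. The gp struct fields follow the calls visible in the function above; the numeric codes for the covariance, mean and noise functions are illustrative assumptions, not documented values.

% Sample from a GP surrogate of a toy 2-D quadratic log density.
gp = [];
gp.X = randn(50,2);            % training inputs
gp.y = -0.5*sum(gp.X.^2,2);    % log-density values at the inputs
gp.covfun = 1;                 % assumed code (e.g. squared exponential)
gp.meanfun = 0;                % assumed code (e.g. zero mean)
gp.noisefun = [1 0 0];         % assumed code (e.g. constant noise)
Xs = gplite_sample(gp,1000);   % 1000 MCMC samples via slice sampling
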
/gplite/outwarp_negpow.m:
--------------------------------------------------------------------------------
1 | function [ywarp,dwarp_dt,dwarp_dtheta,d2warp_dthetadt] = outwarp_negpow(hyp,y,invflag)
2 | %OUTWARP_NEGPOW Output warping function via negative power transform.
3 | % YWARP = OUTWARP_NEGPOW(HYP,Y) warps the outputs Y that fall below a
4 | % threshold Y0 as YWARP = Y0 - (Y0 - Y).^K, leaving values at or above
5 | % the threshold unchanged. HYP = [Y0; log(K)] is a column vector of
6 | % output warping function hyperparameters.
7 | % YWARP = OUTWARP_NEGPOW(HYP,Y,'inv') applies the inverse warping.
8 | %
9 | % See also GPLITE_POST, GPLITE_TRAIN, OUTWARP_TEST.
10 |
11 | if nargin < 2; y = []; end
12 | if nargin < 3 || isempty(invflag); invflag = false; else; invflag = true; end
13 |
14 | if invflag && nargout > 1
15 | error('outwarp_fun:InverseOnly', ...
16 | ['When calling for the inverse output warping function, only one function output is expected.']);
17 | end
18 |
19 | %--------------------------------------------------------------------------
20 | % CUSTOM: Number of hyperparameters
21 | Noutwarp = 2; % # hyperparameters of the output warping function
22 | %--------------------------------------------------------------------------
23 |
24 | N = size(y,1); % Number of training points
25 |
26 | % Return number of output warping function hyperparameters and additional info
27 | if ischar(hyp)
28 | ywarp = Noutwarp;
29 | if nargout > 1
30 |
31 | % Initialize bounds for all hyperparameters
32 | outwarp_info.LB = -Inf(1,Noutwarp);
33 | outwarp_info.UB = Inf(1,Noutwarp);
34 | outwarp_info.PLB = -Inf(1,Noutwarp);
35 | outwarp_info.PUB = Inf(1,Noutwarp);
36 | outwarp_info.x0 = NaN(1,Noutwarp);
37 |
38 | %------------------------------------------------------------------
39 | % CUSTOM: Initialize hyperparameter bounds and other details
40 |
41 | % Threshold parameter
42 | outwarp_info.LB(1) = min(y);
43 | outwarp_info.UB(1) = max(y);
44 | outwarp_info.PLB(1) = min(y);
45 | outwarp_info.PUB(1) = max(y);
46 | outwarp_info.x0(1) = NaN;
47 |
48 | % Power exponent k (log space)
49 | outwarp_info.LB(2) = -Inf;
50 | outwarp_info.UB(2) = Inf;
51 | outwarp_info.PLB(2) = -3;
52 | outwarp_info.PUB(2) = 3;
53 | outwarp_info.x0(2) = 0;
54 |
55 | %------------------------------------------------------------------
56 |
57 | % Assign handle of current output warping function
58 | outwarp_info.outwarpfun = str2func(mfilename);
59 |
60 | % Plausible starting point
61 | idx_nan = isnan(outwarp_info.x0);
62 | outwarp_info.x0(idx_nan) = 0.5*(outwarp_info.PLB(idx_nan) + outwarp_info.PUB(idx_nan));
63 |
64 | dwarp_dt = outwarp_info;
65 |
66 | end
67 |
68 | return;
69 | end
70 |
71 | [Nhyp,Ns] = size(hyp); % Hyperparameters and samples
72 |
73 | if Nhyp ~= Noutwarp
74 | error('outwarp_fun:WrongLikHyp', ...
75 | ['Expected ' num2str(Noutwarp) ' output warping function hyperparameters, ' num2str(Nhyp) ' passed instead.']);
76 | end
77 | if Ns > 1
78 | error('outwarp_fun:nosampling', ...
79 | 'Output warping function output is available only for one-sample hyperparameter inputs.');
80 | end
81 |
82 | %--------------------------------------------------------------------------
83 | % CUSTOM: Compute output warping function and gradients
84 |
85 | % Read hyperparameters
86 | y0 = hyp(1);
87 | k = exp(hyp(2));
88 |
89 | % Compute output warping or inverse warping
90 | ywarp = y;
91 | idx = y < y0;
92 | if invflag % Inverse output warping
93 | ywarp(idx) = y0 - (y0 - y(idx)).^(1/k);
94 | else % Direct output warping
95 | delta = (y0 - y(idx));
96 | deltak = delta.^k;
97 | ywarp(idx) = y0 - deltak;
98 | end
99 |
100 | if nargout > 1
101 | % First-order derivative of output warping function in output space
102 | dwarp_dt = ones(size(y));
103 | deltakm1 = delta.^(k-1);
104 |
105 | dwarp_dt(idx) = k*deltakm1;
106 |
107 | if nargout > 2
108 | % Gradient of output warping function wrt hyperparameters
109 | dwarp_dtheta = zeros(N,Noutwarp);
110 |
111 | dwarp_dtheta(idx,1) = 1 - k*deltakm1; % y0
112 | dwarp_dtheta(idx,2) = -k*deltak.*log(delta); % log(k)
113 |
114 | if nargout > 3
115 | % Gradient of derivative of output warping function
116 | d2warp_dthetadt = zeros(N,Noutwarp);
117 |
118 | d2warp_dthetadt(idx,1) = k*(k-1)*delta.^(k-2); % y0
119 | d2warp_dthetadt(idx,2) = k*deltakm1 + k^2*deltakm1.*log(delta); % log(k)
120 |
121 | end
122 |
123 | end
124 | end
125 |
126 | end
--------------------------------------------------------------------------------
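
A worked example of the warp defined above: with HYP = [0; log(2)] (threshold y0 = 0, exponent k = 2), values below the threshold are mapped as y0 - (y0 - y).^k, values at or above it pass through, and the inverse recovers the input exactly.

hyp = [0; log(2)];                  % y0 = 0, k = 2
y   = [-3; -1; 0.5];
yw  = outwarp_negpow(hyp,y);        % [-9; -1; 0.5]
yb  = outwarp_negpow(hyp,yw,'inv'); % recovers [-3; -1; 0.5]
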
/gplite/outwarp_negpowc1.m:
--------------------------------------------------------------------------------
1 | function [ywarp,dwarp_dt,dwarp_dtheta,d2warp_dthetadt] = outwarp_negpowc1(hyp,y,invflag)
2 | %OUTWARP_NEGPOWC1 Output warping function: negative power, C1-continuous.
3 | % YWARP = OUTWARP_NEGPOWC1(HYP,Y) warps the outputs Y that fall below a
4 | % threshold Y0 as YWARP = Y0 + (1 - (1 + Y0 - Y).^K)/K, leaving values
5 | % at or above the threshold unchanged; the warping matches both value
6 | % and slope at the threshold. HYP = [Y0; log(K)].
7 | % YWARP = OUTWARP_NEGPOWC1(HYP,Y,'inv') applies the inverse warping.
8 | %
9 | % See also GPLITE_POST, GPLITE_TRAIN, OUTWARP_TEST.
10 |
11 | if nargin < 2; y = []; end
12 | if nargin < 3 || isempty(invflag); invflag = false; else; invflag = true; end
13 |
14 | if invflag && nargout > 1
15 | error('outwarp_fun:InverseOnly', ...
16 | ['When calling for the inverse output warping function, only one function output is expected.']);
17 | end
18 |
19 | %--------------------------------------------------------------------------
20 | % CUSTOM: Number of hyperparameters
21 | Noutwarp = 2; % # hyperparameters of the output warping function
22 | %--------------------------------------------------------------------------
23 |
24 | N = size(y,1); % Number of training points
25 |
26 | % Return number of output warping function hyperparameters and additional info
27 | if ischar(hyp)
28 | ywarp = Noutwarp;
29 | if nargout > 1
30 |
31 | if isempty(y); y = [0;1]; end
32 |
33 | % Initialize bounds for all hyperparameters
34 | outwarp_info.LB = -Inf(1,Noutwarp);
35 | outwarp_info.UB = Inf(1,Noutwarp);
36 | outwarp_info.PLB = -Inf(1,Noutwarp);
37 | outwarp_info.PUB = Inf(1,Noutwarp);
38 | outwarp_info.x0 = NaN(1,Noutwarp);
39 |
40 | %------------------------------------------------------------------
41 | % CUSTOM: Initialize hyperparameter bounds and other details
42 |
43 | % Threshold parameter
44 | outwarp_info.LB(1) = min(y);
45 | outwarp_info.UB(1) = max(y);
46 | outwarp_info.PLB(1) = min(y);
47 | outwarp_info.PUB(1) = max(y);
48 | outwarp_info.x0(1) = NaN;
49 |
50 | % Power exponent k (log space)
51 | outwarp_info.LB(2) = -Inf;
52 | outwarp_info.UB(2) = Inf;
53 | outwarp_info.PLB(2) = -3;
54 | outwarp_info.PUB(2) = 3;
55 | outwarp_info.x0(2) = 0;
56 |
57 | %------------------------------------------------------------------
58 |
59 | % Assign handle of current output warping function
60 | outwarp_info.outwarpfun = str2func(mfilename);
61 |
62 | % Plausible starting point
63 | idx_nan = isnan(outwarp_info.x0);
64 | outwarp_info.x0(idx_nan) = 0.5*(outwarp_info.PLB(idx_nan) + outwarp_info.PUB(idx_nan));
65 |
66 | dwarp_dt = outwarp_info;
67 |
68 | end
69 |
70 | return;
71 | end
72 |
73 | [Nhyp,Ns] = size(hyp); % Hyperparameters and samples
74 |
75 | if Nhyp ~= Noutwarp
76 | error('outwarp_fun:WrongLikHyp', ...
77 | ['Expected ' num2str(Noutwarp) ' output warping function hyperparameters, ' num2str(Nhyp) ' passed instead.']);
78 | end
79 | if Ns > 1
80 | error('outwarp_fun:nosampling', ...
81 | 'Output warping function output is available only for one-sample hyperparameter inputs.');
82 | end
83 |
84 | %--------------------------------------------------------------------------
85 | % CUSTOM: Compute output warping function and gradients
86 |
87 | % Read hyperparameters
88 | y0 = hyp(1);
89 | k = exp(hyp(2));
90 |
91 | % Compute output warping or inverse warping
92 | ywarp = y;
93 | idx = y < y0;
94 | if invflag % Inverse output warping
95 | ywarp(idx) = y0 + 1 - (1 + k*y0 - k*y(idx)).^(1/k);
96 | else % Direct output warping
97 | delta = (1 + y0 - y(idx));
98 | deltak = delta.^k;
99 | ywarp(idx) = y0 - deltak/k + 1/k;
100 | end
101 |
102 | if nargout > 1
103 | % First-order derivative of output warping function in output space
104 | dwarp_dt = ones(size(y));
105 | deltakm1 = delta.^(k-1);
106 |
107 | dwarp_dt(idx) = deltakm1;
108 |
109 | if nargout > 2
110 | % Gradient of output warping function wrt hyperparameters
111 | dwarp_dtheta = zeros(N,Noutwarp);
112 |
113 | dwarp_dtheta(idx,1) = 1 - deltakm1; % y0
114 | dwarp_dtheta(idx,2) = -deltak.*log(delta) + deltak/k - 1/k; % log(k)
115 |
116 | if nargout > 3
117 | % Gradient of derivative of output warping function
118 | d2warp_dthetadt = zeros(N,Noutwarp);
119 |
120 | d2warp_dthetadt(idx,1) = (k-1)*delta.^(k-2); % y0
121 | d2warp_dthetadt(idx,2) = k*deltakm1.*log(delta); % log(k)
122 |
123 | end
124 |
125 | end
126 | end
127 |
128 | end
--------------------------------------------------------------------------------
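
The C1 variant above matches both value and slope at the threshold: warp(y0) = y0 and warp'(y0) = 1, so the warped branch joins the identity branch smoothly. A quick finite-difference check (step size illustrative):

hyp  = [1; log(3)];                    % y0 = 1, k = 3
h    = 1e-6;
wfun = @(y) outwarp_negpowc1(hyp,y);
wfun(1)                                % returns 1 (value continuity)
(wfun(1) - wfun(1-h))/h                % ~1 (slope continuity)
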
/gplite/outwarp_negscaledpow.m:
--------------------------------------------------------------------------------
1 | function [ywarp,dwarp_dt,dwarp_dtheta,d2warp_dthetadt] = outwarp_negscaledpow(hyp,y,invflag)
2 | %OUTWARP_NEGSCALEDPOW Output warping function: scaled negative power.
3 | % YWARP = OUTWARP_NEGSCALEDPOW(HYP,Y) warps the outputs Y that fall
4 | % below a threshold Y0 as YWARP = Y0 - (A*(Y0 - Y)).^K, leaving values
5 | % at or above the threshold unchanged. HYP = [Y0; log(A); log(K)] is a
6 | % column vector of output warping function hyperparameters.
7 | % YWARP = OUTWARP_NEGSCALEDPOW(HYP,Y,'inv') applies the inverse warping.
8 | %
9 | % See also GPLITE_POST, GPLITE_TRAIN, OUTWARP_TEST.
10 |
11 | if nargin < 2; y = []; end
12 | if nargin < 3 || isempty(invflag); invflag = false; else; invflag = true; end
13 |
14 | if invflag && nargout > 1
15 | error('outwarp_fun:InverseOnly', ...
16 | ['When calling for the inverse output warping function, only one function output is expected.']);
17 | end
18 |
19 | %--------------------------------------------------------------------------
20 | % CUSTOM: Number of hyperparameters
21 | Noutwarp = 3; % # hyperparameters of the output warping function
22 | %--------------------------------------------------------------------------
23 |
24 | N = size(y,1); % Number of training points
25 |
26 | % Return number of output warping function hyperparameters and additional info
27 | if ischar(hyp)
28 | ywarp = Noutwarp;
29 | if nargout > 1
30 |
31 | % Initialize bounds for all hyperparameters
32 | outwarp_info.LB = -Inf(1,Noutwarp);
33 | outwarp_info.UB = Inf(1,Noutwarp);
34 | outwarp_info.PLB = -Inf(1,Noutwarp);
35 | outwarp_info.PUB = Inf(1,Noutwarp);
36 | outwarp_info.x0 = NaN(1,Noutwarp);
37 |
38 | %------------------------------------------------------------------
39 | % CUSTOM: Initialize hyperparameter bounds and other details
40 |
41 | % Threshold parameter
42 | outwarp_info.LB(1) = min(y);
43 | outwarp_info.UB(1) = max(y);
44 | outwarp_info.PLB(1) = min(y);
45 | outwarp_info.PUB(1) = max(y);
46 | outwarp_info.x0(1) = NaN;
47 |
48 | % Scaling parameter a (log space)
49 | outwarp_info.LB(2) = -Inf;
50 | outwarp_info.UB(2) = Inf;
51 | outwarp_info.PLB(2) = -2;
52 | outwarp_info.PUB(2) = 2;
53 | outwarp_info.x0(2) = 0;
54 |
55 | % Power exponent k (log space)
56 | outwarp_info.LB(3) = -Inf;
57 | outwarp_info.UB(3) = Inf;
58 | outwarp_info.PLB(3) = -3;
59 | outwarp_info.PUB(3) = 3;
60 | outwarp_info.x0(3) = 0;
61 |
62 | %------------------------------------------------------------------
63 |
64 | % Assign handle of current output warping function
65 | outwarp_info.outwarpfun = str2func(mfilename);
66 |
67 | % Plausible starting point
68 | idx_nan = isnan(outwarp_info.x0);
69 | outwarp_info.x0(idx_nan) = 0.5*(outwarp_info.PLB(idx_nan) + outwarp_info.PUB(idx_nan));
70 |
71 | dwarp_dt = outwarp_info;
72 |
73 | end
74 |
75 | return;
76 | end
77 |
78 | [Nhyp,Ns] = size(hyp); % Hyperparameters and samples
79 |
80 | if Nhyp ~= Noutwarp
81 | error('outwarp_fun:WrongLikHyp', ...
82 | ['Expected ' num2str(Noutwarp) ' output warping function hyperparameters, ' num2str(Nhyp) ' passed instead.']);
83 | end
84 | if Ns > 1
85 | error('outwarp_fun:nosampling', ...
86 | 'Output warping function output is available only for one-sample hyperparameter inputs.');
87 | end
88 |
89 | %--------------------------------------------------------------------------
90 | % CUSTOM: Compute output warping function and gradients
91 |
92 | % Read hyperparameters
93 | y0 = hyp(1);
94 | a = exp(hyp(2));
95 | k = exp(hyp(3));
96 |
97 | % Compute output warping or inverse warping
98 | ywarp = y;
99 | idx = y < y0;
100 | if invflag % Inverse output warping
101 | ywarp(idx) = y0 - ((y0 - y(idx)).^(1/k))/a;
102 | else % Direct output warping
103 | adelta = a*(y0 - y(idx));
104 | adeltak = adelta.^k;
105 | ywarp(idx) = y0 - adeltak;
106 | end
107 |
108 | if nargout > 1
109 | % First-order derivative of output warping function in output space
110 | dwarp_dt = ones(size(y));
111 | adeltakm1 = adelta.^(k-1);
112 |
113 | dwarp_dt(idx) = a*k*adeltakm1;
114 |
115 | if nargout > 2
116 | % Gradient of output warping function wrt hyperparameters
117 | dwarp_dtheta = zeros(N,Noutwarp);
118 |
119 | dwarp_dtheta(idx,1) = 1 - a*k*adeltakm1; % y0
120 | dwarp_dtheta(idx,2) = -k*adeltak; % log(a)
121 | dwarp_dtheta(idx,3) = -k*adeltak.*log(adelta); % log(k)
122 |
123 | if nargout > 3
124 | % Gradient of derivative of output warping function
125 | d2warp_dthetadt = zeros(N,Noutwarp);
126 |
127 | d2warp_dthetadt(idx,1) = a^2*k*(k-1)*adelta.^(k-2); % y0
128 | d2warp_dthetadt(idx,2) = a*k^2*adeltakm1; % log(a)
129 | d2warp_dthetadt(idx,3) = a*k*adeltakm1 + a*k^2*adeltakm1.*log(adelta); % log(k)
130 |
131 | end
132 |
133 | end
134 | end
135 |
136 | end
--------------------------------------------------------------------------------
/gplite/outwarp_test.m:
--------------------------------------------------------------------------------
1 | function outwarp_test(outfun)
2 | %OUTWARP_TEST Test correct implementation of an output warping function.
3 |
4 | % Generate random observations
5 | N = randi(50);
6 | y = rand(N,1)*10;
7 |
8 | [Noutwarp,info] = outfun('info',y);
9 |
10 | % Generate random hyperparameters from plausible box
11 | PLB = info.PLB(:);
12 | PUB = info.PUB(:);
13 | hyp = rand(Noutwarp,1).*(PUB - PLB) + PLB;
14 |
15 | hyp % Display the sampled hyperparameters (no semicolon on purpose)
16 |
17 | fprintf('---------------------------------------------------------------------------------\n');
18 | fprintf('Check error on inverse of output warping function...\n\n');
19 |
20 | sum(abs(y - outfun(hyp,outfun(hyp,y),'inv')))
21 |
22 | fprintf('---------------------------------------------------------------------------------\n');
23 | fprintf('Check 1st-order derivative of output warping function...\n\n');
24 |
25 | yy = y(randi(N));
26 | derivcheck(@(t) f(t,hyp,outfun),yy);
27 |
28 | fprintf('---------------------------------------------------------------------------------\n');
29 | fprintf('Check gradient of output warping function wrt hyperparameters...\n\n');
30 |
31 | derivcheck(@(hyp_) f2(yy,hyp_,outfun),hyp);
32 |
33 | fprintf('---------------------------------------------------------------------------------\n');
34 | fprintf('Check gradient of derivative of output warping function wrt hyperparameters...\n\n');
35 |
36 | derivcheck(@(hyp_) f3(yy,hyp_,outfun),hyp);
37 |
38 |
39 |
40 | end
41 |
42 | function [y,dy] = f(t,hyp,outfun)
43 | [y,dy] = outfun(hyp,t);
44 | end
45 |
46 | function [y,dy] = f2(y,hyp,outfun)
47 | [y,~,dy] = outfun(hyp,y);
48 | end
49 |
50 | function [y,dy] = f3(y,hyp,outfun)
51 | [~,y,~,dy] = outfun(hyp,y);
52 | end
--------------------------------------------------------------------------------
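
The test above can be run directly on any of the output warping functions in this folder, for example:

outwarp_test(@outwarp_negpow);        % check warp, inverse and gradients
outwarp_test(@outwarp_negscaledpow);  % same for the scaled variant
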
/gplite/private/derivcheck.m:
--------------------------------------------------------------------------------
1 | function [err_rel,err_abs] = derivcheck(f,x,flag)
2 | %DERIVCHECK Check analytical vs numerical differentiation for a function.
3 |
4 | if nargin < 3 || isempty(flag); flag = false; end
5 |
6 | tic
7 | if flag
8 | dy_num = fgrad(f,x,'five-points');
9 | else
10 | dy_num = gradest(f,x);
11 | end
12 | toc
13 | tic
14 | [y,dy_ana] = f(x);
15 | toc
16 |
17 | if size(dy_num,1) == size(dy_num,2)
18 | dy_num = sum(dy_num,1);
19 | end
20 |
21 | % Reshape to row vectors
22 | dy_num = dy_num(:)';
23 | dy_ana = dy_ana(:)';
24 |
25 | fprintf('Relative errors:\n');
26 | err_rel = (dy_num(:)' - dy_ana(:)')./dy_num(:)'
27 |
28 | fprintf('Absolute errors:\n');
29 | err_abs = dy_num(:)' - dy_ana(:)'
30 |
31 | end
--------------------------------------------------------------------------------
/gplite/private/eissample_lite.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/gplite/private/eissample_lite.m
--------------------------------------------------------------------------------
/gplite/private/quantile1.m:
--------------------------------------------------------------------------------
1 | function y = quantile1(x,p)
2 | %QUANTILE1 Quantile of a vector.
3 | % Y = QUANTILE1(X,P) returns quantiles of the values in X. P is a
4 | % scalar or a vector of quantile values in [0,1]. X is treated as a
5 | % single vector of data, X(:). Y is the same size as P, and Y(i)
6 | % contains the P(i)-th quantile.
7 | %
8 | % For an N-element vector X, QUANTILE1 computes quantiles as follows:
9 | % 1) The sorted values in X are taken as the (0.5/N), (1.5/N),
10 | % ..., ((N-0.5)/N) quantiles.
11 | % 2) Linear interpolation is used to compute quantiles for values
12 | % between (0.5/N) and ((N-0.5)/N).
13 | % 3) The minimum or maximum values in X are assigned to quantiles
14 | % for values outside that range.
15 | %
16 | % QUANTILE1 treats NaNs as missing values, and removes them.
17 | %
18 | % Note that, unlike PRCTILE, quantiles here are specified as fractions
19 | % between 0 and 1, not as percentages.
20 | %
21 | % Examples:
22 | % y = quantile1(x,0.5); % the median of x
23 | % y = quantile1(x,[0.025 0.25 0.5 0.75 0.975]); % a useful summary of x
24 | %
25 | % See also IQR, MEDIAN, NANMEDIAN, QUANTILE.
26 |
27 | % Copyright 1993-2016 The MathWorks, Inc.
28 |
29 | % If X is empty, return all NaNs.
30 | if isempty(x)
31 | y = nan(size(p),'like',x);
32 | else
33 | % Flatten X into a single column vector, so that all computations
34 | % below work along one column.
35 | x = x(:);
36 |
37 | x = sort(x,1);
38 | n = sum(~isnan(x), 1); % Number of non-NaN values
39 |
40 | if isequal(p,0.5) % make the median fast
41 | if rem(n,2) % n is odd
42 | y = x((n+1)/2,:);
43 | else % n is even
44 | y = (x(n/2,:) + x(n/2+1,:))/2;
45 | end
46 | else
47 | r = p*n;
48 | k = floor(r+0.5); % K gives the index for the row just before r
49 | kp1 = k + 1; % K+1 gives the index for the row just after r
50 | r = r - k; % R is the ratio between the K and K+1 rows
51 |
52 | % Find indices that are out of the range 1 to n and cap them
53 | k(k<1 | isnan(k)) = 1;
54 | kp1 = bsxfun( @min, kp1, n );
55 |
56 | % Use simple linear interpolation for the valid percentages
57 | y = (0.5+r).*x(kp1,:)+(0.5-r).*x(k,:);
58 |
59 | % Make sure that values we hit exactly are copied rather than interpolated
60 | exact = (r==-0.5);
61 | if any(exact)
62 | y(exact,:) = x(k(exact),:);
63 | end
64 |
65 | % Make sure that identical values are copied rather than interpolated
66 | same = (x(k,:)==x(kp1,:));
67 | if any(same(:))
68 | x = x(k,:); % expand x
69 | y(same) = x(same);
70 | end
71 |
72 | end
73 |
74 | end
75 |
76 | end
--------------------------------------------------------------------------------
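
A worked example of the interpolation rule above: for N = 4 sorted values, the data sit at quantiles 0.125, 0.375, 0.625 and 0.875, intermediate quantiles interpolate linearly, and quantiles outside that range are capped at the extremes.

y1 = quantile1([1 2 3 4],0.5);    % 2.5, via the fast median path
y2 = quantile1([1 2 3 4],0.25);   % 1.5, by linear interpolation
y3 = quantile1([1 2 3 4],0.01);   % 1, capped at the minimum
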
/gplite/private/sq_dist.m:
--------------------------------------------------------------------------------
1 | % sq_dist - a function to compute a matrix of all pairwise squared distances
2 | % between two sets of vectors, stored in the columns of the two matrices, a
3 | % (of size D by n) and b (of size D by m). If only a single argument is given
4 | % or the second matrix is empty, the missing matrix is taken to be identical
5 | % to the first.
6 | %
7 | % Usage: C = sq_dist(a, b)
8 | % or: C = sq_dist(a) or equiv.: C = sq_dist(a, [])
9 | %
10 | % Where a is of size Dxn, b is of size Dxm (or empty), C is of size nxm.
11 | %
12 | % Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2010-12-13.
13 |
14 | function C = sq_dist(a, b)
15 |
16 | if nargin<1 || nargin>2 || nargout>1, error('Wrong number of arguments.'); end
17 | bsx = exist('bsxfun','builtin'); % since Matlab R2007a 7.4.0 and Octave 3.0
18 | if ~bsx, bsx = exist('bsxfun'); end % bsxfun is not yet "builtin" in Octave
19 | [D, n] = size(a);
20 |
21 | % Computation of a^2 - 2*a*b + b^2 is less stable than (a-b)^2 because numerical
22 | % precision can be lost when both a and b have very large absolute value and the
23 | % same sign. For that reason, we subtract the mean from the data beforehand to
24 | % stabilise the computations. This is OK because the squared error is
25 | % independent of the mean.
26 | if nargin==1 % subtract mean
27 | mu = mean(a,2);
28 | if bsx
29 | a = bsxfun(@minus,a,mu);
30 | else
31 | a = a - repmat(mu,1,size(a,2));
32 | end
33 | b = a; m = n;
34 | else
35 | [d, m] = size(b);
36 | if d ~= D, error('Error: column lengths must agree.'); end
37 | mu = (m/(n+m))*mean(b,2) + (n/(n+m))*mean(a,2);
38 | if bsx
39 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu);
40 | else
41 | a = a - repmat(mu,1,n); b = b - repmat(mu,1,m);
42 | end
43 | end
44 |
45 | if bsx % compute squared distances
46 | C = bsxfun(@plus,sum(a.*a,1)',bsxfun(@minus,sum(b.*b,1),2*a'*b));
47 | else
48 | C = repmat(sum(a.*a,1)',1,m) + repmat(sum(b.*b,1),n,1) - 2*a'*b;
49 | end
50 | C = max(C,0); % numerical noise can make C slightly negative, e.g. C(i,j) = -1e-14
51 |
--------------------------------------------------------------------------------
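
Usage sketch for SQ_DIST; note that vectors are stored as columns here, unlike the row convention used in most of the toolbox.

a = randn(3,5); b = randn(3,7);   % D = 3, n = 5, m = 7
C = sq_dist(a,b);                 % 5-by-7 matrix of squared distances
% Direct (less numerically stable) equivalent:
C2 = max(bsxfun(@plus,sum(a.*a,1)',sum(b.*b,1)) - 2*(a'*b), 0);
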
/install.m:
--------------------------------------------------------------------------------
1 | % MATLAB installation script for VBMC
2 | %
3 | % Copyright (c) by Luigi Acerbi 2018-2020
4 |
5 | fprintf('Installing VBMC...\n');
6 |
7 | me = mfilename; % what is my filename
8 | pathstr = fileparts(which(me)); % get my location
9 | addpath(pathstr); % add base folder to the path
10 | addpath([pathstr filesep() 'shared']); % add shared folder to the path
11 |
12 | try
13 | failed_install_flag = savepath; % save path
14 | catch
15 | failed_install_flag = true;
16 | end
17 |
18 | if failed_install_flag
19 | fprintf('Installation error: could not save path.\n\n');
20 | fprintf('You need to manually add VBMC''s installation folder to your MATLAB search path (and save it).\n');
21 | fprintf('See the MATLAB documentation for more information.\n');
22 | fprintf('Note that in Linux systems, e.g. Ubuntu, you need read/write permission to save the MATLAB path.\n');
23 | else
24 | fprintf('Installation successful!\n');
25 | type([pathstr filesep 'docs' filesep 'README.txt']);
26 | fprintf('\n');
27 | end
28 |
29 | clear me pathstr
--------------------------------------------------------------------------------
/lpostfun.m:
--------------------------------------------------------------------------------
1 | function [y,s] = lpostfun(x,llike,lprior)
2 | %LPOSTFUN Log (unnormalized) posterior function.
3 | % Y = LPOSTFUN(X,LLIKE,LPRIOR) returns the unnormalized log posterior
4 | % evaluated at X where LLIKE is a function handle to the log likelihood
5 | % function and LPRIOR a function handle to the log prior.
6 | %
7 | % [Y,S] = LPOSTFUN(X,LLIKE,LPRIOR) also returns an estimate S of the
8 | % standard deviation of a noisy log-likelihood evaluation at X (obtained
9 | % as second output of LLIKE, assuming LLIKE has two outputs). Note that
10 | % the log prior is assumed to be noiseless.
11 |
12 | if nargin < 3; lprior = []; end
13 |
14 | if nargout > 1
15 | [y,s] = llike(x);
16 | else
17 | y = llike(x);
18 | end
19 |
20 | if ~isempty(lprior)
21 | y = y + lprior(x);
22 | end
23 |
24 | end
--------------------------------------------------------------------------------
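
A hedged usage sketch for LPOSTFUN with toy handles (the Gaussian log likelihood and log prior below are illustrative):

llike  = @(x) -0.5*sum(x.^2,2);       % toy log likelihood
lprior = @(x) -0.5*sum((x/3).^2,2);   % toy log prior
fun = @(x) lpostfun(x,llike,lprior);  % unnormalized log posterior handle
y = fun([1 2]);                       % evaluate at one point
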
/misc/best_vbmc.m:
--------------------------------------------------------------------------------
1 | function [vp,elbo,elbo_sd,idx_best] = best_vbmc(stats,idx,SafeSD,FracBack,RankCriterion,RealFlag)
2 | %BEST_VBMC Return best variational posterior from stats structure.
3 |
4 | % Check up to this iteration (default, last)
5 | if nargin < 2 || isempty(idx); idx = stats.iter(end); end
6 |
7 | % Penalization for uncertainty (default, 5 SD)
8 | if nargin < 3 || isempty(SafeSD); SafeSD = 5; end
9 |
10 | % If no past stable iteration, go back up to this fraction of iterations
11 | if nargin < 4 || isempty(FracBack); FracBack = 0.25; end
12 |
13 | % Use new ranking criterion method to pick best solution
14 | if nargin < 5 || isempty(RankCriterion); RankCriterion = false; end
15 |
16 | % Convert training variational posterior to real posterior
17 | if nargin < 6 || isempty(RealFlag); RealFlag = false; end
18 |
19 | if stats.stable(idx)
20 | % If the current iteration is stable, return it
21 | idx_best = idx;
22 |
23 | else
24 | % Otherwise, find the best solution according to various criteria
25 |
26 | if RankCriterion
27 | % Find solution that combines ELCBO, stability, and recency
28 |
29 | % Rank by position
30 | rank(:,1) = fliplr(1:idx)';
31 |
32 | % Rank by ELCBO
33 | lnZ_iter = stats.elbo(1:idx);
34 | lnZsd_iter = stats.elbo_sd(1:idx);
35 | elcbo = lnZ_iter - SafeSD*lnZsd_iter;
36 | [~,ord] = sort(elcbo,'descend');
37 | rank(ord,2) = 1:idx;
38 |
39 | % Rank by reliability index
40 | [~,ord] = sort(stats.rindex(1:idx),'ascend');
41 | rank(ord,3) = 1:idx;
42 |
43 | % Rank penalty to all non-stable iterations
44 | rank(:,4) = idx;
45 | rank(stats.stable(1:idx),4) = 1;
46 |
47 | % % Add rank penalty to warmup (and iteration immediately after)
48 | % last_warmup = find(stats.warmup(1:idx),1,'last');
49 | % rank(:,5) = 1;
50 | % rank(1:min(last_warmup+2,end),5) = idx;
51 |
52 | [~,idx_best] = min(sum(rank,2));
53 |
54 | else
55 | % Find recent solution with best ELCBO
56 | laststable = find(stats.stable(1:idx),1,'last');
57 | if isempty(laststable)
58 | BackIter = ceil(idx*FracBack); % Go back up to this many iterations if there is no previous stable iteration
59 | idx_start = max(1,idx-BackIter);
60 | else
61 | idx_start = laststable;
62 | end
63 | lnZ_iter = stats.elbo(idx_start:idx);
64 | lnZsd_iter = stats.elbo_sd(idx_start:idx);
65 | elcbo = lnZ_iter - SafeSD*lnZsd_iter;
66 | [~,idx_best] = max(elcbo);
67 | idx_best = idx_start + idx_best - 1;
68 | end
69 | end
70 |
71 | % Return best variational posterior, its ELBO and SD
72 | if RealFlag
73 | vp = vptrain2real(stats.vp(idx_best),1);
74 | else
75 | vp = stats.vp(idx_best);
76 | end
77 | elbo = stats.elbo(idx_best);
78 | elbo_sd = stats.elbo_sd(idx_best);
79 | vp.stats.stable = stats.stable(idx_best);
80 |
81 | end
--------------------------------------------------------------------------------
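
The selection criterion used above is the ELCBO, a lower confidence bound on the ELBO that penalizes uncertain estimates: elcbo = elbo - SafeSD*elbo_sd, with SafeSD = 5 by default. A toy illustration with made-up values:

elbo    = [-12.1 -10.3 -10.0];
elbo_sd = [ 0.5   0.1   0.8];
elcbo = elbo - 5*elbo_sd;       % [-14.6 -10.8 -14.0]
[~,idx_best] = max(elcbo);      % picks iteration 2: good ELBO, low SD
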
/misc/check_quadcoefficients_vbmc.m:
--------------------------------------------------------------------------------
1 | function errorflag = check_quadcoefficients_vbmc(gp)
2 | %CHECK_QUADCOEFFICIENTS_VBMC Check that the quadratic coefficients are negative.
3 |
4 | % Extract integrated basis functions coefficients
5 | D = size(gp.X,2);
6 | Nb = numel(gp.post(1).intmean.betabar);
7 | betabar = zeros(Nb,numel(gp.post));
8 | for s = 1:numel(gp.post)
9 | betabar(:,s) = gp.post(s).intmean.betabar;
10 | end
11 | % betabar
12 |
13 | if gp.intmeanfun == 3
14 | errorflag = any(betabar(1+D+(1:D),:) >= 0,2)';
15 | elseif gp.intmeanfun == 4
16 | tril_mat = tril(true(D),-1);
17 | tril_vec = tril_mat(:);
18 | z = zeros(D*D,1);
19 | errorflag = false;
20 | for b = 1:size(betabar,2)
21 | beta_mat = z;
22 | beta_mat(tril_vec) = betabar(1+2*D+(1:D*(D-1)/2),b);
23 | beta_mat = reshape(beta_mat,[D,D]);
24 | beta_mat = beta_mat + beta_mat' + diag(betabar(1+D+(1:D),b));
25 | try
26 | [~,dd] = chol(-beta_mat);
27 | catch
28 | dd = 1;
29 | end
30 | % dd
31 | errorflag = errorflag | dd;
32 | end
33 | end
34 |
35 | end
--------------------------------------------------------------------------------
/misc/evaloption_vbmc.m:
--------------------------------------------------------------------------------
1 | function val = evaloption_vbmc(option,N)
2 | %EVALOPTION_VBMC Return option value, evaluating a function handle if needed.
3 |
4 | if isa(option,'function_handle')
5 | val = option(N);
6 | else
7 | val = option;
8 | end
9 |
10 | end
--------------------------------------------------------------------------------
/misc/fess_vbmc.m:
--------------------------------------------------------------------------------
1 | function [fess,X] = fess_vbmc(vp,gp,X)
2 | %FESS_VBMC Compute fractional effective sample size through importance sampling
3 |
4 | if nargin < 3 || isempty(X); X = 100; end
5 |
6 | % If a single number is passed, take it as the number of samples
7 | if numel(X) == 1
8 | N = X;
9 | X = vbmc_rnd(vp,N,0);
10 | else
11 | N = size(X,1);
12 | end
13 |
14 | % Can directly pass the estimated GP means instead of the full GP
15 | if isstruct(gp)
16 | [~,~,fbar] = gplite_pred(gp,X,[],[],0,0);
17 | else
18 | fbar = mean(gp,2);
19 | end
20 |
21 | if size(fbar,1) ~= size(X,1)
22 | error('Mismatch between number of samples from VP and GP.');
23 | end
24 |
25 | % Compute effective sample size (ESS) with importance sampling
26 | vlnpdf = max(vbmc_pdf(vp,X,0,1),log(realmin));
27 | logw = fbar - vlnpdf;
28 | w = exp(logw - max(logw));
29 | w = w/sum(w);
30 | fess = 1/sum(w.^2) / N; % fractional ESS
31 |
32 | end
--------------------------------------------------------------------------------
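
The quantity above is Kish's effective sample size for self-normalized importance weights, divided by the number of samples N. A standalone sketch with toy weights:

logw = randn(100,1);        % toy log importance weights
w = exp(logw - max(logw));
w = w/sum(w);               % self-normalized weights
fess = 1/sum(w.^2)/100;     % 1/N for a degenerate set, 1 for uniform weights
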
/misc/finalboost_vbmc.m:
--------------------------------------------------------------------------------
1 | function [vp,elbo,elbo_sd,changedflag] = finalboost_vbmc(vp,idx_best,optimState,stats,options)
2 | %FINALBOOST_VBMC Final boost of variational components.
3 |
4 | changedflag = false;
5 |
6 | Knew = max(options.MinFinalComponents,vp.K);
7 |
8 | % Current entropy samples during variational optimization
9 | NSent = evaloption_vbmc(options.NSent,Knew);
10 | NSentFast = evaloption_vbmc(options.NSentFast,Knew);
11 | NSentFine = evaloption_vbmc(options.NSentFine,Knew);
12 |
13 | % Entropy samples for final boost
14 | NSentBoost = NSent;
15 | NSentFastBoost = NSentFast;
16 | NSentFineBoost = NSentFine;
17 | if isfield(options,'NSentBoost') && ~isempty(options.NSentBoost)
18 | NSentBoost = evaloption_vbmc(options.NSentBoost,Knew);
19 | end
20 | if isfield(options,'NSentFastBoost') && ~isempty(options.NSentFastBoost)
21 | NSentFastBoost = evaloption_vbmc(options.NSentFastBoost,Knew);
22 | end
23 | if isfield(options,'NSentFineBoost') && ~isempty(options.NSentFineBoost)
24 | NSentFineBoost = evaloption_vbmc(options.NSentFineBoost,Knew);
25 | end
26 |
27 | % Perform final boost?
28 | do_boost = vp.K < options.MinFinalComponents || ...
29 | (NSent ~= NSentBoost) || (NSentFine ~= NSentFineBoost);
30 |
31 | if do_boost
32 | % Last variational optimization with large number of components
33 | Nfastopts = ceil(evaloption_vbmc(options.NSelbo,Knew));
34 | Nfastopts = ceil(Nfastopts * options.NSelboIncr);
35 | Nslowopts = 1;
36 | gp_idx = gplite_post(stats.gp(idx_best));
37 | options.TolWeight = 0; % No pruning of components
38 |
39 | % End warmup
40 | optimState.Warmup = false;
41 | vp.optimize_mu = logical(options.VariableMeans);
42 | vp.optimize_weights = logical(options.VariableWeights);
43 |
44 | options.NSent = NSentBoost;
45 | options.NSentFast = NSentFastBoost;
46 | options.NSentFine = NSentFineBoost;
47 | options.MaxIterStochastic = Inf;
48 | optimState.entropy_alpha = 0;
49 |
50 | if isfield(vp,'temperature') && ~isempty(vp.temperature)
51 | optimState.temperature = vp.temperature;
52 | end
53 |
54 | stable_flag = vp.stats.stable;
55 | vp = vpoptimize_vbmc(Nfastopts,Nslowopts,vp,gp_idx,Knew,optimState,options);
56 | vp.stats.stable = stable_flag;
57 | changedflag = true;
58 | end
59 |
60 | elbo = vp.stats.elbo;
61 | elbo_sd = vp.stats.elbo_sd;
62 |
63 | end
--------------------------------------------------------------------------------
/misc/get_traindata_vbmc.m:
--------------------------------------------------------------------------------
1 | function [X_train,y_train,s2_train,t_train] = get_traindata_vbmc(optimState,options)
2 | %GET_TRAINDATA_VBMC Get training data for building GP surrogate.
3 |
4 | nvars = size(optimState.X,2);
5 |
6 | X_train = optimState.X(optimState.X_flag,:);
7 | y_train = optimState.y(optimState.X_flag);
8 | if isfield(optimState,'S')
9 | s2_train = optimState.S(optimState.X_flag).^2;
10 | else
11 | s2_train = [];
12 | end
13 |
14 | if options.NoiseShaping
15 | s2_train = noiseshaping_vbmc(s2_train,y_train,options);
16 | end
17 |
18 | if nargout > 3
19 | t_train = optimState.funevaltime(optimState.X_flag);
20 | end
21 |
22 |
23 |
24 | % xxplot = (1:numel(y_train))';
25 | % [yyplot,ord] = sort(log(y_max - y_train + 1));
26 | %
27 | % X_train = X_train(ord,:);
28 | % y_train = y_train(ord);
29 | %
30 | % plot(xxplot,yyplot,'k-','LineWidth',1); hold on;
31 | % p = robustfit(xxplot,yyplot); p = fliplr(p');
32 | % pred = p(1).*xxplot + p(2);
33 | % plot(xxplot, pred,'b--','LineWidth',1);
34 | % drawnow;
35 |
36 | % tail_idx = ceil(numel(y_train)*max(0.5,options.HPDFrac));
37 | % idx_start = find(yyplot(tail_idx:end) - pred(tail_idx:end) > 1,1);
38 | % if ~isempty(idx_start)
39 | % tail_idx = tail_idx + idx_start - 1;
40 | % [tail_idx,numel(y_train)]
41 | % yyplot(tail_idx:end) = min(pred(tail_idx:end),yyplot(tail_idx:end));
42 | % y_train(tail_idx:end) = 1 + y_max - exp(yyplot(tail_idx:end));
43 | % end
44 |
45 | end
46 |
--------------------------------------------------------------------------------
/misc/get_vptheta.m:
--------------------------------------------------------------------------------
1 | function [theta,vp] = get_vptheta(vp,optimize_mu,optimize_sigma,optimize_lambda,optimize_weights)
2 | %GET_VPTHETA Get vector of variational parameters from variational posterior.
3 |
4 | if nargin < 5 || isempty(optimize_weights)
5 | optimize_weights = vp.optimize_weights;
6 | if nargin < 4 || isempty(optimize_lambda)
7 | optimize_lambda = vp.optimize_lambda;
8 | if nargin < 3 || isempty(optimize_sigma)
9 | optimize_sigma = vp.optimize_sigma;
10 | if nargin < 2 || isempty(optimize_mu)
11 | optimize_mu = vp.optimize_mu;
12 | end
13 | end
14 | end
15 | end
16 |
17 | vp = rescale_params(vp);
18 | if optimize_mu; theta = vp.mu(:); else; theta = []; end
19 | if optimize_sigma; theta = [theta; log(vp.sigma(:))]; end
20 | if optimize_lambda; theta = [theta; log(vp.lambda(:))]; end
21 | if optimize_weights; theta = [theta; log(vp.w(:))]; end
22 |
23 | end
--------------------------------------------------------------------------------
/misc/gethpd_vbmc.m:
--------------------------------------------------------------------------------
1 | function [X_hpd,y_hpd,hpd_range] = gethpd_vbmc(X,y,HPDFrac)
2 | %GETHPD_VBMC Get high-posterior density dataset.
3 |
4 | if nargin < 3 || isempty(HPDFrac); HPDFrac = 0.8; end
5 |
6 | [N,D] = size(X);
7 |
8 | % Subsample high posterior density dataset
9 | [~,ord] = sort(y,'descend');
10 | N_hpd = round(HPDFrac*N);
11 | X_hpd = X(ord(1:N_hpd),:);
12 | if nargout > 1
13 | y_hpd = y(ord(1:N_hpd));
14 | end
15 | if nargout > 2
16 | hpd_range = max(X_hpd)-min(X_hpd);
17 | end
18 |
19 | end
--------------------------------------------------------------------------------
/misc/gplogjoint_weights.m:
--------------------------------------------------------------------------------
1 | function [F,dF,varF,dvarF,varss,I_sk,J_sjk] = gplogjoint_weights(vp,grad_flag,avg_flag,jacobian_flag,compute_var)
2 | %GPLOGJOINT_WEIGHTS Expected variational log joint probability via GP approximation
3 |
4 | % VP is a struct with the variational posterior. The expected log joint
5 | % contributions I_sk (per mixture component) and J_sjk (per pair of
6 | % components), already computed for each GP hyperparameter sample, are
7 | % read from VP.STATS; only the mixture weights W are free parameters.
8 |
9 | % Handle optional arguments and their defaults
10 | if nargin < 2; grad_flag = []; end
11 | if nargin < 3 || isempty(avg_flag); avg_flag = true; end
12 | if nargin < 4 || isempty(jacobian_flag); jacobian_flag = true; end
13 | if nargin < 5; compute_var = []; end
14 | if isempty(compute_var); compute_var = nargout > 2; end
15 |
16 | % Check if gradient computation is required
17 | if nargout < 2 % No 2nd output, no gradients
18 | grad_flag = false;
19 | elseif isempty(grad_flag) % By default compute all gradients
20 | grad_flag = true;
21 | end
22 |
23 | compute_vargrad = nargout > 3 && compute_var && grad_flag;
24 |
25 | if compute_vargrad && compute_var ~= 2
26 | error('gplogjoint:FullVarianceGradient', ...
27 | 'Computation of gradient of log joint variance is currently available only for diagonal approximation of the variance.');
28 | end
29 |
30 | K = vp.K; % Number of components
31 | w(1,:) = vp.w;
32 | I_sk = vp.stats.I_sk;
33 | J_sjk = vp.stats.J_sjk;
34 |
35 | Ns = size(I_sk,1); % Hyperparameter samples
36 |
37 | F = zeros(1,Ns);
38 | if grad_flag; w_grad = zeros(K,Ns); else, w_grad = []; end
39 | if compute_var; varF = zeros(1,Ns); end
40 | if compute_vargrad % Compute gradient of variance?
41 | if grad_flag; w_vargrad = zeros(K,Ns); else, w_vargrad = []; end
42 | end
43 |
44 | % Loop over hyperparameter samples
45 | for s = 1:Ns
46 | F(s) = sum(w.*I_sk(s,:));
47 | if grad_flag; w_grad(:,s) = I_sk(s,:)'; end
48 |
49 | if compute_var == 2
50 | J_diag = diag(squeeze(J_sjk(s,:,:)))';
51 | varF(s) = sum(w.^2.*max(eps,J_diag));
52 | if compute_vargrad
53 | w_vargrad(:,s) = 2*w.*max(eps,J_diag);
54 | end
55 | elseif compute_var
56 | J_jk = squeeze(J_sjk(s,:,:));
57 | varF(s) = sum(sum(J_jk.*(w'*w),1));
58 | end
59 | end
60 |
61 | % Correct for numerical error
62 | if compute_var; varF = max(varF,eps); end
63 |
64 | if grad_flag
65 | if jacobian_flag
66 | eta_sum = sum(exp(vp.eta));
67 | J_w = bsxfun(@times,-exp(vp.eta)',exp(vp.eta)/eta_sum^2) + diag(exp(vp.eta)/eta_sum);
68 | w_grad = J_w*w_grad;
69 | end
70 | dF = w_grad;
71 | else
72 | dF = [];
73 | end
74 |
75 | if compute_vargrad
76 | % Correct for standard softmax reparameterization of W
77 | if jacobian_flag && grad_flag
78 | w_vargrad = J_w*w_vargrad;
79 | end
80 | dvarF = w_vargrad;
81 | else
82 | dvarF = [];
83 | end
84 |
85 | % [varF; varF_diag]
86 |
87 | % Average multiple hyperparameter samples
88 | varss = 0;
89 | if Ns > 1 && avg_flag
90 | Fbar = sum(F,2)/Ns;
91 | if compute_var
92 | varFss = sum((F - Fbar).^2,2)/(Ns-1); % Estimated variance of the samples
93 | varss = varFss + std(varF); % Variability due to sampling
94 | varF = sum(varF,2)/Ns + varFss;
95 | end
96 | if compute_vargrad
97 | dvv = 2*sum(F.*dF,2)/(Ns-1) - 2*Fbar.*sum(dF,2)/(Ns-1);
98 | dvarF = sum(dvarF,2)/Ns + dvv;
99 | end
100 | F = Fbar;
101 | if grad_flag; dF = sum(dF,2)/Ns; end
102 | end
103 |
104 | end
--------------------------------------------------------------------------------
/misc/gpreupdate.m:
--------------------------------------------------------------------------------
1 | function gp = gpreupdate(gp,optimState,options)
2 | %GPREUPDATE Quick posterior reupdate of Gaussian process.
3 |
4 | [X_train,y_train,s2_train,t_train] = get_traindata_vbmc(optimState,options);
5 | gp.X = X_train;
6 | gp.y = y_train;
7 | gp.s2 = s2_train;
8 | gp.t = t_train;
9 | gp = gplite_post(gp);
10 |
11 | if gp.intmeanfun == 3 || gp.intmeanfun == 4
12 | errorflag = check_quadcoefficients_vbmc(gp);
13 | if errorflag
14 | gp.meanfun = optimState.gpMeanfun;
15 | gp.intmeanfun = [];
16 | D = size(gp.X,2); % Input dimension
17 | for s = 1:numel(gp.post)
18 | betabar = gp.post(s).intmean.betabar;
19 | hyp = gp.post(s).hyp;
20 |
21 | switch gp.meanfun
22 | case 4
23 | omega2 = -1./betabar(1+D+(1:D));
24 | xm = omega2.*betabar(1+(1:D));
25 | m0 = betabar(1) + 0.5*xm.^2./omega2;
26 | hyp_mean = [m0; xm(:); 0.5*log(omega2(:))];
27 | hypnew = [hyp(1:gp.Ncov+gp.Nnoise); hyp_mean(:); hyp(gp.Ncov+gp.Nnoise+1:end)];
28 | end
29 | gp.post(s).hyp = hypnew;
30 | end
31 |
32 | % Recompute GP without integrated mean function
33 | gp = gplite_post(gp);
34 | end
35 | end
36 |
37 | end
--------------------------------------------------------------------------------
/misc/gpsample_vbmc.m:
--------------------------------------------------------------------------------
1 | function X = gpsample_vbmc(vp,gp,Ns,origflag)
2 | %GPSAMPLE_VBMC Sample from GP obtained through VBMC.
3 |
4 | if nargin < 4 || isempty(origflag); origflag = true; end
5 |
6 | D = size(gp.X,2);
7 |
8 | if isfield(gp,'s2') && ~isempty(gp.s2)
9 | % Evaluate GP input length scale (use geometric mean)
10 | Ns_gp = numel(gp.post);
11 | ln_ell = zeros(D,Ns_gp);
12 | for s = 1:Ns_gp; ln_ell(:,s) = gp.post(s).hyp(1:D); end
13 | gplengthscale = exp(mean(ln_ell,2))';
14 | X_rescaled = bsxfun(@rdivide,gp.X,gplengthscale); % Rescaled GP training inputs
15 |
16 | % Evaluate GP observation noise on training inputs
17 | sn2new = zeros(size(gp.X,1),Ns_gp);
18 | for s = 1:Ns_gp
19 | hyp_noise = gp.post(s).hyp(gp.Ncov+1:gp.Ncov+gp.Nnoise); % Get noise hyperparameters
20 | if isfield(gp,'s2')
21 | s2 = gp.s2;
22 | else
23 | s2 = [];
24 | end
25 | % s2 = noiseshaping_vbmc(s2,gp.y,options);
26 | sn2new(:,s) = gplite_noisefun(hyp_noise,gp.X,gp.noisefun,gp.y,s2);
27 | end
28 | sn2new = mean(sn2new,2);
29 |
30 | % Estimate observation noise variance over variational posterior
31 | xx = vbmc_rnd(vp,2e4,0,0);
32 | [~,pos] = min(sq_dist(bsxfun(@rdivide,xx,gplengthscale),X_rescaled),[],2);
33 | sn2_avg = mean(sn2new(pos)); % Use nearest neighbor approximation
34 | else
35 | sn2_avg = 0;
36 | end
37 |
38 | VarThresh = max(1,sn2_avg);
39 |
40 | W = 2*(D+1);
41 | x0 = vbmc_rnd(vp,W,0,0);
42 | X = gplite_sample(gp,Ns,x0,'parallel',[],[],VarThresh);
43 | if origflag
44 | X = warpvars_vbmc(X,'inv',vp.trinfo);
45 | end
46 |
47 | end
48 |
49 |
50 |
51 | %SQ_DIST Compute matrix of all pairwise squared distances between two sets
52 | % of vectors, stored in the columns of the two matrices, a (of size n-by-D)
53 | % and b (of size m-by-D).
54 | function C = sq_dist(a,b)
55 |
56 | n = size(a,1);
57 | m = size(b,1);
58 | mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1);
59 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu);
60 | C = bsxfun(@plus,sum(a.*a,2),bsxfun(@minus,sum(b.*b,2)',2*a*b'));
61 | C = max(C,0);
62 |
63 | end
--------------------------------------------------------------------------------
/misc/initdesign_vbmc.m:
--------------------------------------------------------------------------------
1 | function [optimState,t_func] = initdesign_vbmc(optimState,Ns,funwrapper,t_func,options)
2 | %INITDESIGN_VBMC Initial sample design (provided or random box).
3 |
4 | x0 = optimState.Cache.X_orig;
5 | [N0,D] = size(x0);
6 |
7 | if N0 <= Ns
8 | Xs = x0;
9 | ys = optimState.Cache.y_orig;
10 | if N0 < Ns
11 | switch lower(options.InitDesign)
12 | case 'plausible'
13 | % Uniform random samples in the plausible box (in transformed space)
14 | Xrnd = bsxfun(@plus,bsxfun(@times,rand(Ns-N0,D),optimState.PUB-optimState.PLB),optimState.PLB);
15 | case 'narrow'
16 | xstart = warpvars_vbmc(x0(1,:),'dir',optimState.trinfo);
17 | Xrnd = bsxfun(@plus,bsxfun(@times,rand(Ns-N0,D)-0.5,0.1*(optimState.PUB-optimState.PLB)),xstart);
18 | Xrnd = bsxfun(@min,bsxfun(@max,Xrnd,optimState.PLB),optimState.PUB);
19 | otherwise
20 | error('Unknown initial design for VBMC.');
21 | end
22 | Xrnd = warpvars_vbmc(Xrnd,'inv',optimState.trinfo); % Convert back to original space
23 | Xs = [Xs; Xrnd];
24 | ys = [ys; NaN(Ns-N0,1)];
25 | end
26 | idx_remove = true(N0,1);
27 |
28 | elseif N0 > Ns
29 | % Cluster starting points
30 | kmeans_options = struct('Display','off','Method',2,'Preprocessing','whiten');
31 | idx = fastkmeans(x0,Ns,kmeans_options);
32 |
33 | % From each cluster, take points with higher density in original space
34 | Xs = NaN(Ns,D); ys = NaN(Ns,1); idx_remove = false(N0,1);
35 | for iK = 1:Ns
36 | idxK = find(idx == iK);
37 | xx = optimState.Cache.X_orig(idxK,:);
38 | yy = optimState.Cache.y_orig(idxK);
39 | [~,idx_y] = max(yy);
40 | Xs(iK,:) = xx(idx_y,:);
41 | ys(iK) = yy(idx_y);
42 | idx_remove(idxK(idx_y)) = true;
43 | end
44 | end
45 | % Remove points from starting cache
46 | optimState.Cache.X_orig(idx_remove,:) = [];
47 | optimState.Cache.y_orig(idx_remove) = [];
48 |
49 | Xs = warpvars_vbmc(Xs,'d',optimState.trinfo);
50 |
51 | for is = 1:Ns
52 | timer_func = tic;
53 | if isnan(ys(is)) % Function value is not available
54 | [~,optimState] = funlogger_vbmc(funwrapper,Xs(is,:),optimState,'iter');
55 | else
56 | [~,optimState] = funlogger_vbmc(funwrapper,Xs(is,:),optimState,'add',ys(is));
57 | end
58 | t_func = t_func + toc(timer_func);
59 | end
60 |
61 | end
62 |
--------------------------------------------------------------------------------
/misc/intkernel.m:
--------------------------------------------------------------------------------
1 | function F = intkernel(X,vp,gp,avg_flag)
2 | %INTKERNEL Posterior GP kernel integrated against the variational posterior.
3 |
4 | if nargin < 4 || isempty(avg_flag); avg_flag = false; end
5 |
6 | K = vp.K; % Number of components
7 | [N,D] = size(X);
8 | mu(:,:) = vp.mu;
9 | sigma(1,:) = vp.sigma;
10 | lambda(:,1) = vp.lambda(:);
11 | w(1,:) = vp.w;
12 |
13 | Ns = numel(gp.post); % Hyperparameter samples
14 |
15 | F = zeros(N,Ns);
16 |
17 | if isfield(vp,'delta') && ~isempty(vp.delta)
18 | delta = vp.delta;
19 | else
20 | delta = 0;
21 | end
22 |
23 | % Integrated mean function being used?
24 | integrated_meanfun = isfield(gp,'intmeanfun') && gp.intmeanfun > 0;
25 |
26 | if integrated_meanfun
27 | % Evaluate basis functions
28 | Hs = gplite_intmeanfun(X,gp.intmeanfun);
29 | end
30 |
31 | % Loop over hyperparameter samples
32 | for s = 1:Ns
33 | hyp = gp.post(s).hyp;
34 |
35 | % Extract GP hyperparameters from HYP
36 | ell = exp(hyp(1:D));
37 | ln_sf2 = 2*hyp(D+1);
38 | sum_lnell = sum(hyp(1:D));
39 |
40 | if integrated_meanfun
41 | %betabar = gp.post(s).intmean.betabar';
42 | %KinvHtbetabar = gp.post(s).intmean.HKinv'*betabar;
43 | plus_idx = gp.intmeanfun_var > 0;
44 | HKinv = gp.post(s).intmean.HKinv(plus_idx,:);
45 | Tplusinv = gp.post(s).intmean.Tplusinv;
46 | end
47 |
48 | L = gp.post(s).L;
49 | Lchol = gp.post(s).Lchol;
50 |
51 | sn2_eff = 1/gp.post(s).sW(1)^2;
52 |
53 | ddl = sq_dist(bsxfun(@rdivide,X',ell),bsxfun(@rdivide,gp.X',ell));
54 | ll = exp(ln_sf2 -0.5*ddl);
55 |
56 | if Lchol
57 | zz = (L\(L'\ll'))/sn2_eff;
58 | else
59 | zz = -L*ll';
60 | end
61 |
62 | for k = 1:K
63 | tau_k = sqrt(sigma(k)^2*lambda.^2 + ell.^2 + delta.^2);
64 | lnnf_k = ln_sf2 + sum_lnell - sum(log(tau_k)); % Covariance normalization factor
65 | delta_k = bsxfun(@rdivide,bsxfun(@minus, mu(:,k), gp.X'), tau_k);
66 | z_k = exp(lnnf_k -0.5 * sum(delta_k.^2,1));
67 |
68 | dd_k = bsxfun(@rdivide,bsxfun(@minus, mu(:,k), X'), tau_k);
69 | zz_k = exp(lnnf_k -0.5 * sum(dd_k.^2,1));
70 |
71 | F(:,s) = F(:,s) + w(k)*(zz_k - z_k*zz)';
72 |
73 | % Contribution of integrated mean function
74 | if integrated_meanfun
75 | switch gp.intmeanfun
76 | case 1; u_k = 1;
77 | case 2; u_k = [1,mu(:,k)'];
78 | case 3; u_k = [1,mu(:,k)',(mu(:,k).^2 + sigma(k)^2*lambda.^2)'];
79 | case 4; mumu_k = mu(:,k)*mu(:,k)'; u_k = [1,mu(:,k)',(mu(:,k).^2 + sigma(k)^2*lambda.^2)',mumu_k(tril(true(D),-1))']; % cross moments E[x_i*x_j] = mu_i*mu_j (diagonal component covariance)
80 | end
81 |
82 | F(:,s) = F(:,s) + w(k)*((u_k(plus_idx)*(Tplusinv*Hs)) ...
83 | + ((z_k*HKinv')*(Tplusinv*(HKinv*ll'))) ...
84 | - (u_k(plus_idx)*(Tplusinv*(HKinv*ll'))) ...
85 | - ((z_k*HKinv')*(Tplusinv*Hs)))';
86 | end
87 |
88 |
89 | end
90 | end
91 |
92 | % Average multiple hyperparameter samples
93 | if Ns > 1 && avg_flag
94 | F = mean(F,2);
95 | end
96 |
97 | end
98 |
99 |
100 |
--------------------------------------------------------------------------------
/misc/noiseshaping_vbmc.m:
--------------------------------------------------------------------------------
1 | function s2s = noiseshaping_vbmc(s2,y,options)
2 | %NOISESHAPING_VBMC Increase noise for low-density points.
3 |
4 | TolScale = 1e10;
5 |
6 | if isempty(s2); s2 = options.TolGPNoise^2*ones(size(y)); end
7 |
8 | deltay = max(0, max(y) - y - options.NoiseShapingThreshold);
9 | sn2extra = (options.NoiseShapingFactor*deltay).^2;
10 |
11 | s2s = s2 + sn2extra;
12 |
13 | maxs2 = min(s2s)*TolScale;
14 | s2s = min(s2s,maxs2);
15 |
--------------------------------------------------------------------------------
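
A worked example of the shaping rule above (option values illustrative): points whose log density falls more than NoiseShapingThreshold below the current maximum receive extra noise variance (NoiseShapingFactor*deltay).^2.

opts.TolGPNoise = 1e-3;             % base noise used when s2 is empty
opts.NoiseShapingThreshold = 10;
opts.NoiseShapingFactor = 0.05;
y = [0; -5; -50];
s2s = noiseshaping_vbmc([],y,opts); % third point: deltay = 40, extra variance = 4
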
/misc/proposal_vbmc.m:
--------------------------------------------------------------------------------
1 | function y = proposal_vbmc(X,PLB,PUB,LB,UB)
2 | %PROPOSAL_VBMC Default proposal function.
3 |
4 | [N,D] = size(X);
5 | y = zeros(N,1);
6 |
7 | % df = 3; % Three degrees of freedom
8 | mu = 0.5*(PLB + PUB);
9 | sigma = 0.5*(PUB-PLB);
10 |
11 | for d = 1:D
12 | % y(:,d) = ( 1 + ((X(:,d)-mu(d))./sigma(d)).^2/df ).^(-(df+1)/2);
13 | y(:,d) = 1./( 1 + (((X(:,d)-mu(d))./sigma(d)).^2)/3 ).^2;
14 | end
15 |
16 | y = prod(y,2);
17 |
18 | end
--------------------------------------------------------------------------------
/misc/real2int_vbmc.m:
--------------------------------------------------------------------------------
1 | function x = real2int_vbmc(x,trinfo,integervars)
2 | %REAL2INT_VBMC Convert to integer-valued representation.
3 |
4 | if ~any(integervars); return; end
5 |
6 | xtemp = warpvars_vbmc(x,'inv',trinfo);
7 | xtemp(:,integervars) = round(xtemp(:,integervars));
8 | xtemp = warpvars_vbmc(xtemp,'d',trinfo);
9 |
10 | x(:,integervars) = xtemp(:,integervars);
11 |
12 | end
--------------------------------------------------------------------------------
/misc/rescale_params.m:
--------------------------------------------------------------------------------
1 | function vp = rescale_params(vp,theta)
2 | %RESCALE_PARAMS Assign THETA and rescale SIGMA and LAMBDA variational parameters.
3 |
4 | D = vp.D;
5 |
6 | if nargin > 1 && ~isempty(theta)
7 | K = vp.K;
8 | if vp.optimize_mu
9 | vp.mu = reshape(theta(1:D*K),[D,K]);
10 | idx_start = D*K;
11 | else
12 | idx_start = 0;
13 | end
14 | if vp.optimize_sigma
15 | vp.sigma = exp(theta(idx_start+(1:K)));
16 | idx_start = idx_start + K;
17 | end
18 | if vp.optimize_lambda
19 | vp.lambda = exp(theta(idx_start+(1:D)))';
20 | end
21 | if vp.optimize_weights
22 | eta = theta(end-K+1:end);
23 | eta = eta - max(eta);
24 | vp.w = exp(eta(:)');
25 | end
26 | end
27 |
28 | nl = sqrt(sum(vp.lambda.^2)/D);
29 | vp.lambda = vp.lambda(:)/nl;
30 | vp.sigma = vp.sigma(:)'*nl;
31 |
32 | % Ensure that weights are normalized
33 | if vp.optimize_weights
34 | vp.w = vp.w(:)'/sum(vp.w);
35 | % Remove ETA, used only for optimization
36 | if isfield(vp,'eta'); vp = rmfield(vp,'eta'); end
37 | end
38 |
39 | % The mode may have moved
40 | if isfield(vp,'mode'); vp = rmfield(vp,'mode'); end
41 |
42 | end
--------------------------------------------------------------------------------
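
The final rescaling above keeps each component's effective scales sigma(k)*lambda(d) unchanged while normalizing lambda to unit root mean square across dimensions. A quick check of the invariant:

lambda = [2; 0.5]; sigma = [1 3];
nl = sqrt(sum(lambda.^2)/numel(lambda));
lambda_new = lambda/nl; sigma_new = sigma*nl;
max(max(abs(lambda_new*sigma_new - lambda*sigma)))  % 0 up to round-off
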
/misc/testpdf.m:
--------------------------------------------------------------------------------
1 | function [y,dy] = testpdf(x)
2 | %TESTPDF Log-pdf of an axis-aligned Gaussian test density, with gradient.
3 | D = numel(x);
4 | sigma = 1:D;
5 | y = -0.5*sum(x.^2./sigma.^2);
6 | dy = -x./sigma.^2;
--------------------------------------------------------------------------------
/misc/vbinit_vbmc.m:
--------------------------------------------------------------------------------
1 | function [vp0_vec,type_vec] = vbinit_vbmc(type,Nopts,vp,Knew,Xstar,ystar)
2 | %VBINIT_VBMC Generate array of random starting parameters for variational posterior.
3 |
4 | % XSTAR and YSTAR are usually HPD regions
5 |
6 | D = vp.D;
7 | K = vp.K;
8 |
9 | Nstar = size(Xstar,1);
10 |
11 | % Compute moments
12 | %X_mean = mean(X,1);
13 | %X_cov = cov(X);
14 | %[X_R,p] = chol(X_cov);
15 | %if p > 0; X_R = diag(std(X)); end
16 |
17 | type_vec = type*ones(Nopts,1);
18 | lambda0 = vp.lambda;
19 | mu0 = vp.mu;
20 | w0 = vp.w;
21 |
22 | switch type
23 | case 1 % Start from old variational parameters
24 | sigma0 = vp.sigma;
25 | case 2 % Start from highest-posterior density training points
26 | [~,ord] = sort(ystar,'descend');
27 | if vp.optimize_mu
28 | idx_ord = repmat(1:min(Knew,size(Xstar,1)),[1,ceil(Knew/size(Xstar,1))]);
29 | mu0 = Xstar(ord(idx_ord(1:Knew)),:)';
30 | end
31 | if K > 1; V = var(mu0,[],2); else; V = var(Xstar)'; end
32 | sigma0 = sqrt(mean(V./lambda0.^2)/Knew).*exp(0.2*randn(1,Knew));
33 | case 3 % Start from random provided training points
34 | if vp.optimize_mu; mu0 = zeros(D,K); end
35 | sigma0 = zeros(1,K);
36 | end
37 |
38 | for iOpt = 1:Nopts
39 | vp0_vec(iOpt) = vp;
40 | vp0_vec(iOpt).K = Knew;
41 |
42 | mu = mu0;
43 | sigma = sigma0;
44 | lambda = lambda0;
45 | if vp.optimize_weights; w = w0; end
46 | add_jitter = true;
47 |
48 | switch type
49 |
50 | case 1 % Start from old variational parameters
51 | if iOpt == 1 % Copy previous parameters verbatim
52 | add_jitter = false;
53 | end
54 | if Knew > vp.K
55 | % Spawn a new component near an existing one
56 | for iNew = vp.K+1:Knew
57 | idx = randi(vp.K);
58 | mu(:,iNew) = mu(:,idx);
59 | sigma(iNew) = sigma(idx);
60 | mu(:,iNew) = mu(:,iNew) + 0.5*sigma(iNew)*lambda.*randn(D,1);
61 | if vp.optimize_sigma
62 | sigma(iNew) = sigma(iNew)*exp(0.2*randn());
63 | end
64 | if vp.optimize_weights
65 | xi = 0.25 + 0.25*rand();
66 | w(iNew) = xi*w(idx);
67 | w(idx) = (1-xi)*w(idx);
68 | end
69 |
70 | end
71 | end
72 |
73 | case 2 % Start from highest-posterior density training points
74 | if iOpt == 1
75 | add_jitter = false;
76 | end
77 | if vp.optimize_lambda
78 | lambda = std(Xstar,[],1)';
79 | lambda = lambda*sqrt(D/sum(lambda.^2));
80 | end
81 | if vp.optimize_weights
82 | w = ones(1,Knew)/Knew;
83 | end
84 |
85 | case 3 % Start from random provided training points
86 | ord = randperm(Nstar);
87 | if vp.optimize_mu
88 | idx_ord = repmat(1:min(Knew,size(Xstar,1)),[1,ceil(Knew/size(Xstar,1))]);
89 | mu = Xstar(ord(idx_ord(1:Knew)),:)';
90 | else
91 | mu = mu0;
92 | end
93 | if K > 1; V = var(mu,[],2); else; V = var(Xstar)'; end
94 |
95 | if vp.optimize_sigma
96 | sigma = sqrt(mean(V)/Knew)*exp(0.2*randn(1,Knew));
97 | end
98 | if vp.optimize_lambda
99 | lambda = std(Xstar,[],1)';
100 | lambda = lambda*sqrt(D/sum(lambda.^2));
101 | end
102 | if vp.optimize_weights
103 | w = ones(1,Knew)/Knew;
104 | end
105 |
106 | otherwise
107 | error('vbinit:UnknownType', ...
108 | 'Unknown TYPE for initialization of variational posteriors.');
109 | end
110 |
111 | if add_jitter
112 | if vp.optimize_mu
113 | mu = mu + bsxfun(@times,sigma,bsxfun(@times,lambda,randn(size(mu))));
114 | end
115 | if vp.optimize_sigma
116 | sigma = sigma.*exp(0.2*randn(1,Knew));
117 | end
118 | if vp.optimize_lambda
119 | lambda = lambda.*exp(0.2*randn(D,1));
120 | end
121 | if vp.optimize_weights
122 | w = w.*exp(0.2*randn(1,Knew));
123 | w = w/sum(w);
124 | end
125 | end
126 |
127 | if vp.optimize_weights
128 | vp0_vec(iOpt).w = w;
129 | else
130 | vp0_vec(iOpt).w = ones(1,Knew)/Knew;
131 | end
132 | if vp.optimize_mu
133 | vp0_vec(iOpt).mu = mu;
134 | else
135 | vp0_vec(iOpt).mu = mu0;
136 | end
137 | vp0_vec(iOpt).sigma = sigma;
138 | vp0_vec(iOpt).lambda = lambda;
139 |
140 | end
--------------------------------------------------------------------------------
/misc/vbmc_gphyp.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/misc/vbmc_gphyp.m
--------------------------------------------------------------------------------
/misc/vpbndloss.m:
--------------------------------------------------------------------------------
1 | function [L,dL] = vpbndloss(theta,vp,thetabnd,TolCon)
2 | %VPBNDLOSS Variational parameter loss function for soft optimization bounds.
3 |
4 | compute_grad = nargout > 1; % Compute gradient only if requested
5 |
6 | K = vp.K;
7 | D = vp.D;
8 |
9 | if vp.optimize_mu
10 | mu = theta(1:K*D);
11 | idx_start = K*D;
12 | else
13 | mu = vp.mu(:)';
14 | idx_start = 0;
15 | end
16 | if vp.optimize_sigma
17 | lnsigma = theta(idx_start+(1:K));
18 | idx_start = idx_start + K;
19 | else
20 | lnsigma = log(vp.sigma(:));
21 | end
22 | if vp.optimize_lambda
23 | lnlambda = theta(idx_start+(1:D));
24 | else
25 | lnlambda = log(vp.lambda(:));
26 | end
27 | if vp.optimize_weights
28 | eta = theta(end-K+1:end);
29 | else
30 | eta = [];
31 | end
32 |
33 | lnscale = bsxfun(@plus,lnsigma(:)',lnlambda(:));
34 | theta_ext = [];
35 | if vp.optimize_mu; theta_ext = [theta_ext; mu(:)]; end
36 | if vp.optimize_sigma || vp.optimize_lambda; theta_ext = [theta_ext; lnscale(:)]; end
37 | if vp.optimize_weights; theta_ext = [theta_ext; eta(:)]; end
38 |
39 | if compute_grad
40 | [L,dL] = softbndloss(theta_ext,thetabnd.lb(:),thetabnd.ub(:),TolCon);
41 | if vp.optimize_mu
42 | dmu = dL(1:D*K);
43 | idx_start = D*K;
44 | else
45 | dmu = [];
46 | idx_start = 0;
47 | end
48 | if vp.optimize_sigma || vp.optimize_lambda
49 | dlnscale = reshape(dL((1:D*K)+idx_start),[D,K]);
50 | if vp.optimize_sigma
51 | dsigma = sum(dlnscale,1);
52 | else
53 | dsigma = [];
54 | end
55 | if vp.optimize_lambda
56 | dlambda = sum(dlnscale,2);
57 | else
58 | dlambda = [];
59 | end
60 | else
61 | dsigma = []; dlambda = [];
62 | end
63 | if vp.optimize_weights
64 | deta = dL(end-K+1:end);
65 | else
66 | deta = [];
67 | end
68 | dL = [dmu(:); dsigma(:); dlambda(:); deta(:)];
69 | else
70 | L = softbndloss(theta_ext,thetabnd.lb(:),thetabnd.ub(:),TolCon);
71 | end
72 |
73 | end
--------------------------------------------------------------------------------
/misc/vpbounds.m:
--------------------------------------------------------------------------------
1 | function [vp,thetabnd] = vpbounds(vp,gp,options,K)
2 | %VPBOUNDS Compute soft bounds for variational posterior parameters.
3 |
4 | if nargin < 4 || isempty(K); K = vp.K; end
5 |
6 | % Soft-bound loss is computed on MU and SCALE (which is SIGMA times LAMBDA)
7 |
8 | % Start with reversed bounds (see below)
9 | if ~isfield(vp,'bounds') || isempty(vp.bounds)
10 | vp.bounds.mu_lb = Inf(1,vp.D);
11 | vp.bounds.mu_ub = -Inf(1,vp.D);
12 | vp.bounds.lnscale_lb = Inf(1,vp.D);
13 | vp.bounds.lnscale_ub = -Inf(1,vp.D);
14 | % vp.bounds
15 | end
16 |
17 | % Set bounds for mean parameters of variational components
18 | vp.bounds.mu_lb = min(min(gp.X),vp.bounds.mu_lb);
19 | vp.bounds.mu_ub = max(max(gp.X),vp.bounds.mu_ub);
20 |
21 | % Set bounds for log scale parameters of variational components
22 | lnrange = log(max(gp.X) - min(gp.X));
23 | vp.bounds.lnscale_lb = min(vp.bounds.lnscale_lb,lnrange + log(options.TolLength));
24 | vp.bounds.lnscale_ub = max(vp.bounds.lnscale_ub,lnrange);
25 |
26 | % Set bounds for log weight parameters of variational components
27 | if vp.optimize_weights
28 | vp.bounds.eta_lb = log(0.5*options.TolWeight);
29 | vp.bounds.eta_ub = 0;
30 | end
31 |
32 | thetabnd.lb = [];
33 | thetabnd.ub = [];
34 | if vp.optimize_mu
35 | thetabnd.lb = [thetabnd.lb,repmat(vp.bounds.mu_lb,[1,K])];
36 | thetabnd.ub = [thetabnd.ub,repmat(vp.bounds.mu_ub,[1,K])];
37 | end
38 | if vp.optimize_sigma || vp.optimize_lambda
39 | thetabnd.lb = [thetabnd.lb,repmat(vp.bounds.lnscale_lb,[1,K])];
40 | thetabnd.ub = [thetabnd.ub,repmat(vp.bounds.lnscale_ub,[1,K])];
41 | end
42 | if vp.optimize_weights
43 | thetabnd.lb = [thetabnd.lb,repmat(vp.bounds.eta_lb,[1,K])];
44 | thetabnd.ub = [thetabnd.ub,repmat(vp.bounds.eta_ub,[1,K])];
45 | end
46 |
47 | thetabnd.TolCon = options.TolConLoss;
48 |
49 | % Weights below a certain threshold are penalized
50 | if vp.optimize_weights
51 | thetabnd.WeightThreshold = max(1/(4*K),options.TolWeight);
52 | thetabnd.WeightPenalty = options.WeightPenalty;
53 | end
54 |
55 | end
--------------------------------------------------------------------------------
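A hedged sketch of how vpbounds.m assembles the soft-bound vectors; the structs and option values below are hypothetical, chosen only to populate the fields the function reads:

    vp = struct('D',2,'K',3,'optimize_mu',true,'optimize_sigma',true, ...
                'optimize_lambda',true,'optimize_weights',false);
    gp.X = randn(20,2);               % hypothetical GP training inputs
    options.TolLength = 1e-6;         % hypothetical option values
    options.TolWeight = 1e-2;
    options.TolConLoss = 0.01;
    options.WeightPenalty = 0.1;
    [vp,thetabnd] = vpbounds(vp,gp,options);
    % thetabnd.lb and thetabnd.ub are 1-by-(2*D*K) here: D*K mean bounds
    % followed by D*K log-scale bounds (no eta block, weights not optimized)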
/misc/vpsample_vbmc.m:
--------------------------------------------------------------------------------
1 | function [vp,samples,output] = vpsample_vbmc(Ns,Ninit,vp,gp,optimState,options,wide_flag)
2 | %VPSAMPLE_VBMC Sample variational posterior parameters via MCMC.
3 | if nargin < 7 || isempty(wide_flag); wide_flag = false; end
4 |
5 | % Assign default values to OPTIMSTATE
6 | if ~isfield(optimState,'delta'); optimState.delta = 0; end
7 | if ~isfield(optimState,'EntropySwitch'); optimState.EntropySwitch = false; end
8 | if ~isfield(optimState,'Warmup'); optimState.Warmup = ~vp.optimize_weights; end
9 | if ~isfield(optimState,'temperature'); optimState.temperature = 1; end
10 |
11 | %% Set up sampling variables and options
12 |
13 | % Perform quick sieve to determine good starting point
14 | [vp,~,elcbo_beta,compute_var,NSentK] = ...
15 | vpsieve_vbmc(Ninit,1,vp,gp,optimState,options);
16 |
17 | K = vp.K;
18 | D = vp.D;
19 |
20 | % Compute soft bounds for variational parameters optimization
21 | [vp,thetabnd] = vpbounds(vp,gp,options,K);
22 |
23 | % Move lower bound on scale - we want *wider* distributions
24 | if wide_flag
25 | lnscale = bsxfun(@plus,log(vp.sigma(:))',log(vp.lambda(:)));
26 | if vp.optimize_mu; idx = D*K; else; idx = 0; end
27 | thetabnd.lb(idx+1:idx+K*D) = lnscale;
28 | end
29 |
30 | %% Sample variational posterior starting from current
31 |
32 | theta0 = get_vptheta(vp)';
33 | Ntheta = numel(theta0);
34 |
35 | % MCMC parameters
36 | Widths = 0.5;
37 | sampleopts.Thin = 1;
38 | sampleopts.Burnin = 0;
39 | sampleopts.Display = 'off';
40 | sampleopts.Diagnostics = false;
41 | LB = -Inf(1,Ntheta);
42 | UB = Inf(1,Ntheta);
43 |
44 | idx_fixed = false(size(theta0));
45 | if ~optimState.Warmup && 0 % dead branch: the '&& 0' disables parameter fixing
46 | if vp.optimize_mu; idx_fixed(1:D*K) = true; end
47 | % idx_fixed = true(size(theta0));
48 | % idx_fixed(idx+1:idx+K) = false;
49 | end
50 |
51 | LB(idx_fixed) = theta0(idx_fixed);
52 | UB(idx_fixed) = theta0(idx_fixed);
53 |
54 | % Perform sampling
55 | try
56 | switch lower(options.VariationalSampler)
57 | case 'slicesample'
58 | vpmcmc_fun = @(theta_) -negelcbo_vbmc(theta_,elcbo_beta,vp,gp,NSentK,0,compute_var,0,thetabnd);
59 | [samples,fvals,exitflag,output] = ...
60 | slicesample_vbmc(vpmcmc_fun,theta0,Ns,Widths,LB,UB,sampleopts);
61 | case 'malasample'
62 | if isfield(optimState,'mcmc_stepsize')
63 | sampleopts.Stepsize = optimState.mcmc_stepsize;
64 | output.stepsize = sampleopts.Stepsize;
65 | end
66 | vpmcmc_fun = @(theta_) vpmcmcgrad_fun(theta_,elcbo_beta,vp,gp,NSentK,compute_var,thetabnd);
67 | [samples,fvals,exitflag,output] = ...
68 | malasample_vbmc(vpmcmc_fun,theta0,Ns,Widths,LB,UB,sampleopts);
69 | % output.accept_rate
70 | end
71 | catch
72 | samples = repmat(theta0,[Ns,1]);
73 | end
74 | vp = rescale_params(vp,samples(end,:));
75 |
76 | end
77 |
78 | function [logp,dlogp] = vpmcmcgrad_fun(theta,elcbo_beta,vp,gp,NSentK,compute_var,thetabnd)
79 | [nlogp,ndlogp] = negelcbo_vbmc(theta,elcbo_beta,vp,gp,NSentK,1,compute_var,0,thetabnd);
80 | logp = -nlogp;
81 | dlogp = -ndlogp;
82 | end
83 |
84 |
85 |
--------------------------------------------------------------------------------
/misc/vpsieve_vbmc.m:
--------------------------------------------------------------------------------
1 | function [vp0_vec,vp0_type,elcbo_beta,compute_var,NSentK,NSentKFast] = vpsieve_vbmc(Ninit,Nbest,vp,gp,optimState,options,K)
2 | %VPSIEVE Preliminary 'sieve' method for fitting variational posterior.
3 |
4 | % Assign default values to OPTIMSTATE
5 | if ~isfield(optimState,'delta'); optimState.delta = 0; end
6 | if ~isfield(optimState,'EntropySwitch'); optimState.EntropySwitch = false; end
7 | if ~isfield(optimState,'Warmup'); optimState.Warmup = ~vp.optimize_weights; end
8 | if ~isfield(optimState,'temperature'); optimState.temperature = 1; end
9 | if ~isfield(optimState,'Neff'); optimState.Neff = size(gp.X,1); end
10 |
11 | if isempty(Nbest); Nbest = 1; end
12 | if nargin < 7 || isempty(K); K = vp.K; end
13 |
14 | %% Set up optimization variables and options
15 |
16 | vp.delta = optimState.delta(:);
17 |
18 | if isempty(Ninit) % Number of initial starting points
19 | Ninit = ceil(evaloption_vbmc(options.NSelbo,K));
20 | end
21 | nelcbo_fill = zeros(Ninit,1);
22 |
23 | % Number of samples per component for MC approximation of the entropy
24 | NSentK = ceil(evaloption_vbmc(options.NSent,K)/K);
25 |
26 | % Number of samples per component for preliminary MC approximation of the entropy
27 | NSentKFast = ceil(evaloption_vbmc(options.NSentFast,K)/K);
28 |
29 | % Deterministic entropy if entropy switch is on or only one component
30 | if optimState.EntropySwitch || K == 1
31 | NSentK = 0;
32 | NSentKFast = 0;
33 | end
34 |
35 | % Confidence weight
36 | elcbo_beta = evaloption_vbmc(options.ELCBOWeight,optimState.Neff);
37 | compute_var = elcbo_beta ~= 0;
38 |
39 | % Compute soft bounds for variational parameters optimization
40 | [vp,thetabnd] = vpbounds(vp,gp,options,K);
41 |
42 | %% Perform quick shotgun evaluation of many candidate parameters
43 |
44 | if Ninit > 0
45 | % Get high-posterior density points
46 | [Xstar,ystar] = gethpd_vbmc(gp.X,gp.y,options.HPDFrac);
47 |
48 | % Generate a bunch of random candidate variational parameters
49 | switch Nbest
50 | case 1
51 | [vp0_vec,vp0_type] = vbinit_vbmc(1,Ninit,vp,K,Xstar,ystar);
52 | otherwise
53 | [vp0_vec1,vp0_type1] = vbinit_vbmc(1,ceil(Ninit/3),vp,K,Xstar,ystar);
54 | [vp0_vec2,vp0_type2] = vbinit_vbmc(2,ceil(Ninit/3),vp,K,Xstar,ystar);
55 | [vp0_vec3,vp0_type3] = vbinit_vbmc(3,Ninit-2*ceil(Ninit/3),vp,K,Xstar,ystar);
56 | vp0_vec = [vp0_vec1,vp0_vec2,vp0_vec3];
57 | vp0_type = [vp0_type1;vp0_type2;vp0_type3];
58 | end
59 |
60 | if isfield(optimState,'vp_repo') && ~isempty(optimState.vp_repo) && options.VariationalInitRepo
61 | Ntheta = numel(get_vptheta(vp0_vec(1)));
62 | idx = find(cellfun(@numel,optimState.vp_repo) == Ntheta);
63 | if ~isempty(idx)
64 | vp0_vec4 = [];
65 | for ii = 1:numel(idx)
66 | vp0_vec4 = [vp0_vec4,rescale_params(vp0_vec(1),optimState.vp_repo{idx(ii)})];
67 | end
68 | vp0_vec = [vp0_vec,vp0_vec4];
69 | vp0_type = [vp0_type;ones(numel(vp0_vec4),1)];
70 | end
71 | end
72 |
73 | % Quickly estimate ELCBO at each candidate variational posterior
74 | for iOpt = 1:numel(vp0_vec)
75 | [theta0,vp0_vec(iOpt)] = get_vptheta(vp0_vec(iOpt),vp.optimize_mu,vp.optimize_sigma,vp.optimize_lambda,vp.optimize_weights);
76 | [nelbo_tmp,~,~,~,varF_tmp] = negelcbo_vbmc(theta0,0,vp0_vec(iOpt),gp,NSentKFast,0,compute_var,options.AltMCEntropy,thetabnd);
77 | nelcbo_fill(iOpt) = nelbo_tmp + elcbo_beta*sqrt(varF_tmp);
78 | end
79 |
80 | % Sort by negative ELCBO
81 | [~,vp0_ord] = sort(nelcbo_fill,'ascend');
82 | vp0_vec = vp0_vec(vp0_ord);
83 | vp0_type = vp0_type(vp0_ord);
84 | else
85 | vp0_vec = vp;
86 | vp0_type = 1;
87 | end
88 |
89 |
90 |
91 | end
--------------------------------------------------------------------------------
/misc/vptrain2real.m:
--------------------------------------------------------------------------------
1 | function vp_real = vptrain2real(vp,entflag,options)
2 | %VPTRAIN2REAL Convert training variational posterior to real one.
3 |
4 | if nargin < 2 || isempty(entflag); entflag = false; end
5 | if nargin < 3; options = []; end
6 |
7 | if isfield(vp,'temperature') && ~isempty(vp.temperature)
8 | T = vp.temperature;
9 | else
10 | T = 1;
11 | end
12 |
13 | if any(T == [2,3,4,5])
14 | PowerThreshold = 1e-5;
15 | [vp_real,lnZ_pow] = vbmc_power(vp,T,PowerThreshold);
16 | if isfield(vp_real,'stats') && ~isempty(vp_real.stats)
17 | vp_real.stats.elbo = T*vp.stats.elbo + lnZ_pow;
18 | vp_real.stats.elbo_sd = T*vp.stats.elbo_sd;
19 | vp_real.stats.elogjoint_sd = T*vp.stats.elogjoint_sd;
20 |
21 | if entflag
22 | % Use deterministic approximation of the entropy
23 | H = entlb_vbmc(vp_real,0,1);
24 | varH = 0;
25 | vp_real.stats.elogjoint = vp_real.stats.elbo - H;
26 | vp_real.stats.entropy = H;
27 | vp_real.stats.entropy_sd = sqrt(varH);
28 | else
29 | vp_real.stats.elogjoint = NaN;
30 | vp_real.stats.entropy = NaN;
31 | vp_real.stats.entropy_sd = NaN;
32 | end
33 | end
34 | else
35 | vp_real = vp;
36 | end
37 |
38 |
--------------------------------------------------------------------------------
/misc/warp_gpandvp_vbmc.m:
--------------------------------------------------------------------------------
1 | function [vp,hyp_warped] = warp_gpandvp_vbmc(trinfo,vp_old,gp_old)
2 | %WARP_GPANDVP_VBMC Update GP hyps and variational posterior after warping.
3 |
4 | D = size(gp_old.X,2);
5 | trinfo_old = vp_old.trinfo;
6 |
7 | % Temperature scaling
8 | if isfield(vp_old,'temperature') && ~isempty(vp_old.temperature)
9 | T = vp_old.temperature;
10 | else
11 | T = 1;
12 | end
13 |
14 | %% Update GP hyperparameters
15 |
16 | warpfun = @(x) warpvars_vbmc(warpvars_vbmc(x,'i',trinfo_old),'d',trinfo);
17 |
18 | Ncov = gp_old.Ncov;
19 | Nnoise = gp_old.Nnoise;
20 | Nmean = gp_old.Nmean;
21 | if ~isempty(gp_old.outwarpfun); Noutwarp = gp_old.Noutwarp; else; Noutwarp = 0; end
22 |
23 | Ns_gp = numel(gp_old.post);
24 | hyp_warped = NaN(Ncov+Nnoise+Nmean+Noutwarp,Ns_gp);
25 |
26 | for s = 1:Ns_gp
27 | hyp = gp_old.post(s).hyp;
28 | hyp_warped(:,s) = hyp;
29 |
30 | % Update GP input length scales
31 | ell = exp(hyp(1:D))';
32 | [~,ell_new] = unscent_warp(warpfun,gp_old.X,ell);
33 | hyp_warped(1:D,s) = mean(log(ell_new),1); % Geometric mean of length scales
34 |
35 | % We assume relatively no change to GP output and noise scales
36 |
37 | switch gp_old.meanfun
38 | case 0
39 | % Warp constant mean
40 | m0 = hyp(Ncov+Nnoise+1);
41 | dy_old = warpvars_vbmc(gp_old.X,'logp',trinfo_old);
42 | dy = warpvars_vbmc(warpfun(gp_old.X),'logp',trinfo);
43 | m0w = m0 + (mean(dy) - mean(dy_old))/T;
44 |
45 | hyp_warped(Ncov+Nnoise+1,s) = m0w;
46 |
47 | case 4
48 | % Warp quadratic mean
49 | m0 = hyp(Ncov+Nnoise+1);
50 | xm = hyp(Ncov+Nnoise+1+(1:D))';
51 | omega = exp(hyp(Ncov+Nnoise+1+D+(1:D)))';
52 |
53 | % Warp location and scale
54 | [xmw,omegaw] = unscent_warp(warpfun,xm,omega);
55 |
56 | % Warp maximum
57 | dy_old = warpvars_vbmc(xm,'logpdf',trinfo_old)';
58 | dy = warpvars_vbmc(xmw,'logpdf',trinfo)';
59 | m0w = m0 + (dy - dy_old)/T;
60 |
61 | hyp_warped(Ncov+Nnoise+1,s) = m0w;
62 | hyp_warped(Ncov+Nnoise+1+(1:D),s) = xmw';
63 | hyp_warped(Ncov+Nnoise+1+D+(1:D),s) = log(omegaw)';
64 |
65 | otherwise
66 | error('Unsupported GP mean function for input warping.');
67 | end
68 | end
69 |
70 | %% Update variational posterior
71 |
72 | vp = vp_old;
73 | vp.trinfo = trinfo;
74 |
75 | mu = vp_old.mu';
76 | sigmalambda = bsxfun(@times,vp_old.lambda,vp_old.sigma)';
77 |
78 | [muw,sigmalambdaw] = unscent_warp(warpfun,mu,sigmalambda);
79 |
80 | vp.mu = muw';
81 | lambdaw = sqrt(D*mean(bsxfun(@rdivide,sigmalambdaw.^2,sum(sigmalambdaw.^2,2)),1));
82 | vp.lambda(:,1) = lambdaw(:);
83 |
84 | sigmaw = exp(mean(log(bsxfun(@rdivide,sigmalambdaw,lambdaw)),2));
85 | vp.sigma(1,:) = sigmaw;
86 |
87 | % Approximate change in weight
88 | dy_old = warpvars_vbmc(mu,'logpdf',trinfo_old)';
89 | dy = warpvars_vbmc(muw,'logpdf',trinfo)';
90 |
91 | ww = vp_old.w .* exp((dy - dy_old)/T);
92 | vp.w = ww ./ sum(ww);
93 |
94 | end
--------------------------------------------------------------------------------
/private/acqhedge_vbmc.m:
--------------------------------------------------------------------------------
1 | function hedge = acqhedge_vbmc(action,hedge,stats,options)
2 | %ACQHEDGE_VBMC Evaluate and update portfolio of acquisition functions.
3 |
4 | switch lower(action(1:3))
5 | case 'acq'
6 | % Choose acquisition function based on hedge strategy
7 |
8 | if isempty(hedge)
9 | % Initialize hedge struct
10 | hedge.g = zeros(1,numel(options.SearchAcqFcn));
11 | hedge.n = numel(options.SearchAcqFcn);
12 | hedge.count = 0;
13 | hedge.lambda = 0.2; % Lapse rate - random choice
14 | hedge.beta = 1;
15 | hedge.decay = options.AcqHedgeDecay^(options.FunEvalsPerIter);
16 | end
17 |
18 | hedge.count = hedge.count + 1;
19 | hedge.p = exp(hedge.beta*(hedge.g - max(hedge.g)))./sum(exp(hedge.beta*(hedge.g - max(hedge.g))));
20 | hedge.p = hedge.p*(1-hedge.lambda) + hedge.lambda/hedge.n;
21 |
22 | hedge.chosen = find(rand() < cumsum(hedge.p),1);
23 | hedge.phat = Inf(size(hedge.p));
24 | hedge.phat(hedge.chosen) = hedge.p(hedge.chosen);
25 |
26 | case 'upd'
27 | % Update value of hedge portfolio based on uncertainty reduction
28 |
29 | HedgeCutoff = 5;
30 |
31 | if ~isempty(hedge)
32 | iter = stats.iter(end);
33 | min_iter = max(1,iter-options.AcqHedgeIterWindow);
34 |
35 | min_sd = min(stats.elbo_sd(min_iter:iter-1));
36 | er_sd = max(0, log(min_sd / stats.elbo_sd(iter)));
37 |
38 | elcbo = stats.elbo - options.ELCBOImproWeight*stats.elbo_sd;
39 | max_elcbo = max(elcbo(min_iter:iter-1));
40 | er_elcbo = max(0,elcbo(iter) - max_elcbo)/options.TolImprovement;
41 | if er_elcbo > 1; er_elcbo = 1 + log(er_elcbo); end
42 |
43 | min_r = min(stats.rindex(min_iter:iter-1));
44 | er_r = max(0, log(min_r / stats.rindex(iter)));
45 |
46 | % er = 0.5*er_sd + 0.5*er_elcbo; % Reward
47 | er = er_r;
48 |
49 | for iHedge = 1:hedge.n
50 | hedge.g(iHedge) = hedge.decay*hedge.g(iHedge) + er/hedge.phat(iHedge);
51 | end
52 |
53 | % Apply cutoff value on hedge
54 | hedge.g = min(hedge.g,HedgeCutoff);
55 |             % hedge.g  % debug display of portfolio gains (disabled)
56 | end
57 |
58 | end
--------------------------------------------------------------------------------
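The 'acq' branch above computes a softmax over portfolio gains mixed with a uniform lapse term; a standalone numerical sketch with made-up gains:

    g = [0.3 0 0.1]; beta = 1; lambda = 0.2; n = numel(g);
    p = exp(beta*(g - max(g)));       % softmax numerator (max-shifted)
    p = p/sum(p);                     % p ~= [0.391 0.289 0.320]
    p = p*(1-lambda) + lambda/n;      % p ~= [0.379 0.298 0.323]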
/private/recompute_lcbmax.m:
--------------------------------------------------------------------------------
1 | function lcbmax_vec = recompute_lcbmax(gp,optimState,stats,options)
2 | %RECOMPUTE_LCBMAX Recompute moving LCB maximum based on current GP.
3 |
4 | N = optimState.Xn;
5 | Xflag = optimState.X_flag;
6 | X = optimState.X(Xflag,:);
7 | y = optimState.y(Xflag);
8 | if isfield(optimState,'S')
9 | s2 = optimState.S(Xflag).^2;
10 | else
11 | s2 = [];
12 | end
13 |
14 | fmu = NaN(N,1);
15 | fs2 = fmu;
16 | [~,~,fmu(Xflag),fs2(Xflag)] = gplite_pred(gp,X,y,s2);
17 |
18 | lcb = fmu - options.ELCBOImproWeight*sqrt(fs2);
19 | lcb_movmax = movmax(lcb,[numel(lcb),0]);
20 |
21 | lcbmax_vec = lcb_movmax(stats.N);
22 |
23 | end
--------------------------------------------------------------------------------
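The movmax call with window [numel(lcb),0] yields a running maximum over all past iterations; a small check with illustrative values:

    lcb = [1 3 2 5 4]';
    movmax(lcb,[numel(lcb),0])'       % returns [1 3 3 5 5]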
/private/updateK.m:
--------------------------------------------------------------------------------
1 | function Knew = updateK(optimState,stats,options)
2 | %UPDATEK Update number of variational mixture components.
3 |
4 | Knew = optimState.vpK;
5 |
6 | % Compute maximum number of components
7 | Kmax = ceil(evaloption_vbmc(options.KfunMax,optimState.Neff));
8 |
9 | % Evaluate bonus for stable solution
10 | Kbonus = round(double(evaloption_vbmc(options.AdaptiveK,Knew)));
11 |
12 |
13 | % If not warming up, check if number of components gets to be increased
14 | if ~optimState.Warmup && optimState.iter > 1
15 |
16 | RecentIters = ceil(0.5*options.TolStableCount/options.FunEvalsPerIter);
17 |
18 | % Check if ELCBO has improved wrt recent iterations
19 | elbos = stats.elbo(max(1,end-RecentIters+1):end);
20 | elboSDs = stats.elbo_sd(max(1,end-RecentIters+1):end);
21 | elcbos = elbos - options.ELCBOImproWeight*elboSDs;
22 | warmups = stats.warmup(max(1,end-RecentIters+1):end);
23 | elcbos_after = elcbos(~warmups);
24 | elcbos_after(1:min(2,end)) = -Inf; % Ignore two iterations right after warmup
25 | elcbo_max = max(elcbos_after);
26 | improving_flag = elcbos_after(end) >= elcbo_max && isfinite(elcbos_after(end));
27 |
28 | % Add one component if ELCBO is improving and no pruning in last iteration
29 | if stats.pruned(end) == 0 && improving_flag
30 | Knew = Knew + 1;
31 | end
32 |
33 | % Bonus components for stable solution (speed up exploration)
34 | if stats.rindex(end) < 1 && ~optimState.RecomputeVarPost && improving_flag
35 | % No bonus if any component was very recently pruned
36 | if all(stats.pruned(max(1,end-ceil(0.5*RecentIters)+1):end) == 0)
37 | Knew = Knew + Kbonus;
38 | end
39 | end
40 | Knew = max(optimState.vpK,min(Knew,Kmax));
41 | end
42 |
43 | end
--------------------------------------------------------------------------------
/private/vbmc_demo2d.m:
--------------------------------------------------------------------------------
1 | function stats = vbmc_demo2d(fun,stats,plotbnd)
2 | %VBMC_DEMO2D Demo plot of VBMC at work (only for 2D problems).
3 |
4 | if nargin < 1 || isempty(fun); fun = @rosenbrock_test; end
5 | if nargin < 2 || isempty(stats)
6 | rng(0);
7 | [~,~,~,~,~,~,~,stats] = vbmc(fun,[-1 -1],-Inf,Inf,-3,3);
8 | end
9 | if nargin < 3 || isempty(plotbnd)
10 | vp = stats.vp(end);
11 | xrnd = vbmc_rnd(vp,1e6);
12 | for i = 1:size(xrnd,2)
13 | LB(i) = floor(quantile1(xrnd(:,i),0.01) - 0.5);
14 | UB(i) = ceil(quantile1(xrnd(:,i),0.99)+0.5);
15 | end
16 | else
17 | LB = plotbnd(1,:);
18 | UB = plotbnd(2,:);
19 | end
20 |
21 | tolx = 1e-3;
22 | Nx = 128;
23 | Npanels = 8;
24 |
25 | x1 = linspace(LB(1)+tolx,UB(1)-tolx,Nx);
26 | x2 = linspace(LB(2)+tolx,UB(2)-tolx,Nx);
27 | dx1 = x1(2)-x1(1);
28 | dx2 = x2(2)-x2(1);
29 |
30 | idx = ones(1,Npanels-2);
31 | idx(2) = find(stats.warmup == 1,1,'last');
32 | tmp = floor(linspace(idx(2),numel(stats.vp),Npanels-3));
33 | idx(3:Npanels-2) = tmp(2:end);
34 |
35 | Np = 5;
36 | grid = [];
37 | for i = 1:(Npanels-2)/2
38 | grid = [grid, [i*ones(1,Np); (i+(Npanels-2)/2)*ones(1,Np)]];
39 | end
40 | grid = [grid, [0,Npanels*ones(1,Np);0,(Npanels-1)*ones(1,Np)]];
41 |
42 | % grid = [reshape(1:Npanels-2,[(Npanels-2)/2,2])',[Npanels;Npanels-1]];
43 | labels{1} = 'A';
44 | labels{Npanels-1} = 'C';
45 | labels{Npanels} = 'B';
46 |
47 | h = plotify(grid,'gutter',[0.05 0.15],'margins',[.05 .02 .075 .05],'labels',labels);
48 |
49 | for iPlot = 1:Npanels
50 | axes(h(iPlot));
51 |
52 | %[X1,X2] = meshgrid(x1,x2);
53 | %tmp = cat(2,X2',X1');
54 | %xx = reshape(tmp,[],2);
55 | xx = combvec(x1,x2)';
56 |
57 | if iPlot <= numel(idx); vpflag = true; else vpflag = false; end
58 |
59 | elboflag = false;
60 | if vpflag
61 | vp = stats.vp(idx(iPlot));
62 | yy = vbmc_pdf(vp,xx);
63 | titlestr = ['Iteration ' num2str(stats.iter(idx(iPlot)))];
64 | if iPlot == 2; titlestr = [titlestr ' (end of warm-up)']; end
65 | elseif iPlot == Npanels-1
66 | lnyy = zeros(size(xx,1),1);
67 | for ii = 1:size(xx,1)
68 | lnyy(ii) = fun(xx(ii,:));
69 | end
70 | yy = exp(lnyy);
71 | Z = sum(yy(:))*dx1*dx2;
72 | yy = yy/Z;
73 | titlestr = ['True posterior'];
74 | else
75 | elboflag = true;
76 | end
77 |
78 | if elboflag
79 | iter = stats.iter;
80 | elbo = stats.elbo;
81 | elbo_sd = stats.elbo_sd;
82 | beta = 1.96;
83 | patch([iter,fliplr(iter)],[elbo + beta*elbo_sd, fliplr(elbo - beta*elbo_sd)],[1 0.8 0.8],'LineStyle','none'); hold on;
84 | hl(1) = plot(iter,elbo,'r','LineWidth',1); hold on;
85 | hl(2) = plot([iter(1),iter(end)],log(Z)*[1 1],'k','LineWidth',1);
86 | titlestr = 'Model evidence';
87 | xlim([0.9, stats.iter(end)+0.1]);
88 | ylims = [floor(min(elbo)-0.5),ceil(max(elbo)+0.5)];
89 | ylim(ylims);
90 | xticks(idx);
91 | yticks([ylims(1),round(log(Z),2),ylims(2)])
92 | xlabel('Iterations');
93 | if log(Z) < mean(ylims)
94 | loc = 'NorthEast';
95 | else
96 | loc = 'SouthEast';
97 | end
98 | hll = legend(hl,'ELBO','LML');
99 | set(hll,'Location',loc,'Box','off');
100 |
101 | else
102 | s = contour(x1,x2,reshape(yy',[Nx,Nx])');
103 |
104 | if vpflag
105 | % Plot component centers
106 | mu = warpvars_vbmc(vp.mu','inv',vp.trinfo);
107 | hold on;
108 | plot(mu(:,1),mu(:,2),'xr','LineStyle','none');
109 |
110 | % Plot data
111 | X = warpvars_vbmc(stats.gp(idx(iPlot)).X,'inv',vp.trinfo);
112 | plot(X(:,1),X(:,2),'.k','LineStyle','none');
113 | end
114 |
115 | % s.EdgeColor = 'None';
116 | view([0 90]);
117 | xlabel('x_1');
118 | ylabel('x_2');
119 | set(gca,'XTickLabel',[],'YTickLabel',[]);
120 |
121 | xlim([LB(1),UB(1)]);
122 | ylim([LB(2),UB(2)]);
123 | set(gca,'TickLength',get(gca,'TickLength')*2);
124 | end
125 |
126 | title(titlestr);
127 | set(gca,'TickDir','out');
128 | end
129 |
130 | set(gcf,'Color','w');
131 |
132 | pos = [20,20,900,450];
133 | set(gcf,'Position',pos);
134 | set(gcf,'Units','inches'); pos = get(gcf,'Position');
135 | set(gcf,'PaperPositionMode','Auto','PaperUnits','Inches','PaperSize',[pos(3), pos(4)])
136 | drawnow;
137 |
138 | end
--------------------------------------------------------------------------------
/private/vbmc_iterplot.m:
--------------------------------------------------------------------------------
1 | function vbmc_iterplot(vp,gp,optimState,stats,elbo)
2 | %VBMC_ITERPLOT Plot current iteration of the VBMC algorithm.
3 |
4 | D = vp.D;
5 | iter = optimState.iter;
6 | fontsize = 14;
7 |
8 | if D == 1
9 | hold off;
10 | gplite_plot(gp);
11 | hold on;
12 | xlims = xlim;
13 | xx = linspace(xlims(1),xlims(2),1e3)';
14 | yy = vbmc_pdf(vp,xx,false,true);
15 | hold on;
16 | plot(xx,yy+elbo,':');
17 | drawnow;
18 |
19 | else
20 | if ~isempty(vp)
21 | Xrnd = vbmc_rnd(vp,1e5,1,1);
22 | else
23 | Xrnd = gp.X;
24 | end
25 | X_train = gp.X;
26 |
27 | if iter == 1
28 | idx_new = true(size(X_train,1),1);
29 | else
30 | X_trainold = stats.gp(iter-1).X;
31 | idx_new = false(size(X_train,1),1);
32 | [~,idx_diff] = setdiff(X_train,X_trainold,'rows');
33 | idx_new(idx_diff) = true;
34 | end
35 | idx_old = ~idx_new;
36 |
37 | if ~isempty(vp.trinfo); X_train = warpvars_vbmc(X_train,'inv',vp.trinfo); end
38 |
39 | Pdelta = optimState.PUB_orig - optimState.PLB_orig;
40 | X_min = min(X_train,[],1) - Pdelta*0.1;
41 | X_max = max(X_train,[],1) + Pdelta*0.1;
42 | bounds = [max(min(optimState.PLB_orig,X_min),optimState.LB_orig); ...
43 | min(max(optimState.PUB_orig,X_max),optimState.UB_orig)];
44 |
45 | try
46 | for i = 1:D; names{i} = ['x_{' num2str(i) '}']; end
47 | [~,ax] = cornerplot(Xrnd,names,[],bounds);
48 | for i = 1:D-1
49 | for j = i+1:D
50 | axes(ax(j,i)); hold on;
51 | if any(idx_old)
52 | scatter(X_train(idx_old,i),X_train(idx_old,j),'ok');
53 | end
54 | if any(idx_new)
55 | scatter(X_train(idx_new,i),X_train(idx_new,j),'or','MarkerFaceColor','r');
56 | end
57 | end
58 | end
59 |
60 | h = axes(gcf,'Position',[0 0 1 1]);
61 | set(h,'Color','none','box','off','XTick',[],'YTick',[],'Units','normalized','Xcolor','none','Ycolor','none');
62 | text(0.9,0.9,['VBMC (iteration ' num2str(iter) ')'],'FontSize',fontsize,'HorizontalAlignment','right');
63 |
64 | drawnow;
65 | catch
66 | % pause
67 | end
68 | end
69 |
70 | end
--------------------------------------------------------------------------------
/private/vbmc_output.m:
--------------------------------------------------------------------------------
1 | function output = vbmc_output(vp,optimState,msg,stats,idx_best,vbmc_version)
2 | %VBMC_OUTPUT Create OUTPUT struct for VBMC.
3 |
4 | output.function = func2str(optimState.fun);
5 | if all(isinf(optimState.LB)) && all(isinf(optimState.UB))
6 | output.problemtype = 'unconstrained';
7 | else
8 | output.problemtype = 'boundconstraints';
9 | end
10 | output.iterations = optimState.iter;
11 | output.funccount = optimState.funccount;
12 | output.bestiter = idx_best;
13 | output.trainsetsize = stats.Neff(idx_best);
14 | output.components = vp.K;
15 | output.rindex = stats.rindex(idx_best);
16 | if stats.stable(idx_best)
17 | output.convergencestatus = 'probable';
18 | else
19 | output.convergencestatus = 'no';
20 | end
21 | output.overhead = NaN;
22 | output.rngstate = rng;
23 | output.algorithm = 'Variational Bayesian Monte Carlo';
24 | output.version = vbmc_version;
25 | output.message = msg;
26 |
27 | output.elbo = vp.stats.elbo;
28 | output.elbo_sd = vp.stats.elbo_sd;
29 |
30 | end
--------------------------------------------------------------------------------
/private/vbmc_plot2d.m:
--------------------------------------------------------------------------------
1 | function vbmc_plot2d(vp,LB,UB,gp,plotflag)
2 | %VBMC_PLOT2D 2-D Plot of variational/target posterior.
3 |
4 | if nargin < 4; gp = []; end
5 | if nargin < 5 || isempty(plotflag); plotflag = true; end
6 |
7 | tolx = 1e-3;
8 | Nx = 128;
9 |
10 | x1 = linspace(LB(1)+tolx,UB(1)-tolx,Nx);
11 | x2 = linspace(LB(2)+tolx,UB(2)-tolx,Nx);
12 | dx1 = x1(2)-x1(1);
13 | dx2 = x2(2)-x2(1);
14 |
15 | xx = combvec(x1,x2)';
16 |
17 | if isa(vp,'function_handle'); fun = vp; vpflag = false; else; vpflag = true; end
18 |
19 | if vpflag
20 | yy = vbmc_pdf(vp,xx);
21 | else
22 | lnyy = zeros(size(xx,1),1);
23 | for ii = 1:size(xx,1)
24 | lnyy(ii) = fun(xx(ii,:));
25 | end
26 | yy = exp(lnyy);
27 | Z = sum(yy(:))*dx1*dx2;
28 | yy = yy/Z;
29 | end
30 |
31 | s = contour(x1,x2,reshape(yy',[Nx,Nx])');
32 |
33 | if vpflag
34 | % Plot component centers
35 | if plotflag
36 | mu = warpvars_vbmc(vp.mu','inv',vp.trinfo);
37 | hold on;
38 | plot(mu(:,1),mu(:,2),'xr','LineStyle','none');
39 | end
40 |
41 | % Plot data
42 | if ~isempty(gp)
43 | X = warpvars_vbmc(gp.X,'inv',vp.trinfo);
44 | plot(X(:,1),X(:,2),'.k','LineStyle','none');
45 | end
46 | end
47 |
48 | % s.EdgeColor = 'None';
49 | view([0 90]);
50 | xlabel('x_1');
51 | ylabel('x_2');
52 | set(gca,'XTickLabel',[],'YTickLabel',[]);
53 |
54 | xlim([LB(1),UB(1)]);
55 | ylim([LB(2),UB(2)]);
56 | set(gca,'TickLength',get(gca,'TickLength')*2);
57 |
58 | set(gca,'TickDir','out');
59 | set(gcf,'Color','w');
60 |
61 | end
--------------------------------------------------------------------------------
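vbmc_plot2d also accepts a function handle returning the log target in place of a variational posterior, in which case the density is exponentiated and normalized on the grid. An illustrative call (bounds chosen arbitrarily; assumes rosenbrock_test.m and the internally used combvec are available):

    vbmc_plot2d(@rosenbrock_test,[-2 -1],[2 3]);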
/private/vbmc_termination.m:
--------------------------------------------------------------------------------
1 | function [optimState,stats,isFinished_flag,exitflag,action,msg] = vbmc_termination(optimState,action,stats,options)
2 | %VBMC_TERMINATION Compute stability index and check termination conditions.
3 |
4 | iter = optimState.iter;
5 | exitflag = 0;
6 | isFinished_flag = false;
7 | msg = [];
8 |
9 | % Maximum number of new function evaluations
10 | if optimState.funccount >= options.MaxFunEvals
11 | isFinished_flag = true;
12 | msg = 'Inference terminated: reached maximum number of function evaluations OPTIONS.MaxFunEvals.';
13 | end
14 |
15 | % Maximum number of iterations
16 | if iter >= options.MaxIter
17 | isFinished_flag = true;
18 | msg = 'Inference terminated: reached maximum number of iterations OPTIONS.MaxIter.';
19 | end
20 |
21 | % Quicker stability check for entropy switching
22 | if optimState.EntropySwitch
23 | TolStableIters = options.TolStableEntropyIters;
24 | else
25 | TolStableIters = ceil(options.TolStableCount/options.FunEvalsPerIter);
26 | end
27 |
28 | % Reached stable variational posterior with stable ELBO and low uncertainty
29 | [idx_stable,dN,dN_last,w] = getStableIter(stats,optimState,options);
30 | if ~isempty(idx_stable)
31 | sKL_list = stats.sKL;
32 | elbo_list = stats.elbo;
33 |
34 | sn = sqrt(optimState.sn2hpd);
35 | TolSN = sqrt(sn/options.TolSD)*options.TolSD;
36 | TolSD = min(max(options.TolSD,TolSN),options.TolSD*10);
37 |
38 | rindex_vec(1) = abs(elbo_list(iter) - elbo_list(iter-1)) / TolSD;
39 | rindex_vec(2) = stats.elbo_sd(iter) / TolSD;
40 | rindex_vec(3) = sKL_list(iter) / options.TolsKL; % This should be fixed
41 |
42 | % Stop sampling after sample variance has stabilized below ToL
43 | if ~isempty(idx_stable) && optimState.StopSampling == 0 && ~optimState.Warmup
44 | varss_list = stats.gpSampleVar;
45 | if sum(w.*varss_list(idx_stable:iter)) < options.TolGPVarMCMC
46 | optimState.StopSampling = optimState.N;
47 | end
48 | end
49 |
50 | % Compute average ELCBO improvement per fcn eval in the past few iters
51 | idx0 = max(1,iter-ceil(0.5*TolStableIters)+1);
52 | xx = stats.funccount(idx0:iter);
53 | yy = stats.elbo(idx0:iter) - options.ELCBOImproWeight*stats.elbo_sd(idx0:iter);
54 | p = polyfit(xx,yy,1);
55 | ELCBOimpro = p(1);
56 |
57 | else
58 | rindex_vec = Inf(1,3);
59 | ELCBOimpro = NaN;
60 | end
61 |
62 | % Store reliability index
63 | rindex = mean(rindex_vec);
64 | stats.rindex(iter) = rindex;
65 | stats.elcbo_impro(iter) = ELCBOimpro;
66 | optimState.R = rindex;
67 |
68 | % Check stability termination condition
69 | stableflag = false;
70 | if iter >= TolStableIters && ...
71 | rindex < 1 && ...
72 | ELCBOimpro < options.TolImprovement
73 |
74 | % Count how many good iters in the recent past (excluding current)
75 | stablecount = sum(stats.rindex(iter-TolStableIters+1:iter-1) < 1);
76 |
77 | % Iteration is stable if almost all recent iterations are stable
78 | if stablecount >= TolStableIters - floor(TolStableIters*options.TolStableExcptFrac) - 1
79 | % If stable but entropy switch is ON, turn it off and continue
80 | if optimState.EntropySwitch && isfinite(options.EntropyForceSwitch)
81 | optimState.EntropySwitch = false;
82 | if isempty(action); action = 'entropy switch'; else; action = [action ', entropy switch']; end
83 | else
84 | % Allow termination only if distant from last warping
85 | if (iter - optimState.LastSuccessfulWarping) >= TolStableIters/3
86 | isFinished_flag = true;
87 | exitflag = 1;
88 | msg = 'Inference terminated: variational solution stable for OPTIONS.TolStableCount fcn evaluations.';
89 | end
90 | stableflag = true;
91 | if isempty(action); action = 'stable'; else; action = [action ', stable']; end
92 | end
93 | end
94 | end
95 | stats.stable(iter) = stableflag; % Store stability flag
96 |
97 | % Prevent early termination
98 | if optimState.funccount < options.MinFunEvals || ...
99 | optimState.iter < options.MinIter
100 | isFinished_flag = false;
101 | end
102 |
103 | end
104 |
105 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
106 | function [idx_stable,dN,dN_last,w] = getStableIter(stats,optimState,options)
107 | %GETSTABLEITER Find index of starting stable iteration.
108 |
109 | iter = optimState.iter;
110 | idx_stable = [];
111 | dN = []; dN_last = []; w = [];
112 |
113 | if optimState.iter < 3; return; end
114 |
115 | if ~isempty(stats)
116 | N_list = stats.N;
117 | idx_stable = 1;
118 | if ~isempty(idx_stable)
119 | dN = optimState.N - N_list(idx_stable);
120 | dN_last = N_list(end) - N_list(end-1);
121 | end
122 |
123 | % Compute weighting function
124 | Nw = numel(idx_stable:iter);
125 | w1 = zeros(1,Nw);
126 | w1(end) = 1;
127 | w2 = exp(-(stats.N(end) - stats.N(end-Nw+1:end))/10);
128 | w2 = w2 / sum(w2);
129 | w = 0.5*w1 + 0.5*w2;
130 |
131 | end
132 |
133 | end
134 |
135 |
--------------------------------------------------------------------------------
/rosenbrock_test.m:
--------------------------------------------------------------------------------
1 | function [y,s] = rosenbrock_test(x,sigma)
2 | %ROSENBROCK_TEST Rosenbrock's broad 'banana' function.
3 |
4 | if nargin < 2 || isempty(sigma); sigma = 0; end
5 |
6 | % Likelihood according to a broad Rosenbrock's function
7 | y = -sum((x(:,1:end-1) .^2 - x(:,2:end)) .^ 2 + (x(:,1:end-1)-1).^2/100,2);
8 |
9 | % Noisy test
10 | if sigma > 0
11 | n = size(x,1);
12 | y = y + sigma*randn([n,1]);
13 | if nargout > 1
14 | s = sigma*ones(n,1);
15 | end
16 | end
17 |
18 | % Might want to add a prior, such as
19 | % sigma2 = 9; % Prior variance
20 | % y = y - 0.5*sum(x.^2,2)/sigma2 - 0.5*D*log(2*pi*sigma2);
--------------------------------------------------------------------------------
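Quick hand-worked sanity checks for rosenbrock_test.m:

    y = rosenbrock_test([0 0]);      % -(0^2-0)^2 - (0-1)^2/100 = -0.01
    y = rosenbrock_test([1 1]);      % 0, the maximum of the log density
    [y,s] = rosenbrock_test(randn(5,2),0.1);  % noisy version, s = 0.1*ones(5,1)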
/shared/msmoothboxlogpdf.m:
--------------------------------------------------------------------------------
1 | function y = msmoothboxlogpdf(x,a,b,sigma)
2 | %MSMOOTHBOXLOGPDF Multivariate smooth-box log probability density function.
3 | % Y = MSMOOTHBOXLOGPDF(X,A,B,SIGMA) returns the logarithm of the pdf of
4 | % the multivariate smooth-box distribution with pivots A and B and scale
5 | % SIGMA, evaluated at the values in X. The multivariate smooth-box pdf is
6 | % the product of univariate smooth-box pdfs in each dimension.
7 | %
8 | % For each dimension i, the univariate smooth-box pdf is defined as a
9 | % uniform distribution between pivots A(i), B(i) and Gaussian tails that
10 | % fall starting from p(A(i)) to the left (resp., p(B(i)) to the right)
11 | % with standard deviation SIGMA(i).
12 | %
13 | % X can be a matrix, where each row is a separate point and each column
14 | % is a different dimension. Similarly, A, B, and SIGMA can also be
15 | % matrices of the same size as X.
16 | %
17 | % The log pdf is typically preferred in numerical computations involving
18 | % probabilities, as it is more stable.
19 | %
20 | % See also MSMOOTHBOXPDF, MSMOOTHBOXRND.
21 |
22 | % Luigi Acerbi 2022
23 |
24 | [N,D] = size(x);
25 |
26 | if any(sigma(:) <= 0)
27 |     error('msmoothboxlogpdf:NonPositiveSigma', ...
28 | 'All elements of SIGMA should be positive.');
29 | end
30 |
31 | if D > 1
32 | if isscalar(a); a = a*ones(1,D); end
33 | if isscalar(b); b = b*ones(1,D); end
34 | if isscalar(sigma); sigma = sigma*ones(1,D); end
35 | end
36 |
37 | if size(a,2) ~= D || size(b,2) ~= D || size(sigma,2) ~= D
38 |     error('msmoothboxlogpdf:SizeError', ...
39 | 'A, B, SIGMA should be scalars or have the same number of columns as X.');
40 | end
41 |
42 | if size(a,1) == 1; a = repmat(a,[N,1]); end
43 | if size(b,1) == 1; b = repmat(b,[N,1]); end
44 | if size(sigma,1) == 1; sigma = repmat(sigma,[N,1]); end
45 |
46 | if any(a(:) >= b(:))
47 |     error('msmoothboxlogpdf:OrderError', ...
48 | 'For all elements of A and B, the order A < B should hold.');
49 | end
50 |
51 | y = -inf(size(x));
52 | lnf = log(1/sqrt(2*pi)./sigma) - log1p(1/sqrt(2*pi)./sigma.*(b - a));
53 |
54 | for ii = 1:D
55 | idx = x(:,ii) < a(:,ii);
56 | y(idx,ii) = lnf(idx,ii) - 0.5*((x(idx,ii) - a(idx,ii))./sigma(idx,ii)).^2;
57 |
58 | idx = x(:,ii) >= a(:,ii) & x(:,ii) <= b(:,ii);
59 | y(idx,ii) = lnf(idx,ii);
60 |
61 | idx = x(:,ii) > b(:,ii);
62 | y(idx,ii) = lnf(idx,ii) - 0.5*((x(idx,ii) - b(idx,ii))./sigma(idx,ii)).^2;
63 | end
64 |
65 | y = sum(y,2);
--------------------------------------------------------------------------------
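An illustrative 1-D evaluation (the pivots and scale are assumptions for the sketch):

    x = [-2; 0; 2];
    y = msmoothboxlogpdf(x,-1,1,0.5);
    % y(2) is the plateau value log f; y(1) and y(3) fall off from it by
    % -0.5*((x-pivot)/sigma).^2, here -0.5*(1/0.5)^2 = -2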
/shared/msmoothboxpdf.m:
--------------------------------------------------------------------------------
1 | function y = msmoothboxpdf(x,a,b,sigma)
2 | %MSMOOTHBOXPDF Multivariate smooth-box probability density function.
3 | % Y = MSMOOTHBOXPDF(X,A,B,SIGMA) returns the pdf of the multivariate
4 | % smooth-box distribution with pivots A and B and scale SIGMA, evaluated
5 | % at the values in X. The multivariate smooth-box pdf is the product of
6 | % univariate smooth-box pdfs in each dimension.
7 | %
8 | % For each dimension i, the univariate smooth-box pdf is defined as a
9 | % uniform distribution between pivots A(i), B(i) and Gaussian tails that
10 | % fall starting from p(A(i)) to the left (resp., p(B(i)) to the right)
11 | % with standard deviation SIGMA(i).
12 | %
13 | % X can be a matrix, where each row is a separate point and each column
14 | % is a different dimension. Similarly, A, B, and SIGMA can also be
15 | % matrices of the same size as X.
16 | %
17 | % See also MSMOOTHBOXLOGPDF, MSMOOTHBOXRND.
18 |
19 | % Luigi Acerbi 2022
20 |
21 | y = exp(msmoothboxlogpdf(x,a,b,sigma));
--------------------------------------------------------------------------------
/shared/msmoothboxrnd.m:
--------------------------------------------------------------------------------
1 | function r = msmoothboxrnd(a,b,sigma,n)
2 | %MSMOOTHBOXRND Random arrays from the multivariate smooth-box distribution.
3 | % R = MSMOOTHBOXRND(A,B,SIGMA) returns an N-by-D matrix R of random
4 | % vectors chosen from the multivariate smooth-box distribution
5 | % with pivots A and B and scale SIGMA. A, B and SIGMA are N-by-D matrices,
6 | % and MSMOOTHBOXRND generates each row of R using the corresponding row
7 | % of A, B and SIGMA.
8 | %
9 | % R = MSMOOTHBOXRND(A,B,SIGMA,N) returns a N-by-D matrix R of random
10 | % vectors chosen from the multivariate smooth-box distribution
11 | % with pivots A and B and scale SIGMA.
12 | %
13 | % See also MSMOOTHBOXPDF.
14 |
15 | % Luigi Acerbi 2022
16 |
17 | [Na,Da] = size(a);
18 | [Nb,Db] = size(b);
19 | [Nsigma,Dsigma] = size(sigma);
20 |
21 | if any(sigma(:) <= 0)
22 | error('msmoothboxrnd:NonPositiveSigma', ...
23 | 'All elements of SIGMA should be positive.');
24 | end
25 |
26 | if nargin < 4 || isempty(n)
27 | n = max([Na,Nb,Nsigma]);
28 | else
29 | if (Na ~= 1 && Na ~= n) || (Nb ~= 1 && Nb ~= n) || ...
30 | (Nsigma ~= 1 && Nsigma ~= n)
31 | error('msmoothboxrnd:SizeError', ...
32 | 'A, B, SIGMA should be 1-by-D or N-by-D arrays.');
33 | end
34 | end
35 | if Na ~= Nb || Da ~= Db || Na ~= Nsigma || Da ~= Dsigma
36 | error('msmoothboxrnd:SizeError', ...
37 | 'A, B, SIGMA should be arrays of the same size.');
38 | end
39 |
40 | D = Da;
41 |
42 | if size(a,1) == 1; a = repmat(a,[n,1]); end
43 | if size(b,1) == 1; b = repmat(b,[n,1]); end
44 | if size(sigma,1) == 1; sigma = repmat(sigma,[n,1]); end
45 |
46 | r = zeros(n,D);
47 |
48 | nf = 1 + 1/sqrt(2*pi)./sigma.*(b - a);
49 |
50 | % Sample one dimension at a time
51 | for d = 1:D
52 | % Draw component (left/right tails or plateau)
53 | u = nf(:,d) .* rand(n,1);
54 |
55 | % Left Gaussian tails
56 | idx = u < 0.5;
57 | if any(idx)
58 | z1 = abs(randn(sum(idx),1).*sigma(idx,d));
59 |         r(idx,d) = a(idx,d) - z1;
60 | end
61 |
62 | % Right Gaussian tails
63 | idx = (u >= 0.5 & u < 1);
64 | if any(idx)
65 | z1 = abs(randn(sum(idx),1).*sigma(idx,d));
66 |         r(idx,d) = b(idx,d) + z1;
67 | end
68 |
69 | % Plateau
70 | idx = u >= 1;
71 | if any(idx)
72 | r(idx,d) = a(idx,d) + (b(idx,d) - a(idx,d)).*rand(sum(idx),1);
73 | end
74 | end
--------------------------------------------------------------------------------
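A sampling sketch with hypothetical pivots; per dimension, each Gaussian tail receives probability mass 0.5/nf, where nf is the normalization computed above:

    r = msmoothboxrnd([-1 0],[1 2],[0.5 0.5],1e4);
    mean(r(:,1) < -1)     % ~0.193 = 0.5/(1 + 2/(sqrt(2*pi)*0.5))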
/shared/msplinetrapezlogpdf.m:
--------------------------------------------------------------------------------
1 | function y = msplinetrapezlogpdf(x,a,b,c,d)
2 | %MSPLINETRAPEZLOGPDF Multivariate spline-trapezoidal log pdf.
3 | % Y = MSPLINETRAPEZLOGPDF(X,A,B,C,D) returns the logarithm of the pdf of
4 | % the multivariate spline-trapezoidal distribution with external bounds
5 | % A and D and internal points B and C, evaluated at the values in X. The
6 | % multivariate pdf is the product of univariate spline-trapezoidal pdfs
7 | % in each dimension.
8 | %
9 | % For each dimension i, the univariate spline-trapezoidal pdf is defined
10 | % as a trapezoidal pdf whose points A, B and C, D are connected by cubic
11 | % splines such that the pdf is continuous and its derivatives at A, B, C,
12 | % and D are zero (so the derivatives are also continuous):
13 | %
14 | % | __________
15 | % | /| |\
16 | % p(X(i)) | / | | \
17 | % | / | | \
18 | % |___/___|________|___\____
19 | % A(i) B(i) C(i) D(i)
20 | % X(i)
21 | %
22 | % X can be a matrix, where each row is a separate point and each column
23 | % is a different dimension. Similarly, A, B, C, and D can also be
24 | % matrices of the same size as X.
25 | %
26 | % The log pdf is typically preferred in numerical computations involving
27 | % probabilities, as it is more stable.
28 | %
29 | % See also MSPLINETRAPEZPDF, MSPLINETRAPEZRND.
30 |
31 | % Luigi Acerbi 2022
32 |
33 | [N,D] = size(x);
34 |
35 | if D > 1
36 | if isscalar(a); a = a*ones(1,D); end
37 | if isscalar(b); b = b*ones(1,D); end
38 | if isscalar(c); c = c*ones(1,D); end
39 | if isscalar(d); d = d*ones(1,D); end
40 | end
41 |
42 | if size(a,2) ~= D || size(b,2) ~= D || size(c,2) ~= D || size(d,2) ~= D
43 | error('msplinetrapezlogpdf:SizeError', ...
44 | 'A, B, C, D should be scalars or have the same number of columns as X.');
45 | end
46 |
47 | if size(a,1) == 1; a = repmat(a,[N,1]); end
48 | if size(b,1) == 1; b = repmat(b,[N,1]); end
49 | if size(c,1) == 1; c = repmat(c,[N,1]); end
50 | if size(d,1) == 1; d = repmat(d,[N,1]); end
51 |
52 | y = -inf(size(x));
53 | % Normalization factor
54 | % nf = c - b + 0.5*(d - c + b - a);
55 | lnf = log(0.5*(c - b + d - a));
56 |
57 | for ii = 1:D
58 | idx = x(:,ii) >= a(:,ii) & x(:,ii) < b(:,ii);
59 | z = (x(idx,ii) - a(idx,ii))./(b(idx,ii) - a(idx,ii));
60 | y(idx,ii) = log(-2*z.^3 + 3*z.^2) - lnf(idx,ii);
61 |
62 | idx = x(:,ii) >= b(:,ii) & x(:,ii) < c(:,ii);
63 | y(idx,ii) = -lnf(idx,ii);
64 |
65 | idx = x(:,ii) >= c(:,ii) & x(:,ii) < d(:,ii);
66 | z = 1 - (x(idx,ii) - c(idx,ii)) ./ (d(idx,ii) - c(idx,ii));
67 | y(idx,ii) = log(-2*z.^3 + 3*z.^2) - lnf(idx,ii);
68 | end
69 |
70 | y = sum(y,2);
71 |
72 |
--------------------------------------------------------------------------------
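An illustrative evaluation (the parameters are assumptions for the sketch):

    x = linspace(-1.5,2.5,9)';
    y = msplinetrapezlogpdf(x,-1,0,1,2);    % A=-1, B=0, C=1, D=2
    % On the plateau [B,C) the log pdf equals -log(0.5*(C-B+D-A)) = -log(2)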
/shared/msplinetrapezpdf.m:
--------------------------------------------------------------------------------
1 | function y = msplinetrapezpdf(x,a,b,c,d)
2 | %MSPLINETRAPEZPDF Multivariate spline-trapezoidal probability density fcn (pdf).
3 | % Y = MSPLINETRAPEZPDF(X,A,B,C,D) returns the pdf of the multivariate
4 | % spline-trapezoidal distribution with external bounds A and D and internal
5 | % points B and C, evaluated at the values in X. The multivariate pdf is
6 | % the product of univariate spline-trapezoidal pdfs in each dimension.
7 | %
8 | % For each dimension i, the univariate spline-trapezoidal pdf is defined
9 | % as a trapezoidal pdf whose points A, B and C, D are connected by cubic
10 | % splines such that the pdf is continuous and its derivatives at A, B, C,
11 | % and D are zero (so the derivatives are also continuous):
12 | %
13 | % | __________
14 | % | /| |\
15 | % p(X(i)) | / | | \
16 | % | / | | \
17 | % |___/___|________|___\____
18 | % A(i) B(i) C(i) D(i)
19 | % X(i)
20 | %
21 | % X can be a matrix, where each row is a separate point and each column
22 | % is a different dimension. Similarly, A, B, C, and D can also be
23 | % matrices of the same size as X.
24 | %
25 | % See also MSPLINETRAPEZLOGPDF, MSPLINETRAPEZRND.
26 |
27 | % Luigi Acerbi 2022
28 |
29 | y = exp(msplinetrapezlogpdf(x,a,b,c,d));
--------------------------------------------------------------------------------
/shared/msplinetrapezrnd.m:
--------------------------------------------------------------------------------
1 | function r = msplinetrapezrnd(a,u,v,b,n)
2 | %MSPLINETRAPEZRND Random arrays from the multivariate spline-trapezoidal distribution.
3 | % R = MSPLINETRAPEZRND(A,U,V,B) returns an N-by-D matrix R of random
4 | % vectors chosen from the multivariate spline-trapezoidal distribution
5 | % with external bounds A and B and internal points U and V. A, U, V and B
6 | % are N-by-D matrices, and MSPLINETRAPEZRND generates each row of R using
7 | % the corresponding row of A, U, V and B.
8 | %
9 | % R = MSPLINETRAPEZRND(A,U,V,B,N) returns a N-by-D matrix R of random
10 | % vectors chosen from the multivariate spline-trapezoidal distribution
11 | % with external bounds A and B and internal points U and V.
12 | %
13 | % See also MSPLINETRAPEZPDF.
14 |
15 | % Luigi Acerbi 2022
16 |
17 | [Na,Da] = size(a);
18 | [Nu,Du] = size(u);
19 | [Nv,Dv] = size(v);
20 | [Nb,Db] = size(b);
21 |
22 | if nargin < 5 || isempty(n)
23 | n = max([Na,Nu,Nv,Nb]);
24 | else
25 | if (Na ~= 1 && Na ~= n) || (Nb ~= 1 && Nb ~= n) || ...
26 | (Nu ~= 1 && Nu ~= n) || (Nv ~= 1 && Nv ~= n)
27 | error('msplinetrapezrnd:SizeError', ...
28 | 'A, U, V, B should be 1-by-D or N-by-D arrays.');
29 | end
30 | end
31 | if Na ~= Nb || Da ~= Db || Na ~= Nu || Da ~= Du || Na ~= Nv || Da ~= Dv
32 | error('msplinetrapezrnd:SizeError', ...
33 | 'A, U, V, B should be arrays of the same size.');
34 | end
35 |
36 | D = Da;
37 |
38 | if size(a,1) == 1; a = repmat(a,[n,1]); end
39 | if size(u,1) == 1; u = repmat(u,[n,1]); end
40 | if size(v,1) == 1; v = repmat(v,[n,1]); end
41 | if size(b,1) == 1; b = repmat(b,[n,1]); end
42 |
43 | r = zeros(n,D);
44 |
45 | % Sample one dimension at a time
46 | for d = 1:D
47 | % Compute maximum of one-dimensional pdf
48 | x0 = 0.5*(u(:,d) + v(:,d));
49 | y_max = msplinetrapezpdf(x0,a(:,d),u(:,d),v(:,d),b(:,d));
50 |
51 | idx = true(n,1);
52 | r1 = zeros(n,1);
53 | n1 = sum(idx);
54 |
55 | % Keep doing rejection sampling
56 | while n1 > 0
57 | % Uniform sampling in the box
58 | r1(idx) = bsxfun(@plus, a(idx,d), bsxfun(@times, rand(n1,1), b(idx,d) - a(idx,d)));
59 |
60 | % Rejection sampling
61 | z1 = rand(n1,1) .* y_max(idx);
62 | y1 = msplinetrapezpdf(r1(idx),a(idx,d),u(idx,d),v(idx,d),b(idx,d));
63 |
64 | idx_new = false(n,1);
65 | idx_new(idx) = z1 > y1; % Resample points outside
66 |
67 | idx = idx_new;
68 | n1 = sum(idx);
69 | end
70 |
71 | % Assign d-th dimension
72 | r(:,d) = r1;
73 | end
--------------------------------------------------------------------------------
/shared/mtrapezlogpdf.m:
--------------------------------------------------------------------------------
1 | function y = mtrapezlogpdf(x,a,u,v,b)
2 | %MTRAPEZLOGPDF Multivariate trapezoidal probability log pdf.
3 | % Y = MTRAPEZLOGPDF(X,A,U,V,B) returns the logarithm of the pdf of the
4 | % multivariate trapezoidal distribution with external bounds A and B and
5 | % internal points U and V, evaluated at the values in X. The multivariate
6 | % trapezoidal pdf is the product of univariate trapezoidal pdfs in each
7 | % dimension.
8 | %
9 | % For each dimension i, the univariate trapezoidal pdf is defined as:
10 | %
11 | % | __________
12 | % | /| |\
13 | % p(X(i)) | / | | \
14 | % | / | | \
15 | % |___/___|________|___\____
16 | % A(i) U(i) V(i) B(i)
17 | % X(i)
18 | %
19 | % X can be a matrix, where each row is a separate point and each column
20 |  %   is a different dimension. Similarly, A, U, V, and B can also be
21 | % matrices of the same size as X.
22 | %
23 | % The log pdf is typically preferred in numerical computations involving
24 | % probabilities, as it is more stable.
25 | %
26 | % See also MTRAPEZPDF, MTRAPEZRND.
27 |
28 | % Luigi Acerbi 2022
29 |
30 | [N,D] = size(x);
31 |
32 | if D > 1
33 | if isscalar(a); a = a*ones(1,D); end
34 | if isscalar(u); u = u*ones(1,D); end
35 | if isscalar(v); v = v*ones(1,D); end
36 | if isscalar(b); b = b*ones(1,D); end
37 | end
38 |
39 | if size(a,2) ~= D || size(u,2) ~= D || size(v,2) ~= D || size(b,2) ~= D
40 |     error('mtrapezlogpdf:SizeError', ...
41 |         'A, U, V, B should be scalars or have the same number of columns as X.');
42 | end
43 |
44 | if size(a,1) == 1; a = repmat(a,[N,1]); end
45 | if size(u,1) == 1; u = repmat(u,[N,1]); end
46 | if size(v,1) == 1; v = repmat(v,[N,1]); end
47 | if size(b,1) == 1; b = repmat(b,[N,1]); end
48 |
49 | y = -inf(size(x));
50 | lnf = log(0.5) + log(b - a + v - u) + log(u - a);
51 |
52 | for ii = 1:D
53 | idx = x(:,ii) >= a(:,ii) & x(:,ii) < u(:,ii);
54 | y(idx,ii) = log(x(idx,ii) - a(idx,ii)) - lnf(idx,ii);
55 |
56 | idx = x(:,ii) >= u(:,ii) & x(:,ii) < v(:,ii);
57 | y(idx,ii) = log(u(idx,ii)-a(idx,ii)) - lnf(idx,ii);
58 |
59 | idx = x(:,ii) >= v(:,ii) & x(:,ii) < b(:,ii);
60 | y(idx,ii) = log(b(idx,ii) - x(idx,ii)) - log(b(idx,ii) - v(idx,ii)) + log(u(idx,ii)-a(idx,ii)) - lnf(idx,ii);
61 | end
62 |
63 | y = sum(y,2);
--------------------------------------------------------------------------------
/shared/mtrapezpdf.m:
--------------------------------------------------------------------------------
1 | function y = mtrapezpdf(x,a,u,v,b)
2 | %MTRAPEZPDF Multivariate trapezoidal probability density function (pdf).
3 | % Y = MTRAPEZPDF(X,A,U,V,B) returns the pdf of the multivariate trapezoidal
4 | % distribution with external bounds A and B and internal points U and V,
5 | % evaluated at the values in X. The multivariate trapezoidal
6 | % pdf is the product of univariate trapezoidal pdfs in each dimension.
7 | %
8 | % For each dimension i, the univariate trapezoidal pdf is defined as:
9 | %
10 | % | __________
11 | % | /| |\
12 | % p(X(i)) | / | | \
13 | % | / | | \
14 | % |___/___|________|___\____
15 | % A(i) U(i) V(i) B(i)
16 | % X(i)
17 | %
18 | % X can be a matrix, where each row is a separate point and each column
19 |  %   is a different dimension. Similarly, A, U, V, and B can also be
20 | % matrices of the same size as X.
21 | %
22 | % See also MTRAPEZLOGPDF, MTRAPEZRND.
23 |
24 | % Luigi Acerbi 2022
25 |
26 | y = exp(mtrapezlogpdf(x,a,u,v,b));
--------------------------------------------------------------------------------
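Hand-worked spot checks (the plateau height is 2/(B-A+V-U), so 0.5 for the values below):

    y = mtrapezpdf(1.5,0,1,2,3)     % 0.5, on the plateau [U,V)
    y = mtrapezpdf(0.5,0,1,2,3)     % 0.25, halfway up the left ramp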
/shared/mtrapezrnd.m:
--------------------------------------------------------------------------------
1 | function r = mtrapezrnd(a,u,v,b,n)
2 | %MTRAPEZRND Random arrays from the multivariate trapezoidal distribution.
3 | % R = MTRAPEZRND(A,U,V,B) returns an N-by-D matrix R of random vectors
4 | % chosen from the multivariate trapezoidal distribution with external
5 | % bounds A and B and internal points U and V. A, U, V and B are N-by-D
6 | % matrices, and MTRAPEZRND generates each row of R using the corresponding
7 | % row of A, U, V and B.
8 | %
9 | % R = MTRAPEZRND(A,U,V,B,N) returns a N-by-D matrix R of random vectors
10 | % chosen from the multivariate trapezoidal distribution with external
11 | % bounds A and B and internal points U and V.
12 | %
13 | % See also MTRAPEZPDF.
14 |
15 | % Luigi Acerbi 2022
16 |
17 | [Na,Da] = size(a);
18 | [Nu,Du] = size(u);
19 | [Nv,Dv] = size(v);
20 | [Nb,Db] = size(b);
21 |
22 | if nargin < 5 || isempty(n)
23 | n = max([Na,Nu,Nv,Nb]);
24 | else
25 | if (Na ~= 1 && Na ~= n) || (Nb ~= 1 && Nb ~= n) || ...
26 | (Nu ~= 1 && Nu ~= n) || (Nv ~= 1 && Nv ~= n)
27 | error('mtrapezrnd:SizeError', ...
28 | 'A, U, V, B should be 1-by-D or N-by-D arrays.');
29 | end
30 | end
31 | if Na ~= Nb || Da ~= Db || Na ~= Nu || Da ~= Du || Na ~= Nv || Da ~= Dv
32 | error('mtrapezrnd:SizeError', ...
33 | 'A, U, V, B should be arrays of the same size.');
34 | end
35 |
36 | D = Da;
37 |
38 | if size(a,1) == 1; a = repmat(a,[n,1]); end
39 | if size(u,1) == 1; u = repmat(u,[n,1]); end
40 | if size(v,1) == 1; v = repmat(v,[n,1]); end
41 | if size(b,1) == 1; b = repmat(b,[n,1]); end
42 |
43 | r = zeros(n,D);
44 |
45 | % Sample one dimension at a time
46 | for d = 1:D
47 | % Compute maximum of one-dimensional pdf
48 | x0 = 0.5*(u(:,d) + v(:,d));
49 | y_max = mtrapezpdf(x0,a(:,d),u(:,d),v(:,d),b(:,d));
50 |
51 | idx = true(n,1);
52 | r1 = zeros(n,1);
53 | n1 = sum(idx);
54 |
55 | % Keep doing rejection sampling
56 | while n1 > 0
57 | % Uniform sampling in the box
58 | r1(idx) = bsxfun(@plus, a(idx,d), bsxfun(@times, rand(n1,1), b(idx,d) - a(idx,d)));
59 |
60 | % Rejection sampling
61 | z1 = rand(n1,1) .* y_max(idx);
62 | y1 = mtrapezpdf(r1(idx),a(idx,d),u(idx,d),v(idx,d),b(idx,d));
63 |
64 | idx_new = false(n,1);
65 | idx_new(idx) = z1 > y1; % Resample points outside
66 |
67 | idx = idx_new;
68 | n1 = sum(idx);
69 | end
70 |
71 | % Assign d-th dimension
72 | r(:,d) = r1;
73 | end
--------------------------------------------------------------------------------
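A draw-and-inspect sketch (hypothetical parameters):

    r = mtrapezrnd(0,1,2,3,1e4);    % 1e4 scalar draws by rejection sampling
    histogram(r,50)                 % should trace out the trapezoidal shape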
/shared/munifboxlogpdf.m:
--------------------------------------------------------------------------------
1 | function y = munifboxlogpdf(x,a,b)
2 | %MUNIFBOXLOGPDF Multivariate uniform box log probability density function.
3 | % Y = MUNIFBOXLOGPDF(X,A,B) returns the logarithm of the pdf of the
4 | % multivariate uniform-box distribution with bounds A and B, evaluated at
5 | % the values in X. The multivariate uniform box pdf is the product of
6 | % univariate uniform pdfs in each dimension.
7 | %
8 | % For each dimension i, the univariate uniform-box pdf is defined as:
9 | %
10 | % |
11 | % | ______________
12 | % p(X(i)) | | |
13 | % | | |
14 | % |___|____________|_____
15 | % A(i) B(i)
16 | % X(i)
17 | %
18 | % X can be a matrix, where each row is a separate point and each column
19 | % is a different dimension. Similarly, A and B can also be matrices of
20 | % the same size as X.
21 | %
22 | % The log pdf is typically preferred in numerical computations involving
23 | % probabilities, as it is more stable.
24 | %
25 | % See also MUNIFBOXPDF, MUNIFBOXRND.
26 |
27 | % Luigi Acerbi 2022
28 |
29 | [N,D] = size(x);
30 |
31 | if D > 1
32 | if isscalar(a); a = a*ones(1,D); end
33 | if isscalar(b); b = b*ones(1,D); end
34 | end
35 |
36 | if size(a,2) ~= D || size(b,2) ~= D
37 | error('munifboxlogpdf:SizeError', ...
38 | 'A, B should be scalars or have the same number of columns as X.');
39 | end
40 |
41 | if size(a,1) == 1; a = repmat(a,[N,1]); end
42 | if size(b,1) == 1; b = repmat(b,[N,1]); end
43 |
44 | if any(a(:) >= b(:))
45 | error('munifboxlogpdf:OrderError', ...
46 | 'For all elements of A and B, the order A < B should hold.');
47 | end
48 |
49 | lnf = sum(log(b - a),2);
50 | y = -lnf .* ones(N,1);
51 | idx = any(bsxfun(@lt, x, a),2) | any(bsxfun(@gt, x, b),2);
52 | y(idx) = -inf;
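
A minimal usage sketch (illustrative, not part of the file): the log pdf
is constant inside the box and -Inf outside.

    a = [0 0]; b = [2 4];
    munifboxlogpdf([1 1],a,b)   % -log(8) ~ -2.0794, point inside the box
    munifboxlogpdf([3 1],a,b)   % -Inf, first coordinate out of bounds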
--------------------------------------------------------------------------------
/shared/munifboxpdf.m:
--------------------------------------------------------------------------------
1 | function y = munifboxpdf(x,a,b)
2 | %MUNIFBOXPDF Multivariate uniform box probability density function.
3 | % Y = MUNIFBOXPDF(X,A,B) returns the pdf of the multivariate uniform-box
4 | % distribution with bounds A and B, evaluated at the values in X. The
5 | % multivariate uniform box pdf is the product of univariate uniform
6 | % pdfs in each dimension.
7 | %
8 | % For each dimension i, the univariate uniform-box pdf is defined as:
9 | %
10 | % |
11 | % | ______________
12 | % p(X(i)) | | |
13 | % | | |
14 | % |___|____________|_____
15 | % A(i) B(i)
16 | % X(i)
17 | %
18 | % X can be a matrix, where each row is a separate point and each column
19 | % is a different dimension. Similarly, A and B can also be matrices of
20 | % the same size as X.
21 | %
22 | % See also MUNIFBOXLOGPDF, MUNIFBOXRND.
23 |
24 | % Luigi Acerbi 2022
25 |
26 | y = exp(munifboxlogpdf(x,a,b));
--------------------------------------------------------------------------------
/shared/munifboxrnd.m:
--------------------------------------------------------------------------------
1 | function r = munifboxrnd(a,b,n)
2 | %MUNIFBOXRND Random arrays from the multivariate uniform box distribution.
3 | % R = MUNIFBOXRND(A,B) returns an N-by-D matrix R of random vectors
4 | % chosen from the multivariate uniform box distribution with bounds A and
5 | % B. A and B are N-by-D matrices, and MUNIFBOXRND generates each row of R
6 | % using the corresponding row of A and B.
7 | %
8 | % R = MUNIFBOXRND(A,B,N) returns an N-by-D matrix R of random vectors
9 | % chosen from the multivariate uniform box distribution with 1-by-D bound
10 | % vectors A and B.
11 | %
12 | % See also MUNIFBOXPDF.
13 |
14 | % Luigi Acerbi 2022
15 |
16 | [N,D] = size(a);
17 | [Nb,Db] = size(b);
18 |
19 | if nargin < 3 || isempty(n)
20 | n = N;
21 | else
22 | if (N ~= 1 && N ~= n) || (Nb ~= 1 && Nb ~= n)
23 | error('munifboxrnd:SizeError', ...
24 | 'A and B should be 1-by-D or N-by-D arrays.');
25 | end
26 | end
27 | if N ~= Nb || D ~= Db
28 | error('munifboxrnd:SizeError', ...
29 | 'A and B should be arrays of the same size.');
30 | end
31 |
32 | if any(a(:) >= b(:))
33 | error('munifboxrnd:OrderError', ...
34 | 'For all elements of A and B, the order A < B should hold.');
35 | end
36 |
37 |
38 | r = bsxfun(@plus, a, bsxfun(@times, rand(n,D), b - a));
--------------------------------------------------------------------------------
/shared/mvnkl.m:
--------------------------------------------------------------------------------
1 | function [kl1,kl2] = mvnkl(Mu1,Sigma1,Mu2,Sigma2)
2 | %MVNKL Kullback-Leibler divergence between two multivariate normal pdfs.
3 | % [KL1,KL2] = MVNKL(MU1,SIGMA1,MU2,SIGMA2) returns KL(N1||N2) and KL(N2||N1), where Ni is the normal pdf with mean MUi and covariance matrix SIGMAi.
4 | D = numel(Mu1);
5 |
6 | Mu1 = Mu1(:);
7 | Mu2 = Mu2(:);
8 |
9 | dmu = Mu2 - Mu1;
10 | detq1 = det(Sigma1);
11 | detq2 = det(Sigma2);
12 | lndet = log(detq2 / detq1);
13 |
14 | kl1 = 0.5*(trace(Sigma2\Sigma1) + dmu'*(Sigma2\dmu) - D + lndet);
15 | if nargout > 1
16 | kl2 = 0.5*(trace(Sigma1\Sigma2) + dmu'*(Sigma1\dmu) - D - lndet);
17 | end
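
MVNKL implements the closed-form Gaussian KL divergence
KL(N1||N2) = 0.5*( tr(Sigma2\Sigma1) + dmu'*(Sigma2\dmu) - D
+ log(det(Sigma2)/det(Sigma1)) ), with dmu = Mu2 - Mu1. A quick
univariate sanity check (illustrative, not part of the file):

    [kl1,kl2] = mvnkl(0,1,1,4);   % N(0,1) vs N(1,4); in 1-D the SIGMA inputs are variances
    % kl1 = 0.5*(1/4 + 1/4 - 1 + log(4)) = 0.4431...
    % matches log(s2/s1) + (s1^2+(m1-m2)^2)/(2*s2^2) - 1/2 with s1=1, s2=2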
--------------------------------------------------------------------------------
/shared/qtrapz.m:
--------------------------------------------------------------------------------
1 | function z = qtrapz(y,dim)
2 | %QTRAPZ Quick trapezoidal numerical integration.
3 | % Z = QTRAPZ(Y) computes an approximation of the integral of Y via
4 | % the trapezoidal method (with unit spacing). To compute the integral
5 | % for spacing different from one, multiply Z by the spacing increment.
6 | %
7 | % For vectors, QTRAPZ(Y) is the integral of Y. For matrices, QTRAPZ(Y)
8 | % is a row vector with the integral over each column. For N-D
9 | % arrays, QTRAPZ(Y) works across the first non-singleton dimension.
10 | %
11 | % Z = QTRAPZ(Y,DIM) integrates across dimension DIM of Y. DIM must be
12 | % between 1 and ndims(Y).
13 | %
14 | % QTRAPZ is up to 3-4 times faster than TRAPZ for large arrays.
15 | %
16 | % See also TRAPZ.
17 |
18 | % Luigi Acerbi
19 | % Version 1.0. Release date: Jul/20/2015.
20 |
21 | % By default integrate along the first non-singleton dimension
22 | if nargin < 2; dim = find(size(y)~=1,1); end
23 |
24 | % Behaves as sum on empty array
25 | if isempty(y); z = sum(y,dim); return; end
26 |
27 | % Compute dimensions of input matrix
28 | if isvector(y); n = 1; else n = ndims(y); end
29 |
30 | switch n
31 | case {1,2} % 1-D or 2-D array
32 | switch dim
33 | case 1
34 | z = sum(y,1) - 0.5*(y(1,:) + y(end,:));
35 | case 2
36 | z = sum(y,2) - 0.5*(y(:,1) + y(:,end));
37 | otherwise
38 | error('qtrapz:dimMismatch', 'DIM must specify one of the dimensions of Y.');
39 | end
40 |
41 | case 3 % 3-D array
42 | switch dim
43 | case 1
44 | z = sum(y,1) - 0.5*(y(1,:,:) + y(end,:,:));
45 | case 2
46 | z = sum(y,2) - 0.5*(y(:,1,:) + y(:,end,:));
47 | case 3
48 | z = sum(y,3) - 0.5*(y(:,:,1) + y(:,:,end));
49 | otherwise
50 | error('qtrapz:dimMismatch', 'DIM must specify one of the dimensions of Y.');
51 | end
52 |
53 | case 4 % 4-D array
54 | switch dim
55 | case 1
56 | z = sum(y,1) - 0.5*(y(1,:,:,:) + y(end,:,:,:));
57 | case 2
58 | z = sum(y,2) - 0.5*(y(:,1,:,:) + y(:,end,:,:));
59 | case 3
60 | z = sum(y,3) - 0.5*(y(:,:,1,:) + y(:,:,end,:));
61 | case 4
62 | z = sum(y,4) - 0.5*(y(:,:,:,1) + y(:,:,:,end));
63 | otherwise
64 | error('qtrapz:dimMismatch', 'DIM must specify one of the dimensions of Y.');
65 | end
66 |
67 | otherwise % 5-D array or more
68 | for iDim = 1:n; index{iDim} = 1:size(y,iDim); end
69 | index1 = index; index1{dim} = 1;
70 | indexend = index; indexend{dim} = size(y,dim);
71 | try
72 | z = sum(y,dim) - 0.5*(y(index1{:}) + y(indexend{:}));
73 | catch
74 | error('qtrapz:dimMismatch', 'DIM must specify one of the dimensions of Y.');
75 | end
76 | end
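
A minimal usage sketch (illustrative, not part of the file): QTRAPZ
assumes unit spacing, so multiply by the grid increment.

    x = linspace(0,pi,1000);
    z = qtrapz(sin(x))*(x(2)-x(1));   % ~2, same result as trapz(x,sin(x))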
--------------------------------------------------------------------------------
/shared/warpvars_vbmc_test.m:
--------------------------------------------------------------------------------
1 | function warpvars_vbmc_test(nvars)
2 |
3 | if nargin < 1 || isempty(nvars); nvars = 1; end
4 |
5 |
6 | for iType = [0,3,9,10,12,13]
7 |
8 | fprintf('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n');
9 | fprintf('Testing transformation type %d...\n',iType);
10 |
11 | nvars = 1; % test one variable at a time (overrides the NVARS input)
12 | switch iType
13 | case {0,10}
14 | LB = -Inf; UB = Inf; PLB = -10; PUB = 10;
15 | case {3,9,12,13}
16 | LB = -9; UB = 4; PLB = -8.99; PUB = 3.99;
17 | end
18 |
19 | trinfo = warpvars_vbmc(nvars,LB,UB);
20 | trinfo.type = iType*ones(1,nvars);
21 | trinfo.alpha = exp(2*rand(1,nvars));
22 | trinfo.beta = exp(2*rand(1,nvars));
23 | trinfo.mu = 0.5*(PUB+PLB);
24 | trinfo.delta = (PUB-PLB);
25 |
26 | x = linspace(PLB,PUB,101);
27 | x2 = warpvars_vbmc(warpvars_vbmc(x,'dir',trinfo),'inv',trinfo);
28 |
29 | fprintf('Maximum error for identity transform f^-1(f(x)): %g.\n\n',max(abs(x - x2)));
30 |
31 | fprintf('Checking derivative and log derivative:\n\n');
32 | x0 = rand(1,nvars).*(PUB-PLB)+PLB;
33 | derivcheck(@(x) fun(x,trinfo,0),x0,1);
34 | derivcheck(@(x) fun(x,trinfo,1),x0,1);
35 |
36 | if any(iType == [9 10])
37 | fprintf('Checking derivatives wrt warping parameters:\n\n');
38 | theta0 = 3*randn(1,2);
39 | derivcheck(@(theta) funfirst(theta,x0,trinfo),theta0',0);
40 | derivcheck(@(theta) funmixed(theta,x0,trinfo),theta0',0);
41 | end
42 |
43 | end
44 |
45 |
46 | % if nvars == 1
47 | % x = linspace(LB+sqrt(eps),UB-sqrt(eps),101);
48 | % x2 = warpvars_vbmc(warpvars_vbmc(x,'dir',trinfo),'inv',trinfo);
49 | % max(abs(x - x2))
50 | %
51 | % x0 = rand(1,nvars).*(UB-LB)+LB;
52 | % derivcheck(@(x) fun(x,trinfo),x0,1);
53 | % else
54 | % N = 10;
55 | % [Q,R] = qr(randn(nvars));
56 | % if det(Q) < 0; Q(:,1) = -Q(:,1); end
57 | % trinfo.R_mat = Q;
58 | % % trinfo.R_mat = eye(Nvars);
59 | % % trinfo.scale = exp(randn(1,Nvars));
60 | %
61 | % x = randn(N,nvars);
62 | % x2 = warpvars_vbmc(warpvars_vbmc(x,'dir',trinfo),'inv',trinfo);
63 | %
64 | % x - x2
65 | %
66 | %
67 | %
68 | % x0 = 0.1*rand(1,nvars).*(UB-LB)+LB;
69 | % x0t = warpvars_vbmc(x0,'dir',trinfo);
70 | %
71 | % derivcheck(@(x) fun(x,trinfo),x0,1);
72 | % derivcheck(@(x) invfun(x,trinfo),x0t,1);
73 | %
74 | % end
75 |
76 |
77 | % x0 = randn(1,Nvars);
78 | % derivcheck(@(theta) funfirst(theta,x0,trinfo),0.1*randn(1,2),0);
79 |
80 |
81 | end
82 |
83 | function [y,dy] = fun(x,trinfo,logflag)
84 |
85 | if nargin < 3 || isempty(logflag); logflag = 0; end
86 |
87 | y = warpvars_vbmc(x,'dir',trinfo);
88 | % dy = warpvars_vbmc(y,'g',trinfo);
89 |
90 | if logflag
91 | dy = exp(-warpvars_vbmc(y,'logpdf',trinfo));
92 | else
93 | dy = 1./warpvars_vbmc(y,'pdf',trinfo);
94 | end
95 |
96 | end
97 |
98 | function [y,dy] = invfun(x,trinfo)
99 |
100 | y = warpvars_vbmc(x,'inv',trinfo);
101 | dy = warpvars_vbmc(x,'r',trinfo);
102 | % dy = exp(-warpvars_vbmc(y,'logpdf',trinfo));
103 |
104 | end
105 |
106 |
107 | function [y,dy] = funfirst(theta,x,trinfo)
108 |
109 | nvars = numel(trinfo.lb_orig);
110 | theta = exp(theta);
111 |
112 | trinfo.alpha(1) = theta(1);
113 | trinfo.beta(1) = theta(2);
114 |
115 | y = warpvars_vbmc(x,'d',trinfo);
116 | dy = warpvars_vbmc(y,'f',trinfo);
117 |
118 | dy = dy([1;nvars+1]) .* theta(:)';
119 | % dy = exp(-warpvars_vbmc(y,'logpdf',trinfo));
120 |
121 | end
122 |
123 |
124 | function [dy,ddy] = funmixed(theta,x,trinfo)
125 |
126 | nvars = numel(trinfo.lb_orig);
127 | theta = exp(theta);
128 |
129 | trinfo.alpha(1) = theta(1);
130 | trinfo.beta(1) = theta(2);
131 |
132 | y = warpvars_vbmc(x,'d',trinfo);
133 | dy = exp(-warpvars_vbmc(y,'logpdf',trinfo));
134 |
135 | ddy = warpvars_vbmc(y,'m',trinfo);
136 | ddy = ddy([1;nvars+1]) .* theta(:)';
137 |
138 | end
139 |
--------------------------------------------------------------------------------
/test/test_pdfs_vbmc.m:
--------------------------------------------------------------------------------
1 | function test_pdfs_vbmc()
2 | %TEST_PDFS_VBMC Test pdfs introduced in the VBMC package.
3 |
4 | lb = [-1.1,-4.1];
5 | ub = [3.2,-2.8];
6 | a = [-1,-4];
7 | b = [3,-3];
8 | n = 1e6;
9 |
10 | tolerr = 1e-3; % Error tolerance on normalization constant
11 | tolrmse = 0.05; % Error tolerance on histogram vs pdf
12 |
13 | %% Test multivariate uniform box distribution
14 | pdf1 = @(x) munifboxpdf(x,a(1),b(1));
15 | pdf2 = @(x) munifboxpdf(x,a,b);
16 | pdf1log = @(x) exp(munifboxlogpdf(x,a(1),b(1)));
17 | pdf2log = @(x) exp(munifboxlogpdf(x,a,b));
18 | pdfrnd = @(n) munifboxrnd(a,b,n);
19 | name = 'munifbox';
20 |
21 | test_pdf1_normalization(pdf1,lb,ub,tolerr,name);
22 | test_pdf2_normalization(pdf2,lb,ub,tolerr,name);
23 | test_pdf1_normalization(pdf1log,lb,ub,tolerr,name);
24 | test_pdf2_normalization(pdf2log,lb,ub,tolerr,name);
25 | test_rnd(pdfrnd,pdf1,a,b,n,tolrmse,name);
26 |
27 | %% Test multivariate trapezoidal distribution
28 | u = [-0.5,-3.8];
29 | v = [1.5,-3.4];
30 | pdf1 = @(x) mtrapezpdf(x,a(1),u(1),v(1),b(1));
31 | pdf2 = @(x) mtrapezpdf(x,a,u,v,b);
32 | pdf1log = @(x) exp(mtrapezlogpdf(x,a(1),u(1),v(1),b(1)));
33 | pdf2log = @(x) exp(mtrapezlogpdf(x,a,u,v,b));
34 | pdfrnd = @(n) mtrapezrnd(a,u,v,b,n);
35 | name = 'mtrapez';
36 | test_pdf1_normalization(pdf1,lb,ub,tolerr,name);
37 | test_pdf2_normalization(pdf2,lb,ub,tolerr,name);
38 | test_pdf1_normalization(pdf1log,lb,ub,tolerr,name);
39 | test_pdf2_normalization(pdf2log,lb,ub,tolerr,name);
40 | test_rnd(pdfrnd,pdf1,a,b,n,tolrmse,name);
41 |
42 | %% Test multivariate spline trapezoidal distribution
43 | pdf1 = @(x) msplinetrapezpdf(x,a(1),u(1),v(1),b(1));
44 | pdf2 = @(x) msplinetrapezpdf(x,a,u,v,b);
45 | pdf1log = @(x) exp(msplinetrapezlogpdf(x,a(1),u(1),v(1),b(1)));
46 | pdf2log = @(x) exp(msplinetrapezlogpdf(x,a,u,v,b));
47 | pdfrnd = @(n) msplinetrapezrnd(a,u,v,b,n);
48 | name = 'msplinetrapez';
49 | test_pdf1_normalization(pdf1,lb,ub,tolerr,name);
50 | test_pdf2_normalization(pdf2,lb,ub,tolerr,name);
51 | test_pdf1_normalization(pdf1log,lb,ub,tolerr,name);
52 | test_pdf2_normalization(pdf2log,lb,ub,tolerr,name);
53 | test_rnd(pdfrnd,pdf1,a,b,n,tolrmse,name);
54 |
55 | %% Test multivariate smoothbox distribution
56 | sigma = [0.7,0.45];
57 | pdf1 = @(x) msmoothboxpdf(x,a(1),b(1),sigma(1));
58 | pdf2 = @(x) msmoothboxpdf(x,a,b,sigma);
59 | pdf1log = @(x) exp(msmoothboxlogpdf(x,a(1),b(1),sigma(1)));
60 | pdf2log = @(x) exp(msmoothboxlogpdf(x,a,b,sigma));
61 | pdfrnd = @(n) msmoothboxrnd(a,b,sigma,n);
62 | name = 'msmoothbox';
63 | lb = [-5,-7];
64 | ub = [5,0];
65 | test_pdf1_normalization(pdf1,lb,ub,tolerr,name);
66 | test_pdf2_normalization(pdf2,lb,ub,tolerr,name);
67 | test_pdf1_normalization(pdf1log,lb,ub,tolerr,name);
68 | test_pdf2_normalization(pdf2log,lb,ub,tolerr,name);
69 | test_rnd(pdfrnd,pdf1,a,b,n,tolrmse,name);
70 |
71 | %close all;
72 |
73 | end
74 |
75 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
76 | function test_pdf1_normalization(pdf1,lb,ub,tol,name)
77 | %TEST_PDF1_NORMALIZATION Test normalization of univariate pdf.
78 |
79 | % Check 1D integral
80 | y = integral(@(x) pdf1(x), lb(1), ub(1), 'ArrayValued', true);
81 | fprintf('%s: 1D integral: %.6f\n', name, y);
82 | assert(abs(y - 1) < tol, ['Test error: univariate ' name ' does not integrate to 1.']);
83 |
84 | end
85 |
86 | function test_pdf2_normalization(pdf2,lb,ub,tol,name)
87 | %TEST_PDF2_NORMALIZATION Test normalization of bivariate pdf.
88 |
89 | % Check 2D integral
90 | y = integral2(@(x1,x2) reshape(pdf2([x1(:),x2(:)]),size(x1)), ...
91 | lb(1), ub(1), lb(2), ub(2));
92 | fprintf('%s: 2D integral: %.6f\n', name, y);
93 | assert(abs(y - 1) < tol, ['Test error: bivariate ' name ' does not integrate to 1.']);
94 |
95 | end
96 |
97 | function test_rnd(pdfrnd,pdf1,a,b,n,tol,name)
98 | %TEST_RND Test random sample generation (histogram vs pdf).
99 |
100 | r = pdfrnd(n);
101 | h = histogram(r(:,1),100,'BinLimits',[a(1),b(1)],'Normalization','pdf');
102 | x = 0.5*(h.BinEdges(1:end-1) + h.BinEdges(2:end))';
103 | y = pdf1(x)';
104 | rmse = sqrt(sum(((y - h.Values)).^2*h.BinWidth));
105 | fprintf('%s: histogram rmse: %.6f\n', name, rmse);
106 | assert(rmse < tol, ['Test error: generated histogram does not match ' name ' pdf.']);
107 |
108 | end
--------------------------------------------------------------------------------
/utils/covcma.m:
--------------------------------------------------------------------------------
1 | function [Sigma,x0] = covcma(X,y,x0,d,frac)
2 | %COVCMA Weighted covariance matrix (inspired by CMA-ES).
3 |
4 | if nargin < 3; x0 = []; end
5 | if nargin < 4 || isempty(d); d = 'descend'; end
6 | if nargin < 5 || isempty(frac); frac = 0.5; end
7 |
8 | [N,D] = size(X);
9 |
10 | % Compute vector weights
11 | mu = frac*N;
12 | weights = zeros(1,1,floor(mu));
13 | weights(1,1,:) = log(mu+1/2)-log(1:floor(mu));
14 | weights = weights./sum(weights);
15 |
16 | % Compute top vectors
17 | [~,index] = sort(y,d);
18 |
19 | if isempty(x0)
20 | x0 = sum(bsxfun(@times,weights(:),X(index(1:floor(mu)),:)),1);
21 | end
22 |
23 | % Compute weighted covariance matrix wrt X0
24 | topx = bsxfun(@minus,X(index(1:floor(mu)),:),x0);
25 | Sigma = sum(bsxfun(@times,weights,topx'*topx),3);
26 |
27 | % % Rescale covariance matrix according to mean vector length
28 | % [E,lambda] = eig(C);
29 | % % [sqrt(diag(lambda))',jit]
30 | % lambda = diag(lambda) + jit.^2;
31 | % lambda = lambda/sum(lambda);
32 | %
33 | % % Square root of covariance matrix
34 | % sigma = diag(sqrt(lambda))*E';
35 | %
36 | % % Rescale by current scale (reduced)
37 | % sigma = MeshSize*SearchFactor*sigma;
38 | %
39 | % % Random draw from multivariate normal
40 | % xs = bsxfun(@plus, x, randn(options.Nsearch,D)*sigma);
41 |
42 | end
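
A minimal usage sketch (illustrative, not part of the file): estimate a
weighted mean and covariance from the best half of a point set, ranked
by objective value (lower is better here, hence 'ascend').

    X = randn(100,3); y = sum(X.^2,2);
    [Sigma,x0] = covcma(X,y,[],'ascend');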
--------------------------------------------------------------------------------
/utils/eissample_lite.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/utils/eissample_lite.m
--------------------------------------------------------------------------------
/utils/evalbool.m:
--------------------------------------------------------------------------------
1 | function tf = evalbool(s)
2 | %EVALBOOL Evaluate argument to a bool
3 |
4 | if ~ischar(s) % non-char input is returned as-is (S must be non-empty)
5 | tf = s;
6 |
7 | else % Evaluation of string S
8 | if strncmpi(s, 'yes', 3) || strncmpi(s, 'on', 2) ...
9 | || strncmpi(s, 'true', 4) || strncmp(s, '1 ', 2)
10 | tf = 1;
11 | elseif strncmpi(s, 'no', 2) || strncmpi(s, 'off', 3) ...
12 | || strncmpi(s, 'false', 5) || strncmp(s, '0 ', 2)
13 | tf = 0;
14 | else
15 | try tf = evalin('caller', s); catch
16 | error(['String value "' s '" cannot be evaluated']);
17 | end
18 | try tf ~= 0; catch
19 | error(['String value "' s '" cannot be evaluated reasonably']);
20 | end
21 | end
22 |
23 | end
24 |
25 | end
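
A minimal usage sketch (illustrative, not part of the file):

    evalbool('yes')   % 1
    evalbool('off')   % 0
    evalbool(true)    % non-char inputs are returned unchanged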
--------------------------------------------------------------------------------
/utils/fminadam.m:
--------------------------------------------------------------------------------
1 | function [x,f,xtab,ftab,iter] = fminadam(fun,x0,LB,UB,TolFun,MaxIter,master_stepsize)
2 | %FMINADAM Function minimization via a modified ADAM algorithm.
3 |
4 | if nargin < 3; LB = []; end
5 | if nargin < 4; UB = []; end
6 | if nargin < 5 || isempty(TolFun); TolFun = 0.001; end
7 | if nargin < 6 || isempty(MaxIter); MaxIter = 1e4; end
8 | if nargin < 7; master_stepsize = []; end
9 |
10 | % Assign default parameters
11 | master_stepsize_default.max = 0.1;
12 | master_stepsize_default.min = 0.001;
13 | master_stepsize_default.decay = 200;
14 | for f = fields(master_stepsize_default)'
15 | if ~isfield(master_stepsize,f{:}) || isempty(master_stepsize.(f{:}))
16 | master_stepsize.(f{:}) = master_stepsize_default.(f{:});
17 | end
18 | end
19 |
20 | %% Adam with momentum
21 | fudge_factor = sqrt(eps);
22 | beta1 = 0.9;
23 | beta2 = 0.999;
24 | batchsize = 20;
25 | TolX = 0.001;
26 | TolX_max = 0.1;
27 | TolFun_max = TolFun*100;
28 |
29 | MinIter = batchsize*2;
30 |
31 | nvars = numel(x0);
32 | if isempty(LB); LB = -Inf(nvars,1); end
33 | if isempty(UB); UB = Inf(nvars,1); end
34 |
35 | m = 0; v = 0;
36 | %xtab = zeros(nvars,batchsize*2);
37 | xtab = zeros(nvars,MaxIter);
38 |
39 | x = x0(:);
40 | ftab = NaN(1,MaxIter);
41 |
42 | for iter = 1:MaxIter
43 | idx = mod(iter-1,batchsize*2) + 1;
44 | isMinibatchEnd = mod(iter,batchsize) == 0;
45 |
46 | %if mod(iter,100) == 0; fprintf('%d..',iter); end
47 |
48 | [ftab(iter),grad] = fun(x);
49 | grad = grad(:);
50 |
51 | m = beta1 * m + (1-beta1) * grad;
52 | v = beta2 * v + (1-beta2) * grad.^2;
53 | mhat = m / (1-beta1^iter);
54 | vhat = v / (1-beta2^iter);
55 |
56 | stepsize = master_stepsize.min + ...
57 | (master_stepsize.max - master_stepsize.min)*exp(-iter/master_stepsize.decay);
58 |
59 | x = x - stepsize .* mhat ./(sqrt(vhat) + fudge_factor); % update
60 | x = min(max(x,LB(:)),UB(:));
61 |
62 | % xtab(:,idx) = x; % Store X
63 | xtab(:,iter) = x; % Store X
64 |
65 | if isMinibatchEnd && iter >= MinIter
66 | xxp = linspace(-(batchsize-1)/2,(batchsize-1)/2,batchsize);
67 | [p,S] = polyfit(xxp,ftab(iter-batchsize+1:iter),1);
68 | slope = p(1);
69 | Rinv = inv(S.R); A = (Rinv*Rinv')*S.normr^2/S.df;
70 | slope_err = sqrt(A(1,1) + TolFun^2);
71 | slope_err_max = sqrt(A(1,1) + TolFun_max^2);
72 |
73 | % Check random walk distance as termination condition
74 | %dx = sqrt(sum((mean(xtab(:,1:batchsize),2) - mean(xtab(:,batchsize+(1:batchsize)),2)).^2/batchsize,1));
75 | dx = sqrt(sum((mean(xtab(:,iter-batchsize+1:iter),2) - mean(xtab(:,(iter-batchsize+1:iter)-batchsize),2)).^2/batchsize,1));
76 |
77 | % Termination conditions
78 | if ( dx < TolX && abs(slope) < slope_err ) || ...
79 | ( dx < TolX_max && abs(slope) < slope_err_max )
80 | break;
81 | end
82 | end
83 | end
84 |
85 | % Return last iterate and trimmed traces
86 | f = ftab(iter);
87 | xtab = xtab(:,1:iter);
88 | ftab = ftab(1:iter);
89 | if size(x0,1) > 1; xtab = xtab'; end % Transpose
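
A minimal usage sketch (illustrative, not part of the file; FUN must
return both the function value and its gradient, here via DEAL):

    fun = @(x) deal(sum((x-1).^2), 2*(x-1));   % quadratic with gradient
    [x,f] = fminadam(fun,[0;0]);               % x converges towards [1;1]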
--------------------------------------------------------------------------------
/utils/psycho_gen.m:
--------------------------------------------------------------------------------
1 | function R=psycho_gen(theta,S)
2 | %PSYCHO_GEN Generate responses for psychometric function model.
3 | % R=PSYCHO_GEN(THETA,S) generates responses in a simple orientation
4 | % discrimination task, where S is a vector of stimulus orientations (in
5 | % deg) for each trial, and THETA is a model parameter vector, with
6 | % THETA(1) as eta=log(sigma), the log of the sensory noise; THETA(2) as
7 | % the bias term; and THETA(3) as the lapse rate. The returned vector of responses
8 | % per trial reports 1 for "rightwards" and -1 for "leftwards".
9 | %
10 | % See Section 5.2 of the manuscript for more details on the model.
11 | %
12 | % Note that this model is very simple and used only for didactic purposes;
13 | % one should use the analytical log-likelihood whenever available.
14 |
15 | % Luigi Acerbi, 2020
16 |
17 | sigma = exp(theta(1));
18 | bias = theta(2);
19 | lapse = theta(3);
20 |
21 | %% Noisy measurement
22 |
23 | % Add Gaussian noise to true orientations S to simulate noisy measurements
24 | X = S + sigma*randn(size(S));
25 |
26 | %% Decision rule
27 |
28 | % The response is 1 for "rightwards" if the internal measurement is larger
29 | % than the BIAS term; -1 for "leftwards" otherwise
30 | R = zeros(size(S));
31 | R(X >= bias) = 1;
32 | R(X < bias) = -1;
33 |
34 | %% Lapses
35 |
36 | % Choose trials in which subject lapses; response there is given at chance
37 | lapse_idx = rand(size(S)) < lapse;
38 |
39 | % Random responses (equal probability of 1 or -1)
40 | lapse_val = randi(2,[sum(lapse_idx),1])*2-3;
41 | R(lapse_idx) = lapse_val;
42 |
43 | end
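
A minimal usage sketch (illustrative, not part of the file): simulate
1000 trials with sensory noise sigma = 2 deg, bias = 0.5 deg, and a 2%
lapse rate.

    S = 20*rand(1000,1) - 10;              % stimulus orientations (deg)
    R = psycho_gen([log(2) 0.5 0.02],S);   % +1/-1 response per trial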
--------------------------------------------------------------------------------
/utils/quantile1.m:
--------------------------------------------------------------------------------
1 | function y = quantile1(x,p)
2 | %QUANTILE1 Quantile of a vector.
3 | % Y = QUANTILE1(X,P) returns quantiles of the values in the vector X.
4 | % P is a scalar or a vector of cumulative probability values in the
5 | % range [0,1], and Y(i) contains the P(i)-th quantile. X is flattened
6 | % to a column vector, so matrices and N-D arrays are treated as a
7 | % single pool of values.
8 | %
9 | % For an N-element vector X, QUANTILE1 computes quantiles as follows:
10 | % 1) The sorted values in X are taken as the (0.5/N), (1.5/N),
11 | % ..., ((N-0.5)/N) quantiles.
12 | % 2) Linear interpolation is used to compute quantiles for
13 | % probability values between (0.5/N) and ((N-0.5)/N).
14 | % 3) The minimum or maximum values in X are assigned to quantiles
15 | % for probability values outside that range.
16 | %
17 | % QUANTILE1 treats NaNs as missing values, and removes them.
18 | %
19 | % Examples:
20 | % y = quantile1(x,0.5); % the median of x
21 | % y = quantile1(x,[0.025;0.25;0.5;0.75;0.975]); % a useful summary of x
22 | %
23 | % See also IQR, MEDIAN, PRCTILE, QUANTILE.
24 | %
25 | % Adapted from MATLAB's PRCTILE.
26 |
27 | % Copyright 1993-2016 The MathWorks, Inc.
28 |
29 | % If X is empty, return all NaNs.
30 | if isempty(x)
31 | y = nan(size(p),'like',x);
32 | else
33 | % Flatten X to a single column vector, so that quantiles are computed
34 | % over all values regardless of the input shape.
35 | x = x(:);
36 |
37 | x = sort(x,1);
38 | n = sum(~isnan(x), 1); % Number of non-NaN values
39 |
40 | if isequal(p,0.5) % make the median fast
41 | if rem(n,2) % n is odd
42 | y = x((n+1)/2,:);
43 | else % n is even
44 | y = (x(n/2,:) + x(n/2+1,:))/2;
45 | end
46 | else
47 | p = p(:); r = p*n; % force P to a column vector for consistent indexing
48 | k = floor(r+0.5); % K gives the index for the row just before r
49 | kp1 = k + 1; % K+1 gives the index for the row just after r
50 | r = r - k; % R is the ratio between the K and K+1 rows
51 |
52 | % Find indices that are out of the range 1 to n and cap them
53 | k(k<1 | isnan(k)) = 1;
54 | kp1 = bsxfun( @min, kp1, n );
55 |
56 | % Use simple linear interpolation for the valid percentages
57 | y = (0.5+r).*x(kp1,:)+(0.5-r).*x(k,:);
58 |
59 | % Make sure that values we hit exactly are copied rather than interpolated
60 | exact = (r==-0.5);
61 | if any(exact)
62 | y(exact,:) = x(k(exact),:);
63 | end
64 |
65 | % Make sure that identical values are copied rather than interpolated
66 | same = (x(k,:)==x(kp1,:));
67 | if any(same(:))
68 | x = x(k,:); % expand x
69 | y(same) = x(same);
70 | end
71 |
72 | end
73 |
74 | end
75 |
76 | end
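
A minimal usage sketch (illustrative, not part of the file): note that P
is a fraction in [0,1], not a percentage.

    x = randn(1e5,1);
    quantile1(x,0.5)           % ~0, the median
    quantile1(x,[0.25;0.75])   % ~[-0.6745; 0.6745] for a standard normal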
--------------------------------------------------------------------------------
/utils/softbndloss.m:
--------------------------------------------------------------------------------
1 | function [y,dy] = softbndloss(x,slb,sub,TolCon)
2 | %SOFTBNDLOSS Loss function for soft bounds for function minimization.
3 |
4 | % Penalization relative scale
5 | if nargin < 4 || isempty(TolCon); TolCon = 1e-3; end
6 |
7 | compute_grad = nargout > 1; % Compute gradient only if requested
8 |
9 | ell = (sub - slb).*TolCon;
10 |
11 | y = 0;
12 | dy = zeros(size(x));
13 |
14 | idx = x < slb;
15 | if any(idx)
16 | y = y + 0.5*sum(((slb(idx) - x(idx))./ell(idx)).^2);
17 | if compute_grad
18 | dy(idx) = (x(idx) - slb(idx))./ell(idx).^2;
19 | end
20 | end
21 |
22 | idx = x > sub;
23 | if any(idx)
24 | y = y + 0.5*sum(((x(idx) - sub(idx))./ell(idx)).^2);
25 | if compute_grad
26 | dy(idx) = (x(idx) - sub(idx))./ell(idx).^2;
27 | end
28 | end
29 |
30 | end
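
A worked example (illustrative, not part of the file): with bounds [0,1]
and the default TolCon = 1e-3, the penalty scale is ell = 1e-3, so a
point 0.5 above the upper bound pays 0.5*(0.5/1e-3)^2 = 1.25e5.

    [y,dy] = softbndloss(1.5,0,1)   % y = 125000, dy = 5e5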
--------------------------------------------------------------------------------
/utils/sq_dist.m:
--------------------------------------------------------------------------------
1 | % sq_dist - a function to compute a matrix of all pairwise squared distances
2 | % between two sets of vectors, stored in the columns of the two matrices, a
3 | % (of size D by n) and b (of size D by m). If only a single argument is given
4 | % or the second matrix is empty, the missing matrix is taken to be identical
5 | % to the first.
6 | %
7 | % Usage: C = sq_dist(a, b)
8 | % or: C = sq_dist(a) or equiv.: C = sq_dist(a, [])
9 | %
10 | % Where a is of size Dxn, b is of size Dxm (or empty), C is of size nxm.
11 | %
12 | % Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2010-12-13.
13 |
14 | function C = sq_dist(a, b)
15 |
16 | if nargin<1 || nargin>2 || nargout>1, error('Wrong number of arguments.'); end
17 | bsx = exist('bsxfun','builtin'); % since Matlab R2007a 7.4.0 and Octave 3.0
18 | if ~bsx, bsx = exist('bsxfun'); end % bsxfun is not yet "builtin" in Octave
19 | [D, n] = size(a);
20 |
21 | % Computation of a^2 - 2*a*b + b^2 is less stable than (a-b)^2 because numerical
22 | % precision can be lost when both a and b have very large absolute value and the
23 | % same sign. For that reason, we subtract the mean from the data beforehand to
24 | % stabilise the computations. This is OK because the squared error is
25 | % independent of the mean.
26 | if nargin==1 % subtract mean
27 | mu = mean(a,2);
28 | if bsx
29 | a = bsxfun(@minus,a,mu);
30 | else
31 | a = a - repmat(mu,1,size(a,2));
32 | end
33 | b = a; m = n;
34 | else
35 | [d, m] = size(b);
36 | if d ~= D, error('Error: column lengths must agree.'); end
37 | mu = (m/(n+m))*mean(b,2) + (n/(n+m))*mean(a,2);
38 | if bsx
39 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu);
40 | else
41 | a = a - repmat(mu,1,n); b = b - repmat(mu,1,m);
42 | end
43 | end
44 |
45 | if bsx % compute squared distances
46 | C = bsxfun(@plus,sum(a.*a,1)',bsxfun(@minus,sum(b.*b,1),2*a'*b));
47 | else
48 | C = repmat(sum(a.*a,1)',1,m) + repmat(sum(b.*b,1),n,1) - 2*a'*b;
49 | end
50 | C = max(C,0); % numerical noise can make C slightly negative, e.g. C = -1e-14
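
A minimal usage sketch (illustrative, not part of the file): note that
points are stored as columns (D-by-n), unlike most of the VBMC codebase,
which stores points as rows.

    a = [0 1; 0 0];   % two 2-D points as columns: (0,0) and (1,0)
    sq_dist(a)        % [0 1; 1 0]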
51 |
--------------------------------------------------------------------------------
/utils/unscent_warp.m:
--------------------------------------------------------------------------------
1 | function [xw,sigmaw,xu] = unscent_warp(fun,x,sigma)
2 | %UNSCENT_WARP Unscented transform (coordinate-wise only).
3 |
4 | [N1,D] = size(x);
5 | [N2,D2] = size(sigma);
6 |
7 | N = max(N1,N2);
8 |
9 | if N1 ~= N && N1 ~= 1; error('Mismatch between rows of X and SIGMA.'); end
10 | if N2 ~= N && N2 ~= 1; error('Mismatch between rows of X and SIGMA.'); end
11 | if D ~= D2; error('Mismatch between columns of X and SIGMA.'); end
12 |
13 | if N1 == 1 && N > 1; x = repmat(x,[N,1]); end
14 | if N2 == 1 && N > 1; sigma = repmat(sigma,[N,1]); end
15 |
16 | U = 2*D+1; % # unscented points
17 |
18 | x3(1,:,:) = x;
19 | xx = repmat(x3,[U,1,1]);
20 |
21 | for d = 1:D
22 | sigma3(1,:,1) = sqrt(D)*sigma(:,d);
23 | xx(2*d,:,d) = bsxfun(@plus,xx(2*d,:,d),sigma3);
24 | xx(2*d+1,:,d) = bsxfun(@minus,xx(2*d+1,:,d),sigma3);
25 | end
26 |
27 | xu = reshape(fun(reshape(xx,[N*U,D])),[U,N,D]);
28 |
29 | if N > 1
30 | xw(:,:) = mean(xu,1);
31 | sigmaw(:,:) = std(xu,[],1);
32 | else
33 | xw(1,:) = mean(xu,1);
34 | sigmaw(1,:) = std(xu,[],1);
35 | end
36 |
37 | end
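
A minimal usage sketch (illustrative, not part of the file): propagate a
mean and standard deviation through a nonlinearity with 2*D+1 sigma
points per input.

    [xw,sigmaw] = unscent_warp(@exp,0,1);   % approximate moments of exp(X), X ~ N(0,1)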
--------------------------------------------------------------------------------
/vbmc_isavp.m:
--------------------------------------------------------------------------------
1 | function tf = vbmc_isavp(vp)
2 | %VBMC_ISAVP True for VBMC variational posterior structures.
3 | % VBMC_ISAVP returns true if VP is a variational posterior structure
4 | % returned by VBMC and false otherwise.
5 | %
6 | % See also VBMC.
7 |
8 | if isstruct(vp)
9 |
10 | tf = true;
11 |
12 | % Required fields for variational posterior
13 | vpfields = {'D','K','w','mu','sigma','lambda','trinfo', ...
14 | 'optimize_mu','optimize_sigma','optimize_lambda','optimize_weights','bounds'};
15 |
16 | % Check that VP has all the required fields, otherwise quit
17 | ff = fields(vp);
18 | for iField = 1:numel(vpfields)
19 | if ~any(strcmp(vpfields{iField},ff))
20 | tf = false;
21 | break;
22 | end
23 | end
24 |
25 | else
26 | tf = false;
27 | end
--------------------------------------------------------------------------------
/vbmc_kldiv.m:
--------------------------------------------------------------------------------
1 | function [kls,xx1,xx2] = vbmc_kldiv(vp1,vp2,Ns,gaussflag)
2 | %VBMC_KLDIV Kullback-Leibler divergence between two variational posteriors.
3 | % KLS = VBMC_KLDIV(VP1,VP2) returns an estimate of the (asymmetric)
4 | % Kullback-Leibler (KL) divergence between two variational posterior
5 | % distributions VP1 and VP2. KLS is a 2-element vector whose first element
6 | % is KL(VP1||VP2) and the second element is KL(VP2||VP1). The symmetrized
7 | % KL divergence can be computed as mean(KLS).
8 | %
9 | % KLS = VBMC_KLDIV(VP1,VP2,NS) uses NS random draws to estimate each
10 | % KL divergence (default NS=1e5).
11 | %
12 | % KLS = VBMC_KLDIV(VP1,VP2,NS,GAUSSFLAG) computes the "Gaussianized"
13 | % KL-divergence if GAUSSFLAG=1, that is the KL divergence between two
14 | % multivariate normal distributions with the same moments as the variational
15 | % posteriors given as inputs. Otherwise, the standard KL-divergence is
16 | % returned for GAUSSFLAG=0 (default).
17 | %
18 | % [KLS,XX1,XX2] = VBMC_KLDIV(...) returns NS samples from the variational
19 | % posteriors VP1 and VP2 as, respectively, NS-by-D matrices XX1 and XX2,
20 | % where D is the dimensionality of the problem.
21 | %
22 | % If GAUSSFLAG is 1, VP1 and/or VP2 can be N-by-D matrices of samples
23 | % from variational posteriors (they do not need to have the same number
24 | % of samples).
25 | %
26 | % See also VBMC, VBMC_MTV, VBMC_PDF, VBMC_RND, VBMC_DIAGNOSTICS.
27 |
28 | if nargin < 3 || isempty(Ns); Ns = 1e5; end
29 | if nargin < 4 || isempty(gaussflag); gaussflag = false; end
30 |
31 | % This was removed because the comparison *has* to be in original space,
32 | % given that the transform might change for distinct variational posteriors
33 | % if nargin < 5 || isempty(origflag); origflag = true; end
34 | origflag = true;
35 |
36 | kls = NaN(1,2);
37 |
38 | if ~gaussflag && (~vbmc_isavp(vp1) || ~vbmc_isavp(vp2))
39 | error('vbmc_kldiv:WrongInputs', ...
40 | 'Unless the KL divergence is Gaussianized, VP1 and VP2 need to be variational posteriors.');
41 | end
42 |
43 | %try
44 | if gaussflag
45 | if Ns == 0 % Analytical calculation
46 | if origflag
47 | error('vbmc_kldiv:NoAnalyticalMoments', ...
48 | 'Analytical moments are available only for the transformed space.')
49 | end
50 | [q1mu,q1sigma] = vbmc_moments(vp1,0);
51 | [q2mu,q2sigma] = vbmc_moments(vp2,0);
52 | xx1 = []; xx2 = [];
53 | else % Numerical moments
54 | if vbmc_isavp(vp1)
55 | [q1mu,q1sigma] = vbmc_moments(vp1,origflag,Ns);
56 | else
57 | q1mu = mean(vp1,1);
58 | q1sigma = cov(vp1);
59 | end
60 | if vbmc_isavp(vp2)
61 | [q2mu,q2sigma] = vbmc_moments(vp2,origflag,Ns);
62 | else
63 | q2mu = mean(vp2,1);
64 | q2sigma = cov(vp2);
65 | end
66 | end
67 | [kls(1),kls(2)] = mvnkl(q1mu,q1sigma,q2mu,q2sigma);
68 |
69 | else
70 | MINP = realmin;
71 |
72 | xx1 = vbmc_rnd(vp1,Ns,origflag,1);
73 | q1 = vbmc_pdf(vp1,xx1,origflag);
74 | q2 = vbmc_pdf(vp2,xx1,origflag);
75 | q1(q1 == 0 | ~isfinite(q1)) = 1; % Ignore these points
76 | q2(q2 == 0 | ~isfinite(q2)) = MINP;
77 | kls(1) = -mean(log(q2) - log(q1));
78 |
79 | xx2 = vbmc_rnd(vp2,Ns,origflag,1);
80 | q1 = vbmc_pdf(vp1,xx2,origflag);
81 | q2 = vbmc_pdf(vp2,xx2,origflag);
82 | q1(q1 == 0 | ~isfinite(q1)) = MINP;
83 | q2(q2 == 0 | ~isfinite(q2)) = 1; % Ignore these points
84 | kls(2) = -mean(log(q1) - log(q2));
85 |
86 | end
87 |
88 | kls = max(kls,0); % Correct for numerical errors
89 |
90 | %catch
91 |
92 | % Could not compute KL divs
93 |
94 | %end
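
A minimal usage sketch (illustrative; it assumes VP1 and VP2 are
variational posteriors returned by VBMC):

    kls = vbmc_kldiv(vp1,vp2);   % [KL(vp1||vp2), KL(vp2||vp1)]
    kl_sym = mean(kls);          % symmetrized KL divergence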
--------------------------------------------------------------------------------
/vbmc_mode.m:
--------------------------------------------------------------------------------
1 | function [x,vp] = vbmc_mode(vp,nmax,origflag)
2 | %VBMC_MODE Find mode of VBMC posterior approximation.
3 | % X = VBMC_MODE(VP) returns the mode of the variational posterior VP.
4 | %
5 | % X = VBMC_MODE(VP,NMAX,ORIGFLAG) uses up to NMAX mixture components as
6 | % starting points for the mode search (default NMAX=20), and returns the
7 | % mode in the original parameter space if ORIGFLAG=1 (default), or in the
8 | % transformed VBMC space if ORIGFLAG=0. The two modes are generally not
9 | % equivalent under a nonlinear transformation of variables.
10 | %
11 | % [X,VP] = VBMC_MODE(...) also returns VP with the mode stored in it.
12 | %
13 | % See also VBMC, VBMC_MOMENTS, VBMC_PDF.
14 |
15 | if nargin < 2 || isempty(nmax); nmax = 20; end
16 | if nargin < 3 || isempty(origflag); origflag = true; end
17 |
18 | if origflag && isfield(vp,'mode') && ~isempty(vp.mode)
19 | x = vp.mode;
20 | else
21 | x0_mat = vp.mu';
22 |
23 | if nmax < vp.K
24 | y0_vec = nlnpdf(x0_mat); % First, evaluate negative log pdf at all component means
25 | % Start from first NMAX solutions
26 | [~,ord] = sort(y0_vec,'ascend');
27 | x0_mat = x0_mat(ord(1:nmax),:);
28 | end
29 |
30 | xmin = zeros(size(x0_mat,1),vp.D);
31 | ff = Inf(size(x0_mat,1),1);
32 |
33 | for k = 1:size(x0_mat,1)
34 | x0 = x0_mat(k,:);
35 | if origflag; x0 = warpvars_vbmc(x0,'inv',vp.trinfo); end
36 |
37 | if origflag
38 | opts = optimoptions('fmincon','GradObj','off','Display','off');
39 | LB = vp.trinfo.lb_orig + sqrt(eps);
40 | UB = vp.trinfo.ub_orig - sqrt(eps);
41 | x0 = min(max(x0,LB),UB);
42 | [xmin(k,:),ff(k)] = fmincon(@nlnpdf,x0,[],[],[],[],LB,UB,[],opts);
43 | else
44 | opts = optimoptions('fminunc','GradObj','off','Display','off');
45 | [xmin(k,:),ff(k)] = fminunc(@nlnpdf,x0,opts);
46 | end
47 | end
48 |
49 | [fval,idx] = min(ff);
50 |
51 | % Get mode and store it
52 | x = xmin(idx,:);
53 | if nargout > 1 && origflag
54 | vp.mode = x;
55 | end
56 | end
57 |
58 | function [y,dy] = nlnpdf(x)
59 | %NLNPDF Negative log posterior pdf and its gradient.
60 | if nargout > 1
61 | [y,dy] = vbmc_pdf(vp,x,origflag,1);
62 | y = -y; dy = -dy;
63 | else
64 | y = -vbmc_pdf(vp,x,origflag,1);
65 | end
66 | end
67 | end
--------------------------------------------------------------------------------
/vbmc_moments.m:
--------------------------------------------------------------------------------
1 | function [mubar,Sigma] = vbmc_moments(vp,origflag,Ns)
2 | %VBMC_MOMENTS Compute moments of variational posterior.
3 | % [MU,SIGMA] = VBMC_MOMENTS(VP) computes the mean MU and covariance
4 | % matrix SIGMA of the variational posterior VP via Monte Carlo sampling.
5 | %
6 | % [...] = VBMC_MOMENTS(VP,ORIGFLAG) computes the moments of the
7 | % variational posterior VP in the original problem space if ORIGFLAG=1
8 | % (default), or in the transformed VBMC space if ORIGFLAG=0. In the
9 | % transformed space, the moments are computed analytically.
10 | %
11 | % [...] = VBMC_MOMENTS(VP,1,NS) uses NS samples to evaluate the moments
12 | % of the variational posterior in the original space (default NS=1e6).
13 | %
14 | % See also VBMC, VBMC_MODE, VBMC_PDF, VBMC_RND.
15 |
16 | if nargin < 2 || isempty(origflag); origflag = true; end
17 | if nargin < 3 || isempty(Ns); Ns = 1e6; end
18 |
19 | covflag = nargout > 1; % Compute covariance?
20 |
21 | K = vp.K;
22 |
23 | if origflag
24 | X = vbmc_rnd(vp,Ns,1,1);
25 | mubar = mean(X,1);
26 | if covflag
27 | Sigma = cov(X);
28 | end
29 | else
30 | w(1,:) = vp.w; % Mixture weights
31 | mu(:,:) = vp.mu;
32 |
33 | mubar = sum(bsxfun(@times,w,mu),2);
34 |
35 | if covflag
36 | sigma(1,:) = vp.sigma;
37 | lambda(:,1) = vp.lambda(:);
38 |
39 | Sigma = sum(w.*sigma.^2)*diag(lambda.^2);
40 | for k = 1:K; Sigma = Sigma + w(k)*(mu(:,k)-mubar)*(mu(:,k)-mubar)'; end
41 | end
42 |
43 | mubar = mubar(:)'; % Return row vector
44 | end
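
For reference, the transformed-space branch implements the standard
Gaussian-mixture moment identities:

    % mubar = sum_k w(k)*mu(:,k)
    % Sigma = sum_k w(k)*sigma(k)^2*diag(lambda.^2)
    %         + sum_k w(k)*(mu(:,k)-mubar)*(mu(:,k)-mubar)'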
--------------------------------------------------------------------------------
/vbmc_mtv.m:
--------------------------------------------------------------------------------
1 | function [mtv,xx1,xx2] = vbmc_mtv(vp1,vp2,Ns)
2 | %VBMC_MTV Marginal Total Variation distances between two variational posteriors.
3 | % MTV = VBMC_MTV(VP1,VP2) returns an estimate of the marginal total
4 | % variation distances between two variational posterior distributions VP1
5 | % and VP2. MTV is a D-element vector whose elements are the total variation
6 | % distance between the marginal distributions of VP1 and VP2, for each
7 | % coordinate dimension.
8 | %
9 | % The total variation distance between two densities p1 and p2 is:
10 | % TV(p1, p2) = 1/2 \int | p1(x) - p2(x) | dx
11 | %
12 | % MTV = VBMC_MTV(VP1,VP2,NS) uses NS random draws to estimate the MTV
13 | % (default NS=1e5).
14 | %
15 | % [MTV,XX1,XX2] = VBMC_MTV(...) returns NS samples from the variational
16 | % posteriors VP1 and VP2 as, respectively, NS-by-D matrices XX1 and XX2,
17 | % where D is the dimensionality of the problem.
18 | %
19 | % VP1 and/or VP2 can be N-by-D matrices of samples from variational
20 | % posteriors (they do not need to have the same number of samples).
21 | %
22 | % See also VBMC, VBMC_KLDIV, VBMC_PDF, VBMC_RND, VBMC_DIAGNOSTICS.
23 |
24 | if nargin < 3 || isempty(Ns); Ns = 1e5; end
25 |
26 | % This was removed because the comparison *has* to be in original space,
27 | % given that the transform might change for distinct variational posteriors
28 | % if nargin < 4 || isempty(origflag); origflag = true; end
29 | origflag = true;
30 |
31 | if vbmc_isavp(vp1)
32 | xx1 = vbmc_rnd(vp1,Ns,origflag,1);
33 | lb1 = vp1.trinfo.lb_orig;
34 | ub1 = vp1.trinfo.ub_orig;
35 | else
36 | xx1 = vp1;
37 | lb1 = -Inf(1,size(vp1,2));
38 | ub1 = Inf(1,size(vp1,2));
39 | end
40 | if vbmc_isavp(vp2)
41 | xx2 = vbmc_rnd(vp2,Ns,origflag,1);
42 | lb2 = vp2.trinfo.lb_orig;
43 | ub2 = vp2.trinfo.ub_orig;
44 | else
45 | xx2 = vp2;
46 | lb2 = -Inf(1,size(vp2,2));
47 | ub2 = Inf(1,size(vp2,2));
48 | end
49 |
50 | D = size(xx1,2);
51 | nkde = 2^13;
52 | mtv = zeros(1,D);
53 |
54 | % Set bounds for kernel density estimate
55 | lb1_xx = min(xx1); ub1_xx = max(xx1);
56 | range1 = ub1_xx - lb1_xx;
57 | lb1 = max(lb1_xx-range1/10,lb1);
58 | ub1 = min(ub1_xx+range1/10,ub1);
59 |
60 | lb2_xx = min(xx2); ub2_xx = max(xx2);
61 | range2 = ub2_xx - lb2_xx;
62 | lb2 = max(lb2_xx-range2/10,lb2);
63 | ub2 = min(ub2_xx+range2/10,ub2);
64 |
65 | % Compute marginal total variation
66 | for i = 1:D
67 | [~,yy1,x1mesh] = kde1d(xx1(:,i),nkde,lb1(i),ub1(i));
68 | yy1 = yy1/(qtrapz(yy1)*(x1mesh(2)-x1mesh(1))); % Ensure normalization
69 |
70 | [~,yy2,x2mesh] = kde1d(xx2(:,i),nkde,lb2(i),ub2(i));
71 | yy2 = yy2/(qtrapz(yy2)*(x2mesh(2)-x2mesh(1))); % Ensure normalization
72 |
73 | f = @(x) abs(interp1(x1mesh,yy1,x,'spline',0) - interp1(x2mesh,yy2,x,'spline',0));
74 | bb = sort([x1mesh([1,end]),x2mesh([1,end])]);
75 | for j = 1:3
76 | xx_range = linspace(bb(j),bb(j+1),1e5);
77 | mtv(i) = mtv(i) + 0.5*qtrapz(f(xx_range))*(xx_range(2)-xx_range(1));
78 | end
79 | end
80 |
81 |
--------------------------------------------------------------------------------
/vbmc_pdf.m:
--------------------------------------------------------------------------------
1 | function [y,dy] = vbmc_pdf(vp,X,origflag,logflag,transflag,df)
2 | %VBMC_PDF Probability density function of VBMC posterior approximation.
3 | % Y = VBMC_PDF(VP,X) returns the probability density of the variational
4 | % posterior VP evaluated at each row of X. Rows of the N-by-D matrix X
5 | % correspond to observations or points, and columns correspond to variables
6 | % or coordinates. Y is an N-by-1 vector.
7 | %
8 | % Y = VBMC_PDF(VP,X,ORIGFLAG) returns the value of the posterior density
9 | % evaluated in the original parameter space for ORIGFLAG=1 (default), or
10 | % in the transformed VBMC space if ORIGFLAG=0.
11 | %
12 | % Y = VBMC_PDF(VP,X,ORIGFLAG,LOGFLAG) returns the value of the log
13 | % posterior density if LOGFLAG=1, otherwise the posterior density for
14 | % LOGFLAG=0 (default).
15 | %
16 | % Y = VBMC_PDF(VP,X,ORIGFLAG,LOGFLAG,TRANSFLAG) for TRANSFLAG=1 assumes
17 | % that X is already specified in transformed VBMC space. Otherwise, X is
18 | % specified in the original parameter space (default TRANSFLAG=0).
19 | %
20 | % Y = VBMC_PDF(VP,X,ORIGFLAG,LOGFLAG,TRANSFLAG,DF) returns the probability
21 | % density of a heavy-tailed version of the variational posterior,
22 | % in which the multivariate normal components have been replaced by
23 | % multivariate t-distributions with DF degrees of freedom. The default is
24 | % DF=Inf, limit in which the t-distribution becomes a multivariate normal.
25 | %
26 | % See also VBMC, VBMC_RND.
27 |
28 | if nargin < 3 || isempty(origflag); origflag = true; end
29 | if nargin < 4 || isempty(logflag); logflag = false; end
30 | if nargin < 5 || isempty(transflag); transflag = false; end
31 | if nargin < 6 || isempty(df); df = Inf; end
32 |
33 | gradflag = nargout > 1; % Compute gradient
34 |
35 | % Convert points to transformed space
36 | if origflag && ~isempty(vp.trinfo) && ~transflag
37 | % Xold = X;
38 | X = warpvars_vbmc(X,'dir',vp.trinfo);
39 | end
40 |
41 | [N,D] = size(X);
42 | K = vp.K; % Number of components
43 | w = vp.w; % Mixture weights
44 | lambda = vp.lambda(:)'; % LAMBDA is a row vector
45 |
46 | mu_t(:,:) = vp.mu'; % MU transposed
47 | sigma(1,:) = vp.sigma;
48 |
49 | y = zeros(N,1); % Allocate probability vector
50 | if gradflag; dy = zeros(N,D); end
51 |
52 | if ~isfinite(df) || df == 0
53 | % Compute pdf of variational posterior
54 |
55 | % Common normalization factor
56 | nf = 1/(2*pi)^(D/2)/prod(lambda);
57 |
58 | for k = 1:K
59 | d2 = sum(bsxfun(@rdivide,bsxfun(@minus,X,mu_t(k,:)),sigma(k)*lambda).^2,2);
60 | nn = nf*w(k)/sigma(k)^D*exp(-0.5*d2);
61 | y = y + nn;
62 | if gradflag
63 | dy = dy - bsxfun(@times,nn, ...
64 | bsxfun(@rdivide,bsxfun(@minus,X,mu_t(k,:)),lambda.^2.*sigma(k)^2));
65 | end
66 | end
67 | else
68 | % Compute pdf of heavy-tailed variant of variational posterior
69 |
70 | if df > 0
71 | % (This uses a multivariate t-distribution which is not the same thing
72 | % as the product of D univariate t-distributions)
73 |
74 | % Common normalization factor
75 | nf = exp(gammaln((df+D)/2) - gammaln(df/2))/(df*pi)^(D/2)/prod(lambda);
76 |
77 | for k = 1:K
78 | d2 = sum(bsxfun(@rdivide,bsxfun(@minus, X, mu_t(k,:)),sigma(k)*lambda).^2,2);
79 | nn = nf*w(k)/sigma(k)^D*(1+d2/df).^(-(df+D)/2);
80 | y = y + nn;
81 | if gradflag
82 | error('Gradient of heavy-tailed pdf not supported yet.');
83 | dy = dy - bsxfun(@times,nn, ...
84 | bsxfun(@rdivide,bsxfun(@minus,X,mu_t(k,:)),lambda.^2.*sigma(k)^2));
85 | end
86 | end
87 | else
88 | % (This uses a product of D univariate t-distributions)
89 |
90 | df = abs(df);
91 |
92 | % Common normalization factor
93 | nf = (exp(gammaln((df+1)/2) - gammaln(df/2))/sqrt(df*pi))^D/prod(lambda);
94 |
95 | for k = 1:K
96 | d2 = bsxfun(@rdivide,bsxfun(@minus, X, mu_t(k,:)),sigma(k)*lambda).^2;
97 | nn = nf*w(k)/sigma(k)^D*prod((1+d2/df).^(-(df+1)/2),2);
98 | y = y + nn;
99 | if gradflag
100 | error('Gradient of heavy-tailed pdf not supported yet.');
101 | end
102 | end
103 | end
104 |
105 | end
106 |
107 | if logflag
108 | if gradflag; dy = bsxfun(@rdivide,dy,y); end
109 | y = log(y);
110 | end
111 |
112 | % Apply Jacobian correction
113 | if origflag && ~isempty(vp.trinfo)
114 | if logflag
115 | y = y - warpvars_vbmc(X,'logprob',vp.trinfo);
116 | if gradflag
117 | error('vbmc_pdf:NoOriginalGrad',...
118 | 'Gradient computation in original space not supported yet.');
119 | dy = dy - warpvars_vbmc(X,'g',vp.trinfo);
120 | end
121 | else
122 | y = y ./ warpvars_vbmc(X,'prob',vp.trinfo);
123 | end
124 | end
125 |
126 | end
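
A minimal usage sketch (illustrative; it assumes VP is a variational
posterior returned by VBMC and X an N-by-D matrix of points):

    y  = vbmc_pdf(vp,X);       % density in the original parameter space
    ly = vbmc_pdf(vp,X,1,1);   % log density, more stable in the tails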
--------------------------------------------------------------------------------
/vbmc_plot.m:
--------------------------------------------------------------------------------
1 | function vbmc_plot(vp_array,stats)
2 | %VBMC_PLOT Plot marginals of one or more variational posteriors.
3 | if nargin < 2; stats = []; end
4 |
5 | Nsamples = 1e5;
6 |
7 | if ~iscell(vp_array)
8 | temp{1} = vp_array;
9 | vp_array = temp;
10 | end
11 |
12 | if numel(vp_array) == 1 && vbmc_isavp(vp_array{1})
13 | X = vbmc_rnd(vp_array{1},Nsamples);
14 | for d = 1:size(X,2); names{d} = ['x_{' num2str(d) '}']; end
15 | cornerplot(X,names);
16 | else
17 | Nbins = 40;
18 | Nvps = numel(vp_array);
19 | D = vp_array{1}.D;
20 | mm = zeros(Nvps,D);
21 | cmap = colormap;
22 | cmap = cmap(mod((1:27:(1+27*64))-1,64)+1,:);
23 |
24 | plotmat = [1 1; 1 2; 1 3; 2 2; 2 3; 2 3; 2 4; 2 4; 3 3; 3 4; 3 4; 3 4; 3 5; 3 5; 3 5; 4 4; 4 5; 4 5; 4 5];
25 | nrows = plotmat(D,1);
26 | ncols = plotmat(D,2);
27 |
28 | for i = 1:Nvps
29 | if ~isempty(stats) && stats.idx_best == i; best_flag = true; else; best_flag = false; end
30 | ltext{i} = ['vp #' num2str(i)];
31 | if best_flag; ltext{i} = [ltext{i} ' (best)']; end
32 |
33 | X = vbmc_rnd(vp_array{i},Nsamples);
34 | mm(i,:) = median(X);
35 |
36 | for d = 1:D
37 | subplot(nrows,ncols,d);
38 | if best_flag; lw = 3; else; lw = 1; end
39 | hst(i)=histogram(X(:,d),Nbins,'Normalization','probability','Displaystyle','stairs','LineWidth',lw,'EdgeColor',cmap(i,:));
40 | hold on;
41 | end
42 | end
43 |
44 | for i = 1:Nvps
45 | if ~isempty(stats) && stats.idx_best == i; best_flag = true; else; best_flag = false; end
46 | for d = 1:D
47 | subplot(nrows,ncols,d);
48 | if best_flag; lw = 3; else; lw = 1; end
49 | hln(i)=plot(mm(i,d)*[1 1],ylim,'-','LineWidth',lw,'Color',cmap(i,:));
50 | hold on;
51 | end
52 | end
53 |
54 |
55 | for d = 1:D
56 | subplot(nrows,ncols,d);
57 |
58 | xlabel(['x_{' num2str(d) '}']);
59 | set(gca,'TickDir','out');
60 | box off;
61 |
62 | if d == D
63 | hleg = legend(hln,ltext{:});
64 | set(hleg,'box','off','location','best');
65 | end
66 |
67 | end
68 | set(gcf,'Color','w');
69 |
70 | end
71 |
72 |
73 |
74 |
75 | end
--------------------------------------------------------------------------------
/vbmc_power.m:
--------------------------------------------------------------------------------
1 | function [vpp,lnZ] = vbmc_power(vp,n,cutoff)
2 | %VBMC_POWER Compute power posterior of variational approximation.
3 |
4 | if nargin < 3 || isempty(cutoff); cutoff = 1e-5; end
5 | if cutoff < 0; cutoff = 0; end
6 |
7 | vpp = vp;
8 | K = vp.K;
9 |
10 | if K > 1
11 | % For ease of reference in the code
12 | D = vp.D;
13 | w = vp.w;
14 | mu = vp.mu;
15 | sigma = vp.sigma;
16 | lambda = vp.lambda;
17 |
18 | % Power posterior parameters
19 | Kp = K^n;
20 | wp = zeros(1,Kp);
21 | mup = zeros(D,Kp);
22 | sigmap = zeros(1,Kp);
23 | end
24 |
25 | switch n
26 | case 1; lnZ = 0; return;
27 | case 2
28 | nf = 1/sqrt(2*pi)^D;
29 |
30 | % First, compute product posterior weights
31 | idx = 0;
32 | for i = 1:K
33 | for j = 1:K
34 | idx = idx + 1;
35 | sigmatilde2 = (sigma(i)^2+sigma(j).^2).*lambda.^2;
36 | wp(idx) = w(i)*w(j).*nf/prod(sqrt(sigmatilde2))*exp(-0.5*sum((mu(:,i)-mu(:,j)).^2./sigmatilde2,1));
37 | end
38 | end
39 |
40 | Z = sum(wp); % Normalization constant
41 | lnZ = log(Z);
42 | wp = wp/Z;
43 |
44 | % Throw away components which sum below cutoff
45 | wp_sorted = sort(wp);
46 | wp_cum = cumsum(wp_sorted);
47 | idx_cut = sum(wp_cum < cutoff);
48 | if idx_cut > 0; w_cutoff = wp_sorted(idx_cut); else; w_cutoff = 0; end
49 | wp(wp <= w_cutoff) = 0;
50 | wp = wp/sum(wp);
51 |
52 | % Then, compute mean and variance for above-cutoff components only
53 | idx = 0;
54 | for i = 1:K
55 | for j = 1:K
56 | idx = idx + 1;
57 | if wp(idx) == 0; continue; end
58 | mup(:,idx) = (mu(:,i).*sigma(j)^2 + mu(:,j).*sigma(i)^2)./(sigma(i)^2+sigma(j)^2);
59 | sigmap(idx) = sigma(i)*sigma(j)/sqrt(sigma(i)^2+sigma(j)^2);
60 | end
61 | end
62 |
63 | otherwise
64 | error('vbmc_power:UnsupportedPower',...
65 | 'The power N should be a small positive integer. Currently supported values of N: 1 and 2.');
66 | end
67 |
68 | % Keep only nonzero components
69 | keep_idx = wp > 0;
70 | wp_keep = wp(keep_idx);
71 | wp_keep = wp_keep/sum(wp_keep);
72 |
73 | vpp.K = sum(keep_idx);
74 | vpp.mu = mup(:,keep_idx);
75 | vpp.sigma = sigmap(keep_idx);
76 | vpp.w = wp_keep;
77 | if isfield(vpp,'temperature') && ~isempty(vpp.temperature)
78 | vpp.temperature = vpp.temperature/n;
79 | else
80 | vpp.temperature = 1/n;
81 | end
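
For N = 2 the power posterior of a K-component Gaussian mixture is
itself a mixture with up to K^2 components: each pair (i,j) contributes
a Gaussian product component, as computed above, with

    % sigmap(idx)^2 = sigma(i)^2*sigma(j)^2/(sigma(i)^2 + sigma(j)^2)
    % mup(:,idx) = (mu(:,i)*sigma(j)^2 + mu(:,j)*sigma(i)^2) ...
    %              / (sigma(i)^2 + sigma(j)^2)
    % wp(idx) proportional to w(i)*w(j) times the Gaussian overlap of
    %         components i and j, with per-dimension variance
    %         (sigma(i)^2 + sigma(j)^2)*lambda.^2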
82 |
83 |
84 |
--------------------------------------------------------------------------------