├── .gitignore
├── LICENSE
├── README.md
├── acq
│   ├── acqeig_vbmc.m
│   ├── acqf_vbmc.m
│   ├── acqflog_vbmc.m
│   ├── acqfsn2_vbmc.m
│   ├── acqimiqr_vbmc.m
│   ├── acqus_vbmc.m
│   ├── acqviqr_vbmc.m
│   └── acqwrapper_vbmc.m
├── docs
│   ├── README.txt
│   ├── vbmc-demo-2.gif
│   ├── vbmc-demo.gif
│   ├── vbmc-demo.png
│   ├── vbmc2020-demo.gif
│   └── walkthrough.md
├── ent
│   ├── entlb_vbmc.m
│   ├── entmc_vbmc.m
│   └── entub_vbmc.m
├── gplite
│   ├── LICENSE
│   ├── README.md
│   ├── gplite_clean.m
│   ├── gplite_covfun.m
│   ├── gplite_demo.m
│   ├── gplite_fmin.m
│   ├── gplite_hypprior.m
│   ├── gplite_intmeanfun.m
│   ├── gplite_meanfun.m
│   ├── gplite_nlZ.m
│   ├── gplite_noisefun.m
│   ├── gplite_plot.m
│   ├── gplite_post.m
│   ├── gplite_pred.m
│   ├── gplite_qpred.m
│   ├── gplite_quad.m
│   ├── gplite_rnd.m
│   ├── gplite_sample.m
│   ├── gplite_test.m
│   ├── gplite_train.m
│   ├── outwarp_negpow.m
│   ├── outwarp_negpowc1.m
│   ├── outwarp_negscaledpow.m
│   ├── outwarp_test.m
│   └── private
│       ├── derivcheck.m
│       ├── eissample_lite.m
│       ├── fminfill.m
│       ├── gplite_core.m
│       ├── quantile1.m
│       ├── slicesamplebnd.m
│       └── sq_dist.m
├── install.m
├── lpostfun.m
├── misc
│   ├── best_vbmc.m
│   ├── boundscheck_vbmc.m
│   ├── check_quadcoefficients_vbmc.m
│   ├── evaloption_vbmc.m
│   ├── fess_vbmc.m
│   ├── finalboost_vbmc.m
│   ├── funlogger_vbmc.m
│   ├── get_GPTrainOptions.m
│   ├── get_traindata_vbmc.m
│   ├── get_vptheta.m
│   ├── gethpd_vbmc.m
│   ├── gplogjoint.m
│   ├── gplogjoint_weights.m
│   ├── gpreupdate.m
│   ├── gpsample_vbmc.m
│   ├── gptrain_vbmc.m
│   ├── initdesign_vbmc.m
│   ├── intkernel.m
│   ├── negelcbo_vbmc.m
│   ├── noiseshaping_vbmc.m
│   ├── proposal_vbmc.m
│   ├── real2int_vbmc.m
│   ├── rescale_params.m
│   ├── setupoptions_vbmc.m
│   ├── setupvars_vbmc.m
│   ├── testpdf.m
│   ├── vbinit_vbmc.m
│   ├── vbmc_gphyp.m
│   ├── vpbndloss.m
│   ├── vpbounds.m
│   ├── vpoptimize_vbmc.m
│   ├── vpoptimizeweights_vbmc.m
│   ├── vpsample_vbmc.m
│   ├── vpsieve_vbmc.m
│   ├── vptrain2real.m
│   ├── warp_gpandvp_vbmc.m
│   └── warp_input_vbmc.m
├── private
│   ├── acqhedge_vbmc.m
│   ├── activeimportancesampling_vbmc.m
│   ├── activesample_vbmc.m
│   ├── recompute_lcbmax.m
│   ├── updateK.m
│   ├── vbmc_demo2d.m
│   ├── vbmc_iterplot.m
│   ├── vbmc_output.m
│   ├── vbmc_plot2d.m
│   ├── vbmc_termination.m
│   └── vbmc_warmup.m
├── rosenbrock_test.m
├── shared
│   ├── kde1d.m
│   ├── msmoothboxlogpdf.m
│   ├── msmoothboxpdf.m
│   ├── msmoothboxrnd.m
│   ├── msplinetrapezlogpdf.m
│   ├── msplinetrapezpdf.m
│   ├── msplinetrapezrnd.m
│   ├── mtrapezlogpdf.m
│   ├── mtrapezpdf.m
│   ├── mtrapezrnd.m
│   ├── munifboxlogpdf.m
│   ├── munifboxpdf.m
│   ├── munifboxrnd.m
│   ├── mvnkl.m
│   ├── qtrapz.m
│   ├── warpvars_vbmc.m
│   └── warpvars_vbmc_test.m
├── test
│   ├── runtest_vbmc.m
│   └── test_pdfs_vbmc.m
├── utils
│   ├── cmaes_modded.m
│   ├── cornerplot.m
│   ├── covcma.m
│   ├── eissample_lite.m
│   ├── evalbool.m
│   ├── fastkmeans.m
│   ├── fminadam.m
│   ├── fminfill.m
│   ├── ibslike.m
│   ├── kde2d.m
│   ├── malasample_vbmc.m
│   ├── psycho_gen.m
│   ├── quantile1.m
│   ├── slicelite.m
│   ├── slicesample_vbmc.m
│   ├── slicesamplebnd.m
│   ├── softbndloss.m
│   ├── sq_dist.m
│   └── unscent_warp.m
├── vbmc.m
├── vbmc_diagnostics.m
├── vbmc_examples.m
├── vbmc_isavp.m
├── vbmc_kldiv.m
├── vbmc_mode.m
├── vbmc_moments.m
├── vbmc_mtv.m
├── vbmc_pdf.m
├── vbmc_plot.m
├── vbmc_power.m
└── vbmc_rnd.m

/.gitignore:
--------------------------------------------------------------------------------
1 | *.mexw64
2 | *.mexa64
3 | *.mexmaci64
4 | *.asv
5 | *.m~
6 | .DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 | 
3 | Copyright (c) 2022, Luigi Acerbi
4 | All rights reserved.
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /acq/acqeig_vbmc.m: -------------------------------------------------------------------------------- 1 | function acq = acqeig_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot) 2 | %ACQEIG_VBMC Expected information gain (EIG) acquisition function. 3 | 4 | if isempty(Xs) 5 | % Return acquisition function info struct 6 | acq.compute_varlogjoint = true; 7 | return; 8 | end 9 | 10 | % Xs is in *transformed* coordinates 11 | Ns = numel(gp.post); 12 | 13 | % Estimate observation noise at test points from nearest neighbor 14 | [~,pos] = min(sq_dist(bsxfun(@rdivide,Xs,optimState.gplengthscale),gp.X_rescaled),[],2); 15 | sn2 = gp.sn2new(pos); 16 | 17 | intK = intkernel(Xs,vp,gp,0); 18 | ys2 = fs2 + sn2; % Predictive variance at test points 19 | 20 | rho2 = bsxfun(@rdivide,intK.^2,optimState.varlogjoint_samples.*ys2); 21 | acq = 0.5*sum(log(max(realmin,1 - min(1,rho2))),2)/Ns; 22 | 23 | end 24 | 25 | 26 | %SQ_DIST Compute matrix of all pairwise squared distances between two sets 27 | % of vectors, stored in the columns of the two matrices, a (of size n-by-D) 28 | % and b (of size m-by-D). 29 | function C = sq_dist(a,b) 30 | 31 | n = size(a,1); 32 | m = size(b,1); 33 | mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1); 34 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu); 35 | C = bsxfun(@plus,sum(a.*a,2),bsxfun(@minus,sum(b.*b,2)',2*a*b')); 36 | C = max(C,0); 37 | 38 | end -------------------------------------------------------------------------------- /acq/acqf_vbmc.m: -------------------------------------------------------------------------------- 1 | function acq = acqf_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot) 2 | %ACQF_VBMC Acquisition fcn. for prospective uncertainty search. 
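% The value computed below is (the negative of) the prospective uncertainty
% search acquisition,
%     a(x) = vtot(x) * exp(fbar(x) - ymax) * q(x),
% where vtot is the total GP predictive variance at x, fbar the GP
% predictive mean, ymax the highest observed log joint value so far, and
% q the current variational posterior density. The sign is flipped because
% acquisition functions are minimized by the acquisition optimizer.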
3 | 
4 | % Xs is in *transformed* coordinates
5 | 
6 | % Probability density of variational posterior at test points
7 | p = max(vbmc_pdf(vp,Xs,0),realmin);
8 | 
9 | % Prospective uncertainty search
10 | z = optimState.ymax;
11 | acq = -vtot .* exp(fbar-z) .* p;
12 | 
13 | end
--------------------------------------------------------------------------------
/acq/acqflog_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqflog_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
2 | %ACQFLOG_VBMC Acquisition fcn. for prospective uncertainty search (log-valued).
3 | 
4 | % Xs is in *transformed* coordinates
5 | 
6 | if isempty(Xs)
7 |     % Return acquisition function info struct
8 |     acq.compute_varlogjoint = false;
9 |     acq.log_flag = true;
10 |     return;
11 | end
12 | 
13 | % Probability density of variational posterior at test points
14 | p = max(vbmc_pdf(vp,Xs,0),realmin);
15 | 
16 | % Log prospective uncertainty search
17 | z = optimState.ymax;
18 | acq = -(log(vtot) + fbar-z + log(p));
19 | 
20 | end
--------------------------------------------------------------------------------
/acq/acqfsn2_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqfsn2_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
2 | %ACQFSN2_VBMC Acquisition fcn. for noisy prospective uncertainty search.
3 | 
4 | % Xs is in *transformed* coordinates
5 | 
6 | % Probability density of variational posterior at test points
7 | p = max(vbmc_pdf(vp,Xs,0),realmin);
8 | 
9 | % Estimate observation noise at test points from nearest neighbor
10 | [~,pos] = min(sq_dist(bsxfun(@rdivide,Xs,optimState.gplengthscale),gp.X_rescaled),[],2);
11 | sn2 = gp.sn2new(pos);
12 | 
13 | z = optimState.ymax;
14 | 
15 | % Prospective uncertainty search corrected for noisy observations
16 | acq = -vtot.*(1 - sn2./(vtot+sn2)) .* exp(fbar-z) .* p;
17 | 
18 | end
19 | 
20 | 
21 | %SQ_DIST Compute matrix of all pairwise squared distances between two sets
22 | % of vectors, stored in the columns of the two matrices, a (of size n-by-D)
23 | % and b (of size m-by-D).
24 | function C = sq_dist(a,b)
25 | 
26 | n = size(a,1);
27 | m = size(b,1);
28 | mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1);
29 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu);
30 | C = bsxfun(@plus,sum(a.*a,2),bsxfun(@minus,sum(b.*b,2)',2*a*b'));
31 | C = max(C,0);
32 | 
33 | end
--------------------------------------------------------------------------------
/acq/acqimiqr_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqimiqr_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
2 | %ACQIMIQR_VBMC Integrated median interquantile range (IMIQR) acquisition function.
3 | 
4 | u = 0.6745; % norminv(0.75)
5 | 
6 | if isempty(Xs)
7 |     % Return acquisition function info struct
8 |     acq.importance_sampling = true;
9 |     acq.importance_sampling_vp = false;
10 |     acq.log_flag = true;
11 |     return;
12 | elseif ischar(Xs)
13 |     switch lower(Xs)
14 |         case 'islogf1'
15 |             % Importance sampling log base proposal (shared part)
16 |             acq = fmu;
17 |         case 'islogf2'
18 |             % Importance sampling log base proposal (added part)
19 |             % (Full log base proposal is fixed + added)
20 |             fs = sqrt(fs2);
21 |             acq = u*fs + log1p(-exp(-2*u*fs));
22 |         case 'islogf'
23 |             % Importance sampling log base proposal distribution
24 |             fs = sqrt(fs2);
25 |             acq = fmu + u*fs + log1p(-exp(-2*u*fs));
26 |     end
27 |     return;
28 | end
29 | 
30 | % Different importance sampling inputs for different GP hyperparameters?
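% (If separate active importance samples were drawn for each GP
% hyperparameter sample, Xa is stored as an Na-by-D-by-Ns array; the flag
% below detects this case, and Xa is then sliced per sample in the main loop.)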
31 | multipleinputs_flag = size(optimState.ActiveImportanceSampling.Xa,3) > 1; 32 | 33 | % Xs is in *transformed* coordinates 34 | 35 | [Nx,D] = size(Xs); 36 | Ns = size(fmu,2); 37 | Na = size(optimState.ActiveImportanceSampling.Xa,1); 38 | 39 | % Estimate observation noise at test points from nearest neighbor 40 | [~,pos] = min(sq_dist(bsxfun(@rdivide,Xs,optimState.gplengthscale),gp.X_rescaled),[],2); 41 | sn2 = gp.sn2new(pos); 42 | % sn2 = min(sn2,1e4); 43 | ys2 = fs2 + sn2; % Predictive variance at test points 44 | 45 | if multipleinputs_flag 46 | Xa = zeros(Na,D); 47 | else 48 | Xa = optimState.ActiveImportanceSampling.Xa; 49 | end 50 | acq = zeros(Nx,Ns); 51 | 52 | %% Compute integrated acquisition function via importance sampling 53 | 54 | for s = 1:Ns 55 | hyp = gp.post(s).hyp; 56 | L = gp.post(s).L; 57 | Lchol = gp.post(s).Lchol; 58 | sn2_eff = 1/gp.post(s).sW(1)^2; 59 | 60 | if multipleinputs_flag 61 | Xa(:,:) = optimState.ActiveImportanceSampling.Xa(:,:,s); 62 | end 63 | 64 | % Compute cross-kernel matrix Ks_mat 65 | if gp.covfun(1) == 1 % Hard-coded SE-ard for speed 66 | ell = exp(hyp(1:D))'; 67 | sf2 = exp(2*hyp(D+1)); 68 | Ks_mat = sq_dist(gp.X*diag(1./ell),Xs*diag(1./ell)); 69 | Ks_mat = sf2 * exp(-Ks_mat/2); 70 | 71 | Ka_mat = sq_dist(Xa*diag(1./ell),Xs*diag(1./ell)); 72 | Ka_mat = sf2 * exp(-Ka_mat/2); 73 | 74 | %Kax_mat = sq_dist(Xa*diag(1./ell),gp.X*diag(1./ell)); 75 | %Kax_mat = sf2 * exp(-Kax_mat/2); 76 | Kax_mat(:,:) = optimState.ActiveImportanceSampling.Kax_mat(:,:,s); 77 | else 78 | error('Other covariance functions not supported yet.'); 79 | end 80 | 81 | if Lchol 82 | C = Ka_mat' - Ks_mat'*(L\(L'\Kax_mat'))/sn2_eff; 83 | else 84 | C = Ka_mat' + Ks_mat'*(L*Kax_mat'); 85 | end 86 | 87 | tau2 = bsxfun(@rdivide,C.^2,ys2(:,s)); 88 | s_pred = sqrt(max(bsxfun(@minus,optimState.ActiveImportanceSampling.fs2a(:,s)',tau2),0)); 89 | 90 | lnw = optimState.ActiveImportanceSampling.lnw(s,:); 91 | 92 | zz = bsxfun(@plus,lnw,u*s_pred + log1p(-exp(-2*u*s_pred))); 93 | lnmax = max(zz,[],2); 94 | acq(:,s) = log(sum(exp(bsxfun(@minus,zz,lnmax)),2)) + lnmax; 95 | end 96 | 97 | if Ns > 1 98 | M = max(acq,[],2); 99 | acq = M + log(sum(exp(bsxfun(@minus,acq,M)),2)/Ns); 100 | end 101 | 102 | end 103 | 104 | 105 | %SQ_DIST Compute matrix of all pairwise squared distances between two sets 106 | % of vectors, stored in the columns of the two matrices, a (of size n-by-D) 107 | % and b (of size m-by-D). 108 | function C = sq_dist(a,b) 109 | 110 | n = size(a,1); 111 | m = size(b,1); 112 | mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1); 113 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu); 114 | C = bsxfun(@plus,sum(a.*a,2),bsxfun(@minus,sum(b.*b,2)',2*a*b')); 115 | C = max(C,0); 116 | 117 | end -------------------------------------------------------------------------------- /acq/acqus_vbmc.m: -------------------------------------------------------------------------------- 1 | function acq = acqus_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot) 2 | %ACQUS_VBMC Acquisition fcn via vanilla uncertainty sampling. 
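% The value computed below is (the negative of) plain uncertainty sampling,
%     a(x) = vtot(x) * q(x)^2,
% the GP posterior predictive variance of the integrand under the
% variational posterior density q, negated for minimization.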
3 | 
4 | % Xs is in *transformed* coordinates
5 | 
6 | % Probability density of variational posterior at test points
7 | p = max(vbmc_pdf(vp,Xs,0),realmin);
8 | 
9 | % Uncertainty search
10 | acq = -vtot .* p.^2;
11 | 
12 | end
--------------------------------------------------------------------------------
/acq/acqviqr_vbmc.m:
--------------------------------------------------------------------------------
1 | function acq = acqviqr_vbmc(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot)
2 | %ACQVIQR_VBMC Variational integrated median interquantile range (VIQR) acquisition function.
3 | 
4 | u = 0.6745; % norminv(0.75)
5 | 
6 | if isempty(Xs)
7 |     % Return acquisition function info struct
8 |     acq.importance_sampling = true;
9 |     acq.importance_sampling_vp = false;
10 |     acq.variational_importance_sampling = true;
11 |     acq.log_flag = true;
12 |     return;
13 | elseif ischar(Xs)
14 |     switch lower(Xs)
15 |         case 'islogf1'
16 |             % Importance sampling log base proposal (shared part)
17 |             %Ns = size(fs2,2);
18 |             %acq = repmat(vp,[1,Ns]);
19 |             acq = zeros(size(fs2));
20 |         case 'islogf2'
21 |             % Importance sampling log base proposal (added part)
22 |             % (Full log base proposal is fixed + added)
23 |             fs = sqrt(fs2);
24 |             acq = u*fs + log1p(-exp(-2*u*fs));
25 |         case 'islogf'
26 |             % Importance sampling log base proposal distribution
27 |             fs = sqrt(fs2);
28 |             acq = vp + u*fs + log1p(-exp(-2*u*fs));
29 |     end
30 |     return;
31 | end
32 | 
33 | % Xs is in *transformed* coordinates
34 | 
35 | [Nx,D] = size(Xs);
36 | Ns = size(fmu,2);
37 | 
38 | % Estimate observation noise at test points from nearest neighbor
39 | [~,pos] = min(sq_dist(bsxfun(@rdivide,Xs,optimState.gplengthscale),gp.X_rescaled),[],2);
40 | sn2 = gp.sn2new(pos);
41 | % sn2 = min(sn2,1e4);
42 | ys2 = fs2 + sn2; % Predictive variance at test points
43 | 
44 | Xa = optimState.ActiveImportanceSampling.Xa;
45 | acq = zeros(Nx,Ns);
46 | 
47 | %% Compute integrated acquisition function via importance sampling
48 | 
49 | % Integrated mean function being used?
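% (When the GP has an integrated basis-function mean, gp.intmeanfun > 0 and
% the predictive covariance C computed in the loop below acquires extra
% correction terms from marginalizing over the basis-function weights.)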
50 | integrated_meanfun = isfield(gp,'intmeanfun') && gp.intmeanfun > 0; 51 | 52 | if integrated_meanfun 53 | % Evaluate basis functions 54 | plus_idx = gp.intmeanfun_var > 0; 55 | Ha = optimState.ActiveImportanceSampling.Ha; 56 | Hs = gplite_intmeanfun(Xs,gp.intmeanfun); 57 | Hs = Hs(plus_idx,:); 58 | end 59 | 60 | for s = 1:Ns 61 | hyp = gp.post(s).hyp; 62 | %L = gp.post(s).L; 63 | Lchol = gp.post(s).Lchol; 64 | %sn2_eff = 1/gp.post(s).sW(1)^2; 65 | 66 | % Compute cross-kernel matrix Ks_mat 67 | if gp.covfun(1) == 1 % Hard-coded SE-ard for speed 68 | ell = exp(hyp(1:D))'; 69 | sf2 = exp(2*hyp(D+1)); 70 | Xs_ell = bsxfun(@rdivide,Xs,ell); 71 | 72 | Ks_mat = sq_dist(bsxfun(@rdivide,gp.X,ell),Xs_ell); 73 | Ks_mat = sf2 * exp(-Ks_mat/2); 74 | 75 | Ka_mat = sq_dist(Xs_ell,bsxfun(@rdivide,Xa,ell)); 76 | Ka_mat = sf2 * exp(-Ka_mat/2); 77 | 78 | %Kax_mat = sq_dist(Xa*diag(1./ell),gp.X*diag(1./ell)); 79 | %Kax_mat = sf2 * exp(-Kax_mat/2); 80 | %Kax_mat(:,:) = optimState.ActiveImportanceSampling.Kax_mat(:,:,s); 81 | Ctmp_mat(:,:) = optimState.ActiveImportanceSampling.Ctmp_mat(:,:,s); 82 | else 83 | error('Other covariance functions not supported yet.'); 84 | end 85 | 86 | if Lchol 87 | % C = Ka_mat - Ks_mat'*(L\(L'\Kax_mat'))/sn2_eff; 88 | C = Ka_mat - Ks_mat'*Ctmp_mat; 89 | else 90 | % C = Ka_mat + Ks_mat'*(L*Kax_mat'); 91 | C = Ka_mat + Ks_mat'*Ctmp_mat; 92 | end 93 | 94 | if integrated_meanfun 95 | HKinv = gp.post(s).intmean.HKinv(plus_idx,:); 96 | Tplusinv = gp.post(s).intmean.Tplusinv; 97 | C = C + (Hs' - Ks_mat'*HKinv')*(Tplusinv*Ha) + (Ks_mat'*HKinv' - Hs')*(Tplusinv*(HKinv*Kax_mat')); 98 | end 99 | 100 | tau2 = bsxfun(@rdivide,C.^2,ys2(:,s)); 101 | s_pred = sqrt(max(bsxfun(@minus,optimState.ActiveImportanceSampling.fs2a(:,s)',tau2),0)); 102 | 103 | % lnw is zeros (VIQR uses simple Monte Carlo, no importance sampling) 104 | % lnw = optimState.ActiveImportanceSampling.lnw(s,:); 105 | % zz = bsxfun(@plus,lnw,u*s_pred + log1p(-exp(-2*u*s_pred))); 106 | zz = u*s_pred + log1p(-exp(-2*u*s_pred)); 107 | lnmax = max(zz,[],2); 108 | acq(:,s) = log(sum(exp(bsxfun(@minus,zz,lnmax)),2)) + lnmax; 109 | end 110 | 111 | if Ns > 1 112 | M = max(acq,[],2); 113 | acq = M + log(sum(exp(bsxfun(@minus,acq,M)),2)/Ns); 114 | end 115 | 116 | end 117 | 118 | 119 | %SQ_DIST Compute matrix of all pairwise squared distances between two sets 120 | % of vectors, stored in the columns of the two matrices, a (of size n-by-D) 121 | % and b (of size m-by-D). 122 | function C = sq_dist(a,b) 123 | 124 | n = size(a,1); 125 | m = size(b,1); 126 | mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1); 127 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu); 128 | C = bsxfun(@plus,sum(a.*a,2),bsxfun(@minus,sum(b.*b,2)',2*a*b')); 129 | C = max(C,0); 130 | 131 | end -------------------------------------------------------------------------------- /acq/acqwrapper_vbmc.m: -------------------------------------------------------------------------------- 1 | function acq = acqwrapper_vbmc(Xs,vp,gp,optimState,transpose_flag,acqFun,acqInfo) 2 | %ACQWRAPPER_VBMC Wrapper for all acquisition functions. 
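% Common pipeline shared by all acquisition functions:
% (1) undo the CMA-ES-style transposition and map integer variables;
% (2) compute the GP predictive mean and variance for each hyperparameter
%     sample (via Bayesian quadrature smoothing if vp.delta is set);
% (3) combine them into the total predictive variance
%         vtot(x) = mean_s fs2(x,s) + var_s fmu(x,s),
%     that is, the average predictive variance plus the variance of the
%     predictive means across hyperparameter samples;
% (4) evaluate the requested acquisition function ACQFUN;
% (5) penalize points whose GP variance falls below optimState.TolGPVar and
%     discard points too close to the hard bounds.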
3 | 
4 | % Transposed input (useful for CMAES)
5 | if transpose_flag; Xs = Xs'; end
6 | 
7 | % Map integer inputs
8 | Xs = real2int_vbmc(Xs,vp.trinfo,optimState.integervars);
9 | 
10 | %% Compute GP posterior predictive mean and variance
11 | 
12 | if isfield(vp,'delta') && ~isempty(vp.delta) && any(vp.delta > 0)
13 |     % Quadrature mean and variance for each hyperparameter sample
14 |     [fmu,fs2] = gplite_quad(gp,Xs,vp.delta',1);
15 | else
16 |     % GP mean and variance for each hyperparameter sample
17 |     [~,~,fmu,fs2] = gplite_pred(gp,Xs,[],[],1,0);
18 | end
19 | 
20 | % Compute total variance
21 | Ns = size(fmu,2);
22 | fbar = sum(fmu,2)/Ns;   % Mean across samples
23 | vbar = sum(fs2,2)/Ns;   % Average variance across samples
24 | if Ns > 1
25 |     vf = sum(bsxfun(@minus,fmu,fbar).^2,2)/(Ns-1);  % Sample variance
26 | else
27 |     vf = 0;
28 | end
29 | vtot = vf + vbar;       % Total variance
30 | 
31 | %% Compute acquisition function
32 | acq = acqFun(Xs,vp,gp,optimState,fmu,fs2,fbar,vtot);
33 | 
34 | %% Regularization: penalize points where GP uncertainty is below threshold
35 | if optimState.VarianceRegularizedAcqFcn
36 |     TolVar = optimState.TolGPVar; % Try not to go below this variance
37 |     idx = vtot < TolVar;
38 | 
39 |     if any(idx)
40 |         if isfield(acqInfo,'log_flag') && acqInfo.log_flag
41 |             acq(idx) = acq(idx) + TolVar./vtot(idx) - 1;
42 |         else
43 |             acq(idx) = acq(idx) .* exp(-(TolVar./vtot(idx)-1));
44 |         end
45 |     end
46 | end
47 | acq = max(acq,-realmax);
48 | 
49 | %% Hard bound checking: discard points too close to bounds
50 | X_orig = warpvars_vbmc(Xs,'i',vp.trinfo);
51 | idx = any(bsxfun(@lt,X_orig,optimState.LBeps_orig),2) | any(bsxfun(@gt,X_orig,optimState.UBeps_orig),2);
52 | acq(idx) = Inf;
53 | 
54 | % Transposed output
55 | if transpose_flag; acq = acq'; end
56 | 
57 | end
--------------------------------------------------------------------------------
/docs/README.txt:
--------------------------------------------------------------------------------
1 | Variational Bayesian Monte Carlo (VBMC) documentation
2 | ==============================================================================================
3 | 
4 | For a description of the usage of VBMC, type
5 | 
6 |   > help vbmc
7 | 
8 | in the MATLAB shell.
9 | 
10 | You can also look up the 'vbmc_examples.m' script for a tutorial with commented examples.
11 | 
12 | For any other question, clarification, or troubleshooting, check out:
13 | 
14 | - the VBMC page: https://github.com/acerbilab/vbmc
15 | - the online FAQ: https://github.com/acerbilab/vbmc/wiki
16 | 
17 | ==============================================================================================
18 | 
19 | The algorithm is described in the following references:
20 | 
21 | 1) Acerbi, L. (2018). "Variational Bayesian Monte Carlo". In Advances in Neural Information
22 |    Processing Systems 31 (NeurIPS 2018), pp. 8213-8223.
23 | 2) Acerbi, L. (2020). "Variational Bayesian Monte Carlo with Noisy Likelihoods". In Advances
24 |    in Neural Information Processing Systems 33 (NeurIPS 2020), pp. 8211-8222.
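==============================================================================================

A minimal usage sketch (see the 'vbmc_examples.m' script for complete, commented examples;
'fun' here stands in for a handle to your log joint density):

  > fun = @(x) -sum(x.^2)/2;               % toy example: unnormalized standard normal
  > [vp,elbo,elbo_sd] = vbmc(fun,[0 0],[-Inf -Inf],[Inf Inf],[-5 -5],[5 5]);
  > Xs = vbmc_rnd(vp,1e5);                 % samples from the variational posterior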
25 | -------------------------------------------------------------------------------- /docs/vbmc-demo-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/docs/vbmc-demo-2.gif -------------------------------------------------------------------------------- /docs/vbmc-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/docs/vbmc-demo.gif -------------------------------------------------------------------------------- /docs/vbmc-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/docs/vbmc-demo.png -------------------------------------------------------------------------------- /docs/vbmc2020-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/docs/vbmc2020-demo.gif -------------------------------------------------------------------------------- /ent/entmc_vbmc.m: -------------------------------------------------------------------------------- 1 | function [H,dH] = entmc_vbmc(vp,Ns,grad_flags,jacobian_flag) 2 | %ENTMC_VBMC Monte Carlo estimate of entropy of variational posterior 3 | 4 | if nargin < 2 || isempty(Ns); Ns = 10; end 5 | % Check if gradient computation is required 6 | if nargout < 2 % No 2nd output, no gradients 7 | grad_flags = false; 8 | elseif nargin < 3 || isempty(grad_flags) % By default compute all gradients 9 | grad_flags = true; 10 | end 11 | if isscalar(grad_flags); grad_flags = ones(1,4)*grad_flags; end 12 | 13 | % By default assume variational parameters were transformed (before the call) 14 | if nargin < 4 || isempty(jacobian_flag); jacobian_flag = true; end 15 | 16 | D = vp.D; % Number of dimensions 17 | K = vp.K; % Number of components 18 | mu(:,:) = vp.mu; 19 | sigma(1,:) = vp.sigma; 20 | lambda(:,1) = vp.lambda(:); 21 | w(1,:) = vp.w; 22 | 23 | % Check which gradients are computed 24 | if grad_flags(1); mu_grad = zeros(D,K); else, mu_grad = []; end 25 | if grad_flags(2); sigma_grad = zeros(K,1); else, sigma_grad = []; end 26 | if grad_flags(3); lambda_grad = zeros(D,1); else, lambda_grad = []; end 27 | if grad_flags(4); w_grad = zeros(K,1); else, w_grad = []; end 28 | 29 | % Reshape in 4-D to allow massive vectorization 30 | mu_4 = zeros(D,1,1,K); 31 | mu_4(:,1,1,:) = reshape(mu,[D,1,1,K]); 32 | sigma_4(1,1,1,:) = sigma; 33 | w_4(1,1,1,:) = w; 34 | 35 | sigmalambda = bsxfun(@times, sigma_4, lambda); 36 | nconst = 1/(2*pi)^(D/2)/prod(lambda); 37 | 38 | lambda_t = vp.lambda(:)'; % LAMBDA is a row vector 39 | mu_t(:,:) = vp.mu'; % MU transposed 40 | nf = 1/(2*pi)^(D/2)/prod(lambda); % Common normalization factor 41 | 42 | H = 0; 43 | 44 | % Make sure Ns is even 45 | Ns = ceil(Ns/2)*2; 46 | epsilon = zeros(D,1,Ns); 47 | 48 | % Loop over mixture components for generating samples 49 | for j = 1:K 50 | 51 | % Draw Monte Carlo samples from the j-th component 52 | % epsilon = randn(D,1,Ns); 53 | epsilon(:,1,1:Ns/2) = randn(D,1,Ns/2); % Antithetic sampling 54 | epsilon(:,1,Ns/2+1:end) = -epsilon(:,1,1:Ns/2); 55 | xi = bsxfun(@plus, bsxfun(@times, bsxfun(@times, epsilon, lambda), sigma(j)), mu_4(:,1,1,j)); 56 | 57 | Xs = reshape(xi,[D,Ns])'; 58 | 59 | % Compute pdf -- this 
block is equivalent to: ys = vbmc_pdf(vp,Xs,0); 60 | ys = zeros(Ns,1); 61 | for k = 1:K 62 | d2 = sum(bsxfun(@rdivide,bsxfun(@minus,Xs,mu_t(k,:)),sigma(k)*lambda_t).^2,2); 63 | nn = w(k)*nf/sigma(k)^D*exp(-0.5*d2); 64 | ys = ys + nn; 65 | end 66 | 67 | H = H - w(j)*sum(log(ys))/Ns; 68 | 69 | % Compute gradient via reparameterization trick 70 | if any(grad_flags) 71 | % Full mixture (for sample from the j-th component) 72 | norm_jl = bsxfun(@times, nconst./(sigma_4.^D), exp(-0.5*sum(bsxfun(@rdivide, bsxfun(@minus, xi, mu_4), sigmalambda).^2,1))); 73 | q_j = sum(bsxfun(@times,w_4,norm_jl),4); 74 | 75 | % Compute sum for gradient wrt mu 76 | % lsum = sum(bsxfun(@times,bsxfun(@rdivide, bsxfun(@minus, xi, mu_4), sigmalambda.^2), norm_jl),4); 77 | lsum = sum(bsxfun(@times, ... 78 | bsxfun(@rdivide, bsxfun(@minus, xi, mu_4), sigmalambda.^2),... 79 | bsxfun(@times,norm_jl,w_4)),4); 80 | 81 | if grad_flags(1) 82 | mu_grad(:,j) = w(j)*sum(bsxfun(@rdivide, lsum, q_j),3) / Ns; 83 | end 84 | 85 | if grad_flags(2) 86 | % Compute sum for gradient wrt sigma 87 | isum = sum(bsxfun(@times,lsum,bsxfun(@times, epsilon, lambda)),1); 88 | sigma_grad(j) = w(j) * sum(bsxfun(@rdivide, isum, q_j),3) / Ns; 89 | end 90 | 91 | if grad_flags(3) 92 | % Should be dividing by LAMBDA, see below 93 | lambda_grad = lambda_grad + sum(bsxfun(@times, lsum, bsxfun(@rdivide, w(j)*sigma(j)*epsilon,q_j)),3) / Ns; 94 | end 95 | 96 | if grad_flags(4) 97 | w_grad(j) = w_grad(j) - sum(log(q_j))/Ns; 98 | % w_grad(:) = w_grad(:) - w(j)*sum(norm_jl(1,1,:,j)./q_j)/Ns; 99 | % Fix by Chengkun Li 100 | w_grad(:) = w_grad(:) - w(j)*squeeze(sum(norm_jl(1,1,:,:)./q_j,3))/Ns; 101 | end 102 | 103 | end 104 | end 105 | 106 | if grad_flags(3) 107 | lambda_grad = bsxfun(@times,lambda_grad,lambda); % Reparameterization 108 | end 109 | 110 | if nargout > 1 111 | % Correct for standard log reparameterization of SIGMA 112 | if jacobian_flag && grad_flags(2) 113 | sigma_grad = bsxfun(@times,sigma_grad, sigma(:)); 114 | end 115 | % Correct if NOT using standard log reparameterization of LAMBDA 116 | if ~jacobian_flag && grad_flags(3) 117 | lambda_grad = bsxfun(@rdivide,lambda_grad, lambda(:)); 118 | end 119 | % Correct for standard softmax reparameterization of W 120 | if jacobian_flag && grad_flags(4) 121 | eta_sum = sum(exp(vp.eta)); 122 | J_w = bsxfun(@times,-exp(vp.eta)',exp(vp.eta)/eta_sum^2) + diag(exp(vp.eta)/eta_sum); 123 | w_grad = J_w*w_grad; 124 | end 125 | dH = [mu_grad(:); sigma_grad(:); lambda_grad(:); w_grad(:)]; 126 | end 127 | 128 | end -------------------------------------------------------------------------------- /ent/entub_vbmc.m: -------------------------------------------------------------------------------- 1 | function [H,dH] = entub_vbmc(vp,grad_flags,jacobian_flag) 2 | %ENTUB_VBMC Entropy upper bound for variational posterior 3 | 4 | % Uses entropy upper bound of multivariate normal approximation 5 | 6 | % Check if gradient computation is required 7 | if nargout < 2 % No 2nd output, no gradients 8 | grad_flags = false; 9 | elseif nargin < 2 || isempty(grad_flags) % By default compute all gradients 10 | grad_flags = true; 11 | end 12 | if isscalar(grad_flags); grad_flags = ones(1,4)*grad_flags; end 13 | 14 | % By default assume variational parameters were transformed (before the call) 15 | if nargin < 3 || isempty(jacobian_flag); jacobian_flag = true; end 16 | 17 | D = vp.D; % Number of dimensions 18 | K = vp.K; % Number of components 19 | mu(:,:) = vp.mu; 20 | sigma(1,:) = vp.sigma; 21 | lambda(:,1) = vp.lambda(:); 22 | w(1,:) = 
vp.w; 23 | 24 | % Check which gradients are computed 25 | if grad_flags(1); mu_grad = zeros(D,K); dS_mu = zeros(D,D,K); else, mu_grad = []; end 26 | if grad_flags(2); sigma_grad = zeros(K,1); else, sigma_grad = []; end 27 | if grad_flags(3); lambda_grad = zeros(D,1); else, lambda_grad = []; end 28 | if grad_flags(4); w_grad = zeros(K,1); dS_w = zeros(D,D,K); else, w_grad = []; end 29 | 30 | if K == 1 31 | % Entropy of single component, uses exact expression 32 | H = 0.5*D*(1 + log(2*pi)) + D*sum(log(sigma)) + sum(log(lambda)); 33 | 34 | if grad_flags(2) 35 | sigma_grad(:) = D./sigma(:); 36 | end 37 | 38 | if grad_flags(3) 39 | % Should be dividing by LAMBDA, see below 40 | lambda_grad(:) = ones(D,1); % 1./lambda(:); 41 | end 42 | 43 | if grad_flags(4) 44 | w_grad = 0; 45 | end 46 | else 47 | 48 | Mu = sum(bsxfun(@times,vp.w,vp.mu),2); 49 | Sigma = zeros(D,D); 50 | delta_mu = bsxfun(@minus,mu,Mu); 51 | for k = 1:K 52 | S_k = diag((lambda*sigma(k)).^2) + delta_mu(:,k)*delta_mu(:,k)'; 53 | Sigma = Sigma + w(k)*S_k; 54 | if grad_flags(4); dS_w(:,:,k) = S_k; end 55 | end 56 | L = chol(Sigma); 57 | 58 | H = 0.5*D*(log(2*pi) + 1) + sum(log(diag(L))); 59 | 60 | if any(grad_flags) 61 | invK = L\(L'\eye(D)); 62 | 63 | if grad_flags(1) 64 | for k = 1:K 65 | mu_grad((1:D)+(k-1)*D) = 0.5*w(k).*(sum(bsxfun(@times,invK,delta_mu(:,k)'),2) + sum(bsxfun(@times,invK,delta_mu(:,k)),1)'); 66 | end 67 | end 68 | 69 | if grad_flags(2) 70 | Q = sum(sum(invK.*diag(lambda.^2))); 71 | sigma_grad(:) = Q*(w.*sigma); 72 | end 73 | 74 | if grad_flags(3) 75 | lambda_grad(:) = diag(invK).*lambda.^2*sum(w.*(sigma.^2)); 76 | end 77 | 78 | if grad_flags(4) 79 | for k = 1:K 80 | w_grad(k) = 0.5*sum(sum(invK.*dS_w(:,:,k))); 81 | end 82 | end 83 | end 84 | end 85 | 86 | if nargout > 1 87 | % Correct for standard log reparameterization of SIGMA 88 | if jacobian_flag && grad_flags(2) 89 | sigma_grad = bsxfun(@times,sigma_grad, sigma(:)); 90 | end 91 | % Correct if NOT using standard log reparameterization of LAMBDA 92 | if ~jacobian_flag && grad_flags(3) 93 | lambda_grad = bsxfun(@rdivide,lambda_grad, lambda(:)); 94 | end 95 | % Correct for standard softmax reparameterization of W 96 | if jacobian_flag && grad_flags(4) 97 | eta_sum = sum(exp(vp.eta)); 98 | J_w = bsxfun(@times,-exp(vp.eta)',exp(vp.eta)/eta_sum^2) + diag(exp(vp.eta)/eta_sum); 99 | w_grad = J_w*w_grad; 100 | end 101 | dH = [mu_grad(:); sigma_grad(:); lambda_grad(:); w_grad(:)]; 102 | end 103 | 104 | end -------------------------------------------------------------------------------- /gplite/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Luigi Acerbi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /gplite/README.md: -------------------------------------------------------------------------------- 1 | # gplite 2 | Lite Gaussian process regression toolbox 3 | -------------------------------------------------------------------------------- /gplite/gplite_clean.m: -------------------------------------------------------------------------------- 1 | function gp = gplite_clean(gp) 2 | %GPLITE_CLEAN Remove auxiliary info from lite GP struct (less memory usage). 3 | % GP = GPLITE_CLEAN(GP) removes auxiliary computational structs from 4 | % the GP. These can be reconstructed via a call to GPLITE_POST. 5 | % 6 | % See also GPLITE_POST. 7 | 8 | if ~isempty(gp) && isfield(gp,'post') 9 | copyfields = {'hyp'}; 10 | emptyfields = {'alpha','sW','L','sn2_mult','Lchol'}; 11 | checkfields = {'intmean'}; 12 | for ff = copyfields; post0.(ff{:}) = []; end 13 | for ff = emptyfields; post0.(ff{:}) = []; end 14 | for ff = checkfields 15 | if isfield(gp.post(1),ff{:}); post0.(ff{:}) = []; end 16 | end 17 | 18 | for iG = 1:numel(gp) 19 | Ns = numel(gp(iG).post); 20 | postnew = post0; 21 | for iS = 1:Ns 22 | post_tmp = post0; 23 | for ff = copyfields 24 | post_tmp.(ff{:}) = gp(iG).post(iS).(ff{:}); 25 | end 26 | postnew(iS) = post_tmp; 27 | end 28 | gp(iG).post = postnew; 29 | end 30 | end -------------------------------------------------------------------------------- /gplite/gplite_covfun.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/gplite/gplite_covfun.m -------------------------------------------------------------------------------- /gplite/gplite_demo.m: -------------------------------------------------------------------------------- 1 | %GPLITE_DEMO Demo script with example usage for the GPLITE toolbox. 
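% Outline of the demo: (1) generate 1-D example data; (2) choose the GP
% covariance, mean and noise functions plus an output warping function;
% (3) build a prior over hyperparameters via gplite_hypprior; (4) train the
% GP with gplite_train, drawing hyperparameter samples; (5) predict at test
% points with gplite_pred and plot data and fit with gplite_plot.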
2 | 3 | % Create example data in 1D 4 | N = 31; 5 | X = linspace(-5,5,N)'; 6 | s2 = 0.00*0.1*exp(0.5*X); 7 | y = sin(X) + sqrt(s2).*randn(size(X)); 8 | y(y<0) = -abs(3*y(y<0)).^2; 9 | s2 = []; 10 | 11 | %idx = N+1:N+3; 12 | %X(idx) = linspace(6,7,numel(idx))'; 13 | %s2(idx) = 1e-4; 14 | %y(idx(randperm(numel(idx)))) = -linspace(1000,1001,numel(idx))'; 15 | 16 | hyp0 = []; % Starting hyperparameter vector for optimization 17 | Ns = 10; % Number of hyperparameter samples 18 | covfun = [3 3]; % GP covariance function 19 | meanfun = 4; % GP mean function 20 | noisefun = [1 0 0]; % Constant plus user-provided noise 21 | hprior = []; % Prior over hyperparameters 22 | options = []; % Additional options 23 | 24 | % Output warping function 25 | outwarpfun = @outwarp_negpow; 26 | %outwarpfun = []; 27 | options.OutwarpFun = outwarpfun; 28 | 29 | % Set prior over noise hyperparameters 30 | gp = gplite_post([],X,y,covfun,meanfun,noisefun,s2,[],outwarpfun); 31 | hprior = gplite_hypprior(gp); 32 | 33 | hprior.mu(gp.Ncov+1) = log(1e-3); 34 | hprior.sigma(gp.Ncov+1) = 0.5; 35 | 36 | if gp.Nnoise > 1 37 | hprior.LB(gp.Ncov+2) = log(5); 38 | hprior.mu(gp.Ncov+2) = log(10); 39 | hprior.sigma(gp.Ncov+2) = 0.01; 40 | 41 | hprior.mu(gp.Ncov+3) = log(0.3); 42 | hprior.sigma(gp.Ncov+3) = 0.01; 43 | hprior.df(gp.Ncov+3) = Inf; 44 | end 45 | 46 | if ~isempty(outwarpfun) 47 | hprior.mu(gp.Ncov+gp.Nnoise+gp.Nmean+2) = 0; 48 | hprior.sigma(gp.Ncov+gp.Nnoise+gp.Nmean+2) = 1; 49 | hprior.mu(gp.Ncov+gp.Nnoise+gp.Nmean+3) = 0; 50 | hprior.sigma(gp.Ncov+gp.Nnoise+gp.Nmean+3) = 1; 51 | end 52 | 53 | % Train GP on data 54 | [gp,hyp,output] = gplite_train(hyp0,Ns,X,y,covfun,meanfun,noisefun,s2,hprior,options); 55 | 56 | hyp % Hyperparameter samples 57 | 58 | xstar = linspace(-15,15,200)'; % Test points 59 | 60 | % Compute GP posterior predictive mean and variance at test points 61 | [ymu,ys2,fmu,fs2] = gplite_pred(gp,xstar); 62 | 63 | % Plot data and GP prediction 64 | close all; 65 | figure(1); hold on; 66 | gplite_plot(gp); -------------------------------------------------------------------------------- /gplite/gplite_fmin.m: -------------------------------------------------------------------------------- 1 | function [x,fval,gp] = gplite_fmin(gp,x0,maxflag) 2 | %GPLITE_FMIN Find global minimum (or maximum) of GP. 3 | 4 | if nargin < 2; x0 = 0; end 5 | if nargin < 3 || isempty(maxflag); maxflag = 0; end 6 | 7 | MaxBnd = 10; 8 | hpd_frac = 0.5; 9 | D = size(gp.X,2); 10 | N0 = size(x0,1); 11 | Nstarts = max(3,N0); 12 | 13 | diam = max(gp.X) - min(gp.X); 14 | LB = min(gp.X) - MaxBnd*diam; 15 | UB = max(gp.X) + MaxBnd*diam; 16 | 17 | % First, train GP 18 | if ~isfield(gp,'post') || isempty(gp.post) 19 | % How many samples for the GP? 20 | if isfield(gp,'Ns') && ~isempty(gp.Ns); Ns_gp = gp.Ns; else; Ns_gp = 0; end 21 | options.Nopts = 1; % Do only one optimization 22 | gp = gplite_train(... 
23 | [],Ns_gp,gp.X,gp.y,gp.covfun,gp.meanfun,gp.noisefun,[],[],options); 24 | end 25 | 26 | % Start from the min (or max) of the training data 27 | if maxflag 28 | [~,ord] = sort(gp.y,'descend'); 29 | else 30 | [~,ord] = sort(gp.y,'ascend'); 31 | end 32 | 33 | % Take best for sure 34 | X = gp.X(ord,:); 35 | x0 = [x0; X(1,:)]; 36 | X(1,:) = []; 37 | 38 | if Nstarts > N0+1 39 | Nx = size(X,1); 40 | N_hpd = ceil(Nx*hpd_frac); 41 | idx = randperm(N_hpd,min(Nstarts-N0,N_hpd)); 42 | x0 = [x0; X(idx,:)]; 43 | end 44 | 45 | N0 = size(x0,1); 46 | x = zeros(N0,D); 47 | f = zeros(N0,1); 48 | opts = optimoptions('fmincon','GradObj','off','Display','off'); 49 | for i = 1:N0 50 | [x(i,:),f(i)] = fmincon(@(x) optfun(x,gp,maxflag),x0(i,:),[],[],[],[],LB,UB,[],opts); 51 | end 52 | 53 | [fval,idx] = min(f); 54 | x = x(idx,:); 55 | 56 | if maxflag; fval = -fval; end 57 | 58 | end 59 | 60 | function [f,df] = optfun(x,gp,maxflag) 61 | 62 | if nargout > 1 63 | [f,df] = gplite_pred(gp,x); 64 | else 65 | f = gplite_pred(gp,x); 66 | end 67 | 68 | if maxflag % Want to find maximum, swap sign 69 | f = -f; 70 | if nargout > 1; df = -df; end 71 | end 72 | 73 | end -------------------------------------------------------------------------------- /gplite/gplite_hypprior.m: -------------------------------------------------------------------------------- 1 | function [lp,dlp] = gplite_hypprior(hyp,hprior) 2 | %GPLITE_HYPPRIOR Log priors for hyperparameters of lite GP regression. 3 | 4 | if isstruct(hyp) 5 | % Return an empty hyperprior struct 6 | if isfield(hyp,'Noutwarp'); Noutwarp = hyp.Noutwarp; else; Noutwarp = 0; end 7 | Nhyp = hyp.Ncov + hyp.Nnoise + hyp.Nmean + Noutwarp; 8 | hprior.mu = NaN(Nhyp,1); 9 | hprior.sigma = NaN(Nhyp,1); 10 | hprior.df = NaN(Nhyp,1); 11 | hprior.LB = NaN(Nhyp,1); 12 | hprior.UB = NaN(Nhyp,1); 13 | lp = hprior; dlp = []; 14 | else 15 | 16 | compute_grad = nargout > 1; % Compute gradient if required 17 | 18 | [Nhyp,Ns] = size(hyp); % Hyperparameters and samples 19 | if Ns > 1 20 | error('gplite_hypprior:nosampling', ... 21 | 'Hyperparameter log priors are available only for one-sample hyperparameter inputs.'); 22 | end 23 | 24 | lp = 0; 25 | if compute_grad; dlp = zeros(Nhyp,1); end 26 | 27 | mu = hprior.mu(:); 28 | sigma = abs(hprior.sigma(:)); 29 | if ~isfield(hprior,'df') || isempty(hprior.df) % Degrees of freedom 30 | df = 7*ones(Nhyp,1); % ~ from Gelman et al. (2009) 31 | else 32 | df = hprior.df(:); 33 | end 34 | 35 | uidx = ~isfinite(mu) | ~isfinite(sigma); % Uniform 36 | gidx = ~uidx & (df == 0 | ~isfinite(df)) & isfinite(sigma); % Gaussian 37 | tidx = ~uidx & df > 0 & isfinite(df); % Student's t 38 | 39 | % Quadratic form 40 | z2 = zeros(Nhyp,1); 41 | z2(gidx | tidx) = ((hyp(gidx | tidx) - mu(gidx | tidx))./sigma(gidx | tidx)).^2; 42 | 43 | % Gaussian prior 44 | if any(gidx) 45 | lp = lp -0.5*sum(log(2*pi*sigma(gidx).^2) + z2(gidx)); 46 | if compute_grad 47 | dlp(gidx) = -(hyp(gidx) - mu(gidx))./sigma(gidx).^2; 48 | end 49 | end 50 | 51 | % Student's t prior 52 | if any(tidx) 53 | lp = lp + sum(gammaln(0.5*(df(tidx)+1)) - gammaln(0.5*df(tidx)) - 0.5*log(pi*df(tidx)) ... 
54 |         - log(sigma(tidx)) - 0.5*(df(tidx)+1).*log1p(z2(tidx)./df(tidx)));
55 |     if compute_grad
56 |         dlp(tidx) = -(df(tidx)+1)./df(tidx)./(1+z2(tidx)./df(tidx)).*(hyp(tidx) - mu(tidx))./sigma(tidx).^2;
57 |     end
58 | end
59 | end
--------------------------------------------------------------------------------
/gplite/gplite_intmeanfun.m:
--------------------------------------------------------------------------------
1 | function H = gplite_intmeanfun(X,intmeanfun,y,extras)
2 | %GPLITE_INTMEANFUN Integrated mean function for lite Gaussian Process regression.
3 | %   H = GPLITE_INTMEANFUN(X,INTMEANFUN) computes the NB-by-N matrix H of
4 | %   basis functions for the integrated (semi-parametric) mean function
5 | %   INTMEANFUN, evaluated at the N test points X. The GP mean is modeled
6 | %   as a linear combination of the NB basis functions, whose weights are
7 | %   integrated out analytically. INTMEANFUN can be a scalar or a character
8 | %   array specifying the integrated mean function, as follows:
9 | %
10 | %   INTMEANFUN          MEAN FUNCTION TYPE          BASIS FUNCTIONS (NB)
11 | %   1 or 'const'        constant                    1
12 | %   2 or 'linear'       linear                      1+D
13 | %   3 or 'quadratic'    quadratic (diagonal)        1+2*D
14 | %   4 or 'fullquad'     quadratic (full)            1+2*D+D*(D-1)/2
15 | %
16 | %   See also GPLITE_MEANFUN, GPLITE_POST.
17 | 
18 | [N,D] = size(X);            % Number of training points and dimension
19 | 
20 | switch intmeanfun
21 |     case {1,'1','const'}
22 |         intmeanfun = 1;
23 |         Nb = 1;
24 |     case {2,'2','linear'}
25 |         intmeanfun = 2;
26 |         Nb = 1 + D;
27 |     case {3,'3','quadratic'}
28 |         intmeanfun = 3;
29 |         Nb = 1 + 2*D;
30 |     case {4,'4','full','fullquad','fullquadratic'}
31 |         intmeanfun = 4;
32 |         Nb = 1 + 2*D + D*(D-1)/2;
33 |     otherwise
34 |         if isnumeric(intmeanfun); intmeanfun = num2str(intmeanfun); end
35 |         error('gplite_intmeanfun:UnknownMeanFun',...
36 |             ['Unknown integrated mean function identifier: [' intmeanfun '].']);
37 | end
38 | 
39 | H = zeros(Nb,N);
40 | 
41 | if intmeanfun >= 1
42 |     H(1,:) = 1;
43 | end
44 | if intmeanfun >= 2
45 |     H(2:D+1,:) = X';
46 | end
47 | if intmeanfun >= 3
48 |     H(D+2:2*D+1,:) = X'.^2;
49 | end
50 | if intmeanfun >= 4
51 |     idx = 0;
52 |     for d = 1:D-1
53 |         H(1+2*D+idx+(1:D-d),:) = bsxfun(@times,X(:,d)',X(:,d+1:D)');
54 |         idx = idx + D-d;
55 |     end
56 | end
57 | 
58 | end
--------------------------------------------------------------------------------
/gplite/gplite_nlZ.m:
--------------------------------------------------------------------------------
1 | function [nlZ,dnlZ,post,K_mat,Q] = gplite_nlZ(hyp,gp,hprior)
2 | %GPLITE_NLZ Negative log marginal likelihood for lite GP regression.
3 | %   [NLZ,DNLZ] = GPLITE_NLZ(HYP,GP) computes the negative log marginal
4 | %   likelihood NLZ and its gradient DNLZ for hyperparameter vector HYP.
5 | %   HYP is a column vector (see below). GP is a GPLITE struct.
6 | %
7 | %   [NLZ,DNLZ] = GPLITE_NLZ(HYP,GP,HPRIOR) uses the prior over hyperparameters
8 | %   defined by the struct HPRIOR. HPRIOR has fields HPRIOR.mu, HPRIOR.sigma
9 | %   and HPRIOR.df which contain vectors representing, respectively, the mean,
10 | %   standard deviation and degrees of freedom of the prior for each
11 | %   hyperparameter. Priors are generally represented by Student's t distributions.
12 | %   Set HPRIOR.df(i) = Inf to have instead a Gaussian prior for the i-th
13 | %   hyperparameter. Set HPRIOR.sigma(i) = Inf to have a (non-normalized)
14 | %   flat prior over the i-th hyperparameter. Priors are defined in
15 | %   transformed hyperparameter space (i.e., log space for positive-only
16 | %   hyperparameters).
17 | %
18 | %   [NLZ,DNLZ,POST] = GPLITE_NLZ(...) also returns a POST structure
19 | %   associated with the provided hyperparameters.
20 | %
21 | %   [NLZ,DNLZ,POST,K_MAT] = GPLITE_NLZ(...) also returns the computed
22 | %   kernel matrix K_MAT.
23 | %
24 | %   [NLZ,DNLZ,POST,K_MAT,Q] = GPLITE_NLZ(...) also returns the computed
25 | %   auxiliary matrix Q used for computing derivatives.
26 | 
27 | if nargin < 3; hprior = []; end
28 | 
29 | [Nhyp,Ns] = size(hyp);          % Hyperparameters and samples
30 | compute_grad = nargout > 1;     % Compute gradient if required
31 | 
32 | Ncov = gp.Ncov;
33 | Nnoise = gp.Nnoise;
34 | Nmean = gp.Nmean;
35 | if isfield(gp,'Noutwarp'); Noutwarp = gp.Noutwarp; else; Noutwarp = 0; end
36 | 
37 | if Nhyp ~= (Ncov+Nnoise+Nmean+Noutwarp)
38 |     error('gplite_nlZ:dimmismatch','Number of hyperparameters mismatched with dimension of training inputs.');
39 | end
40 | if compute_grad && Ns > 1
41 |     error('gplite_nlZ:NoSampling', ...
42 | 'Computation of the log marginal likelihood is available only for one-sample hyperparameter inputs.'); 43 | end 44 | 45 | switch nargout 46 | case {1,2} 47 | [nlZ,dnlZ] = gplite_core(hyp,gp,1,compute_grad); 48 | case 3 49 | [nlZ,dnlZ,post] = gplite_core(hyp,gp,1,compute_grad); 50 | case 4 51 | [nlZ,dnlZ,post,K_mat] = gplite_core(hyp,gp,1,compute_grad); 52 | case 5 53 | [nlZ,dnlZ,post,K_mat,Q] = gplite_core(hyp,gp,1,compute_grad); 54 | end 55 | 56 | % Compute hyperparameter prior if specified 57 | if ~isempty(hprior) 58 | if compute_grad 59 | [P,dP] = gplite_hypprior(hyp,hprior); 60 | nlZ = nlZ - P; 61 | dnlZ = dnlZ - dP; 62 | else 63 | P = gplite_hypprior(hyp,hprior); 64 | nlZ = nlZ - P; 65 | end 66 | end 67 | 68 | end -------------------------------------------------------------------------------- /gplite/gplite_qpred.m: -------------------------------------------------------------------------------- 1 | function y = gplite_qpred(gp,p,type,Xstar,ystar,s2star) 2 | %GPLITE_QPRED Quantile prediction for lite Gaussian Processes regression. 3 | 4 | if nargin < 5; ystar = []; end 5 | if nargin < 6; s2star = []; end 6 | 7 | Ns = numel(gp.post); % Hyperparameter samples 8 | Nstar = size(Xstar,1); % Number of test inputs 9 | 10 | nx = 10; 11 | xx = norminv(linspace(0.5/nx,1-0.5/nx,nx)); 12 | 13 | switch lower(type(1)) 14 | case 'y'; obs_flag = true; 15 | case 'f'; obs_flag = false; 16 | otherwise 17 | error('gplite_qpred:unknowntype', ... 18 | 'Quantile prediction TYPE should be ''y'' for predicted observations or ''F'' for predicted latent function.'); 19 | end 20 | 21 | % Output warping function 22 | outwarp_flag = isfield(gp,'outwarpfun') && ~isempty(gp.outwarpfun); 23 | if outwarp_flag 24 | Noutwarp = gp.Noutwarp; 25 | fmu_prewarp = zeros(Nstar,Ns); 26 | else 27 | Noutwarp = 0; 28 | end 29 | 30 | % Get GP prediction (observed or latent), by hyperparameter sample 31 | if obs_flag 32 | [gmu,gs2] = gplite_pred(gp,Xstar,ystar,s2star,1,1); 33 | else 34 | [~,~,gmu,gs2] = gplite_pred(gp,Xstar,ystar,s2star,1,1); 35 | end 36 | 37 | y = zeros(Nstar,Ns*nx); 38 | 39 | for s = 1:Ns 40 | grid = bsxfun(@plus,gmu(:,s),bsxfun(@times,sqrt(gs2(:,s)),xx)); 41 | if outwarp_flag 42 | hyp = gp.post(s).hyp; 43 | hyp_outwarp = hyp(gp.Ncov+gp.Nnoise+gp.Nmean+1:gp.Ncov+gp.Nnoise+gp.Nmean+Noutwarp); 44 | grid = gp.outwarpfun(hyp_outwarp,grid,'inv'); 45 | end 46 | y(:,(1:nx)+(s-1)*nx) = grid; 47 | end 48 | 49 | y = quantile(y,p,2); 50 | 51 | -------------------------------------------------------------------------------- /gplite/gplite_quad.m: -------------------------------------------------------------------------------- 1 | function [F,varF] = gplite_quad(gp,mu,sigma,ssflag) 2 | %GPLITE_QUAD Bayesian quadrature for given Gaussian process. 3 | 4 | if nargin < 4 || isempty(ssflag); ssflag = false; end 5 | 6 | compute_var = nargout > 1; % Compute variance of the integral? 7 | 8 | [N,D] = size(gp.X); % Number of training points and dimension 9 | Ns = numel(gp.post); % Hyperparameter samples 10 | 11 | % Number of GP hyperparameters 12 | Ncov = gp.Ncov; 13 | Nnoise = gp.Nnoise; 14 | Nmean = gp.Nmean; 15 | 16 | if all(gp.meanfun ~= [0 1 4 6 8]) 17 | error('gplite_quad:UnsupportedMeanFun', ... 18 | 'Bayesian quadrature currently only supports zero, constant, negative quadratic, or squared exponential mean functions.'); 19 | end 20 | 21 | if gp.covfun ~= 1 22 | error('gplite_quad:UnsupportedCovFun', ... 
23 | 'Bayesian quadrature only supports the squared exponential kernel.'); 24 | end 25 | 26 | Nstar = size(mu,1); 27 | if size(sigma,1) == 1; sigma = repmat(sigma,[Nstar,1]); end 28 | 29 | % Which mean function is being used? 30 | quadratic_meanfun = gp.meanfun == 4; 31 | sqexp_meanfun = gp.meanfun == 6; 32 | quadsqexp_meanfun = gp.meanfun == 8; 33 | 34 | F = zeros(Nstar,Ns); 35 | if compute_var; varF = zeros(Nstar,Ns); end 36 | 37 | % Loop over hyperparameter samples 38 | for s = 1:Ns 39 | hyp = gp.post(s).hyp; 40 | 41 | % Extract GP hyperparameters from HYP 42 | ell(1,:) = exp(hyp(1:D)); 43 | ln_sf2 = 2*hyp(D+1); 44 | sum_lnell = sum(hyp(1:D)); 45 | 46 | % GP mean function hyperparameters 47 | if gp.meanfun > 0; m0 = hyp(Ncov+Nnoise+1); else; m0 = 0; end 48 | if quadratic_meanfun || sqexp_meanfun || quadsqexp_meanfun 49 | xm(1,:) = hyp(Ncov+Nnoise+1+(1:D)); 50 | omega(1,:) = exp(hyp(Ncov+Nnoise+D+1+(1:D))); 51 | if sqexp_meanfun 52 | h = exp(hyp(Ncov+Nnoise+2*D+2)); 53 | end 54 | end 55 | if quadsqexp_meanfun 56 | xm_se(1,:) = hyp(Ncov+Nnoise+2*D+1+(1:D)); 57 | omega_se(1,:) = exp(hyp(Ncov+Nnoise+3*D+1+(1:D))); 58 | h_se = hyp(Ncov+Nnoise+4*D+2); 59 | end 60 | 61 | % GP posterior parameters 62 | alpha = gp.post(s).alpha; 63 | L = gp.post(s).L; 64 | Lchol = gp.post(s).Lchol; 65 | 66 | sn2 = exp(2*hyp(Ncov+1)); 67 | sn2_eff = sn2*gp.post(s).sn2_mult; 68 | 69 | % Compute posterior mean of the integral 70 | tau = sqrt(bsxfun(@plus,sigma.^2,ell.^2)); 71 | lnnf = ln_sf2 + sum_lnell - sum(log(tau),2); % Covariance normalization factor 72 | sumdelta2 = zeros(Nstar,N); 73 | for i = 1:D 74 | sumdelta2 = sumdelta2 + bsxfun(@rdivide,bsxfun(@minus, mu(:,i), gp.X(:,i)'),tau(:,i)).^2; 75 | end 76 | z = exp(bsxfun(@minus,lnnf,0.5*sumdelta2)); 77 | F(:,s) = z*alpha + m0; 78 | 79 | if quadratic_meanfun || quadsqexp_meanfun 80 | nu_k = -0.5*sum(1./omega.^2 .* ... 81 | bsxfun(@plus,mu.^2 + sigma.^2 - bsxfun(@times,2*mu,xm), xm.^2),2); 82 | F(:,s) = F(:,s) + nu_k; 83 | elseif sqexp_meanfun 84 | tau2_mfun = bsxfun(@plus,sigma.^2,omega.^2); 85 | s2 = (bsxfun(@minus,mu,xm).^2)./tau2_mfun; 86 | nu_se = h*prod(bsxfun(@rdivide,omega,sqrt(tau2_mfun)),2).*exp(-0.5*sum(s2,2)); 87 | F(:,s) = F(:,s) + nu_se; 88 | end 89 | if quadsqexp_meanfun 90 | tau2_mfun = bsxfun(@plus,sigma.^2,omega_se.^2); 91 | s2 = (bsxfun(@minus,mu,xm_se).^2)./tau2_mfun; 92 | nu_se = h_se*prod(bsxfun(@rdivide,omega_se,sqrt(tau2_mfun)),2).*exp(-0.5*sum(s2,2)); 93 | F(:,s) = F(:,s) + nu_se; 94 | end 95 | 96 | % Compute posterior variance of the integral 97 | if compute_var 98 | tau_kk = sqrt(bsxfun(@plus,2*sigma.^2,ell.^2)); 99 | nf_kk = exp(ln_sf2 + sum_lnell - sum(log(tau_kk),2)); 100 | if Lchol 101 | invKzk = (L\(L'\z'))/sn2_eff; 102 | else 103 | invKzk = -L*z'; 104 | end 105 | J_kk = nf_kk - sum(z.*invKzk',2); 106 | varF(:,s) = max(eps,J_kk); % Correct for numerical error 107 | end 108 | 109 | end 110 | 111 | % Unless predictions for samples are requested separately, average over samples 112 | if Ns > 1 && ~ssflag 113 | Fbar = sum(F,2)/Ns; 114 | if compute_var 115 | varFss = sum((F - Fbar).^2,2)/(Ns-1); % Estimated variance of the samples 116 | varF = sum(varF,2)/Ns + varFss; 117 | end 118 | F = Fbar; 119 | end 120 | -------------------------------------------------------------------------------- /gplite/gplite_rnd.m: -------------------------------------------------------------------------------- 1 | function [Fstar,Ystar] = gplite_rnd(gp,Xstar,nowarpflag) 2 | %GPLITE_RND Draw a random function from Gaussian process. 
3 | % FSTAR = GPLITE_RND(GP,XSTAR) draws a random function from GP, evaluated 4 | % at XSTAR. 5 | % 6 | % [FSTAR,YSTAR] = GPLITE_RND(GP,XSTAR) adds observation noise to the 7 | % drawn function. 8 | % 9 | % See also GPLITE_POST, GPLITE_PRED. 10 | 11 | if nargin < 3 || isempty(nowarpflag); nowarpflag = false; end 12 | 13 | [N,D] = size(gp.X); % Number of training points and dimension 14 | Ns = numel(gp.post); % Hyperparameter samples 15 | Nstar = size(Xstar,1); % Number of test inputs 16 | 17 | Ncov = gp.Ncov; 18 | Nnoise = gp.Nnoise; 19 | Nmean = gp.Nmean; 20 | 21 | % Draw from hyperparameter samples 22 | s = randi(Ns); 23 | 24 | hyp = gp.post(s).hyp; 25 | 26 | alpha = gp.post(s).alpha; 27 | L = gp.post(s).L; 28 | Lchol = gp.post(s).Lchol; 29 | sW = gp.post(s).sW; 30 | 31 | % Compute GP mean function at test points 32 | hyp_mean = hyp(Ncov+Nnoise+1:Ncov+Nnoise+Nmean); 33 | mstar = gplite_meanfun(hyp_mean,Xstar,gp.meanfun,[],gp.meanfun_extras); 34 | 35 | % Compute kernel matrix 36 | hyp_cov = hyp(1:Ncov); 37 | Kstar_mat = gplite_covfun(hyp_cov,Xstar,gp.covfun); 38 | 39 | if ~isempty(gp.y) 40 | % Compute cross-kernel matrix Ks_mat 41 | Ks_mat = gplite_covfun(hyp_cov,gp.X,gp.covfun,Xstar); 42 | 43 | fmu = mstar + Ks_mat'*alpha; % Conditional mean 44 | 45 | if Lchol 46 | V = L'\(repmat(sW,[1,Nstar]).*Ks_mat); 47 | C = Kstar_mat - V'*V; % predictive variances 48 | else 49 | LKs = L*Ks_mat; 50 | C = Kstar_mat + Ks_mat'*LKs; 51 | end 52 | else 53 | fmu = mstar; % No data, draw from prior 54 | C = Kstar_mat + eps*eye(Nstar); 55 | end 56 | 57 | C = (C + C')/2; % Enforce symmetry if lost due to numerical errors 58 | 59 | % Draw random function 60 | T = robustchol(C); % CHOL usually crashes, this is more stable 61 | Fstar = T' * randn(size(T,1),1) + fmu; 62 | 63 | % Add observation noise 64 | if nargout > 1 65 | % Get observation noise hyperparameters and evaluate noise at test points 66 | hyp_noise = hyp(Ncov+1:Ncov+Nnoise); 67 | sn2 = gplite_noisefun(hyp_noise,Xstar,gp.noisefun); 68 | sn2_mult = gp.post(s).sn2_mult; 69 | if isempty(sn2_mult); sn2_mult = 1; end 70 | Ystar = Fstar + sqrt(sn2*sn2_mult).*randn(size(fmu)); 71 | end 72 | 73 | % Apply output warping to map back to observation space 74 | if ~isempty(gp.outwarpfun) && ~nowarpflag 75 | Noutwarp = gp.outwarpfun('info'); 76 | hyp = gp.post(s).hyp; 77 | hyp_outwarp = hyp(Ncov+Nnoise+Nmean+1:Ncov+Nnoise+Nmean+Noutwarp); 78 | Fstar = gp.outwarpfun(hyp_outwarp,Fstar,'inv'); 79 | if nargout > 1 80 | Ystar = gp.outwarpfun(hyp_outwarp,Ystar,'inv'); 81 | end 82 | end 83 | 84 | end 85 | 86 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 87 | 88 | function [T,p] = robustchol(Sigma) 89 | %ROBUSTCHOL Cholesky-like decomposition for covariance matrix. 
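% Tries a standard Cholesky factorization first; if CHOL fails because
% Sigma is not numerically positive definite, falls back to an
% eigendecomposition of the symmetrized matrix: eigenvector signs are
% normalized for determinism, near-zero eigenvalues are dropped, and T is
% rebuilt so that T'*T approximates Sigma. If genuinely negative
% eigenvalues remain, an empty T is returned.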
90 | 91 | [n,m] = size(Sigma); % Should be square 92 | [T,p] = chol(Sigma); 93 | 94 | if p > 0 95 | [U,D] = eig((Sigma+Sigma')/2); 96 | 97 | [~,maxidx] = max(abs(U),[],1); 98 | negidx = (U(maxidx + (0:n:(m-1)*n)) < 0); 99 | U(:,negidx) = -U(:,negidx); 100 | 101 | D = diag(D); 102 | tol = eps(max(D)) * length(D); 103 | t = (abs(D) > tol); 104 | D = D(t); 105 | p = sum(D<0); % negative eigenvalues 106 | 107 | if p == 0 108 | T = diag(sqrt(D)) * U(:,t)'; 109 | else 110 | T = zeros(0,'like',Sigma); 111 | end 112 | end 113 | 114 | 115 | end 116 | -------------------------------------------------------------------------------- /gplite/gplite_sample.m: -------------------------------------------------------------------------------- 1 | function [Xs,gp] = gplite_sample(gp,Ns,x0,method,logprior,beta,VarThresh,proppdf,proprnd,bounds) 2 | %GPLITE_SAMPLE Draw random samples from log pdf represented by GP. 3 | 4 | if nargin < 3; x0 = []; end 5 | if nargin < 4 || isempty(method); method = 'slicesample'; end 6 | if nargin < 5 || isempty(logprior); logprior = []; end 7 | if nargin < 6 || isempty(beta); beta = 0; end 8 | if nargin < 7 || isempty(VarThresh); VarThresh = Inf; end 9 | if nargin < 8 || isempty(proppdf); proppdf = []; end 10 | if nargin < 9 || isempty(proprnd); proprnd = []; end 11 | if nargin < 10; bounds = []; end 12 | 13 | D = size(gp.X,2); 14 | 15 | widths = std(gp.X,[],1); 16 | if isempty(bounds) 17 | MaxBnd = 10; 18 | diam = max(gp.X) - min(gp.X); 19 | LB = min(gp.X) - MaxBnd*diam; 20 | UB = max(gp.X) + MaxBnd*diam; 21 | else 22 | LB = bounds(1,:); 23 | UB = bounds(2,:); 24 | end 25 | 26 | % First, train GP 27 | if ~isfield(gp,'post') || isempty(gp.post) 28 | % How many samples for the GP? 29 | if isfield(gp,'Ns') && ~isempty(gp.Ns) 30 | Ns_gp = gp.Ns; 31 | else 32 | Ns_gp = 0; 33 | end 34 | if isfield(gp,'Nopts') && ~isempty(gp.Nopts) 35 | options.Nopts = gp.Nopts; 36 | else 37 | options.Nopts = 1; % Do only one optimization 38 | end 39 | if isfield(gp,'s2'); s2 = gp.s2; else; s2 = []; end 40 | gp = gplite_train(... 41 | [],Ns_gp,gp.X,gp.y,gp.covfun,gp.meanfun,gp.noisefun,s2,[],options); 42 | end 43 | 44 | % Recompute posterior auxiliary info if needed 45 | if ~isfield(gp.post(1),'alpha') || isempty(gp.post(1).alpha) 46 | gp = gplite_post(gp); 47 | end 48 | 49 | logpfun = @(x) log_gpfun(gp,x,beta,VarThresh); 50 | 51 | switch method 52 | case {'slicesample','slicesamplebnd'} 53 | sampleopts.Burnin = ceil(Ns/10); 54 | sampleopts.Thin = 1; 55 | sampleopts.Display = 'off'; 56 | sampleopts.Diagnostics = false; 57 | sampleopts.LogPrior = logprior; 58 | sampleopts.MetropolisPdf = proppdf; 59 | sampleopts.MetropolisRnd = proprnd; 60 | 61 | if isempty(x0) 62 | [~,idx0] = max(gp.y); 63 | x0 = gp.X(idx0,:); 64 | else 65 | x0 = x0(1,:); 66 | end 67 | Xs = slicesamplebnd(logpfun, ... 
68 | x0,Ns,widths,LB,UB,sampleopts); 69 | 70 | case 'parallel' 71 | sampleopts.Burnin = ceil(Ns/5); 72 | sampleopts.Thin = 1; 73 | sampleopts.Display = 'off'; 74 | sampleopts.Diagnostics = false; 75 | sampleopts.VarTransform = false; 76 | sampleopts.InversionSample = false; 77 | sampleopts.FitGMM = false; 78 | 79 | if ~isempty(logprior) 80 | logPfuns = {logprior,logpfun}; 81 | else 82 | logPfuns = logpfun; 83 | end 84 | 85 | % sampleopts.TransitionOperators = {'transSliceSampleRD'}; 86 | 87 | W = 2*(D+1); 88 | if isempty(x0) 89 | % Take starting points from high posterior density region 90 | hpd_frac = 0.25; 91 | N = numel(gp.y); 92 | N_hpd = min(N,max(W,round(hpd_frac*N))); 93 | if isempty(logprior) 94 | [~,ord] = sort(gp.y,'descend'); 95 | else 96 | dy = logprior(gp.X); 97 | [~,ord] = sort(gp.y + dy,'descend'); 98 | end 99 | X_hpd = gp.X(ord(1:N_hpd),:); 100 | x0 = X_hpd(randperm(N_hpd,min(W,N_hpd)),:); 101 | end 102 | x0 = bsxfun(@min,bsxfun(@max,x0,LB),UB); 103 | Xs = eissample_lite(logPfuns,x0,Ns,W,widths,LB,UB,sampleopts); 104 | end 105 | 106 | end 107 | 108 | %-------------------------------------------------------------------------- 109 | function y = log_gpfun(gp,x,beta,VarThresh) 110 | 111 | if (VarThresh == 0 || ~isfinite(VarThresh)) && beta == 0 112 | y = gplite_pred(gp,x); 113 | else 114 | [y,s2] = gplite_pred(gp,x); 115 | y(s2 >= VarThresh) = y(s2 >= VarThresh) - (s2(s2 >= VarThresh) - VarThresh); 116 | y = y - beta*sqrt(s2); 117 | end 118 | 119 | end -------------------------------------------------------------------------------- /gplite/outwarp_negpow.m: -------------------------------------------------------------------------------- 1 | function [ywarp,dwarp_dt,dwarp_dtheta,d2warp_dthetadt] = outwarp_negpow(hyp,y,invflag) 2 | %OUTWARP_NEGPOW Output warping function: negated power transform. 3 | % YWARP = OUTWARP_NEGPOW(HYP,Y) warps the observations Y: values 4 | % below a threshold Y0 are transformed as Y0 - (Y0-Y).^K, while 5 | % values above Y0 are left unchanged. HYP is a column vector of 6 | % warping hyperparameters [Y0; log(K)], with K the power exponent. 7 | % YWARP = OUTWARP_NEGPOW(HYP,Y,'inv') applies the inverse warping. 8 | % 9 | % See also OUTWARP_NEGPOWC1, OUTWARP_NEGSCALEDPOW, OUTWARP_TEST. 10 | 11 | if nargin < 2; y = []; end 12 | if nargin < 3 || isempty(invflag); invflag = false; else; invflag = true; end 13 | 14 | if invflag && nargout > 1 15 | error('outwarp_fun:InverseOnly', ...
16 | ['When calling for the inverse output warping function, only one function output is expected.']); 17 | end 18 | 19 | %-------------------------------------------------------------------------- 20 | % CUSTOM: Number of hyperparameters 21 | Noutwarp = 2; % # hyperparameters of the output warping function 22 | %-------------------------------------------------------------------------- 23 | 24 | N = size(y,1); % Number of training points 25 | 26 | % Return number of output warping function hyperparameters and additional info 27 | if ischar(hyp) 28 | ywarp = Noutwarp; 29 | if nargout > 1 30 | 31 | % Initialize bounds for all hyperparameters 32 | outwarp_info.LB = -Inf(1,Noutwarp); 33 | outwarp_info.UB = Inf(1,Noutwarp); 34 | outwarp_info.PLB = -Inf(1,Noutwarp); 35 | outwarp_info.PUB = Inf(1,Noutwarp); 36 | outwarp_info.x0 = NaN(1,Noutwarp); 37 | 38 | %------------------------------------------------------------------ 39 | % CUSTOM: Initialize hyperparameter bounds and other details 40 | 41 | % Threshold parameter 42 | outwarp_info.LB(1) = min(y); 43 | outwarp_info.UB(1) = max(y); 44 | outwarp_info.PLB(1) = min(y); 45 | outwarp_info.PUB(1) = max(y); 46 | outwarp_info.x0(1) = NaN; 47 | 48 | % Power exponent k (log space) 49 | outwarp_info.LB(2) = -Inf; 50 | outwarp_info.UB(2) = Inf; 51 | outwarp_info.PLB(2) = -3; 52 | outwarp_info.PUB(2) = 3; 53 | outwarp_info.x0(2) = 0; 54 | 55 | %------------------------------------------------------------------ 56 | 57 | % Assign handle of current output warping function 58 | outwarp_info.outwarpfun = str2func(mfilename); 59 | 60 | % Plausible starting point 61 | idx_nan = isnan(outwarp_info.x0); 62 | outwarp_info.x0(idx_nan) = 0.5*(outwarp_info.PLB(idx_nan) + outwarp_info.PUB(idx_nan)); 63 | 64 | dwarp_dt = outwarp_info; 65 | 66 | end 67 | 68 | return; 69 | end 70 | 71 | [Nhyp,Ns] = size(hyp); % Hyperparameters and samples 72 | 73 | if Nhyp ~= Noutwarp 74 | error('outwarp_fun:WrongLikHyp', ... 75 | ['Expected ' num2str(Noutwarp) ' output warping function hyperparameters, ' num2str(Nhyp) ' passed instead.']); 76 | end 77 | if Ns > 1 78 | error('outwarp_fun:nosampling', ... 
79 | 'Output warping function output is available only for one-sample hyperparameter inputs.'); 80 | end 81 | 82 | %-------------------------------------------------------------------------- 83 | % CUSTOM: Compute output warping function and gradients 84 | 85 | % Read hyperparameters 86 | y0 = hyp(1); 87 | k = exp(hyp(2)); 88 | 89 | % Compute output warping or inverse warping 90 | ywarp = y; 91 | idx = y < y0; 92 | if invflag % Inverse output warping 93 | ywarp(idx) = y0 - (y0 - y(idx)).^(1/k); 94 | else % Direct output warping 95 | delta = (y0 - y(idx)); 96 | deltak = delta.^k; 97 | ywarp(idx) = y0 - deltak; 98 | end 99 | 100 | if nargout > 1 101 | % First-order derivative of output warping function in output space 102 | dwarp_dt = ones(size(y)); 103 | deltakm1 = delta.^(k-1); 104 | 105 | dwarp_dt(idx) = k*deltakm1; 106 | 107 | if nargout > 2 108 | % Gradient of output warping function wrt hyperparameters 109 | dwarp_dtheta = zeros(N,Noutwarp); 110 | 111 | dwarp_dtheta(idx,1) = 1 - k*deltakm1; % y0 112 | dwarp_dtheta(idx,2) = -k*deltak.*log(delta); % log(k) 113 | 114 | if nargout > 3 115 | % Gradient of derivative of output warping function 116 | d2warp_dthetadt = zeros(N,Noutwarp); 117 | 118 | d2warp_dthetadt(idx,1) = k*(k-1)*delta.^(k-2); % y0 119 | d2warp_dthetadt(idx,2) = k*deltakm1 + k^2*deltakm1.*log(delta); % log(k) 120 | 121 | end 122 | 123 | end 124 | end 125 | 126 | end -------------------------------------------------------------------------------- /gplite/outwarp_negpowc1.m: -------------------------------------------------------------------------------- 1 | function [ywarp,dwarp_dt,dwarp_dtheta,d2warp_dthetadt] = outwarp_negpowc1(hyp,y,invflag) 2 | %OUTWARP_NEGPOWC1 Output warping function: C1-continuous negated power. 3 | % YWARP = OUTWARP_NEGPOWC1(HYP,Y) warps the observations Y: values 4 | % below a threshold Y0 are transformed as Y0 - ((1+Y0-Y).^K - 1)/K, 5 | % which matches the identity and its first derivative at Y0. HYP is 6 | % a column vector of warping hyperparameters [Y0; log(K)]. 7 | % YWARP = OUTWARP_NEGPOWC1(HYP,Y,'inv') applies the inverse warping. 8 | % 9 | % See also OUTWARP_NEGPOW, OUTWARP_NEGSCALEDPOW, OUTWARP_TEST. 10 | 11 | if nargin < 2; y = []; end 12 | if nargin < 3 || isempty(invflag); invflag = false; else; invflag = true; end 13 | 14 | if invflag && nargout > 1 15 | error('outwarp_fun:InverseOnly', ...
16 | ['When calling for the inverse output warping function, only one function output is expected.']); 17 | end 18 | 19 | %-------------------------------------------------------------------------- 20 | % CUSTOM: Number of hyperparameters 21 | Noutwarp = 2; % # hyperparameters of the output warping function 22 | %-------------------------------------------------------------------------- 23 | 24 | N = size(y,1); % Number of training points 25 | 26 | % Return number of output warping function hyperparameters and additional info 27 | if ischar(hyp) 28 | ywarp = Noutwarp; 29 | if nargout > 1 30 | 31 | if isempty(y); y = [0;1]; end 32 | 33 | % Initialize bounds for all hyperparameters 34 | outwarp_info.LB = -Inf(1,Noutwarp); 35 | outwarp_info.UB = Inf(1,Noutwarp); 36 | outwarp_info.PLB = -Inf(1,Noutwarp); 37 | outwarp_info.PUB = Inf(1,Noutwarp); 38 | outwarp_info.x0 = NaN(1,Noutwarp); 39 | 40 | %------------------------------------------------------------------ 41 | % CUSTOM: Initialize hyperparameter bounds and other details 42 | 43 | % Threshold parameter 44 | outwarp_info.LB(1) = min(y); 45 | outwarp_info.UB(1) = max(y); 46 | outwarp_info.PLB(1) = min(y); 47 | outwarp_info.PUB(1) = max(y); 48 | outwarp_info.x0(1) = NaN; 49 | 50 | % Power exponent k (log space) 51 | outwarp_info.LB(2) = -Inf; 52 | outwarp_info.UB(2) = Inf; 53 | outwarp_info.PLB(2) = -3; 54 | outwarp_info.PUB(2) = 3; 55 | outwarp_info.x0(2) = 0; 56 | 57 | %------------------------------------------------------------------ 58 | 59 | % Assign handle of current output warping function 60 | outwarp_info.outwarpfun = str2func(mfilename); 61 | 62 | % Plausible starting point 63 | idx_nan = isnan(outwarp_info.x0); 64 | outwarp_info.x0(idx_nan) = 0.5*(outwarp_info.PLB(idx_nan) + outwarp_info.PUB(idx_nan)); 65 | 66 | dwarp_dt = outwarp_info; 67 | 68 | end 69 | 70 | return; 71 | end 72 | 73 | [Nhyp,Ns] = size(hyp); % Hyperparameters and samples 74 | 75 | if Nhyp ~= Noutwarp 76 | error('outwarp_fun:WrongLikHyp', ... 77 | ['Expected ' num2str(Noutwarp) ' output warping function hyperparameters, ' num2str(Nhyp) ' passed instead.']); 78 | end 79 | if Ns > 1 80 | error('outwarp_fun:nosampling', ... 
81 | 'Output warping function output is available only for one-sample hyperparameter inputs.'); 82 | end 83 | 84 | %-------------------------------------------------------------------------- 85 | % CUSTOM: Compute output warping function and gradients 86 | 87 | % Read hyperparameters 88 | y0 = hyp(1); 89 | k = exp(hyp(2)); 90 | 91 | % Compute output warping or inverse warping 92 | ywarp = y; 93 | idx = y < y0; 94 | if invflag % Inverse output warping 95 | ywarp(idx) = y0 + 1 - (1 + k*y0 - k*y(idx)).^(1/k); 96 | else % Direct output warping 97 | delta = (1 + y0 - y(idx)); 98 | deltak = delta.^k; 99 | ywarp(idx) = y0 - deltak/k + 1/k; 100 | end 101 | 102 | if nargout > 1 103 | % First-order derivative of output warping function in output space 104 | dwarp_dt = ones(size(y)); 105 | deltakm1 = delta.^(k-1); 106 | 107 | dwarp_dt(idx) = deltakm1; 108 | 109 | if nargout > 2 110 | % Gradient of output warping function wrt hyperparameters 111 | dwarp_dtheta = zeros(N,Noutwarp); 112 | 113 | dwarp_dtheta(idx,1) = 1 - deltakm1; % y0 114 | dwarp_dtheta(idx,2) = -deltak.*log(delta) + deltak/k - 1/k; % log(k) 115 | 116 | if nargout > 3 117 | % Gradient of derivative of output warping function 118 | d2warp_dthetadt = zeros(N,Noutwarp); 119 | 120 | d2warp_dthetadt(idx,1) = (k-1)*delta.^(k-2); % y0 121 | d2warp_dthetadt(idx,2) = k*deltakm1.*log(delta); % log(k) 122 | 123 | end 124 | 125 | end 126 | end 127 | 128 | end -------------------------------------------------------------------------------- /gplite/outwarp_negscaledpow.m: -------------------------------------------------------------------------------- 1 | function [ywarp,dwarp_dt,dwarp_dtheta,d2warp_dthetadt] = outwarp_negscaledpow(hyp,y,invflag) 2 | %OUTWARP_NEGSCALEDPOW Output warping function: scaled negated power. 3 | % YWARP = OUTWARP_NEGSCALEDPOW(HYP,Y) warps the observations Y: 4 | % values below a threshold Y0 are transformed as Y0 - (A*(Y0-Y)).^K, 5 | % while values above Y0 are left unchanged. HYP is a column vector 6 | % of warping hyperparameters [Y0; log(A); log(K)]. 7 | % YWARP = OUTWARP_NEGSCALEDPOW(HYP,Y,'inv') applies inverse warping. 8 | % 9 | % See also OUTWARP_NEGPOW, OUTWARP_NEGPOWC1, OUTWARP_TEST. 10 | 11 | if nargin < 2; y = []; end 12 | if nargin < 3 || isempty(invflag); invflag = false; else; invflag = true; end 13 | 14 | if invflag && nargout > 1 15 | error('outwarp_fun:InverseOnly', ...
16 | ['When calling for the inverse output warping function, only one function output is expected.']); 17 | end 18 | 19 | %-------------------------------------------------------------------------- 20 | % CUSTOM: Number of hyperparameters 21 | Noutwarp = 3; % # hyperparameters of the output warping function 22 | %-------------------------------------------------------------------------- 23 | 24 | N = size(y,1); % Number of training points 25 | 26 | % Return number of output warping function hyperparameters and additional info 27 | if ischar(hyp) 28 | ywarp = Noutwarp; 29 | if nargout > 1 30 | 31 | % Initialize bounds for all hyperparameters 32 | outwarp_info.LB = -Inf(1,Noutwarp); 33 | outwarp_info.UB = Inf(1,Noutwarp); 34 | outwarp_info.PLB = -Inf(1,Noutwarp); 35 | outwarp_info.PUB = Inf(1,Noutwarp); 36 | outwarp_info.x0 = NaN(1,Noutwarp); 37 | 38 | %------------------------------------------------------------------ 39 | % CUSTOM: Initialize hyperparameter bounds and other details 40 | 41 | % Threshold parameter 42 | outwarp_info.LB(1) = min(y); 43 | outwarp_info.UB(1) = max(y); 44 | outwarp_info.PLB(1) = min(y); 45 | outwarp_info.PUB(1) = max(y); 46 | outwarp_info.x0(1) = NaN; 47 | 48 | % Scaling parameter a (log space) 49 | outwarp_info.LB(2) = -Inf; 50 | outwarp_info.UB(2) = Inf; 51 | outwarp_info.PLB(2) = -2; 52 | outwarp_info.PUB(2) = 2; 53 | outwarp_info.x0(2) = 0; 54 | 55 | % Power exponent k (log space) 56 | outwarp_info.LB(3) = -Inf; 57 | outwarp_info.UB(3) = Inf; 58 | outwarp_info.PLB(3) = -3; 59 | outwarp_info.PUB(3) = 3; 60 | outwarp_info.x0(3) = 0; 61 | 62 | %------------------------------------------------------------------ 63 | 64 | % Assign handle of current output warping function 65 | outwarp_info.outwarpfun = str2func(mfilename); 66 | 67 | % Plausible starting point 68 | idx_nan = isnan(outwarp_info.x0); 69 | outwarp_info.x0(idx_nan) = 0.5*(outwarp_info.PLB(idx_nan) + outwarp_info.PUB(idx_nan)); 70 | 71 | dwarp_dt = outwarp_info; 72 | 73 | end 74 | 75 | return; 76 | end 77 | 78 | [Nhyp,Ns] = size(hyp); % Hyperparameters and samples 79 | 80 | if Nhyp ~= Noutwarp 81 | error('outwarp_fun:WrongLikHyp', ... 82 | ['Expected ' num2str(Noutwarp) ' output warping function hyperparameters, ' num2str(Nhyp) ' passed instead.']); 83 | end 84 | if Ns > 1 85 | error('outwarp_fun:nosampling', ... 
86 | 'Output warping function output is available only for one-sample hyperparameter inputs.'); 87 | end 88 | 89 | %-------------------------------------------------------------------------- 90 | % CUSTOM: Compute output warping function and gradients 91 | 92 | % Read hyperparameters 93 | y0 = hyp(1); 94 | a = exp(hyp(2)); 95 | k = exp(hyp(3)); 96 | 97 | % Compute output warping or inverse warping 98 | ywarp = y; 99 | idx = y < y0; 100 | if invflag % Inverse output warping 101 | ywarp(idx) = y0 - ((y0 - y(idx)).^(1/k))/a; 102 | else % Direct output warping 103 | adelta = a*(y0 - y(idx)); 104 | adeltak = adelta.^k; 105 | ywarp(idx) = y0 - adeltak; 106 | end 107 | 108 | if nargout > 1 109 | % First-order derivative of output warping function in output space 110 | dwarp_dt = ones(size(y)); 111 | adeltakm1 = adelta.^(k-1); 112 | 113 | dwarp_dt(idx) = a*k*adeltakm1; 114 | 115 | if nargout > 2 116 | % Gradient of output warping function wrt hyperparameters 117 | dwarp_dtheta = zeros(N,Noutwarp); 118 | 119 | dwarp_dtheta(idx,1) = 1 - a*k*adeltakm1; % y0 120 | dwarp_dtheta(idx,2) = -k*adeltak; % log(a) 121 | dwarp_dtheta(idx,3) = -k*adeltak.*log(adelta); % log(k) 122 | 123 | if nargout > 3 124 | % Gradient of derivative of output warping function 125 | d2warp_dthetadt = zeros(N,Noutwarp); 126 | 127 | d2warp_dthetadt(idx,1) = a^2*k*(k-1)*adelta.^(k-2); % y0 128 | d2warp_dthetadt(idx,2) = a*k^2*adeltakm1; % log(a) 129 | d2warp_dthetadt(idx,3) = a*k*adeltakm1 + a*k^2*adeltakm1.*log(adelta); % log(k) 130 | 131 | end 132 | 133 | end 134 | end 135 | 136 | end -------------------------------------------------------------------------------- /gplite/outwarp_test.m: -------------------------------------------------------------------------------- 1 | function outwarp_test(outfun) 2 | %OUTWARP_TEST Test correct implementation of an output warping function. 
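%   OUTWARP_TEST(OUTFUN) draws random observations and random warping
%   hyperparameters from the plausible box, and then checks, for the
%   output warping function handle OUTFUN: (1) that the inverse warping
%   undoes the direct warping; (2) the first-order derivative in output
%   space; (3) the gradient with respect to the hyperparameters; (4) the
%   gradient of the derivative. Derivatives are checked numerically via
%   DERIVCHECK; results are printed to screen.
%
%   Example (illustrative usage):
%       outwarp_test(@outwarp_negpow);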
3 | 4 | % Generate random observations 5 | N = randi(50); 6 | y = rand(N,1)*10; 7 | 8 | [Noutwarp,info] = outfun('info',y); 9 | 10 | % Generate random hyperparameters from plausible box 11 | PLB = info.PLB(:); 12 | PUB = info.PUB(:); 13 | hyp = rand(Noutwarp,1).*(PUB - PLB) + PLB; 14 | 15 | hyp 16 | 17 | fprintf('---------------------------------------------------------------------------------\n'); 18 | fprintf('Check error on inverse of output warping function...\n\n'); 19 | 20 | sum(abs(y - outfun(hyp,outfun(hyp,y),'inv'))) 21 | 22 | fprintf('---------------------------------------------------------------------------------\n'); 23 | fprintf('Check 1st-order derivative of output warping function...\n\n'); 24 | 25 | yy = y(randi(N)); 26 | derivcheck(@(t) f(t,hyp,outfun),yy); 27 | 28 | fprintf('---------------------------------------------------------------------------------\n'); 29 | fprintf('Check gradient of output warping function wrt hyperparameters...\n\n'); 30 | 31 | derivcheck(@(hyp_) f2(yy,hyp_,outfun),hyp); 32 | 33 | fprintf('---------------------------------------------------------------------------------\n'); 34 | fprintf('Check gradient of derivative of output warping function wrt hyperparameters...\n\n'); 35 | 36 | derivcheck(@(hyp_) f3(yy,hyp_,outfun),hyp); 37 | 38 | 39 | 40 | end 41 | 42 | function [y,dy] = f(t,hyp,outfun) 43 | [y,dy] = outfun(hyp,t); 44 | end 45 | 46 | function [y,dy] = f2(y,hyp,outfun) 47 | [y,~,dy] = outfun(hyp,y); 48 | end 49 | 50 | function [y,dy] = f3(y,hyp,outfun) 51 | [~,y,~,dy] = outfun(hyp,y); 52 | end -------------------------------------------------------------------------------- /gplite/private/derivcheck.m: -------------------------------------------------------------------------------- 1 | function [err_rel,err_abs] = derivcheck(f,x,flag) 2 | %DERIVCHECK Check analytical vs numerical differentiation for a function 3 | 4 | if nargin < 3 || isempty(flag); flag = false; end 5 | 6 | tic 7 | if flag 8 | dy_num = fgrad(f,x,'five-points'); 9 | else 10 | dy_num = gradest(f,x); 11 | end 12 | toc 13 | tic 14 | [y,dy_ana] = f(x); 15 | toc 16 | 17 | if size(dy_num,1) == size(dy_num,2) 18 | dy_num = sum(dy_num,1); 19 | end 20 | 21 | % Reshape to row vectors 22 | dy_num = dy_num(:)'; 23 | dy_ana = dy_ana(:)'; 24 | 25 | fprintf('Relative errors:\n'); 26 | err_rel = (dy_num(:)' - dy_ana(:)')./dy_num(:)' 27 | 28 | fprintf('Absolute errors:\n'); 29 | err_abs = dy_num(:)' - dy_ana(:)' 30 | 31 | end -------------------------------------------------------------------------------- /gplite/private/eissample_lite.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/gplite/private/eissample_lite.m -------------------------------------------------------------------------------- /gplite/private/quantile1.m: -------------------------------------------------------------------------------- 1 | function y = quantile1(x,p) 2 | %QUANTILE1 Quantile of a vector. 3 | % Y = PRCTILE(X,P) returns percentiles of the values in X. P is a scalar 4 | % or a vector of percent values. When X is a vector, Y is the same size 5 | % as P, and Y(i) contains the P(i)-th percentile. When X is a matrix, 6 | % the i-th row of Y contains the P(i)-th percentiles of each column of X. 7 | % For N-D arrays, PRCTILE operates along the first non-singleton 8 | % dimension. 9 | % 10 | % Percentiles are specified using percentages, from 0 to 100. 
For an N 11 | % element vector X, PRCTILE computes percentiles as follows: 12 | % 1) The sorted values in X are taken as the 100*(0.5/N), 100*(1.5/N), 13 | % ..., 100*((N-0.5)/N) percentiles. 14 | % 2) Linear interpolation is used to compute percentiles for percent 15 | % values between 100*(0.5/N) and 100*((N-0.5)/N) 16 | % 3) The minimum or maximum values in X are assigned to percentiles 17 | % for percent values outside that range. 18 | % 19 | % PRCTILE treats NaNs as missing values, and removes them. 20 | % 21 | % Examples: 22 | % y = prctile(x,50); % the median of x 23 | % y = prctile(x,[2.5 25 50 75 97.5]); % a useful summary of x 24 | % 25 | % See also IQR, MEDIAN, NANMEDIAN, QUANTILE. 26 | 27 | % Copyright 1993-2016 The MathWorks, Inc. 28 | 29 | % If X is empty, return all NaNs. 30 | if isempty(x) 31 | y = nan(size(p),'like',x); 32 | else 33 | % Drop X's leading singleton dims, and combine its trailing dims. This 34 | % leaves a matrix, and we can work along columns. 35 | x = x(:); 36 | 37 | x = sort(x,1); 38 | n = sum(~isnan(x), 1); % Number of non-NaN values 39 | 40 | if isequal(p,0.5) % make the median fast 41 | if rem(n,2) % n is odd 42 | y = x((n+1)/2,:); 43 | else % n is even 44 | y = (x(n/2,:) + x(n/2+1,:))/2; 45 | end 46 | else 47 | r = p*n; 48 | k = floor(r+0.5); % K gives the index for the row just before r 49 | kp1 = k + 1; % K+1 gives the index for the row just after r 50 | r = r - k; % R is the ratio between the K and K+1 rows 51 | 52 | % Find indices that are out of the range 1 to n and cap them 53 | k(k<1 | isnan(k)) = 1; 54 | kp1 = bsxfun( @min, kp1, n ); 55 | 56 | % Use simple linear interpolation for the valid percentages 57 | y = (0.5+r).*x(kp1,:)+(0.5-r).*x(k,:); 58 | 59 | % Make sure that values we hit exactly are copied rather than interpolated 60 | exact = (r==-0.5); 61 | if any(exact) 62 | y(exact,:) = x(k(exact),:); 63 | end 64 | 65 | % Make sure that identical values are copied rather than interpolated 66 | same = (x(k,:)==x(kp1,:)); 67 | if any(same(:)) 68 | x = x(k,:); % expand x 69 | y(same) = x(same); 70 | end 71 | 72 | end 73 | 74 | end 75 | 76 | end -------------------------------------------------------------------------------- /gplite/private/sq_dist.m: -------------------------------------------------------------------------------- 1 | % sq_dist - a function to compute a matrix of all pairwise squared distances 2 | % between two sets of vectors, stored in the columns of the two matrices, a 3 | % (of size D by n) and b (of size D by m). If only a single argument is given 4 | % or the second matrix is empty, the missing matrix is taken to be identical 5 | % to the first. 6 | % 7 | % Usage: C = sq_dist(a, b) 8 | % or: C = sq_dist(a) or equiv.: C = sq_dist(a, []) 9 | % 10 | % Where a is of size Dxn, b is of size Dxm (or empty), C is of size nxm. 11 | % 12 | % Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2010-12-13. 13 | 14 | function C = sq_dist(a, b) 15 | 16 | if nargin<1 || nargin>3 || nargout>1, error('Wrong number of arguments.'); end 17 | bsx = exist('bsxfun','builtin'); % since Matlab R2007a 7.4.0 and Octave 3.0 18 | if ~bsx, bsx = exist('bsxfun'); end % bsxfun is not yet "builtin" in Octave 19 | [D, n] = size(a); 20 | 21 | % Computation of a^2 - 2*a*b + b^2 is less stable than (a-b)^2 because numerical 22 | % precision can be lost when both a and b have very large absolute value and the 23 | % same sign. For that reason, we subtract the mean from the data beforehand to 24 | % stabilise the computations. 
This is OK because the squared error is 25 | % independent of the mean. 26 | if nargin==1 % subtract mean 27 | mu = mean(a,2); 28 | if bsx 29 | a = bsxfun(@minus,a,mu); 30 | else 31 | a = a - repmat(mu,1,size(a,2)); 32 | end 33 | b = a; m = n; 34 | else 35 | [d, m] = size(b); 36 | if d ~= D, error('Error: column lengths must agree.'); end 37 | mu = (m/(n+m))*mean(b,2) + (n/(n+m))*mean(a,2); 38 | if bsx 39 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu); 40 | else 41 | a = a - repmat(mu,1,n); b = b - repmat(mu,1,m); 42 | end 43 | end 44 | 45 | if bsx % compute squared distances 46 | C = bsxfun(@plus,sum(a.*a,1)',bsxfun(@minus,sum(b.*b,1),2*a'*b)); 47 | else 48 | C = repmat(sum(a.*a,1)',1,m) + repmat(sum(b.*b,1),n,1) - 2*a'*b; 49 | end 50 | C = max(C,0); % numerical noise can cause C to negative i.e. C > -1e-14 51 | -------------------------------------------------------------------------------- /install.m: -------------------------------------------------------------------------------- 1 | % MATLAB installation script for VBMC 2 | % 3 | % Copyright (c) by Luigi Acerbi 2018-2020 4 | 5 | fprintf('Installing VBMC...\n'); 6 | 7 | me = mfilename; % what is my filename 8 | pathstr = fileparts(which(me)); % get my location 9 | addpath(pathstr); % add base folder to the path 10 | addpath([pathstr filesep() 'shared']); % add shared folder to the path 11 | 12 | try 13 | failed_install_flag = savepath; % save path 14 | catch 15 | failed_install_flag = true; 16 | end 17 | 18 | if failed_install_flag 19 | fprintf('Installation error: could not save path.\n\n'); 20 | fprintf('You need to manually add VBMC''s installation folder to your MATLAB search path (and save it).\n'); 21 | fprintf('See the MATLAB documentation for more information.\n'); 22 | fprintf('Note that in Linux systems, e.g. Ubuntu, you need read/write permission to save the MATLAB path (see here).\n'); 23 | else 24 | fprintf('Installation successful!\n'); 25 | type([pathstr filesep 'docs' filesep 'README.txt']); 26 | fprintf('\n'); 27 | end 28 | 29 | clear me pathstr -------------------------------------------------------------------------------- /lpostfun.m: -------------------------------------------------------------------------------- 1 | function [y,s] = lpostfun(x,llike,lprior) 2 | %LPOSTFUN Log (unnormalized) posterior function. 3 | % Y = LPOSTFUN(X,LLIKE,LPRIOR) returns the unnormalized log posterior 4 | % evaluated at X where LLIKE is a function handle to the log likelihood 5 | % function and LPRIOR a function handle to the log prior. 6 | % 7 | % [Y,S] = LPOSTFUN(X,LLIKE,LPRIOR) also returns an estimate S of the 8 | % standard deviation of a noisy log-likelihood evaluation at X (obtained 9 | % as second output of LLIKE, assuming LLIKE has two outputs). Note that 10 | % the log prior is assumed to be noiseless. 11 | 12 | if nargin < 3; lprior = []; end 13 | 14 | if nargout > 1 15 | [y,s] = llike(x); 16 | else 17 | y = llike(x); 18 | end 19 | 20 | if ~isempty(lprior) 21 | y = y + lprior(x); 22 | end 23 | 24 | end -------------------------------------------------------------------------------- /misc/best_vbmc.m: -------------------------------------------------------------------------------- 1 | function [vp,elbo,elbo_sd,idx_best] = best_vbmc(stats,idx,SafeSD,FracBack,RankCriterion,RealFlag) 2 | %VBMC_BEST Return best variational posterior from stats structure. 
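%   [VP,ELBO,ELBO_SD,IDX_BEST] = BEST_VBMC(STATS) returns the best
%   variational posterior VP found in the STATS structure of a VBMC run,
%   together with its ELBO, the standard deviation ELBO_SD of the ELBO
%   estimate, and the index IDX_BEST of the chosen iteration. If the last
%   iteration is stable it is returned directly; otherwise candidate
%   iterations are ranked by lower confidence bound on the ELBO (ELCBO),
%   stability, and recency.
%
%   Example (illustrative sketch; STATS is the statistics structure
%   returned by a previous VBMC run):
%       [vp,elbo,elbo_sd] = best_vbmc(stats);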
3 | 4 | % Check up to this iteration (default, last) 5 | if nargin < 2 || isempty(idx); idx = stats.iter(end); end 6 | 7 | % Penalization for uncertainty (default, 5 SD) 8 | if nargin < 3 || isempty(SafeSD); SafeSD = 5; end 9 | 10 | % If no past stable iteration, go back up to this fraction of iterations 11 | if nargin < 4 || isempty(FracBack); FracBack = 0.25; end 12 | 13 | % Use new ranking criterion method to pick best solution 14 | if nargin < 5 || isempty(RankCriterion); RankCriterion = false; end 15 | 16 | % Convert training variational posterior to real posterior 17 | if nargin < 6 || isempty(RealFlag); RealFlag = false; end 18 | 19 | if stats.stable(idx) 20 | % If the current iteration is stable, return it 21 | idx_best = idx; 22 | 23 | else 24 | % Otherwise, find the best solution according to various criteria 25 | 26 | if RankCriterion 27 | % Find solution that combines ELCBO, stability, and recency 28 | 29 | % Rank by position 30 | rank(:,1) = fliplr(1:idx)'; 31 | 32 | % Rank by ELCBO 33 | lnZ_iter = stats.elbo(1:idx); 34 | lnZsd_iter = stats.elbo_sd(1:idx); 35 | elcbo = lnZ_iter - SafeSD*lnZsd_iter; 36 | [~,ord] = sort(elcbo,'descend'); 37 | rank(ord,2) = 1:idx; 38 | 39 | % Rank by reliability index 40 | [~,ord] = sort(stats.rindex(1:idx),'ascend'); 41 | rank(ord,3) = 1:idx; 42 | 43 | % Rank penalty to all non-stable iterations 44 | rank(:,4) = idx; 45 | rank(stats.stable(1:idx),4) = 1; 46 | 47 | % % Add rank penalty to warmup (and iteration immediately after) 48 | % last_warmup = find(stats.warmup(1:idx),1,'last'); 49 | % rank(:,5) = 1; 50 | % rank(1:min(last_warmup+2,end),5) = idx; 51 | 52 | [~,idx_best] = min(sum(rank,2)); 53 | 54 | else 55 | % Find recent solution with best ELCBO 56 | laststable = find(stats.stable(1:idx),1,'last'); 57 | if isempty(laststable) 58 | BackIter = ceil(idx*FracBack); % Go back up to this many iterations if no previous stable iteration 59 | idx_start = max(1,idx-BackIter); 60 | else 61 | idx_start = laststable; 62 | end 63 | lnZ_iter = stats.elbo(idx_start:idx); 64 | lnZsd_iter = stats.elbo_sd(idx_start:idx); 65 | elcbo = lnZ_iter - SafeSD*lnZsd_iter; 66 | [~,idx_best] = max(elcbo); 67 | idx_best = idx_start + idx_best - 1; 68 | end 69 | end 70 | 71 | % Return best variational posterior, its ELBO and SD 72 | if RealFlag 73 | vp = vptrain2real(stats.vp(idx_best),1); 74 | else 75 | vp = stats.vp(idx_best); 76 | end 77 | elbo = stats.elbo(idx_best); 78 | elbo_sd = stats.elbo_sd(idx_best); 79 | vp.stats.stable = stats.stable(idx_best); 80 | 81 | end -------------------------------------------------------------------------------- /misc/check_quadcoefficients_vbmc.m: -------------------------------------------------------------------------------- 1 | function errorflag = check_quadcoefficients_vbmc(gp) 2 | %CHECK_QUADCOEFFICIENTS_VBMC Check that the quadratic coefficients are negative.
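%   ERRORFLAG = CHECK_QUADCOEFFICIENTS_VBMC(GP) checks, for each
%   posterior hyperparameter sample of the GP, that the quadratic part
%   of the integrated mean function defines a negative definite
%   quadratic form, so that the surrogate mean remains integrable. For a
%   diagonal quadratic integrated mean (GP.INTMEANFUN == 3) the
%   quadratic coefficients are checked directly; for a full quadratic
%   integrated mean (GP.INTMEANFUN == 4) negative definiteness is tested
%   via a Cholesky factorization of the negated coefficient matrix.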
3 | 4 | % Extract integrated basis functions coefficients 5 | D = size(gp.X,2); 6 | Nb = numel(gp.post(1).intmean.betabar); 7 | betabar = zeros(Nb,numel(gp.post)); 8 | for s = 1:numel(gp.post) 9 | betabar(:,s) = gp.post(s).intmean.betabar; 10 | end 11 | % betabar 12 | 13 | if gp.intmeanfun == 3 14 | errorflag = any(betabar(1+D+(1:D),:) >= 0,2)'; 15 | elseif gp.intmeanfun == 4 16 | tril_mat = tril(true(D),-1); 17 | tril_vec = tril_mat(:); 18 | z = zeros(D*D,1); 19 | errorflag = false; 20 | for b = 1:size(betabar,2) 21 | beta_mat = z; 22 | beta_mat(tril_vec) = betabar(1+2*D+(1:D*(D-1)/2),b); 23 | beta_mat = reshape(beta_mat,[D,D]); 24 | beta_mat = beta_mat + beta_mat' + diag(betabar(1+D+(1:D),b)); 25 | try 26 | [~,dd] = chol(-beta_mat); 27 | catch 28 | dd = 1; 29 | end 30 | % dd 31 | errorflag = errorflag | dd; 32 | end 33 | end 34 | 35 | end -------------------------------------------------------------------------------- /misc/evaloption_vbmc.m: -------------------------------------------------------------------------------- 1 | function val = evaloption_vbmc(option,N) 2 | %GETVALUE_VBMC Return option value that could be a function handle. 3 | 4 | if isa(option,'function_handle') 5 | val = option(N); 6 | else 7 | val = option; 8 | end 9 | 10 | end -------------------------------------------------------------------------------- /misc/fess_vbmc.m: -------------------------------------------------------------------------------- 1 | function [fess,X] = fess_vbmc(vp,gp,X) 2 | %FESS_VBMC Compute fractional effective sample size through importance sampling 3 | 4 | if nargin < 3 || isempty(X); X = 100; end 5 | 6 | % If a single number is passed, take it as the number of samples 7 | if numel(X) == 1 8 | N = X; 9 | X = vbmc_rnd(vp,N,0); 10 | else 11 | N = size(X,1); 12 | end 13 | 14 | % Can directly pass the estimated GP means instead of the full GP 15 | if isstruct(gp) 16 | [~,~,fbar] = gplite_pred(gp,X,[],[],0,0); 17 | else 18 | fbar = mean(gp,2); 19 | end 20 | 21 | if size(fbar,1) ~= size(X,1) 22 | error('Mismatch between number of samples from VP and GP.'); 23 | end 24 | 25 | % Compute effective sample size (ESS) with importance sampling 26 | vlnpdf = max(vbmc_pdf(vp,X,0,1),log(realmin)); 27 | logw = fbar - vlnpdf; 28 | w = exp(logw - max(logw)); 29 | w = w/sum(w); 30 | fess = 1/sum(w.^2) / N; % fractional ESS 31 | 32 | end -------------------------------------------------------------------------------- /misc/finalboost_vbmc.m: -------------------------------------------------------------------------------- 1 | function [vp,elbo,elbo_sd,changedflag] = finalboost_vbmc(vp,idx_best,optimState,stats,options) 2 | %FINALBOOST_VBMC Final boost of variational components. 
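%   [VP,ELBO,ELBO_SD,CHANGEDFLAG] = FINALBOOST_VBMC(VP,IDX_BEST,OPTIMSTATE,STATS,OPTIONS)
%   refines the variational posterior VP at the end of a VBMC run by
%   re-optimizing it with at least OPTIONS.MinFinalComponents mixture
%   components and, if requested, a larger number of entropy samples
%   (OPTIONS.NSentBoost and related options). CHANGEDFLAG is true if a
%   boost was actually performed, false otherwise.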
3 | 4 | changedflag = false; 5 | 6 | Knew = max(options.MinFinalComponents,vp.K); 7 | 8 | % Current entropy samples during variational optimization 9 | NSent = evaloption_vbmc(options.NSent,Knew); 10 | NSentFast = evaloption_vbmc(options.NSentFast,Knew); 11 | NSentFine = evaloption_vbmc(options.NSentFine,Knew); 12 | 13 | % Entropy samples for final boost 14 | NSentBoost = NSent; 15 | NSentFastBoost = NSentFast; 16 | NSentFineBoost = NSentFine; 17 | if isfield(options,'NSentBoost') && ~isempty(options.NSentBoost) 18 | NSentBoost = evaloption_vbmc(options.NSentBoost,Knew); 19 | end 20 | if isfield(options,'NSentFastBoost') && ~isempty(options.NSentFastBoost) 21 | NSentFastBoost = evaloption_vbmc(options.NSentFastBoost,Knew); 22 | end 23 | if isfield(options,'NSentFineBoost') && ~isempty(options.NSentFineBoost) 24 | NSentFineBoost = evaloption_vbmc(options.NSentFineBoost,Knew); 25 | end 26 | 27 | % Perform final boost? 28 | do_boost = vp.K < options.MinFinalComponents || ... 29 | (NSent ~= NSentBoost) || (NSentFine ~= NSentFineBoost); 30 | 31 | if do_boost 32 | % Last variational optimization with large number of components 33 | Nfastopts = ceil(evaloption_vbmc(options.NSelbo,Knew)); 34 | Nfastopts = ceil(Nfastopts * options.NSelboIncr); 35 | Nslowopts = 1; 36 | gp_idx = gplite_post(stats.gp(idx_best)); 37 | options.TolWeight = 0; % No pruning of components 38 | 39 | % End warmup 40 | optimState.Warmup = false; 41 | vp.optimize_mu = logical(options.VariableMeans); 42 | vp.optimize_weights = logical(options.VariableWeights); 43 | 44 | options.NSent = NSentBoost; 45 | options.NSentFast = NSentFastBoost; 46 | options.NSentFine = NSentFineBoost; 47 | options.MaxIterStochastic = Inf; 48 | optimState.entropy_alpha = 0; 49 | 50 | if isfield(vp,'temperature') && ~isempty(vp.temperature) 51 | optimState.temperature = vp.temperature; 52 | end 53 | 54 | stable_flag = vp.stats.stable; 55 | vp = vpoptimize_vbmc(Nfastopts,Nslowopts,vp,gp_idx,Knew,optimState,options); 56 | vp.stats.stable = stable_flag; 57 | changedflag = true; 58 | end 59 | 60 | elbo = vp.stats.elbo; 61 | elbo_sd = vp.stats.elbo_sd; 62 | 63 | end -------------------------------------------------------------------------------- /misc/get_traindata_vbmc.m: -------------------------------------------------------------------------------- 1 | function [X_train,y_train,s2_train,t_train] = get_traindata_vbmc(optimState,options) 2 | %GETRAINDATA Get training data for building GP surrogate. 
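%   [X_TRAIN,Y_TRAIN,S2_TRAIN,T_TRAIN] = GET_TRAINDATA_VBMC(OPTIMSTATE,OPTIONS)
%   returns the training inputs, targets, noise variances, and
%   (optionally) function evaluation times for the GP surrogate,
%   restricted to the points flagged as usable in OPTIMSTATE.X_flag.
%   S2_TRAIN is empty if no noise estimates are stored. If
%   OPTIONS.NoiseShaping is set, the noise variances are inflated for
%   low-density points via NOISESHAPING_VBMC.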
3 | 4 | nvars = size(optimState.X,2); 5 | 6 | X_train = optimState.X(optimState.X_flag,:); 7 | y_train = optimState.y(optimState.X_flag); 8 | if isfield(optimState,'S') 9 | s2_train = optimState.S(optimState.X_flag).^2; 10 | else 11 | s2_train = []; 12 | end 13 | 14 | if options.NoiseShaping 15 | s2_train = noiseshaping_vbmc(s2_train,y_train,options); 16 | end 17 | 18 | if nargout > 3 19 | t_train = optimState.funevaltime(optimState.X_flag); 20 | end 21 | 22 | 23 | 24 | % xxplot = (1:numel(y_train))'; 25 | % [yyplot,ord] = sort(log(y_max - y_train + 1)); 26 | % 27 | % X_train = X_train(ord,:); 28 | % y_train = y_train(ord); 29 | % 30 | % plot(xxplot,yyplot,'k-','LineWidth',1); hold on; 31 | % p = robustfit(xxplot,yyplot); p = fliplr(p'); 32 | % pred = p(1).*xxplot + p(2); 33 | % plot(xxplot, pred,'b--','LineWidth',1); 34 | % drawnow; 35 | 36 | % tail_idx = ceil(numel(y_train)*max(0.5,options.HPDFrac)); 37 | % idx_start = find(yyplot(tail_idx:end) - pred(tail_idx:end) > 1,1); 38 | % if ~isempty(idx_start) 39 | % tail_idx = tail_idx + idx_start - 1; 40 | % [tail_idx,numel(y_train)] 41 | % yyplot(tail_idx:end) = min(pred(tail_idx:end),yyplot(tail_idx:end)); 42 | % y_train(tail_idx:end) = 1 + y_max - exp(yyplot(tail_idx:end)); 43 | % end 44 | 45 | end 46 | -------------------------------------------------------------------------------- /misc/get_vptheta.m: -------------------------------------------------------------------------------- 1 | function [theta,vp] = get_vptheta(vp,optimize_mu,optimize_sigma,optimize_lambda,optimize_weights) 2 | %GET_VPTHETA Get vector of variational parameters from variational posterior. 3 | 4 | if nargin < 5 || isempty(optimize_weights) 5 | optimize_weights = vp.optimize_weights; 6 | if nargin < 4 || isempty(optimize_lambda) 7 | optimize_lambda = vp.optimize_lambda; 8 | if nargin < 3 || isempty(optimize_sigma) 9 | optimize_sigma = vp.optimize_sigma; 10 | if nargin < 2 || isempty(optimize_mu) 11 | optimize_mu = vp.optimize_mu; 12 | end 13 | end 14 | end 15 | end 16 | 17 | vp = rescale_params(vp); 18 | if optimize_mu; theta = vp.mu(:); else; theta = []; end 19 | if optimize_sigma; theta = [theta; log(vp.sigma(:))]; end 20 | if optimize_lambda; theta = [theta; log(vp.lambda(:))]; end 21 | if optimize_weights; theta = [theta; log(vp.w(:))]; end 22 | 23 | end -------------------------------------------------------------------------------- /misc/gethpd_vbmc.m: -------------------------------------------------------------------------------- 1 | function [X_hpd,y_hpd,hpd_range] = gethpd_vbmc(X,y,HPDFrac) 2 | %GETHPD_VBMC Get high-posterior density dataset. 
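%   [X_HPD,Y_HPD,HPD_RANGE] = GETHPD_VBMC(X,Y,HPDFRAC) returns the
%   subset X_HPD of rows of X (with values Y_HPD) whose Y values fall in
%   the top HPDFRAC fraction of the dataset (default HPDFRAC = 0.8),
%   together with the range HPD_RANGE spanned by X_HPD along each
%   coordinate dimension.
%
%   Example (illustrative only, with arbitrary test data):
%       X = randn(1000,2); y = -sum(X.^2,2);    % toy log density values
%       X_hpd = gethpd_vbmc(X,y,0.5);           % top half of the points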
3 | 4 | if nargin < 3 || isempty(HPDFrac); HPDFrac = 0.8; end 5 | 6 | [N,D] = size(X); 7 | 8 | % Subsample high posterior density dataset 9 | [~,ord] = sort(y,'descend'); 10 | N_hpd = round(HPDFrac*N); 11 | X_hpd = X(ord(1:N_hpd),:); 12 | if nargout > 1 13 | y_hpd = y(ord(1:N_hpd)); 14 | end 15 | if nargout > 2 16 | hpd_range = max(X_hpd)-min(X_hpd); 17 | end 18 | 19 | end -------------------------------------------------------------------------------- /misc/gplogjoint_weights.m: -------------------------------------------------------------------------------- 1 | function [F,dF,varF,dvarF,varss,I_sk,J_sjk] = gplogjoint_weights(vp,grad_flag,avg_flag,jacobian_flag,compute_var) 2 | %GPLOGJOINT_WEIGHTS Expected variational log joint probability via GP approximation 3 | 4 | % VP is a struct with the variational posterior. The expected log joint 5 | % and its variance are computed from the precomputed per-component 6 | % statistics stored in VP.STATS.I_SK and VP.STATS.J_SJK (one row per GP 7 | % hyperparameter sample), so no GP hyperparameters or training data are 8 | % passed directly; gradients are with respect to the mixture weights only. 9 | 10 | if nargin < 2; grad_flag = []; end 11 | if nargin < 3 || isempty(avg_flag); avg_flag = true; end 12 | if nargin < 4 || isempty(jacobian_flag); jacobian_flag = true; end 13 | if nargin < 5; compute_var = []; end 14 | if isempty(compute_var); compute_var = nargout > 2; end 15 | 16 | % Check if gradient computation is required 17 | if nargout < 2 % No 2nd output, no gradients 18 | grad_flag = false; 19 | elseif isempty(grad_flag) % By default compute all gradients 20 | grad_flag = true; 21 | end 22 | 23 | compute_vargrad = nargout > 3 && compute_var && grad_flag; 24 | 25 | if compute_vargrad && compute_var ~= 2 26 | error('gplogjoint:FullVarianceGradient', ... 27 | 'Computation of gradient of log joint variance is currently available only for diagonal approximation of the variance.'); 28 | end 29 | 30 | K = vp.K; % Number of components 31 | w(1,:) = vp.w; 32 | I_sk = vp.stats.I_sk; 33 | J_sjk = vp.stats.J_sjk; 34 | 35 | Ns = size(I_sk,1); % Hyperparameter samples 36 | 37 | F = zeros(1,Ns); 38 | if grad_flag; w_grad = zeros(K,Ns); else, w_grad = []; end 39 | if compute_var; varF = zeros(1,Ns); end 40 | if compute_vargrad % Compute gradient of variance?
41 | if grad_flag; w_vargrad = zeros(K,Ns); else, w_vargrad = []; end 42 | end 43 | 44 | % Loop over hyperparameter samples 45 | for s = 1:Ns 46 | F(s) = sum(w.*I_sk(s,:)); 47 | if grad_flag; w_grad(:,s) = I_sk(s,:)'; end 48 | 49 | if compute_var == 2 50 | J_diag = diag(squeeze(J_sjk(s,:,:)))'; 51 | varF(s) = sum(w.^2.*max(eps,J_diag)); 52 | if compute_vargrad 53 | w_vargrad(:,s) = 2*w.*max(eps,J_diag); 54 | end 55 | elseif compute_var 56 | J_jk = squeeze(J_sjk(s,:,:)); 57 | varF(s) = sum(sum(J_jk.*(w'*w),1)); 58 | end 59 | end 60 | 61 | % Correct for numerical error 62 | if compute_var; varF = max(varF,eps); end 63 | 64 | if grad_flag 65 | if jacobian_flag 66 | eta_sum = sum(exp(vp.eta)); 67 | J_w = bsxfun(@times,-exp(vp.eta)',exp(vp.eta)/eta_sum^2) + diag(exp(vp.eta)/eta_sum); 68 | w_grad = J_w*w_grad; 69 | end 70 | dF = w_grad; 71 | else 72 | dF = []; 73 | end 74 | 75 | if compute_vargrad 76 | % Correct for standard softmax reparameterization of W 77 | if jacobian_flag && grad_flag 78 | w_vargrad = J_w*w_vargrad; 79 | end 80 | dvarF = w_vargrad; 81 | else 82 | dvarF = []; 83 | end 84 | 85 | % [varF; varF_diag] 86 | 87 | % Average multiple hyperparameter samples 88 | varss = 0; 89 | if Ns > 1 && avg_flag 90 | Fbar = sum(F,2)/Ns; 91 | if compute_var 92 | varFss = sum((F - Fbar).^2,2)/(Ns-1); % Estimated variance of the samples 93 | varss = varFss + std(varF); % Variability due to sampling 94 | varF = sum(varF,2)/Ns + varFss; 95 | end 96 | if compute_vargrad 97 | dvv = 2*sum(F.*dF,2)/(Ns-1) - 2*Fbar.*sum(dF,2)/(Ns-1); 98 | dvarF = sum(dvarF,2)/Ns + dvv; 99 | end 100 | F = Fbar; 101 | if grad_flag; dF = sum(dF,2)/Ns; end 102 | end 103 | 104 | end -------------------------------------------------------------------------------- /misc/gpreupdate.m: -------------------------------------------------------------------------------- 1 | function gp = gpreupdate(gp,optimState,options) 2 | %GPREUPDATE Quick posterior reupdate of Gaussian process. 3 | 4 | [X_train,y_train,s2_train,t_train] = get_traindata_vbmc(optimState,options); 5 | gp.X = X_train; 6 | gp.y = y_train; 7 | gp.s2 = s2_train; 8 | gp.t = t_train; 9 | gp = gplite_post(gp); 10 | 11 | if gp.intmeanfun == 3 || gp.intmeanfun == 4 12 | errorflag = check_quadcoefficients_vbmc(gp); 13 | if errorflag 14 | gp.meanfun = optimState.gpMeanfun; 15 | gp.intmeanfun = []; 16 | 17 | for s = 1:numel(gp.post) 18 | betabar = gp.post(s).intmean.betabar; 19 | hyp = gp.post(s).hyp; 20 | 21 | switch gp.meanfun 22 | case 4 23 | omega2 = -1./betabar(1+D+(1:D)); 24 | xm = omega2.*betabar(1+(1:D)); 25 | m0 = betabar(1) + 0.5*xm.^2./omega2; 26 | hyp_mean = [m0; xm(:); 0.5*log(omega2(:))]; 27 | hypnew = [hyp(1:gp.Ncov+gp.Nnoise); hyp_mean(:); hyp(gp.Ncov+gp.Nnoise+1:end)]; 28 | end 29 | gp.post(s).hyp = hypnew; 30 | end 31 | 32 | % Recompute GP without integrated mean function 33 | gp = gplite_post(gp); 34 | end 35 | end 36 | 37 | end -------------------------------------------------------------------------------- /misc/gpsample_vbmc.m: -------------------------------------------------------------------------------- 1 | function X = gpsample_vbmc(vp,gp,Ns,origflag) 2 | %GPSAMPLE_VBMC Sample from GP obtained through VBMC. 
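%   X = GPSAMPLE_VBMC(VP,GP,NS,ORIGFLAG) draws NS MCMC samples from the
%   (unnormalized) posterior density represented by the GP surrogate.
%   The variational posterior VP is used to draw the starting points of
%   the sampler and to estimate the average observation noise, which
%   sets a predictive-variance threshold that discourages the sampler
%   from regions where the surrogate is too uncertain. If ORIGFLAG is
%   true (default), samples are mapped back to the original space.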
3 | 4 | if nargin < 4 || isempty(origflag); origflag = true; end 5 | 6 | D = size(gp.X,2); 7 | 8 | if isfield(gp,'s2') && ~isempty(gp.s2) 9 | % Evaluate GP input length scale (use geometric mean) 10 | Ns_gp = numel(gp.post); 11 | ln_ell = zeros(D,Ns_gp); 12 | for s = 1:Ns_gp; ln_ell(:,s) = gp.post(s).hyp(1:D); end 13 | gplengthscale = exp(mean(ln_ell,2))'; 14 | X_rescaled = bsxfun(@rdivide,gp.X,gplengthscale); % Rescaled GP training inputs 15 | 16 | % Evaluate GP observation noise on training inputs 17 | sn2new = zeros(size(gp.X,1),Ns_gp); 18 | for s = 1:Ns_gp 19 | hyp_noise = gp.post(s).hyp(gp.Ncov+1:gp.Ncov+gp.Nnoise); % Get noise hyperparameters 20 | if isfield(gp,'s2') 21 | s2 = gp.s2; 22 | else 23 | s2 = []; 24 | end 25 | % s2 = noiseshaping_vbmc(s2,gp.y,options); 26 | sn2new(:,s) = gplite_noisefun(hyp_noise,gp.X,gp.noisefun,gp.y,s2); 27 | end 28 | sn2new = mean(sn2new,2); 29 | 30 | % Estimate observation noise variance over variational posterior 31 | xx = vbmc_rnd(vp,2e4,0,0); 32 | [~,pos] = min(sq_dist(bsxfun(@rdivide,xx,gplengthscale),X_rescaled),[],2); 33 | sn2_avg = mean(sn2new(pos)); % Use nearest neighbor approximation 34 | else 35 | sn2_avg = 0; 36 | end 37 | 38 | VarThresh = max(1,sn2_avg); 39 | 40 | W = 2*(D+1); 41 | x0 = vbmc_rnd(vp,W,0,0); 42 | X = gplite_sample(gp,Ns,x0,'parallel',[],[],VarThresh); 43 | if origflag 44 | X = warpvars_vbmc(X,'inv',vp.trinfo); 45 | end 46 | 47 | end 48 | 49 | 50 | 51 | %SQ_DIST Compute matrix of all pairwise squared distances between two sets 52 | % of vectors, stored in the columns of the two matrices, a (of size n-by-D) 53 | % and b (of size m-by-D). 54 | function C = sq_dist(a,b) 55 | 56 | n = size(a,1); 57 | m = size(b,1); 58 | mu = (m/(n+m))*mean(b,1) + (n/(n+m))*mean(a,1); 59 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu); 60 | C = bsxfun(@plus,sum(a.*a,2),bsxfun(@minus,sum(b.*b,2)',2*a*b')); 61 | C = max(C,0); 62 | 63 | end -------------------------------------------------------------------------------- /misc/initdesign_vbmc.m: -------------------------------------------------------------------------------- 1 | function [optimState,t_func] = initdesign_vbmc(optimState,Ns,funwrapper,t_func,options) 2 | %INITDESIGN_VBMC Initial sample design (provided or random box). 
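%   [OPTIMSTATE,T_FUNC] = INITDESIGN_VBMC(OPTIMSTATE,NS,FUNWRAPPER,T_FUNC,OPTIONS)
%   evaluates the target function at an initial design of NS points,
%   updating the total function evaluation time T_FUNC. User-provided
%   points in the cache are used first. If fewer than NS points are
%   available, the remainder are sampled uniformly inside the plausible
%   box (OPTIONS.InitDesign = 'plausible') or in a small box around the
%   starting point ('narrow'). If more than NS points are provided, they
%   are clustered via k-means and the highest-valued point of each
%   cluster is kept.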
3 | 4 | x0 = optimState.Cache.X_orig; 5 | [N0,D] = size(x0); 6 | 7 | if N0 <= Ns 8 | Xs = x0; 9 | ys = optimState.Cache.y_orig; 10 | if N0 < Ns 11 | switch lower(options.InitDesign) 12 | case 'plausible' 13 | % Uniform random samples in the plausible box (in transformed space) 14 | Xrnd = bsxfun(@plus,bsxfun(@times,rand(Ns-N0,D),optimState.PUB-optimState.PLB),optimState.PLB); 15 | case 'narrow' 16 | xstart = warpvars_vbmc(x0(1,:),'dir',optimState.trinfo); 17 | Xrnd = bsxfun(@plus,bsxfun(@times,rand(Ns-N0,D)-0.5,0.1*(optimState.PUB-optimState.PLB)),xstart); 18 | Xrnd = bsxfun(@min,bsxfun(@max,Xrnd,optimState.PLB),optimState.PUB); 19 | otherwise 20 | error('Unknown initial design for VBMC.'); 21 | end 22 | Xrnd = warpvars_vbmc(Xrnd,'inv',optimState.trinfo); % Convert back to original space 23 | Xs = [Xs; Xrnd]; 24 | ys = [ys; NaN(Ns-N0,1)]; 25 | end 26 | idx_remove = true(N0,1); 27 | 28 | elseif N0 > Ns 29 | % Cluster starting points 30 | kmeans_options = struct('Display','off','Method',2,'Preprocessing','whiten'); 31 | idx = fastkmeans(x0,Ns,kmeans_options); 32 | 33 | % From each cluster, take points with higher density in original space 34 | Xs = NaN(Ns,D); ys = NaN(Ns,1); idx_remove = false(N0,1); 35 | for iK = 1:Ns 36 | idxK = find(idx == iK); 37 | xx = optimState.Cache.X_orig(idxK,:); 38 | yy = optimState.Cache.y_orig(idxK); 39 | [~,idx_y] = max(yy); 40 | Xs(iK,:) = xx(idx_y,:); 41 | ys(iK) = yy(idx_y); 42 | idx_remove(idxK(idx_y)) = true; 43 | end 44 | end 45 | % Remove points from starting cache 46 | optimState.Cache.X_orig(idx_remove,:) = []; 47 | optimState.Cache.y_orig(idx_remove) = []; 48 | 49 | Xs = warpvars_vbmc(Xs,'d',optimState.trinfo); 50 | 51 | for is = 1:Ns 52 | timer_func = tic; 53 | if isnan(ys(is)) % Function value is not available 54 | [~,optimState] = funlogger_vbmc(funwrapper,Xs(is,:),optimState,'iter'); 55 | else 56 | [~,optimState] = funlogger_vbmc(funwrapper,Xs(is,:),optimState,'add',ys(is)); 57 | end 58 | t_func = t_func + toc(timer_func); 59 | end 60 | 61 | end 62 | -------------------------------------------------------------------------------- /misc/intkernel.m: -------------------------------------------------------------------------------- 1 | function F = intkernel(X,vp,gp,avg_flag) 2 | %INTKERNEL Expected GP kernel in scalar correlation 3 | 4 | if nargin < 4 || isempty(avg_flag); avg_flag = false; end 5 | 6 | K = vp.K; % Number of components 7 | [N,D] = size(X); 8 | mu(:,:) = vp.mu; 9 | sigma(1,:) = vp.sigma; 10 | lambda(:,1) = vp.lambda(:); 11 | w(1,:) = vp.w; 12 | 13 | Ns = numel(gp.post); % Hyperparameter samples 14 | 15 | F = zeros(N,Ns); 16 | 17 | if isfield(vp,'delta') && ~isempty(vp.delta) 18 | delta = vp.delta; 19 | else 20 | delta = 0; 21 | end 22 | 23 | % Integrated mean function being used? 
24 | integrated_meanfun = isfield(gp,'intmeanfun') && gp.intmeanfun > 0; 25 | 26 | if integrated_meanfun 27 | % Evaluate basis functions 28 | Hs = gplite_intmeanfun(X,gp.intmeanfun); 29 | end 30 | 31 | % Loop over hyperparameter samples 32 | for s = 1:Ns 33 | hyp = gp.post(s).hyp; 34 | 35 | % Extract GP hyperparameters from HYP 36 | ell = exp(hyp(1:D)); 37 | ln_sf2 = 2*hyp(D+1); 38 | sum_lnell = sum(hyp(1:D)); 39 | 40 | if integrated_meanfun 41 | %betabar = gp.post(s).intmean.betabar'; 42 | %KinvHtbetabar = gp.post(s).intmean.HKinv'*betabar; 43 | plus_idx = gp.intmeanfun_var > 0; 44 | HKinv = gp.post(s).intmean.HKinv(plus_idx,:); 45 | Tplusinv = gp.post(s).intmean.Tplusinv; 46 | end 47 | 48 | L = gp.post(s).L; 49 | Lchol = gp.post(s).Lchol; 50 | 51 | sn2_eff = 1/gp.post(s).sW(1)^2; 52 | 53 | ddl = sq_dist(bsxfun(@rdivide,X',ell),bsxfun(@rdivide,gp.X',ell)); 54 | ll = exp(ln_sf2 -0.5*ddl); 55 | 56 | if Lchol 57 | zz = (L\(L'\ll'))/sn2_eff; 58 | else 59 | zz = -L*ll'; 60 | end 61 | 62 | for k = 1:K 63 | tau_k = sqrt(sigma(k)^2*lambda.^2 + ell.^2 + delta.^2); 64 | lnnf_k = ln_sf2 + sum_lnell - sum(log(tau_k)); % Covariance normalization factor 65 | delta_k = bsxfun(@rdivide,bsxfun(@minus, mu(:,k), gp.X'), tau_k); 66 | z_k = exp(lnnf_k -0.5 * sum(delta_k.^2,1)); 67 | 68 | dd_k = bsxfun(@rdivide,bsxfun(@minus, mu(:,k), X'), tau_k); 69 | zz_k = exp(lnnf_k -0.5 * sum(dd_k.^2,1)); 70 | 71 | F(:,s) = F(:,s) + w(k)*(zz_k - z_k*zz)'; 72 | 73 | % Contribution of integrated mean function 74 | if integrated_meanfun 75 | switch gp.intmeanfun 76 | case 1; u_k = 1; 77 | case 2; u_k = [1,mu(:,k)']; 78 | case 3; u_k = [1,mu(:,k)',(mu(:,k).^2 + sigma(k)^2*lambda.^2)']; 79 | case 4; u_k = [1,mu(:,k)',(mu(:,k).^2 + sigma(k)^2*lambda.^2)',mumu_mat(k,:)]; 80 | end 81 | 82 | F(:,s) = F(:,s) + w(k)*((u_k(plus_idx)*(Tplusinv*Hs)) ... 83 | + ((z_k*HKinv')*(Tplusinv*(HKinv*ll'))) ... 84 | - (u_k(plus_idx)*(Tplusinv*(HKinv*ll'))) ... 85 | - ((z_k*HKinv')*(Tplusinv*Hs)))'; 86 | end 87 | 88 | 89 | end 90 | end 91 | 92 | % Average multiple hyperparameter samples 93 | if Ns > 1 && avg_flag 94 | F = mean(F,2); 95 | end 96 | 97 | end 98 | 99 | 100 | -------------------------------------------------------------------------------- /misc/noiseshaping_vbmc.m: -------------------------------------------------------------------------------- 1 | function s2s = noiseshaping_vbmc(s2,y,options) 2 | %NOISESHAPING_VBMC Increase noise for low-density points. 3 | 4 | TolScale = 1e10; 5 | 6 | if isempty(s2); s2 = options.TolGPNoise^2*ones(size(y)); end 7 | 8 | deltay = max(0, max(y) - y - options.NoiseShapingThreshold); 9 | sn2extra = (options.NoiseShapingFactor*deltay).^2; 10 | 11 | s2s = s2 + sn2extra; 12 | 13 | maxs2 = min(s2s)*TolScale; 14 | s2s = min(s2s,maxs2); 15 | -------------------------------------------------------------------------------- /misc/proposal_vbmc.m: -------------------------------------------------------------------------------- 1 | function y = proposal_vbmc(X,PLB,PUB,LB,UB) 2 | %PROPOSAL_VBMC Default proposal function. 
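%   Y = PROPOSAL_VBMC(X,PLB,PUB,LB,UB) evaluates the default VBMC
%   proposal density at the rows of X: a separable, heavy-tailed density
%   centered at the middle of the plausible box, with scale equal to
%   half the plausible range along each dimension. LB and UB are
%   accepted for interface compatibility but are currently unused.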
3 | 4 | [N,D] = size(X); 5 | y = zeros(N,1); 6 | 7 | % df = 3; % Three degrees of freedom 8 | mu = 0.5*(PLB + PUB); 9 | sigma = 0.5*(PUB-PLB); 10 | 11 | for d = 1:D 12 | % y(:,d) = ( 1 + ((X(:,d)-mu(d))./sigma(d)).^2/df ).^(-(df+1)/2); 13 | y(:,d) = 1./( 1 + (((X(:,d)-mu(d))./sigma(d)).^2)/3 ).^2; 14 | end 15 | 16 | y = prod(y,2); 17 | 18 | end -------------------------------------------------------------------------------- /misc/real2int_vbmc.m: -------------------------------------------------------------------------------- 1 | function x = real2int_vbmc(x,trinfo,integervars) 2 | %REAL2INT_VBMC Convert to integer-valued representation. 3 | 4 | if ~any(integervars); return; end 5 | 6 | xtemp = warpvars_vbmc(x,'inv',trinfo); 7 | xtemp(:,integervars) = round(xtemp(:,integervars)); 8 | xtemp = warpvars_vbmc(xtemp,'d',trinfo); 9 | 10 | x(:,integervars) = xtemp(:,integervars); 11 | 12 | end -------------------------------------------------------------------------------- /misc/rescale_params.m: -------------------------------------------------------------------------------- 1 | function vp = rescale_params(vp,theta) 2 | %RESCALE_PARAMS Assign THETA and rescale SIGMA and LAMBDA variational parameters. 3 | 4 | D = vp.D; 5 | 6 | if nargin > 1 && ~isempty(theta) 7 | K = vp.K; 8 | if vp.optimize_mu 9 | vp.mu = reshape(theta(1:D*K),[D,K]); 10 | idx_start = D*K; 11 | else 12 | idx_start = 0; 13 | end 14 | if vp.optimize_sigma 15 | vp.sigma = exp(theta(idx_start+(1:K))); 16 | idx_start = idx_start + K; 17 | end 18 | if vp.optimize_lambda 19 | vp.lambda = exp(theta(idx_start+(1:D)))'; 20 | end 21 | if vp.optimize_weights 22 | eta = theta(end-K+1:end); 23 | eta = eta - max(eta); 24 | vp.w = exp(eta(:)'); 25 | end 26 | end 27 | 28 | nl = sqrt(sum(vp.lambda.^2)/D); 29 | vp.lambda = vp.lambda(:)/nl; 30 | vp.sigma = vp.sigma(:)'*nl; 31 | 32 | % Ensure that weights are normalized 33 | if vp.optimize_weights 34 | vp.w = vp.w(:)'/sum(vp.w); 35 | % Remove ETA, used only for optimization 36 | if isfield(vp,'eta'); vp = rmfield(vp,'eta'); end 37 | end 38 | 39 | % The mode may have moved 40 | if isfield(vp,'mode'); vp = rmfield(vp,'mode'); end 41 | 42 | end -------------------------------------------------------------------------------- /misc/testpdf.m: -------------------------------------------------------------------------------- 1 | function [y,dy] = testpdf(x) 2 | 3 | D = numel(x); 4 | sigma = 1:D; 5 | y = -0.5*sum(x.^2./sigma.^2); 6 | dy = -x./sigma.^2; -------------------------------------------------------------------------------- /misc/vbinit_vbmc.m: -------------------------------------------------------------------------------- 1 | function [vp0_vec,type_vec] = vbinit_vbmc(type,Nopts,vp,Knew,Xstar,ystar) 2 | %VBINIT Generate array of random starting parameters for variational posterior 3 | 4 | % XSTAR and YSTAR are usually HPD regions 5 | 6 | D = vp.D; 7 | K = vp.K; 8 | 9 | Nstar = size(Xstar,1); 10 | 11 | % Compute moments 12 | %X_mean = mean(X,1); 13 | %X_cov = cov(X); 14 | %[X_R,p] = chol(X_cov); 15 | %if p > 0; X_R = diag(std(X)); end 16 | 17 | type_vec = type*ones(Nopts,1); 18 | lambda0 = vp.lambda; 19 | mu0 = vp.mu; 20 | w0 = vp.w; 21 | 22 | switch type 23 | case 1 % Start from old variational parameters 24 | sigma0 = vp.sigma; 25 | case 2 % Start from highest-posterior density training points 26 | [~,ord] = sort(ystar,'descend'); 27 | if vp.optimize_mu 28 | idx_ord = repmat(1:min(Knew,size(Xstar,1)),[1,ceil(Knew/size(Xstar,1))]); 29 | mu0 = Xstar(ord(idx_ord(1:Knew)),:)'; 30 | end 31 | if K > 1; V 
= var(mu0,[],2); else; V = var(Xstar)'; end 32 | sigma0 = sqrt(mean(V./lambda0.^2)/Knew).*exp(0.2*randn(1,Knew)); 33 | case 3 % Start from random provided training points 34 | if vp.optimize_mu; mu0 = zeros(D,K); end 35 | sigma0 = zeros(1,K); 36 | end 37 | 38 | for iOpt = 1:Nopts 39 | vp0_vec(iOpt) = vp; 40 | vp0_vec(iOpt).K = Knew; 41 | 42 | mu = mu0; 43 | sigma = sigma0; 44 | lambda = lambda0; 45 | if vp.optimize_weights; w = w0; end 46 | add_jitter = true; 47 | 48 | switch type 49 | 50 | case 1 % Start from old variational parameters 51 | if iOpt == 1 % Copy previous parameters verbatim 52 | add_jitter = false; 53 | end 54 | if Knew > vp.K 55 | % Spawn a new component near an existing one 56 | for iNew = vp.K+1:Knew 57 | idx = randi(vp.K); 58 | mu(:,iNew) = mu(:,idx); 59 | sigma(iNew) = sigma(idx); 60 | mu(:,iNew) = mu(:,iNew) + 0.5*sigma(iNew)*lambda.*randn(D,1); 61 | if vp.optimize_sigma 62 | sigma(iNew) = sigma(iNew)*exp(0.2*randn()); 63 | end 64 | if vp.optimize_weights 65 | xi = 0.25 + 0.25*rand(); 66 | w(iNew) = xi*w(idx); 67 | w(idx) = (1-xi)*w(idx); 68 | end 69 | 70 | end 71 | end 72 | 73 | case 2 % Start from highest-posterior density training points 74 | if iOpt == 1 75 | add_jitter = false; 76 | end 77 | if vp.optimize_lambda 78 | lambda = std(Xstar,[],1)'; 79 | lambda = lambda*sqrt(D/sum(lambda.^2)); 80 | end 81 | if vp.optimize_weights 82 | w = ones(1,Knew)/Knew; 83 | end 84 | 85 | case 3 % Start from random provided training points 86 | ord = randperm(Nstar); 87 | if vp.optimize_mu 88 | idx_ord = repmat(1:min(Knew,size(Xstar,1)),[1,ceil(Knew/size(Xstar,1))]); 89 | mu = Xstar(ord(idx_ord(1:Knew)),:)'; 90 | else 91 | mu = mu0; 92 | end 93 | if K > 1; V = var(mu,[],2); else; V = var(Xstar)'; end 94 | 95 | if vp.optimize_sigma 96 | sigma = sqrt(mean(V)/Knew)*exp(0.2*randn(1,Knew)); 97 | end 98 | if vp.optimize_lambda 99 | lambda = std(Xstar,[],1)'; 100 | lambda = lambda*sqrt(D/sum(lambda.^2)); 101 | end 102 | if vp.optimize_weights 103 | w = ones(1,Knew)/Knew; 104 | end 105 | 106 | otherwise 107 | error('vbinit:UnknownType', ... 108 | 'Unknown TYPE for initialization of variational posteriors.'); 109 | end 110 | 111 | if add_jitter 112 | if vp.optimize_mu 113 | mu = mu + bsxfun(@times,sigma,bsxfun(@times,lambda,randn(size(mu)))); 114 | end 115 | if vp.optimize_sigma 116 | sigma = sigma.*exp(0.2*randn(1,Knew)); 117 | end 118 | if vp.optimize_lambda 119 | lambda = lambda.*exp(0.2*randn(D,1)); 120 | end 121 | if vp.optimize_weights 122 | w = w.*exp(0.2*randn(1,Knew)); 123 | w = w/sum(w); 124 | end 125 | end 126 | 127 | if vp.optimize_weights 128 | vp0_vec(iOpt).w = w; 129 | else 130 | vp0_vec(iOpt).w = ones(1,Knew)/Knew; 131 | end 132 | if vp.optimize_mu 133 | vp0_vec(iOpt).mu = mu; 134 | else 135 | vp0_vec(iOpt).mu = mu0; 136 | end 137 | vp0_vec(iOpt).sigma = sigma; 138 | vp0_vec(iOpt).lambda = lambda; 139 | 140 | end -------------------------------------------------------------------------------- /misc/vbmc_gphyp.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/misc/vbmc_gphyp.m -------------------------------------------------------------------------------- /misc/vpbndloss.m: -------------------------------------------------------------------------------- 1 | function [L,dL] = vpbndloss(theta,vp,thetabnd,TolCon) 2 | %VPLOSS Variational parameter loss function for soft optimization bounds. 
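%   [L,DL] = VPBNDLOSS(THETA,VP,THETABND,TOLCON) computes the soft-bound
%   penalty loss L, and optionally its gradient DL, for the variational
%   parameter vector THETA, given the soft bounds THETABND computed by
%   VPBOUNDS and the tolerance TOLCON. The penalty acts on the component
%   means and on the log scales (log SIGMA plus log LAMBDA), and is
%   evaluated via SOFTBNDLOSS.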
3 | 4 | compute_grad = nargout > 1; % Compute gradient only if requested 5 | 6 | K = vp.K; 7 | D = vp.D; 8 | 9 | if vp.optimize_mu 10 | mu = theta(1:K*D); 11 | idx_start = K*D; 12 | else 13 | mu = vp.mu(:)'; 14 | idx_start = 0; 15 | end 16 | if vp.optimize_sigma 17 | lnsigma = theta(idx_start+(1:K)); 18 | idx_start = idx_start + K; 19 | else 20 | lnsigma = log(vp.sigma(:)); 21 | end 22 | if vp.optimize_lambda 23 | lnlambda = theta(idx_start+(1:D)); 24 | else 25 | lnlambda = log(vp.lambda(:)); 26 | end 27 | if vp.optimize_weights 28 | eta = theta(end-K+1:end); 29 | else 30 | eta = []; 31 | end 32 | 33 | lnscale = bsxfun(@plus,lnsigma(:)',lnlambda(:)); 34 | theta_ext = []; 35 | if vp.optimize_mu; theta_ext = [theta_ext; mu(:)]; end 36 | if vp.optimize_sigma || vp.optimize_lambda; theta_ext = [theta_ext; lnscale(:)]; end 37 | if vp.optimize_weights; theta_ext = [theta_ext; eta(:)]; end 38 | 39 | if compute_grad 40 | [L,dL] = softbndloss(theta_ext,thetabnd.lb(:),thetabnd.ub(:),TolCon); 41 | if vp.optimize_mu 42 | dmu = dL(1:D*K); 43 | idx_start = D*K; 44 | else 45 | dmu = []; 46 | idx_start = 0; 47 | end 48 | if vp.optimize_sigma || vp.optimize_lambda 49 | dlnscale = reshape(dL((1:D*K)+idx_start),[D,K]); 50 | if vp.optimize_sigma 51 | dsigma = sum(dlnscale,1); 52 | else 53 | dsigma = []; 54 | end 55 | if vp.optimize_lambda 56 | dlambda = sum(dlnscale,2); 57 | else 58 | dlambda = []; 59 | end 60 | else 61 | dsigma = []; dlambda = []; 62 | end 63 | if vp.optimize_weights 64 | deta = dL(end-K+1:end); 65 | else 66 | deta = []; 67 | end 68 | dL = [dmu(:); dsigma(:); dlambda(:); deta(:)]; 69 | else 70 | L = softbndloss(theta_ext,thetabnd.lb(:),thetabnd.ub(:),TolCon); 71 | end 72 | 73 | end -------------------------------------------------------------------------------- /misc/vpbounds.m: -------------------------------------------------------------------------------- 1 | function [vp,thetabnd] = vpbounds(vp,gp,options,K) 2 | %VPBOUNDS Compute soft bounds for variational posterior parameters. 
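%
%   The soft bounds are anchored to the current GP training inputs GP.X,
%   and only ever widen across iterations: component means MU are bounded
%   by the range of the training points; the combined log scales
%   log(SIGMA*LAMBDA) are bounded between log(range) + log(OPTIONS.TolLength)
%   and log(range) of the training data; and, if weights are optimized,
%   ETA lies in [log(0.5*OPTIONS.TolWeight), 0]. THETABND returns these
%   bounds replicated over the K components, together with the penalty
%   settings used by VPBNDLOSS.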
3 | 4 | if nargin < 4 || isempty(K); K = vp.K; end 5 | 6 | % Soft-bound loss is computed on MU and SCALE (which is SIGMA times LAMBDA) 7 | 8 | % Start with reversed bounds (see below) 9 | if ~isfield(vp,'bounds') || isempty(vp.bounds) 10 | vp.bounds.mu_lb = Inf(1,vp.D); 11 | vp.bounds.mu_ub = -Inf(1,vp.D); 12 | vp.bounds.lnscale_lb = Inf(1,vp.D); 13 | vp.bounds.lnscale_ub = -Inf(1,vp.D); 14 | % vp.bounds 15 | end 16 | 17 | % Set bounds for mean parameters of variational components 18 | vp.bounds.mu_lb = min(min(gp.X),vp.bounds.mu_lb); 19 | vp.bounds.mu_ub = max(max(gp.X),vp.bounds.mu_ub); 20 | 21 | % Set bounds for log scale parameters of variational components 22 | lnrange = log(max(gp.X) - min(gp.X)); 23 | vp.bounds.lnscale_lb = min(vp.bounds.lnscale_lb,lnrange + log(options.TolLength)); 24 | vp.bounds.lnscale_ub = max(vp.bounds.lnscale_ub,lnrange); 25 | 26 | % Set bounds for log weight parameters of variational components 27 | if vp.optimize_weights 28 | vp.bounds.eta_lb = log(0.5*options.TolWeight); 29 | vp.bounds.eta_ub = 0; 30 | end 31 | 32 | thetabnd.lb = []; 33 | thetabnd.ub = []; 34 | if vp.optimize_mu 35 | thetabnd.lb = [thetabnd.lb,repmat(vp.bounds.mu_lb,[1,K])]; 36 | thetabnd.ub = [thetabnd.ub,repmat(vp.bounds.mu_ub,[1,K])]; 37 | end 38 | if vp.optimize_sigma || vp.optimize_lambda 39 | thetabnd.lb = [thetabnd.lb,repmat(vp.bounds.lnscale_lb,[1,K])]; 40 | thetabnd.ub = [thetabnd.ub,repmat(vp.bounds.lnscale_ub,[1,K])]; 41 | end 42 | if vp.optimize_weights 43 | thetabnd.lb = [thetabnd.lb,repmat(vp.bounds.eta_lb,[1,K])]; 44 | thetabnd.ub = [thetabnd.ub,repmat(vp.bounds.eta_ub,[1,K])]; 45 | end 46 | 47 | thetabnd.TolCon = options.TolConLoss; 48 | 49 | % Weights below a certain threshold are penalized 50 | if vp.optimize_weights 51 | thetabnd.WeightThreshold = max(1/(4*K),options.TolWeight); 52 | thetabnd.WeightPenalty = options.WeightPenalty; 53 | end 54 | 55 | end -------------------------------------------------------------------------------- /misc/vpsample_vbmc.m: -------------------------------------------------------------------------------- 1 | function [vp,samples,output] = vpsample_vbmc(Ns,Ninit,vp,gp,optimState,options,wide_flag) 2 | 3 | if nargin < 7 || isempty(wide_flag); wide_flag = false; end 4 | 5 | % Assign default values to OPTIMSTATE 6 | if ~isfield(optimState,'delta'); optimState.delta = 0; end 7 | if ~isfield(optimState,'EntropySwitch'); optimState.EntropySwitch = false; end 8 | if ~isfield(optimState,'Warmup'); optimState.Warmup = ~vp.optimize_weights; end 9 | if ~isfield(optimState,'temperature'); optimState.temperature = 1; end 10 | 11 | %% Set up sampling variables and options 12 | 13 | % Perform quick sieve to determine good starting point 14 | [vp,~,elcbo_beta,compute_var,NSentK] = ... 
15 | vpsieve_vbmc(Ninit,1,vp,gp,optimState,options); 16 | 17 | K = vp.K; 18 | D = vp.D; 19 | 20 | % Compute soft bounds for variational parameters optimization 21 | [vp,thetabnd] = vpbounds(vp,gp,options,K); 22 | 23 | % Move lower bound on scale - we want *wider* distributions 24 | if wide_flag 25 | lnscale = bsxfun(@plus,log(vp.sigma(:))',log(vp.lambda(:))); 26 | if vp.optimize_mu; idx = D*K; else; idx = 0; end 27 | thetabnd.lb(idx+1:idx+K*D) = lnscale; 28 | end 29 | 30 | %% Sample variational posterior starting from current 31 | 32 | theta0 = get_vptheta(vp)'; 33 | Ntheta = numel(theta0); 34 | 35 | % MCMC parameters 36 | Widths = 0.5; 37 | sampleopts.Thin = 1; 38 | sampleopts.Burnin = 0; 39 | sampleopts.Display = 'off'; 40 | sampleopts.Diagnostics = false; 41 | LB = -Inf(1,Ntheta); 42 | UB = Inf(1,Ntheta); 43 | 44 | idx_fixed = false(size(theta0)); 45 | if ~optimState.Warmup && 0 46 | if vp.optimize_mu; idx_fixed(1:D*K) = true; end 47 | % idx_fixed = true(size(theta0)); 48 | % idx_fixed(idx+1:idx+K) = false; 49 | end 50 | 51 | LB(idx_fixed) = theta0(idx_fixed); 52 | UB(idx_fixed) = theta0(idx_fixed); 53 | 54 | % Perform sampling 55 | try 56 | switch lower(options.VariationalSampler) 57 | case 'slicesample' 58 | vpmcmc_fun = @(theta_) -negelcbo_vbmc(theta_,elcbo_beta,vp,gp,NSentK,0,compute_var,0,thetabnd); 59 | [samples,fvals,exitflag,output] = ... 60 | slicesample_vbmc(vpmcmc_fun,theta0,Ns,Widths,LB,UB,sampleopts); 61 | case 'malasample' 62 | if isfield(optimState,'mcmc_stepsize') 63 | sampleopts.Stepsize = optimState.mcmc_stepsize; 64 | output.stepsize = sampleopts.Stepsize; 65 | end 66 | vpmcmc_fun = @(theta_) vpmcmcgrad_fun(theta_,elcbo_beta,vp,gp,NSentK,compute_var,thetabnd); 67 | [samples,fvals,exitflag,output] = ... 68 | malasample_vbmc(vpmcmc_fun,theta0,Ns,Widths,LB,UB,sampleopts); 69 | % output.accept_rate 70 | end 71 | catch 72 | samples = repmat(theta0,[Ns,1]); 73 | end 74 | vp = rescale_params(vp,samples(end,:)); 75 | 76 | end 77 | 78 | function [logp,dlogp] = vpmcmcgrad_fun(theta,elcbo_beta,vp,gp,NSentK,compute_var,thetabnd) 79 | [nlogp,ndlogp] = negelcbo_vbmc(theta,elcbo_beta,vp,gp,NSentK,1,compute_var,0,thetabnd); 80 | logp = -nlogp; 81 | dlogp = -ndlogp; 82 | end 83 | 84 | 85 | -------------------------------------------------------------------------------- /misc/vpsieve_vbmc.m: -------------------------------------------------------------------------------- 1 | function [vp0_vec,vp0_type,elcbo_beta,compute_var,NSentK,NSentKFast] = vpsieve_vbmc(Ninit,Nbest,vp,gp,optimState,options,K) 2 | %VPSIEVE Preliminary 'sieve' method for fitting variational posterior. 
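%
%   Rather than fully optimizing a single candidate, the sieve generates
%   up to NINIT randomized candidate variational posteriors via
%   VBINIT_VBMC (perturbations of the current posterior and, depending on
%   NBEST, candidates centered on high-posterior-density or random
%   training points, plus any stored parameters from the repository),
%   scores each one cheaply with the negative ELCBO using a fast entropy
%   approximation, and returns the candidates sorted from best to worst,
%   to serve as starting points for the subsequent full optimization.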
3 | 4 | % Assign default values to OPTIMSTATE 5 | if ~isfield(optimState,'delta'); optimState.delta = 0; end 6 | if ~isfield(optimState,'EntropySwitch'); optimState.EntropySwitch = false; end 7 | if ~isfield(optimState,'Warmup'); optimState.Warmup = ~vp.optimize_weights; end 8 | if ~isfield(optimState,'temperature'); optimState.temperature = 1; end 9 | if ~isfield(optimState,'Neff'); optimState.Neff = size(gp.X,1); end 10 | 11 | if isempty(Nbest); Nbest = 1; end 12 | if nargin < 7 || isempty(K); K = vp.K; end 13 | 14 | %% Set up optimization variables and options 15 | 16 | vp.delta = optimState.delta(:); 17 | 18 | if isempty(Ninit) % Number of initial starting points 19 | Ninit = ceil(evaloption_vbmc(options.NSelbo,K)); 20 | end 21 | nelcbo_fill = zeros(Ninit,1); 22 | 23 | % Number of samples per component for MC approximation of the entropy 24 | NSentK = ceil(evaloption_vbmc(options.NSent,K)/K); 25 | 26 | % Number of samples per component for preliminary MC approximation of the entropy 27 | NSentKFast = ceil(evaloption_vbmc(options.NSentFast,K)/K); 28 | 29 | % Deterministic entropy if entropy switch is on or only one component 30 | if optimState.EntropySwitch || K == 1 31 | NSentK = 0; 32 | NSentKFast = 0; 33 | end 34 | 35 | % Confidence weight 36 | elcbo_beta = evaloption_vbmc(options.ELCBOWeight,optimState.Neff); 37 | compute_var = elcbo_beta ~= 0; 38 | 39 | % Compute soft bounds for variational parameters optimization 40 | [vp,thetabnd] = vpbounds(vp,gp,options,K); 41 | 42 | %% Perform quick shotgun evaluation of many candidate parameters 43 | 44 | if Ninit > 0 45 | % Get high-posterior density points 46 | [Xstar,ystar] = gethpd_vbmc(gp.X,gp.y,options.HPDFrac); 47 | 48 | % Generate a bunch of random candidate variational parameters 49 | switch Nbest 50 | case 1 51 | [vp0_vec,vp0_type] = vbinit_vbmc(1,Ninit,vp,K,Xstar,ystar); 52 | otherwise 53 | [vp0_vec1,vp0_type1] = vbinit_vbmc(1,ceil(Ninit/3),vp,K,Xstar,ystar); 54 | [vp0_vec2,vp0_type2] = vbinit_vbmc(2,ceil(Ninit/3),vp,K,Xstar,ystar); 55 | [vp0_vec3,vp0_type3] = vbinit_vbmc(3,Ninit-2*ceil(Ninit/3),vp,K,Xstar,ystar); 56 | vp0_vec = [vp0_vec1,vp0_vec2,vp0_vec3]; 57 | vp0_type = [vp0_type1;vp0_type2;vp0_type3]; 58 | end 59 | 60 | if isfield(optimState,'vp_repo') && ~isempty(optimState.vp_repo) && options.VariationalInitRepo 61 | Ntheta = numel(get_vptheta(vp0_vec(1))); 62 | idx = find(cellfun(@numel,optimState.vp_repo) == Ntheta); 63 | if ~isempty(idx) 64 | vp0_vec4 = []; 65 | for ii = 1:numel(idx) 66 | vp0_vec4 = [vp0_vec4,rescale_params(vp0_vec(1),optimState.vp_repo{idx(ii)})]; 67 | end 68 | vp0_vec = [vp0_vec,vp0_vec4]; 69 | vp0_type = [vp0_type;ones(numel(vp0_vec4),1)]; 70 | end 71 | end 72 | 73 | % Quickly estimate ELCBO at each candidate variational posterior 74 | for iOpt = 1:numel(vp0_vec) 75 | [theta0,vp0_vec(iOpt)] = get_vptheta(vp0_vec(iOpt),vp.optimize_mu,vp.optimize_sigma,vp.optimize_lambda,vp.optimize_weights); 76 | [nelbo_tmp,~,~,~,varF_tmp] = negelcbo_vbmc(theta0,0,vp0_vec(iOpt),gp,NSentKFast,0,compute_var,options.AltMCEntropy,thetabnd); 77 | nelcbo_fill(iOpt) = nelbo_tmp + elcbo_beta*sqrt(varF_tmp); 78 | end 79 | 80 | % Sort by negative ELCBO 81 | [~,vp0_ord] = sort(nelcbo_fill,'ascend'); 82 | vp0_vec = vp0_vec(vp0_ord); 83 | vp0_type = vp0_type(vp0_ord); 84 | else 85 | vp0_vec = vp; 86 | vp0_type = 1; 87 | end 88 | 89 | 90 | 91 | end -------------------------------------------------------------------------------- /misc/vptrain2real.m: -------------------------------------------------------------------------------- 1 | 
function vp_real = vptrain2real(vp,entflag,options) 2 | %VPTRAIN2REAL Convert training variational posterior to real one. 3 | 4 | if nargin < 2 || isempty(entflag); entflag = false; end 5 | if nargin < 3; options = []; end 6 | 7 | if isfield(vp,'temperature') && ~isempty(vp.temperature) 8 | T = vp.temperature; 9 | else 10 | T = 1; 11 | end 12 | 13 | if any(T == [2,3,4,5]) 14 | PowerThreshold = 1e-5; 15 | [vp_real,lnZ_pow] = vbmc_power(vp,T,PowerThreshold); 16 | if isfield(vp_real,'stats') && ~isempty(vp_real.stats) 17 | vp_real.stats.elbo = T*vp.stats.elbo + lnZ_pow; 18 | vp_real.stats.elbo_sd = T*vp.stats.elbo_sd; 19 | vp_real.stats.elogjoint_sd = T*vp.stats.elogjoint_sd; 20 | 21 | if entflag 22 | % Use deterministic approximation of the entropy 23 | H = entlb_vbmc(vp_real,0,1); 24 | varH = 0; 25 | vp_real.stats.elogjoint = vp_real.stats.elbo - H; 26 | vp_real.stats.entropy = H; 27 | vp_real.stats.entropy_sd = sqrt(varH); 28 | else 29 | vp_real.stats.elogjoint = NaN; 30 | vp_real.stats.entropy = NaN; 31 | vp_real.stats.entropy_sd = NaN; 32 | end 33 | end 34 | else 35 | vp_real = vp; 36 | end 37 | 38 | -------------------------------------------------------------------------------- /misc/warp_gpandvp_vbmc.m: -------------------------------------------------------------------------------- 1 | function [vp,hyp_warped] = warp_gpandvp_vbmc(trinfo,vp_old,gp_old) 2 | %WARP_GPANDVP_VBMC Update GP hyps and variational posterior after warping. 3 | 4 | D = size(gp_old.X,2); 5 | trinfo_old = vp_old.trinfo; 6 | 7 | % Temperature scaling 8 | if isfield(vp_old,'temperature') && ~isempty(vp_old.temperature) 9 | T = vp_old.temperature; 10 | else 11 | T = 1; 12 | end 13 | 14 | %% Update GP hyperparameters 15 | 16 | warpfun = @(x) warpvars_vbmc(warpvars_vbmc(x,'i',trinfo_old),'d',trinfo); 17 | 18 | Ncov = gp_old.Ncov; 19 | Nnoise = gp_old.Nnoise; 20 | Nmean = gp_old.Nmean; 21 | if ~isempty(gp_old.outwarpfun); Noutwarp = gp_old.Noutwarp; else; Noutwarp = 0; end 22 | 23 | Ns_gp = numel(gp_old.post); 24 | hyp_warped = NaN(Ncov+Nnoise+Nmean+Noutwarp,Ns_gp); 25 | 26 | for s = 1:Ns_gp 27 | hyp = gp_old.post(s).hyp; 28 | hyp_warped(:,s) = hyp; 29 | 30 | % Update GP input length scales 31 | ell = exp(hyp(1:D))'; 32 | [~,ell_new] = unscent_warp(warpfun,gp_old.X,ell); 33 | hyp_warped(1:D,s) = mean(log(ell_new),1); % Geometric mean of length scales 34 | 35 | % We assume relatively no change to GP output and noise scales 36 | 37 | switch gp_old.meanfun 38 | case 0 39 | % Warp constant mean 40 | m0 = hyp(Ncov+Nnoise+1); 41 | dy_old = warpvars_vbmc(gp_old.X,'logp',trinfo_old); 42 | dy = warpvars_vbmc(warpfun(gp_old.X),'logp',trinfo); 43 | m0w = m0 + (mean(dy) - mean(dy_old))/T; 44 | 45 | hyp_warped(Ncov+Nnoise+1,s) = m0w; 46 | 47 | case 4 48 | % Warp quadratic mean 49 | m0 = hyp(Ncov+Nnoise+1); 50 | xm = hyp(Ncov+Nnoise+1+(1:D))'; 51 | omega = exp(hyp(Ncov+Nnoise+1+D+(1:D)))'; 52 | 53 | % Warp location and scale 54 | [xmw,omegaw] = unscent_warp(warpfun,xm,omega); 55 | 56 | % Warp maximum 57 | dy_old = warpvars_vbmc(xm,'logpdf',trinfo_old)'; 58 | dy = warpvars_vbmc(xmw,'logpdf',trinfo)'; 59 | m0w = m0 + (dy - dy_old)/T; 60 | 61 | hyp_warped(Ncov+Nnoise+1,s) = m0w; 62 | hyp_warped(Ncov+Nnoise+1+(1:D),s) = xmw'; 63 | hyp_warped(Ncov+Nnoise+1+D+(1:D),s) = log(omegaw)'; 64 | 65 | otherwise 66 | error('Unsupported GP mean function for input warping.'); 67 | end 68 | end 69 | 70 | %% Update variational posterior 71 | 72 | vp = vp_old; 73 | vp.trinfo = trinfo; 74 | 75 | mu = vp_old.mu'; 76 | sigmalambda = 
bsxfun(@times,vp_old.lambda,vp_old.sigma)';
77 | 
78 | [muw,sigmalambdaw] = unscent_warp(warpfun,mu,sigmalambda);
79 | 
80 | vp.mu = muw';
81 | lambdaw = sqrt(D*mean(bsxfun(@rdivide,sigmalambdaw.^2,sum(sigmalambdaw.^2,2)),1));
82 | vp.lambda(:,1) = lambdaw(:);
83 | 
84 | sigmaw = exp(mean(log(bsxfun(@rdivide,sigmalambdaw,lambdaw)),2));
85 | vp.sigma(1,:) = sigmaw;
86 | 
87 | % Approximate change in weight
88 | dy_old = warpvars_vbmc(mu,'logpdf',trinfo_old)';
89 | dy = warpvars_vbmc(muw,'logpdf',trinfo)';
90 | 
91 | ww = vp_old.w .* exp((dy - dy_old)/T);
92 | vp.w = ww ./ sum(ww);
93 | 
94 | end
--------------------------------------------------------------------------------
/private/acqhedge_vbmc.m:
--------------------------------------------------------------------------------
1 | function hedge = acqhedge_vbmc(action,hedge,stats,options)
2 | %ACQHEDGE_VBMC Evaluate and update portfolio of acquisition functions.
3 | 
4 | switch lower(action(1:3))
5 |     case 'acq'
6 |         % Choose acquisition function based on hedge strategy
7 |         
8 |         if isempty(hedge)
9 |             % Initialize hedge struct
10 |             hedge.g = zeros(1,numel(options.SearchAcqFcn));
11 |             hedge.n = numel(options.SearchAcqFcn);
12 |             hedge.count = 0;
13 |             hedge.lambda = 0.2;     % Lapse rate - random choice
14 |             hedge.beta = 1;
15 |             hedge.decay = options.AcqHedgeDecay^(options.FunEvalsPerIter);
16 |         end
17 | 
18 |         hedge.count = hedge.count + 1;
19 |         hedge.p = exp(hedge.beta*(hedge.g - max(hedge.g)))./sum(exp(hedge.beta*(hedge.g - max(hedge.g))));
20 |         hedge.p = hedge.p*(1-hedge.lambda) + hedge.lambda/hedge.n;
21 | 
22 |         hedge.chosen = find(rand() < cumsum(hedge.p),1);
23 |         hedge.phat = Inf(size(hedge.p));
24 |         hedge.phat(hedge.chosen) = hedge.p(hedge.chosen);
25 | 
26 |     case 'upd'
27 |         % Update value of hedge portfolio based on uncertainty reduction
28 | 
29 |         HedgeCutoff = 5;
30 | 
31 |         if ~isempty(hedge)
32 |             iter = stats.iter(end);
33 |             min_iter = max(1,iter-options.AcqHedgeIterWindow);
34 | 
35 |             min_sd = min(stats.elbo_sd(min_iter:iter-1));
36 |             er_sd = max(0, log(min_sd / stats.elbo_sd(iter)));
37 | 
38 |             elcbo = stats.elbo - options.ELCBOImproWeight*stats.elbo_sd;
39 |             max_elcbo = max(elcbo(min_iter:iter-1));
40 |             er_elcbo = max(0,elcbo(iter) - max_elcbo)/options.TolImprovement;
41 |             if er_elcbo > 1; er_elcbo = 1 + log(er_elcbo); end
42 | 
43 |             min_r = min(stats.rindex(min_iter:iter-1));
44 |             er_r = max(0, log(min_r / stats.rindex(iter)));
45 | 
46 |             % er = 0.5*er_sd + 0.5*er_elcbo;    % Reward
47 |             er = er_r;
48 | 
49 |             for iHedge = 1:hedge.n
50 |                 hedge.g(iHedge) = hedge.decay*hedge.g(iHedge) + er/hedge.phat(iHedge);
51 |             end
52 | 
53 |             % Apply cutoff value on hedge
54 |             hedge.g = min(hedge.g,HedgeCutoff);
55 |             % hedge.g
56 |         end
57 | 
58 | end
--------------------------------------------------------------------------------
/private/recompute_lcbmax.m:
--------------------------------------------------------------------------------
1 | function lcbmax_vec = recompute_lcbmax(gp,optimState,stats,options)
2 | %RECOMPUTE_LCBMAX Recompute moving LCB maximum based on current GP.
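%
%   For all reliably evaluated training points, recomputes the lower
%   confidence bound LCB = FMU - OPTIONS.ELCBOImproWeight*sqrt(FS2) under
%   the current GP, takes its running maximum over the evaluation
%   history, and returns the value of that moving maximum at the
%   training-set sizes recorded in STATS.N.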
3 | 4 | N = optimState.Xn; 5 | Xflag = optimState.X_flag; 6 | X = optimState.X(Xflag,:); 7 | y = optimState.y(Xflag); 8 | if isfield(optimState,'S') 9 | s2 = optimState.S(Xflag).^2; 10 | else 11 | s2 = []; 12 | end 13 | 14 | fmu = NaN(N,1); 15 | fs2 = fmu; 16 | [~,~,fmu(Xflag),fs2(Xflag)] = gplite_pred(gp,X,y,s2); 17 | 18 | lcb = fmu - options.ELCBOImproWeight*sqrt(fs2); 19 | lcb_movmax = movmax(lcb,[numel(lcb),0]); 20 | 21 | lcbmax_vec = lcb_movmax(stats.N); 22 | 23 | end -------------------------------------------------------------------------------- /private/updateK.m: -------------------------------------------------------------------------------- 1 | function Knew = updateK(optimState,stats,options) 2 | %UPDATEK Update number of variational mixture components. 3 | 4 | Knew = optimState.vpK; 5 | 6 | % Compute maximum number of components 7 | Kmax = ceil(evaloption_vbmc(options.KfunMax,optimState.Neff)); 8 | 9 | % Evaluate bonus for stable solution 10 | Kbonus = round(double(evaloption_vbmc(options.AdaptiveK,Knew))); 11 | 12 | 13 | % If not warming up, check if number of components gets to be increased 14 | if ~optimState.Warmup && optimState.iter > 1 15 | 16 | RecentIters = ceil(0.5*options.TolStableCount/options.FunEvalsPerIter); 17 | 18 | % Check if ELCBO has improved wrt recent iterations 19 | elbos = stats.elbo(max(1,end-RecentIters+1):end); 20 | elboSDs = stats.elbo_sd(max(1,end-RecentIters+1):end); 21 | elcbos = elbos - options.ELCBOImproWeight*elboSDs; 22 | warmups = stats.warmup(max(1,end-RecentIters+1):end); 23 | elcbos_after = elcbos(~warmups); 24 | elcbos_after(1:min(2,end)) = -Inf; % Ignore two iterations right after warmup 25 | elcbo_max = max(elcbos_after); 26 | improving_flag = elcbos_after(end) >= elcbo_max && isfinite(elcbos_after(end)); 27 | 28 | % Add one component if ELCBO is improving and no pruning in last iteration 29 | if stats.pruned(end) == 0 && improving_flag 30 | Knew = Knew + 1; 31 | end 32 | 33 | % Bonus components for stable solution (speed up exploration) 34 | if stats.rindex(end) < 1 && ~optimState.RecomputeVarPost && improving_flag 35 | % No bonus if any component was very recently pruned 36 | if all(stats.pruned(max(1,end-ceil(0.5*RecentIters)+1):end) == 0) 37 | Knew = Knew + Kbonus; 38 | end 39 | end 40 | Knew = max(optimState.vpK,min(Knew,Kmax)); 41 | end 42 | 43 | end -------------------------------------------------------------------------------- /private/vbmc_demo2d.m: -------------------------------------------------------------------------------- 1 | function stats = vbmc_demo2d(fun,stats,plotbnd) 2 | %VBMC_DEMO2D Demo plot of VBMC at work (only for 2D problems). 
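%
%   Example (a minimal sketch; note that this runs a full VBMC fit and
%   may take several minutes):
%      stats = vbmc_demo2d();      % fit the default @rosenbrock_test target
%      % Reuse a previous run, with plot bounds [LB; UB] picked by hand:
%      vbmc_demo2d(@rosenbrock_test,stats,[-3 -3; 3 3]);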
3 | 4 | if nargin < 1 || isempty(fun); fun = @rosenbrock_test; end 5 | if nargin < 2 || isempty(stats) 6 | rng(0); 7 | [~,~,~,~,~,~,~,stats] = vbmc(fun,[-1 -1],-Inf,Inf,-3,3); 8 | end 9 | if nargin < 3 || isempty(plotbnd) 10 | vp = stats.vp(end); 11 | xrnd = vbmc_rnd(vp,1e6); 12 | for i = 1:size(xrnd,2) 13 | LB(i) = floor(quantile1(xrnd(:,i),0.01) - 0.5); 14 | UB(i) = ceil(quantile1(xrnd(:,i),0.99)+0.5); 15 | end 16 | else 17 | LB = plotbnd(1,:); 18 | UB = plotbnd(2,:); 19 | end 20 | 21 | tolx = 1e-3; 22 | Nx = 128; 23 | Npanels = 8; 24 | 25 | x1 = linspace(LB(1)+tolx,UB(1)-tolx,Nx); 26 | x2 = linspace(LB(2)+tolx,UB(2)-tolx,Nx); 27 | dx1 = x1(2)-x1(1); 28 | dx2 = x2(2)-x2(1); 29 | 30 | idx = ones(1,Npanels-2); 31 | idx(2) = find(stats.warmup == 1,1,'last'); 32 | tmp = floor(linspace(idx(2),numel(stats.vp),Npanels-3)); 33 | idx(3:Npanels-2) = tmp(2:end); 34 | 35 | Np = 5; 36 | grid = []; 37 | for i = 1:(Npanels-2)/2 38 | grid = [grid, [i*ones(1,Np); (i+(Npanels-2)/2)*ones(1,Np)]]; 39 | end 40 | grid = [grid, [0,Npanels*ones(1,Np);0,(Npanels-1)*ones(1,Np)]]; 41 | 42 | % grid = [reshape(1:Npanels-2,[(Npanels-2)/2,2])',[Npanels;Npanels-1]]; 43 | labels{1} = 'A'; 44 | labels{Npanels-1} = 'C'; 45 | labels{Npanels} = 'B'; 46 | 47 | h = plotify(grid,'gutter',[0.05 0.15],'margins',[.05 .02 .075 .05],'labels',labels); 48 | 49 | for iPlot = 1:Npanels 50 | axes(h(iPlot)); 51 | 52 | %[X1,X2] = meshgrid(x1,x2); 53 | %tmp = cat(2,X2',X1'); 54 | %xx = reshape(tmp,[],2); 55 | xx = combvec(x1,x2)'; 56 | 57 | if iPlot <= numel(idx); vpflag = true; else vpflag = false; end 58 | 59 | elboflag = false; 60 | if vpflag 61 | vp = stats.vp(idx(iPlot)); 62 | yy = vbmc_pdf(vp,xx); 63 | titlestr = ['Iteration ' num2str(stats.iter(idx(iPlot)))]; 64 | if iPlot == 2; titlestr = [titlestr ' (end of warm-up)']; end 65 | elseif iPlot == Npanels-1 66 | lnyy = zeros(size(xx,1),1); 67 | for ii = 1:size(xx,1) 68 | lnyy(ii) = fun(xx(ii,:)); 69 | end 70 | yy = exp(lnyy); 71 | Z = sum(yy(:))*dx1*dx2; 72 | yy = yy/Z; 73 | titlestr = ['True posterior']; 74 | else 75 | elboflag = true; 76 | end 77 | 78 | if elboflag 79 | iter = stats.iter; 80 | elbo = stats.elbo; 81 | elbo_sd = stats.elbo_sd; 82 | beta = 1.96; 83 | patch([iter,fliplr(iter)],[elbo + beta*elbo_sd, fliplr(elbo - beta*elbo_sd)],[1 0.8 0.8],'LineStyle','none'); hold on; 84 | hl(1) = plot(iter,elbo,'r','LineWidth',1); hold on; 85 | hl(2) = plot([iter(1),iter(end)],log(Z)*[1 1],'k','LineWidth',1); 86 | titlestr = 'Model evidence'; 87 | xlim([0.9, stats.iter(end)+0.1]); 88 | ylims = [floor(min(elbo)-0.5),ceil(max(elbo)+0.5)]; 89 | ylim(ylims); 90 | xticks(idx); 91 | yticks([ylims(1),round(log(Z),2),ylims(2)]) 92 | xlabel('Iterations'); 93 | if log(Z) < mean(ylims) 94 | loc = 'NorthEast'; 95 | else 96 | loc = 'SouthEast'; 97 | end 98 | hll = legend(hl,'ELBO','LML'); 99 | set(hll,'Location',loc,'Box','off'); 100 | 101 | else 102 | s = contour(x1,x2,reshape(yy',[Nx,Nx])'); 103 | 104 | if vpflag 105 | % Plot component centers 106 | mu = warpvars_vbmc(vp.mu','inv',vp.trinfo); 107 | hold on; 108 | plot(mu(:,1),mu(:,2),'xr','LineStyle','none'); 109 | 110 | % Plot data 111 | X = warpvars_vbmc(stats.gp(idx(iPlot)).X,'inv',vp.trinfo); 112 | plot(X(:,1),X(:,2),'.k','LineStyle','none'); 113 | end 114 | 115 | % s.EdgeColor = 'None'; 116 | view([0 90]); 117 | xlabel('x_1'); 118 | ylabel('x_2'); 119 | set(gca,'XTickLabel',[],'YTickLabel',[]); 120 | 121 | xlim([LB(1),UB(1)]); 122 | ylim([LB(2),UB(2)]); 123 | set(gca,'TickLength',get(gca,'TickLength')*2); 124 | end 125 | 126 | 
title(titlestr); 127 | set(gca,'TickDir','out'); 128 | end 129 | 130 | set(gcf,'Color','w'); 131 | 132 | pos = [20,20,900,450]; 133 | set(gcf,'Position',pos); 134 | set(gcf,'Units','inches'); pos = get(gcf,'Position'); 135 | set(gcf,'PaperPositionMode','Auto','PaperUnits','Inches','PaperSize',[pos(3), pos(4)]) 136 | drawnow; 137 | 138 | end -------------------------------------------------------------------------------- /private/vbmc_iterplot.m: -------------------------------------------------------------------------------- 1 | function vbmc_iterplot(vp,gp,optimState,stats,elbo) 2 | %VBMC_ITERPLOT Plot current iteration of the VBMC algorithm. 3 | 4 | D = vp.D; 5 | iter = optimState.iter; 6 | fontsize = 14; 7 | 8 | if D == 1 9 | hold off; 10 | gplite_plot(gp); 11 | hold on; 12 | xlims = xlim; 13 | xx = linspace(xlims(1),xlims(2),1e3)'; 14 | yy = vbmc_pdf(vp,xx,false,true); 15 | hold on; 16 | plot(xx,yy+elbo,':'); 17 | drawnow; 18 | 19 | else 20 | if ~isempty(vp) 21 | Xrnd = vbmc_rnd(vp,1e5,1,1); 22 | else 23 | Xrnd = gp.X; 24 | end 25 | X_train = gp.X; 26 | 27 | if iter == 1 28 | idx_new = true(size(X_train,1),1); 29 | else 30 | X_trainold = stats.gp(iter-1).X; 31 | idx_new = false(size(X_train,1),1); 32 | [~,idx_diff] = setdiff(X_train,X_trainold,'rows'); 33 | idx_new(idx_diff) = true; 34 | end 35 | idx_old = ~idx_new; 36 | 37 | if ~isempty(vp.trinfo); X_train = warpvars_vbmc(X_train,'inv',vp.trinfo); end 38 | 39 | Pdelta = optimState.PUB_orig - optimState.PLB_orig; 40 | X_min = min(X_train,[],1) - Pdelta*0.1; 41 | X_max = max(X_train,[],1) + Pdelta*0.1; 42 | bounds = [max(min(optimState.PLB_orig,X_min),optimState.LB_orig); ... 43 | min(max(optimState.PUB_orig,X_max),optimState.UB_orig)]; 44 | 45 | try 46 | for i = 1:D; names{i} = ['x_{' num2str(i) '}']; end 47 | [~,ax] = cornerplot(Xrnd,names,[],bounds); 48 | for i = 1:D-1 49 | for j = i+1:D 50 | axes(ax(j,i)); hold on; 51 | if any(idx_old) 52 | scatter(X_train(idx_old,i),X_train(idx_old,j),'ok'); 53 | end 54 | if any(idx_new) 55 | scatter(X_train(idx_new,i),X_train(idx_new,j),'or','MarkerFaceColor','r'); 56 | end 57 | end 58 | end 59 | 60 | h = axes(gcf,'Position',[0 0 1 1]); 61 | set(h,'Color','none','box','off','XTick',[],'YTick',[],'Units','normalized','Xcolor','none','Ycolor','none'); 62 | text(0.9,0.9,['VBMC (iteration ' num2str(iter) ')'],'FontSize',fontsize,'HorizontalAlignment','right'); 63 | 64 | drawnow; 65 | catch 66 | % pause 67 | end 68 | end 69 | 70 | end -------------------------------------------------------------------------------- /private/vbmc_output.m: -------------------------------------------------------------------------------- 1 | function output = vbmc_output(vp,optimState,msg,stats,idx_best,vbmc_version) 2 | %VBMC_OUTPUT Create OUTPUT struct for VBMC. 
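%
%   OUTPUT summarizes the returned solution: the target function and
%   problem type, iteration and function-evaluation counts, the index of
%   the best iteration with its training-set size, the number of mixture
%   components and the reliability index at that iteration, a tentative
%   convergence status ('probable' or 'no'), the random-number-generator
%   state, algorithm name and version, the termination message, and the
%   final ELBO with its standard deviation.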
3 | 4 | output.function = func2str(optimState.fun); 5 | if all(isinf(optimState.LB)) && all(isinf(optimState.UB)) 6 | output.problemtype = 'unconstrained'; 7 | else 8 | output.problemtype = 'boundconstraints'; 9 | end 10 | output.iterations = optimState.iter; 11 | output.funccount = optimState.funccount; 12 | output.bestiter = idx_best; 13 | output.trainsetsize = stats.Neff(idx_best); 14 | output.components = vp.K; 15 | output.rindex = stats.rindex(idx_best); 16 | if stats.stable(idx_best) 17 | output.convergencestatus = 'probable'; 18 | else 19 | output.convergencestatus = 'no'; 20 | end 21 | output.overhead = NaN; 22 | output.rngstate = rng; 23 | output.algorithm = 'Variational Bayesian Monte Carlo'; 24 | output.version = vbmc_version; 25 | output.message = msg; 26 | 27 | output.elbo = vp.stats.elbo; 28 | output.elbo_sd = vp.stats.elbo_sd; 29 | 30 | end -------------------------------------------------------------------------------- /private/vbmc_plot2d.m: -------------------------------------------------------------------------------- 1 | function vbmc_plot2d(vp,LB,UB,gp,plotflag) 2 | %VBMC_PLOT2D 2-D Plot of variational/target posterior. 3 | 4 | if nargin < 4; gp = []; end 5 | if nargin < 5 || isempty(plotflag); plotflag = true; end 6 | 7 | tolx = 1e-3; 8 | Nx = 128; 9 | 10 | x1 = linspace(LB(1)+tolx,UB(1)-tolx,Nx); 11 | x2 = linspace(LB(2)+tolx,UB(2)-tolx,Nx); 12 | dx1 = x1(2)-x1(1); 13 | dx2 = x2(2)-x2(1); 14 | 15 | xx = combvec(x1,x2)'; 16 | 17 | if isa(vp,'function_handle'); fun = vp; vpflag = false; else; vpflag = true; end 18 | 19 | if vpflag 20 | yy = vbmc_pdf(vp,xx); 21 | else 22 | lnyy = zeros(size(xx,1),1); 23 | for ii = 1:size(xx,1) 24 | lnyy(ii) = fun(xx(ii,:)); 25 | end 26 | yy = exp(lnyy); 27 | Z = sum(yy(:))*dx1*dx2; 28 | yy = yy/Z; 29 | end 30 | 31 | s = contour(x1,x2,reshape(yy',[Nx,Nx])'); 32 | 33 | if vpflag 34 | % Plot component centers 35 | if plotflag 36 | mu = warpvars_vbmc(vp.mu','inv',vp.trinfo); 37 | hold on; 38 | plot(mu(:,1),mu(:,2),'xr','LineStyle','none'); 39 | end 40 | 41 | % Plot data 42 | if ~isempty(gp) 43 | X = warpvars_vbmc(gp.X,'inv',vp.trinfo); 44 | plot(X(:,1),X(:,2),'.k','LineStyle','none'); 45 | end 46 | end 47 | 48 | % s.EdgeColor = 'None'; 49 | view([0 90]); 50 | xlabel('x_1'); 51 | ylabel('x_2'); 52 | set(gca,'XTickLabel',[],'YTickLabel',[]); 53 | 54 | xlim([LB(1),UB(1)]); 55 | ylim([LB(2),UB(2)]); 56 | set(gca,'TickLength',get(gca,'TickLength')*2); 57 | 58 | set(gca,'TickDir','out'); 59 | set(gcf,'Color','w'); 60 | 61 | end -------------------------------------------------------------------------------- /private/vbmc_termination.m: -------------------------------------------------------------------------------- 1 | function [optimState,stats,isFinished_flag,exitflag,action,msg] = vbmc_termination(optimState,action,stats,options) 2 | %VBMC_TERMINATION Compute stability index and check termination conditions. 
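%
%   The reliability index RINDEX is the average of three normalized
%   measures: the recent change in ELBO and the ELBO standard deviation
%   (both relative to a noise-adjusted OPTIONS.TolSD), and the
%   symmetrized KL divergence between successive variational posteriors
%   (relative to OPTIONS.TolsKL). The run is flagged as stable, and may
%   terminate, when RINDEX < 1 and the average ELCBO improvement per
%   function evaluation stays below OPTIONS.TolImprovement for most of
%   the recent iterations.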
3 | 4 | iter = optimState.iter; 5 | exitflag = 0; 6 | isFinished_flag = false; 7 | msg = []; 8 | 9 | % Maximum number of new function evaluations 10 | if optimState.funccount >= options.MaxFunEvals 11 | isFinished_flag = true; 12 | msg = 'Inference terminated: reached maximum number of function evaluations OPTIONS.MaxFunEvals.'; 13 | end 14 | 15 | % Maximum number of iterations 16 | if iter >= options.MaxIter 17 | isFinished_flag = true; 18 | msg = 'Inference terminated: reached maximum number of iterations OPTIONS.MaxIter.'; 19 | end 20 | 21 | % Quicker stability check for entropy switching 22 | if optimState.EntropySwitch 23 | TolStableIters = options.TolStableEntropyIters; 24 | else 25 | TolStableIters = ceil(options.TolStableCount/options.FunEvalsPerIter); 26 | end 27 | 28 | % Reached stable variational posterior with stable ELBO and low uncertainty 29 | [idx_stable,dN,dN_last,w] = getStableIter(stats,optimState,options); 30 | if ~isempty(idx_stable) 31 | sKL_list = stats.sKL; 32 | elbo_list = stats.elbo; 33 | 34 | sn = sqrt(optimState.sn2hpd); 35 | TolSN = sqrt(sn/options.TolSD)*options.TolSD; 36 | TolSD = min(max(options.TolSD,TolSN),options.TolSD*10); 37 | 38 | rindex_vec(1) = abs(elbo_list(iter) - elbo_list(iter-1)) / TolSD; 39 | rindex_vec(2) = stats.elbo_sd(iter) / TolSD; 40 | rindex_vec(3) = sKL_list(iter) / options.TolsKL; % This should be fixed 41 | 42 | % Stop sampling after sample variance has stabilized below ToL 43 | if ~isempty(idx_stable) && optimState.StopSampling == 0 && ~optimState.Warmup 44 | varss_list = stats.gpSampleVar; 45 | if sum(w.*varss_list(idx_stable:iter)) < options.TolGPVarMCMC 46 | optimState.StopSampling = optimState.N; 47 | end 48 | end 49 | 50 | % Compute average ELCBO improvement per fcn eval in the past few iters 51 | idx0 = max(1,iter-ceil(0.5*TolStableIters)+1); 52 | xx = stats.funccount(idx0:iter); 53 | yy = stats.elbo(idx0:iter) - options.ELCBOImproWeight*stats.elbo_sd(idx0:iter); 54 | p = polyfit(xx,yy,1); 55 | ELCBOimpro = p(1); 56 | 57 | else 58 | rindex_vec = Inf(1,3); 59 | ELCBOimpro = NaN; 60 | end 61 | 62 | % Store reliability index 63 | rindex = mean(rindex_vec); 64 | stats.rindex(iter) = rindex; 65 | stats.elcbo_impro(iter) = ELCBOimpro; 66 | optimState.R = rindex; 67 | 68 | % Check stability termination condition 69 | stableflag = false; 70 | if iter >= TolStableIters && ... 71 | rindex < 1 && ... 
72 | ELCBOimpro < options.TolImprovement 73 | 74 | % Count how many good iters in the recent past (excluding current) 75 | stablecount = sum(stats.rindex(iter-TolStableIters+1:iter-1) < 1); 76 | 77 | % Iteration is stable if almost all recent iterations are stable 78 | if stablecount >= TolStableIters - floor(TolStableIters*options.TolStableExcptFrac) - 1 79 | % If stable but entropy switch is ON, turn it off and continue 80 | if optimState.EntropySwitch && isfinite(options.EntropyForceSwitch) 81 | optimState.EntropySwitch = false; 82 | if isempty(action); action = 'entropy switch'; else; action = [action ', entropy switch']; end 83 | else 84 | % Allow termination only if distant from last warping 85 | if (iter - optimState.LastSuccessfulWarping) >= TolStableIters/3 86 | isFinished_flag = true; 87 | exitflag = 1; 88 | msg = 'Inference terminated: variational solution stable for OPTIONS.TolStableCount fcn evaluations.'; 89 | end 90 | stableflag = true; 91 | if isempty(action); action = 'stable'; else; action = [action ', stable']; end 92 | end 93 | end 94 | end 95 | stats.stable(iter) = stableflag; % Store stability flag 96 | 97 | % Prevent early termination 98 | if optimState.funccount < options.MinFunEvals || ... 99 | optimState.iter < options.MinIter 100 | isFinished_flag = false; 101 | end 102 | 103 | end 104 | 105 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 106 | function [idx_stable,dN,dN_last,w] = getStableIter(stats,optimState,options) 107 | %GETSTABLEITER Find index of starting stable iteration. 108 | 109 | iter = optimState.iter; 110 | idx_stable = []; 111 | dN = []; dN_last = []; w = []; 112 | 113 | if optimState.iter < 3; return; end 114 | 115 | if ~isempty(stats) 116 | N_list = stats.N; 117 | idx_stable = 1; 118 | if ~isempty(idx_stable) 119 | dN = optimState.N - N_list(idx_stable); 120 | dN_last = N_list(end) - N_list(end-1); 121 | end 122 | 123 | % Compute weighting function 124 | Nw = numel(idx_stable:iter); 125 | w1 = zeros(1,Nw); 126 | w1(end) = 1; 127 | w2 = exp(-(stats.N(end) - stats.N(end-Nw+1:end))/10); 128 | w2 = w2 / sum(w2); 129 | w = 0.5*w1 + 0.5*w2; 130 | 131 | end 132 | 133 | end 134 | 135 | -------------------------------------------------------------------------------- /rosenbrock_test.m: -------------------------------------------------------------------------------- 1 | function [y,s] = rosenbrock_test(x,sigma) 2 | %ROSENBROCKS_TEST Rosenbrock's broad 'banana' function. 3 | 4 | if nargin < 2 || isempty(sigma); sigma = 0; end 5 | 6 | % Likelihood according to a broad Rosenbrock's function 7 | y = -sum((x(:,1:end-1) .^2 - x(:,2:end)) .^ 2 + (x(:,1:end-1)-1).^2/100,2); 8 | 9 | % Noisy test 10 | if sigma > 0 11 | n = size(x,1); 12 | y = y + sigma*randn([n,1]); 13 | if nargout > 1 14 | s = sigma*ones(n,1); 15 | end 16 | end 17 | 18 | % Might want to add a prior, such as 19 | % sigma2 = 9; % Prior variance 20 | % y = y - 0.5*sum(x.^2,2)/sigma2 - 0.5*D*log(2*pi*sigma2); -------------------------------------------------------------------------------- /shared/msmoothboxlogpdf.m: -------------------------------------------------------------------------------- 1 | function y = msmoothboxlogpdf(x,a,b,sigma) 2 | %MSMOOTHBOXLOGPDF Multivariate smooth-box log probability density function. 3 | % Y = MSMOOTHBOXLOGPDF(X,A,B,SIGMA) returns the logarithm of the pdf of 4 | % the multivariate smooth-box distribution with pivots A and B and scale 5 | % SIGMA, evaluated at the values in X. 
The multivariate smooth-box pdf is 6 | % the product of univariate smooth-box pdfs in each dimension. 7 | % 8 | % For each dimension i, the univariate smooth-box pdf is defined as a 9 | % uniform distribution between pivots A(i), B(i) and Gaussian tails that 10 | % fall starting from p(A(i)) to the left (resp., p(B(i)) to the right) 11 | % with standard deviation SIGMA(i). 12 | % 13 | % X can be a matrix, where each row is a separate point and each column 14 | % is a different dimension. Similarly, A, B, and SIGMA can also be 15 | % matrices of the same size as X. 16 | % 17 | % The log pdf is typically preferred in numerical computations involving 18 | % probabilities, as it is more stable. 19 | % 20 | % See also MSMOOTHBOXPDF, MSMOOTHBOXRND. 21 | 22 | % Luigi Acerbi 2022 23 | 24 | [N,D] = size(x); 25 | 26 | if any(sigma(:) <= 0) 27 | error('msmoothboxpdf:NonPositiveSigma', ... 28 | 'All elements of SIGMA should be positive.'); 29 | end 30 | 31 | if D > 1 32 | if isscalar(a); a = a*ones(1,D); end 33 | if isscalar(b); b = b*ones(1,D); end 34 | if isscalar(sigma); sigma = sigma*ones(1,D); end 35 | end 36 | 37 | if size(a,2) ~= D || size(b,2) ~= D || size(sigma,2) ~= D 38 | error('msmoothboxpdf:SizeError', ... 39 | 'A, B, SIGMA should be scalars or have the same number of columns as X.'); 40 | end 41 | 42 | if size(a,1) == 1; a = repmat(a,[N,1]); end 43 | if size(b,1) == 1; b = repmat(b,[N,1]); end 44 | if size(sigma,1) == 1; sigma = repmat(sigma,[N,1]); end 45 | 46 | if any(a(:) >= b(:)) 47 | error('msmoothboxpdf:OrderError', ... 48 | 'For all elements of A and B, the order A < B should hold.'); 49 | end 50 | 51 | y = -inf(size(x)); 52 | lnf = log(1/sqrt(2*pi)./sigma) - log1p(1/sqrt(2*pi)./sigma.*(b - a)); 53 | 54 | for ii = 1:D 55 | idx = x(:,ii) < a(:,ii); 56 | y(idx,ii) = lnf(idx,ii) - 0.5*((x(idx,ii) - a(idx,ii))./sigma(idx,ii)).^2; 57 | 58 | idx = x(:,ii) >= a(:,ii) & x(:,ii) <= b(:,ii); 59 | y(idx,ii) = lnf(idx,ii); 60 | 61 | idx = x(:,ii) > b(:,ii); 62 | y(idx,ii) = lnf(idx,ii) - 0.5*((x(idx,ii) - b(idx,ii))./sigma(idx,ii)).^2; 63 | end 64 | 65 | y = sum(y,2); -------------------------------------------------------------------------------- /shared/msmoothboxpdf.m: -------------------------------------------------------------------------------- 1 | function y = msmoothboxpdf(x,a,b,sigma) 2 | %MSMOOTHBOXPDF Multivariate smooth-box probability density function. 3 | % Y = MSMOOTHBOXPDF(X,A,B,SIGMA) returns the pdf of the multivariate 4 | % smooth-box distribution with pivots A and B and scale SIGMA, evaluated 5 | % at the values in X. The multivariate smooth-box pdf is the product of 6 | % univariate smooth-box pdfs in each dimension. 7 | % 8 | % For each dimension i, the univariate smooth-box pdf is defined as a 9 | % uniform distribution between pivots A(i), B(i) and Gaussian tails that 10 | % fall starting from p(A(i)) to the left (resp., p(B(i)) to the right) 11 | % with standard deviation SIGMA(i). 12 | % 13 | % X can be a matrix, where each row is a separate point and each column 14 | % is a different dimension. Similarly, A, B, and SIGMA can also be 15 | % matrices of the same size as X. 16 | % 17 | % See also MSMOOTHBOXLOGPDF, MSMOOTHBOXRND. 
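%
%   Example (illustrative values):
%      x = linspace(-5,5,1001)';
%      y = msmoothboxpdf(x,-2,2,0.5);   % plateau on [-2,2], Gaussian tails
%      qtrapz(y)*(x(2)-x(1))            % ~1: the pdf is normalized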
18 | 
19 | % Luigi Acerbi 2022
20 | 
21 | y = exp(msmoothboxlogpdf(x,a,b,sigma));
--------------------------------------------------------------------------------
/shared/msmoothboxrnd.m:
--------------------------------------------------------------------------------
1 | function r = msmoothboxrnd(a,b,sigma,n)
2 | %MSMOOTHBOXRND Random arrays from the multivariate smooth-box distribution.
3 | %   R = MSMOOTHBOXRND(A,B,SIGMA) returns an N-by-D matrix R of random
4 | %   vectors chosen from the multivariate smooth-box distribution
5 | %   with pivots A and B and scale SIGMA. A, B and SIGMA are N-by-D matrices,
6 | %   and MSMOOTHBOXRND generates each row of R using the corresponding row
7 | %   of A, B and SIGMA.
8 | %
9 | %   R = MSMOOTHBOXRND(A,B,SIGMA,N) returns a N-by-D matrix R of random
10 | %   vectors chosen from the multivariate smooth-box distribution
11 | %   with pivots A and B and scale SIGMA.
12 | %
13 | %   See also MSMOOTHBOXPDF.
14 | 
15 | % Luigi Acerbi 2022
16 | 
17 | [Na,Da] = size(a);
18 | [Nb,Db] = size(b);
19 | [Nsigma,Dsigma] = size(sigma);
20 | 
21 | if any(sigma(:) <= 0)
22 |     error('msmoothboxrnd:NonPositiveSigma', ...
23 |         'All elements of SIGMA should be positive.');
24 | end
25 | 
26 | if nargin < 4 || isempty(n)
27 |     n = max([Na,Nb,Nsigma]);
28 | else
29 |     if (Na ~= 1 && Na ~= n) || (Nb ~= 1 && Nb ~= n) || ...
30 |             (Nsigma ~= 1 && Nsigma ~= n)
31 |         error('msmoothboxrnd:SizeError', ...
32 |             'A, B, SIGMA should be 1-by-D or N-by-D arrays.');
33 |     end
34 | end
35 | if Na ~= Nb || Da ~= Db || Na ~= Nsigma || Da ~= Dsigma
36 |     error('msmoothboxrnd:SizeError', ...
37 |         'A, B, SIGMA should be arrays of the same size.');
38 | end
39 | 
40 | D = Da;
41 | 
42 | if size(a,1) == 1; a = repmat(a,[n,1]); end
43 | if size(b,1) == 1; b = repmat(b,[n,1]); end
44 | if size(sigma,1) == 1; sigma = repmat(sigma,[n,1]); end
45 | 
46 | r = zeros(n,D);
47 | 
48 | nf = 1 + 1/sqrt(2*pi)./sigma.*(b - a);
49 | 
50 | % Sample one dimension at a time
51 | for d = 1:D
52 |     % Draw component (left/right tails or plateau)
53 |     u = nf(:,d) .* rand(n,1);
54 | 
55 |     % Left Gaussian tails
56 |     idx = u < 0.5;
57 |     if any(idx)
58 |         z1 = abs(randn(sum(idx),1).*sigma(idx,d));
59 |         r(idx,d) = a(idx,d) - z1;
60 |     end
61 | 
62 |     % Right Gaussian tails
63 |     idx = (u >= 0.5 & u < 1);
64 |     if any(idx)
65 |         z1 = abs(randn(sum(idx),1).*sigma(idx,d));
66 |         r(idx,d) = b(idx,d) + z1;
67 |     end
68 | 
69 |     % Plateau
70 |     idx = u >= 1;
71 |     if any(idx)
72 |         r(idx,d) = a(idx,d) + (b(idx,d) - a(idx,d)).*rand(sum(idx),1);
73 |     end
74 | end
--------------------------------------------------------------------------------
/shared/msplinetrapezlogpdf.m:
--------------------------------------------------------------------------------
1 | function y = msplinetrapezlogpdf(x,a,b,c,d)
2 | %MSPLINETRAPEZLOGPDF Multivariate spline-trapezoidal log pdf.
3 | %   Y = MSPLINETRAPEZLOGPDF(X,A,B,C,D) returns the logarithm of the pdf of
4 | %   the multivariate spline-trapezoidal distribution with external bounds
5 | %   A and D and internal points B and C, evaluated at the values in X. The
6 | %   multivariate pdf is the product of univariate spline-trapezoidal pdfs
7 | %   in each dimension.
8 | % 9 | % For each dimension i, the univariate spline-trapezoidal pdf is defined 10 | % as a trapezoidal pdf whose points A, B and C, D are connected by cubic 11 | % splines such that the pdf is continuous and its derivatives at A, B, C, 12 | % and D are zero (so the derivatives are also continuous): 13 | % 14 | % | __________ 15 | % | /| |\ 16 | % p(X(i)) | / | | \ 17 | % | / | | \ 18 | % |___/___|________|___\____ 19 | % A(i) B(i) C(i) D(i) 20 | % X(i) 21 | % 22 | % X can be a matrix, where each row is a separate point and each column 23 | % is a different dimension. Similarly, A, B, C, and D can also be 24 | % matrices of the same size as X. 25 | % 26 | % The log pdf is typically preferred in numerical computations involving 27 | % probabilities, as it is more stable. 28 | % 29 | % See also MSPLINETRAPEZPDF, MSPLINETRAPEZRND. 30 | 31 | % Luigi Acerbi 2022 32 | 33 | [N,D] = size(x); 34 | 35 | if D > 1 36 | if isscalar(a); a = a*ones(1,D); end 37 | if isscalar(b); b = b*ones(1,D); end 38 | if isscalar(c); c = c*ones(1,D); end 39 | if isscalar(d); d = d*ones(1,D); end 40 | end 41 | 42 | if size(a,2) ~= D || size(b,2) ~= D || size(c,2) ~= D || size(d,2) ~= D 43 | error('msplinetrapezlogpdf:SizeError', ... 44 | 'A, B, C, D should be scalars or have the same number of columns as X.'); 45 | end 46 | 47 | if size(a,1) == 1; a = repmat(a,[N,1]); end 48 | if size(b,1) == 1; b = repmat(b,[N,1]); end 49 | if size(c,1) == 1; c = repmat(c,[N,1]); end 50 | if size(d,1) == 1; d = repmat(d,[N,1]); end 51 | 52 | y = -inf(size(x)); 53 | % Normalization factor 54 | % nf = c - b + 0.5*(d - c + b - a); 55 | lnf = log(0.5*(c - b + d - a)); 56 | 57 | for ii = 1:D 58 | idx = x(:,ii) >= a(:,ii) & x(:,ii) < b(:,ii); 59 | z = (x(idx,ii) - a(idx,ii))./(b(idx,ii) - a(idx,ii)); 60 | y(idx,ii) = log(-2*z.^3 + 3*z.^2) - lnf(idx,ii); 61 | 62 | idx = x(:,ii) >= b(:,ii) & x(:,ii) < c(:,ii); 63 | y(idx,ii) = -lnf(idx,ii); 64 | 65 | idx = x(:,ii) >= c(:,ii) & x(:,ii) < d(:,ii); 66 | z = 1 - (x(idx,ii) - c(idx,ii)) ./ (d(idx,ii) - c(idx,ii)); 67 | y(idx,ii) = log(-2*z.^3 + 3*z.^2) - lnf(idx,ii); 68 | end 69 | 70 | y = sum(y,2); 71 | 72 | -------------------------------------------------------------------------------- /shared/msplinetrapezpdf.m: -------------------------------------------------------------------------------- 1 | function y = msplinetrapezpdf(x,a,b,c,d) 2 | %MSPLINETRAPEZPDF Multivariate spline-trapezoidal probability density fcn (pdf). 3 | % Y = MSPLINETRAPEZPDF(X,A,B,C,D) returns the pdf of the multivariate 4 | % spline-trapezoidal distribution with external bounds A and D and internal 5 | % points B and C, evaluated at the values in X. The multivariate pdf is 6 | % the product of univariate spline-trapezoidal pdfs in each dimension. 7 | % 8 | % For each dimension i, the univariate spline-trapezoidal pdf is defined 9 | % as a trapezoidal pdf whose points A, B and C, D are connected by cubic 10 | % splines such that the pdf is continuous and its derivatives at A, B, C, 11 | % and D are zero (so the derivatives are also continuous): 12 | % 13 | % | __________ 14 | % | /| |\ 15 | % p(X(i)) | / | | \ 16 | % | / | | \ 17 | % |___/___|________|___\____ 18 | % A(i) B(i) C(i) D(i) 19 | % X(i) 20 | % 21 | % X can be a matrix, where each row is a separate point and each column 22 | % is a different dimension. Similarly, A, B, C, and D can also be 23 | % matrices of the same size as X. 24 | % 25 | % See also MSPLINETRAPEZLOGPDF, MSPLINETRAPEZRND. 
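%
%   Example (illustrative values):
%      x = linspace(-4,4,1001)';
%      y = msplinetrapezpdf(x,-3,-1,1,3);   % plateau on [-1,1], cubic ramps
%      qtrapz(y)*(x(2)-x(1))                % ~1: the pdf is normalized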
26 | 
27 | % Luigi Acerbi 2022
28 | 
29 | y = exp(msplinetrapezlogpdf(x,a,b,c,d));
--------------------------------------------------------------------------------
/shared/msplinetrapezrnd.m:
--------------------------------------------------------------------------------
1 | function r = msplinetrapezrnd(a,u,v,b,n)
2 | %MSPLINETRAPEZRND Random arrays from the multivariate spline-trapezoidal distribution.
3 | %   R = MSPLINETRAPEZRND(A,U,V,B) returns an N-by-D matrix R of random
4 | %   vectors chosen from the multivariate spline-trapezoidal distribution
5 | %   with external bounds A and B and internal points U and V. A, U, V and B
6 | %   are N-by-D matrices, and MSPLINETRAPEZRND generates each row of R using
7 | %   the corresponding row of A, U, V and B.
8 | %
9 | %   R = MSPLINETRAPEZRND(A,U,V,B,N) returns a N-by-D matrix R of random
10 | %   vectors chosen from the multivariate spline-trapezoidal distribution
11 | %   with external bounds A and B and internal points U and V.
12 | %
13 | %   See also MSPLINETRAPEZPDF.
14 | 
15 | % Luigi Acerbi 2022
16 | 
17 | [Na,Da] = size(a);
18 | [Nu,Du] = size(u);
19 | [Nv,Dv] = size(v);
20 | [Nb,Db] = size(b);
21 | 
22 | if nargin < 5 || isempty(n)
23 |     n = max([Na,Nu,Nv,Nb]);
24 | else
25 |     if (Na ~= 1 && Na ~= n) || (Nb ~= 1 && Nb ~= n) || ...
26 |             (Nu ~= 1 && Nu ~= n) || (Nv ~= 1 && Nv ~= n)
27 |         error('msplinetrapezrnd:SizeError', ...
28 |             'A, U, V, B should be 1-by-D or N-by-D arrays.');
29 |     end
30 | end
31 | if Na ~= Nb || Da ~= Db || Na ~= Nu || Da ~= Du || Na ~= Nv || Da ~= Dv
32 |     error('msplinetrapezrnd:SizeError', ...
33 |         'A, U, V, B should be arrays of the same size.');
34 | end
35 | 
36 | D = Da;
37 | 
38 | if size(a,1) == 1; a = repmat(a,[n,1]); end
39 | if size(u,1) == 1; u = repmat(u,[n,1]); end
40 | if size(v,1) == 1; v = repmat(v,[n,1]); end
41 | if size(b,1) == 1; b = repmat(b,[n,1]); end
42 | 
43 | r = zeros(n,D);
44 | 
45 | % Sample one dimension at a time
46 | for d = 1:D
47 |     % Compute maximum of one-dimensional pdf
48 |     x0 = 0.5*(u(:,d) + v(:,d));
49 |     y_max = msplinetrapezpdf(x0,a(:,d),u(:,d),v(:,d),b(:,d));
50 | 
51 |     idx = true(n,1);
52 |     r1 = zeros(n,1);
53 |     n1 = sum(idx);
54 | 
55 |     % Keep doing rejection sampling
56 |     while n1 > 0
57 |         % Uniform sampling in the box
58 |         r1(idx) = bsxfun(@plus, a(idx,d), bsxfun(@times, rand(n1,1), b(idx,d) - a(idx,d)));
59 | 
60 |         % Rejection sampling
61 |         z1 = rand(n1,1) .* y_max(idx);
62 |         y1 = msplinetrapezpdf(r1(idx),a(idx,d),u(idx,d),v(idx,d),b(idx,d));
63 | 
64 |         idx_new = false(n,1);
65 |         idx_new(idx) = z1 > y1;     % Resample points outside
66 | 
67 |         idx = idx_new;
68 |         n1 = sum(idx);
69 |     end
70 | 
71 |     % Assign d-th dimension
72 |     r(:,d) = r1;
73 | end
--------------------------------------------------------------------------------
/shared/mtrapezlogpdf.m:
--------------------------------------------------------------------------------
1 | function y = mtrapezlogpdf(x,a,u,v,b)
2 | %MTRAPEZLOGPDF Multivariate trapezoidal log probability density function.
3 | %   Y = MTRAPEZLOGPDF(X,A,U,V,B) returns the logarithm of the pdf of the
4 | %   multivariate trapezoidal distribution with external bounds A and B and
5 | %   internal points U and V, evaluated at the values in X. The multivariate
6 | %   trapezoidal pdf is the product of univariate trapezoidal pdfs in each
7 | %   dimension.
8 | % 9 | % For each dimension i, the univariate trapezoidal pdf is defined as: 10 | % 11 | % | __________ 12 | % | /| |\ 13 | % p(X(i)) | / | | \ 14 | % | / | | \ 15 | % |___/___|________|___\____ 16 | % A(i) U(i) V(i) B(i) 17 | % X(i) 18 | % 19 | % X can be a matrix, where each row is a separate point and each column 20 | % is a different dimension. Similarly, A, B, C, and D can also be 21 | % matrices of the same size as X. 22 | % 23 | % The log pdf is typically preferred in numerical computations involving 24 | % probabilities, as it is more stable. 25 | % 26 | % See also MTRAPEZPDF, MTRAPEZRND. 27 | 28 | % Luigi Acerbi 2022 29 | 30 | [N,D] = size(x); 31 | 32 | if D > 1 33 | if isscalar(a); a = a*ones(1,D); end 34 | if isscalar(u); u = u*ones(1,D); end 35 | if isscalar(v); v = v*ones(1,D); end 36 | if isscalar(b); b = b*ones(1,D); end 37 | end 38 | 39 | if size(a,2) ~= D || size(u,2) ~= D || size(v,2) ~= D || size(b,2) ~= D 40 | error('mtrapezpdf:SizeError', ... 41 | 'A, B, C, D should be scalars or have the same number of columns as X.'); 42 | end 43 | 44 | if size(a,1) == 1; a = repmat(a,[N,1]); end 45 | if size(u,1) == 1; u = repmat(u,[N,1]); end 46 | if size(v,1) == 1; v = repmat(v,[N,1]); end 47 | if size(b,1) == 1; b = repmat(b,[N,1]); end 48 | 49 | y = -inf(size(x)); 50 | lnf = log(0.5) + log(b - a + v - u) + log(u - a); 51 | 52 | for ii = 1:D 53 | idx = x(:,ii) >= a(:,ii) & x(:,ii) < u(:,ii); 54 | y(idx,ii) = log(x(idx,ii) - a(idx,ii)) - lnf(idx,ii); 55 | 56 | idx = x(:,ii) >= u(:,ii) & x(:,ii) < v(:,ii); 57 | y(idx,ii) = log(u(idx,ii)-a(idx,ii)) - lnf(idx,ii); 58 | 59 | idx = x(:,ii) >= v(:,ii) & x(:,ii) < b(:,ii); 60 | y(idx,ii) = log(b(idx,ii) - x(idx,ii)) - log(b(idx,ii) - v(idx,ii)) + log(u(idx,ii)-a(idx,ii)) - lnf(idx,ii); 61 | end 62 | 63 | y = sum(y,2); -------------------------------------------------------------------------------- /shared/mtrapezpdf.m: -------------------------------------------------------------------------------- 1 | function y = mtrapezpdf(x,a,u,v,b) 2 | %MTRAPEZPDF Multivariate trapezoidal probability density function (pdf). 3 | % Y = MTRAPEZPDF(X,A,U,V,B) returns the pdf of the multivariate trapezoidal 4 | % distribution with external bounds A and B and internal points U and V, 5 | % evaluated at the values in X. The multivariate trapezoidal 6 | % pdf is the product of univariate trapezoidal pdfs in each dimension. 7 | % 8 | % For each dimension i, the univariate trapezoidal pdf is defined as: 9 | % 10 | % | __________ 11 | % | /| |\ 12 | % p(X(i)) | / | | \ 13 | % | / | | \ 14 | % |___/___|________|___\____ 15 | % A(i) U(i) V(i) B(i) 16 | % X(i) 17 | % 18 | % X can be a matrix, where each row is a separate point and each column 19 | % is a different dimension. Similarly, A, B, C, and D can also be 20 | % matrices of the same size as X. 21 | % 22 | % See also MTRAPEZLOGPDF, MTRAPEZRND. 23 | 24 | % Luigi Acerbi 2022 25 | 26 | y = exp(mtrapezlogpdf(x,a,u,v,b)); -------------------------------------------------------------------------------- /shared/mtrapezrnd.m: -------------------------------------------------------------------------------- 1 | function r = mtrapezrnd(a,u,v,b,n) 2 | %MTRAPEZRND Random arrays from the multivariate trapezoidal distribution. 3 | % R = MTRAPEZRND(A,U,V,B) returns an N-by-D matrix R of random vectors 4 | % chosen from the multivariate trapezoidal distribution with external 5 | % bounds A and B and internal points U and V. 
A, U, V and B are N-by-D
6 | %   matrices, and MTRAPEZRND generates each row of R using the corresponding
7 | %   row of A, U, V and B.
8 | %
9 | %   R = MTRAPEZRND(A,U,V,B,N) returns a N-by-D matrix R of random vectors
10 | %   chosen from the multivariate trapezoidal distribution with external
11 | %   bounds A and B and internal points U and V.
12 | %
13 | %   See also MTRAPEZPDF.
14 | 
15 | % Luigi Acerbi 2022
16 | 
17 | [Na,Da] = size(a);
18 | [Nu,Du] = size(u);
19 | [Nv,Dv] = size(v);
20 | [Nb,Db] = size(b);
21 | 
22 | if nargin < 5 || isempty(n)
23 |     n = max([Na,Nu,Nv,Nb]);
24 | else
25 |     if (Na ~= 1 && Na ~= n) || (Nb ~= 1 && Nb ~= n) || ...
26 |             (Nu ~= 1 && Nu ~= n) || (Nv ~= 1 && Nv ~= n)
27 |         error('mtrapezrnd:SizeError', ...
28 |             'A, U, V, B should be 1-by-D or N-by-D arrays.');
29 |     end
30 | end
31 | if Na ~= Nb || Da ~= Db || Na ~= Nu || Da ~= Du || Na ~= Nv || Da ~= Dv
32 |     error('mtrapezrnd:SizeError', ...
33 |         'A, U, V, B should be arrays of the same size.');
34 | end
35 | 
36 | D = Da;
37 | 
38 | if size(a,1) == 1; a = repmat(a,[n,1]); end
39 | if size(u,1) == 1; u = repmat(u,[n,1]); end
40 | if size(v,1) == 1; v = repmat(v,[n,1]); end
41 | if size(b,1) == 1; b = repmat(b,[n,1]); end
42 | 
43 | r = zeros(n,D);
44 | 
45 | % Sample one dimension at a time
46 | for d = 1:D
47 |     % Compute maximum of one-dimensional pdf
48 |     x0 = 0.5*(u(:,d) + v(:,d));
49 |     y_max = mtrapezpdf(x0,a(:,d),u(:,d),v(:,d),b(:,d));
50 | 
51 |     idx = true(n,1);
52 |     r1 = zeros(n,1);
53 |     n1 = sum(idx);
54 | 
55 |     % Keep doing rejection sampling
56 |     while n1 > 0
57 |         % Uniform sampling in the box
58 |         r1(idx) = bsxfun(@plus, a(idx,d), bsxfun(@times, rand(n1,1), b(idx,d) - a(idx,d)));
59 | 
60 |         % Rejection sampling
61 |         z1 = rand(n1,1) .* y_max(idx);
62 |         y1 = mtrapezpdf(r1(idx),a(idx,d),u(idx,d),v(idx,d),b(idx,d));
63 | 
64 |         idx_new = false(n,1);
65 |         idx_new(idx) = z1 > y1;     % Resample points outside
66 | 
67 |         idx = idx_new;
68 |         n1 = sum(idx);
69 |     end
70 | 
71 |     % Assign d-th dimension
72 |     r(:,d) = r1;
73 | end
--------------------------------------------------------------------------------
/shared/munifboxlogpdf.m:
--------------------------------------------------------------------------------
1 | function y = munifboxlogpdf(x,a,b)
2 | %MUNIFBOXLOGPDF Multivariate uniform box log probability density function.
3 | %   Y = MUNIFBOXLOGPDF(X,A,B) returns the logarithm of the pdf of the
4 | %   multivariate uniform-box distribution with bounds A and B, evaluated at
5 | %   the values in X. The multivariate uniform box pdf is the product of
6 | %   univariate uniform pdfs in each dimension.
7 | %
8 | %   For each dimension i, the univariate uniform-box pdf is defined as:
9 | %
10 | %              |
11 | %              |    ______________
12 | %   p(X(i))    |   |              |
13 | %              |   |              |
14 | %              |___|____________|_____
15 | %                  A(i)         B(i)
16 | %                       X(i)
17 | %
18 | %   X can be a matrix, where each row is a separate point and each column
19 | %   is a different dimension. Similarly, A and B can also be matrices of
20 | %   the same size as X.
21 | %
22 | %   The log pdf is typically preferred in numerical computations involving
23 | %   probabilities, as it is more stable.
24 | %
25 | %   See also MUNIFBOXPDF, MUNIFBOXRND.
26 | 
27 | % Luigi Acerbi 2022
28 | 
29 | [N,D] = size(x);
30 | 
31 | if D > 1
32 |     if isscalar(a); a = a*ones(1,D); end
33 |     if isscalar(b); b = b*ones(1,D); end
34 | end
35 | 
36 | if size(a,2) ~= D || size(b,2) ~= D
37 |     error('munifboxlogpdf:SizeError', ...
38 | 'A, B should be scalars or have the same number of columns as X.'); 39 | end 40 | 41 | if size(a,1) == 1; a = repmat(a,[N,1]); end 42 | if size(b,1) == 1; b = repmat(b,[N,1]); end 43 | 44 | if any(a(:) >= b(:)) 45 | error('munifboxlogpdf:OrderError', ... 46 | 'For all elements of A and B, the order A < B should hold.'); 47 | end 48 | 49 | lnf = sum(log(b - a),2); 50 | y = -lnf .* ones(N,1); 51 | idx = any(bsxfun(@lt, x, a),2) | any(bsxfun(@gt, x, b),2); 52 | y(idx) = -inf; -------------------------------------------------------------------------------- /shared/munifboxpdf.m: -------------------------------------------------------------------------------- 1 | function y = munifboxpdf(x,a,b) 2 | %MUNIFBOXPDF Multivariate uniform box probability density function. 3 | % Y = MUNIFBOXPDF(X,A,B) returns the pdf of the multivariate uniform-box 4 | % distribution with bounds A and B, evaluated at the values in X. The 5 | % multivariate uniform box pdf is the product of univariate uniform 6 | % pdfs in each dimension. 7 | % 8 | % For each dimension i, the univariate uniform-box pdf is defined as: 9 | % 10 | % | 11 | % | ______________ 12 | % p(X(i)) | | | 13 | % | | | 14 | % |___|____________|_____ 15 | % A(i) B(i) 16 | % X(i) 17 | % 18 | % X can be a matrix, where each row is a separate point and each column 19 | % is a different dimension. Similarly, A and B can also be matrices of 20 | % the same size as X. 21 | % 22 | % See also MUNIFBOXLOGPDF, MUNIFBOXRND. 23 | 24 | % Luigi Acerbi 2022 25 | 26 | y = exp(munifboxlogpdf(x,a,b)); -------------------------------------------------------------------------------- /shared/munifboxrnd.m: -------------------------------------------------------------------------------- 1 | function r = munifboxrnd(a,b,n) 2 | %MUNIFBOXRND Random arrays from the multivariate uniform box distribution. 3 | % R = MUNIFBOXRND(A,B) returns an N-by-D matrix R of random vectors 4 | % chosen from the multivariate uniform box distribution with bounds A and 5 | % B. A and B are N-by-D matrices, and MUNIFBOXRND generates each row of R 6 | % using the corresponding row of A and B. 7 | % 8 | % R = MUNIFBOXRND(A,B,N) returns a N-by-D matrix R of random vectors 9 | % chosen from the multivariate uniform box distribution with 1-by-D bound 10 | % vectors A and B. 11 | % 12 | % See also MUNIFBOXPDF. 13 | 14 | % Luigi Acerbi 2022 15 | 16 | [N,D] = size(a); 17 | [Nb,Db] = size(b); 18 | 19 | if nargin < 3 || isempty(n) 20 | n = N; 21 | else 22 | if (N ~= 1 && N ~= n) || (Nb ~= 1 && Nb ~= n) 23 | error('munifboxrnd:SizeError', ... 24 | 'A and B should be 1-by-D or N-by-D arrays.'); 25 | end 26 | end 27 | if N ~= Nb || D ~= Db 28 | error('munifboxrnd:SizeError', ... 29 | 'A and B should be arrays of the same size.'); 30 | end 31 | 32 | if any(a(:) >= b(:)) 33 | error('munifboxpdf:OrderError', ... 34 | 'For all elements of A and B, the order A < B should hold.'); 35 | end 36 | 37 | 38 | r = bsxfun(@plus, a, bsxfun(@times, rand(n,D), b - a)); -------------------------------------------------------------------------------- /shared/mvnkl.m: -------------------------------------------------------------------------------- 1 | function [kl1,kl2] = mvnkl(Mu1,Sigma1,Mu2,Sigma2) 2 | %MVNKL Kullback-Leibler divergence between two multivariate normal pdfs. 
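%
%   [KL1,KL2] = MVNKL(MU1,SIGMA1,MU2,SIGMA2) returns the two directed
%   divergences between N1 = N(MU1,SIGMA1) and N2 = N(MU2,SIGMA2), where
%
%      KL(N1||N2) = 0.5*( trace(SIGMA2\SIGMA1) + dmu'*(SIGMA2\dmu) - D ...
%                         + log(det(SIGMA2)/det(SIGMA1)) ),  dmu = MU2 - MU1.
%
%   Example (a quick 1D sanity check):
%      kl = mvnkl(0,1,0,4)   % 0.5*(1/4 - 1 + log(4)), approx. 0.318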
3 | 4 | D = numel(Mu1); 5 | 6 | Mu1 = Mu1(:); 7 | Mu2 = Mu2(:); 8 | 9 | dmu = Mu2 - Mu1; 10 | detq1 = det(Sigma1); 11 | detq2 = det(Sigma2); 12 | lndet = log(detq2 / detq1); 13 | 14 | kl1 = 0.5*(trace(Sigma2\Sigma1) + dmu'*(Sigma2\dmu) - D + lndet); 15 | if nargout > 1 16 |     kl2 = 0.5*(trace(Sigma1\Sigma2) + dmu'*(Sigma1\dmu) - D - lndet); 17 | end -------------------------------------------------------------------------------- /shared/qtrapz.m: -------------------------------------------------------------------------------- 1 | function z = qtrapz(y,dim) 2 | %QTRAPZ Quick trapezoidal numerical integration. 3 | %   Z = QTRAPZ(Y) computes an approximation of the integral of Y via 4 | %   the trapezoidal method (with unit spacing). To compute the integral 5 | %   for spacing different from one, multiply Z by the spacing increment. 6 | % 7 | %   For vectors, QTRAPZ(Y) is the integral of Y. For matrices, QTRAPZ(Y) 8 | %   is a row vector with the integral over each column. For N-D 9 | %   arrays, QTRAPZ(Y) works across the first non-singleton dimension. 10 | % 11 | %   Z = QTRAPZ(Y,DIM) integrates across dimension DIM of Y. DIM must 12 | %   specify one of the dimensions of Y. 13 | % 14 | %   QTRAPZ is up to 3-4 times faster than TRAPZ for large arrays. 15 | % 16 | %   See also TRAPZ. 17 | 18 | % Luigi Acerbi 19 | % Version 1.0. Release date: Jul/20/2015. 20 | 21 | % By default integrate along the first non-singleton dimension 22 | if nargin < 2; dim = find(size(y)~=1,1); end 23 | 24 | % Behaves as sum on empty array 25 | if isempty(y); z = sum(y,dim); return; end 26 | 27 | % Compute dimensions of input matrix 28 | if isvector(y); n = 1; else n = ndims(y); end 29 | 30 | switch n 31 |     case {1,2}  % 1-D or 2-D array 32 |         switch dim 33 |             case 1 34 |                 z = sum(y,1) - 0.5*(y(1,:) + y(end,:)); 35 |             case 2 36 |                 z = sum(y,2) - 0.5*(y(:,1) + y(:,end)); 37 |             otherwise 38 |                 error('qtrapz:dimMismatch', 'DIM must specify one of the dimensions of Y.'); 39 |         end 40 | 41 |     case 3      % 3-D array 42 |         switch dim 43 |             case 1 44 |                 z = sum(y,1) - 0.5*(y(1,:,:) + y(end,:,:)); 45 |             case 2 46 |                 z = sum(y,2) - 0.5*(y(:,1,:) + y(:,end,:)); 47 |             case 3 48 |                 z = sum(y,3) - 0.5*(y(:,:,1) + y(:,:,end)); 49 |             otherwise 50 |                 error('qtrapz:dimMismatch', 'DIM must specify one of the dimensions of Y.'); 51 |         end 52 | 53 |     case 4      % 4-D array 54 |         switch dim 55 |             case 1 56 |                 z = sum(y,1) - 0.5*(y(1,:,:,:) + y(end,:,:,:)); 57 |             case 2 58 |                 z = sum(y,2) - 0.5*(y(:,1,:,:) + y(:,end,:,:)); 59 |             case 3 60 |                 z = sum(y,3) - 0.5*(y(:,:,1,:) + y(:,:,end,:)); 61 |             case 4 62 |                 z = sum(y,4) - 0.5*(y(:,:,:,1) + y(:,:,:,end)); 63 |             otherwise 64 |                 error('qtrapz:dimMismatch', 'DIM must specify one of the dimensions of Y.'); 65 |         end 66 | 67 |     otherwise   % 5-D array or more 68 |         for iDim = 1:n; index{iDim} = 1:size(y,iDim); end 69 |         index1 = index; index1{dim} = 1; 70 |         indexend = index; indexend{dim} = size(y,dim); 71 |         try 72 |             z = sum(y,dim) - 0.5*(y(index1{:}) + y(indexend{:})); 73 |         catch 74 |             error('qtrapz:dimMismatch', 'DIM must specify one of the dimensions of Y.'); 75 |         end 76 | end -------------------------------------------------------------------------------- /shared/warpvars_vbmc_test.m: -------------------------------------------------------------------------------- 1 | function warpvars_vbmc_test(nvars) 2 | 3 | if nargin < 1 || isempty(nvars); nvars = 1; end 4 | 5 | 6 | for iType = [0,3,9,10,12,13] 7 | 8 |     fprintf('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n'); 9 |
fprintf('Testing transformation type %d...\n',iType); 10 | 11 | nvars = 1; 12 | switch iType 13 | case {0,10} 14 | LB = -Inf; UB = Inf; PLB = -10; PUB = 10; 15 | case {3,9,12,13} 16 | LB = -9; UB = 4; PLB = -8.99; PUB = 3.99; 17 | end 18 | 19 | trinfo = warpvars_vbmc(nvars,LB,UB); 20 | trinfo.type = iType*ones(1,nvars); 21 | trinfo.alpha = exp(2*rand(1,nvars)); 22 | trinfo.beta = exp(2*rand(1,nvars)); 23 | trinfo.mu = 0.5*(PUB+PLB); 24 | trinfo.delta = (PUB-PLB); 25 | 26 | x = linspace(PLB,PUB,101); 27 | x2 = warpvars_vbmc(warpvars_vbmc(x,'dir',trinfo),'inv',trinfo); 28 | 29 | fprintf('Maximum error for identity transform f^-1(f(x)): %.g.\n\n',max(abs(x - x2))); 30 | 31 | fprintf('Checking derivative and log derivative:\n\n'); 32 | x0 = rand(1,nvars).*(PUB-PLB)+PLB; 33 | derivcheck(@(x) fun(x,trinfo,0),x0,1); 34 | derivcheck(@(x) fun(x,trinfo,1),x0,1); 35 | 36 | if any(iType == [9 10]) 37 | fprintf('Checking derivatives wrt warping parameters:\n\n'); 38 | theta0 = 3*randn(1,2); 39 | derivcheck(@(theta) funfirst(theta,x0,trinfo),theta0',0); 40 | derivcheck(@(theta) funmixed(theta,x0,trinfo),theta0',0); 41 | end 42 | 43 | end 44 | 45 | 46 | % if nvars == 1 47 | % x = linspace(LB+sqrt(eps),UB-sqrt(eps),101); 48 | % x2 = warpvars_vbmc(warpvars_vbmc(x,'dir',trinfo),'inv',trinfo); 49 | % max(abs(x - x2)) 50 | % 51 | % x0 = rand(1,nvars).*(UB-LB)+LB; 52 | % derivcheck(@(x) fun(x,trinfo),x0,1); 53 | % else 54 | % N = 10; 55 | % [Q,R] = qr(randn(nvars)); 56 | % if det(Q) < 0; Q(:,1) = -Q(:,1); end 57 | % trinfo.R_mat = Q; 58 | % % trinfo.R_mat = eye(Nvars); 59 | % % trinfo.scale = exp(randn(1,Nvars)); 60 | % 61 | % x = randn(N,nvars); 62 | % x2 = warpvars_vbmc(warpvars_vbmc(x,'dir',trinfo),'inv',trinfo); 63 | % 64 | % x - x2 65 | % 66 | % 67 | % 68 | % x0 = 0.1*rand(1,nvars).*(UB-LB)+LB; 69 | % x0t = warpvars_vbmc(x0,'dir',trinfo); 70 | % 71 | % derivcheck(@(x) fun(x,trinfo),x0,1); 72 | % derivcheck(@(x) invfun(x,trinfo),x0t,1); 73 | % 74 | % end 75 | 76 | 77 | % x0 = randn(1,Nvars); 78 | % derivcheck(@(theta) funfirst(theta,x0,trinfo),0.1*randn(1,2),0); 79 | 80 | 81 | end 82 | 83 | function [y,dy] = fun(x,trinfo,logflag) 84 | 85 | if nargin < 3 || isempty(logflag); logflag = 0; end 86 | 87 | y = warpvars_vbmc(x,'dir',trinfo); 88 | % dy = warpvars_vbmc(y,'g',trinfo); 89 | 90 | if logflag 91 | dy = exp(-warpvars_vbmc(y,'logpdf',trinfo)); 92 | else 93 | dy = 1./warpvars_vbmc(y,'pdf',trinfo); 94 | end 95 | 96 | end 97 | 98 | function [y,dy] = invfun(x,trinfo) 99 | 100 | y = warpvars_vbmc(x,'inv',trinfo); 101 | dy = warpvars_vbmc(x,'r',trinfo); 102 | % dy = exp(-warpvars_vbmc(y,'logpdf',trinfo)); 103 | 104 | end 105 | 106 | 107 | function [y,dy] = funfirst(theta,x,trinfo) 108 | 109 | nvars = numel(trinfo.lb_orig); 110 | theta = exp(theta); 111 | 112 | trinfo.alpha(1) = theta(1); 113 | trinfo.beta(1) = theta(2); 114 | 115 | y = warpvars_vbmc(x,'d',trinfo); 116 | dy = warpvars_vbmc(y,'f',trinfo); 117 | 118 | dy = dy([1;nvars+1]) .* theta(:)'; 119 | % dy = exp(-warpvars_vbmc(y,'logpdf',trinfo)); 120 | 121 | end 122 | 123 | 124 | function [dy,ddy] = funmixed(theta,x,trinfo) 125 | 126 | nvars = numel(trinfo.lb_orig); 127 | theta = exp(theta); 128 | 129 | trinfo.alpha(1) = theta(1); 130 | trinfo.beta(1) = theta(2); 131 | 132 | y = warpvars_vbmc(x,'d',trinfo); 133 | dy = exp(-warpvars_vbmc(y,'logpdf',trinfo)); 134 | 135 | ddy = warpvars_vbmc(y,'m',trinfo); 136 | ddy = ddy([1;nvars+1]) .* theta(:)'; 137 | 138 | end 139 | -------------------------------------------------------------------------------- 
/test/test_pdfs_vbmc.m: -------------------------------------------------------------------------------- 1 | function test_pdfs_vbmc() 2 | %TEST_PDFS_VBMC Test pdfs introduced in the VBMC package. 3 | 4 | lb = [-1.1,-4.1]; 5 | ub = [3.2,-2.8]; 6 | a = [-1,-4]; 7 | b = [3,-3]; 8 | n = 1e6; 9 | 10 | tolerr = 1e-3; % Error tolerance on normalization constant 11 | tolrmse = 0.05; % Error tolerance on histogram vs pdf 12 | 13 | %% Test multivariate uniform box distribution 14 | pdf1 = @(x) munifboxpdf(x,a(1),b(1)); 15 | pdf2 = @(x) munifboxpdf(x,a,b); 16 | pdf1log = @(x) exp(munifboxlogpdf(x,a(1),b(1))); 17 | pdf2log = @(x) exp(munifboxlogpdf(x,a,b)); 18 | pdfrnd = @(n) munifboxrnd(a,b,n); 19 | name = 'munifbox'; 20 | 21 | test_pdf1_normalization(pdf1,lb,ub,tolerr,name); 22 | test_pdf2_normalization(pdf2,lb,ub,tolerr,name); 23 | test_pdf1_normalization(pdf1log,lb,ub,tolerr,name); 24 | test_pdf2_normalization(pdf2log,lb,ub,tolerr,name); 25 | test_rnd(pdfrnd,pdf1,a,b,n,tolrmse,name); 26 | 27 | %% Test multivariate trapezoidal distribution 28 | u = [-0.5,-3.8]; 29 | v = [1.5,-3.4]; 30 | pdf1 = @(x) mtrapezpdf(x,a(1),u(1),v(1),b(1)); 31 | pdf2 = @(x) mtrapezpdf(x,a,u,v,b); 32 | pdf1log = @(x) exp(mtrapezlogpdf(x,a(1),u(1),v(1),b(1))); 33 | pdf2log = @(x) exp(mtrapezlogpdf(x,a,u,v,b)); 34 | pdfrnd = @(n) mtrapezrnd(a,u,v,b,n); 35 | name = 'mtrapez'; 36 | test_pdf1_normalization(pdf1,lb,ub,tolerr,name); 37 | test_pdf2_normalization(pdf2,lb,ub,tolerr,name); 38 | test_pdf1_normalization(pdf1log,lb,ub,tolerr,name); 39 | test_pdf2_normalization(pdf2log,lb,ub,tolerr,name); 40 | test_rnd(pdfrnd,pdf1,a,b,n,tolrmse,name); 41 | 42 | %% Test multivariate spline trapezoidal distribution 43 | pdf1 = @(x) msplinetrapezpdf(x,a(1),u(1),v(1),b(1)); 44 | pdf2 = @(x) msplinetrapezpdf(x,a,u,v,b); 45 | pdf1log = @(x) exp(msplinetrapezlogpdf(x,a(1),u(1),v(1),b(1))); 46 | pdf2log = @(x) exp(msplinetrapezlogpdf(x,a,u,v,b)); 47 | pdfrnd = @(n) msplinetrapezrnd(a,u,v,b,n); 48 | name = 'msplinetrapez'; 49 | test_pdf1_normalization(pdf1,lb,ub,tolerr,name); 50 | test_pdf2_normalization(pdf2,lb,ub,tolerr,name); 51 | test_pdf1_normalization(pdf1log,lb,ub,tolerr,name); 52 | test_pdf2_normalization(pdf2log,lb,ub,tolerr,name); 53 | test_rnd(pdfrnd,pdf1,a,b,n,tolrmse,name); 54 | 55 | %% Test multivariate smoothbox distribution 56 | sigma = [0.7,0.45]; 57 | pdf1 = @(x) msmoothboxpdf(x,a(1),b(1),sigma(1)); 58 | pdf2 = @(x) msmoothboxpdf(x,a,b,sigma); 59 | pdf1log = @(x) exp(msmoothboxlogpdf(x,a(1),b(1),sigma(1))); 60 | pdf2log = @(x) exp(msmoothboxlogpdf(x,a,b,sigma)); 61 | pdfrnd = @(n) msmoothboxrnd(a,b,sigma,n); 62 | name = 'msmoothbox'; 63 | lb = [-5,-7]; 64 | ub = [5,0]; 65 | test_pdf1_normalization(pdf1,lb,ub,tolerr,name); 66 | test_pdf2_normalization(pdf2,lb,ub,tolerr,name); 67 | test_pdf1_normalization(pdf1log,lb,ub,tolerr,name); 68 | test_pdf2_normalization(pdf2log,lb,ub,tolerr,name); 69 | test_rnd(pdfrnd,pdf1,a,b,n,tolrmse,name); 70 | 71 | %close all; 72 | 73 | end 74 | 75 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 76 | function test_pdf1_normalization(pdf1,lb,ub,tol,name) 77 | %TEST_PDF1_NORMALIZATION Test normalization of univariate pdf. 
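%   Numerically integrates PDF1 over [LB(1),UB(1)] and asserts that the
%   result is within TOL of 1.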
78 | 79 | % Check 1D integral 80 | y = integral(@(x) pdf1(x), lb(1), ub(1), 'ArrayValued', true); 81 | fprintf('%s: 1D integral: %.6f\n', name, y); 82 | assert(abs(y - 1) < tol, ['Test error: univariate ' name ' does not integrate to 1.']); 83 | 84 | end 85 | 86 | function test_pdf2_normalization(pdf2,lb,ub,tol,name) 87 | %TEST_PDF2_NORMALIZATION Test normalization of bivariate pdf. 88 | 89 | % Check 2D integral 90 | y = integral2(@(x1,x2) reshape(pdf2([x1(:),x2(:)]),size(x1)), ... 91 |     lb(1), ub(1), lb(2), ub(2)); 92 | fprintf('%s: 2D integral: %.6f\n', name, y); 93 | assert(abs(y - 1) < tol, ['Test error: bivariate ' name ' does not integrate to 1.']); 94 | 95 | end 96 | 97 | function test_rnd(pdfrnd,pdf1,a,b,n,tol,name) 98 | %TEST_RND Test random sample generation (histogram vs pdf). 99 | 100 | r = pdfrnd(n); 101 | h = histogram(r(:,1),100,'BinLimits',[a(1),b(1)],'Normalization','pdf'); 102 | x = 0.5*(h.BinEdges(1:end-1) + h.BinEdges(2:end))'; 103 | y = pdf1(x)'; 104 | rmse = sqrt(sum((y - h.Values).^2*h.BinWidth)); 105 | fprintf('%s: histogram rmse: %.6f\n', name, rmse); 106 | assert(rmse < tol, ['Test error: generated histogram does not match ' name ' pdf.']); 107 | 108 | end -------------------------------------------------------------------------------- /utils/covcma.m: -------------------------------------------------------------------------------- 1 | function [Sigma,x0] = covcma(X,y,x0,d,frac) 2 | %COVCMA Weighted covariance matrix (inspired by CMA-ES). 3 | 4 | if nargin < 3; x0 = []; end 5 | if nargin < 4 || isempty(d); d = 'descend'; end 6 | if nargin < 5 || isempty(frac); frac = 0.5; end 7 | 8 | [N,D] = size(X); 9 | 10 | % Compute vector weights 11 | mu = frac*N; 12 | weights = zeros(1,1,floor(mu)); 13 | weights(1,1,:) = log(mu+1/2)-log(1:floor(mu)); 14 | weights = weights./sum(weights); 15 | 16 | % Compute top vectors 17 | [~,index] = sort(y,d); 18 | 19 | if isempty(x0) 20 |     x0 = sum(bsxfun(@times,weights(:),X(index(1:floor(mu)),:)),1); 21 | end 22 | 23 | % Compute weighted covariance matrix wrt X0 24 | topx = bsxfun(@minus,X(index(1:floor(mu)),:),x0); 25 | Sigma = topx'*bsxfun(@times,weights(:),topx); 26 | 27 | % % Rescale covariance matrix according to mean vector length 28 | % [E,lambda] = eig(C); 29 | % % [sqrt(diag(lambda))',jit] 30 | % lambda = diag(lambda) + jit.^2; 31 | % lambda = lambda/sum(lambda); 32 | % 33 | % % Square root of covariance matrix 34 | % sigma = diag(sqrt(lambda))*E'; 35 | % 36 | % % Rescale by current scale (reduced) 37 | % sigma = MeshSize*SearchFactor*sigma; 38 | % 39 | % % Random draw from multivariate normal 40 | % xs = bsxfun(@plus, x, randn(options.Nsearch,D)*sigma); 41 | 42 | end -------------------------------------------------------------------------------- /utils/eissample_lite.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acerbilab/vbmc/396d649c3490f1459828ac85f552482869edf41c/utils/eissample_lite.m -------------------------------------------------------------------------------- /utils/evalbool.m: -------------------------------------------------------------------------------- 1 | function tf = evalbool(s) 2 | %EVALBOOL Evaluate argument to a bool 3 | 4 | if ~ischar(s) % S may not and cannot be empty 5 |     tf = s; 6 | 7 | else % Evaluation of string S 8 |     if strncmpi(s, 'yes', 3) || strncmpi(s, 'on', 2) ... 9 |             || strncmpi(s, 'true', 4) || strncmp(s, '1 ', 2) 10 |         tf = 1; 11 |     elseif strncmpi(s, 'no', 2) || strncmpi(s, 'off', 3) ...
12 |             || strncmpi(s, 'false', 5) || strncmp(s, '0 ', 2) 13 |         tf = 0; 14 |     else 15 |         try tf = evalin('caller', s); catch 16 |             error(['String value "' s '" cannot be evaluated']); 17 |         end 18 |         try tf ~= 0; catch 19 |             error(['String value "' s '" cannot be evaluated reasonably']); 20 |         end 21 |     end 22 | 23 | end 24 | 25 | end -------------------------------------------------------------------------------- /utils/fminadam.m: -------------------------------------------------------------------------------- 1 | function [x,f,xtab,ftab,iter] = fminadam(fun,x0,LB,UB,TolFun,MaxIter,master_stepsize) 2 | %FMINADAM Function minimization via a modified ADAM algorithm. 3 | 4 | if nargin < 3; LB = []; end 5 | if nargin < 4; UB = []; end 6 | if nargin < 5 || isempty(TolFun); TolFun = 0.001; end 7 | if nargin < 6 || isempty(MaxIter); MaxIter = 1e4; end 8 | if nargin < 7; master_stepsize = []; end 9 | 10 | % Assign default parameters 11 | master_stepsize_default.max = 0.1; 12 | master_stepsize_default.min = 0.001; 13 | master_stepsize_default.decay = 200; 14 | for f = fields(master_stepsize_default)' 15 |     if ~isfield(master_stepsize,f{:}) || isempty(master_stepsize.(f{:})) 16 |         master_stepsize.(f{:}) = master_stepsize_default.(f{:}); 17 |     end 18 | end 19 | 20 | %% Adam with momentum 21 | fudge_factor = sqrt(eps); 22 | beta1 = 0.9; 23 | beta2 = 0.999; 24 | batchsize = 20; 25 | TolX = 0.001; 26 | TolX_max = 0.1; 27 | TolFun_max = TolFun*100; 28 | 29 | MinIter = batchsize*2; 30 | 31 | nvars = numel(x0); 32 | if isempty(LB); LB = -Inf(nvars,1); end 33 | if isempty(UB); UB = Inf(nvars,1); end 34 | 35 | m = 0; v = 0; 36 | %xtab = zeros(nvars,batchsize*2); 37 | xtab = zeros(nvars,MaxIter); 38 | 39 | x = x0(:); 40 | ftab = NaN(1,MaxIter); 41 | 42 | for iter = 1:MaxIter 43 |     idx = mod(iter-1,batchsize*2) + 1; 44 |     isMinibatchEnd = mod(iter,batchsize) == 0; 45 | 46 |     %if mod(iter,100) == 0; fprintf('%d..',iter); end 47 | 48 |     [ftab(iter),grad] = fun(x); 49 |     grad = grad(:); 50 | 51 |     m = beta1 * m + (1-beta1) * grad; 52 |     v = beta2 * v + (1-beta2) * grad.^2; 53 |     mhat = m / (1-beta1^iter); 54 |     vhat = v / (1-beta2^iter); 55 | 56 |     stepsize = master_stepsize.min + ... 57 |         (master_stepsize.max - master_stepsize.min)*exp(-iter/master_stepsize.decay); 58 | 59 |     x = x - stepsize .* mhat ./(sqrt(vhat) + fudge_factor); % update 60 |     x = min(max(x,LB(:)),UB(:)); 61 | 62 |     % xtab(:,idx) = x;     % Store X 63 |     xtab(:,iter) = x;      % Store X 64 | 65 |     if isMinibatchEnd && iter >= MinIter 66 |         xxp = linspace(-(batchsize-1)/2,(batchsize-1)/2,batchsize); 67 |         [p,S] = polyfit(xxp,ftab(iter-batchsize+1:iter),1); 68 |         slope = p(1); 69 |         Rinv = inv(S.R); A = (Rinv*Rinv')*S.normr^2/S.df; 70 |         slope_err = sqrt(A(1,1) + TolFun^2); 71 |         slope_err_max = sqrt(A(1,1) + TolFun_max^2); 72 | 73 |         % Check random walk distance as termination condition 74 |         %dx = sqrt(sum((mean(xtab(:,1:batchsize),2) - mean(xtab(:,batchsize+(1:batchsize)),2)).^2/batchsize,1)); 75 |         dx = sqrt(sum((mean(xtab(:,iter-batchsize+1:iter),2) - mean(xtab(:,(iter-batchsize+1:iter)-batchsize),2)).^2/batchsize,1)); 76 | 77 |         % Termination conditions 78 |         if ( dx < TolX && abs(slope) < TolFun && slope_err < 2*TolFun ) || ... 79 |                 ( dx < TolX_max && abs(slope) < TolFun_max && slope_err_max < 2*TolFun_max ) 80 |             break; 81 |         end 82 |     end 83 | end 84 | 85 | % Return optimum and function value averaged over the last batch 86 | xtab = xtab(:,1:iter); 87 | ftab = ftab(1:iter); 88 | x = mean(xtab(:,iter-batchsize+1:iter),2); 89 | f = mean(ftab(iter-batchsize+1:iter)); 90 | x = reshape(x,size(x0)); 91 | if size(x0,2) > 1; xtab = xtab'; end % Transpose -------------------------------------------------------------------------------- /utils/psycho_gen.m: -------------------------------------------------------------------------------- 1 | function R=psycho_gen(theta,S) 2 | %PSYCHO_GEN Generate responses for psychometric function model.
3 | %  R=PSYCHO_GEN(THETA,S) generates responses in a simple orientation 4 | %  discrimination task, where S is a vector of stimulus orientations (in 5 | %  deg) for each trial, and THETA is a model parameter vector, with 6 | %  THETA(1) as eta=log(sigma), the log of the sensory noise; THETA(2) is 7 | %  the bias term; THETA(3) is the lapse rate. The returned vector of 8 | %  responses per trial reports 1 for "rightwards" and -1 for "leftwards". 9 | % 10 | %  See Section 5.2 of the manuscript for more details on the model. 11 | % 12 | %  Note that this model is very simple and used only for didactic purposes; 13 | %  one should use the analytical log-likelihood whenever available. 14 | 15 | % Luigi Acerbi, 2020 16 | 17 | sigma = exp(theta(1)); 18 | bias = theta(2); 19 | lapse = theta(3); 20 | 21 | %% Noisy measurement 22 | 23 | % Add Gaussian noise to true orientations S to simulate noisy measurements 24 | X = S + sigma*randn(size(S)); 25 | 26 | %% Decision rule 27 | 28 | % The response is 1 for "rightwards" if the internal measurement is larger 29 | % than the BIAS term; -1 for "leftwards" otherwise 30 | R = zeros(size(S)); 31 | R(X >= bias) = 1; 32 | R(X < bias) = -1; 33 | 34 | %% Lapses 35 | 36 | % Choose trials in which subject lapses; response there is given at chance 37 | lapse_idx = rand(size(S)) < lapse; 38 | 39 | % Random responses (equal probability of 1 or -1) 40 | lapse_val = randi(2,[sum(lapse_idx),1])*2-3; 41 | R(lapse_idx) = lapse_val; 42 | 43 | end -------------------------------------------------------------------------------- /utils/quantile1.m: -------------------------------------------------------------------------------- 1 | function y = quantile1(x,p) 2 | %QUANTILE1 Quantile of a vector. 3 | %   Y = QUANTILE1(X,P) returns quantiles of the values in X. P is a scalar 4 | %   or a vector of cumulative probability values in [0,1]. When X is a 5 | %   vector, Y is the same size as P, and Y(i) contains the P(i)-th 6 | %   quantile. Inputs with multiple rows or columns are flattened to a 7 | %   single data vector before the quantiles are 8 | %   computed. 9 | % 10 | %   Quantiles are specified using probabilities, from 0 to 1. For an N 11 | %   element vector X, QUANTILE1 computes quantiles as follows: 12 | %      1) The sorted values in X are taken as the (0.5/N), (1.5/N), 13 | %         ..., ((N-0.5)/N) quantiles. 14 | %      2) Linear interpolation is used to compute quantiles for 15 | %         probability values between (0.5/N) and ((N-0.5)/N) 16 | %      3) The minimum or maximum values in X are assigned to quantiles 17 | %         for probability values outside that range. 18 | % 19 | %   QUANTILE1 treats NaNs as missing values, and removes them. 20 | % 21 | %   Examples: 22 | %      y = quantile1(x,0.5); % the median of x 23 | %      y = quantile1(x,[0.025 0.25 0.5 0.75 0.975]); % a useful summary of x 24 | % 25 | %   See also IQR, MEDIAN, NANMEDIAN, QUANTILE. 26 | 27 | %   Based on PRCTILE, copyright 1993-2016 The MathWorks, Inc. 28 | 29 | % If X is empty, return all NaNs. 30 | if isempty(x) 31 |     y = nan(size(p),'like',x); 32 | else 33 |     % Flatten X to a column vector; quantiles are then 34 |     % computed along it.
35 | x = x(:); 36 | 37 | x = sort(x,1); 38 | n = sum(~isnan(x), 1); % Number of non-NaN values 39 | 40 | if isequal(p,0.5) % make the median fast 41 | if rem(n,2) % n is odd 42 | y = x((n+1)/2,:); 43 | else % n is even 44 | y = (x(n/2,:) + x(n/2+1,:))/2; 45 | end 46 | else 47 | r = p*n; 48 | k = floor(r+0.5); % K gives the index for the row just before r 49 | kp1 = k + 1; % K+1 gives the index for the row just after r 50 | r = r - k; % R is the ratio between the K and K+1 rows 51 | 52 | % Find indices that are out of the range 1 to n and cap them 53 | k(k<1 | isnan(k)) = 1; 54 | kp1 = bsxfun( @min, kp1, n ); 55 | 56 | % Use simple linear interpolation for the valid percentages 57 | y = (0.5+r).*x(kp1,:)+(0.5-r).*x(k,:); 58 | 59 | % Make sure that values we hit exactly are copied rather than interpolated 60 | exact = (r==-0.5); 61 | if any(exact) 62 | y(exact,:) = x(k(exact),:); 63 | end 64 | 65 | % Make sure that identical values are copied rather than interpolated 66 | same = (x(k,:)==x(kp1,:)); 67 | if any(same(:)) 68 | x = x(k,:); % expand x 69 | y(same) = x(same); 70 | end 71 | 72 | end 73 | 74 | end 75 | 76 | end -------------------------------------------------------------------------------- /utils/softbndloss.m: -------------------------------------------------------------------------------- 1 | function [y,dy] = softbndloss(x,slb,sub,TolCon) 2 | %SOFTBNDLOSS Loss function for soft bounds for function minimization. 3 | 4 | % Penalization relative scale 5 | if nargin < 4 || isempty(TolCon); TolCon = 1e-3; end 6 | 7 | compute_grad = nargout > 1; % Compute gradient only if requested 8 | 9 | ell = (sub - slb).*TolCon; 10 | 11 | y = 0; 12 | dy = zeros(size(x)); 13 | 14 | idx = x < slb; 15 | if any(idx) 16 | y = y + 0.5*sum(((slb(idx) - x(idx))./ell(idx)).^2); 17 | if compute_grad 18 | dy(idx) = (x(idx) - slb(idx))./ell(idx).^2; 19 | end 20 | end 21 | 22 | idx = x > sub; 23 | if any(idx) 24 | y = y + 0.5*sum(((x(idx) - sub(idx))./ell(idx)).^2); 25 | if compute_grad 26 | dy(idx) = (x(idx) - sub(idx))./ell(idx).^2; 27 | end 28 | end 29 | 30 | end -------------------------------------------------------------------------------- /utils/sq_dist.m: -------------------------------------------------------------------------------- 1 | % sq_dist - a function to compute a matrix of all pairwise squared distances 2 | % between two sets of vectors, stored in the columns of the two matrices, a 3 | % (of size D by n) and b (of size D by m). If only a single argument is given 4 | % or the second matrix is empty, the missing matrix is taken to be identical 5 | % to the first. 6 | % 7 | % Usage: C = sq_dist(a, b) 8 | % or: C = sq_dist(a) or equiv.: C = sq_dist(a, []) 9 | % 10 | % Where a is of size Dxn, b is of size Dxm (or empty), C is of size nxm. 11 | % 12 | % Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2010-12-13. 13 | 14 | function C = sq_dist(a, b) 15 | 16 | if nargin<1 || nargin>3 || nargout>1, error('Wrong number of arguments.'); end 17 | bsx = exist('bsxfun','builtin'); % since Matlab R2007a 7.4.0 and Octave 3.0 18 | if ~bsx, bsx = exist('bsxfun'); end % bsxfun is not yet "builtin" in Octave 19 | [D, n] = size(a); 20 | 21 | % Computation of a^2 - 2*a*b + b^2 is less stable than (a-b)^2 because numerical 22 | % precision can be lost when both a and b have very large absolute value and the 23 | % same sign. For that reason, we subtract the mean from the data beforehand to 24 | % stabilise the computations. This is OK because the squared error is 25 | % independent of the mean. 
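% (For example, with a = randn(3,5) and b = randn(3,7), i.e. D = 3,
% C = sq_dist(a,b) is a 5-by-7 matrix of pairwise squared distances
% between the columns of a and b.)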
26 | if nargin==1 % subtract mean 27 | mu = mean(a,2); 28 | if bsx 29 | a = bsxfun(@minus,a,mu); 30 | else 31 | a = a - repmat(mu,1,size(a,2)); 32 | end 33 | b = a; m = n; 34 | else 35 | [d, m] = size(b); 36 | if d ~= D, error('Error: column lengths must agree.'); end 37 | mu = (m/(n+m))*mean(b,2) + (n/(n+m))*mean(a,2); 38 | if bsx 39 | a = bsxfun(@minus,a,mu); b = bsxfun(@minus,b,mu); 40 | else 41 | a = a - repmat(mu,1,n); b = b - repmat(mu,1,m); 42 | end 43 | end 44 | 45 | if bsx % compute squared distances 46 | C = bsxfun(@plus,sum(a.*a,1)',bsxfun(@minus,sum(b.*b,1),2*a'*b)); 47 | else 48 | C = repmat(sum(a.*a,1)',1,m) + repmat(sum(b.*b,1),n,1) - 2*a'*b; 49 | end 50 | C = max(C,0); % numerical noise can cause C to negative i.e. C > -1e-14 51 | -------------------------------------------------------------------------------- /utils/unscent_warp.m: -------------------------------------------------------------------------------- 1 | function [xw,sigmaw,xu] = unscent_warp(fun,x,sigma) 2 | %UNSCENT_WARP Unscented transform (coordinate-wise only). 3 | 4 | [N1,D] = size(x); 5 | [N2,D2] = size(sigma); 6 | 7 | N = max(N1,N2); 8 | 9 | if N1 ~= N && N1 ~= 1; error('Mismatch between rows of X and SIGMA.'); end 10 | if N2 ~= N && N2 ~= 1; error('Mismatch between rows of X and SIGMA.'); end 11 | if D ~= D2; error('Mismatch between columns of X and SIGMA.'); end 12 | 13 | if N1 == 1 && N > 1; x = repmat(x,[N,1]); end 14 | if N2 == 1 && N > 1; sigma = repmat(sigma,[N,1]); end 15 | 16 | U = 2*D+1; % # unscented points 17 | 18 | x3(1,:,:) = x; 19 | xx = repmat(x3,[U,1,1]); 20 | 21 | for d = 1:D 22 | sigma3(1,:,1) = sqrt(D)*sigma(:,d); 23 | xx(2*d,:,d) = bsxfun(@plus,xx(2*d,:,d),sigma3); 24 | xx(2*d+1,:,d) = bsxfun(@minus,xx(2*d+1,:,d),sigma3); 25 | end 26 | 27 | xu = reshape(fun(reshape(xx,[N*U,D])),[U,N,D]); 28 | 29 | if N > 1 30 | xw(:,:) = mean(xu,1); 31 | sigmaw(:,:) = std(xu,[],1); 32 | else 33 | xw(1,:) = mean(xu,1); 34 | sigmaw(1,:) = std(xu,[],1); 35 | end 36 | 37 | end -------------------------------------------------------------------------------- /vbmc_isavp.m: -------------------------------------------------------------------------------- 1 | function tf = vbmc_isavp(vp) 2 | %VBMC_ISAVP True for VBMC variational posterior structures. 3 | % VBMC_ISAVP returns true if VP is a variational posterior structure 4 | % returned by VBMC and false otherwise. 5 | % 6 | % See also VBMC. 7 | 8 | if isstruct(vp) 9 | 10 | tf = true; 11 | 12 | % Required fields for variational posterior 13 | vpfields = {'D','K','w','mu','sigma','lambda','trinfo', ... 14 | 'optimize_mu','optimize_sigma','optimize_lambda','optimize_weights','bounds'}; 15 | 16 | % Check that VP has all the required fields, otherwise quit 17 | ff = fields(vp); 18 | for iField = 1:numel(vpfields) 19 | if ~any(strcmp(vpfields{iField},ff)) 20 | tf = false; 21 | break; 22 | end 23 | end 24 | 25 | else 26 | tf = false; 27 | end -------------------------------------------------------------------------------- /vbmc_kldiv.m: -------------------------------------------------------------------------------- 1 | function [kls,xx1,xx2] = vbmc_kldiv(vp1,vp2,Ns,gaussflag) 2 | %VBMC_KLDIV Kullback-Leibler divergence between two variational posteriors. 3 | % KLS = VBMC_KLDIV(VP1,VP2) returns an estimate of the (asymmetric) 4 | % Kullback-Leibler (KL) divergence between two variational posterior 5 | % distributions VP1 and VP2. KLS is a 2-element vector whose first element 6 | % is KL(VP1||VP2) and the second element is KL(VP2||VP1). 
The symmetrized 7 | %  KL divergence can be computed as mean(KLS). 8 | % 9 | %  KLS = VBMC_KLDIV(VP1,VP2,NS) uses NS random draws to estimate each 10 | %  KL divergence (default NS=1e5). 11 | % 12 | %  KLS = VBMC_KLDIV(VP1,VP2,NS,GAUSSFLAG) computes the "Gaussianized" 13 | %  KL-divergence if GAUSSFLAG=1, that is, the KL divergence between two 14 | %  multivariate normal distributions with the same moments as the variational 15 | %  posteriors given as inputs. Otherwise, the standard KL-divergence is 16 | %  returned for GAUSSFLAG=0 (default). 17 | % 18 | %  [KLS,XX1,XX2] = VBMC_KLDIV(...) returns NS samples from the variational 19 | %  posteriors VP1 and VP2 as, respectively, NS-by-D matrices XX1 and XX2, 20 | %  where D is the dimensionality of the problem. 21 | % 22 | %  If GAUSSFLAG is 1, VP1 and/or VP2 can be N-by-D matrices of samples 23 | %  from variational posteriors (they do not need to have the same number 24 | %  of samples). 25 | % 26 | %  See also VBMC, VBMC_MTV, VBMC_PDF, VBMC_RND, VBMC_DIAGNOSTICS. 27 | 28 | if nargin < 3 || isempty(Ns); Ns = 1e5; end 29 | if nargin < 4 || isempty(gaussflag); gaussflag = false; end 30 | 31 | % This was removed because the comparison *has* to be in original space, 32 | % given that the transform might change for distinct variational posteriors 33 | % if nargin < 5 || isempty(origflag); origflag = true; end 34 | origflag = true; 35 | 36 | kls = NaN(1,2); 37 | 38 | if ~gaussflag && (~vbmc_isavp(vp1) || ~vbmc_isavp(vp2)) 39 |     error('vbmc_kldiv:WrongInputs', ... 40 |         'Unless the KL divergence is Gaussianized, VP1 and VP2 need to be variational posteriors.'); 41 | end 42 | 43 | %try 44 | if gaussflag 45 |     if Ns == 0  % Analytical calculation 46 |         if origflag 47 |             error('vbmc_kldiv:NoAnalyticalMoments', ... 48 |                 'Analytical moments are available only for the transformed space.') 49 |         end 50 |         [q1mu,q1sigma] = vbmc_moments(vp1,0); 51 |         [q2mu,q2sigma] = vbmc_moments(vp2,0); 52 |         xx1 = []; xx2 = []; 53 |     else        % Numerical moments 54 |         if vbmc_isavp(vp1) 55 |             [q1mu,q1sigma] = vbmc_moments(vp1,origflag,Ns); 56 |         else 57 |             q1mu = mean(vp1,1); 58 |             q1sigma = cov(vp1); 59 |         end 60 |         if vbmc_isavp(vp2) 61 |             [q2mu,q2sigma] = vbmc_moments(vp2,origflag,Ns); 62 |         else 63 |             q2mu = mean(vp2,1); 64 |             q2sigma = cov(vp2); 65 |         end 66 |     end 67 |     [kls(1),kls(2)] = mvnkl(q1mu,q1sigma,q2mu,q2sigma); 68 | 69 | else 70 |     MINP = realmin; 71 | 72 |     xx1 = vbmc_rnd(vp1,Ns,origflag,1); 73 |     q1 = vbmc_pdf(vp1,xx1,origflag); 74 |     q2 = vbmc_pdf(vp2,xx1,origflag); 75 |     q1(q1 == 0 | ~isfinite(q1)) = 1;     % Ignore these points 76 |     q2(q2 == 0 | ~isfinite(q2)) = MINP; 77 |     kls(1) = -mean(log(q2) - log(q1)); 78 | 79 |     xx2 = vbmc_rnd(vp2,Ns,origflag,1); 80 |     q1 = vbmc_pdf(vp1,xx2,origflag); 81 |     q2 = vbmc_pdf(vp2,xx2,origflag); 82 |     q1(q1 == 0 | ~isfinite(q1)) = MINP; 83 |     q2(q2 == 0 | ~isfinite(q2)) = 1;     % Ignore these points 84 |     kls(2) = -mean(log(q1) - log(q2)); 85 | 86 | end 87 | 88 | kls = max(kls,0);   % Correct for numerical errors 89 | 90 | %catch 91 | 92 |     % Could not compute KL divs 93 | 94 | %end -------------------------------------------------------------------------------- /vbmc_mode.m: -------------------------------------------------------------------------------- 1 | function [x,vp] = vbmc_mode(vp,nmax,origflag) 2 | %VBMC_MODE Find mode of VBMC posterior approximation. 3 | %  X = VBMC_MODE(VP) returns the mode of the variational posterior VP.
4 | % 5 | %  X = VBMC_MODE(VP,NMAX,ORIGFLAG) returns the mode in the original space 6 | %  if ORIGFLAG=1 (default), or in the transformed VBMC space if ORIGFLAG=0, 7 | %  using up to NMAX starting points for the search (default NMAX=20). The 8 | %  two modes are generally not equivalent under nonlinear transformations. 9 | % 10 | %  [X,VP] = VBMC_MODE(...) returns the variational posterior with the mode 11 | %  stored in the VP struct. 12 | % 13 | %  See also VBMC, VBMC_MOMENTS, VBMC_PDF. 14 | 15 | if nargin < 2 || isempty(nmax); nmax = 20; end 16 | if nargin < 3 || isempty(origflag); origflag = true; end 17 | 18 | if origflag && isfield(vp,'mode') && ~isempty(vp.mode) 19 |     x = vp.mode; 20 | else 21 |     x0_mat = vp.mu'; 22 | 23 |     if nmax < vp.K 24 |         y0_vec = nlnpdf(x0_mat);    % First, evaluate negative log pdf at all component means 25 |         % Keep the NMAX highest-density means as starting points 26 |         [~,ord] = sort(y0_vec,'ascend'); 27 |         x0_mat = x0_mat(ord(1:nmax),:); 28 |     end 29 | 30 |     xmin = zeros(size(x0_mat,1),vp.D); 31 |     ff = Inf(size(x0_mat,1),1); 32 | 33 |     for k = 1:size(x0_mat,1) 34 |         x0 = x0_mat(k,:); 35 |         if origflag; x0 = warpvars_vbmc(x0,'inv',vp.trinfo); end 36 | 37 |         if origflag 38 |             opts = optimoptions('fmincon','GradObj','off','Display','off'); 39 |             LB = vp.trinfo.lb_orig + sqrt(eps); 40 |             UB = vp.trinfo.ub_orig - sqrt(eps); 41 |             x0 = min(max(x0,LB),UB); 42 |             [xmin(k,:),ff(k)] = fmincon(@nlnpdf,x0,[],[],[],[],LB,UB,[],opts); 43 |         else 44 |             opts = optimoptions('fminunc','GradObj','off','Display','off'); 45 |             [xmin(k,:),ff(k)] = fminunc(@nlnpdf,x0,opts); 46 |         end 47 |     end 48 | 49 |     [~,idx] = min(ff); 50 | 51 |     % Get mode and store it 52 |     x = xmin(idx,:); 53 |     if nargout > 1 && origflag 54 |         vp.mode = x; 55 |     end 56 | end 57 | 58 |     function [y,dy] = nlnpdf(x) 59 |     %NLNPDF Negative log posterior pdf and its gradient. 60 |         if nargout > 1 61 |             [y,dy] = vbmc_pdf(vp,x,origflag,1); 62 |             y = -y; dy = -dy; 63 |         else 64 |             y = -vbmc_pdf(vp,x,origflag,1); 65 |         end 66 |     end 67 | end -------------------------------------------------------------------------------- /vbmc_moments.m: -------------------------------------------------------------------------------- 1 | function [mubar,Sigma] = vbmc_moments(vp,origflag,Ns) 2 | %VBMC_MOMENTS Compute moments of variational posterior. 3 | %  [MU,SIGMA] = VBMC_MOMENTS(VP) computes the mean MU and covariance 4 | %  matrix SIGMA of the variational posterior VP via Monte Carlo sampling. 5 | % 6 | %  [...] = VBMC_MOMENTS(VP,ORIGFLAG) computes the moments of the 7 | %  variational posterior VP in the original problem space if ORIGFLAG=1 8 | %  (default), or in the transformed VBMC space if ORIGFLAG=0. In the 9 | %  transformed space, the moments are computed analytically. 10 | % 11 | %  [...] = VBMC_MOMENTS(VP,1,NS) uses NS samples to evaluate the moments 12 | %  of the variational posterior in the original space (default NS=1e6). 13 | % 14 | %  See also VBMC, VBMC_MODE, VBMC_PDF, VBMC_RND. 15 | 16 | if nargin < 2 || isempty(origflag); origflag = true; end 17 | if nargin < 3 || isempty(Ns); Ns = 1e6; end 18 | 19 | covflag = nargout > 1;     % Compute covariance?
20 | 21 | K = vp.K; 22 | 23 | if origflag 24 |     X = vbmc_rnd(vp,Ns,1,1); 25 |     mubar = mean(X,1); 26 |     if covflag 27 |         Sigma = cov(X); 28 |     end 29 | else 30 |     w(1,:) = vp.w;                       % Mixture weights 31 |     mu(:,:) = vp.mu; 32 | 33 |     mubar = sum(bsxfun(@times,w,mu),2); 34 | 35 |     if covflag 36 |         sigma(1,:) = vp.sigma; 37 |         lambda(:,1) = vp.lambda(:); 38 | 39 |         Sigma = sum(w.*sigma.^2)*diag(lambda.^2); 40 |         for k = 1:K; Sigma = Sigma + w(k)*(mu(:,k)-mubar)*(mu(:,k)-mubar)'; end 41 |     end 42 | 43 |     mubar = mubar(:)';  % Return row vector 44 | end -------------------------------------------------------------------------------- /vbmc_mtv.m: -------------------------------------------------------------------------------- 1 | function [mtv,xx1,xx2] = vbmc_mtv(vp1,vp2,Ns) 2 | %VBMC_MTV Marginal Total Variation distances between two variational posteriors. 3 | %  MTV = VBMC_MTV(VP1,VP2) returns an estimate of the marginal total 4 | %  variation distances between two variational posterior distributions VP1 5 | %  and VP2. MTV is a D-element vector whose elements are the total variation 6 | %  distance between the marginal distributions of VP1 and VP2, for each 7 | %  coordinate dimension. 8 | % 9 | %  The total variation distance between two densities p1 and p2 is: 10 | %      TV(p1, p2) = 1/2 \int | p1(x) - p2(x) | dx 11 | % 12 | %  MTV = VBMC_MTV(VP1,VP2,NS) uses NS random draws to estimate the MTV 13 | %  (default NS=1e5). 14 | % 15 | %  [MTV,XX1,XX2] = VBMC_MTV(...) returns NS samples from the variational 16 | %  posteriors VP1 and VP2 as, respectively, NS-by-D matrices XX1 and XX2, 17 | %  where D is the dimensionality of the problem. 18 | % 19 | %  VP1 and/or VP2 can be N-by-D matrices of samples from variational 20 | %  posteriors (they do not need to have the same number of samples). 21 | % 22 | %  See also VBMC, VBMC_KLDIV, VBMC_PDF, VBMC_RND, VBMC_DIAGNOSTICS.
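%
%  Example (illustrative; assumes VP1 and VP2 are variational posteriors
%  obtained from two separate VBMC runs on the same problem):
%      mtv = vbmc_mtv(vp1,vp2);
%      fprintf('Worst marginal TV distance: %.3f\n',max(mtv));
%  Values close to 0 suggest that the two runs agree on all marginals.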
23 | 24 | if nargin < 3 || isempty(Ns); Ns = 1e5; end 25 | 26 | % This was removed because the comparison *has* to be in original space, 27 | % given that the transform might change for distinct variational posteriors 28 | % if nargin < 4 || isempty(origflag); origflag = true; end 29 | origflag = true; 30 | 31 | if vbmc_isavp(vp1) 32 |     xx1 = vbmc_rnd(vp1,Ns,origflag,1); 33 |     lb1 = vp1.trinfo.lb_orig; 34 |     ub1 = vp1.trinfo.ub_orig; 35 | else 36 |     xx1 = vp1; 37 |     lb1 = -Inf(1,size(vp1,2)); 38 |     ub1 = Inf(1,size(vp1,2)); 39 | end 40 | if vbmc_isavp(vp2) 41 |     xx2 = vbmc_rnd(vp2,Ns,origflag,1); 42 |     lb2 = vp2.trinfo.lb_orig; 43 |     ub2 = vp2.trinfo.ub_orig; 44 | else 45 |     xx2 = vp2; 46 |     lb2 = -Inf(1,size(vp2,2)); 47 |     ub2 = Inf(1,size(vp2,2)); 48 | end 49 | 50 | D = size(xx1,2); 51 | nkde = 2^13; 52 | mtv = zeros(1,D); 53 | 54 | % Set bounds for kernel density estimate 55 | lb1_xx = min(xx1); ub1_xx = max(xx1); 56 | range1 = ub1_xx - lb1_xx; 57 | lb1 = max(lb1_xx-range1/10,lb1); 58 | ub1 = min(ub1_xx+range1/10,ub1); 59 | 60 | lb2_xx = min(xx2); ub2_xx = max(xx2); 61 | range2 = ub2_xx - lb2_xx; 62 | lb2 = max(lb2_xx-range2/10,lb2); 63 | ub2 = min(ub2_xx+range2/10,ub2); 64 | 65 | % Compute marginal total variation 66 | for i = 1:D 67 |     [~,yy1,x1mesh] = kde1d(xx1(:,i),nkde,lb1(i),ub1(i)); 68 |     yy1 = yy1/(qtrapz(yy1)*(x1mesh(2)-x1mesh(1)));   % Ensure normalization 69 | 70 |     [~,yy2,x2mesh] = kde1d(xx2(:,i),nkde,lb2(i),ub2(i)); 71 |     yy2 = yy2/(qtrapz(yy2)*(x2mesh(2)-x2mesh(1)));   % Ensure normalization 72 | 73 |     f = @(x) abs(interp1(x1mesh,yy1,x,'spline',0) - interp1(x2mesh,yy2,x,'spline',0)); 74 |     bb = sort([x1mesh([1,end]),x2mesh([1,end])]); 75 |     for j = 1:3 76 |         xx_range = linspace(bb(j),bb(j+1),1e5); 77 |         mtv(i) = mtv(i) + 0.5*qtrapz(f(xx_range))*(xx_range(2)-xx_range(1)); 78 |     end 79 | end 80 | 81 | -------------------------------------------------------------------------------- /vbmc_pdf.m: -------------------------------------------------------------------------------- 1 | function [y,dy] = vbmc_pdf(vp,X,origflag,logflag,transflag,df) 2 | %VBMC_PDF Probability density function of VBMC posterior approximation. 3 | %  Y = VBMC_PDF(VP,X) returns the probability density of the variational 4 | %  posterior VP evaluated at each row of X. Rows of the N-by-D matrix X 5 | %  correspond to observations or points, and columns correspond to variables 6 | %  or coordinates. Y is an N-by-1 vector. 7 | % 8 | %  Y = VBMC_PDF(VP,X,ORIGFLAG) returns the value of the posterior density 9 | %  evaluated in the original parameter space for ORIGFLAG=1 (default), or 10 | %  in the transformed VBMC space if ORIGFLAG=0. 11 | % 12 | %  Y = VBMC_PDF(VP,X,ORIGFLAG,LOGFLAG) returns the value of the log 13 | %  posterior density if LOGFLAG=1, otherwise the posterior density for 14 | %  LOGFLAG=0 (default). 15 | % 16 | %  Y = VBMC_PDF(VP,X,ORIGFLAG,LOGFLAG,TRANSFLAG) for TRANSFLAG=1 assumes 17 | %  that X is already specified in the transformed VBMC space. Otherwise, X 18 | %  is specified in the original parameter space (default TRANSFLAG=0). 19 | % 20 | %  Y = VBMC_PDF(VP,X,ORIGFLAG,LOGFLAG,TRANSFLAG,DF) returns the probability 21 | %  density of a heavy-tailed version of the variational posterior, 22 | %  in which the multivariate normal components have been replaced by 23 | %  multivariate t-distributions with DF degrees of freedom. The default is 24 | %  DF=Inf, the limit in which the t-distribution becomes a multivariate 25 | %  normal. 26 | % 27 | %  See also VBMC, VBMC_RND.
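%
%  Example (illustrative; assumes VP is a variational posterior returned
%  by VBMC and X is an N-by-D matrix of points):
%      y   = vbmc_pdf(vp,X);        % pdf in the original space
%      lny = vbmc_pdf(vp,X,[],1);   % log pdf (ORIGFLAG defaults to 1)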
27 | 28 | if nargin < 3 || isempty(origflag); origflag = true; end 29 | if nargin < 4 || isempty(logflag); logflag = false; end 30 | if nargin < 5 || isempty(transflag); transflag = false; end 31 | if nargin < 6 || isempty(df); df = Inf; end 32 | 33 | gradflag = nargout > 1; % Compute gradient 34 | 35 | % Convert points to transformed space 36 | if origflag && ~isempty(vp.trinfo) && ~transflag 37 | % Xold = X; 38 | X = warpvars_vbmc(X,'dir',vp.trinfo); 39 | end 40 | 41 | [N,D] = size(X); 42 | K = vp.K; % Number of components 43 | w = vp.w; % Mixture weights 44 | lambda = vp.lambda(:)'; % LAMBDA is a row vector 45 | 46 | mu_t(:,:) = vp.mu'; % MU transposed 47 | sigma(1,:) = vp.sigma; 48 | 49 | y = zeros(N,1); % Allocate probability vector 50 | if gradflag; dy = zeros(N,D); end 51 | 52 | if ~isfinite(df) || df == 0 53 | % Compute pdf of variational posterior 54 | 55 | % Common normalization factor 56 | nf = 1/(2*pi)^(D/2)/prod(lambda); 57 | 58 | for k = 1:K 59 | d2 = sum(bsxfun(@rdivide,bsxfun(@minus,X,mu_t(k,:)),sigma(k)*lambda).^2,2); 60 | nn = nf*w(k)/sigma(k)^D*exp(-0.5*d2); 61 | y = y + nn; 62 | if gradflag 63 | dy = dy - bsxfun(@times,nn, ... 64 | bsxfun(@rdivide,bsxfun(@minus,X,mu_t(k,:)),lambda.^2.*sigma(k)^2)); 65 | end 66 | end 67 | else 68 | % Compute pdf of heavy-tailed variant of variational posterior 69 | 70 | if df > 0 71 | % (This uses a multivariate t-distribution which is not the same thing 72 | % as the product of D univariate t-distributions) 73 | 74 | % Common normalization factor 75 | nf = exp(gammaln((df+D)/2) - gammaln(df/2))/(df*pi)^(D/2)/prod(lambda); 76 | 77 | for k = 1:K 78 | d2 = sum(bsxfun(@rdivide,bsxfun(@minus, X, mu_t(k,:)),sigma(k)*lambda).^2,2); 79 | nn = nf*w(k)/sigma(k)^D*(1+d2/df).^(-(df+D)/2); 80 | y = y + nn; 81 | if gradflag 82 | error('Gradient of heavy-tailed pdf not supported yet.'); 83 | dy = dy - bsxfun(@times,nn, ... 84 | bsxfun(@rdivide,bsxfun(@minus,X,mu_t(k,:)),lambda.^2.*sigma(k)^2)); 85 | end 86 | end 87 | else 88 | % (This uses a product of D univariate t-distributions) 89 | 90 | df = abs(df); 91 | 92 | % Common normalization factor 93 | nf = (exp(gammaln((df+1)/2) - gammaln(df/2))/sqrt(df*pi))^D/prod(lambda); 94 | 95 | for k = 1:K 96 | d2 = bsxfun(@rdivide,bsxfun(@minus, X, mu_t(k,:)),sigma(k)*lambda).^2; 97 | nn = nf*w(k)/sigma(k)^D*prod((1+d2/df).^(-(df+1)/2),2); 98 | y = y + nn; 99 | if gradflag 100 | error('Gradient of heavy-tailed pdf not supported yet.'); 101 | end 102 | end 103 | end 104 | 105 | end 106 | 107 | if logflag 108 | if gradflag; dy = bsxfun(@rdivide,dy,y); end 109 | y = log(y); 110 | end 111 | 112 | % Apply Jacobian correction 113 | if origflag && ~isempty(vp.trinfo) 114 | if logflag 115 | y = y - warpvars_vbmc(X,'logprob',vp.trinfo); 116 | if gradflag 117 | error('vbmc_pdf:NoOriginalGrad',... 
118 |                 'Gradient computation in original space not supported yet.'); 119 |             dy = dy - warpvars_vbmc(X,'g',vp.trinfo); 120 |         end 121 |     else 122 |         y = y ./ warpvars_vbmc(X,'prob',vp.trinfo); 123 |     end 124 | end 125 | 126 | end -------------------------------------------------------------------------------- /vbmc_plot.m: -------------------------------------------------------------------------------- 1 | function vbmc_plot(vp_array,stats) 2 | %VBMC_PLOT Plot VBMC variational posteriors (corner plot for a single posterior, overlaid marginal histograms for a cell array of posteriors). 3 | if nargin < 2; stats = []; end 4 | 5 | Nsamples = 1e5; 6 | 7 | if ~iscell(vp_array) 8 |     temp{1} = vp_array; 9 |     vp_array = temp; 10 | end 11 | 12 | if numel(vp_array) == 1 && vbmc_isavp(vp_array{1}) 13 |     X = vbmc_rnd(vp_array{1},Nsamples); 14 |     for d = 1:size(X,2); names{d} = ['x_{' num2str(d) '}']; end 15 |     cornerplot(X,names); 16 | else 17 |     Nbins = 40; 18 |     Nvps = numel(vp_array); 19 |     D = vp_array{1}.D; 20 |     mm = zeros(Nvps,D); 21 |     cmap = colormap; 22 |     cmap = cmap(mod((1:27:(1+27*64))-1,64)+1,:); 23 | 24 |     plotmat = [1 1; 1 2; 1 3; 2 2; 2 3; 2 3; 2 4; 2 4; 3 3; 3 4; 3 4; 3 4; 3 5; 3 5; 3 5; 4 4; 4 5; 4 5; 4 5]; 25 |     nrows = plotmat(D,1); 26 |     ncols = plotmat(D,2); 27 | 28 |     for i = 1:Nvps 29 |         if ~isempty(stats) && stats.idx_best == i; best_flag = true; else; best_flag = false; end 30 |         ltext{i} = ['vp #' num2str(i)]; 31 |         if best_flag; ltext{i} = [ltext{i} ' (best)']; end 32 | 33 |         X = vbmc_rnd(vp_array{i},Nsamples); 34 |         mm(i,:) = median(X); 35 | 36 |         for d = 1:D 37 |             subplot(nrows,ncols,d); 38 |             if best_flag; lw = 3; else; lw = 1; end 39 |             hst(i)=histogram(X(:,d),Nbins,'Normalization','probability','Displaystyle','stairs','LineWidth',lw,'EdgeColor',cmap(i,:)); 40 |             hold on; 41 |         end 42 |     end 43 | 44 |     for i = 1:Nvps 45 |         if ~isempty(stats) && stats.idx_best == i; best_flag = true; else; best_flag = false; end 46 |         for d = 1:D 47 |             subplot(nrows,ncols,d); 48 |             if best_flag; lw = 3; else; lw = 1; end 49 |             hln(i)=plot(mm(i,d)*[1 1],ylim,'-','LineWidth',lw,'Color',cmap(i,:)); 50 |             hold on; 51 |         end 52 |     end 53 | 54 | 55 |     for d = 1:D 56 |         subplot(nrows,ncols,d); 57 | 58 |         xlabel(['x_{' num2str(d) '}']); 59 |         set(gca,'TickDir','out'); 60 |         box off; 61 | 62 |         if d == D 63 |             hleg = legend(hln,ltext{:}); 64 |             set(hleg,'box','off','location','best'); 65 |         end 66 | 67 |     end 68 |     set(gcf,'Color','w'); 69 | 70 | end 71 | 72 | 73 | 74 | 75 | end -------------------------------------------------------------------------------- /vbmc_power.m: -------------------------------------------------------------------------------- 1 | function [vpp,lnZ] = vbmc_power(vp,n,cutoff) 2 | %VBMC_POWER Compute power posterior of variational approximation.
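%  VPP = VBMC_POWER(VP,N) returns the variational posterior VPP
%  proportional to the N-th power of the variational posterior VP.
%  Currently only the powers N=1 and N=2 are supported.
%
%  [VPP,LNZ] = VBMC_POWER(VP,N) also returns the log normalization
%  constant LNZ of the power posterior.
%
%  VBMC_POWER(VP,N,CUTOFF) discards mixture components whose cumulative
%  weight falls below CUTOFF (default CUTOFF=1e-5).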
3 | 4 | if nargin < 3 || isempty(cutoff); cutoff = 1e-5; end 5 | if cutoff < 0; cutoff = 0; end 6 | 7 | vpp = vp; 8 | K = vp.K; 9 | 10 | % For ease of reference in the code 11 | D = vp.D; 12 | w = vp.w; 13 | mu = vp.mu; 14 | sigma = vp.sigma; 15 | lambda = vp.lambda; 16 | 17 | % Power posterior parameters 18 | Kp = K^n; 19 | wp = zeros(1,Kp); 20 | mup = zeros(D,Kp); 21 | sigmap = zeros(1,Kp); 22 | 23 | 24 | 25 | switch n 26 |     case 1; lnZ = 0; return; 27 |     case 2 28 |         nf = 1/sqrt(2*pi)^D; 29 | 30 |         % First, compute product posterior weights 31 |         idx = 0; 32 |         for i = 1:K 33 |             for j = 1:K 34 |                 idx = idx + 1; 35 |                 sigmatilde2 = (sigma(i)^2+sigma(j).^2).*lambda.^2; 36 |                 wp(idx) = w(i)*w(j).*nf/prod(sqrt(sigmatilde2))*exp(-0.5*sum((mu(:,i)-mu(:,j)).^2./sigmatilde2,1)); 37 |             end 38 |         end 39 | 40 |         Z = sum(wp);    % Normalization constant 41 |         lnZ = log(Z); 42 |         wp = wp/Z; 43 | 44 |         % Throw away components whose cumulative weight falls below cutoff 45 |         wp_sorted = sort(wp); 46 |         wp_cum = cumsum(wp_sorted); 47 |         idx_cut = sum(wp_cum < cutoff); 48 |         if idx_cut > 0; w_cutoff = wp_sorted(idx_cut); else; w_cutoff = 0; end 49 |         wp(wp <= w_cutoff) = 0; 50 |         wp = wp/sum(wp); 51 | 52 |         % Then, compute mean and variance for above-cutoff components only 53 |         idx = 0; 54 |         for i = 1:K 55 |             for j = 1:K 56 |                 idx = idx + 1; 57 |                 if wp(idx) == 0; continue; end 58 |                 mup(:,idx) = (mu(:,i).*sigma(j)^2 + mu(:,j).*sigma(i)^2)./(sigma(i)^2+sigma(j)^2); 59 |                 sigmap(idx) = sigma(i)*sigma(j)/sqrt(sigma(i)^2+sigma(j)^2); 60 |             end 61 |         end 62 | 63 |     otherwise 64 |         error('vbmc_power:UnsupportedPower',... 65 |             'The power N should be a small positive integer. Currently supported values of N: 1 and 2.'); 66 | end 67 | 68 | % Keep only nonzero components 69 | keep_idx = wp > 0; 70 | wp_keep = wp(keep_idx); 71 | wp_keep = wp_keep/sum(wp_keep); 72 | 73 | vpp.K = sum(keep_idx); 74 | vpp.mu = mup(:,keep_idx); 75 | vpp.sigma = sigmap(keep_idx); 76 | vpp.w = wp_keep; 77 | if isfield(vpp,'temperature') && ~isempty(vpp.temperature) 78 |     vpp.temperature = vpp.temperature/n; 79 | else 80 |     vpp.temperature = 1/n; 81 | end 82 | 83 | 84 | --------------------------------------------------------------------------------