├── Data
│   ├── Abalon.mat
│   ├── DirectMarketing.mat
│   ├── GermanCredit.mat
│   ├── LabourForce.mat
│   └── RealizedLibrary.mat
├── Example
│   ├── CGVB_Logistics_Function_Handle.m
│   ├── CGVB_Logistics_Function_Handle_AutoDiff.m
│   ├── CGVB_Logistics_Model_Object.m
│   ├── CGVB_Logistics_Model_Object_Simple.m
│   ├── CGVB_VAR1_Function_Handle.m
│   ├── CGVB_VAR1_Model_Object.m
│   ├── MGVB_Logistics_Model_Object.m
│   ├── NAGVAC_Logistics_Function_Handle.m
│   ├── NAGVAC_Logistics_Model_Object.m
│   ├── VAFC_Logistics_Function_Handle.m
│   ├── VAFC_Logistics_Model_Object.m
│   └── VAR1.m
├── README.md
└── VBLab
    ├── MCMC
    │   └── MCMC.m
    ├── Models
    │   ├── LogisticRegression.m
    │   └── ModelClass.m
    ├── Utilities
    │   ├── Distribution.m
    │   ├── Normal.m
    │   ├── readData.m
    │   ├── trainTestSplit.m
    │   ├── utils_FNNInitialize.m
    │   ├── utils_errorMsg.m
    │   ├── utils_gen_Sobol.m
    │   ├── utils_itril.m
    │   ├── utils_itriu.m
    │   ├── utils_jitChol.m
    │   ├── utils_logNormalpdf.m
    │   ├── utils_logit.m
    │   ├── utils_normrnd_qmc.m
    │   ├── utils_plotShrinkage.m
    │   ├── utils_relu.m
    │   ├── utils_rqmc_rnd.m
    │   ├── utils_rs_multinomial.m
    │   ├── utils_sigmoid.m
    │   ├── utils_update_sigma.m
    │   ├── utils_vech.m
    │   ├── utils_vechinv.m
    │   └── vbayesPlot.m
    └── VB
        ├── CGVB.m
        ├── MGVB.m
        ├── NAGVAC.m
        ├── VAFC.m
        └── VBayesLab.m

/Data/Abalon.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VBayesLab/VBLab/90c48af33e2011d21a48af1f95101e81b84098ee/Data/Abalon.mat
--------------------------------------------------------------------------------
/Data/DirectMarketing.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VBayesLab/VBLab/90c48af33e2011d21a48af1f95101e81b84098ee/Data/DirectMarketing.mat
--------------------------------------------------------------------------------
/Data/GermanCredit.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VBayesLab/VBLab/90c48af33e2011d21a48af1f95101e81b84098ee/Data/GermanCredit.mat
--------------------------------------------------------------------------------
/Data/LabourForce.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VBayesLab/VBLab/90c48af33e2011d21a48af1f95101e81b84098ee/Data/LabourForce.mat
--------------------------------------------------------------------------------
/Data/RealizedLibrary.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VBayesLab/VBLab/90c48af33e2011d21a48af1f95101e81b84098ee/Data/RealizedLibrary.mat
--------------------------------------------------------------------------------
/Example/CGVB_Logistics_Function_Handle.m:
--------------------------------------------------------------------------------
% Script to run Example 3.4, defining the logistic regression model as a
% function handle and supplying the gradient of h(theta) analytically

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Additional setting
setting.Prior = [0,50];

% Initialize using the MLE estimate (for faster convergence)
X = labour(:,1:end-1);
y = labour(:,end);
theta_init = glmfit(X,y,'binomial','constant','off');   % initialise mu

% Run CGVB
Post_CGVB_manual = CGVB(@grad_h_func_logistic,labour,...
    'NumParams',n_features,...
    'Setting',setting,...
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'MeanInit',theta_init,...      % Initialize the variational mean at the MLE estimate
    'GradWeight1',0.9,...          % Momentum 1
    'GradWeight2',0.9,...          % Momentum 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',true);

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_CGVB_manual.Post.mu;
sigma2_vb = Post_CGVB_manual.Post.sigma2;

% Plot the variational distribution for the first 8 parameters
for i = 1:8
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_CGVB_manual.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)

%% Define gradient of h function for Logistic regression
% theta: Dx1 array
% h_func: Scalar
% h_func_grad: Dx1 array
function [h_func_grad,h_func] = grad_h_func_logistic(data,theta,setting)

    % Extract additional settings
    d = length(theta);
    sigma2 = setting.Prior(2);

    % Extract data
    X = data(:,1:end-1);
    y = data(:,end);

    % Compute log likelihood
    aux = X*theta;
    llh = y.*aux - log(1+exp(aux));
    llh = sum(llh);

    % Compute gradient of log likelihood
    ppi = 1./(1+exp(-aux));
    llh_grad = X'*(y-ppi);

    % Compute log prior
    log_prior = -d/2*log(2*pi) - d/2*log(sigma2) - theta'*theta/sigma2/2;

    % Compute gradient of log prior
    log_prior_grad = -theta/sigma2;

    % Compute h(theta) = log p(y|theta) + log p(theta)
    h_func = llh + log_prior;

    % Compute gradient of h(theta)
    h_func_grad = llh_grad + log_prior_grad;

    % h_func_grad must be a column
    h_func_grad = reshape(h_func_grad,length(h_func_grad),1);

end
--------------------------------------------------------------------------------
/Example/CGVB_Logistics_Function_Handle_AutoDiff.m:
--------------------------------------------------------------------------------
% Script to run Example 3.4, defining the logistic regression model as a
% function handle and using AutoDiff to automatically compute the gradient
% of the h(theta) function

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Additional setting
setting.Prior = [0,50];

% Initialize using the MLE estimate (for faster convergence)
X = labour(:,1:end-1);
y = labour(:,end);
theta_init = glmfit(X,y,'binomial','constant','off');   % initialise mu

% Run CGVB
Post_CGVB_manual = CGVB(@grad_h_func_logistic,labour,...
    'NumParams',n_features,...
    'Setting',setting,...
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'MeanInit',theta_init,...      % Initialize the variational mean at the MLE estimate
    'GradWeight1',0.9,...          % Momentum 1
    'GradWeight2',0.9,...          % Momentum 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',true);

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_CGVB_manual.Post.mu;
sigma2_vb = Post_CGVB_manual.Post.sigma2;

% Plot the variational distribution for the first 8 parameters
for i = 1:8
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_CGVB_manual.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
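%% Optional sanity check (a sketch added for illustration, not part of the
% original example): compare the AutoDiff gradient against a central
% finite-difference approximation at a random point, reusing the labour data
% and setting defined above. The step size and test point are assumptions.
theta_test = 0.01*randn(n_features,1);
grad_ad = grad_h_func_logistic(labour,theta_test,setting);
grad_fd = zeros(n_features,1);
step = 1e-6;
for j = 1:n_features
    e_j = zeros(n_features,1);
    e_j(j) = step;
    [~,h_plus]  = grad_h_func_logistic(labour,theta_test + e_j,setting);
    [~,h_minus] = grad_h_func_logistic(labour,theta_test - e_j,setting);
    grad_fd(j) = (h_plus - h_minus)/(2*step);
end
disp(['Max AutoDiff vs finite-difference discrepancy: ',...
      num2str(max(abs(grad_ad - grad_fd)))])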
%% Define function to compute gradient of h function for Logistic regression
% Input:
%   data: 2D array
%   theta: Dx1 array
%   setting: struct
% Output:
%   h_func: Scalar
%   h_func_grad: Dx1 array
function [h_func_grad,h_func] = grad_h_func_logistic(data,theta,setting)

    % Convert parameters to dlarray data type
    theta_AD = dlarray(theta);

    % Evaluate the function containing dlgradient using dlfeval
    [h_func_grad_AD,h_func_AD] = dlfeval(@grad_h_func_logistic_AD,data,theta_AD,setting);

    % Convert outputs from dlarray back to Matlab arrays
    h_func_grad = extractdata(h_func_grad_AD);
    h_func = extractdata(h_func_AD);

    % Make sure the output is a column vector
    h_func_grad = reshape(h_func_grad,length(h_func_grad),1);

end

%% Function containing dlgradient
function [h_func_grad,h_func] = grad_h_func_logistic_AD(data,theta,setting)

    h_func = h_func_logistic(data,theta,setting);
    h_func_grad = dlgradient(h_func,theta);
end

%% Now we need to define a function to compute the h(theta) term
% Define h function for Logistic regression
% theta: Dx1 column
% h_func: Scalar
function h_func = h_func_logistic(data,theta,setting)

    % Extract additional settings
    d = length(theta);
    sigma2 = setting.Prior(2);

    % Extract data
    y = data(:,end);
    X = data(:,1:end-1);

    % Compute log likelihood
    aux = X*theta;
    log_lik = y.*aux - log(1+exp(aux));
    log_lik = sum(log_lik);

    % Compute log prior
    log_prior = -d/2*log(2*pi) - d/2*log(sigma2) - theta'*theta/sigma2/2;

    % h = log p(y|theta) + log p(theta)
    h_func = log_lik + log_prior;

end
--------------------------------------------------------------------------------
/Example/CGVB_Logistics_Model_Object.m:
--------------------------------------------------------------------------------
% Script to run Example 3.4 using the built-in Logistic Regression class of
% the VBLab package

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Create a Logistic Regression model object
Mdl = LogisticRegression(n_features,...
                         'Prior',{'Normal',[0,50]});

%% Run Cholesky GVB with random initialization
Estmdl_1 = CGVB(Mdl,labour,...
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'GradWeight1',0.9,...          % Momentum weight 1
    'GradWeight2',0.9,...          % Momentum weight 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'StepAdaptive',500,...         % For adaptive learning rate
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',false);               % Don't plot the lowerbound when finished

%% Run Cholesky GVB with MLE initialization
% Random seed to reproduce results
rng(2020)

theta_init = Mdl.initParams('MLE',labour);
Estmdl_2 = CGVB(Mdl,labour,...
    'MeanInit',theta_init,...      % Initial values of the variational mean
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'GradWeight1',0.9,...          % Momentum weight 1
    'GradWeight2',0.9,...          % Momentum weight 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'StepAdaptive',500,...         % For adaptive learning rate
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',false);               % Don't plot the lowerbound when finished

%% Then compare convergence of the lowerbound in the 2 cases
figure
hold on
grid on
plot(Estmdl_1.Post.LB_smooth,'-r','LineWidth',2)
plot(Estmdl_2.Post.LB_smooth,'--b','LineWidth',2)
title('Lowerbound')
xlabel('Iterations')
legend('Random Initialization','MLE Initialization')

%% It is useful to compare the approximate posterior density to the true density obtained by MCMC
Post_MCMC = MCMC(Mdl,labour,...
    'NumMCMC',100000,...           % Number of MCMC iterations
    'ParamsInit',theta_init,...    % Use MLE estimates as initial values
    'Verbose',100);                % Display sampling information after every 100 iterations

%% Compare densities by CGVB and MCMC
% Get posterior means and trace plots for the parameters to check the mixing
[mcmc_mean,mcmc_std,mcmc_chain] = Post_MCMC.getParamsMean(...
    'BurnInRate',0.2,...           % Throw away 20% of the samples
    'PlotTrace',1:n_features,...   % Trace plot for all parameters
    'SubPlot',[2,4]);              % Dimension of subplots

% Plot density
fontsize = 20;
numparams = Estmdl_2.Model.NumParams;

% Extract variational mean and variance
mu_vb = Estmdl_2.Post.mu;
sigma2_vb = Estmdl_2.Post.sigma2;

figure
for i = 1:numparams
    subplot(3,3,i)
    xx = mcmc_mean(i)-4*mcmc_std(i):0.002:mcmc_mean(i)+4*mcmc_std(i);
    yy_mcmc = ksdensity(mcmc_chain(:,i),xx,'Bandwidth',0.022);
    yy_vb = normpdf(xx,mu_vb(i),sqrt(sigma2_vb(i)));
    plot(xx,yy_mcmc,'-k',xx,yy_vb,'--b','LineWidth',1.5)
    line([theta_init(i) theta_init(i)],ylim,'LineWidth',1.5,'Color','r')
    str = ['\theta_',num2str(i)];
    title(str,'FontSize',fontsize)
    legend('MCMC','VB')
end
subplot(3,3,9)
plot(Estmdl_2.Post.LB_smooth,'LineWidth',1.5)
title('Lower bound','FontSize',fontsize)
--------------------------------------------------------------------------------
/Example/CGVB_Logistics_Model_Object_Simple.m:
--------------------------------------------------------------------------------
% Script to run Example 3.4 using the built-in Logistic Regression class of
% the VBLab package

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Create a Logistic Regression model object
Mdl = LogisticRegression(n_features,...
                         'Prior',{'Normal',[0,50]});

%% Run Cholesky GVB with random initialization
Post_CGVB = CGVB(Mdl,labour,...
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'GradWeight1',0.9,...          % Momentum weight 1
    'GradWeight2',0.9,...          % Momentum weight 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'StepAdaptive',500,...         % For adaptive learning rate
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',false);               % Don't plot the lowerbound when finished

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_CGVB.Post.mu;
sigma2_vb = Post_CGVB.Post.sigma2;

% Plot the variational distribution of each parameter
for i = 1:n_features
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_CGVB.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
--------------------------------------------------------------------------------
/Example/CGVB_VAR1_Function_Handle.m:
--------------------------------------------------------------------------------
% Example to fit a VAR(1) model, defined as a function handle, using the CGVB method
% We simulate a multivariate time series

clear
clc

rng(2021)

% Setting
m = 2;      % Number of time series
T = 100;    % Number of observations

% Generate toy data (white noise used as a stand-in for a VAR(1) series)
y = randn(m,T);

% Additional setting
setting.Prior = [0,1];          % Parameters (mean,variance) of a normal distribution
setting.y.mu = 0;
setting.idx.c = 1:m;
setting.idx.A = m+1:m+m^2;
setting.num_params = m + m^2;
setting.Gamma = 0.1*eye(m);

%% Run CGVB with the defined model
Post_CGVB_VAR1 = CGVB(@grad_h_func_VAR1,y,...
    'NumParams',setting.num_params,...   % Number of model parameters
    'Setting',setting,...          % Additional setting to compute gradient of h(theta)
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'GradWeight1',0.9,...          % Momentum 1
    'GradWeight2',0.9,...          % Momentum 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',false);

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_CGVB_VAR1.Post.mu;
sigma2_vb = Post_CGVB_VAR1.Post.sigma2;

% Plot the variational distribution of each parameter
for i = 1:setting.num_params
    subplot(2,4,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(2,4,7)
plot(Post_CGVB_VAR1.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
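% A restatement of the model and the formulas that grad_h_func_VAR1 below
% implements (comments added for clarity; the notation follows the code):
%   y_t = c + A*y_{t-1} + e_t,   e_t ~ N(0,Gamma),   t = 2,...,T
% with prior theta = [c; vec(A)] ~ N(0,I). Writing r_t = y_t - A*y_{t-1} - c,
%   h(theta) = log p(theta) - 0.5*m*(T-1)*log(2*pi) - 0.5*(T-1)*log(det(Gamma))
%              - 0.5*sum_t r_t'*inv(Gamma)*r_t
% and the gradients of the log-likelihood term are
%   dh/dc      = sum_t inv(Gamma)*r_t
%   dh/dvec(A) = sum_t kron(y_{t-1}, inv(Gamma)*r_t)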
%% Function to compute h(theta) and its gradient. This can be defined in a separate file
% Input:
%   y: mxT matrix with m time series, each of length T
%   theta: Dx1 array of model parameters
%   setting: struct of additional information to compute the gradient of h(theta)
% Output:
%   grad_h_theta: Dx1 array, the gradient of h(theta)
%   h_theta: scalar, the value of h(theta)
function [grad_h_theta,h_theta] = grad_h_func_VAR1(y,theta,setting)

    % Extract size of data
    [m,T] = size(y);

    % Extract model settings
    prior_params = setting.Prior;
    d = setting.num_params;
    idx = setting.idx;
    Gamma = setting.Gamma;
    Gamma_inv = Gamma^(-1);

    % Extract params from theta
    c = theta(idx.c);                                % c is an mx1 column
    A = reshape(theta(idx.A),length(c),length(c));   % A is an mxm matrix

    % Log prior
    log_prior = Normal.logPdfFnc(theta,prior_params);

    % Log likelihood
    log_llh = 0;
    for t = 2:T
        log_llh = log_llh - 0.5*(y(:,t) - A*y(:,t-1) - c)' * Gamma_inv * (y(:,t) - A*y(:,t-1) - c);
    end
    log_llh = log_llh - 0.5*m*(T-1)*log(2*pi) - 0.5*(T-1)*log(det(Gamma));

    % h(theta)
    h_theta = log_prior + log_llh;

    % Gradient of log prior
    grad_log_prior = Normal.GradlogPdfFnc(theta,prior_params);

    % Gradient of log likelihood, using the identity
    % d/dvec(A) [-0.5*r'*inv(Gamma)*r] = kron(y_{t-1}, inv(Gamma)*r)
    grad_llh_c = 0;
    grad_llh_A = 0;
    for t = 2:T
        grad_llh_c = grad_llh_c + Gamma_inv*(y(:,t) - A*y(:,t-1) - c);
        grad_llh_A = grad_llh_A + kron(y(:,t-1),Gamma_inv*(y(:,t) - A*y(:,t-1) - c));
    end

    grad_llh = [grad_llh_c;grad_llh_A(:)];

    % Gradient of h(theta)
    grad_h_theta = grad_log_prior + grad_llh;

    % Make sure grad_h_theta is a column
    grad_h_theta = reshape(grad_h_theta,d,1);

end
--------------------------------------------------------------------------------
/Example/CGVB_VAR1_Model_Object.m:
--------------------------------------------------------------------------------
% Example to fit a VAR(1) model, defined as a custom class object, using the CGVB method
% We simulate a multivariate time series

clear
clc

rng(2021)

% Setting
m = 2;      % Number of time series
T = 100;    % Number of observations

% Generate toy data (white noise used as a stand-in for a VAR(1) series)
y = randn(m,T);

% Create a VAR1 model object
Mdl = VAR1(m);

%% Run CGVB with the defined model
Post_CGVB_VAR1 = CGVB(Mdl,y,...
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'GradWeight1',0.9,...          % Momentum 1
    'GradWeight2',0.9,...          % Momentum 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',false);

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_CGVB_VAR1.Post.mu;
sigma2_vb = Post_CGVB_VAR1.Post.sigma2;

% Plot the variational distribution of each parameter
for i = 1:Post_CGVB_VAR1.Model.NumParams
    subplot(2,4,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(2,4,7)
plot(Post_CGVB_VAR1.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
--------------------------------------------------------------------------------
/Example/MGVB_Logistics_Model_Object.m:
--------------------------------------------------------------------------------
% Script to fit the Logistic Regression model using the MGVB method

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Create a Logistic Regression model object
Mdl = LogisticRegression(n_features,...
                         'Prior',{'Normal',[0,50]});

%% Run Manifold GVB (MGVB) to approximate the posterior distribution of the
% model using a multivariate normal density
Post_MGVB = MGVB(Mdl,labour,...
    'LearningRate',0.001,...       % Learning rate
    'NumSample',100,...            % Number of samples to estimate gradient of lowerbound
    'MaxPatience',50,...           % For early stopping
    'MaxIter',2000,...             % Maximum number of iterations
    'GradWeight',0.4,...           % Momentum weight
    'WindowSize',30,...            % Smoothing window for lowerbound
    'SigInitScale',0.04,...        % Std of the normal distribution used for initializing
    'StepAdaptive',500,...         % For adaptive learning rate
    'GradientMax',100,...          % For gradient clipping
    'LBPlot',true);                % Plot the smoothed lowerbound at the end
--------------------------------------------------------------------------------
/Example/NAGVAC_Logistics_Function_Handle.m:
--------------------------------------------------------------------------------
% Script to fit a Logistic Regression model, defined as a function handle,
% using the NAGVAC method

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Additional setting
setting.Prior = [0,1];

% Extract covariates and responses
X = labour(:,1:end-1);
y = labour(:,end);

% Run NAGVAC
Post_NAGVAC_manual = NAGVAC(@grad_h_func_logistic,labour,...
    'NumParams',n_features,...
    'Setting',setting,...
    'NumSample',100,...            % Number of samples to estimate gradient of lowerbound
    'LearningRate',0.01,...        % Learning rate
    'MaxPatience',20,...           % For early stopping
    'MaxIter',10000,...            % Maximum number of iterations
    'GradientMax',200,...          % For gradient clipping
    'WindowSize',30,...            % Smoothing window for lowerbound
    'LBPlot',true);                % Plot the lowerbound when finished

%% Define gradient of h function for Logistic regression
% theta: Dx1 array
% h_func: Scalar
% h_func_grad: Dx1 array
function [h_func_grad,h_func] = grad_h_func_logistic(data,theta,setting)

    % Extract additional settings
    d = length(theta);
    sigma2 = setting.Prior(2);

    % Extract data
    X = data(:,1:end-1);
    y = data(:,end);

    % Compute log likelihood
    aux = X*theta;
    llh = y.*aux - log(1+exp(aux));
    llh = sum(llh);

    % Compute gradient of log likelihood
    ppi = 1./(1+exp(-aux));
    llh_grad = X'*(y-ppi);

    % Compute log prior
    log_prior = -d/2*log(2*pi) - d/2*log(sigma2) - theta'*theta/sigma2/2;

    % Compute gradient of log prior
    log_prior_grad = -theta/sigma2;

    % Compute h(theta) = log p(y|theta) + log p(theta)
    h_func = llh + log_prior;

    % Compute gradient of h(theta)
    h_func_grad = llh_grad + log_prior_grad;

    % h_func_grad must be a column
    h_func_grad = reshape(h_func_grad,length(h_func_grad),1);

end
--------------------------------------------------------------------------------
/Example/NAGVAC_Logistics_Model_Object.m:
--------------------------------------------------------------------------------
% Script to fit a Logistic Regression model, defined as a class object,
% using the NAGVAC method

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Create a Logistic Regression model object
Mdl = LogisticRegression(n_features,...
                         'Prior',{'Normal',[0,50]});

%% Run NAGVAC with random initialization
Post_NAGVAC = NAGVAC(Mdl,labour,...
    'NumSample',200,...            % Number of samples to estimate gradient of lowerbound
    'LearningRate',0.005,...       % Learning rate
    'MaxPatience',20,...           % For early stopping
    'MaxIter',10000,...            % Maximum number of iterations
    'GradientMax',200,...          % For gradient clipping
    'WindowSize',50,...            % Smoothing window for lowerbound
    'LBPlot',true);                % Plot the lowerbound when finished

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_NAGVAC.Post.mu;
sigma2_vb = Post_NAGVAC.Post.sigma2;

% Plot the variational distribution of each parameter
for i = 1:n_features
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_NAGVAC.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
--------------------------------------------------------------------------------
/Example/VAFC_Logistics_Function_Handle.m:
--------------------------------------------------------------------------------
% Script to fit a Logistic Regression model, defined as a function handle,
% using the VAFC method

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the GermanCredit dataset
credit = readData('GermanCredit',...    % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(credit,2)-1;

% Additional setting
setting.Prior = [0,50];

% Extract covariates and responses
X = credit(:,1:end-1);
y = credit(:,end);

% Run VAFC
Post_VAFC_manual = VAFC(@grad_h_func_logistic,credit,...
    'NumParams',n_features,...
    'Setting',setting,...
    'NumFactor',4,...              % Number of factors of the loading matrix
    'NumSample',100,...            % Number of samples to estimate gradient of lowerbound
    'LearningRate',0.05,...        % Learning rate
    'MaxPatience',30,...           % For early stopping
    'MaxIter',10000,...            % Maximum number of iterations
    'GradientMax',200,...          % For gradient clipping
    'WindowSize',20,...            % Smoothing window for lowerbound
    'LBPlot',true);                % Plot the lowerbound when finished

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_VAFC_manual.Post.mu;
sigma2_vb = Post_VAFC_manual.Post.sigma2;

% Plot the variational distribution for the first 8 parameters
for i = 1:8
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_VAFC_manual.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)

%% Define gradient of h function for Logistic regression
% theta: Dx1 array
% h_func: Scalar
% h_func_grad: Dx1 array
function [h_func_grad,h_func] = grad_h_func_logistic(data,theta,setting)

    % Extract additional settings
    d = length(theta);
    sigma2 = setting.Prior(2);

    % Extract data
    X = data(:,1:end-1);
    y = data(:,end);

    % Compute log likelihood
    aux = X*theta;
    llh = y.*aux - log(1+exp(aux));
    llh = sum(llh);

    % Compute gradient of log likelihood
    ppi = 1./(1+exp(-aux));
    llh_grad = X'*(y-ppi);

    % Compute log prior
    log_prior = -d/2*log(2*pi) - d/2*log(sigma2) - theta'*theta/sigma2/2;

    % Compute gradient of log prior
    log_prior_grad = -theta/sigma2;

    % Compute h(theta) = log p(y|theta) + log p(theta)
    h_func = llh + log_prior;

    % Compute gradient of h(theta)
    h_func_grad = llh_grad + log_prior_grad;

    % h_func_grad must be a column
    h_func_grad = reshape(h_func_grad,length(h_func_grad),1);

end
--------------------------------------------------------------------------------
/Example/VAFC_Logistics_Model_Object.m:
--------------------------------------------------------------------------------
% Script to fit a Logistic Regression model, defined as a class object,
% using the VAFC method

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the GermanCredit dataset
credit = readData('GermanCredit',...    % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(credit,2)-1;

% Create a Logistic Regression model object
Mdl = LogisticRegression(n_features);

%% Run VAFC with random initialization
Post_VAFC = VAFC(Mdl,credit,...
    'NumFactor',10,...             % Number of factors of the loading matrix
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'LearningRate',0.05,...        % Learning rate
    'MaxPatience',20,...           % For early stopping
    'MaxIter',10000,...            % Maximum number of iterations
    'GradientMax',200,...          % For gradient clipping
    'WindowSize',5,...             % Smoothing window for lowerbound
    'LBPlot',false);               % Don't plot the lowerbound when finished

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_VAFC.Post.mu;
sigma2_vb = Post_VAFC.Post.sigma2;

% Plot the variational distribution for the first 8 parameters
for i = 1:8
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_VAFC.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
--------------------------------------------------------------------------------
/Example/VAR1.m:
--------------------------------------------------------------------------------
classdef VAR1
    %VAR1 Class to model the VAR(1) model

    properties
        ModelName    % Model name
        NumParams    % Number of parameters
        Prior        % Prior object
        ParamIdx     % Indexes of model parameters in the vector of variational parameters
        Gamma        % Fixed covariance matrix
    end

    methods
        % Constructor. This will be automatically called when users create a VAR1 object
        function obj = VAR1(NumSeries)
            % Set values for ModelName and NumParams
            obj.ModelName = 'VAR1';
            obj.NumParams = NumSeries + NumSeries^2;
            obj.Prior = [0,1];   % Use a normal distribution for the prior
            obj.ParamIdx.c = 1:NumSeries;
            obj.ParamIdx.A = NumSeries+1:obj.NumParams;
            obj.Gamma = 0.1*eye(NumSeries);
        end

        % Function to compute h(theta) and its gradient
        function [h_func_grad, h_func] = hFunctionGrad(obj,y,theta)
            % Extract size of data
            [m,T] = size(y);

            % Extract model properties
            prior_params = obj.Prior;
            d = obj.NumParams;
            idx = obj.ParamIdx;
            gamma = obj.Gamma;
            gamma_inv = gamma^(-1);

            % Extract params from theta
            c = theta(idx.c);                                % c is a column
            A = reshape(theta(idx.A),length(c),length(c));   % A is a matrix

            % Log prior
            log_prior = Normal.logPdfFnc(theta,prior_params);

            % Log likelihood
            log_llh = 0;
            for t = 2:T
                log_llh = log_llh - 0.5*(y(:,t) - A*y(:,t-1) - c)' * gamma_inv * (y(:,t) - A*y(:,t-1) - c);
            end
            log_llh = log_llh - 0.5*m*(T-1)*log(2*pi) - 0.5*(T-1)*log(det(gamma));

            % Compute h_theta
            h_func = log_prior + log_llh;

            % Gradient of log_prior
            grad_log_prior = Normal.GradlogPdfFnc(theta,prior_params);

            % Gradient of log_llh
            grad_llh_c = 0;
            grad_llh_A = 0;
            for t = 2:T
                grad_llh_c = grad_llh_c + gamma_inv*(y(:,t) - A*y(:,t-1) - c);
                grad_llh_A = grad_llh_A + kron(y(:,t-1),gamma_inv*(y(:,t) - A*y(:,t-1) - c));
            end

            grad_llh = [grad_llh_c;grad_llh_A(:)];

            % Compute gradient of h_theta
            h_func_grad = grad_log_prior + grad_llh;

            % Make sure grad_h_theta is a column
            h_func_grad = reshape(h_func_grad,d,1);
        end
    end
end
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# VBLab: a Matlab package for Variational Inference

## Documentation

The official documentation is available at https://vbayeslab.github.io/VBLabDocs/

## Getting started

### Install VBLab package

1. Download or clone the VBLab package from the [VBLab Github Page](https://github.com/VBayesLab/Tutorial-on-VB)
2. Add the VBLab package, with all subfolders, to the Matlab search path. See [How to add or remove folders to Matlab search path](https://au.mathworks.com/help/matlab/matlab_env/add-remove-or-reorder-folders-on-the-search-path.html)

### How to start

1. Read the [VB tutorial paper](https://www.researchgate.net/publication/340006729_A_practical_tutorial_on_Variational_Bayes) for the theoretical explanation of the VB methods supported by the VBLab package. See also the [shorter version of the VB tutorial](https://vbayeslab.github.io/VBLabDocs/tutorial/) on the documentation website.
2. Run the [examples](https://github.com/VBayesLab/Tutorial-on-VB) showing how to use various VB methods to fit different VBLab and user-defined models. See the detailed explanation of the examples in the [VB tutorial paper](https://www.researchgate.net/publication/340006729_A_practical_tutorial_on_Variational_Bayes) or in the [Example](https://vbayeslab.github.io/VBLabDocs/example/) section on the documentation website.
3. Check the API reference for [supported VB techniques](https://vbayeslab.github.io/VBLabDocs/gvb/), [statistical models](https://vbayeslab.github.io/VBLabDocs/model/) and [how to define custom models](https://vbayeslab.github.io/VBLabDocs/model/custom/) for users' applications.
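### Quick example

A minimal sketch (condensed from `Example/CGVB_Logistics_Model_Object_Simple.m`; all other options are left at their defaults):

```matlab
% Load a built-in dataset as a matrix, with an intercept column added
labour = readData('LabourForce','Type','Matrix','Intercept',true);

% Define a logistic regression model with a normal prior
n_features = size(labour,2)-1;
Mdl = LogisticRegression(n_features,'Prior',{'Normal',[0,50]});

% Approximate the posterior with Cholesky GVB
Post = CGVB(Mdl,labour,'LearningRate',0.002,'MaxIter',5000);

% Variational means and variances of the model parameters
mu_vb = Post.Post.mu;
sigma2_vb = Post.Post.sigma2;
```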
---

## Authors

- **Trong-Nghia Nguyen**, PhD candidate, The University of Sydney Business School. ([Google scholar](https://scholar.google.com.vn/citations?user=4fEGoI8AAAAJ&hl=en), [Research gate](https://www.researchgate.net/profile/Nghia_Nguyen79), [LinkedIn](https://www.linkedin.com/in/nguyen-nghia-458b3097/))
- **Minh-Ngoc Tran**, Associate Professor, The University of Sydney Business School. ([Google scholar](https://scholar.google.com/citations?user=98A6Dq8AAAAJ&hl=en), [Research gate](https://www.researchgate.net/profile/Minh-Ngoc-Tran), [Home Page](https://sites.google.com/site/mntran26/home))
- **Viet-Hung Dao**, PhD candidate, The University of New South Wales Business School. ([Home Page](https://acems.org.au/our-people/hung-dao))

---

## Citing VBLab

If you use VBLab in a publication, we would appreciate your citing the following paper:

M.-N. Tran, T.-N. Nguyen and V.-H. Dao (2021). [A practical tutorial on Variational Bayes](https://www.researchgate.net/publication/340006729_A_practical_tutorial_on_Variational_Bayes). *Technical Report*.

**Or bibtex entry**:

```yaml
@TECHREPORT{Tran:2021,
    AUTHOR = "M.-N. Tran, T.-N. Nguyen and V.-H. Dao",
    TITLE = "A practical tutorial on Variational Bayes",
    YEAR = {2021},
    NOTE = {DOI: 10.13140/RG.2.2.20173.59360},
}
```
--------------------------------------------------------------------------------
/VBLab/MCMC/MCMC.m:
--------------------------------------------------------------------------------
classdef MCMC < handle & matlab.mixin.CustomDisplay
    %MCMC Class to sample model posteriors using an adaptive random-walk
    % Metropolis-Hastings algorithm

    properties
        Method
        Model            % Instance of the model to be fitted
        ModelToFit       % Name of the model to be fitted
        SeriesLength     % Length of the series
        NumMCMC          % Number of MCMC iterations
        BurnInRate       % Percentage of samples used for burnin
        BurnIn           % Number of samples for burnin
        TargetAccept     % Target acceptance rate
        NumCovariance    % Number of latest samples used to calculate the adaptive covariance matrix for the random-walk proposal
        SaveFileName     % Save file name
        SaveAfter        % Save the sampling results after every SaveAfter iterations
        ParamsInit       % Initial values of model parameters
        Seed             % Random seed
        Post             % Struct to store estimation results
        Initialize       % Initialization method
        LogLikelihood    % Handle of the log-likelihood function
        PrintMessage     % Custom message during the sampling phase
        CPU              % Sampling time
        Verbose          % Turn on or off printed messages during the sampling phase
        SigScale
        Scale
        Params
    end

    methods
        function obj = MCMC(model,data,varargin)
            %MCMC Construct an instance of this class
            obj.Method = 'MCMC';
            obj.Model = model;
            obj.ModelToFit = model.ModelName;
            obj.NumMCMC = 50000;
            obj.TargetAccept = 0.25;
            obj.BurnInRate = 0.2;
            obj.NumCovariance = 2000;
            obj.SigScale = 0.01;
            obj.Scale = 1;
            obj.SaveAfter = 0;
            obj.Verbose = 100;
            obj.ParamsInit = [];

            if nargin > 2
                % Parse additional options
                paramNames = {'NumMCMC' 'BurnInRate' 'TargetAccept' 'NumCovariance' ...
                              'ParamsInit' 'SaveFileName' 'SaveAfter' 'Verbose' ...
                              'Seed' 'SigScale' 'Scale'};
                paramDflts = {obj.NumMCMC obj.BurnInRate obj.TargetAccept obj.NumCovariance ...
                              obj.ParamsInit obj.SaveFileName obj.SaveAfter obj.Verbose ...
                              obj.Seed obj.SigScale obj.Scale};

                [obj.NumMCMC,...
                 obj.BurnInRate,...
                 obj.TargetAccept,...
                 obj.NumCovariance,...
                 obj.ParamsInit,...
                 obj.SaveFileName,...
                 obj.SaveAfter,...
                 obj.Verbose,...
                 obj.Seed,...
                 obj.SigScale,...
                 obj.Scale] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:});
            end

            obj.BurnIn = floor(obj.BurnInRate*obj.NumMCMC);

            % Set up the save file name
            DateVector = datevec(date);
            [~, MonthString] = month(date);
            date_time = ['_',num2str(DateVector(3)),'_',MonthString,'_'];
            obj.SaveFileName = ['Results_MCMC',date_time];

            % Run MCMC
            obj.Post = obj.fit(data);

        end

        % Sample from a posterior using MCMC
        function Post = fit(obj,data)

            % Extract sampling settings
            model = obj.Model;
            num_params = model.NumParams;
            verbose = obj.Verbose;
            numMCMC = obj.NumMCMC;
            scale = obj.Scale;
            V = obj.SigScale*eye(num_params);
            accept_rate = obj.TargetAccept;
            N_corr = obj.NumCovariance;
            saveAfter = obj.SaveAfter;
            saveFileName = obj.SaveFileName;
            params_init = obj.ParamsInit;

            thetasave = zeros(numMCMC,num_params);

            % Get initial values of parameters
            if ~isempty(params_init)   % If a vector of initial values is provided
                if (length(params_init) ~= num_params)
                    error(utils_errorMsg('vbayeslab:InitVectorMisMatched'))
                else
                    params = params_init;
                end
            else
                params = model.initParams('Prior');
            end

            % Make sure params is a row vector
            params = reshape(params,1,num_params);

            % For the first iteration
            log_prior = model.logPriors(params);
            lik = model.logLik(data,params);
            jac = model.logJac(params);
            post = log_prior + lik;

            tic
            for i = 1:numMCMC
                if(verbose)
                    if(mod(i,verbose)==0)
                        disp(['iter: ',num2str(i),'(',num2str(i/numMCMC*100),'%)'])
                    end
                end

                % Transform params to the normal distribution scale
                params_normal = model.toNormalParams(params);

                % Use a multivariate normal distribution as the proposal distribution
                sample = mvnrnd(params_normal,scale.*V);

                % Convert theta back to the original distribution
                theta = model.toOriginalParams(sample);

                % Calculate the Metropolis-Hastings acceptance probability for the
                % proposed sample (with a Jacobian adjustment for transformed parameters)
                log_prior_star = model.logPriors(theta);
                lik_star = model.logLik(data,theta);
                jac_star = model.logJac(theta);
                post_star = log_prior_star + lik_star;

                A = rand();
                r = exp(post_star - post + jac - jac_star);
                C = min(1,r);
                if A <= C
                    params = theta;
                    post = post_star;
                    jac = jac_star;
                end
                thetasave(i,:) = params;

                % Adapt the scale and covariance of the proposal distribution
                if i > 50
                    scale = utils_update_sigma(scale,C,accept_rate,i,num_params);
                    if (i > N_corr)
                        V = cov(thetasave(i-N_corr+1:i,:));
                    else
                        V = cov(thetasave(1:i,:));
                    end
                    V = utils_jitChol(V);
                end
                Post.theta(i,:) = params;
                Post.scale(i) = scale;

                % Save intermediate results after every saveAfter iterations
                if(saveAfter > 0)
                    if mod(i,saveAfter) == 0
                        save(saveFileName,'Post')
                    end
                end
            end
            Post.cpu = toc;
        end

        % Function to get parameter means given MCMC samples
        function [params_mean,params_std,params] = getParamsMean(obj,varargin)
            post = obj.Post;
            burnin = [];
            burninrate = [];
            PlotTrace = [];   % Array of indexes of model parameters
            subplotsize = [];
            if nargin > 0
                % Parse additional options
                paramNames = {'BurnIn' 'BurnInRate' 'PlotTrace' 'SubPlot'};
                paramDflts = {burnin burninrate PlotTrace subplotsize};
                [burnin,...
                 burninrate,...
                 PlotTrace,...
                 subplotsize] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:});
            end

            if(isempty(burnin))
                burnin = obj.BurnIn;
            end

            if(isempty(burninrate))
                burninrate = obj.BurnInRate;
            else
                burnin = floor(burninrate*obj.NumMCMC);
            end

            params_mean = mean(post.theta(burnin+1:end,:));
            params_std = sqrt(mean(post.theta(burnin+1:end,:).^2) - params_mean.^2);
            params = post.theta(burnin+1:end,:);

            % Plot traces of the selected parameters to check the mixing
            if (~isempty(PlotTrace) && ~isempty(subplotsize))
                nrow = subplotsize(1);
                ncol = subplotsize(2);

                figure
                for i = 1:length(PlotTrace)
                    subplot(nrow,ncol,i)
                    plot(post.theta(burnin+1:end,PlotTrace(i)))
                    title(['\theta_',num2str(i)],'FontSize', 20)
                end
            end
        end
    end
end
--------------------------------------------------------------------------------
/VBLab/Models/LogisticRegression.m:
--------------------------------------------------------------------------------
classdef LogisticRegression
    %LOGISTICREGRESSION Class to define a Bayesian logistic regression model

    % Attributes
    properties
        ModelName     % Model name
        NumParams     % Number of parameters
        PriorInput    % Prior specified by users
        Prior         % Prior object
        PriorVal      % Parameters of the prior
        Intercept     % Option to add an intercept or not (only for testing)
        AutoDiff      % Option to use autodiff (only for testing)
        CutOff        % Cutoff probability for classification
        Post          % Struct to store training results (maybe not used)
    end

    methods
        % Constructor
        function obj = LogisticRegression(n_features,varargin)
            %LOGISTICREGRESSION Construct an instance of this class
            obj.ModelName = 'LogisticRegression';
            obj.PriorInput = {'Normal',[0,1]};
            obj.Intercept = true;
            obj.AutoDiff = false;
            obj.NumParams = n_features;
            obj.CutOff = 0.5;

            % Get additional arguments (some arguments are only for testing)
            if nargin > 1
                % Parse additional options
                paramNames = {'AutoDiff' 'Intercept' 'Prior',...
                              'CutOff'};
                paramDflts = {obj.AutoDiff obj.Intercept obj.PriorInput,...
                              obj.CutOff};

                [obj.AutoDiff,...
                 obj.Intercept,...
                 obj.PriorInput,...
                 obj.CutOff] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:});
            end

            % Set the prior object using built-in distribution classes
            eval(['obj.Prior=',obj.PriorInput{1}]);
            obj.PriorVal = obj.PriorInput{2};

        end

        %% Log likelihood
        % Input:
        %   - data: 2D array. The last column contains the responses
        %   - params: Dx1 vector of parameters
        % Output:
        %   - llh: Log likelihood of the model
        function llh = logLik(obj,data,params)

            % Make sure params is a column
            params = reshape(obj.toOriginalParams(params),obj.NumParams,1);

            % Extract data
            y = data(:,end);
            X = data(:,1:end-1);

            % Compute log likelihood
            aux = X*params;
            llh = y.*aux - log(1+exp(aux));
            llh = sum(llh);

        end

        %% Compute gradient of log likelihood
        % Input:
        %   - data: 2D array. The last column contains the responses
        %   - params: Dx1 vector of parameters
        % Output:
        %   - llh_grad: Gradient of the log likelihood of the model
        function [llh_grad,llh] = logLikGrad(obj,data,params)

            % Extract data
            y = data(:,end);
            X = data(:,1:end-1);

            % Convert theta (normal) to the original distribution
            params = reshape(obj.toOriginalParams(params),obj.NumParams,1);

            % Check if the auto-diff option is available
            if (obj.AutoDiff)
                % We have to convert params to dlarray to enable autodiff
                params_autodiff = dlarray(params);
                [llh_grad_autodiff,llh_auto_diff] = dlfeval(@obj.logLikGradAutoDiff,data,params_autodiff);
                llh_grad = extractdata(llh_grad_autodiff)';
                llh = extractdata(llh_auto_diff);
            else
                % Compute gradient of log likelihood
                aux = X*params;
                ppi = 1./(1+exp(-aux));
                llh_grad = X'*(y-ppi);

                % Compute log likelihood
                llh = y.*aux - log(1+exp(aux));
                llh = sum(llh);
            end
        end

        %% Compute gradient of log likelihood using AutoDiff
        % Input:
        %   - data: 2D array. The last column contains the responses
        %   - params: 1xD vector of parameters
        % Output:
        %   - llh_grad: Gradient of the log likelihood of the model
        function [llh_grad,llh] = logLikGradAutoDiff(obj,data,params)

            llh = obj.logLik(data,params);

            llh_grad = dlgradient(llh,params);
        end

        %% Compute log prior of parameters
        % Input:
        %   - params: Dx1 vector of parameters
        % Output:
        %   - log_prior: Log prior of the model parameters
        function log_prior = logPriors(obj,params)

            params = reshape(obj.toOriginalParams(params),obj.NumParams,1);

            % Compute log prior
            log_prior = obj.Prior.logPdfFnc(params,obj.PriorVal);

        end

        %% Compute gradient of log prior of parameters
        % Input:
        %   - params: 1xD vector of parameters
        % Output:
        %   - log_prior_grad: Gradient of the log prior of the model parameters
        function [log_prior_grad,log_prior] = logPriorsGrad(obj,params)

            % Compute log prior
            log_prior = obj.Prior.logPdfFnc(params,obj.PriorVal);

            % Compute gradient of log prior
            log_prior_grad = obj.Prior.GradlogPdfFnc(params,obj.PriorVal);
        end

        %% Log of the Jacobian of all parameters
        % Input:
        %   - params: row vector of parameters
        % Output:
        %   - logjac: Log Jacobian of the model parameters
        function logjac = logJac(obj,params)
            logjac = 0;
        end

        %% Gradient of the log Jacobian of all parameters
        % Input:
        %   - params: row vector of parameters
        % Output:
        %   - logJac_grad: Gradient of the log Jacobian
        function [logJac_grad,logJac] = logJacGrad(obj,params)
            logJac_grad = 0;
            logJac = 0;
        end

        %% Function to compute h_theta = log lik + log prior
        % Input:
        %   - data: 2D array. The last column contains the responses
        %   - theta: Dx1 vector of parameters
        % Output:
        %   - h_func: Log likelihood + log prior
        function h_func = hFunction(obj,data,theta)
            % Transform parameters from normal to the original distribution
            params = obj.toOriginalParams(theta);

            % Compute h(theta)
            log_lik = obj.logLik(data,params);
            log_prior = obj.logPriors(params);
            log_jac = obj.logJac(params);
            h_func = log_lik + log_prior + log_jac;
        end

        %% Function to compute gradient of h_theta = grad log lik + grad log prior
        % Input:
        %   - data: 2D array. The last column contains the responses
        %   - theta: Dx1 vector of parameters
        % Output:
        %   - h_func_grad: Gradient of (log likelihood + log prior)
        %   - h_func: Log likelihood + log prior
        function [h_func_grad, h_func] = hFunctionGrad(obj,data,theta)

            % Transform parameters from normal to the original distribution
            params = obj.toOriginalParams(theta);

            % Compute h(theta)
            [llh_grad,llh] = obj.logLikGrad(data,params);
            [log_prior_grad,log_prior] = obj.logPriorsGrad(params);
            [logJac_grad,logJac] = obj.logJacGrad(params);
            h_func = llh + log_prior + logJac;
            h_func_grad = llh_grad + log_prior_grad + logJac_grad;
        end

        %% Transform parameters from normal to the original distribution
        function paramsOriginal = toOriginalParams(obj,params)
            paramsOriginal = obj.Prior.toOriginalParams(params);
        end

        %% Transform parameters from the original to the normal distribution
        function paramsNormal = toNormalParams(obj,params)
            paramsNormal = obj.Prior.toNormalParams(params);
        end

        %% Initialize parameters
        function params = initParams(obj,type,varargin)
            d_theta = obj.NumParams;
            switch type
                case 'MLE'      % A 2D data array must be provided
                    data = varargin{1};
                    X = data(:,1:end-1);
                    y = data(:,end);
                    params = glmfit(X,y,'binomial','constant','off');   % initialise mu
                case 'Prior'
                    params = obj.Prior.rngFnc(obj.PriorVal,[d_theta,1]);
                case 'Random'   % (only for testing)
                    std_init = varargin{1};
                    params = normrnd(0,std_init,[d_theta,1]);
                case 'Zeros'    % (only for testing)
                    params = zeros(d_theta,1);
                otherwise
                    error(['There is no initialization method called ',type,' in the model object!'])
            end
        end
    end
end
--------------------------------------------------------------------------------
/VBLab/Models/ModelClass.m:
--------------------------------------------------------------------------------
classdef ModelClass
    %MODELCLASS (Abstract) superclass to define a statistical model

    properties
        ModelName
        NumParam
    end

    methods
        function obj = ModelClass(inputArg1,inputArg2)
            %MODELCLASS Construct an instance of this class
            obj.ModelName = inputArg1;
            obj.NumParam = inputArg2;
        end
    end

    methods (Abstract)
        llh = logLikFnc(obj,data,params);
        llh_grad = logLikGradFnc(obj,data,params);
        log_prior = logPriorsFnc(obj,params);
        log_prior_grad = logPriorsGradFnc(obj,params);
        logjac = logJacFnc(obj,params);
        logjac_grad = logJacGradFnc(obj,params);
    end

end
--------------------------------------------------------------------------------
/VBLab/Utilities/Distribution.m:
--------------------------------------------------------------------------------
classdef Distribution
    %DISTRIBUTION An (Abstract) superclass to define a probability distribution

    properties
    end

    methods (Abstract)
        random_num = rngFnc(obj,params,dim);
        llh = logPdfFnc(obj,data,params);
        llh_grad = GradlogPdfFnc(obj,data,params);
        logjac = logJacFnc(obj,params);
        logjac_grad = GradlogJacFnc(obj,params);
    end

end
--------------------------------------------------------------------------------
/VBLab/Utilities/Normal.m:
--------------------------------------------------------------------------------
classdef Normal < Distribution
    %NORMAL Class to compute quantities related to the normal distribution

    properties
    end

    methods (Static)

        %% Random number generator
        function random_num = rngFnc(params,dim)
            random_num = normrnd(params(1),sqrt(params(2)),dim);
        end

        %% Log pdf function
        % Input:
        %   x: Dx1
        %   params = [mean(scalar),variance(scalar)]
        % Output:
        %   log of the pdf function (scalar)
        function log_pdf = logPdfFnc(x,params)
            mu = params(1);
            sigma2 = params(2);
            d = length(x);
            log_pdf = -d/2*log(2*pi) - d/2*log(sigma2) - (x-mu)'*(x-mu)/sigma2/2;
        end

        %% Gradient of log pdf function
        % Input:
        %   x: Dx1
        %   params = [mean(scalar),variance(scalar)]
        % Output:
        %   gradient of the log pdf function: Dx1
        function grad_log_pdf = GradlogPdfFnc(x,params)
            mu = params(1);
            sigma2 = params(2);
            grad_log_pdf = -(x-mu)/sigma2;
        end

        %% Log Jacobian
        % Input:
        %   x: Dx1
        %   params = [mean(scalar),variance(scalar)]
        % Output:
        %   log Jacobian of the transformation (scalar)
        function log_jac = logJacFnc(x,params)
            log_jac = 0;
        end

        %% Gradient of the log Jacobian
        % Input:
        %   x: Dx1
        %   params = [mean(scalar),variance(scalar)]
        % Output:
        %   gradient of the log Jacobian of the transformation: Dx1
        function grad_log_jac = GradlogJacFnc(x,params)
            grad_log_jac = 0;
        end

        %% Transform parameters to the normal distribution scale
        % Input:
        %   x: Dx1
        function params_normal = toNormalParams(x)
            params_normal = x;
        end

        %% Transform normal parameters to the original distribution
        % Input:
        %   x: Dx1
        function params_ori = toOriginalParams(x)
            params_ori = x;
        end

        %% Plot density given distribution parameters
        % Input:
        %   params = [mean(scalar),variance(scalar)]
        function plotPdf(params,varargin)

            % Extract
            mu = params(1);
            sigma2 = params(2);

            xx = mu-4*sqrt(sigma2):0.001:mu+4*sqrt(sigma2);
            yy = normpdf(xx,mu,sqrt(sigma2));
            plot(xx,yy,'LineWidth',2)
        end
    end
end
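% Usage sketch (added for illustration; not part of the original class):
% all methods are static, so they can be called directly, e.g. to evaluate
% a N(0,2) prior at a parameter vector x as the model classes do internally:
%
%   x  = [0.5; -1.2];
%   lp = Normal.logPdfFnc(x,[0,2]);       % log density at x (scalar)
%   g  = Normal.GradlogPdfFnc(x,[0,2]);   % gradient of the log density, Dx1
%   z  = Normal.rngFnc([0,2],[5,1]);      % five draws from N(0,2)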
21 | Normalized,... 22 | Index,... 23 | Type,... 24 | RealizedMeasure] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); 25 | end 26 | 27 | % Load built-in datasets 28 | datatype = ''; 29 | switch dataName 30 | % Abalon data 31 | case 'Abalon' 32 | datatype = 'Cross-Sectional'; 33 | data = load('Abalon.mat'); 34 | data_mat = data.data; 35 | 36 | % DirectMarketing data 37 | case 'DirectMarketing' 38 | datatype = 'Cross-Sectional'; 39 | data = load('DirectMarketing.mat'); 40 | data_mat = data.data; 41 | if(Normalized) 42 | norm_col = [1,2,3,12]; 43 | data_mat(:,norm_col) = zscore(data_mat(:,norm_col)); 44 | end 45 | 46 | % GermanCredit data 47 | case 'GermanCredit' 48 | datatype = 'Cross-Sectional'; 49 | data = load('GermanCredit.mat'); 50 | data_mat = data.data; 51 | if(Normalized) 52 | data_mat = [zscore(data_mat(:,1:15)),data_mat(:,16:end)]; 53 | end 54 | 55 | % LabourForce data 56 | case 'LabourForce' 57 | datatype = 'Cross-Sectional'; 58 | data = load('LabourForce.mat'); 59 | data_mat = data.data; 60 | if(Normalized) 61 | norm_col = [3,4,5,6]; 62 | data_mat(:,norm_col) = (data_mat(:,norm_col)-mean(data_mat(:,norm_col)))./std(data_mat(:,norm_col)); 63 | end 64 | 65 | % RealizedLibrary data 66 | case 'RealizedLibrary' 67 | datatype = 'TimeSeries'; 68 | data = load('RealizedLibrary.mat'); 69 | % An index must be specified; the series is truncated if Length is given 70 | if(isempty(Index)) 71 | error('At least one index must be specified!') 72 | else 73 | data_mat = data.(Index).open_to_close*100; 74 | if Length > 0 75 | T = Length; 76 | if Length <= length(data_mat) 77 | data_mat = data_mat(end-T+1:end); 78 | else 79 | error('The Length argument must not exceed the length of the time series!') 80 | end 81 | end 82 | end 83 | 84 | if(~isempty(RealizedMeasure)) 85 | num_obs = length(data_mat); 86 | data_out.return = data_mat; 87 | if iscell(RealizedMeasure) 88 | num_realized = length(RealizedMeasure); 89 | for i = 1:num_realized 90 | data_out.(RealizedMeasure{i}) = data.(Index).(RealizedMeasure{i})(end-num_obs+1:end)*100^2; 91 | end 92 | else 93 | data_out.(RealizedMeasure) = data.(Index).(RealizedMeasure)(end-num_obs+1:end)*100^2; 94 | end 95 | else 96 | data_out = data_mat; 97 | end 98 | end 99 | 100 | %% Check additional options 101 | % If a column of ones is added to the matrix X of cross-sectional data (default). 102 | % Only cross-sectional data is handled here, so the structure built above for 103 | % time series data is not overwritten. 104 | if strcmp(datatype,'Cross-Sectional') 105 | if Intercept 106 | data_mat = [ones(size(data_mat,1),1),data_mat]; 107 | VarNames = ['Intercept',data.VarNames]; 108 | data_out = data_mat; 109 | if strcmp(Type,'Table') 110 | data_table = array2table(data_mat); 111 | data_table.Properties.VariableNames = VarNames; 112 | data_out = data_table; 113 | end 114 | else 115 | data_out = data_mat; 116 | end 117 | end 118 | 119 | end 120 | 121 | -------------------------------------------------------------------------------- /VBLab/Utilities/trainTestSplit.m: -------------------------------------------------------------------------------- 1 | function [dataTrain,dataTest] = trainTestSplit(data,testRatio) 2 | %TRAINTESTSPLIT Randomly split a dataset into a training set and a test set 3 | %   Minimal implementation: the rows of data are shuffled and a fraction 4 | %   testRatio of the observations is assigned to the test set 5 | idx = randperm(size(data,1)); 6 | n_test = round(testRatio*size(data,1)); 7 | dataTest = data(idx(1:n_test),:); 8 | dataTrain = data(idx(n_test+1:end),:); 9 | end 10 | 11 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_FNNInitialize.m: -------------------------------------------------------------------------------- 1 | function weights = utils_FNNInitialize(layers) 2 | %UTILS_FNNINITIALIZE Initialize the weights of a feedforward neural network 3 | %   layers: vector of doubles, each number specifying the
number of 4 | %   nodes in a layer of the network. 5 | % 6 | %   weights: cell array of weight matrices specifying the 7 | %   transformation from one layer of the network to the next. 8 | % 9 | %   Copyright 2018 Minh-Ngoc Tran (minh-ngoc.tran@sydney.edu.au) and Nghia 10 | %   Nguyen (nghia.nguyen@sydney.edu.au) 11 | % 12 | %   http://www.xxx.com 13 | % 14 | %   Version: 1.0 15 | %   LAST UPDATE: April, 2018 16 | 17 | weights = cell(1, length(layers)-1); 18 | 19 | for i = 1:length(layers)-1 20 | % Use random weights drawn uniformly from -b to b 21 | b = sqrt(6)/(layers(i)+layers(i+1)); 22 | if i==1 23 | weights{i} = rand(layers(i+1),layers(i))*2*b - b; % Input layer already includes the bias 24 | else 25 | weights{i} = rand(layers(i+1),layers(i)+1)*2*b - b; % +1 for the bias unit 26 | end 27 | end 28 | 29 | end 30 | 31 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_errorMsg.m: -------------------------------------------------------------------------------- 1 | function msg_out = utils_errorMsg(identifier) 2 | %UTILS_ERRORMSG Define custom error/warning messages for exceptions 3 | %   UTILS_ERRORMSG = (IDENTIFIER) extract the message for the input identifier 4 | % 5 | % 6 | %   Copyright 2021 Nguyen (nghia.nguyen@sydney.edu.au) 7 | % 8 | %   https://github.com/VBayesLab/VBLab 9 | % 10 | %   Version: 1.0 11 | %   LAST UPDATE: Feb, 2021 12 | 13 | switch identifier 14 | case 'vbayeslab:TooFewInputs' 15 | msg_out = 'At least two arguments must be specified'; 16 | case 'vbayeslab:InputSizeMismatchX' 17 | msg_out = 'X and Y must have the same number of observations'; 18 | case 'vbayeslab:InputSizeMismatchY' 19 | msg_out = 'Y must be a single column vector'; 20 | case 'vbayeslab:ArgumentMustBePair' 21 | msg_out = 'Optional arguments must be pairs'; 22 | case 'vbayeslab:ResponseMustBeBinary' 23 | msg_out = 'Two-level categorical variable required'; 24 | case 'vbayeslab:DistributionMustBeBinomial' 25 | msg_out = 'Binomial distribution option required'; 26 | case 'vbayeslab:MustSpecifyActivationFunction' 27 | msg_out = 'Activation function type required'; 28 | case 'vbayeslab:InitVectorMisMatched' 29 | msg_out = 'The length of the initial values must equal the number of model parameters'; 30 | end 31 | end 32 | 33 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_gen_Sobol.m: -------------------------------------------------------------------------------- 1 | % Generate a scrambled Sobol sequence 2 | function X1 = utils_gen_Sobol(m,s) 3 | N = pow2(m); % Number of points 4 | P = sobolset(s); % Get Sobol sequence 5 | P = scramble(P,'MatousekAffineOwen'); % Scramble Sobol points 6 | X1 = net(P,N); 7 | 8 | X1 = X1'; 9 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_itril.m: -------------------------------------------------------------------------------- 1 | function [I J] = utils_itril(sz, k) 2 | % function [I J] = utils_itril(sz, k) % OR 3 | % I = itril(sz, k) 4 | % 5 | % Return the subindices [I J] (or linear indices I if single output call) 6 | % for the purpose of extracting the lower triangular part of a matrix of 7 | % size SZ. Input k is optional shifting. For k=0, extract from the main 8 | % diagonal. For k>0 -> above the diagonal, k<0 -> below the diagonal 9 | % 10 | % This returns the same as [...]
= find(tril(ones(sz),k)) 11 | % - Output is a column vector sorted with respect to linear indices 12 | % - No intermediate matrix is generated, which can be useful for large 13 | %   size problems 14 | % - Mathematically, A(itril(size(A))) is called (lower) "half-vectorization" 15 | %   of A 16 | % 17 | % Example: 18 | % 19 | % A = [ 7 5 4 20 | %       4 2 3 21 | %       9 1 9 22 | %       3 5 7 ] 23 | % 24 | % I = itril(size(A)) % gives [1 2 3 4 6 7 8 11 12]' 25 | % A(I) % gives [7 4 9 3 2 1 5 9 7]' OR A(tril(A)>0) 26 | % 27 | % Author: Bruno Luong 28 | % Date: 21/March/2009 29 | 30 | if isscalar(sz) 31 | sz = [sz sz]; 32 | end 33 | m=sz(1); 34 | n=sz(2); 35 | 36 | % Main diagonal by default 37 | if nargin<2 38 | k=0; 39 | end 40 | 41 | nc = min(n,m+k); % number of columns of the triangular part 42 | lo = max((1:nc).'-k,1); % lower row index for each column 43 | hi = m + zeros(nc,1); % upper row index for each column 44 | 45 | if isempty(lo) 46 | I = zeros(0,1); 47 | J = zeros(0,1); 48 | else 49 | c=cumsum([0; hi-lo]+1); % cumsum of the lengths 50 | I = accumarray(c(1:end-1), (lo-[0; hi(1:end-1)]-1), ... 51 | [c(end)-1 1]); 52 | I = cumsum(I+1); % row indices 53 | J = cumsum(accumarray(c,1)); 54 | J = J(1:end-1); % column indices 55 | end 56 | 57 | if nargout<2 58 | % convert to linear indices 59 | I = sub2ind([m n], I, J); 60 | end 61 | 62 | end % itril 63 | 64 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_itriu.m: -------------------------------------------------------------------------------- 1 | function [I J] = utils_itriu(sz, k) 2 | % function [I J] = itriu(sz, k) % OR 3 | % I = itriu(sz, k) 4 | % 5 | % Return the subindices [I J] (or linear indices I if single output call) 6 | % for the purpose of extracting the upper triangular part of a matrix of 7 | % size SZ. Input k is optional shifting. For k=0, extract from the main 8 | % diagonal. For k>0 -> above the diagonal, k<0 -> below the diagonal 9 | % 10 | % This returns the same as [...] = find(triu(ones(sz),k)) 11 | % - Output is a column vector sorted with respect to linear indices 12 | % - No intermediate matrix is generated, which can be useful for large 13 | %   size problems 14 | % - Mathematically, A(itriu(size(A))) is called (upper) "half-vectorization" 15 | %   of A 16 | % 17 | % Example: 18 | % 19 | % A = [ 7 5 4 20 | %       4 2 3 21 | %       9 1 9 22 | %       3 5 7 ] 23 | % 24 | % I = itriu(size(A)) % gives [1 5 6 9 10 11]' 25 | % A(I) % gives [7 5 2 4 3 9]' OR A(triu(A)>0) 26 | % 27 | % Author: Bruno Luong 28 | % Date: 21/March/2009 29 | 30 | if isscalar(sz) 31 | sz = [sz sz]; 32 | end 33 | m=sz(1); 34 | n=sz(2); 35 | 36 | % Main diagonal by default 37 | if nargin<2 38 | k=0; 39 | end 40 | 41 | nc = n-max(k,0); % number of columns of the triangular part 42 | lo = ones(nc,1); % lower row index for each column 43 | hi = min((1:nc).'-min(k,0),m); % upper row index for each column 44 | 45 | if isempty(lo) 46 | I = zeros(0,1); 47 | J = zeros(0,1); 48 | else 49 | c=cumsum([0; hi-lo]+1); % cumsum of the lengths 50 | I = accumarray(c(1:end-1), (lo-[0; hi(1:end-1)]-1), ...
51 | [c(end)-1 1]); 52 | I = cumsum(I+1); % row indices 53 | J = accumarray(c,1); 54 | J(1) = 1 + max(k,0); % The column indices start from this value 55 | J = cumsum(J(1:end-1)); % column indices 56 | end 57 | 58 | if nargout<2 59 | % convert to linear indices 60 | I = sub2ind([m n], I, J); 61 | end 62 | 63 | end % itriu 64 | 65 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_jitChol.m: -------------------------------------------------------------------------------- 1 | function [B_var] = utils_jitChol(B_var) 2 | %UTILS_JITCHOL Add jitter to the diagonal of a matrix that is not positive 3 | %   definite, so that its Cholesky factorization exists 4 | [~,p] = chol(B_var); 5 | if p>0 6 | min_eig = min(eig(B_var)); 7 | d = size(B_var,1); 8 | delta = max(0,-2*min_eig+10^(-5)).*eye(d); 9 | B_var = B_var+delta; 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_logNormalpdf.m: -------------------------------------------------------------------------------- 1 | function logNormal = utils_logNormalpdf(theta,mu,sigma2) 2 | 3 | logNormal = -0.5*log(2*pi)-0.5*log(sigma2)-0.5*(theta-mu).^2/sigma2; 4 | 5 | end 6 | 7 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_logit.m: -------------------------------------------------------------------------------- 1 | function output = utils_logit(input) 2 | %UTILS_LOGIT Logit (log-odds) transformation: log(p/(1-p)) 3 | output = log(input./(1-input)); 4 | end 5 | 6 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_normrnd_qmc.m: -------------------------------------------------------------------------------- 1 | function x = utils_normrnd_qmc(S,d) 2 | % Generate an Sxd matrix of standard normal numbers by RQMC 3 | rqmc = utils_rqmc_rnd(S,d); 4 | rqmc = rqmc(1:S,:); 5 | x = norminv(rqmc); 6 | end -------------------------------------------------------------------------------- /VBLab/Utilities/utils_plotShrinkage.m: -------------------------------------------------------------------------------- 1 | function utils_plotShrinkage(ShrinkageCoef,opt) 2 | %UTILS_PLOTSHRINKAGE Plot shrinkage coefficients of Group Lasso regularization 3 | % 4 | % 5 | %   Copyright 2018 Minh-Ngoc Tran (minh-ngoc.tran@sydney.edu.au) and Nghia 6 | %   Nguyen (nghia.nguyen@sydney.edu.au) 7 | % 8 | %   http://www.xxx.com 9 | % 10 | %   Version: 1.0 11 | %   LAST UPDATE: April, 2018 12 | 13 | % Do not plot intercept coefficient 14 | % ShrinkageCoef = ShrinkageCoef(2:end,:); 15 | 16 | TextTitle = opt.title; 17 | labelX = opt.labelX; 18 | labelY = opt.labelY;
19 | linewidth = opt.linewidth; 20 | color = opt.color; 21 | 22 | numCoeff = size(ShrinkageCoef,1); % Number of shrinkage coefficients 23 | fontsize = 13; 24 | 25 | % Define default settings 26 | if(isempty(TextTitle)) 27 | TextTitle = 'Shrinkage Coefficients'; 28 | end 29 | if(isempty(labelX)) 30 | labelX = 'Iteration'; 31 | end 32 | 33 | % Plot 34 | plot(ShrinkageCoef','LineWidth',linewidth); 35 | grid on 36 | title(TextTitle,'FontSize', 20) 37 | xlabel(labelX,'FontSize', 15) 38 | ylabel(labelY,'FontSize', 15) 39 | Ytext = ShrinkageCoef(:,end); % Y coordinates of the labels, one per coefficient 40 | Xtext = size(ShrinkageCoef,2); % X coordinate of the labels, same for all coefficients 41 | for i=1:numCoeff 42 | text(Xtext,Ytext(i),['\gamma_{',num2str(i),'}'],'fontsize',fontsize) 43 | end 44 | end 45 | 46 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_relu.m: -------------------------------------------------------------------------------- 1 | function out = utils_relu(in) 2 | %UTILS_RELU Rectified linear unit: max(0,in) 3 | out = max(0,in); 4 | end 5 | 6 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_rqmc_rnd.m: -------------------------------------------------------------------------------- 1 | function f = utils_rqmc_rnd(S,d) 2 | % Generate an S x d matrix of randomized quasi-Monte Carlo numbers 3 | max_sobol = 1111; 4 | r = floor(d/max_sobol); 5 | s = d-r*max_sobol; 6 | if r>=1 7 | f = utils_gen_Sobol(ceil(log2(S)),max_sobol)'; 8 | for i = 2:r 9 | f = [f,utils_gen_Sobol(ceil(log2(S)),max_sobol)']; 10 | end 11 | f = [f,utils_gen_Sobol(ceil(log2(S)),s)']; 12 | else 13 | f = utils_gen_Sobol(ceil(log2(S)),d)'; 14 | end 15 | 16 | end 17 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_rs_multinomial.m: -------------------------------------------------------------------------------- 1 | function indx = utils_rs_multinomial(w) 2 | 3 | N = length(w); % number of particles 4 | indx = zeros(1,N); % preallocate 5 | Q = cumsum(w); % cumulative sum 6 | u = sort(rand(1,N)); % random numbers 7 | 8 | j = 1; 9 | for i=1:N 10 | while (Q(j)<u(i)) 11 | j = j+1; 12 | end 13 | indx(i) = j; 14 | end 15 | 16 | end 17 | -------------------------------------------------------------------------------- /VBLab/Utilities/vbayesPlot.m: -------------------------------------------------------------------------------- 1 | function vbayesPlot(type,value,varargin) 2 | %VBAYESPLOT Plot quantities of interest obtained from a VB analysis 3 | % Input: 4 | %   type: String, the type of plot 5 | %   value: Values to be plotted. The format depends on the plot type: 6 | % 7 | %   Optional name-value pair arguments control the title, axis labels, 8 | %   line width, colors, etc. (see paramNames below) 9 | % 10 | % Possible types: 11 | % 'Density' -> value is a cell array of distribution name and parameters 12 | % 'Shrinkage' -> value is an NxD array of shrinkage parameters 13 | % 'Interval' -> value is a 1D array of prediction values 14 | % 'ROC' -> value is a 1D array of predictions 15 | 16 | 17 | if nargin < 2 18 | error(utils_errorMsg('vbayeslab:TooFewInputs')); 19 | end 20 | 21 | %% Parse additional options 22 | paramNames = {'Title' 'Xlabel' 'Ylabel' 'LineWidth',... 23 | 'Color' 'IntervalStyle' 'Nsample' 'Ordering',... 24 | 'yTest' 'Legend' 'Subplot' 'VarNames' }; 25 | 26 | paramDflts = {NaN NaN NaN 2,... 27 | 'red' 'shade' 50 'ascend',... 28 | NaN NaN NaN NaN}; 29 | 30 | [TextTitle,labelX,labelY,linewidth,... 31 | color,style,npoint,order,...
32 | yTest,Textlegend,~,VarNames] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); % the 'Subplot' option is parsed but currently unused 33 | 34 | % Store plot options to a structure 35 | opt.title = TextTitle; 36 | opt.labelX = labelX; 37 | opt.labelY = labelY; 38 | opt.linewidth = linewidth; 39 | opt.color = color; 40 | 41 | switch type 42 | % Plot distribution density 43 | % value must be a cell array with distribution name and 44 | % distribution parameters 45 | case 'Density' 46 | eval(['dist=',value{1},';']); % Use distribution name as a distribution object 47 | params = value{2}; % Distribution parameters 48 | dist.plotPdf(params); 49 | 50 | % Plot shrinkage parameters of a deepGLM model 51 | case 'Shrinkage' 52 | utils_plotShrinkage(value,opt); 53 | 54 | % Plot prediction interval for continuous output 55 | case 'Interval' 56 | yhat = value.yhatMatrix; 57 | yhatInterval = value.interval; 58 | predMean = mean(yhat); 59 | % If the test data has more than npoint rows, randomly draw npoint points to plot 60 | if(length(predMean)>=npoint) 61 | idx = randperm(length(yhatInterval),npoint); 62 | intervalPlot = yhatInterval(idx,:); 63 | yhatMeanPlot = predMean(idx)'; 64 | if(~isempty(yTest)) 65 | ytruePlot = yTest(idx)'; 66 | end 67 | else 68 | yhatMeanPlot = predMean'; 69 | intervalPlot = yhatInterval; 70 | ytruePlot = yTest; 71 | end 72 | % Sort data 73 | [yhatMeanPlot,sortIdx] = sort(yhatMeanPlot,order); 74 | intervalPlot = intervalPlot(sortIdx,:); 75 | if(isempty(yTest)) 76 | ytruePlot = []; 77 | else 78 | ytruePlot = ytruePlot(sortIdx); 79 | end 80 | plotInterval(yhatMeanPlot,intervalPlot,opt,... 81 | 'ytrue',ytruePlot,... 82 | 'Style',style); 83 | 84 | % Plot ROC curve for binary outcomes 85 | % Value is prediction class labels. Could be a 1D array (single ROC) 86 | % or cell array of 1D array (multiple ROC) 87 | % The 'yTest' argument must be provided 88 | case 'ROC' 89 | if(~isnumeric(yTest)) 90 | disp('Target should be a column of binary responses!') 91 | return 92 | else 93 | % Plot single ROC 94 | if(size(value,2)==1) 95 | [tpr,fpr,~] = roc(yTest',value'); 96 | plot(fpr,tpr,'LineWidth',linewidth); 97 | grid on 98 | title(TextTitle,'FontSize',20); 99 | xlabel(labelX,'FontSize',15); 100 | ylabel(labelY,'FontSize',15); 101 | % Plot multiple ROC 102 | else 103 | tpr = cell(1,size(value,2)); 104 | fpr = cell(1,size(value,2)); 105 | for i=1:size(value,2) 106 | [tpr{i},fpr{i},~] = roc(yTest',value(:,i)'); 107 | plot(fpr{i},tpr{i},'LineWidth',linewidth); 108 | grid on 109 | hold on 110 | end 111 | title(TextTitle,'FontSize',20); 112 | xlabel(labelX,'FontSize',15); 113 | ylabel(labelY,'FontSize',15); 114 | legend(Textlegend{1},Textlegend{2}); 115 | end 116 | end 117 | end 118 | end 119 | 120 | -------------------------------------------------------------------------------- /VBLab/VB/CGVB.m: -------------------------------------------------------------------------------- 1 | classdef CGVB < VBayesLab 2 | %CGVB Gaussian Variational Bayes with a Cholesky decomposed covariance 3 | %   matrix for the Gaussian variational distribution 4 | 5 | properties 6 | GradWeight1 % Momentum weight 1 7 | GradWeight2 % Momentum weight 2 8 | end 9 | 10 | methods 11 | function obj = CGVB(mdl,data,varargin) 12 | %CGVB Construct an instance of this class 13 | % mdl is either a model object or a function handle 14 | obj.Method = 'CGVB'; 15 | obj.GradWeight1 = 0.9; 16 | obj.GradWeight2 = 0.9; 17 | 18 | % Parse additional options 19 | if nargin > 2 20 | paramNames = {'NumSample' 'LearningRate' 'GradWeight1' 'GradWeight2' ... 21 | 'MaxIter' 'MaxPatience' 'WindowSize' 'Verbose' ... 22 | 'InitMethod' 'StdForInit' 'Seed' 'MeanInit' ...
23 | 'SigInitScale' 'LBPlot' 'GradientMax' 'AutoDiff' ... 24 | 'HFuntion' 'NumParams' 'DataTrain' 'Setting' ... 25 | 'StepAdaptive' 'SaveParams'}; 26 | paramDflts = {obj.NumSample obj.LearningRate obj.GradWeight1 obj.GradWeight2 ... 27 | obj.MaxIter obj.MaxPatience obj.WindowSize obj.Verbose ... 28 | obj.InitMethod obj.StdForInit obj.Seed obj.MeanInit ... 29 | obj.SigInitScale obj.LBPlot obj.GradientMax obj.AutoDiff ... 30 | obj.HFuntion obj.NumParams obj.DataTrain obj.Setting ... 31 | obj.StepAdaptive obj.SaveParams}; 32 | 33 | [obj.NumSample,... 34 | obj.LearningRate,... 35 | obj.GradWeight1,... 36 | obj.GradWeight2,... 37 | obj.MaxIter,... 38 | obj.MaxPatience,... 39 | obj.WindowSize,... 40 | obj.Verbose,... 41 | obj.InitMethod,... 42 | obj.StdForInit,... 43 | obj.Seed,... 44 | obj.MeanInit,... 45 | obj.SigInitScale,... 46 | obj.LBPlot,... 47 | obj.GradientMax,... 48 | obj.AutoDiff,... 49 | obj.HFuntion,... 50 | obj.NumParams,... 51 | obj.DataTrain,... 52 | obj.Setting,... 53 | obj.StepAdaptive,... 54 | obj.SaveParams] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); 55 | end 56 | 57 | % Check if model object or function handle is provided 58 | if (isobject(mdl)) % If model object is provided 59 | obj.Model = mdl; 60 | obj.ModelToFit = obj.Model.ModelName; % Set model name if model is specified 61 | else % If function handle is provided 62 | obj.GradHFuntion = mdl; 63 | end 64 | 65 | % Main function to run CGVB 66 | obj.Post = obj.fit(data); 67 | end 68 | 69 | %% VB main function 70 | function Post = fit(obj,data) 71 | 72 | % Extract model object if provided 73 | if (~isempty(obj.Model)) 74 | model = obj.Model; 75 | d_theta = model.NumParams; % Number of parameters 76 | else % If model object is not provided, number of parameters must be provided 77 | if (~isempty(obj.NumParams)) 78 | d_theta = obj.NumParams; 79 | else 80 | error('The number of model parameters has to be specified!') 81 | end 82 | end 83 | 84 | % Unload training parameters (only for convenience) 85 | std_init = obj.StdForInit; 86 | eps0 = obj.LearningRate; 87 | S = obj.NumSample; 88 | ini_mu = obj.MeanInit; 89 | window_size = obj.WindowSize; 90 | max_patience = obj.MaxPatience; 91 | init_scale = obj.SigInitScale; 92 | tau_threshold = obj.StepAdaptive; 93 | max_iter = obj.MaxIter; 94 | lb_plot = obj.LBPlot; 95 | max_grad = obj.GradientMax; 96 | momentum_beta1 = obj.GradWeight1; 97 | momentum_beta2 = obj.GradWeight2; 98 | grad_hfunc = obj.GradHFuntion; 99 | setting = obj.Setting; 100 | verbose = obj.Verbose; 101 | save_params = obj.SaveParams; 102 | 103 | % Store variational mean in each iteration (if specified) 104 | if(save_params) 105 | params_iter = zeros(max_iter,d_theta); 106 | end 107 | 108 | % Initialization 109 | iter = 0; 110 | patience = 0; 111 | stop = false; 112 | LB_smooth = 0; 113 | lambda_best = []; 114 | 115 | % Number of variational parameters 116 | d_lambda = d_theta + d_theta*(d_theta+1)/2; 117 | 118 | % Initialization of mu 119 | % If initial parameters are not specified, then randomly 120 | % initialize variational parameters 121 | if isempty(ini_mu) 122 | mu = normrnd(0,std_init,d_theta,1); 123 | else % If initial parameters are provided 124 | if (length(ini_mu) ~= d_theta) 125 | error(utils_errorMsg('vbayeslab:InitVectorMisMatched')) 126 | else 127 | mu = reshape(ini_mu,d_theta,1); % Must be a column vector 128 | end 129 | end 130 | 131 | % Initialize variational parameters 132 | L = init_scale*eye(d_theta); 133 | lambda = [mu;utils_vech(L)]; 134 | 135 | % Pre-allocation 136 | grad_LB =
zeros(S,d_lambda); 137 | h_lambda = zeros(S,1); 138 | rqmc = normrnd(0,1,S,d_theta); 139 | 140 | for s = 1:S 141 | % Parameters in Normal distribution 142 | varepsilon = rqmc(s,:)'; 143 | theta = mu+L*varepsilon; % Theta -> Dx1 column 144 | 145 | % Gradient of q_lambda. This function is independent of the 146 | % model 147 | [grad_log_q,log_q] = obj.log_q_grad(theta,mu,L); 148 | 149 | % If a function handle for the gradient of h(theta) is not 150 | % provided, then a model object with a method to compute the 151 | % gradient of h(theta) must be used. 152 | if isempty(grad_hfunc) 153 | if (~isempty(obj.Model)) 154 | % Call the hFunctionGrad of the model to compute 155 | % h(theta) and gradient of h(theta) 156 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 157 | else 158 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 159 | end 160 | else 161 | % If the user provides a function that directly computes 162 | % the gradient of h(theta), use it 163 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 164 | end 165 | 166 | % Make sure gradient is a column 167 | grad_h_theta = reshape(grad_h_theta,length(grad_h_theta),1); 168 | 169 | % Compute h_lambda and gradient of h_lambda 170 | h_lambda(s) = h_theta - log_q; 171 | grad_h_lambda = grad_h_theta - grad_log_q ; 172 | 173 | % Gradient of lowerbound 174 | grad_LB(s,:) = [grad_h_lambda;utils_vech(grad_h_lambda*(varepsilon'))]'; 175 | 176 | end 177 | grad_LB = mean(grad_LB)'; 178 | LB = mean(h_lambda); 179 | 180 | % Gradient clipping to avoid exploding gradients 181 | grad_norm = norm(grad_LB); 182 | if norm(grad_LB) > max_grad 183 | grad_LB = (max_grad/grad_norm)*grad_LB; 184 | end 185 | 186 | g_adaptive = grad_LB; 187 | v_adaptive = g_adaptive.^2; 188 | g_bar_adaptive = g_adaptive; 189 | v_bar_adaptive = v_adaptive; 190 | 191 | % Run main VB iterations 192 | while ~stop 193 | 194 | iter = iter+1; 195 | mu = lambda(1:d_theta); 196 | L = utils_vechinv(lambda(d_theta+1:end),2); 197 | 198 | grad_LB = zeros(S,d_lambda); 199 | h_lambda = zeros(S,1); 200 | rqmc = normrnd(0,1,S,d_theta); 201 | for s = 1:S 202 | % Parameters in Normal distribution 203 | varepsilon = rqmc(s,:)'; 204 | theta = mu+L*varepsilon; 205 | 206 | % Gradient of q_lambda. This function is independent of the 207 | % model 208 | [grad_log_q,log_q] = obj.log_q_grad(theta,mu,L); 209 | 210 | % If a function handle for the gradient of h(theta) is not 211 | % provided, then a model object with a method to compute the 212 | % gradient of h(theta) must be used.
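% Note on the estimator assembled in this loop (a sketch of the derivation,
% using only quantities defined above): with the reparameterization
% theta = mu + L*varepsilon, varepsilon ~ N(0,I), an unbiased estimate of
% the lower bound gradient w.r.t. lambda = (mu,vech(L)) is the average over
% the S draws of
%    gradient w.r.t. mu      : grad_h_lambda
%    gradient w.r.t. vech(L) : utils_vech(grad_h_lambda*varepsilon')
% where h_lambda(theta) = h(theta) - log q_lambda(theta), which is exactly
% what grad_LB(s,:) accumulates.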
213 | if isempty(grad_hfunc) 214 | if (~isempty(obj.Model)) 215 | % Call the hFunctionGrad of the model to compute 216 | % h(theta) and gradient of h(theta) 217 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 218 | else 219 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 220 | end 221 | else 222 | % If the user provides a function that directly computes 223 | % the gradient of h(theta), use it 224 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 225 | end 226 | 227 | % Make sure gradient is a column 228 | grad_h_theta = reshape(grad_h_theta,length(grad_h_theta),1); 229 | 230 | % Compute h_lambda and gradient of h_lambda 231 | h_lambda(s) = h_theta - log_q; 232 | grad_h_lambda = grad_h_theta - grad_log_q ; 233 | 234 | % Gradient of lowerbound 235 | grad_LB(s,:) = [grad_h_lambda;utils_vech(grad_h_lambda*(varepsilon'))]'; 236 | end 237 | 238 | grad_LB = mean(grad_LB)'; 239 | 240 | % gradient clipping 241 | grad_norm = norm(grad_LB); 242 | if norm(grad_LB)>max_grad 243 | grad_LB = (max_grad/grad_norm)*grad_LB; 244 | end 245 | 246 | g_adaptive = grad_LB; 247 | v_adaptive = g_adaptive.^2; 248 | g_bar_adaptive = momentum_beta1*g_bar_adaptive+(1-momentum_beta1)*g_adaptive; 249 | v_bar_adaptive = momentum_beta2*v_bar_adaptive+(1-momentum_beta2)*v_adaptive; 250 | 251 | % After a specified number of iterations, make the step 252 | % size smaller. This can be modified to implement more 253 | % sophisticated adaptive learning rate methods. 254 | if iter>=tau_threshold 255 | stepsize = eps0*tau_threshold/iter; 256 | else 257 | stepsize = eps0; 258 | end 259 | 260 | % Update new lambda 261 | lambda = lambda + stepsize*g_bar_adaptive./sqrt(v_bar_adaptive); 262 | 263 | % Estimate the lowerbound at the current iteration 264 | LB(iter) = mean(h_lambda); 265 | 266 | % Smooth the lowerbound 267 | if iter>=window_size 268 | LB_smooth(iter-window_size+1) = mean(LB(iter-window_size+1:iter)); 269 | end 270 | 271 | % Check for early stopping 272 | if (iter>window_size)&&(LB_smooth(iter-window_size+1)>=max(LB_smooth)) 273 | lambda_best = lambda; 274 | patience = 0; 275 | else 276 | patience = patience+1; 277 | end 278 | 279 | if (patience>max_patience)||(iter>max_iter) 280 | stop = true; 281 | end 282 | 283 | % Display training information 284 | if(verbose) 285 | if iter> window_size 286 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB_smooth(iter-window_size))]) 287 | else 288 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB(iter))]) 289 | end 290 | end 291 | 292 | % If users want to save variational mean in each iteration 293 | % Only used when debugging code 294 | if(save_params) 295 | params_iter(iter,:) = mu; 296 | end 297 | end 298 | 299 | % Store output 300 | if(save_params) 301 | Post.muIter = params_iter(1:iter-1,:); 302 | end 303 | 304 | % If the algorithm stops too early 305 | if(isempty(lambda_best)) 306 | lambda_best = lambda; 307 | end 308 | 309 | % Store final results (using the best lambda found by early stopping) 310 | Post.LB_smooth = LB_smooth; 311 | Post.LB = LB; 312 | Post.lambda = lambda_best; 313 | Post.mu = lambda_best(1:d_theta); 314 | Post.L = utils_vechinv(lambda_best(d_theta+1:end),2); 315 | Post.Sigma = Post.L*(Post.L'); 316 | Post.sigma2 = diag(Post.Sigma); 317 | 318 | % If users want to plot the lowerbound 319 | if(lb_plot) 320 | obj.plot_lb(LB_smooth); 321 | end 322 | 323 | end 324 | 325 | %% Gradient of log_q_lambda.
This is independent of the model 326 | % Log pdf of multivariate normal distribution 327 | function [grad_log_q,log_q] = log_q_grad(obj,theta,mu,L) 328 | d = length(theta); 329 | Sigma = L*(L'); 330 | log_q = -d/2*log(2*pi)-1/2*log(det(Sigma))-1/2*(theta-mu)'*(Sigma\(theta-mu)); 331 | grad_log_q = -Sigma\(theta-mu); 332 | end 333 | end 334 | end 335 | 336 | -------------------------------------------------------------------------------- /VBLab/VB/MGVB.m: -------------------------------------------------------------------------------- 1 | classdef MGVB < VBayesLab 2 | %MGVB Manifold Gaussian Variational Bayes: the covariance matrix of the 3 | %   Gaussian variational distribution is updated on the manifold of SPD matrices 4 | 5 | properties 6 | GradClipInit % Gradient clipping threshold for the first iteration (0 means no clipping) 7 | end 8 | 9 | methods 10 | function obj = MGVB(mdl,data,varargin) 11 | %MGVB Construct an instance of this class 12 | % mdl is either a model object or a function handle 13 | obj.Method = 'MGVB'; 14 | obj.GradWeight = 0.4; % Small gradient weight is better 15 | obj.GradClipInit = 0; % Sometimes we need to clip the gradient early 16 | 17 | % Parse additional options 18 | if nargin > 2 19 | paramNames = {'NumSample' 'LearningRate' 'GradWeight' 'GradClipInit' ... 20 | 'MaxIter' 'MaxPatience' 'WindowSize' 'Verbose' ... 21 | 'InitMethod' 'StdForInit' 'Seed' 'MeanInit' ... 22 | 'SigInitScale' 'LBPlot' 'GradientMax' ... 23 | 'NumParams' 'DataTrain' 'Setting' 'StepAdaptive' ... 24 | 'SaveParams'}; 25 | paramDflts = {obj.NumSample obj.LearningRate obj.GradWeight obj.GradClipInit ... 26 | obj.MaxIter obj.MaxPatience obj.WindowSize obj.Verbose ... 27 | obj.InitMethod obj.StdForInit obj.Seed obj.MeanInit ... 28 | obj.SigInitScale obj.LBPlot obj.GradientMax ... 29 | obj.NumParams obj.DataTrain obj.Setting obj.StepAdaptive ... 30 | obj.SaveParams}; 31 | 32 | [obj.NumSample,... 33 | obj.LearningRate,... 34 | obj.GradWeight,... 35 | obj.GradClipInit,... 36 | obj.MaxIter,... 37 | obj.MaxPatience,... 38 | obj.WindowSize,... 39 | obj.Verbose,... 40 | obj.InitMethod,... 41 | obj.StdForInit,... 42 | obj.Seed,... 43 | obj.MeanInit,... 44 | obj.SigInitScale,... 45 | obj.LBPlot,... 46 | obj.GradientMax,... 47 | obj.NumParams,... 48 | obj.DataTrain,... 49 | obj.Setting,... 50 | obj.StepAdaptive,...
51 | obj.SaveParams] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); 52 | end 53 | 54 | % Check if model object or function handle is provided 55 | if (isobject(mdl)) % If model object is provided 56 | obj.Model = mdl; 57 | obj.ModelToFit = obj.Model.ModelName; % Set model name if model is specified 58 | else % If function handle is provided 59 | obj.HFuntion = mdl; 60 | end 61 | 62 | % Main function to run MGVB 63 | obj.Post = obj.fit(data); 64 | end 65 | 66 | %% VB main function 67 | function Post = fit(obj,data) 68 | 69 | % Extract model object if provided 70 | if (~isempty(obj.Model)) 71 | model = obj.Model; 72 | d_theta = model.NumParams; % Number of parameters 73 | else % If model object is not provided, number of parameters must be provided 74 | if (~isempty(obj.NumParams)) 75 | d_theta = obj.NumParams; 76 | else 77 | error('The number of model parameters has to be specified!') 78 | end 79 | end 80 | 81 | % Extract sampling setting 82 | std_init = obj.StdForInit; 83 | eps0 = obj.LearningRate; 84 | S = obj.NumSample; 85 | ini_mu = obj.MeanInit; 86 | window_size = obj.WindowSize; 87 | max_patience = obj.MaxPatience; 88 | momentum_weight = obj.GradWeight; 89 | init_scale = obj.SigInitScale; 90 | stepsize_adapt = obj.StepAdaptive; 91 | max_iter = obj.MaxIter; 92 | lb_plot = obj.LBPlot; 93 | max_grad = obj.GradientMax; 94 | max_grad_init = obj.GradClipInit; 95 | hfunc = obj.HFuntion; 96 | setting = obj.Setting; 97 | verbose = obj.Verbose; 98 | save_params = obj.SaveParams; 99 | 100 | % Store variational mean in each iteration (if specified) 101 | if(save_params) 102 | params_iter = zeros(max_iter,d_theta); 103 | end 104 | 105 | % Initialization 106 | iter = 0; 107 | patience = 0; 108 | stop = false; 109 | LB_smooth = 0; 110 | 111 | % Initialization of mu 112 | % If initial parameters are not specified, then use some 113 | % initialization methods 114 | if isempty(ini_mu) 115 | mu = normrnd(0,std_init,d_theta,1); 116 | else % If initial parameters are provided 117 | mu = ini_mu; 118 | end 119 | 120 | Sig = init_scale*eye(d_theta); % Initialization of Sig 121 | c12 = zeros(1,d_theta+d_theta*d_theta); % Control variate, initialised to zero 122 | Sig_inv = eye(d_theta)/Sig; 123 | 124 | gra_log_q_lambda = zeros(S,d_theta+d_theta*d_theta); % Gradient of log_q 125 | grad_log_q_h_function = zeros(S,d_theta+d_theta*d_theta); % (gradient of log_q) x h(theta) 126 | grad_log_q_h_function_cv = zeros(S,d_theta+d_theta*d_theta); % Control-variate version: (gradient of log_q) x (h(theta)-c) 127 | 128 | rqmc = utils_normrnd_qmc(S,d_theta); % Generate standard normal numbers, using quasi-MC 129 | C_lower = chol(Sig,'lower'); 130 | 131 | for s = 1:S 132 | % Parameters in Normal distribution 133 | theta = mu + C_lower*rqmc(s,:)'; 134 | 135 | % If a function handle to compute h(theta) is not provided, 136 | % then a model object with a method to compute h(theta) 137 | % must be used.
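% Note: this sampling loop implements the score-function (log-derivative
% trick) form of the lower bound gradient,
%    grad LB = E_q[ grad_lambda log q_lambda(theta) * (h(theta) - log q_lambda(theta)) ],
% so each draw contributes gra_log_q_lambda(s,:)*h_function; the control
% variate c12 (recomputed after the loop) is subtracted to reduce the
% variance of this estimator.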
138 | if isempty(hfunc) 139 | if (~isempty(obj.Model)) 140 | % Call the hFunction of the model to compute h(theta) 141 | h_theta = model.hFunction(data,theta); 142 | else 143 | error('A model object or a function handle to compute h(theta) must be provided!') 144 | end 145 | else 146 | % If the user provides a function that directly computes 147 | % h(theta), use it 148 | h_theta = hfunc(data,theta,setting); 149 | end 150 | 151 | % Log q_lambda 152 | log_q_lambda = -d_theta/2*log(2*pi)-1/2*log(det(Sig))-1/2*(theta-mu)'*Sig_inv*(theta-mu); 153 | 154 | % h function 155 | h_function = h_theta - log_q_lambda; 156 | 157 | aux = Sig_inv*(theta-mu); 158 | gra_log_q_mu = aux; 159 | gra_log_q_Sig = -1/2*Sig_inv+1/2*aux*(aux'); 160 | gra_log_q_lambda(s,:) = [gra_log_q_mu;gra_log_q_Sig(:)]'; 161 | grad_log_q_h_function(s,:) = gra_log_q_lambda(s,:)*h_function; 162 | grad_log_q_h_function_cv(s,:) = gra_log_q_lambda(s,:).*(h_function-c12); 163 | end 164 | 165 | c12 = zeros(1,d_theta+d_theta*d_theta); 166 | for i = 1:d_theta+d_theta*d_theta 167 | aa = cov(grad_log_q_h_function(:,i),gra_log_q_lambda(:,i)); 168 | c12(i) = aa(1,2)/aa(2,2); 169 | end 170 | Y12 = mean(grad_log_q_h_function_cv)'; % Euclidean gradient of the lower bound LB 171 | 172 | % Gradient clipping at the beginning 173 | if(max_grad_init>0) 174 | grad_norm = norm(Y12); 175 | norm_gradient_threshold = max_grad_init; 176 | if grad_norm>norm_gradient_threshold 177 | Y12 = (norm_gradient_threshold/grad_norm)*Y12; 178 | end 179 | end 180 | 181 | % To use manifold GVB for other models, all we need is the Euclidean gradient 182 | % of the LB. Everything below is model-independent. 183 | gradLB_mu = Sig*Y12(1:d_theta); % Natural gradient of LB w.r.t. mu 184 | gradLB_Sig = Sig*reshape(Y12(d_theta+1:end),d_theta,d_theta)*Sig; % Natural gradient of LB w.r.t. Sigma 185 | gradLB_Sig_momentum = gradLB_Sig; % Initialise momentum gradient for Sig 186 | gradLB_mu_momentum = gradLB_mu; % Initialise momentum gradient for mu 187 | 188 | % Prepare for the next iterations 189 | mu_best = mu; 190 | Sig_best = Sig; 191 | while ~stop 192 | 193 | iter = iter+1; 194 | if iter>stepsize_adapt 195 | stepsize = eps0*stepsize_adapt/iter; 196 | else 197 | stepsize = eps0; 198 | end 199 | Sig_old = Sig; 200 | Sig = obj.retraction_spd(Sig_old,gradLB_Sig_momentum,stepsize); % retraction to update Sigma 201 | mu = mu + stepsize*gradLB_mu_momentum; % update mu 202 | 203 | gra_log_q_lambda = zeros(S,d_theta + d_theta*d_theta); 204 | grad_log_q_h_function = zeros(S,d_theta + d_theta*d_theta); 205 | grad_log_q_h_function_cv = zeros(S,d_theta + d_theta*d_theta); % control variate 206 | 207 | lb_log_h = zeros(S,1); 208 | Sig_inv = eye(d_theta)/Sig; 209 | rqmc = utils_normrnd_qmc(S,d_theta); 210 | C_lower = chol(Sig,'lower'); 211 | for s = 1:S 212 | % Parameters in Normal distribution 213 | theta = mu + C_lower*rqmc(s,:)'; 214 | 215 | % If a function handle to compute h(theta) is not provided, 216 | % then a model object with a method to compute h(theta) 217 | % must be used.
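% Note on the control variates: for each coordinate i, the coefficient
% c12(i) = Cov(g_i*h, g_i)/Var(g_i), estimated from the S draws of the
% previous iteration, is the variance-minimizing choice for the estimator
% g_i*(h - c12(i)); the estimator remains unbiased because E[g] = 0 for
% the score g = grad_lambda log q_lambda.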
218 | if isempty(hfunc) 219 | if (~isempty(obj.Model)) 220 | % Call the hFunction of the model to compute h(theta) 221 | h_theta = model.hFunction(data,theta); 222 | else 223 | error('A model object or a function handle to compute h(theta) must be provided!') 224 | end 225 | else 226 | % If the user provides a function that directly computes 227 | % h(theta), use it 228 | h_theta = hfunc(data,theta,setting); 229 | end 230 | 231 | % log q_lambda 232 | log_q_lambda = -d_theta/2*log(2*pi)-1/2*log(det(Sig))-1/2*(theta-mu)'*Sig_inv*(theta-mu); 233 | 234 | h_function = h_theta - log_q_lambda; 235 | 236 | % To compute the lowerbound 237 | lb_log_h(s) = h_function; 238 | 239 | aux = Sig_inv*(theta-mu); 240 | gra_log_q_mu = aux; 241 | gra_log_q_Sig = -1/2*Sig_inv+1/2*aux*(aux'); 242 | gra_log_q_lambda(s,:) = [gra_log_q_mu;gra_log_q_Sig(:)]'; 243 | grad_log_q_h_function(s,:) = gra_log_q_lambda(s,:)*h_function; 244 | grad_log_q_h_function_cv(s,:) = gra_log_q_lambda(s,:).*(h_function-c12); 245 | end 246 | for i = 1:d_theta+d_theta*d_theta 247 | aa = cov(grad_log_q_h_function(:,i),gra_log_q_lambda(:,i)); 248 | c12(i) = aa(1,2)/aa(2,2); 249 | end 250 | Y12 = mean(grad_log_q_h_function_cv)'; 251 | 252 | % Clipping the gradient 253 | grad_norm = norm(Y12); 254 | norm_gradient_threshold = max_grad; 255 | if grad_norm > norm_gradient_threshold 256 | Y12 = (norm_gradient_threshold/grad_norm)*Y12; 257 | end 258 | 259 | gradLB_mu = Sig*Y12(1:d_theta); 260 | gradLB_Sig = Sig*reshape(Y12(d_theta+1:end),d_theta,d_theta)*Sig; 261 | 262 | zeta = obj.parallel_transport_spd(Sig_old,Sig,gradLB_Sig_momentum); % vector transport of gradLB_Sig_momentum 263 | 264 | % from the previous point Sig_old to the new point Sig 265 | gradLB_Sig_momentum = momentum_weight*zeta+(1-momentum_weight)*gradLB_Sig; % update momentum grad for Sigma 266 | gradLB_mu_momentum = momentum_weight*gradLB_mu_momentum+(1-momentum_weight)*gradLB_mu; % update momentum grad for mu 267 | 268 | % Lower bound 269 | LB(iter) = mean(lb_log_h); 270 | 271 | % Smooth the lowerbound and store best results 272 | if iter>window_size 273 | LB_smooth(iter-window_size) = mean(LB(iter-window_size:iter)); % smooth out LB by moving average 274 | if LB_smooth(iter-window_size)>=max(LB_smooth) 275 | mu_best = mu; 276 | Sig_best = Sig; 277 | patience = 0; 278 | else 279 | patience = patience + 1; 280 | end 281 | end 282 | 283 | if (patience>max_patience)||(iter>max_iter) 284 | stop = true; 285 | end 286 | 287 | % Display training information 288 | if(verbose) 289 | if iter> window_size 290 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB_smooth(iter-window_size))]) 291 | else 292 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB(iter))]) 293 | end 294 | end 295 | 296 | % If users want to save variational mean in each iteration 297 | % Only used when debugging code 298 | if(save_params) 299 | params_iter(iter,:) = mu; 300 | end 301 | 302 | end 303 | 304 | % Store output 305 | if(save_params) 306 | Post.muIter = params_iter(1:iter-1,:); 307 | end 308 | 309 | % Store final results 310 | Post.LB_smooth = LB_smooth; 311 | Post.LB = LB; 312 | Post.mu = mu_best; 313 | Post.Sigma = Sig_best; 314 | Post.sigma2 = diag(Post.Sigma); 315 | 316 | % Plot lowerbound 317 | if(lb_plot) 318 | obj.plot_lb(LB_smooth); 319 | end 320 | end 321 | 322 | %% Parallel transport on the manifold of SPD matrices 323 | function zeta = parallel_transport_spd(obj,X, Y, eta) 324 | E = sqrtm((Y/X)); 325 | zeta = E*eta*E'; 326 | end 327 | 328 | %% Retraction on the manifold of SPD matrices 329 | function Y = retraction_spd(obj,X, eta, t) 330 | teta = t*eta; 331 | symm = @(X) .5*(X+X'); 332 | Y = symm(X +
teta + .5*teta*(X\teta)); 333 | [~,index] = chol(Y); 334 | iter = 1; 335 | max_iter = 5; 336 | while (index)&&(iter<=max_iter) 337 | iter = iter+1; 338 | t = t/2; 339 | teta = t*eta; 340 | Y = symm(X + teta + .5*teta*(X\teta)); 341 | [~,index] = chol(Y); 342 | end 343 | if iter >= max_iter 344 | Y = X; 345 | end 346 | end 347 | end 348 | end 349 | 350 | -------------------------------------------------------------------------------- /VBLab/VB/NAGVAC.m: -------------------------------------------------------------------------------- 1 | classdef NAGVAC < VBayesLab 2 | %NAGVAC Natural gradient Gaussian Variational Approximation with factor 3 | %   Covariance: Sigma = b*b' + diag(c.^2) with a single factor b 4 | 5 | properties 6 | GradClipInit % Gradient clipping threshold for the first iteration (0 means no clipping) 7 | end 8 | 9 | methods 10 | function obj = NAGVAC(mdl,data,varargin) 11 | %NAGVAC Construct an instance of this class 12 | % mdl is either a model object or a function handle 13 | obj.Method = 'NAGVAC'; 14 | obj.WindowSize = 30; 15 | obj.NumSample = 10; 16 | obj.LearningRate = 0.01; 17 | obj.MaxIter = 5000; 18 | obj.MaxPatience = 20; 19 | obj.StdForInit = 0.01; 20 | obj.StepAdaptive = obj.MaxIter/2; 21 | obj.GradWeight = 0.9; 22 | obj.LBPlot = true; 23 | obj.GradientMax = 100; 24 | obj.InitMethod = 'Random'; 25 | obj.Verbose = true; 26 | obj.SaveParams = false; 27 | 28 | % Parse additional options 29 | if nargin > 2 30 | paramNames = {'NumSample' 'LearningRate' 'GradWeight' 'GradClipInit'... 31 | 'MaxIter' 'MaxPatience' 'WindowSize' 'Verbose' ... 32 | 'InitMethod' 'StdForInit' 'Seed' 'MeanInit' ... 33 | 'SigInitScale' 'LBPlot' 'GradientMax' 'AutoDiff' ... 34 | 'HFuntion' 'NumParams' 'DataTrain' 'Setting'... 35 | 'StepAdaptive' 'SaveParams'}; 36 | paramDflts = {obj.NumSample obj.LearningRate obj.GradWeight obj.GradClipInit ... 37 | obj.MaxIter obj.MaxPatience obj.WindowSize obj.Verbose ... 38 | obj.InitMethod obj.StdForInit obj.Seed obj.MeanInit ... 39 | obj.SigInitScale obj.LBPlot obj.GradientMax obj.AutoDiff ... 40 | obj.HFuntion obj.NumParams obj.DataTrain obj.Setting ... 41 | obj.StepAdaptive obj.SaveParams}; 42 | 43 | [obj.NumSample,... 44 | obj.LearningRate,... 45 | obj.GradWeight,... 46 | obj.GradClipInit,... 47 | obj.MaxIter,... 48 | obj.MaxPatience,... 49 | obj.WindowSize,... 50 | obj.Verbose,... 51 | obj.InitMethod,... 52 | obj.StdForInit,... 53 | obj.Seed,... 54 | obj.MeanInit,... 55 | obj.SigInitScale,... 56 | obj.LBPlot,... 57 | obj.GradientMax,... 58 | obj.AutoDiff,... 59 | obj.HFuntion,... 60 | obj.NumParams,... 61 | obj.DataTrain,... 62 | obj.Setting,... 63 | obj.StepAdaptive,...
64 | obj.SaveParams] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); 65 | end 66 | 67 | % Check if model object or function handle is provided 68 | if (isobject(mdl)) % If model object is provided 69 | obj.Model = mdl; 70 | obj.ModelToFit = obj.Model.ModelName; % Set model name if model is specified 71 | else % If function handle is provided 72 | obj.GradHFuntion = mdl; 73 | end 74 | 75 | % Main function to run NAGVAC 76 | obj.Post = obj.fit(data); 77 | 78 | end 79 | 80 | %% VB main function 81 | function Post = fit(obj,data) 82 | 83 | % Extract model object if provided 84 | if (~isempty(obj.Model)) 85 | model = obj.Model; 86 | d_theta = model.NumParams; % Number of parameters 87 | else % If model object is not provided, number of parameters must be provided 88 | if (~isempty(obj.NumParams)) 89 | d_theta = obj.NumParams; 90 | else 91 | error('The number of model parameters has to be specified!') 92 | end 93 | end 94 | 95 | % Extract sampling setting 96 | std_init = obj.StdForInit; 97 | eps0 = obj.LearningRate; 98 | S = obj.NumSample; 99 | ini_mu = obj.MeanInit; 100 | window_size = obj.WindowSize; 101 | max_patience = obj.MaxPatience; 102 | init_scale = obj.SigInitScale; 103 | momentum_weight = obj.GradWeight; 104 | tau_threshold = obj.StepAdaptive; 105 | max_iter = obj.MaxIter; 106 | lb_plot = obj.LBPlot; 107 | max_grad = obj.GradientMax; 108 | grad_hfunc = obj.GradHFuntion; 109 | setting = obj.Setting; 110 | verbose = obj.Verbose; 111 | save_params = obj.SaveParams; 112 | 113 | % Store variational mean in each iteration (if specified) 114 | if(save_params) 115 | params_iter = zeros(max_iter,d_theta); 116 | end 117 | 118 | % Initialization 119 | iter = 1; 120 | patience = 0; 121 | stop = false; 122 | LB_smooth = 0; 123 | lambda_best = []; 124 | 125 | % Initialization of mu 126 | % If initial parameters are not specified, then use some 127 | % initialization methods 128 | if isempty(ini_mu) 129 | mu = normrnd(0,std_init,d_theta,1); 130 | else % If initial parameters are provided 131 | mu = ini_mu; 132 | end 133 | 134 | b = normrnd(0,std_init,d_theta,1); 135 | c = init_scale*ones(d_theta,1); 136 | 137 | lambda = [mu;b;c]; % Variational parameters vector 138 | lambda_seq(iter,:) = lambda'; 139 | 140 | % Store the variational mean of each iteration 141 | param(iter,:) = mu'; 142 | 143 | %% First VB iteration 144 | rqmc = normrnd(0,1,S,d_theta+1); 145 | grad_lb_iter = zeros(S,3*d_theta); % Store gradient of lb over S MC simulations 146 | lb_first_term = zeros(S,1); % To estimate the first term in lb = E_q(log f)-E_q(log q) 147 | 148 | for s = 1:S 149 | % Parameters in Normal distribution 150 | U_normal = rqmc(s,:)'; 151 | epsilon1 = U_normal(1); 152 | epsilon2 = U_normal(2:end); 153 | theta = mu + b*epsilon1 + c.*epsilon2; % Compute Theta 154 | 155 | % If a function handle for the gradient of h(theta) is not 156 | % provided, then a model object with a method to compute the 157 | % gradient of h(theta) must be used.
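% Note: NAGVAC uses the one-factor covariance Sigma = b*b' + diag(c.^2),
% so a draw from q_lambda is generated above as
% theta = mu + b*epsilon1 + c.*epsilon2 with a scalar epsilon1 ~ N(0,1)
% and epsilon2 ~ N(0,I_d); the variational parameter vector is
% lambda = (mu, b, c).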
158 | if isempty(grad_hfunc) 159 | if (~isempty(obj.Model)) 160 | % Call the hFunctionGrad of the model to compute 161 | % h(theta) and gradient of h(theta) 162 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 163 | else 164 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 165 | end 166 | else 167 | % If the user provides a function that directly computes 168 | % the gradient of h(theta), use it 169 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 170 | end 171 | 172 | % Gradient of log variational distribution 173 | grad_log_q = obj.grad_log_q_function(b,c,theta,mu); 174 | 175 | % Gradient of h(theta) and lowerbound 176 | grad_theta = grad_h_theta - grad_log_q; 177 | grad_lb_iter(s,:) = [grad_theta;epsilon1*grad_theta;epsilon2.*grad_theta]'; 178 | 179 | % For the lower bound 180 | lb_first_term(s) = h_theta; 181 | 182 | end 183 | 184 | % Estimation of lowerbound 185 | logdet = log(det(1 + (b./(c.^2))'*b)) + sum(log((c.^2))); 186 | lb_log_q = -0.5*d_theta*log(2*pi) - 0.5*logdet - d_theta/2; % E_q(log q(theta)) in closed form 187 | LB(iter) = mean(lb_first_term) - lb_log_q; 188 | 189 | % Gradient of the lower bound 190 | grad_lb = (mean(grad_lb_iter))'; 191 | gradient_lambda = obj.inverse_fisher_times_grad(b,c,grad_lb); 192 | gradient_bar = gradient_lambda; 193 | 194 | %% Main VB loop 195 | while ~stop 196 | 197 | % If users want to save variational mean in each iteration 198 | % Only used when debugging code 199 | if(save_params) 200 | params_iter(iter,:) = mu; 201 | end 202 | 203 | iter = iter + 1; 204 | rqmc = normrnd(0,1,S,d_theta+1); 205 | grad_lb_iter = zeros(S,3*d_theta); % store gradient of lb over S MC simulations 206 | lb_first_term = zeros(S,1); % to estimate the first term in lb = E_q(log f)-E_q(log q) 207 | for s=1:S 208 | % Parameters in Normal distribution 209 | U_normal = rqmc(s,:)'; 210 | epsilon1 = U_normal(1); 211 | epsilon2 = U_normal(2:end); 212 | theta = mu + b*epsilon1 + c.*epsilon2; 213 | 214 | % If a function handle for the gradient of h(theta) is not 215 | % provided, then a model object with a method to compute the 216 | % gradient of h(theta) must be used.
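% Note: gradient_lambda below is the natural gradient, i.e. the inverse
% Fisher information matrix of q_lambda applied to the plain gradient.
% inverse_fisher_times_grad exploits the factor structure
% Sigma = b*b' + diag(c.^2) to compute this product in closed form,
% without ever forming the full (3*d_theta x 3*d_theta) Fisher matrix.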
217 | if isempty(grad_hfunc) 218 | if (~isempty(obj.Model)) 219 | % Call the hFunctionGrad of the model to compute 220 | % h(theta) and gradient of h(theta) 221 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 222 | else 223 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 224 | end 225 | else 226 | % If the user provides a function that directly computes 227 | % the gradient of h(theta), use it 228 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 229 | end 230 | 231 | % Gradient of log variational distribution 232 | grad_log_q = obj.grad_log_q_function(b,c,theta,mu); 233 | 234 | % Gradient of h(theta) and lowerbound 235 | grad_theta = grad_h_theta - grad_log_q; 236 | grad_lb_iter(s,:) = [grad_theta;epsilon1*grad_theta;epsilon2.*grad_theta]'; 237 | 238 | % For the lower bound 239 | lb_first_term(s) = h_theta; 240 | end 241 | 242 | % Estimation of lowerbound 243 | logdet = log(det(1 + (b./(c.^2))'*b)) + sum(log((c.^2))); 244 | lb_log_q = -0.5*d_theta*log(2*pi) - 0.5*logdet - d_theta/2; % E_q(log q(theta)) in closed form 245 | LB(iter) = mean(lb_first_term) - lb_log_q; 246 | 247 | % Gradient of the lower bound 248 | grad_lb = (mean(grad_lb_iter))'; 249 | gradient_lambda = obj.inverse_fisher_times_grad(b,c,grad_lb); 250 | 251 | % Gradient clipping 252 | grad_norm = norm(gradient_lambda); 253 | norm_gradient_threshold = max_grad; 254 | if norm(gradient_lambda) > norm_gradient_threshold 255 | gradient_lambda = (norm_gradient_threshold/grad_norm)*gradient_lambda; 256 | end 257 | 258 | gradient_bar = momentum_weight*gradient_bar + (1-momentum_weight)*gradient_lambda; 259 | 260 | if iter > tau_threshold 261 | stepsize = eps0*tau_threshold/iter; 262 | else 263 | stepsize = eps0; 264 | end 265 | lambda = lambda + stepsize*gradient_bar; 266 | lambda_seq(iter,:) = lambda'; 267 | 268 | % Reconstruct variational parameters 269 | mu = lambda(1:d_theta,1); 270 | b = lambda(d_theta+1:2*d_theta,1); 271 | c = lambda(2*d_theta+1:end); 272 | 273 | % Store parameters in each iteration 274 | param(iter,:) = mu'; 275 | 276 | if iter > window_size 277 | LB_smooth(iter-window_size) = mean(LB(iter-window_size+1:iter)); 278 | if LB_smooth(end)>= max(LB_smooth) 279 | lambda_best = lambda; 280 | patience = 0; 281 | else 282 | patience = patience + 1; 283 | end 284 | end 285 | if (patience>max_patience)||(iter>max_iter) 286 | stop = true; 287 | end 288 | 289 | % Display training information 290 | if(verbose) 291 | if iter> window_size 292 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB_smooth(iter-window_size))]) 293 | else 294 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB(iter))]) 295 | end 296 | end 297 | 298 | end 299 | 300 | % Store output 301 | if(save_params) 302 | Post.muIter = params_iter(1:iter-1,:); 303 | end 304 | 305 | % If the algorithm stops too early 306 | if(isempty(lambda_best)) 307 | lambda_best = lambda; 308 | end 309 | 310 | % Store final results 311 | Post.LB_smooth = LB_smooth; 312 | Post.LB = LB; 313 | Post.lambda = lambda_best; 314 | Post.mu = lambda_best(1:d_theta); 315 | Post.b = lambda_best(d_theta+1:2*d_theta); 316 | Post.c = lambda_best(2*d_theta+1:end); 317 | Post.Sigma = Post.b*Post.b' + diag(Post.c.^2); 318 | Post.sigma2 = diag(Post.Sigma); 319 | 320 | % Plot lowerbound 321 | if(lb_plot) 322 | obj.plot_lb(LB_smooth); 323 | end 324 | 325 | end 326 | 327 | %% Obtain samples from the estimated VB posterior 328 | % n: number of samples to draw 329 | function Sample = sampleFromVB(obj,Post,n) 330 | mu = Post.mu; 331 | b =
Post.b; 332 | c = Post.c; 333 | Sigma = b*b'+ diag(c.^2); 334 | 335 | Sample = mvnrnd(mu,Sigma,n); 336 | end 337 | 338 | %% Compute I(lambda)^(-1) x grad, exploiting the factor structure of Sigma 339 | function prod = inverse_fisher_times_grad(obj,b,c,grad) 340 | d = length(b); 341 | grad1 = grad(1:d); 342 | grad2 = grad(d+1:2*d); 343 | grad3 = grad(2*d+1:end); 344 | 345 | c2 = c.^2; 346 | b2 = b.^2; 347 | 348 | prod1 = (b'*grad1)*b+(grad1.*c2); 349 | 350 | alpha = 1/(1+sum(b2./c2)); 351 | Cminus = diag(1./c2); 352 | Cminus_b = b./c2; 353 | Sigma_inv = Cminus-alpha*(Cminus_b*Cminus_b'); 354 | 355 | A11_inv = (1/(1-alpha))*((1-1/(sum(b2)+1-alpha))*(b*b')+diag(c2)); 356 | 357 | C = diag(c); 358 | A12 = 2*(C*Sigma_inv*b*ones(1,d)).*Sigma_inv; 359 | A21 = A12'; 360 | A22 = 2*C*(Sigma_inv.*Sigma_inv)*C; 361 | 362 | D = A22-A21*A11_inv*A12; 363 | prod2 = A11_inv*grad2+(A11_inv*A12)*(D\A21)*(A11_inv*grad2)-(A11_inv*A12)*(D\grad3); 364 | prod3 = -(D\A21)*(A11_inv*grad2)+D\grad3; 365 | 366 | prod = [prod1;prod2;prod3]; 367 | end 368 | 369 | %% Gradient of log q_lambda 370 | function grad_log_q = grad_log_q_function(obj,b,c,theta,mu) 371 | x = theta - mu; 372 | d = b./c.^2; 373 | grad_log_q = -x./c.^2+(d'*x)/(1+(d'*b))*d; 374 | end 375 | 376 | end 377 | end 378 | 379 | -------------------------------------------------------------------------------- /VBLab/VB/VAFC.m: -------------------------------------------------------------------------------- 1 | classdef VAFC < VBayesLab 2 | %VAFC Gaussian Variational Approximation with Factor Covariance: 3 | %   Sigma = B*B' + diag(c.^2) with NumFactor factors 4 | 5 | properties 6 | NumFactor % Number of factors 7 | Adelta % Settings for the ADADELTA optimizer (used when Optimization is 'Adelta') 8 | end 9 | 10 | methods 11 | function obj = VAFC(mdl,data,varargin) 12 | %VAFC Construct an instance of this class 13 | % mdl is either a model object or a function handle 14 | obj.Method = 'VAFC'; 15 | obj.NumFactor = 4; 16 | obj.Adelta.rho = 0.95; 17 | obj.Adelta.eps = 10^-6; 18 | obj.Optimization = 'Simple'; % Could be 'Adelta' 19 | obj.SigInitScale = 0.01; 20 | 21 | % Parse additional options 22 | if nargin > 2 23 | % (specified as name-value pairs) 24 | paramNames = {'NumSample' 'LearningRate' 'GradWeight' ... 25 | 'MaxIter' 'MaxPatience' 'WindowSize' 'Verbose' ... 26 | 'InitMethod' 'StdForInit' 'Seed' 'MeanInit' ... 27 | 'SigInitScale' 'LBPlot' 'GradientMax' 'AutoDiff' ... 28 | 'HFuntion' 'NumParams' 'DataTrain' 'Setting' ... 29 | 'StepAdaptive' 'NumFactor' 'SaveParams' 'Optimization'}; 30 | paramDflts = {obj.NumSample obj.LearningRate obj.GradWeight ... 31 | obj.MaxIter obj.MaxPatience obj.WindowSize obj.Verbose ... 32 | obj.InitMethod obj.StdForInit obj.Seed obj.MeanInit ... 33 | obj.SigInitScale obj.LBPlot obj.GradientMax obj.AutoDiff ... 34 | obj.HFuntion obj.NumParams obj.DataTrain obj.Setting ... 35 | obj.StepAdaptive obj.NumFactor obj.SaveParams obj.Optimization}; 36 | 37 | [obj.NumSample, obj.LearningRate,obj.GradWeight,... 38 | obj.MaxIter, obj.MaxPatience, obj.WindowSize,obj.Verbose,... 39 | obj.InitMethod, obj.StdForInit, obj.Seed, obj.MeanInit,... 40 | obj.SigInitScale, obj.LBPlot, obj.GradientMax, obj.AutoDiff,... 41 | obj.HFuntion, obj.NumParams, obj.DataTrain, obj.Setting,... 42 | obj.StepAdaptive, obj.NumFactor, obj.SaveParams, obj.Optimization] ...
43 | = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); 44 | end 45 | 46 | % Check if model object or function handle is provided 47 | if (isobject(mdl)) % If model object is provided 48 | obj.Model = mdl; 49 | obj.ModelToFit = obj.Model.ModelName; % Set model name if model is specified 50 | else % If function handle is provided 51 | obj.GradHFuntion = mdl; 52 | end 53 | 54 | % Main function to run VAFC 55 | obj.Post = obj.fit(data); 56 | 57 | end 58 | 59 | %% VB main function 60 | function Post = fit(obj,data) 61 | 62 | % Extract model object if provided 63 | if (~isempty(obj.Model)) % If instance of a model is provided 64 | model = obj.Model; 65 | d_theta = model.NumParams; % Number of parameters 66 | else % If model object is not provided, number of parameters must be provided 67 | if (~isempty(obj.NumParams)) 68 | d_theta = obj.NumParams; 69 | else 70 | error('The number of model parameters has to be specified!') 71 | end 72 | end 73 | 74 | % Unload training parameters (only for convenience) 75 | std_init = obj.StdForInit; 76 | eps0 = obj.LearningRate; 77 | S = obj.NumSample; 78 | ini_mu = obj.MeanInit; 79 | window_size = obj.WindowSize; 80 | max_patience = obj.MaxPatience; 81 | init_scale = obj.SigInitScale; 82 | tau_threshold = obj.StepAdaptive; 83 | max_iter = obj.MaxIter; 84 | lb_plot = obj.LBPlot; 85 | max_grad = obj.GradientMax; 86 | momentum_weight = obj.GradWeight; 87 | num_factor = obj.NumFactor; 88 | grad_hfunc = obj.GradHFuntion; 89 | setting = obj.Setting; 90 | opt = obj.Optimization; 91 | verbose = obj.Verbose; 92 | save_params = obj.SaveParams; 93 | 94 | % Store variational mean in each iteration (if specified) 95 | if(save_params) 96 | params_iter = zeros(max_iter,d_theta); 97 | end 98 | 99 | % Initialization 100 | iter = 1; 101 | patience = 0; 102 | stop = false; 103 | LB_smooth = 0; 104 | lambda_best = []; 105 | 106 | % Initialization of mu 107 | % If initial parameters are not specified, then use some 108 | % initialization methods 109 | if isempty(ini_mu) 110 | mu = normrnd(0,std_init,d_theta,1); 111 | else % If initial parameters are provided 112 | mu = ini_mu; 113 | end 114 | B = normrnd(0,std_init,d_theta,num_factor); 115 | c = init_scale*ones(d_theta,1); 116 | 117 | % Variational parameters as a single column vector 118 | lambda = [mu;B(:);c]; 119 | 120 | if (strcmp(opt,'Adelta')) 121 | % ADADELTA parameters 122 | rho = obj.Adelta.rho; 123 | eps_step = obj.Adelta.eps; 124 | Edelta2_lambda = zeros(length(lambda),1); 125 | Eg2_lambda = zeros(length(lambda),1); 126 | end 127 | 128 | 129 | % Store the variational mean of each iteration 130 | param(iter,:) = mu'; 131 | 132 | %% First VB iteration 133 | lb_iter = zeros(S,1); 134 | grad_lb_mu_iter = zeros(S,d_theta); 135 | grad_lb_B_iter = zeros(S,d_theta*num_factor); 136 | grad_lb_c_iter = zeros(S,d_theta); 137 | 138 | % To compute log q_lambda 139 | Dinv2B = bsxfun(@times,B,1./c.^2); 140 | Blogdet = log(det(eye(num_factor) + bsxfun(@times,B, 1./(c.^2))'*B)) + sum(log((c.^2))); 141 | 142 | rqmc = normrnd(0,1,S,d_theta+num_factor); 143 | for s = 1:S 144 | % Compute model parameters from variational parameters 145 | U_normal = rqmc(s,:)'; 146 | epsilon1 = U_normal(1:num_factor); 147 | epsilon2 = U_normal((num_factor+1):end); 148 | theta = mu + B*epsilon1 + c.*epsilon2; % Compute theta 149 | 150 | % If a function handle for the gradient of h(theta) is not 151 | % provided, then a model object with a method to compute the 152 | % gradient of h(theta) must be used.
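% Note: with the factor covariance Sigma = B*B' + diag(c.^2), Blogdet
% above uses the matrix determinant lemma,
%    log|Sigma| = log|I_p + B'*diag(1./c.^2)*B| + sum(log(c.^2)),
% so only a p x p determinant (p = NumFactor) is needed instead of a
% d_theta x d_theta one.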
153 | if isempty(grad_hfunc) 154 | if (~isempty(obj.Model)) 155 | % Call the hFunctionGrad of the model to compute 156 | % h(theta) and the gradient of h(theta) 157 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 158 | else 159 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 160 | end 161 | else 162 | % If the user supplies a function handle that directly 163 | % computes the gradient of h(theta), use it 164 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 165 | end 166 | 167 | % Reparameterization gradient of the lowerbound integrand 168 | [L_mu,L_B,L_c] = obj.grad_log_q_function(B,c,epsilon1,epsilon2,grad_h_theta); 169 | 170 | % Gradient of lowerbound 171 | grad_lb_mu_iter(s,:) = L_mu; 172 | grad_lb_B_iter(s,:) = L_B(:); 173 | grad_lb_c_iter(s,:) = L_c; 174 | 175 | % For the lower bound: evaluate log q_lambda at theta 176 | Bz_deps = theta - mu; 177 | DBz_deps = bsxfun(@times,Bz_deps,1./c.^2); 178 | Half1 = DBz_deps; 179 | Half2 = Dinv2B/(eye(num_factor) + B'*Dinv2B)*B'*DBz_deps; 180 | log_q_lambda = - d_theta/2*log(2*pi) - 1/2*Blogdet - 1/2*Bz_deps'*(Half1-Half2); 181 | lb_iter(s) = h_theta - log_q_lambda; 182 | end 183 | 184 | % Estimation of the lowerbound 185 | LB(iter) = mean(lb_iter); 186 | 187 | % Monte Carlo estimate of the lowerbound gradient 188 | grad_lb_mu = mean(grad_lb_mu_iter,1)'; 189 | grad_lb_B = mean(grad_lb_B_iter,1)'; 190 | grad_lb_c = mean(grad_lb_c_iter,1)'; 191 | 192 | % Natural gradient 193 | gradient_lambda = obj.inv_fisher_grad_multifactor(B,c,grad_lb_mu,grad_lb_B,grad_lb_c); 194 | norm_gradient = norm(gradient_lambda); 195 | norm_gradient_seq1 = norm_gradient; 196 | gradient_bar = gradient_lambda; 197 | 198 | %% Main VB loop 199 | while ~stop 200 | 201 | iter = iter + 1; 202 | 203 | % Recompute the quantities needed to evaluate log q_lambda 204 | Dinv2B = bsxfun(@times,B,1./c.^2); 205 | Blogdet = log(det(eye(num_factor) + bsxfun(@times,B, 1./(c.^2))'*B)) + sum(log((c.^2))); 206 | 207 | rqmc = normrnd(0,1,S,d_theta+num_factor); 208 | for s=1:S 209 | % Compute model parameters from variational parameters 210 | U_normal = rqmc(s,:)'; 211 | epsilon1 = U_normal(1:num_factor); 212 | epsilon2 = U_normal((num_factor+1):end); 213 | theta = mu + B*epsilon1 + c.*epsilon2; 214 | 215 | % If a function handle for the gradient of h(theta) is not 216 | % given, then a model object implementing a method that 217 | % computes this gradient must be provided instead.
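% Note on the log-density evaluation further below: for
% Sigma = B*B' + D^2 with D = diag(c), the Woodbury identity gives
%   Sigma^{-1} = D^{-2} - D^{-2}*B*(I_p + B'*D^{-2}*B)^{-1}*B'*D^{-2},
% so Half1 - Half2 equals Sigma^{-1}*(theta - mu) at the cost of a
% p x p solve, and Blogdet is log|Sigma| by the determinant lemma:
%   log|Sigma| = log|I_p + B'*D^{-2}*B| + sum(log(c.^2)).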
218 | if isempty(grad_hfunc) 219 | if (~isempty(obj.Model)) 220 | % Call the hFunctionGrad of the model to compute 221 | % h(theta) and the gradient of h(theta) 222 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 223 | else 224 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 225 | end 226 | else 227 | % If the user supplies a function handle that directly 228 | % computes the gradient of h(theta), use it 229 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 230 | end 231 | 232 | % Reparameterization gradient of the lowerbound integrand 233 | [L_mu,L_B,L_c] = obj.grad_log_q_function(B,c,epsilon1,epsilon2,grad_h_theta); 234 | 235 | % Gradient of lowerbound 236 | grad_lb_mu_iter(s,:) = L_mu; 237 | grad_lb_B_iter(s,:) = L_B(:); 238 | grad_lb_c_iter(s,:) = L_c; 239 | 240 | % For the lower bound: evaluate log q_lambda at theta 241 | Bz_deps = theta - mu; 242 | DBz_deps = bsxfun(@times,Bz_deps,1./c.^2); 243 | Half1 = DBz_deps; 244 | Half2 = Dinv2B/(eye(num_factor) + B'*Dinv2B)*B'*DBz_deps; 245 | log_q_lambda = - d_theta/2*log(2*pi) - 1/2*Blogdet - 1/2*Bz_deps'*(Half1-Half2); 246 | lb_iter(s) = h_theta - log_q_lambda; 247 | end 248 | 249 | % Estimation of the lowerbound 250 | LB(iter) = mean(lb_iter); 251 | 252 | % Monte Carlo estimate of the lowerbound gradient 253 | grad_lb_mu = mean(grad_lb_mu_iter,1)'; 254 | grad_lb_B = mean(grad_lb_B_iter,1)'; 255 | grad_lb_c = mean(grad_lb_c_iter,1)'; 256 | 257 | gradient_lambda = obj.inv_fisher_grad_multifactor(B,c,grad_lb_mu,grad_lb_B,grad_lb_c); 258 | grad_norm_current = norm(gradient_lambda); 259 | norm_gradient_seq1(iter) = grad_norm_current; 260 | if norm(gradient_lambda)>max_grad % Gradient clipping 261 | gradient_lambda = (max_grad/norm(gradient_lambda))*gradient_lambda; 262 | end 263 | norm_gradient = norm_gradient+norm(gradient_lambda); 264 | gradient_bar = momentum_weight*gradient_bar+(1-momentum_weight)*gradient_lambda; 265 | 266 | if (strcmp(opt,'Adelta')) 267 | % ADADELTA update on the plain (stacked) lowerbound gradient 268 | grad_lb = [grad_lb_mu;grad_lb_B;grad_lb_c]; 269 | Eg2_lambda = rho*Eg2_lambda + (1-rho)*grad_lb.^2; 270 | temp = sqrt(Edelta2_lambda + eps_step)./sqrt(Eg2_lambda+eps_step); 271 | d_lambda = temp.*grad_lb; 272 | lambda = lambda + d_lambda; 273 | Edelta2_lambda = rho*Edelta2_lambda + (1-rho)*d_lambda.^2; 274 | else 275 | if iter>tau_threshold 276 | stepsize = eps0*tau_threshold/iter; 277 | else 278 | stepsize = eps0; 279 | end 280 | lambda = lambda + stepsize*gradient_bar; 281 | end 282 | 283 | % Reconstruct variational parameters 284 | mu = lambda(1:d_theta,1); 285 | vecB = lambda(d_theta+1:d_theta+d_theta*num_factor,1); 286 | B = reshape(vecB,d_theta,num_factor); 287 | c = lambda(d_theta+d_theta*num_factor+1:end,1); 288 | 289 | % Store parameters in each iteration 290 | param(iter,:) = mu'; 291 | 292 | % Smooth the lowerbound 293 | if iter>=window_size 294 | LB_smooth(iter-window_size+1) = mean(LB(iter-window_size+1:iter)); 295 | end 296 | 297 | % Check for early stopping 298 | if (iter>window_size)&&(LB_smooth(iter-window_size+1)>=max(LB_smooth)) 299 | lambda_best = lambda; 300 | patience = 0; 301 | else 302 | patience = patience+1; 303 | end 304 | 305 | if (patience>max_patience)||(iter>max_iter) 306 | stop = true; 307 | end 308 | 309 | % Display training information 310 | if(verbose) 311 | if iter> window_size 312 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB_smooth(iter-window_size+1))]) 313 | else 314 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB(iter))]) 315 | end 316 | end 317 | 318 | % If users want to save the variational mean at each iteration 319 | % (only useful when debugging) 320 | if(save_params)
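% Row iter of params_iter records the current variational mean,
% so the whole optimization path can be inspected via
% Post.muIter after fitting.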
321 | params_iter(iter,:) = mu; 322 | end 323 | end 324 | 325 | % Store output 326 | if(save_params) 327 | Post.muIter = params_iter(1:iter-1,:); 328 | end 329 | 330 | % If early stopping was never triggered, use the final lambda 331 | if(isempty(lambda_best)) 332 | lambda_best = lambda; 333 | end 334 | 335 | Post.LB_smooth = LB_smooth; 336 | Post.LB = LB; 337 | Post.lambda = lambda_best; 338 | Post.mu = lambda_best(1:d_theta,1); 339 | Post.B = reshape(lambda_best(d_theta+1:d_theta+d_theta*num_factor,1),d_theta,num_factor); 340 | Post.c = lambda_best(d_theta+d_theta*num_factor+1:end,1); 341 | Post.params = param; 342 | Post.Sigma = Post.B*Post.B' + diag(Post.c.^2); 343 | Post.sigma2 = diag(Post.Sigma); 344 | 345 | % If users want to plot the lowerbound 346 | if(lb_plot) 347 | obj.plot_lb(LB_smooth); 348 | end 349 | end 350 | 351 | %% Reparameterization gradient of the lowerbound integrand w.r.t. mu, B and c 352 | function [L_mu,L_B,L_c] = grad_log_q_function(obj,B,c,epsilon1,epsilon2,grad_log_h) 353 | 354 | Bz_deps = B*epsilon1 + c.*epsilon2; % theta - mu (note: epsilon2, not the built-in constant eps) 355 | Dinv2B = bsxfun(@times,B,1./c.^2); %D^-2*B 356 | DBz_deps = bsxfun(@times,Bz_deps,1./c.^2); %D^-2 * Bz_deps 357 | 358 | Half1 = DBz_deps; 359 | Half2 = Dinv2B/(eye(obj.NumFactor) + B'*Dinv2B)*B'*DBz_deps; 360 | L_mu = grad_log_h + (Half1-Half2); 361 | L_B = grad_log_h*epsilon1'+(Half1-Half2)*epsilon1'; 362 | L_c = grad_log_h.*epsilon2 + (Half1 - Half2).*epsilon2; 363 | 364 | end 365 | 366 | function prod = inv_fisher_grad_multifactor(obj,B,c,grad1,grad2,grad3) 367 | % Compute the product (inverse Fisher information) x gradient 368 | % for the multifactor case 369 | % B: dxp matrix where p<<d is the number of factors -------------------------------------------------------------------------------- /VBLab/VB/VBayesLab.m: -------------------------------------------------------------------------------- 1 | classdef VBayesLab 2 | %VBAYESLAB Superclass of the VB classes in the package 3 | 4 | properties 5 | Method % Name of the VB method 6 | Model % Instance of the model to sample from 7 | ModelToFit % Name of model to be fitted 8 | NumSample % Number of Monte Carlo samples to estimate the lowerbound and its gradient 9 | GradWeight % Momentum weight 10 | LearningRate % Fixed learning rate 11 | MaxIter % Maximum number of VB iterations 12 | MaxPatience % Maximum patience (iterations without improvement) before early stopping 13 | WindowSize % Smoothing window 14 | ParamsInit % Initial values of model parameters 15 | NumParams % Number of model parameters 16 | Seed % Random seed 17 | Post % Struct to store estimation results 18 | Verbose % Turn on or off printed messages during the sampling phase 19 | StdForInit % Std of the normal distribution to initialize VB params 20 | MeanInit % Pre-specified values of mean(theta) 21 | SigInitScale % A constant to scale up or down std of normal distribution 22 | StepAdaptive % From this iteration, stepsize is reduced 23 | LBPlot % If user wants to plot the lowerbound at the end 24 | GradientMax % For gradient clipping 25 | InitMethod % Method to initialize mu (variational mean) 26 | AutoDiff % Turn on/off automatic differentiation 27 | HFuntion % Function handle to compute h(theta) 28 | GradHFuntion % Function handle to compute the gradient of h(theta) 29 | DataTrain % Training data 30 | Setting % Struct to store additional settings of the model 31 | SaveParams % Whether to save the variational parameters at every iteration 32 | Optimization % Optimization method 33 | end 34 | 35 | methods 36 | function obj = VBayesLab(varargin) 37 | %VBAYESLAB Construct an instance of this class 38 | % Set default values shared by all VB subclasses 39 | obj.AutoDiff = false; 40 | obj.GradientMax = 100; 41 | obj.GradWeight = 0.9; 42 | obj.InitMethod = 'Random'; 43 | obj.LBPlot = true; 44 | obj.LearningRate = 0.001; 45 | obj.MaxIter = 5000; 46 | obj.MaxPatience = 20; 47 | obj.NumSample = 50; 48 | obj.StdForInit = 0.01; 49 | obj.SigInitScale = 0.1; 50 | obj.StepAdaptive = obj.MaxIter/2;
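% From iteration StepAdaptive onwards, the subclasses decay the
% stepsize as LearningRate*StepAdaptive/iter, so by default the
% decay starts halfway through training.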
51 | obj.SaveParams = false; 52 | obj.Verbose = true; 53 | obj.WindowSize = 30; 54 | end 55 | 56 | %% Plot lowerbound 57 | % Call this after running VB 58 | function plot_lb(obj,lb) 59 | plot(lb,'LineWidth',2) 60 | if(~isempty(obj.Model)) 61 | title(['Lower bound ',obj.Method ,' - ',obj.Model.ModelName]) 62 | else 63 | title('Lower bound') 64 | end 65 | xlabel('Iterations') 66 | ylabel('Lower bound') 67 | end 68 | end 69 | 70 | end 71 | 72 | --------------------------------------------------------------------------------