├── Data
│   ├── Abalon.mat
│   ├── DirectMarketing.mat
│   ├── GermanCredit.mat
│   ├── LabourForce.mat
│   └── RealizedLibrary.mat
├── Example
│   ├── CGVB_Logistics_Function_Handle.m
│   ├── CGVB_Logistics_Function_Handle_AutoDiff.m
│   ├── CGVB_Logistics_Model_Object.m
│   ├── CGVB_Logistics_Model_Object_Simple.m
│   ├── CGVB_VAR1_Function_Handle.m
│   ├── CGVB_VAR1_Model_Object.m
│   ├── MGVB_Logistics_Model_Object.m
│   ├── NAGVAC_Logistics_Function_Handle.m
│   ├── NAGVAC_Logistics_Model_Object.m
│   ├── VAFC_Logistics_Function_Handle.m
│   ├── VAFC_Logistics_Model_Object.m
│   └── VAR1.m
├── README.md
└── VBLab
    ├── MCMC
    │   └── MCMC.m
    ├── Models
    │   ├── LogisticRegression.m
    │   └── ModelClass.m
    ├── Utilities
    │   ├── Distribution.m
    │   ├── Normal.m
    │   ├── readData.m
    │   ├── trainTestSplit.m
    │   ├── utils_FNNInitialize.m
    │   ├── utils_errorMsg.m
    │   ├── utils_gen_Sobol.m
    │   ├── utils_itril.m
    │   ├── utils_itriu.m
    │   ├── utils_jitChol.m
    │   ├── utils_logNormalpdf.m
    │   ├── utils_logit.m
    │   ├── utils_normrnd_qmc.m
    │   ├── utils_plotShrinkage.m
    │   ├── utils_relu.m
    │   ├── utils_rqmc_rnd.m
    │   ├── utils_rs_multinomial.m
    │   ├── utils_sigmoid.m
    │   ├── utils_update_sigma.m
    │   ├── utils_vech.m
    │   ├── utils_vechinv.m
    │   └── vbayesPlot.m
    └── VB
        ├── CGVB.m
        ├── MGVB.m
        ├── NAGVAC.m
        ├── VAFC.m
        └── VBayesLab.m

/Data/Abalon.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VBayesLab/VBLab/90c48af33e2011d21a48af1f95101e81b84098ee/Data/Abalon.mat
--------------------------------------------------------------------------------
/Data/DirectMarketing.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VBayesLab/VBLab/90c48af33e2011d21a48af1f95101e81b84098ee/Data/DirectMarketing.mat
--------------------------------------------------------------------------------
/Data/GermanCredit.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VBayesLab/VBLab/90c48af33e2011d21a48af1f95101e81b84098ee/Data/GermanCredit.mat
--------------------------------------------------------------------------------
/Data/LabourForce.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VBayesLab/VBLab/90c48af33e2011d21a48af1f95101e81b84098ee/Data/LabourForce.mat
--------------------------------------------------------------------------------
/Data/RealizedLibrary.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VBayesLab/VBLab/90c48af33e2011d21a48af1f95101e81b84098ee/Data/RealizedLibrary.mat
--------------------------------------------------------------------------------
/Example/CGVB_Logistics_Function_Handle.m:
--------------------------------------------------------------------------------
% Script to run Example 3.4, defining the logistic regression model as a
% function handle and supplying the gradient of h(theta) analytically

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Additional setting
setting.Prior = [0,50];

% Initialize using the MLE estimate (for faster convergence)
X = labour(:,1:end-1);
y = labour(:,end);
theta_init = glmfit(X,y,'binomial','constant','off');   % initialise mu

% Run CGVB
Post_CGVB_manual = CGVB(@grad_h_func_logistic,labour,...
    'NumParams',n_features,...
    'Setting',setting,...
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'MeanInit',theta_init,...      % Initialize the variational mean at the MLE estimate
    'GradWeight1',0.9,...          % Momentum 1
    'GradWeight2',0.9,...          % Momentum 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',true);

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_CGVB_manual.Post.mu;
sigma2_vb = Post_CGVB_manual.Post.sigma2;

% Plot the variational distribution for the first 8 parameters
for i = 1:8
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_CGVB_manual.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)

%% Define gradient of h function for Logistic regression
% theta: Dx1 array
% h_func: Scalar
% h_func_grad: Dx1 array
function [h_func_grad,h_func] = grad_h_func_logistic(data,theta,setting)

    % Extract additional settings
    d = length(theta);
    sigma2 = setting.Prior(2);

    % Extract data
    X = data(:,1:end-1);
    y = data(:,end);

    % Compute log likelihood
    aux = X*theta;
    llh = y.*aux - log(1+exp(aux));
    llh = sum(llh);

    % Compute gradient of log likelihood
    ppi = 1./(1+exp(-aux));
    llh_grad = X'*(y-ppi);

    % Compute log prior
    log_prior = -d/2*log(2*pi) - d/2*log(sigma2) - theta'*theta/sigma2/2;

    % Compute gradient of log prior
    log_prior_grad = -theta/sigma2;

    % Compute h(theta) = log p(y|theta) + log p(theta)
    h_func = llh + log_prior;

    % Compute gradient of h(theta)
    h_func_grad = llh_grad + log_prior_grad;

    % h_func_grad must be a column
    h_func_grad = reshape(h_func_grad,length(h_func_grad),1);

end
--------------------------------------------------------------------------------
/Example/CGVB_Logistics_Function_Handle_AutoDiff.m:
--------------------------------------------------------------------------------
% Script to run Example 3.4, defining the logistic regression model as a
% function handle and using AutoDiff to automatically compute the gradient
% of the h(theta) function

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Additional setting
setting.Prior = [0,50];

% Initialize using the MLE estimate (for faster convergence)
X = labour(:,1:end-1);
y = labour(:,end);
theta_init = glmfit(X,y,'binomial','constant','off');   % initialise mu

% Run CGVB
Post_CGVB_manual = CGVB(@grad_h_func_logistic,labour,...
    'NumParams',n_features,...
    'Setting',setting,...
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'MeanInit',theta_init,...      % Initialize the variational mean at the MLE estimate
    'GradWeight1',0.9,...          % Momentum 1
    'GradWeight2',0.9,...          % Momentum 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',true);

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_CGVB_manual.Post.mu;
sigma2_vb = Post_CGVB_manual.Post.sigma2;

% Plot the variational distribution for the first 8 parameters
for i = 1:8
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_CGVB_manual.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
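%% Optional sanity check (a sketch added for illustration, not part of the
% original example): compare the AutoDiff gradient against a central
% finite-difference approximation at a random point, reusing the labour data
% and setting defined above. The step size and test point are assumptions.
theta_test = 0.01*randn(n_features,1);
grad_ad = grad_h_func_logistic(labour,theta_test,setting);
grad_fd = zeros(n_features,1);
step = 1e-6;
for j = 1:n_features
    e_j = zeros(n_features,1);
    e_j(j) = step;
    [~,h_plus]  = grad_h_func_logistic(labour,theta_test + e_j,setting);
    [~,h_minus] = grad_h_func_logistic(labour,theta_test - e_j,setting);
    grad_fd(j) = (h_plus - h_minus)/(2*step);
end
disp(['Max AutoDiff vs finite-difference discrepancy: ',...
      num2str(max(abs(grad_ad - grad_fd)))])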
%% Define function to compute gradient of h function for Logistic regression
% Input:
%   data: 2D array
%   theta: Dx1 array
%   setting: struct
% Output:
%   h_func: Scalar
%   h_func_grad: Dx1 array
function [h_func_grad,h_func] = grad_h_func_logistic(data,theta,setting)

    % Convert parameters to dlarray data type
    theta_AD = dlarray(theta);

    % Evaluate the function containing dlgradient using dlfeval
    [h_func_grad_AD,h_func_AD] = dlfeval(@grad_h_func_logistic_AD,data,theta_AD,setting);

    % Convert outputs from dlarray back to Matlab arrays
    h_func_grad = extractdata(h_func_grad_AD);
    h_func = extractdata(h_func_AD);

    % Make sure the output is a column vector
    h_func_grad = reshape(h_func_grad,length(h_func_grad),1);

end

%% Function containing dlgradient
function [h_func_grad,h_func] = grad_h_func_logistic_AD(data,theta,setting)

    h_func = h_func_logistic(data,theta,setting);
    h_func_grad = dlgradient(h_func,theta);
end

%% Now we need to define a function to compute the h(theta) term
% Define h function for Logistic regression
% theta: Dx1 column
% h_func: Scalar
function h_func = h_func_logistic(data,theta,setting)

    % Extract additional settings
    d = length(theta);
    sigma2 = setting.Prior(2);

    % Extract data
    y = data(:,end);
    X = data(:,1:end-1);

    % Compute log likelihood
    aux = X*theta;
    log_lik = y.*aux - log(1+exp(aux));
    log_lik = sum(log_lik);

    % Compute log prior
    log_prior = -d/2*log(2*pi) - d/2*log(sigma2) - theta'*theta/sigma2/2;

    % h = log p(y|theta) + log p(theta)
    h_func = log_lik + log_prior;

end
--------------------------------------------------------------------------------
/Example/CGVB_Logistics_Model_Object.m:
--------------------------------------------------------------------------------
% Script to run Example 3.4 using the built-in Logistic Regression class of
% the VBLab package

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Create a Logistic Regression model object
Mdl = LogisticRegression(n_features,...
                         'Prior',{'Normal',[0,50]});

%% Run Cholesky GVB with random initialization
Estmdl_1 = CGVB(Mdl,labour,...
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'GradWeight1',0.9,...          % Momentum weight 1
    'GradWeight2',0.9,...          % Momentum weight 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'StepAdaptive',500,...         % For adaptive learning rate
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',false);               % Don't plot the lowerbound when finished

%% Run Cholesky GVB with MLE initialization
% Random seed to reproduce results
rng(2020)

theta_init = Mdl.initParams('MLE',labour);
Estmdl_2 = CGVB(Mdl,labour,...
    'MeanInit',theta_init,...      % Initial values of the variational mean
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'GradWeight1',0.9,...          % Momentum weight 1
    'GradWeight2',0.9,...          % Momentum weight 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'StepAdaptive',500,...         % For adaptive learning rate
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',false);               % Don't plot the lowerbound when finished

%% Then compare convergence of the lowerbound in the 2 cases
figure
hold on
grid on
plot(Estmdl_1.Post.LB_smooth,'-r','LineWidth',2)
plot(Estmdl_2.Post.LB_smooth,'--b','LineWidth',2)
title('Lowerbound')
xlabel('Iterations')
legend('Random Initialization','MLE Initialization')

%% It is useful to compare the approximate posterior density to the true density obtained by MCMC
Post_MCMC = MCMC(Mdl,labour,...
    'NumMCMC',100000,...           % Number of MCMC iterations
    'ParamsInit',theta_init,...    % Use MLE estimates as initial values
    'Verbose',100);                % Display sampling information after every 100 iterations

%% Compare densities by CGVB and MCMC
% Get posterior means and trace plots for the parameters to check the mixing
[mcmc_mean,mcmc_std,mcmc_chain] = Post_MCMC.getParamsMean(...
    'BurnInRate',0.2,...           % Throw away 20% of the samples
    'PlotTrace',1:n_features,...   % Trace plot for all parameters
    'SubPlot',[2,4]);              % Dimension of subplots

% Plot density
fontsize = 20;
numparams = Estmdl_2.Model.NumParams;

% Extract variational mean and variance
mu_vb = Estmdl_2.Post.mu;
sigma2_vb = Estmdl_2.Post.sigma2;

figure
for i = 1:numparams
    subplot(3,3,i)
    xx = mcmc_mean(i)-4*mcmc_std(i):0.002:mcmc_mean(i)+4*mcmc_std(i);
    yy_mcmc = ksdensity(mcmc_chain(:,i),xx,'Bandwidth',0.022);
    yy_vb = normpdf(xx,mu_vb(i),sqrt(sigma2_vb(i)));
    plot(xx,yy_mcmc,'-k',xx,yy_vb,'--b','LineWidth',1.5)
    line([theta_init(i) theta_init(i)],ylim,'LineWidth',1.5,'Color','r')
    str = ['\theta_',num2str(i)];
    title(str,'FontSize',fontsize)
    legend('MCMC','VB')
end
subplot(3,3,9)
plot(Estmdl_2.Post.LB_smooth,'LineWidth',1.5)
title('Lower bound','FontSize',fontsize)
--------------------------------------------------------------------------------
/Example/CGVB_Logistics_Model_Object_Simple.m:
--------------------------------------------------------------------------------
% Script to run Example 3.4 using the built-in Logistic Regression class of
% the VBLab package

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Create a Logistic Regression model object
Mdl = LogisticRegression(n_features,...
                         'Prior',{'Normal',[0,50]});

%% Run Cholesky GVB with random initialization
Post_CGVB = CGVB(Mdl,labour,...
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'GradWeight1',0.9,...          % Momentum weight 1
    'GradWeight2',0.9,...          % Momentum weight 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'StepAdaptive',500,...         % For adaptive learning rate
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',false);               % Don't plot the lowerbound when finished

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_CGVB.Post.mu;
sigma2_vb = Post_CGVB.Post.sigma2;

% Plot the variational distribution of each parameter
for i = 1:n_features
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_CGVB.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
--------------------------------------------------------------------------------
/Example/CGVB_VAR1_Function_Handle.m:
--------------------------------------------------------------------------------
% Example to fit a VAR(1) model, defined as a function handle, using the CGVB method
% We simulate a multivariate time series

clear
clc

rng(2021)

% Setting
m = 2;      % Number of time series
T = 100;    % Number of observations

% Generate toy data (white noise used as a stand-in for a VAR(1) series)
y = randn(m,T);

% Additional setting
setting.Prior = [0,1];          % Parameters (mean,variance) of a normal distribution
setting.y.mu = 0;
setting.idx.c = 1:m;
setting.idx.A = m+1:m+m^2;
setting.num_params = m + m^2;
setting.Gamma = 0.1*eye(m);

%% Run CGVB with the defined model
Post_CGVB_VAR1 = CGVB(@grad_h_func_VAR1,y,...
    'NumParams',setting.num_params,...   % Number of model parameters
    'Setting',setting,...          % Additional setting to compute gradient of h(theta)
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'GradWeight1',0.9,...          % Momentum 1
    'GradWeight2',0.9,...          % Momentum 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',false);

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_CGVB_VAR1.Post.mu;
sigma2_vb = Post_CGVB_VAR1.Post.sigma2;

% Plot the variational distribution of each parameter
for i = 1:setting.num_params
    subplot(2,4,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(2,4,7)
plot(Post_CGVB_VAR1.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
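% A restatement of the model and the formulas that grad_h_func_VAR1 below
% implements (comments added for clarity; the notation follows the code):
%   y_t = c + A*y_{t-1} + e_t,   e_t ~ N(0,Gamma),   t = 2,...,T
% with prior theta = [c; vec(A)] ~ N(0,I). Writing r_t = y_t - A*y_{t-1} - c,
%   h(theta) = log p(theta) - 0.5*m*(T-1)*log(2*pi) - 0.5*(T-1)*log(det(Gamma))
%              - 0.5*sum_t r_t'*inv(Gamma)*r_t
% and the gradients of the log-likelihood term are
%   dh/dc      = sum_t inv(Gamma)*r_t
%   dh/dvec(A) = sum_t kron(y_{t-1}, inv(Gamma)*r_t)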
%% Function to compute h(theta) and its gradient. This can be defined in a separate file
% Input:
%   y: mxT matrix with m time series, each of length T
%   theta: Dx1 array of model parameters
%   setting: struct of additional information to compute the gradient of h(theta)
% Output:
%   grad_h_theta: Dx1 array, the gradient of h(theta)
%   h_theta: scalar, the value of h(theta)
function [grad_h_theta,h_theta] = grad_h_func_VAR1(y,theta,setting)

    % Extract size of data
    [m,T] = size(y);

    % Extract model settings
    prior_params = setting.Prior;
    d = setting.num_params;
    idx = setting.idx;
    Gamma = setting.Gamma;
    Gamma_inv = Gamma^(-1);

    % Extract params from theta
    c = theta(idx.c);                                % c is an mx1 column
    A = reshape(theta(idx.A),length(c),length(c));   % A is an mxm matrix

    % Log prior
    log_prior = Normal.logPdfFnc(theta,prior_params);

    % Log likelihood
    log_llh = 0;
    for t = 2:T
        log_llh = log_llh - 0.5*(y(:,t) - A*y(:,t-1) - c)' * Gamma_inv * (y(:,t) - A*y(:,t-1) - c);
    end
    log_llh = log_llh - 0.5*m*(T-1)*log(2*pi) - 0.5*(T-1)*log(det(Gamma));

    % h(theta)
    h_theta = log_prior + log_llh;

    % Gradient of log prior
    grad_log_prior = Normal.GradlogPdfFnc(theta,prior_params);

    % Gradient of log likelihood, using the identity
    % d/dvec(A) [-0.5*r'*inv(Gamma)*r] = kron(y_{t-1}, inv(Gamma)*r)
    grad_llh_c = 0;
    grad_llh_A = 0;
    for t = 2:T
        grad_llh_c = grad_llh_c + Gamma_inv*(y(:,t) - A*y(:,t-1) - c);
        grad_llh_A = grad_llh_A + kron(y(:,t-1),Gamma_inv*(y(:,t) - A*y(:,t-1) - c));
    end

    grad_llh = [grad_llh_c;grad_llh_A(:)];

    % Gradient of h(theta)
    grad_h_theta = grad_log_prior + grad_llh;

    % Make sure grad_h_theta is a column
    grad_h_theta = reshape(grad_h_theta,d,1);

end
--------------------------------------------------------------------------------
/Example/CGVB_VAR1_Model_Object.m:
--------------------------------------------------------------------------------
% Example to fit a VAR(1) model, defined as a custom class object, using the CGVB method
% We simulate a multivariate time series

clear
clc

rng(2021)

% Setting
m = 2;      % Number of time series
T = 100;    % Number of observations

% Generate toy data (white noise used as a stand-in for a VAR(1) series)
y = randn(m,T);

% Create a VAR1 model object
Mdl = VAR1(m);

%% Run CGVB with the defined model
Post_CGVB_VAR1 = CGVB(Mdl,y,...
    'LearningRate',0.002,...       % Learning rate
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'MaxPatience',20,...           % For early stopping
    'MaxIter',5000,...             % Maximum number of iterations
    'GradWeight1',0.9,...          % Momentum 1
    'GradWeight2',0.9,...          % Momentum 2
    'WindowSize',10,...            % Smoothing window for lowerbound
    'GradientMax',10,...           % For gradient clipping
    'LBPlot',false);

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_CGVB_VAR1.Post.mu;
sigma2_vb = Post_CGVB_VAR1.Post.sigma2;

% Plot the variational distribution of each parameter
for i = 1:Post_CGVB_VAR1.Model.NumParams
    subplot(2,4,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(2,4,7)
plot(Post_CGVB_VAR1.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
--------------------------------------------------------------------------------
/Example/MGVB_Logistics_Model_Object.m:
--------------------------------------------------------------------------------
% Script to fit the Logistic Regression model using the MGVB method

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Create a Logistic Regression model object
Mdl = LogisticRegression(n_features,...
                         'Prior',{'Normal',[0,50]});

%% Run Manifold GVB (MGVB) to approximate the posterior distribution of the
% model using a multivariate normal density
Post_MGVB = MGVB(Mdl,labour,...
    'LearningRate',0.001,...       % Learning rate
    'NumSample',100,...            % Number of samples to estimate gradient of lowerbound
    'MaxPatience',50,...           % For early stopping
    'MaxIter',2000,...             % Maximum number of iterations
    'GradWeight',0.4,...           % Momentum weight
    'WindowSize',30,...            % Smoothing window for lowerbound
    'SigInitScale',0.04,...        % Std of the normal distribution used for initializing
    'StepAdaptive',500,...         % For adaptive learning rate
    'GradientMax',100,...          % For gradient clipping
    'LBPlot',true);                % Plot the smoothed lowerbound at the end
--------------------------------------------------------------------------------
/Example/NAGVAC_Logistics_Function_Handle.m:
--------------------------------------------------------------------------------
% Script to fit a Logistic Regression model, defined as a function handle,
% using the NAGVAC method

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Additional setting
setting.Prior = [0,1];

% Extract covariates and responses
X = labour(:,1:end-1);
y = labour(:,end);

% Run NAGVAC
Post_NAGVAC_manual = NAGVAC(@grad_h_func_logistic,labour,...
    'NumParams',n_features,...
    'Setting',setting,...
    'NumSample',100,...            % Number of samples to estimate gradient of lowerbound
    'LearningRate',0.01,...        % Learning rate
    'MaxPatience',20,...           % For early stopping
    'MaxIter',10000,...            % Maximum number of iterations
    'GradientMax',200,...          % For gradient clipping
    'WindowSize',30,...            % Smoothing window for lowerbound
    'LBPlot',true);                % Plot the lowerbound when finished

%% Define gradient of h function for Logistic regression
% theta: Dx1 array
% h_func: Scalar
% h_func_grad: Dx1 array
function [h_func_grad,h_func] = grad_h_func_logistic(data,theta,setting)

    % Extract additional settings
    d = length(theta);
    sigma2 = setting.Prior(2);

    % Extract data
    X = data(:,1:end-1);
    y = data(:,end);

    % Compute log likelihood
    aux = X*theta;
    llh = y.*aux - log(1+exp(aux));
    llh = sum(llh);

    % Compute gradient of log likelihood
    ppi = 1./(1+exp(-aux));
    llh_grad = X'*(y-ppi);

    % Compute log prior
    log_prior = -d/2*log(2*pi) - d/2*log(sigma2) - theta'*theta/sigma2/2;

    % Compute gradient of log prior
    log_prior_grad = -theta/sigma2;

    % Compute h(theta) = log p(y|theta) + log p(theta)
    h_func = llh + log_prior;

    % Compute gradient of h(theta)
    h_func_grad = llh_grad + log_prior_grad;

    % h_func_grad must be a column
    h_func_grad = reshape(h_func_grad,length(h_func_grad),1);

end
--------------------------------------------------------------------------------
/Example/NAGVAC_Logistics_Model_Object.m:
--------------------------------------------------------------------------------
% Script to fit a Logistic Regression model, defined as a class object,
% using the NAGVAC method

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the LabourForce dataset
labour = readData('LabourForce',...     % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(labour,2)-1;

% Create a Logistic Regression model object
Mdl = LogisticRegression(n_features,...
                         'Prior',{'Normal',[0,50]});

%% Run NAGVAC with random initialization
Post_NAGVAC = NAGVAC(Mdl,labour,...
    'NumSample',200,...            % Number of samples to estimate gradient of lowerbound
    'LearningRate',0.005,...       % Learning rate
    'MaxPatience',20,...           % For early stopping
    'MaxIter',10000,...            % Maximum number of iterations
    'GradientMax',200,...          % For gradient clipping
    'WindowSize',50,...            % Smoothing window for lowerbound
    'LBPlot',true);                % Plot the lowerbound when finished

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_NAGVAC.Post.mu;
sigma2_vb = Post_NAGVAC.Post.sigma2;

% Plot the variational distribution of each parameter
for i = 1:n_features
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_NAGVAC.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
--------------------------------------------------------------------------------
/Example/VAFC_Logistics_Function_Handle.m:
--------------------------------------------------------------------------------
% Script to fit a Logistic Regression model, defined as a function handle,
% using the VAFC method

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the GermanCredit dataset
credit = readData('GermanCredit',...    % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(credit,2)-1;

% Additional setting
setting.Prior = [0,50];

% Extract covariates and responses
X = credit(:,1:end-1);
y = credit(:,end);

% Run VAFC
Post_VAFC_manual = VAFC(@grad_h_func_logistic,credit,...
    'NumParams',n_features,...
    'Setting',setting,...
    'NumFactor',4,...              % Number of factors of the loading matrix
    'NumSample',100,...            % Number of samples to estimate gradient of lowerbound
    'LearningRate',0.05,...        % Learning rate
    'MaxPatience',30,...           % For early stopping
    'MaxIter',10000,...            % Maximum number of iterations
    'GradientMax',200,...          % For gradient clipping
    'WindowSize',20,...            % Smoothing window for lowerbound
    'LBPlot',true);                % Plot the lowerbound when finished

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_VAFC_manual.Post.mu;
sigma2_vb = Post_VAFC_manual.Post.sigma2;

% Plot the variational distribution for the first 8 parameters
for i = 1:8
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_VAFC_manual.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)

%% Define gradient of h function for Logistic regression
% theta: Dx1 array
% h_func: Scalar
% h_func_grad: Dx1 array
function [h_func_grad,h_func] = grad_h_func_logistic(data,theta,setting)

    % Extract additional settings
    d = length(theta);
    sigma2 = setting.Prior(2);

    % Extract data
    X = data(:,1:end-1);
    y = data(:,end);

    % Compute log likelihood
    aux = X*theta;
    llh = y.*aux - log(1+exp(aux));
    llh = sum(llh);

    % Compute gradient of log likelihood
    ppi = 1./(1+exp(-aux));
    llh_grad = X'*(y-ppi);

    % Compute log prior
    log_prior = -d/2*log(2*pi) - d/2*log(sigma2) - theta'*theta/sigma2/2;

    % Compute gradient of log prior
    log_prior_grad = -theta/sigma2;

    % Compute h(theta) = log p(y|theta) + log p(theta)
    h_func = llh + log_prior;

    % Compute gradient of h(theta)
    h_func_grad = llh_grad + log_prior_grad;

    % h_func_grad must be a column
    h_func_grad = reshape(h_func_grad,length(h_func_grad),1);

end
--------------------------------------------------------------------------------
/Example/VAFC_Logistics_Model_Object.m:
--------------------------------------------------------------------------------
% Script to fit a Logistic Regression model, defined as a class object,
% using the VAFC method

clear     % Clear all variables
clc       % Clear the command window

% Random seed to reproduce results
rng(2020)

% Load the GermanCredit dataset
credit = readData('GermanCredit',...    % Dataset name
                  'Type','Matrix',...   % Store data as a 2D array (default)
                  'Intercept',true);    % Add a column of intercepts (default)

% Compute number of features
n_features = size(credit,2)-1;

% Create a Logistic Regression model object
Mdl = LogisticRegression(n_features);

%% Run VAFC with random initialization
Post_VAFC = VAFC(Mdl,credit,...
    'NumFactor',10,...             % Number of factors of the loading matrix
    'NumSample',50,...             % Number of samples to estimate gradient of lowerbound
    'LearningRate',0.05,...        % Learning rate
    'MaxPatience',20,...           % For early stopping
    'MaxIter',10000,...            % Maximum number of iterations
    'GradientMax',200,...          % For gradient clipping
    'WindowSize',5,...             % Smoothing window for lowerbound
    'LBPlot',false);               % Don't plot the lowerbound when finished

%% Plot variational distributions and lowerbound
figure
% Extract variational mean and variance
mu_vb = Post_VAFC.Post.mu;
sigma2_vb = Post_VAFC.Post.sigma2;

% Plot the variational distribution for the first 8 parameters
for i = 1:8
    subplot(3,3,i)
    vbayesPlot('Density',{'Normal',[mu_vb(i),sigma2_vb(i)]})
    grid on
    title(['\theta_',num2str(i)])
    set(gca,'FontSize',15)
end

% Plot the smoothed lower bound
subplot(3,3,9)
plot(Post_VAFC.Post.LB_smooth,'LineWidth',2)
grid on
title('Lower bound')
set(gca,'FontSize',15)
--------------------------------------------------------------------------------
/Example/VAR1.m:
--------------------------------------------------------------------------------
classdef VAR1
    %VAR1 Class to model the VAR(1) model

    properties
        ModelName    % Model name
        NumParams    % Number of parameters
        Prior        % Prior object
        ParamIdx     % Indexes of model parameters in the vector of variational parameters
        Gamma        % Fixed covariance matrix
    end

    methods
        % Constructor. This will be automatically called when users create a VAR1 object
        function obj = VAR1(NumSeries)
            % Set values for ModelName and NumParams
            obj.ModelName = 'VAR1';
            obj.NumParams = NumSeries + NumSeries^2;
            obj.Prior = [0,1];   % Use a normal distribution for the prior
            obj.ParamIdx.c = 1:NumSeries;
            obj.ParamIdx.A = NumSeries+1:obj.NumParams;
            obj.Gamma = 0.1*eye(NumSeries);
        end

        % Function to compute h(theta) and its gradient
        function [h_func_grad, h_func] = hFunctionGrad(obj,y,theta)
            % Extract size of data
            [m,T] = size(y);

            % Extract model properties
            prior_params = obj.Prior;
            d = obj.NumParams;
            idx = obj.ParamIdx;
            gamma = obj.Gamma;
            gamma_inv = gamma^(-1);

            % Extract params from theta
            c = theta(idx.c);                                % c is a column
            A = reshape(theta(idx.A),length(c),length(c));   % A is a matrix

            % Log prior
            log_prior = Normal.logPdfFnc(theta,prior_params);

            % Log likelihood
            log_llh = 0;
            for t = 2:T
                log_llh = log_llh - 0.5*(y(:,t) - A*y(:,t-1) - c)' * gamma_inv * (y(:,t) - A*y(:,t-1) - c);
            end
            log_llh = log_llh - 0.5*m*(T-1)*log(2*pi) - 0.5*(T-1)*log(det(gamma));

            % Compute h_theta
            h_func = log_prior + log_llh;

            % Gradient of log_prior
            grad_log_prior = Normal.GradlogPdfFnc(theta,prior_params);

            % Gradient of log_llh
            grad_llh_c = 0;
            grad_llh_A = 0;
            for t = 2:T
                grad_llh_c = grad_llh_c + gamma_inv*(y(:,t) - A*y(:,t-1) - c);
                grad_llh_A = grad_llh_A + kron(y(:,t-1),gamma_inv*(y(:,t) - A*y(:,t-1) - c));
            end

            grad_llh = [grad_llh_c;grad_llh_A(:)];

            % Compute gradient of h_theta
            h_func_grad = grad_log_prior + grad_llh;

            % Make sure grad_h_theta is a column
            h_func_grad = reshape(h_func_grad,d,1);
        end
    end
end
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# VBLab: a Matlab package for Variational Inference

## Documentation

The official documentation is available at https://vbayeslab.github.io/VBLabDocs/

## Getting started

### Install VBLab package

1. Download or clone the VBLab package from the [VBLab Github Page](https://github.com/VBayesLab/Tutorial-on-VB)
2. Add the VBLab package, with all subfolders, to the Matlab search path. See [How to add or remove folders to Matlab search path](https://au.mathworks.com/help/matlab/matlab_env/add-remove-or-reorder-folders-on-the-search-path.html)

### How to start

1. Read the [VB tutorial paper](https://www.researchgate.net/publication/340006729_A_practical_tutorial_on_Variational_Bayes) for the theoretical explanation of the VB methods supported by the VBLab package. See also the [shorter version of the VB tutorial](https://vbayeslab.github.io/VBLabDocs/tutorial/) on the documentation website.
2. Run the [examples](https://github.com/VBayesLab/Tutorial-on-VB) showing how to use various VB methods to fit different VBLab and user-defined models. See the detailed explanation of the examples in the [VB tutorial paper](https://www.researchgate.net/publication/340006729_A_practical_tutorial_on_Variational_Bayes) or in the [Example](https://vbayeslab.github.io/VBLabDocs/example/) section on the documentation website.
3. Check the API reference for [supported VB techniques](https://vbayeslab.github.io/VBLabDocs/gvb/), [statistical models](https://vbayeslab.github.io/VBLabDocs/model/) and [how to define custom models](https://vbayeslab.github.io/VBLabDocs/model/custom/) for users' applications.
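### Quick example

A minimal sketch (condensed from `Example/CGVB_Logistics_Model_Object_Simple.m`; all other options are left at their defaults):

```matlab
% Load a built-in dataset as a matrix, with an intercept column added
labour = readData('LabourForce','Type','Matrix','Intercept',true);

% Define a logistic regression model with a normal prior
n_features = size(labour,2)-1;
Mdl = LogisticRegression(n_features,'Prior',{'Normal',[0,50]});

% Approximate the posterior with Cholesky GVB
Post = CGVB(Mdl,labour,'LearningRate',0.002,'MaxIter',5000);

% Variational means and variances of the model parameters
mu_vb = Post.Post.mu;
sigma2_vb = Post.Post.sigma2;
```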
---

## Authors

- **Trong-Nghia Nguyen**, PhD candidate, The University of Sydney Business School. ([Google scholar](https://scholar.google.com.vn/citations?user=4fEGoI8AAAAJ&hl=en), [Research gate](https://www.researchgate.net/profile/Nghia_Nguyen79), [LinkedIn](https://www.linkedin.com/in/nguyen-nghia-458b3097/))
- **Minh-Ngoc Tran**, Associate Professor, The University of Sydney Business School. ([Google scholar](https://scholar.google.com/citations?user=98A6Dq8AAAAJ&hl=en), [Research gate](https://www.researchgate.net/profile/Minh-Ngoc-Tran), [Home Page](https://sites.google.com/site/mntran26/home))
- **Viet-Hung Dao**, PhD candidate, The University of New South Wales Business School. ([Home Page](https://acems.org.au/our-people/hung-dao))

---

## Citing VBLab

If you use VBLab in a publication, we would appreciate your citing the following paper:

M.-N. Tran, T.-N. Nguyen and V.-H. Dao (2021). [A practical tutorial on Variational Bayes](https://www.researchgate.net/publication/340006729_A_practical_tutorial_on_Variational_Bayes). *Technical Report*.

**Or bibtex entry**:

```yaml
@TECHREPORT{Tran:2021,
    AUTHOR = "M.-N. Tran, T.-N. Nguyen and V.-H. Dao",
    TITLE = "A practical tutorial on Variational Bayes",
    YEAR = {2021},
    NOTE = {DOI: 10.13140/RG.2.2.20173.59360},
}
```
--------------------------------------------------------------------------------
/VBLab/MCMC/MCMC.m:
--------------------------------------------------------------------------------
classdef MCMC < handle & matlab.mixin.CustomDisplay
    %MCMC Class to sample model posteriors using an adaptive random-walk
    % Metropolis-Hastings algorithm

    properties
        Method
        Model            % Instance of the model to be fitted
        ModelToFit       % Name of the model to be fitted
        SeriesLength     % Length of the series
        NumMCMC          % Number of MCMC iterations
        BurnInRate       % Percentage of samples used for burnin
        BurnIn           % Number of samples for burnin
        TargetAccept     % Target acceptance rate
        NumCovariance    % Number of latest samples used to calculate the adaptive covariance matrix for the random-walk proposal
        SaveFileName     % Save file name
        SaveAfter        % Save the sampling results after every SaveAfter iterations
        ParamsInit       % Initial values of model parameters
        Seed             % Random seed
        Post             % Struct to store estimation results
        Initialize       % Initialization method
        LogLikelihood    % Handle of the log-likelihood function
        PrintMessage     % Custom message during the sampling phase
        CPU              % Sampling time
        Verbose          % Turn on or off printed messages during the sampling phase
        SigScale
        Scale
        Params
    end

    methods
        function obj = MCMC(model,data,varargin)
            %MCMC Construct an instance of this class
            obj.Method = 'MCMC';
            obj.Model = model;
            obj.ModelToFit = model.ModelName;
            obj.NumMCMC = 50000;
            obj.TargetAccept = 0.25;
            obj.BurnInRate = 0.2;
            obj.NumCovariance = 2000;
            obj.SigScale = 0.01;
            obj.Scale = 1;
            obj.SaveAfter = 0;
            obj.Verbose = 100;
            obj.ParamsInit = [];

            if nargin > 2
                % Parse additional options
                paramNames = {'NumMCMC' 'BurnInRate' 'TargetAccept' 'NumCovariance' ...
                              'ParamsInit' 'SaveFileName' 'SaveAfter' 'Verbose' ...
                              'Seed' 'SigScale' 'Scale'};
                paramDflts = {obj.NumMCMC obj.BurnInRate obj.TargetAccept obj.NumCovariance ...
                              obj.ParamsInit obj.SaveFileName obj.SaveAfter obj.Verbose ...
                              obj.Seed obj.SigScale obj.Scale};

                [obj.NumMCMC,...
                 obj.BurnInRate,...
                 obj.TargetAccept,...
                 obj.NumCovariance,...
                 obj.ParamsInit,...
                 obj.SaveFileName,...
                 obj.SaveAfter,...
                 obj.Verbose,...
                 obj.Seed,...
                 obj.SigScale,...
                 obj.Scale] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:});
            end

            obj.BurnIn = floor(obj.BurnInRate*obj.NumMCMC);

            % Set up the save file name
            DateVector = datevec(date);
            [~, MonthString] = month(date);
            date_time = ['_',num2str(DateVector(3)),'_',MonthString,'_'];
            obj.SaveFileName = ['Results_MCMC',date_time];

            % Run MCMC
            obj.Post = obj.fit(data);

        end

        % Sample from a posterior using MCMC
        function Post = fit(obj,data)

            % Extract sampling settings
            model = obj.Model;
            num_params = model.NumParams;
            verbose = obj.Verbose;
            numMCMC = obj.NumMCMC;
            scale = obj.Scale;
            V = obj.SigScale*eye(num_params);
            accept_rate = obj.TargetAccept;
            N_corr = obj.NumCovariance;
            saveAfter = obj.SaveAfter;
            saveFileName = obj.SaveFileName;
            params_init = obj.ParamsInit;

            thetasave = zeros(numMCMC,num_params);

            % Get initial values of parameters
            if ~isempty(params_init)   % If a vector of initial values is provided
                if (length(params_init) ~= num_params)
                    error(utils_errorMsg('vbayeslab:InitVectorMisMatched'))
                else
                    params = params_init;
                end
            else
                params = model.initParams('Prior');
            end

            % Make sure params is a row vector
            params = reshape(params,1,num_params);

            % For the first iteration
            log_prior = model.logPriors(params);
            lik = model.logLik(data,params);
            jac = model.logJac(params);
            post = log_prior + lik;

            tic
            for i = 1:numMCMC
                if(verbose)
                    if(mod(i,verbose)==0)
                        disp(['iter: ',num2str(i),'(',num2str(i/numMCMC*100),'%)'])
                    end
                end

                % Transform params to the normal distribution scale
                params_normal = model.toNormalParams(params);

                % Use a multivariate normal distribution as the proposal distribution
                sample = mvnrnd(params_normal,scale.*V);

                % Convert theta back to the original distribution
                theta = model.toOriginalParams(sample);

                % Calculate the Metropolis-Hastings acceptance probability for the
                % proposed sample (with a Jacobian adjustment for transformed parameters)
                log_prior_star = model.logPriors(theta);
                lik_star = model.logLik(data,theta);
                jac_star = model.logJac(theta);
                post_star = log_prior_star + lik_star;

                A = rand();
                r = exp(post_star - post + jac - jac_star);
                C = min(1,r);
                if A <= C
                    params = theta;
                    post = post_star;
                    jac = jac_star;
                end
                thetasave(i,:) = params;

                % Adapt the scale and covariance of the proposal distribution
                if i > 50
                    scale = utils_update_sigma(scale,C,accept_rate,i,num_params);
                    if (i > N_corr)
                        V = cov(thetasave(i-N_corr+1:i,:));
                    else
                        V = cov(thetasave(1:i,:));
                    end
                    V = utils_jitChol(V);
                end
                Post.theta(i,:) = params;
                Post.scale(i) = scale;

                % Save intermediate results after every saveAfter iterations
                if(saveAfter > 0)
                    if mod(i,saveAfter) == 0
                        save(saveFileName,'Post')
                    end
                end
            end
            Post.cpu = toc;
        end

        % Function to get parameter means given MCMC samples
        function [params_mean,params_std,params] = getParamsMean(obj,varargin)
            post = obj.Post;
            burnin = [];
            burninrate = [];
            PlotTrace = [];   % Array of indexes of model parameters
            subplotsize = [];
            if nargin > 0
                % Parse additional options
                paramNames = {'BurnIn' 'BurnInRate' 'PlotTrace' 'SubPlot'};
                paramDflts = {burnin burninrate PlotTrace subplotsize};
                [burnin,...
                 burninrate,...
                 PlotTrace,...
                 subplotsize] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:});
            end

            if(isempty(burnin))
                burnin = obj.BurnIn;
            end

            if(isempty(burninrate))
                burninrate = obj.BurnInRate;
            else
                burnin = floor(burninrate*obj.NumMCMC);
            end

            params_mean = mean(post.theta(burnin+1:end,:));
            params_std = sqrt(mean(post.theta(burnin+1:end,:).^2) - params_mean.^2);
            params = post.theta(burnin+1:end,:);

            % Plot traces of the selected parameters to check the mixing
            if (~isempty(PlotTrace) && ~isempty(subplotsize))
                nrow = subplotsize(1);
                ncol = subplotsize(2);

                figure
                for i = 1:length(PlotTrace)
                    subplot(nrow,ncol,i)
                    plot(post.theta(burnin+1:end,PlotTrace(i)))
                    title(['\theta_',num2str(i)],'FontSize', 20)
                end
            end
        end
    end
end
--------------------------------------------------------------------------------
/VBLab/Models/LogisticRegression.m:
--------------------------------------------------------------------------------
classdef LogisticRegression
    %LOGISTICREGRESSION Class to define a Bayesian logistic regression model

    % Attributes
    properties
        ModelName     % Model name
        NumParams     % Number of parameters
        PriorInput    % Prior specified by users
        Prior         % Prior object
        PriorVal      % Parameters of the prior
        Intercept     % Option to add an intercept or not (only for testing)
        AutoDiff      % Option to use autodiff (only for testing)
        CutOff        % Cutoff probability for classification
        Post          % Struct to store training results (maybe not used)
    end

    methods
        % Constructor
        function obj = LogisticRegression(n_features,varargin)
            %LOGISTICREGRESSION Construct an instance of this class
            obj.ModelName = 'LogisticRegression';
            obj.PriorInput = {'Normal',[0,1]};
            obj.Intercept = true;
            obj.AutoDiff = false;
            obj.NumParams = n_features;
            obj.CutOff = 0.5;

            % Get additional arguments (some arguments are only for testing)
            if nargin > 1
                % Parse additional options
                paramNames = {'AutoDiff' 'Intercept' 'Prior',...
                              'CutOff'};
                paramDflts = {obj.AutoDiff obj.Intercept obj.PriorInput,...
                              obj.CutOff};

                [obj.AutoDiff,...
                 obj.Intercept,...
                 obj.PriorInput,...
                 obj.CutOff] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:});
            end

            % Set the prior object using built-in distribution classes
            eval(['obj.Prior=',obj.PriorInput{1}]);
            obj.PriorVal = obj.PriorInput{2};

        end

        %% Log likelihood
        % Input:
        %   - data: 2D array. The last column contains the responses
        %   - params: Dx1 vector of parameters
        % Output:
        %   - llh: Log likelihood of the model
        function llh = logLik(obj,data,params)

            % Make sure params is a column
            params = reshape(obj.toOriginalParams(params),obj.NumParams,1);

            % Extract data
            y = data(:,end);
            X = data(:,1:end-1);

            % Compute log likelihood
            aux = X*params;
            llh = y.*aux - log(1+exp(aux));
            llh = sum(llh);

        end

        %% Compute gradient of log likelihood
        % Input:
        %   - data: 2D array. The last column contains the responses
        %   - params: Dx1 vector of parameters
        % Output:
        %   - llh_grad: Gradient of the log likelihood of the model
        function [llh_grad,llh] = logLikGrad(obj,data,params)

            % Extract data
            y = data(:,end);
            X = data(:,1:end-1);

            % Convert theta (normal) to the original distribution
            params = reshape(obj.toOriginalParams(params),obj.NumParams,1);

            % Check if the auto-diff option is available
            if (obj.AutoDiff)
                % We have to convert params to dlarray to enable autodiff
                params_autodiff = dlarray(params);
                [llh_grad_autodiff,llh_auto_diff] = dlfeval(@obj.logLikGradAutoDiff,data,params_autodiff);
                llh_grad = extractdata(llh_grad_autodiff)';
                llh = extractdata(llh_auto_diff);
            else
                % Compute gradient of log likelihood
                aux = X*params;
                ppi = 1./(1+exp(-aux));
                llh_grad = X'*(y-ppi);

                % Compute log likelihood
                llh = y.*aux - log(1+exp(aux));
                llh = sum(llh);
            end
        end

        %% Compute gradient of log likelihood using AutoDiff
        % Input:
        %   - data: 2D array. The last column contains the responses
        %   - params: 1xD vector of parameters
        % Output:
        %   - llh_grad: Gradient of the log likelihood of the model
        function [llh_grad,llh] = logLikGradAutoDiff(obj,data,params)

            llh = obj.logLik(data,params);

            llh_grad = dlgradient(llh,params);
        end

        %% Compute log prior of parameters
        % Input:
        %   - params: Dx1 vector of parameters
        % Output:
        %   - log_prior: Log prior of the model parameters
        function log_prior = logPriors(obj,params)

            params = reshape(obj.toOriginalParams(params),obj.NumParams,1);

            % Compute log prior
            log_prior = obj.Prior.logPdfFnc(params,obj.PriorVal);

        end

        %% Compute gradient of log prior of parameters
        % Input:
        %   - params: 1xD vector of parameters
        % Output:
        %   - log_prior_grad: Gradient of the log prior of the model parameters
        function [log_prior_grad,log_prior] = logPriorsGrad(obj,params)

            % Compute log prior
            log_prior = obj.Prior.logPdfFnc(params,obj.PriorVal);

            % Compute gradient of log prior
            log_prior_grad = obj.Prior.GradlogPdfFnc(params,obj.PriorVal);
        end

        %% Log of the Jacobian of all parameters
        % Input:
        %   - params: row vector of parameters
        % Output:
        %   - logjac: Log Jacobian of the model parameters
        function logjac = logJac(obj,params)
            logjac = 0;
        end

        %% Gradient of the log Jacobian of all parameters
        % Input:
        %   - params: row vector of parameters
        % Output:
        %   - logJac_grad: Gradient of the log Jacobian
        function [logJac_grad,logJac] = logJacGrad(obj,params)
            logJac_grad = 0;
            logJac = 0;
        end

        %% Function to compute h_theta = log lik + log prior
        % Input:
        %   - data: 2D array. The last column contains the responses
        %   - theta: Dx1 vector of parameters
        % Output:
        %   - h_func: Log likelihood + log prior
        function h_func = hFunction(obj,data,theta)
            % Transform parameters from normal to the original distribution
            params = obj.toOriginalParams(theta);

            % Compute h(theta)
            log_lik = obj.logLik(data,params);
            log_prior = obj.logPriors(params);
            log_jac = obj.logJac(params);
            h_func = log_lik + log_prior + log_jac;
        end

        %% Function to compute gradient of h_theta = grad log lik + grad log prior
        % Input:
        %   - data: 2D array. The last column contains the responses
        %   - theta: Dx1 vector of parameters
        % Output:
        %   - h_func_grad: Gradient of (log likelihood + log prior)
        %   - h_func: Log likelihood + log prior
        function [h_func_grad, h_func] = hFunctionGrad(obj,data,theta)

            % Transform parameters from normal to the original distribution
            params = obj.toOriginalParams(theta);

            % Compute h(theta)
            [llh_grad,llh] = obj.logLikGrad(data,params);
            [log_prior_grad,log_prior] = obj.logPriorsGrad(params);
            [logJac_grad,logJac] = obj.logJacGrad(params);
            h_func = llh + log_prior + logJac;
            h_func_grad = llh_grad + log_prior_grad + logJac_grad;
        end

        %% Transform parameters from normal to the original distribution
        function paramsOriginal = toOriginalParams(obj,params)
            paramsOriginal = obj.Prior.toOriginalParams(params);
        end

        %% Transform parameters from the original to the normal distribution
        function paramsNormal = toNormalParams(obj,params)
            paramsNormal = obj.Prior.toNormalParams(params);
        end

        %% Initialize parameters
        function params = initParams(obj,type,varargin)
            d_theta = obj.NumParams;
            switch type
                case 'MLE'      % A 2D data array must be provided
                    data = varargin{1};
                    X = data(:,1:end-1);
                    y = data(:,end);
                    params = glmfit(X,y,'binomial','constant','off');   % initialise mu
                case 'Prior'
                    params = obj.Prior.rngFnc(obj.PriorVal,[d_theta,1]);
                case 'Random'   % (only for testing)
                    std_init = varargin{1};
                    params = normrnd(0,std_init,[d_theta,1]);
                case 'Zeros'    % (only for testing)
                    params = zeros(d_theta,1);
                otherwise
                    error(['There is no initialization method called ',type,' in the model object!'])
            end
        end
    end
end
--------------------------------------------------------------------------------
/VBLab/Models/ModelClass.m:
--------------------------------------------------------------------------------
classdef ModelClass
    %MODELCLASS (Abstract) superclass to define a statistical model

    properties
        ModelName
        NumParam
    end

    methods
        function obj = ModelClass(inputArg1,inputArg2)
            %MODELCLASS Construct an instance of this class
            obj.ModelName = inputArg1;
            obj.NumParam = inputArg2;
        end
    end

    methods (Abstract)
        llh = logLikFnc(obj,data,params);
        llh_grad = logLikGradFnc(obj,data,params);
        log_prior = logPriorsFnc(obj,params);
        log_prior_grad = logPriorsGradFnc(obj,params);
        logjac = logJacFnc(obj,params);
        logjac_grad = logJacGradFnc(obj,params);
    end

end
--------------------------------------------------------------------------------
/VBLab/Utilities/Distribution.m:
--------------------------------------------------------------------------------
classdef Distribution
    %DISTRIBUTION An (Abstract) superclass to define a probability distribution

    properties
    end

    methods (Abstract)
        random_num = rngFnc(obj,params,dim);
        llh = logPdfFnc(obj,data,params);
        llh_grad = GradlogPdfFnc(obj,data,params);
        logjac = logJacFnc(obj,params);
        logjac_grad = GradlogJacFnc(obj,params);
    end

end
--------------------------------------------------------------------------------
/VBLab/Utilities/Normal.m:
--------------------------------------------------------------------------------
classdef Normal < Distribution
    %NORMAL Class to compute quantities related to the normal distribution

    properties
    end

    methods (Static)

        %% Random number generator
        function random_num = rngFnc(params,dim)
            random_num = normrnd(params(1),sqrt(params(2)),dim);
        end

        %% Log pdf function
        % Input:
        %   x: Dx1
        %   params = [mean(scalar),variance(scalar)]
        % Output:
        %   log of the pdf function (scalar)
        function log_pdf = logPdfFnc(x,params)
            mu = params(1);
            sigma2 = params(2);
            d = length(x);
            log_pdf = -d/2*log(2*pi) - d/2*log(sigma2) - (x-mu)'*(x-mu)/sigma2/2;
        end

        %% Gradient of log pdf function
        % Input:
        %   x: Dx1
        %   params = [mean(scalar),variance(scalar)]
        % Output:
        %   gradient of the log pdf function: Dx1
        function grad_log_pdf = GradlogPdfFnc(x,params)
            mu = params(1);
            sigma2 = params(2);
            grad_log_pdf = -(x-mu)/sigma2;
        end

        %% Log Jacobian
        % Input:
        %   x: Dx1
        %   params = [mean(scalar),variance(scalar)]
        % Output:
        %   log Jacobian of the transformation (scalar)
        function log_jac = logJacFnc(x,params)
            log_jac = 0;
        end

        %% Gradient of the log Jacobian
        % Input:
        %   x: Dx1
        %   params = [mean(scalar),variance(scalar)]
        % Output:
        %   gradient of the log Jacobian of the transformation: Dx1
        function grad_log_jac = GradlogJacFnc(x,params)
            grad_log_jac = 0;
        end

        %% Transform parameters to the normal distribution scale
        % Input:
        %   x: Dx1
        function params_normal = toNormalParams(x)
            params_normal = x;
        end

        %% Transform normal parameters to the original distribution
        % Input:
        %   x: Dx1
        function params_ori = toOriginalParams(x)
            params_ori = x;
        end

        %% Plot density given distribution parameters
        % Input:
        %   params = [mean(scalar),variance(scalar)]
        function plotPdf(params,varargin)

            % Extract
            mu = params(1);
            sigma2 = params(2);

            xx = mu-4*sqrt(sigma2):0.001:mu+4*sqrt(sigma2);
            yy = normpdf(xx,mu,sqrt(sigma2));
            plot(xx,yy,'LineWidth',2)
        end
    end
end
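% Usage sketch (added for illustration; not part of the original class):
% all methods are static, so they can be called directly, e.g. to evaluate
% a N(0,2) prior at a parameter vector x as the model classes do internally:
%
%   x  = [0.5; -1.2];
%   lp = Normal.logPdfFnc(x,[0,2]);       % log density at x (scalar)
%   g  = Normal.GradlogPdfFnc(x,[0,2]);   % gradient of the log density, Dx1
%   z  = Normal.rngFnc([0,2],[5,1]);      % five draws from N(0,2)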
21 | Normalized,... 22 | Index,... 23 | Type,... 24 | RealizedMeasure] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); 25 | end 26 | 27 | % Load built-in datasets 28 | datatype = ''; 29 | switch dataName 30 | % Abalon data 31 | case 'Abalon' 32 | datatype = 'Cross-Sectional'; 33 | data = load('Abalon.mat'); 34 | data_mat = data.data; 35 | 36 | % DirectMarketing data 37 | case 'DirectMarketing' 38 | datatype = 'Cross-Sectional'; 39 | data = load('DirectMarketing.mat'); 40 | data_mat = data.data; 41 | if(Normalized) 42 | norm_col = [1,2,3,12]; 43 | data_mat(:,norm_col) = zscore(data_mat(:,norm_col)); 44 | end 45 | 46 | % GermanCredit data 47 | case 'GermanCredit' 48 | datatype = 'Cross-Sectional'; 49 | data = load('GermanCredit.mat'); 50 | data_mat = data.data; 51 | if(Normalized) 52 | data_mat = [zscore(data_mat(:,1:15)),data_mat(:,16:end)]; 53 | end 54 | 55 | % LabourForce data 56 | case 'LabourForce' 57 | datatype = 'Cross-Sectional'; 58 | data = load('LabourForce.mat'); 59 | data_mat = data.data; 60 | if(Normalized) 61 | norm_col = [3,4,5,6]; 62 | data_mat(:,norm_col) = (data_mat(:,norm_col)-mean(data_mat(:,norm_col)))./std(data_mat(:,norm_col)); 63 | end 64 | 65 | % RealizedLibrary data 66 | case 'RealizedLibrary' 67 | datatype = 'TimeSeries'; 68 | data = load('RealizedLibrary.mat'); 69 | % An index must be specified; the series is truncated if Length is given 70 | if(isempty(Index)) 71 | error('At least one index must be specified!') 72 | else 73 | data_mat = data.(Index).open_to_close*100; 74 | if Length > 0 75 | T = Length; 76 | if Length <= length(data_mat) 77 | data_mat = data_mat(end-T+1:end); 78 | else 79 | error('The Length argument must not exceed the length of the time series!') 80 | end 81 | end 82 | end 83 | 84 | if(~isempty(RealizedMeasure)) 85 | num_obs = length(data_mat); 86 | data_out.return = data_mat; 87 | if iscell(RealizedMeasure) 88 | num_realized = length(RealizedMeasure); 89 | for i = 1:num_realized 90 | data_out.(RealizedMeasure{i}) = data.(Index).(RealizedMeasure{i})(end-num_obs+1:end)*100^2; 91 | end 92 | else 93 | data_out.(RealizedMeasure) = data.(Index).(RealizedMeasure)(end-num_obs+1:end)*100^2; 94 | end 95 | else 96 | data_out = data_mat; 97 | end 98 | end 99 | 100 | %% Check additional options 101 | % If a column of ones is added to the matrix X of cross-sectional data (default). 102 | % Only cross-sectional data is handled here, so the structure built above for 103 | % time series data is not overwritten. 104 | if strcmp(datatype,'Cross-Sectional') 105 | if Intercept 106 | data_mat = [ones(size(data_mat,1),1),data_mat]; 107 | VarNames = ['Intercept',data.VarNames]; 108 | data_out = data_mat; 109 | if strcmp(Type,'Table') 110 | data_table = array2table(data_mat); 111 | data_table.Properties.VariableNames = VarNames; 112 | data_out = data_table; 113 | end 114 | else 115 | data_out = data_mat; 116 | end 117 | end 118 | 119 | end 120 | 121 | -------------------------------------------------------------------------------- /VBLab/Utilities/trainTestSplit.m: -------------------------------------------------------------------------------- 1 | function [dataTrain,dataTest] = trainTestSplit(data,testRatio) 2 | %TRAINTESTSPLIT Randomly split a dataset into a training set and a test set 3 | %   Minimal implementation: the rows of data are shuffled and a fraction 4 | %   testRatio of the observations is assigned to the test set 5 | idx = randperm(size(data,1)); 6 | n_test = round(testRatio*size(data,1)); 7 | dataTest = data(idx(1:n_test),:); 8 | dataTrain = data(idx(n_test+1:end),:); 9 | end 10 | 11 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_FNNInitialize.m: -------------------------------------------------------------------------------- 1 | function weights = utils_FNNInitialize(layers) 2 | %UTILS_FNNINITIALIZE Initialize the weights of a feedforward neural network 3 | %   layers: vector of doubles, each number specifying the
number of 4 | %   nodes in a layer of the network. 5 | % 6 | %   weights: cell array of weight matrices specifying the 7 | %   transformation from one layer of the network to the next. 8 | % 9 | %   Copyright 2018 Minh-Ngoc Tran (minh-ngoc.tran@sydney.edu.au) and Nghia 10 | %   Nguyen (nghia.nguyen@sydney.edu.au) 11 | % 12 | %   http://www.xxx.com 13 | % 14 | %   Version: 1.0 15 | %   LAST UPDATE: April, 2018 16 | 17 | weights = cell(1, length(layers)-1); 18 | 19 | for i = 1:length(layers)-1 20 | % Use random weights drawn uniformly from -b to b 21 | b = sqrt(6)/(layers(i)+layers(i+1)); 22 | if i==1 23 | weights{i} = rand(layers(i+1),layers(i))*2*b - b; % Input layer already includes the bias 24 | else 25 | weights{i} = rand(layers(i+1),layers(i)+1)*2*b - b; % +1 for the bias unit 26 | end 27 | end 28 | 29 | end 30 | 31 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_errorMsg.m: -------------------------------------------------------------------------------- 1 | function msg_out = utils_errorMsg(identifier) 2 | %UTILS_ERRORMSG Define custom error/warning messages for exceptions 3 | %   UTILS_ERRORMSG = (IDENTIFIER) extract the message for the input identifier 4 | % 5 | % 6 | %   Copyright 2021 Nguyen (nghia.nguyen@sydney.edu.au) 7 | % 8 | %   https://github.com/VBayesLab/VBLab 9 | % 10 | %   Version: 1.0 11 | %   LAST UPDATE: Feb, 2021 12 | 13 | switch identifier 14 | case 'vbayeslab:TooFewInputs' 15 | msg_out = 'At least two arguments must be specified'; 16 | case 'vbayeslab:InputSizeMismatchX' 17 | msg_out = 'X and Y must have the same number of observations'; 18 | case 'vbayeslab:InputSizeMismatchY' 19 | msg_out = 'Y must be a single column vector'; 20 | case 'vbayeslab:ArgumentMustBePair' 21 | msg_out = 'Optional arguments must be pairs'; 22 | case 'vbayeslab:ResponseMustBeBinary' 23 | msg_out = 'Two-level categorical variable required'; 24 | case 'vbayeslab:DistributionMustBeBinomial' 25 | msg_out = 'Binomial distribution option required'; 26 | case 'vbayeslab:MustSpecifyActivationFunction' 27 | msg_out = 'Activation function type required'; 28 | case 'vbayeslab:InitVectorMisMatched' 29 | msg_out = 'The length of the initial values must equal the number of model parameters'; 30 | end 31 | end 32 | 33 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_gen_Sobol.m: -------------------------------------------------------------------------------- 1 | % Generate a scrambled Sobol sequence 2 | function X1 = utils_gen_Sobol(m,s) 3 | N = pow2(m); % Number of points 4 | P = sobolset(s); % Get Sobol sequence 5 | P = scramble(P,'MatousekAffineOwen'); % Scramble Sobol points 6 | X1 = net(P,N); 7 | 8 | X1 = X1'; 9 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_itril.m: -------------------------------------------------------------------------------- 1 | function [I J] = utils_itril(sz, k) 2 | % function [I J] = utils_itril(sz, k) % OR 3 | % I = itril(sz, k) 4 | % 5 | % Return the subindices [I J] (or linear indices I if single output call) 6 | % for the purpose of extracting the lower triangular part of a matrix of 7 | % size SZ. Input k is optional shifting. For k=0, extract from the main 8 | % diagonal. For k>0 -> above the diagonal, k<0 -> below the diagonal 9 | % 10 | % This returns the same as [...]
= find(tril(ones(sz),k)) 11 | % - Output is a column vector sorted with respect to linear indices 12 | % - No intermediate matrix is generated, which can be useful for large 13 | %   size problems 14 | % - Mathematically, A(itril(size(A))) is called (lower) "half-vectorization" 15 | %   of A 16 | % 17 | % Example: 18 | % 19 | % A = [ 7 5 4 20 | %       4 2 3 21 | %       9 1 9 22 | %       3 5 7 ] 23 | % 24 | % I = itril(size(A)) % gives [1 2 3 4 6 7 8 11 12]' 25 | % A(I) % gives [7 4 9 3 2 1 5 9 7]' OR A(tril(A)>0) 26 | % 27 | % Author: Bruno Luong 28 | % Date: 21/March/2009 29 | 30 | if isscalar(sz) 31 | sz = [sz sz]; 32 | end 33 | m=sz(1); 34 | n=sz(2); 35 | 36 | % Main diagonal by default 37 | if nargin<2 38 | k=0; 39 | end 40 | 41 | nc = min(n,m+k); % number of columns of the triangular part 42 | lo = max((1:nc).'-k,1); % lower row index for each column 43 | hi = m + zeros(nc,1); % upper row index for each column 44 | 45 | if isempty(lo) 46 | I = zeros(0,1); 47 | J = zeros(0,1); 48 | else 49 | c=cumsum([0; hi-lo]+1); % cumsum of the lengths 50 | I = accumarray(c(1:end-1), (lo-[0; hi(1:end-1)]-1), ... 51 | [c(end)-1 1]); 52 | I = cumsum(I+1); % row indices 53 | J = cumsum(accumarray(c,1)); 54 | J = J(1:end-1); % column indices 55 | end 56 | 57 | if nargout<2 58 | % convert to linear indices 59 | I = sub2ind([m n], I, J); 60 | end 61 | 62 | end % itril 63 | 64 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_itriu.m: -------------------------------------------------------------------------------- 1 | function [I J] = utils_itriu(sz, k) 2 | % function [I J] = itriu(sz, k) % OR 3 | % I = itriu(sz, k) 4 | % 5 | % Return the subindices [I J] (or linear indices I if single output call) 6 | % for the purpose of extracting the upper triangular part of a matrix of 7 | % size SZ. Input k is optional shifting. For k=0, extract from the main 8 | % diagonal. For k>0 -> above the diagonal, k<0 -> below the diagonal 9 | % 10 | % This returns the same as [...] = find(triu(ones(sz),k)) 11 | % - Output is a column vector sorted with respect to linear indices 12 | % - No intermediate matrix is generated, which can be useful for large 13 | %   size problems 14 | % - Mathematically, A(itriu(size(A))) is called (upper) "half-vectorization" 15 | %   of A 16 | % 17 | % Example: 18 | % 19 | % A = [ 7 5 4 20 | %       4 2 3 21 | %       9 1 9 22 | %       3 5 7 ] 23 | % 24 | % I = itriu(size(A)) % gives [1 5 6 9 10 11]' 25 | % A(I) % gives [7 5 2 4 3 9]' OR A(triu(A)>0) 26 | % 27 | % Author: Bruno Luong 28 | % Date: 21/March/2009 29 | 30 | if isscalar(sz) 31 | sz = [sz sz]; 32 | end 33 | m=sz(1); 34 | n=sz(2); 35 | 36 | % Main diagonal by default 37 | if nargin<2 38 | k=0; 39 | end 40 | 41 | nc = n-max(k,0); % number of columns of the triangular part 42 | lo = ones(nc,1); % lower row index for each column 43 | hi = min((1:nc).'-min(k,0),m); % upper row index for each column 44 | 45 | if isempty(lo) 46 | I = zeros(0,1); 47 | J = zeros(0,1); 48 | else 49 | c=cumsum([0; hi-lo]+1); % cumsum of the lengths 50 | I = accumarray(c(1:end-1), (lo-[0; hi(1:end-1)]-1), ...
51 | [c(end)-1 1]); 52 | I = cumsum(I+1); % row indices 53 | J = accumarray(c,1); 54 | J(1) = 1 + max(k,0); % The column indices start from this value 55 | J = cumsum(J(1:end-1)); % column indices 56 | end 57 | 58 | if nargout<2 59 | % convert to linear indices 60 | I = sub2ind([m n], I, J); 61 | end 62 | 63 | end % itriu 64 | 65 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_jitChol.m: -------------------------------------------------------------------------------- 1 | function [B_var] = utils_jitChol(B_var) 2 | %UTILS_JITCHOL Add jitter to the diagonal of a matrix that is not positive 3 | %   definite, so that its Cholesky factorization exists 4 | [~,p] = chol(B_var); 5 | if p>0 6 | min_eig = min(eig(B_var)); 7 | d = size(B_var,1); 8 | delta = max(0,-2*min_eig+10^(-5)).*eye(d); 9 | B_var = B_var+delta; 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_logNormalpdf.m: -------------------------------------------------------------------------------- 1 | function logNormal = utils_logNormalpdf(theta,mu,sigma2) 2 | 3 | logNormal = -0.5*log(2*pi)-0.5*log(sigma2)-0.5*(theta-mu).^2/sigma2; 4 | 5 | end 6 | 7 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_logit.m: -------------------------------------------------------------------------------- 1 | function output = utils_logit(input) 2 | %UTILS_LOGIT Logit (log-odds) transformation: log(p/(1-p)) 3 | output = log(input./(1-input)); 4 | end 5 | 6 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_normrnd_qmc.m: -------------------------------------------------------------------------------- 1 | function x = utils_normrnd_qmc(S,d) 2 | % Generate an Sxd matrix of standard normal numbers by RQMC 3 | rqmc = utils_rqmc_rnd(S,d); 4 | rqmc = rqmc(1:S,:); 5 | x = norminv(rqmc); 6 | end -------------------------------------------------------------------------------- /VBLab/Utilities/utils_plotShrinkage.m: -------------------------------------------------------------------------------- 1 | function utils_plotShrinkage(ShrinkageCoef,opt) 2 | %UTILS_PLOTSHRINKAGE Plot shrinkage coefficients of Group Lasso regularization 3 | % 4 | % 5 | %   Copyright 2018 Minh-Ngoc Tran (minh-ngoc.tran@sydney.edu.au) and Nghia 6 | %   Nguyen (nghia.nguyen@sydney.edu.au) 7 | % 8 | %   http://www.xxx.com 9 | % 10 | %   Version: 1.0 11 | %   LAST UPDATE: April, 2018 12 | 13 | % Do not plot intercept coefficient 14 | % ShrinkageCoef = ShrinkageCoef(2:end,:); 15 | 16 | TextTitle = opt.title; 17 | labelX = opt.labelX; 18 | labelY = opt.labelY;
19 | linewidth = opt.linewidth; 20 | color = opt.color; 21 | 22 | numCoeff = size(ShrinkageCoef,1); % Number of shrinkage coefficients 23 | fontsize = 13; 24 | 25 | % Define default settings 26 | if(isempty(TextTitle)) 27 | TextTitle = 'Shrinkage Coefficients'; 28 | end 29 | if(isempty(labelX)) 30 | labelX = 'Iteration'; 31 | end 32 | 33 | % Plot 34 | plot(ShrinkageCoef','LineWidth',linewidth); 35 | grid on 36 | title(TextTitle,'FontSize', 20) 37 | xlabel(labelX,'FontSize', 15) 38 | ylabel(labelY,'FontSize', 15) 39 | Ytext = ShrinkageCoef(:,end); % Y coordinates of the labels, one per coefficient 40 | Xtext = size(ShrinkageCoef,2); % X coordinate of the labels, same for all coefficients 41 | for i=1:numCoeff 42 | text(Xtext,Ytext(i),['\gamma_{',num2str(i),'}'],'fontsize',fontsize) 43 | end 44 | end 45 | 46 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_relu.m: -------------------------------------------------------------------------------- 1 | function out = utils_relu(in) 2 | %UTILS_RELU Rectified linear unit: max(0,in) 3 | out = max(0,in); 4 | end 5 | 6 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_rqmc_rnd.m: -------------------------------------------------------------------------------- 1 | function f = utils_rqmc_rnd(S,d) 2 | % Generate an S x d matrix of randomized quasi-Monte Carlo numbers 3 | max_sobol = 1111; 4 | r = floor(d/max_sobol); 5 | s = d-r*max_sobol; 6 | if r>=1 7 | f = utils_gen_Sobol(ceil(log2(S)),max_sobol)'; 8 | for i = 2:r 9 | f = [f,utils_gen_Sobol(ceil(log2(S)),max_sobol)']; 10 | end 11 | f = [f,utils_gen_Sobol(ceil(log2(S)),s)']; 12 | else 13 | f = utils_gen_Sobol(ceil(log2(S)),d)'; 14 | end 15 | 16 | end 17 | -------------------------------------------------------------------------------- /VBLab/Utilities/utils_rs_multinomial.m: -------------------------------------------------------------------------------- 1 | function indx = utils_rs_multinomial(w) 2 | 3 | N = length(w); % number of particles 4 | indx = zeros(1,N); % preallocate 5 | Q = cumsum(w); % cumulative sum 6 | u = sort(rand(1,N)); % random numbers 7 | 8 | j = 1; 9 | for i=1:N 10 | while (Q(j)<u(i)) 11 | j = j+1; 12 | end 13 | indx(i) = j; 14 | end 15 | 16 | end 17 | -------------------------------------------------------------------------------- /VBLab/Utilities/vbayesPlot.m: -------------------------------------------------------------------------------- 1 | function vbayesPlot(type,value,varargin) 2 | %VBAYESPLOT Plot quantities of interest obtained from a VB analysis 3 | % Input: 4 | %   type: String, the type of plot 5 | %   value: Values to be plotted. The format depends on the plot type: 6 | % 7 | %   Optional name-value pair arguments control the title, axis labels, 8 | %   line width, colors, etc. (see paramNames below) 9 | % 10 | % Possible types: 11 | % 'Density' -> value is a cell array of distribution name and parameters 12 | % 'Shrinkage' -> value is an NxD array of shrinkage parameters 13 | % 'Interval' -> value is a 1D array of prediction values 14 | % 'ROC' -> value is a 1D array of predictions 15 | 16 | 17 | if nargin < 2 18 | error(utils_errorMsg('vbayeslab:TooFewInputs')); 19 | end 20 | 21 | %% Parse additional options 22 | paramNames = {'Title' 'Xlabel' 'Ylabel' 'LineWidth',... 23 | 'Color' 'IntervalStyle' 'Nsample' 'Ordering',... 24 | 'yTest' 'Legend' 'Subplot' 'VarNames' }; 25 | 26 | paramDflts = {NaN NaN NaN 2,... 27 | 'red' 'shade' 50 'ascend',... 28 | NaN NaN NaN NaN}; 29 | 30 | [TextTitle,labelX,labelY,linewidth,... 31 | color,style,npoint,order,...
32 | yTest,Textlegend,~,VarNames] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); % the 'Subplot' option is parsed but currently unused 33 | 34 | % Store plot options to a structure 35 | opt.title = TextTitle; 36 | opt.labelX = labelX; 37 | opt.labelY = labelY; 38 | opt.linewidth = linewidth; 39 | opt.color = color; 40 | 41 | switch type 42 | % Plot distribution density 43 | % value must be a cell array with distribution name and 44 | % distribution parameters 45 | case 'Density' 46 | eval(['dist=',value{1},';']); % Use distribution name as a distribution object 47 | params = value{2}; % Distribution parameters 48 | dist.plotPdf(params); 49 | 50 | % Plot shrinkage parameters of a deepGLM model 51 | case 'Shrinkage' 52 | utils_plotShrinkage(value,opt); 53 | 54 | % Plot prediction interval for continuous output 55 | case 'Interval' 56 | yhat = value.yhatMatrix; 57 | yhatInterval = value.interval; 58 | predMean = mean(yhat); 59 | % If the test data has more than npoint rows, randomly draw npoint points to plot 60 | if(length(predMean)>=npoint) 61 | idx = randperm(length(yhatInterval),npoint); 62 | intervalPlot = yhatInterval(idx,:); 63 | yhatMeanPlot = predMean(idx)'; 64 | if(~isempty(yTest)) 65 | ytruePlot = yTest(idx)'; 66 | end 67 | else 68 | yhatMeanPlot = predMean'; 69 | intervalPlot = yhatInterval; 70 | ytruePlot = yTest; 71 | end 72 | % Sort data 73 | [yhatMeanPlot,sortIdx] = sort(yhatMeanPlot,order); 74 | intervalPlot = intervalPlot(sortIdx,:); 75 | if(isempty(yTest)) 76 | ytruePlot = []; 77 | else 78 | ytruePlot = ytruePlot(sortIdx); 79 | end 80 | plotInterval(yhatMeanPlot,intervalPlot,opt,... 81 | 'ytrue',ytruePlot,... 82 | 'Style',style); 83 | 84 | % Plot ROC curve for binary outcomes 85 | % Value is prediction class labels. Could be a 1D array (single ROC) 86 | % or cell array of 1D array (multiple ROC) 87 | % The 'yTest' argument must be provided 88 | case 'ROC' 89 | if(~isnumeric(yTest)) 90 | disp('Target should be a column of binary responses!') 91 | return 92 | else 93 | % Plot single ROC 94 | if(size(value,2)==1) 95 | [tpr,fpr,~] = roc(yTest',value'); 96 | plot(fpr,tpr,'LineWidth',linewidth); 97 | grid on 98 | title(TextTitle,'FontSize',20); 99 | xlabel(labelX,'FontSize',15); 100 | ylabel(labelY,'FontSize',15); 101 | % Plot multiple ROC 102 | else 103 | tpr = cell(1,size(value,2)); 104 | fpr = cell(1,size(value,2)); 105 | for i=1:size(value,2) 106 | [tpr{i},fpr{i},~] = roc(yTest',value(:,i)'); 107 | plot(fpr{i},tpr{i},'LineWidth',linewidth); 108 | grid on 109 | hold on 110 | end 111 | title(TextTitle,'FontSize',20); 112 | xlabel(labelX,'FontSize',15); 113 | ylabel(labelY,'FontSize',15); 114 | legend(Textlegend{1},Textlegend{2}); 115 | end 116 | end 117 | end 118 | end 119 | 120 | -------------------------------------------------------------------------------- /VBLab/VB/CGVB.m: -------------------------------------------------------------------------------- 1 | classdef CGVB < VBayesLab 2 | %CGVB Gaussian Variational Bayes with a Cholesky decomposed covariance 3 | %   matrix for the Gaussian variational distribution 4 | 5 | properties 6 | GradWeight1 % Momentum weight 1 7 | GradWeight2 % Momentum weight 2 8 | end 9 | 10 | methods 11 | function obj = CGVB(mdl,data,varargin) 12 | %CGVB Construct an instance of this class 13 | % mdl is either a model object or a function handle 14 | obj.Method = 'CGVB'; 15 | obj.GradWeight1 = 0.9; 16 | obj.GradWeight2 = 0.9; 17 | 18 | % Parse additional options 19 | if nargin > 2 20 | paramNames = {'NumSample' 'LearningRate' 'GradWeight1' 'GradWeight2' ... 21 | 'MaxIter' 'MaxPatience' 'WindowSize' 'Verbose' ... 22 | 'InitMethod' 'StdForInit' 'Seed' 'MeanInit' ...
23 | 'SigInitScale' 'LBPlot' 'GradientMax' 'AutoDiff' ... 24 | 'HFuntion' 'NumParams' 'DataTrain' 'Setting' ... 25 | 'StepAdaptive' 'SaveParams'}; 26 | paramDflts = {obj.NumSample obj.LearningRate obj.GradWeight1 obj.GradWeight2 ... 27 | obj.MaxIter obj.MaxPatience obj.WindowSize obj.Verbose ... 28 | obj.InitMethod obj.StdForInit obj.Seed obj.MeanInit ... 29 | obj.SigInitScale obj.LBPlot obj.GradientMax obj.AutoDiff ... 30 | obj.HFuntion obj.NumParams obj.DataTrain obj.Setting ... 31 | obj.StepAdaptive obj.SaveParams}; 32 | 33 | [obj.NumSample,... 34 | obj.LearningRate,... 35 | obj.GradWeight1,... 36 | obj.GradWeight2,... 37 | obj.MaxIter,... 38 | obj.MaxPatience,... 39 | obj.WindowSize,... 40 | obj.Verbose,... 41 | obj.InitMethod,... 42 | obj.StdForInit,... 43 | obj.Seed,... 44 | obj.MeanInit,... 45 | obj.SigInitScale,... 46 | obj.LBPlot,... 47 | obj.GradientMax,... 48 | obj.AutoDiff,... 49 | obj.HFuntion,... 50 | obj.NumParams,... 51 | obj.DataTrain,... 52 | obj.Setting,... 53 | obj.StepAdaptive,... 54 | obj.SaveParams] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); 55 | end 56 | 57 | % Check if model object or function handle is provided 58 | if (isobject(mdl)) % If model object is provided 59 | obj.Model = mdl; 60 | obj.ModelToFit = obj.Model.ModelName; % Set model name if model is specified 61 | else % If function handle is provided 62 | obj.GradHFuntion = mdl; 63 | end 64 | 65 | % Main function to run CGVB 66 | obj.Post = obj.fit(data); 67 | end 68 | 69 | %% VB main function 70 | function Post = fit(obj,data) 71 | 72 | % Extract model object if provided 73 | if (~isempty(obj.Model)) 74 | model = obj.Model; 75 | d_theta = model.NumParams; % Number of parameters 76 | else % If model object is not provided, number of parameters must be provided 77 | if (~isempty(obj.NumParams)) 78 | d_theta = obj.NumParams; 79 | else 80 | error('The number of model parameters has to be specified!') 81 | end 82 | end 83 | 84 | % Unload training parameters (only for convenience) 85 | std_init = obj.StdForInit; 86 | eps0 = obj.LearningRate; 87 | S = obj.NumSample; 88 | ini_mu = obj.MeanInit; 89 | window_size = obj.WindowSize; 90 | max_patience = obj.MaxPatience; 91 | init_scale = obj.SigInitScale; 92 | tau_threshold = obj.StepAdaptive; 93 | max_iter = obj.MaxIter; 94 | lb_plot = obj.LBPlot; 95 | max_grad = obj.GradientMax; 96 | momentum_beta1 = obj.GradWeight1; 97 | momentum_beta2 = obj.GradWeight2; 98 | grad_hfunc = obj.GradHFuntion; 99 | setting = obj.Setting; 100 | verbose = obj.Verbose; 101 | save_params = obj.SaveParams; 102 | 103 | % Store variational mean in each iteration (if specified) 104 | if(save_params) 105 | params_iter = zeros(max_iter,d_theta); 106 | end 107 | 108 | % Initialization 109 | iter = 0; 110 | patience = 0; 111 | stop = false; 112 | LB_smooth = 0; 113 | lambda_best = []; 114 | 115 | % Number of variational parameters 116 | d_lambda = d_theta + d_theta*(d_theta+1)/2; 117 | 118 | % Initialization of mu 119 | % If initial parameters are not specified, then randomly 120 | % initialize variational parameters 121 | if isempty(ini_mu) 122 | mu = normrnd(0,std_init,d_theta,1); 123 | else % If initial parameters are provided 124 | if (length(ini_mu) ~= d_theta) 125 | error(utils_errorMsg('vbayeslab:InitVectorMisMatched')) 126 | else 127 | mu = reshape(ini_mu,d_theta,1); % Must be a column vector 128 | end 129 | end 130 | 131 | % Initialize variational parameters 132 | L = init_scale*eye(d_theta); 133 | lambda = [mu;utils_vech(L)]; 134 | 135 | % Pre-allocation 136 | grad_LB =
zeros(S,d_lambda); 137 | h_lambda = zeros(S,1); 138 | rqmc = normrnd(0,1,S,d_theta); 139 | 140 | for s = 1:S 141 | % Parameters in Normal distribution 142 | varepsilon = rqmc(s,:)'; 143 | theta = mu+L*varepsilon; % Theta -> Dx1 column 144 | 145 | % Gradient of q_lambda. This function is independent of the 146 | % model 147 | [grad_log_q,log_q] = obj.log_q_grad(theta,mu,L); 148 | 149 | % If a function handle for the gradient of h(theta) is not 150 | % provided, then a model object with a method to compute the 151 | % gradient of h(theta) must be used. 152 | if isempty(grad_hfunc) 153 | if (~isempty(obj.Model)) 154 | % Call the hFunctionGrad of the model to compute 155 | % h(theta) and gradient of h(theta) 156 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 157 | else 158 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 159 | end 160 | else 161 | % If the user provides a function that directly computes 162 | % the gradient of h(theta), use it 163 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 164 | end 165 | 166 | % Make sure gradient is a column 167 | grad_h_theta = reshape(grad_h_theta,length(grad_h_theta),1); 168 | 169 | % Compute h_lambda and gradient of h_lambda 170 | h_lambda(s) = h_theta - log_q; 171 | grad_h_lambda = grad_h_theta - grad_log_q ; 172 | 173 | % Gradient of lowerbound 174 | grad_LB(s,:) = [grad_h_lambda;utils_vech(grad_h_lambda*(varepsilon'))]'; 175 | 176 | end 177 | grad_LB = mean(grad_LB)'; 178 | LB = mean(h_lambda); 179 | 180 | % Gradient clipping to avoid exploding gradients 181 | grad_norm = norm(grad_LB); 182 | if norm(grad_LB) > max_grad 183 | grad_LB = (max_grad/grad_norm)*grad_LB; 184 | end 185 | 186 | g_adaptive = grad_LB; 187 | v_adaptive = g_adaptive.^2; 188 | g_bar_adaptive = g_adaptive; 189 | v_bar_adaptive = v_adaptive; 190 | 191 | % Run main VB iterations 192 | while ~stop 193 | 194 | iter = iter+1; 195 | mu = lambda(1:d_theta); 196 | L = utils_vechinv(lambda(d_theta+1:end),2); 197 | 198 | grad_LB = zeros(S,d_lambda); 199 | h_lambda = zeros(S,1); 200 | rqmc = normrnd(0,1,S,d_theta); 201 | for s = 1:S 202 | % Parameters in Normal distribution 203 | varepsilon = rqmc(s,:)'; 204 | theta = mu+L*varepsilon; 205 | 206 | % Gradient of q_lambda. This function is independent of the 207 | % model 208 | [grad_log_q,log_q] = obj.log_q_grad(theta,mu,L); 209 | 210 | % If a function handle for the gradient of h(theta) is not 211 | % provided, then a model object with a method to compute the 212 | % gradient of h(theta) must be used.
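% Note on the estimator assembled in this loop (a sketch of the derivation,
% using only quantities defined above): with the reparameterization
% theta = mu + L*varepsilon, varepsilon ~ N(0,I), an unbiased estimate of
% the lower bound gradient w.r.t. lambda = (mu,vech(L)) is the average over
% the S draws of
%    gradient w.r.t. mu      : grad_h_lambda
%    gradient w.r.t. vech(L) : utils_vech(grad_h_lambda*varepsilon')
% where h_lambda(theta) = h(theta) - log q_lambda(theta), which is exactly
% what grad_LB(s,:) accumulates.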
213 | if isempty(grad_hfunc) 214 | if (~isempty(obj.Model)) 215 | % Call the hFunctionGrad of the model to compute 216 | % h(theta) and gradient of h(theta) 217 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 218 | else 219 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 220 | end 221 | else 222 | % If the user provides a function that directly computes 223 | % the gradient of h(theta), use it 224 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 225 | end 226 | 227 | % Make sure gradient is a column 228 | grad_h_theta = reshape(grad_h_theta,length(grad_h_theta),1); 229 | 230 | % Compute h_lambda and gradient of h_lambda 231 | h_lambda(s) = h_theta - log_q; 232 | grad_h_lambda = grad_h_theta - grad_log_q ; 233 | 234 | % Gradient of lowerbound 235 | grad_LB(s,:) = [grad_h_lambda;utils_vech(grad_h_lambda*(varepsilon'))]'; 236 | end 237 | 238 | grad_LB = mean(grad_LB)'; 239 | 240 | % gradient clipping 241 | grad_norm = norm(grad_LB); 242 | if norm(grad_LB)>max_grad 243 | grad_LB = (max_grad/grad_norm)*grad_LB; 244 | end 245 | 246 | g_adaptive = grad_LB; 247 | v_adaptive = g_adaptive.^2; 248 | g_bar_adaptive = momentum_beta1*g_bar_adaptive+(1-momentum_beta1)*g_adaptive; 249 | v_bar_adaptive = momentum_beta2*v_bar_adaptive+(1-momentum_beta2)*v_adaptive; 250 | 251 | % After a specified number of iterations, make the step 252 | % size smaller. This can be modified to implement more 253 | % sophisticated adaptive learning rate methods. 254 | if iter>=tau_threshold 255 | stepsize = eps0*tau_threshold/iter; 256 | else 257 | stepsize = eps0; 258 | end 259 | 260 | % Update new lambda 261 | lambda = lambda + stepsize*g_bar_adaptive./sqrt(v_bar_adaptive); 262 | 263 | % Estimate the lowerbound at the current iteration 264 | LB(iter) = mean(h_lambda); 265 | 266 | % Smooth the lowerbound 267 | if iter>=window_size 268 | LB_smooth(iter-window_size+1) = mean(LB(iter-window_size+1:iter)); 269 | end 270 | 271 | % Check for early stopping 272 | if (iter>window_size)&&(LB_smooth(iter-window_size+1)>=max(LB_smooth)) 273 | lambda_best = lambda; 274 | patience = 0; 275 | else 276 | patience = patience+1; 277 | end 278 | 279 | if (patience>max_patience)||(iter>max_iter) 280 | stop = true; 281 | end 282 | 283 | % Display training information 284 | if(verbose) 285 | if iter> window_size 286 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB_smooth(iter-window_size))]) 287 | else 288 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB(iter))]) 289 | end 290 | end 291 | 292 | % If users want to save variational mean in each iteration 293 | % Only used when debugging code 294 | if(save_params) 295 | params_iter(iter,:) = mu; 296 | end 297 | end 298 | 299 | % Store output 300 | if(save_params) 301 | Post.muIter = params_iter(1:iter-1,:); 302 | end 303 | 304 | % If the algorithm stops too early 305 | if(isempty(lambda_best)) 306 | lambda_best = lambda; 307 | end 308 | 309 | % Store final results (using the best lambda found by early stopping) 310 | Post.LB_smooth = LB_smooth; 311 | Post.LB = LB; 312 | Post.lambda = lambda_best; 313 | Post.mu = lambda_best(1:d_theta); 314 | Post.L = utils_vechinv(lambda_best(d_theta+1:end),2); 315 | Post.Sigma = Post.L*(Post.L'); 316 | Post.sigma2 = diag(Post.Sigma); 317 | 318 | % If users want to plot the lowerbound 319 | if(lb_plot) 320 | obj.plot_lb(LB_smooth); 321 | end 322 | 323 | end 324 | 325 | %% Gradient of log_q_lambda.
This is independent of the model 326 | % Log pdf of multivariate normal distribution 327 | function [grad_log_q,log_q] = log_q_grad(obj,theta,mu,L) 328 | d = length(theta); 329 | Sigma = L*(L'); 330 | log_q = -d/2*log(2*pi)-1/2*log(det(Sigma))-1/2*(theta-mu)'*(Sigma\(theta-mu)); 331 | grad_log_q = -Sigma\(theta-mu); 332 | end 333 | end 334 | end 335 | 336 | -------------------------------------------------------------------------------- /VBLab/VB/MGVB.m: -------------------------------------------------------------------------------- 1 | classdef MGVB < VBayesLab 2 | %MGVB Manifold Gaussian Variational Bayes: the covariance matrix of the 3 | %   Gaussian variational distribution is updated on the manifold of SPD matrices 4 | 5 | properties 6 | GradClipInit % Gradient clipping threshold for the first iteration (0 means no clipping) 7 | end 8 | 9 | methods 10 | function obj = MGVB(mdl,data,varargin) 11 | %MGVB Construct an instance of this class 12 | % mdl is either a model object or a function handle 13 | obj.Method = 'MGVB'; 14 | obj.GradWeight = 0.4; % Small gradient weight is better 15 | obj.GradClipInit = 0; % Sometimes we need to clip the gradient early 16 | 17 | % Parse additional options 18 | if nargin > 2 19 | paramNames = {'NumSample' 'LearningRate' 'GradWeight' 'GradClipInit' ... 20 | 'MaxIter' 'MaxPatience' 'WindowSize' 'Verbose' ... 21 | 'InitMethod' 'StdForInit' 'Seed' 'MeanInit' ... 22 | 'SigInitScale' 'LBPlot' 'GradientMax' ... 23 | 'NumParams' 'DataTrain' 'Setting' 'StepAdaptive' ... 24 | 'SaveParams'}; 25 | paramDflts = {obj.NumSample obj.LearningRate obj.GradWeight obj.GradClipInit ... 26 | obj.MaxIter obj.MaxPatience obj.WindowSize obj.Verbose ... 27 | obj.InitMethod obj.StdForInit obj.Seed obj.MeanInit ... 28 | obj.SigInitScale obj.LBPlot obj.GradientMax ... 29 | obj.NumParams obj.DataTrain obj.Setting obj.StepAdaptive ... 30 | obj.SaveParams}; 31 | 32 | [obj.NumSample,... 33 | obj.LearningRate,... 34 | obj.GradWeight,... 35 | obj.GradClipInit,... 36 | obj.MaxIter,... 37 | obj.MaxPatience,... 38 | obj.WindowSize,... 39 | obj.Verbose,... 40 | obj.InitMethod,... 41 | obj.StdForInit,... 42 | obj.Seed,... 43 | obj.MeanInit,... 44 | obj.SigInitScale,... 45 | obj.LBPlot,... 46 | obj.GradientMax,... 47 | obj.NumParams,... 48 | obj.DataTrain,... 49 | obj.Setting,... 50 | obj.StepAdaptive,...
51 | obj.SaveParams] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); 52 | end 53 | 54 | % Check if model object or function handle is provided 55 | if (isobject(mdl)) % If model object is provided 56 | obj.Model = mdl; 57 | obj.ModelToFit = obj.Model.ModelName; % Set model name if model is specified 58 | else % If function handle is provided 59 | obj.HFuntion = mdl; 60 | end 61 | 62 | % Main function to run MGVB 63 | obj.Post = obj.fit(data); 64 | end 65 | 66 | %% VB main function 67 | function Post = fit(obj,data) 68 | 69 | % Extract model object if provided 70 | if (~isempty(obj.Model)) 71 | model = obj.Model; 72 | d_theta = model.NumParams; % Number of parameters 73 | else % If model object is not provided, number of parameters must be provided 74 | if (~isempty(obj.NumParams)) 75 | d_theta = obj.NumParams; 76 | else 77 | error('The number of model parameters has to be specified!') 78 | end 79 | end 80 | 81 | % Extract sampling setting 82 | std_init = obj.StdForInit; 83 | eps0 = obj.LearningRate; 84 | S = obj.NumSample; 85 | ini_mu = obj.MeanInit; 86 | window_size = obj.WindowSize; 87 | max_patience = obj.MaxPatience; 88 | momentum_weight = obj.GradWeight; 89 | init_scale = obj.SigInitScale; 90 | stepsize_adapt = obj.StepAdaptive; 91 | max_iter = obj.MaxIter; 92 | lb_plot = obj.LBPlot; 93 | max_grad = obj.GradientMax; 94 | max_grad_init = obj.GradClipInit; 95 | hfunc = obj.HFuntion; 96 | setting = obj.Setting; 97 | verbose = obj.Verbose; 98 | save_params = obj.SaveParams; 99 | 100 | % Store variational mean in each iteration (if specified) 101 | if(save_params) 102 | params_iter = zeros(max_iter,d_theta); 103 | end 104 | 105 | % Initialization 106 | iter = 0; 107 | patience = 0; 108 | stop = false; 109 | LB_smooth = 0; 110 | 111 | % Initialization of mu 112 | % If initial parameters are not specified, then use some 113 | % initialization methods 114 | if isempty(ini_mu) 115 | mu = normrnd(0,std_init,d_theta,1); 116 | else % If initial parameters are provided 117 | mu = ini_mu; 118 | end 119 | 120 | Sig = init_scale*eye(d_theta); % Initialization of Sig 121 | c12 = zeros(1,d_theta+d_theta*d_theta); % Control variate, initialised to zero 122 | Sig_inv = eye(d_theta)/Sig; 123 | 124 | gra_log_q_lambda = zeros(S,d_theta+d_theta*d_theta); % Gradient of log_q 125 | grad_log_q_h_function = zeros(S,d_theta+d_theta*d_theta); % (gradient of log_q) x h(theta) 126 | grad_log_q_h_function_cv = zeros(S,d_theta+d_theta*d_theta); % Control-variate version: (gradient of log_q) x (h(theta)-c) 127 | 128 | rqmc = utils_normrnd_qmc(S,d_theta); % Generate standard normal numbers, using quasi-MC 129 | C_lower = chol(Sig,'lower'); 130 | 131 | for s = 1:S 132 | % Parameters in Normal distribution 133 | theta = mu + C_lower*rqmc(s,:)'; 134 | 135 | % If a function handle to compute h(theta) is not provided, 136 | % then a model object with a method to compute h(theta) 137 | % must be used.
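% Note: this sampling loop implements the score-function (log-derivative
% trick) form of the lower bound gradient,
%    grad LB = E_q[ grad_lambda log q_lambda(theta) * (h(theta) - log q_lambda(theta)) ],
% so each draw contributes gra_log_q_lambda(s,:)*h_function; the control
% variate c12 (recomputed after the loop) is subtracted to reduce the
% variance of this estimator.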
138 | if isempty(hfunc) 139 | if (~isempty(obj.Model)) 140 | % Call the hFunction of the model to compute h(theta) 141 | h_theta = model.hFunction(data,theta); 142 | else 143 | error('A model object or a function handle to compute h(theta) must be provided!') 144 | end 145 | else 146 | % If the user provides a function that directly computes 147 | % h(theta), use it 148 | h_theta = hfunc(data,theta,setting); 149 | end 150 | 151 | % Log q_lambda 152 | log_q_lambda = -d_theta/2*log(2*pi)-1/2*log(det(Sig))-1/2*(theta-mu)'*Sig_inv*(theta-mu); 153 | 154 | % h function 155 | h_function = h_theta - log_q_lambda; 156 | 157 | aux = Sig_inv*(theta-mu); 158 | gra_log_q_mu = aux; 159 | gra_log_q_Sig = -1/2*Sig_inv+1/2*aux*(aux'); 160 | gra_log_q_lambda(s,:) = [gra_log_q_mu;gra_log_q_Sig(:)]'; 161 | grad_log_q_h_function(s,:) = gra_log_q_lambda(s,:)*h_function; 162 | grad_log_q_h_function_cv(s,:) = gra_log_q_lambda(s,:).*(h_function-c12); 163 | end 164 | 165 | c12 = zeros(1,d_theta+d_theta*d_theta); 166 | for i = 1:d_theta+d_theta*d_theta 167 | aa = cov(grad_log_q_h_function(:,i),gra_log_q_lambda(:,i)); 168 | c12(i) = aa(1,2)/aa(2,2); 169 | end 170 | Y12 = mean(grad_log_q_h_function_cv)'; % Euclidean gradient of the lower bound LB 171 | 172 | % Gradient clipping at the beginning 173 | if(max_grad_init>0) 174 | grad_norm = norm(Y12); 175 | norm_gradient_threshold = max_grad_init; 176 | if grad_norm>norm_gradient_threshold 177 | Y12 = (norm_gradient_threshold/grad_norm)*Y12; 178 | end 179 | end 180 | 181 | % To use manifold GVB for other models, all we need is the Euclidean gradient 182 | % of the LB. Everything below is model-independent. 183 | gradLB_mu = Sig*Y12(1:d_theta); % Natural gradient of LB w.r.t. mu 184 | gradLB_Sig = Sig*reshape(Y12(d_theta+1:end),d_theta,d_theta)*Sig; % Natural gradient of LB w.r.t. Sigma 185 | gradLB_Sig_momentum = gradLB_Sig; % Initialise momentum gradient for Sig 186 | gradLB_mu_momentum = gradLB_mu; % Initialise momentum gradient for mu 187 | 188 | % Prepare for the next iterations 189 | mu_best = mu; 190 | Sig_best = Sig; 191 | while ~stop 192 | 193 | iter = iter+1; 194 | if iter>stepsize_adapt 195 | stepsize = eps0*stepsize_adapt/iter; 196 | else 197 | stepsize = eps0; 198 | end 199 | Sig_old = Sig; 200 | Sig = obj.retraction_spd(Sig_old,gradLB_Sig_momentum,stepsize); % retraction to update Sigma 201 | mu = mu + stepsize*gradLB_mu_momentum; % update mu 202 | 203 | gra_log_q_lambda = zeros(S,d_theta + d_theta*d_theta); 204 | grad_log_q_h_function = zeros(S,d_theta + d_theta*d_theta); 205 | grad_log_q_h_function_cv = zeros(S,d_theta + d_theta*d_theta); % control variate 206 | 207 | lb_log_h = zeros(S,1); 208 | Sig_inv = eye(d_theta)/Sig; 209 | rqmc = utils_normrnd_qmc(S,d_theta); 210 | C_lower = chol(Sig,'lower'); 211 | for s = 1:S 212 | % Parameters in Normal distribution 213 | theta = mu + C_lower*rqmc(s,:)'; 214 | 215 | % If a function handle to compute h(theta) is not provided, 216 | % then a model object with a method to compute h(theta) 217 | % must be used.
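% Note on the control variates: for each coordinate i, the coefficient
% c12(i) = Cov(g_i*h, g_i)/Var(g_i), estimated from the S draws of the
% previous iteration, is the variance-minimizing choice for the estimator
% g_i*(h - c12(i)); the estimator remains unbiased because E[g] = 0 for
% the score g = grad_lambda log q_lambda.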
218 | if isempty(hfunc) 219 | if (~isempty(obj.Model)) 220 | % Call the hFunction of the model to compute h(theta) 221 | h_theta = model.hFunction(data,theta); 222 | else 223 | error('A model object or a function handle to compute h(theta) must be provided!') 224 | end 225 | else 226 | % If the user provides a function that directly computes 227 | % h(theta), use it 228 | h_theta = hfunc(data,theta,setting); 229 | end 230 | 231 | % log q_lambda 232 | log_q_lambda = -d_theta/2*log(2*pi)-1/2*log(det(Sig))-1/2*(theta-mu)'*Sig_inv*(theta-mu); 233 | 234 | h_function = h_theta - log_q_lambda; 235 | 236 | % To compute the lowerbound 237 | lb_log_h(s) = h_function; 238 | 239 | aux = Sig_inv*(theta-mu); 240 | gra_log_q_mu = aux; 241 | gra_log_q_Sig = -1/2*Sig_inv+1/2*aux*(aux'); 242 | gra_log_q_lambda(s,:) = [gra_log_q_mu;gra_log_q_Sig(:)]'; 243 | grad_log_q_h_function(s,:) = gra_log_q_lambda(s,:)*h_function; 244 | grad_log_q_h_function_cv(s,:) = gra_log_q_lambda(s,:).*(h_function-c12); 245 | end 246 | for i = 1:d_theta+d_theta*d_theta 247 | aa = cov(grad_log_q_h_function(:,i),gra_log_q_lambda(:,i)); 248 | c12(i) = aa(1,2)/aa(2,2); 249 | end 250 | Y12 = mean(grad_log_q_h_function_cv)'; 251 | 252 | % Clipping the gradient 253 | grad_norm = norm(Y12); 254 | norm_gradient_threshold = max_grad; 255 | if grad_norm > norm_gradient_threshold 256 | Y12 = (norm_gradient_threshold/grad_norm)*Y12; 257 | end 258 | 259 | gradLB_mu = Sig*Y12(1:d_theta); 260 | gradLB_Sig = Sig*reshape(Y12(d_theta+1:end),d_theta,d_theta)*Sig; 261 | 262 | zeta = obj.parallel_transport_spd(Sig_old,Sig,gradLB_Sig_momentum); % vector transport of gradLB_Sig_momentum 263 | 264 | % from the previous point Sig_old to the new point Sig 265 | gradLB_Sig_momentum = momentum_weight*zeta+(1-momentum_weight)*gradLB_Sig; % update momentum grad for Sigma 266 | gradLB_mu_momentum = momentum_weight*gradLB_mu_momentum+(1-momentum_weight)*gradLB_mu; % update momentum grad for mu 267 | 268 | % Lower bound 269 | LB(iter) = mean(lb_log_h); 270 | 271 | % Smooth the lowerbound and store best results 272 | if iter>window_size 273 | LB_smooth(iter-window_size) = mean(LB(iter-window_size:iter)); % smooth out LB by moving average 274 | if LB_smooth(iter-window_size)>=max(LB_smooth) 275 | mu_best = mu; 276 | Sig_best = Sig; 277 | patience = 0; 278 | else 279 | patience = patience + 1; 280 | end 281 | end 282 | 283 | if (patience>max_patience)||(iter>max_iter) 284 | stop = true; 285 | end 286 | 287 | % Display training information 288 | if(verbose) 289 | if iter> window_size 290 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB_smooth(iter-window_size))]) 291 | else 292 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB(iter))]) 293 | end 294 | end 295 | 296 | % If users want to save variational mean in each iteration 297 | % Only used when debugging code 298 | if(save_params) 299 | params_iter(iter,:) = mu; 300 | end 301 | 302 | end 303 | 304 | % Store output 305 | if(save_params) 306 | Post.muIter = params_iter(1:iter-1,:); 307 | end 308 | 309 | % Store final results 310 | Post.LB_smooth = LB_smooth; 311 | Post.LB = LB; 312 | Post.mu = mu_best; 313 | Post.Sigma = Sig_best; 314 | Post.sigma2 = diag(Post.Sigma); 315 | 316 | % Plot lowerbound 317 | if(lb_plot) 318 | obj.plot_lb(LB_smooth); 319 | end 320 | end 321 | 322 | %% Parallel transport on the manifold of SPD matrices 323 | function zeta = parallel_transport_spd(obj,X, Y, eta) 324 | E = sqrtm((Y/X)); 325 | zeta = E*eta*E'; 326 | end 327 | 328 | %% Retraction on the manifold of SPD matrices 329 | function Y = retraction_spd(obj,X, eta, t) 330 | teta = t*eta; 331 | symm = @(X) .5*(X+X'); 332 | Y = symm(X +
teta + .5*teta*(X\teta)); 333 | [~,index] = chol(Y); 334 | iter = 1; 335 | max_iter = 5; 336 | while (index)&&(iter<=max_iter) 337 | iter = iter+1; 338 | t = t/2; 339 | teta = t*eta; 340 | Y = symm(X + teta + .5*teta*(X\teta)); 341 | [~,index] = chol(Y); 342 | end 343 | if iter >= max_iter 344 | Y = X; 345 | end 346 | end 347 | end 348 | end 349 | 350 | -------------------------------------------------------------------------------- /VBLab/VB/NAGVAC.m: -------------------------------------------------------------------------------- 1 | classdef NAGVAC < VBayesLab 2 | %NAGVAC Natural gradient Gaussian Variational Approximation with factor 3 | %   Covariance: Sigma = b*b' + diag(c.^2) with a single factor b 4 | 5 | properties 6 | GradClipInit % Gradient clipping threshold for the first iteration (0 means no clipping) 7 | end 8 | 9 | methods 10 | function obj = NAGVAC(mdl,data,varargin) 11 | %NAGVAC Construct an instance of this class 12 | % mdl is either a model object or a function handle 13 | obj.Method = 'NAGVAC'; 14 | obj.WindowSize = 30; 15 | obj.NumSample = 10; 16 | obj.LearningRate = 0.01; 17 | obj.MaxIter = 5000; 18 | obj.MaxPatience = 20; 19 | obj.StdForInit = 0.01; 20 | obj.StepAdaptive = obj.MaxIter/2; 21 | obj.GradWeight = 0.9; 22 | obj.LBPlot = true; 23 | obj.GradientMax = 100; 24 | obj.InitMethod = 'Random'; 25 | obj.Verbose = true; 26 | obj.SaveParams = false; 27 | 28 | % Parse additional options 29 | if nargin > 2 30 | paramNames = {'NumSample' 'LearningRate' 'GradWeight' 'GradClipInit'... 31 | 'MaxIter' 'MaxPatience' 'WindowSize' 'Verbose' ... 32 | 'InitMethod' 'StdForInit' 'Seed' 'MeanInit' ... 33 | 'SigInitScale' 'LBPlot' 'GradientMax' 'AutoDiff' ... 34 | 'HFuntion' 'NumParams' 'DataTrain' 'Setting'... 35 | 'StepAdaptive' 'SaveParams'}; 36 | paramDflts = {obj.NumSample obj.LearningRate obj.GradWeight obj.GradClipInit ... 37 | obj.MaxIter obj.MaxPatience obj.WindowSize obj.Verbose ... 38 | obj.InitMethod obj.StdForInit obj.Seed obj.MeanInit ... 39 | obj.SigInitScale obj.LBPlot obj.GradientMax obj.AutoDiff ... 40 | obj.HFuntion obj.NumParams obj.DataTrain obj.Setting ... 41 | obj.StepAdaptive obj.SaveParams}; 42 | 43 | [obj.NumSample,... 44 | obj.LearningRate,... 45 | obj.GradWeight,... 46 | obj.GradClipInit,... 47 | obj.MaxIter,... 48 | obj.MaxPatience,... 49 | obj.WindowSize,... 50 | obj.Verbose,... 51 | obj.InitMethod,... 52 | obj.StdForInit,... 53 | obj.Seed,... 54 | obj.MeanInit,... 55 | obj.SigInitScale,... 56 | obj.LBPlot,... 57 | obj.GradientMax,... 58 | obj.AutoDiff,... 59 | obj.HFuntion,... 60 | obj.NumParams,... 61 | obj.DataTrain,... 62 | obj.Setting,... 63 | obj.StepAdaptive,...
64 | obj.SaveParams] = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); 65 | end 66 | 67 | % Check if model object or function handle is provided 68 | if (isobject(mdl)) % If model object is provided 69 | obj.Model = mdl; 70 | obj.ModelToFit = obj.Model.ModelName; % Set model name if model is specified 71 | else % If function handle is provided 72 | obj.GradHFuntion = mdl; 73 | end 74 | 75 | % Main function to run NAGVAC 76 | obj.Post = obj.fit(data); 77 | 78 | end 79 | 80 | %% VB main function 81 | function Post = fit(obj,data) 82 | 83 | % Extract model object if provided 84 | if (~isempty(obj.Model)) 85 | model = obj.Model; 86 | d_theta = model.NumParams; % Number of parameters 87 | else % If model object is not provided, number of parameters must be provided 88 | if (~isempty(obj.NumParams)) 89 | d_theta = obj.NumParams; 90 | else 91 | error('The number of model parameters has to be specified!') 92 | end 93 | end 94 | 95 | % Extract sampling setting 96 | std_init = obj.StdForInit; 97 | eps0 = obj.LearningRate; 98 | S = obj.NumSample; 99 | ini_mu = obj.MeanInit; 100 | window_size = obj.WindowSize; 101 | max_patience = obj.MaxPatience; 102 | init_scale = obj.SigInitScale; 103 | momentum_weight = obj.GradWeight; 104 | tau_threshold = obj.StepAdaptive; 105 | max_iter = obj.MaxIter; 106 | lb_plot = obj.LBPlot; 107 | max_grad = obj.GradientMax; 108 | grad_hfunc = obj.GradHFuntion; 109 | setting = obj.Setting; 110 | verbose = obj.Verbose; 111 | save_params = obj.SaveParams; 112 | 113 | % Store variational mean in each iteration (if specified) 114 | if(save_params) 115 | params_iter = zeros(max_iter,d_theta); 116 | end 117 | 118 | % Initialization 119 | iter = 1; 120 | patience = 0; 121 | stop = false; 122 | LB_smooth = 0; 123 | lambda_best = []; 124 | 125 | % Initialization of mu 126 | % If initial parameters are not specified, then use some 127 | % initialization methods 128 | if isempty(ini_mu) 129 | mu = normrnd(0,std_init,d_theta,1); 130 | else % If initial parameters are provided 131 | mu = ini_mu; 132 | end 133 | 134 | b = normrnd(0,std_init,d_theta,1); 135 | c = init_scale*ones(d_theta,1); 136 | 137 | lambda = [mu;b;c]; % Variational parameters vector 138 | lambda_seq(iter,:) = lambda'; 139 | 140 | % Store the variational mean of each iteration 141 | param(iter,:) = mu'; 142 | 143 | %% First VB iteration 144 | rqmc = normrnd(0,1,S,d_theta+1); 145 | grad_lb_iter = zeros(S,3*d_theta); % Store gradient of lb over S MC simulations 146 | lb_first_term = zeros(S,1); % To estimate the first term in lb = E_q(log f)-E_q(log q) 147 | 148 | for s = 1:S 149 | % Parameters in Normal distribution 150 | U_normal = rqmc(s,:)'; 151 | epsilon1 = U_normal(1); 152 | epsilon2 = U_normal(2:end); 153 | theta = mu + b*epsilon1 + c.*epsilon2; % Compute Theta 154 | 155 | % If a function handle for the gradient of h(theta) is not 156 | % provided, then a model object with a method to compute the 157 | % gradient of h(theta) must be used.
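% Note: NAGVAC uses the one-factor covariance Sigma = b*b' + diag(c.^2),
% so a draw from q_lambda is generated above as
% theta = mu + b*epsilon1 + c.*epsilon2 with a scalar epsilon1 ~ N(0,1)
% and epsilon2 ~ N(0,I_d); the variational parameter vector is
% lambda = (mu, b, c).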
158 | if isempty(grad_hfunc) 159 | if (~isempty(obj.Model)) 160 | % Call the hFunctionGrad of the model to compute 161 | % h(theta) and gradient of h(theta) 162 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 163 | else 164 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 165 | end 166 | else 167 | % If the user provides a function that directly computes 168 | % the gradient of h(theta), use it 169 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 170 | end 171 | 172 | % Gradient of log variational distribution 173 | grad_log_q = obj.grad_log_q_function(b,c,theta,mu); 174 | 175 | % Gradient of h(theta) and lowerbound 176 | grad_theta = grad_h_theta - grad_log_q; 177 | grad_lb_iter(s,:) = [grad_theta;epsilon1*grad_theta;epsilon2.*grad_theta]'; 178 | 179 | % For the lower bound 180 | lb_first_term(s) = h_theta; 181 | 182 | end 183 | 184 | % Estimation of lowerbound 185 | logdet = log(det(1 + (b./(c.^2))'*b)) + sum(log((c.^2))); 186 | lb_log_q = -0.5*d_theta*log(2*pi) - 0.5*logdet - d_theta/2; % E_q(log q(theta)) in closed form 187 | LB(iter) = mean(lb_first_term) - lb_log_q; 188 | 189 | % Gradient of the lower bound 190 | grad_lb = (mean(grad_lb_iter))'; 191 | gradient_lambda = obj.inverse_fisher_times_grad(b,c,grad_lb); 192 | gradient_bar = gradient_lambda; 193 | 194 | %% Main VB loop 195 | while ~stop 196 | 197 | % If users want to save variational mean in each iteration 198 | % Only used when debugging code 199 | if(save_params) 200 | params_iter(iter,:) = mu; 201 | end 202 | 203 | iter = iter + 1; 204 | rqmc = normrnd(0,1,S,d_theta+1); 205 | grad_lb_iter = zeros(S,3*d_theta); % store gradient of lb over S MC simulations 206 | lb_first_term = zeros(S,1); % to estimate the first term in lb = E_q(log f)-E_q(log q) 207 | for s=1:S 208 | % Parameters in Normal distribution 209 | U_normal = rqmc(s,:)'; 210 | epsilon1 = U_normal(1); 211 | epsilon2 = U_normal(2:end); 212 | theta = mu + b*epsilon1 + c.*epsilon2; 213 | 214 | % If a function handle for the gradient of h(theta) is not 215 | % provided, then a model object with a method to compute the 216 | % gradient of h(theta) must be used.
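% Note: gradient_lambda below is the natural gradient, i.e. the inverse
% Fisher information matrix of q_lambda applied to the plain gradient.
% inverse_fisher_times_grad exploits the factor structure
% Sigma = b*b' + diag(c.^2) to compute this product in closed form,
% without ever forming the full (3*d_theta x 3*d_theta) Fisher matrix.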
217 | if isempty(grad_hfunc) 218 | if (~isempty(obj.Model)) 219 | % Call the hFunctionGrad of the model to compute 220 | % h(theta) and gradient of h(theta) 221 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 222 | else 223 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 224 | end 225 | else 226 | % If the user provides a function that directly computes 227 | % the gradient of h(theta), use it 228 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 229 | end 230 | 231 | % Gradient of log variational distribution 232 | grad_log_q = obj.grad_log_q_function(b,c,theta,mu); 233 | 234 | % Gradient of h(theta) and lowerbound 235 | grad_theta = grad_h_theta - grad_log_q; 236 | grad_lb_iter(s,:) = [grad_theta;epsilon1*grad_theta;epsilon2.*grad_theta]'; 237 | 238 | % For the lower bound 239 | lb_first_term(s) = h_theta; 240 | end 241 | 242 | % Estimation of lowerbound 243 | logdet = log(det(1 + (b./(c.^2))'*b)) + sum(log((c.^2))); 244 | lb_log_q = -0.5*d_theta*log(2*pi) - 0.5*logdet - d_theta/2; % E_q(log q(theta)) in closed form 245 | LB(iter) = mean(lb_first_term) - lb_log_q; 246 | 247 | % Gradient of the lower bound 248 | grad_lb = (mean(grad_lb_iter))'; 249 | gradient_lambda = obj.inverse_fisher_times_grad(b,c,grad_lb); 250 | 251 | % Gradient clipping 252 | grad_norm = norm(gradient_lambda); 253 | norm_gradient_threshold = max_grad; 254 | if norm(gradient_lambda) > norm_gradient_threshold 255 | gradient_lambda = (norm_gradient_threshold/grad_norm)*gradient_lambda; 256 | end 257 | 258 | gradient_bar = momentum_weight*gradient_bar + (1-momentum_weight)*gradient_lambda; 259 | 260 | if iter > tau_threshold 261 | stepsize = eps0*tau_threshold/iter; 262 | else 263 | stepsize = eps0; 264 | end 265 | lambda = lambda + stepsize*gradient_bar; 266 | lambda_seq(iter,:) = lambda'; 267 | 268 | % Reconstruct variational parameters 269 | mu = lambda(1:d_theta,1); 270 | b = lambda(d_theta+1:2*d_theta,1); 271 | c = lambda(2*d_theta+1:end); 272 | 273 | % Store parameters in each iteration 274 | param(iter,:) = mu'; 275 | 276 | if iter > window_size 277 | LB_smooth(iter-window_size) = mean(LB(iter-window_size+1:iter)); 278 | if LB_smooth(end)>= max(LB_smooth) 279 | lambda_best = lambda; 280 | patience = 0; 281 | else 282 | patience = patience + 1; 283 | end 284 | end 285 | if (patience>max_patience)||(iter>max_iter) 286 | stop = true; 287 | end 288 | 289 | % Display training information 290 | if(verbose) 291 | if iter> window_size 292 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB_smooth(iter-window_size))]) 293 | else 294 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB(iter))]) 295 | end 296 | end 297 | 298 | end 299 | 300 | % Store output 301 | if(save_params) 302 | Post.muIter = params_iter(1:iter-1,:); 303 | end 304 | 305 | % If the algorithm stops too early 306 | if(isempty(lambda_best)) 307 | lambda_best = lambda; 308 | end 309 | 310 | % Store final results 311 | Post.LB_smooth = LB_smooth; 312 | Post.LB = LB; 313 | Post.lambda = lambda_best; 314 | Post.mu = lambda_best(1:d_theta); 315 | Post.b = lambda_best(d_theta+1:2*d_theta); 316 | Post.c = lambda_best(2*d_theta+1:end); 317 | Post.Sigma = Post.b*Post.b' + diag(Post.c.^2); 318 | Post.sigma2 = diag(Post.Sigma); 319 | 320 | % Plot lowerbound 321 | if(lb_plot) 322 | obj.plot_lb(LB_smooth); 323 | end 324 | 325 | end 326 | 327 | %% Obtain samples from the estimated VB posterior 328 | % n: number of samples to draw 329 | function Sample = sampleFromVB(obj,Post,n) 330 | mu = Post.mu; 331 | b =
Post.b; 332 | c = Post.c; 333 | Sigma = b*b'+ diag(c.^2); 334 | 335 | Sample = mvnrnd(mu,Sigma,n); 336 | end 337 | 338 | %% Compute I(lambda)^(-1) x grad, exploiting the factor structure of Sigma 339 | function prod = inverse_fisher_times_grad(obj,b,c,grad) 340 | d = length(b); 341 | grad1 = grad(1:d); 342 | grad2 = grad(d+1:2*d); 343 | grad3 = grad(2*d+1:end); 344 | 345 | c2 = c.^2; 346 | b2 = b.^2; 347 | 348 | prod1 = (b'*grad1)*b+(grad1.*c2); 349 | 350 | alpha = 1/(1+sum(b2./c2)); 351 | Cminus = diag(1./c2); 352 | Cminus_b = b./c2; 353 | Sigma_inv = Cminus-alpha*(Cminus_b*Cminus_b'); 354 | 355 | A11_inv = (1/(1-alpha))*((1-1/(sum(b2)+1-alpha))*(b*b')+diag(c2)); 356 | 357 | C = diag(c); 358 | A12 = 2*(C*Sigma_inv*b*ones(1,d)).*Sigma_inv; 359 | A21 = A12'; 360 | A22 = 2*C*(Sigma_inv.*Sigma_inv)*C; 361 | 362 | D = A22-A21*A11_inv*A12; 363 | prod2 = A11_inv*grad2+(A11_inv*A12)*(D\A21)*(A11_inv*grad2)-(A11_inv*A12)*(D\grad3); 364 | prod3 = -(D\A21)*(A11_inv*grad2)+D\grad3; 365 | 366 | prod = [prod1;prod2;prod3]; 367 | end 368 | 369 | %% Gradient of log q_lambda 370 | function grad_log_q = grad_log_q_function(obj,b,c,theta,mu) 371 | x = theta - mu; 372 | d = b./c.^2; 373 | grad_log_q = -x./c.^2+(d'*x)/(1+(d'*b))*d; 374 | end 375 | 376 | end 377 | end 378 | 379 | -------------------------------------------------------------------------------- /VBLab/VB/VAFC.m: -------------------------------------------------------------------------------- 1 | classdef VAFC < VBayesLab 2 | %VAFC Gaussian Variational Approximation with Factor Covariance: 3 | %   Sigma = B*B' + diag(c.^2) with NumFactor factors 4 | 5 | properties 6 | NumFactor % Number of factors 7 | Adelta % Settings for the ADADELTA optimizer (used when Optimization is 'Adelta') 8 | end 9 | 10 | methods 11 | function obj = VAFC(mdl,data,varargin) 12 | %VAFC Construct an instance of this class 13 | % mdl is either a model object or a function handle 14 | obj.Method = 'VAFC'; 15 | obj.NumFactor = 4; 16 | obj.Adelta.rho = 0.95; 17 | obj.Adelta.eps = 10^-6; 18 | obj.Optimization = 'Simple'; % Could be 'Adelta' 19 | obj.SigInitScale = 0.01; 20 | 21 | % Parse additional options 22 | if nargin > 2 23 | % (specified as name-value pairs) 24 | paramNames = {'NumSample' 'LearningRate' 'GradWeight' ... 25 | 'MaxIter' 'MaxPatience' 'WindowSize' 'Verbose' ... 26 | 'InitMethod' 'StdForInit' 'Seed' 'MeanInit' ... 27 | 'SigInitScale' 'LBPlot' 'GradientMax' 'AutoDiff' ... 28 | 'HFuntion' 'NumParams' 'DataTrain' 'Setting' ... 29 | 'StepAdaptive' 'NumFactor' 'SaveParams' 'Optimization'}; 30 | paramDflts = {obj.NumSample obj.LearningRate obj.GradWeight ... 31 | obj.MaxIter obj.MaxPatience obj.WindowSize obj.Verbose ... 32 | obj.InitMethod obj.StdForInit obj.Seed obj.MeanInit ... 33 | obj.SigInitScale obj.LBPlot obj.GradientMax obj.AutoDiff ... 34 | obj.HFuntion obj.NumParams obj.DataTrain obj.Setting ... 35 | obj.StepAdaptive obj.NumFactor obj.SaveParams obj.Optimization}; 36 | 37 | [obj.NumSample, obj.LearningRate,obj.GradWeight,... 38 | obj.MaxIter, obj.MaxPatience, obj.WindowSize,obj.Verbose,... 39 | obj.InitMethod, obj.StdForInit, obj.Seed, obj.MeanInit,... 40 | obj.SigInitScale, obj.LBPlot, obj.GradientMax, obj.AutoDiff,... 41 | obj.HFuntion, obj.NumParams, obj.DataTrain, obj.Setting,... 42 | obj.StepAdaptive, obj.NumFactor, obj.SaveParams, obj.Optimization] ...
43 | = internal.stats.parseArgs(paramNames, paramDflts, varargin{:}); 44 | end 45 | 46 | % Check if model object or function handle is provided 47 | if (isobject(mdl)) % If model object is provided 48 | obj.Model = mdl; 49 | obj.ModelToFit = obj.Model.ModelName; % Set model name if model is specified 50 | else % If function handle is provided 51 | obj.GradHFuntion = mdl; 52 | end 53 | 54 | % Main function to run VAFC 55 | obj.Post = obj.fit(data); 56 | 57 | end 58 | 59 | %% VB main function 60 | function Post = fit(obj,data) 61 | 62 | % Extract model object if provided 63 | if (~isempty(obj.Model)) % If instance of a model is provided 64 | model = obj.Model; 65 | d_theta = model.NumParams; % Number of parameters 66 | else % If model object is not provided, number of parameters must be provided 67 | if (~isempty(obj.NumParams)) 68 | d_theta = obj.NumParams; 69 | else 70 | error('The number of model parameters has to be specified!') 71 | end 72 | end 73 | 74 | % Unload training parameters (only for convenience) 75 | std_init = obj.StdForInit; 76 | eps0 = obj.LearningRate; 77 | S = obj.NumSample; 78 | ini_mu = obj.MeanInit; 79 | window_size = obj.WindowSize; 80 | max_patience = obj.MaxPatience; 81 | init_scale = obj.SigInitScale; 82 | tau_threshold = obj.StepAdaptive; 83 | max_iter = obj.MaxIter; 84 | lb_plot = obj.LBPlot; 85 | max_grad = obj.GradientMax; 86 | momentum_weight = obj.GradWeight; 87 | num_factor = obj.NumFactor; 88 | grad_hfunc = obj.GradHFuntion; 89 | setting = obj.Setting; 90 | opt = obj.Optimization; 91 | verbose = obj.Verbose; 92 | save_params = obj.SaveParams; 93 | 94 | % Store variational mean in each iteration (if specified) 95 | if(save_params) 96 | params_iter = zeros(max_iter,d_theta); 97 | end 98 | 99 | % Initialization 100 | iter = 1; 101 | patience = 0; 102 | stop = false; 103 | LB_smooth = 0; 104 | lambda_best = []; 105 | 106 | % Initialization of mu 107 | % If initial parameters are not specified, then use some 108 | % initialization methods 109 | if isempty(ini_mu) 110 | mu = normrnd(0,std_init,d_theta,1); 111 | else % If initial parameters are provided 112 | mu = ini_mu; 113 | end 114 | B = normrnd(0,std_init,d_theta,num_factor); 115 | c = init_scale*ones(d_theta,1); 116 | 117 | % Variational parameters as a single column vector 118 | lambda = [mu;B(:);c]; 119 | 120 | if (strcmp(opt,'Adelta')) 121 | % ADADELTA parameters 122 | rho = obj.Adelta.rho; 123 | eps_step = obj.Adelta.eps; 124 | Edelta2_lambda = zeros(length(lambda),1); 125 | Eg2_lambda = zeros(length(lambda),1); 126 | end 127 | 128 | 129 | % Store the variational mean of each iteration 130 | param(iter,:) = mu'; 131 | 132 | %% First VB iteration 133 | lb_iter = zeros(S,1); 134 | grad_lb_mu_iter = zeros(S,d_theta); 135 | grad_lb_B_iter = zeros(S,d_theta*num_factor); 136 | grad_lb_c_iter = zeros(S,d_theta); 137 | 138 | % To compute log q_lambda 139 | Dinv2B = bsxfun(@times,B,1./c.^2); 140 | Blogdet = log(det(eye(num_factor) + bsxfun(@times,B, 1./(c.^2))'*B)) + sum(log((c.^2))); 141 | 142 | rqmc = normrnd(0,1,S,d_theta+num_factor); 143 | for s = 1:S 144 | % Compute model parameters from variational parameters 145 | U_normal = rqmc(s,:)'; 146 | epsilon1 = U_normal(1:num_factor); 147 | epsilon2 = U_normal((num_factor+1):end); 148 | theta = mu + B*epsilon1 + c.*epsilon2; % Compute theta 149 | 150 | % If a function handle for the gradient of h(theta) is not 151 | % provided, then a model object with a method to compute the 152 | % gradient of h(theta) must be used.
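% Note: with the factor covariance Sigma = B*B' + diag(c.^2), Blogdet
% above uses the matrix determinant lemma,
%    log|Sigma| = log|I_p + B'*diag(1./c.^2)*B| + sum(log(c.^2)),
% so only a p x p determinant (p = NumFactor) is needed instead of a
% d_theta x d_theta one.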
153 | if isempty(grad_hfunc) 154 | if (~isempty(obj.Model)) 155 | % Call the hFunctionGrad of the model to compute 156 | % h(theta) and the gradient of h(theta) 157 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 158 | else 159 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 160 | end 161 | else 162 | % If the user supplies a function handle that directly 163 | % computes the gradient of h(theta), use it 164 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 165 | end 166 | 167 | % Reparameterization gradient of the lowerbound integrand 168 | [L_mu,L_B,L_c] = obj.grad_log_q_function(B,c,epsilon1,epsilon2,grad_h_theta); 169 | 170 | % Gradient of lowerbound 171 | grad_lb_mu_iter(s,:) = L_mu; 172 | grad_lb_B_iter(s,:) = L_B(:); 173 | grad_lb_c_iter(s,:) = L_c; 174 | 175 | % For the lower bound: evaluate log q_lambda at theta 176 | Bz_deps = theta - mu; 177 | DBz_deps = bsxfun(@times,Bz_deps,1./c.^2); 178 | Half1 = DBz_deps; 179 | Half2 = Dinv2B/(eye(num_factor) + B'*Dinv2B)*B'*DBz_deps; 180 | log_q_lambda = - d_theta/2*log(2*pi) - 1/2*Blogdet - 1/2*Bz_deps'*(Half1-Half2); 181 | lb_iter(s) = h_theta - log_q_lambda; 182 | end 183 | 184 | % Estimation of the lowerbound 185 | LB(iter) = mean(lb_iter); 186 | 187 | % Monte Carlo estimate of the lowerbound gradient 188 | grad_lb_mu = mean(grad_lb_mu_iter,1)'; 189 | grad_lb_B = mean(grad_lb_B_iter,1)'; 190 | grad_lb_c = mean(grad_lb_c_iter,1)'; 191 | 192 | % Natural gradient 193 | gradient_lambda = obj.inv_fisher_grad_multifactor(B,c,grad_lb_mu,grad_lb_B,grad_lb_c); 194 | norm_gradient = norm(gradient_lambda); 195 | norm_gradient_seq1 = norm_gradient; 196 | gradient_bar = gradient_lambda; 197 | 198 | %% Main VB loop 199 | while ~stop 200 | 201 | iter = iter + 1; 202 | 203 | % Recompute the quantities needed to evaluate log q_lambda 204 | Dinv2B = bsxfun(@times,B,1./c.^2); 205 | Blogdet = log(det(eye(num_factor) + bsxfun(@times,B, 1./(c.^2))'*B)) + sum(log((c.^2))); 206 | 207 | rqmc = normrnd(0,1,S,d_theta+num_factor); 208 | for s=1:S 209 | % Compute model parameters from variational parameters 210 | U_normal = rqmc(s,:)'; 211 | epsilon1 = U_normal(1:num_factor); 212 | epsilon2 = U_normal((num_factor+1):end); 213 | theta = mu + B*epsilon1 + c.*epsilon2; 214 | 215 | % If a function handle for the gradient of h(theta) is not 216 | % given, then a model object implementing a method that 217 | % computes this gradient must be provided instead.
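% Note on the log-density evaluation further below: for
% Sigma = B*B' + D^2 with D = diag(c), the Woodbury identity gives
%   Sigma^{-1} = D^{-2} - D^{-2}*B*(I_p + B'*D^{-2}*B)^{-1}*B'*D^{-2},
% so Half1 - Half2 equals Sigma^{-1}*(theta - mu) at the cost of a
% p x p solve, and Blogdet is log|Sigma| by the determinant lemma:
%   log|Sigma| = log|I_p + B'*D^{-2}*B| + sum(log(c.^2)).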
218 | if isempty(grad_hfunc) 219 | if (~isempty(obj.Model)) 220 | % Call the hFunctionGrad of the model to compute 221 | % h(theta) and the gradient of h(theta) 222 | [grad_h_theta,h_theta] = model.hFunctionGrad(data,theta); 223 | else 224 | error('A model object or a function handle to compute the gradient of h(theta) must be provided!') 225 | end 226 | else 227 | % If the user supplies a function handle that directly 228 | % computes the gradient of h(theta), use it 229 | [grad_h_theta,h_theta] = grad_hfunc(data,theta,setting); 230 | end 231 | 232 | % Reparameterization gradient of the lowerbound integrand 233 | [L_mu,L_B,L_c] = obj.grad_log_q_function(B,c,epsilon1,epsilon2,grad_h_theta); 234 | 235 | % Gradient of lowerbound 236 | grad_lb_mu_iter(s,:) = L_mu; 237 | grad_lb_B_iter(s,:) = L_B(:); 238 | grad_lb_c_iter(s,:) = L_c; 239 | 240 | % For the lower bound: evaluate log q_lambda at theta 241 | Bz_deps = theta - mu; 242 | DBz_deps = bsxfun(@times,Bz_deps,1./c.^2); 243 | Half1 = DBz_deps; 244 | Half2 = Dinv2B/(eye(num_factor) + B'*Dinv2B)*B'*DBz_deps; 245 | log_q_lambda = - d_theta/2*log(2*pi) - 1/2*Blogdet - 1/2*Bz_deps'*(Half1-Half2); 246 | lb_iter(s) = h_theta - log_q_lambda; 247 | end 248 | 249 | % Estimation of the lowerbound 250 | LB(iter) = mean(lb_iter); 251 | 252 | % Monte Carlo estimate of the lowerbound gradient 253 | grad_lb_mu = mean(grad_lb_mu_iter,1)'; 254 | grad_lb_B = mean(grad_lb_B_iter,1)'; 255 | grad_lb_c = mean(grad_lb_c_iter,1)'; 256 | 257 | gradient_lambda = obj.inv_fisher_grad_multifactor(B,c,grad_lb_mu,grad_lb_B,grad_lb_c); 258 | grad_norm_current = norm(gradient_lambda); 259 | norm_gradient_seq1(iter) = grad_norm_current; 260 | if norm(gradient_lambda)>max_grad % Gradient clipping 261 | gradient_lambda = (max_grad/norm(gradient_lambda))*gradient_lambda; 262 | end 263 | norm_gradient = norm_gradient+norm(gradient_lambda); 264 | gradient_bar = momentum_weight*gradient_bar+(1-momentum_weight)*gradient_lambda; 265 | 266 | if (strcmp(opt,'Adelta')) 267 | % ADADELTA update on the plain (stacked) lowerbound gradient 268 | grad_lb = [grad_lb_mu;grad_lb_B;grad_lb_c]; 269 | Eg2_lambda = rho*Eg2_lambda + (1-rho)*grad_lb.^2; 270 | temp = sqrt(Edelta2_lambda + eps_step)./sqrt(Eg2_lambda+eps_step); 271 | d_lambda = temp.*grad_lb; 272 | lambda = lambda + d_lambda; 273 | Edelta2_lambda = rho*Edelta2_lambda + (1-rho)*d_lambda.^2; 274 | else 275 | if iter>tau_threshold 276 | stepsize = eps0*tau_threshold/iter; 277 | else 278 | stepsize = eps0; 279 | end 280 | lambda = lambda + stepsize*gradient_bar; 281 | end 282 | 283 | % Reconstruct variational parameters 284 | mu = lambda(1:d_theta,1); 285 | vecB = lambda(d_theta+1:d_theta+d_theta*num_factor,1); 286 | B = reshape(vecB,d_theta,num_factor); 287 | c = lambda(d_theta+d_theta*num_factor+1:end,1); 288 | 289 | % Store parameters in each iteration 290 | param(iter,:) = mu'; 291 | 292 | % Smooth the lowerbound 293 | if iter>=window_size 294 | LB_smooth(iter-window_size+1) = mean(LB(iter-window_size+1:iter)); 295 | end 296 | 297 | % Check for early stopping 298 | if (iter>window_size)&&(LB_smooth(iter-window_size+1)>=max(LB_smooth)) 299 | lambda_best = lambda; 300 | patience = 0; 301 | else 302 | patience = patience+1; 303 | end 304 | 305 | if (patience>max_patience)||(iter>max_iter) 306 | stop = true; 307 | end 308 | 309 | % Display training information 310 | if(verbose) 311 | if iter> window_size 312 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB_smooth(iter-window_size+1))]) 313 | else 314 | disp(['Iter: ',num2str(iter),'| LB: ',num2str(LB(iter))]) 315 | end 316 | end 317 | 318 | % If users want to save the variational mean at each iteration 319 | % (only useful when debugging) 320 | if(save_params)
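% Row iter of params_iter records the current variational mean,
% so the whole optimization path can be inspected via
% Post.muIter after fitting.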
321 | params_iter(iter,:) = mu; 322 | end 323 | end 324 | 325 | % Store output 326 | if(save_params) 327 | Post.muIter = params_iter(1:iter-1,:); 328 | end 329 | 330 | % If early stopping was never triggered, use the final lambda 331 | if(isempty(lambda_best)) 332 | lambda_best = lambda; 333 | end 334 | 335 | Post.LB_smooth = LB_smooth; 336 | Post.LB = LB; 337 | Post.lambda = lambda_best; 338 | Post.mu = lambda_best(1:d_theta,1); 339 | Post.B = reshape(lambda_best(d_theta+1:d_theta+d_theta*num_factor,1),d_theta,num_factor); 340 | Post.c = lambda_best(d_theta+d_theta*num_factor+1:end,1); 341 | Post.params = param; 342 | Post.Sigma = Post.B*Post.B' + diag(Post.c.^2); 343 | Post.sigma2 = diag(Post.Sigma); 344 | 345 | % If users want to plot the lowerbound 346 | if(lb_plot) 347 | obj.plot_lb(LB_smooth); 348 | end 349 | end 350 | 351 | %% Reparameterization gradient of the lowerbound integrand w.r.t. mu, B and c 352 | function [L_mu,L_B,L_c] = grad_log_q_function(obj,B,c,epsilon1,epsilon2,grad_log_h) 353 | 354 | Bz_deps = B*epsilon1 + c.*epsilon2; % theta - mu (note: epsilon2, not the built-in constant eps) 355 | Dinv2B = bsxfun(@times,B,1./c.^2); %D^-2*B 356 | DBz_deps = bsxfun(@times,Bz_deps,1./c.^2); %D^-2 * Bz_deps 357 | 358 | Half1 = DBz_deps; 359 | Half2 = Dinv2B/(eye(obj.NumFactor) + B'*Dinv2B)*B'*DBz_deps; 360 | L_mu = grad_log_h + (Half1-Half2); 361 | L_B = grad_log_h*epsilon1'+(Half1-Half2)*epsilon1'; 362 | L_c = grad_log_h.*epsilon2 + (Half1 - Half2).*epsilon2; 363 | 364 | end 365 | 366 | function prod = inv_fisher_grad_multifactor(obj,B,c,grad1,grad2,grad3) 367 | % Compute the product (inverse Fisher information) x gradient 368 | % for the multifactor case 369 | % B: dxp matrix where p<<d is the number of factors -------------------------------------------------------------------------------- /VBLab/VB/VBayesLab.m: -------------------------------------------------------------------------------- 1 | classdef VBayesLab 2 | %VBAYESLAB Superclass of the VB classes in the package 3 | 4 | properties 5 | Method % Name of the VB method 6 | Model % Instance of the model to sample from 7 | ModelToFit % Name of model to be fitted 8 | NumSample % Number of Monte Carlo samples to estimate the lowerbound and its gradient 9 | GradWeight % Momentum weight 10 | LearningRate % Fixed learning rate 11 | MaxIter % Maximum number of VB iterations 12 | MaxPatience % Maximum patience (iterations without improvement) before early stopping 13 | WindowSize % Smoothing window 14 | ParamsInit % Initial values of model parameters 15 | NumParams % Number of model parameters 16 | Seed % Random seed 17 | Post % Struct to store estimation results 18 | Verbose % Turn on or off printed messages during the sampling phase 19 | StdForInit % Std of the normal distribution to initialize VB params 20 | MeanInit % Pre-specified values of mean(theta) 21 | SigInitScale % A constant to scale up or down std of normal distribution 22 | StepAdaptive % From this iteration, stepsize is reduced 23 | LBPlot % If user wants to plot the lowerbound at the end 24 | GradientMax % For gradient clipping 25 | InitMethod % Method to initialize mu (variational mean) 26 | AutoDiff % Turn on/off automatic differentiation 27 | HFuntion % Function handle to compute h(theta) 28 | GradHFuntion % Function handle to compute the gradient of h(theta) 29 | DataTrain % Training data 30 | Setting % Struct to store additional settings of the model 31 | SaveParams % Whether to save the variational parameters at every iteration 32 | Optimization % Optimization method 33 | end 34 | 35 | methods 36 | function obj = VBayesLab(varargin) 37 | %VBAYESLAB Construct an instance of this class 38 | % Set default values shared by all VB subclasses 39 | obj.AutoDiff = false; 40 | obj.GradientMax = 100; 41 | obj.GradWeight = 0.9; 42 | obj.InitMethod = 'Random'; 43 | obj.LBPlot = true; 44 | obj.LearningRate = 0.001; 45 | obj.MaxIter = 5000; 46 | obj.MaxPatience = 20; 47 | obj.NumSample = 50; 48 | obj.StdForInit = 0.01; 49 | obj.SigInitScale = 0.1; 50 | obj.StepAdaptive = obj.MaxIter/2;
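% From iteration StepAdaptive onwards, the subclasses decay the
% stepsize as LearningRate*StepAdaptive/iter, so by default the
% decay starts halfway through training.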
51 | obj.SaveParams = false; 52 | obj.Verbose = true; 53 | obj.WindowSize = 30; 54 | end 55 | 56 | %% Plot lowerbound 57 | % Call this after running VB 58 | function plot_lb(obj,lb) 59 | plot(lb,'LineWidth',2) 60 | if(~isempty(obj.Model)) 61 | title(['Lower bound ',obj.Method ,' - ',obj.Model.ModelName]) 62 | else 63 | title('Lower bound') 64 | end 65 | xlabel('Iterations') 66 | ylabel('Lower bound') 67 | end 68 | end 69 | 70 | end 71 | 72 | --------------------------------------------------------------------------------