├── README.md
├── checkStackedAECost_nonneg.m
├── feedForwardAutoencoder.m
├── initializeParameters_nonneg.m
├── loadMNISTImages.m
├── loadMNISTLabels.m
├── main.m
├── minFunc
│   ├── ArmijoBacktrack.m
│   ├── WolfeLineSearch.m
│   ├── autoGrad.m
│   ├── autoHess.m
│   ├── autoHv.m
│   ├── autoTensor.m
│   ├── callOutput.m
│   ├── conjGrad.m
│   ├── dampedUpdate.m
│   ├── example_minFunc.m
│   ├── example_minFunc_LR.m
│   ├── isLegal.m
│   ├── lbfgs.m
│   ├── lbfgsC.c
│   ├── lbfgsC.mexa64
│   ├── lbfgsC.mexglx
│   ├── lbfgsC.mexmac
│   ├── lbfgsC.mexmaci
│   ├── lbfgsC.mexmaci64
│   ├── lbfgsC.mexw32
│   ├── lbfgsC.mexw64
│   ├── lbfgsUpdate.m
│   ├── logistic
│   │   ├── LogisticDiagPrecond.m
│   │   ├── LogisticHv.m
│   │   ├── LogisticLoss.m
│   │   ├── mexutil.c
│   │   ├── mexutil.h
│   │   ├── mylogsumexp.m
│   │   ├── repmatC.c
│   │   ├── repmatC.dll
│   │   ├── repmatC.mexglx
│   │   └── repmatC.mexmac
│   ├── mchol.m
│   ├── mcholC.c
│   ├── mcholC.mexmaci64
│   ├── mcholC.mexw32
│   ├── mcholC.mexw64
│   ├── mcholinc.m
│   ├── minFunc.m
│   ├── minFunc_processInputOptions.m
│   ├── polyinterp.m
│   ├── precondDiag.m
│   ├── precondTriu.m
│   ├── precondTriuDiag.m
│   ├── rosenbrock.m
│   └── taylorModel.m
├── mnist
│   ├── t10k-images.idx3-ubyte
│   ├── t10k-labels.idx1-ubyte
│   ├── train-images.idx3-ubyte
│   └── train-labels.idx1-ubyte
├── params2stack.m
├── softmax
│   ├── computeNumericalGradient.m
│   ├── softmaxCost_nonneg.m
│   ├── softmaxPredict.m
│   └── softmaxTrain_nonneg.m
├── sparseAutoencoderCost_nonneg.m
├── stack2params.m
├── stackedAECost_nonneg.m
└── stackedAEPredict.m

/README.md:
--------------------------------------------------------------------------------
1 | # Nonnegativity-Constrained-Autoencoder-NCAE
2 | Matlab code implementing the Nonnegativity Constrained Autoencoder (NCAE) for part-based deep learning.
3 | 
4 | References:
5 | 
6 | [1] Hosseini-Asl, E.; Zurada, J.M.; Nasraoui, O., "Deep Learning of Part-Based Representation of Data Using Sparse Autoencoders With Nonnegativity Constraints," IEEE Transactions on Neural Networks and Learning Systems, vol. PP, no. 99, pp. 1-13.
7 | doi: 10.1109/TNNLS.2015.2479223
8 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7310882&isnumber=6104215
9 | 
10 | [2] UFLDL Tutorial, http://deeplearning.stanford.edu/wiki/index.php/UFLDL_Tutorial
11 | 
12 | 
13 | 
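Reference [1] replaces the sparse autoencoder's usual L2 weight decay with a one-sided penalty that is nonzero only for negative weights, which is what pushes the learned features toward nonnegative, part-based representations. A minimal sketch of that penalty and its gradient (illustrative only; the repository's actual implementation lives in sparseAutoencoderCost_nonneg.m, and `alpha`, `cost`, `W1grad` are hypothetical names here):

    % One-sided (nonnegativity) weight penalty and its gradient:
    %   f(W) = (alpha/2) * sum_ij min(W_ij, 0)^2
    negW = min(W1, 0);                            % only negative entries contribute
    cost = cost + 0.5 * alpha * sum(negW(:).^2);
    W1grad = W1grad + alpha * negW;               % pushes negative weights toward zero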
--------------------------------------------------------------------------------
/checkStackedAECost_nonneg.m:
--------------------------------------------------------------------------------
1 | function [] = checkStackedAECost_nonneg()
2 | 
3 | % Check the gradients for the stacked autoencoder
4 | %
5 | % In general, we recommend creating files like this one to check your
6 | % gradients whenever you write a new cost function.
7 | %
8 | 
9 | %% Setup random data / small model
10 | clc
11 | inputSize = 4;
12 | hiddenSize = 5;
13 | lambda1 = 0.01;
14 | lambda2 = 0.1;
15 | data = randn(inputSize, 5);
16 | labels = [ 1 2 1 2 1 ];
17 | numClasses = 2;
18 | 
19 | stack = cell(2,1);
20 | stack{1}.w = 0.1 * randn(3, inputSize);
21 | stack{1}.b = zeros(3, 1);
22 | stack{2}.w = 0.1 * randn(hiddenSize, 3);
23 | stack{2}.b = zeros(hiddenSize, 1);
24 | softmaxTheta = 0.005 * randn(hiddenSize * numClasses, 1);
25 | 
26 | [stackparams, netconfig] = stack2params(stack);
27 | stackedAETheta = [ softmaxTheta ; stackparams ];
28 | 
29 | 
30 | [cost, grad] = stackedAECost_nonneg(stackedAETheta, inputSize, hiddenSize, ...
31 |                                     numClasses, netconfig, ...
32 |                                     lambda1, data, labels);
33 | 
34 | % Check that the numerical and analytic gradients are the same
35 | numgrad = computeNumericalGradient( @(x) stackedAECost_nonneg(x, inputSize, ...
36 |                                     hiddenSize, numClasses, netconfig, ...
37 |                                     lambda1, data, labels), ...
38 |                                     stackedAETheta);
39 | 
40 | % Use this to visually compare the gradients side by side
41 | disp([numgrad grad]);
42 | 
43 | % Compare numerically computed gradients with the ones obtained from backpropagation
44 | disp('Norm between numerical and analytical gradient (should be less than 1e-9)');
45 | diff = norm(numgrad-grad)/norm(numgrad+grad);
46 | disp(diff); % Should be small. In our implementation, these values are
47 |             % usually less than 1e-9.
48 | 
49 | % When you get this working, congratulations!
50 | 
51 | 
52 | 
--------------------------------------------------------------------------------
/feedForwardAutoencoder.m:
--------------------------------------------------------------------------------
1 | function [activation] = feedForwardAutoencoder(theta, hiddenSize, visibleSize, dropoutFraction, data)
2 | 
3 | % Compute the hidden-layer activations of a trained autoencoder;
4 | % theta is the packed parameter vector [W1(:); W2(:); b1; b2].
5 | W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize);
6 | b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize);
7 | 
8 | 
9 | z2 = W1*data + repmat(b1,1,size(data,2));
10 | activation = sigmoid(z2);
11 | 
12 | if(dropoutFraction > 0)
13 |     activation = activation.*(1 - dropoutFraction); % scale by keep probability at feed-forward time
14 | end
15 | 
16 | 
17 | 
18 | end
19 | 
20 | 
21 | function sigm = sigmoid(x)
22 |     sigm = 1 ./ (1 + exp(-x));
23 | end
--------------------------------------------------------------------------------
/initializeParameters_nonneg.m:
--------------------------------------------------------------------------------
1 | function theta = initializeParameters_nonneg(hiddenSize, visibleSize, seed)
2 | 
3 | %% Initialize parameters randomly based on layer sizes.
4 | r  = sqrt(6) / sqrt(hiddenSize+visibleSize+1);   % we'll choose weights uniformly from the interval [0, r] (nonnegative initialization)
5 | rand('state',seed)
6 | 
7 | W1 = rand(hiddenSize, visibleSize) * r;
8 | W2 = rand(visibleSize, hiddenSize) * r;
9 | 
10 | b1 = zeros(hiddenSize, 1);
11 | b2 = zeros(visibleSize, 1);
12 | 
13 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)];
14 | 
15 | end
16 | 
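Both files above rely on the same packing convention: theta = [W1(:); W2(:); b1; b2], which is why feedForwardAutoencoder reads b1 starting at offset 2*hiddenSize*visibleSize. A quick round-trip check (an illustrative snippet, not a file from the repository):

    hiddenSize = 3; visibleSize = 4;
    theta = initializeParameters_nonneg(hiddenSize, visibleSize, 0);
    % Unpack with the same offsets used in feedForwardAutoencoder:
    W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize);
    W2 = reshape(theta(hiddenSize*visibleSize+1:2*hiddenSize*visibleSize), visibleSize, hiddenSize);
    b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize);
    b2 = theta(2*hiddenSize*visibleSize+hiddenSize+1:end);
    assert(all(W1(:) >= 0) && all(W2(:) >= 0))   % weights start nonnegative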
--------------------------------------------------------------------------------
/loadMNISTImages.m:
--------------------------------------------------------------------------------
1 | function images = loadMNISTImages(filename)
2 | %loadMNISTImages returns a 784 x [number of MNIST images] matrix containing
3 | %the raw MNIST images (each column is one 28x28 image, unrolled)
4 | 
5 | fp = fopen(filename, 'rb');
6 | assert(fp ~= -1, ['Could not open ', filename, '']);
7 | 
8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be');
9 | assert(magic == 2051, ['Bad magic number in ', filename, '']);
10 | 
11 | numImages = fread(fp, 1, 'int32', 0, 'ieee-be');
12 | numRows = fread(fp, 1, 'int32', 0, 'ieee-be');
13 | numCols = fread(fp, 1, 'int32', 0, 'ieee-be');
14 | 
15 | images = fread(fp, inf, 'unsigned char');
16 | images = reshape(images, numCols, numRows, numImages);
17 | images = permute(images,[2 1 3]);
18 | 
19 | fclose(fp);
20 | 
21 | % Reshape to #pixels x #examples
22 | images = reshape(images, size(images, 1) * size(images, 2), size(images, 3));
23 | % Convert to double and rescale to [0,1]
24 | images = double(images) / 255;
25 | 
26 | end
27 | 
--------------------------------------------------------------------------------
/loadMNISTLabels.m:
--------------------------------------------------------------------------------
1 | function labels = loadMNISTLabels(filename)
2 | %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing
3 | %the labels for the MNIST images
4 | 
5 | fp = fopen(filename, 'rb');
6 | assert(fp ~= -1, ['Could not open ', filename, '']);
7 | 
8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be');
9 | assert(magic == 2049, ['Bad magic number in ', filename, '']);
10 | 
11 | numLabels = fread(fp, 1, 'int32', 0, 'ieee-be');
12 | 
13 | labels = fread(fp, inf, 'unsigned char');
14 | 
15 | assert(size(labels,1) == numLabels, 'Mismatch in label count');
16 | 
17 | fclose(fp);
18 | 
19 | end
20 | 
--------------------------------------------------------------------------------
/main.m:
--------------------------------------------------------------------------------
1 | 
2 | clc
3 | clear all
4 | close all
5 | 
6 | %% Initialize Deep Network Parameters
7 | 
8 | inputSize = 784;
9 | numClasses = 10;
10 | hiddenSizeL1 = 196;    % Layer 1 Hidden Size
11 | hiddenSizeL2 = 20;     % Layer 2 Hidden Size
12 | sparsityParam = 0.05;  % desired average activation of the hidden units.
13 | lambda = 3e-3;         % weight decay parameter
14 | beta = 3;              % weight of sparsity penalty term
15 | 
16 | inputZeroMaskedFraction = 0.0;  % denoising ratio
17 | dropoutFraction = 0.0;          % dropout ratio
18 | 
19 | %% Load data from the MNIST database
20 | 
21 | % Load MNIST database files
22 | addpath('/Datasets/MNIST')
23 | trainData = loadMNISTImages('mnist/train-images.idx3-ubyte');
24 | trainLabels = loadMNISTLabels('mnist/train-labels.idx1-ubyte');
25 | 
26 | trainLabels(trainLabels == 0) = 10; % Remap 0 to 10 since our labels need to start from 1
27 | 
28 | testData = loadMNISTImages('mnist/t10k-images.idx3-ubyte');
29 | testLabels = loadMNISTLabels('mnist/t10k-labels.idx1-ubyte');
30 | testLabels(testLabels == 0) = 10; % Remap 0 to 10 since our labels need to start from 1
31 | 
32 | 
33 | %% Train the first sparse autoencoder
34 | 
35 | 
36 | % Randomly initialize the parameters
37 | 
38 | seed = 1;
39 | sae1Theta = initializeParameters_nonneg(hiddenSizeL1, inputSize, seed);
40 | 
41 | 
42 | addpath minFunc/
43 | options.Method = 'lbfgs';
44 | options.maxIter = 400;
45 | options.display = 'on';
46 | 
47 | 
48 | 
49 | [sae1OptTheta, cost, costhistory] = minFunc( @(p) sparseAutoencoderCost_nonneg(p, ...
50 |     inputSize, hiddenSizeL1, ...
51 |     lambda, inputZeroMaskedFraction, ...
52 |     dropoutFraction, sparsityParam, ...
53 |     beta, trainData), ...
54 |     sae1Theta, options);
55 | 
56 | %% Train the second sparse autoencoder
57 | 
58 | [sae1Features] = feedForwardAutoencoder(sae1OptTheta, hiddenSizeL1, ...
59 |     inputSize, dropoutFraction, trainData);
60 | 
61 | % Randomly initialize the parameters
62 | sae2Theta = initializeParameters_nonneg(hiddenSizeL2, hiddenSizeL1, seed);
63 | 
64 | [sae2OptTheta, cost] = minFunc( @(p) sparseAutoencoderCost_nonneg(p, ...
65 |     hiddenSizeL1, hiddenSizeL2, ...
66 |     lambda, inputZeroMaskedFraction, ...
67 |     dropoutFraction, sparsityParam, ...
68 |     beta, sae1Features), ...
69 |     sae2Theta, options);
70 | 
71 | %% Train the softmax classifier
72 | 
73 | [sae2Features] = feedForwardAutoencoder(sae2OptTheta, hiddenSizeL2, ...
74 |     hiddenSizeL1, dropoutFraction, sae1Features);
75 | 
76 | % Randomly initialize the parameters
77 | rand('state',seed);
78 | saeSoftmaxTheta = 0.005 * randn(hiddenSizeL2 * numClasses, 1);
79 | 
80 | addpath softmax/
81 | 
82 | options.maxIter = 100;
83 | softmaxModel = softmaxTrain_nonneg(hiddenSizeL2, numClasses, lambda, ...
84 |     sae2Features, trainLabels, options);
85 | 
86 | saeSoftmaxOptTheta = softmaxModel.optTheta(:);
87 | 
88 | 
89 | %% Finetune softmax model
90 | 
91 | 
92 | % Initialize the stack using the parameters learned
93 | stack = cell(2,1);
94 | stack{1}.w = reshape(sae1OptTheta(1:hiddenSizeL1*inputSize), ...
95 |     hiddenSizeL1, inputSize);
96 | stack{1}.b = sae1OptTheta(2*hiddenSizeL1*inputSize+1:2*hiddenSizeL1*inputSize+hiddenSizeL1);
97 | stack{2}.w = reshape(sae2OptTheta(1:hiddenSizeL2*hiddenSizeL1), ...
98 |     hiddenSizeL2, hiddenSizeL1);
99 | stack{2}.b = sae2OptTheta(2*hiddenSizeL2*hiddenSizeL1+1:2*hiddenSizeL2*hiddenSizeL1+hiddenSizeL2);
100 | 
101 | % Initialize the parameters for the deep model
102 | [stackparams, netconfig] = stack2params(stack);
103 | stackedAETheta = [ saeSoftmaxOptTheta ; stackparams ];
104 | 
105 | 
106 | %% Check Gradient
107 | 
108 | 
109 | checkStackedAECost_nonneg()
110 | 
111 | %% Fine-tuning AE
112 | 
113 | options.Method = 'lbfgs';
114 | options.maxIter = 400;
115 | options.display = 'on';
116 | 
117 | dbstop if error
118 | [stackedAEOptTheta, cost] = minFunc( @(p) stackedAECost_nonneg(p, inputSize, hiddenSizeL2, ...
119 |     numClasses, netconfig, ...
120 |     lambda, trainData, trainLabels), ...
121 |     stackedAETheta, options);
122 | 
123 | 
124 | %% Test
125 | 
126 | 
127 | [pred] = stackedAEPredict(stackedAETheta, inputSize, hiddenSizeL2, ...
128 |     numClasses, netconfig, dropoutFraction, testData);
129 | 
130 | acc_before(seed) = mean(testLabels(:) == pred(:));
131 | fprintf('Before Finetuning Test Accuracy: %0.3f%%\n', acc_before(seed) * 100);
132 | 
133 | [pred] = stackedAEPredict(stackedAEOptTheta, inputSize, hiddenSizeL2, ...
134 |     numClasses, netconfig, dropoutFraction, testData);
135 | 
136 | acc_after(seed) = mean(testLabels(:) == pred(:));
137 | fprintf('After Finetuning Test Accuracy: %0.3f%%\n', acc_after(seed) * 100);
138 | 
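main.m flattens the pretrained stack{l}.w / stack{l}.b cells with stack2params before fine-tuning. stack2params.m and params2stack.m are not reproduced in this section, so the round trip below is a sketch of the convention their callers assume (the UFLDL-style per-layer [w(:); b(:)] packing, with netconfig recording the layer sizes needed to invert it), not those files' verbatim contents:

    % Hypothetical illustration: stackparams is expected to equal
    % [stack{1}.w(:); stack{1}.b(:); stack{2}.w(:); stack{2}.b(:)]
    [stackparams, netconfig] = stack2params(stack);
    stackBack = params2stack(stackparams, netconfig);
    assert(isequal(stackBack{1}.w, stack{1}.w));   % flattening is lossless
    assert(isequal(stackBack{2}.b, stack{2}.b));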
--------------------------------------------------------------------------------
/minFunc/ArmijoBacktrack.m:
--------------------------------------------------------------------------------
1 | function [t,x_new,f_new,g_new,funEvals,H] = ArmijoBacktrack(...
2 | x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,saveHessianComp,funObj,varargin) 3 | % 4 | % Backtracking linesearch to satisfy Armijo condition 5 | % 6 | % Inputs: 7 | % x: starting location 8 | % t: initial step size 9 | % d: descent direction 10 | % f: function value at starting location 11 | % fr: reference function value (usually funObj(x)) 12 | % gtd: directional derivative at starting location 13 | % c1: sufficient decrease parameter 14 | % debug: display debugging information 15 | % LS: type of interpolation 16 | % tolX: minimum allowable step length 17 | % doPlot: do a graphical display of interpolation 18 | % funObj: objective function 19 | % varargin: parameters of objective function 20 | % 21 | % Outputs: 22 | % t: step length 23 | % f_new: function value at x+t*d 24 | % g_new: gradient value at x+t*d 25 | % funEvals: number function evaluations performed by line search 26 | % H: Hessian at initial guess (only computed if requested 27 | 28 | % Evaluate the Objective and Gradient at the Initial Step 29 | if nargout == 6 30 | [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:}); 31 | else 32 | [f_new,g_new] = feval(funObj, x + t*d, varargin{:}); 33 | end 34 | funEvals = 1; 35 | 36 | while f_new > fr + c1*t*gtd || ~isLegal(f_new) 37 | 38 | temp = t; 39 | if LS == 0 || ~isLegal(f_new) 40 | % Backtrack w/ fixed backtracking rate 41 | if debug 42 | fprintf('Fixed BT\n'); 43 | end 44 | t = 0.5*t; 45 | elseif LS == 2 && isLegal(g_new) 46 | % Backtracking w/ cubic interpolation w/ derivative 47 | if debug 48 | fprintf('Grad-Cubic BT\n'); 49 | end 50 | t = polyinterp([0 f gtd; t f_new g_new'*d],doPlot); 51 | elseif funEvals < 2 || ~isLegal(f_prev) 52 | % Backtracking w/ quadratic interpolation (no derivative at new point) 53 | if debug 54 | fprintf('Quad BT\n'); 55 | end 56 | t = polyinterp([0 f gtd; t f_new sqrt(-1)],doPlot); 57 | else%if LS == 1 58 | % Backtracking w/ cubic interpolation (no derivatives at new points) 59 | if debug 60 | fprintf('Cubic BT\n'); 61 | end 62 | t = polyinterp([0 f gtd; t f_new sqrt(-1); t_prev f_prev sqrt(-1)],doPlot); 63 | end 64 | 65 | % Adjust if change in t is too small/large 66 | 67 | if t < temp*1e-3 68 | if debug 69 | fprintf('Interpolated Value Too Small, Adjusting\n'); 70 | end 71 | t = temp*1e-3; 72 | elseif t > temp*0.6 73 | if debug 74 | fprintf('Interpolated Value Too Large, Adjusting\n'); 75 | end 76 | t = temp*0.6; 77 | end 78 | 79 | f_prev = f_new; 80 | t_prev = temp; 81 | if ~saveHessianComp && nargout == 6 82 | [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:}); 83 | else 84 | [f_new,g_new] = feval(funObj, x + t*d, varargin{:}); 85 | end 86 | funEvals = funEvals+1; 87 | 88 | % Check whether step size has become too small 89 | if sum(abs(t*d)) <= tolX 90 | if debug 91 | fprintf('Backtracking Line Search Failed\n'); 92 | end 93 | t = 0; 94 | f_new = f; 95 | g_new = g; 96 | break; 97 | end 98 | end 99 | 100 | % Evaluate Hessian at new point 101 | if nargout == 6 && funEvals > 1 && saveHessianComp 102 | [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:}); 103 | funEvals = funEvals+1; 104 | end 105 | 106 | x_new = x + t*d; 107 | 108 | end 109 | -------------------------------------------------------------------------------- /minFunc/WolfeLineSearch.m: -------------------------------------------------------------------------------- 1 | function [t,f_new,g_new,funEvals,H] = WolfeLineSearch(... 
2 | x,t,d,f,g,gtd,c1,c2,LS,maxLS,tolX,debug,doPlot,saveHessianComp,funObj,varargin) 3 | % 4 | % Bracketing Line Search to Satisfy Wolfe Conditions 5 | % 6 | % Inputs: 7 | % x: starting location 8 | % t: initial step size 9 | % d: descent direction 10 | % f: function value at starting location 11 | % g: gradient at starting location 12 | % gtd: directional derivative at starting location 13 | % c1: sufficient decrease parameter 14 | % c2: curvature parameter 15 | % debug: display debugging information 16 | % LS: type of interpolation 17 | % maxLS: maximum number of iterations 18 | % tolX: minimum allowable step length 19 | % doPlot: do a graphical display of interpolation 20 | % funObj: objective function 21 | % varargin: parameters of objective function 22 | % 23 | % Outputs: 24 | % t: step length 25 | % f_new: function value at x+t*d 26 | % g_new: gradient value at x+t*d 27 | % funEvals: number function evaluations performed by line search 28 | % H: Hessian at initial guess (only computed if requested 29 | 30 | % Evaluate the Objective and Gradient at the Initial Step 31 | if nargout == 5 32 | [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:}); 33 | else 34 | [f_new,g_new] = feval(funObj, x + t*d, varargin{:}); 35 | end 36 | funEvals = 1; 37 | gtd_new = g_new'*d; 38 | 39 | % Bracket an Interval containing a point satisfying the 40 | % Wolfe criteria 41 | 42 | LSiter = 0; 43 | t_prev = 0; 44 | f_prev = f; 45 | g_prev = g; 46 | gtd_prev = gtd; 47 | done = 0; 48 | 49 | while LSiter < maxLS 50 | 51 | %% Bracketing Phase 52 | if ~isLegal(f_new) || ~isLegal(g_new) 53 | if 0 54 | if debug 55 | fprintf('Extrapolated into illegal region, Bisecting\n'); 56 | end 57 | t = (t + t_prev)/2; 58 | if ~saveHessianComp && nargout == 5 59 | [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:}); 60 | else 61 | [f_new,g_new] = feval(funObj, x + t*d, varargin{:}); 62 | end 63 | funEvals = funEvals + 1; 64 | gtd_new = g_new'*d; 65 | LSiter = LSiter+1; 66 | continue; 67 | else 68 | if debug 69 | fprintf('Extrapolated into illegal region, switching to Armijo line-search\n'); 70 | end 71 | t = (t + t_prev)/2; 72 | % Do Armijo 73 | if nargout == 5 74 | [t,x_new,f_new,g_new,armijoFunEvals,H] = ArmijoBacktrack(... 75 | x,t,d,f,f,g,gtd,c1,max(0,min(LS-2,2)),tolX,debug,doPlot,saveHessianComp,... 76 | funObj,varargin{:}); 77 | else 78 | [t,x_new,f_new,g_new,armijoFunEvals] = ArmijoBacktrack(... 79 | x,t,d,f,f,g,gtd,c1,max(0,min(LS-2,2)),tolX,debug,doPlot,saveHessianComp,... 
80 | funObj,varargin{:}); 81 | end 82 | funEvals = funEvals + armijoFunEvals; 83 | return; 84 | end 85 | end 86 | 87 | 88 | if f_new > f + c1*t*gtd || (LSiter > 1 && f_new >= f_prev) 89 | bracket = [t_prev t]; 90 | bracketFval = [f_prev f_new]; 91 | bracketGval = [g_prev g_new]; 92 | break; 93 | elseif abs(gtd_new) <= -c2*gtd 94 | bracket = t; 95 | bracketFval = f_new; 96 | bracketGval = g_new; 97 | done = 1; 98 | break; 99 | elseif gtd_new >= 0 100 | bracket = [t_prev t]; 101 | bracketFval = [f_prev f_new]; 102 | bracketGval = [g_prev g_new]; 103 | break; 104 | end 105 | temp = t_prev; 106 | t_prev = t; 107 | minStep = t + 0.01*(t-temp); 108 | maxStep = t*10; 109 | if LS == 3 110 | if debug 111 | fprintf('Extending Braket\n'); 112 | end 113 | t = maxStep; 114 | elseif LS ==4 115 | if debug 116 | fprintf('Cubic Extrapolation\n'); 117 | end 118 | t = polyinterp([temp f_prev gtd_prev; t f_new gtd_new],doPlot,minStep,maxStep); 119 | else 120 | t = mixedExtrap(temp,f_prev,gtd_prev,t,f_new,gtd_new,minStep,maxStep,debug,doPlot); 121 | end 122 | 123 | f_prev = f_new; 124 | g_prev = g_new; 125 | gtd_prev = gtd_new; 126 | if ~saveHessianComp && nargout == 5 127 | [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:}); 128 | else 129 | [f_new,g_new] = feval(funObj, x + t*d, varargin{:}); 130 | end 131 | funEvals = funEvals + 1; 132 | gtd_new = g_new'*d; 133 | LSiter = LSiter+1; 134 | end 135 | 136 | if LSiter == maxLS 137 | bracket = [0 t]; 138 | bracketFval = [f f_new]; 139 | bracketGval = [g g_new]; 140 | end 141 | 142 | %% Zoom Phase 143 | 144 | % We now either have a point satisfying the criteria, or a bracket 145 | % surrounding a point satisfying the criteria 146 | % Refine the bracket until we find a point satisfying the criteria 147 | insufProgress = 0; 148 | Tpos = 2; 149 | LOposRemoved = 0; 150 | while ~done && LSiter < maxLS 151 | 152 | % Find High and Low Points in bracket 153 | [f_LO LOpos] = min(bracketFval); 154 | HIpos = -LOpos + 3; 155 | 156 | % Compute new trial value 157 | if LS == 3 || ~isLegal(bracketFval) || ~isLegal(bracketGval) 158 | if debug 159 | fprintf('Bisecting\n'); 160 | end 161 | t = mean(bracket); 162 | elseif LS == 4 163 | if debug 164 | fprintf('Grad-Cubic Interpolation\n'); 165 | end 166 | t = polyinterp([bracket(1) bracketFval(1) bracketGval(:,1)'*d 167 | bracket(2) bracketFval(2) bracketGval(:,2)'*d],doPlot); 168 | else 169 | % Mixed Case %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 170 | nonTpos = -Tpos+3; 171 | if LOposRemoved == 0 172 | oldLOval = bracket(nonTpos); 173 | oldLOFval = bracketFval(nonTpos); 174 | oldLOGval = bracketGval(:,nonTpos); 175 | end 176 | t = mixedInterp(bracket,bracketFval,bracketGval,d,Tpos,oldLOval,oldLOFval,oldLOGval,debug,doPlot); 177 | end 178 | 179 | 180 | % Test that we are making sufficient progress 181 | if min(max(bracket)-t,t-min(bracket))/(max(bracket)-min(bracket)) < 0.1 182 | if debug 183 | fprintf('Interpolation close to boundary'); 184 | end 185 | if insufProgress || t>=max(bracket) || t <= min(bracket) 186 | if debug 187 | fprintf(', Evaluating at 0.1 away from boundary\n'); 188 | end 189 | if abs(t-max(bracket)) < abs(t-min(bracket)) 190 | t = max(bracket)-0.1*(max(bracket)-min(bracket)); 191 | else 192 | t = min(bracket)+0.1*(max(bracket)-min(bracket)); 193 | end 194 | insufProgress = 0; 195 | else 196 | if debug 197 | fprintf('\n'); 198 | end 199 | insufProgress = 1; 200 | end 201 | else 202 | insufProgress = 0; 203 | end 204 | 205 | % Evaluate new point 206 | if ~saveHessianComp && nargout == 5 207 | [f_new,g_new,H] = 
feval(funObj, x + t*d, varargin{:}); 208 | else 209 | [f_new,g_new] = feval(funObj, x + t*d, varargin{:}); 210 | end 211 | funEvals = funEvals + 1; 212 | gtd_new = g_new'*d; 213 | LSiter = LSiter+1; 214 | 215 | if f_new > f + c1*t*gtd || f_new >= f_LO 216 | % Armijo condition not satisfied or not lower than lowest 217 | % point 218 | bracket(HIpos) = t; 219 | bracketFval(HIpos) = f_new; 220 | bracketGval(:,HIpos) = g_new; 221 | Tpos = HIpos; 222 | else 223 | if abs(gtd_new) <= - c2*gtd 224 | % Wolfe conditions satisfied 225 | done = 1; 226 | elseif gtd_new*(bracket(HIpos)-bracket(LOpos)) >= 0 227 | % Old HI becomes new LO 228 | bracket(HIpos) = bracket(LOpos); 229 | bracketFval(HIpos) = bracketFval(LOpos); 230 | bracketGval(:,HIpos) = bracketGval(:,LOpos); 231 | if LS == 5 232 | if debug 233 | fprintf('LO Pos is being removed!\n'); 234 | end 235 | LOposRemoved = 1; 236 | oldLOval = bracket(LOpos); 237 | oldLOFval = bracketFval(LOpos); 238 | oldLOGval = bracketGval(:,LOpos); 239 | end 240 | end 241 | % New point becomes new LO 242 | bracket(LOpos) = t; 243 | bracketFval(LOpos) = f_new; 244 | bracketGval(:,LOpos) = g_new; 245 | Tpos = LOpos; 246 | end 247 | 248 | if ~done && abs((bracket(1)-bracket(2))*gtd_new) < tolX 249 | if debug 250 | fprintf('Line Search can not make further progress\n'); 251 | end 252 | break; 253 | end 254 | 255 | end 256 | 257 | %% 258 | if LSiter == maxLS 259 | if debug 260 | fprintf('Line Search Exceeded Maximum Line Search Iterations\n'); 261 | end 262 | end 263 | 264 | [f_LO LOpos] = min(bracketFval); 265 | t = bracket(LOpos); 266 | f_new = bracketFval(LOpos); 267 | g_new = bracketGval(:,LOpos); 268 | 269 | 270 | 271 | % Evaluate Hessian at new point 272 | if nargout == 5 && funEvals > 1 && saveHessianComp 273 | [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:}); 274 | funEvals = funEvals + 1; 275 | end 276 | 277 | end 278 | 279 | 280 | %% 281 | function [t] = mixedExtrap(x0,f0,g0,x1,f1,g1,minStep,maxStep,debug,doPlot); 282 | alpha_c = polyinterp([x0 f0 g0; x1 f1 g1],doPlot,minStep,maxStep); 283 | alpha_s = polyinterp([x0 f0 g0; x1 sqrt(-1) g1],doPlot,minStep,maxStep); 284 | if alpha_c > minStep && abs(alpha_c - x1) < abs(alpha_s - x1) 285 | if debug 286 | fprintf('Cubic Extrapolation\n'); 287 | end 288 | t = alpha_c; 289 | else 290 | if debug 291 | fprintf('Secant Extrapolation\n'); 292 | end 293 | t = alpha_s; 294 | end 295 | end 296 | 297 | %% 298 | function [t] = mixedInterp(bracket,bracketFval,bracketGval,d,Tpos,oldLOval,oldLOFval,oldLOGval,debug,doPlot); 299 | 300 | % Mixed Case %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 301 | nonTpos = -Tpos+3; 302 | 303 | gtdT = bracketGval(:,Tpos)'*d; 304 | gtdNonT = bracketGval(:,nonTpos)'*d; 305 | oldLOgtd = oldLOGval'*d; 306 | if bracketFval(Tpos) > oldLOFval 307 | alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd 308 | bracket(Tpos) bracketFval(Tpos) gtdT],doPlot); 309 | alpha_q = polyinterp([oldLOval oldLOFval oldLOgtd 310 | bracket(Tpos) bracketFval(Tpos) sqrt(-1)],doPlot); 311 | if abs(alpha_c - oldLOval) < abs(alpha_q - oldLOval) 312 | if debug 313 | fprintf('Cubic Interpolation\n'); 314 | end 315 | t = alpha_c; 316 | else 317 | if debug 318 | fprintf('Mixed Quad/Cubic Interpolation\n'); 319 | end 320 | t = (alpha_q + alpha_c)/2; 321 | end 322 | elseif gtdT'*oldLOgtd < 0 323 | alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd 324 | bracket(Tpos) bracketFval(Tpos) gtdT],doPlot); 325 | alpha_s = polyinterp([oldLOval oldLOFval oldLOgtd 326 | bracket(Tpos) sqrt(-1) gtdT],doPlot); 327 | if abs(alpha_c - bracket(Tpos)) 
>= abs(alpha_s - bracket(Tpos))
328 |         if debug
329 |             fprintf('Cubic Interpolation\n');
330 |         end
331 |         t = alpha_c;
332 |     else
333 |         if debug
334 |             fprintf('Quad Interpolation\n');
335 |         end
336 |         t = alpha_s;
337 |     end
338 | elseif abs(gtdT) <= abs(oldLOgtd)
339 |     alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd
340 |         bracket(Tpos) bracketFval(Tpos) gtdT],...
341 |         doPlot,min(bracket),max(bracket));
342 |     alpha_s = polyinterp([oldLOval sqrt(-1) oldLOgtd
343 |         bracket(Tpos) bracketFval(Tpos) gtdT],...
344 |         doPlot,min(bracket),max(bracket));
345 |     if alpha_c > min(bracket) && alpha_c < max(bracket)
346 |         if abs(alpha_c - bracket(Tpos)) < abs(alpha_s - bracket(Tpos))
347 |             if debug
348 |                 fprintf('Bounded Cubic Extrapolation\n');
349 |             end
350 |             t = alpha_c;
351 |         else
352 |             if debug
353 |                 fprintf('Bounded Secant Extrapolation\n');
354 |             end
355 |             t = alpha_s;
356 |         end
357 |     else
358 |         if debug
359 |             fprintf('Bounded Secant Extrapolation\n');
360 |         end
361 |         t = alpha_s;
362 |     end
363 | 
364 |     if bracket(Tpos) > oldLOval
365 |         t = min(bracket(Tpos) + 0.66*(bracket(nonTpos) - bracket(Tpos)),t);
366 |     else
367 |         t = max(bracket(Tpos) + 0.66*(bracket(nonTpos) - bracket(Tpos)),t);
368 |     end
369 | else
370 |     t = polyinterp([bracket(nonTpos) bracketFval(nonTpos) gtdNonT
371 |         bracket(Tpos) bracketFval(Tpos) gtdT],doPlot);
372 | end
373 | end
--------------------------------------------------------------------------------
/minFunc/autoGrad.m:
--------------------------------------------------------------------------------
1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin)
2 | % [f,g] = autoGrad(x,useComplex,funObj,varargin)
3 | %
4 | % Numerically compute gradient of objective function from function values
5 | 
6 | p = length(x);
7 | mu = 1e-150;
8 | 
9 | if useComplex % Use Complex Differentials
10 |     diff = zeros(p,1);
11 |     for j = 1:p
12 |         e_j = zeros(p,1);
13 |         e_j(j) = 1;
14 |         diff(j,1) = funObj(x + mu*i*e_j,varargin{:});
15 |     end
16 | 
17 |     f = mean(real(diff));
18 |     g = imag(diff)/mu;
19 | else % Use Finite Differencing
20 |     f = funObj(x,varargin{:});
21 |     mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p);
22 |     for j = 1:p
23 |         e_j = zeros(p,1);
24 |         e_j(j) = 1;
25 |         diff(j,1) = funObj(x + mu*e_j,varargin{:});
26 |     end
27 |     g = (diff-f)/mu;
28 | end
29 | 
30 | if 0 % DEBUG CODE
31 |     [fReal gReal] = funObj(x,varargin{:});
32 |     [fReal f]
33 |     [gReal g]
34 |     pause;
35 | end
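The complex-step branch above reaches near machine-precision gradients: for an analytic objective, f(x + i*mu*e_j) = f(x) + i*mu*g_j + O(mu^2), so imag(f(x + i*mu*e_j))/mu isolates g_j with no subtractive cancellation, which is why mu can be as tiny as 1e-150. A quick check (an illustrative sketch; it assumes the objective code accepts complex inputs, as this one does):

    % Verify autoGrad's complex-step gradient against a hand-derived one
    funObj = @(x) sum(exp(x).*sin(x));           % smooth analytic test objective
    x0 = [0.3; -1.2; 2.0];
    [f,g] = autoGrad(x0, 1, funObj);             % useComplex = 1
    gExact = exp(x0).*(sin(x0) + cos(x0));       % d/dx e^x sin(x) = e^x (sin(x) + cos(x))
    fprintf('max gradient error: %g\n', max(abs(g - gExact)));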
--------------------------------------------------------------------------------
/minFunc/autoHess.m:
--------------------------------------------------------------------------------
1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin)
2 | % Numerically compute Hessian of objective function from gradient values
3 | 
4 | p = length(x);
5 | 
6 | if useComplex % Use Complex Differentials
7 |     mu = 1e-150;
8 |     diff = zeros(p);
9 |     for j = 1:p
10 |         e_j = zeros(p,1);
11 |         e_j(j) = 1;
12 |         [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:});
13 |     end
14 |     f = mean(real(f));
15 |     g = mean(real(diff),2);
16 |     H = imag(diff)/mu;
17 | else % Use finite differencing
18 |     mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p);
19 |     [f,g] = funObj(x,varargin{:});
20 |     diff = zeros(p);
21 |     for j = 1:p
22 |         e_j = zeros(p,1);
23 |         e_j(j) = 1;
24 |         [f diff(:,j)] = funObj(x + mu*e_j,varargin{:});
25 |     end
26 |     H = (diff-repmat(g,[1 p]))/mu;
27 | end
28 | 
29 | % Make sure H is symmetric
30 | H = (H+H')/2;
31 | 
32 | if 0 % DEBUG CODE
33 |     [fReal gReal HReal] = funObj(x,varargin{:});
34 |     [fReal f]
35 |     [gReal g]
36 |     [HReal H]
37 |     pause;
38 | end
--------------------------------------------------------------------------------
/minFunc/autoHv.m:
--------------------------------------------------------------------------------
1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin)
2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:})
3 | % based on gradient values
4 | 
5 | if useComplex
6 |     mu = 1e-150i;
7 | else
8 |     mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v);
9 | end
10 | [f,finDif] = funObj(x + v*mu,varargin{:});
11 | Hv = (finDif-g)/mu;
--------------------------------------------------------------------------------
/minFunc/autoTensor.m:
--------------------------------------------------------------------------------
1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin)
2 | % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin)
3 | % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values
4 | 
5 | p = length(x);
6 | 
7 | if useComplex % Use Complex Differentials
8 |     mu = 1e-150;
9 |     diff = zeros(p,p,p);
10 |     for j = 1:p
11 |         e_j = zeros(p,1);
12 |         e_j(j) = 1;
13 |         [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:});
14 |     end
15 |     f = mean(real(f));
16 |     g = mean(real(g),2);
17 |     H = mean(real(diff),3);
18 |     T = imag(diff)/mu;
19 | else % Use finite differencing
20 |     mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p);
21 |     [f,g,H] = funObj(x,varargin{:});
22 |     diff = zeros(p,p,p);
23 |     for j = 1:p
24 |         e_j = zeros(p,1);
25 |         e_j(j) = 1;
26 |         [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:});
27 |     end
28 |     T = (diff-repmat(H,[1 1 p]))/mu;
29 | end
--------------------------------------------------------------------------------
/minFunc/callOutput.m:
--------------------------------------------------------------------------------
1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin)
2 | 
3 | optimValues.iteration = i;
4 | optimValues.funccount = funEvals;
5 | optimValues.fval = f;
6 | optimValues.stepsize = t;
7 | optimValues.directionalderivative = gtd;
8 | optimValues.gradient = g;
9 | optimValues.searchdirection = d;
10 | optimValues.firstorderopt = opt;
11 | 
12 | feval(outputFcn, x,optimValues,state,varargin{:});
--------------------------------------------------------------------------------
/minFunc/conjGrad.m:
--------------------------------------------------------------------------------
1 | function [x,k,res,negCurv] = cg(A,b,optTol,maxIter,verbose,precFunc,precArgs,matrixVectFunc,matrixVectArgs)
2 | % [x,k,res,negCurv] =
3 | % cg(A,b,optTol,maxIter,verbose,precFunc,precArgs,matrixVectFunc,matrixVect
4 | % Args)
5 | % Linear Conjugate Gradient, where optionally we use
6 | % - preconditioner on vector v with precFunc(v,precArgs{:})
7 | % - matrix multiplied by vector with matrixVectFunc(v,matrixVectArgs{:})
8 | 
9 | x = zeros(size(b));
10 | r = -b;
11 | 
12 | % Apply preconditioner (if supplied)
13 | if nargin >= 7 && ~isempty(precFunc)
14 |     y = precFunc(r,precArgs{:});
15 | else
16 |     y = r;
17 | end
18 | 
19 | ry = r'*y;
20 | p = -y;
21 | k = 0;
22 | 
23 | res = norm(r);
24 | done = 0;
25 | negCurv = [];
26 | while res > optTol & k < maxIter & ~done
27 |     % Compute Matrix-vector product
28 |     if nargin >= 9
29 |         Ap = matrixVectFunc(p,matrixVectArgs{:});
30 |     else
31 |         Ap = A*p;
32 |     end
33 |     pAp = p'*Ap;
34 | 
35 |     % Check for negative Curvature
36 |     if pAp <= 1e-16
37 |         if verbose
38 |             fprintf('Negative Curvature Detected!\n');
39 |         end
40 | 
41 |         if nargout == 4
42 |             if pAp < 0
43 |                 negCurv = p;
44 |                 return
45 |             end
46 |         end
47 | 
48 |         if k == 0
49 |             if verbose
50 |                 fprintf('First-Iter, Proceeding...\n');
51 |             end
52 |             done = 1;
53 |         else
54 |             if verbose
55 |                 fprintf('Stopping\n');
56 |             end
57 |             break;
58 |         end
59 |     end
60 | 
61 |     % Conjugate Gradient
62 |     alpha = ry/(pAp);
63 |     x = x + alpha*p;
64 |     r = r + alpha*Ap;
65 | 
66 |     % If supplied, apply preconditioner
67 |     if nargin >= 7 && ~isempty(precFunc)
68 |         y = precFunc(r,precArgs{:});
69 |     else
70 |         y = r;
71 |     end
72 | 
73 |     ry_new = r'*y;
74 |     beta =
ry_new/ry; 75 | p = -y + beta*p; 76 | k = k + 1; 77 | 78 | % Update variables 79 | ry = ry_new; 80 | res = norm(r); 81 | end 82 | end 83 | -------------------------------------------------------------------------------- /minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /minFunc/example_minFunc.m: -------------------------------------------------------------------------------- 1 | % Runs various limited-memory solvers on 2D rosenbrock function for 25 2 | % function evaluations 3 | maxFunEvals = 25; 4 | 5 | fprintf('Result after %d evaluations of limited-memory solvers on 2D rosenbrock:\n',maxFunEvals); 6 | 7 | fprintf('---------------------------------------\n'); 8 | fprintf('x1 = %.4f, x2 = %.4f (starting point)\n',0,0); 9 | fprintf('x1 = %.4f, x2 = %.4f (optimal solution)\n',1,1); 10 | fprintf('---------------------------------------\n'); 11 | 12 | if exist('minimize') == 2 13 | % Minimize.m - conjugate gradient method 14 | x = minimize([0 0]', 'rosenbrock', -maxFunEvals); 15 | fprintf('x1 = %.4f, x2 = %.4f (minimize.m by C. 
Rasmussen)\n',x(1),x(2)); 16 | end 17 | 18 | options = []; 19 | options.display = 'none'; 20 | options.maxFunEvals = maxFunEvals; 21 | 22 | % Steepest Descent 23 | options.Method = 'sd'; 24 | x = minFunc(@rosenbrock,[0 0]',options); 25 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with steepest descent)\n',x(1),x(2)); 26 | 27 | % Cyclic Steepest Descent 28 | options.Method = 'csd'; 29 | x = minFunc(@rosenbrock,[0 0]',options); 30 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with cyclic steepest descent)\n',x(1),x(2)); 31 | 32 | % Barzilai & Borwein 33 | options.Method = 'bb'; 34 | options.bbType = 1; 35 | x = minFunc(@rosenbrock,[0 0]',options); 36 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with spectral gradient descent)\n',x(1),x(2)); 37 | 38 | % Hessian-Free Newton 39 | options.Method = 'newton0'; 40 | x = minFunc(@rosenbrock,[0 0]',options); 41 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with Hessian-free Newton)\n',x(1),x(2)); 42 | 43 | % Hessian-Free Newton w/ L-BFGS preconditioner 44 | options.Method = 'pnewton0'; 45 | x = minFunc(@rosenbrock,[0 0]',options); 46 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with preconditioned Hessian-free Newton)\n',x(1),x(2)); 47 | 48 | % Conjugate Gradient 49 | options.Method = 'cg'; 50 | x = minFunc(@rosenbrock,[0 0]',options); 51 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with conjugate gradient)\n',x(1),x(2)); 52 | 53 | % Scaled conjugate Gradient 54 | options.Method = 'scg'; 55 | x = minFunc(@rosenbrock,[0 0]',options); 56 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with scaled conjugate gradient)\n',x(1),x(2)); 57 | 58 | % Preconditioned Conjugate Gradient 59 | options.Method = 'pcg'; 60 | x = minFunc(@rosenbrock,[0 0]',options); 61 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with preconditioned conjugate gradient)\n',x(1),x(2)); 62 | 63 | % Default: L-BFGS (default) 64 | options.Method = 'lbfgs'; 65 | x = minFunc(@rosenbrock,[0 0]',options); 66 | fprintf('x1 = %.4f, x2 = %.4f (minFunc with limited-memory BFGS - default)\n',x(1),x(2)); 67 | 68 | fprintf('---------------------------------------\n'); 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ 
analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /minFunc/lbfgsC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | 4 | /* See lbfgs.m for details! */ 5 | /* This function may not exit gracefully on bad input! 
*/
6 | 
7 | 
8 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
9 | {
10 |     /* Variable Declarations */
11 | 
12 |     double *s, *y, *g, *H, *d, *ro, *alpha, *beta, *q, *r;
13 |     int nVars,nSteps,lhs_dims[2];
14 |     double temp;
15 |     int i,j;
16 | 
17 |     /* Get Input Pointers */
18 | 
19 |     g = mxGetPr(prhs[0]);
20 |     s = mxGetPr(prhs[1]);
21 |     y = mxGetPr(prhs[2]);
22 |     H = mxGetPr(prhs[3]);
23 | 
24 |     /* Compute number of variables (p), rank of update (d) */
25 | 
26 |     nVars = mxGetDimensions(prhs[1])[0];
27 |     nSteps = mxGetDimensions(prhs[1])[1];
28 | 
29 |     /* Allocated Memory for Function Variables */
30 |     ro = mxCalloc(nSteps,sizeof(double));
31 |     alpha = mxCalloc(nSteps,sizeof(double));
32 |     beta = mxCalloc(nSteps,sizeof(double));
33 |     q = mxCalloc(nVars*(nSteps+1),sizeof(double));
34 |     r = mxCalloc(nVars*(nSteps+1),sizeof(double));
35 | 
36 |     /* Set-up Output Vector */
37 | 
38 |     lhs_dims[0] = nVars;
39 |     lhs_dims[1] = 1;
40 | 
41 |     plhs[0] = mxCreateNumericArray(2,lhs_dims,mxDOUBLE_CLASS,mxREAL);
42 |     d = mxGetPr(plhs[0]);
43 | 
44 |     /* ro = 1/(y(:,i)'*s(:,i)) */
45 |     for(i=0;i=0;i--)
62 |     {
63 |         /* alpha(i) = ro(i)*s(:,i)'*q(:,i+1) */
64 |         alpha[i] = 0;
65 |         for(j=0;j
--------------------------------------------------------------------------------
/minFunc/lbfgsUpdate.m:
--------------------------------------------------------------------------------
1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag)
2 | ys = y'*s;
3 | if ys > 1e-10
4 |     numCorrections = size(old_dirs,2);
5 |     if numCorrections < corrections
6 |         % Full Update
7 |         old_dirs(:,numCorrections+1) = s;
8 |         old_stps(:,numCorrections+1) = y;
9 |     else
10 |         % Limited-Memory Update
11 |         old_dirs = [old_dirs(:,2:corrections) s];
12 |         old_stps = [old_stps(:,2:corrections) y];
13 |     end
14 | 
15 |     % Update scale of initial Hessian approximation
16 |     Hdiag = ys/(y'*y);
17 | else
18 |     if debug
19 |         fprintf('Skipping Update\n');
20 |     end
21 | end
--------------------------------------------------------------------------------
/minFunc/logistic/LogisticDiagPrecond.m:
--------------------------------------------------------------------------------
1 | function [m] = LogisticDiagPrecond(v,w,X,y)
2 | % v(feature,1) - vector that we will apply diagonal preconditioner to
3 | % w(feature,1)
4 | % X(instance,feature)
5 | % y(instance,1)
6 | 
7 | sig = 1./(1+exp(-y.*(X*w)));
8 | 
9 | % Compute diagonals of Hessian
10 | sig = sig.*(1-sig);
11 | for i = 1:length(w)
12 |     h(i,1) = (sig.*X(:,i))'*X(:,i);
13 | end
14 | 
15 | % Apply preconditioner
16 | m = v./h;
17 | 
18 | % Exact preconditioner
19 | %H = X'*diag(sig.*(1-sig))*X;
20 | %m = H\v;
21 | 
--------------------------------------------------------------------------------
/minFunc/logistic/LogisticHv.m:
--------------------------------------------------------------------------------
1 | function [Hv] = LogisticHv(v,w,X,y)
2 | % v(feature,1) - vector that we will multiply Hessian by
3 | % w(feature,1)
4 | % X(instance,feature)
5 | % y(instance,1)
6 | 
7 | sig = 1./(1+exp(-y.*(X*w)));
8 | Hv = X.'*(sig.*(1-sig).*(X*v));
9 | 
--------------------------------------------------------------------------------
/minFunc/logistic/LogisticLoss.m:
--------------------------------------------------------------------------------
1 | function [nll,g,H,T] = LogisticLoss(w,X,y)
2 | % w(feature,1)
3 | % X(instance,feature)
4 | % y(instance,1)
5 | 
6 | [n,p] = size(X);
7 | 
8 | Xw = X*w;
9 | yXw = y.*Xw;
10 | 
11 | nll = sum(mylogsumexp([zeros(n,1) -yXw]));
12 | 
13 | if nargout > 1
14 |     if nargout > 2
15 |         sig = 1./(1+exp(-yXw));
16 |         g = -X.'*(y.*(1-sig));
17 |     else
18 |         g = -X.'*(y./(1+exp(yXw)));
19 |     end
20 | end
21 | 
22 | if nargout > 2
23 |     H = X.'*diag(sparse(sig.*(1-sig)))*X;
24 | end
25 | 
26 | if nargout > 3
27 |     T = zeros(p,p,p);
28 |     for j1 = 1:p
29 |         for j2 = 1:p
30 |             for j3 =
1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. */ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 9 | 10 | /* repeat a block of memory rep times */ 11 | void memrep(char *dest, size_t chunk, int rep) 12 | { 13 | #if 0 14 | /* slow way */ 15 | int i; 16 | char *p = dest; 17 | for(i=1;i>1); 31 | #endif 32 | } 33 | 34 | void repmat(char *dest, const char *src, int ndim, int *destdimsize, 35 | int *dimsize, const int *dims, int *rep) 36 | { 37 | int d = ndim-1; 38 | int i, chunk; 39 | /* copy the first repetition into dest */ 40 | if(d == 0) { 41 | chunk = dimsize[0]; 42 | memcpy(dest,src,chunk); 43 | } 44 | else { 45 | /* recursively repeat each slice of src */ 46 | for(i=0;i ndimdest) ndimdest = nrep; 91 | rep = mxCalloc(ndimdest, sizeof(int)); 92 | for(i=0;i ndimdest) ndimdest = nrep; 105 | rep = mxCalloc(ndimdest, sizeof(int)); 106 | for(i=0;i ndim) memrep(dest,destdimsize[ndim-1],extra_rep); 143 | if(mxIsComplex(srcmat)) { 144 | src = (char*)mxGetPi(srcmat); 145 | dest = (char*)mxGetPi(plhs[0]); 146 | repmat(dest,src,ndim,destdimsize,dimsize,dims,rep); 147 | if(ndimdest > ndim) memrep(dest,destdimsize[ndim-1],extra_rep); 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /minFunc/logistic/repmatC.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehosseiniasl/Nonnegativity-Constrained-Autoencoder-NCAE/219c53631d60a268ba8550796e2ae38639450861/minFunc/logistic/repmatC.dll -------------------------------------------------------------------------------- /minFunc/logistic/repmatC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehosseiniasl/Nonnegativity-Constrained-Autoencoder-NCAE/219c53631d60a268ba8550796e2ae38639450861/minFunc/logistic/repmatC.mexglx -------------------------------------------------------------------------------- /minFunc/logistic/repmatC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehosseiniasl/Nonnegativity-Constrained-Autoencoder-NCAE/219c53631d60a268ba8550796e2ae38639450861/minFunc/logistic/repmatC.mexmac -------------------------------------------------------------------------------- /minFunc/mchol.m: -------------------------------------------------------------------------------- 1 | function [l,d,perm] = mchol(A,mu) 2 | % Compute a modified LDL factorization of A 3 | % (MEX ME!) 
4 | 5 | if nargin < 2 6 | mu = 1e-12; 7 | end 8 | 9 | n = size(A,1); 10 | l = eye(n); 11 | d = zeros(n,1); 12 | perm = 1:n; 13 | 14 | for i = 1:n 15 | c(i,i) = A(i,i); 16 | end 17 | 18 | % Compute modification parameters 19 | gamma = max(abs(diag(A))); 20 | xi = max(max(abs(setdiag(A,0)))); 21 | delta = mu*max(gamma+xi,1); 22 | if n > 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /minFunc/mcholC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | 4 | double mymax(double x, double y) 5 | { 6 | if (x > y) 7 | return x; 8 | else 9 | return y; 10 | } 11 | 12 | double absolute(double x) 13 | { 14 | if (x >= -x) 15 | return x; 16 | else 17 | return -x; 18 | } 19 | 20 | void permuteInt(int *x, int p, int q) 21 | { 22 | int temp; 23 | temp = x[p]; 24 | x[p] = x[q]; 25 | x[q] = temp; 26 | } 27 | 28 | void permute(double *x, int p, int q) 29 | { 30 | double temp; 31 | temp = x[p]; 32 | x[p] = x[q]; 33 | x[q] = temp; 34 | } 35 | 36 | void permuteRows(double *x, int p, int q,int n) 37 | { 38 | int i; 39 | double temp; 40 | for(i = 0; i < n; i++) 41 | { 42 | temp = x[p+i*n]; 43 | x[p+i*n] = x[q+i*n]; 44 | x[q+i*n] = temp; 45 | } 46 | } 47 | 48 | void permuteCols(double *x, int p, int q,int n) 49 | { 50 | int i; 51 | double temp; 52 | for(i = 0; i < n; i++) 53 | { 54 | temp = x[i+p*n]; 55 | x[i+p*n] = x[i+q*n]; 56 | x[i+q*n] = temp; 57 | } 58 | } 59 | 60 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 61 | { 62 | int n,sizL[2],sizD[2],i,j,q,s, 63 | *P; 64 | 65 | double mu,gamma,xi,delta,beta,maxVal,theta, 66 | *c, *H, *L, *D, *A; 67 | 68 | /* Input */ 69 | H = mxGetPr(prhs[0]); 70 | if (nrhs == 1) 71 | { 72 | mu = 1e-12; 73 | } 74 | else 75 | { 76 | mu = mxGetScalar(prhs[1]); 77 | } 78 | 79 | /* Compute Sizes */ 80 | n = mxGetDimensions(prhs[0])[0]; 81 | 82 | /* Form Output */ 83 | sizL[0] = n; 84 | sizL[1] = n; 85 | plhs[0] = mxCreateNumericArray(2,sizL,mxDOUBLE_CLASS,mxREAL); 86 | L = mxGetPr(plhs[0]); 87 | sizD[0] = n; 88 | sizD[1] = 1; 89 | plhs[1] = mxCreateNumericArray(2,sizD,mxDOUBLE_CLASS,mxREAL); 90 | D = mxGetPr(plhs[1]); 91 | plhs[2] = mxCreateNumericArray(2,sizD,mxINT32_CLASS,mxREAL); 92 | P = (int*)mxGetData(plhs[2]); 93 | 94 | /* Initialize */ 95 | c = mxCalloc(n*n,sizeof(double)); 96 | A = mxCalloc(n*n,sizeof(double)); 97 | 98 | for (i = 0; i < n; i++) 99 | { 100 | P[i] = i; 101 | for (j = 0;j < n; j++) 102 | { 103 | A[i+n*j] = H[i+n*j]; 104 | } 105 | } 106 | 107 | gamma = 0; 108 | for (i = 0; i < n; i++) 109 | { 110 | L[i+n*i] = 1; 111 | c[i+n*i] = A[i+n*i]; 112 | } 
113 | 114 | /* Compute modification parameters */ 115 | gamma = -1; 116 | xi = -1; 117 | for (i = 0; i < n; i++) 118 | { 119 | gamma = mymax(gamma,absolute(A[i+n*i])); 120 | for (j = 0;j < n; j++) 121 | { 122 | //printf("A(%d,%d) = %f, %f\n",i,j,A[i+n*j],absolute(A[i+n*j])); 123 | if (i != j) 124 | xi = mymax(xi,absolute(A[i+n*j])); 125 | } 126 | } 127 | delta = mu*mymax(gamma+xi,1); 128 | 129 | if (n > 1) 130 | { 131 | beta = sqrt(mymax(gamma,mymax(mu,xi/sqrt(n*n-1)))); 132 | } 133 | else 134 | { 135 | beta = sqrt(mymax(gamma,mu)); 136 | } 137 | 138 | for (j = 0; j < n; j++) 139 | { 140 | 141 | /* Find q that results in Best Permutation with j */ 142 | maxVal = -1; 143 | q = 0; 144 | for(i = j; i < n; i++) 145 | { 146 | if (absolute(c[i+n*i]) > maxVal) 147 | { 148 | maxVal = mymax(maxVal,absolute(c[i+n*i])); 149 | q = i; 150 | } 151 | } 152 | 153 | /* Permute D,c,L,A,P */ 154 | permute(D,j,q); 155 | permuteInt(P,j,q); 156 | permuteRows(c,j,q,n); 157 | permuteCols(c,j,q,n); 158 | permuteRows(L,j,q,n); 159 | permuteCols(L,j,q,n); 160 | permuteRows(A,j,q,n); 161 | permuteCols(A,j,q,n); 162 | 163 | for(s = 0; s <= j-1; s++) 164 | L[j+n*s] = c[j+n*s]/D[s]; 165 | 166 | for(i = j+1; i < n; i++) 167 | { 168 | c[i+j*n] = A[i+j*n]; 169 | for(s = 0; s <= j-1; s++) 170 | { 171 | c[i+j*n] -= L[j+n*s]*c[i+n*s]; 172 | } 173 | } 174 | 175 | theta = 0; 176 | if (j < n-1) 177 | { 178 | for(i = j+1;i < n; i++) 179 | theta = mymax(theta,absolute(c[i+n*j])); 180 | } 181 | 182 | D[j] = mymax(absolute(c[j+n*j]),mymax(delta,theta*theta/(beta*beta))); 183 | 184 | if (j < n-1) 185 | { 186 | for(i = j+1; i < n; i++) 187 | { 188 | c[i+n*i] = c[i+n*i] - c[i+n*j]*c[i+n*j]/D[j]; 189 | } 190 | } 191 | 192 | } 193 | 194 | for(i = 0; i < n; i++) 195 | P[i]++; 196 | 197 | mxFree(c); 198 | mxFree(A); 199 | } -------------------------------------------------------------------------------- /minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehosseiniasl/Nonnegativity-Constrained-Autoencoder-NCAE/219c53631d60a268ba8550796e2ae38639450861/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehosseiniasl/Nonnegativity-Constrained-Autoencoder-NCAE/219c53631d60a268ba8550796e2ae38639450861/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehosseiniasl/Nonnegativity-Constrained-Autoencoder-NCAE/219c53631d60a268ba8550796e2ae38639450861/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = 
max(2*tau,beta/2);
24 |     end
25 | end
26 | 
--------------------------------------------------------------------------------
/minFunc/minFunc.m:
--------------------------------------------------------------------------------
1 | function [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin)
2 | % minFunc(funObj,x0,options,varargin)
3 | %
4 | % Unconstrained optimizer using a line search strategy
5 | %
6 | % Uses an interface very similar to fminunc
7 | % (it doesn't support all of the optimization toolbox options,
8 | % but supports many other options).
9 | %
10 | % It computes descent directions using one of ('Method'):
11 | % - 'sd': Steepest Descent
12 | %     (no previous information used, not recommended)
13 | % - 'csd': Cyclic Steepest Descent
14 | %     (uses previous step length for a fixed length cycle)
15 | % - 'bb': Barzilai and Borwein Gradient
16 | %     (uses only previous step)
17 | % - 'cg': Non-Linear Conjugate Gradient
18 | %     (uses only previous step and a vector beta)
19 | % - 'scg': Scaled Non-Linear Conjugate Gradient
20 | %     (uses previous step and a vector beta,
21 | %     and Hessian-vector products to initialize line search)
22 | % - 'pcg': Preconditioned Non-Linear Conjugate Gradient
23 | %     (uses only previous step and a vector beta, preconditioned version)
24 | % - 'lbfgs': Quasi-Newton with Limited-Memory BFGS Updating
25 | %     (default: uses a predetermined number of previous steps to form a
26 | %     low-rank Hessian approximation)
27 | % - 'newton0': Hessian-Free Newton
28 | %     (numerically computes Hessian-Vector products)
29 | % - 'pnewton0': Preconditioned Hessian-Free Newton
30 | %     (numerically computes Hessian-Vector products, preconditioned
31 | %     version)
32 | % - 'qnewton': Quasi-Newton Hessian approximation
33 | %     (uses dense Hessian approximation)
34 | % - 'mnewton': Newton's method with Hessian calculation after every
35 | %     user-specified number of iterations
36 | %     (needs user-supplied Hessian matrix)
37 | % - 'newton': Newton's method with Hessian calculation every iteration
38 | %     (needs user-supplied Hessian matrix)
39 | % - 'tensor': Tensor
40 | %     (needs user-supplied Hessian matrix and Tensor of 3rd partial derivatives)
41 | %
42 | % Several line search strategies are available for finding a step length satisfying
43 | % the termination criteria ('LS'):
44 | % - 0: Backtrack w/ Step Size Halving
45 | % - 1: Backtrack w/ Quadratic/Cubic Interpolation from new function values
46 | % - 2: Backtrack w/ Cubic Interpolation from new function + gradient
47 | %     values (default for 'bb' and 'sd')
48 | % - 3: Bracketing w/ Step Size Doubling and Bisection
49 | % - 4: Bracketing w/ Cubic Interpolation/Extrapolation with function +
50 | %     gradient values (default for all except 'bb' and 'sd')
51 | % - 5: Bracketing w/ Mixed Quadratic/Cubic Interpolation/Extrapolation
52 | % - 6: Use Matlab Optimization Toolbox's line search
53 | %     (requires Matlab's linesearch.m to be added to the path)
54 | %
55 | % Above, the first three find a point satisfying the Armijo conditions,
56 | % while the last four search for a point satisfying the Wolfe
57 | % conditions. If the objective function overflows, it is recommended
58 | % to use one of the first 3.
59 | % The first three can be used to perform a non-monotone
60 | % linesearch by changing the option 'Fref'.
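%
% Example usage (a minimal sketch mirroring example_minFunc.m; rosenbrock.m
% ships in this folder):
%   options = [];
%   options.Method = 'lbfgs';
%   options.MaxIter = 100;
%   options.Display = 'final';
%   x = minFunc(@rosenbrock,[0 0]',options);   % should approach [1;1]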
61 | % 62 | % Several strategies for choosing the initial step size are avaiable ('LS_init'): 63 | % - 0: Always try an initial step length of 1 (default for all except 'cg' and 'sd') 64 | % (t = 1) 65 | % - 1: Use a step similar to the previous step (default for 'cg' and 'sd') 66 | % (t = t_old*min(2,g'd/g_old'd_old)) 67 | % - 2: Quadratic Initialization using previous function value and new 68 | % function value/gradient (use this if steps tend to be very long) 69 | % (t = min(1,2*(f-f_old)/g)) 70 | % - 3: The minimum between 1 and twice the previous step length 71 | % (t = min(1,2*t) 72 | % - 4: The scaled conjugate gradient step length (may accelerate 73 | % conjugate gradient methods, but requires a Hessian-vector product) 74 | % (t = g'd/d'Hd) 75 | % 76 | % Inputs: 77 | % funObj is a function handle 78 | % x0 is a starting vector; 79 | % options is a struct containing parameters 80 | % (defaults are used for non-existent or blank fields) 81 | % all other arguments are passed to funObj 82 | % 83 | % Outputs: 84 | % x is the minimum value found 85 | % f is the function value at the minimum found 86 | % exitflag returns an exit condition 87 | % output returns a structure with other information 88 | % 89 | % Supported Input Options 90 | % Display - Level of display [ off | final | (iter) | full | excessive ] 91 | % MaxFunEvals - Maximum number of function evaluations allowed (1000) 92 | % MaxIter - Maximum number of iterations allowed (500) 93 | % TolFun - Termination tolerance on the first-order optimality (1e-5) 94 | % TolX - Termination tolerance on progress in terms of function/parameter changes (1e-9) 95 | % Method - [ sd | csd | bb | cg | scg | pcg | {lbfgs} | newton0 | pnewton0 | 96 | % qnewton | mnewton | newton | tensor ] 97 | % c1 - Sufficient Decrease for Armijo condition (1e-4) 98 | % c2 - Curvature Decrease for Wolfe conditions (.2 for cg methods, .9 otherwise) 99 | % LS_init - Line Search Initialization -see above (2 for cg/sd, 4 for scg, 0 otherwise) 100 | % LS - Line Search type -see above (2 for bb, 4 otherwise) 101 | % Fref - Setting this to a positive integer greater than 1 102 | % will use non-monotone Armijo objective in the line search. 103 | % (20 for bb, 10 for csd, 1 for all others) 104 | % numDiff - compute derivative numerically 105 | % (default: 0) (this option has a different effect for 'newton', see below) 106 | % useComplex - if 1, use complex differentials when computing numerical derivatives 107 | % to get very accurate values (default: 0) 108 | % DerivativeCheck - if 'on', computes derivatives numerically at initial 109 | % point and compares to user-supplied derivative (default: 'off') 110 | % outputFcn - function to run after each iteration (default: []). It 111 | % should have the following interface: 112 | % outputFcn(x,infoStruct,state,varargin{:}) 113 | % useMex - where applicable, use mex files to speed things up (default: 1) 114 | % 115 | % Method-specific input options: 116 | % newton: 117 | % HessianModify - type of Hessian modification for direct solvers to 118 | % use if the Hessian is not positive definite (default: 0) 119 | % 0: Minimum Euclidean norm s.t. 
eigenvalues sufficiently large 120 | % (requires eigenvalues on iterations where matrix is not pd) 121 | % 1: Start with (1/2)*||A||_F and increment until Cholesky succeeds 122 | % (an approximation to method 0, does not require eigenvalues) 123 | % 2: Modified LDL factorization 124 | % (only 1 generalized Cholesky factorization done and no eigenvalues required) 125 | % 3: Modified Spectral Decomposition 126 | % (requires eigenvalues) 127 | % 4: Modified Symmetric Indefinite Factorization 128 | % 5: Uses the eigenvector of the smallest eigenvalue as negative 129 | % curvature direction 130 | % cgSolve - use conjugate gradient instead of direct solver (default: 0) 131 | % 0: Direct Solver 132 | % 1: Conjugate Gradient 133 | % 2: Conjugate Gradient with Diagonal Preconditioner 134 | % 3: Conjugate Gradient with LBFGS Preconditioner 135 | % x: Conjugate Graident with Symmetric Successive Over Relaxation 136 | % Preconditioner with parameter x 137 | % (where x is a real number in the range [0,2]) 138 | % x: Conjugate Gradient with Incomplete Cholesky Preconditioner 139 | % with drop tolerance -x 140 | % (where x is a real negative number) 141 | % numDiff - compute Hessian numerically 142 | % (default: 0, done with complex differentials if useComplex = 1) 143 | % LS_saveHessiancomp - when on, only computes the Hessian at the 144 | % first and last iteration of the line search (default: 1) 145 | % mnewton: 146 | % HessianIter - number of iterations to use same Hessian (default: 5) 147 | % qnewton: 148 | % initialHessType - scale initial Hessian approximation (default: 1) 149 | % qnUpdate - type of quasi-Newton update (default: 3): 150 | % 0: BFGS 151 | % 1: SR1 (when it is positive-definite, otherwise BFGS) 152 | % 2: Hoshino 153 | % 3: Self-Scaling BFGS 154 | % 4: Oren's Self-Scaling Variable Metric method 155 | % 5: McCormick-Huang asymmetric update 156 | % Damped - use damped BFGS update (default: 1) 157 | % newton0/pnewton0: 158 | % HvFunc - user-supplied function that returns Hessian-vector products 159 | % (by default, these are computed numerically using autoHv) 160 | % HvFunc should have the following interface: HvFunc(v,x,varargin{:}) 161 | % useComplex - use a complex perturbation to get high accuracy 162 | % Hessian-vector products (default: 0) 163 | % (the increased accuracy can make the method much more efficient, 164 | % but gradient code must properly support complex inputs) 165 | % useNegCurv - a negative curvature direction is used as the descent 166 | % direction if one is encountered during the cg iterations 167 | % (default: 1) 168 | % precFunc (for pnewton0 only) - user-supplied preconditioner 169 | % (by default, an L-BFGS preconditioner is used) 170 | % precFunc should have the following interfact: 171 | % precFunc(v,x,varargin{:}) 172 | % lbfgs: 173 | % Corr - number of corrections to store in memory (default: 100) 174 | % (higher numbers converge faster but use more memory) 175 | % Damped - use damped update (default: 0) 176 | % pcg: 177 | % cgUpdate - type of update (default: 2) 178 | % cg/scg/pcg: 179 | % cgUpdate - type of update (default for cg/scg: 2, default for pcg: 1) 180 | % 0: Fletcher Reeves 181 | % 1: Polak-Ribiere 182 | % 2: Hestenes-Stiefel (not supported for pcg) 183 | % 3: Gilbert-Nocedal 184 | % HvFunc (for scg only)- user-supplied function that returns Hessian-vector 185 | % products 186 | % (by default, these are computed numerically using autoHv) 187 | % HvFunc should have the following interface: 188 | % HvFunc(v,x,varargin{:}) 189 | % precFunc (for 
pcg only) - user-supplied preconditioner 190 | % (by default, an L-BFGS preconditioner is used) 191 | % precFunc should have the following interface: 192 | % precFunc(v,x,varargin{:}) 193 | % bb: 194 | % bbType - type of bb step (default: 1) 195 | % 0: min_alpha ||delta_x - alpha delta_g||_2 196 | % 1: min_alpha ||alpha delta_x - delta_g||_2 197 | % 2: Conic BB 198 | % 3: Gradient method with retards 199 | % csd: 200 | % cycle - length of cycle (default: 3) 201 | % 202 | % Supported Output Options 203 | % iterations - number of iterations taken 204 | % funcCount - number of function evaluations 205 | % algorithm - algorithm used 206 | % firstorderopt - first-order optimality 207 | % message - exit message 208 | % trace.funcCount - function evaluations after each iteration 209 | % trace.fval - function value after each iteration 210 | % 211 | % Author: Mark Schmidt (2006) 212 | % Web: http://www.cs.ubc.ca/~schmidtm 213 | % 214 | % Sources (in order of how much the source material contributes): 215 | % J. Nocedal and S.J. Wright. 1999. "Numerical Optimization". Springer Verlag. 216 | % R. Fletcher. 1987. "Practical Methods of Optimization". Wiley. 217 | % J. Demmel. 1997. "Applied Numerical Linear Algebra". SIAM. 218 | % R. Barrett, M. Berry, T. Chan, J. Demmel, J. Dongarra, V. Eijkhout, R. 219 | % Pozo, C. Romine, and H. van der Vorst. 1994. "Templates for the Solution of 220 | % Linear Systems: Building Blocks for Iterative Methods". SIAM. 221 | % J. More and D. Thuente. "Line search algorithms with guaranteed 222 | % sufficient decrease". ACM Trans. Math. Softw. vol 20, 286-307, 1994. 223 | % M. Raydan. "The Barzilai and Borwein gradient method for the large 224 | % scale unconstrained minimization problem". SIAM J. Optim., 7, 26-33, 225 | % (1997). 226 | % "Mathematical Optimization". The Computational Science Education 227 | % Project. 1995. 228 | % C. Kelley. 1999. "Iterative Methods for Optimization". Frontiers in 229 | % Applied Mathematics. SIAM. 230 | 231 | if nargin < 3 232 | options = []; 233 | end 234 | 235 | % Get Parameters 236 | [verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,tolFun,tolX,method,... 237 | corrections,c1,c2,LS_init,LS,cgSolve,qnUpdate,cgUpdate,initialHessType,... 238 | HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,... 239 | DerivativeCheck,Damped,HvFunc,bbType,cycle,... 240 | HessianIter,outputFcn,useMex,useNegCurv,precFunc] = ...
241 | minFunc_processInputOptions(options); 242 | 243 | if isfield(options, 'logfile') 244 | logfile = options.logfile; 245 | else 246 | logfile = []; 247 | end 248 | 249 | % Constants 250 | SD = 0; 251 | CSD = 1; 252 | BB = 2; 253 | CG = 3; 254 | PCG = 4; 255 | LBFGS = 5; 256 | QNEWTON = 6; 257 | NEWTON0 = 7; 258 | NEWTON = 8; 259 | TENSOR = 9; 260 | 261 | % Initialize 262 | p = length(x0); 263 | d = zeros(p,1); 264 | x = x0; 265 | t = 1; 266 | 267 | % If necessary, form numerical differentiation functions 268 | funEvalMultiplier = 1; 269 | if numDiff && method ~= TENSOR 270 | varargin(3:end+2) = varargin(1:end); 271 | varargin{1} = useComplex; 272 | varargin{2} = funObj; 273 | if method ~= NEWTON 274 | if debug 275 | if useComplex 276 | fprintf('Using complex differentials for gradient computation\n'); 277 | else 278 | fprintf('Using finite differences for gradient computation\n'); 279 | end 280 | end 281 | funObj = @autoGrad; 282 | else 283 | if debug 284 | if useComplex 285 | fprintf('Using complex differentials for gradient computation\n'); 286 | else 287 | fprintf('Using finite differences for gradient computation\n'); 288 | end 289 | end 290 | funObj = @autoHess; 291 | end 292 | 293 | if method == NEWTON0 && useComplex == 1 294 | if debug 295 | fprintf('Turning off the use of complex differentials\n'); 296 | end 297 | useComplex = 0; 298 | end 299 | 300 | if useComplex 301 | funEvalMultiplier = p; 302 | else 303 | funEvalMultiplier = p+1; 304 | end 305 | end 306 | 307 | % Evaluate Initial Point 308 | if method < NEWTON 309 | [f,g] = feval(funObj, x, varargin{:}); 310 | else 311 | [f,g,H] = feval(funObj, x, varargin{:}); 312 | computeHessian = 1; 313 | end 314 | funEvals = 1; 315 | 316 | if strcmp(DerivativeCheck,'on') 317 | if numDiff 318 | fprintf('Can not do derivative checking when numDiff is 1\n'); 319 | end 320 | % Check provided gradient/hessian function using numerical derivatives 321 | fprintf('Checking Gradient:\n'); 322 | [f2,g2] = autoGrad(x,useComplex,funObj,varargin{:}); 323 | 324 | fprintf('Max difference between user and numerical gradient: %f\n',max(abs(g-g2))); 325 | if max(abs(g-g2)) > 1e-4 326 | fprintf('User NumDif:\n'); 327 | [g g2] 328 | diff = abs(g-g2) 329 | pause; 330 | end 331 | 332 | if method >= NEWTON 333 | fprintf('Check Hessian:\n'); 334 | [f2,g2,H2] = autoHess(x,useComplex,funObj,varargin{:}); 335 | 336 | fprintf('Max difference between user and numerical hessian: %f\n',max(abs(H(:)-H2(:)))); 337 | if max(abs(H(:)-H2(:))) > 1e-4 338 | H 339 | H2 340 | diff = abs(H-H2) 341 | pause; 342 | end 343 | end 344 | end 345 | 346 | % Output Log 347 | if verboseI 348 | fprintf('%10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond'); 349 | end 350 | 351 | if logfile 352 | fid = fopen(logfile, 'a'); 353 | if (fid > 0) 354 | fprintf(fid, '-- %10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond'); 355 | fclose(fid); 356 | end 357 | end 358 | 359 | % Output Function 360 | if ~isempty(outputFcn) 361 | callOutput(outputFcn,x,'init',0,funEvals,f,[],[],g,[],sum(abs(g)),varargin{:}); 362 | end 363 | 364 | % Initialize Trace 365 | trace.fval = f; 366 | trace.funcCount = funEvals; 367 | 368 | % Check optimality of initial point 369 | if sum(abs(g)) <= tolFun 370 | exitflag=1; 371 | msg = 'Optimality Condition below TolFun'; 372 | if verbose 373 | fprintf('%s\n',msg); 374 | end 375 | if nargout > 3 376 | output = struct('iterations',0,'funcCount',1,... 
377 | 'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace); 378 | end 379 | return; 380 | end 381 | 382 | % Perform up to a maximum of 'maxIter' descent steps: 383 | for i = 1:maxIter 384 | 385 | % ****************** COMPUTE DESCENT DIRECTION ***************** 386 | 387 | switch method 388 | case SD % Steepest Descent 389 | d = -g; 390 | 391 | case CSD % Cyclic Steepest Descent 392 | 393 | if mod(i,cycle) == 1 % Use Steepest Descent 394 | alpha = 1; 395 | LS_init = 2; 396 | LS = 4; % Precise Line Search 397 | elseif mod(i,cycle) == mod(1+1,cycle) % Use Previous Step 398 | alpha = t; 399 | LS_init = 0; 400 | LS = 2; % Non-monotonic line search 401 | end 402 | d = -alpha*g; 403 | 404 | case BB % Steepest Descent with Barzilai and Borwein Step Length 405 | 406 | if i == 1 407 | d = -g; 408 | else 409 | y = g-g_old; 410 | s = t*d; 411 | if bbType == 0 412 | yy = y'*y; 413 | alpha = (s'*y)/(yy); 414 | if alpha <= 1e-10 || alpha > 1e10 415 | alpha = 1; 416 | end 417 | elseif bbType == 1 418 | sy = s'*y; 419 | alpha = (s'*s)/sy; 420 | if alpha <= 1e-10 || alpha > 1e10 421 | alpha = 1; 422 | end 423 | elseif bbType == 2 % Conic Interpolation ('Modified BB') 424 | sy = s'*y; 425 | ss = s'*s; 426 | alpha = ss/sy; 427 | if alpha <= 1e-10 || alpha > 1e10 428 | alpha = 1; 429 | end 430 | alphaConic = ss/(6*(myF_old - f) + 4*g'*s + 2*g_old'*s); 431 | if alphaConic > .001*alpha && alphaConic < 1000*alpha 432 | alpha = alphaConic; 433 | end 434 | elseif bbType == 3 % Gradient Method with retards (bb type 1, random selection of previous step) 435 | sy = s'*y; 436 | alpha = (s'*s)/sy; 437 | if alpha <= 1e-10 || alpha > 1e10 438 | alpha = 1; 439 | end 440 | v(1+mod(i-2,5)) = alpha; 441 | alpha = v(ceil(rand*length(v))); 442 | end 443 | d = -alpha*g; 444 | end 445 | g_old = g; 446 | myF_old = f; 447 | 448 | 449 | case CG % Non-Linear Conjugate Gradient 450 | 451 | if i == 1 452 | d = -g; % Initially use steepest descent direction 453 | else 454 | gtgo = g'*g_old; 455 | gotgo = g_old'*g_old; 456 | 457 | if cgUpdate == 0 458 | % Fletcher-Reeves 459 | beta = (g'*g)/(gotgo); 460 | elseif cgUpdate == 1 461 | % Polak-Ribiere 462 | beta = (g'*(g-g_old)) /(gotgo); 463 | elseif cgUpdate == 2 464 | % Hestenes-Stiefel 465 | beta = (g'*(g-g_old))/((g-g_old)'*d); 466 | else 467 | % Gilbert-Nocedal 468 | beta_FR = (g'*(g-g_old)) /(gotgo); 469 | beta_PR = (g'*g-gtgo)/(gotgo); 470 | beta = max(-beta_FR,min(beta_PR,beta_FR)); 471 | end 472 | 473 | d = -g + beta*d; 474 | 475 | % Restart if not a direction of sufficient descent 476 | if g'*d > -tolX 477 | if debug 478 | fprintf('Restarting CG\n'); 479 | end 480 | beta = 0; 481 | d = -g; 482 | end 483 | 484 | % Old restart rule: 485 | %if beta < 0 || abs(gtgo)/(gotgo) >= 0.1 || g'*d >= 0 486 | 487 | end 488 | g_old = g; 489 | 490 | case PCG % Preconditioned Non-Linear Conjugate Gradient 491 | 492 | % Apply preconditioner to negative gradient 493 | if isempty(precFunc) 494 | % Use L-BFGS Preconditioner 495 | if i == 1 496 | old_dirs = zeros(length(g),0); 497 | old_stps = zeros(length(g),0); 498 | Hdiag = 1; 499 | s = -g; 500 | else 501 | [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 502 | 503 | if useMex 504 | s = lbfgsC(-g,old_dirs,old_stps,Hdiag); 505 | else 506 | s = lbfgs(-g,old_dirs,old_stps,Hdiag); 507 | end 508 | end 509 | else % User-supplied preconditioner 510 | s = precFunc(-g,x,varargin{:}); 511 | end 512 | 513 | if i == 1 514 | d = s; 515 | else 516 | 517 | if cgUpdate == 0 518 | % Preconditioned 
Fletcher-Reeves 519 | beta = (g'*s)/(g_old'*s_old); 520 | elseif cgUpdate < 3 521 | % Preconditioned Polak-Ribiere 522 | beta = (g'*(s-s_old))/(g_old'*s_old); 523 | else 524 | % Preconditioned Gilbert-Nocedal 525 | beta_FR = (g'*s)/(g_old'*s_old); 526 | beta_PR = (g'*(s-s_old))/(g_old'*s_old); 527 | beta = max(-beta_FR,min(beta_PR,beta_FR)); 528 | end 529 | d = s + beta*d; 530 | 531 | if g'*d > -tolX 532 | if debug 533 | fprintf('Restarting CG\n'); 534 | end 535 | beta = 0; 536 | d = s; 537 | end 538 | 539 | end 540 | g_old = g; 541 | s_old = s; 542 | case LBFGS % L-BFGS 543 | 544 | % Update the direction and step sizes 545 | 546 | if i == 1 547 | d = -g; % Initially use steepest descent direction 548 | old_dirs = zeros(length(g),0); 549 | old_stps = zeros(length(d),0); 550 | Hdiag = 1; 551 | else 552 | if Damped 553 | [old_dirs,old_stps,Hdiag] = dampedUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 554 | else 555 | [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 556 | end 557 | 558 | if useMex 559 | d = lbfgsC(-g,old_dirs,old_stps,Hdiag); 560 | else 561 | d = lbfgs(-g,old_dirs,old_stps,Hdiag); 562 | end 563 | end 564 | g_old = g; 565 | 566 | case QNEWTON % Use quasi-Newton Hessian approximation 567 | 568 | if i == 1 569 | d = -g; 570 | else 571 | % Compute difference vectors 572 | y = g-g_old; 573 | s = t*d; 574 | 575 | if i == 2 576 | % Make initial Hessian approximation 577 | if initialHessType == 0 578 | % Identity 579 | if qnUpdate <= 1 580 | R = eye(length(g)); 581 | else 582 | H = eye(length(g)); 583 | end 584 | else 585 | % Scaled Identity 586 | if debug 587 | fprintf('Scaling Initial Hessian Approximation\n'); 588 | end 589 | if qnUpdate <= 1 590 | % Use Cholesky of Hessian approximation 591 | R = sqrt((y'*y)/(y'*s))*eye(length(g)); 592 | else 593 | % Use Inverse of Hessian approximation 594 | H = eye(length(g))*(y'*s)/(y'*y); 595 | end 596 | end 597 | end 598 | 599 | if qnUpdate == 0 % Use BFGS updates 600 | Bs = R'*(R*s); 601 | if Damped 602 | eta = .02; 603 | if y'*s < eta*s'*Bs 604 | if debug 605 | fprintf('Damped Update\n'); 606 | end 607 | theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1); 608 | y = theta*y + (1-theta)*Bs; 609 | end 610 | R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 611 | else 612 | if y'*s > 1e-10 613 | R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 614 | else 615 | if debug 616 | fprintf('Skipping Update\n'); 617 | end 618 | end 619 | end 620 | elseif qnUpdate == 1 % Perform SR1 Update if it maintains positive-definiteness 621 | 622 | Bs = R'*(R*s); 623 | ymBs = y-Bs; 624 | if abs(s'*ymBs) >= norm(s)*norm(ymBs)*1e-8 && (s-((R\(R'\y))))'*y > 1e-10 625 | R = cholupdate(R,-ymBs/sqrt(ymBs'*s),'-'); 626 | else 627 | if debug 628 | fprintf('SR1 not positive-definite, doing BFGS Update\n'); 629 | end 630 | if Damped 631 | eta = .02; 632 | if y'*s < eta*s'*Bs 633 | if debug 634 | fprintf('Damped Update\n'); 635 | end 636 | theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1); 637 | y = theta*y + (1-theta)*Bs; 638 | end 639 | R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 640 | else 641 | if y'*s > 1e-10 642 | R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 643 | else 644 | if debug 645 | fprintf('Skipping Update\n'); 646 | end 647 | end 648 | end 649 | end 650 | elseif qnUpdate == 2 % Use Hoshino update 651 | v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y)); 652 | phi = 1/(1 + (y'*H*y)/(s'*y)); 653 | H = H + (s*s')/(s'*y) - 
(H*y*y'*H)/(y'*H*y) + phi*v*v'; 654 | 655 | elseif qnUpdate == 3 % Self-Scaling BFGS update 656 | ys = y'*s; 657 | Hy = H*y; 658 | yHy = y'*Hy; 659 | gamma = ys/yHy; 660 | v = sqrt(yHy)*(s/ys - Hy/yHy); 661 | H = gamma*(H - Hy*Hy'/yHy + v*v') + (s*s')/ys; 662 | elseif qnUpdate == 4 % Oren's Self-Scaling Variable Metric update 663 | 664 | % Oren's method 665 | if (s'*y)/(y'*H*y) > 1 666 | phi = 1; % BFGS 667 | omega = 0; 668 | elseif (s'*(H\s))/(s'*y) < 1 669 | phi = 0; % DFP 670 | omega = 1; 671 | else 672 | phi = (s'*y)*(y'*H*y-s'*y)/((s'*(H\s))*(y'*H*y)-(s'*y)^2); 673 | omega = phi; 674 | end 675 | 676 | gamma = (1-omega)*(s'*y)/(y'*H*y) + omega*(s'*(H\s))/(s'*y); 677 | v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y)); 678 | H = gamma*(H - (H*y*y'*H)/(y'*H*y) + phi*v*v') + (s*s')/(s'*y); 679 | 680 | elseif qnUpdate == 5 % McCormick-Huang asymmetric update 681 | theta = 1; 682 | phi = 0; 683 | psi = 1; 684 | omega = 0; 685 | t1 = s*(theta*s + phi*H'*y)'; 686 | t2 = (theta*s + phi*H'*y)'*y; 687 | t3 = H*y*(psi*s + omega*H'*y)'; 688 | t4 = (psi*s + omega*H'*y)'*y; 689 | H = H + t1/t2 - t3/t4; 690 | end 691 | 692 | if qnUpdate <= 1 693 | d = -R\(R'\g); 694 | else 695 | d = -H*g; 696 | end 697 | 698 | end 699 | g_old = g; 700 | 701 | case NEWTON0 % Hessian-Free Newton 702 | 703 | cgMaxIter = min(p,maxFunEvals-funEvals); 704 | cgForce = min(0.5,sqrt(norm(g)))*norm(g); 705 | 706 | % Set-up preconditioner 707 | precondFunc = []; 708 | precondArgs = []; 709 | if cgSolve == 1 710 | if isempty(precFunc) % Apply L-BFGS preconditioner 711 | if i == 1 712 | old_dirs = zeros(length(g),0); 713 | old_stps = zeros(length(g),0); 714 | Hdiag = 1; 715 | else 716 | [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 717 | if useMex 718 | precondFunc = @lbfgsC; 719 | else 720 | precondFunc = @lbfgs; 721 | end 722 | precondArgs = {old_dirs,old_stps,Hdiag}; 723 | end 724 | g_old = g; 725 | else 726 | % Apply user-defined preconditioner 727 | precondFunc = precFunc; 728 | precondArgs = {x,varargin{:}}; 729 | end 730 | end 731 | 732 | % Solve Newton system using cg and hessian-vector products 733 | if isempty(HvFunc) 734 | % No user-supplied Hessian-vector function, 735 | % use automatic differentiation 736 | HvFun = @autoHv; 737 | HvArgs = {x,g,useComplex,funObj,varargin{:}}; 738 | else 739 | % Use user-supplid Hessian-vector function 740 | HvFun = HvFunc; 741 | HvArgs = {x,varargin{:}}; 742 | end 743 | 744 | if useNegCurv 745 | [d,cgIter,cgRes,negCurv] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs); 746 | else 747 | [d,cgIter,cgRes] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs); 748 | end 749 | 750 | funEvals = funEvals+cgIter; 751 | if debug 752 | fprintf('newtonCG stopped on iteration %d w/ residual %.5e\n',cgIter,cgRes); 753 | 754 | end 755 | 756 | if useNegCurv 757 | if ~isempty(negCurv) 758 | %if debug 759 | fprintf('Using negative curvature direction\n'); 760 | %end 761 | d = negCurv/norm(negCurv); 762 | d = d/sum(abs(g)); 763 | end 764 | end 765 | 766 | case NEWTON % Newton search direction 767 | 768 | if cgSolve == 0 769 | if HessianModify == 0 770 | % Attempt to perform a Cholesky factorization of the Hessian 771 | [R,posDef] = chol(H); 772 | 773 | % If the Cholesky factorization was successful, then the Hessian is 774 | % positive definite, solve the system 775 | if posDef == 0 776 | d = -R\(R'\g); 777 | 778 | else 779 | % otherwise, adjust the Hessian to be positive definite based on the 780 | % 
minimum eigenvalue, and solve with QR 781 | % (expensive, we don't want to do this very much) 782 | if debug 783 | fprintf('Adjusting Hessian\n'); 784 | end 785 | H = H + eye(length(g)) * max(0,1e-12 - min(real(eig(H)))); 786 | d = -H\g; 787 | end 788 | elseif HessianModify == 1 789 | % Modified Incomplete Cholesky 790 | R = mcholinc(H,debug); 791 | d = -R\(R'\g); 792 | elseif HessianModify == 2 793 | % Modified Generalized Cholesky 794 | if useMex 795 | [L D perm] = mcholC(H); 796 | else 797 | [L D perm] = mchol(H); 798 | end 799 | d(perm) = -L' \ ((D.^-1).*(L \ g(perm))); 800 | 801 | elseif HessianModify == 3 802 | % Modified Spectral Decomposition 803 | [V,D] = eig((H+H')/2); 804 | D = diag(D); 805 | D = max(abs(D),max(max(abs(D)),1)*1e-12); 806 | d = -V*((V'*g)./D); 807 | elseif HessianModify == 4 808 | % Modified Symmetric Indefinite Factorization 809 | [L,D,perm] = ldl(H,'vector'); 810 | [blockPos junk] = find(triu(D,1)); 811 | for diagInd = setdiff(setdiff(1:p,blockPos),blockPos+1) 812 | if D(diagInd,diagInd) < 1e-12 813 | D(diagInd,diagInd) = 1e-12; 814 | end 815 | end 816 | for blockInd = blockPos' 817 | block = D(blockInd:blockInd+1,blockInd:blockInd+1); 818 | block_a = block(1); 819 | block_b = block(2); 820 | block_d = block(4); 821 | lambda = (block_a+block_d)/2 - sqrt(4*block_b^2 + (block_a - block_d)^2)/2; 822 | D(blockInd:blockInd+1,blockInd:blockInd+1) = block+eye(2)*(lambda+1e-12); 823 | end 824 | d(perm) = -L' \ (D \ (L \ g(perm))); 825 | else 826 | % Take Newton step if Hessian is pd, 827 | % otherwise take a step with negative curvature 828 | [R,posDef] = chol(H); 829 | if posDef == 0 830 | d = -R\(R'\g); 831 | else 832 | if debug 833 | fprintf('Taking Direction of Negative Curvature\n'); 834 | end 835 | [V,D] = eig(H); 836 | u = V(:,1); 837 | d = -sign(u'*g)*u; 838 | end 839 | end 840 | 841 | else 842 | % Solve with Conjugate Gradient 843 | cgMaxIter = p; 844 | cgForce = min(0.5,sqrt(norm(g)))*norm(g); 845 | 846 | % Select Preconditioner 847 | if cgSolve == 1 848 | % No preconditioner 849 | precondFunc = []; 850 | precondArgs = []; 851 | elseif cgSolve == 2 852 | % Diagonal preconditioner 853 | precDiag = diag(H); 854 | precDiag(precDiag < 1e-12) = 1e-12 - min(precDiag); 855 | precondFunc = @precondDiag; 856 | precondArgs = {precDiag.^-1}; 857 | elseif cgSolve == 3 858 | % L-BFGS preconditioner 859 | if i == 1 860 | old_dirs = zeros(length(g),0); 861 | old_stps = zeros(length(g),0); 862 | Hdiag = 1; 863 | else 864 | [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 865 | end 866 | g_old = g; 867 | if useMex 868 | precondFunc = @lbfgsC; 869 | else 870 | precondFunc = @lbfgs; 871 | end 872 | precondArgs = {old_dirs,old_stps,Hdiag}; 873 | elseif cgSolve > 0 874 | % Symmetric Successive Overelaxation Preconditioner 875 | omega = cgSolve; 876 | D = diag(H); 877 | D(D < 1e-12) = 1e-12 - min(D); 878 | precDiag = (omega/(2-omega))*D.^-1; 879 | precTriu = diag(D/omega) + triu(H,1); 880 | precondFunc = @precondTriuDiag; 881 | precondArgs = {precTriu,precDiag.^-1}; 882 | else 883 | % Incomplete Cholesky Preconditioner 884 | opts.droptol = -cgSolve; 885 | opts.rdiag = 1; 886 | R = cholinc(sparse(H),opts); 887 | if min(diag(R)) < 1e-12 888 | R = cholinc(sparse(H + eye*(1e-12 - min(diag(R)))),opts); 889 | end 890 | precondFunc = @precondTriu; 891 | precondArgs = {R}; 892 | end 893 | 894 | % Run cg with the appropriate preconditioner 895 | if isempty(HvFunc) 896 | % No user-supplied Hessian-vector function 897 | [d,cgIter,cgRes] = 
conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs); 898 | else 899 | % Use user-supplied Hessian-vector function 900 | [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFunc,{x,varargin{:}}); 901 | end 902 | if debug 903 | fprintf('CG stopped after %d iterations w/ residual %.5e\n',cgIter,cgRes); 904 | %funEvals = funEvals + cgIter; 905 | end 906 | end 907 | 908 | case TENSOR % Tensor Method 909 | 910 | if numDiff 911 | % Compute 3rd-order Tensor Numerically 912 | [junk1 junk2 junk3 T] = autoTensor(x,useComplex,funObj,varargin{:}); 913 | else 914 | % Use user-supplied 3rd-derivative Tensor 915 | [junk1 junk2 junk3 T] = feval(funObj, x, varargin{:}); 916 | end 917 | options_sub.Method = 'newton'; 918 | options_sub.Display = 'none'; 919 | options_sub.TolX = tolX; 920 | options_sub.TolFun = tolFun; 921 | d = minFunc(@taylorModel,zeros(p,1),options_sub,f,g,H,T); 922 | 923 | if any(abs(d) > 1e5) || all(abs(d) < 1e-5) || g'*d > -tolX 924 | if debug 925 | fprintf('Using 2nd-Order Step\n'); 926 | end 927 | [V,D] = eig((H+H')/2); 928 | D = diag(D); 929 | D = max(abs(D),max(max(abs(D)),1)*1e-12); 930 | d = -V*((V'*g)./D); 931 | else 932 | if debug 933 | fprintf('Using 3rd-Order Step\n'); 934 | end 935 | end 936 | end 937 | 938 | if ~isLegal(d) 939 | fprintf('Step direction is illegal!\n'); 940 | pause; 941 | return 942 | end 943 | 944 | % ****************** COMPUTE STEP LENGTH ************************ 945 | 946 | % Directional Derivative 947 | gtd = g'*d; 948 | 949 | % Check that progress can be made along direction 950 | if gtd > -tolX 951 | exitflag=2; 952 | msg = 'Directional Derivative below TolX'; 953 | break; 954 | end 955 | 956 | % Select Initial Guess 957 | if i == 1 958 | if method < NEWTON0 959 | t = min(1,1/sum(abs(g))); 960 | else 961 | t = 1; 962 | end 963 | else 964 | if LS_init == 0 965 | % Newton step 966 | t = 1; 967 | elseif LS_init == 1 968 | % Close to previous step length 969 | t = t*min(2,(gtd_old)/(gtd)); 970 | elseif LS_init == 2 971 | % Quadratic Initialization based on {f,g} and previous f 972 | t = min(1,2*(f-f_old)/(gtd)); 973 | elseif LS_init == 3 974 | % Double previous step length 975 | t = min(1,t*2); 976 | elseif LS_init == 4 977 | % Scaled step length if possible 978 | if isempty(HvFunc) 979 | % No user-supplied Hessian-vector function, 980 | % use automatic differentiation 981 | dHd = d'*autoHv(d,x,g,0,funObj,varargin{:}); 982 | else 983 | % Use user-supplid Hessian-vector function 984 | dHd = d'*HvFunc(d,x,varargin{:}); 985 | end 986 | 987 | funEvals = funEvals + 1; 988 | if dHd > 0 989 | t = -gtd/(dHd); 990 | else 991 | t = min(1,2*(f-f_old)/(gtd)); 992 | end 993 | end 994 | 995 | if t <= 0 996 | t = 1; 997 | end 998 | end 999 | f_old = f; 1000 | gtd_old = gtd; 1001 | 1002 | % Compute reference fr if using non-monotone objective 1003 | if Fref == 1 1004 | fr = f; 1005 | else 1006 | if i == 1 1007 | old_fvals = repmat(-inf,[Fref 1]); 1008 | end 1009 | 1010 | if i <= Fref 1011 | old_fvals(i) = f; 1012 | else 1013 | old_fvals = [old_fvals(2:end);f]; 1014 | end 1015 | fr = max(old_fvals); 1016 | end 1017 | 1018 | computeHessian = 0; 1019 | if method >= NEWTON 1020 | if HessianIter == 1 1021 | computeHessian = 1; 1022 | elseif i > 1 && mod(i-1,HessianIter) == 0 1023 | computeHessian = 1; 1024 | end 1025 | end 1026 | 1027 | % Line Search 1028 | f_old = f; 1029 | if LS < 3 % Use Armijo Bactracking 1030 | % Perform Backtracking line search 1031 | if computeHessian 1032 | [t,x,f,g,LSfunEvals,H] = 
ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:}); 1033 | else 1034 | [t,x,f,g,LSfunEvals] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,1,funObj,varargin{:}); 1035 | end 1036 | funEvals = funEvals + LSfunEvals; 1037 | 1038 | elseif LS < 6 1039 | % Find Point satisfying Wolfe 1040 | 1041 | if computeHessian 1042 | [t,f,g,LSfunEvals,H] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:}); 1043 | else 1044 | [t,f,g,LSfunEvals] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,1,funObj,varargin{:}); 1045 | end 1046 | funEvals = funEvals + LSfunEvals; 1047 | x = x + t*d; 1048 | 1049 | else 1050 | % Use Matlab optim toolbox line search 1051 | [t,f_new,fPrime_new,g_new,LSexitFlag,LSiter]=... 1052 | lineSearch({'fungrad',[],funObj},x,p,1,p,d,f,gtd,t,c1,c2,-inf,maxFunEvals-funEvals,... 1053 | tolX,[],[],[],varargin{:}); 1054 | funEvals = funEvals + LSiter; 1055 | if isempty(t) 1056 | exitflag = -2; 1057 | msg = 'Matlab LineSearch failed'; 1058 | break; 1059 | end 1060 | 1061 | if method >= NEWTON 1062 | 1063 | [f_new,g_new,H] = funObj(x + t*d,varargin{:}); 1064 | funEvals = funEvals + 1; 1065 | end 1066 | x = x + t*d; 1067 | 1068 | f = f_new; 1069 | g = g_new; 1070 | end 1071 | 1072 | % Output iteration information 1073 | if verboseI 1074 | fprintf('%10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g))); 1075 | end 1076 | 1077 | if logfile 1078 | fid = fopen(logfile, 'a'); 1079 | if (fid > 0) 1080 | fprintf(fid, '-- %10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g))); 1081 | fclose(fid); 1082 | end 1083 | end 1084 | 1085 | 1086 | % Output Function 1087 | if ~isempty(outputFcn) 1088 | callOutput(outputFcn,x,'iter',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:}); 1089 | end 1090 | 1091 | % Update Trace 1092 | trace.fval(end+1,1) = f; 1093 | trace.funcCount(end+1,1) = funEvals; 1094 | 1095 | % Check Optimality Condition 1096 | if sum(abs(g)) <= tolFun 1097 | exitflag=1; 1098 | msg = 'Optimality Condition below TolFun'; 1099 | break; 1100 | end 1101 | 1102 | % ******************* Check for lack of progress ******************* 1103 | 1104 | if sum(abs(t*d)) <= tolX 1105 | exitflag=2; 1106 | msg = 'Step Size below TolX'; 1107 | break; 1108 | end 1109 | 1110 | 1111 | if abs(f-f_old) < tolX 1112 | exitflag=2; 1113 | msg = 'Function Value changing by less than TolX'; 1114 | break; 1115 | end 1116 | 1117 | % ******** Check for going over iteration/evaluation limit ******************* 1118 | 1119 | if funEvals*funEvalMultiplier > maxFunEvals 1120 | exitflag = 0; 1121 | msg = 'Exceeded Maximum Number of Function Evaluations'; 1122 | break; 1123 | end 1124 | 1125 | if i == maxIter 1126 | exitflag = 0; 1127 | msg='Exceeded Maximum Number of Iterations'; 1128 | break; 1129 | end 1130 | 1131 | end 1132 | 1133 | if verbose 1134 | fprintf('%s\n',msg); 1135 | end 1136 | if nargout > 3 1137 | output = struct('iterations',i,'funcCount',funEvals*funEvalMultiplier,... 
1138 | 'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace); 1139 | end 1140 | 1141 | % Output Function 1142 | if ~isempty(outputFcn) 1143 | callOutput(outputFcn,x,'done',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:}); 1144 | end 1145 | 1146 | end 1147 | 1148 | -------------------------------------------------------------------------------- /minFunc/minFunc_processInputOptions.m: -------------------------------------------------------------------------------- 1 | 2 | function [verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,tolFun,tolX,method,... 3 | corrections,c1,c2,LS_init,LS,cgSolve,qnUpdate,cgUpdate,initialHessType,... 4 | HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,... 5 | DerivativeCheck,Damped,HvFunc,bbType,cycle,... 6 | HessianIter,outputFcn,useMex,useNegCurv,precFunc] = ... 7 | minFunc_processInputOptions(o) 8 | 9 | % Constants 10 | SD = 0; 11 | CSD = 1; 12 | BB = 2; 13 | CG = 3; 14 | PCG = 4; 15 | LBFGS = 5; 16 | QNEWTON = 6; 17 | NEWTON0 = 7; 18 | NEWTON = 8; 19 | TENSOR = 9; 20 | 21 | verbose = 1; 22 | verboseI= 1; 23 | debug = 0; 24 | doPlot = 0; 25 | method = LBFGS; 26 | cgSolve = 0; 27 | 28 | o = toUpper(o); 29 | 30 | if isfield(o,'DISPLAY') 31 | switch(upper(o.DISPLAY)) 32 | case 0 33 | verbose = 0; 34 | verboseI = 0; 35 | case 'FINAL' 36 | verboseI = 0; 37 | case 'OFF' 38 | verbose = 0; 39 | verboseI = 0; 40 | case 'NONE' 41 | verbose = 0; 42 | verboseI = 0; 43 | case 'FULL' 44 | debug = 1; 45 | case 'EXCESSIVE' 46 | debug = 1; 47 | doPlot = 1; 48 | end 49 | end 50 | 51 | 52 | LS_init = 0; 53 | c2 = 0.9; 54 | LS = 4; 55 | Fref = 1; 56 | Damped = 0; 57 | HessianIter = 1; 58 | if isfield(o,'METHOD') 59 | m = upper(o.METHOD); 60 | switch(m) 61 | case 'TENSOR' 62 | method = TENSOR; 63 | case 'NEWTON' 64 | method = NEWTON; 65 | case 'MNEWTON' 66 | method = NEWTON; 67 | HessianIter = 5; 68 | case 'PNEWTON0' 69 | method = NEWTON0; 70 | cgSolve = 1; 71 | case 'NEWTON0' 72 | method = NEWTON0; 73 | case 'QNEWTON' 74 | method = QNEWTON; 75 | Damped = 1; 76 | case 'LBFGS' 77 | method = LBFGS; 78 | case 'BB' 79 | method = BB; 80 | LS = 2; 81 | Fref = 20; 82 | case 'PCG' 83 | method = PCG; 84 | c2 = 0.2; 85 | LS_init = 2; 86 | case 'SCG' 87 | method = CG; 88 | c2 = 0.2; 89 | LS_init = 4; 90 | case 'CG' 91 | method = CG; 92 | c2 = 0.2; 93 | LS_init = 2; 94 | case 'CSD' 95 | method = CSD; 96 | c2 = 0.2; 97 | Fref = 10; 98 | LS_init = 2; 99 | case 'SD' 100 | method = SD; 101 | LS_init = 2; 102 | end 103 | end 104 | 105 | maxFunEvals = getOpt(o,'MAXFUNEVALS',1000); 106 | maxIter = getOpt(o,'MAXITER',500); 107 | tolFun = getOpt(o,'TOLFUN',1e-5); 108 | tolX = getOpt(o,'TOLX',1e-9); 109 | corrections = getOpt(o,'CORR',100); 110 | c1 = getOpt(o,'C1',1e-4); 111 | c2 = getOpt(o,'C2',c2); 112 | LS_init = getOpt(o,'LS_INIT',LS_init); 113 | LS = getOpt(o,'LS',LS); 114 | cgSolve = getOpt(o,'CGSOLVE',cgSolve); 115 | qnUpdate = getOpt(o,'QNUPDATE',3); 116 | cgUpdate = getOpt(o,'CGUPDATE',2); 117 | initialHessType = getOpt(o,'INITIALHESSTYPE',1); 118 | HessianModify = getOpt(o,'HESSIANMODIFY',0); 119 | Fref = getOpt(o,'FREF',Fref); 120 | useComplex = getOpt(o,'USECOMPLEX',0); 121 | numDiff = getOpt(o,'NUMDIFF',0); 122 | LS_saveHessianComp = getOpt(o,'LS_SAVEHESSIANCOMP',1); 123 | DerivativeCheck = getOpt(o,'DERIVATIVECHECK',0); 124 | Damped = getOpt(o,'DAMPED',Damped); 125 | HvFunc = getOpt(o,'HVFUNC',[]); 126 | bbType = getOpt(o,'BBTYPE',0); 127 | cycle = getOpt(o,'CYCLE',3); 128 | HessianIter = getOpt(o,'HESSIANITER',HessianIter); 129 | outputFcn = 
getOpt(o,'OUTPUTFCN',[]); 130 | useMex = getOpt(o,'USEMEX',1); 131 | useNegCurv = getOpt(o,'USENEGCURV',1); 132 | precFunc = getOpt(o,'PRECFUNC',[]); 133 | end 134 | 135 | function [v] = getOpt(options,opt,default) 136 | if isfield(options,opt) 137 | if ~isempty(getfield(options,opt)) 138 | v = getfield(options,opt); 139 | else 140 | v = default; 141 | end 142 | else 143 | v = default; 144 | end 145 | end 146 | 147 | function [o] = toUpper(o) 148 | if ~isempty(o) 149 | fn = fieldnames(o); 150 | for i = 1:length(fn) 151 | o = setfield(o,upper(fn{i}),getfield(o,fn{i})); 152 | end 153 | end 154 | end -------------------------------------------------------------------------------- /minFunc/polyinterp.m: -------------------------------------------------------------------------------- 1 | function [minPos,fmin] = polyinterp(points,doPlot,xminBound,xmaxBound) 2 | % function [minPos] = polyinterp(points,doPlot,xminBound,xmaxBound) 3 | % 4 | % Minimum of interpolating polynomial based on function and derivative 5 | % values 6 | % 7 | % In can also be used for extrapolation if {xmin,xmax} are outside 8 | % the domain of the points. 9 | % 10 | % Input: 11 | % points(pointNum,[x f g]) 12 | % doPlot: set to 1 to plot, default: 0 13 | % xmin: min value that brackets minimum (default: min of points) 14 | % xmax: max value that brackets maximum (default: max of points) 15 | % 16 | % set f or g to sqrt(-1) if they are not known 17 | % the order of the polynomial is the number of known f and g values minus 1 18 | 19 | if nargin < 2 20 | doPlot = 0; 21 | end 22 | 23 | nPoints = size(points,1); 24 | order = sum(sum((imag(points(:,2:3))==0)))-1; 25 | 26 | % Code for most common case: 27 | % - cubic interpolation of 2 points 28 | % w/ function and derivative values for both 29 | % - no xminBound/xmaxBound 30 | 31 | if nPoints == 2 && order ==3 && nargin <= 2 && doPlot == 0 32 | % Solution in this case (where x2 is the farthest point): 33 | % d1 = g1 + g2 - 3*(f1-f2)/(x1-x2); 34 | % d2 = sqrt(d1^2 - g1*g2); 35 | % minPos = x2 - (x2 - x1)*((g2 + d2 - d1)/(g2 - g1 + 2*d2)); 36 | % t_new = min(max(minPos,x1),x2); 37 | [minVal minPos] = min(points(:,1)); 38 | notMinPos = -minPos+3; 39 | d1 = points(minPos,3) + points(notMinPos,3) - 3*(points(minPos,2)-points(notMinPos,2))/(points(minPos,1)-points(notMinPos,1)); 40 | d2 = sqrt(d1^2 - points(minPos,3)*points(notMinPos,3)); 41 | if isreal(d2) 42 | t = points(notMinPos,1) - (points(notMinPos,1) - points(minPos,1))*((points(notMinPos,3) + d2 - d1)/(points(notMinPos,3) - points(minPos,3) + 2*d2)); 43 | minPos = min(max(t,points(minPos,1)),points(notMinPos,1)); 44 | else 45 | minPos = mean(points(:,1)); 46 | end 47 | return; 48 | end 49 | 50 | xmin = min(points(:,1)); 51 | xmax = max(points(:,1)); 52 | 53 | % Compute Bounds of Interpolation Area 54 | if nargin < 3 55 | xminBound = xmin; 56 | end 57 | if nargin < 4 58 | xmaxBound = xmax; 59 | end 60 | 61 | % Constraints Based on available Function Values 62 | A = zeros(0,order+1); 63 | b = zeros(0,1); 64 | for i = 1:nPoints 65 | if imag(points(i,2))==0 66 | constraint = zeros(1,order+1); 67 | for j = order:-1:0 68 | constraint(order-j+1) = points(i,1)^j; 69 | end 70 | A = [A;constraint]; 71 | b = [b;points(i,2)]; 72 | end 73 | end 74 | 75 | % Constraints based on available Derivatives 76 | for i = 1:nPoints 77 | if isreal(points(i,3)) 78 | constraint = zeros(1,order+1); 79 | for j = 1:order 80 | constraint(j) = (order-j+1)*points(i,1)^(order-j); 81 | end 82 | A = [A;constraint]; 83 | b = [b;points(i,3)]; 84 | end 85 | end 
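% Descriptive note: at this point each row of A encodes one linear
% constraint on the polynomial coefficients -- p(x_i) = f_i for every
% known function value and p'(x_i) = g_i for every known derivative --
% so the coefficient vector params is recovered below by solving
% A*params = b with backslash (a least-squares fit if over-determined).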
86 | 87 | % Find interpolating polynomial 88 | params = A\b; 89 | 90 | % Compute Critical Points 91 | dParams = zeros(order,1); 92 | for i = 1:length(params)-1 93 | dParams(i) = params(i)*(order-i+1); 94 | end 95 | 96 | if any(isinf(dParams)) 97 | cp = [xminBound;xmaxBound;points(:,1)].'; 98 | else 99 | cp = [xminBound;xmaxBound;points(:,1);roots(dParams)].'; 100 | end 101 | 102 | % Test Critical Points 103 | fmin = inf; 104 | minPos = (xminBound+xmaxBound)/2; % Default to Bisection if no critical points valid 105 | for xCP = cp 106 | if imag(xCP)==0 && xCP >= xminBound && xCP <= xmaxBound 107 | fCP = polyval(params,xCP); 108 | if imag(fCP)==0 && fCP < fmin 109 | minPos = real(xCP); 110 | fmin = real(fCP); 111 | end 112 | end 113 | end 114 | % Plot Situation 115 | if doPlot 116 | figure(1); clf; hold on; 117 | 118 | % Plot Points 119 | plot(points(:,1),points(:,2),'b*'); 120 | 121 | % Plot Derivatives 122 | for i = 1:nPoints 123 | if isreal(points(i,3)) 124 | m = points(i,3); 125 | b = points(i,2) - m*points(i,1); 126 | plot([points(i,1)-.05 points(i,1)+.05],... 127 | [(points(i,1)-.05)*m+b (points(i,1)+.05)*m+b],'c.-'); 128 | end 129 | end 130 | 131 | % Plot Function 132 | x = min(xmin,xminBound)-.1:(max(xmax,xmaxBound)+.1-min(xmin,xminBound)-.1)/100:max(xmax,xmaxBound)+.1; 133 | size(x) 134 | for i = 1:length(x) 135 | f(i) = polyval(params,x(i)); 136 | end 137 | plot(x,f,'y'); 138 | axis([x(1)-.1 x(end)+.1 min(f)-.1 max(f)+.1]); 139 | 140 | % Plot Minimum 141 | plot(minPos,fmin,'g+'); 142 | if doPlot == 1 143 | pause(1); 144 | end 145 | end -------------------------------------------------------------------------------- /minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 
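% Derivation of the gradient used below: differentiating term i of the
% sum gives -400*x(i)*(x(i+1)-x(i)^2) - 2*(1-x(i)) with respect to x(i),
% while term i-1 contributes 200*(x(i)-x(i-1)^2) to df(i); the two
% vectorized assignments in the nargout > 1 branch implement exactly
% these two pieces.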
11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /mnist/t10k-images.idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehosseiniasl/Nonnegativity-Constrained-Autoencoder-NCAE/219c53631d60a268ba8550796e2ae38639450861/mnist/t10k-images.idx3-ubyte -------------------------------------------------------------------------------- /mnist/t10k-labels.idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehosseiniasl/Nonnegativity-Constrained-Autoencoder-NCAE/219c53631d60a268ba8550796e2ae38639450861/mnist/t10k-labels.idx1-ubyte -------------------------------------------------------------------------------- /mnist/train-images.idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehosseiniasl/Nonnegativity-Constrained-Autoencoder-NCAE/219c53631d60a268ba8550796e2ae38639450861/mnist/train-images.idx3-ubyte --------------------------------------------------------------------------------
/mnist/train-labels.idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ehosseiniasl/Nonnegativity-Constrained-Autoencoder-NCAE/219c53631d60a268ba8550796e2ae38639450861/mnist/train-labels.idx1-ubyte -------------------------------------------------------------------------------- /params2stack.m: -------------------------------------------------------------------------------- 1 | function stack = params2stack(params, netconfig) 2 | 3 | % Converts a flattened parameter vector into a nice "stack" structure 4 | % for us to work with. This is useful when you're building multilayer 5 | % networks. 6 | % 7 | % stack = params2stack(params, netconfig) 8 | % 9 | % params - flattened parameter vector 10 | % netconfig - auxiliary variable containing 11 | % the configuration of the network 12 | % 13 | 14 | 15 | % Map the params (a vector into a stack of weights) 16 | depth = numel(netconfig.layersizes); 17 | stack = cell(depth,1); 18 | prevLayerSize = netconfig.inputsize; % the size of the previous layer 19 | curPos = double(1); % mark current position in parameter vector 20 | 21 | for d = 1:depth 22 | % Create layer d 23 | stack{d} = struct; 24 | 25 | % Extract weights 26 | wlen = double(netconfig.layersizes{d} * prevLayerSize); 27 | stack{d}.w = reshape(params(curPos:curPos+wlen-1), netconfig.layersizes{d}, prevLayerSize); 28 | curPos = curPos+wlen; 29 | 30 | % Extract bias 31 | blen = double(netconfig.layersizes{d}); 32 | stack{d}.b = reshape(params(curPos:curPos+blen-1), netconfig.layersizes{d}, 1); 33 | curPos = curPos+blen; 34 | 35 | % Set previous layer size 36 | prevLayerSize = netconfig.layersizes{d}; 37 | end 38 | 39 | end -------------------------------------------------------------------------------- /softmax/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a time. 
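% The loop below implements the two-sided (central) difference
%   numgrad(i) = (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps)
% with eps = 1e-4, where e_i is the i-th standard basis vector; the
% central difference has O(eps^2) truncation error, versus O(eps) for
% the one-sided difference (J(theta + eps*e_i) - J(theta))/eps.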
20 | 21 | eps = 1e-4; 22 | 23 | for i = 1:size(theta,1) 24 | 25 | delta = zeros(size(theta,1),1); 26 | delta(i) = eps; 27 | theta_p = theta + delta; 28 | theta_n = theta - delta; 29 | numgrad(i,1) = (J(theta_p)-J(theta_n))./(2*eps); 30 | 31 | end 32 | 33 | 34 | 35 | 36 | 37 | 38 | %% --------------------------------------------------------------- 39 | end 40 | -------------------------------------------------------------------------------- /softmax/softmaxCost_nonneg.m: -------------------------------------------------------------------------------- 1 | function [cost, grad] = softmaxCost(theta, numClasses, inputSize, lambda, data, labels) 2 | 3 | % numClasses - the number of classes 4 | % inputSize - the size N of the input vector 5 | % lambda - weight decay parameter 6 | % data - the N x M input matrix, where each column data(:, i) corresponds to 7 | % a single test set 8 | % labels - an M x 1 matrix containing the labels corresponding for the input data 9 | % 10 | 11 | % Unroll the parameters from theta 12 | theta = reshape(theta, numClasses, inputSize); 13 | 14 | numCases = size(data, 2); 15 | 16 | groundTruth = full(sparse(labels, 1:numCases, 1)); 17 | cost = 0; 18 | 19 | thetagrad = zeros(numClasses, inputSize); 20 | 21 | %% ---------- YOUR CODE HERE -------------------------------------- 22 | % Instructions: Compute the cost and gradient for softmax regression. 23 | % You need to compute thetagrad and cost. 24 | % The groundTruth matrix might come in handy. 25 | 26 | % tmp = theta*data; 27 | 28 | prob = exp(theta*data); 29 | 30 | [r,c] = find(isinf(prob)); 31 | prob(r,c) = exp(709); % avoid Inf in prob matrix 32 | 33 | prob_norm = prob./repmat(sum(prob),numClasses,1); 34 | 35 | [r,c] = find(prob_norm == 0); 36 | prob_norm(r,c) = eps; 37 | 38 | theta_neg = zeros(size(theta,1), size(theta,2)); 39 | 40 | theta_neg(find(theta<0)) = theta(find(theta<0)); 41 | 42 | theta_neg_abs = theta_neg; 43 | theta_neg_abs(theta_neg_abs~=0)=1; 44 | 45 | weight_neg_decay = sum(sum(theta_neg.^2)) ; 46 | 47 | 48 | cost = -sum(sum(groundTruth.*log(prob_norm)))/numCases + lambda/2*weight_neg_decay; 49 | 50 | cost_acc = -sum(sum(groundTruth.*log(prob_norm)))/numCases 51 | if isnan(cost_acc) 52 | error() 53 | end 54 | 55 | % cost = -sum(sum(groundTruth.*log(prob_norm)))/numCases + lambda/2*weight_neg_decay - 0.5*lambda*(sum(theta_neg(:))); 56 | 57 | thetagrad = -1/numCases * (data*(groundTruth-prob_norm)') ; 58 | 59 | thetagrad = thetagrad' + lambda*theta_neg; 60 | 61 | % thetagrad = thetagrad' + lambda*theta_neg - 0.5*lambda*theta_neg_abs; 62 | 63 | 64 | 65 | % ------------------------------------------------------------------ 66 | % Unroll the gradient matrices into a vector for minFunc 67 | grad = [thetagrad(:)]; 68 | end 69 | 70 | -------------------------------------------------------------------------------- /softmax/softmaxPredict.m: -------------------------------------------------------------------------------- 1 | function [pred] = softmaxPredict(softmaxModel, data) 2 | 3 | % softmaxModel - model trained using softmaxTrain 4 | % data - the N x M input matrix, where each column data(:, i) corresponds to 5 | % a single test set 6 | % 7 | % Your code should produce the prediction matrix 8 | % pred, where pred(i) is argmax_c P(y(c) | x(i)). 
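% Note: the softmax denominator sum(prob) is shared by every class within
% a column, so taking the argmax of theta*data directly would give the
% same predictions; the normalized probabilities are computed below only
% for readability.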
9 | 10 | % Unroll the parameters from theta 11 | theta = softmaxModel.optTheta; % this provides a numClasses x inputSize matrix 12 | pred = zeros(1, size(data, 2)); 13 | 14 | %% ---------- YOUR CODE HERE -------------------------------------- 15 | % Instructions: Compute pred using theta assuming that the labels start 16 | % from 1. 17 | 18 | prob = exp(theta*data); 19 | prob_norm = prob./repmat(sum(prob),size(theta,1),1); 20 | 21 | [tmp,pred]=max(prob_norm); 22 | 23 | 24 | 25 | % --------------------------------------------------------------------- 26 | 27 | end 28 | 29 | -------------------------------------------------------------------------------- /softmax/softmaxTrain_nonneg.m: -------------------------------------------------------------------------------- 1 | function [softmaxModel] = softmaxTrain_nonneg(inputSize, numClasses, lambda, inputData, labels, options) 2 | %softmaxTrain Train a softmax model with the given parameters on the given 3 | % data. Returns softmaxOptTheta, a vector containing the trained parameters 4 | % for the model. 5 | % 6 | % inputSize: the size of an input vector x^(i) 7 | % numClasses: the number of classes 8 | % lambda: weight decay parameter 9 | % inputData: an N by M matrix containing the input data, such that 10 | % inputData(:, c) is the cth input 11 | % labels: M by 1 matrix containing the class labels for the 12 | % corresponding inputs. labels(c) is the class label for 13 | % the cth input 14 | % options (optional): options 15 | % options.maxIter: number of iterations to train for 16 | 17 | if ~exist('options', 'var') 18 | options = struct; 19 | end 20 | 21 | if ~isfield(options, 'maxIter') 22 | options.maxIter = 400; 23 | end 24 | 25 | % initialize parameters 26 | theta = 0.005 * randn(numClasses * inputSize, 1); 27 | 28 | % Use minFunc to minimize the function 29 | addpath minFunc/ 30 | options.Method = 'lbfgs'; % Here, we use L-BFGS to optimize our cost 31 | % function. Generally, for minFunc to work, you 32 | % need a function pointer with two outputs: the 33 | % function value and the gradient. In our problem, 34 | % softmaxCost.m satisfies this. 35 | minFuncOptions.display = 'on'; 36 | options.optTol = 1e-12; 37 | [softmaxOptTheta, cost] = minFunc( @(p) softmaxCost_nonneg(p, ... 38 | numClasses, inputSize, lambda, ... 39 | inputData, labels), ... 40 | theta, options); 41 | 42 | % Fold softmaxOptTheta into a nicer format 43 | softmaxModel.optTheta = reshape(softmaxOptTheta, numClasses, inputSize); 44 | softmaxModel.inputSize = inputSize; 45 | softmaxModel.numClasses = numClasses; 46 | 47 | end 48 | -------------------------------------------------------------------------------- /sparseAutoencoderCost_nonneg.m: -------------------------------------------------------------------------------- 1 | function [cost,grad, objhistory] = sparseAutoencoderCost_nonneg(theta, visibleSize, hiddenSize, ... 2 | lambda, inputZeroMaskedFraction, dropoutFraction, sparsityParam, beta, data) 3 | 4 | 5 | objhistory = []; 6 | 7 | W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize); 8 | W2 = reshape(theta(hiddenSize*visibleSize+1:2*hiddenSize*visibleSize), visibleSize, hiddenSize); 9 | b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize); 10 | b2 = theta(2*hiddenSize*visibleSize+hiddenSize+1:end); 11 | 12 | % Cost and gradient variables (your code needs to compute these values). 13 | % Here, we initialize them to zeros. 
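% Note on the nonnegativity constraint used in this cost: the usual L2
% weight decay is applied only to the negative entries of W1 and W2 (the
% L2_regN term computed below), so lambda/2*L2_regN penalizes negative
% weights while leaving nonnegative weights free; together with the KL
% sparsity term beta*kl, this is what pushes the autoencoder toward
% nonnegative, part-based features.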
14 | cost = 0; 15 | W1grad = zeros(size(W1)); 16 | W2grad = zeros(size(W2)); 17 | b1grad = zeros(size(b1)); 18 | b2grad = zeros(size(b2)); 19 | 20 | %% computing delta's in output and hidden layers 21 | 22 | 23 | y = data; 24 | a1 = data; 25 | if (inputZeroMaskedFraction>0) 26 | a1 = a1.*(rand(size(a1))>inputZeroMaskedFraction); 27 | end 28 | 29 | z2 = W1*a1 + repmat(b1,1,size(a1,2)); 30 | a2 = sigmoid(z2); 31 | 32 | %dropout 33 | if(dropoutFraction > 0) 34 | dropOutMask = (rand(size(a2))>dropoutFraction); 35 | a2 = a2.*dropOutMask; 36 | end 37 | 38 | z3 = W2*a2 + repmat(b2,1,size(a2,2)); 39 | a3 = sigmoid(z3); 40 | 41 | yhat = a3; 42 | 43 | delta3 = -(y - yhat) .* (a3.*(ones(visibleSize,size(y,2))-a3)); 44 | 45 | 46 | param = sum(a2,2)./size(y,2); 47 | par = sparsityParam*ones(hiddenSize,1); 48 | sparsity = beta*(-par./param + (ones(hiddenSize,1)-par)./(ones(hiddenSize,1)-param)); 49 | sparsity = repmat(sparsity,1,size(data,2)); 50 | 51 | delta2 = (W2'*delta3 + sparsity) .* (a2.*(ones(hiddenSize,size(y,2))-a2)); 52 | 53 | if(dropoutFraction > 0) 54 | delta2 = delta2.*dropOutMask; 55 | end 56 | 57 | 58 | kl = sum(sparsityParam*log(par./param) + (1-sparsityParam)*log((ones(hiddenSize,1)-par)./(ones(hiddenSize,1)-param))); 59 | 60 | 61 | idx1 = find(W1 < 0); 62 | idx2 = find(W1 <= -1); 63 | idx3 = find(W1 >= 0); 64 | 65 | idx4 = find(W2 < 0); 66 | idx5 = find(W2 <= -1); 67 | idx6 = find(W2 >= 0); 68 | 69 | L2_regN = sum(sum(W1(idx1).^2)) + sum(sum(W2(idx4).^2)); 70 | L2_regP = sum(sum(W1(idx3).^2)) + sum(sum(W2(idx6).^2)); 71 | L1_reg = sum(abs(W1(:))) + sum(abs(W2(:))); 72 | 73 | 74 | cost = 0.5*sum(sum((y-yhat).^2))./size(y,2) + beta*kl + lambda/2*L2_regN; 75 | 76 | newobj = 0.5*sum(sum((y-yhat).^2))./size(y,2); 77 | objhistory = [objhistory newobj]; 78 | 79 | one1 = ones(size(W1)); 80 | one2 = ones(size(W2)); 81 | 82 | W1grad = delta2*(a1')./(size(y,2)); 83 | W1grad(idx1) = W1grad(idx1) + lambda*W1(idx1); 84 | 85 | W2grad = delta3*(a2')./(size(y,2)); 86 | W2grad(idx4) = W2grad(idx4) + lambda*W2(idx4); 87 | 88 | b1grad = sum(delta2,2)./(size(y,2)); 89 | 90 | b2grad = sum(delta3,2)./(size(y,2)); 91 | 92 | 93 | 94 | grad = [W1grad(:) ; W2grad(:) ; b1grad(:) ; b2grad(:)]; 95 | 96 | end 97 | 98 | 99 | 100 | function sigm = sigmoid(x) 101 | 102 | sigm = 1 ./ (1 + exp(-x)); 103 | end 104 | 105 | -------------------------------------------------------------------------------- /stack2params.m: -------------------------------------------------------------------------------- 1 | function [params, netconfig] = stack2params(stack) 2 | 3 | % Converts a "stack" structure into a flattened parameter vector and also 4 | % stores the network configuration. This is useful when working with 5 | % optimization toolboxes such as minFunc. 6 | % 7 | % [params, netconfig] = stack2params(stack) 8 | % 9 | % stack - the stack structure, where stack{1}.w = weights of first layer 10 | % stack{1}.b = weights of first layer 11 | % stack{2}.w = weights of second layer 12 | % stack{2}.b = weights of second layer 13 | % ... etc. 14 | 15 | 16 | % Setup the compressed param vector 17 | params = []; 18 | for d = 1:numel(stack) 19 | 20 | % This can be optimized. But since our stacks are relatively short, it 21 | % is okay 22 | params = [params ; stack{d}.w(:) ; stack{d}.b(:) ]; 23 | 24 | % Check that stack is of the correct form 25 | assert(size(stack{d}.w, 1) == size(stack{d}.b, 1), ... 26 | ['The bias should be a *column* vector of ' ... 
--------------------------------------------------------------------------------
/stack2params.m:
--------------------------------------------------------------------------------
function [params, netconfig] = stack2params(stack)

% Converts a "stack" structure into a flattened parameter vector and also
% stores the network configuration. This is useful when working with
% optimization toolboxes such as minFunc.
%
% [params, netconfig] = stack2params(stack)
%
% stack - the stack structure, where stack{1}.w = weights of the first layer
%         stack{1}.b = biases of the first layer
%         stack{2}.w = weights of the second layer
%         stack{2}.b = biases of the second layer
%         ... etc.

% Set up the compressed parameter vector
params = [];
for d = 1:numel(stack)

    % Growing params inside the loop could be preallocated, but since our
    % stacks are relatively short, this is okay.
    params = [params ; stack{d}.w(:) ; stack{d}.b(:) ];

    % Check that the stack is of the correct form
    assert(size(stack{d}.w, 1) == size(stack{d}.b, 1), ...
        ['The bias should be a *column* vector of ' ...
         int2str(size(stack{d}.w, 1)) 'x1']);
    if d < numel(stack)
        assert(size(stack{d}.w, 1) == size(stack{d+1}.w, 2), ...
            ['The adjacent layers L' int2str(d) ' and L' int2str(d+1) ...
             ' should have matching sizes.']);
    end

end

if nargout > 1
    % Set up netconfig
    if numel(stack) == 0
        netconfig.inputsize = 0;
        netconfig.layersizes = {};
    else
        netconfig.inputsize = size(stack{1}.w, 2);
        netconfig.layersizes = {};
        for d = 1:numel(stack)
            netconfig.layersizes = [netconfig.layersizes ; size(stack{d}.w, 1)];
        end
    end
end

end
--------------------------------------------------------------------------------
/stackedAECost_nonneg.m:
--------------------------------------------------------------------------------
function [cost, grad] = stackedAECost_nonneg(theta, inputSize, hiddenSize, ...
                                             numClasses, netconfig, ...
                                             lambda1, data, labels)

%% Unroll the parameters

% First extract the part which holds the softmax weights
softmaxTheta = reshape(theta(1:hiddenSize*numClasses), numClasses, hiddenSize);

% Extract out the "stack"
stack = params2stack(theta(hiddenSize*numClasses+1:end), netconfig);

stackgrad = cell(size(stack));
for d = 1:numel(stack)
    stackgrad{d}.w = zeros(size(stack{d}.w));
    stackgrad{d}.b = zeros(size(stack{d}.b));
end

M = size(data, 2);
groundTruth = full(sparse(labels, 1:M, 1));

%% Forward pass through the two stacked encoder layers

W1 = stack{1}.w;
W2 = stack{2}.w;
b1 = stack{1}.b;
b2 = stack{2}.b;

a1 = data;
z2 = W1*a1 + repmat(b1, 1, M);
a2 = sigmoid(z2);
z3 = W2*a2 + repmat(b2, 1, M);
a3 = sigmoid(z3);

% Softmax probabilities, clamped elementwise to keep them finite and to
% avoid log(0) in the cost. (exp(709) is just below the largest finite
% double, realmax ~ exp(709.78).)
prob = exp(softmaxTheta*a3);
prob(isinf(prob)) = exp(709);
prob_norm = prob./repmat(sum(prob), numClasses, 1);
prob_norm(prob_norm == 0) = eps;

%% Backpropagation

delta3 = -(softmaxTheta'*(groundTruth - prob_norm)) .* (a3.*(1 - a3));

delta2 = (W2'*delta3) .* (a2.*(1 - a2));

% Nonnegativity constraint: penalize only the negative weights.
idx1 = find(W1 < 0);
idx4 = find(W2 < 0);

L2_regN = sum(W1(idx1).^2) + sum(W2(idx4).^2);

stackgrad{1}.w = delta2*(a1')./M;
stackgrad{1}.w(idx1) = stackgrad{1}.w(idx1) + lambda1*W1(idx1);
stackgrad{1}.b = sum(delta2,2)./M;

stackgrad{2}.w = delta3*(a2')./M;
stackgrad{2}.w(idx4) = stackgrad{2}.w(idx4) + lambda1*W2(idx4);
stackgrad{2}.b = sum(delta3,2)./M;

idx7 = find(softmaxTheta < 0);
softmax_L2_regN = sum(softmaxTheta(idx7).^2);

softmaxThetaGrad = -1/M * (a3*(groundTruth - prob_norm)');
softmaxThetaGrad = softmaxThetaGrad';
softmaxThetaGrad(idx7) = softmaxThetaGrad(idx7) + lambda1*softmaxTheta(idx7);

cost = -sum(sum(groundTruth.*log(prob_norm)))/M ...
       + lambda1/2*softmax_L2_regN + lambda1/2*L2_regN;

if isnan(cost)
    error('stackedAECost_nonneg:nanCost', 'Cost evaluated to NaN.');
end

%% Roll gradient vector

grad = [softmaxThetaGrad(:) ; stack2params(stackgrad)];

end

function sigm = sigmoid(x)
sigm = 1 ./ (1 + exp(-x));
end
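The clamping above keeps the probabilities finite, but the standard remedy is to subtract the per-example maximum score before exponentiating. A sketch of a drop-in replacement for the four prob/prob_norm lines above (an alternative under the same variable names, not what the repository ships):

    % Numerically stable softmax: shifting each column by its max leaves the
    % probabilities unchanged because the shift cancels in the normalization.
    scores = softmaxTheta*a3;
    scores = bsxfun(@minus, scores, max(scores, [], 1));
    prob_norm = exp(scores);
    prob_norm = prob_norm./repmat(sum(prob_norm, 1), numClasses, 1);
    prob_norm = max(prob_norm, eps);   % still guard log(0) in the cost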
--------------------------------------------------------------------------------
/stackedAEPredict.m:
--------------------------------------------------------------------------------
function [pred] = stackedAEPredict(theta, inputSize, hiddenSize, numClasses, netconfig, dropoutFraction, data)

%% Unroll theta parameter

softmaxTheta = reshape(theta(1:hiddenSize*numClasses), numClasses, hiddenSize);

stack = params2stack(theta(hiddenSize*numClasses+1:end), netconfig);

%% Forward pass

W1 = stack{1}.w;
W2 = stack{2}.w;
b1 = stack{1}.b;
b2 = stack{2}.b;

a1 = data;
z2 = W1*a1 + repmat(b1, 1, size(data,2));
a2 = sigmoid(z2);
if dropoutFraction > 0
    % At test time, scale activations by the keep probability so they match
    % the expected value of the masked activations seen during training.
    a2 = a2.*(1 - dropoutFraction);
end
z3 = W2*a2 + repmat(b2, 1, size(data,2));
a3 = sigmoid(z3);
if dropoutFraction > 0
    a3 = a3.*(1 - dropoutFraction);
end

prob = exp(softmaxTheta*a3);
prob_norm = prob./repmat(sum(prob), numClasses, 1);

[~, pred] = max(prob_norm);

end

function sigm = sigmoid(x)
sigm = 1 ./ (1 + exp(-x));
end
--------------------------------------------------------------------------------
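For completeness, a sketch of how a fine-tuned model would be evaluated with stackedAEPredict; stackedAEOptTheta, testImages, and testLabels are hypothetical placeholders for the output of fine-tuning and the MNIST test-set loaders:

    % Hypothetical evaluation of a fine-tuned stacked NCAE on the test set.
    pred = stackedAEPredict(stackedAEOptTheta, inputSize, hiddenSize, ...
                            numClasses, netconfig, dropoutFraction, testImages);
    acc = mean(pred(:) == testLabels(:));
    fprintf('Test accuracy: %0.3f%%\n', acc * 100);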