├── Exercise1 Sparse Autoencoder ├── checkNumericalGradient.m ├── computeNumericalGradient.m ├── display_network.m ├── initializeParameters.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── sampleIMAGES.m ├── sparseAutoencoderCost.m ├── train.m └── weights.jpg ├── Exercise10 Sparse Coding ├── IMAGES.mat ├── checkNumericalGradient.m ├── computeNumericalGradient.m ├── display_network.m ├── sampleIMAGES.m ├── sparseCodingExercise.m ├── sparseCodingFeatureCost.m └── sparseCodingWeightCost.m ├── Exercise2 Vectorization ├── checkNumericalGradient.m ├── computeNumericalGradient.asv ├── computeNumericalGradient.m ├── display_network.m ├── initializeParameters.m ├── loadMNISTImages.m ├── loadMNISTLabels.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── sampleIMAGES.m ├── sparseAutoencoderCost.m ├── train.m └── weights.jpg ├── Exercise3 PCA in 2D ├── pcaData.txt ├── pca_2d.asv └── pca_2d.m ├── Exercise4 PCA and Whitening ├── display_network.m ├── pca_gen.m └── sampleIMAGESRAW.m ├── Exercise5 Softmax Regression ├── computeNumericalGradient.m ├── loadMNISTImages.m ├── loadMNISTLabels.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ 
├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── softmaxCost.m ├── softmaxExercise.m ├── softmaxPredict.m └── softmaxTrain.m ├── Exercise6 Self-Taught Learning ├── computeNumericalGradient.m ├── display_network.m ├── feedForwardAutoencoder.m ├── initializeParameters.m ├── loadMNISTImages.m ├── loadMNISTLabels.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── softmaxCost.m ├── softmaxPredict.m ├── softmaxTrain.m ├── sparseAutoencoderCost.m ├── stlExercise.m └── testMemory.m ├── Exercise7 Implement deep networks for digit classification ├── checkStackedAECost.m ├── computeNumericalGradient.m ├── feedForwardAutoencoder.m ├── initializeParameters.m ├── loadMNISTImages.m ├── loadMNISTLabels.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── params2stack.m ├── softmaxCost.m ├── softmaxPredict.m ├── softmaxTrain.m ├── sparseAutoencoderCost.m ├── sparseAutoencoderCost_modify.m ├── stack2params.m ├── stackedAECost.m ├── stackedAEExercise.m └── stackedAEPredict.m ├── Exercise8 Learning color features with Sparse Autoencoders ├── computeNumericalGradient.m ├── displayColorNetwork.m ├── initializeParameters.m ├── linearDecoderExercise.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ 
├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m └── sparseAutoencoderLinearCost.m ├── Exercise9 Convolution and Pooling ├── cnnConvolve.m ├── cnnExercise.m ├── cnnPool.m ├── displayColorNetwork.m ├── feedForwardAutoencoder.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── softmaxCost.m ├── softmaxPredict.m └── softmaxTrain.m └── README.md /Exercise1 Sparse Autoencoder/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 
21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/initializeParameters.m: -------------------------------------------------------------------------------- 1 | function theta = initializeParameters(hiddenSize, visibleSize) 2 | 3 | %% Initialize parameters randomly based on layer sizes. 4 | r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] 5 | W1 = rand(hiddenSize, visibleSize) * 2 * r - r; 6 | W2 = rand(visibleSize, hiddenSize) * 2 * r - r; 7 | 8 | b1 = zeros(hiddenSize, 1); 9 | b2 = zeros(visibleSize, 1); 10 | 11 | % Convert weights and bias gradients to the vector form. 12 | % This step will "unroll" (flatten and concatenate together) all 13 | % your parameters into a vector, which can then be used with minFunc. 14 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal 
g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); 
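Note on the gradient checker above: computeNumericalGradient.m uses the central difference numgrad(i) = (J(theta + epsilon*e_i) - J(theta - epsilon*e_i)) / (2*epsilon) with epsilon = 1e-4, the same finite-difference idea as the non-complex branch of autoGrad.m. The short driver below is only an illustrative sketch, not a file from this repository; the quadratic cost simpleCost and the quoted tolerance are assumptions made for the example.

% Illustrative gradient check (not part of the repository): verify the
% central-difference approximation against a cost with a known gradient.
% For J(theta) = 0.5*theta'*theta the exact gradient is theta itself.
simpleCost = @(theta) deal(0.5 * (theta' * theta), theta);  % returns [J, grad]
theta0 = randn(5, 1);
numGrad = computeNumericalGradient(simpleCost, theta0);
[~, analyticGrad] = simpleCost(theta0);
relDiff = norm(numGrad - analyticGrad) / norm(numGrad + analyticGrad);
fprintf('relative difference = %g (should be around 1e-9 or smaller)\n', relDiff);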
-------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + 
s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end 
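The two files just listed work as a pair: lbfgs.m applies the L-BFGS inverse-Hessian approximation to a vector via the standard two-loop recursion, and lbfgsUpdate.m maintains the correction pairs, skipping any pair with y'*s <= 1e-10 so the approximation stays positive definite. The sketch below is illustrative only (the small quadratic objective, the fixed step size and the memory of 10 corrections are assumptions; minFunc.m itself uses a Wolfe line search) and shows how the two routines fit together.

% Illustrative L-BFGS loop on a small quadratic f(x) = 0.5*x'*A*x - b'*x,
% whose gradient is A*x - b and whose minimizer solves A*x = b.
A = [3 1; 1 2];  b = [1; -1];
x = zeros(2, 1);  g = A*x - b;
old_dirs = zeros(2, 0);  old_stps = zeros(2, 0);  Hdiag = 1;
for iter = 1:20
    if isempty(old_dirs)
        d = -g;                                    % first iteration: steepest descent
    else
        d = lbfgs(-g, old_dirs, old_stps, Hdiag);  % two-loop recursion
    end
    t = 0.5;                                       % fixed step, for this sketch only
    s = t*d;  x = x + s;
    g_new = A*x - b;  y = g_new - g;
    [old_dirs, old_stps, Hdiag] = lbfgsUpdate(y, s, 10, 0, old_dirs, old_stps, Hdiag);
    g = g_new;
end
fprintf('residual ||A*x - b|| = %g\n', norm(A*x - b));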
-------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum( ([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. 
*/ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | 
y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/sampleIMAGES.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/sampleIMAGES.m -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/weights.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/weights.jpg -------------------------------------------------------------------------------- /Exercise10 Sparse Coding/IMAGES.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise10 Sparse Coding/IMAGES.mat -------------------------------------------------------------------------------- /Exercise10 Sparse Coding/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise10 Sparse Coding/sampleIMAGES.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise10 Sparse Coding/sampleIMAGES.m -------------------------------------------------------------------------------- /Exercise10 Sparse Coding/sparseCodingFeatureCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise10 Sparse Coding/sparseCodingFeatureCost.m -------------------------------------------------------------------------------- /Exercise10 Sparse Coding/sparseCodingWeightCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise10 Sparse Coding/sparseCodingWeightCost.m -------------------------------------------------------------------------------- /Exercise2 Vectorization/computeNumericalGradient.asv: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function 
that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | J1, gra 23 | numgrad = 24 | 25 | 26 | 27 | 28 | 29 | 30 | %% --------------------------------------------------------------- 31 | end 32 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/initializeParameters.m: -------------------------------------------------------------------------------- 1 | function theta = initializeParameters(hiddenSize, visibleSize) 2 | 3 | %% Initialize parameters randomly based on layer sizes. 
4 | r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] 5 | W1 = rand(hiddenSize, visibleSize) * 2 * r - r; 6 | W2 = rand(visibleSize, hiddenSize) * 2 * r - r; 7 | 8 | b1 = zeros(hiddenSize, 1); 9 | b2 = zeros(visibleSize, 1); 10 | 11 | % Convert weights and bias gradients to the vector form. 12 | % This step will "unroll" (flatten and concatenate together) all 13 | % your parameters into a vector, which can then be used with minFunc. 14 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/loadMNISTImages.m: -------------------------------------------------------------------------------- 1 | function images = loadMNISTImages(filename) 2 | %loadMNISTImages returns a 28x28x[number of MNIST images] matrix containing 3 | %the raw MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2051, ['Bad magic number in ', filename, '']); 10 | 11 | numImages = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | numRows = fread(fp, 1, 'int32', 0, 'ieee-be'); 13 | numCols = fread(fp, 1, 'int32', 0, 'ieee-be'); 14 | 15 | images = fread(fp, inf, 'unsigned char'); 16 | images = reshape(images, numCols, numRows, numImages); 17 | images = permute(images,[2 1 3]); 18 | 19 | fclose(fp); 20 | 21 | % Reshape to #pixels x #examples 22 | images = reshape(images, size(images, 1) * size(images, 2), size(images, 3)); 23 | % Convert to double and rescale to [0,1] 24 | images = double(images) / 255; 25 | 26 | end 27 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/loadMNISTLabels.m: -------------------------------------------------------------------------------- 1 | function labels = loadMNISTLabels(filename) 2 | %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing 3 | %the labels for the MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2049, ['Bad magic number in ', filename, '']); 10 | 11 | numLabels = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | 13 | labels = fread(fp, inf, 'unsigned char'); 14 | 15 | assert(size(labels,1) == numLabels, 'Mismatch in label count'); 16 | 17 | fclose(fp); 18 | 19 | end 20 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/autoHess.m: 
-------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = 
diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full 
BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function 
[old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum( ([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. 
*/ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); 
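A small hedged sketch (not part of minFunc) of what the precondTriu helper above computes: if M = U'*U is a symmetric positive definite approximation of the Hessian, with upper-triangular Cholesky factor U, then applying the preconditioner to a residual r means solving M*y = r, which reduces to the two triangular solves in the one-liner above. The matrix M below is an arbitrary stand-in chosen only for illustration.

A = randn(5); M = A'*A + eye(5);   % stand-in SPD Hessian approximation (assumption)
U = chol(M);                       % upper-triangular factor, so M = U'*U
r = randn(5,1);
y = U \ (U' \ r);                  % the same operation precondTriu(r,U) performs
norm(M*y - r)                      % close to zero: y solves M*y = r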
-------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise2 Vectorization/sampleIMAGES.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/sampleIMAGES.m -------------------------------------------------------------------------------- /Exercise2 Vectorization/train.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/train.m -------------------------------------------------------------------------------- /Exercise2 Vectorization/weights.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/weights.jpg -------------------------------------------------------------------------------- /Exercise3 PCA in 2D/pcaData.txt: -------------------------------------------------------------------------------- 1 | -6.7644914e-01 -6.3089308e-01 -4.8915202e-01 -4.8005424e-01 -3.7842021e-01 -3.3788391e-01 -3.2023528e-01 -3.1108837e-01 -2.3145555e-01 -1.9623727e-01 -1.5678926e-01 -1.4900779e-01 -1.0861557e-01 -1.0506308e-01 -8.0899829e-02 -7.1157518e-02 -6.3251073e-02 -2.6007219e-02 -2.2553443e-02 -5.8489047e-03 -4.3935323e-03 -1.7309716e-03 7.8223728e-03 7.5386969e-02 8.6608396e-02 9.6406046e-02 1.0331683e-01 1.0531131e-01 1.1493296e-01 1.3052813e-01 1.6626253e-01 1.7901863e-01 1.9267343e-01 1.9414427e-01 1.9770003e-01 2.3043613e-01 3.2715844e-01 3.2737163e-01 3.2922364e-01 3.4869293e-01 3.7500704e-01 4.2830153e-01 4.5432503e-01 5.4422436e-01 6.6539963e-01 2 | -4.4722050e-01 -7.4778067e-01 -3.9074344e-01 -5.6036362e-01 -3.4291940e-01 -1.3832158e-01 1.2360939e-01 -3.3934986e-01 -8.2868433e-02 -2.4759514e-01 -1.0914760e-01 4.2243921e-01 -5.2329327e-02 -2.0126541e-01 1.3016657e-01 1.2293321e-01 -3.4787750e-01 -1.4584897e-01 -1.0559656e-01 -5.4200847e-02 1.6915422e-02 -1.1069762e-01 9.0859816e-02 1.5269096e-01 -9.4416463e-02 1.5116385e-01 -1.3540126e-01 2.4592698e-01 5.1087447e-02 2.4583340e-01 -5.9535372e-02 2.9704742e-01 1.0168115e-01 1.4258649e-01 1.0662592e-01 3.1698532e-01 6.1577841e-01 4.3911172e-01 2.7156501e-01 1.3572389e-01 3.1918066e-01 1.5122962e-01 3.4979047e-01 6.2316971e-01 5.2018811e-01 3 | -------------------------------------------------------------------------------- /Exercise3 PCA in 2D/pca_2d.asv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise3 PCA in 2D/pca_2d.asv -------------------------------------------------------------------------------- /Exercise3 PCA in 2D/pca_2d.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise3 PCA in 2D/pca_2d.m -------------------------------------------------------------------------------- /Exercise4 PCA and Whitening/pca_gen.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise4 PCA and Whitening/pca_gen.m -------------------------------------------------------------------------------- /Exercise4 PCA and Whitening/sampleIMAGESRAW.m: -------------------------------------------------------------------------------- 1 | function patches = sampleIMAGESRAW 2 | 3 | % sampleIMAGESRAW 4 | % Returns 10000 "raw" unwhitened patches 5 | 6 | 7 | load IMAGES_RAW; 8 | IMAGES = IMAGESr; 9 | 10 | patchSize = 12; 11 | numPatches = 10000; 12 | 13 | % Initialize patches with zeros. Your code will fill in this matrix--one 14 | % column per patch, 10000 columns. 
15 | patches = zeros(patchSize*patchSize, numPatches); 16 | 17 | p = 0; 18 | for im = 1:size(IMAGES, 3) 19 | 20 | % Sample Patches 21 | numsamples = numPatches / size(IMAGES, 3); 22 | for s = 1:numsamples 23 | y = randi(size(IMAGES,1)-patchSize+1); 24 | x = randi(size(IMAGES,2)-patchSize+1); 25 | sample = IMAGES(y:y+patchSize-1, x:x+patchSize-1,im); 26 | p = p + 1; 27 | patches(:, p) = sample(:); 28 | end 29 | 30 | end 31 | 32 | end 33 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/loadMNISTImages.m: -------------------------------------------------------------------------------- 1 | function images = loadMNISTImages(filename) 2 | %loadMNISTImages returns a 28x28x[number of MNIST images] matrix containing 3 | %the raw MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2051, ['Bad magic number in ', filename, '']); 10 | 11 | numImages = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | numRows = fread(fp, 1, 'int32', 0, 'ieee-be'); 13 | numCols = fread(fp, 1, 'int32', 0, 'ieee-be'); 14 | 15 | images = fread(fp, inf, 'unsigned char'); 16 | images = reshape(images, numCols, numRows, numImages); 17 | images = permute(images,[2 1 3]); 18 | 19 | fclose(fp); 20 | 21 | % Reshape to #pixels x #examples 22 | images = reshape(images, size(images, 1) * size(images, 2), size(images, 3)); 23 | % Convert to double and rescale to [0,1] 24 | images = double(images) / 255; 25 | 26 | end 27 | 
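A minimal usage sketch (not part of the repository) showing how the loader above and its companion loadMNISTLabels (listed next) are typically combined. The file names below are the standard MNIST download names and are an assumption here, not taken from the exercise scripts.

images = loadMNISTImages('train-images-idx3-ubyte');   % #pixels x #examples, doubles in [0,1]
labels = loadMNISTLabels('train-labels-idx1-ubyte');    % #examples x 1, digit labels 0-9
assert(size(images, 2) == numel(labels), 'image/label count mismatch');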
-------------------------------------------------------------------------------- /Exercise5 Softmax Regression/loadMNISTLabels.m: -------------------------------------------------------------------------------- 1 | function labels = loadMNISTLabels(filename) 2 | %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing 3 | %the labels for the MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2049, ['Bad magic number in ', filename, '']); 10 | 11 | numLabels = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | 13 | labels = fread(fp, inf, 'unsigned char'); 14 | 15 | assert(size(labels,1) == numLabels, 'Mismatch in label count'); 16 | 17 | fclose(fp); 18 | 19 | end 20 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % 
[f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | 
minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise5 Softmax 
Regression/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = 
LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum( ([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. */ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 
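%
% Illustrative sketch (not from the original file): one way this test function
% is commonly driven through minFunc. The option values are an assumption
% modelled on example_minFunc.m elsewhere in this repository.
%
%   options.Method  = 'lbfgs';
%   options.Display = 'iter';
%   xMin = minFunc(@rosenbrock, randn(10,1), options);  % should approach ones(10,1)
%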
11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/softmaxCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/softmaxCost.m -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/softmaxPredict.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/softmaxPredict.m -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/softmaxTrain.m: -------------------------------------------------------------------------------- 1 | function [softmaxModel] = softmaxTrain(inputSize, numClasses, lambda, inputData, labels, options) 2 | %softmaxTrain Train a softmax model with the given parameters on the given 3 | % data. Returns softmaxOptTheta, a vector containing the trained parameters 4 | % for the model. 5 | % 6 | % inputSize: the size of an input vector x^(i) 7 | % numClasses: the number of classes 8 | % lambda: weight decay parameter 9 | % inputData: an N by M matrix containing the input data, such that 10 | % inputData(:, c) is the cth input 11 | % labels: M by 1 matrix containing the class labels for the 12 | % corresponding inputs. 
labels(c) is the class label for 13 | % the cth input 14 | % options (optional): options 15 | % options.maxIter: number of iterations to train for 16 | 17 | if ~exist('options', 'var') 18 | options = struct; 19 | end 20 | 21 | if ~isfield(options, 'maxIter') 22 | options.maxIter = 400; 23 | end 24 | 25 | % initialize parameters 26 | theta = 0.005 * randn(numClasses * inputSize, 1); 27 | 28 | % Use minFunc to minimize the function 29 | addpath minFunc/ 30 | options.Method = 'lbfgs'; % Here, we use L-BFGS to optimize our cost 31 | % function. Generally, for minFunc to work, you 32 | % need a function pointer with two outputs: the 33 | % function value and the gradient. In our problem, 34 | % softmaxCost.m satisfies this. 35 | minFuncOptions.display = 'on'; 36 | 37 | [softmaxOptTheta, cost] = minFunc( @(p) softmaxCost(p, ... 38 | numClasses, inputSize, lambda, ... 39 | inputData, labels), ... 40 | theta, options); 41 | 42 | % Fold softmaxOptTheta into a nicer format 43 | softmaxModel.optTheta = reshape(softmaxOptTheta, numClasses, inputSize); 44 | softmaxModel.inputSize = inputSize; 45 | softmaxModel.numClasses = numClasses; 46 | 47 | end 48 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 
21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | temp1 = theta; 33 | temp2 = theta; 34 | temp1(i) = temp1(i) + epsilon; 35 | temp2(i) = temp2(i) - epsilon; 36 | [J1, grad] = J(temp1); 37 | [J2, grad] = J(temp2); 38 | numgrad(i) = (J1 - J2) / (2*epsilon); 39 | end 40 | 41 | 42 | % theta1 = theta + epsilon; 43 | % theta2 = theta - epsilon; 44 | % [J1(i), grad] = J(temp1); 45 | % [J2(i), grad] = J(temp2); 46 | 47 | 48 | 49 | 50 | %% --------------------------------------------------------------- 51 | end 52 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/feedForwardAutoencoder.m: -------------------------------------------------------------------------------- 1 | function [activation] = feedForwardAutoencoder(theta, hiddenSize, visibleSize, data) 2 | 3 | % theta: trained weights from the autoencoder 4 | % visibleSize: the number of input units (probably 64) 5 | % hiddenSize: the number of hidden units (probably 25) 6 | % data: Our matrix containing the training data as columns. So, data(:,i) is the i-th training example. 7 | 8 | % We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this 9 | % follows the notation convention of the lecture notes. 10 | 11 | W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize); 12 | b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize); 13 | 14 | %% ---------- YOUR CODE HERE -------------------------------------- 15 | % Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder. 16 | m = size(data, 2); 17 | z2 = W1 * data + repmat(b1,1,m); 18 | a2 = sigmoid(z2); 19 | activation = a2; 20 | %------------------------------------------------------------------- 21 | 22 | end 23 | 24 | %------------------------------------------------------------------- 25 | % Here's an implementation of the sigmoid function, which you may find useful 26 | % in your computation of the costs and the gradients. This inputs a (row or 27 | % column) vector (say (z1, z2, z3)) and returns (f(z1), f(z2), f(z3)). 28 | 29 | function sigm = sigmoid(x) 30 | sigm = 1 ./ (1 + exp(-x)); 31 | end 32 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/initializeParameters.m: -------------------------------------------------------------------------------- 1 | function theta = initializeParameters(hiddenSize, visibleSize) 2 | 3 | %% Initialize parameters randomly based on layer sizes. 4 | r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] 5 | W1 = rand(hiddenSize, visibleSize) * 2 * r - r; 6 | W2 = rand(visibleSize, hiddenSize) * 2 * r - r; 7 | 8 | b1 = zeros(hiddenSize, 1); 9 | b2 = zeros(visibleSize, 1); 10 | 11 | % Convert weights and bias gradients to the vector form. 12 | % This step will "unroll" (flatten and concatenate together) all 13 | % your parameters into a vector, which can then be used with minFunc. 
14 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/loadMNISTImages.m: -------------------------------------------------------------------------------- 1 | function images = loadMNISTImages(filename) 2 | %loadMNISTImages returns a 28x28x[number of MNIST images] matrix containing 3 | %the raw MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2051, ['Bad magic number in ', filename, '']); 10 | 11 | numImages = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | numRows = fread(fp, 1, 'int32', 0, 'ieee-be'); 13 | numCols = fread(fp, 1, 'int32', 0, 'ieee-be'); 14 | 15 | images = fread(fp, inf, 'unsigned char'); 16 | images = reshape(images, numCols, numRows, numImages); 17 | images = permute(images,[2 1 3]); 18 | 19 | fclose(fp); 20 | 21 | % Reshape to #pixels x #examples 22 | images = reshape(images, size(images, 1) * size(images, 2), size(images, 3)); 23 | % Convert to double and rescale to [0,1] 24 | images = double(images) / 255; 25 | 26 | end 27 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/loadMNISTLabels.m: -------------------------------------------------------------------------------- 1 | function labels = loadMNISTLabels(filename) 2 | %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing 3 | %the labels for the MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2049, ['Bad magic number in ', filename, '']); 10 | 11 | numLabels = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | 13 | labels = fread(fp, inf, 'unsigned char'); 14 | 15 | assert(size(labels,1) == numLabels, 'Mismatch in label count'); 16 | 17 | fclose(fp); 18 | 19 | end 20 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use 
finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full 
Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al 
=zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory 
Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum( ([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. 
*/ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = 
precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/softmaxCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/softmaxCost.m -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/softmaxPredict.m: -------------------------------------------------------------------------------- 1 | function [pred] = softmaxPredict(softmaxModel, data) 2 | 3 | % softmaxModel - model trained using softmaxTrain 4 | % data - the N x M input matrix, where each column data(:, i) corresponds to 5 | % a single test set 6 | % 7 | % Your code should produce the prediction matrix 8 | % pred, where pred(i) is argmax_c P(y(c) | x(i)). 
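%
% As a sketch only (the names thetaMat, dataMat and predLabels below are
% illustrative placeholders, not variables from this file): the same
% prediction is numerically safer if the column-wise maximum is subtracted
% before exponentiating, which cannot change the argmax.
%
%   scores = thetaMat * dataMat;                          % numClasses x numExamples
%   scores = bsxfun(@minus, scores, max(scores, [], 1));  % guard exp() against overflow
%   probs  = exp(scores);
%   probs  = bsxfun(@rdivide, probs, sum(probs, 1));      % normalize each column
%   [~, predLabels] = max(probs, [], 1);                  % predicted class per example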
9 | 10 | % Unroll the parameters from theta 11 | theta = softmaxModel.optTheta; % this provides a numClasses x inputSize matrix 12 | pred = zeros(1, size(data, 2)); 13 | 14 | %% ---------- YOUR CODE HERE -------------------------------------- 15 | % Instructions: Compute pred using theta assuming that the labels start 16 | % from 1. 17 | numClasses = softmaxModel.numClasses; 18 | inputSize = softmaxModel.inputSize; 19 | theta = reshape(theta, numClasses, inputSize); 20 | 21 | M = exp(theta * data); 22 | M = bsxfun(@rdivide, M, sum(M)); 23 | 24 | [p,pred] = max(M, [], 1); 25 | 26 | 27 | 28 | 29 | 30 | % --------------------------------------------------------------------- 31 | 32 | end 33 | 34 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/stlExercise.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/stlExercise.m -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/testMemory.m: -------------------------------------------------------------------------------- 1 | function testMemory() 2 | y = zeros(1011,10000); 3 | memory; -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/checkStackedAECost.m: -------------------------------------------------------------------------------- 1 | function [] = checkStackedAECost() 2 | 3 | % Check the gradients for the stacked autoencoder 4 | % 5 | % In general, we recommend that the creation of such files for checking 6 | % gradients when you write new cost functions. 7 | % 8 | 9 | %% Setup random data / small model 10 | inputSize = 4; 11 | hiddenSize = 5; 12 | lambda = 0.01; 13 | data = randn(inputSize, 5); 14 | labels = [ 1 2 1 2 1 ]; 15 | numClasses = 2; 16 | 17 | stack = cell(2,1); 18 | stack{1}.w = 0.1 * randn(3, inputSize); 19 | stack{1}.b = zeros(3, 1); 20 | stack{2}.w = 0.1 * randn(hiddenSize, 3); 21 | stack{2}.b = zeros(hiddenSize, 1); 22 | softmaxTheta = 0.005 * randn(hiddenSize * numClasses, 1); 23 | 24 | [stackparams, netconfig] = stack2params(stack); 25 | stackedAETheta = [ softmaxTheta ; stackparams ]; 26 | 27 | 28 | [cost, grad] = stackedAECost(stackedAETheta, inputSize, hiddenSize, ... 29 | numClasses, netconfig, ... 30 | lambda, data, labels); 31 | 32 | % Check that the numerical and analytic gradients are the same 33 | numgrad = computeNumericalGradient( @(x) stackedAECost(x, inputSize, ... 34 | hiddenSize, numClasses, netconfig, ... 35 | lambda, data, labels), ... 36 | stackedAETheta); 37 | 38 | % Use this to visually compare the gradients side by side 39 | disp([numgrad grad]); 40 | 41 | % Compare numerically computed gradients with the ones obtained from backpropagation 42 | disp('Norm between numerical and analytical gradient (should be less than 1e-9)'); 43 | diff = norm(numgrad-grad)/norm(numgrad+grad); 44 | disp(diff); % Should be small. In our implementation, these values are 45 | % usually less than 1e-9. 46 | 47 | % When you got this working, Congratulations!!! 
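%
% Illustrative aside (toyObj, xTest and relErr are placeholder names, not
% part of this exercise): the same relative-error diagnostic can be tried
% on a toy objective with a known gradient, f(x) = 0.5*||x||^2, whose
% analytic gradient is x itself.
%
%   toyObj = @(x) deal(0.5 * (x' * x), x);              % returns [cost, grad]
%   xTest  = randn(4, 1);
%   numg   = computeNumericalGradient(@(x) toyObj(x), xTest);
%   relErr = norm(numg - xTest) / norm(numg + xTest);   % should be far below 1e-9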
48 | 49 | 50 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/feedForwardAutoencoder.m: -------------------------------------------------------------------------------- 1 | function [activation] = feedForwardAutoencoder(theta, hiddenSize, visibleSize, data) 2 | 3 | % theta: trained weights from the autoencoder 4 | % visibleSize: the number of input units (probably 64) 5 | % hiddenSize: the number of hidden units (probably 25) 6 | % data: Our matrix containing the training data as columns. So, data(:,i) is the i-th training example. 7 | 8 | % We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this 9 | % follows the notation convention of the lecture notes. 10 | 11 | W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize); 12 | b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize); 13 | 14 | %% ---------- YOUR CODE HERE -------------------------------------- 15 | % Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder. 16 | m = size(data, 2); 17 | z2 = W1 * data + repmat(b1,1,m); 18 | a2 = sigmoid(z2); 19 | activation = a2; 20 | %------------------------------------------------------------------- 21 | 22 | end 23 | 24 | %------------------------------------------------------------------- 25 | % Here's an implementation of the sigmoid function, which you may find useful 26 | % in your computation of the costs and the gradients. 
This inputs a (row or 27 | % column) vector (say (z1, z2, z3)) and returns (f(z1), f(z2), f(z3)). 28 | 29 | function sigm = sigmoid(x) 30 | sigm = 1 ./ (1 + exp(-x)); 31 | end 32 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/initializeParameters.m: -------------------------------------------------------------------------------- 1 | function theta = initializeParameters(hiddenSize, visibleSize) 2 | 3 | %% Initialize parameters randomly based on layer sizes. 4 | r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] 5 | W1 = rand(hiddenSize, visibleSize) * 2 * r - r; 6 | W2 = rand(visibleSize, hiddenSize) * 2 * r - r; 7 | 8 | b1 = zeros(hiddenSize, 1); 9 | b2 = zeros(visibleSize, 1); 10 | 11 | % Convert weights and bias gradients to the vector form. 12 | % This step will "unroll" (flatten and concatenate together) all 13 | % your parameters into a vector, which can then be used with minFunc. 14 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/loadMNISTImages.m: -------------------------------------------------------------------------------- 1 | function images = loadMNISTImages(filename) 2 | %loadMNISTImages returns a 28x28x[number of MNIST images] matrix containing 3 | %the raw MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2051, ['Bad magic number in ', filename, '']); 10 | 11 | numImages = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | numRows = fread(fp, 1, 'int32', 0, 'ieee-be'); 13 | numCols = fread(fp, 1, 'int32', 0, 'ieee-be'); 14 | 15 | images = fread(fp, inf, 'unsigned char'); 16 | images = reshape(images, numCols, numRows, numImages); 17 | images = permute(images,[2 1 3]); 18 | 19 | fclose(fp); 20 | 21 | % Reshape to #pixels x #examples 22 | images = reshape(images, size(images, 1) * size(images, 2), size(images, 3)); 23 | % Convert to double and rescale to [0,1] 24 | images = double(images) / 255; 25 | 26 | end 27 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/loadMNISTLabels.m: -------------------------------------------------------------------------------- 1 | function labels = loadMNISTLabels(filename) 2 | %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing 3 | %the labels for the MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2049, ['Bad magic number in ', filename, '']); 10 | 11 | numLabels = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | 13 | labels = fread(fp, inf, 'unsigned char'); 14 | 15 | assert(size(labels,1) == numLabels, 'Mismatch in label count'); 16 | 17 | fclose(fp); 18 | 19 | end 20 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function 
values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval 
= f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = 
@LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise7 Implement deep 
networks for digit classification/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function 
[nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum( ([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. */ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit 
classification/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 
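%
% Usage sketch (illustrative only; it assumes the minFunc in this directory
% is on the MATLAB path, and the option values are examples rather than
% settings used elsewhere in these exercises):
%
%   options.Method  = 'lbfgs';
%   options.MaxIter = 400;
%   x0   = [-1.2; 1];                          % a common Rosenbrock starting point
%   xOpt = minFunc(@rosenbrock, x0, options);  % should end up close to [1; 1]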
11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/params2stack.m: -------------------------------------------------------------------------------- 1 | function stack = params2stack(params, netconfig) 2 | 3 | % Converts a flattened parameter vector into a nice "stack" structure 4 | % for us to work with. This is useful when you're building multilayer 5 | % networks. 
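%
% Round-trip sketch (exampleStack is a placeholder name, not repository
% code): flattening with stack2params and unflattening with params2stack
% should reproduce the original weights and biases.
%
%   exampleStack = cell(2, 1);
%   exampleStack{1}.w = randn(3, 4);   exampleStack{1}.b = zeros(3, 1);
%   exampleStack{2}.w = randn(2, 3);   exampleStack{2}.b = zeros(2, 1);
%   [flat, cfg] = stack2params(exampleStack);
%   back = params2stack(flat, cfg);    % back{d}.w and back{d}.b match exampleStack{d}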
6 | % 7 | % stack = params2stack(params, netconfig) 8 | % 9 | % params - flattened parameter vector 10 | % netconfig - auxiliary variable containing 11 | % the configuration of the network 12 | % 13 | 14 | 15 | % Map the params (a vector into a stack of weights) 16 | depth = numel(netconfig.layersizes); 17 | stack = cell(depth,1); 18 | prevLayerSize = netconfig.inputsize; % the size of the previous layer 19 | curPos = double(1); % mark current position in parameter vector 20 | 21 | for d = 1:depth 22 | % Create layer d 23 | stack{d} = struct; 24 | 25 | % Extract weights 26 | wlen = double(netconfig.layersizes{d} * prevLayerSize); 27 | stack{d}.w = reshape(params(curPos:curPos+wlen-1), netconfig.layersizes{d}, prevLayerSize); 28 | curPos = curPos+wlen; 29 | 30 | % Extract bias 31 | blen = double(netconfig.layersizes{d}); 32 | stack{d}.b = reshape(params(curPos:curPos+blen-1), netconfig.layersizes{d}, 1); 33 | curPos = curPos+blen; 34 | 35 | % Set previous layer size 36 | prevLayerSize = netconfig.layersizes{d}; 37 | end 38 | 39 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/softmaxCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/softmaxCost.m -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/softmaxPredict.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/softmaxPredict.m -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/sparseAutoencoderCost_modify.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/sparseAutoencoderCost_modify.m -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/stack2params.m: -------------------------------------------------------------------------------- 1 | function [params, netconfig] = stack2params(stack) 2 | 3 | % Converts a "stack" structure into a flattened parameter vector and also 4 | % stores the network configuration. This is useful when working with 5 | % optimization toolboxes such as minFunc. 6 | % 7 | % [params, netconfig] = stack2params(stack) 8 | % 9 | % stack - the stack structure, where stack{1}.w = weights of first layer 10 | % stack{1}.b = weights of first layer 11 | % stack{2}.w = weights of second layer 12 | % stack{2}.b = weights of second layer 13 | % ... etc. 14 | 15 | 16 | % Setup the compressed param vector 17 | params = []; 18 | for d = 1:numel(stack) 19 | 20 | % This can be optimized. But since our stacks are relatively short, it 21 | % is okay 22 | params = [params ; stack{d}.w(:) ; stack{d}.b(:) ]; 23 | 24 | % Check that stack is of the correct form 25 | assert(size(stack{d}.w, 1) == size(stack{d}.b, 1), ... 26 | ['The bias should be a *column* vector of ' ... 
27 | int2str(size(stack{d}.w, 1)) 'x1']); 28 | if d < numel(stack) 29 | assert(size(stack{d}.w, 1) == size(stack{d+1}.w, 2), ... 30 | ['The adjacent layers L' int2str(d) ' and L' int2str(d+1) ... 31 | ' should have matching sizes.']); 32 | end 33 | 34 | end 35 | 36 | if nargout > 1 37 | % Setup netconfig 38 | if numel(stack) == 0 39 | netconfig.inputsize = 0; 40 | netconfig.layersizes = {}; 41 | else 42 | netconfig.inputsize = size(stack{1}.w, 2); 43 | netconfig.layersizes = {}; 44 | for d = 1:numel(stack) 45 | netconfig.layersizes = [netconfig.layersizes ; size(stack{d}.w,1)]; 46 | end 47 | end 48 | end 49 | 50 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/stackedAECost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/stackedAECost.m -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/stackedAEExercise.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/stackedAEExercise.m -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/stackedAEPredict.m: -------------------------------------------------------------------------------- 1 | function [pred] = stackedAEPredict(theta, inputSize, hiddenSize, numClasses, netconfig, data) 2 | 3 | % stackedAEPredict: Takes a trained theta and a test data set, 4 | % and returns the predicted labels for each example. 5 | 6 | % theta: trained weights from the autoencoder 7 | % visibleSize: the number of input units 8 | % hiddenSize: the number of hidden units *at the 2nd layer* 9 | % numClasses: the number of categories 10 | % data: Our matrix containing the training data as columns. So, data(:,i) is the i-th training example. 11 | 12 | % Your code should produce the prediction matrix 13 | % pred, where pred(i) is argmax_c P(y(c) | x(i)). 14 | 15 | %% Unroll theta parameter 16 | 17 | % We first extract the part which compute the softmax gradient 18 | softmaxTheta = reshape(theta(1:hiddenSize*numClasses), numClasses, hiddenSize); 19 | 20 | % Extract out the "stack" 21 | stack = params2stack(theta(hiddenSize*numClasses+1:end), netconfig); 22 | 23 | %% ---------- YOUR CODE HERE -------------------------------------- 24 | % Instructions: Compute pred using theta assuming that the labels start 25 | % from 1. 
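%
% One generic way to write this step (a sketch; it relies on the sigmoid
% helper defined at the bottom of this file and works for any number of
% stacked layers, not only the two used below):
%
%   a = data;
%   for d = 1:numel(stack)
%       a = sigmoid(bsxfun(@plus, stack{d}.w * a, stack{d}.b));  % feedforward one layer
%   end
%   probs = exp(softmaxTheta * a);
%   probs = bsxfun(@rdivide, probs, sum(probs, 1));              % softmax over classes
%   [~, pred] = max(probs, [], 1);                               % most probable label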
26 | 27 | z2 = bsxfun(@plus, stack{1}.w*data, stack{1}.b); 28 | a2 = sigmoid(z2); 29 | z3 = bsxfun(@plus, stack{2}.w*a2, stack{2}.b); 30 | a3 = sigmoid(z3); 31 | z4 = softmaxTheta * a3; 32 | a4 = exp(z4); 33 | a4 = bsxfun(@rdivide, a4, sum(a4)); 34 | 35 | 36 | [p,pred] = max(a4, [], 1); 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | % ----------------------------------------------------------- 47 | 48 | end 49 | 50 | 51 | % You might find this useful 52 | function sigm = sigmoid(x) 53 | sigm = 1 ./ (1 + exp(-x)); 54 | end 55 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/displayColorNetwork.m: -------------------------------------------------------------------------------- 1 | function displayColorNetwork(A) 2 | 3 | % display receptive field(s) or basis vector(s) for image patches 4 | % 5 | % A the basis, with patches as column vectors 6 | 7 | % In case the midpoint is not set at 0, we shift it dynamically 8 | if min(A(:)) >= 0 9 | A = A - mean(A(:)); 10 | end 11 | 12 | cols = round(sqrt(size(A, 2))); 13 | 14 | channel_size = size(A,1) / 3; 15 | dim = sqrt(channel_size); 16 | dimp = dim+1; 17 | rows = ceil(size(A,2)/cols); 18 | B = A(1:channel_size,:); 19 | C = A(channel_size+1:channel_size*2,:); 20 | D = A(2*channel_size+1:channel_size*3,:); 21 | B=B./(ones(size(B,1),1)*max(abs(B))); 22 | C=C./(ones(size(C,1),1)*max(abs(C))); 23 | D=D./(ones(size(D,1),1)*max(abs(D))); 24 | % Initialization of the image 25 | I = ones(dim*rows+rows-1,dim*cols+cols-1,3); 26 | 27 | 
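% Each patch occupies dim pixels and is followed by a one-pixel gap
% (dimp = dim+1), so the canvas above is dim*rows+rows-1 pixels tall and
% dim*cols+cols-1 pixels wide.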
%Transfer features to this image matrix 28 | for i=0:rows-1 29 | for j=0:cols-1 30 | 31 | if i*cols+j+1 > size(B, 2) 32 | break 33 | end 34 | 35 | % This sets the patch 36 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,1) = ... 37 | reshape(B(:,i*cols+j+1),[dim dim]); 38 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,2) = ... 39 | reshape(C(:,i*cols+j+1),[dim dim]); 40 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,3) = ... 41 | reshape(D(:,i*cols+j+1),[dim dim]); 42 | 43 | end 44 | end 45 | 46 | I = I + 1; 47 | I = I / 2; 48 | imagesc(I); 49 | axis equal 50 | axis off 51 | 52 | end 53 | 54 | 55 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/initializeParameters.m: -------------------------------------------------------------------------------- 1 | function theta = initializeParameters(hiddenSize, visibleSize) 2 | 3 | %% Initialize parameters randomly based on layer sizes. 4 | r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] 5 | W1 = rand(hiddenSize, visibleSize) * 2 * r - r; 6 | W2 = rand(visibleSize, hiddenSize) * 2 * r - r; 7 | 8 | b1 = zeros(hiddenSize, 1); 9 | b2 = zeros(visibleSize, 1); 10 | 11 | % Convert weights and bias gradients to the vector form. 12 | % This step will "unroll" (flatten and concatenate together) all 13 | % your parameters into a vector, which can then be used with minFunc. 14 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end 
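% Quick illustrative check of the finite-difference branch above (quadObj
% and x0 are example names, not repository code): for f(x) = 0.5*||x||^2
% the gradient is x and the Hessian is the identity.
%
%   quadObj = @(x) deal(0.5 * (x' * x), x);        % returns [f, g] as autoHess expects
%   x0 = randn(5, 1);
%   [fFD, gFD, Hfd] = autoHess(x0, 0, quadObj);    % useComplex = 0 -> finite differencing
%   maxErr = max(max(abs(Hfd - eye(5))));          % should be very close to zero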
-------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian 
approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by 
Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 
ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum(mylogsumexp([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. 
*/ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; 
-------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/cnnConvolve.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/cnnConvolve.m -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/displayColorNetwork.m: 
-------------------------------------------------------------------------------- 1 | function displayColorNetwork(A) 2 | 3 | % display receptive field(s) or basis vector(s) for image patches 4 | % 5 | % A the basis, with patches as column vectors 6 | 7 | % In case the midpoint is not set at 0, we shift it dynamically 8 | if min(A(:)) >= 0 9 | A = A - mean(A(:)); 10 | end 11 | 12 | cols = round(sqrt(size(A, 2))); 13 | 14 | channel_size = size(A,1) / 3; 15 | dim = sqrt(channel_size); 16 | dimp = dim+1; 17 | rows = ceil(size(A,2)/cols); 18 | B = A(1:channel_size,:); 19 | C = A(channel_size+1:channel_size*2,:); 20 | D = A(2*channel_size+1:channel_size*3,:); 21 | B=B./(ones(size(B,1),1)*max(abs(B))); 22 | C=C./(ones(size(C,1),1)*max(abs(C))); 23 | D=D./(ones(size(D,1),1)*max(abs(D))); 24 | % Initialization of the image 25 | I = ones(dim*rows+rows-1,dim*cols+cols-1,3); 26 | 27 | %Transfer features to this image matrix 28 | for i=0:rows-1 29 | for j=0:cols-1 30 | 31 | if i*cols+j+1 > size(B, 2) 32 | break 33 | end 34 | 35 | % This sets the patch 36 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,1) = ... 37 | reshape(B(:,i*cols+j+1),[dim dim]); 38 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,2) = ... 39 | reshape(C(:,i*cols+j+1),[dim dim]); 40 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,3) = ... 41 | reshape(D(:,i*cols+j+1),[dim dim]); 42 | 43 | end 44 | end 45 | 46 | I = I + 1; 47 | I = I / 2; 48 | imagesc(I); 49 | axis equal 50 | axis off 51 | 52 | end 53 | 54 | 55 | -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/feedForwardAutoencoder.m: -------------------------------------------------------------------------------- 1 | function [activation] = feedForwardAutoencoder(theta, hiddenSize, visibleSize, data) 2 | 3 | % theta: trained weights from the autoencoder 4 | % visibleSize: the number of input units (probably 64) 5 | % hiddenSize: the number of hidden units (probably 25) 6 | % data: Our matrix containing the training data as columns. So, data(:,i) is the i-th training example. 7 | 8 | % We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this 9 | % follows the notation convention of the lecture notes. 10 | 11 | W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize); 12 | b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize); 13 | 14 | %% ---------- YOUR CODE HERE -------------------------------------- 15 | % Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder. 16 | m = size(data, 2); 17 | z2 = W1 * data + repmat(b1,1,m); 18 | a2 = sigmoid(z2); 19 | activation = a2; 20 | %------------------------------------------------------------------- 21 | 22 | end 23 | 24 | %------------------------------------------------------------------- 25 | % Here's an implementation of the sigmoid function, which you may find useful 26 | % in your computation of the costs and the gradients. This inputs a (row or 27 | % column) vector (say (z1, z2, z3)) and returns (f(z1), f(z2), f(z3)). 
28 | 29 | function sigm = sigmoid(x) 30 | sigm = 1 ./ (1 + exp(-x)); 31 | end 32 | -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end 
-------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | 
options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmaci 
-------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 
| yXw = y.*Xw; 10 | 11 | nll = sum(mylogsumexp([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. */ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if 
verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 
11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/softmaxCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/softmaxCost.m -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/softmaxPredict.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/softmaxPredict.m -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #Introduction# 2 | 3 | 1. This repo contains solutions to the exercises of Andrew Ng's deep learning tutorial ([ufldl tutorial][1]). 4 | 5 | 2. Because many of the tutorial's exercises build on parts completed earlier, you will see that different exercises contain duplicate files; for completeness I have not removed these duplicates. 6 | 7 | 3. The sparse coding part is unfinished and the ICA exercise has not been done yet; the code is also not yet optimized, especially in the earlier exercises. I am uploading it as-is for now. 8 | 9 | 4. The author of this [blog][2] has not only completed the exercises but also added his own explanations and comments, so it is a very good reference. 10 | 11 | #To Do# 12 | 13 | 1. Remove redundant files 14 | 15 | 2. Add comments 16 | 17 | 3. Finish the remaining two exercises 18 | 19 | 4. Optimize the code 20 | 21 | [1]: http://ufldl.stanford.edu/wiki/index.php/UFLDL_Tutorial 22 | [2]: http://www.cnblogs.com/tornadomeet/category/361811.html 23 | 24 | --------------------------------------------------------------------------------
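For reference, every exercise in this repo is trained with the same pattern: a cost function (sparseAutoencoderCost, softmaxCost, sparseCodingWeightCost, ...) returns an objective value and its gradient, and minFunc minimizes it with L-BFGS. The sketch below shows that wiring for the Exercise 1 sparse autoencoder. It is a minimal example under assumed settings, not a copy of this repo's train.m; the hyperparameter values are the UFLDL tutorial's usual defaults and are assumptions here.

```matlab
% Minimal sketch (assumed UFLDL defaults) of the training loop shared by these exercises.
visibleSize   = 8*8;     % number of input units: one 8x8 image patch
hiddenSize    = 25;      % number of hidden units
sparsityParam = 0.01;    % desired average activation of the hidden units
lambda        = 1e-4;    % weight decay parameter
beta          = 3;       % weight of the sparsity penalty term

patches = sampleIMAGES();                                 % 64 x 10000 matrix of training patches (needs IMAGES.mat)
theta   = initializeParameters(hiddenSize, visibleSize);  % random initial (W1,W2,b1,b2) parameter vector

addpath minFunc/
options = struct('Method', 'lbfgs', 'maxIter', 400, 'display', 'on');
[opttheta, cost] = minFunc(@(p) sparseAutoencoderCost(p, visibleSize, hiddenSize, ...
                                lambda, sparsityParam, beta, patches), theta, options);

% Visualize the learned features: each row of W1 is one 8x8 receptive field.
W1 = reshape(opttheta(1:hiddenSize*visibleSize), hiddenSize, visibleSize);
display_network(W1');
```

The later exercises reuse this loop, swapping in the corresponding cost function and data (MNIST digits, whitened patches, color patches), which is why the minFunc folder is duplicated across them.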