├── Exercise1 Sparse Autoencoder ├── checkNumericalGradient.m ├── computeNumericalGradient.m ├── display_network.m ├── initializeParameters.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── sampleIMAGES.m ├── sparseAutoencoderCost.m ├── train.m └── weights.jpg ├── Exercise10 Sparse Coding ├── IMAGES.mat ├── checkNumericalGradient.m ├── computeNumericalGradient.m ├── display_network.m ├── sampleIMAGES.m ├── sparseCodingExercise.m ├── sparseCodingFeatureCost.m └── sparseCodingWeightCost.m ├── Exercise2 Vectorization ├── checkNumericalGradient.m ├── computeNumericalGradient.asv ├── computeNumericalGradient.m ├── display_network.m ├── initializeParameters.m ├── loadMNISTImages.m ├── loadMNISTLabels.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── sampleIMAGES.m ├── sparseAutoencoderCost.m ├── train.m └── weights.jpg ├── Exercise3 PCA in 2D ├── pcaData.txt ├── pca_2d.asv └── pca_2d.m ├── Exercise4 PCA and Whitening ├── display_network.m ├── pca_gen.m └── sampleIMAGESRAW.m ├── Exercise5 Softmax Regression ├── computeNumericalGradient.m ├── loadMNISTImages.m ├── loadMNISTLabels.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ 
├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── softmaxCost.m ├── softmaxExercise.m ├── softmaxPredict.m └── softmaxTrain.m ├── Exercise6 Self-Taught Learning ├── computeNumericalGradient.m ├── display_network.m ├── feedForwardAutoencoder.m ├── initializeParameters.m ├── loadMNISTImages.m ├── loadMNISTLabels.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── softmaxCost.m ├── softmaxPredict.m ├── softmaxTrain.m ├── sparseAutoencoderCost.m ├── stlExercise.m └── testMemory.m ├── Exercise7 Implement deep networks for digit classification ├── checkStackedAECost.m ├── computeNumericalGradient.m ├── feedForwardAutoencoder.m ├── initializeParameters.m ├── loadMNISTImages.m ├── loadMNISTLabels.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── params2stack.m ├── softmaxCost.m ├── softmaxPredict.m ├── softmaxTrain.m ├── sparseAutoencoderCost.m ├── sparseAutoencoderCost_modify.m ├── stack2params.m ├── stackedAECost.m ├── stackedAEExercise.m └── stackedAEPredict.m ├── Exercise8 Learning color features with Sparse Autoencoders ├── computeNumericalGradient.m ├── displayColorNetwork.m ├── initializeParameters.m ├── linearDecoderExercise.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ 
├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m └── sparseAutoencoderLinearCost.m ├── Exercise9 Convolution and Pooling ├── cnnConvolve.m ├── cnnExercise.m ├── cnnPool.m ├── displayColorNetwork.m ├── feedForwardAutoencoder.m ├── minFunc │ ├── ArmijoBacktrack.m │ ├── WolfeLineSearch.m │ ├── autoGrad.m │ ├── autoHess.m │ ├── autoHv.m │ ├── autoTensor.m │ ├── callOutput.m │ ├── conjGrad.m │ ├── dampedUpdate.m │ ├── example_minFunc.m │ ├── example_minFunc_LR.m │ ├── isLegal.m │ ├── lbfgs.m │ ├── lbfgsC.c │ ├── lbfgsC.mexa64 │ ├── lbfgsC.mexglx │ ├── lbfgsC.mexmac │ ├── lbfgsC.mexmaci │ ├── lbfgsC.mexmaci64 │ ├── lbfgsC.mexw32 │ ├── lbfgsC.mexw64 │ ├── lbfgsUpdate.m │ ├── logistic │ │ ├── LogisticDiagPrecond.m │ │ ├── LogisticHv.m │ │ ├── LogisticLoss.m │ │ ├── mexutil.c │ │ ├── mexutil.h │ │ ├── mylogsumexp.m │ │ ├── repmatC.c │ │ ├── repmatC.dll │ │ ├── repmatC.mexglx │ │ └── repmatC.mexmac │ ├── mchol.m │ ├── mcholC.c │ ├── mcholC.mexmaci64 │ ├── mcholC.mexw32 │ ├── mcholC.mexw64 │ ├── mcholinc.m │ ├── minFunc.m │ ├── minFunc_processInputOptions.m │ ├── polyinterp.m │ ├── precondDiag.m │ ├── precondTriu.m │ ├── precondTriuDiag.m │ ├── rosenbrock.m │ └── taylorModel.m ├── softmaxCost.m ├── softmaxPredict.m └── softmaxTrain.m └── README.md /Exercise1 Sparse Autoencoder/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 
21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/initializeParameters.m: -------------------------------------------------------------------------------- 1 | function theta = initializeParameters(hiddenSize, visibleSize) 2 | 3 | %% Initialize parameters randomly based on layer sizes. 4 | r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] 5 | W1 = rand(hiddenSize, visibleSize) * 2 * r - r; 6 | W2 = rand(visibleSize, hiddenSize) * 2 * r - r; 7 | 8 | b1 = zeros(hiddenSize, 1); 9 | b2 = zeros(visibleSize, 1); 10 | 11 | % Convert weights and bias gradients to the vector form. 12 | % This step will "unroll" (flatten and concatenate together) all 13 | % your parameters into a vector, which can then be used with minFunc. 14 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal 
g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); 
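Note on the gradient checker above: computeNumericalGradient.m uses the central difference numgrad(i) = (J(theta + epsilon*e_i) - J(theta - epsilon*e_i)) / (2*epsilon) with epsilon = 1e-4, the same finite-difference idea as the non-complex branch of autoGrad.m. The short driver below is only an illustrative sketch, not a file from this repository; the quadratic cost simpleCost and the quoted tolerance are assumptions made for the example.

% Illustrative gradient check (not part of the repository): verify the
% central-difference approximation against a cost with a known gradient.
% For J(theta) = 0.5*theta'*theta the exact gradient is theta itself.
simpleCost = @(theta) deal(0.5 * (theta' * theta), theta);  % returns [J, grad]
theta0 = randn(5, 1);
numGrad = computeNumericalGradient(simpleCost, theta0);
[~, analyticGrad] = simpleCost(theta0);
relDiff = norm(numGrad - analyticGrad) / norm(numGrad + analyticGrad);
fprintf('relative difference = %g (should be around 1e-9 or smaller)\n', relDiff);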
-------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + 
s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end 
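The two files just listed work as a pair: lbfgs.m applies the L-BFGS inverse-Hessian approximation to a vector via the standard two-loop recursion, and lbfgsUpdate.m maintains the correction pairs, skipping any pair with y'*s <= 1e-10 so the approximation stays positive definite. The sketch below is illustrative only (the small quadratic objective, the fixed step size and the memory of 10 corrections are assumptions; minFunc.m itself uses a Wolfe line search) and shows how the two routines fit together.

% Illustrative L-BFGS loop on a small quadratic f(x) = 0.5*x'*A*x - b'*x,
% whose gradient is A*x - b and whose minimizer solves A*x = b.
A = [3 1; 1 2];  b = [1; -1];
x = zeros(2, 1);  g = A*x - b;
old_dirs = zeros(2, 0);  old_stps = zeros(2, 0);  Hdiag = 1;
for iter = 1:20
    if isempty(old_dirs)
        d = -g;                                    % first iteration: steepest descent
    else
        d = lbfgs(-g, old_dirs, old_stps, Hdiag);  % two-loop recursion
    end
    t = 0.5;                                       % fixed step, for this sketch only
    s = t*d;  x = x + s;
    g_new = A*x - b;  y = g_new - g;
    [old_dirs, old_stps, Hdiag] = lbfgsUpdate(y, s, 10, 0, old_dirs, old_stps, Hdiag);
    g = g_new;
end
fprintf('residual ||A*x - b|| = %g\n', norm(A*x - b));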
-------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum( ([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. 
*/ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | 
y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/sampleIMAGES.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/sampleIMAGES.m -------------------------------------------------------------------------------- /Exercise1 Sparse Autoencoder/weights.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise1 Sparse Autoencoder/weights.jpg -------------------------------------------------------------------------------- /Exercise10 Sparse Coding/IMAGES.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise10 Sparse Coding/IMAGES.mat -------------------------------------------------------------------------------- /Exercise10 Sparse Coding/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise10 Sparse Coding/sampleIMAGES.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise10 Sparse Coding/sampleIMAGES.m -------------------------------------------------------------------------------- /Exercise10 Sparse Coding/sparseCodingFeatureCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise10 Sparse Coding/sparseCodingFeatureCost.m -------------------------------------------------------------------------------- /Exercise10 Sparse Coding/sparseCodingWeightCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise10 Sparse Coding/sparseCodingWeightCost.m -------------------------------------------------------------------------------- /Exercise2 Vectorization/computeNumericalGradient.asv: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function 
that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | J1, gra 23 | numgrad = 24 | 25 | 26 | 27 | 28 | 29 | 30 | %% --------------------------------------------------------------- 31 | end 32 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/initializeParameters.m: -------------------------------------------------------------------------------- 1 | function theta = initializeParameters(hiddenSize, visibleSize) 2 | 3 | %% Initialize parameters randomly based on layer sizes. 
4 | r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] 5 | W1 = rand(hiddenSize, visibleSize) * 2 * r - r; 6 | W2 = rand(visibleSize, hiddenSize) * 2 * r - r; 7 | 8 | b1 = zeros(hiddenSize, 1); 9 | b2 = zeros(visibleSize, 1); 10 | 11 | % Convert weights and bias gradients to the vector form. 12 | % This step will "unroll" (flatten and concatenate together) all 13 | % your parameters into a vector, which can then be used with minFunc. 14 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/loadMNISTImages.m: -------------------------------------------------------------------------------- 1 | function images = loadMNISTImages(filename) 2 | %loadMNISTImages returns a 28x28x[number of MNIST images] matrix containing 3 | %the raw MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2051, ['Bad magic number in ', filename, '']); 10 | 11 | numImages = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | numRows = fread(fp, 1, 'int32', 0, 'ieee-be'); 13 | numCols = fread(fp, 1, 'int32', 0, 'ieee-be'); 14 | 15 | images = fread(fp, inf, 'unsigned char'); 16 | images = reshape(images, numCols, numRows, numImages); 17 | images = permute(images,[2 1 3]); 18 | 19 | fclose(fp); 20 | 21 | % Reshape to #pixels x #examples 22 | images = reshape(images, size(images, 1) * size(images, 2), size(images, 3)); 23 | % Convert to double and rescale to [0,1] 24 | images = double(images) / 255; 25 | 26 | end 27 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/loadMNISTLabels.m: -------------------------------------------------------------------------------- 1 | function labels = loadMNISTLabels(filename) 2 | %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing 3 | %the labels for the MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2049, ['Bad magic number in ', filename, '']); 10 | 11 | numLabels = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | 13 | labels = fread(fp, inf, 'unsigned char'); 14 | 15 | assert(size(labels,1) == numLabels, 'Mismatch in label count'); 16 | 17 | fclose(fp); 18 | 19 | end 20 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/autoHess.m: 
-------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = 
diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full 
BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function 
[old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum( ([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. 
*/ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); 
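A small hedged sketch (not part of minFunc) of what the precondTriu helper above computes: if M = U'*U is a symmetric positive definite approximation of the Hessian, with upper-triangular Cholesky factor U, then applying the preconditioner to a residual r means solving M*y = r, which reduces to the two triangular solves in the one-liner above. The matrix M below is an arbitrary stand-in chosen only for illustration.

A = randn(5); M = A'*A + eye(5);   % stand-in SPD Hessian approximation (assumption)
U = chol(M);                       % upper-triangular factor, so M = U'*U
r = randn(5,1);
y = U \ (U' \ r);                  % the same operation precondTriu(r,U) performs
norm(M*y - r)                      % close to zero: y solves M*y = r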
-------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise2 Vectorization/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise2 Vectorization/sampleIMAGES.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/sampleIMAGES.m -------------------------------------------------------------------------------- /Exercise2 Vectorization/train.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/train.m -------------------------------------------------------------------------------- /Exercise2 Vectorization/weights.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise2 Vectorization/weights.jpg -------------------------------------------------------------------------------- /Exercise3 PCA in 2D/pcaData.txt: -------------------------------------------------------------------------------- 1 | -6.7644914e-01 -6.3089308e-01 -4.8915202e-01 -4.8005424e-01 -3.7842021e-01 -3.3788391e-01 -3.2023528e-01 -3.1108837e-01 -2.3145555e-01 -1.9623727e-01 -1.5678926e-01 -1.4900779e-01 -1.0861557e-01 -1.0506308e-01 -8.0899829e-02 -7.1157518e-02 -6.3251073e-02 -2.6007219e-02 -2.2553443e-02 -5.8489047e-03 -4.3935323e-03 -1.7309716e-03 7.8223728e-03 7.5386969e-02 8.6608396e-02 9.6406046e-02 1.0331683e-01 1.0531131e-01 1.1493296e-01 1.3052813e-01 1.6626253e-01 1.7901863e-01 1.9267343e-01 1.9414427e-01 1.9770003e-01 2.3043613e-01 3.2715844e-01 3.2737163e-01 3.2922364e-01 3.4869293e-01 3.7500704e-01 4.2830153e-01 4.5432503e-01 5.4422436e-01 6.6539963e-01 2 | -4.4722050e-01 -7.4778067e-01 -3.9074344e-01 -5.6036362e-01 -3.4291940e-01 -1.3832158e-01 1.2360939e-01 -3.3934986e-01 -8.2868433e-02 -2.4759514e-01 -1.0914760e-01 4.2243921e-01 -5.2329327e-02 -2.0126541e-01 1.3016657e-01 1.2293321e-01 -3.4787750e-01 -1.4584897e-01 -1.0559656e-01 -5.4200847e-02 1.6915422e-02 -1.1069762e-01 9.0859816e-02 1.5269096e-01 -9.4416463e-02 1.5116385e-01 -1.3540126e-01 2.4592698e-01 5.1087447e-02 2.4583340e-01 -5.9535372e-02 2.9704742e-01 1.0168115e-01 1.4258649e-01 1.0662592e-01 3.1698532e-01 6.1577841e-01 4.3911172e-01 2.7156501e-01 1.3572389e-01 3.1918066e-01 1.5122962e-01 3.4979047e-01 6.2316971e-01 5.2018811e-01 3 | -------------------------------------------------------------------------------- /Exercise3 PCA in 2D/pca_2d.asv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise3 PCA in 2D/pca_2d.asv -------------------------------------------------------------------------------- /Exercise3 PCA in 2D/pca_2d.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise3 PCA in 2D/pca_2d.m -------------------------------------------------------------------------------- /Exercise4 PCA and Whitening/pca_gen.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise4 PCA and Whitening/pca_gen.m -------------------------------------------------------------------------------- /Exercise4 PCA and Whitening/sampleIMAGESRAW.m: -------------------------------------------------------------------------------- 1 | function patches = sampleIMAGESRAW 2 | 3 | % sampleIMAGESRAW 4 | % Returns 10000 "raw" unwhitened patches 5 | 6 | 7 | load IMAGES_RAW; 8 | IMAGES = IMAGESr; 9 | 10 | patchSize = 12; 11 | numPatches = 10000; 12 | 13 | % Initialize patches with zeros. Your code will fill in this matrix--one 14 | % column per patch, 10000 columns. 
15 | patches = zeros(patchSize*patchSize, numPatches); 16 | 17 | p = 0; 18 | for im = 1:size(IMAGES, 3) 19 | 20 | % Sample Patches 21 | numsamples = numPatches / size(IMAGES, 3); 22 | for s = 1:numsamples 23 | y = randi(size(IMAGES,1)-patchSize+1); 24 | x = randi(size(IMAGES,2)-patchSize+1); 25 | sample = IMAGES(y:y+patchSize-1, x:x+patchSize-1,im); 26 | p = p + 1; 27 | patches(:, p) = sample(:); 28 | end 29 | 30 | end 31 | 32 | end 33 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/loadMNISTImages.m: -------------------------------------------------------------------------------- 1 | function images = loadMNISTImages(filename) 2 | %loadMNISTImages returns a 28x28x[number of MNIST images] matrix containing 3 | %the raw MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2051, ['Bad magic number in ', filename, '']); 10 | 11 | numImages = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | numRows = fread(fp, 1, 'int32', 0, 'ieee-be'); 13 | numCols = fread(fp, 1, 'int32', 0, 'ieee-be'); 14 | 15 | images = fread(fp, inf, 'unsigned char'); 16 | images = reshape(images, numCols, numRows, numImages); 17 | images = permute(images,[2 1 3]); 18 | 19 | fclose(fp); 20 | 21 | % Reshape to #pixels x #examples 22 | images = reshape(images, size(images, 1) * size(images, 2), size(images, 3)); 23 | % Convert to double and rescale to [0,1] 24 | images = double(images) / 255; 25 | 26 | end 27 | 
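A minimal usage sketch (not part of the repository) showing how the loader above and its companion loadMNISTLabels (listed next) are typically combined. The file names below are the standard MNIST download names and are an assumption here, not taken from the exercise scripts.

images = loadMNISTImages('train-images-idx3-ubyte');   % #pixels x #examples, doubles in [0,1]
labels = loadMNISTLabels('train-labels-idx1-ubyte');    % #examples x 1, digit labels 0-9
assert(size(images, 2) == numel(labels), 'image/label count mismatch');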
-------------------------------------------------------------------------------- /Exercise5 Softmax Regression/loadMNISTLabels.m: -------------------------------------------------------------------------------- 1 | function labels = loadMNISTLabels(filename) 2 | %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing 3 | %the labels for the MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2049, ['Bad magic number in ', filename, '']); 10 | 11 | numLabels = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | 13 | labels = fread(fp, inf, 'unsigned char'); 14 | 15 | assert(size(labels,1) == numLabels, 'Mismatch in label count'); 16 | 17 | fclose(fp); 18 | 19 | end 20 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % 
[f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | 
minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise5 Softmax 
Regression/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = 
LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum( ([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. */ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 
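%
% Illustrative sketch (not from the original file): one way this test function
% is commonly driven through minFunc. The option values are an assumption
% modelled on example_minFunc.m elsewhere in this repository.
%
%   options.Method  = 'lbfgs';
%   options.Display = 'iter';
%   xMin = minFunc(@rosenbrock, randn(10,1), options);  % should approach ones(10,1)
%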
11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/softmaxCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/softmaxCost.m -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/softmaxPredict.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise5 Softmax Regression/softmaxPredict.m -------------------------------------------------------------------------------- /Exercise5 Softmax Regression/softmaxTrain.m: -------------------------------------------------------------------------------- 1 | function [softmaxModel] = softmaxTrain(inputSize, numClasses, lambda, inputData, labels, options) 2 | %softmaxTrain Train a softmax model with the given parameters on the given 3 | % data. Returns softmaxOptTheta, a vector containing the trained parameters 4 | % for the model. 5 | % 6 | % inputSize: the size of an input vector x^(i) 7 | % numClasses: the number of classes 8 | % lambda: weight decay parameter 9 | % inputData: an N by M matrix containing the input data, such that 10 | % inputData(:, c) is the cth input 11 | % labels: M by 1 matrix containing the class labels for the 12 | % corresponding inputs. 
labels(c) is the class label for 13 | % the cth input 14 | % options (optional): options 15 | % options.maxIter: number of iterations to train for 16 | 17 | if ~exist('options', 'var') 18 | options = struct; 19 | end 20 | 21 | if ~isfield(options, 'maxIter') 22 | options.maxIter = 400; 23 | end 24 | 25 | % initialize parameters 26 | theta = 0.005 * randn(numClasses * inputSize, 1); 27 | 28 | % Use minFunc to minimize the function 29 | addpath minFunc/ 30 | options.Method = 'lbfgs'; % Here, we use L-BFGS to optimize our cost 31 | % function. Generally, for minFunc to work, you 32 | % need a function pointer with two outputs: the 33 | % function value and the gradient. In our problem, 34 | % softmaxCost.m satisfies this. 35 | minFuncOptions.display = 'on'; 36 | 37 | [softmaxOptTheta, cost] = minFunc( @(p) softmaxCost(p, ... 38 | numClasses, inputSize, lambda, ... 39 | inputData, labels), ... 40 | theta, options); 41 | 42 | % Fold softmaxOptTheta into a nicer format 43 | softmaxModel.optTheta = reshape(softmaxOptTheta, numClasses, inputSize); 44 | softmaxModel.inputSize = inputSize; 45 | softmaxModel.numClasses = numClasses; 46 | 47 | end 48 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 
21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | temp1 = theta; 33 | temp2 = theta; 34 | temp1(i) = temp1(i) + epsilon; 35 | temp2(i) = temp2(i) - epsilon; 36 | [J1, grad] = J(temp1); 37 | [J2, grad] = J(temp2); 38 | numgrad(i) = (J1 - J2) / (2*epsilon); 39 | end 40 | 41 | 42 | % theta1 = theta + epsilon; 43 | % theta2 = theta - epsilon; 44 | % [J1(i), grad] = J(temp1); 45 | % [J2(i), grad] = J(temp2); 46 | 47 | 48 | 49 | 50 | %% --------------------------------------------------------------- 51 | end 52 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/feedForwardAutoencoder.m: -------------------------------------------------------------------------------- 1 | function [activation] = feedForwardAutoencoder(theta, hiddenSize, visibleSize, data) 2 | 3 | % theta: trained weights from the autoencoder 4 | % visibleSize: the number of input units (probably 64) 5 | % hiddenSize: the number of hidden units (probably 25) 6 | % data: Our matrix containing the training data as columns. So, data(:,i) is the i-th training example. 7 | 8 | % We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this 9 | % follows the notation convention of the lecture notes. 10 | 11 | W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize); 12 | b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize); 13 | 14 | %% ---------- YOUR CODE HERE -------------------------------------- 15 | % Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder. 16 | m = size(data, 2); 17 | z2 = W1 * data + repmat(b1,1,m); 18 | a2 = sigmoid(z2); 19 | activation = a2; 20 | %------------------------------------------------------------------- 21 | 22 | end 23 | 24 | %------------------------------------------------------------------- 25 | % Here's an implementation of the sigmoid function, which you may find useful 26 | % in your computation of the costs and the gradients. This inputs a (row or 27 | % column) vector (say (z1, z2, z3)) and returns (f(z1), f(z2), f(z3)). 28 | 29 | function sigm = sigmoid(x) 30 | sigm = 1 ./ (1 + exp(-x)); 31 | end 32 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/initializeParameters.m: -------------------------------------------------------------------------------- 1 | function theta = initializeParameters(hiddenSize, visibleSize) 2 | 3 | %% Initialize parameters randomly based on layer sizes. 4 | r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] 5 | W1 = rand(hiddenSize, visibleSize) * 2 * r - r; 6 | W2 = rand(visibleSize, hiddenSize) * 2 * r - r; 7 | 8 | b1 = zeros(hiddenSize, 1); 9 | b2 = zeros(visibleSize, 1); 10 | 11 | % Convert weights and bias gradients to the vector form. 12 | % This step will "unroll" (flatten and concatenate together) all 13 | % your parameters into a vector, which can then be used with minFunc. 
14 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/loadMNISTImages.m: -------------------------------------------------------------------------------- 1 | function images = loadMNISTImages(filename) 2 | %loadMNISTImages returns a 28x28x[number of MNIST images] matrix containing 3 | %the raw MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2051, ['Bad magic number in ', filename, '']); 10 | 11 | numImages = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | numRows = fread(fp, 1, 'int32', 0, 'ieee-be'); 13 | numCols = fread(fp, 1, 'int32', 0, 'ieee-be'); 14 | 15 | images = fread(fp, inf, 'unsigned char'); 16 | images = reshape(images, numCols, numRows, numImages); 17 | images = permute(images,[2 1 3]); 18 | 19 | fclose(fp); 20 | 21 | % Reshape to #pixels x #examples 22 | images = reshape(images, size(images, 1) * size(images, 2), size(images, 3)); 23 | % Convert to double and rescale to [0,1] 24 | images = double(images) / 255; 25 | 26 | end 27 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/loadMNISTLabels.m: -------------------------------------------------------------------------------- 1 | function labels = loadMNISTLabels(filename) 2 | %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing 3 | %the labels for the MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2049, ['Bad magic number in ', filename, '']); 10 | 11 | numLabels = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | 13 | labels = fread(fp, inf, 'unsigned char'); 14 | 15 | assert(size(labels,1) == numLabels, 'Mismatch in label count'); 16 | 17 | fclose(fp); 18 | 19 | end 20 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use 
finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full 
Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al 
=zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory 
Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum( ([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. 
*/ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = 
precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/softmaxCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/softmaxCost.m -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/softmaxPredict.m: -------------------------------------------------------------------------------- 1 | function [pred] = softmaxPredict(softmaxModel, data) 2 | 3 | % softmaxModel - model trained using softmaxTrain 4 | % data - the N x M input matrix, where each column data(:, i) corresponds to 5 | % a single test set 6 | % 7 | % Your code should produce the prediction matrix 8 | % pred, where pred(i) is argmax_c P(y(c) | x(i)). 
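%
% As a sketch only (the names thetaMat, dataMat and predLabels below are
% illustrative placeholders, not variables from this file): the same
% prediction is numerically safer if the column-wise maximum is subtracted
% before exponentiating, which cannot change the argmax.
%
%   scores = thetaMat * dataMat;                          % numClasses x numExamples
%   scores = bsxfun(@minus, scores, max(scores, [], 1));  % guard exp() against overflow
%   probs  = exp(scores);
%   probs  = bsxfun(@rdivide, probs, sum(probs, 1));      % normalize each column
%   [~, predLabels] = max(probs, [], 1);                  % predicted class per example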
9 | 10 | % Unroll the parameters from theta 11 | theta = softmaxModel.optTheta; % this provides a numClasses x inputSize matrix 12 | pred = zeros(1, size(data, 2)); 13 | 14 | %% ---------- YOUR CODE HERE -------------------------------------- 15 | % Instructions: Compute pred using theta assuming that the labels start 16 | % from 1. 17 | numClasses = softmaxModel.numClasses; 18 | inputSize = softmaxModel.inputSize; 19 | theta = reshape(theta, numClasses, inputSize); 20 | 21 | M = exp(theta * data); 22 | M = bsxfun(@rdivide, M, sum(M)); 23 | 24 | [p,pred] = max(M, [], 1); 25 | 26 | 27 | 28 | 29 | 30 | % --------------------------------------------------------------------- 31 | 32 | end 33 | 34 | -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/stlExercise.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise6 Self-Taught Learning/stlExercise.m -------------------------------------------------------------------------------- /Exercise6 Self-Taught Learning/testMemory.m: -------------------------------------------------------------------------------- 1 | function testMemory() 2 | y = zeros(1011,10000); 3 | memory; -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/checkStackedAECost.m: -------------------------------------------------------------------------------- 1 | function [] = checkStackedAECost() 2 | 3 | % Check the gradients for the stacked autoencoder 4 | % 5 | % In general, we recommend that the creation of such files for checking 6 | % gradients when you write new cost functions. 7 | % 8 | 9 | %% Setup random data / small model 10 | inputSize = 4; 11 | hiddenSize = 5; 12 | lambda = 0.01; 13 | data = randn(inputSize, 5); 14 | labels = [ 1 2 1 2 1 ]; 15 | numClasses = 2; 16 | 17 | stack = cell(2,1); 18 | stack{1}.w = 0.1 * randn(3, inputSize); 19 | stack{1}.b = zeros(3, 1); 20 | stack{2}.w = 0.1 * randn(hiddenSize, 3); 21 | stack{2}.b = zeros(hiddenSize, 1); 22 | softmaxTheta = 0.005 * randn(hiddenSize * numClasses, 1); 23 | 24 | [stackparams, netconfig] = stack2params(stack); 25 | stackedAETheta = [ softmaxTheta ; stackparams ]; 26 | 27 | 28 | [cost, grad] = stackedAECost(stackedAETheta, inputSize, hiddenSize, ... 29 | numClasses, netconfig, ... 30 | lambda, data, labels); 31 | 32 | % Check that the numerical and analytic gradients are the same 33 | numgrad = computeNumericalGradient( @(x) stackedAECost(x, inputSize, ... 34 | hiddenSize, numClasses, netconfig, ... 35 | lambda, data, labels), ... 36 | stackedAETheta); 37 | 38 | % Use this to visually compare the gradients side by side 39 | disp([numgrad grad]); 40 | 41 | % Compare numerically computed gradients with the ones obtained from backpropagation 42 | disp('Norm between numerical and analytical gradient (should be less than 1e-9)'); 43 | diff = norm(numgrad-grad)/norm(numgrad+grad); 44 | disp(diff); % Should be small. In our implementation, these values are 45 | % usually less than 1e-9. 46 | 47 | % When you got this working, Congratulations!!! 
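%
% Illustrative aside (toyObj, xTest and relErr are placeholder names, not
% part of this exercise): the same relative-error diagnostic can be tried
% on a toy objective with a known gradient, f(x) = 0.5*||x||^2, whose
% analytic gradient is x itself.
%
%   toyObj = @(x) deal(0.5 * (x' * x), x);              % returns [cost, grad]
%   xTest  = randn(4, 1);
%   numg   = computeNumericalGradient(@(x) toyObj(x), xTest);
%   relErr = norm(numg - xTest) / norm(numg + xTest);   % should be far below 1e-9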
48 | 49 | 50 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/feedForwardAutoencoder.m: -------------------------------------------------------------------------------- 1 | function [activation] = feedForwardAutoencoder(theta, hiddenSize, visibleSize, data) 2 | 3 | % theta: trained weights from the autoencoder 4 | % visibleSize: the number of input units (probably 64) 5 | % hiddenSize: the number of hidden units (probably 25) 6 | % data: Our matrix containing the training data as columns. So, data(:,i) is the i-th training example. 7 | 8 | % We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this 9 | % follows the notation convention of the lecture notes. 10 | 11 | W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize); 12 | b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize); 13 | 14 | %% ---------- YOUR CODE HERE -------------------------------------- 15 | % Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder. 16 | m = size(data, 2); 17 | z2 = W1 * data + repmat(b1,1,m); 18 | a2 = sigmoid(z2); 19 | activation = a2; 20 | %------------------------------------------------------------------- 21 | 22 | end 23 | 24 | %------------------------------------------------------------------- 25 | % Here's an implementation of the sigmoid function, which you may find useful 26 | % in your computation of the costs and the gradients. 
This inputs a (row or 27 | % column) vector (say (z1, z2, z3)) and returns (f(z1), f(z2), f(z3)). 28 | 29 | function sigm = sigmoid(x) 30 | sigm = 1 ./ (1 + exp(-x)); 31 | end 32 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/initializeParameters.m: -------------------------------------------------------------------------------- 1 | function theta = initializeParameters(hiddenSize, visibleSize) 2 | 3 | %% Initialize parameters randomly based on layer sizes. 4 | r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] 5 | W1 = rand(hiddenSize, visibleSize) * 2 * r - r; 6 | W2 = rand(visibleSize, hiddenSize) * 2 * r - r; 7 | 8 | b1 = zeros(hiddenSize, 1); 9 | b2 = zeros(visibleSize, 1); 10 | 11 | % Convert weights and bias gradients to the vector form. 12 | % This step will "unroll" (flatten and concatenate together) all 13 | % your parameters into a vector, which can then be used with minFunc. 14 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/loadMNISTImages.m: -------------------------------------------------------------------------------- 1 | function images = loadMNISTImages(filename) 2 | %loadMNISTImages returns a 28x28x[number of MNIST images] matrix containing 3 | %the raw MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2051, ['Bad magic number in ', filename, '']); 10 | 11 | numImages = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | numRows = fread(fp, 1, 'int32', 0, 'ieee-be'); 13 | numCols = fread(fp, 1, 'int32', 0, 'ieee-be'); 14 | 15 | images = fread(fp, inf, 'unsigned char'); 16 | images = reshape(images, numCols, numRows, numImages); 17 | images = permute(images,[2 1 3]); 18 | 19 | fclose(fp); 20 | 21 | % Reshape to #pixels x #examples 22 | images = reshape(images, size(images, 1) * size(images, 2), size(images, 3)); 23 | % Convert to double and rescale to [0,1] 24 | images = double(images) / 255; 25 | 26 | end 27 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/loadMNISTLabels.m: -------------------------------------------------------------------------------- 1 | function labels = loadMNISTLabels(filename) 2 | %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing 3 | %the labels for the MNIST images 4 | 5 | fp = fopen(filename, 'rb'); 6 | assert(fp ~= -1, ['Could not open ', filename, '']); 7 | 8 | magic = fread(fp, 1, 'int32', 0, 'ieee-be'); 9 | assert(magic == 2049, ['Bad magic number in ', filename, '']); 10 | 11 | numLabels = fread(fp, 1, 'int32', 0, 'ieee-be'); 12 | 13 | labels = fread(fp, inf, 'unsigned char'); 14 | 15 | assert(size(labels,1) == numLabels, 'Mismatch in label count'); 16 | 17 | fclose(fp); 18 | 19 | end 20 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function 
values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval 
= f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = 
@LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise7 Implement deep 
networks for digit classification/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function 
[nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum( ([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. */ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit 
classification/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 
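%
% Usage sketch (illustrative only; it assumes the minFunc in this directory
% is on the MATLAB path, and the option values are examples rather than
% settings used elsewhere in these exercises):
%
%   options.Method  = 'lbfgs';
%   options.MaxIter = 400;
%   x0   = [-1.2; 1];                          % a common Rosenbrock starting point
%   xOpt = minFunc(@rosenbrock, x0, options);  % should end up close to [1; 1]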
11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/params2stack.m: -------------------------------------------------------------------------------- 1 | function stack = params2stack(params, netconfig) 2 | 3 | % Converts a flattened parameter vector into a nice "stack" structure 4 | % for us to work with. This is useful when you're building multilayer 5 | % networks. 
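%
% Round-trip sketch (exampleStack is a placeholder name, not repository
% code): flattening with stack2params and unflattening with params2stack
% should reproduce the original weights and biases.
%
%   exampleStack = cell(2, 1);
%   exampleStack{1}.w = randn(3, 4);   exampleStack{1}.b = zeros(3, 1);
%   exampleStack{2}.w = randn(2, 3);   exampleStack{2}.b = zeros(2, 1);
%   [flat, cfg] = stack2params(exampleStack);
%   back = params2stack(flat, cfg);    % back{d}.w and back{d}.b match exampleStack{d}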
6 | % 7 | % stack = params2stack(params, netconfig) 8 | % 9 | % params - flattened parameter vector 10 | % netconfig - auxiliary variable containing 11 | % the configuration of the network 12 | % 13 | 14 | 15 | % Map the params (a vector into a stack of weights) 16 | depth = numel(netconfig.layersizes); 17 | stack = cell(depth,1); 18 | prevLayerSize = netconfig.inputsize; % the size of the previous layer 19 | curPos = double(1); % mark current position in parameter vector 20 | 21 | for d = 1:depth 22 | % Create layer d 23 | stack{d} = struct; 24 | 25 | % Extract weights 26 | wlen = double(netconfig.layersizes{d} * prevLayerSize); 27 | stack{d}.w = reshape(params(curPos:curPos+wlen-1), netconfig.layersizes{d}, prevLayerSize); 28 | curPos = curPos+wlen; 29 | 30 | % Extract bias 31 | blen = double(netconfig.layersizes{d}); 32 | stack{d}.b = reshape(params(curPos:curPos+blen-1), netconfig.layersizes{d}, 1); 33 | curPos = curPos+blen; 34 | 35 | % Set previous layer size 36 | prevLayerSize = netconfig.layersizes{d}; 37 | end 38 | 39 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/softmaxCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/softmaxCost.m -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/softmaxPredict.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/softmaxPredict.m -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/sparseAutoencoderCost_modify.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/sparseAutoencoderCost_modify.m -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/stack2params.m: -------------------------------------------------------------------------------- 1 | function [params, netconfig] = stack2params(stack) 2 | 3 | % Converts a "stack" structure into a flattened parameter vector and also 4 | % stores the network configuration. This is useful when working with 5 | % optimization toolboxes such as minFunc. 6 | % 7 | % [params, netconfig] = stack2params(stack) 8 | % 9 | % stack - the stack structure, where stack{1}.w = weights of first layer 10 | % stack{1}.b = weights of first layer 11 | % stack{2}.w = weights of second layer 12 | % stack{2}.b = weights of second layer 13 | % ... etc. 14 | 15 | 16 | % Setup the compressed param vector 17 | params = []; 18 | for d = 1:numel(stack) 19 | 20 | % This can be optimized. But since our stacks are relatively short, it 21 | % is okay 22 | params = [params ; stack{d}.w(:) ; stack{d}.b(:) ]; 23 | 24 | % Check that stack is of the correct form 25 | assert(size(stack{d}.w, 1) == size(stack{d}.b, 1), ... 26 | ['The bias should be a *column* vector of ' ... 
27 | int2str(size(stack{d}.w, 1)) 'x1']); 28 | if d < numel(stack) 29 | assert(size(stack{d}.w, 1) == size(stack{d+1}.w, 2), ... 30 | ['The adjacent layers L' int2str(d) ' and L' int2str(d+1) ... 31 | ' should have matching sizes.']); 32 | end 33 | 34 | end 35 | 36 | if nargout > 1 37 | % Setup netconfig 38 | if numel(stack) == 0 39 | netconfig.inputsize = 0; 40 | netconfig.layersizes = {}; 41 | else 42 | netconfig.inputsize = size(stack{1}.w, 2); 43 | netconfig.layersizes = {}; 44 | for d = 1:numel(stack) 45 | netconfig.layersizes = [netconfig.layersizes ; size(stack{d}.w,1)]; 46 | end 47 | end 48 | end 49 | 50 | end -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/stackedAECost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/stackedAECost.m -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/stackedAEExercise.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise7 Implement deep networks for digit classification/stackedAEExercise.m -------------------------------------------------------------------------------- /Exercise7 Implement deep networks for digit classification/stackedAEPredict.m: -------------------------------------------------------------------------------- 1 | function [pred] = stackedAEPredict(theta, inputSize, hiddenSize, numClasses, netconfig, data) 2 | 3 | % stackedAEPredict: Takes a trained theta and a test data set, 4 | % and returns the predicted labels for each example. 5 | 6 | % theta: trained weights from the autoencoder 7 | % visibleSize: the number of input units 8 | % hiddenSize: the number of hidden units *at the 2nd layer* 9 | % numClasses: the number of categories 10 | % data: Our matrix containing the training data as columns. So, data(:,i) is the i-th training example. 11 | 12 | % Your code should produce the prediction matrix 13 | % pred, where pred(i) is argmax_c P(y(c) | x(i)). 14 | 15 | %% Unroll theta parameter 16 | 17 | % We first extract the part which compute the softmax gradient 18 | softmaxTheta = reshape(theta(1:hiddenSize*numClasses), numClasses, hiddenSize); 19 | 20 | % Extract out the "stack" 21 | stack = params2stack(theta(hiddenSize*numClasses+1:end), netconfig); 22 | 23 | %% ---------- YOUR CODE HERE -------------------------------------- 24 | % Instructions: Compute pred using theta assuming that the labels start 25 | % from 1. 
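%
% One generic way to write this step (a sketch; it relies on the sigmoid
% helper defined at the bottom of this file and works for any number of
% stacked layers, not only the two used below):
%
%   a = data;
%   for d = 1:numel(stack)
%       a = sigmoid(bsxfun(@plus, stack{d}.w * a, stack{d}.b));  % feedforward one layer
%   end
%   probs = exp(softmaxTheta * a);
%   probs = bsxfun(@rdivide, probs, sum(probs, 1));              % softmax over classes
%   [~, pred] = max(probs, [], 1);                               % most probable label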
26 | 27 | z2 = bsxfun(@plus, stack{1}.w*data, stack{1}.b); 28 | a2 = sigmoid(z2); 29 | z3 = bsxfun(@plus, stack{2}.w*a2, stack{2}.b); 30 | a3 = sigmoid(z3); 31 | z4 = softmaxTheta * a3; 32 | a4 = exp(z4); 33 | a4 = bsxfun(@rdivide, a4, sum(a4)); 34 | 35 | 36 | [p,pred] = max(a4, [], 1); 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | % ----------------------------------------------------------- 47 | 48 | end 49 | 50 | 51 | % You might find this useful 52 | function sigm = sigmoid(x) 53 | sigm = 1 ./ (1 + exp(-x)); 54 | end 55 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/computeNumericalGradient.m: -------------------------------------------------------------------------------- 1 | function numgrad = computeNumericalGradient(J, theta) 2 | % numgrad = computeNumericalGradient(J, theta) 3 | % theta: a vector of parameters 4 | % J: a function that outputs a real-number. Calling y = J(theta) will return the 5 | % function value at theta. 6 | 7 | % Initialize numgrad with zeros 8 | numgrad = zeros(size(theta)); 9 | 10 | %% ---------- YOUR CODE HERE -------------------------------------- 11 | % Instructions: 12 | % Implement numerical gradient checking, and return the result in numgrad. 13 | % (See Section 2.3 of the lecture notes.) 14 | % You should write code so that numgrad(i) is (the numerical approximation to) the 15 | % partial derivative of J with respect to the i-th input argument, evaluated at theta. 16 | % I.e., numgrad(i) should be the (approximately) the partial derivative of J with 17 | % respect to theta(i). 18 | % 19 | % Hint: You will probably want to compute the elements of numgrad one at a 20 | % time. 21 | epsilon = 10^(-4); 22 | n = size(theta, 1); 23 | % J1 = zeros(size(numgrad)); 24 | % J2 = zeros(size(numgrad)); 25 | J1 = zeros(1, 1); 26 | J2 = zeros(1, 1); 27 | grad = zeros(size(numgrad)); 28 | temp1 = zeros(size(theta)); 29 | temp2 = zeros(size(theta)); 30 | 31 | for i = 1 : n 32 | % i 33 | temp1 = theta; 34 | temp2 = theta; 35 | temp1(i) = temp1(i) + epsilon; 36 | temp2(i) = temp2(i) - epsilon; 37 | [J1, grad] = J(temp1); 38 | [J2, grad] = J(temp2); 39 | numgrad(i) = (J1 - J2) / (2*epsilon); 40 | end 41 | 42 | 43 | % theta1 = theta + epsilon; 44 | % theta2 = theta - epsilon; 45 | % [J1(i), grad] = J(temp1); 46 | % [J2(i), grad] = J(temp2); 47 | 48 | 49 | 50 | 51 | %% --------------------------------------------------------------- 52 | end 53 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/displayColorNetwork.m: -------------------------------------------------------------------------------- 1 | function displayColorNetwork(A) 2 | 3 | % display receptive field(s) or basis vector(s) for image patches 4 | % 5 | % A the basis, with patches as column vectors 6 | 7 | % In case the midpoint is not set at 0, we shift it dynamically 8 | if min(A(:)) >= 0 9 | A = A - mean(A(:)); 10 | end 11 | 12 | cols = round(sqrt(size(A, 2))); 13 | 14 | channel_size = size(A,1) / 3; 15 | dim = sqrt(channel_size); 16 | dimp = dim+1; 17 | rows = ceil(size(A,2)/cols); 18 | B = A(1:channel_size,:); 19 | C = A(channel_size+1:channel_size*2,:); 20 | D = A(2*channel_size+1:channel_size*3,:); 21 | B=B./(ones(size(B,1),1)*max(abs(B))); 22 | C=C./(ones(size(C,1),1)*max(abs(C))); 23 | D=D./(ones(size(D,1),1)*max(abs(D))); 24 | % Initialization of the image 25 | I = ones(dim*rows+rows-1,dim*cols+cols-1,3); 26 | 27 | 
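% Each patch occupies dim pixels and is followed by a one-pixel gap
% (dimp = dim+1), so the canvas above is dim*rows+rows-1 pixels tall and
% dim*cols+cols-1 pixels wide.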
%Transfer features to this image matrix 28 | for i=0:rows-1 29 | for j=0:cols-1 30 | 31 | if i*cols+j+1 > size(B, 2) 32 | break 33 | end 34 | 35 | % This sets the patch 36 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,1) = ... 37 | reshape(B(:,i*cols+j+1),[dim dim]); 38 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,2) = ... 39 | reshape(C(:,i*cols+j+1),[dim dim]); 40 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,3) = ... 41 | reshape(D(:,i*cols+j+1),[dim dim]); 42 | 43 | end 44 | end 45 | 46 | I = I + 1; 47 | I = I / 2; 48 | imagesc(I); 49 | axis equal 50 | axis off 51 | 52 | end 53 | 54 | 55 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/initializeParameters.m: -------------------------------------------------------------------------------- 1 | function theta = initializeParameters(hiddenSize, visibleSize) 2 | 3 | %% Initialize parameters randomly based on layer sizes. 4 | r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] 5 | W1 = rand(hiddenSize, visibleSize) * 2 * r - r; 6 | W2 = rand(visibleSize, hiddenSize) * 2 * r - r; 7 | 8 | b1 = zeros(hiddenSize, 1); 9 | b2 = zeros(visibleSize, 1); 10 | 11 | % Convert weights and bias gradients to the vector form. 12 | % This step will "unroll" (flatten and concatenate together) all 13 | % your parameters into a vector, which can then be used with minFunc. 14 | theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end 
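% Quick illustrative check of the finite-difference branch above (quadObj
% and x0 are example names, not repository code): for f(x) = 0.5*||x||^2
% the gradient is x and the Hessian is the identity.
%
%   quadObj = @(x) deal(0.5 * (x' * x), x);        % returns [f, g] as autoHess expects
%   x0 = randn(5, 1);
%   [fFD, gFD, Hfd] = autoHess(x0, 0, quadObj);    % useComplex = 0 -> finite differencing
%   maxErr = max(max(abs(Hfd - eye(5))));          % should be very close to zero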
-------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian 
approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by 
Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmaci -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 
ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum(mylogsumexp([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. 
*/ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; 
-------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise8 Learning color features with Sparse Autoencoders/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/cnnConvolve.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/cnnConvolve.m -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/displayColorNetwork.m: 
-------------------------------------------------------------------------------- 1 | function displayColorNetwork(A) 2 | 3 | % display receptive field(s) or basis vector(s) for image patches 4 | % 5 | % A the basis, with patches as column vectors 6 | 7 | % In case the midpoint is not set at 0, we shift it dynamically 8 | if min(A(:)) >= 0 9 | A = A - mean(A(:)); 10 | end 11 | 12 | cols = round(sqrt(size(A, 2))); 13 | 14 | channel_size = size(A,1) / 3; 15 | dim = sqrt(channel_size); 16 | dimp = dim+1; 17 | rows = ceil(size(A,2)/cols); 18 | B = A(1:channel_size,:); 19 | C = A(channel_size+1:channel_size*2,:); 20 | D = A(2*channel_size+1:channel_size*3,:); 21 | B=B./(ones(size(B,1),1)*max(abs(B))); 22 | C=C./(ones(size(C,1),1)*max(abs(C))); 23 | D=D./(ones(size(D,1),1)*max(abs(D))); 24 | % Initialization of the image 25 | I = ones(dim*rows+rows-1,dim*cols+cols-1,3); 26 | 27 | %Transfer features to this image matrix 28 | for i=0:rows-1 29 | for j=0:cols-1 30 | 31 | if i*cols+j+1 > size(B, 2) 32 | break 33 | end 34 | 35 | % This sets the patch 36 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,1) = ... 37 | reshape(B(:,i*cols+j+1),[dim dim]); 38 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,2) = ... 39 | reshape(C(:,i*cols+j+1),[dim dim]); 40 | I(i*dimp+1:i*dimp+dim,j*dimp+1:j*dimp+dim,3) = ... 41 | reshape(D(:,i*cols+j+1),[dim dim]); 42 | 43 | end 44 | end 45 | 46 | I = I + 1; 47 | I = I / 2; 48 | imagesc(I); 49 | axis equal 50 | axis off 51 | 52 | end 53 | 54 | 55 | -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/feedForwardAutoencoder.m: -------------------------------------------------------------------------------- 1 | function [activation] = feedForwardAutoencoder(theta, hiddenSize, visibleSize, data) 2 | 3 | % theta: trained weights from the autoencoder 4 | % visibleSize: the number of input units (probably 64) 5 | % hiddenSize: the number of hidden units (probably 25) 6 | % data: Our matrix containing the training data as columns. So, data(:,i) is the i-th training example. 7 | 8 | % We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this 9 | % follows the notation convention of the lecture notes. 10 | 11 | W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize); 12 | b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize); 13 | 14 | %% ---------- YOUR CODE HERE -------------------------------------- 15 | % Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder. 16 | m = size(data, 2); 17 | z2 = W1 * data + repmat(b1,1,m); 18 | a2 = sigmoid(z2); 19 | activation = a2; 20 | %------------------------------------------------------------------- 21 | 22 | end 23 | 24 | %------------------------------------------------------------------- 25 | % Here's an implementation of the sigmoid function, which you may find useful 26 | % in your computation of the costs and the gradients. This inputs a (row or 27 | % column) vector (say (z1, z2, z3)) and returns (f(z1), f(z2), f(z3)). 
28 | 29 | function sigm = sigmoid(x) 30 | sigm = 1 ./ (1 + exp(-x)); 31 | end 32 | -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/autoGrad.m: -------------------------------------------------------------------------------- 1 | function [f,g] = autoGrad(x,useComplex,funObj,varargin) % [f,g] = autoGrad(x,useComplex,funObj,varargin) % % Numerically compute gradient of objective function from function values p = length(x); mu = 1e-150; if useComplex % Use Complex Differentials diff = zeros(p,1); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(diff)); g = imag(diff)/mu; else % Use Finite Differencing f = funObj(x,varargin{:}); mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; diff(j,1) = funObj(x + mu*e_j,varargin{:}); end g = (diff-f)/mu; end if 0 % DEBUG CODE [fReal gReal] = funObj(x,varargin{:}); [fReal f] [gReal g] pause; end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/autoHess.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = autoHess(x,useComplex,funObj,varargin) % Numerically compute Hessian of objective function from gradient values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(diff),2); H = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g] = funObj(x,varargin{:}); diff = zeros(p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f diff(:,j)] = funObj(x + mu*e_j,varargin{:}); end H = (diff-repmat(g,[1 p]))/mu; end % Make sure H is symmetric H = (H+H')/2; if 0 % DEBUG CODE [fReal gReal HReal] = funObj(x,varargin{:}); [fReal f] [gReal g] [HReal H] pause; end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/autoHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin) 2 | % Numerically compute Hessian-vector product H*v of funObj(x,varargin{:}) 3 | % based on gradient values 4 | 5 | if useComplex 6 | mu = 1e-150i; 7 | else 8 | mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v); 9 | end 10 | [f,finDif] = funObj(x + v*mu,varargin{:}); 11 | Hv = (finDif-g)/mu; -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/autoTensor.m: -------------------------------------------------------------------------------- 1 | function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin) % Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values p = length(x); if useComplex % Use Complex Differentials mu = 1e-150; diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:}); end f = mean(real(f)); g = mean(real(g),2); H = mean(real(diff),3); T = imag(diff)/mu; else % Use finite differencing mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p); [f,g,H] = funObj(x,varargin{:}); diff = zeros(p,p,p); for j = 1:p e_j = zeros(p,1); e_j(j) = 1; [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:}); end T = (diff-repmat(H,[1 1 p]))/mu; end 
-------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/callOutput.m: -------------------------------------------------------------------------------- 1 | function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin) 2 | 3 | optimValues.iteration = i; 4 | optimValues.funccount = funEvals; 5 | optimValues.fval = f; 6 | optimValues.stepsize = t; 7 | optimValues.directionalderivative = gtd; 8 | optimValues.gradient = g; 9 | optimValues.searchdirection = d; 10 | optimValues.firstorderopt = opt; 11 | 12 | feval(outputFcn, x,optimValues,state,varargin{:}); -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/dampedUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | 3 | %B0 = eye(length(y))/Hdiag; 4 | S = old_dirs(:,2:end); 5 | Y = old_stps(:,2:end); 6 | k = size(Y,2); 7 | L = zeros(k); 8 | for j = 1:k 9 | for i = j+1:k 10 | L(i,j) = S(:,i)'*Y(:,j); 11 | end 12 | end 13 | D = diag(diag(S'*Y)); 14 | N = [S/Hdiag Y]; 15 | M = [S'*S/Hdiag L;L' -D]; 16 | 17 | ys = y'*s; 18 | Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s 19 | sBs = s'*Bs; 20 | 21 | eta = .02; 22 | if ys < eta*sBs 23 | if debug 24 | fprintf('Damped Update\n'); 25 | end 26 | theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1); 27 | y = theta*y + (1-theta)*Bs; 28 | end 29 | 30 | 31 | numCorrections = size(old_dirs,2); 32 | if numCorrections < corrections 33 | % Full Update 34 | old_dirs(:,numCorrections+1) = s; 35 | old_stps(:,numCorrections+1) = y; 36 | else 37 | % Limited-Memory Update 38 | old_dirs = [old_dirs(:,2:corrections) s]; 39 | old_stps = [old_stps(:,2:corrections) y]; 40 | end 41 | 42 | % Update scale of initial Hessian approximation 43 | Hdiag = (y'*s)/(y'*y); -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/example_minFunc_LR.m: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | nInst = 500; 4 | nVars = 100; 5 | X = [ones(nInst,1) randn(nInst,nVars-1)]; 6 | w = randn(nVars,1); 7 | y = sign(X*w); 8 | flipInd = rand(nInst,1) > .9; 9 | y(flipInd) = -y(flipInd); 10 | 11 | w_init = zeros(nVars,1); 12 | funObj = @(w)LogisticLoss(w,X,y); 13 | 14 | fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n'); 15 | options.Method = 'newton0'; 16 | minFunc(@LogisticLoss,w_init,options,X,y); 17 | pause; 18 | 19 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n'); 20 | options.Method = 'pnewton0'; 21 | options.precFunc = @LogisticDiagPrecond; 22 | minFunc(@LogisticLoss,w_init,options,X,y); 23 | pause; 24 | 25 | fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n'); 26 | options.Method = 'pnewton0'; 27 | options.precFunc = []; 28 | minFunc(@LogisticLoss,w_init,options,X,y); 29 | pause; 30 | 31 | fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n'); 32 | options.Method = 'newton0'; 33 | options.HvFunc = @LogisticHv; 34 | minFunc(@LogisticLoss,w_init,options,X,y); 35 | pause; 36 | 37 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n'); 38 | options.Method = 'pnewton0'; 39 | 
options.HvFunc = @LogisticHv; 40 | options.precFunc = @LogisticDiagPrecond; 41 | minFunc(@LogisticLoss,w_init,options,X,y); 42 | pause; 43 | 44 | fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n'); 45 | options.Method = 'pnewton0'; 46 | options.precFunc = []; 47 | options.HvFunc = @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - previous search directions (p by k) 11 | % y - previous step sizes (p by k) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexa64 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexglx -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmac -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmaci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmaci 
-------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexmaci64 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexw32 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/lbfgsC.mexw64 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/lbfgsUpdate.m: -------------------------------------------------------------------------------- 1 | function [old_dirs,old_stps,Hdiag] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag) 2 | ys = y'*s; 3 | if ys > 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 
| yXw = y.*Xw; 10 | 11 | nll = sum(mylogsumexp([zeros(n,1) -yXw])); 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. */ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n && j > 1 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | 4 | p = size(H,1); 5 | 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if 
verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/precondDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondDiag(r,D) 2 | y = D.*r; -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/precondTriu.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U) 2 | y = U \ (U' \ r); -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/precondTriuDiag.m: -------------------------------------------------------------------------------- 1 | function [y] = precondUpper(r,U,D) 2 | y = U \ (D .* (U' \ r)); -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/rosenbrock.m: -------------------------------------------------------------------------------- 1 | function [f, df, ddf, dddf] = rosenbrock(x); 2 | 3 | % rosenbrock.m This function returns the function value, partial derivatives 4 | % and Hessian of the (general dimension) rosenbrock function, given by: 5 | % 6 | % f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 7 | % 8 | % where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). 9 | % 10 | % Carl Edward Rasmussen, 2001-07-21. 
11 | 12 | D = length(x); 13 | f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2); 14 | 15 | if nargout > 1 16 | df = zeros(D, 1); 17 | df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); 18 | df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); 19 | end 20 | 21 | if nargout > 2 22 | ddf = zeros(D,D); 23 | ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2); 24 | ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1); 25 | ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1); 26 | end 27 | 28 | if nargout > 3 29 | dddf = zeros(D,D,D); 30 | for d = 1:D 31 | if d > 1 32 | dddf(d,d-1,d-1) = -400; 33 | end 34 | if d < D 35 | dddf(d,d+1,d) = -400; 36 | dddf(d,d,d+1) = -400; 37 | dddf(d,d,d) = 2400*x(d); 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/minFunc/taylorModel.m: -------------------------------------------------------------------------------- 1 | function [f,g,H] = taylorModel(d,f,g,H,T) 2 | 3 | p = length(d); 4 | 5 | fd3 = 0; 6 | gd2 = zeros(p,1); 7 | Hd = zeros(p); 8 | for t1 = 1:p 9 | for t2 = 1:p 10 | for t3 = 1:p 11 | fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3); 12 | 13 | if nargout > 1 14 | gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2); 15 | end 16 | 17 | if nargout > 2 18 | Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1); 19 | end 20 | end 21 | 22 | end 23 | end 24 | 25 | f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3; 26 | 27 | if nargout > 1 28 | g = g + H*d + (1/2)*gd2; 29 | end 30 | 31 | if nargout > 2 32 | H = H + Hd; 33 | end 34 | 35 | if any(abs(d) > 1e5) 36 | % We want the optimizer to stop if the solution is unbounded 37 | g = zeros(p,1); 38 | end -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/softmaxCost.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/softmaxCost.m -------------------------------------------------------------------------------- /Exercise9 Convolution and Pooling/softmaxPredict.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkyang/UFLDL-Tutorial-Exercise/ecd413a2c9c2d1e5e5e55422eb5de71891732b10/Exercise9 Convolution and Pooling/softmaxPredict.m -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #Introduction# 2 | 3 | 1. This repo contains solutions to the exercises of Andrew Ng's deep learning tutorial ([ufldl tutorial][1]). 4 | 5 | 2. Because many of the tutorial's exercises build on parts completed earlier, you will see that different exercises contain duplicate files; for completeness I have not removed these duplicates. 6 | 7 | 3. The sparse coding part is unfinished and the ICA exercise has not been done yet; the code is also not yet optimized, especially in the earlier exercises. I am uploading it as-is for now. 8 | 9 | 4. The author of this [blog][2] has not only completed the exercises but also added his own explanations and comments, so it is a very good reference. 10 | 11 | #To Do# 12 | 13 | 1. Remove redundant files 14 | 15 | 2. Add comments 16 | 17 | 3. Finish the remaining two exercises 18 | 19 | 4. Optimize the code 20 | 21 | [1]: http://ufldl.stanford.edu/wiki/index.php/UFLDL_Tutorial 22 | [2]: http://www.cnblogs.com/tornadomeet/category/361811.html 23 | 24 | --------------------------------------------------------------------------------
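For reference, every exercise in this repo is trained with the same pattern: a cost function (sparseAutoencoderCost, softmaxCost, sparseCodingWeightCost, ...) returns an objective value and its gradient, and minFunc minimizes it with L-BFGS. The sketch below shows that wiring for the Exercise 1 sparse autoencoder. It is a minimal example under assumed settings, not a copy of this repo's train.m; the hyperparameter values are the UFLDL tutorial's usual defaults and are assumptions here.

```matlab
% Minimal sketch (assumed UFLDL defaults) of the training loop shared by these exercises.
visibleSize   = 8*8;     % number of input units: one 8x8 image patch
hiddenSize    = 25;      % number of hidden units
sparsityParam = 0.01;    % desired average activation of the hidden units
lambda        = 1e-4;    % weight decay parameter
beta          = 3;       % weight of the sparsity penalty term

patches = sampleIMAGES();                                 % 64 x 10000 matrix of training patches (needs IMAGES.mat)
theta   = initializeParameters(hiddenSize, visibleSize);  % random initial (W1,W2,b1,b2) parameter vector

addpath minFunc/
options = struct('Method', 'lbfgs', 'maxIter', 400, 'display', 'on');
[opttheta, cost] = minFunc(@(p) sparseAutoencoderCost(p, visibleSize, hiddenSize, ...
                                lambda, sparsityParam, beta, patches), theta, options);

% Visualize the learned features: each row of W1 is one 8x8 receptive field.
W1 = reshape(opttheta(1:hiddenSize*visibleSize), hiddenSize, visibleSize);
display_network(W1');
```

The later exercises reuse this loop, swapping in the corresponding cost function and data (MNIST digits, whitened patches, color patches), which is why the minFunc folder is duplicated across them.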