├── README ├── README.docx ├── calc_AE_batch.m ├── calc_BP_batch.m ├── check_AE.m ├── check_BP.m ├── check_grad.m ├── dataSet ├── t10k-images.idx3-ubyte ├── t10k-labels.idx1-ubyte ├── train-images.idx3-ubyte └── train-labels.idx1-ubyte ├── display_network.m ├── get_BPNN_option.m ├── get_SAE_option.m ├── get_accuracy_rate.m ├── init_parameters.m ├── load_MNIST_data.m ├── main.m ├── minFunc ├── ArmijoBacktrack.m ├── WolfeLineSearch.m ├── autoGrad.m ├── autoHess.m ├── autoHv.m ├── autoTensor.m ├── callOutput.m ├── conjGrad.m ├── dampedUpdate.m ├── example_minFunc.m ├── example_minFunc_LR.m ├── isLegal.m ├── lbfgs.m ├── lbfgsC.c ├── lbfgsC.mexa64 ├── lbfgsC.mexglx ├── lbfgsC.mexmac ├── lbfgsC.mexmaci ├── lbfgsC.mexmaci64 ├── lbfgsC.mexw32 ├── lbfgsC.mexw64 ├── lbfgsUpdate.m ├── logistic │ ├── LogisticDiagPrecond.m │ ├── LogisticHv.m │ ├── LogisticLoss.m │ ├── mexutil.c │ ├── mexutil.h │ ├── mylogsumexp.m │ ├── repmatC.c │ ├── repmatC.dll │ ├── repmatC.mexglx │ └── repmatC.mexmac ├── mchol.m ├── mcholC.c ├── mcholC.mexmaci64 ├── mcholC.mexw32 ├── mcholC.mexw64 ├── mcholinc.m ├── minFunc.m ├── minFunc_processInputOptions.m ├── polyinterp.m ├── precondDiag.m ├── precondTriu.m ├── precondTriuDiag.m ├── rosenbrock.m └── taylorModel.m ├── predict_NN.m ├── run_SAE_once.m ├── test.m ├── train_AE.m ├── train_BPNN.m ├── train_SAE.m ├── ~$README.docx └── 图 ├── 3-layers-NN.vsdx ├── AE-BP.jpg ├── AE-BP.vsdx ├── AE-FF.jpg ├── AE-FF.vsdx └── AE.vsdx /README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/README -------------------------------------------------------------------------------- /README.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/README.docx 
-------------------------------------------------------------------------------- /calc_AE_batch.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/calc_AE_batch.m -------------------------------------------------------------------------------- /calc_BP_batch.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/calc_BP_batch.m -------------------------------------------------------------------------------- /check_AE.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/check_AE.m -------------------------------------------------------------------------------- /check_BP.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/check_BP.m -------------------------------------------------------------------------------- /check_grad.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/check_grad.m -------------------------------------------------------------------------------- /dataSet/t10k-images.idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/dataSet/t10k-images.idx3-ubyte -------------------------------------------------------------------------------- /dataSet/t10k-labels.idx1-ubyte: 
-------------------------------------------------------------------------------- 1 | '                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             -------------------------------------------------------------------------------- /dataSet/train-images.idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/dataSet/train-images.idx3-ubyte -------------------------------------------------------------------------------- /dataSet/train-labels.idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/dataSet/train-labels.idx1-ubyte -------------------------------------------------------------------------------- /display_network.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/display_network.m 
-------------------------------------------------------------------------------- /get_BPNN_option.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/get_BPNN_option.m -------------------------------------------------------------------------------- /get_SAE_option.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/get_SAE_option.m -------------------------------------------------------------------------------- /get_accuracy_rate.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/get_accuracy_rate.m -------------------------------------------------------------------------------- /init_parameters.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/init_parameters.m -------------------------------------------------------------------------------- /load_MNIST_data.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/load_MNIST_data.m -------------------------------------------------------------------------------- /main.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/main.m -------------------------------------------------------------------------------- /minFunc/ArmijoBacktrack.m: 
--------------------------------------------------------------------------------
function [t,x_new,f_new,g_new,funEvals,H] = ArmijoBacktrack(...
    x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,saveHessianComp,funObj,varargin)
%
% Backtracking linesearch to satisfy Armijo condition
%
% Inputs:
%   x: starting location
%   t: initial step size
%   d: descent direction
%   f: function value at starting location
%   fr: reference function value (usually funObj(x))
%   g: gradient at starting location (only returned as g_new when the
%       search fails and t is reset to 0)
%   gtd: directional derivative at starting location
%   c1: sufficient decrease parameter
%   debug: display debugging information
%   LS: type of interpolation (0: fixed halving, 2: cubic using the
%       gradient at the new point, otherwise quadratic/cubic from
%       function values only)
%   tolX: minimum allowable step length
%   doPlot: do a graphical display of interpolation
%   saveHessianComp: if true, the Hessian is not requested during
%       backtracking and is evaluated once at the accepted point instead
%   funObj: objective function
%   varargin: parameters of objective function
%
% Outputs:
%   t: step length
%   x_new: x+t*d
%   f_new: function value at x+t*d
%   g_new: gradient value at x+t*d
%   funEvals: number function evaluations performed by line search
%   H: Hessian evaluated at x+t*d (only computed if requested)

% Evaluate the Objective and Gradient at the Initial Step
if nargout == 6
    [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:});
else
    [f_new,g_new] = feval(funObj, x + t*d, varargin{:});
end
funEvals = 1;

% Shrink t until the sufficient-decrease test passes with a legal f_new
while f_new > fr + c1*t*gtd || ~isLegal(f_new)

    temp = t;
    if LS == 0 || ~isLegal(f_new)
        % Backtrack w/ fixed backtracking rate
        if debug
            fprintf('Fixed BT\n');
        end
        t = 0.5*t;
    elseif LS == 2 && isLegal(g_new)
        % Backtracking w/ cubic interpolation w/ derivative
        if debug
            fprintf('Grad-Cubic BT\n');
        end
        t = polyinterp([0 f gtd; t f_new g_new'*d],doPlot);
    elseif funEvals < 2 || ~isLegal(f_prev)
        % Backtracking w/ quadratic interpolation (no derivative at new point)
        % (f_prev is only read when funEvals >= 2, i.e. after it was set below)
        if debug
            fprintf('Quad BT\n');
        end
        % sqrt(-1) fills the derivative slot of the new point
        % (presumably polyinterp's marker for "unknown" -- TODO confirm)
        t = polyinterp([0 f gtd; t f_new sqrt(-1)],doPlot);
    else%if LS == 1
        % Backtracking w/ cubic interpolation (no derivatives at new points)
        if debug
            fprintf('Cubic BT\n');
        end
        t = polyinterp([0 f gtd; t f_new sqrt(-1); t_prev f_prev sqrt(-1)],doPlot);
    end

    % Adjust if change in t is too small/large
    % (keeps the new step within [1e-3, 0.6] times the previous step)

    if t < temp*1e-3
        if debug
            fprintf('Interpolated Value Too Small, Adjusting\n');
        end
        t = temp*1e-3;
    elseif t > temp*0.6
        if debug
            fprintf('Interpolated Value Too Large, Adjusting\n');
        end
        t = temp*0.6;
    end

    f_prev = f_new;
    t_prev = temp;
    if ~saveHessianComp && nargout == 6
        [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:});
    else
        [f_new,g_new] = feval(funObj, x + t*d, varargin{:});
    end
    funEvals = funEvals+1;

    % Check whether step size has become too small
    if sum(abs(t*d)) <= tolX
        if debug
            fprintf('Backtracking Line Search Failed\n');
        end
        % Give up: report a zero step and the starting values
        t = 0;
        f_new = f;
        g_new = g;
        break;
    end
end

% Evaluate Hessian at new point
if nargout == 6 && funEvals > 1 && saveHessianComp
    [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:});
    funEvals = funEvals+1;
end

x_new = x + t*d;

end

--------------------------------------------------------------------------------
/minFunc/WolfeLineSearch.m:
--------------------------------------------------------------------------------
function [t,f_new,g_new,funEvals,H] = WolfeLineSearch(...
    x,t,d,f,g,gtd,c1,c2,LS,maxLS,tolX,debug,doPlot,saveHessianComp,funObj,varargin)
%
% Bracketing Line Search to Satisfy Wolfe Conditions
%
% Inputs:
%   x: starting location
%   t: initial step size
%   d: descent direction
%   f: function value at starting location
%   g: gradient at starting location
%   gtd: directional derivative at starting location
%   c1: sufficient decrease parameter
%   c2: curvature parameter
%   debug: display debugging information
%   LS: type of interpolation (3: bisection/extension by a fixed factor,
%       4: cubic using gradients, otherwise the mixed strategy of
%       mixedExtrap/mixedInterp)
%   maxLS: maximum number of iterations
%   tolX: minimum allowable step length
%   doPlot: do a graphical display of interpolation
%   saveHessianComp: if true, the Hessian is not requested at trial
%       points and is evaluated once at the final point instead
%   funObj: objective function
%   varargin: parameters of objective function
%
% Outputs:
%   t: step length
%   f_new: function value at x+t*d
%   g_new: gradient value at x+t*d
%   funEvals: number function evaluations performed by line search
%   H: Hessian evaluated at x+t*d (only computed if requested)

% Evaluate the Objective and Gradient at the Initial Step
if nargout == 5
    [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:});
else
    [f_new,g_new] = feval(funObj, x + t*d, varargin{:});
end
funEvals = 1;
gtd_new = g_new'*d;

% Bracket an Interval containing a point satisfying the
% Wolfe criteria

LSiter = 0;
t_prev = 0;
f_prev = f;
g_prev = g;
gtd_prev = gtd;
done = 0;

while LSiter < maxLS

    %% Bracketing Phase
    if ~isLegal(f_new) || ~isLegal(g_new)
        % NOTE: the "if 0" branch below is disabled code (bisection
        % alternative); the else branch (Armijo fallback) always runs.
        if 0
            if debug
                fprintf('Extrapolated into illegal region, Bisecting\n');
            end
            t = (t + t_prev)/2;
            if ~saveHessianComp && nargout == 5
                [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:});
            else
                [f_new,g_new] = feval(funObj, x + t*d, varargin{:});
            end
            funEvals = funEvals + 1;
            gtd_new = g_new'*d;
            LSiter = LSiter+1;
            continue;
        else
            if debug
                fprintf('Extrapolated into illegal region, switching to Armijo line-search\n');
            end
            t = (t + t_prev)/2;
            % Do Armijo
            % (the interpolation type passed on is LS-2 clamped to [0,2];
            % x_new returned by ArmijoBacktrack is not used here)
            if nargout == 5
                [t,x_new,f_new,g_new,armijoFunEvals,H] = ArmijoBacktrack(...
                    x,t,d,f,f,g,gtd,c1,max(0,min(LS-2,2)),tolX,debug,doPlot,saveHessianComp,...
                    funObj,varargin{:});
            else
                [t,x_new,f_new,g_new,armijoFunEvals] = ArmijoBacktrack(...
                    x,t,d,f,f,g,gtd,c1,max(0,min(LS-2,2)),tolX,debug,doPlot,saveHessianComp,...
                    funObj,varargin{:});
            end
            funEvals = funEvals + armijoFunEvals;
            return;
        end
    end


    if f_new > f + c1*t*gtd || (LSiter > 1 && f_new >= f_prev)
        % Sufficient decrease violated (or f stopped decreasing):
        % [t_prev t] is taken as the bracket
        bracket = [t_prev t];
        bracketFval = [f_prev f_new];
        bracketGval = [g_prev g_new];
        break;
    elseif abs(gtd_new) <= -c2*gtd
        % Curvature test passed as well: accept t directly
        bracket = t;
        bracketFval = f_new;
        bracketGval = g_new;
        done = 1;
        break;
    elseif gtd_new >= 0
        % Directional derivative is non-negative at t: bracket found
        bracket = [t_prev t];
        bracketFval = [f_prev f_new];
        bracketGval = [g_prev g_new];
        break;
    end
    % No bracket yet: choose a larger trial step in [minStep, maxStep]
    temp = t_prev;
    t_prev = t;
    minStep = t + 0.01*(t-temp);
    maxStep = t*10;
    if LS == 3
        if debug
            fprintf('Extending Braket\n');
        end
        t = maxStep;
    elseif LS ==4
        if debug
            fprintf('Cubic Extrapolation\n');
        end
        t = polyinterp([temp f_prev gtd_prev; t f_new gtd_new],doPlot,minStep,maxStep);
    else
        t = mixedExtrap(temp,f_prev,gtd_prev,t,f_new,gtd_new,minStep,maxStep,debug,doPlot);
    end

    f_prev = f_new;
    g_prev = g_new;
    gtd_prev = gtd_new;
    if ~saveHessianComp && nargout == 5
        [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:});
    else
        [f_new,g_new] = feval(funObj, x + t*d, varargin{:});
    end
    funEvals = funEvals + 1;
    gtd_new = g_new'*d;
    LSiter = LSiter+1;
end

% Iteration budget exhausted while bracketing: fall back to [0 t]
if LSiter == maxLS
    bracket = [0 t];
    bracketFval = [f f_new];
    bracketGval = [g g_new];
end

%% Zoom Phase

% We now either have a point satisfying the criteria, or a bracket
% surrounding a point satisfying the criteria
% Refine the bracket until we find a point satisfying the criteria
insufProgress = 0;
Tpos = 2;
LOposRemoved = 0;
while ~done && LSiter < maxLS

    % Find High and Low Points in bracket
    [f_LO LOpos] = min(bracketFval);
    HIpos = -LOpos + 3; % maps 1 -> 2 and 2 -> 1

    % Compute new trial value
    if LS == 3 || ~isLegal(bracketFval) || ~isLegal(bracketGval)
        if debug
            fprintf('Bisecting\n');
        end
        t = mean(bracket);
    elseif LS == 4
        if debug
            fprintf('Grad-Cubic Interpolation\n');
        end
        % (the line break inside the brackets separates the two matrix rows)
        t = polyinterp([bracket(1) bracketFval(1) bracketGval(:,1)'*d
            bracket(2) bracketFval(2) bracketGval(:,2)'*d],doPlot);
    else
        % Mixed Case %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        nonTpos = -Tpos+3;
        if LOposRemoved == 0
            oldLOval = bracket(nonTpos);
            oldLOFval = bracketFval(nonTpos);
            oldLOGval = bracketGval(:,nonTpos);
        end
        t = mixedInterp(bracket,bracketFval,bracketGval,d,Tpos,oldLOval,oldLOFval,oldLOGval,debug,doPlot);
    end


    % Test that we are making sufficient progress
    % (t closer than 10% of the bracket width to either end counts as
    % "close to boundary")
    if min(max(bracket)-t,t-min(bracket))/(max(bracket)-min(bracket)) < 0.1
        if debug
            fprintf('Interpolation close to boundary');
        end
        if insufProgress || t>=max(bracket) || t <= min(bracket)
            if debug
                fprintf(', Evaluating at 0.1 away from boundary\n');
            end
            if abs(t-max(bracket)) < abs(t-min(bracket))
                t = max(bracket)-0.1*(max(bracket)-min(bracket));
            else
                t = min(bracket)+0.1*(max(bracket)-min(bracket));
            end
            insufProgress = 0;
        else
            if debug
                fprintf('\n');
            end
            insufProgress = 1;
        end
    else
        insufProgress = 0;
    end

    % Evaluate new point
    if ~saveHessianComp && nargout == 5
        [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:});
    else
        [f_new,g_new] = feval(funObj, x + t*d, varargin{:});
    end
    funEvals = funEvals + 1;
    gtd_new = g_new'*d;
    LSiter = LSiter+1;

    if f_new > f + c1*t*gtd || f_new >= f_LO
        % Armijo condition not satisfied or not lower than lowest
        % point
        bracket(HIpos) = t;
        bracketFval(HIpos) = f_new;
        bracketGval(:,HIpos) = g_new;
        Tpos = HIpos;
    else
        if abs(gtd_new) <= - c2*gtd
            % Wolfe conditions satisfied
            done = 1;
        elseif gtd_new*(bracket(HIpos)-bracket(LOpos)) >= 0
            % Old LO is copied into the HI slot (old LO becomes new HI)
            bracket(HIpos) = bracket(LOpos);
            bracketFval(HIpos) = bracketFval(LOpos);
            bracketGval(:,HIpos) = bracketGval(:,LOpos);
            if LS == 5
                if debug
                    fprintf('LO Pos is being removed!\n');
                end
                LOposRemoved = 1;
                oldLOval = bracket(LOpos);
                oldLOFval = bracketFval(LOpos);
                oldLOGval = bracketGval(:,LOpos);
            end
        end
        % New point becomes new LO
        bracket(LOpos) = t;
        bracketFval(LOpos) = f_new;
        bracketGval(:,LOpos) = g_new;
        Tpos = LOpos;
    end

    if ~done && abs((bracket(1)-bracket(2))*gtd_new) < tolX
        if debug
            fprintf('Line Search can not make further progress\n');
        end
        break;
    end

end

%%
if LSiter == maxLS
    if debug
        fprintf('Line Search Exceeded Maximum Line Search Iterations\n');
    end
end

% Return the bracket end point with the lowest function value
[f_LO LOpos] = min(bracketFval);
t = bracket(LOpos);
f_new = bracketFval(LOpos);
g_new = bracketGval(:,LOpos);



% Evaluate Hessian at new point
if nargout == 5 && funEvals > 1 && saveHessianComp
    [f_new,g_new,H] = feval(funObj, x + t*d, varargin{:});
    funEvals = funEvals + 1;
end

end


%%
% mixedExtrap: pick the extrapolated step between two polyinterp
% candidates, one built from both function values (alpha_c) and one with
% f1 replaced by sqrt(-1) (alpha_s). alpha_c is used when it exceeds
% minStep and lies closer to x1 than alpha_s; otherwise alpha_s.
function [t] = mixedExtrap(x0,f0,g0,x1,f1,g1,minStep,maxStep,debug,doPlot);
alpha_c = polyinterp([x0 f0 g0; x1 f1 g1],doPlot,minStep,maxStep);
alpha_s = polyinterp([x0 f0 g0; x1 sqrt(-1) g1],doPlot,minStep,maxStep);
if alpha_c > minStep && abs(alpha_c - x1) < abs(alpha_s - x1)
    if debug
        fprintf('Cubic Extrapolation\n');
    end
    t = alpha_c;
else
    if debug
        fprintf('Secant Extrapolation\n');
    end
    t = alpha_s;
end
end

%%
% mixedInterp: choose the next trial step inside the bracket. Tpos indexes
% the most recently added bracket point; oldLO* describe the previous
% low point. Four cases are distinguished by comparing the function value
% and directional derivative at the Tpos point with those of the old low
% point; each case combines one or two polyinterp candidates.
function [t] = mixedInterp(bracket,bracketFval,bracketGval,d,Tpos,oldLOval,oldLOFval,oldLOGval,debug,doPlot);

% Mixed Case %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
nonTpos = -Tpos+3;

gtdT = bracketGval(:,Tpos)'*d;
gtdNonT = bracketGval(:,nonTpos)'*d;
oldLOgtd = oldLOGval'*d;
if bracketFval(Tpos) > oldLOFval
    % Case 1: function value at the new point is higher than at old LO
    alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd
        bracket(Tpos) bracketFval(Tpos) gtdT],doPlot);
    alpha_q = polyinterp([oldLOval oldLOFval oldLOgtd
        bracket(Tpos) bracketFval(Tpos) sqrt(-1)],doPlot);
    if abs(alpha_c - oldLOval) < abs(alpha_q - oldLOval)
        if debug
            fprintf('Cubic Interpolation\n');
        end
        t = alpha_c;
    else
        if debug
            fprintf('Mixed Quad/Cubic Interpolation\n');
        end
        t = (alpha_q + alpha_c)/2;
    end
elseif gtdT'*oldLOgtd < 0
    % Case 2: directional derivatives have opposite signs
    alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd
        bracket(Tpos) bracketFval(Tpos) gtdT],doPlot);
    alpha_s = polyinterp([oldLOval oldLOFval oldLOgtd
        bracket(Tpos) sqrt(-1) gtdT],doPlot);
    if abs(alpha_c - bracket(Tpos)) >= abs(alpha_s - bracket(Tpos))
        if debug
            fprintf('Cubic Interpolation\n');
        end
        t = alpha_c;
    else
        if debug
            fprintf('Quad Interpolation\n');
        end
        t = alpha_s;
    end
elseif abs(gtdT) <= abs(oldLOgtd)
    % Case 3: same sign, derivative magnitude did not increase
    alpha_c = polyinterp([oldLOval oldLOFval oldLOgtd
        bracket(Tpos) bracketFval(Tpos) gtdT],...
        doPlot,min(bracket),max(bracket));
    alpha_s = polyinterp([oldLOval sqrt(-1) oldLOgtd
        bracket(Tpos) bracketFval(Tpos) gtdT],...
        doPlot,min(bracket),max(bracket));
    if alpha_c > min(bracket) && alpha_c < max(bracket)
        if abs(alpha_c - bracket(Tpos)) < abs(alpha_s - bracket(Tpos))
            if debug
                fprintf('Bounded Cubic Extrapolation\n');
            end
            t = alpha_c;
        else
            if debug
                fprintf('Bounded Secant Extrapolation\n');
            end
            t = alpha_s;
        end
    else
        if debug
            fprintf('Bounded Secant Extrapolation\n');
        end
        t = alpha_s;
    end

    % Limit the step to 66% of the way from the Tpos point to the other
    % bracket point
    if bracket(Tpos) > oldLOval
        t = min(bracket(Tpos) + 0.66*(bracket(nonTpos) - bracket(Tpos)),t);
    else
        t = max(bracket(Tpos) + 0.66*(bracket(nonTpos) - bracket(Tpos)),t);
    end
else
    % Case 4: interpolate between the two current bracket points
    t = polyinterp([bracket(nonTpos) bracketFval(nonTpos) gtdNonT
        bracket(Tpos) bracketFval(Tpos) gtdT],doPlot);
end
end
--------------------------------------------------------------------------------
/minFunc/autoGrad.m:
--------------------------------------------------------------------------------
function [f,g] = autoGrad(x,useComplex,funObj,varargin)
% [f,g] = autoGrad(x,useComplex,funObj,varargin)
%
% Numerically compute gradient of objective function from function values
%
% Inputs:
%   x: point at which to differentiate (indexed as a p-by-1 vector)
%   useComplex: if true, use complex-step differentials (funObj is then
%       evaluated at complex arguments); otherwise forward differences
%   funObj: objective function, called as funObj(x,varargin{:})
%   varargin: extra arguments passed through to funObj
%
% Outputs:
%   f: function value (complex case: mean of the real parts of the p
%       perturbed evaluations; finite-difference case: funObj(x))
%   g: p-by-1 numerical gradient

p = length(x);
mu = 1e-150;
if useComplex % Use Complex Differentials
    diff = zeros(p,1);
    for j = 1:p
        e_j = zeros(p,1);
        e_j(j) = 1;
        % "i" is the imaginary unit here
        diff(j,1) = funObj(x + mu*i*e_j,varargin{:});
    end

    f = mean(real(diff));
    g = imag(diff)/mu;
else % Use Finite Differencing
    f = funObj(x,varargin{:});
    % p is a scalar, so norm(p) equals p for p > 0
    mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p);
    for j = 1:p
        e_j = zeros(p,1);
        e_j(j) = 1;
        diff(j,1) = funObj(x + mu*e_j,varargin{:});
    end
    g = (diff-f)/mu;
end

% Disabled debugging aid: compares against the gradient returned by funObj
if 0 % DEBUG CODE
    [fReal gReal] = funObj(x,varargin{:});
    [fReal f]
    [gReal g]
    pause;
end
--------------------------------------------------------------------------------
/minFunc/autoHess.m:
--------------------------------------------------------------------------------
function [f,g,H] = autoHess(x,useComplex,funObj,varargin)
% Numerically compute Hessian of objective function from gradient values
%
% Inputs:
%   x: point at which to differentiate (indexed as a p-by-1 vector)
%   useComplex: if true, use complex-step differentials; otherwise
%       forward differences of the gradient
%   funObj: objective function returning [f,g] = funObj(x,varargin{:})
%   varargin: extra arguments passed through to funObj
%
% Outputs:
%   f: function value at x
%   g: gradient at x
%   H: p-by-p numerical Hessian, symmetrized as (H+H')/2

p = length(x);

if useComplex % Use Complex Differentials
    mu = 1e-150;

    diff = zeros(p);
    for j = 1:p
        e_j = zeros(p,1);
        e_j(j) = 1;
        [f(j) diff(:,j)] = funObj(x + mu*i*e_j,varargin{:});
    end
    f = mean(real(f));
    g = mean(real(diff),2);
    H = imag(diff)/mu;
else % Use finite differencing
    mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p);

    [f,g] = funObj(x,varargin{:});
    diff = zeros(p);
    for j = 1:p
        e_j = zeros(p,1);
        e_j(j) = 1;
        % Only the gradient at the perturbed point is needed. The function
        % value goes into a throw-away variable so that the returned f
        % stays funObj(x) (previously it was overwritten on every pass).
        [junk diff(:,j)] = funObj(x + mu*e_j,varargin{:});
    end
    H = (diff-repmat(g,[1 p]))/mu;
end

% Make sure H is symmetric
H = (H+H')/2;

% Disabled debugging aid: compares against the Hessian returned by funObj
if 0 % DEBUG CODE
    [fReal gReal HReal] = funObj(x,varargin{:});
    [fReal f]
    [gReal g]
    [HReal H]
    pause;
end
--------------------------------------------------------------------------------
/minFunc/autoHv.m:
--------------------------------------------------------------------------------
function [Hv] = autoHv(v,x,g,useComplex,funObj,varargin)
% Numerically compute Hessian-vector product H*v of funObj(x,varargin{:})
%  based on gradient values
%
% Inputs:
%   v: direction vector
%   x: point at which the Hessian is taken
%   g: gradient of funObj at x
%   useComplex: if true, use a complex step; otherwise a forward difference
%   funObj: objective function returning [f,g] = funObj(x,varargin{:})
%   varargin: extra arguments passed through to funObj
%
% Output:
%   Hv: numerical approximation of H*v

if useComplex
    mu = 1e-150i;
else
    if norm(v) == 0
        % H*0 is exactly 0. Without this guard mu would be Inf and the
        % evaluation point x + v*mu would be NaN.
        Hv = zeros(size(g));
        return;
    end
    mu = 2*sqrt(1e-12)*(1+norm(x))/norm(v);
end
[f,finDif] = funObj(x + v*mu,varargin{:});
Hv = (finDif-g)/mu;
--------------------------------------------------------------------------------
/minFunc/autoTensor.m:
--------------------------------------------------------------------------------
function [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin)
% [f,g,H,T] = autoTensor(x,useComplex,funObj,varargin)
% Numerically compute Tensor of 3rd-derivatives of objective function from Hessian values
%
% funObj must return [f,g,H] = funObj(x,varargin{:}). T(:,:,j) is the
% numerical derivative of the Hessian with respect to x(j).

p = length(x);

if useComplex % Use Complex Differentials
    mu = 1e-150;

    diff = zeros(p,p,p);
    for j = 1:p
        e_j = zeros(p,1);
        e_j(j) = 1;
        [f(j) g(:,j) diff(:,:,j)] = funObj(x + mu*i*e_j,varargin{:});
    end
    f = mean(real(f));
    g = mean(real(g),2);
    H = mean(real(diff),3);
    T = imag(diff)/mu;
else % Use finite differencing
    mu = 2*sqrt(1e-12)*(1+norm(x))/norm(p);

    [f,g,H] = funObj(x,varargin{:});
    diff = zeros(p,p,p);
    for j = 1:p
        e_j = zeros(p,1);
        e_j(j) = 1;
        % f and g at the perturbed point are discarded
        [junk1 junk2 diff(:,:,j)] = funObj(x + mu*e_j,varargin{:});
    end
    T = (diff-repmat(H,[1 1 p]))/mu;
end
--------------------------------------------------------------------------------
/minFunc/callOutput.m:
--------------------------------------------------------------------------------
function [] = callOutput(outputFcn,x,state,i,funEvals,f,t,gtd,g,d,opt,varargin)
% Pack the current optimizer quantities into an optimValues struct and
% call outputFcn(x,optimValues,state,varargin{:}).
%
% Inputs map onto optimValues fields as follows:
%   i -> iteration, funEvals -> funccount, f -> fval, t -> stepsize,
%   gtd -> directionalderivative, g -> gradient, d -> searchdirection,
%   opt -> firstorderopt

optimValues.iteration = i;
optimValues.funccount = funEvals;
optimValues.fval = f;
optimValues.stepsize = t;
optimValues.directionalderivative = gtd;
optimValues.gradient = g;
optimValues.searchdirection = d;
optimValues.firstorderopt = opt;

feval(outputFcn, x,optimValues,state,varargin{:});
--------------------------------------------------------------------------------
/minFunc/conjGrad.m:
--------------------------------------------------------------------------------
function [x,k,res,negCurv] = cg(A,b,optTol,maxIter,verbose,precFunc,precArgs,matrixVectFunc,matrixVectArgs)
% [x,k,res,negCurv] =
%   cg(A,b,optTol,maxIter,verbose,precFunc,precArgs,matrixVectFunc,matrixVectArgs)
% Linear Conjugate Gradient, where optionally we use
% - preconditioner on vector v with precFunc(v,precArgs{:})
% - matrix multiplied by vector with matrixVectFunc(v,matrixVectArgs{:})
%
% NOTE(review): the function is declared as "cg" although the dump lists
% the file as conjGrad.m; the declared name is left untouched here.
%
% Inputs:
%   A, b: the system is started from x = 0 with residual r = -b
%   optTol: stop when norm(r) <= optTol
%   maxIter: maximum number of iterations
%   verbose: print messages when non-positive curvature is detected
%   precFunc, precArgs: optional preconditioner (used when at least 7
%       arguments are given and precFunc is non-empty)
%   matrixVectFunc, matrixVectArgs: optional matrix-vector product (used
%       when 9 arguments are given and matrixVectFunc is non-empty;
%       otherwise A*p is used)
%
% Outputs:
%   x: solution estimate
%   k: number of completed iterations
%   res: norm of the final residual
%   negCurv: direction p with p'*A*p < 0 if one was encountered and four
%       outputs were requested, [] otherwise

x = zeros(size(b));
r = -b;

% Apply preconditioner (if supplied)
if nargin >= 7 && ~isempty(precFunc)
    y = precFunc(r,precArgs{:});
else
    y = r;
end

ry = r'*y;
p = -y;
k = 0;

res = norm(r);
done = 0;
negCurv = [];
% Short-circuit && matches the scalar tests used elsewhere in this function
while res > optTol && k < maxIter && ~done
    % Compute Matrix-vector product
    % (an empty matrixVectFunc falls back to A*p, mirroring precFunc)
    if nargin >= 9 && ~isempty(matrixVectFunc)
        Ap = matrixVectFunc(p,matrixVectArgs{:});
    else
        Ap = A*p;
    end
    pAp = p'*Ap;

    % Check for negative Curvature
    if pAp <= 1e-16
        if verbose
            fprintf('Negative Curvature Detected!\n');
        end

        if nargout == 4
            if pAp < 0
                negCurv = p;
                return
            end
        end

        if k == 0
            % On the very first iteration take the step anyway, then stop
            if verbose
                fprintf('First-Iter, Proceeding...\n');
            end
            done = 1;
        else
            if verbose
                fprintf('Stopping\n');
            end
            break;
        end
    end

    % Conjugate Gradient
    alpha = ry/(pAp);
    x = x + alpha*p;
    r = r + alpha*Ap;

    % If supplied, apply preconditioner
    if nargin >= 7 && ~isempty(precFunc)
        y = precFunc(r,precArgs{:});
    else
        y = r;
    end

    ry_new = r'*y;
    beta = ry_new/ry;
    p = -y + beta*p;
    k = k + 1;

    % Update variables
    ry = ry_new;
    res = norm(r);
end
end

--------------------------------------------------------------------------------
/minFunc/dampedUpdate.m:
--------------------------------------------------------------------------------
function [old_dirs,old_stps,Hdiag,Bcompact] = lbfgsUpdate(y,s,corrections,debug,old_dirs,old_stps,Hdiag)
% Damped limited-memory update of the stored correction pairs.
%
% Inputs:
%   y, s: new correction pair (s is appended to old_dirs, y to old_stps)
%   corrections: maximum number of stored pairs
%   debug: print a message when damping is applied
%   old_dirs, old_stps: stored pairs, one column per pair
%   Hdiag: current scale of the initial Hessian approximation
%
% Outputs:
%   old_dirs, old_stps: updated storage (oldest column dropped when full)
%   Hdiag: updated scale (y'*s)/(y'*y), computed from the damped y
%
% NOTE(review): the function is declared as "lbfgsUpdate" although the dump
% lists the file as dampedUpdate.m.
% NOTE(review): output Bcompact is never assigned, so requesting a fourth
% output raises an error. Its intended content is not visible here, so the
% code is left unchanged -- confirm against callers.

%B0 = eye(length(y))/Hdiag;
% Compact representation is built from all stored pairs except the first
S = old_dirs(:,2:end);
Y = old_stps(:,2:end);
k = size(Y,2);
L = zeros(k);
for j = 1:k
    for i = j+1:k
        L(i,j) = S(:,i)'*Y(:,j);
    end
end
D = diag(diag(S'*Y));
N = [S/Hdiag Y];
M = [S'*S/Hdiag L;L' -D];

ys = y'*s;
Bs = s/Hdiag - N*(M\(N'*s)); % Product B*s
sBs = s'*Bs;

% Damping: if y'*s is too small relative to s'*B*s, replace y by a convex
% combination of y and B*s
eta = .02;
if ys < eta*sBs
    if debug
        fprintf('Damped Update\n');
    end
    theta = min(max(0,((1-eta)*sBs)/(sBs - ys)),1);
    y = theta*y + (1-theta)*Bs;
end


numCorrections = size(old_dirs,2);
if numCorrections < corrections
    % Full Update
    old_dirs(:,numCorrections+1) = s;
    old_stps(:,numCorrections+1) = y;
else
    % Limited-Memory Update
    old_dirs = [old_dirs(:,2:corrections) s];
    old_stps = [old_stps(:,2:corrections) y];
end

% Update scale of initial Hessian approximation
Hdiag = (y'*s)/(y'*y);
--------------------------------------------------------------------------------
/minFunc/example_minFunc.m:
--------------------------------------------------------------------------------
% Runs various limited-memory solvers on 2D rosenbrock function for 25
% function evaluations
maxFunEvals = 25;

fprintf('Result after %d evaluations of limited-memory solvers on 2D rosenbrock:\n',maxFunEvals);

fprintf('---------------------------------------\n');
fprintf('x1 = %.4f, x2 = %.4f (starting point)\n',0,0);
fprintf('x1 = %.4f, x2 = %.4f (optimal solution)\n',1,1);
fprintf('---------------------------------------\n');

% Only run the comparison against minimize.m if it is available as a file
if exist('minimize') == 2
    % Minimize.m - conjugate gradient method
    x = minimize([0 0]', 'rosenbrock', -maxFunEvals);
    fprintf('x1 = %.4f, x2 = %.4f (minimize.m by C. Rasmussen)\n',x(1),x(2));
end

options = [];
options.display = 'none';
options.maxFunEvals = maxFunEvals;

% Steepest Descent
options.Method = 'sd';
x = minFunc(@rosenbrock,[0 0]',options);
fprintf('x1 = %.4f, x2 = %.4f (minFunc with steepest descent)\n',x(1),x(2));

% Cyclic Steepest Descent
options.Method = 'csd';
x = minFunc(@rosenbrock,[0 0]',options);
fprintf('x1 = %.4f, x2 = %.4f (minFunc with cyclic steepest descent)\n',x(1),x(2));

% Barzilai & Borwein
options.Method = 'bb';
options.bbType = 1;
x = minFunc(@rosenbrock,[0 0]',options);
fprintf('x1 = %.4f, x2 = %.4f (minFunc with spectral gradient descent)\n',x(1),x(2));

% Hessian-Free Newton
options.Method = 'newton0';
x = minFunc(@rosenbrock,[0 0]',options);
fprintf('x1 = %.4f, x2 = %.4f (minFunc with Hessian-free Newton)\n',x(1),x(2));

% Hessian-Free Newton w/ L-BFGS preconditioner
options.Method = 'pnewton0';
x = minFunc(@rosenbrock,[0 0]',options);
fprintf('x1 = %.4f, x2 = %.4f (minFunc with preconditioned Hessian-free Newton)\n',x(1),x(2));

% Conjugate Gradient
options.Method = 'cg';
x = minFunc(@rosenbrock,[0 0]',options);
fprintf('x1 = %.4f, x2 = %.4f (minFunc with conjugate gradient)\n',x(1),x(2));

% Scaled conjugate Gradient
options.Method = 'scg';
x = minFunc(@rosenbrock,[0 0]',options);
fprintf('x1 = %.4f, x2 = %.4f (minFunc with scaled conjugate gradient)\n',x(1),x(2));

% Preconditioned Conjugate Gradient
options.Method = 'pcg';
x = minFunc(@rosenbrock,[0 0]',options);
fprintf('x1 = %.4f, x2 = %.4f (minFunc with preconditioned conjugate gradient)\n',x(1),x(2));

% Default: L-BFGS (default)
options.Method = 'lbfgs';
x = minFunc(@rosenbrock,[0 0]',options);
fprintf('x1 = %.4f, x2 = %.4f (minFunc with limited-memory BFGS - default)\n',x(1),x(2));

fprintf('---------------------------------------\n');

--------------------------------------------------------------------------------
/minFunc/example_minFunc_LR.m:
--------------------------------------------------------------------------------
clear all

nInst = 500;
nVars = 100;
X = [ones(nInst,1) randn(nInst,nVars-1)];
w = randn(nVars,1);
y = sign(X*w);
flipInd = rand(nInst,1) > .9;
y(flipInd) = -y(flipInd);

w_init = zeros(nVars,1);
funObj = @(w)LogisticLoss(w,X,y);

fprintf('Running Hessian-Free Newton w/ numerical Hessian-Vector products\n');
options.Method = 'newton0';
minFunc(@LogisticLoss,w_init,options,X,y);
pause;

fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (Diagonal preconditioner)\n');
options.Method = 'pnewton0';
options.precFunc = @LogisticDiagPrecond;
minFunc(@LogisticLoss,w_init,options,X,y);
pause;

fprintf('Running Preconditioned Hessian-Free Newton w/ numerical Hessian-Vector products (L-BFGS preconditioner)\n');
options.Method = 'pnewton0';
options.precFunc = [];
minFunc(@LogisticLoss,w_init,options,X,y);
pause;

fprintf('Running Hessian-Free Newton w/ analytic Hessian-Vector products\n');
options.Method = 'newton0';
options.HvFunc = @LogisticHv;
minFunc(@LogisticLoss,w_init,options,X,y);
pause;

fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (Diagonal preconditioner)\n');
options.Method = 'pnewton0';
options.HvFunc = @LogisticHv;
options.precFunc = @LogisticDiagPrecond;
minFunc(@LogisticLoss,w_init,options,X,y);
pause;

fprintf('Running Preconditioned Hessian-Free Newton w/ analytic Hessian-Vector products (L-BFGS preconditioner)\n');
options.Method = 'pnewton0';
options.precFunc = [];
options.HvFunc
= @LogisticHv; 48 | minFunc(@LogisticLoss,w_init,options,X,y); 49 | pause; -------------------------------------------------------------------------------- /minFunc/isLegal.m: -------------------------------------------------------------------------------- 1 | function [legal] = isLegal(v) 2 | legal = sum(any(imag(v(:))))==0 & sum(isnan(v(:)))==0 & sum(isinf(v(:)))==0; -------------------------------------------------------------------------------- /minFunc/lbfgs.m: -------------------------------------------------------------------------------- 1 | function [d] = lbfgs(g,s,y,Hdiag) 2 | % BFGS Search Direction 3 | % 4 | % This function returns the (L-BFGS) approximate inverse Hessian, 5 | % multiplied by the gradient 6 | % 7 | % If you pass in all previous directions/sizes, it will be the same as full BFGS 8 | % If you truncate to the k most recent directions/sizes, it will be L-BFGS 9 | % 10 | % s - stored correction vectors paired with y (p by k); minFunc passes old_dirs, presumably the steps t*d (confirm in lbfgsUpdate) 11 | % y - stored correction vectors paired with s (p by k); minFunc passes old_stps, presumably the gradient changes (confirm in lbfgsUpdate) 12 | % g - gradient (p by 1) 13 | % Hdiag - value of initial Hessian diagonal elements (scalar) 14 | 15 | [p,k] = size(s); 16 | ro = zeros(k,1); % preallocate: one scale 1/(y'*s) per stored pair 17 | for i = 1:k 18 | ro(i,1) = 1/(y(:,i)'*s(:,i)); 19 | end 20 | 21 | q = zeros(p,k+1); 22 | r = zeros(p,k+1); 23 | al =zeros(k,1); 24 | be =zeros(k,1); 25 | 26 | q(:,k+1) = g; 27 | 28 | for i = k:-1:1 29 | al(i) = ro(i)*s(:,i)'*q(:,i+1); 30 | q(:,i) = q(:,i+1)-al(i)*y(:,i); 31 | end 32 | 33 | % Multiply by Initial Hessian 34 | r(:,1) = Hdiag*q(:,1); 35 | 36 | for i = 1:k 37 | be(i) = ro(i)*y(:,i)'*r(:,i); 38 | r(:,i+1) = r(:,i) + s(:,i)*(al(i)-be(i)); 39 | end 40 | d=r(:,k+1); -------------------------------------------------------------------------------- /minFunc/lbfgsC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | 4 | /* See lbfgs.m for details! */ 5 | /* This function may not exit gracefully on bad input! 
*/ 6 | 7 | 8 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 9 | { 10 | /* Variable Declarations */ 11 | 12 | double *s, *y, *g, *H, *d, *ro, *alpha, *beta, *q, *r; 13 | int nVars,nSteps,lhs_dims[2]; 14 | double temp; 15 | int i,j; 16 | 17 | /* Get Input Pointers */ 18 | 19 | g = mxGetPr(prhs[0]); 20 | s = mxGetPr(prhs[1]); 21 | y = mxGetPr(prhs[2]); 22 | H = mxGetPr(prhs[3]); 23 | 24 | /* Compute number of variables (p), rank of update (d) */ 25 | 26 | nVars = mxGetDimensions(prhs[1])[0]; 27 | nSteps = mxGetDimensions(prhs[1])[1]; 28 | 29 | /* Allocated Memory for Function Variables */ 30 | ro = mxCalloc(nSteps,sizeof(double)); 31 | alpha = mxCalloc(nSteps,sizeof(double)); 32 | beta = mxCalloc(nSteps,sizeof(double)); 33 | q = mxCalloc(nVars*(nSteps+1),sizeof(double)); 34 | r = mxCalloc(nVars*(nSteps+1),sizeof(double)); 35 | 36 | /* Set-up Output Vector */ 37 | 38 | lhs_dims[0] = nVars; 39 | lhs_dims[1] = 1; 40 | 41 | plhs[0] = mxCreateNumericArray(2,lhs_dims,mxDOUBLE_CLASS,mxREAL); 42 | d = mxGetPr(plhs[0]); 43 | 44 | /* ro = 1/(y(:,i)'*s(:,i)) */ 45 | for(i=0;i=0;i--) 62 | { 63 | /* alpha(i) = ro(i)*s(:,i)'*q(:,i+1) */ 64 | alpha[i] = 0; 65 | for(j=0;j 1e-10 4 | numCorrections = size(old_dirs,2); 5 | if numCorrections < corrections 6 | % Full Update 7 | old_dirs(:,numCorrections+1) = s; 8 | old_stps(:,numCorrections+1) = y; 9 | else 10 | % Limited-Memory Update 11 | old_dirs = [old_dirs(:,2:corrections) s]; 12 | old_stps = [old_stps(:,2:corrections) y]; 13 | end 14 | 15 | % Update scale of initial Hessian approximation 16 | Hdiag = ys/(y'*y); 17 | else 18 | if debug 19 | fprintf('Skipping Update\n'); 20 | end 21 | end -------------------------------------------------------------------------------- /minFunc/logistic/LogisticDiagPrecond.m: -------------------------------------------------------------------------------- 1 | function [m] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will apply diagonal preconditioner to 3 | 
% w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | 9 | % Compute diagonals of Hessian 10 | sig = sig.*(1-sig); 11 | for i = 1:length(w) 12 | h(i,1) = (sig.*X(:,i))'*X(:,i); 13 | end 14 | 15 | % Apply preconditioner 16 | m = v./h; 17 | 18 | % Exact preconditioner 19 | %H = X'*diag(sig.*(1-sig))*X; 20 | %m = H\v; 21 | -------------------------------------------------------------------------------- /minFunc/logistic/LogisticHv.m: -------------------------------------------------------------------------------- 1 | function [Hv] = LogisticHv(v,w,X,y) 2 | % v(feature,1) - vector that we will multiply Hessian by 3 | % w(feature,1) 4 | % X(instance,feature) 5 | % y(instance,1) 6 | 7 | sig = 1./(1+exp(-y.*(X*w))); 8 | Hv = X.'*(sig.*(1-sig).*(X*v)); 9 | -------------------------------------------------------------------------------- /minFunc/logistic/LogisticLoss.m: -------------------------------------------------------------------------------- 1 | function [nll,g,H,T] = LogisticLoss(w,X,y) 2 | % w(feature,1) 3 | % X(instance,feature) 4 | % y(instance,1) 5 | % Returns the scalar loss nll and, when requested, gradient g, Hessian H and third-derivative tensor T 6 | [n,p] = size(X); 7 | 8 | Xw = X*w; 9 | yXw = y.*Xw; 10 | 11 | nll = sum(max(0,-yXw) + log1p(exp(-abs(yXw)))); % = sum(log(1+exp(-yXw))), arranged so exp never overflows 12 | 13 | if nargout > 1 14 | if nargout > 2 15 | sig = 1./(1+exp(-yXw)); 16 | g = -X.'*(y.*(1-sig)); 17 | else 18 | g = -X.'*(y./(1+exp(yXw))); 19 | end 20 | end 21 | 22 | if nargout > 2 23 | H = X.'*diag(sparse(sig.*(1-sig)))*X; 24 | end 25 | 26 | if nargout > 3 27 | T = zeros(p,p,p); 28 | for j1 = 1:p 29 | for j2 = 1:p 30 | for j3 = 1:p 31 | T(j1,j2,j3) = sum(y(:).^3.*X(:,j1).*X(:,j2).*X(:,j3).*sig.*(1-sig).*(1-2*sig)); 32 | end 33 | end 34 | end 35 | end -------------------------------------------------------------------------------- /minFunc/logistic/mexutil.c: -------------------------------------------------------------------------------- 1 | #include "mexutil.h" 2 | 3 | /* Functions to create uninitialized arrays. 
*/ 4 | 5 | mxArray *mxCreateNumericArrayE(int ndim, const int *dims, 6 | mxClassID class, mxComplexity ComplexFlag) 7 | { 8 | mxArray *a; 9 | int i, *dims1 = mxMalloc(ndim*sizeof(int)); 10 | size_t sz = 1; 11 | for(i=0;i 9 | 10 | /* repeat a block of memory rep times */ 11 | void memrep(char *dest, size_t chunk, int rep) 12 | { 13 | #if 0 14 | /* slow way */ 15 | int i; 16 | char *p = dest; 17 | for(i=1;i>1); 31 | #endif 32 | } 33 | 34 | void repmat(char *dest, const char *src, int ndim, int *destdimsize, 35 | int *dimsize, const int *dims, int *rep) 36 | { 37 | int d = ndim-1; 38 | int i, chunk; 39 | /* copy the first repetition into dest */ 40 | if(d == 0) { 41 | chunk = dimsize[0]; 42 | memcpy(dest,src,chunk); 43 | } 44 | else { 45 | /* recursively repeat each slice of src */ 46 | for(i=0;i ndimdest) ndimdest = nrep; 91 | rep = mxCalloc(ndimdest, sizeof(int)); 92 | for(i=0;i ndimdest) ndimdest = nrep; 105 | rep = mxCalloc(ndimdest, sizeof(int)); 106 | for(i=0;i ndim) memrep(dest,destdimsize[ndim-1],extra_rep); 143 | if(mxIsComplex(srcmat)) { 144 | src = (char*)mxGetPi(srcmat); 145 | dest = (char*)mxGetPi(plhs[0]); 146 | repmat(dest,src,ndim,destdimsize,dimsize,dims,rep); 147 | if(ndimdest > ndim) memrep(dest,destdimsize[ndim-1],extra_rep); 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /minFunc/logistic/repmatC.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/minFunc/logistic/repmatC.dll -------------------------------------------------------------------------------- /minFunc/logistic/repmatC.mexglx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/minFunc/logistic/repmatC.mexglx 
-------------------------------------------------------------------------------- /minFunc/logistic/repmatC.mexmac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/minFunc/logistic/repmatC.mexmac -------------------------------------------------------------------------------- /minFunc/mchol.m: -------------------------------------------------------------------------------- 1 | function [l,d,perm] = mchol(A,mu) 2 | % Compute a modified LDL factorization of A 3 | % (MEX ME!) 4 | % l - n-by-n factor, d - n-by-1 pivots (each >= delta), perm - pivot order applied to A; mu defaults to 1e-12 5 | if nargin < 2 6 | mu = 1e-12; 7 | end 8 | 9 | n = size(A,1); 10 | l = eye(n); 11 | d = zeros(n,1); 12 | perm = 1:n; 13 | 14 | for i = 1:n 15 | c(i,i) = A(i,i); 16 | end 17 | 18 | % Compute modification parameters 19 | gamma = max(abs(diag(A))); 20 | xi = max(max(abs(A - diag(diag(A))))); % largest off-diagonal magnitude (no external setdiag helper needed) 21 | delta = mu*max(gamma+xi,1); 22 | if n > 1 23 | beta = sqrt(max([gamma xi/sqrt(n^2-1) mu])); 24 | else 25 | beta = sqrt(max([gamma mu])); 26 | end 27 | 28 | for j = 1:n 29 | 30 | % Find q that results in Best Permutation with j 31 | [maxVal maxPos] = max(abs(diag(c(j:end,j:end)))); 32 | q = maxPos+j-1; 33 | 34 | % Permute d,c,l,a 35 | d([j q]) = d([q j]); 36 | perm([j q]) = perm([q j]); 37 | c([j q],:) = c([q j],:); 38 | c(:,[j q]) = c(:,[q j]); 39 | l([j q],:) = l([q j],:); 40 | l(:,[j q]) = l(:,[q j]); 41 | A([j q],:) = A([q j],:); 42 | A(:,[j q]) = A(:,[q j]); 43 | 44 | for s = 1:j-1 45 | l(j,s) = c(j,s)/d(s); 46 | end 47 | for i = j+1:n 48 | c(i,j) = A(i,j) - sum(l(j,1:j-1).*c(i,1:j-1)); 49 | end 50 | theta = 0; 51 | if j < n % every column except the last, first column included (matches mcholC.c) 52 | theta = max(abs(c(j+1:n,j))); 53 | end 54 | d(j) = max([abs(c(j,j)) (theta/beta)^2 delta]); 55 | if j < n 56 | for i = j+1:n 57 | c(i,i) = c(i,i) - (c(i,j)^2)/d(j); 58 | end 59 | end 60 | end -------------------------------------------------------------------------------- /minFunc/mcholC.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | 4 | double mymax(double x, double y) 5 | { 6 | if (x > y) 7 | return x; 8 | else 9 | return y; 10 | } 11 | 12 | double absolute(double x) 13 | { 14 | if (x >= -x) 15 | return x; 16 | else 17 | return -x; 18 | } 19 | 20 | void permuteInt(int *x, int p, int q) 21 | { 22 | int temp; 23 | temp = x[p]; 24 | x[p] = x[q]; 25 | x[q] = temp; 26 | } 27 | 28 | void permute(double *x, int p, int q) 29 | { 30 | double temp; 31 | temp = x[p]; 32 | x[p] = x[q]; 33 | x[q] = temp; 34 | } 35 | 36 | void permuteRows(double *x, int p, int q,int n) 37 | { 38 | int i; 39 | double temp; 40 | for(i = 0; i < n; i++) 41 | { 42 | temp = x[p+i*n]; 43 | x[p+i*n] = x[q+i*n]; 44 | x[q+i*n] = temp; 45 | } 46 | } 47 | 48 | void permuteCols(double *x, int p, int q,int n) 49 | { 50 | int i; 51 | double temp; 52 | for(i = 0; i < n; i++) 53 | { 54 | temp = x[i+p*n]; 55 | x[i+p*n] = x[i+q*n]; 56 | x[i+q*n] = temp; 57 | } 58 | } 59 | 60 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 61 | { 62 | int n,sizL[2],sizD[2],i,j,q,s, 63 | *P; 64 | 65 | double mu,gamma,xi,delta,beta,maxVal,theta, 66 | *c, *H, *L, *D, *A; 67 | 68 | /* Input */ 69 | H = mxGetPr(prhs[0]); 70 | if (nrhs == 1) 71 | { 72 | mu = 1e-12; 73 | } 74 | else 75 | { 76 | mu = mxGetScalar(prhs[1]); 77 | } 78 | 79 | /* Compute Sizes */ 80 | n = mxGetDimensions(prhs[0])[0]; 81 | 82 | /* Form Output */ 83 | sizL[0] = n; 84 | sizL[1] = n; 85 | plhs[0] = mxCreateNumericArray(2,sizL,mxDOUBLE_CLASS,mxREAL); 86 | L = mxGetPr(plhs[0]); 87 | sizD[0] = n; 88 | sizD[1] = 1; 89 | plhs[1] = mxCreateNumericArray(2,sizD,mxDOUBLE_CLASS,mxREAL); 90 | D = mxGetPr(plhs[1]); 91 | plhs[2] = mxCreateNumericArray(2,sizD,mxINT32_CLASS,mxREAL); 92 | P = (int*)mxGetData(plhs[2]); 93 | 94 | /* Initialize */ 95 | c = mxCalloc(n*n,sizeof(double)); 96 | A = mxCalloc(n*n,sizeof(double)); 97 | 98 | for (i = 0; i < n; i++) 99 | { 
100 | P[i] = i; 101 | for (j = 0;j < n; j++) 102 | { 103 | A[i+n*j] = H[i+n*j]; 104 | } 105 | } 106 | 107 | gamma = 0; 108 | for (i = 0; i < n; i++) 109 | { 110 | L[i+n*i] = 1; 111 | c[i+n*i] = A[i+n*i]; 112 | } 113 | 114 | /* Compute modification parameters */ 115 | gamma = -1; 116 | xi = -1; 117 | for (i = 0; i < n; i++) 118 | { 119 | gamma = mymax(gamma,absolute(A[i+n*i])); 120 | for (j = 0;j < n; j++) 121 | { 122 | //printf("A(%d,%d) = %f, %f\n",i,j,A[i+n*j],absolute(A[i+n*j])); 123 | if (i != j) 124 | xi = mymax(xi,absolute(A[i+n*j])); 125 | } 126 | } 127 | delta = mu*mymax(gamma+xi,1); 128 | 129 | if (n > 1) 130 | { 131 | beta = sqrt(mymax(gamma,mymax(mu,xi/sqrt(n*n-1)))); 132 | } 133 | else 134 | { 135 | beta = sqrt(mymax(gamma,mu)); 136 | } 137 | 138 | for (j = 0; j < n; j++) 139 | { 140 | 141 | /* Find q that results in Best Permutation with j */ 142 | maxVal = -1; 143 | q = 0; 144 | for(i = j; i < n; i++) 145 | { 146 | if (absolute(c[i+n*i]) > maxVal) 147 | { 148 | maxVal = mymax(maxVal,absolute(c[i+n*i])); 149 | q = i; 150 | } 151 | } 152 | 153 | /* Permute D,c,L,A,P */ 154 | permute(D,j,q); 155 | permuteInt(P,j,q); 156 | permuteRows(c,j,q,n); 157 | permuteCols(c,j,q,n); 158 | permuteRows(L,j,q,n); 159 | permuteCols(L,j,q,n); 160 | permuteRows(A,j,q,n); 161 | permuteCols(A,j,q,n); 162 | 163 | for(s = 0; s <= j-1; s++) 164 | L[j+n*s] = c[j+n*s]/D[s]; 165 | 166 | for(i = j+1; i < n; i++) 167 | { 168 | c[i+j*n] = A[i+j*n]; 169 | for(s = 0; s <= j-1; s++) 170 | { 171 | c[i+j*n] -= L[j+n*s]*c[i+n*s]; 172 | } 173 | } 174 | 175 | theta = 0; 176 | if (j < n-1) 177 | { 178 | for(i = j+1;i < n; i++) 179 | theta = mymax(theta,absolute(c[i+n*j])); 180 | } 181 | 182 | D[j] = mymax(absolute(c[j+n*j]),mymax(delta,theta*theta/(beta*beta))); 183 | 184 | if (j < n-1) 185 | { 186 | for(i = j+1; i < n; i++) 187 | { 188 | c[i+n*i] = c[i+n*i] - c[i+n*j]*c[i+n*j]/D[j]; 189 | } 190 | } 191 | 192 | } 193 | 194 | for(i = 0; i < n; i++) 195 | P[i]++; 196 | 197 | mxFree(c); 198 | 
mxFree(A); 199 | } -------------------------------------------------------------------------------- /minFunc/mcholC.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/minFunc/mcholC.mexmaci64 -------------------------------------------------------------------------------- /minFunc/mcholC.mexw32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/minFunc/mcholC.mexw32 -------------------------------------------------------------------------------- /minFunc/mcholC.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/minFunc/mcholC.mexw64 -------------------------------------------------------------------------------- /minFunc/mcholinc.m: -------------------------------------------------------------------------------- 1 | function [R,tau] = mcholinc(H,verbose) 2 | % Computes Cholesky of H+tau*I, for suitably large tau that matrix is pd 3 | % verbose (optional, default 0) - print a message each time tau is adjusted 4 | p = size(H,1); 5 | if nargin < 2, verbose = 0; end % allow mcholinc(H) without erroring on the verbose checks below 6 | beta = norm(H,'fro'); 7 | if min(diag(H)) > 1e-12 8 | tau = 0; 9 | else 10 | if verbose 11 | fprintf('Small Value on Diagonal, Adjusting Hessian\n'); 12 | end 13 | tau = max(beta/2,1e-12); 14 | end 15 | while 1 16 | [R,posDef] = chol(H+tau*eye(p)); 17 | if posDef == 0 18 | break; 19 | else 20 | if verbose 21 | fprintf('Cholesky Failed, Adjusting Hessian\n'); 22 | end 23 | tau = max(2*tau,beta/2); 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /minFunc/minFunc.m: -------------------------------------------------------------------------------- 1 | function [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin) 2 | % 
minFunc(funObj,x0,options,varargin) 3 | % 4 | % Unconstrained optimizer using a line search strategy 5 | % 6 | % Uses an interface very similar to fminunc 7 | % (it doesn't support all of the optimization toolbox options, 8 | % but supports many other options). 9 | % 10 | % It computes descent directions using one of ('Method'): 11 | % - 'sd': Steepest Descent 12 | % (no previous information used, not recommended) 13 | % - 'csd': Cyclic Steepest Descent 14 | % (uses previous step length for a fixed length cycle) 15 | % - 'bb': Barzilai and Borwein Gradient 16 | % (uses only previous step) 17 | % - 'cg': Non-Linear Conjugate Gradient 18 | % (uses only previous step and a vector beta) 19 | % - 'scg': Scaled Non-Linear Conjugate Gradient 20 | % (uses previous step and a vector beta, 21 | % and Hessian-vector products to initialize line search) 22 | % - 'pcg': Preconditioned Non-Linear Conjugate Gradient 23 | % (uses only previous step and a vector beta, preconditioned version) 24 | % - 'lbfgs': Quasi-Newton with Limited-Memory BFGS Updating 25 | % (default: uses a predetermined number of previous steps to form a 26 | % low-rank Hessian approximation) 27 | % - 'newton0': Hessian-Free Newton 28 | % (numerically computes Hessian-Vector products) 29 | % - 'pnewton0': Preconditioned Hessian-Free Newton 30 | % (numerically computes Hessian-Vector products, preconditioned 31 | % version) 32 | % - 'qnewton': Quasi-Newton Hessian approximation 33 | % (uses dense Hessian approximation) 34 | % - 'mnewton': Newton's method with Hessian calculation after every 35 | % user-specified number of iterations 36 | % (needs user-supplied Hessian matrix) 37 | % - 'newton': Newton's method with Hessian calculation every iteration 38 | % (needs user-supplied Hessian matrix) 39 | % - 'tensor': Tensor 40 | % (needs user-supplied Hessian matrix and Tensor of 3rd partial derivatives) 41 | % 42 | % Several line search strategies are available for finding a step length satisfying 43 | % the 
termination criteria ('LS'): 44 | % - 0: Backtrack w/ Step Size Halving 45 | % - 1: Backtrack w/ Quadratic/Cubic Interpolation from new function values 46 | % - 2: Backtrack w/ Cubic Interpolation from new function + gradient 47 | % values (default for 'bb' and 'sd') 48 | % - 3: Bracketing w/ Step Size Doubling and Bisection 49 | % - 4: Bracketing w/ Cubic Interpolation/Extrapolation with function + 50 | % gradient values (default for all except 'bb' and 'sd') 51 | % - 5: Bracketing w/ Mixed Quadratic/Cubic Interpolation/Extrapolation 52 | % - 6: Use Matlab Optimization Toolbox's line search 53 | % (requires Matlab's linesearch.m to be added to the path) 54 | % 55 | % Above, the first three find a point satisfying the Armijo conditions, 56 | % while the last four search for a point satisfying the Wolfe 57 | % conditions. If the objective function overflows, it is recommended 58 | % to use one of the first 3. 59 | % The first three can be used to perform a non-monotone 60 | % linesearch by changing the option 'Fref'. 
61 | % 62 | % Several strategies for choosing the initial step size are available ('LS_init'): 63 | % - 0: Always try an initial step length of 1 (default for all except 'cg' and 'sd') 64 | % (t = 1) 65 | % - 1: Use a step similar to the previous step (default for 'cg' and 'sd') 66 | % (t = t_old*min(2,g'd/g_old'd_old)) 67 | % - 2: Quadratic Initialization using previous function value and new 68 | % function value/gradient (use this if steps tend to be very long) 69 | % (t = min(1,2*(f-f_old)/g)) 70 | % - 3: The minimum between 1 and twice the previous step length 71 | % (t = min(1,2*t)) 72 | % - 4: The scaled conjugate gradient step length (may accelerate 73 | % conjugate gradient methods, but requires a Hessian-vector product) 74 | % (t = g'd/d'Hd) 75 | % 76 | % Inputs: 77 | % funObj is a function handle 78 | % x0 is a starting vector; 79 | % options is a struct containing parameters 80 | % (defaults are used for non-existent or blank fields) 81 | % all other arguments are passed to funObj 82 | % 83 | % Outputs: 84 | % x is the minimizer found 85 | % f is the function value at the minimum found 86 | % exitflag returns an exit condition 87 | % output returns a structure with other information 88 | % 89 | % Supported Input Options 90 | % Display - Level of display [ off | final | (iter) | full | excessive ] 91 | % MaxFunEvals - Maximum number of function evaluations allowed (1000) 92 | % MaxIter - Maximum number of iterations allowed (500) 93 | % TolFun - Termination tolerance on the first-order optimality (1e-5) 94 | % TolX - Termination tolerance on progress in terms of function/parameter changes (1e-9) 95 | % Method - [ sd | csd | bb | cg | scg | pcg | {lbfgs} | newton0 | pnewton0 | 96 | % qnewton | mnewton | newton | tensor ] 97 | % c1 - Sufficient Decrease for Armijo condition (1e-4) 98 | % c2 - Curvature Decrease for Wolfe conditions (.2 for cg methods, .9 otherwise) 99 | % LS_init - Line Search Initialization -see above (2 for cg/sd, 4 for scg, 0 
otherwise) 100 | % LS - Line Search type -see above (2 for bb, 4 otherwise) 101 | % Fref - Setting this to a positive integer greater than 1 102 | % will use non-monotone Armijo objective in the line search. 103 | % (20 for bb, 10 for csd, 1 for all others) 104 | % numDiff - compute derivative numerically 105 | % (default: 0) (this option has a different effect for 'newton', see below) 106 | % useComplex - if 1, use complex differentials when computing numerical derivatives 107 | % to get very accurate values (default: 0) 108 | % DerivativeCheck - if 'on', computes derivatives numerically at initial 109 | % point and compares to user-supplied derivative (default: 'off') 110 | % outputFcn - function to run after each iteration (default: []). It 111 | % should have the following interface: 112 | % outputFcn(x,infoStruct,state,varargin{:}) 113 | % useMex - where applicable, use mex files to speed things up (default: 1) 114 | % 115 | % Method-specific input options: 116 | % newton: 117 | % HessianModify - type of Hessian modification for direct solvers to 118 | % use if the Hessian is not positive definite (default: 0) 119 | % 0: Minimum Euclidean norm s.t. 
eigenvalues sufficiently large 120 | % (requires eigenvalues on iterations where matrix is not pd) 121 | % 1: Start with (1/2)*||A||_F and increment until Cholesky succeeds 122 | % (an approximation to method 0, does not require eigenvalues) 123 | % 2: Modified LDL factorization 124 | % (only 1 generalized Cholesky factorization done and no eigenvalues required) 125 | % 3: Modified Spectral Decomposition 126 | % (requires eigenvalues) 127 | % 4: Modified Symmetric Indefinite Factorization 128 | % 5: Uses the eigenvector of the smallest eigenvalue as negative 129 | % curvature direction 130 | % cgSolve - use conjugate gradient instead of direct solver (default: 0) 131 | % 0: Direct Solver 132 | % 1: Conjugate Gradient 133 | % 2: Conjugate Gradient with Diagonal Preconditioner 134 | % 3: Conjugate Gradient with LBFGS Preconditioner 135 | % x: Conjugate Gradient with Symmetric Successive Over Relaxation 136 | % Preconditioner with parameter x 137 | % (where x is a real number in the range [0,2]) 138 | % x: Conjugate Gradient with Incomplete Cholesky Preconditioner 139 | % with drop tolerance -x 140 | % (where x is a real negative number) 141 | % numDiff - compute Hessian numerically 142 | % (default: 0, done with complex differentials if useComplex = 1) 143 | % LS_saveHessianComp - when on, only computes the Hessian at the 144 | % first and last iteration of the line search (default: 1) 145 | % mnewton: 146 | % HessianIter - number of iterations to use same Hessian (default: 5) 147 | % qnewton: 148 | % initialHessType - scale initial Hessian approximation (default: 1) 149 | % qnUpdate - type of quasi-Newton update (default: 3): 150 | % 0: BFGS 151 | % 1: SR1 (when it is positive-definite, otherwise BFGS) 152 | % 2: Hoshino 153 | % 3: Self-Scaling BFGS 154 | % 4: Oren's Self-Scaling Variable Metric method 155 | % 5: McCormick-Huang asymmetric update 156 | % Damped - use damped BFGS update (default: 1) 157 | % newton0/pnewton0: 158 | % HvFunc - user-supplied function 
that returns Hessian-vector products 159 | % (by default, these are computed numerically using autoHv) 160 | % HvFunc should have the following interface: HvFunc(v,x,varargin{:}) 161 | % useComplex - use a complex perturbation to get high accuracy 162 | % Hessian-vector products (default: 0) 163 | % (the increased accuracy can make the method much more efficient, 164 | % but gradient code must properly support complex inputs) 165 | % useNegCurv - a negative curvature direction is used as the descent 166 | % direction if one is encountered during the cg iterations 167 | % (default: 1) 168 | % precFunc (for pnewton0 only) - user-supplied preconditioner 169 | % (by default, an L-BFGS preconditioner is used) 170 | % precFunc should have the following interface: 171 | % precFunc(v,x,varargin{:}) 172 | % lbfgs: 173 | % Corr - number of corrections to store in memory (default: 100) 174 | % (higher numbers converge faster but use more memory) 175 | % Damped - use damped update (default: 0) 176 | % pcg: 177 | % cgUpdate - type of update (default: 2) 178 | % cg/scg/pcg: 179 | % cgUpdate - type of update (default for cg/scg: 2, default for pcg: 1) 180 | % 0: Fletcher Reeves 181 | % 1: Polak-Ribiere 182 | % 2: Hestenes-Stiefel (not supported for pcg) 183 | % 3: Gilbert-Nocedal 184 | % HvFunc (for scg only)- user-supplied function that returns Hessian-vector 185 | % products 186 | % (by default, these are computed numerically using autoHv) 187 | % HvFunc should have the following interface: 188 | % HvFunc(v,x,varargin{:}) 189 | % precFunc (for pcg only) - user-supplied preconditioner 190 | % (by default, an L-BFGS preconditioner is used) 191 | % precFunc should have the following interface: 192 | % precFunc(v,x,varargin{:}) 193 | % bb: 194 | % bbType - type of bb step (default: 1) 195 | % 0: min_alpha ||delta_x - alpha delta_g||_2 196 | % 1: min_alpha ||alpha delta_x - delta_g||_2 197 | % 2: Conic BB 198 | % 3: Gradient method with retards 199 | % csd: 200 | % cycle - length of 
cycle (default: 3) 201 | % 202 | % Supported Output Options 203 | % iterations - number of iterations taken 204 | % funcCount - number of function evaluations 205 | % algorithm - algorithm used 206 | % firstorderopt - first-order optimality 207 | % message - exit message 208 | % trace.funccount - function evaluations after each iteration 209 | % trace.fval - function value after each iteration 210 | % 211 | % Author: Mark Schmidt (2006) 212 | % Web: http://www.cs.ubc.ca/~schmidtm 213 | % 214 | % Sources (in order of how much the source material contributes): 215 | % J. Nocedal and S.J. Wright. 1999. "Numerical Optimization". Springer Verlag. 216 | % R. Fletcher. 1987. "Practical Methods of Optimization". Wiley. 217 | % J. Demmel. 1997. "Applied Linear Algebra. SIAM. 218 | % R. Barret, M. Berry, T. Chan, J. Demmel, J. Dongarra, V. Eijkhout, R. 219 | % Pozo, C. Romine, and H. Van der Vost. 1994. "Templates for the Solution of 220 | % Linear Systems: Building Blocks for Iterative Methods". SIAM. 221 | % J. More and D. Thuente. "Line search algorithms with guaranteed 222 | % sufficient decrease". ACM Trans. Math. Softw. vol 20, 286-307, 1994. 223 | % M. Raydan. "The Barzilai and Borwein gradient method for the large 224 | % scale unconstrained minimization problem". SIAM J. Optim., 7, 26-33, 225 | % (1997). 226 | % "Mathematical Optimization". The Computational Science Education 227 | % Project. 1995. 228 | % C. Kelley. 1999. "Iterative Methods for Optimization". Frontiers in 229 | % Applied Mathematics. SIAM. 230 | 231 | if nargin < 3 232 | options = []; 233 | end 234 | 235 | % Get Parameters 236 | [verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,tolFun,tolX,method,... 237 | corrections,c1,c2,LS_init,LS,cgSolve,qnUpdate,cgUpdate,initialHessType,... 238 | HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,... 239 | DerivativeCheck,Damped,HvFunc,bbType,cycle,... 240 | HessianIter,outputFcn,useMex,useNegCurv,precFunc] = ... 
241 | minFunc_processInputOptions(options); 242 | 243 | if isfield(options, 'logfile') 244 | logfile = options.logfile; 245 | else 246 | logfile = []; 247 | end 248 | 249 | % Constants 250 | SD = 0; 251 | CSD = 1; 252 | BB = 2; 253 | CG = 3; 254 | PCG = 4; 255 | LBFGS = 5; 256 | QNEWTON = 6; 257 | NEWTON0 = 7; 258 | NEWTON = 8; 259 | TENSOR = 9; 260 | 261 | % Initialize 262 | p = length(x0); 263 | d = zeros(p,1); 264 | x = x0; 265 | t = 1; 266 | 267 | % If necessary, form numerical differentiation functions 268 | funEvalMultiplier = 1; 269 | if numDiff && method ~= TENSOR 270 | varargin(3:end+2) = varargin(1:end); 271 | varargin{1} = useComplex; 272 | varargin{2} = funObj; 273 | if method ~= NEWTON 274 | if debug 275 | if useComplex 276 | fprintf('Using complex differentials for gradient computation\n'); 277 | else 278 | fprintf('Using finite differences for gradient computation\n'); 279 | end 280 | end 281 | funObj = @autoGrad; 282 | else 283 | if debug 284 | if useComplex 285 | fprintf('Using complex differentials for gradient computation\n'); 286 | else 287 | fprintf('Using finite differences for gradient computation\n'); 288 | end 289 | end 290 | funObj = @autoHess; 291 | end 292 | 293 | if method == NEWTON0 && useComplex == 1 294 | if debug 295 | fprintf('Turning off the use of complex differentials\n'); 296 | end 297 | useComplex = 0; 298 | end 299 | 300 | if useComplex 301 | funEvalMultiplier = p; 302 | else 303 | funEvalMultiplier = p+1; 304 | end 305 | end 306 | 307 | % Evaluate Initial Point 308 | if method < NEWTON 309 | [f,g] = feval(funObj, x, varargin{:}); 310 | else 311 | [f,g,H] = feval(funObj, x, varargin{:}); 312 | computeHessian = 1; 313 | end 314 | funEvals = 1; 315 | 316 | if strcmp(DerivativeCheck,'on') 317 | if numDiff 318 | fprintf('Can not do derivative checking when numDiff is 1\n'); 319 | end 320 | % Check provided gradient/hessian function using numerical derivatives 321 | fprintf('Checking Gradient:\n'); 322 | [f2,g2] = 
autoGrad(x,useComplex,funObj,varargin{:}); 323 | 324 | fprintf('Max difference between user and numerical gradient: %f\n',max(abs(g-g2))); 325 | if max(abs(g-g2)) > 1e-4 326 | fprintf('User NumDif:\n'); 327 | [g g2] 328 | diff = abs(g-g2) 329 | pause; 330 | end 331 | 332 | if method >= NEWTON 333 | fprintf('Check Hessian:\n'); 334 | [f2,g2,H2] = autoHess(x,useComplex,funObj,varargin{:}); 335 | 336 | fprintf('Max difference between user and numerical hessian: %f\n',max(abs(H(:)-H2(:)))); 337 | if max(abs(H(:)-H2(:))) > 1e-4 338 | H 339 | H2 340 | diff = abs(H-H2) 341 | pause; 342 | end 343 | end 344 | end 345 | 346 | % Output Log 347 | if verboseI 348 | fprintf('%10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond'); 349 | end 350 | 351 | if logfile 352 | fid = fopen(logfile, 'a'); 353 | if (fid > 0) 354 | fprintf(fid, '-- %10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond'); 355 | fclose(fid); 356 | end 357 | end 358 | 359 | % Output Function 360 | if ~isempty(outputFcn) 361 | callOutput(outputFcn,x,'init',0,funEvals,f,[],[],g,[],sum(abs(g)),varargin{:}); 362 | end 363 | 364 | % Initialize Trace 365 | trace.fval = f; 366 | trace.funcCount = funEvals; 367 | 368 | % Check optimality of initial point 369 | if sum(abs(g)) <= tolFun 370 | exitflag=1; 371 | msg = 'Optimality Condition below TolFun'; 372 | if verbose 373 | fprintf('%s\n',msg); 374 | end 375 | if nargout > 3 376 | output = struct('iterations',0,'funcCount',1,... 
377 | 'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace); 378 | end 379 | return; 380 | end 381 | 382 | % Perform up to a maximum of 'maxIter' descent steps: 383 | for i = 1:maxIter 384 | 385 | % ****************** COMPUTE DESCENT DIRECTION ***************** 386 | 387 | switch method 388 | case SD % Steepest Descent 389 | d = -g; 390 | 391 | case CSD % Cyclic Steepest Descent 392 | 393 | if mod(i,cycle) == 1 % Use Steepest Descent 394 | alpha = 1; 395 | LS_init = 2; 396 | LS = 4; % Precise Line Search 397 | elseif mod(i,cycle) == mod(1+1,cycle) % Use Previous Step 398 | alpha = t; 399 | LS_init = 0; 400 | LS = 2; % Non-monotonic line search 401 | end 402 | d = -alpha*g; 403 | 404 | case BB % Steepest Descent with Barzilai and Borwein Step Length 405 | 406 | if i == 1 407 | d = -g; 408 | else 409 | y = g-g_old; 410 | s = t*d; 411 | if bbType == 0 412 | yy = y'*y; 413 | alpha = (s'*y)/(yy); 414 | if alpha <= 1e-10 || alpha > 1e10 415 | alpha = 1; 416 | end 417 | elseif bbType == 1 418 | sy = s'*y; 419 | alpha = (s'*s)/sy; 420 | if alpha <= 1e-10 || alpha > 1e10 421 | alpha = 1; 422 | end 423 | elseif bbType == 2 % Conic Interpolation ('Modified BB') 424 | sy = s'*y; 425 | ss = s'*s; 426 | alpha = ss/sy; 427 | if alpha <= 1e-10 || alpha > 1e10 428 | alpha = 1; 429 | end 430 | alphaConic = ss/(6*(myF_old - f) + 4*g'*s + 2*g_old'*s); 431 | if alphaConic > .001*alpha && alphaConic < 1000*alpha 432 | alpha = alphaConic; 433 | end 434 | elseif bbType == 3 % Gradient Method with retards (bb type 1, random selection of previous step) 435 | sy = s'*y; 436 | alpha = (s'*s)/sy; 437 | if alpha <= 1e-10 || alpha > 1e10 438 | alpha = 1; 439 | end 440 | v(1+mod(i-2,5)) = alpha; 441 | alpha = v(ceil(rand*length(v))); 442 | end 443 | d = -alpha*g; 444 | end 445 | g_old = g; 446 | myF_old = f; 447 | 448 | 449 | case CG % Non-Linear Conjugate Gradient 450 | 451 | if i == 1 452 | d = -g; % Initially use steepest descent direction 453 | else 454 | gtgo = 
g'*g_old; 455 | gotgo = g_old'*g_old; 456 | 457 | if cgUpdate == 0 458 | % Fletcher-Reeves 459 | beta = (g'*g)/(gotgo); 460 | elseif cgUpdate == 1 461 | % Polak-Ribiere 462 | beta = (g'*(g-g_old)) /(gotgo); 463 | elseif cgUpdate == 2 464 | % Hestenes-Stiefel 465 | beta = (g'*(g-g_old))/((g-g_old)'*d); 466 | else 467 | % Gilbert-Nocedal 468 | beta_FR = (g'*(g-g_old)) /(gotgo); 469 | beta_PR = (g'*g-gtgo)/(gotgo); 470 | beta = max(-beta_FR,min(beta_PR,beta_FR)); 471 | end 472 | 473 | d = -g + beta*d; 474 | 475 | % Restart if not a direction of sufficient descent 476 | if g'*d > -tolX 477 | if debug 478 | fprintf('Restarting CG\n'); 479 | end 480 | beta = 0; 481 | d = -g; 482 | end 483 | 484 | % Old restart rule: 485 | %if beta < 0 || abs(gtgo)/(gotgo) >= 0.1 || g'*d >= 0 486 | 487 | end 488 | g_old = g; 489 | 490 | case PCG % Preconditioned Non-Linear Conjugate Gradient 491 | 492 | % Apply preconditioner to negative gradient 493 | if isempty(precFunc) 494 | % Use L-BFGS Preconditioner 495 | if i == 1 496 | old_dirs = zeros(length(g),0); 497 | old_stps = zeros(length(g),0); 498 | Hdiag = 1; 499 | s = -g; 500 | else 501 | [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 502 | 503 | if useMex 504 | s = lbfgsC(-g,old_dirs,old_stps,Hdiag); 505 | else 506 | s = lbfgs(-g,old_dirs,old_stps,Hdiag); 507 | end 508 | end 509 | else % User-supplied preconditioner 510 | s = precFunc(-g,x,varargin{:}); 511 | end 512 | 513 | if i == 1 514 | d = s; 515 | else 516 | 517 | if cgUpdate == 0 518 | % Preconditioned Fletcher-Reeves 519 | beta = (g'*s)/(g_old'*s_old); 520 | elseif cgUpdate < 3 521 | % Preconditioned Polak-Ribiere 522 | beta = (g'*(s-s_old))/(g_old'*s_old); 523 | else 524 | % Preconditioned Gilbert-Nocedal 525 | beta_FR = (g'*s)/(g_old'*s_old); 526 | beta_PR = (g'*(s-s_old))/(g_old'*s_old); 527 | beta = max(-beta_FR,min(beta_PR,beta_FR)); 528 | end 529 | d = s + beta*d; 530 | 531 | if g'*d > -tolX 532 | if debug 533 | 
fprintf('Restarting CG\n'); 534 | end 535 | beta = 0; 536 | d = s; 537 | end 538 | 539 | end 540 | g_old = g; 541 | s_old = s; 542 | case LBFGS % L-BFGS 543 | 544 | % Update the direction and step sizes 545 | 546 | if i == 1 547 | d = -g; % Initially use steepest descent direction 548 | old_dirs = zeros(length(g),0); 549 | old_stps = zeros(length(d),0); 550 | Hdiag = 1; 551 | else 552 | if Damped 553 | [old_dirs,old_stps,Hdiag] = dampedUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 554 | else 555 | [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 556 | end 557 | 558 | if useMex 559 | d = lbfgsC(-g,old_dirs,old_stps,Hdiag); 560 | else 561 | d = lbfgs(-g,old_dirs,old_stps,Hdiag); 562 | end 563 | end 564 | g_old = g; 565 | 566 | case QNEWTON % Use quasi-Newton Hessian approximation 567 | 568 | if i == 1 569 | d = -g; 570 | else 571 | % Compute difference vectors 572 | y = g-g_old; 573 | s = t*d; 574 | 575 | if i == 2 576 | % Make initial Hessian approximation 577 | if initialHessType == 0 578 | % Identity 579 | if qnUpdate <= 1 580 | R = eye(length(g)); 581 | else 582 | H = eye(length(g)); 583 | end 584 | else 585 | % Scaled Identity 586 | if debug 587 | fprintf('Scaling Initial Hessian Approximation\n'); 588 | end 589 | if qnUpdate <= 1 590 | % Use Cholesky of Hessian approximation 591 | R = sqrt((y'*y)/(y'*s))*eye(length(g)); 592 | else 593 | % Use Inverse of Hessian approximation 594 | H = eye(length(g))*(y'*s)/(y'*y); 595 | end 596 | end 597 | end 598 | 599 | if qnUpdate == 0 % Use BFGS updates 600 | Bs = R'*(R*s); 601 | if Damped 602 | eta = .02; 603 | if y'*s < eta*s'*Bs 604 | if debug 605 | fprintf('Damped Update\n'); 606 | end 607 | theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1); 608 | y = theta*y + (1-theta)*Bs; 609 | end 610 | R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 611 | else 612 | if y'*s > 1e-10 613 | R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 
614 | else 615 | if debug 616 | fprintf('Skipping Update\n'); 617 | end 618 | end 619 | end 620 | elseif qnUpdate == 1 % Perform SR1 Update if it maintains positive-definiteness 621 | 622 | Bs = R'*(R*s); 623 | ymBs = y-Bs; 624 | if abs(s'*ymBs) >= norm(s)*norm(ymBs)*1e-8 && (s-((R\(R'\y))))'*y > 1e-10 625 | R = cholupdate(R,-ymBs/sqrt(ymBs'*s),'-'); 626 | else 627 | if debug 628 | fprintf('SR1 not positive-definite, doing BFGS Update\n'); 629 | end 630 | if Damped 631 | eta = .02; 632 | if y'*s < eta*s'*Bs 633 | if debug 634 | fprintf('Damped Update\n'); 635 | end 636 | theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1); 637 | y = theta*y + (1-theta)*Bs; 638 | end 639 | R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 640 | else 641 | if y'*s > 1e-10 642 | R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-'); 643 | else 644 | if debug 645 | fprintf('Skipping Update\n'); 646 | end 647 | end 648 | end 649 | end 650 | elseif qnUpdate == 2 % Use Hoshino update 651 | v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y)); 652 | phi = 1/(1 + (y'*H*y)/(s'*y)); 653 | H = H + (s*s')/(s'*y) - (H*y*y'*H)/(y'*H*y) + phi*v*v'; 654 | 655 | elseif qnUpdate == 3 % Self-Scaling BFGS update 656 | ys = y'*s; 657 | Hy = H*y; 658 | yHy = y'*Hy; 659 | gamma = ys/yHy; 660 | v = sqrt(yHy)*(s/ys - Hy/yHy); 661 | H = gamma*(H - Hy*Hy'/yHy + v*v') + (s*s')/ys; 662 | elseif qnUpdate == 4 % Oren's Self-Scaling Variable Metric update 663 | 664 | % Oren's method 665 | if (s'*y)/(y'*H*y) > 1 666 | phi = 1; % BFGS 667 | omega = 0; 668 | elseif (s'*(H\s))/(s'*y) < 1 669 | phi = 0; % DFP 670 | omega = 1; 671 | else 672 | phi = (s'*y)*(y'*H*y-s'*y)/((s'*(H\s))*(y'*H*y)-(s'*y)^2); 673 | omega = phi; 674 | end 675 | 676 | gamma = (1-omega)*(s'*y)/(y'*H*y) + omega*(s'*(H\s))/(s'*y); 677 | v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y)); 678 | H = gamma*(H - (H*y*y'*H)/(y'*H*y) + phi*v*v') + (s*s')/(s'*y); 679 | 680 | elseif qnUpdate == 5 % McCormick-Huang asymmetric update 681 | theta = 1; 
682 | phi = 0; 683 | psi = 1; 684 | omega = 0; 685 | t1 = s*(theta*s + phi*H'*y)'; 686 | t2 = (theta*s + phi*H'*y)'*y; 687 | t3 = H*y*(psi*s + omega*H'*y)'; 688 | t4 = (psi*s + omega*H'*y)'*y; 689 | H = H + t1/t2 - t3/t4; 690 | end 691 | 692 | if qnUpdate <= 1 693 | d = -R\(R'\g); 694 | else 695 | d = -H*g; 696 | end 697 | 698 | end 699 | g_old = g; 700 | 701 | case NEWTON0 % Hessian-Free Newton 702 | 703 | cgMaxIter = min(p,maxFunEvals-funEvals); 704 | cgForce = min(0.5,sqrt(norm(g)))*norm(g); 705 | 706 | % Set-up preconditioner 707 | precondFunc = []; 708 | precondArgs = []; 709 | if cgSolve == 1 710 | if isempty(precFunc) % Apply L-BFGS preconditioner 711 | if i == 1 712 | old_dirs = zeros(length(g),0); 713 | old_stps = zeros(length(g),0); 714 | Hdiag = 1; 715 | else 716 | [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 717 | if useMex 718 | precondFunc = @lbfgsC; 719 | else 720 | precondFunc = @lbfgs; 721 | end 722 | precondArgs = {old_dirs,old_stps,Hdiag}; 723 | end 724 | g_old = g; 725 | else 726 | % Apply user-defined preconditioner 727 | precondFunc = precFunc; 728 | precondArgs = {x,varargin{:}}; 729 | end 730 | end 731 | 732 | % Solve Newton system using cg and hessian-vector products 733 | if isempty(HvFunc) 734 | % No user-supplied Hessian-vector function, 735 | % use automatic differentiation 736 | HvFun = @autoHv; 737 | HvArgs = {x,g,useComplex,funObj,varargin{:}}; 738 | else 739 | % Use user-supplid Hessian-vector function 740 | HvFun = HvFunc; 741 | HvArgs = {x,varargin{:}}; 742 | end 743 | 744 | if useNegCurv 745 | [d,cgIter,cgRes,negCurv] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs); 746 | else 747 | [d,cgIter,cgRes] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs); 748 | end 749 | 750 | funEvals = funEvals+cgIter; 751 | if debug 752 | fprintf('newtonCG stopped on iteration %d w/ residual %.5e\n',cgIter,cgRes); 753 | 754 | end 755 | 756 
| if useNegCurv 757 | if ~isempty(negCurv) 758 | %if debug 759 | fprintf('Using negative curvature direction\n'); 760 | %end 761 | d = negCurv/norm(negCurv); 762 | d = d/sum(abs(g)); 763 | end 764 | end 765 | 766 | case NEWTON % Newton search direction 767 | 768 | if cgSolve == 0 769 | if HessianModify == 0 770 | % Attempt to perform a Cholesky factorization of the Hessian 771 | [R,posDef] = chol(H); 772 | 773 | % If the Cholesky factorization was successful, then the Hessian is 774 | % positive definite, solve the system 775 | if posDef == 0 776 | d = -R\(R'\g); 777 | 778 | else 779 | % otherwise, adjust the Hessian to be positive definite based on the 780 | % minimum eigenvalue, and solve with QR 781 | % (expensive, we don't want to do this very much) 782 | if debug 783 | fprintf('Adjusting Hessian\n'); 784 | end 785 | H = H + eye(length(g)) * max(0,1e-12 - min(real(eig(H)))); 786 | d = -H\g; 787 | end 788 | elseif HessianModify == 1 789 | % Modified Incomplete Cholesky 790 | R = mcholinc(H,debug); 791 | d = -R\(R'\g); 792 | elseif HessianModify == 2 793 | % Modified Generalized Cholesky 794 | if useMex 795 | [L D perm] = mcholC(H); 796 | else 797 | [L D perm] = mchol(H); 798 | end 799 | d(perm) = -L' \ ((D.^-1).*(L \ g(perm))); 800 | 801 | elseif HessianModify == 3 802 | % Modified Spectral Decomposition 803 | [V,D] = eig((H+H')/2); 804 | D = diag(D); 805 | D = max(abs(D),max(max(abs(D)),1)*1e-12); 806 | d = -V*((V'*g)./D); 807 | elseif HessianModify == 4 808 | % Modified Symmetric Indefinite Factorization 809 | [L,D,perm] = ldl(H,'vector'); 810 | [blockPos junk] = find(triu(D,1)); 811 | for diagInd = setdiff(setdiff(1:p,blockPos),blockPos+1) 812 | if D(diagInd,diagInd) < 1e-12 813 | D(diagInd,diagInd) = 1e-12; 814 | end 815 | end 816 | for blockInd = blockPos' 817 | block = D(blockInd:blockInd+1,blockInd:blockInd+1); 818 | block_a = block(1); 819 | block_b = block(2); 820 | block_d = block(4); 821 | lambda = (block_a+block_d)/2 - sqrt(4*block_b^2 + (block_a - 
block_d)^2)/2; 822 | D(blockInd:blockInd+1,blockInd:blockInd+1) = block+eye(2)*(lambda+1e-12); 823 | end 824 | d(perm) = -L' \ (D \ (L \ g(perm))); 825 | else 826 | % Take Newton step if Hessian is pd, 827 | % otherwise take a step with negative curvature 828 | [R,posDef] = chol(H); 829 | if posDef == 0 830 | d = -R\(R'\g); 831 | else 832 | if debug 833 | fprintf('Taking Direction of Negative Curvature\n'); 834 | end 835 | [V,D] = eig(H); 836 | u = V(:,1); 837 | d = -sign(u'*g)*u; 838 | end 839 | end 840 | 841 | else 842 | % Solve with Conjugate Gradient 843 | cgMaxIter = p; 844 | cgForce = min(0.5,sqrt(norm(g)))*norm(g); 845 | 846 | % Select Preconditioner 847 | if cgSolve == 1 848 | % No preconditioner 849 | precondFunc = []; 850 | precondArgs = []; 851 | elseif cgSolve == 2 852 | % Diagonal preconditioner 853 | precDiag = diag(H); 854 | precDiag(precDiag < 1e-12) = 1e-12 - min(precDiag); 855 | precondFunc = @precondDiag; 856 | precondArgs = {precDiag.^-1}; 857 | elseif cgSolve == 3 858 | % L-BFGS preconditioner 859 | if i == 1 860 | old_dirs = zeros(length(g),0); 861 | old_stps = zeros(length(g),0); 862 | Hdiag = 1; 863 | else 864 | [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag); 865 | end 866 | g_old = g; 867 | if useMex 868 | precondFunc = @lbfgsC; 869 | else 870 | precondFunc = @lbfgs; 871 | end 872 | precondArgs = {old_dirs,old_stps,Hdiag}; 873 | elseif cgSolve > 0 874 | % Symmetric Successive Overelaxation Preconditioner 875 | omega = cgSolve; 876 | D = diag(H); 877 | D(D < 1e-12) = 1e-12 - min(D); 878 | precDiag = (omega/(2-omega))*D.^-1; 879 | precTriu = diag(D/omega) + triu(H,1); 880 | precondFunc = @precondTriuDiag; 881 | precondArgs = {precTriu,precDiag.^-1}; 882 | else 883 | % Incomplete Cholesky Preconditioner 884 | opts.droptol = -cgSolve; 885 | opts.rdiag = 1; 886 | R = cholinc(sparse(H),opts); 887 | if min(diag(R)) < 1e-12 888 | R = cholinc(sparse(H + eye*(1e-12 - min(diag(R)))),opts); 889 | end 890 
| precondFunc = @precondTriu; 891 | precondArgs = {R}; 892 | end 893 | 894 | % Run cg with the appropriate preconditioner 895 | if isempty(HvFunc) 896 | % No user-supplied Hessian-vector function 897 | [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs); 898 | else 899 | % Use user-supplied Hessian-vector function 900 | [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFunc,{x,varargin{:}}); 901 | end 902 | if debug 903 | fprintf('CG stopped after %d iterations w/ residual %.5e\n',cgIter,cgRes); 904 | %funEvals = funEvals + cgIter; 905 | end 906 | end 907 | 908 | case TENSOR % Tensor Method 909 | 910 | if numDiff 911 | % Compute 3rd-order Tensor Numerically 912 | [junk1 junk2 junk3 T] = autoTensor(x,useComplex,funObj,varargin{:}); 913 | else 914 | % Use user-supplied 3rd-derivative Tensor 915 | [junk1 junk2 junk3 T] = feval(funObj, x, varargin{:}); 916 | end 917 | options_sub.Method = 'newton'; 918 | options_sub.Display = 'none'; 919 | options_sub.TolX = tolX; 920 | options_sub.TolFun = tolFun; 921 | d = minFunc(@taylorModel,zeros(p,1),options_sub,f,g,H,T); 922 | 923 | if any(abs(d) > 1e5) || all(abs(d) < 1e-5) || g'*d > -tolX 924 | if debug 925 | fprintf('Using 2nd-Order Step\n'); 926 | end 927 | [V,D] = eig((H+H')/2); 928 | D = diag(D); 929 | D = max(abs(D),max(max(abs(D)),1)*1e-12); 930 | d = -V*((V'*g)./D); 931 | else 932 | if debug 933 | fprintf('Using 3rd-Order Step\n'); 934 | end 935 | end 936 | end 937 | 938 | if ~isLegal(d) 939 | fprintf('Step direction is illegal!\n'); 940 | pause; 941 | return 942 | end 943 | 944 | % ****************** COMPUTE STEP LENGTH ************************ 945 | 946 | % Directional Derivative 947 | gtd = g'*d; 948 | 949 | % Check that progress can be made along direction 950 | if gtd > -tolX 951 | exitflag=2; 952 | msg = 'Directional Derivative below TolX'; 953 | break; 954 | end 955 | 956 | % Select Initial Guess 957 | if i == 1 958 | if method < NEWTON0 959 | t = 
min(1,1/sum(abs(g))); 960 | else 961 | t = 1; 962 | end 963 | else 964 | if LS_init == 0 965 | % Newton step 966 | t = 1; 967 | elseif LS_init == 1 968 | % Close to previous step length 969 | t = t*min(2,(gtd_old)/(gtd)); 970 | elseif LS_init == 2 971 | % Quadratic Initialization based on {f,g} and previous f 972 | t = min(1,2*(f-f_old)/(gtd)); 973 | elseif LS_init == 3 974 | % Double previous step length 975 | t = min(1,t*2); 976 | elseif LS_init == 4 977 | % Scaled step length if possible 978 | if isempty(HvFunc) 979 | % No user-supplied Hessian-vector function, 980 | % use automatic differentiation 981 | dHd = d'*autoHv(d,x,g,0,funObj,varargin{:}); 982 | else 983 | % Use user-supplid Hessian-vector function 984 | dHd = d'*HvFunc(d,x,varargin{:}); 985 | end 986 | 987 | funEvals = funEvals + 1; 988 | if dHd > 0 989 | t = -gtd/(dHd); 990 | else 991 | t = min(1,2*(f-f_old)/(gtd)); 992 | end 993 | end 994 | 995 | if t <= 0 996 | t = 1; 997 | end 998 | end 999 | f_old = f; 1000 | gtd_old = gtd; 1001 | 1002 | % Compute reference fr if using non-monotone objective 1003 | if Fref == 1 1004 | fr = f; 1005 | else 1006 | if i == 1 1007 | old_fvals = repmat(-inf,[Fref 1]); 1008 | end 1009 | 1010 | if i <= Fref 1011 | old_fvals(i) = f; 1012 | else 1013 | old_fvals = [old_fvals(2:end);f]; 1014 | end 1015 | fr = max(old_fvals); 1016 | end 1017 | 1018 | computeHessian = 0; 1019 | if method >= NEWTON 1020 | if HessianIter == 1 1021 | computeHessian = 1; 1022 | elseif i > 1 && mod(i-1,HessianIter) == 0 1023 | computeHessian = 1; 1024 | end 1025 | end 1026 | 1027 | % Line Search 1028 | f_old = f; 1029 | if LS < 3 % Use Armijo Bactracking 1030 | % Perform Backtracking line search 1031 | if computeHessian 1032 | [t,x,f,g,LSfunEvals,H] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:}); 1033 | else 1034 | [t,x,f,g,LSfunEvals] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,1,funObj,varargin{:}); 1035 | end 1036 | funEvals = 
funEvals + LSfunEvals; 1037 | 1038 | elseif LS < 6 1039 | % Find Point satisfying Wolfe 1040 | 1041 | if computeHessian 1042 | [t,f,g,LSfunEvals,H] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:}); 1043 | else 1044 | [t,f,g,LSfunEvals] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,1,funObj,varargin{:}); 1045 | end 1046 | funEvals = funEvals + LSfunEvals; 1047 | x = x + t*d; 1048 | 1049 | else 1050 | % Use Matlab optim toolbox line search 1051 | [t,f_new,fPrime_new,g_new,LSexitFlag,LSiter]=... 1052 | lineSearch({'fungrad',[],funObj},x,p,1,p,d,f,gtd,t,c1,c2,-inf,maxFunEvals-funEvals,... 1053 | tolX,[],[],[],varargin{:}); 1054 | funEvals = funEvals + LSiter; 1055 | if isempty(t) 1056 | exitflag = -2; 1057 | msg = 'Matlab LineSearch failed'; 1058 | break; 1059 | end 1060 | 1061 | if method >= NEWTON 1062 | [f_new,g_new,H] = funObj(x + t*d,varargin{:}); 1063 | funEvals = funEvals + 1; 1064 | end 1065 | x = x + t*d; 1066 | f = f_new; 1067 | g = g_new; 1068 | end 1069 | 1070 | % Output iteration information 1071 | if verboseI 1072 | fprintf('%10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g))); 1073 | end 1074 | 1075 | if logfile 1076 | fid = fopen(logfile, 'a'); 1077 | if (fid > 0) 1078 | fprintf(fid, '-- %10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g))); 1079 | fclose(fid); 1080 | end 1081 | end 1082 | 1083 | 1084 | % Output Function 1085 | if ~isempty(outputFcn) 1086 | callOutput(outputFcn,x,'iter',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:}); 1087 | end 1088 | 1089 | % Update Trace 1090 | trace.fval(end+1,1) = f; 1091 | trace.funcCount(end+1,1) = funEvals; 1092 | 1093 | % Check Optimality Condition 1094 | if sum(abs(g)) <= tolFun 1095 | exitflag=1; 1096 | msg = 'Optimality Condition below TolFun'; 1097 | break; 1098 | end 1099 | 1100 | % ******************* Check for lack of progress ******************* 1101 | 1102 | if sum(abs(t*d)) <= tolX 
1103 | exitflag=2; 1104 | msg = 'Step Size below TolX'; 1105 | break; 1106 | end 1107 | 1108 | 1109 | if abs(f-f_old) < tolX 1110 | exitflag=2; 1111 | msg = 'Function Value changing by less than TolX'; 1112 | break; 1113 | end 1114 | 1115 | % ******** Check for going over iteration/evaluation limit ******************* 1116 | 1117 | if funEvals*funEvalMultiplier > maxFunEvals 1118 | exitflag = 0; 1119 | msg = 'Exceeded Maximum Number of Function Evaluations'; 1120 | break; 1121 | end 1122 | 1123 | if i == maxIter 1124 | exitflag = 0; 1125 | msg='Exceeded Maximum Number of Iterations'; 1126 | break; 1127 | end 1128 | 1129 | end 1130 | 1131 | if verbose 1132 | fprintf('%s\n',msg); 1133 | end 1134 | if nargout > 3 1135 | output = struct('iterations',i,'funcCount',funEvals*funEvalMultiplier,... 1136 | 'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace); 1137 | end 1138 | 1139 | % Output Function 1140 | if ~isempty(outputFcn) 1141 | callOutput(outputFcn,x,'done',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:}); 1142 | end 1143 | 1144 | end 1145 | 1146 |
--------------------------------------------------------------------------------
/minFunc/minFunc_processInputOptions.m:
--------------------------------------------------------------------------------

function [verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,tolFun,tolX,method,...
    corrections,c1,c2,LS_init,LS,cgSolve,qnUpdate,cgUpdate,initialHessType,...
    HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,...
    DerivativeCheck,Damped,HvFunc,bbType,cycle,...
    HessianIter,outputFcn,useMex,useNegCurv,precFunc] = ...
    minFunc_processInputOptions(o)
% MINFUNC_PROCESSINPUTOPTIONS  Expand a user options struct into the full
% list of optimizer settings.
%
% Input:
%   o - options struct (may be empty). Field names are matched
%       case-insensitively: every field is duplicated under its
%       upper-case name before lookup.
%
% Output:
%   One value per setting, in the order of the signature. A setting that
%   is absent from o (or present but empty) takes its default. The
%   defaults of c2, LS_init, LS, Fref, Damped, HessianIter and cgSolve
%   depend on the selected METHOD; an explicit field in o still overrides
%   them. An unrecognised METHOD string leaves the L-BFGS defaults in
%   place.

% Constants (numeric codes for the optimisation methods)
SD = 0;
CSD = 1;
BB = 2;
CG = 3;
PCG = 4;
LBFGS = 5;
QNEWTON = 6;
NEWTON0 = 7;
NEWTON = 8;
TENSOR = 9;

% Display-related defaults
verbose = 1;
verboseI= 1;
debug = 0;
doPlot = 0;
method = LBFGS;
cgSolve = 0;

% Make option lookup case-insensitive
o = toUpper(o);

if isfield(o,'DISPLAY')
    switch(upper(o.DISPLAY))
        case 0
            verbose = 0;
            verboseI = 0;
        case 'FINAL'
            verboseI = 0;
        case 'OFF'
            verbose = 0;
            verboseI = 0;
        case 'NONE'
            verbose = 0;
            verboseI = 0;
        case 'FULL'
            debug = 1;
        case 'EXCESSIVE'
            debug = 1;
            doPlot = 1;
    end
end

% Method-dependent defaults (overridable through explicit options below)
LS_init = 0;
c2 = 0.9;
LS = 4;
Fref = 1;
Damped = 0;
HessianIter = 1;
if isfield(o,'METHOD')
    m = upper(o.METHOD);
    switch(m)
        case 'TENSOR'
            method = TENSOR;
        case 'NEWTON'
            method = NEWTON;
        case 'MNEWTON'
            method = NEWTON;
            HessianIter = 5;
        case 'PNEWTON0'
            method = NEWTON0;
            cgSolve = 1;
        case 'NEWTON0'
            method = NEWTON0;
        case 'QNEWTON'
            method = QNEWTON;
            Damped = 1;
        case 'LBFGS'
            method = LBFGS;
        case 'BB'
            method = BB;
            LS = 2;
            Fref = 20;
        case 'PCG'
            method = PCG;
            c2 = 0.2;
            LS_init = 2;
        case 'SCG'
            method = CG;
            c2 = 0.2;
            LS_init = 4;
        case 'CG'
            method = CG;
            c2 = 0.2;
            LS_init = 2;
        case 'CSD'
            method = CSD;
            c2 = 0.2;
            Fref = 10;
            LS_init = 2;
        case 'SD'
            method = SD;
            LS_init = 2;
    end
end

maxFunEvals = getOpt(o,'MAXFUNEVALS',1000);
maxIter = getOpt(o,'MAXITER',500);
tolFun = getOpt(o,'TOLFUN',1e-5);
tolX = getOpt(o,'TOLX',1e-9);
corrections = getOpt(o,'CORR',100);
c1 = getOpt(o,'C1',1e-4);
c2 = getOpt(o,'C2',c2);
LS_init = getOpt(o,'LS_INIT',LS_init);
LS = getOpt(o,'LS',LS);
cgSolve = getOpt(o,'CGSOLVE',cgSolve);
qnUpdate = getOpt(o,'QNUPDATE',3);
cgUpdate = getOpt(o,'CGUPDATE',2);
initialHessType = getOpt(o,'INITIALHESSTYPE',1);
HessianModify = getOpt(o,'HESSIANMODIFY',0);
Fref = getOpt(o,'FREF',Fref);
useComplex = getOpt(o,'USECOMPLEX',0);
numDiff = getOpt(o,'NUMDIFF',0);
LS_saveHessianComp = getOpt(o,'LS_SAVEHESSIANCOMP',1);
DerivativeCheck = getOpt(o,'DERIVATIVECHECK',0);
Damped = getOpt(o,'DAMPED',Damped);
HvFunc = getOpt(o,'HVFUNC',[]);
bbType = getOpt(o,'BBTYPE',0);
cycle = getOpt(o,'CYCLE',3);
HessianIter = getOpt(o,'HESSIANITER',HessianIter);
outputFcn = getOpt(o,'OUTPUTFCN',[]);
useMex = getOpt(o,'USEMEX',1);
useNegCurv = getOpt(o,'USENEGCURV',1);
precFunc = getOpt(o,'PRECFUNC',[]);
end

function [v] = getOpt(options,opt,default)
% GETOPT  Return options.(opt) when that field exists and is non-empty,
% otherwise return default.
v = default;
if isfield(options,opt) && ~isempty(options.(opt))
    v = options.(opt);
end
end

function [o] = toUpper(o)
% TOUPPER  Add an upper-case alias for every field of the struct o.
% The original fields are kept; an empty o is returned unchanged.
if ~isempty(o)
    fn = fieldnames(o);
    for i = 1:length(fn)
        o.(upper(fn{i})) = o.(fn{i});
    end
end
end
--------------------------------------------------------------------------------
/minFunc/polyinterp.m:
--------------------------------------------------------------------------------
function [minPos,fmin] = polyinterp(points,doPlot,xminBound,xmaxBound)
% function [minPos,fmin] = polyinterp(points,doPlot,xminBound,xmaxBound)
%
%   Minimum of interpolating polynomial based on function and derivative
%   values
%
%   It can also be used for extrapolation if {xmin,xmax} are outside
%   the domain of the points.
%
%   Input:
%       points(pointNum,[x f g])
%       doPlot: set to 1 to plot, default: 0
%       xmin: min value that brackets minimum (default: min of points)
%       xmax: max value that brackets maximum (default: max of points)
%
%   Output:
%       minPos: location of the minimum of the interpolating polynomial
%       fmin:   polynomial value at minPos (only computed by the general
%               path; requesting it disables the closed-form shortcut)
%
%   set f or g to sqrt(-1) if they are not known
%   the order of the polynomial is the number of known f and g values minus 1

if nargin < 2
    doPlot = 0;
end

nPoints = size(points,1);
order = sum(sum((imag(points(:,2:3))==0)))-1;

% Code for most common case:
%   - cubic interpolation of 2 points
%       w/ function and derivative values for both
%   - no xminBound/xmaxBound
%   - caller wants minPos only: the shortcut never assigns fmin, so a
%     two-output call must use the general path below instead of failing
%     with an unassigned output argument.

if nPoints == 2 && order == 3 && nargin <= 2 && doPlot == 0 && nargout < 2
    % Solution in this case (where x2 is the farthest point):
    %    d1 = g1 + g2 - 3*(f1-f2)/(x1-x2);
    %    d2 = sqrt(d1^2 - g1*g2);
    %    minPos = x2 - (x2 - x1)*((g2 + d2 - d1)/(g2 - g1 + 2*d2));
    %    t_new = min(max(minPos,x1),x2);
    [minVal,minPos] = min(points(:,1)); %#ok<ASGLU>
    notMinPos = -minPos+3; % the other row index: 1 <-> 2
    d1 = points(minPos,3) + points(notMinPos,3) - 3*(points(minPos,2)-points(notMinPos,2))/(points(minPos,1)-points(notMinPos,1));
    d2 = sqrt(d1^2 - points(minPos,3)*points(notMinPos,3));
    if isreal(d2)
        t = points(notMinPos,1) - (points(notMinPos,1) - points(minPos,1))*((points(notMinPos,3) + d2 - d1)/(points(notMinPos,3) - points(minPos,3) + 2*d2));
        minPos = min(max(t,points(minPos,1)),points(notMinPos,1));
    else
        % No real stationary point: fall back to the midpoint
        minPos = mean(points(:,1));
    end
    return;
end

xmin = min(points(:,1));
xmax = max(points(:,1));

% Compute Bounds of Interpolation Area
if nargin < 3
    xminBound = xmin;
end
if nargin < 4
    xmaxBound = xmax;
end

% Constraints Based on available Function Values
A = zeros(0,order+1);
b = zeros(0,1);
for i = 1:nPoints
    if imag(points(i,2))==0
        constraint = zeros(1,order+1);
        for j = order:-1:0
            constraint(order-j+1) = points(i,1)^j;
        end
        A = [A;constraint]; %#ok<AGROW>
        b = [b;points(i,2)]; %#ok<AGROW>
    end
end

% Constraints based on available Derivatives
for i = 1:nPoints
    if isreal(points(i,3))
        constraint = zeros(1,order+1);
        for j = 1:order
            constraint(j) = (order-j+1)*points(i,1)^(order-j);
        end
        A = [A;constraint]; %#ok<AGROW>
        b = [b;points(i,3)]; %#ok<AGROW>
    end
end

% Find interpolating polynomial
params = A\b;

% Compute Critical Points (coefficients of the derivative polynomial)
dParams = zeros(order,1);
for i = 1:length(params)-1
    dParams(i) = params(i)*(order-i+1);
end

% roots() rejects Inf and NaN coefficients; in that case only the bounds
% and the sample locations are candidates.
if any(~isfinite(dParams))
    cp = [xminBound;xmaxBound;points(:,1)].';
else
    cp = [xminBound;xmaxBound;points(:,1);roots(dParams)].';
end

% Test Critical Points
fmin = inf;
minPos = (xminBound+xmaxBound)/2; % Default to Bisection if no critical points valid
for xCP = cp
    if imag(xCP)==0 && xCP >= xminBound && xCP <= xmaxBound
        fCP = polyval(params,xCP);
        if imag(fCP)==0 && fCP < fmin
            minPos = real(xCP);
            fmin = real(fCP);
        end
    end
end

% Plot Situation
if doPlot
    figure(1); clf; hold on;

    % Plot Points
    plot(points(:,1),points(:,2),'b*');

    % Plot Derivatives (short tangent segment through each point)
    for i = 1:nPoints
        if isreal(points(i,3))
            m = points(i,3);
            intercept = points(i,2) - m*points(i,1);
            plot([points(i,1)-.05 points(i,1)+.05],...
                [(points(i,1)-.05)*m+intercept (points(i,1)+.05)*m+intercept],'c.-');
        end
    end

    % Plot Function
    x = min(xmin,xminBound)-.1:(max(xmax,xmaxBound)+.1-min(xmin,xminBound)-.1)/100:max(xmax,xmaxBound)+.1;
    f = polyval(params,x);
    plot(x,f,'y');
    axis([x(1)-.1 x(end)+.1 min(f)-.1 max(f)+.1]);

    % Plot Minimum
    plot(minPos,fmin,'g+');
    if doPlot == 1
        pause(1);
    end
end
--------------------------------------------------------------------------------
/minFunc/precondDiag.m:
--------------------------------------------------------------------------------
function [y] = precondDiag(r,D)
% PRECONDDIAG  Apply a diagonal preconditioner: element-wise product of
% the vector D with the vector r.
y = D.*r;
--------------------------------------------------------------------------------
/minFunc/precondTriu.m:
--------------------------------------------------------------------------------
function [y] = precondTriu(r,U)
% PRECONDTRIU  Apply the preconditioner y = U \ (U' \ r).
% (Declared name now matches the file name precondTriu.m.)
y = U \ (U' \ r);
--------------------------------------------------------------------------------
/minFunc/precondTriuDiag.m:
--------------------------------------------------------------------------------
function [y] = precondTriuDiag(r,U,D)
% PRECONDTRIUDIAG  Apply the preconditioner y = U \ (D .* (U' \ r)).
% (Declared name now matches the file name precondTriuDiag.m.)
y = U \ (D .* (U' \ r));
--------------------------------------------------------------------------------
/minFunc/rosenbrock.m:
--------------------------------------------------------------------------------
function [f, df, ddf, dddf] = rosenbrock(x)

% rosenbrock.m This function returns the function value, partial derivatives,
% Hessian and third-derivative tensor of the (general dimension) rosenbrock
% function, given by:
%
%       f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2
%
% where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1).
% Each derivative is only computed when the corresponding output is requested.
%
% Carl Edward Rasmussen, 2001-07-21.

D = length(x);
f = sum(100*(x(2:D)-x(1:D-1).^2).^2 + (1-x(1:D-1)).^2);

if nargout > 1
    % Gradient (D-by-1)
    df = zeros(D, 1);
    df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1));
    df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2);
end

if nargout > 2
    % Hessian (D-by-D, tridiagonal)
    ddf = zeros(D,D);
    ddf(1:D-1,1:D-1) = diag(-400*x(2:D) + 1200*x(1:D-1).^2 + 2);
    ddf(2:D,2:D) = ddf(2:D,2:D) + 200*eye(D-1);
    ddf = ddf - diag(400*x(1:D-1),1) - diag(400*x(1:D-1),-1);
end

if nargout > 3
    % Third-derivative tensor (D-by-D-by-D)
    dddf = zeros(D,D,D);
    for d = 1:D
        if d > 1
            dddf(d,d-1,d-1) = -400;
        end
        if d < D
            dddf(d,d+1,d) = -400;
            dddf(d,d,d+1) = -400;
            dddf(d,d,d) = 2400*x(d);
        end
    end
end
--------------------------------------------------------------------------------
/minFunc/taylorModel.m:
--------------------------------------------------------------------------------
function [f,g,H] = taylorModel(d,f,g,H,T)
% TAYLORMODEL  Evaluate a third-order Taylor model at displacement d.
%
% Input:
%   d - displacement (p-by-1)
%   f - function value at the expansion point
%   g - gradient at the expansion point
%   H - Hessian at the expansion point
%   T - third-derivative tensor, indexed T(t1,t2,t3)
%
% Output:
%   f - f + g'*d + (1/2)*d'*H*d + (1/6)*sum T(t1,t2,t3)*d(t1)*d(t2)*d(t3)
%   g - g + H*d + (1/2)*gd2, where gd2 contracts the first two indices of
%       T with d (presumably T is symmetric, so this is the model gradient
%       - confirm against the caller)
%   H - H + Hd, where Hd contracts the first index of T with d

p = length(d);

fd3 = 0;
gd2 = zeros(p,1);
Hd = zeros(p);
for t1 = 1:p
    for t2 = 1:p
        for t3 = 1:p
            fd3 = fd3 + T(t1,t2,t3)*d(t1)*d(t2)*d(t3);

            if nargout > 1
                gd2(t3) = gd2(t3) + T(t1,t2,t3)*d(t1)*d(t2);
            end

            if nargout > 2
                Hd(t2,t3) = Hd(t2,t3) + T(t1,t2,t3)*d(t1);
            end
        end

    end
end

f = f + g'*d + (1/2)*d'*H*d + (1/6)*fd3;

if nargout > 1
    g = g + H*d + (1/2)*gd2;
end

if nargout > 2
    H = H + Hd;
end

if any(abs(d) > 1e5)
    % We want the optimizer to stop if the solution is unbounded
    g = zeros(p,1);
end
--------------------------------------------------------------------------------
/predict_NN.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/predict_NN.m
-------------------------------------------------------------------------------- /run_SAE_once.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/run_SAE_once.m -------------------------------------------------------------------------------- /test.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/test.m -------------------------------------------------------------------------------- /train_AE.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/train_AE.m -------------------------------------------------------------------------------- /train_BPNN.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/train_BPNN.m -------------------------------------------------------------------------------- /train_SAE.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/train_SAE.m -------------------------------------------------------------------------------- /~$README.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/~$README.docx -------------------------------------------------------------------------------- /图/3-layers-NN.vsdx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/图/3-layers-NN.vsdx -------------------------------------------------------------------------------- /图/AE-BP.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/图/AE-BP.jpg -------------------------------------------------------------------------------- /图/AE-BP.vsdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/图/AE-BP.vsdx -------------------------------------------------------------------------------- /图/AE-FF.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/图/AE-FF.jpg -------------------------------------------------------------------------------- /图/AE-FF.vsdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/图/AE-FF.vsdx -------------------------------------------------------------------------------- /图/AE.vsdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zheng-yuwei/Stacked_Autoencoder/14fa7b473d5c7cc56c050c5b58f50d02709ab2d0/图/AE.vsdx --------------------------------------------------------------------------------