├── Data
│   ├── data_boost.mat
│   ├── hw3_dataset.zip
│   └── data_format.txt
├── Stump
│   ├── initStump.m
│   ├── predStump.m
│   ├── buildStump.m
│   └── buildOneDStump.m
├── Boost
│   ├── initAdaBoost.m
│   ├── predAdaBoost.m
│   └── buildAdaBoost.m
├── CV
│   └── buildCVMatrix.m
└── demoAdaboost.m

/Data/data_boost.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hetianch/AdaBoost/HEAD/Data/data_boost.mat
--------------------------------------------------------------------------------

/Data/hw3_dataset.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hetianch/AdaBoost/HEAD/Data/hw3_dataset.zip
--------------------------------------------------------------------------------

/Data/data_format.txt:
--------------------------------------------------------------------------------
X: each row of X is a 6-d feature vector
Y: each row of Y is the class label (+1/-1)
--------------------------------------------------------------------------------

/Stump/initStump.m:
--------------------------------------------------------------------------------
% Initialize a decision stump on feature dimension dim.
function stump = initStump(dim)
stump.dim = dim;          % feature dimension the stump splits on
stump.error = 1e6;        % weighted training error (large sentinel value)
stump.threshold = [];     % split threshold, set by buildOneDStump
stump.less = 1;           % label predicted when x <  threshold
stump.more = -1;          % label predicted when x >= threshold
end
--------------------------------------------------------------------------------

/Boost/initAdaBoost.m:
--------------------------------------------------------------------------------
% Initialize an AdaBoost classifier with room for N weak classifiers.
function abClassifier = initAdaBoost(N)
abClassifier.nWC = 0;                  % number of weak classifiers trained so far
abClassifier.WeakClas = cell(N, 1);    % decision stumps
abClassifier.Weight = zeros(N, 1);     % alpha weights of the stumps
abClassifier.trnErr = zeros(N, 1);     % training error after each iteration
abClassifier.tstErr = zeros(N, 1);     % test error after each iteration
abClassifier.hasTestData = false;
end
--------------------------------------------------------------------------------

/Stump/predStump.m:
--------------------------------------------------------------------------------
% Make prediction based on a decision stump
function label = predStump(X, stump)
N = size(X, 1);
x = X(:, stump.dim);
idx = (x >= stump.threshold);   % N x 1 logical
label = zeros(N, 1);
label(idx) = stump.more;
label(~idx) = stump.less;
end
--------------------------------------------------------------------------------

/Stump/buildStump.m:
--------------------------------------------------------------------------------
% Build the best decision stump over all feature dimensions.
function stump = buildStump(X, y, weight)
D = size(X, 2);   % number of feature dimensions

if nargin <= 2
    weight = ones(size(X, 1), 1);   % default to uniform sample weights
end

cellDS = cell(D, 1);
Err = zeros(D, 1);
for i = 1:D
    cellDS{i} = buildOneDStump(X(:, i), y, i, weight);
    Err(i) = cellDS{i}.error;
end
[~, idx] = min(Err);   % keep the stump with the smallest weighted error
stump = cellDS{idx};
end
--------------------------------------------------------------------------------

/CV/buildCVMatrix.m:
--------------------------------------------------------------------------------
% Build train/test indicator matrices for nfold cross-validation.
% Column i of trnM/tstM marks the training/test rows for fold i.
function [trnM, tstM] = buildCVMatrix(N, nfold)
blockSize = floor(N/nfold);
trnM = false(N, nfold);
tstM = false(N, nfold);

for i = 1:(nfold-1)
    index = false(N, 1);
    index(((i-1)*blockSize+1):(i*blockSize)) = true;
    tstM(index, i) = true;
    trnM(~index, i) = true;
end
% The last fold takes the remaining N - (nfold-1)*blockSize samples.
index = false(N, 1);
index(((nfold-1)*blockSize+1):N) = true;
tstM(index, nfold) = true;
trnM(~index, nfold) = true;
end
--------------------------------------------------------------------------------
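As a quick sanity check of the stump code above, the following sketch (not part of the repository) fits a stump to a tiny synthetic one-feature dataset with uniform weights; the names Xtoy, Ytoy, stumpToy, and YhatToy are illustrative only.

% Illustrative sketch, not a repository file: sanity-check buildStump and
% predStump on a tiny synthetic 1-D problem with uniform sample weights.
rng(0);
Xtoy = [randn(20, 1) - 2; randn(20, 1) + 2];   % one feature, two separated clusters
Ytoy = [-ones(20, 1); ones(20, 1)];            % labels in {-1, +1}
w = ones(40, 1)/40;                            % uniform sample weights

stumpToy = buildStump(Xtoy, Ytoy, w);          % should pick a threshold near 0
YhatToy = predStump(Xtoy, stumpToy);
fprintf('Toy stump error: %.3f\n', mean(YhatToy ~= Ytoy));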
/Boost/predAdaBoost.m:
--------------------------------------------------------------------------------
% Predict labels with an AdaBoost classifier; if true labels Y are given,
% also return the misclassification rate Err.
function [Label, Err] = predAdaBoost(abClassifier, X, Y)
N = size(X, 1);

if nargin < 3
    Y = [];
end
Err = [];   % only computed when Y is provided

M = abClassifier.nWC;
LabM = zeros(N, M);
for i = 1:M
    LabM(:, i) = abClassifier.Weight(i)*predStump(X, abClassifier.WeakClas{i});
end

% Final label is the sign of the weighted vote of the weak classifiers
Label = zeros(N, 1);
LabM = sum(LabM, 2);
idx = (LabM > 0);
Label(idx) = 1;
Label(~idx) = -1;

% Misclassification rate against the true labels
if ~isempty(Y)
    Err = sum(Label ~= Y)/N;
end
end
--------------------------------------------------------------------------------

/demoAdaboost.m:
--------------------------------------------------------------------------------
addpath 'Stump';
addpath 'Boost';
addpath 'Data';
addpath 'CV';

% Load data
load data_boost.mat;

% Test decision stump
% stump = buildStump(X, Y);

% 10-fold cross-validation of AdaBoost with decision stumps
nfold = 10;
iter = 300;
tstError = zeros(nfold, iter);
trnError = zeros(nfold, iter);
[trnM, tstM] = buildCVMatrix(size(X, 1), nfold);
for n = 1:nfold
    fprintf('\tFold %d\n', n);
    idx_trn = logical(trnM(:, n) == 1);
    trnX = X(idx_trn, :);
    tstX = X(~idx_trn, :);
    trnY = Y(idx_trn);
    tstY = Y(~idx_trn);
    abClassifier = buildAdaBoost(trnX, trnY, iter, tstX, tstY);
    trnError(n, :) = abClassifier.trnErr;
    tstError(n, :) = abClassifier.tstErr;
end

% Plot mean training and test error across folds
plot(1:iter, mean(trnError, 1));
hold on;
plot(1:iter, mean(tstError, 1));
legend('Training error', 'Test error');
xlabel('Boosting iteration');
ylabel('Error rate');
--------------------------------------------------------------------------------

/Stump/buildOneDStump.m:
--------------------------------------------------------------------------------
% Build the best decision stump for a single feature dimension d,
% given sample weights w.
function stump = buildOneDStump(x, y, d, w)
[err_1, t_1] = searchThreshold(x, y, w, '>');   % x >= t_1 -> +1
[err_2, t_2] = searchThreshold(x, y, w, '<');   % x <  t_2 -> +1
stump = initStump(d);
if err_1 <= err_2
    stump.threshold = t_1;
    stump.error = err_1;
    stump.less = -1;
    stump.more = 1;
else
    stump.threshold = t_2;
    stump.error = err_2;
    stump.less = 1;
    stump.more = -1;
end
end

% Try every sample value as a candidate threshold and return the one with the
% smallest weighted error, for the given direction ('>' or '<').
function [err, thresh] = searchThreshold(x, y, w, direction)
N = length(x);
err_n = zeros(N, 1);
y_predict = zeros(N, 1);
for n = 1:N
    switch direction
        case '>'
            idx = (x >= x(n));
        case '<'
            idx = (x < x(n));
    end
    y_predict(idx) = 1;
    y_predict(~idx) = -1;
    err_n(n) = sum((y ~= y_predict).*w)/sum(w);
end
[err, idx] = min(err_n);
thresh = x(idx);
end
--------------------------------------------------------------------------------

/Boost/buildAdaBoost.m:
--------------------------------------------------------------------------------
% Train an AdaBoost classifier of decision stumps for iter rounds.
% If test data (tstX, tstY) is given, test error is also tracked per iteration.
function abClassifier = buildAdaBoost(trnX, trnY, iter, tstX, tstY)
if nargin < 4
    tstX = [];
    tstY = [];
end
abClassifier = initAdaBoost(iter);

N = size(trnX, 1);                 % number of training samples
sampleWeight = repmat(1/N, N, 1);  % start from uniform sample weights

for i = 1:iter
    % Fit a decision stump to the weighted training data
    weakClassifier = buildStump(trnX, trnY, sampleWeight);
    abClassifier.WeakClas{i} = weakClassifier;
    abClassifier.nWC = i;
    % Classifier weight: alpha = 0.5*log((1 - err)/err)
    abClassifier.Weight(i) = 0.5*log((1-weakClassifier.error)/weakClassifier.error);

    % Update sample weights: w <- w .* exp(-alpha * y .* h(x)), then normalize
    label = predStump(trnX, weakClassifier);
    tmpSampleWeight = -abClassifier.Weight(i)*(trnY.*label);   % N x 1
    tmpSampleWeight = sampleWeight.*exp(tmpSampleWeight);      % N x 1
    sampleWeight = tmpSampleWeight./sum(tmpSampleWeight);      % normalized

    % Predict on training data
    [~, abClassifier.trnErr(i)] = predAdaBoost(abClassifier, trnX, trnY);
    % Predict on test data
    if ~isempty(tstY)
        abClassifier.hasTestData = true;
        [~, abClassifier.tstErr(i)] = predAdaBoost(abClassifier, tstX, tstY);
    end
    % fprintf('\tIteration %d, Training error %f\n', i, abClassifier.trnErr(i));
end
end
--------------------------------------------------------------------------------
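For a quick run without cross-validation, the sketch below (not a repository file) trains on the full dataset and reports the final training error. It assumes data_boost.mat provides X and Y as described in Data/data_format.txt; the round count T = 50 is an illustrative choice.

% Illustrative sketch, not a repository file: train AdaBoost on the full
% dataset (no cross-validation) and report the final training error.
addpath 'Stump'; addpath 'Boost'; addpath 'Data';
load data_boost.mat;                      % assumed to provide X (N x 6) and Y (N x 1, +/-1)

T = 50;                                   % number of boosting rounds (illustrative choice)
abClassifier = buildAdaBoost(X, Y, T);    % no test set passed
[~, trnErr] = predAdaBoost(abClassifier, X, Y);
fprintf('Training error after %d rounds: %.4f\n', T, trnErr);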