├── Evaluate.m
├── Project Report Group 9.odt
├── README.md
├── Results_statistics.m
├── dbn_result_statistics.m
├── dbnsetup.m
├── dbntrain.m
├── dbnunfoldtonn.asv
├── dbnunfoldtonn.m
├── further_improvements.docx
├── nnapplygrads.m
├── nnbp.m
├── nnchecknumgrad.m
├── nneval.m
├── nnff.m
├── nnpredict.m
├── nnsetup.m
├── nntest.m
├── nntrain.m
├── nnupdatefigures.m
├── rbmdown.m
├── rbmtrain.m
├── rbmup.m
├── sigm.m
└── sigmrnd.m

/Evaluate.m:
--------------------------------------------------------------------------------
1 | function EVAL = Evaluate(ACTUAL,PREDICTED)
2 | % This function evaluates the performance of a classification model by
3 | % calculating the common performance measures: Accuracy, Sensitivity,
4 | % Specificity, Precision, Recall, F-Measure, G-mean.
5 | % Input: ACTUAL = Column vector with the actual class labels of the
6 | %        training examples
7 | %        PREDICTED = Column vector with the class labels predicted by the
8 | %        classification model
9 | % Output: EVAL = Row vector with all the performance measures
10 | 
11 | 
12 | idx = (ACTUAL==1);   % class 1 is treated as the positive class
13 | 
14 | p = length(ACTUAL(idx));
15 | n = length(ACTUAL(~idx));
16 | N = p+n;
17 | 
18 | tp = sum(ACTUAL(idx)==PREDICTED(idx));
19 | tn = sum(ACTUAL(~idx)==PREDICTED(~idx));
20 | fp = n-tn;
21 | fn = p-tp;
22 | 
23 | tp_rate = tp/p;
24 | tn_rate = tn/n;
25 | 
26 | accuracy = (tp+tn)/N;
27 | sensitivity = tp_rate;
28 | specificity = tn_rate;
29 | precision = tp/(tp+fp);
30 | recall = sensitivity;
31 | f_measure = 2*((precision*recall)/(precision + recall));
32 | gmean = sqrt(tp_rate*tn_rate);
33 | 
34 | EVAL = [accuracy sensitivity specificity precision recall f_measure gmean];
--------------------------------------------------------------------------------
/Project Report Group 9.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adiengineer/ADHD_classification_DBN_extraction/d5dfe644d3b12eb93fb86f6506f98f07a4895700/Project Report Group 9.odt
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ADHD_classification_DBN_extraction
2 | Guide: Prof Sundaram Suresh (NTU, Singapore). Area: deep learning neural networks for feature extraction in high-dimensional neuroimaging data. Tools used: standard neuroimaging software for preprocessing and a MATLAB deep learning toolbox (DeeBNet). I used deep learning algorithms, including RBMs and CNNs, to train on an open-source MRI data set and classify unseen fMRI scans as having ADHD or not. I achieved accuracy scores of 64%, which is incrementally better than the state of the art (as of 2016). The project was challenging due to the high dimensionality of the input data and the meager number of test samples.
3 | 
4 | Since the clinical fMRI data is high-dimensional, naive shallow classifiers are not able to do a good job.
5 | The basic idea behind the project was to investigate whether RBMs/DBNs can extract useful higher-level features.
6 | 
7 | I implemented an RBM-based multilayer Deep Belief Network and extracted features from the raw dataset. I had to
8 | perform a lot of experiments to train the network properly.
9 | 
10 | The results indicate that the features extracted this way lead to significantly better classification results.
11 | I was able to achieve accuracies as high as 75%, compared to the previous baseline of 65%.
12 | 
13 | The .odt and .docx files are the reports, which have more details.
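
Usage sketch (not part of the original repo): the snippet below is a minimal example of how the files here fit together, assuming `train_data`/`test_data` are matrices with one preprocessed, [0,1]-scaled fMRI feature vector per row (rbmtrain.m asserts this range) and `train_label`/`test_label` are one-hot matrices with one column per class. The layer sizes and `opts` values are illustrative placeholders, not the settings used in the project.

```matlab
% Hypothetical example: pretrain a stack of RBMs, unfold them into a
% feedforward net, fine-tune it, and report classification statistics.
opts.alpha      = 0.1;   % RBM learning rate (used by dbnsetup/rbmtrain)
opts.momentum   = 0.5;   % RBM momentum
opts.batchsize  = 10;    % minibatch size for rbmtrain and nntrain
opts.numepochs  = 50;    % epochs for both pretraining and fine-tuning

dbn_sizes = [500 200];   % hidden-layer sizes of the stacked RBMs (illustrative)
n_out     = 2;           % two output classes: ADHD vs. control

% Trains the DBN, unfolds it to an NN, fine-tunes it, and evaluates on the test set.
[acc, y_pred] = dbn_result_statistics(dbn_sizes, train_data, test_data, ...
                                      train_label, test_label, opts, n_out);

% Binary performance measures (class 1 is the positive class in Evaluate.m).
[~, y_true] = max(test_label, [], 2);
metrics = Evaluate(y_true, y_pred);  % [accuracy sens spec prec recall F g-mean]
```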
14 | -------------------------------------------------------------------------------- /Results_statistics.m: -------------------------------------------------------------------------------- 1 | function [Accuracy, Avg_F_measure, Norm_F] = Results_statistics (label, Predicted) 2 | 3 | error = 0; 4 | for i=1: length(label) 5 | if label(i) ~= Predicted(i) 6 | error = error+1; 7 | end; 8 | end; 9 | error_rate = error/length(label); 10 | Accuracy = 1 - error_rate; 11 | 12 | [uniques,numUnique] = count_unique(label); 13 | [uniques_pre,numUnique_Pre] = count_unique(Predicted); 14 | 15 | if length(uniques) > length(uniques_pre) 16 | disp('Predicted has less number of Classes '); 17 | end; 18 | if length(uniques) < length(uniques_pre) 19 | disp('Predicted has more number of Classes '); 20 | end; 21 | Avg_accuracy = 0; 22 | Avg_F_measure = 0; 23 | Accuracies = zeros(length(uniques),1); 24 | F_measures = zeros(length(uniques),1); 25 | for i =1: length(uniques) 26 | 27 | label_index = find(label==uniques(i)); %actual label 28 | Pre_index = find(Predicted==uniques(i)); %predicted label 29 | 30 | True_possitive = length( intersect(label_index,Pre_index) ); 31 | False_Negative = length(label_index)-True_possitive; 32 | False_Positive = length( setdiff(Pre_index, label_index) ); 33 | 34 | Precision = True_possitive/length(Pre_index); 35 | Recall = True_possitive/length(label_index); 36 | 37 | F_Measure = 2*Precision*Recall/(Precision+Recall); 38 | if isnan(F_Measure) 39 | F_Measure = 0; 40 | end; 41 | 42 | accuracy = True_possitive/length(label_index); 43 | Avg_accuracy = Avg_accuracy + accuracy; 44 | Avg_F_measure = Avg_F_measure + F_Measure; 45 | 46 | Accuracies(i) = accuracy; 47 | F_measures(i) = F_Measure; 48 | 49 | class_errors{i}.label = uniques(i); 50 | class_errors{i}.instance_no = length(label_index); 51 | class_errors{i}.True_possitive = True_possitive; 52 | class_errors{i}.False_Negative = False_Negative; 53 | class_errors{i}.False_Positive = False_Positive; 54 | class_errors{i}.Precision = Precision; 55 | class_errors{i}.Recall = Recall; 56 | class_errors{i}.accuracy = accuracy; 57 | class_errors{i}.F_Measure = F_Measure; 58 | end; 59 | 60 | Avg_accuracy = Avg_accuracy/length(uniques); 61 | Avg_F_measure = Avg_F_measure /length(uniques); 62 | 63 | a = F_measures.*numUnique; 64 | Norm_F = sum(a)/sum(numUnique); 65 | 66 | %% compute confusion matrix 67 | [label_uni,label_inst] = count_unique(label); 68 | [pre_uni,pre_inst] = count_unique(Predicted); 69 | Confus_matrix = zeros(length(label_uni),length(pre_uni)); 70 | for i=1: length(label_uni) 71 | for j=1: length(pre_uni) 72 | label_Index = find(label ==label_uni(i)); 73 | pre_Index = find(Predicted ==label_uni(j)); 74 | temp = length( intersect(label_Index,pre_Index)); 75 | Confus_matrix(i,j)= temp/label_inst(i)*100; 76 | 77 | end; 78 | end; 79 | 80 | 81 | 82 | end 83 | 84 | 85 | function [uniques,numUnique] = count_unique(x,option) 86 | %COUNT_UNIQUE Determines unique values, and counts occurrences 87 | % [uniques,numUnique] = count_unique(x) 88 | % 89 | % This function determines unique values of an array, and also counts the 90 | % number of instances of those values. 91 | % 92 | % This uses the MATLAB builtin function accumarray, and is faster than 93 | % MATLAB's unique function for intermediate to large sizes of arrays for integer values. 94 | % Unlike 'unique' it cannot be used to determine if rows are unique or 95 | % operate on cell arrays. 
96 | % 97 | % If float values are passed, it uses MATLAB's logic builtin unique function to 98 | % determine unique values, and then to count instances. 99 | % 100 | % Descriptions of Input Variables: 101 | % x: Input vector or matrix, N-D. Must be a type acceptable to 102 | % accumarray, numeric, logical, char, scalar, or cell array of 103 | % strings. 104 | % option: Acceptable values currently only 'float'. If 'float' is 105 | % specified, the input x vector will be treated as containing 106 | % decimal values, regardless of whether it is a float array type. 107 | % 108 | % Descriptions of Output Variables: 109 | % uniques: sorted unique values 110 | % numUnique: number of instances of each unique value 111 | % 112 | % Example(s): 113 | % >> [uniques] = count_unique(largeArray); 114 | % >> [uniques,numUnique] = count_unique(largeArray); 115 | % 116 | % See also: unique, accumarray 117 | 118 | % Author: Anthony Kendall 119 | % Contact: anthony [dot] kendall [at] gmail [dot] com 120 | % Created: 2009-03-17 121 | 122 | testFloat = false; 123 | if nargin == 2 && strcmpi(option,'float') 124 | testFloat = true; 125 | end 126 | 127 | nOut = nargout; 128 | if testFloat 129 | if nOut < 2 130 | [uniques] = float_cell_unique(x,nOut); 131 | else 132 | [uniques,numUnique] = float_cell_unique(x,nOut); 133 | end 134 | else 135 | try %this will fail if the array is float or cell 136 | if nOut < 2 137 | [uniques] = int_log_unique(x,nOut); 138 | else 139 | [uniques,numUnique] = int_log_unique(x,nOut); 140 | end 141 | catch %default to standard approach 142 | if nOut < 2 143 | [uniques] = float_cell_unique(x,nOut); 144 | else 145 | [uniques,numUnique] = float_cell_unique(x,nOut); 146 | end 147 | end 148 | end 149 | 150 | end 151 | 152 | function [uniques,numUnique] = int_log_unique(x,nOut) 153 | %First, determine the offset for negative values 154 | minVal = min(x(:)); 155 | 156 | %Check to see if accumarray is appropriate for this function 157 | maxIndex = max(x(:)) - minVal + 1; 158 | if maxIndex / numel(x) > 1000 159 | error('Accumarray is inefficient for arrays when ind values are >> than the number of elements') 160 | end 161 | 162 | %Now, offset to get the index 163 | index = x(:) - minVal + 1; 164 | 165 | %Count the occurrences of each index value 166 | numUnique = accumarray(index,1); 167 | 168 | %Get the values which occur more than once 169 | uniqueInd = (1:length(numUnique))'; 170 | uniques = uniqueInd(numUnique>0) + minVal - 1; 171 | 172 | if nOut == 2 173 | %Trim the numUnique array 174 | numUnique = numUnique(numUnique>0); 175 | end 176 | end 177 | 178 | function [uniques,numUnique] = float_cell_unique(x,nOut) 179 | 180 | if ~iscell(x) 181 | %First, sort the input vector 182 | x = sort(x(:)); 183 | numelX = numel(x); 184 | 185 | %Check to see if the array type needs to be converted to double 186 | currClass = class(x); 187 | isdouble = strcmp(currClass,'double'); 188 | 189 | if ~isdouble 190 | x = double(x); 191 | end 192 | 193 | %Check to see if there are any NaNs or Infs, sort returns these either at 194 | %the beginning or end of an array 195 | if isnan(x(1)) || isinf(x(1)) || isnan(x(numelX)) || isinf(x(numelX)) 196 | %Check to see if the array contains nans or infs 197 | xnan = isnan(x); 198 | xinf = isinf(x); 199 | testRep = xnan | xinf; 200 | 201 | %Remove all of these from the array 202 | x = x(~testRep); 203 | end 204 | 205 | %Determine break locations of unique values 206 | uniqueLocs = [true;diff(x) ~= 0]; 207 | else 208 | isdouble = true; %just to avoid conversion on finish 209 | 
210 | %Sort the rows of the cell array 211 | x = sort(x(:)); 212 | 213 | %Determine unique location values 214 | uniqueLocs = [true;~strcmp(x(1:end-1),x(2:end)) ~= 0] ; 215 | end 216 | 217 | %Determine the unique values 218 | uniques = x(uniqueLocs); 219 | 220 | if ~isdouble 221 | x = feval(currClass,x); 222 | end 223 | 224 | %Count the number of duplicate values 225 | if nOut == 2 226 | numUnique = diff([find(uniqueLocs);length(x)+1]); 227 | end 228 | end 229 | -------------------------------------------------------------------------------- /dbn_result_statistics.m: -------------------------------------------------------------------------------- 1 | function[Accuracy, Ypre] = dbn_result_statistics(dbn_sizes,train_data, test_data, train_label, test_label, opts, n_out) 2 | 3 | %%%Set up, Train DBN and use its weights for top layer NN initialization 4 | %%%the dbn.sizes is the number of hidden neurons for each RBM layers 5 | %%%and adjustable 6 | dbn.sizes=dbn_sizes 7 | dbn = dbnsetup(dbn, train_data, opts); 8 | dbn = dbntrain(dbn, train_data, opts); 9 | 10 | nn = dbnunfoldtonn(dbn, n_out); 11 | nn.activation_function = 'sigm'; 12 | 13 | nn = nntrain(nn, train_data, train_label, opts); 14 | % [er, bad] = nntest(nn, test_data, test_label); 15 | 16 | Ytst_pre = nnpredict(nn, test_data); 17 | 18 | % Double the result 19 | [~, expected] = max(test_label,[],2); 20 | [acc, af, nf] = Results_statistics (expected, Ytst_pre); 21 | 22 | Accuracy=acc 23 | Ypre=Ytst_pre 24 | 25 | end 26 | -------------------------------------------------------------------------------- /dbnsetup.m: -------------------------------------------------------------------------------- 1 | function dbn = dbnsetup(dbn, x, opts) 2 | n = size(x, 2); 3 | % n=500 4 | dbn.sizes = [n, dbn.sizes]; 5 | 6 | for u = 1 : numel(dbn.sizes) - 1 7 | dbn.rbm{u}.alpha = opts.alpha; 8 | dbn.rbm{u}.momentum = opts.momentum; 9 | 10 | dbn.rbm{u}.W = zeros(dbn.sizes(u + 1), dbn.sizes(u)); 11 | dbn.rbm{u}.vW = zeros(dbn.sizes(u + 1), dbn.sizes(u)); 12 | 13 | dbn.rbm{u}.b = zeros(dbn.sizes(u), 1); 14 | dbn.rbm{u}.vb = zeros(dbn.sizes(u), 1); 15 | 16 | dbn.rbm{u}.c = zeros(dbn.sizes(u + 1), 1); 17 | dbn.rbm{u}.vc = zeros(dbn.sizes(u + 1), 1); 18 | end 19 | 20 | end 21 | -------------------------------------------------------------------------------- /dbntrain.m: -------------------------------------------------------------------------------- 1 | function dbn = dbntrain(dbn, x, opts) 2 | n = numel(dbn.rbm); 3 | 4 | dbn.rbm{1} = rbmtrain(dbn.rbm{1}, x, opts); 5 | for i = 2 : n 6 | x = rbmup(dbn.rbm{i - 1}, x); 7 | dbn.rbm{i} = rbmtrain(dbn.rbm{i}, x, opts); 8 | end 9 | 10 | end 11 | -------------------------------------------------------------------------------- /dbnunfoldtonn.asv: -------------------------------------------------------------------------------- 1 | function nn = dbnunfoldtonn(dbn, outputsize) 2 | %DBNUNFOLDTONN Unfolds a DBN to a NN 3 | % dbnunfoldtonn(dbn, outputsize ) returns the unfolded dbn with a final 4 | % layer of size outputsize added. 
5 | if(exist('outputsize','var')) 6 | size = [dbn.sizes outputsize]; 7 | else 8 | size = [dbn.sizes]; 9 | end 10 | nn = nnsetup(size); 11 | for i = 1 : numel(dbn.rbm) 12 | nn.W{i} = [dbn.rbm{i}.c dbn.rbm{i}.W]; 13 | end 14 | x = [ones(m,1) train_x]; 15 | nn.a{1} = x; 16 | for i = 2 : n-1 17 | 18 | nn.a{i}=sigm(nn.a{n - 1} * nn.W{n - 1}'); 19 | nn.a{i} = [ones(m,1) nn.a{i}]; 20 | end 21 | end 22 | 23 | -------------------------------------------------------------------------------- /dbnunfoldtonn.m: -------------------------------------------------------------------------------- 1 | function nn = dbnunfoldtonn(dbn, outputsize) 2 | %DBNUNFOLDTONN Unfolds a DBN to a NN 3 | % dbnunfoldtonn(dbn, outputsize ) returns the unfolded dbn with a final 4 | % layer of size outputsize added. 5 | if(exist('outputsize','var')) 6 | size = [dbn.sizes outputsize]; 7 | else 8 | size = [dbn.sizes]; 9 | end 10 | nn = nnsetup(size); 11 | for i = 1 : numel(dbn.rbm) 12 | nn.W{i} = [dbn.rbm{i}.c dbn.rbm{i}.W]; 13 | end 14 | end 15 | 16 | -------------------------------------------------------------------------------- /further_improvements.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adiengineer/ADHD_classification_DBN_extraction/d5dfe644d3b12eb93fb86f6506f98f07a4895700/further_improvements.docx -------------------------------------------------------------------------------- /nnapplygrads.m: -------------------------------------------------------------------------------- 1 | function nn = nnapplygrads(nn) 2 | %NNAPPLYGRADS updates weights and biases with calculated gradients 3 | % nn = nnapplygrads(nn) returns an neural network structure with updated 4 | % weights and biases 5 | 6 | for i = 1 : (nn.n - 1) 7 | if(nn.weightPenaltyL2>0) 8 | dW = nn.dW{i} + nn.weightPenaltyL2 * [zeros(size(nn.W{i},1),1) nn.W{i}(:,2:end)]; 9 | else 10 | dW = nn.dW{i}; 11 | end 12 | 13 | dW = nn.learningRate * dW; 14 | 15 | if(nn.momentum>0) 16 | nn.vW{i} = nn.momentum*nn.vW{i} + dW; 17 | dW = nn.vW{i}; 18 | end 19 | 20 | nn.W{i} = nn.W{i} - dW; 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /nnbp.m: -------------------------------------------------------------------------------- 1 | function nn = nnbp(nn) 2 | %NNBP performs backpropagation 3 | % nn = nnbp(nn) returns an neural network structure with updated weights 4 | 5 | n = nn.n; 6 | sparsityError = 0; 7 | switch nn.output 8 | case 'sigm' 9 | d{n} = - nn.e .* (nn.a{n} .* (1 - nn.a{n})); 10 | case {'softmax','linear'} 11 | d{n} = - nn.e; 12 | end 13 | for i = (n - 1) : -1 : 2 14 | % Derivative of the activation function 15 | switch nn.activation_function 16 | case 'sigm' 17 | d_act = nn.a{i} .* (1 - nn.a{i}); 18 | case 'tanh_opt' 19 | d_act = 1.7159 * 2/3 * (1 - 1/(1.7159)^2 * nn.a{i}.^2); 20 | end 21 | 22 | if(nn.nonSparsityPenalty>0) 23 | pi = repmat(nn.p{i}, size(nn.a{i}, 1), 1); 24 | sparsityError = [zeros(size(nn.a{i},1),1) nn.nonSparsityPenalty * (-nn.sparsityTarget ./ pi + (1 - nn.sparsityTarget) ./ (1 - pi))]; 25 | end 26 | 27 | % Backpropagate first derivatives 28 | if i+1==n % in this case in d{n} there is not the bias term to be removed 29 | d{i} = (d{i + 1} * nn.W{i} + sparsityError) .* d_act; % Bishop (5.56) 30 | else % in this case in d{i} the bias term has to be removed 31 | d{i} = (d{i + 1}(:,2:end) * nn.W{i} + sparsityError) .* d_act; 32 | end 33 | 34 | if(nn.dropoutFraction>0) 35 | d{i} = d{i} .* [ones(size(d{i},1),1) nn.dropOutMask{i}]; 
36 | end 37 | 38 | end 39 | 40 | for i = 1 : (n - 1) 41 | if i+1==n 42 | nn.dW{i} = (d{i + 1}' * nn.a{i}) / size(d{i + 1}, 1); 43 | else 44 | nn.dW{i} = (d{i + 1}(:,2:end)' * nn.a{i}) / size(d{i + 1}, 1); 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /nnchecknumgrad.m: -------------------------------------------------------------------------------- 1 | function nnchecknumgrad(nn, x, y) 2 | epsilon = 1e-6; 3 | er = 1e-7; 4 | n = nn.n; 5 | for l = 1 : (n - 1) 6 | for i = 1 : size(nn.W{l}, 1) 7 | for j = 1 : size(nn.W{l}, 2) 8 | nn_m = nn; nn_p = nn; 9 | nn_m.W{l}(i, j) = nn.W{l}(i, j) - epsilon; 10 | nn_p.W{l}(i, j) = nn.W{l}(i, j) + epsilon; 11 | rand('state',0) 12 | nn_m = nnff(nn_m, x, y); 13 | rand('state',0) 14 | nn_p = nnff(nn_p, x, y); 15 | dW = (nn_p.L - nn_m.L) / (2 * epsilon); 16 | e = abs(dW - nn.dW{l}(i, j)); 17 | 18 | assert(e < er, 'numerical gradient checking failed'); 19 | end 20 | end 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /nneval.m: -------------------------------------------------------------------------------- 1 | function [loss] = nneval(nn, loss, train_x, train_y, val_x, val_y) 2 | %NNEVAL evaluates performance of neural network 3 | % Returns a updated loss struct 4 | assert(nargin == 4 || nargin == 6, 'Wrong number of arguments'); 5 | 6 | % training performance 7 | nn = nnff(nn, train_x, train_y); 8 | loss.train.e(end + 1) = nn.L; 9 | 10 | % validation performance 11 | if nargin == 6 12 | nn = nnff(nn, val_x, val_y); 13 | loss.val.e(end + 1) = nn.L; 14 | end 15 | 16 | %calc misclassification rate if softmax 17 | if strcmp(nn.output,'softmax') 18 | [er_train, ~] = nntest(nn, train_x, train_y); 19 | loss.train.e_frac(end+1) = er_train; 20 | 21 | if nargin == 6 22 | [er_val, ~] = nntest(nn, val_x, val_y); 23 | loss.val.e_frac(end+1) = er_val; 24 | end 25 | end 26 | 27 | end 28 | -------------------------------------------------------------------------------- /nnff.m: -------------------------------------------------------------------------------- 1 | function nn = nnff(nn, x, y) 2 | %NNFF performs a feedforward pass 3 | % nn = nnff(nn, x, y) returns an neural network structure with updated 4 | % layer activations, error and loss (nn.a, nn.e and nn.L) 5 | 6 | n = nn.n; 7 | m = size(x, 1); 8 | 9 | x = [ones(m,1) x]; 10 | nn.a{1} = x; 11 | 12 | %feedforward pass 13 | for i = 2 : n-1 14 | switch nn.activation_function 15 | case 'sigm' 16 | % Calculate the unit's outputs (including the bias term) 17 | nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}'); 18 | case 'tanh_opt' 19 | nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}'); 20 | end 21 | 22 | %dropout 23 | if(nn.dropoutFraction > 0) 24 | if(nn.testing) 25 | nn.a{i} = nn.a{i}.*(1 - nn.dropoutFraction); 26 | else 27 | nn.dropOutMask{i} = (rand(size(nn.a{i}))>nn.dropoutFraction); 28 | nn.a{i} = nn.a{i}.*nn.dropOutMask{i}; 29 | end 30 | end 31 | 32 | %calculate running exponential activations for use with sparsity 33 | if(nn.nonSparsityPenalty>0) 34 | nn.p{i} = 0.99 * nn.p{i} + 0.01 * mean(nn.a{i}, 1); 35 | end 36 | 37 | %Add the bias term 38 | nn.a{i} = [ones(m,1) nn.a{i}]; 39 | end 40 | switch nn.output 41 | case 'sigm' 42 | nn.a{n} = sigm(nn.a{n - 1} * nn.W{n - 1}'); 43 | case 'linear' 44 | nn.a{n} = nn.a{n - 1} * nn.W{n - 1}'; 45 | case 'softmax' 46 | nn.a{n} = nn.a{n - 1} * nn.W{n - 1}'; 47 | nn.a{n} = exp(bsxfun(@minus, nn.a{n}, max(nn.a{n},[],2))); 48 | nn.a{n} = bsxfun(@rdivide, nn.a{n}, sum(nn.a{n}, 
2)); 49 | end 50 | 51 | %error and loss 52 | nn.e = y - nn.a{n}; 53 | 54 | switch nn.output 55 | case {'sigm', 'linear'} 56 | nn.L = 1/2 * sum(sum(nn.e .^ 2)) / m; 57 | case 'softmax' 58 | nn.L = -sum(sum(y .* log(nn.a{n}))) / m; 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /nnpredict.m: -------------------------------------------------------------------------------- 1 | function labels = nnpredict(nn, x) 2 | nn.testing = 1; 3 | nn = nnff(nn, x, zeros(size(x,1), nn.size(end))); 4 | nn.testing = 0; 5 | [~, i] = max(nn.a{end},[],2); 6 | labels = i; 7 | end 8 | -------------------------------------------------------------------------------- /nnsetup.m: -------------------------------------------------------------------------------- 1 | function nn = nnsetup(architecture) 2 | %NNSETUP creates a Feedforward Backpropagate Neural Network 3 | % nn = nnsetup(architecture) returns an neural network structure with n=numel(architecture) 4 | % layers, architecture being a n x 1 vector of layer sizes e.g. [784 100 10] 5 | 6 | nn.size = architecture; 7 | nn.n = numel(nn.size); 8 | 9 | nn.activation_function = 'tanh_opt'; % Activation functions of hidden layers: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh). 10 | nn.learningRate = 0.5; %2 % learning rate Note: typically needs to be lower when using 'sigm' activation function and non-normalized inputs. 11 | nn.momentum = 0.5; % Momentum 12 | nn.scaling_learningRate = 1; % Scaling factor for the learning rate (each epoch) 13 | nn.weightPenaltyL2 = 0; % L2 regularization 14 | nn.nonSparsityPenalty = 0; % Non sparsity penalty 15 | nn.sparsityTarget = 0.05; % Sparsity target 16 | nn.inputZeroMaskedFraction = 0; % Used for Denoising AutoEncoders 17 | nn.dropoutFraction = 0; % Dropout level (http://www.cs.toronto.edu/~hinton/absps/dropout.pdf) 18 | nn.testing = 0; % Internal variable. nntest sets this to one. 19 | nn.output = 'sigm'; % output unit 'sigm' (=logistic), 'softmax' and 'linear' 20 | 21 | for i = 2 : nn.n 22 | % weights and weight momentum 23 | nn.W{i - 1} = (rand(nn.size(i), nn.size(i - 1)+1) - 0.5) * 2 * 4 * sqrt(6 / (nn.size(i) + nn.size(i - 1))); 24 | nn.vW{i - 1} = zeros(size(nn.W{i - 1})); 25 | 26 | % average activations (for use with sparsity) 27 | nn.p{i} = zeros(1, nn.size(i)); 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /nntest.m: -------------------------------------------------------------------------------- 1 | function [er, bad] = nntest(nn, x, y) 2 | labels = nnpredict(nn, x); 3 | % predictions = nnpredict(nn, x); 4 | % labels = getypre(predictions,n,ny,ylag); 5 | label_size=size(labels) 6 | [~, expected] = max(y,[],2); 7 | expected_size=size(expected) 8 | bad = find(labels ~= expected); 9 | er = numel(bad) / size(x, 1); 10 | end 11 | -------------------------------------------------------------------------------- /nntrain.m: -------------------------------------------------------------------------------- 1 | function [nn, L] = nntrain(nn, train_x, train_y, opts, val_x, val_y) 2 | %NNTRAIN trains a neural net 3 | % [nn, L] = nnff(nn, x, y, opts) trains the neural network nn with input x and 4 | % output y for opts.numepochs epochs, with minibatches of size 5 | % opts.batchsize. Returns a neural network nn with updated activations, 6 | % errors, weights and biases, (nn.a, nn.e, nn.W, nn.b) and L, the sum 7 | % squared error for each training minibatch. 
8 | 9 | assert(isfloat(train_x), 'train_x must be a float'); 10 | assert(nargin == 4 || nargin == 6,'number ofinput arguments must be 4 or 6') 11 | 12 | loss.train.e = []; 13 | loss.train.e_frac = []; 14 | loss.val.e = []; 15 | loss.val.e_frac = []; 16 | opts.validation = 0; 17 | if nargin == 6 18 | opts.validation = 1; 19 | end 20 | trainerror=[]; 21 | fhandle = []; 22 | if isfield(opts,'plot') && opts.plot == 1 23 | fhandle = figure(); 24 | end 25 | 26 | 27 | m = size(train_x, 1) 28 | batchsize = opts.batchsize 29 | % batchsize = 1 30 | numepochs = opts.numepochs 31 | numbatches =floor(m / batchsize) 32 | 33 | assert(rem(numbatches, 1) == 0, 'numbatches must be a integer'); 34 | 35 | L = zeros(numepochs*numbatches,1); 36 | n = 1; 37 | for i = 1 : numepochs 38 | tic; 39 | 40 | kk = randperm(m); 41 | for l = 1 : numbatches 42 | batch_x = train_x(kk((l - 1) * batchsize + 1 : l * batchsize), :); 43 | 44 | %Add noise to input (for use in denoising autoencoder) 45 | if(nn.inputZeroMaskedFraction ~= 0) 46 | batch_x = batch_x.*(rand(size(batch_x))>nn.inputZeroMaskedFraction); 47 | end 48 | 49 | batch_y = train_y(kk((l - 1) * batchsize + 1 : l * batchsize), :); 50 | 51 | nn = nnff(nn, batch_x, batch_y); 52 | nn = nnbp(nn); 53 | nn = nnapplygrads(nn); 54 | 55 | L(n) = nn.L; 56 | 57 | n = n + 1; 58 | end 59 | 60 | t = toc; 61 | 62 | if opts.validation == 1 63 | loss = nneval(nn, loss, train_x, train_y, val_x, val_y); 64 | str_perf = sprintf('; Full-batch train mse = %f, val mse = %f', loss.train.e(end), loss.val.e(end)); 65 | else 66 | loss = nneval(nn, loss, train_x, train_y); 67 | str_perf = sprintf('; Full-batch train err = %f', loss.train.e(end)); 68 | end 69 | if ishandle(fhandle) 70 | nnupdatefigures(nn, fhandle, loss, opts, i); 71 | end 72 | 73 | disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Took ' num2str(t) ' seconds' '. 
Mini-batch mean squared error on training set is ' num2str(mean(L((n-numbatches):(n-1)))) str_perf]); 74 | nn.learningRate = nn.learningRate * nn.scaling_learningRate; 75 | 76 | 77 | 78 | end 79 | end 80 | 81 | -------------------------------------------------------------------------------- /nnupdatefigures.m: -------------------------------------------------------------------------------- 1 | function nnupdatefigures(nn,fhandle,L,opts,i) 2 | %NNUPDATEFIGURES updates figures during training 3 | if i > 1 %dont plot first point, its only a point 4 | x_ax = 1:i; 5 | % create legend 6 | if opts.validation == 1 7 | M = {'Training','Validation'}; 8 | else 9 | M = {'Training'}; 10 | end 11 | 12 | %create data for plots 13 | if strcmp(nn.output,'softmax') 14 | plot_x = x_ax'; 15 | plot_ye = L.train.e'; 16 | plot_yfrac = L.train.e_frac'; 17 | 18 | else 19 | plot_x = x_ax'; 20 | plot_ye = L.train.e'; 21 | end 22 | 23 | %add error on validation data if present 24 | if opts.validation == 1 25 | plot_x = [plot_x, x_ax']; 26 | plot_ye = [plot_ye,L.val.e']; 27 | end 28 | 29 | 30 | %add classification error on validation data if present 31 | if opts.validation == 1 && strcmp(nn.output,'softmax') 32 | plot_yfrac = [plot_yfrac, L.val.e_frac']; 33 | end 34 | 35 | % plotting 36 | figure(fhandle); 37 | if strcmp(nn.output,'softmax') %also plot classification error 38 | 39 | p1 = subplot(1,2,1); 40 | plot(plot_x,plot_ye); 41 | xlabel('Number of epochs'); ylabel('Error');title('Error'); 42 | title('Error') 43 | legend(p1, M,'Location','NorthEast'); 44 | set(p1, 'Xlim',[0,opts.numepochs + 1]) 45 | 46 | p2 = subplot(1,2,2); 47 | plot(plot_x,plot_yfrac); 48 | xlabel('Number of epochs'); ylabel('Misclassification rate'); 49 | title('Misclassification rate') 50 | legend(p2, M,'Location','NorthEast'); 51 | set(p2, 'Xlim',[0,opts.numepochs + 1]) 52 | 53 | else 54 | 55 | p = plot(plot_x,plot_ye); 56 | xlabel('Number of epochs'); ylabel('Error');title('Error'); 57 | legend(p, M,'Location','NorthEast'); 58 | set(gca, 'Xlim',[0,opts.numepochs + 1]) 59 | 60 | end 61 | drawnow; 62 | end 63 | end 64 | -------------------------------------------------------------------------------- /rbmdown.m: -------------------------------------------------------------------------------- 1 | function x = rbmdown(rbm, x) 2 | x = sigm(repmat(rbm.b', size(x, 1), 1) + x * rbm.W); 3 | end 4 | -------------------------------------------------------------------------------- /rbmtrain.m: -------------------------------------------------------------------------------- 1 | function rbm = rbmtrain(rbm, x, opts) 2 | assert(isfloat(x), 'x must be a float'); 3 | assert(all(x(:)>=0) && all(x(:)<=1), 'all data in x must be in [0:1]'); 4 | m = size(x, 1); 5 | numbatches = m / opts.batchsize; 6 | numbatches=floor(numbatches); 7 | assert(rem(numbatches, 1) == 0, 'numbatches not integer'); 8 | 9 | for i = 1 : opts.numepochs 10 | kk = randperm(m); 11 | err = 0; 12 | for l = 1 : numbatches 13 | batch = x(kk((l - 1) * opts.batchsize + 1 : l * opts.batchsize), :); 14 | 15 | v1 = batch; 16 | h1 = sigmrnd(repmat(rbm.c', opts.batchsize, 1) + v1 * rbm.W'); 17 | v2 = sigmrnd(repmat(rbm.b', opts.batchsize, 1) + h1 * rbm.W); 18 | h2 = sigm(repmat(rbm.c', opts.batchsize, 1) + v2 * rbm.W'); 19 | 20 | c1 = h1' * v1; 21 | c2 = h2' * v2; 22 | 23 | rbm.vW = rbm.momentum * rbm.vW + rbm.alpha * (c1 - c2) / opts.batchsize; 24 | rbm.vb = rbm.momentum * rbm.vb + rbm.alpha * sum(v1 - v2)' / opts.batchsize; 25 | rbm.vc = rbm.momentum * rbm.vc + rbm.alpha * sum(h1 - h2)' / 
opts.batchsize; 26 | 27 | rbm.W = rbm.W + rbm.vW; 28 | rbm.b = rbm.b + rbm.vb; 29 | rbm.c = rbm.c + rbm.vc; 30 | 31 | err = err + sum(sum((v1 - v2) .^ 2)) / opts.batchsize; 32 | end 33 | 34 | disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Average reconstruction error is: ' num2str(err / numbatches)]); 35 | 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /rbmup.m: -------------------------------------------------------------------------------- 1 | function x = rbmup(rbm, x) 2 | x = sigm(repmat(rbm.c', size(x, 1), 1) + x * rbm.W'); 3 | end 4 | -------------------------------------------------------------------------------- /sigm.m: -------------------------------------------------------------------------------- 1 | function X = sigm(P) 2 | X = 1./(1+exp(-P)); 3 | end -------------------------------------------------------------------------------- /sigmrnd.m: -------------------------------------------------------------------------------- 1 | function X = sigmrnd(P) 2 | % X = double(1./(1+exp(-P)))+1*randn(size(P)); 3 | X = double(1./(1+exp(-P)) > rand(size(P))); 4 | end --------------------------------------------------------------------------------