├── plotConfusion.m ├── fcnn ├── vl_nnconv.mexa64 ├── vl_nnconv.mexw64 ├── vl_nnpool.mexa64 ├── vl_nnpool.mexw64 ├── vl_imreadjpeg.mexw64 ├── vl_nnnormalize.mexa64 ├── vl_nnnormalize.mexw64 ├── vl_rootnn.m ├── vl_setupnn.m ├── vl_nnrelu.m ├── vl_nnnoffset.m ├── vl_simplenn_move.m ├── vl_nnsoftmax.m ├── vl_nnnormalize.m ├── vl_nndropout.m ├── vl_nnloss.m ├── vl_nnsoftmaxloss.m~ ├── vl_nnpool.m ├── vl_nnsoftmaxloss.m ├── vl_argparse.m ├── vl_nnconv.m ├── vl_simplenn_diagnose.m ├── vl_simplenn_display.m └── vl_simplenn.m ├── .gitattributes ├── normalize.m ├── getypre.m ├── README.md ├── convertlabel.m ├── main_OP.m ├── .gitignore ├── fcnn.m ├── Results_statistics.m └── cnn_train.m /plotConfusion.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianboyang/CNNHAR/HEAD/plotConfusion.m -------------------------------------------------------------------------------- /fcnn/vl_nnconv.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianboyang/CNNHAR/HEAD/fcnn/vl_nnconv.mexa64 -------------------------------------------------------------------------------- /fcnn/vl_nnconv.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianboyang/CNNHAR/HEAD/fcnn/vl_nnconv.mexw64 -------------------------------------------------------------------------------- /fcnn/vl_nnpool.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianboyang/CNNHAR/HEAD/fcnn/vl_nnpool.mexa64 -------------------------------------------------------------------------------- /fcnn/vl_nnpool.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianboyang/CNNHAR/HEAD/fcnn/vl_nnpool.mexw64 -------------------------------------------------------------------------------- /fcnn/vl_imreadjpeg.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianboyang/CNNHAR/HEAD/fcnn/vl_imreadjpeg.mexw64 -------------------------------------------------------------------------------- /fcnn/vl_nnnormalize.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianboyang/CNNHAR/HEAD/fcnn/vl_nnnormalize.mexa64 -------------------------------------------------------------------------------- /fcnn/vl_nnnormalize.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianboyang/CNNHAR/HEAD/fcnn/vl_nnnormalize.mexw64 -------------------------------------------------------------------------------- /fcnn/vl_rootnn.m: -------------------------------------------------------------------------------- 1 | function root = vl_rootnn() 2 | % VL_ROOTNN Get the root path of the MatConvNet toolbox 3 | % VL_ROOTNN() returns the path to the MatConvNet toolbox. 4 | 5 | % Copyright (C) 2014 Andrea Vedaldi. 6 | % All rights reserved. 7 | % 8 | % This file is part of the VLFeat library and is made available under 9 | % the terms of the BSD license (see the COPYING file). 10 | 11 | root = fileparts(fileparts(mfilename('fullpath'))) ; 12 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /fcnn/vl_setupnn.m: -------------------------------------------------------------------------------- 1 | function vl_setupnn() 2 | % VL_SETUPNN Setup the MatConvNet toolbox 3 | % VL_SETUPNN() function adds the MatConvNet toolbox to MATLAB path. 4 | 5 | % Copyright (C) 2014 Andrea Vedaldi. 6 | % All rights reserved. 7 | % 8 | % This file is part of the VLFeat library and is made available under 9 | % the terms of the BSD license (see the COPYING file). 10 | 11 | root = vl_rootnn() ; 12 | addpath(fullfile(root, 'matlab')) ; 13 | % addpath(fullfile(root, 'matlab', 'mex')) ; 14 | % addpath(fullfile(root, 'matlab', 'xtest')) ; 15 | -------------------------------------------------------------------------------- /normalize.m: -------------------------------------------------------------------------------- 1 | function [xapp,xtest] = normalize(xapp,xtest) 2 | % USAGE: 3 | % [xapp,xtest] = normalizemeanstd(xapp,xtest) 4 | % normalize inputs and output mean and standard deviation to 0 and 1 5 | meanxapp=mean(xapp); 6 | stdxapp=std(xapp); 7 | [nbxapp features]=size(xapp); 8 | for i=1:features 9 | if stdxapp(i)<1e-8 10 | stdxapp(i)=1; 11 | end 12 | end 13 | nbvar=size(xapp,2); 14 | xapp= bsxfun(@rdivide,bsxfun(@minus,xapp, meanxapp),stdxapp) ; 15 | if nargin >1 16 | nbxtest=size(xtest,1); 17 | xtest= bsxfun(@rdivide,bsxfun(@minus,xtest,meanxapp),stdxapp ); 18 | end; 19 | end -------------------------------------------------------------------------------- /fcnn/vl_nnrelu.m: -------------------------------------------------------------------------------- 1 | function y = vl_nnrelu(x,dzdy) 2 | % VL_NNRELU CNN rectified linear unit 3 | % Y = VL_NNRELU(X) applies the rectified linear unit to the data 4 | % X. X can have arbitrary size. 5 | % 6 | % DZDX = VL_NNRELU(X, DZDY) computes the network derivative DZDX 7 | % with respect to the input X given the derivative DZDY with respect 8 | % to the output Y. DZDX has the same dimension as X. 9 | 10 | % Copyright (C) 2014 Andrea Vedaldi. 11 | % All rights reserved. 12 | % 13 | % This file is part of the VLFeat library and is made available under 14 | % the terms of the BSD license (see the COPYING file). 15 | 16 | if nargin <= 1 || isempty(dzdy) 17 | y = max(x, single(0)) ; 18 | else 19 | y = dzdy .* (x > single(0)) ; 20 | end 21 | -------------------------------------------------------------------------------- /getypre.m: -------------------------------------------------------------------------------- 1 | function ypre = getypre(predictions,dataset) 2 | k = strfind(dataset, '.mat'); 3 | if isempty(k) 4 | load(['data/' dataset '/xybagtst']); 5 | else 6 | load(dataset); 7 | end 8 | 9 | ypre_d = zeros(n,bagsize); 10 | nbag = length(predictions); 11 | di = 1; ii = 1; 12 | for i = 1:nbag 13 | y = predictions(i); 14 | ypre_d(ii:ii+bagsize-1,di) = kron(y,ones(bagsize,1)); 15 | ii = ii + oltst; di = di + 1; if di + 1 > bagsize, di = 1; end; 16 | end 17 | ypre = zeros(n,1); 18 | for i = 1:n 19 | temp = ypre_d(i,:); 20 | temp(temp == 0) = []; 21 | ytclass = tabulate(temp); 22 | if ~isempty(temp) 23 | [~,idx] = max(ytclass(:,2)); 24 | ytclass = ytclass(idx,1); 25 | ypre(i) = ytclass; 26 | else 27 | ypre(i) = 1; 28 | end 29 | end -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CNNHAR 2 | Deep convolutional neural network for human activity recognition 3 | ### Description: 4 | This is a software package of Deep Convolutional Neural Networks on Multichannel Time Series for Human Activity Recognition 5 | 6 | ### Reference: 7 | Jian-Bo Yang, Minh Nhut Nguyen, Phyo Phyo San, Xiao-Li Li, Priyadarsini Krishnaswamy Shonali, 8 | "Deep Convolutional Neural Networks on Multichannel Time Series for Human Activity Recognition", 9 | IJCAI 2015 10 | 11 | Author: 12 | Jianbo Yang, Scientist, Institute for Infocomm Research 13 | https://sites.google.com/site/jbysite/ 14 | 15 | Version history: 16 | version 1.0 (June 15, 2015) 17 | 18 | Data: 19 | The sample data is available at: https://drive.google.com/file/d/0BwKaI_JkwD0GT25mbTdiaU1jeG8/view 20 | 21 | 22 | 23 | # CNNHAR 24 | 25 | -------------------------------------------------------------------------------- /convertlabel.m: -------------------------------------------------------------------------------- 1 | function [y2t,y4t, y18t] = convertlabel(x) 2 | yt = x(:,end-1:end); 3 | l1 = yt(:,1); 4 | l2 = yt(:,2); 5 | 6 | glabels = [0 506616, 506617, 504616, 504617, 506620, 504620, 506605, 504605 ... 7 | 506619, 504619, 506611, 504611, 506608, 504608, 508612, 507621, 505606]; 8 | llabels = [0, 101, 102, 104, 105]; 9 | 10 | % Null vs. Gesture 11 | y2t = yt(:,1); 12 | idx = find(l2 == 0); 13 | y2t(idx) = -1*ones(length(idx),1); 14 | idx = find(l2 ~= 0); 15 | y2t(idx) = ones(length(idx),1); 16 | 17 | % Locomotion 18 | y4t = yt(:,1); 19 | for i = 1:length(llabels) 20 | idx = find(l1 == llabels(i)); 21 | y4t(idx) = i*ones(length(idx),1); 22 | end 23 | 24 | % Gesture 25 | y18t = yt(:,1); 26 | for i = 1:length(glabels) 27 | idx = find(l2 == glabels(i)); 28 | y18t(idx) = i*ones(length(idx),1); 29 | end -------------------------------------------------------------------------------- /main_OP.m: -------------------------------------------------------------------------------- 1 | clc 2 | clear 3 | s = RandStream('mcg16807','Seed',0); 4 | RandStream.setGlobalStream(s) 5 | 6 | 7 | dataset = 'S1_label18'; wins = 100; 8 | 9 | 10 | load(['data/' dataset '/xybagtst']); 11 | for m = 1 12 | switch m 13 | case 1 14 | method = 'cnn'; 15 | y1bag = fcnn(dataset); 16 | y1 = getypre(y1bag,dataset); 17 | case 2 18 | method = 'cnn_smoothing'; 19 | resultfile = ['result/' dataset '_cnn']; 20 | y1 = fsmoothing(resultfile, wins); 21 | end 22 | % Calculate the reulst 23 | [acc, af, nf] = Results_statistics (ytst, y1); 24 | fprintf(['acc = %f, af = %f, nf = %f\n'],100*acc, 100*af, 100*nf); 25 | save(['result/' dataset '_' method],'acc','af','nf','y1') 26 | C = confusionmat(ytst,y1); 27 | end 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | -------------------------------------------------------------------------------- /fcnn/vl_nnnoffset.m: -------------------------------------------------------------------------------- 1 | function y = vl_nnnoffset(x, param, dzdy) 2 | % VL_NNNOFFSET Adds an offset dependent on the feature norm 3 | % Y = VL_NNNOFFSET(X, PARAM) subtracts from each element of X the 4 | % weighted norm of the feature channels: 5 | % 6 | % X(i,j,k) = X(i,j,k) - PARAM(1) * L(i,j) ^ PARAM(2) 7 | % 8 | % where 9 | % 10 | % L(i,j) = sum_K X(i,j,k)^2 11 | % 12 | % DZDX = VL_NNNOFFSET(X, PARAM, DZDY) computes the derivative of 13 | % the network given the derivative DZDY with respect to the output 14 | % of this block. 15 | 16 | % Copyright (C) 2014 Andrea Vedaldi. 17 | % All rights reserved. 18 | % 19 | % This file is part of the VLFeat library and is made available under 20 | % the terms of the BSD license (see the COPYING file). 21 | 22 | L = sum(x.^2,3) ; 23 | L = max(L, single(1e-8)) ; 24 | param = single(param) ; 25 | 26 | if nargin <= 2 27 | y = bsxfun(@minus, x, param(1)*L.^param(2)) ; 28 | else 29 | y = dzdy - bsxfun(@times, (2*param(1)*param(2))* x, sum(dzdy,3) .* (L.^(param(2)-1))) ; 30 | end -------------------------------------------------------------------------------- /fcnn/vl_simplenn_move.m: -------------------------------------------------------------------------------- 1 | function net = vl_simplenn_move(net, destination) 2 | % VL_SIMPLENN_MOVE Move a simple CNN between CPU and GPU 3 | % NET = VL_SIMPLENN_MOVE(NET, 'gpu') moves the network 4 | % on the current GPU device. 5 | % 6 | % NET = VL_SIMPLENN_MOVE(NET, 'cpu') moves the network 7 | % on the CPU. 8 | 9 | % Copyright (C) 2014 Andrea Vedaldi. 10 | % All rights reserved. 11 | % 12 | % This file is part of the VLFeat library and is made available under 13 | % the terms of the BSD license (see the COPYING file). 14 | 15 | switch destination 16 | case 'gpu', moveop = @(x) gpuArray(x) ; 17 | case 'cpu', moveop = @(x) gather(x) ; 18 | otherwise, error('Unknown desitation ''%s''.', destination) ; 19 | end 20 | for l=1:numel(net.layers) 21 | switch net.layers{l}.type 22 | case 'conv' 23 | for f = {'filters', 'biases', 'filtersMomentum', 'biasesMomentum'} 24 | f = char(f) ; 25 | if isfield(net.layers{l}, f) 26 | net.layers{l}.(f) = moveop(net.layers{l}.(f)) ; 27 | end 28 | end 29 | otherwise 30 | % nothing to do ? 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /fcnn/vl_nnsoftmax.m: -------------------------------------------------------------------------------- 1 | function Y = vl_nnsoftmax(X,dzdY) 2 | % VL_NNSOFTMAX CNN softmax 3 | % Y = VL_NNSOFTMAX(X) applies the softmax operator the data X. X 4 | % has dimension H x W x D x N, packing N arrays of W x H 5 | % D-dimensional vectors. 6 | % 7 | % D can be thought of as the number of possible classes and the 8 | % function computes the softmax along the D dimension. Often W=H=1, 9 | % but this is not a requirement, as the operator is applied 10 | % convolutionally at all spatial locations. 11 | % 12 | % DZDX = VL_NNSOFTMAX(X, DZDY) computes the derivative DZDX of the 13 | % CNN otuoutwith respect to the input X given the derivative DZDY 14 | % with respect to the block output Y. DZDX has the same dimension 15 | % as X. 16 | 17 | % Copyright (C) 2014 Andrea Vedaldi. 18 | % All rights reserved. 19 | % 20 | % This file is part of the VLFeat library and is made available under 21 | % the terms of the BSD license (see the COPYING file). 22 | 23 | E = exp(bsxfun(@minus, X, max(X,[],3))) ; 24 | L = sum(E,3) ; 25 | Y = bsxfun(@rdivide, E, L) ; 26 | 27 | if nargin <= 1, return ; end 28 | 29 | % backward 30 | Y = Y .* bsxfun(@minus, dzdY, sum(dzdY .* Y, 3)) ; 31 | -------------------------------------------------------------------------------- /fcnn/vl_nnnormalize.m: -------------------------------------------------------------------------------- 1 | % VL_NNNORMALIZE Feature-wise sliding window normalization 2 | % Y = VL_NNORMALIZE(X, PARAM) performs feature-wise sliding window 3 | % normalization of the image X. The normalized output is given by: 4 | % 5 | % Y(i,j,k) = X(i,j,k) / L(i,j,k)^BETA 6 | % 7 | % where the normalising factor is 8 | % 9 | % L(i,j,k) = KAPPA + ALPHA * (sum_{q in Q(k)} X(i,j,k)^2, 10 | % 11 | % PARAM = [N KAPPA ALPHA BETA], and N is the size of the window. The 12 | % window Q(k) itself is defined as: 13 | % 14 | % Q(k) = [max(1, k-FLOOR((N-1)/2)), min(D, k+CEIL((N-1)/2))]. 15 | % 16 | % where D is the number of feature dimensions in X. Note in 17 | % particular that, by setting N >= 2D, the function can be used to 18 | % normalize the whole feature vector. 19 | % 20 | % DZDX = VL_NNORMALIZE(X, PARAM, DZDY) computes the derivative of 21 | % the network output DZDX with respect to the block input X given 22 | % the derivative DZDY with respect to the block output Y. 23 | 24 | % Copyright (C) 2014 Andrea Vedaldi. 25 | % All rights reserved. 26 | % 27 | % This file is part of the VLFeat library and is made available under 28 | % the terms of the BSD license (see the COPYING file). 29 | -------------------------------------------------------------------------------- /fcnn/vl_nndropout.m: -------------------------------------------------------------------------------- 1 | function [y,mask] = vl_nndropout(x,varargin) 2 | % VL_NNDROPOUT CNN dropout 3 | % [Y,MASK] = VL_NNDROPOUT(X) applies dropout to the data X. MASK 4 | % is the randomly sampled dropout mask. Both Y and MASK have the 5 | % same size as X. 6 | % 7 | % VL_NNDROPOUT(X, 'rate', R) sets the dropout rate to R. 8 | % 9 | % [DZDX] = VL_NNDROPOUT(X, DZDY, 'mask', MASK) computes the 10 | % derivatives DZDX of the network relative to the input X given 11 | % the derivative DZDY relative to the outut Y. 12 | 13 | % Copyright (C) 2014 Andrea Vedaldi. 14 | % All rights reserved. 15 | % 16 | % This file is part of the VLFeat library and is made available under 17 | % the terms of the BSD license (see the COPYING file). 18 | 19 | opts.rate = 0.5 ; 20 | opts.mask = [] ; 21 | 22 | backMode = numel(varargin) > 0 && ~isstr(varargin{1}) ; 23 | if backMode 24 | dzdy = varargin{1} ; 25 | opts = vl_argparse(opts, varargin(2:end)) ; 26 | else 27 | opts = vl_argparse(opts, varargin) ; 28 | end 29 | 30 | % determine mask 31 | mask = opts.mask ; 32 | scale = single(1 / (1 - opts.rate)) ; 33 | if backMode && isempty(mask) 34 | warning('vl_nndropout: when using in backward mode, the mask should be specified') ; 35 | end 36 | if isempty(mask) 37 | if isa(x,'gpuArray') 38 | mask = scale * single(gpuArray.rand(size(x)) >= opts.rate) ; 39 | else 40 | mask = scale * single(rand(size(x)) >= opts.rate) ; 41 | end 42 | end 43 | 44 | % do job 45 | if ~backMode 46 | y = mask .* x ; 47 | else 48 | y = mask .* dzdy ; 49 | end 50 | -------------------------------------------------------------------------------- /fcnn/vl_nnloss.m: -------------------------------------------------------------------------------- 1 | function Y = vl_nnloss(X,c,dzdy) 2 | % VL_NNLOSS CNN log-loss 3 | % Y = VL_NNLOSS(X, C) applies the the logistic loss to the data 4 | % X. X has dimension H x W x D x N, packing N arrays of W x H 5 | % D-dimensional vectors. 6 | % 7 | % C contains the class labels, which should be integer in the range 8 | % 1 to D. C can be an array with either N elements or with H x W x 9 | % 1 x N dimensions. In the fist case, a given class label is 10 | % applied at all spatial locations; in the second case, different 11 | % class labels can be specified for different locations. 12 | % 13 | % D can be thought of as the number of possible classes and the 14 | % function computes the softmax along the D dimension. Often W=H=1, 15 | % but this is not a requirement, as the operator is applied 16 | % convolutionally at all spatial locations. 17 | % 18 | % DZDX = VL_NNLOSS(X, C, DZDY) computes the derivative DZDX of the 19 | % CNN with respect to the input X given the derivative DZDY with 20 | % respect to the block output Y. DZDX has the same dimension as X. 21 | 22 | % Copyright (C) 2014 Andrea Vedaldi. 23 | % All rights reserved. 24 | % 25 | % This file is part of the VLFeat library and is made available under 26 | % the terms of the BSD license (see the COPYING file). 27 | 28 | % no division by zero 29 | X = X + 1e-4 ; 30 | sz = [size(X,1) size(X,2) size(X,3) size(X,4)] ; 31 | 32 | % index from 0 33 | c = c - 1 ; 34 | 35 | if numel(c) == sz(4) 36 | % one label per image 37 | c = reshape(c, [1 1 1 sz(4)]) ; 38 | c = repmat(c, [sz(1) sz(2)]) ; 39 | else 40 | % one label per spatial location 41 | sz_ = size(c) ; 42 | assert(isequal(sz_, [sz(1) sz(2) 1 sz(4)])) ; 43 | end 44 | 45 | % convert to indeces 46 | c_ = 0:numel(c)-1 ; 47 | c_ = 1 + ... 48 | mod(c_, sz(1)*sz(2)) + ... 49 | (sz(1)*sz(2)) * c(:)' + ... 50 | (sz(1)*sz(2)*sz(3)) * floor(c_/(sz(1)*sz(2))) ; 51 | 52 | n = sz(1)*sz(2) ; 53 | if nargin <= 2 54 | Y = - sum(log(X(c_))) / n ; 55 | else 56 | Y_ = - (1./X) * (dzdy/n) ; 57 | Y = Y_*0 ; 58 | Y(c_) = Y_(c_) ; 59 | end 60 | -------------------------------------------------------------------------------- /fcnn/vl_nnsoftmaxloss.m~: -------------------------------------------------------------------------------- 1 | function Y = vl_nnsoftmaxloss(X,c,dzdy) 2 | % VL_NNSOFTMAXLOSS CNN combined softmax and logistic loss 3 | % Y = VL_NNSOFTMAX(X, C) applies the softmax operator followed by 4 | % the logistic loss the data X. X has dimension H x W x D x N, 5 | % packing N arrays of W x H D-dimensional vectors. 6 | % 7 | % C contains the class labels, which should be integer in the range 8 | % 1 to D. C can be an array with either N elements or with H x W x 9 | % 1 x N dimensions. In the fist case, a given class label is 10 | % applied at all spatial locations; in the second case, different 11 | % class labels can be specified for different locations. 12 | % 13 | % D can be thought of as the number of possible classes and the 14 | % function computes the softmax along the D dimension. Often W=H=1, 15 | % but this is not a requirement, as the operator is applied 16 | % convolutionally at all spatial locations. 17 | % 18 | % DZDX = VL_NNSOFTMAXLOSS(X, C, DZDY) computes the derivative DZDX 19 | % of the CNN with respect to the input X given the derivative DZDY 20 | % with respect to the block output Y. DZDX has the same dimension 21 | % as X. 22 | 23 | % Copyright (C) 2014 Andrea Vedaldi. 24 | % All rights reserved. 25 | % 26 | % This file is part of the VLFeat library and is made available under 27 | % the terms of the BSD license (see the COPYING file). 28 | 29 | %X = X + 1e-6 ; 30 | sz = [size(X,1) size(X,2) size(X,3) size(X,4)] ; 31 | 32 | % index from 0 33 | c = c - 1 ; 34 | 35 | if numel(c) == sz(4) 36 | % one label per image 37 | c = reshape(c, [1 1 1 sz(4)]) ; 38 | c = repmat(c, [sz(1) sz(2)]) ; 39 | else 40 | % one label per spatial location 41 | sz_ = size(c) ; 42 | assert(isequal(sz_, [sz(1) sz(2) 1 sz(4)])) ; 43 | end 44 | 45 | % convert to indeces 46 | c_ = 0:numel(c)-1 ; 47 | c_ = 1 + ... 48 | mod(c_, sz(1)*sz(2)) + ... 49 | (sz(1)*sz(2)) * c(:)' + ... 50 | (sz(1)*sz(2)*sz(3)) * floor(c_/(sz(1)*sz(2))) ; 51 | 52 | % compute softmaxloss 53 | fprintf('max(c_) = %d\n' 54 | Xmax = max(X,[],3) ; 55 | ex = exp(bsxfun(@minus, X, Xmax)) ; 56 | 57 | n = sz(1)*sz(2) ; 58 | if nargin <= 2 59 | t = Xmax + log(sum(ex,3)) - reshape(X(c_), [sz(1:2) 1 sz(4)]) ; 60 | Y = sum(t(:)) / n ; 61 | else 62 | Y = bsxfun(@rdivide, ex, sum(ex,3)) ; 63 | Y(c_) = Y(c_) - 1; 64 | Y = Y * (dzdy / n) ; 65 | end 66 | -------------------------------------------------------------------------------- /fcnn/vl_nnpool.m: -------------------------------------------------------------------------------- 1 | % VL_NNPOOL CNN poolinng 2 | % Y = VL_NNPOOL(X, POOL) applies the pooling operator to all 3 | % channels of the data X using a square filter of size POOL. X is a 4 | % SINGLE array of dimension H x W x D x N where (H,W) are the 5 | % height and width of the map stack, D is the image depth (number 6 | % of feature channels) and N the number of of images in the stack. 7 | % 8 | % Y = VL_NNPOOL(X, [POOLY, POOLX]) uses a rectangular filter of 9 | % height POOLY and width POOLX. 10 | % 11 | % DZDX = VL_NNPOOL(X, POOL, DZDY) computes the derivatives of 12 | % the nework output Z w.r.t. the data X given the derivative DZDY 13 | % w.r.t the max-pooling output Y. 14 | % 15 | % VL_NNCONV(..., 'option', value, ...) takes the following options: 16 | % 17 | % Stride:: [1] 18 | % The output stride (downsampling factor). It can be either a 19 | % scalar for isotropic downsampling or a vector [STRIDEY 20 | % STRIDEX]. 21 | % 22 | % Pad:: [0] 23 | % The amount of input padding. Input images are padded with zeros 24 | % by this number of pixels on all sides before the convolution is 25 | % computed. It can also be a vector [TOP BOTTOM LEFT RIGHT] to 26 | % specify a different amount of padding in each direction. The 27 | % size of the poolin filter has to exceed the padding. 28 | % 29 | % Method:: ['max'] 30 | % Specify method of pooling. It can be either 'max' (retain max value 31 | % over the pooling region per channel) or 'avg' (compute the average 32 | % value over the poolling region per channel). 33 | % 34 | % The pooling window must be not larger than the padded image, i.e. 35 | % 36 | % 1 <= POOLY <= HEIGHT + (PADTOP + PADBOTTOM), 37 | % 1 <= POOLX <= WIDTH + (PADLEFT + PADRIGHT). 38 | % 39 | % The output a is a SINGLE array of dimension YH x YW x K x N of N 40 | % images with K challens and size: 41 | % 42 | % YH = floor((H + (PADTOP+PADBOTTOM) - POOLY)/STRIDEY) + 1, 43 | % YW = floor((W + (PADLEFT+PADRIGHT) - POOLX)/STRIDEX) + 1. 44 | % 45 | % The derivative DZDY has the same dimension of the output Y and 46 | % the derivative DZDX has the same dimension as the input X. 47 | 48 | % Copyright (C) 2014 Andrea Vedaldi, Karel Lenc, and Max Jaderberg. 49 | % All rights reserved. 50 | % 51 | % This file is part of the VLFeat library and is made available under 52 | % the terms of the BSD license (see the COPYING file). 53 | 54 | -------------------------------------------------------------------------------- /fcnn/vl_nnsoftmaxloss.m: -------------------------------------------------------------------------------- 1 | function Y = vl_nnsoftmaxloss(X,c,dzdy) 2 | % VL_NNSOFTMAXLOSS CNN combined softmax and logistic loss 3 | % Y = VL_NNSOFTMAX(X, C) applies the softmax operator followed by 4 | % the logistic loss the data X. X has dimension H x W x D x N, 5 | % packing N arrays of W x H D-dimensional vectors. 6 | % 7 | % C contains the class labels, which should be integer in the range 8 | % 1 to D. C can be an array with either N elements or with H x W x 9 | % 1 x N dimensions. In the fist case, a given class label is 10 | % applied at all spatial locations; in the second case, different 11 | % class labels can be specified for different locations. 12 | % 13 | % D can be thought of as the number of possible classes and the 14 | % function computes the softmax along the D dimension. Often W=H=1, 15 | % but this is not a requirement, as the operator is applied 16 | % convolutionally at all spatial locations. 17 | % 18 | % DZDX = VL_NNSOFTMAXLOSS(X, C, DZDY) computes the derivative DZDX 19 | % of the CNN with respect to the input X given the derivative DZDY 20 | % with respect to the block output Y. DZDX has the same dimension 21 | % as X. 22 | 23 | % Copyright (C) 2014 Andrea Vedaldi. 24 | % All rights reserved. 25 | % 26 | % This file is part of the VLFeat library and is made available under 27 | % the terms of the BSD license (see the COPYING file). 28 | 29 | %X = X + 1e-6 ; 30 | sz = [size(X,1) size(X,2) size(X,3) size(X,4)] ; 31 | 32 | % index from 0 33 | c = c - 1 ; 34 | cend = c(end); 35 | if numel(c) == sz(4) 36 | % one label per image 37 | c = reshape(c, [1 1 1 sz(4)]) ; 38 | c = repmat(c, [sz(1) sz(2)]) ; 39 | else 40 | % one label per spatial location 41 | sz_ = size(c) ; 42 | assert(isequal(sz_, [sz(1) sz(2) 1 sz(4)])) ; 43 | end 44 | 45 | % convert to indeces 46 | c_ = 0:numel(c)-1 ; 47 | c_ = 1 + ... 48 | mod(c_, sz(1)*sz(2)) + ... 49 | (sz(1)*sz(2)) * c(:)' + ... 50 | (sz(1)*sz(2)*sz(3)) * floor(c_/(sz(1)*sz(2))) ; 51 | 52 | % compute softmaxloss 53 | % fprintf('\n max(c_) = %d, and numel(X) = %d, and c(end) = %d\n',max(c_), numel(X), cend); 54 | Xmax = max(X,[],3) ; 55 | ex = exp(bsxfun(@minus, X, Xmax)) ; 56 | 57 | n = sz(1)*sz(2) ; 58 | if nargin <= 2 59 | t = Xmax + log(sum(ex,3)) - reshape(X(c_), [sz(1:2) 1 sz(4)]) ; 60 | Y = sum(t(:)) / n ; 61 | else 62 | Y = bsxfun(@rdivide, ex, sum(ex,3)) ; 63 | Y(c_) = Y(c_) - 1; 64 | Y = Y * (dzdy / n) ; 65 | end 66 | -------------------------------------------------------------------------------- /fcnn/vl_argparse.m: -------------------------------------------------------------------------------- 1 | function [conf, args] = vl_argparse(conf, args) 2 | % VL_ARGPARSE Parse list of parameter-value pairs 3 | % CONF = VL_ARGPARSE(CONF, ARGS) updates the structure CONF based on 4 | % the specified parameter-value pairs ARGS={PAR1, VAL1, ... PARN, 5 | % VALN}. The function produces an error if an unknown parameter name 6 | % is passed in. 7 | % 8 | % [CONF, ARGS] = VL_ARGPARSE(CONF, ARGS) copies any parameter in 9 | % ARGS that does not match CONF back to ARGS instead of producing an 10 | % error. 11 | % 12 | % Example:: 13 | % The function can be used to parse a list of arguments 14 | % passed to a MATLAB functions: 15 | % 16 | % function myFunction(x,y,z,varargin) 17 | % conf.parameterName = defaultValue ; 18 | % conf = vl_argparse(conf, varargin) 19 | % 20 | % If only a subset of the options should be parsed, for example 21 | % because the other options are interpreted by a subroutine, then 22 | % use the form 23 | % 24 | % [conf, varargin] = vl_argparse(conf, varargin) 25 | % 26 | % that copies back to VARARGIN any unknown parameter. 27 | % 28 | % See also: VL_OVERRIDE(), VL_HELP(). 29 | 30 | % Authors: Andrea Vedaldi 31 | 32 | % Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson. 33 | % All rights reserved. 34 | % 35 | % This file is part of the VLFeat library and is made available under 36 | % the terms of the BSD license (see the COPYING file). 37 | 38 | if ~isstruct(conf), error('CONF must be a structure') ; end 39 | 40 | remainingArgs = {} ; 41 | names = fieldnames(conf) ; 42 | 43 | ai = 1 ; 44 | while ai <= length(args) 45 | paramName = args{ai} ; 46 | if isstruct(paramName) 47 | moreArgs = cat(2, fieldnames(args{ai}), struct2cell(args{ai}))' ; 48 | [conf,r] = vl_argparse(conf, moreArgs(:)) ; 49 | remainingArgs = cat(2, remainingArgs, r) ; 50 | ai = ai +1 ; 51 | continue ; 52 | end 53 | if ~ischar(paramName) 54 | error('The name of the parameter number %d is not a string nor a structure', (ai-1)/2+1) ; 55 | end 56 | if ai + 1 > length(args) 57 | error('Parameter-value pair expected (missing value?).') ; 58 | end 59 | value = args{ai+1} ; 60 | i = find(strcmpi(paramName, names)) ; 61 | if isempty(i) 62 | if nargout < 2 63 | error('Unknown parameter ''%s''.', paramName) ; 64 | else 65 | remainingArgs(end+1:end+2) = args(ai:ai+1) ; 66 | end 67 | else 68 | paramName = names{i} ; 69 | if isstruct(conf.(paramName)) 70 | [conf.(paramName),r] = vl_argparse(conf.(paramName), {value}) ; 71 | else 72 | conf.(paramName) = value ; 73 | end 74 | end 75 | ai = ai + 2 ; 76 | end 77 | 78 | args = remainingArgs ; 79 | -------------------------------------------------------------------------------- /fcnn/vl_nnconv.m: -------------------------------------------------------------------------------- 1 | % VL_NNCONV CNN convolution 2 | % Y = VL_NNCONV(X, F, B) computes the convolution of the image stack X 3 | % with the filter bank F and biases B. If B is the empty matrix, 4 | % then no biases are added. If F is the empty matrix, then 5 | % the function does not filter the image, but still adds the 6 | % biases as well as performing downsampling and padding as explained 7 | % below. 8 | % 9 | % [DXDY, DXDF, DXDB] = VL_NNCONV(X, F, B, DZDY) computes the 10 | % derivatives of the nework output Z w.r.t. the data X and 11 | % parameters F, B given the derivative w.r.t the output Y. If B is 12 | % the empty matrix, then DXDB is also empty. 13 | % 14 | % X is a SINGLE array of dimension H x W x D x N where (H,W) are 15 | % the height and width of the map stack, D is the image depth 16 | % (number of feature channels) and N the number of of images in the 17 | % stack. 18 | % 19 | % F is a SINGLE array fo dimension FW x FH x D x K where (FH,FW) are 20 | % the filter height and width and K the number o filters in the 21 | % bank. 22 | % 23 | % VL_NNCONV() implements a special `fully-connected' mode: when the 24 | % support of the filters matches exactly the support of the input 25 | % image, the code uses an optimized path for faster computation. 26 | % 27 | % VL_NNCONV(..., 'option', value, ...) takes the following options: 28 | % 29 | % Stride:: [1] 30 | % The output stride (downsampling factor). Passing [STRIDEY 31 | % STRIDEX] allows specifying different subsampling factors for 32 | % the vertical and horizontal directions. 33 | % 34 | % Pad:: [0] 35 | % The amount of input padding. Input images are padded with zeros 36 | % by this number of pixels before the convolution is 37 | % computed. Passing [TOP BOTTOM LEFT RIGHT] allows specifying 38 | % different padding amounts for the top, bottom, left, and right 39 | % sides respectively. 40 | % 41 | % The filter size must be not larger than the padded image, i.e. 42 | % 43 | % 1 <= FH <= H + 2*(PADTOP+PADBOTTOM), 44 | % 1 <= FW <= W + 2*(PADLEFT+PADRIGHT). 45 | % 46 | % The output a is a SINGLE array of dimension YH x YW x K x N of 47 | % N images with K challens and size: 48 | % 49 | % YH = floor((H + (PADTOP+PADBOTTOM) - FH)/STRIDEY) + 1, 50 | % YW = floor((W + (PADLEFT+PADRIGHT) - FW)/STRIDEX) + 1. 51 | % 52 | % The derivative DZDY has the same dimension of the output Y, 53 | % the derivative DZDX has the same dimension as the input X, and 54 | % the derivative DZDF has the the same dimenson as F. 55 | 56 | % Copyright (C) 2014 Andrea Vedaldi and Max Jaderberg. 57 | % All rights reserved. 58 | % 59 | % This file is part of the VLFeat library and is made available under 60 | % the terms of the BSD license (see the COPYING file). 61 | -------------------------------------------------------------------------------- /fcnn/vl_simplenn_diagnose.m: -------------------------------------------------------------------------------- 1 | function vl_simplenn_diagnose(net, res) 2 | % VL_SIMPLENN_DIAGNOSE Plot diagnostic information 3 | % VL_SIMPLENN_DIAGNOSE(NET, RES) plots in the current window 4 | % the average, maximum, and miminum element for all the filters 5 | % and biases in the network NET. If RES is also provided, it will 6 | % plot the average, minimum, and maximum element for all the 7 | % intermediate responses and deriviatives stored in RES as well. 8 | % 9 | % This function can be used to rapidly glance at the evolution 10 | % of the paramters during training. 11 | 12 | n = numel(net.layers) ; 13 | fmu = NaN + zeros(1, n) ; 14 | fmi = fmu ; 15 | fmx = fmu ; 16 | bmu = fmu ; 17 | bmi = fmu ; 18 | bmx = fmu ; 19 | xmu = fmu ; 20 | xmi = fmi ; 21 | xmx = fmx ; 22 | dxmu = fmu ; 23 | dxmi = fmi ; 24 | dxmx = fmx ; 25 | dfmu = fmu ; 26 | dfmi = fmu ; 27 | dfmx = fmu ; 28 | dbmu = fmu ; 29 | dbmi = fmu ; 30 | dbmx = fmu ; 31 | 32 | for i=1:numel(net.layers) 33 | ly = net.layers{i} ; 34 | if strcmp(ly.type, 'conv') && numel(ly.filters) > 0 35 | x = gather(ly.filters) ; 36 | fmu(i) = mean(x(:)) ; 37 | fmi(i) = min(x(:)) ; 38 | fmx(i) = max(x(:)) ; 39 | end 40 | if strcmp(ly.type, 'conv') && numel(ly.biases) > 0 41 | x = gather(ly.biases) ; 42 | bmu(i) = mean(x(:)) ; 43 | bmi(i) = min(x(:)) ; 44 | bmx(i) = max(x(:)) ; 45 | end 46 | if nargin > 1 47 | if numel(res(i).x) > 1 48 | x = gather(res(i).x) ; 49 | xmu(i) = mean(x(:)) ; 50 | xmi(i) = min(x(:)) ; 51 | xmx(i) = max(x(:)) ; 52 | end 53 | if numel(res(i).dzdx) > 1 54 | x = gather(res(i).dzdx); 55 | dxmu(i) = mean(x(:)) ; 56 | dxmi(i) = min(x(:)) ; 57 | dxmx(i) = max(x(:)) ; 58 | end 59 | if strcmp(ly.type, 'conv') && numel(res(i).dzdw{1}) > 0 60 | x = gather(res(i).dzdw{1}) ; 61 | dfmu(i) = mean(x(:)) ; 62 | dfmi(i) = min(x(:)) ; 63 | dfmx(i) = max(x(:)) ; 64 | end 65 | if strcmp(ly.type, 'conv') && numel(res(i).dzdw{2}) > 0 66 | x = gather(res(i).dzdw{2}) ; 67 | dbmu(i) = mean(x(:)) ; 68 | dbmi(i) = min(x(:)) ; 69 | dbmx(i) = max(x(:)) ; 70 | end 71 | end 72 | end 73 | 74 | if nargin > 1 75 | np = 6 ; 76 | else 77 | np = 2 ; 78 | end 79 | 80 | clf ; subplot(np,1,1) ; 81 | errorbar(1:n, fmu, fmi, fmx, 'bo') ; 82 | grid on ; 83 | xlabel('layer') ; 84 | ylabel('filters') ; 85 | title('coefficient ranges') ; 86 | 87 | subplot(np,1,2) ; 88 | errorbar(1:n, bmu, bmi, bmx, 'bo') ; 89 | grid on ; 90 | xlabel('layer') ; 91 | ylabel('biases') ; 92 | 93 | if nargin > 1 94 | subplot(np,1,3) ; 95 | errorbar(1:n, xmu, xmi, xmx, 'bo') ; 96 | grid on ; 97 | xlabel('layer') ; 98 | ylabel('x') ; 99 | 100 | subplot(np,1,4) ; 101 | errorbar(1:n, dxmu, dxmi, dxmx, 'bo') ; 102 | grid on ; 103 | xlabel('layer') ; 104 | ylabel('dzdx') ; 105 | 106 | subplot(np,1,5) ; 107 | errorbar(1:n, dfmu, dfmi, dfmx, 'bo') ; 108 | grid on ; 109 | xlabel('layer') ; 110 | ylabel('dfilters') ; 111 | 112 | subplot(np,1,6) ; 113 | errorbar(1:n, dbmu, dbmi, dbmx, 'bo') ; 114 | grid on ; 115 | xlabel('layer') ; 116 | ylabel('dbiases') ; 117 | end 118 | 119 | 120 | drawnow ; 121 | -------------------------------------------------------------------------------- /fcnn.m: -------------------------------------------------------------------------------- 1 | function y1bag = fcnn(dataset) 2 | addpath('fcnn') 3 | k = strfind(dataset, '.mat'); 4 | if isempty(k) 5 | opts.dataDir = ['data/' dataset]; 6 | opts.expDir = ['data/' dataset '/']; 7 | opts.imdbPath = fullfile(opts.dataDir, 'imdb.mat'); 8 | else 9 | opts.imdbPath = dataset; 10 | end 11 | opts.train.batchSize = 12 ; % for dataset S123_label18 12 | % opts.train.batchSize = 20 ; 13 | opts.train.numEpochs =8; 14 | % opts.train.numEpochs =32; 15 | opts.train.continue = true ; 16 | opts.train.useGpu = false ; 17 | % opts.train.learningRate = 0.001 ; 18 | opts.train.learningRate = [0.01*ones(1, 3) 0.001*ones(1, 25) 0.0001*ones(1,15)] ; 19 | % opts.train.expDir = opts.expDir ; 20 | opts.train.outputfea = 'true'; 21 | 22 | % -------------------------------------------------------------------- 23 | % Prepare data 24 | % -------------------------------------------------------------------- 25 | 26 | if exist(opts.imdbPath) 27 | imdb = load(opts.imdbPath) ; 28 | else 29 | error('no datafile') 30 | end 31 | c = length(unique(imdb.images.labels)); 32 | d = size(imdb.images.data,2); 33 | % Define a network similar to LeNet 34 | f=1/100 ; 35 | 36 | net.layers = {} ; 37 | net.layers{end+1} = struct('type', 'conv', ... 38 | 'filters', f*randn(5,1,1,50, 'single'), ... 39 | 'biases', zeros(1, 50, 'single'), ... 40 | 'stride',1, ... 41 | 'pad', 0) ; 42 | net.layers{end+1} = struct('type', 'relu') ; 43 | net.layers{end+1} = struct('type', 'pool', ... 44 | 'method', 'max', ... 45 | 'pool', [4 1], ... 46 | 'stride', [2 1], ... 47 | 'pad', 0) ; 48 | net.layers{end+1} = struct('type', 'normalize', ... 49 | 'param', [5 1 0.0001/5 0.75]) ; 50 | 51 | 52 | 53 | net.layers{end+1} = struct('type', 'conv', ... 54 | 'filters', f*randn(5,1,50,40, 'single'),... 55 | 'biases', zeros(1,40,'single'), ... 56 | 'stride', 1, ... 57 | 'pad', 0) ; 58 | net.layers{end+1} = struct('type', 'relu') ; 59 | net.layers{end+1} = struct('type', 'pool', ... 60 | 'method', 'max', ... 61 | 'pool', [4 1], ... 62 | 'stride', [2 1], ... 63 | 'pad', 0) ; 64 | net.layers{end+1} = struct('type', 'normalize', ... 65 | 'param', [5 1 0.0001/5 0.75]) ; 66 | 67 | 68 | 69 | net.layers{end+1} = struct('type', 'conv', ... 70 | 'filters', f*randn(3,1,40,20, 'single'),... 71 | 'biases', zeros(1,20,'single'), ... 72 | 'stride', 1, ... 73 | 'pad', 0) ; 74 | net.layers{end+1} = struct('type', 'relu') ; 75 | net.layers{end+1} = struct('type', 'normalize', ... 76 | 'param', [5 1 0.0001/5 0.75]) ; 77 | 78 | net.layers{end+1} = struct('type', 'conv', ... 79 | 'filters', f*randn(1,d,20,400, 'single'),... 80 | 'biases', zeros(1,400,'single'), ... 81 | 'stride', 1, ... 82 | 'pad', 0) ; 83 | net.layers{end+1} = struct('type', 'relu') ; 84 | net.layers{end+1} = struct('type', 'normalize', ... 85 | 'param', [5 1 0.0001/5 0.75]) ; 86 | 87 | net.layers{end+1} = struct('type', 'conv', ... 88 | 'filters', f*randn(1,1,400,18, 'single'),... 89 | 'biases', zeros(1,18,'single'), ... 90 | 'stride', 1, ... 91 | 'pad', 0) ; 92 | net.layers{end+1} = struct('type', 'softmaxloss') ; 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | % MAKE SURE the last layer's size is [1 1 X N] 101 | % -------------------------------------------------------------------- 102 | % Train 103 | % -------------------------------------------------------------------- 104 | 105 | % Take the mean out and make GPU if needed 106 | imdb.images.data = bsxfun(@minus, imdb.images.data, mean(imdb.images.data,4)) ; 107 | if opts.train.useGpu 108 | imdb.images.data = gpuArray(imdb.images.data) ; 109 | end 110 | 111 | [net,info, y1bag] = cnn_train(net, imdb, @getBatch, ...50 112 | opts.train, ... 113 | 'val', find(imdb.images.set == 3)) ; 114 | save('~predictions','y1bag','-v7.3'); 115 | 116 | rmpath('fcnn') 117 | 118 | delete(['data/expnet-epoch*.mat']); 119 | 120 | % -------------------------------------------------------------------- 121 | function [im, labels] = getBatch(imdb, batch) 122 | % -------------------------------------------------------------------- 123 | im = imdb.images.data(:,:,:,batch) ; 124 | labels = imdb.images.labels(1,batch) ; -------------------------------------------------------------------------------- /fcnn/vl_simplenn_display.m: -------------------------------------------------------------------------------- 1 | function vl_simplenn_display(net, res) 2 | % VL_SIMPLENN_DISPLAY Simple CNN statistics 3 | % VL_SIMPLENN_DISPLAY(NET) prints statistics about the network NET. 4 | 5 | % Copyright (C) 2014 Andrea Vedaldi. 6 | % All rights reserved. 7 | % 8 | % This file is part of the VLFeat library and is made available under 9 | % the terms of the BSD license (see the COPYING file). 10 | 11 | fields={'layer', 'type', 'support', 'stride', 'pad', 'dim', 'fdim', 'field', 'mem'}; 12 | if nargin > 1 13 | fields = {fields{:}, 'xwhd', 'xmem', 'dxmem'} ; 14 | end 15 | 16 | for w=fields 17 | switch char(w) 18 | case 'type', s = 'type' ; 19 | case 'stride', s = 'stride' ; 20 | case 'padding', s = 'pad' ; 21 | case 'field', s = 'rec. field' ; 22 | case 'dim', s = 'out dim' ; 23 | case 'fdim', s = 'filt dim' ; 24 | case 'mem', s = 'c/g net KB' ; 25 | case 'xwhd', s = 'x w/h/d' ; 26 | case 'xmem', s = 'c/g x MB' ; 27 | case 'dxmem', s = 'c/g dx MB' ; 28 | otherwise, s = char(w) ; 29 | end 30 | fprintf('%10s',s) ; 31 | for l=1:numel(net.layers) 32 | ly=net.layers{l} ; 33 | switch char(w) 34 | case 'layer', s=sprintf('%d', l) ; 35 | case 'type' 36 | switch ly.type 37 | case 'normalize', s='nrm'; 38 | case 'pool', if strcmpi(ly.method,'avg'), s='apool'; else s='mpool'; end 39 | case 'conv', s='cnv' ; 40 | case 'softmax', s='sftm' ; 41 | case 'loss', s='lloss' ; 42 | case 'softmaxloss', 'sftml' ; 43 | otherwise s=ly.type ; 44 | end 45 | case 'support' 46 | switch ly.type 47 | case 'conv', support(1:2,l) = max([size(ly.filters,1) ; size(ly.filters,2)],1) ; 48 | case 'pool', support(1:2,l) = ly.pool(:) ; 49 | otherwise, support(1:2,l) = [1;1] ; 50 | end 51 | s=sprintf('%dx%d', support(1,l), support(2,l)) ; 52 | case 'fdim' 53 | switch ly.type 54 | case 'conv' 55 | filterDimension(l) = size(ly.filters,3) ; 56 | s=sprintf('%d', filterDimension(l)) ; 57 | otherwise 58 | filterDimension(l) = 0 ; 59 | s='n/a' ; 60 | end 61 | case 'stride' 62 | switch ly.type 63 | case {'conv', 'pool'} 64 | if numel(ly.stride) == 1 65 | stride(1:2,l) = ly.stride ; 66 | else 67 | stride(1:2,l) = ly.stride(:) ; 68 | end 69 | otherwise, stride(1:2,l)=1 ; 70 | end 71 | if all(stride(:,l)==stride(1,l)) 72 | s=sprintf('%d', stride(1,l)) ; 73 | else 74 | s=sprintf('%dx%d', stride(1,l), stride(2,l)) ; 75 | end 76 | case 'pad' 77 | switch ly.type 78 | case {'conv', 'pool'} 79 | if numel(ly.pad) == 1 80 | pad(1:4,l) = ly.pad ; 81 | else 82 | pad(1:4,l) = ly.pad(:) ; 83 | end 84 | otherwise, pad(1:4,l)=0 ; 85 | end 86 | if all(pad(:,l)==pad(1,l)) 87 | s=sprintf('%d', pad(1,l)) ; 88 | else 89 | s=sprintf('%d,%dx%d,%d', pad(1,l), pad(2,l), pad(3,l), pad(4,l)) ; 90 | end 91 | case 'field' 92 | for i=1:2 93 | field(i,l) = sum(cumprod([1 stride(i,1:l-1)]).*(support(i,1:l)-1))+1 ; 94 | end 95 | if all(field(:,l)==field(1,l)) 96 | s=sprintf('%d', field(1,l)) ; 97 | else 98 | s=sprintf('%dx%d', field(1,l), field(2,l)) ; 99 | end 100 | case 'mem' 101 | [a,b] = xmem(ly) ; 102 | mem(1:2,l) = [a;b] ; 103 | s=sprintf('%.0f/%.0f', a/1024, b/1024) ; 104 | case 'dim' 105 | switch ly.type 106 | case 'conv', dimension(1,l) = size(ly.filters,4) ; 107 | otherwise 108 | if l > 1 109 | dimension(1,l) = dimension(1,l-1) ; 110 | end 111 | end 112 | s=sprintf('%d', dimension(1,l)) ; 113 | case 'xwhd' 114 | sz=size(res(l+1).x) ; 115 | s=sprintf('%dx%dx%d%d', sz(1), sz(2), sz(3), sz(4)') 116 | case 'xmem' 117 | [a,b]=xmem(res(l+1).x) ; 118 | rmem(1:2,l) = [a;b] ; 119 | s=sprintf('%.0f/%.0f', a/1024^2, b/1024^2) ; 120 | case 'dxmem' 121 | [a,b]=xmem(res(l+1).dzdx) ; 122 | rmem(1:2,l) = [a;b] ; 123 | s=sprintf('%.0f/%.0f', a/1024^2, b/1024^2) ; 124 | end 125 | fprintf('|%7s', s) ; 126 | end 127 | fprintf('|\n') ; 128 | end 129 | [a,b] = xmem(net) ; 130 | fprintf('total network CPU/GPU memory: %.1f/%1.f MB\n', a/1024^2, b/1024^2) ; 131 | if nargin > 1 132 | [a,b] = xmem(res) ; 133 | fprintf('total result CPU/GPU memory: %.1f/%1.f MB\n', a/1024^2, b/1024^2) ; 134 | end 135 | 136 | % ------------------------------------------------------------------------- 137 | function [cpuMem,gpuMem] = xmem(s, cpuMem, gpuMem) 138 | % ------------------------------------------------------------------------- 139 | if nargin <= 1 140 | cpuMem = 0 ; 141 | gpuMem = 0 ; 142 | end 143 | if isstruct(s) 144 | for f=fieldnames(s)' 145 | f = char(f) ; 146 | for i=1:numel(s) 147 | [cpuMem,gpuMem] = xmem(s(i).(f), cpuMem, gpuMem) ; 148 | end 149 | end 150 | elseif iscell(s) 151 | for i=1:numel(s) 152 | [cpuMem,gpuMem] = xmem(s{i}, cpuMem, gpuMem) ; 153 | end 154 | elseif isnumeric(s) 155 | if isa(s, 'single') 156 | mult = 4 ; 157 | else 158 | mult = 8 ; 159 | end 160 | if isa(s,'gpuArray') 161 | gpuMem = gpuMem + mult * numel(s) ; 162 | else 163 | cpuMem = cpuMem + mult * numel(s) ; 164 | end 165 | end 166 | 167 | 168 | -------------------------------------------------------------------------------- /Results_statistics.m: -------------------------------------------------------------------------------- 1 | function [Accuracy, Avg_F_measure, Norm_F] = Results_statistics (label, Predicted) 2 | 3 | error = 0; 4 | for i=1: length(label) 5 | if label(i) ~= Predicted(i) 6 | error = error+1; 7 | end; 8 | end; 9 | error_rate = error/length(label); 10 | Accuracy = 1 - error_rate; 11 | 12 | [uniques,numUnique] = count_unique(label); 13 | [uniques_pre,numUnique_Pre] = count_unique(Predicted); 14 | 15 | if length(uniques) > length(uniques_pre) 16 | disp('Predicted has less number of Classes '); 17 | end; 18 | if length(uniques) < length(uniques_pre) 19 | disp('Predicted has more number of Classes '); 20 | end; 21 | Avg_accuracy = 0; 22 | Avg_F_measure = 0; 23 | Accuracies = zeros(length(uniques),1); 24 | F_measures = zeros(length(uniques),1); 25 | for i =1: length(uniques) 26 | 27 | label_index = find(label==uniques(i)); %actual label 28 | Pre_index = find(Predicted==uniques(i)); %predicted label 29 | 30 | True_possitive = length( intersect(label_index,Pre_index) ); 31 | False_Negative = length(label_index)-True_possitive; 32 | False_Positive = length( setdiff(Pre_index, label_index) ); 33 | 34 | Precision = True_possitive/length(Pre_index); 35 | Recall = True_possitive/length(label_index); 36 | 37 | F_Measure = 2*Precision*Recall/(Precision+Recall); 38 | if isnan(F_Measure) 39 | F_Measure = 0; 40 | end; 41 | 42 | accuracy = True_possitive/length(label_index); 43 | Avg_accuracy = Avg_accuracy + accuracy; 44 | Avg_F_measure = Avg_F_measure + F_Measure; 45 | 46 | Accuracies(i) = accuracy; 47 | F_measures(i) = F_Measure; 48 | 49 | class_errors{i}.label = uniques(i); 50 | class_errors{i}.instance_no = length(label_index); 51 | class_errors{i}.True_possitive = True_possitive; 52 | class_errors{i}.False_Negative = False_Negative; 53 | class_errors{i}.False_Positive = False_Positive; 54 | class_errors{i}.Precision = Precision; 55 | class_errors{i}.Recall = Recall; 56 | class_errors{i}.accuracy = accuracy; 57 | class_errors{i}.F_Measure = F_Measure; 58 | end; 59 | 60 | Avg_accuracy = Avg_accuracy/length(uniques); 61 | Avg_F_measure = Avg_F_measure /length(uniques); 62 | 63 | a = F_measures.*numUnique; 64 | Norm_F = sum(a)/sum(numUnique); 65 | 66 | %% compute confusion matrix 67 | [label_uni,label_inst] = count_unique(label); 68 | [pre_uni,pre_inst] = count_unique(Predicted); 69 | Confus_matrix = zeros(length(label_uni),length(pre_uni)); 70 | for i=1: length(label_uni) 71 | for j=1: length(pre_uni) 72 | label_Index = find(label ==label_uni(i)); 73 | pre_Index = find(Predicted ==label_uni(j)); 74 | temp = length( intersect(label_Index,pre_Index)); 75 | Confus_matrix(i,j)= temp/label_inst(i)*100; 76 | 77 | end; 78 | end; 79 | 80 | 81 | 82 | end 83 | 84 | 85 | function [uniques,numUnique] = count_unique(x,option) 86 | %COUNT_UNIQUE Determines unique values, and counts occurrences 87 | % [uniques,numUnique] = count_unique(x) 88 | % 89 | % This function determines unique values of an array, and also counts the 90 | % number of instances of those values. 91 | % 92 | % This uses the MATLAB builtin function accumarray, and is faster than 93 | % MATLAB's unique function for intermediate to large sizes of arrays for integer values. 94 | % Unlike 'unique' it cannot be used to determine if rows are unique or 95 | % operate on cell arrays. 96 | % 97 | % If float values are passed, it uses MATLAB's logic builtin unique function to 98 | % determine unique values, and then to count instances. 99 | % 100 | % Descriptions of Input Variables: 101 | % x: Input vector or matrix, N-D. Must be a type acceptable to 102 | % accumarray, numeric, logical, char, scalar, or cell array of 103 | % strings. 104 | % option: Acceptable values currently only 'float'. If 'float' is 105 | % specified, the input x vector will be treated as containing 106 | % decimal values, regardless of whether it is a float array type. 107 | % 108 | % Descriptions of Output Variables: 109 | % uniques: sorted unique values 110 | % numUnique: number of instances of each unique value 111 | % 112 | % Example(s): 113 | % >> [uniques] = count_unique(largeArray); 114 | % >> [uniques,numUnique] = count_unique(largeArray); 115 | % 116 | % See also: unique, accumarray 117 | 118 | % Author: Anthony Kendall 119 | % Contact: anthony [dot] kendall [at] gmail [dot] com 120 | % Created: 2009-03-17 121 | 122 | testFloat = false; 123 | if nargin == 2 && strcmpi(option,'float') 124 | testFloat = true; 125 | end 126 | 127 | nOut = nargout; 128 | if testFloat 129 | if nOut < 2 130 | [uniques] = float_cell_unique(x,nOut); 131 | else 132 | [uniques,numUnique] = float_cell_unique(x,nOut); 133 | end 134 | else 135 | try %this will fail if the array is float or cell 136 | if nOut < 2 137 | [uniques] = int_log_unique(x,nOut); 138 | else 139 | [uniques,numUnique] = int_log_unique(x,nOut); 140 | end 141 | catch %default to standard approach 142 | if nOut < 2 143 | [uniques] = float_cell_unique(x,nOut); 144 | else 145 | [uniques,numUnique] = float_cell_unique(x,nOut); 146 | end 147 | end 148 | end 149 | 150 | end 151 | 152 | function [uniques,numUnique] = int_log_unique(x,nOut) 153 | %First, determine the offset for negative values 154 | minVal = min(x(:)); 155 | 156 | %Check to see if accumarray is appropriate for this function 157 | maxIndex = max(x(:)) - minVal + 1; 158 | if maxIndex / numel(x) > 1000 159 | error('Accumarray is inefficient for arrays when ind values are >> than the number of elements') 160 | end 161 | 162 | %Now, offset to get the index 163 | index = x(:) - minVal + 1; 164 | 165 | %Count the occurrences of each index value 166 | numUnique = accumarray(index,1); 167 | 168 | %Get the values which occur more than once 169 | uniqueInd = (1:length(numUnique))'; 170 | uniques = uniqueInd(numUnique>0) + minVal - 1; 171 | 172 | if nOut == 2 173 | %Trim the numUnique array 174 | numUnique = numUnique(numUnique>0); 175 | end 176 | end 177 | 178 | function [uniques,numUnique] = float_cell_unique(x,nOut) 179 | 180 | if ~iscell(x) 181 | %First, sort the input vector 182 | x = sort(x(:)); 183 | numelX = numel(x); 184 | 185 | %Check to see if the array type needs to be converted to double 186 | currClass = class(x); 187 | isdouble = strcmp(currClass,'double'); 188 | 189 | if ~isdouble 190 | x = double(x); 191 | end 192 | 193 | %Check to see if there are any NaNs or Infs, sort returns these either at 194 | %the beginning or end of an array 195 | if isnan(x(1)) || isinf(x(1)) || isnan(x(numelX)) || isinf(x(numelX)) 196 | %Check to see if the array contains nans or infs 197 | xnan = isnan(x); 198 | xinf = isinf(x); 199 | testRep = xnan | xinf; 200 | 201 | %Remove all of these from the array 202 | x = x(~testRep); 203 | end 204 | 205 | %Determine break locations of unique values 206 | uniqueLocs = [true;diff(x) ~= 0]; 207 | else 208 | isdouble = true; %just to avoid conversion on finish 209 | 210 | %Sort the rows of the cell array 211 | x = sort(x(:)); 212 | 213 | %Determine unique location values 214 | uniqueLocs = [true;~strcmp(x(1:end-1),x(2:end)) ~= 0] ; 215 | end 216 | 217 | %Determine the unique values 218 | uniques = x(uniqueLocs); 219 | 220 | if ~isdouble 221 | x = feval(currClass,x); 222 | end 223 | 224 | %Count the number of duplicate values 225 | if nOut == 2 226 | numUnique = diff([find(uniqueLocs);length(x)+1]); 227 | end 228 | end 229 | -------------------------------------------------------------------------------- /fcnn/vl_simplenn.m: -------------------------------------------------------------------------------- 1 | function res = vl_simplenn(net, x, dzdy, res, varargin) 2 | % VL_SIMPLENN Evaluates a simple CNN 3 | % RES = VL_SIMPLENN(NET, X) evaluates the convnet NET on data X. 4 | % RES = VL_SIMPLENN(NET, X, DZDY) evaluates the convnent NET and its 5 | % derivative on data X and output derivative DZDY. 6 | % 7 | % The network has a simple (linear) topology, i.e. the computational 8 | % blocks are arranged in a sequence of layers. Please note that 9 | % there is no need to use this wrapper, which is provided for 10 | % convenience. Instead, the individual CNN computational blocks can 11 | % be evaluated directly, making it possible to create significantly 12 | % more complex topologies, and in general allowing greater 13 | % flexibility. 14 | % 15 | % The NET structure contains two fields: 16 | % 17 | % - net.layers: the CNN layers. 18 | % - net.normalization: information on how to normalize input data. 19 | % 20 | % The network expects the data X to be already normalized. This 21 | % usually involves rescaling the input image(s) and subtracting a 22 | % mean. 23 | % 24 | % RES is a structure array with one element per network layer plus 25 | % one representing the input. So RES(1) refers to the zeroth-layer 26 | % (input), RES(2) refers to the first layer, etc. Each entry has 27 | % fields: 28 | % 29 | % - res(i+1).x: the output of layer i. Hence res(1).x is the network 30 | % input. 31 | % 32 | % - res(i+1).aux: auxiliary output data of layer i. For example, 33 | % dropout uses this field to store the dropout mask. 34 | % 35 | % - res(i+1).dzdx: the derivative of the network output relative to 36 | % variable res(i+1).x, i.e. the output of layer i. In particular 37 | % res(1).dzdx is the derivative of the network output with respect 38 | % to the network input. 39 | % 40 | % - res(i+1).dzdw: the derivative of the network output relative to 41 | % the parameters of layer i. It can be a cell array for multiple 42 | % parameters. 43 | % 44 | % net.layers is a cell array of network layers. The following 45 | % layers, encapsulating corresponding functions in the toolbox, are 46 | % supported: 47 | % 48 | % Convolutional layer:: 49 | % The convolutional layer wraps VL_NNCONV(). It has fields: 50 | % 51 | % - layer.type = 'conv' 52 | % - layer.filters: the filters. 53 | % - layer.biases: the biases. 54 | % - layer.stride: the sampling stride (usually 1). 55 | % - layer.padding: the padding (usually 0). 56 | % 57 | % Max pooling layer:: 58 | % The max pooling layer wraps VL_NNPOOL(). It has fields: 59 | % 60 | % - layer.type = 'pool' 61 | % - layer.method: pooling method ('max' or 'avg'). 62 | % - layer.pool: the pooling size. 63 | % - layer.stride: the sampling stride (usually 1). 64 | % - layer.padding: the padding (usually 0). 65 | % 66 | % Normalization layer:: 67 | % The normalization layer wraps VL_NNNORMALIZE(). It has fields 68 | % 69 | % - layer.type = 'normalize' 70 | % - layer.param: the normalization parameters. 71 | % 72 | % ReLU layer:: 73 | % The ReLU layer wraps VL_NNRELU(). It has fields: 74 | % 75 | % - layer.type = 'relu' 76 | % 77 | % Dropout layer:: 78 | % The dropout layer wraps VL_NNDROPOUT(). It has fields: 79 | % 80 | % - layer.type = 'dropout' 81 | % - layer.rate: the dropout rate. 82 | % 83 | % Softmax layer:: 84 | % The softmax layer wraps VL_NNSOFTMAX(). It has fields 85 | % 86 | % - layer.type = 'softmax' 87 | % 88 | % Log-loss layer:: 89 | % The log-loss layer wraps VL_NNLOSS(). It has fields: 90 | % 91 | % - layer.type = 'loss' 92 | % - layer.class: the ground-truth class. 93 | % 94 | % Softmax-log-loss layer:: 95 | % The softmax-log-loss layer wraps VL_NNSOFTMAXLOSS(). It has 96 | % fields: 97 | % 98 | % - layer.type = 'softmaxloss' 99 | % - layer.class: the ground-truth class. 100 | % 101 | % Custom layer:: 102 | % This can be used to specify custom layers. 103 | % 104 | % - layer.type = 'custom' 105 | % - layer.forward: a function handle computing the block. 106 | % - layer.backward: a function handle computing the block derivative. 107 | % 108 | % The first function is called as res(i+1) = forward(layer, res(i), res(i+1)) 109 | % where res() is the struct array specified before. The second function is 110 | % called as res(i) = backward(layer, res(i), res(i+1)). Note that the 111 | % `layer` structure can contain additional fields if needed. 112 | 113 | 114 | % Copyright (C) 2014 Andrea Vedaldi. 115 | % All rights reserved. 116 | % 117 | % This file is part of the VLFeat library and is made available under 118 | % the terms of the BSD license (see the COPYING file). 119 | 120 | opts.res = [] ; 121 | opts.conserveMemory = false ; 122 | opts.sync = false ; 123 | opts.disableDropout = false ; 124 | opts.freezeDropout = false ; 125 | opts = vl_argparse(opts, varargin); 126 | 127 | n = numel(net.layers) ; 128 | 129 | if (nargin <= 2) || isempty(dzdy) 130 | doder = false ; 131 | else 132 | doder = true ; 133 | end 134 | 135 | gpuMode = isa(x, 'gpuArray') ; 136 | 137 | if nargin <= 3 || isempty(res) 138 | res = struct(... 139 | 'x', cell(1,n+1), ... 140 | 'dzdx', cell(1,n+1), ... 141 | 'dzdw', cell(1,n+1), ... 142 | 'aux', cell(1,n+1), ... 143 | 'time', num2cell(zeros(1,n+1)), ... 144 | 'backwardTime', num2cell(zeros(1,n+1))) ; 145 | end 146 | res(1).x = x ; 147 | 148 | for i=1:n 149 | l = net.layers{i} ; 150 | res(i).time = tic ; 151 | switch l.type 152 | case 'conv' 153 | res(i+1).x = vl_nnconv(res(i).x, l.filters, l.biases, 'pad', l.pad, 'stride', l.stride) ; 154 | case 'pool' 155 | res(i+1).x = vl_nnpool(res(i).x, l.pool, 'pad', l.pad, 'stride', l.stride, 'method', l.method) ; 156 | case 'normalize' 157 | res(i+1).x = vl_nnnormalize(res(i).x, l.param) ; 158 | case 'softmax' 159 | res(i+1).x = vl_nnsoftmax(res(i).x) ; 160 | case 'loss' 161 | res(i+1).x = vl_nnloss(res(i).x, l.class) ; 162 | case 'softmaxloss' 163 | res(i+1).x = vl_nnsoftmaxloss(res(i).x, l.class) ; 164 | case 'relu' 165 | res(i+1).x = vl_nnrelu(res(i).x) ; 166 | case 'noffset' 167 | res(i+1).x = vl_nnnoffset(res(i).x, l.param) ; 168 | case 'dropout' 169 | if opts.disableDropout 170 | res(i+1).x = res(i).x ; 171 | elseif opts.freezeDropout 172 | [res(i+1).x, res(i+1).aux] = vl_nndropout(res(i).x, 'rate', l.rate, 'mask', res(i+1).aux) ; 173 | else 174 | [res(i+1).x, res(i+1).aux] = vl_nndropout(res(i).x, 'rate', l.rate) ; 175 | end 176 | case 'custom' 177 | res(i+1) = l.forward(l, res(i), res(i+1)) ; 178 | otherwise 179 | error('Unknown layer type %s', l.type) ; 180 | end 181 | if opts.conserveMemory & ~doder & i < numel(net.layers) - 1 182 | % TODO: forget unnecesary intermediate computations even when 183 | % derivatives are required 184 | res(i).x = [] ; 185 | end 186 | if gpuMode & opts.sync 187 | % This should make things slower, but on MATLAB 2014a it is necessary 188 | % for any decent performance. 189 | wait(gpuDevice) ; 190 | end 191 | res(i).time = toc(res(i).time) ; 192 | % if i == n 193 | % fprintf('\n layer %d: size = [%d,%d,%d,%d]\n',i,size(res(i).x)) 194 | % else 195 | % fprintf('\n layer %d: size = [%d,%d,%d,%d]\n',i,size(res(i+1).x)) 196 | % end 197 | end 198 | 199 | if doder 200 | res(n+1).dzdx = dzdy ; 201 | for i=n:-1:1 202 | l = net.layers{i} ; 203 | res(i).backwardTime = tic ; 204 | switch l.type 205 | case 'conv' 206 | [res(i).dzdx, res(i).dzdw{1}, res(i).dzdw{2}] = ... 207 | vl_nnconv(res(i).x, l.filters, l.biases, ... 208 | res(i+1).dzdx, ... 209 | 'pad', l.pad, 'stride', l.stride) ; 210 | case 'pool' 211 | res(i).dzdx = vl_nnpool(res(i).x, l.pool, res(i+1).dzdx, ... 212 | 'pad', l.pad, 'stride', l.stride, 'method', l.method) ; 213 | case 'normalize' 214 | res(i).dzdx = vl_nnnormalize(res(i).x, l.param, res(i+1).dzdx) ; 215 | case 'softmax' 216 | res(i).dzdx = vl_nnsoftmax(res(i).x, res(i+1).dzdx) ; 217 | case 'loss' 218 | res(i).dzdx = vl_nnloss(res(i).x, l.class, res(i+1).dzdx) ; 219 | case 'softmaxloss' 220 | res(i).dzdx = vl_nnsoftmaxloss(res(i).x, l.class, res(i+1).dzdx) ; 221 | case 'relu' 222 | res(i).dzdx = vl_nnrelu(res(i).x, res(i+1).dzdx) ; 223 | case 'noffset' 224 | res(i).dzdx = vl_nnnoffset(res(i).x, l.param, res(i+1).dzdx) ; 225 | case 'dropout' 226 | if opts.disableDropout 227 | res(i).dzdx = res(i+1).dzdx ; 228 | else 229 | res(i).dzdx = vl_nndropout(res(i).x, res(i+1).dzdx, 'mask', res(i+1).aux) ; 230 | end 231 | case 'custom' 232 | res(i) = l.backward(l, res(i), res(i+1)) ; 233 | end 234 | if opts.conserveMemory 235 | res(i+1).dzdx = [] ; 236 | end 237 | if gpuMode & opts.sync 238 | wait(gpuDevice) ; 239 | end 240 | res(i).backwardTime = toc(res(i).backwardTime) ; 241 | end 242 | end 243 | -------------------------------------------------------------------------------- /cnn_train.m: -------------------------------------------------------------------------------- 1 | function [net, info, predictions] = cnn_train(net, imdb, getBatch, varargin) 2 | % CNN_TRAIN Demonstrates training a CNN 3 | % CNN_TRAIN() is an example learner implementing stochastic gradient 4 | % descent with momentum to train a CNN for image classification. 5 | % It can be used with different datasets by providing a suitable 6 | % getBatch function. 7 | 8 | opts.train = [] ; 9 | opts.val = [] ; 10 | opts.numEpochs = 300 ; 11 | opts.batchSize = 256 ; 12 | opts.useGpu = false ; 13 | opts.learningRate = 0.001 ; 14 | opts.continue = false ; 15 | opts.expDir = 'data/exp' ; 16 | opts.conserveMemory = false ; 17 | opts.sync = true ; 18 | opts.prefetch = false ; 19 | opts.weightDecay = 0.0005 ; 20 | opts.momentum = 0.9 ; 21 | opts.errorType = 'multiclass' ; 22 | opts.plotDiagnostics = false ; 23 | opts.outputfea = []; 24 | opts = vl_argparse(opts, varargin) ; 25 | 26 | if ~exist(opts.expDir), mkdir(opts.expDir) ; end 27 | if isempty(opts.train), opts.train = find(imdb.images.set==1) ; end 28 | if isempty(opts.val), opts.val = find(imdb.images.set==2) ; end 29 | if isnan(opts.train), opts.train = [] ; end 30 | if opts.outputfea, xtrn = []; xtst = []; ytrn = []; ytst = []; xtrn = single(xtrn); xtst = single(xtst); end 31 | % opts.val = []; 32 | % ------------------------------------------------------------------------- 33 | % Network initialization 34 | % ------------------------------------------------------------------------- 35 | 36 | for i=1:numel(net.layers) 37 | if ~strcmp(net.layers{i}.type,'conv'), continue; end 38 | net.layers{i}.filtersMomentum = zeros('like',net.layers{i}.filters) ; 39 | net.layers{i}.biasesMomentum = zeros('like',net.layers{i}.biases) ; 40 | if ~isfield(net.layers{i}, 'filtersLearningRate') 41 | net.layers{i}.filtersLearningRate = 1 ; 42 | end 43 | if ~isfield(net.layers{i}, 'biasesLearningRate') 44 | net.layers{i}.biasesLearningRate = 1 ; 45 | end 46 | if ~isfield(net.layers{i}, 'filtersWeightDecay') 47 | net.layers{i}.filtersWeightDecay = 1 ; 48 | end 49 | if ~isfield(net.layers{i}, 'biasesWeightDecay') 50 | net.layers{i}.biasesWeightDecay = 1 ; 51 | end 52 | end 53 | 54 | if opts.useGpu 55 | net = vl_simplenn_move(net, 'gpu') ; 56 | for i=1:numel(net.layers) 57 | if ~strcmp(net.layers{i}.type,'conv'), continue; end 58 | net.layers{i}.filtersMomentum = gpuArray(net.layers{i}.filtersMomentum) ; 59 | net.layers{i}.biasesMomentum = gpuArray(net.layers{i}.biasesMomentum) ; 60 | end 61 | end 62 | 63 | % ------------------------------------------------------------------------- 64 | % Train and validate 65 | % ------------------------------------------------------------------------- 66 | 67 | rng(0) ; 68 | 69 | if opts.useGpu 70 | one = gpuArray(single(1)) ; 71 | else 72 | one = single(1) ; 73 | end 74 | 75 | info.train.objective = [] ; 76 | info.train.error = [] ; 77 | info.train.topFiveError = [] ; 78 | info.train.speed = [] ; 79 | info.val.objective = [] ; 80 | info.val.error = [] ; 81 | info.val.topFiveError = [] ; 82 | info.val.speed = [] ; 83 | 84 | lr = 0 ; 85 | res = [] ; 86 | tttrain = 0; 87 | for epoch=1:opts.numEpochs 88 | tt1 = cputime; 89 | % fprintf('--------------- epoch %d -----------\n',epoch); 90 | prevLr = lr ; 91 | lr = opts.learningRate(min(epoch, numel(opts.learningRate))) ; 92 | 93 | % fast-forward to where we stopped 94 | modelPath = [opts.expDir, 'net-epoch-%d.mat'] ; 95 | modelFigPath = fullfile(opts.expDir, 'net-train.pdf') ; 96 | if opts.continue 97 | if exist(sprintf(modelPath, epoch),'file'), continue ; end 98 | if epoch > 1 99 | fprintf('\n resuming by loading epoch %d\n', epoch-1) ; 100 | load(sprintf(modelPath, epoch-1), 'net', 'info') ; 101 | end 102 | end 103 | 104 | train = opts.train(randperm(numel(opts.train))) ; 105 | val = opts.val ; 106 | % train(end) = 92; 107 | 108 | info.train.objective(end+1) = 0 ; 109 | info.train.error(end+1) = 0 ; 110 | info.train.topFiveError(end+1) = 0 ; 111 | info.train.speed(end+1) = 0 ; 112 | info.val.objective(end+1) = 0 ; 113 | info.val.error(end+1) = 0 ; 114 | info.val.topFiveError(end+1) = 0 ; 115 | info.val.speed(end+1) = 0 ; 116 | 117 | % reset momentum if needed 118 | if prevLr ~= lr 119 | fprintf('learning rate changed (%f --> %f): resetting momentum\n', prevLr, lr) ; 120 | for l=1:numel(net.layers) 121 | if ~strcmp(net.layers{l}.type, 'conv'), continue ; end 122 | net.layers{l}.filtersMomentum = 0 * net.layers{l}.filtersMomentum ; 123 | net.layers{l}.biasesMomentum = 0 * net.layers{l}.biasesMomentum ; 124 | end 125 | end 126 | 127 | for t=1:opts.batchSize:numel(train) 128 | % get next image batch and labels 129 | batch = train(t:min(t+opts.batchSize-1, numel(train))) ; 130 | batch_time = tic ; 131 | % fprintf('training: epoch %02d: processing batch %3d of %3d ...', epoch, ... 132 | % fix(t/opts.batchSize)+1, ceil(numel(train)/opts.batchSize)) ; 133 | % fprintf('training: batch %3d', fix(t/opts.batchSize)+1) ; 134 | [im, labels] = getBatch(imdb, batch) ; 135 | if opts.prefetch 136 | nextBatch = train(t+opts.batchSize:min(t+2*opts.batchSize-1, numel(train))) ; 137 | getBatch(imdb, nextBatch) ; 138 | end 139 | if opts.useGpu 140 | im = gpuArray(im) ; 141 | end 142 | 143 | % backprop 144 | net.layers{end}.class = labels ; 145 | res = vl_simplenn(net, im, one, res, ... 146 | 'conserveMemory', opts.conserveMemory, ... 147 | 'sync', opts.sync) ; 148 | % jby: Save traning feature 149 | if epoch == opts.numEpochs && strcmp(opts.outputfea, 'true') 150 | xtrn = [xtrn; squeeze(res(end-2).x)']; 151 | ytrn = [ytrn; labels']; 152 | end 153 | % gradient step 154 | for l=1:numel(net.layers) 155 | if ~strcmp(net.layers{l}.type, 'conv'), continue ; end 156 | 157 | net.layers{l}.filtersMomentum = ... 158 | opts.momentum * net.layers{l}.filtersMomentum ... 159 | - (lr * net.layers{l}.filtersLearningRate) * ... 160 | (opts.weightDecay * net.layers{l}.filtersWeightDecay) * net.layers{l}.filters ... 161 | - (lr * net.layers{l}.filtersLearningRate) / numel(batch) * res(l).dzdw{1} ; 162 | 163 | net.layers{l}.biasesMomentum = ... 164 | opts.momentum * net.layers{l}.biasesMomentum ... 165 | - (lr * net.layers{l}.biasesLearningRate) * .... 166 | (opts.weightDecay * net.layers{l}.biasesWeightDecay) * net.layers{l}.biases ... 167 | - (lr * net.layers{l}.biasesLearningRate) / numel(batch) * res(l).dzdw{2} ; 168 | 169 | net.layers{l}.filters = net.layers{l}.filters + net.layers{l}.filtersMomentum ; 170 | net.layers{l}.biases = net.layers{l}.biases + net.layers{l}.biasesMomentum ; 171 | end 172 | 173 | % print information 174 | batch_time = toc(batch_time) ; 175 | speed = numel(batch)/batch_time ; 176 | info.train = updateError(opts, info.train, net, res, batch_time) ; 177 | 178 | n = t + numel(batch) - 1 ; 179 | % fprintf(' %.2f s (%.1f images/s)', batch_time, speed) ; 180 | % fprintf(' err %.1f err5 %.1f', ... 181 | % info.train.error(end)/n*100, info.train.topFiveError(end)/n*100) ; 182 | % fprintf('\n') ; 183 | 184 | % debug info 185 | if opts.plotDiagnostics 186 | figure(2) ; vl_simplenn_diagnose(net,res) ; drawnow ; 187 | end 188 | 189 | % predictions = gather(res(end-1).x) ; 190 | % switch opts.errorType 191 | % case 'multiclass' 192 | % [~,predictions] = sort(predictions, 3, 'descend') ; 193 | % sz = size(predictions); 194 | % predictions = reshape(predictions,[sz(3),sz(4)]); 195 | % predictions = predictions(1,:); 196 | % case 'binary' 197 | % predictions = predictions; 198 | % end 199 | % yclass = unique(predictions) 200 | end % next batch 201 | 202 | 203 | predictions = gather(res(end-1).x) ; 204 | switch opts.errorType 205 | case 'multiclass' 206 | [~,predictions] = sort(predictions, 3, 'descend') ; 207 | sz = size(predictions); 208 | if length(sz) < 4 209 | predictions = reshape(predictions,[sz(3),1]); 210 | else 211 | predictions = reshape(predictions,[sz(3),sz(4)]); 212 | end 213 | predictions = predictions(1,:); 214 | case 'binary' 215 | predictions = predictions; 216 | end 217 | % yclass = unique(predictions) 218 | tt2 = cputime; 219 | tttrain = tttrain + tt2 - tt1; 220 | 221 | % evaluation on validation set 222 | 223 | ypredictions = []; 224 | for t=1:opts.batchSize:numel(val)+opts.batchSize 225 | batch_time = tic ; 226 | batch = val(t:min(t+opts.batchSize-1, numel(val))) ; 227 | if ~isempty(batch) 228 | % fprintf('validation: epoch %02d: processing batch %3d of %3d ...', epoch, ... 229 | % fix(t/opts.batchSize)+1, ceil(numel(val)/opts.batchSize)) ; 230 | % fprintf('validation: batch %3d', fix(t/opts.batchSize)+1) ; 231 | [im, labels] = getBatch(imdb, batch) ; 232 | if opts.prefetch 233 | nextBatch = val(t+opts.batchSize:min(t+2*opts.batchSize-1, numel(val))) ; 234 | getBatch(imdb, nextBatch) ; 235 | end 236 | if opts.useGpu 237 | im = gpuArray(im) ; 238 | end 239 | 240 | net.layers{end}.class = labels ; 241 | res = vl_simplenn(net, im, [], res, ... 242 | 'disableDropout', true, ... 243 | 'conserveMemory', opts.conserveMemory, ... 244 | 'sync', opts.sync) ; 245 | 246 | % jby: Save testing feature 247 | predictions = gather(res(end-1).x) ; 248 | switch opts.errorType 249 | case 'multiclass' 250 | [~,predictions] = sort(predictions, 3, 'descend') ; 251 | sz = size(predictions); 252 | if length(sz) < 4 253 | predictions = reshape(predictions,[sz(3),1]); 254 | else 255 | predictions = reshape(predictions,[sz(3),sz(4)]); 256 | end 257 | predictions = predictions(1,:); 258 | case 'binary' 259 | predictions = predictions; 260 | end 261 | ypredictions = [ypredictions predictions]; 262 | if epoch == opts.numEpochs && strcmp(opts.outputfea, 'true') 263 | xtst = [xtst; squeeze(res(end-2).x)']; 264 | ytst = [ytst; labels']; 265 | end 266 | 267 | % print information 268 | batch_time = toc(batch_time) ; 269 | speed = numel(batch)/batch_time ; 270 | info.val = updateError(opts, info.val, net, res, batch_time) ; 271 | 272 | n = t + numel(batch) - 1 ; 273 | % fprintf(' %.2f s (%.1f images/s)', batch_time, speed) ; 274 | % fprintf(' err %.1f err5 %.1f', ... 275 | % info.val.error(end)/n*100, info.val.topFiveError(end)/n*100) ; 276 | % fprintf('\n') ; 277 | end 278 | end 279 | 280 | tttest = cputime - tt2; 281 | 282 | 283 | % save 284 | info.train.objective(end) = info.train.objective(end) / numel(train) ; 285 | info.train.error(end) = info.train.error(end) / numel(train) ; 286 | info.train.topFiveError(end) = info.train.topFiveError(end) / numel(train) ; 287 | info.train.speed(end) = numel(val) / info.train.speed(end) ; 288 | info.val.objective(end) = info.val.objective(end) / numel(val) ; 289 | info.val.error(end) = info.val.error(end) / numel(val) ; 290 | info.val.topFiveError(end) = info.val.topFiveError(end) / numel(val) ; 291 | info.val.speed(end) = numel(val) / info.val.speed(end) ; 292 | save(sprintf(modelPath,epoch), 'net', 'info') ; 293 | 294 | % figure(1) ; clf ; 295 | % subplot(1,2,1) ; 296 | % semilogy(1:epoch, info.train.objective, 'k') ; hold on ; 297 | % semilogy(1:epoch, info.val.objective, 'b') ; 298 | % xlabel('training epoch') ; ylabel('energy') ; 299 | % grid on ; 300 | % h=legend('train', 'val') ; 301 | % set(h,'color','none'); 302 | % title('objective') ; 303 | % subplot(1,2,2) ; 304 | % switch opts.errorType 305 | % case 'multiclass' 306 | % plot(1:epoch, info.train.error, 'k') ; hold on ; 307 | % plot(1:epoch, info.train.topFiveError, 'k--') ; 308 | % plot(1:epoch, info.val.error, 'b') ; 309 | % plot(1:epoch, info.val.topFiveError, 'b--') ; 310 | % h=legend('train','train-5','val','val-5') ; 311 | % case 'binary' 312 | % plot(1:epoch, info.train.error, 'k') ; hold on ; 313 | % plot(1:epoch, info.val.error, 'b') ; 314 | % h=legend('train','val') ; 315 | % end 316 | % grid on ; 317 | % xlabel('training epoch') ; ylabel('error') ; 318 | % set(h,'color','none') ; 319 | % title('error') ; 320 | % drawnow ; 321 | % print(1, modelFigPath, '-dpdf') ; 322 | end 323 | 324 | % val = opts.val ; 325 | % batch = val; 326 | % [im, labels] = getBatch(imdb, batch) ; 327 | % net.layers{end}.class = labels ; 328 | % res = vl_simplenn(net, im, [], res, ... 329 | % 'disableDropout', true, ... 330 | % 'conserveMemory', opts.conserveMemory, ... 331 | % 'sync', opts.sync) ; 332 | % 333 | % predictions = gather(res(end-1).x) ; 334 | % switch opts.errorType 335 | % case 'multiclass' 336 | % [~,predictions] = sort(predictions, 3, 'descend') ; 337 | % sz = size(predictions); 338 | % predictions = reshape(predictions,[sz(3),sz(4)]); 339 | % predictions = predictions(1,:); 340 | % case 'binary' 341 | % predictions = predictions; 342 | % end 343 | predictions = ypredictions; 344 | if opts.outputfea, info.xtrn = xtrn; info.xtst = xtst; info.ytrn = ytrn; info.ytst = ytst; end 345 | fprintf('\ntttrain = %d and tttest = %d\n',tttrain,tttest); 346 | % ------------------------------------------------------------------------- 347 | function info = updateError(opts, info, net, res, speed) 348 | % ------------------------------------------------------------------------- 349 | predictions = gather(res(end-1).x) ; 350 | sz = size(predictions) ; 351 | n = prod(sz(1:2)) ; 352 | 353 | labels = net.layers{end}.class ; 354 | info.objective(end) = info.objective(end) + sum(double(gather(res(end).x))) ; 355 | info.speed(end) = info.speed(end) + speed ; 356 | switch opts.errorType 357 | case 'multiclass' 358 | [~,predictions] = sort(predictions, 3, 'descend') ; 359 | error = ~bsxfun(@eq, predictions, reshape(labels, 1, 1, 1, [])) ; 360 | info.error(end) = info.error(end) +.... 361 | sum(sum(sum(error(:,:,1,:))))/n ; 362 | info.topFiveError(end) = info.topFiveError(end) + ... 363 | sum(sum(sum(min(error(:,:,1:5,:),[],3))))/n ; 364 | case 'binary' 365 | error = bsxfun(@times, predictions, labels) < 0 ; 366 | info.error(end) = info.error(end) + sum(error(:))/n ; 367 | end 368 | 369 | 370 | 371 | --------------------------------------------------------------------------------