├── MaxPooling.cpp ├── README.md ├── cnnTest.m ├── cnnTrain.m └── upsampleMax.m /MaxPooling.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * MaxPooling.c 3 | * 4 | * Implements the max-pooling transfer function. Takes a 4D tensor shaped as 5 | * (rows, cols, nchannels, nsamples) and a pooling shape as (prows, pcols) and 6 | * returns a set of max-values with the corresponding indices in the input 7 | * matrix. 8 | * 9 | * e.g. 10 | * [m, idx] = MaxPooling(IM, [2 2]) 11 | * 12 | * Created on: July 11, 2011 13 | * Author: Jonathan Masci 14 | * 15 | * This file is available under the terms of the GNU GPLv2. 16 | */ 17 | 18 | #include "mex.h" 19 | 20 | #ifdef OPENMP 21 | #include 22 | #endif 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1)) 30 | #define IDX2C(i,j,ld) (((j)*(ld))+(i)) 31 | 32 | int debug = 0; 33 | 34 | /** 35 | * Computes the max-pooling for the given 2D map, and no, the name is not a typo. 36 | * All pointers are passed already offset so to avoid cumbersome indexing. 
37 | * 38 | * @param ptr_data pointer set to the begin of this map 39 | * @param DATA_DIMS data dimensions 40 | * @param ptr_pool pooling sizes 41 | * @param ptr_out pointer to the output max-values set to the right position 42 | * @param ptr_idx pointer to the output indices set to the right position 43 | */ 44 | template 45 | inline void compute_map_pooling(T *ptr_data, const mwSize *DATA_DIMS, T *ptr_pool, 46 | T *ptr_out, T *ptr_idx, int tile_start) 47 | { 48 | T m; 49 | int idx; 50 | int count = 0; 51 | 52 | for (int col = 0; col < DATA_DIMS[1]; col += ptr_pool[1]) { 53 | for (int row = 0; row < DATA_DIMS[0]; row += ptr_pool[0]) { 54 | if (debug) 55 | fprintf(stderr, "r = %i, c = %i \n", row, col); 56 | 57 | m = -std::numeric_limits::max(); 58 | idx = -1; 59 | for (int pcol = 0; (pcol < ptr_pool[1] && col + pcol < DATA_DIMS[1]); ++pcol) { 60 | for (int prow = 0; (prow < ptr_pool[0] && row + prow < DATA_DIMS[0]); ++prow) { 61 | if (debug) { 62 | fprintf(stderr, "m = %f, data = %f \n", m, ptr_data[IDX2C(row + prow, col + pcol, DATA_DIMS[0])]); 63 | fprintf(stderr, "rr = %i, cc = %i \n --> idx = %i \n", row + prow, col + pcol, idx); 64 | } 65 | 66 | if (ptr_data[IDX2C(row + prow, col + pcol, DATA_DIMS[0])] > m) { 67 | idx = IDX2C(row + prow, col + pcol, DATA_DIMS[0]); 68 | m = ptr_data[idx]; 69 | } 70 | } 71 | } 72 | 73 | if (debug && idx == -1) { 74 | fprintf(stderr, "dioschifoso\n"); 75 | return; 76 | } 77 | 78 | if (debug) 79 | fprintf(stderr, "count = %i\n",count); 80 | 81 | /* idxs are to be used in Matlab and hence a +1 is needed */ 82 | ptr_idx[count] = idx + 1 + tile_start; 83 | ptr_out[count] = m; 84 | count++; 85 | } 86 | } 87 | } 88 | 89 | /** 90 | * This is the wrapper for the actual computation. 91 | * It is a template so that multiple types can be handled. 
92 | */ 93 | template 94 | void mexMaxPooling(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[], mxClassID classID) 95 | { 96 | 97 | /***************************************************************************/ 98 | /** Variables */ 99 | /***************************************************************************/ 100 | mwSize IDX_DIMS[1]; 101 | mwSize DATA_DIMS[4]; 102 | mwSize M_DIMS[4]; 103 | const mwSize *POOL_DIMS; 104 | int DATA_NUMEL; 105 | int POOL_NUMEL; 106 | 107 | /** 108 | * Pointers to data 109 | */ 110 | T *ptr_data = NULL; 111 | T *ptr_pool = NULL; 112 | T *ptr_out = NULL; 113 | T *ptr_idx = NULL; 114 | 115 | /***************************************************************************/ 116 | /** Setting input pointers *************************************************/ 117 | /***************************************************************************/ 118 | ptr_data = (T *)mxGetData(prhs[0]); 119 | ptr_pool = (T *)mxGetData(prhs[1]); 120 | if (debug) 121 | fprintf(stderr,"Pooling size: h=%f, w=%f\n", ptr_pool[0], ptr_pool[1]); 122 | 123 | /***************************************************************************/ 124 | /** Setting parameters *****************************************************/ 125 | /***************************************************************************/ 126 | /* Data dimensions. 
As also a 2D tensor can be used I fill empty dimensions 127 | * with 1 */ 128 | const mwSize *tmp = mxGetDimensions(prhs[0]); 129 | DATA_DIMS[0] = tmp[0]; 130 | DATA_DIMS[1] = tmp[1]; 131 | if (mxGetNumberOfDimensions(prhs[0]) == 2) { 132 | DATA_DIMS[2] = 1; 133 | DATA_DIMS[3] = 1; 134 | } else if (mxGetNumberOfDimensions(prhs[0]) == 3) { 135 | DATA_DIMS[2] = tmp[2]; 136 | DATA_DIMS[3] = 1; 137 | } else { 138 | DATA_DIMS[2] = tmp[2]; 139 | DATA_DIMS[3] = tmp[3]; 140 | } 141 | 142 | DATA_NUMEL = DATA_DIMS[0] * DATA_DIMS[1] * DATA_DIMS[2] * DATA_DIMS[3]; 143 | if (debug) 144 | fprintf(stderr,"Data size: h=%d, w=%d, z=%d, n=%d (%d)\n", DATA_DIMS[0], DATA_DIMS[1], DATA_DIMS[2], DATA_DIMS[3], DATA_NUMEL); 145 | 146 | /* Output dimensions: the first output argument is of size equals to the input 147 | * whereas the second is of size equals to the number of pooled values. 148 | * Below there is ceil because also non complete tiles are considered when 149 | * input dims are not multiples of pooling dims. 
*/ 150 | M_DIMS[0] = ceil(float(DATA_DIMS[0]) / float(ptr_pool[0])); 151 | M_DIMS[1] = ceil(float(DATA_DIMS[1]) / float(ptr_pool[1])); 152 | M_DIMS[2] = DATA_DIMS[2]; 153 | M_DIMS[3] = DATA_DIMS[3]; 154 | IDX_DIMS[0] = M_DIMS[0] * M_DIMS[1] * M_DIMS[2] * M_DIMS[3]; 155 | if (debug){ 156 | fprintf(stderr,"Each output image has (%d, %d) pooled values, " 157 | "IDXs size: h=%d \n", M_DIMS[0], M_DIMS[1], IDX_DIMS[0]); 158 | fprintf(stderr, "M size: h=%d, w=%d, z=%d, n=%d\n", M_DIMS[0], M_DIMS[1], M_DIMS[2], M_DIMS[3]); 159 | } 160 | 161 | /***************************************************************************/ 162 | /** Variables allocation ***************************************************/ 163 | /***************************************************************************/ 164 | /* OUTPUTS: max-values and corresponding indices */ 165 | plhs[0] = mxCreateNumericArray(4, M_DIMS, classID, mxREAL); 166 | ptr_out = (T *)mxGetData(plhs[0]); 167 | plhs[1] = mxCreateNumericArray(1, IDX_DIMS, classID, mxREAL); 168 | ptr_idx = (T *)mxGetData(plhs[1]); 169 | 170 | /***************************************************************************/ 171 | /** Compute max-pooling ****************************************************/ 172 | /***************************************************************************/ 173 | int tile_start = 0; 174 | int ptr_offset = 0; 175 | int M_sample_size = M_DIMS[0] * M_DIMS[1] * M_DIMS[2]; 176 | int D_sample_size = DATA_DIMS[0] * DATA_DIMS[1] * DATA_DIMS[2]; 177 | 178 | for (int n = 0; n < DATA_DIMS[3]; ++n) { 179 | #ifdef OPENMP 180 | #pragma omp parallel for 181 | #endif 182 | for (int k = 0; k < DATA_DIMS[2]; ++k) { 183 | tile_start = n * M_sample_size + k * M_DIMS[0] * M_DIMS[1]; 184 | ptr_offset = n * D_sample_size + k * DATA_DIMS[0] * DATA_DIMS[1]; 185 | 186 | compute_map_pooling (&ptr_data[ptr_offset], DATA_DIMS, ptr_pool, &ptr_out[tile_start], &ptr_idx[tile_start], ptr_offset); 187 | 188 | if (debug) 189 | fprintf(stderr, "tile_start: %i, 
ptr_offset: %i\n", tile_start, ptr_offset); 190 | } 191 | } 192 | } 193 | 194 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 195 | { 196 | /***************************************************************************/ 197 | /** Check input ************************************************************/ 198 | /***************************************************************************/ 199 | if (nrhs !=2) 200 | mexErrMsgTxt("Must have 2 input arguments: x, pooling_shape"); 201 | 202 | if (nlhs !=2) 203 | mexErrMsgTxt("Must have 2 output arguments ([max_value, idxs])"); 204 | 205 | if (mxIsComplex(prhs[0]) || !(mxIsClass(prhs[0],"single") || mxIsClass(prhs[0],"double"))) 206 | mexErrMsgTxt("Input data must be real, single/double type"); 207 | 208 | if (mxIsComplex(prhs[1]) || !(mxIsClass(prhs[1],"single") || mxIsClass(prhs[1],"double"))) 209 | mexErrMsgTxt("Pooling dimensions (rows, cols) must be real, single/double type"); 210 | 211 | if (mxGetNumberOfDimensions(prhs[0]) < 2) 212 | mexErrMsgTxt("Input data must have at least 2-dimensions (rows, cols, nchannels, nsamples) " 213 | "\nThe last two dimensions will be considered to be 1."); 214 | 215 | if (mxGetNumberOfDimensions(prhs[1]) != 2) 216 | mexErrMsgTxt("Pooling data must have 2-dimensions (prows, pcols)"); 217 | 218 | mxClassID classID = mxGetClassID(prhs[0]); 219 | 220 | /** This is mainly to avoid two typenames. Should not be a big usability issue. 
*/ 221 | if (mxGetClassID(prhs[1]) != classID) 222 | mexErrMsgTxt("Input data and pooling need to be of the same type"); 223 | 224 | /***************************************************************************/ 225 | /** Switch for the supported data types */ 226 | /***************************************************************************/ 227 | if (classID == mxSINGLE_CLASS) { 228 | if (debug) 229 | fprintf(stderr, "Executing the single version\n"); 230 | 231 | mexMaxPooling(nlhs, plhs, nrhs, prhs, classID); 232 | } else if (classID == mxDOUBLE_CLASS) { 233 | if (debug) 234 | fprintf(stderr, "Executing the double version\n"); 235 | 236 | mexMaxPooling(nlhs, plhs, nrhs, prhs, classID); 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A simple convolutional neural network with linear neurons. 2 | 3 | Structure: input->convolution->max pooling->convolution->max pooling->fully connected. 4 | 5 | Compile the max pooling function in C++ first by 6 | ``` 7 | mex MaxPooling.cpp 8 | ``` 9 | 10 | The data can be downloaded in http://ai.stanford.edu/~amaas/data/data.zip 11 | 12 | The data loading functions can be downloaded in http://ufldl.stanford.edu/wiki/index.php/Using_the_MNIST_Dataset 13 | 14 | About 98% classification accuracy for 50 epochs of all training data. 
% cnnTest  Evaluate the trained network on the MNIST test set.
% Script: relies on the hyper-parameters (imageSize, convDim*, outputDim*,
% filterNum*, poolDim*) and the parameters (Wc1, bc1, Wc2, bc2, Wd, bd) that
% cnnTrain leaves in the workspace.
testImages = loadMNISTImages('../common/t10k-images-idx3-ubyte');
testImages = reshape(testImages,imageSize,imageSize,[]);
testLabels = loadMNISTLabels('../common/t10k-labels-idx1-ubyte');
testLabels(testLabels==0) = 10; % Remap 0 to 10
% Number of test images is the size of the third dimension; length() would
% return the largest dimension instead.
testSize = size(testImages,3);

% The whole test set is pushed through the network as a single batch.
mbSize = testSize;
% Feedforward

o1 = zeros(convDim1,convDim1,filterNum1,mbSize);
for i = 1:filterNum1
    o1(:,:,i,:) = convn(testImages,rot90(Wc1(:,:,i),2),'valid') + bc1(i);
end
o1Pooled = zeros(outputDim1,outputDim1,filterNum1,mbSize);
o1PoolIdx = zeros(outputDim1^2,filterNum1,mbSize);
for i = 1:mbSize
    for j = 1:filterNum1
        [o1Pooled(:,:,j,i) o1PoolIdx(:,j,i)] = MaxPooling(o1(:,:,j,i),[poolDim1 poolDim1]);
    end
end
o2 = zeros(convDim2,convDim2,filterNum2,mbSize);
for i = 1:filterNum2
    for j = 1:filterNum1
        o2(:,:,i,:) = o2(:,:,i,:) + convn(o1Pooled(:,:,j,:),rot90(Wc2(:,:,j,i),2),'valid');
    end
    o2(:,:,i,:) = o2(:,:,i,:) + bc2(i);
end
o2Pooled = zeros(outputDim2,outputDim2,filterNum2,mbSize);
o2PoolIdx = zeros(outputDim2^2,filterNum2,mbSize);
for i = 1:mbSize
    for j = 1:filterNum2
        [o2Pooled(:,:,j,i) o2PoolIdx(:,j,i)] = MaxPooling(o2(:,:,j,i),[poolDim2 poolDim2]);
    end
end
o2PooledVec = reshape(o2Pooled,[],mbSize);
o3 = Wd*o2PooledVec + repmat(bd,[1,mbSize]);

% Predicted class = row index of the largest output of each column.
[~,preds] = max(o3);

acc = sum(preds'==testLabels)/length(preds);
fprintf('Accuracy is %f\n',acc);

--------------------------------------------------------------------------------
/cnnTrain.m:
--------------------------------------------------------------------------------
% cnnTrain  Train the two-stage convolution + max-pooling network on MNIST
% with mini-batch gradient descent and momentum, then run cnnTest.
clear all

% Hyper-parameters
imageSize = 28;
filterNum1 = 10;
filterSize1 = 5;
poolDim1 = 2;
filterNum2 = 10;
filterSize2 = 5;
poolDim2 = 2;
convDim1 = imageSize - filterSize1 + 1;
outputDim1 = convDim1/poolDim1;
convDim2 = outputDim1 - filterSize2 + 1;
outputDim2 = convDim2/poolDim2;
classNum = 10;
sampleSize = 60000;
mbSize = 50;    % mini-batch sample size
r = 0.05;       % learning rate
wdr = 0.00;     % weight decay rate
mom = 0.5;      % momentum
epochNum = 100;

% The pooled sizes are used as array dimensions, and upsampleMax rebuilds a
% map of size pooled*poolDim, so the convolved sizes must divide evenly.
assert(mod(convDim1,poolDim1)==0 && mod(convDim2,poolDim2)==0, ...
    'Convolved map sizes must be multiples of the pooling sizes');

% Load data
addpath('../common');
images = loadMNISTImages('../common/train-images-idx3-ubyte');
images = reshape(images,imageSize,imageSize,[]);
labels = loadMNISTLabels('../common/train-labels-idx1-ubyte');
labels(labels==0) = 10;
labelMat = full(sparse(labels,1:length(labels),1));   % one column per sample, a 1 in row labels(k)
% Never index past the images that were actually loaded.
sampleSize = min(sampleSize,size(images,3));

% Initialize parameters
Wc1 = 0.1*(randn(filterSize1,filterSize1,filterNum1));
bc1 = zeros(filterNum1,1);
Wc2 = 0.1*(randn(filterSize2,filterSize2,filterNum1,filterNum2));
bc2 = zeros(filterNum2,1);
Wd = 0.01*(rand(classNum,filterNum2*outputDim2^2) - 0.5);
bd = zeros(classNum,1);

% Velocity of parameters
vel_Wc1 = zeros(size(Wc1));
vel_bc1 = zeros(size(bc1));
vel_Wc2 = zeros(size(Wc2));
vel_bc2 = zeros(size(bc2));
vel_Wd = zeros(size(Wd));
vel_bd = zeros(size(bd));

%% Training
for e = 1:epochNum
    err = 0;
    rp = randperm(sampleSize);
    % Samples left over after the last full mini-batch are skipped this epoch.
    for s = 1:mbSize:(sampleSize-mbSize+1)
        mbImages = images(:,:,rp(s:s+mbSize-1));

        % Feedforward
        o1 = zeros(convDim1,convDim1,filterNum1,mbSize);
        for i = 1:filterNum1
            o1(:,:,i,:) = convn(mbImages,rot90(Wc1(:,:,i),2),'valid') + bc1(i);
        end
        o1Pooled = zeros(outputDim1,outputDim1,filterNum1,mbSize);
        o1PoolIdx = zeros(outputDim1^2,filterNum1,mbSize);
        for i = 1:mbSize
            for j = 1:filterNum1
                [o1Pooled(:,:,j,i) o1PoolIdx(:,j,i)] = MaxPooling(o1(:,:,j,i),[poolDim1 poolDim1]);
            end
        end
        o2 = zeros(convDim2,convDim2,filterNum2,mbSize);
        for i = 1:filterNum2
            for j = 1:filterNum1
                o2(:,:,i,:) = o2(:,:,i,:) + convn(o1Pooled(:,:,j,:),rot90(Wc2(:,:,j,i),2),'valid');
            end
            o2(:,:,i,:) = o2(:,:,i,:) + bc2(i);
        end
        o2Pooled = zeros(outputDim2,outputDim2,filterNum2,mbSize);
        o2PoolIdx = zeros(outputDim2^2,filterNum2,mbSize);
        for i = 1:mbSize
            for j = 1:filterNum2
                [o2Pooled(:,:,j,i) o2PoolIdx(:,j,i)] = MaxPooling(o2(:,:,j,i),[poolDim2 poolDim2]);
            end
        end
        o2PooledVec = reshape(o2Pooled,[],mbSize);
        o3 = Wd*o2PooledVec + repmat(bd,[1,mbSize]);

        % Back Propagation
        y = labelMat(:,rp(s:s+mbSize-1));
        delta_d = o3 - y;
        delta_s2 = Wd' * delta_d;
        delta_s2 = reshape(delta_s2,outputDim2,outputDim2,filterNum2,mbSize);
        % Route each pooled delta back to the position that held the maximum.
        delta_c2 = zeros(convDim2,convDim2,filterNum2,mbSize);
        for i = 1:mbSize
            for j = 1:filterNum2
                delta_c2(:,:,j,i) = upsampleMax(delta_s2(:,:,j,i),o2PoolIdx(:,j,i),poolDim2);
            end
        end
        delta_s1 = zeros(outputDim1,outputDim1,filterNum1,mbSize);
        for i = 1:filterNum1
            for j = 1:filterNum2
                delta_s1(:,:,i,:) = delta_s1(:,:,i,:) + convn(delta_c2(:,:,j,:),Wc2(:,:,i,j),'full');
            end
        end
        delta_c1 = zeros(convDim1,convDim1,filterNum1,mbSize);
        for i = 1:mbSize
            for j = 1:filterNum1
                delta_c1(:,:,j,i) = upsampleMax(delta_s1(:,:,j,i),o1PoolIdx(:,j,i),poolDim1);
            end
        end

        % Gradients, averaged over the mini-batch
        grad_Wd = (1/mbSize)*delta_d*o2PooledVec';
        grad_bd = zeros(size(bd));
        for i = 1:classNum
            grad_bd(i) = (1/mbSize)*sum(delta_d(i,:));
        end
        grad_Wc2 = zeros(size(Wc2));
        grad_bc2 = zeros(size(bc2));
        for i = 1:filterNum2
            for j = 1:filterNum1
                for k = 1:mbSize
                    grad_Wc2(:,:,j,i) = grad_Wc2(:,:,j,i) + conv2(o1Pooled(:,:,j,k),rot90(delta_c2(:,:,i,k),2),'valid');
                end
                grad_Wc2(:,:,j,i) = (1/mbSize)*grad_Wc2(:,:,j,i);
            end
            tmp_grad_bc2 = delta_c2(:,:,i,:);
            grad_bc2(i) = (1/mbSize)*sum(tmp_grad_bc2(:));
        end
        grad_Wc1 = zeros(size(Wc1));
        grad_bc1 = zeros(size(bc1));
        for i = 1:filterNum1
            for j = 1:mbSize
                grad_Wc1(:,:,i) = grad_Wc1(:,:,i) + conv2(mbImages(:,:,j),rot90(delta_c1(:,:,i,j),2),'valid');
            end
            grad_Wc1(:,:,i) = (1/mbSize)*grad_Wc1(:,:,i);
            tmp_grad_bc1 = delta_c1(:,:,i,:);
            grad_bc1(i) = (1/mbSize)*sum(tmp_grad_bc1(:));
        end

        % Momentum update; the weight decay term is applied to biases as well.
        vel_Wd = mom*vel_Wd - r*grad_Wd - wdr*Wd;
        vel_bd = mom*vel_bd - r*grad_bd - wdr*bd;
        vel_Wc2 = mom*vel_Wc2 - r*grad_Wc2 - wdr*Wc2;
        vel_bc2 = mom*vel_bc2 - r*grad_bc2 - wdr*bc2;
        vel_Wc1 = mom*vel_Wc1 - r*grad_Wc1 - wdr*Wc1;
        vel_bc1 = mom*vel_bc1 - r*grad_bc1 - wdr*bc1;

        Wd = Wd + vel_Wd;
        bd = bd + vel_bd;
        Wc2 = Wc2 + vel_Wc2;
        bc2 = bc2 + vel_bc2;
        Wc1 = Wc1 + vel_Wc1;
        bc1 = bc1 + vel_bc1;

        mbErr = mean(delta_d(:).^2);
        err = err + mbErr;
    end
    % err is the sum of the per-mini-batch mean squared errors of this epoch.
    fprintf('Epoch %d Training error: %f\n',e,err);
    r = 0.98*r;   % decay the learning rate after every epoch
end

cnnTest;

--------------------------------------------------------------------------------
/upsampleMax.m:
--------------------------------------------------------------------------------
function up = upsampleMax(convolvedFeatures,poolIdx,poolDim)
% upsampleMax  Scatter pooled values back to the positions of their maxima.
%   up = upsampleMax(convolvedFeatures,poolIdx,poolDim) returns a zero array
%   of size size(convolvedFeatures)*poolDim in which the linear positions
%   poolIdx hold the values of convolvedFeatures(:); every other entry is 0.
%   The output size is only right when the un-pooled map was an exact
%   multiple of poolDim in every dimension.
up = zeros(size(convolvedFeatures)*poolDim);
up(poolIdx) = convolvedFeatures(:);

--------------------------------------------------------------------------------