├── .gitignore ├── CMakeLists.txt ├── DONT_README.md ├── README.md ├── best-results └── results_epoch_8.csv ├── layers ├── convolution_layer.hpp ├── cross_entropy_loss_layer.hpp ├── dense_layer.hpp ├── max_pooling_layer.hpp ├── relu_layer.hpp └── softmax_layer.hpp ├── le_net.cpp ├── tests ├── convolution_layer_test.cpp ├── cross_entropy_loss_layer_test.cpp ├── dense_layer_test.cpp ├── integration_test.cpp ├── max_pooling_layer_test.cpp ├── mnist_test.cpp ├── relu_layer_test.cpp └── softmax_layer_test.cpp └── utils └── mnist.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | *out 2 | build 3 | data 4 | data_small 5 | data_medium 6 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | project(cpp-cnn) 4 | 5 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 6 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 7 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 8 | 9 | find_package(Boost 1.40 COMPONENTS unit_test_framework REQUIRED) 10 | find_package(Armadillo 6.5 REQUIRED) 11 | 12 | include_directories(${Boost_INCLUDE_DIR}) 13 | include_directories(${ARMADILLO_INCLUDE_DIRS}) 14 | 15 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 16 | 17 | add_executable( 18 | convolution_layer_test 19 | tests/convolution_layer_test.cpp 20 | ) 21 | 22 | target_link_libraries(convolution_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 23 | target_link_libraries(convolution_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 24 | 25 | add_executable( 26 | dense_layer_test 27 | tests/dense_layer_test.cpp 28 | ) 29 | 30 | target_link_libraries(dense_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 31 | target_link_libraries(dense_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 32 | 33 | add_executable( 34 | max_pooling_layer_test 35 | tests/max_pooling_layer_test.cpp 36 | ) 37 | 38 | target_link_libraries(max_pooling_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 39 | target_link_libraries(max_pooling_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 40 | 41 | add_executable( 42 | softmax_layer_test 43 | tests/softmax_layer_test.cpp 44 | ) 45 | 46 | target_link_libraries(softmax_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 47 | target_link_libraries(softmax_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 48 | 49 | add_executable( 50 | relu_layer_test 51 | tests/relu_layer_test.cpp 52 | ) 53 | 54 | target_link_libraries(relu_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 55 | target_link_libraries(relu_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 56 | 57 | add_executable( 58 | cross_entropy_loss_layer_test 59 | tests/cross_entropy_loss_layer_test.cpp 60 | ) 61 | 62 | target_link_libraries(cross_entropy_loss_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 63 | target_link_libraries(cross_entropy_loss_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 64 | 65 | add_executable( 66 | integration_test 67 | tests/integration_test.cpp 68 | ) 69 | 70 | target_link_libraries(integration_test LINK_PUBLIC ${Boost_LIBRARIES}) 71 | target_link_libraries(integration_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 72 | 73 | add_executable( 74 | mnist_util_test 75 | tests/mnist_test.cpp 76 | ) 77 | 78 | 79 | target_link_libraries(mnist_util_test LINK_PUBLIC ${Boost_LIBRARIES}) 80 | target_link_libraries(mnist_util_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 81 | 82 | add_executable( 83 | le_net 84 | le_net.cpp 85 
| ) 86 | 87 | 88 | target_link_libraries(le_net LINK_PUBLIC ${Boost_LIBRARIES}) 89 | target_link_libraries(le_net LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 90 | 91 | file(COPY ${CMAKE_SOURCE_DIR}/data DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 92 | # file(COPY ${CMAKE_SOURCE_DIR}/data_medium DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 93 | # file(COPY ${CMAKE_SOURCE_DIR}/data_small DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 94 | 95 | -------------------------------------------------------------------------------- /DONT_README.md: -------------------------------------------------------------------------------- 1 | I'm at work and there's nothing to do. So I'm going to write a CNN.. in C++ .. from scratch.. 2 | 3 | .. because that's what people do when they're bored out of their minds 4 | 5 | So, my initial thoughts are: 6 | 7 | 1. I need some linear algebra library to make the vector stuff fast. I've worked with Eigen in the past, and mlpack uses Armadillo so I'm familiar with that too. I don't want to go through the hassle of installing Eigen (which may not be too much of a hassle, but still..) so I'm going to use Armadillo. 8 | 9 | 2. I'll need a class for the Convolution Layer, a class for the Pooling layer and a class for the dense layer at the end. 10 | 11 | 3. Like all good boys who write CNNs, I think it would be best to test this on the MNIST dataset -- so we'll need code to parse that too. Since the data is stored in binary form, this might be unpleasant.. Oh well, I'll handle it when I get to it. 12 | 13 | Let's begin right in the middle of everything - with the Convolution Layer. 14 | 15 | So we've hit the ground running with the ConvolutionLayer constructor -- it initializes stuff like the dimensions of the input volume, the number of filters, the strides and the filter dimensions. It also initializes the filter weights. I've decided to use a truncated normal initialization (i.e. random values sampled from a Gaussian distribution having mean 0 and variance 1. Values more than two standard deviations away from the mean are rejected). 16 | 17 | Cool cool .. the weight initializations look correct. Time for a git commit and then we move on to the forward pass through the conv layer. 18 | 19 | I'm going to go for the simplest kind of convolution implementation there is -- no padding, no FFT. 20 | 21 | I've realized that filters (in general) have a depth dimension as well, and I've defined filters as 2D matrices.. Need to fix that. 22 | 23 | Done fixing the filter dimensions. Commit and move on to the forward pass. 24 | 25 | Update: I went for a snack break. My boss told me to do a few things -- which I mostly ignored, because he's an idiot. And I just finished the forward pass implementation. It feels weird working on this in the office, so I'm going to head home and continue from there. I plan to test the forward pass implementation first and then try and figure out how the backward pass is going to go. 26 | 27 | A few more notes on the rest of the implementation: 28 | 1. The optimizer -- for now, I'll just use vanilla mini-batch SGD to train. Maybe later I'll switch it up to Adam or RMSprop. 29 | 30 | 2. I realize that I'll need to add layers for ReLU activation in the hidden units, and a softmax layer at the very end of the network. 31 | 32 | 3. I'll probably add a class like "class LeNet" that contains the entire CNN architecture. I don't really plan on reusing any of the layers, so it's fine if they're a bit dirty. 33 | 34 | 3 minutes till my cab arrives .. better head down.
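
Going back to the weight initialization for a second -- here's a tiny, standalone sketch of the rejection-sampling idea described above. Illustration only: the `truncNormal` name and the 5x5x3 filter shape are made up for the example, not the repo's actual API.

```
#include <armadillo>
#include <cmath>
#include <iostream>

// Rejection sampling for a truncated normal value: draw from N(mean, variance)
// and retry until the draw lies within two standard deviations of the mean.
double truncNormal(double mean, double variance)
{
  const double stddev = std::sqrt(variance);
  arma::vec candidate(1);
  do
  {
    candidate.randn();                                   // one standard-normal draw
  } while (std::abs(candidate(0) * stddev) > 2.0 * stddev);
  return mean + candidate(0) * stddev;
}

int main()
{
  // Fill a 5x5x3 filter with truncated-normal weights (mean 0, variance 1).
  arma::cube filter(5, 5, 3);
  filter.imbue([]() { return truncNormal(0.0, 1.0); });
  std::cout << filter << std::endl;
  return 0;
}
```

Rejection sampling sounds wasteful, but with a two-standard-deviation cutoff only about 5% of draws get rejected, so it's plenty fast for initializing a handful of filters.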
35 | 36 | Okay, I didn't really do much at home yesterday, and today I was a bit busy doing pointless things at work. I found some time now to work on this, and have completed the backward pass. Actually, I'd been thinking about the math of the backward pass through the conv layer today.. and I was quite surprised at how easily it worked out to nice expressions. Hopefully I'll get some time to write a blog post about it -- I think I actually found a nice method to it as well. Anyway, I'd scribbled down 37 | most of it in my little notebook at work and the implementation wasn't too hard. 38 | 39 | I've still got the forward pass testing in the backlog. Now, I can add the backward pass testing as well. I've added a function stub for the gradient check as well in the ConvolutionLayer class. I think I'll make 100% sure that there's nothing wrong with my Conv layer before proceeding with the other components (dense layer, max pooling, relu and sigmoid). Hopefully tomorrow I'll get time for testing and then finish the CNN over the weekend. 40 | 41 | Oh yeah, a minor note -- I'd not differentiated between the strides in the vertical and horizontal directions. Updated this. 42 | 43 | I've been giving a bit of thought to writing proper tests -- test driven development and all that. I've decided to go with the Boost.Test framework (which also happens to be used by mlpack). 44 | 45 | It WORKS!!! Both the forward and backward pass seem to be working fine on basic tests. I've even written a gradient check and both the analytic and numeric gradients agree. I didn't expect things to go so smoothly, I was completely prepared to shed tears -- but hey, looks like I'm smart after all. 46 | 47 | I've added a larger test for the backward pass - and used different prime values for input dimensions and filter dimensions. This was a good test to add.. it pointed out a bug in the way gradients were being propagated when the stride was > 1. 48 | 49 | With this, the convolution layer looks pretty much done to me - at least for now. I can proceed with the dense layer now. I think I'll refactor the project into more files. 50 | 51 | Oh wait, there need to be more tests for checking if the gradients are being accumulated and if the batch update is happening correctly. I'll add those after some part of the dense layer implementation. 52 | 53 | Yeah.. I went to sleep after I got the conv layer working. Today is Saturday, so I've been at this for a while -- and here's the update. I've completed the dense layer implementation and written tests for all of it. It works really nicely. I've also been giving some thought to how I'm going to parse the MNIST data -- I think I'll just use mlpack's data::Load functionality to load the data into armadillo matrices and then let my CNN model take it from there. I really don't want to be writing 54 | code to parse a binary file into an armadillo matrix by myself. Also, I think in the gradient check I did for the conv layer backward pass, I checked only the gradient wrt input -- and completely forgot about the gradient wrt filters. I'm going to add that now. 55 | 56 | Next on the TODO list would be the implementation of softmax layer, max pooling layer and relu layer (in that order). 57 | 58 | Added gradient wrt filters check in the conv layer backward pass test. As expected, gradients are correct. I've also remembered that I need to add the update weights function in the dense layer, and write tests to check the updates. 59 | 60 | Added softmax layer and tests for forward and backward passes.
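
Since I keep going on about gradient checks, here's roughly what one looks like in isolation -- a standalone sketch using a toy loss rather than the actual layer code. The real tests do the same kind of thing, perturbing the layer inputs and filters by a small disturbance and comparing against the analytic gradients.

```
#include <algorithm>
#include <armadillo>
#include <iostream>

int main()
{
  // Toy loss L(x) = x.x, whose analytic gradient is 2x.
  auto loss = [](const arma::vec& x) { return arma::dot(x, x); };

  arma::vec x(5, arma::fill::randn);
  arma::vec analyticGrad = 2.0 * x;

  const double h = 0.5e-5;                     // small disturbance
  arma::vec numericGrad(x.n_elem);
  for (arma::uword i = 0; i < x.n_elem; ++i)
  {
    // Central difference: (L(x + h*e_i) - L(x - h*e_i)) / (2h).
    arma::vec xPlus = x, xMinus = x;
    xPlus(i) += h;
    xMinus(i) -= h;
    numericGrad(i) = (loss(xPlus) - loss(xMinus)) / (2.0 * h);
  }

  // Relative error should be tiny (around 1e-9) when the analytic gradient is right.
  double relErr = arma::norm(analyticGrad - numericGrad) /
      std::max(arma::norm(analyticGrad), arma::norm(numericGrad));
  std::cout << "relative error: " << relErr << std::endl;
  return 0;
}
```

The relative-error form is handy because it doesn't depend on the scale of the gradients themselves.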
61 | 62 | Okay, so yesterday was Sunday and I didn't do shit. I was at work all day today and after that I was waiting for the Google Summer of Code results to be announced (too excited to get any work done). Anyway, the results were a big let down - not that I was expecting a selection.. still, rejection hurts. I'm back at it with the MaxPooling layer implementation. The backward pass proved to be trickier than expected but I think I've got it right -- I'm going to write the tests for it now, and then 63 | we'll know for sure. 64 | 65 | Yep. It works. I guess only ReLU remains now. 66 | 67 | ReLU Done! All the components are done. I think I'll write some more tests to try out small networks before I create LeNet. 68 | 69 | Actually, one component still remains -- the loss layer. Imma have to implement cross entropy loss first. 70 | 71 | Implemented cross entropy loss along with tests. Now we can move to the integration tests. 72 | 73 | I went to sleep last night.. I've decided to skip office today because I'm still a little bummed at the GSoC rejection. Anyway, the silver lining is that all my components seem to be working - I just wrote a simple network and ran it on two training examples for 10 epochs and the loss decreases beautifully: 74 | ``` 75 | [DEBUG INTEGRATION TEST ] Epoch #0 Cross Entropy Loss: 0.482423 76 | [DEBUG INTEGRATION TEST ] Epoch #1 Cross Entropy Loss: 0.121352 77 | [DEBUG INTEGRATION TEST ] Epoch #2 Cross Entropy Loss: 0.0802145 78 | [DEBUG INTEGRATION TEST ] Epoch #3 Cross Entropy Loss: 0.0604326 79 | [DEBUG INTEGRATION TEST ] Epoch #4 Cross Entropy Loss: 0.0488966 80 | [DEBUG INTEGRATION TEST ] Epoch #5 Cross Entropy Loss: 0.0410873 81 | [DEBUG INTEGRATION TEST ] Epoch #6 Cross Entropy Loss: 0.0354383 82 | [DEBUG INTEGRATION TEST ] Epoch #7 Cross Entropy Loss: 0.0311572 83 | [DEBUG INTEGRATION TEST ] Epoch #8 Cross Entropy Loss: 0.0277985 84 | [DEBUG INTEGRATION TEST ] Epoch #9 Cross Entropy Loss: 0.0250919 85 | ``` 86 | 87 | I think I should document the code and make style fixes before I proceed with anything else. 88 | 89 | Okay, I've refactored the code and made a bunch of style fixes and added comments (sparingly). Also, I've added a cmake configuration to easily build everything and make things cross-platform. Now for the part that I've been putting off from the very beginning: parsing the binary MNIST data. 90 | 91 | I've just realized that the digit recognizer challenge on Kaggle has CSV datasets for digit recognition. Those might be easier to parse. 92 | 93 | Done with the data parsing module. Now for the big sausage - LeNet. Oh yeah, minor note -- anyone attempting to run this code will have to download the Kaggle dataset into a `data/` directory. 94 | 95 | Okay, so I've assembled the LeNet - but there seems to be a very strange issue.. The training loss decreases over epochs, so does the validation loss - all good, right? Wrong! The training and validation accuracies are also decreasing over epochs! FTW!!! Go home CNN, you're drunk! What is strange is that I can't seem to get the model to overfit on a smaller sub-dataset either. I think it's time to write another integration test. 96 | 97 | I might've made some headway into the issue - it looks like the input to the loss layer is very very close to a one-hot vector, which is causing infinities and negative infinities to appear. Need to find some way to make this numerically stable.
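
To make the failure mode concrete, here's a tiny standalone snippet (illustration only, not the project code) showing how a large-scale dense output collapses the softmax to a one-hot vector and blows up the cross-entropy gradient, and what the gradient of a fused softmax + cross-entropy layer would look like instead:

```
#include <armadillo>
#include <iostream>

int main()
{
  // A dense-layer output with a very large scale pushes the softmax to an
  // (almost) exact one-hot vector: the exponentials of the losing logits
  // underflow to zero.
  arma::vec logits = {400.0, -400.0, -400.0};
  arma::vec p = arma::exp(logits - logits.max());
  p /= arma::accu(p);                 // p is numerically (1, 0, 0)
  arma::vec y = {0, 0, 1};            // one-hot target

  // Gradient of the cross-entropy layer alone: -y / p. Dividing by the zero
  // entries of p produces -inf / NaN, which then poisons the backward pass.
  arma::vec gradSeparate = -(y / p);

  // Gradient of softmax and cross-entropy fused into one layer: p - y.
  // Every entry stays in [-1, 1] no matter how extreme the logits are.
  arma::vec gradFused = p - y;

  std::cout << "separate cross-entropy gradient: " << gradSeparate.t();
  std::cout << "fused softmax+cross-entropy gradient: " << gradFused.t();
  return 0;
}
```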
Okay, a little googling around has shown that if we combine the softmax and cross entropy layers then the backward gradient becomes numerically stable. So, we will do that now. 98 | 99 | It's not working at all. Need to start fresh. 100 | 101 | Okay, so I've written a few more integration tests and here is what I've found: 102 | - The backward pass through the dense layer was slightly incorrect. I'd forgotten to incorporate the upstreamGradient into the gradients wrt weights. 103 | - The dense layer was also missing biases. I've added these now. 104 | With these changes, I can train simple networks for: 105 | - learning the AND decision boundary 106 | - learning a single MNIST image 107 | - On a sample of 3000 MNIST images, a simple convnet (conv-relu-maxpool-dense-softmax-cross_entropy schema) can be trained with SGD to obtain the following results: 108 | ``` 109 | [DEBUG INTEGRATION TEST ] Size of training set: 2700 110 | [DEBUG INTEGRATION TEST ] Size of validation set: 300 111 | [DEBUG INTEGRATION TEST ] 112 | [DEBUG INTEGRATION TEST ] Average loss: 2.22893 113 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.406667 114 | [DEBUG INTEGRATION TEST ] 115 | [DEBUG INTEGRATION TEST ] Average loss: 1.33203 116 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.676667 117 | [DEBUG INTEGRATION TEST ] 118 | [DEBUG INTEGRATION TEST ] Average loss: 0.841367 119 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.753333 120 | [DEBUG INTEGRATION TEST ] 121 | [DEBUG INTEGRATION TEST ] Average loss: 0.584995 122 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.79 123 | [DEBUG INTEGRATION TEST ] 124 | [DEBUG INTEGRATION TEST ] Average loss: 0.44068 125 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.813333 126 | [DEBUG INTEGRATION TEST ] 127 | [DEBUG INTEGRATION TEST ] Average loss: 0.360519 128 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.81 129 | [DEBUG INTEGRATION TEST ] 130 | [DEBUG INTEGRATION TEST ] Average loss: 0.294253 131 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.84 132 | [DEBUG INTEGRATION TEST ] 133 | [DEBUG INTEGRATION TEST ] Average loss: 0.265645 134 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.83 135 | [DEBUG INTEGRATION TEST ] 136 | [DEBUG INTEGRATION TEST ] Average loss: 0.220504 137 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.863333 138 | [DEBUG INTEGRATION TEST ] 139 | [DEBUG INTEGRATION TEST ] Average loss: 0.164675 140 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.863333 141 | ``` 142 | 143 | which is reassuring ... I think we should be good to go on LeNet now. 144 | 145 | Fuck yeah .. 
LeNet is working on the medium sized dataset: 146 | ``` 147 | [DEBUG LE NET ] Training data size: 2700 148 | [DEBUG LE NET ] Validation data size: 300 149 | [DEBUG LE NET ] Test data size: 10 150 | [DEBUG LE NET ] 151 | [DEBUG LE NET ] Loss after epoch #0: 0.578797 152 | [DEBUG LE NET ] Val accuracy: 0.886667 153 | [DEBUG LE NET ] 154 | [DEBUG LE NET ] Loss after epoch #1: 0.181674 155 | [DEBUG LE NET ] Val accuracy: 0.936667 156 | [DEBUG LE NET ] 157 | [DEBUG LE NET ] Loss after epoch #2: 0.155978 158 | [DEBUG LE NET ] Val accuracy: 0.913333 159 | [DEBUG LE NET ] 160 | [DEBUG LE NET ] Loss after epoch #3: 0.0978818 161 | [DEBUG LE NET ] Val accuracy: 0.956667 162 | [DEBUG LE NET ] 163 | [DEBUG LE NET ] Loss after epoch #4: 0.0800541 164 | [DEBUG LE NET ] Val accuracy: 0.953333 165 | [DEBUG LE NET ] 166 | [DEBUG LE NET ] Loss after epoch #5: 0.0567186 167 | [DEBUG LE NET ] Val accuracy: 0.936667 168 | [DEBUG LE NET ] 169 | [DEBUG LE NET ] Loss after epoch #6: 0.0514032 170 | [DEBUG LE NET ] Val accuracy: 0.916667 171 | [DEBUG LE NET ] 172 | [DEBUG LE NET ] Loss after epoch #7: 0.0396252 173 | [DEBUG LE NET ] Val accuracy: 0.926667 174 | [DEBUG LE NET ] 175 | [DEBUG LE NET ] Loss after epoch #8: 0.0444968 176 | [DEBUG LE NET ] Val accuracy: 0.933333 177 | [DEBUG LE NET ] 178 | [DEBUG LE NET ] Loss after epoch #9: 0.0350243 179 | [DEBUG LE NET ] Val accuracy: 0.93 180 | ``` 181 | For future reference -- these results were obtained with the following hyperparameter settings: 182 | - Learning rate: 0.05 183 | - Epochs: 10 184 | - Batch Size: 10 185 | - Train data: data_medium 186 | Oh, and there's one more thing.. I noticed earlier that the output of the dense layer is quite high -- of the order of 1e2. Clearly too high for the softmax to give meaningful outputs. So, I've scaled the input to the softmax down by a factor of 1e2. This is hacky, and I should probably figure out a cleaner way to do this. Maybe normalize the input differently?.. I think I might try making the input have zero mean and unit variance - but for now I think the scaling is fine. 187 | 188 | Now for the mother lode... the complete Kaggle dataset 189 | 190 | OMFG!!!! IT WOOORRKKSS!!!! 191 | It's completed 5 epochs: 192 | ``` 193 | [DEBUG LE NET ] Training data size: 37800 194 | [DEBUG LE NET ] Validation data size: 4200 195 | [DEBUG LE NET ] Test data size: 28000 196 | [DEBUG LE NET ] 197 | [DEBUG LE NET ] Loss after epoch #0: 0.189032 198 | [DEBUG LE NET ] Val accuracy: 0.960952 199 | [DEBUG LE NET ] 200 | [DEBUG LE NET ] Loss after epoch #1: 0.102551 201 | [DEBUG LE NET ] Val accuracy: 0.966905 202 | [DEBUG LE NET ] 203 | [DEBUG LE NET ] Loss after epoch #2: 0.0846397 204 | [DEBUG LE NET ] Val accuracy: 0.971905 205 | [DEBUG LE NET ] 206 | [DEBUG LE NET ] Loss after epoch #3: 0.0762915 207 | [DEBUG LE NET ] Val accuracy: 0.97119 208 | [DEBUG LE NET ] 209 | [DEBUG LE NET ] Loss after epoch #4: 0.0741992 210 | [DEBUG LE NET ] Val accuracy: 0.975714 211 | ``` 212 | I think that this is not bad at all for a handwritten CNN. It takes a long time to run (~20 minutes per epoch on my shitty machine), but then again - handwritten. I'm going to stop the execution now and save the results and make the debug output prettier. For future reference: I did not change the hyperparameters from the previous run -- only the dataset was expanded to the original size.
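
One last aside, going back to the dense-output scaling hack a few entries up -- here's a small standalone sketch (illustration only, with made-up numbers) of the alternative I mentioned: standardizing the dense layer's output to zero mean and unit variance before the softmax, instead of just dividing by 100:

```
#include <armadillo>
#include <iostream>

// Shift and scale a vector to zero mean and unit variance. The small epsilon
// guards against division by zero if the vector happens to be constant.
arma::vec standardize(const arma::vec& v)
{
  const double mu = arma::mean(v);
  const double sigma = arma::stddev(v);
  return (v - mu) / (sigma + 1e-8);
}

int main()
{
  // A made-up dense-layer output on the order of 1e2, like the one observed.
  arma::vec dOut = {120.0, -90.0, 45.0, 310.0, -150.0, 5.0, 60.0, -20.0, 80.0, -10.0};

  arma::vec scaled = dOut / 100.0;          // the current hack
  arma::vec standardized = standardize(dOut);

  std::cout << "scaled by 1e2: " << scaled.t();
  std::cout << "standardized:  " << standardized.t();
  return 0;
}
```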
213 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## CPP-CNN 2 | 3 | A C++ implementation of the popular LeNet convolutional neural network architecture. Currently it trains on the Kaggle Digit Recognizer challenge data and gives 0.973 accuracy on the leaderboard. At the time of writing this, I got a rank of 1414 using this model. The results CSV file can be found in the `best-results/` directory. 4 | 5 | I think that this is probably more for my own benefit than for anyone else - but I've still tried to make the code as readable as possible in case someone else finds this and wants to play around with it. 6 | 7 | ### Prerequisites for building and running the model 8 | 9 | You'll probably need 10 | - g++ >= 5.0.0 11 | - CMake >= 3.0.0 12 | - make >= 4.0 13 | - Armadillo >= 8.300.4 14 | - Boost unit test framework (Boost version >= 1.58) 15 | 16 | to run everything in this repo. I've only tried to run this on a Linux system (Ubuntu 16.04) -- but I don't see any obvious reason why it shouldn't work on other platforms as long as you have the dependencies installed. 17 | 18 | You will also need the Kaggle Digit Recognizer dataset - which can be downloaded from [here](https://www.kaggle.com/c/digit-recognizer/data). 19 | 20 | ### Building and Running the LeNet on the Digit Recognizer dataset 21 | 22 | 1. Clone this repository. `git clone https://github.com/plantsandbuildings/cpp-cnn` 23 | 2. `cd` into the project root (`cd cpp-cnn`) and create the build and data directories using `mkdir build data`. 24 | 3. Copy the Kaggle Digit Recognizer dataset into the `data` directory. The `data` directory should now contain two CSV files -- `train.csv` and `test.csv`. 25 | 4. `cd` into the build directory (`cd build`) and configure the build using `cmake ../`. This will generate a `Makefile` to build the project. 26 | 5. Run `make` to build the project. Binaries are written to `build/bin`. 27 | 6. Train the model on the Kaggle data using `bin/le_net`. 28 | 29 | The program will write the test predictions after each epoch of training into CSV files - `build/results_epoch_1.csv`, `build/results_epoch_2.csv` etc. These files can directly be uploaded to the [submission page](https://www.kaggle.com/c/digit-recognizer/submit) on Kaggle to view the scores. 30 | -------------------------------------------------------------------------------- /layers/convolution_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CONV_LAYER_HPP 2 | #define CONV_LAYER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define DEBUG false 11 | #define DEBUG_PREFIX "[DEBUG CONV LAYER ]\t" 12 | 13 | class ConvolutionLayer 14 | { 15 | public: 16 | ConvolutionLayer( 17 | size_t inputHeight, 18 | size_t inputWidth, 19 | size_t inputDepth, 20 | size_t filterHeight, 21 | size_t filterWidth, 22 | size_t horizontalStride, 23 | size_t verticalStride, 24 | size_t numFilters) : 25 | inputHeight(inputHeight), 26 | inputWidth(inputWidth), 27 | inputDepth(inputDepth), 28 | filterHeight(filterHeight), 29 | filterWidth(filterWidth), 30 | horizontalStride(horizontalStride), 31 | verticalStride(verticalStride), 32 | numFilters(numFilters) 33 | { 34 | // Initialize the filters.
35 | filters.resize(numFilters); 36 | for (size_t i=0; iinput = input; 93 | this->output = output; 94 | 95 | #if DEBUG 96 | std::cout 97 | << DEBUG_PREFIX << "---------------------------------------------" 98 | << std::endl 99 | << DEBUG_PREFIX << "FORWARD PASS DEBUG OUTPUT" 100 | << std::endl 101 | << DEBUG_PREFIX << "---------------------------------------------" 102 | << std::endl; 103 | 104 | // Print input. 105 | std::cout << DEBUG_PREFIX << std::endl; 106 | std::cout << DEBUG_PREFIX << "Input to conv layer:" << std::endl; 107 | for (size_t i=0; i filters) { this->filters = filters; } 246 | 247 | std::vector getFilters() { return this->filters; } 248 | 249 | arma::cube getGradientWrtInput() { return gradInput; } 250 | 251 | std::vector getGradientWrtFilters() { return gradFilters; } 252 | 253 | private: 254 | size_t inputHeight; 255 | size_t inputWidth; 256 | size_t inputDepth; 257 | size_t filterHeight; 258 | size_t filterWidth; 259 | size_t horizontalStride; 260 | size_t verticalStride; 261 | size_t numFilters; 262 | 263 | std::vector filters; 264 | 265 | double _getTruncNormalVal(double mean, double variance) 266 | { 267 | double stddev = sqrt(variance); 268 | arma::mat candidate = {3.0 * stddev}; 269 | while (std::abs(candidate[0] - mean) > 2.0 * stddev) 270 | candidate.randn(1, 1); 271 | return candidate[0]; 272 | } 273 | 274 | void _resetAccumulatedGradients() 275 | { 276 | accumulatedGradFilters.clear(); 277 | accumulatedGradFilters.resize(numFilters); 278 | for (size_t fidx=0; fidx gradFilters; 290 | std::vector accumulatedGradFilters; 291 | }; 292 | 293 | #undef DEBUG 294 | #undef DEBUG_PREFIX 295 | #endif 296 | -------------------------------------------------------------------------------- /layers/cross_entropy_loss_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CROSS_ENTROPY_LOSS_LAYER_HPP 2 | #define CROSS_ENTROPY_LOSS_LAYER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class CrossEntropyLossLayer 9 | { 10 | public: 11 | CrossEntropyLossLayer(size_t numInputs) : numInputs(numInputs) 12 | { 13 | // Nothing to do here. 14 | } 15 | 16 | double Forward(arma::vec& predictedDistribution, 17 | arma::vec& actualDistribution) 18 | { 19 | assert(predictedDistribution.n_elem == numInputs); 20 | assert(actualDistribution.n_elem == numInputs); 21 | 22 | // Cache the prdicted and actual labels -- these will be required in the 23 | // backward pass. 24 | this->predictedDistribution = predictedDistribution; 25 | this->actualDistribution = actualDistribution; 26 | 27 | // Compute the loss and cache that too. 
28 | this->loss = -arma::dot(actualDistribution, 29 | arma::log(predictedDistribution)); 30 | return this->loss; 31 | } 32 | 33 | void Backward() 34 | { 35 | gradientWrtPredictedDistribution = 36 | -(actualDistribution % (1/predictedDistribution)); 37 | } 38 | 39 | arma::vec getGradientWrtPredictedDistribution() 40 | { 41 | return gradientWrtPredictedDistribution; 42 | } 43 | 44 | private: 45 | size_t numInputs; 46 | arma::vec predictedDistribution; 47 | arma::vec actualDistribution; 48 | 49 | double loss; 50 | 51 | arma::vec gradientWrtPredictedDistribution; 52 | }; 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /layers/dense_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DENSE_LAYER_HPP 2 | #define DENSE_LAYER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define DEBUG false 10 | #define DEBUG_PREFIX "[DEBUG DENSE LAYER ]\t" 11 | 12 | class DenseLayer 13 | { 14 | public: 15 | DenseLayer(size_t inputHeight, 16 | size_t inputWidth, 17 | size_t inputDepth, 18 | size_t numOutputs) : 19 | inputHeight(inputHeight), 20 | inputWidth(inputWidth), 21 | inputDepth(inputDepth), 22 | numOutputs(numOutputs) 23 | { 24 | // Initialize the weights. 25 | weights = arma::zeros(numOutputs, inputHeight*inputWidth*inputDepth); 26 | weights.imbue( [&]() { return _getTruncNormalVal(0.0, 1.0); } ); 27 | 28 | // Initialize the biases 29 | biases = arma::zeros(numOutputs); 30 | 31 | // Reset accumulated gradients. 32 | _resetAccumulatedGradients(); 33 | } 34 | 35 | void Forward(arma::cube& input, arma::vec& output) 36 | { 37 | arma::vec flatInput = arma::vectorise(input); 38 | output = (weights * flatInput) + biases; 39 | 40 | this->input = input; 41 | this->output = output; 42 | } 43 | 44 | void Backward(arma::vec& upstreamGradient) 45 | { 46 | arma::vec gradInputVec = arma::zeros(inputHeight*inputWidth*inputDepth); 47 | for (size_t i=0; i<(inputHeight*inputWidth*inputDepth); i++) 48 | gradInputVec[i] = arma::dot(weights.col(i), upstreamGradient); 49 | arma::cube tmp((inputHeight*inputWidth*inputDepth), 1, 1); 50 | tmp.slice(0).col(0) = gradInputVec; 51 | gradInput = arma::reshape(tmp, inputHeight, inputWidth, inputDepth); 52 | 53 | accumulatedGradInput += gradInput; 54 | 55 | gradWeights = arma::zeros(arma::size(weights)); 56 | for (size_t i=0; iweights = weights; } 83 | 84 | void setBiases(arma::vec biases) { this->biases = biases; } 85 | 86 | private: 87 | size_t inputHeight; 88 | size_t inputWidth; 89 | size_t inputDepth; 90 | arma::cube input; 91 | 92 | size_t numOutputs; 93 | arma::vec output; 94 | 95 | arma::mat weights; 96 | arma::vec biases; 97 | 98 | arma::cube gradInput; 99 | arma::mat gradWeights; 100 | arma::vec gradBiases; 101 | 102 | arma::cube accumulatedGradInput; 103 | arma::mat accumulatedGradWeights; 104 | arma::vec accumulatedGradBiases; 105 | 106 | double _getTruncNormalVal(double mean, double variance) 107 | { 108 | double stddev = sqrt(variance); 109 | arma::mat candidate = {3.0 * stddev}; 110 | while (std::abs(candidate[0] - mean) > 2.0 * stddev) 111 | candidate.randn(1, 1); 112 | return candidate[0]; 113 | } 114 | 115 | void _resetAccumulatedGradients() 116 | { 117 | accumulatedGradInput = arma::zeros(inputHeight, inputWidth, inputDepth); 118 | accumulatedGradWeights = arma::zeros( 119 | numOutputs, 120 | inputHeight*inputWidth*inputDepth 121 | ); 122 | accumulatedGradBiases = arma::zeros(numOutputs); 123 | } 124 | }; 125 | 126 | #undef DEBUG 127 | #undef 
DEBUG_PREFIX 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /layers/max_pooling_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MAX_POOLING_LAYER_HPP 2 | #define MAX_POOLING_LAYER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define DEBUG false 9 | #define DEBUG_PREFIX "[DEBUG POOL LAYER ]\t" 10 | 11 | class MaxPoolingLayer 12 | { 13 | public: 14 | MaxPoolingLayer(size_t inputHeight, 15 | size_t inputWidth, 16 | size_t inputDepth, 17 | size_t poolingWindowHeight, 18 | size_t poolingWindowWidth, 19 | size_t verticalStride, 20 | size_t horizontalStride) : 21 | inputHeight(inputHeight), 22 | inputWidth(inputWidth), 23 | inputDepth(inputDepth), 24 | poolingWindowHeight(poolingWindowHeight), 25 | poolingWindowWidth(poolingWindowWidth), 26 | verticalStride(verticalStride), 27 | horizontalStride(horizontalStride) 28 | { 29 | // Nothing to do here. 30 | } 31 | 32 | void Forward(arma::cube& input, arma::cube& output) 33 | { 34 | assert((inputHeight - poolingWindowHeight)%verticalStride == 0); 35 | assert((inputWidth - poolingWindowWidth)%horizontalStride == 0); 36 | output = arma::zeros( 37 | (inputHeight - poolingWindowHeight)/verticalStride + 1, 38 | (inputWidth - poolingWindowWidth)/horizontalStride + 1, 39 | inputDepth 40 | ); 41 | for (size_t sidx = 0; sidx < inputDepth; sidx ++) 42 | { 43 | for (size_t ridx = 0; 44 | ridx <= inputHeight - poolingWindowHeight; 45 | ridx += verticalStride) 46 | { 47 | for (size_t cidx = 0; 48 | cidx <= inputWidth - poolingWindowWidth; 49 | cidx += horizontalStride) 50 | { 51 | output.slice(sidx)(ridx/verticalStride, cidx/horizontalStride) = 52 | input.slice(sidx).submat(ridx, 53 | cidx, 54 | ridx+poolingWindowHeight-1, 55 | cidx+poolingWindowWidth-1) 56 | .max(); 57 | } 58 | } 59 | } 60 | 61 | this->input = input; 62 | this->output = output; 63 | #if DEBUG 64 | std::cout 65 | << DEBUG_PREFIX << "---------------------------------------------" 66 | << std::endl 67 | << DEBUG_PREFIX << "FORWARD PASS DEBUG OUTPUT" 68 | << std::endl 69 | << DEBUG_PREFIX << "---------------------------------------------" 70 | << std::endl; 71 | std::cout << DEBUG_PREFIX << std::endl; 72 | std::cout 73 | << DEBUG_PREFIX << "Input to Max pooling layer:" 74 | << std::endl; 75 | for (size_t i=0; i 5 | #include 6 | 7 | class ReLULayer 8 | { 9 | public: 10 | ReLULayer(size_t inputHeight, 11 | size_t inputWidth, 12 | size_t inputDepth) : 13 | inputHeight(inputHeight), 14 | inputWidth(inputWidth), 15 | inputDepth(inputDepth) 16 | { 17 | // Nothing to do here. 18 | } 19 | 20 | void Forward(arma::cube& input, arma::cube& output) 21 | { 22 | output = arma::zeros(arma::size(input)); 23 | output = arma::max(input, output); 24 | this->input = input; 25 | this->output = output; 26 | } 27 | 28 | void Backward(arma::cube upstreamGradient) 29 | { 30 | gradientWrtInput = input; 31 | gradientWrtInput.transform( [](double val) { return val > 0? 
1 : 0; } ); 32 | gradientWrtInput = gradientWrtInput % upstreamGradient; 33 | } 34 | 35 | arma::cube getGradientWrtInput() { return gradientWrtInput; } 36 | 37 | private: 38 | size_t inputHeight; 39 | size_t inputWidth; 40 | size_t inputDepth; 41 | 42 | arma::cube input; 43 | arma::cube output; 44 | 45 | arma::cube gradientWrtInput; 46 | }; 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /layers/softmax_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SOFTMAX_LAYER_HPP 2 | #define SOFTMAX_LAYER_HPP 3 | 4 | #include 5 | #include 6 | 7 | class SoftmaxLayer 8 | { 9 | public: 10 | SoftmaxLayer(size_t numInputs) : 11 | numInputs(numInputs) 12 | { 13 | // Nothing to do here. 14 | } 15 | 16 | void Forward(arma::vec& input, arma::vec& output) 17 | { 18 | double sumExp = arma::accu(arma::exp(input - arma::max(input))); 19 | output = arma::exp(input - arma::max(input))/sumExp; 20 | 21 | this->input = input; 22 | this->output = output; 23 | } 24 | 25 | void Backward(arma::vec& upstreamGradient) 26 | { 27 | double sub = arma::dot(upstreamGradient, output); 28 | gradWrtInput = (upstreamGradient - sub) % output; 29 | } 30 | 31 | arma::vec getGradientWrtInput() { return gradWrtInput; } 32 | 33 | private: 34 | size_t numInputs; 35 | arma::vec input; 36 | arma::vec output; 37 | 38 | arma::vec gradWrtInput; 39 | }; 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /le_net.cpp: -------------------------------------------------------------------------------- 1 | #include "layers/convolution_layer.hpp" 2 | #include "layers/max_pooling_layer.hpp" 3 | #include "layers/relu_layer.hpp" 4 | #include "layers/dense_layer.hpp" 5 | #include "layers/softmax_layer.hpp" 6 | #include "layers/cross_entropy_loss_layer.hpp" 7 | #include "utils/mnist.hpp" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #define DEBUG true 17 | #define DEBUG_PREFIX "[DEBUG LE NET ]\t" 18 | 19 | int main(int argc, char ** argv) 20 | { 21 | // Read the Kaggle data 22 | MNISTData md("../data"); 23 | 24 | std::vector trainData = md.getTrainData(); 25 | std::vector trainLabels = md.getTrainLabels(); 26 | 27 | std::vector validationData = md.getValidationData(); 28 | std::vector validationLabels = md.getValidationLabels(); 29 | 30 | assert(trainData.size() == trainLabels.size()); 31 | assert(validationData.size() == validationLabels.size()); 32 | 33 | std::vector testData = md.getTestData(); 34 | 35 | #if DEBUG 36 | std::cout << DEBUG_PREFIX 37 | << "Training data size: " << trainData.size() << std::endl; 38 | std::cout << DEBUG_PREFIX 39 | << "Validation data size: " << validationData.size() << std::endl; 40 | std::cout << DEBUG_PREFIX 41 | << "Test data size: " << testData.size() << std::endl; 42 | std::cout << DEBUG_PREFIX << std::endl; 43 | #endif 44 | 45 | const size_t TRAIN_DATA_SIZE = trainData.size(); 46 | const size_t VALIDATION_DATA_SIZE = validationData.size(); 47 | const size_t TEST_DATA_SIZE = testData.size(); 48 | const double LEARNING_RATE = 0.05; 49 | const size_t EPOCHS = 10; 50 | const size_t BATCH_SIZE = 10; 51 | const size_t NUM_BATCHES = TRAIN_DATA_SIZE / BATCH_SIZE; 52 | 53 | // Define the network layers 54 | ConvolutionLayer c1( 55 | 28, 56 | 28, 57 | 1, 58 | 5, 59 | 5, 60 | 1, 61 | 1, 62 | 6); 63 | // Output is 24 x 24 x 6 64 | 65 | ReLULayer r1( 66 | 24, 67 | 24, 68 | 6); 69 | // Output is 24 x 24 x 6 70 | 71 | 
MaxPoolingLayer mp1( 72 | 24, 73 | 24, 74 | 6, 75 | 2, 76 | 2, 77 | 2, 78 | 2); 79 | // Output is 12 x 12 x 6 80 | 81 | ConvolutionLayer c2( 82 | 12, 83 | 12, 84 | 6, 85 | 5, 86 | 5, 87 | 1, 88 | 1, 89 | 16); 90 | // Output is 8 x 8 x 16 91 | 92 | ReLULayer r2( 93 | 8, 94 | 8, 95 | 16); 96 | // Output is 8 x 8 x 16 97 | 98 | MaxPoolingLayer mp2( 99 | 8, 100 | 8, 101 | 16, 102 | 2, 103 | 2, 104 | 2, 105 | 2); 106 | // Output is 4 x 4 x 16 107 | 108 | DenseLayer d( 109 | 4, 110 | 4, 111 | 16, 112 | 10); 113 | // Output is a vector of size 10 114 | 115 | SoftmaxLayer s(10); 116 | // Output is a vector of size 10 117 | 118 | CrossEntropyLossLayer l(10); 119 | 120 | // Initialize armadillo structures to store intermediate outputs (Ie. outputs 121 | // of hidden layers) 122 | arma::cube c1Out = arma::zeros(24, 24, 6); 123 | arma::cube r1Out = arma::zeros(24, 24, 6); 124 | arma::cube mp1Out = arma::zeros(12, 12, 6); 125 | arma::cube c2Out = arma::zeros(8, 8, 16); 126 | arma::cube r2Out = arma::zeros(8, 8, 16); 127 | arma::cube mp2Out = arma::zeros(4, 4, 16); 128 | arma::vec dOut = arma::zeros(10); 129 | arma::vec sOut = arma::zeros(10); 130 | 131 | // Initialize loss and cumulative loss. Cumulative loss totals loss over all 132 | // training examples in a minibatch. 133 | double loss = 0.0; 134 | double cumLoss = 0.0; 135 | 136 | 137 | for (size_t epoch = 0; epoch < EPOCHS; epoch++) 138 | { 139 | #if DEBUG 140 | std::cout << DEBUG_PREFIX << std::endl; 141 | std::cout << DEBUG_PREFIX << "Epoch # " << epoch << std::endl; 142 | #endif 143 | for (size_t batchIdx = 0; batchIdx < NUM_BATCHES; batchIdx++) 144 | { 145 | // Generate a random batch. 146 | arma::vec batch(BATCH_SIZE, arma::fill::randu); 147 | batch *= (TRAIN_DATA_SIZE - 1); 148 | 149 | for (size_t i = 0; i < BATCH_SIZE; i++) 150 | { 151 | // Forward pass 152 | c1.Forward(trainData[batch[i]], c1Out); 153 | r1.Forward(c1Out, r1Out); 154 | mp1.Forward(r1Out, mp1Out); 155 | c2.Forward(mp1Out, c2Out); 156 | r2.Forward(c2Out, r2Out); 157 | mp2.Forward(r2Out, mp2Out); 158 | d.Forward(mp2Out, dOut); 159 | dOut /= 100; 160 | s.Forward(dOut, sOut); 161 | 162 | // Compute the loss 163 | loss = l.Forward(sOut, trainLabels[batch[i]]); 164 | cumLoss += loss; 165 | 166 | // Backward pass 167 | l.Backward(); 168 | arma::vec gradWrtPredictedDistribution = 169 | l.getGradientWrtPredictedDistribution(); 170 | s.Backward(gradWrtPredictedDistribution); 171 | arma::vec gradWrtSIn = s.getGradientWrtInput(); 172 | d.Backward(gradWrtSIn); 173 | arma::cube gradWrtDIn = d.getGradientWrtInput(); 174 | mp2.Backward(gradWrtDIn); 175 | arma::cube gradWrtMP2In = mp2.getGradientWrtInput(); 176 | r2.Backward(gradWrtMP2In); 177 | arma::cube gradWrtR2In = r2.getGradientWrtInput(); 178 | c2.Backward(gradWrtR2In); 179 | arma::cube gradWrtC2In = c2.getGradientWrtInput(); 180 | mp1.Backward(gradWrtC2In); 181 | arma::cube gradWrtMP1In = mp1.getGradientWrtInput(); 182 | r1.Backward(gradWrtMP1In); 183 | arma::cube gradWrtR1In = r1.getGradientWrtInput(); 184 | c1.Backward(gradWrtR1In); 185 | arma::cube gradWrtC1In = c1.getGradientWrtInput(); 186 | } 187 | 188 | // Update params 189 | d.UpdateWeightsAndBiases(BATCH_SIZE, LEARNING_RATE); 190 | c1.UpdateFilterWeights(BATCH_SIZE, LEARNING_RATE); 191 | c2.UpdateFilterWeights(BATCH_SIZE, LEARNING_RATE); 192 | } 193 | 194 | #if DEBUG 195 | // Output loss on training dataset after each epoch 196 | std::cout << DEBUG_PREFIX << std::endl; 197 | std::cout << DEBUG_PREFIX << "Training loss: " 198 | << cumLoss / (BATCH_SIZE * NUM_BATCHES) << 
std::endl; 199 | #endif 200 | 201 | // Compute the training accuracy after epoch 202 | double correct = 0.0; 203 | for (size_t i = 0; i < TRAIN_DATA_SIZE; i++) 204 | { 205 | // Forward pass 206 | c1.Forward(trainData[i], c1Out); 207 | r1.Forward(c1Out, r1Out); 208 | mp1.Forward(r1Out, mp1Out); 209 | c2.Forward(mp1Out, c2Out); 210 | r2.Forward(c2Out, r2Out); 211 | mp2.Forward(r2Out, mp2Out); 212 | d.Forward(mp2Out, dOut); 213 | dOut /= 100; 214 | s.Forward(dOut, sOut); 215 | 216 | if (trainLabels[i].index_max() == sOut.index_max()) 217 | correct += 1.0; 218 | } 219 | 220 | #if DEBUG 221 | // Output accuracy on training dataset after each epoch 222 | std::cout << DEBUG_PREFIX 223 | << "Training accuracy: " << correct/TRAIN_DATA_SIZE << std::endl; 224 | #endif 225 | 226 | // Compute validation accuracy after epoch 227 | cumLoss = 0.0; 228 | correct = 0.0; 229 | for (size_t i = 0; i < VALIDATION_DATA_SIZE; i++) 230 | { 231 | // Forward pass 232 | c1.Forward(validationData[i], c1Out); 233 | r1.Forward(c1Out, r1Out); 234 | mp1.Forward(r1Out, mp1Out); 235 | c2.Forward(mp1Out, c2Out); 236 | r2.Forward(c2Out, r2Out); 237 | mp2.Forward(r2Out, mp2Out); 238 | d.Forward(mp2Out, dOut); 239 | dOut /= 100; 240 | s.Forward(dOut, sOut); 241 | 242 | cumLoss += l.Forward(sOut, validationLabels[i]); 243 | 244 | if (validationLabels[i].index_max() == sOut.index_max()) 245 | correct += 1.0; 246 | } 247 | 248 | #if DEBUG 249 | // Output validation loss after each epoch 250 | std::cout << DEBUG_PREFIX 251 | << "Validation loss: " << cumLoss / (BATCH_SIZE * NUM_BATCHES) 252 | << std::endl; 253 | 254 | // Output validation accuracy after each epoch 255 | std::cout << DEBUG_PREFIX 256 | << "Val accuracy: " << correct / VALIDATION_DATA_SIZE << std::endl; 257 | std::cout << DEBUG_PREFIX << std::endl; 258 | #endif 259 | 260 | // Reset cumulative loss and correct count 261 | cumLoss = 0.0; 262 | correct = 0.0; 263 | 264 | // Write results on test data to results csv 265 | std::fstream fout("results_epoch_" + std::to_string(epoch) + ".csv", 266 | std::ios::out); 267 | fout << "ImageId,Label" << std::endl; 268 | for (size_t i=0; i 5 | #include "../layers/convolution_layer.hpp" 6 | 7 | #define DEBUG false 8 | #define DEBUG_PREFIX "[CONV LAYER TESTS ]\t" 9 | 10 | BOOST_AUTO_TEST_CASE(ConstructorTest) 11 | { 12 | ConvolutionLayer c( 13 | 5, // Input height. 14 | 5, // Input width. 15 | 3, // Input depth. 16 | 2, // Filter height. 17 | 3, // Filter width. 18 | 1, // Horizontal stride. 19 | 1, // Vertical stride. 20 | 3); // Number of filters. 21 | } 22 | 23 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 24 | { 25 | arma::cube input(3, 3, 1, arma::fill::zeros); 26 | input.slice(0) = {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}}; 27 | 28 | arma::cube filter1(2, 2, 1, arma::fill::zeros); 29 | filter1.slice(0) = {{1, 0}, {0, 1}}; 30 | 31 | arma::cube filter2(2, 2, 1, arma::fill::zeros); 32 | filter2.slice(0) = {{0, 1}, {1, 0}}; 33 | 34 | std::vector filters; 35 | filters.push_back(filter1); 36 | filters.push_back(filter2); 37 | 38 | ConvolutionLayer c( 39 | 3, // Input height. 40 | 3, // Input width. 41 | 1, // Input depth. 42 | 2, // Filter width. 43 | 2, // Filter depth. 44 | 1, // Horizontal stride. 45 | 1, // Vertical stride. 46 | 2); // Number of filters. 
47 | 48 | c.setFilters(filters); 49 | 50 | arma::cube output; 51 | c.Forward(input, output); 52 | } 53 | 54 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 55 | { 56 | arma::cube input(3, 3, 1, arma::fill::zeros); 57 | input.slice(0) = {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}}; 58 | 59 | arma::cube filter1(2, 2, 1, arma::fill::zeros); 60 | filter1.slice(0) = {{1, 0}, {0, 1}}; 61 | 62 | arma::cube filter2(2, 2, 1, arma::fill::zeros); 63 | filter2.slice(0) = {{0, 1}, {1, 0}}; 64 | 65 | std::vector filters; 66 | filters.push_back(filter1); 67 | filters.push_back(filter2); 68 | 69 | ConvolutionLayer c( 70 | 3, // Input height. 71 | 3, // Input width. 72 | 1, // Input depth. 73 | 2, // Filter width. 74 | 2, // Filter depth. 75 | 1, // Horizontal stride. 76 | 1, // Vertical stride. 77 | 2); // Number of filters. 78 | 79 | c.setFilters(filters); 80 | 81 | arma::cube output; 82 | c.Forward(input, output); 83 | 84 | // For now, let the loss be the sum of all the output activations. Therefore, 85 | // the upstream gradient is all ones. 86 | arma::cube upstreamGradient(2, 2, 2, arma::fill::ones); 87 | 88 | c.Backward(upstreamGradient); 89 | 90 | arma::cube gradInput = c.getGradientWrtInput(); 91 | 92 | std::vector gradFilters = c.getGradientWrtFilters(); 93 | 94 | // Now compute approximate gradients. 95 | double disturbance = 0.5e-5; 96 | 97 | output = arma::zeros(arma::size(output)); 98 | arma::cube approxGradientWrtInput(arma::size(input), arma::fill::zeros); 99 | for (size_t i=0; i gradFilters = c.getGradientWrtFilters(); 156 | 157 | // Now compute approximate gradients. 158 | double disturbance = 0.5e-5; 159 | 160 | output = arma::zeros(arma::size(output)); 161 | arma::cube approxGradientWrtInput(arma::size(input), arma::fill::zeros); 162 | for (size_t i=0; i approxGradientWrtFilters(2); 197 | approxGradientWrtFilters[0] = arma::zeros(3, 5, 3); 198 | approxGradientWrtFilters[1] = arma::zeros(3, 5, 3); 199 | 200 | std::vector filters = c.getFilters(); 201 | 202 | for (size_t fidx=0; fidx<2; fidx++) 203 | { 204 | for (size_t idx=0; idx 5 | #include "../layers/cross_entropy_loss_layer.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 8 | { 9 | CrossEntropyLossLayer c(3); 10 | 11 | arma::vec predictedDistribution = {0.25, 0.25, 0.5}; 12 | arma::vec actualDistribution1 = {1, 0, 0}; 13 | arma::vec actualDistribution2 = {0, 0, 1}; 14 | 15 | double loss1 = c.Forward(predictedDistribution, actualDistribution1); 16 | double loss2 = c.Forward(predictedDistribution, actualDistribution2); 17 | 18 | BOOST_REQUIRE(loss1 > loss2); 19 | } 20 | 21 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 22 | { 23 | CrossEntropyLossLayer c(3); 24 | 25 | arma::vec predictedDistribution = {0.25, 0.25, 0.5}; 26 | arma::vec actualDistribution = {0, 0, 1}; 27 | 28 | double loss2 = c.Forward(predictedDistribution, actualDistribution); 29 | 30 | c.Backward(); 31 | 32 | arma::vec gradientWrtPredictedDistribution = 33 | c.getGradientWrtPredictedDistribution(); 34 | arma::vec approxGradient = arma::zeros(arma::size(predictedDistribution)); 35 | 36 | double disturbance = 0.5e-5; 37 | for (size_t i=0; i 5 | #include "../layers/dense_layer.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE(ConstructorTest) 8 | { 9 | DenseLayer d( 10 | 5, // Input height. 11 | 5, // Input width. 12 | 3, // Input depth. 13 | 10); // Number of outputs. 14 | } 15 | 16 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 17 | { 18 | DenseLayer d( 19 | 5, // Input height. 20 | 5, // Input width. 21 | 3, // Input depth. 22 | 10); // Number of outputs. 
23 | 24 | arma::cube input(5, 5, 3, arma::fill::randn); 25 | arma::vec output; 26 | 27 | d.Forward(input, output); 28 | } 29 | 30 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 31 | { 32 | DenseLayer d( 33 | 5, // Input height. 34 | 5, // Input width. 35 | 3, // Input depth. 36 | 10); // Number of outputs. 37 | 38 | arma::cube input(5, 5, 3, arma::fill::randn); 39 | arma::mat weights = d.getWeights(); 40 | arma::vec output; 41 | 42 | d.Forward(input, output); 43 | 44 | // Again, for now we loet the loss function be the sum of all output 45 | // activations. Therefore, the upstream gradient is all ones. 46 | arma::vec upstreamGradient = arma::ones(size(output)); 47 | 48 | d.Backward(upstreamGradient); 49 | 50 | arma::cube gradWrtInput = d.getGradientWrtInput(); 51 | arma::mat gradWrtWeights = d.getGradientWrtWeights(); 52 | 53 | arma::cube approxGradWrtInput = arma::zeros(size(input)); 54 | arma::mat approxGradWrtWeights = arma::zeros(size(weights)); 55 | 56 | double disturbance = 0.5e-5; 57 | for (size_t i=0; i 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #define DEBUG true 20 | #define DEBUG_PREFIX "[DEBUG INTEGRATION TEST ]\t" 21 | 22 | BOOST_AUTO_TEST_CASE(SimpleNetworkTest) 23 | { 24 | // Generate some dummy training data. 25 | std::vector trainData; 26 | 27 | arma::cube trainExample1(5, 7, 1); 28 | arma::mat pos(5, 7, arma::fill::zeros); 29 | pos.col(1) = arma::ones(5); 30 | trainExample1.slice(0) = pos; 31 | trainData.push_back(trainExample1); 32 | 33 | arma::cube trainExample2(5, 7, 1); 34 | arma::mat neg(5, 7, arma::fill::randn); 35 | neg = arma::normalise(neg); 36 | trainExample2.slice(0) = neg; 37 | trainData.push_back(trainExample2); 38 | 39 | std::vector trainLabels; 40 | 41 | arma::vec pos_ = {1, 0}; 42 | arma::vec neg_ = {0, 1}; 43 | trainLabels.push_back(pos_); 44 | trainLabels.push_back(neg_); 45 | 46 | // Define the network. 47 | ConvolutionLayer c( 48 | 5, 49 | 7, 50 | 1, 51 | 3, 52 | 2, 53 | 1, 54 | 2, 55 | 4); 56 | // Output dims: 2 x 6 x 4 57 | ReLULayer r(2, 6, 4); 58 | // Output dims: 2 x 6 x 4 59 | DenseLayer d( 60 | 2, 61 | 6, 62 | 4, 63 | 2); 64 | // Output is a vector of size 2 65 | SoftmaxLayer s(2); 66 | // Output is a vector of size 2 67 | CrossEntropyLossLayer l(2); 68 | 69 | arma::cube convOut; 70 | arma::cube reluOut; 71 | arma::vec denseOut; 72 | arma::vec softmaxOut; 73 | double loss; 74 | 75 | 76 | arma::vec gradWrtPredictedDistribution = 77 | l.getGradientWrtPredictedDistribution(); 78 | arma::vec gradWrtSoftmaxInput; 79 | arma::cube gradWrtDenseInput; 80 | arma::cube gradWrtReluInput; 81 | arma::cube gradWrtConvInput; 82 | for (size_t epoch=0; epoch<10; epoch++) 83 | { 84 | // Forward pass the first example. 85 | c.Forward(trainData[0], convOut); 86 | r.Forward(convOut, reluOut); 87 | d.Forward(reluOut, denseOut); 88 | s.Forward(denseOut, softmaxOut); 89 | loss += l.Forward(softmaxOut, trainLabels[0]); 90 | 91 | // Backward pass through the first example. 92 | l.Backward(); 93 | gradWrtPredictedDistribution = l.getGradientWrtPredictedDistribution(); 94 | s.Backward(gradWrtPredictedDistribution); 95 | gradWrtSoftmaxInput = s.getGradientWrtInput(); 96 | d.Backward(gradWrtSoftmaxInput); 97 | gradWrtDenseInput = d.getGradientWrtInput(); 98 | r.Backward(gradWrtDenseInput); 99 | gradWrtReluInput = r.getGradientWrtInput(); 100 | c.Backward(gradWrtReluInput); 101 | gradWrtConvInput = c.getGradientWrtInput(); 102 | 103 | // Forward pass the second example. 
104 | c.Forward(trainData[1], convOut); 105 | r.Forward(convOut, reluOut); 106 | d.Forward(reluOut, denseOut); 107 | s.Forward(denseOut, softmaxOut); 108 | loss += l.Forward(softmaxOut, trainLabels[1]); 109 | 110 | // Backward pass through the second example. 111 | l.Backward(); 112 | gradWrtPredictedDistribution = l.getGradientWrtPredictedDistribution(); 113 | s.Backward(gradWrtPredictedDistribution); 114 | gradWrtSoftmaxInput = s.getGradientWrtInput(); 115 | d.Backward(gradWrtSoftmaxInput); 116 | gradWrtDenseInput = d.getGradientWrtInput(); 117 | r.Backward(gradWrtDenseInput); 118 | gradWrtReluInput = r.getGradientWrtInput(); 119 | c.Backward(gradWrtReluInput); 120 | gradWrtConvInput = c.getGradientWrtInput(); 121 | 122 | // Update weights. 123 | d.UpdateWeightsAndBiases(2, 0.1); 124 | c.UpdateFilterWeights(2, 0.1); 125 | 126 | #if DEBUG 127 | std::cout << DEBUG_PREFIX << "Epoch #" << epoch 128 | << "\tCross Entropy Loss: " << loss << std::endl; 129 | #endif 130 | loss = 0.0; 131 | } 132 | #if DEBUG 133 | // Let us have a look at the peridctions 134 | c.Forward(trainData[0], convOut); 135 | r.Forward(convOut, reluOut); 136 | d.Forward(reluOut, denseOut); 137 | s.Forward(denseOut, softmaxOut); 138 | std::cout << DEBUG_PREFIX << softmaxOut.t(); 139 | c.Forward(trainData[1], convOut); 140 | r.Forward(convOut, reluOut); 141 | d.Forward(reluOut, denseOut); 142 | s.Forward(denseOut, softmaxOut); 143 | std::cout << DEBUG_PREFIX << softmaxOut.t(); 144 | #endif 145 | 146 | } 147 | 148 | BOOST_AUTO_TEST_CASE(SmallANDNetwork) 149 | { 150 | std::vector trainData(4, arma::cube(2, 1, 1, arma::fill::zeros)); 151 | trainData[1].slice(0).col(0) = arma::vec({1, 0}); 152 | trainData[2].slice(0).col(0) = arma::vec({0, 1}); 153 | trainData[3].slice(0).col(0) = arma::vec({1, 1}); 154 | 155 | std::vector trainLabels(4); 156 | trainLabels[0] = {1, 0}; 157 | trainLabels[1] = {1, 0}; 158 | trainLabels[2] = {1, 0}; 159 | trainLabels[3] = {0, 1}; 160 | 161 | DenseLayer d(2, 1, 1, 2); 162 | SoftmaxLayer s(2); 163 | CrossEntropyLossLayer l(2); 164 | 165 | arma::vec dOut = arma::zeros(2); 166 | arma::vec sOut = arma::zeros(2); 167 | double loss = 0.0; 168 | 169 | for (size_t epoch = 0; epoch < 1000; epoch ++) 170 | { 171 | loss = 0.0; 172 | for (size_t i=0; i<4; i++) 173 | { 174 | d.Forward(trainData[i], dOut); 175 | s.Forward(dOut, sOut); 176 | loss += l.Forward(sOut, trainLabels[i]); 177 | 178 | std::cout << DEBUG_PREFIX << std::endl; 179 | std::cout << DEBUG_PREFIX << "Input: " << trainData[i].slice(0).col(0).t(); 180 | std::cout << DEBUG_PREFIX << "Target: " << trainLabels[i].t(); 181 | std::cout << DEBUG_PREFIX << "Predicted: " << sOut.t(); 182 | 183 | l.Backward(); 184 | arma::vec gradWrtPredictedDistribution = l.getGradientWrtPredictedDistribution(); 185 | s.Backward(gradWrtPredictedDistribution); 186 | arma::vec gradWrtSIn = s.getGradientWrtInput(); 187 | d.Backward(gradWrtSIn); 188 | arma::vec gradWrtDin = d.getGradientWrtInput(); 189 | arma::mat gradWrtWeights = d.getGradientWrtWeights(); 190 | 191 | std::cout << DEBUG_PREFIX << "Gradient wrt weights:" << std::endl; 192 | std::cout << gradWrtWeights << std::endl; 193 | } 194 | std::cout << DEBUG_PREFIX << "Weights before update:" << std::endl; 195 | std::cout << d.getWeights() << std::endl; 196 | std::cout << DEBUG_PREFIX << "Biases before update:" << std::endl; 197 | std::cout << d.getBiases() << std::endl; 198 | d.UpdateWeightsAndBiases(4, 0.1); 199 | std::cout << DEBUG_PREFIX << "Weights after update:" << std::endl; 200 | std::cout << d.getWeights() << 
std::endl; 201 | std::cout << DEBUG_PREFIX << "Biases after update:" << std::endl; 202 | std::cout << d.getBiases() << std::endl; 203 | std::cout << DEBUG_PREFIX << "Loss after epoch #" << epoch << ": " << loss << std::endl; 204 | } 205 | // Now we check the predictions 206 | for (size_t i=0; i<4; i++) 207 | { 208 | d.Forward(trainData[i], dOut); 209 | s.Forward(dOut, sOut); 210 | 211 | std::cout << DEBUG_PREFIX << std::endl; 212 | std::cout << DEBUG_PREFIX << "Input: " << arma::vectorise(trainData[i]).t(); 213 | std::cout << DEBUG_PREFIX << "Prediction: " << sOut.t(); 214 | std::cout << DEBUG_PREFIX << std::endl; 215 | } 216 | } 217 | 218 | BOOST_AUTO_TEST_CASE(MNISTSmallDenseNetworkTest) 219 | { 220 | MNISTData md("../data_small"); 221 | 222 | std::vector trainData = md.getTrainData(); 223 | std::vector trainLabels = md.getTrainLabels(); 224 | 225 | std::vector validationData = md.getValidationData(); 226 | std::vector validationLabels = md.getValidationLabels(); 227 | 228 | const size_t TRAINING_DATA_SIZE = trainData.size(); 229 | const size_t VALIDATION_DATA_SIZE = validationData.size(); 230 | 231 | std::cout << "Training Data size: " << TRAINING_DATA_SIZE << std::endl; 232 | std::cout << "Validation Data size: " << VALIDATION_DATA_SIZE << std::endl; 233 | 234 | DenseLayer d(28, 28, 1, 10); 235 | SoftmaxLayer s(10); 236 | CrossEntropyLossLayer l(10); 237 | 238 | arma::vec dOut = arma::zeros(10); 239 | arma::vec sOut = arma::zeros(10); 240 | 241 | arma::mat oldWts = arma::zeros(10, 28*28*1); 242 | arma::mat newWts = arma::zeros(10, 28*28*1); 243 | 244 | arma::vec oldDOut = arma::zeros(10); 245 | arma::vec newDOut = arma::zeros(10); 246 | 247 | arma::vec oldSOut = arma::zeros(10); 248 | arma::vec newSOut = arma::zeros(10); 249 | 250 | // Forward pass the first training example. 
251 | for (size_t epoch = 0; epoch < 100; epoch++) 252 | { 253 | oldDOut = dOut; 254 | d.Forward(trainData[0], dOut); 255 | newDOut = dOut; 256 | BOOST_REQUIRE(!arma::approx_equal(oldDOut, newDOut, "absdiff", 0.0)); 257 | 258 | oldSOut = sOut; 259 | s.Forward(dOut, sOut); 260 | newSOut = sOut; 261 | BOOST_REQUIRE(!arma::approx_equal(oldSOut, newSOut, "absdiff", 0.0)); 262 | std::cout << DEBUG_PREFIX << "Old softmax output:" << std::endl; 263 | std::cout << oldSOut << std::endl; 264 | std::cout << DEBUG_PREFIX << "New softmax output:" << std::endl; 265 | std::cout << newSOut << std::endl; 266 | 267 | double loss = l.Forward(sOut, trainLabels[0]); 268 | 269 | // std::cout << DEBUG_PREFIX << "Input to dense layer:" << std::endl; 270 | // std::cout << trainData[0] << std::endl; 271 | 272 | // std::cout << DEBUG_PREFIX << "Weights of dense layer:" << std::endl; 273 | // std::cout << d.getWeights() << std::endl; 274 | 275 | // std::cout << DEBUG_PREFIX << "Output of dense layer:" << std::endl; 276 | // std::cout << sOut << std::endl; 277 | 278 | std::cout << DEBUG_PREFIX << "Loss: " << loss << std::endl; 279 | 280 | l.Backward(); 281 | arma::vec gradWrtPredictedDistribution = l.getGradientWrtPredictedDistribution(); 282 | 283 | // std::cout << DEBUG_PREFIX << "Gradient wrt predicted distribution:" << std::endl; 284 | // std::cout << gradWrtPredictedDistribution << std::endl; 285 | 286 | s.Backward(gradWrtPredictedDistribution); 287 | arma::vec gradWrtSIn = s.getGradientWrtInput(); 288 | 289 | // std::cout << DEBUG_PREFIX << "Gradient wrt softmax input:" << std::endl; 290 | // std::cout << gradWrtSIn << std::endl; 291 | 292 | d.Backward(gradWrtSIn); 293 | arma::mat gradWrtWts = d.getGradientWrtWeights(); 294 | 295 | // std::cout << DEBUG_PREFIX << "Gradient wrt dense weights:" << std::endl; 296 | // std::cout << gradWrtWts << std::endl; 297 | 298 | oldWts = d.getWeights(); 299 | d.UpdateWeightsAndBiases(1, 0.1); 300 | newWts = d.getWeights(); 301 | BOOST_REQUIRE(!arma::approx_equal(oldWts, newWts, "absdiff", 0.0)); 302 | } 303 | 304 | std::cout << DEBUG_PREFIX << std::endl; 305 | d.Forward(trainData[0], dOut); 306 | s.Forward(dOut, sOut); 307 | std::cout << DEBUG_PREFIX << "Actual output: " << trainLabels[0].t(); 308 | std::cout << DEBUG_PREFIX << "Predicted output: " << sOut.t(); 309 | } 310 | 311 | BOOST_AUTO_TEST_CASE(NowWereGettingSomewhereTest) 312 | { 313 | MNISTData md("../data_medium"); 314 | 315 | std::vector trainData = md.getTrainData(); 316 | std::vector trainLabels = md.getTrainLabels(); 317 | 318 | std::vector validationData = md.getValidationData(); 319 | std::vector validationLabels = md.getValidationLabels(); 320 | 321 | std::cout << DEBUG_PREFIX << "Size of training set: " << trainData.size() << std::endl; 322 | BOOST_REQUIRE_EQUAL(trainData.size(), trainLabels.size()); 323 | std::cout << DEBUG_PREFIX << "Size of validation set: " << validationData.size() << std::endl; 324 | BOOST_REQUIRE_EQUAL(validationData.size(), validationLabels.size()); 325 | 326 | // Define the network 327 | // conv - relu - maxpool - dense - softmax - loss 328 | 329 | ConvolutionLayer c( 330 | 28, 331 | 28, 332 | 1, 333 | 7, 334 | 7, 335 | 1, 336 | 1, 337 | 3); 338 | // Output is 22 x 22 x 3 339 | ReLULayer r( 340 | 22, 341 | 22, 342 | 3); 343 | // Output is 22 x 22 x 3 344 | MaxPoolingLayer m( 345 | 22, 346 | 22, 347 | 3, 348 | 2, 349 | 2, 350 | 2, 351 | 2); 352 | // Output is 11 x 11 x 3 353 | DenseLayer d( 354 | 11, 355 | 11, 356 | 3, 357 | 10); 358 | // Output is a vector of size 10 359 | 
SoftmaxLayer s(10); 360 | // Output is a vector of size 10 361 | CrossEntropyLossLayer l(10); 362 | 363 | arma::cube cOut = arma::zeros(22, 22, 3); 364 | arma::cube rOut = arma::zeros(22, 22, 3); 365 | arma::cube mOut = arma::zeros(11, 11, 3); 366 | arma::vec dOut = arma::zeros(10); 367 | arma::vec sOut = arma::zeros(10); 368 | double loss = 0.0; 369 | // We'll use stochastic gradient descent 370 | for (size_t epoch = 0; epoch < 10; epoch++) 371 | { 372 | double averageLoss = 0.0; 373 | for(size_t i=0; i -------------------------------------------------------------------------------- /tests/max_pooling_layer_test.cpp: -------------------------------------------------------------------------------- 4 | #include <boost/test/unit_test.hpp> 5 | #include "../layers/max_pooling_layer.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE(ConstructorTest) 8 | { 9 | MaxPoolingLayer mp( 10 | 7, // Input height. 11 | 5, // Input width. 12 | 4, // Input depth. 13 | 5, // Pooling window height. 14 | 3, // Pooling window width. 15 | 2, // Vertical stride. 16 | 2 // Horizontal stride. 17 | ); 18 | } 19 | 20 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 21 | { 22 | MaxPoolingLayer mp( 23 | 7, // Input height. 24 | 5, // Input width. 25 | 4, // Input depth. 26 | 5, // Pooling window height. 27 | 3, // Pooling window width. 28 | 2, // Vertical stride. 29 | 2 // Horizontal stride. 30 | ); 31 | 32 | arma::cube input(7, 5, 4, arma::fill::randn); 33 | arma::cube output; 34 | 35 | mp.Forward(input, output); 36 | } 37 | 38 | 39 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 40 | { 41 | MaxPoolingLayer mp( 42 | 7, // Input height. 43 | 5, // Input width. 44 | 4, // Input depth. 45 | 5, // Pooling window height. 46 | 3, // Pooling window width. 47 | 2, // Vertical stride. 48 | 2 // Horizontal stride. 49 | ); 50 | 51 | arma::cube input(7, 5, 4, arma::fill::randn); 52 | arma::cube output; 53 | 54 | mp.Forward(input, output); 55 | 56 | // Again, for now we let the loss function be the sum of all output 57 | // activations. Therefore, the upstream gradient is all ones.
58 | arma::cube upstreamGradient = arma::ones(size(output)); 59 | 60 | mp.Backward(upstreamGradient); 61 | 62 | arma::cube gradientWrtInput = mp.getGradientWrtInput(); 63 | 64 | arma::cube approxGradientWrtInput = arma::zeros(arma::size(input)); 65 | 66 | double disturbance = 0.5e-5; 67 | for (size_t i=0; i -------------------------------------------------------------------------------- /tests/mnist_test.cpp: -------------------------------------------------------------------------------- 6 | 7 | BOOST_AUTO_TEST_CASE(ConstructorTest) 8 | { 9 | MNISTData md("../data", 0.5); 10 | } 11 | 12 | -------------------------------------------------------------------------------- /tests/relu_layer_test.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE ReLULayerTests 2 | #define BOOST_TEST_DYN_LINK 3 | 4 | #include <boost/test/unit_test.hpp> 5 | #include "../layers/relu_layer.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE(ConstructorTest) 8 | { 9 | ReLULayer r(5, 7, 3); 10 | } 11 | 12 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 13 | { 14 | ReLULayer r(5, 7, 3); 15 | arma::cube input(5, 7, 3, arma::fill::randn); 16 | arma::cube output; 17 | 18 | r.Forward(input, output); 19 | BOOST_REQUIRE(arma::size(input) == arma::size(output)); 20 | } 21 | 22 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 23 | { 24 | ReLULayer r(5, 7, 3); 25 | arma::cube input(5, 7, 3, arma::fill::randn); 26 | arma::cube output; 27 | 28 | r.Forward(input, output); 29 | r.Backward(arma::ones(arma::size(output))); 30 | 31 | arma::cube gradientWrtInput = r.getGradientWrtInput(); 32 | 33 | arma::cube approxGradientWrtInput = arma::zeros(arma::size(input)); 34 | 35 | double disturbance = 0.5e-5; 36 | for (size_t i=0; i -------------------------------------------------------------------------------- /tests/softmax_layer_test.cpp: -------------------------------------------------------------------------------- 4 | #include <boost/test/unit_test.hpp> 5 | #include "../layers/softmax_layer.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 8 | { 9 | SoftmaxLayer s(3); 10 | arma::vec input(3, arma::fill::randn); 11 | arma::vec output; 12 | 13 | s.Forward(input, output); 14 | } 15 | 16 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 17 | { 18 | SoftmaxLayer s(3); 19 | arma::vec input(3, arma::fill::randn); 20 | arma::vec output; 21 | 22 | s.Forward(input, output); 23 | 24 | arma::vec upstreamGradient = arma::ones(3); 25 | s.Backward(upstreamGradient); 26 | 27 | arma::vec gradWrtInput = s.getGradientWrtInput(); 28 | 29 | arma::vec approxGradWrtInput = arma::zeros(3); 30 | 31 | double disturbance = 0.5e-5; 32 | for (size_t i=0; i -------------------------------------------------------------------------------- /utils/mnist.hpp: -------------------------------------------------------------------------------- 4 | #include <armadillo> 5 | #include <cassert> 6 | #include <string> 7 | #include <vector> 8 | 9 | class MNISTData 10 | { 11 | public: 12 | MNISTData(std::string dataDir, double splitRatio = 0.9) 13 | { 14 | assert(splitRatio <= 1 && splitRatio >= 0); 15 | this->dataDir = dataDir; 16 | trainFile = dataDir + "/train.csv"; 17 | testFile = dataDir + "/test.csv"; 18 | 19 | arma::mat trainDataRaw; 20 | 21 | trainDataRaw.load(trainFile, arma::csv_ascii); 22 | trainDataRaw = trainDataRaw.submat(1, 0, trainDataRaw.n_rows - 1, trainDataRaw.n_cols - 1); 23 | 24 | int numExamples = trainDataRaw.n_rows; 25 | 26 | std::vector<arma::cube> trainDataAll; 27 | std::vector<arma::vec> trainLabelsAll; 28 | for (size_t idx=0; idx 42 | trainData = std::vector<arma::cube>(trainDataAll.begin(), 43 | trainDataAll.begin() + numExamples*splitRatio); 44 | trainLabels = std::vector<arma::vec>(trainLabelsAll.begin(), 45 | trainLabelsAll.begin() + numExamples*splitRatio); 46 | 47 | validationData = std::vector<arma::cube>(trainDataAll.begin() + numExamples*splitRatio, 48 | trainDataAll.end()); 49 | validationLabels = std::vector<arma::vec>(trainLabelsAll.begin() + numExamples*splitRatio, 50 | trainLabelsAll.end()); 51 | 52 | arma::mat testDataRaw; 53 | testDataRaw.load(testFile, arma::csv_ascii); 54 | testDataRaw = testDataRaw.submat(1, 0, testDataRaw.n_rows - 1, testDataRaw.n_cols - 1); 55 | for (size_t idx=0; idx 65 | std::vector<arma::cube> getTrainData() { return trainData; } 66 | 67 | std::vector<arma::cube> getValidationData() {
return validationData; } 68 | 69 | std::vector<arma::cube> getTestData() { return testData; } 70 | 71 | std::vector<arma::vec> getTrainLabels() { return trainLabels; } 72 | 73 | std::vector<arma::vec> getValidationLabels() { return validationLabels; } 74 | 75 | private: 76 | std::string dataDir; 77 | std::string trainFile; 78 | std::string testFile; 79 | 80 | std::vector<arma::cube> trainData; 81 | std::vector<arma::cube> validationData; 82 | std::vector<arma::cube> testData; 83 | 84 | std::vector<arma::vec> trainLabels; 85 | std::vector<arma::vec> validationLabels; 86 | }; 87 | 88 | #endif 89 | --------------------------------------------------------------------------------
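The backward-pass tests above all follow the same pattern: compute the analytic gradient via Backward() and getGradientWrtInput(), then compare it against a central-difference approximation of the gradient of a surrogate loss (the sum of all output activations). Below is a minimal sketch of such a check, written against only the Forward/Backward/getGradientWrtInput interface that appears in these tests; the surrogate loss, loop structure, and tolerance here are illustrative assumptions, not the repository's exact test code.

// Illustrative numerical gradient check (central differences) against the
// layer interface used in the tests above. Loop details and the tolerance
// are assumptions for this sketch, not the repository's original code.
#define BOOST_TEST_MODULE GradientCheckSketch
#define BOOST_TEST_DYN_LINK

#include <boost/test/unit_test.hpp>
#include "../layers/relu_layer.hpp"

BOOST_AUTO_TEST_CASE(NumericalGradientCheckSketch)
{
  ReLULayer r(5, 7, 3);
  arma::cube input(5, 7, 3, arma::fill::randn);
  arma::cube output;

  r.Forward(input, output);

  // Surrogate loss = sum of all outputs, so the upstream gradient is all ones.
  r.Backward(arma::ones(arma::size(output)));
  arma::cube analyticGradient = r.getGradientWrtInput();

  // Perturb each input element and re-run the forward pass to approximate
  // d(loss)/d(input) with central differences.
  arma::cube approxGradient = arma::zeros(arma::size(input));
  const double disturbance = 0.5e-5;
  for (size_t i = 0; i < input.n_elem; ++i)
  {
    arma::cube perturbed = input;

    perturbed(i) = input(i) + disturbance;
    r.Forward(perturbed, output);
    double lossPlus = arma::accu(output);

    perturbed(i) = input(i) - disturbance;
    r.Forward(perturbed, output);
    double lossMinus = arma::accu(output);

    approxGradient(i) = (lossPlus - lossMinus) / (2.0 * disturbance);
  }

  BOOST_REQUIRE(arma::approx_equal(analyticGradient, approxGradient, "absdiff", 1e-6));
}

The other backward-pass tests differ only in the layer being constructed and in whether the quantities are arma::cube or arma::vec.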