├── .gitignore ├── CMakeLists.txt ├── DONT_README.md ├── README.md ├── best-results └── results_epoch_8.csv ├── layers ├── convolution_layer.hpp ├── cross_entropy_loss_layer.hpp ├── dense_layer.hpp ├── max_pooling_layer.hpp ├── relu_layer.hpp └── softmax_layer.hpp ├── le_net.cpp ├── tests ├── convolution_layer_test.cpp ├── cross_entropy_loss_layer_test.cpp ├── dense_layer_test.cpp ├── integration_test.cpp ├── max_pooling_layer_test.cpp ├── mnist_test.cpp ├── relu_layer_test.cpp └── softmax_layer_test.cpp └── utils └── mnist.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | *out 2 | build 3 | data 4 | data_small 5 | data_medium 6 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | project(cpp-cnn) 4 | 5 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 6 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 7 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 8 | 9 | find_package(Boost 1.40 COMPONENTS unit_test_framework REQUIRED) 10 | find_package(Armadillo 6.5 REQUIRED) 11 | 12 | include_directories(${Boost_INCLUDE_DIR}) 13 | include_directories(${ARMADILLO_INCLUDE_DIRS}) 14 | 15 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 16 | 17 | add_executable( 18 | convolution_layer_test 19 | tests/convolution_layer_test.cpp 20 | ) 21 | 22 | target_link_libraries(convolution_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 23 | target_link_libraries(convolution_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 24 | 25 | add_executable( 26 | dense_layer_test 27 | tests/dense_layer_test.cpp 28 | ) 29 | 30 | target_link_libraries(dense_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 31 | target_link_libraries(dense_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 32 | 33 | add_executable( 34 | max_pooling_layer_test 35 | tests/max_pooling_layer_test.cpp 36 | ) 37 | 38 | target_link_libraries(max_pooling_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 39 | target_link_libraries(max_pooling_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 40 | 41 | add_executable( 42 | softmax_layer_test 43 | tests/softmax_layer_test.cpp 44 | ) 45 | 46 | target_link_libraries(softmax_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 47 | target_link_libraries(softmax_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 48 | 49 | add_executable( 50 | relu_layer_test 51 | tests/relu_layer_test.cpp 52 | ) 53 | 54 | target_link_libraries(relu_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 55 | target_link_libraries(relu_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 56 | 57 | add_executable( 58 | cross_entropy_loss_layer_test 59 | tests/cross_entropy_loss_layer_test.cpp 60 | ) 61 | 62 | target_link_libraries(cross_entropy_loss_layer_test LINK_PUBLIC ${Boost_LIBRARIES}) 63 | target_link_libraries(cross_entropy_loss_layer_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 64 | 65 | add_executable( 66 | integration_test 67 | tests/integration_test.cpp 68 | ) 69 | 70 | target_link_libraries(integration_test LINK_PUBLIC ${Boost_LIBRARIES}) 71 | target_link_libraries(integration_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 72 | 73 | add_executable( 74 | mnist_util_test 75 | tests/mnist_test.cpp 76 | ) 77 | 78 | 79 | target_link_libraries(mnist_util_test LINK_PUBLIC ${Boost_LIBRARIES}) 80 | target_link_libraries(mnist_util_test LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 81 | 82 | add_executable( 83 | le_net 84 | le_net.cpp 85 
| ) 86 | 87 | 88 | target_link_libraries(le_net LINK_PUBLIC ${Boost_LIBRARIES}) 89 | target_link_libraries(le_net LINK_PUBLIC ${ARMADILLO_LIBRARIES}) 90 | 91 | file(COPY ${CMAKE_SOURCE_DIR}/data DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 92 | # file(COPY ${CMAKE_SOURCE_DIR}/data_medium DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 93 | # file(COPY ${CMAKE_SOURCE_DIR}/data_small DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 94 | 95 | -------------------------------------------------------------------------------- /DONT_README.md: -------------------------------------------------------------------------------- 1 | I'm at work and there's nothing to do. So I'm going to write a CNN.. in C++ .. from scratch.. 2 | 3 | .. because that's what people do when they're bored out of their minds 4 | 5 | So, my initial thoughts are: 6 | 7 | 1. I need some linear algebra library to make the vector stuff fast. I've worked with Eigen in the past, and mlpack uses Armadillo so I'm familiar with that too. I don't want to go through the hassle of installing Eigen (which may not be too much of a hassle, but still..) so I'm going to use Armadillo. 8 | 9 | 2. I'll need a class for the Convolution Layer, a class for the Pooling layer and a class for the dense layer at the end. 10 | 11 | 3. Like all good boys who write CNNs, I think it would be best to test this on the MNIST dataset -- so we'll need code to parse that too. Since the data is stored in binary form, this might be unpleasant.. Oh well, I'll handle it when I get to it. 12 | 13 | Let's begin right in the middle of everything - with the Convolution Layer. 14 | 15 | So we've hit the ground running with the ConvolutionLayer constructor -- it initializes stuff like the dimensions of the input volume, the number of filters, the strides and the filter dimensions. It also initializes the filter weights. I've decided to use a truncated normal initialization (i.e. random values sampled from a Gaussian distribution having mean 0 and variance 1. Values more than two standard deviations away from the mean are rejected). 16 | 17 | Cool cool .. the weight initializations look correct. Time for a git commit and then we move on to the forward pass through the conv layer. 18 | 19 | I'm going to go for the simplest kind of convolution implementation there is -- no padding, no FFT. 20 | 21 | I've realized that filters (in general) have a depth dimension as well, and I've defined filters as 2D matrices.. Need to fix that. 22 | 23 | Done fixing the filter dimensions. Commit and move on to the forward pass. 24 | 25 | Update: I went for a snack break. My boss told me to do a few things -- which I mostly ignored, because he's an idiot. And I just finished the forward pass implementation. It feels weird working on this in the office, so I'm going to head home and continue from there. I plan to test the forward pass implementation first and then try and figure out how the backward pass is going to go. 26 | 27 | A few more notes on the rest of the implementation: 28 | 1. The optimizer -- for now, I'll just use vanilla mini-batch SGD to train. Maybe later I'll switch it up to Adam or RMSprop. 29 | 30 | 2. I realize that I'll need to add layers for ReLU activation in the hidden units, and a softmax layer at the very end of the network. 31 | 32 | 3. I'll probably add a class like "class LeNet" that contains the entire CNN architecture. I don't really plan on reusing any of the layers, so it's fine if they're a bit dirty. 33 | 34 | 3 minutes till my cab arrives .. better head down.
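
Going back to the weight initialization for a second -- here's a tiny, standalone sketch of the rejection-sampling idea described above. Illustration only: the `truncNormal` name and the 5x5x3 filter shape are made up for the example, not the repo's actual API.

```
#include <armadillo>
#include <cmath>
#include <iostream>

// Rejection sampling for a truncated normal value: draw from N(mean, variance)
// and retry until the draw lies within two standard deviations of the mean.
double truncNormal(double mean, double variance)
{
  const double stddev = std::sqrt(variance);
  arma::vec candidate(1);
  do
  {
    candidate.randn();                                   // one standard-normal draw
  } while (std::abs(candidate(0) * stddev) > 2.0 * stddev);
  return mean + candidate(0) * stddev;
}

int main()
{
  // Fill a 5x5x3 filter with truncated-normal weights (mean 0, variance 1).
  arma::cube filter(5, 5, 3);
  filter.imbue([]() { return truncNormal(0.0, 1.0); });
  std::cout << filter << std::endl;
  return 0;
}
```

Rejection sampling sounds wasteful, but with a two-standard-deviation cutoff only about 5% of draws get rejected, so it's plenty fast for initializing a handful of filters.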
35 | 36 | Okay, I didn't really do much at home yesterday, and today I was a bit busy doing pointless things at work. I found some time now to work on this, and have completed the backward pass. Actually, I'd been thinking about the math of the backward pass through the conv layer today.. and I was quite surprised at how easily it worked out to nice expressions. Hopefully I'll get some time to write a blog post about it -- I think I actually found a nice method to it as well. Anyway, I'd scribbled down 37 | most of it in my little notebook at work and the implementation wasn't too hard. 38 | 39 | I've still got the forward pass testing in the backlog. Now, I can add the backward pass testing as well. I've added a function stub for the gradient check as well in the ConvolutionLayer class. I think I'll make 100% sure that there's nothing wrong with my Conv layer before proceeding with the other components (dense layer, max pooling, relu and sigmoid). Hopefully tomorrow I'll get time for testing and then finish the CNN over the weekend. 40 | 41 | Oh yeah, a minor note -- I'd not differentiated between the strides in the vertical and horizontal directions. Updated this. 42 | 43 | I've been giving a bit of thought to writing proper tests -- test driven development and all that. I've decided to go with the Boost.Test framework (which also happens to be used by mlpack). 44 | 45 | It WORKS!!! Both the forward and backward pass seem to be working fine on basic tests. I've even written a gradient check and both the analytic and numeric gradients agree. I didn't expect things to go so smoothly, I was completely prepared to shed tears -- but hey, looks like I'm smart after all. 46 | 47 | I've added a larger test for the backward pass - and used different prime values for input dimensions and filter dimensions. This was a good test to add.. it pointed out a bug in the way gradients were being propagated when the stride was > 1. 48 | 49 | With this, the convolution layer looks pretty much done to me - at least for now. I can proceed with the dense layer now. I think I'll refactor the project into more files. 50 | 51 | Oh wait, there need to be more tests for checking if the gradients are being accumulated and if the batch update is happening correctly. I'll add those after some part of the dense layer implementation. 52 | 53 | Yeah.. I went to sleep after I got the conv layer working. Today is Saturday, so I've been at this for a while -- and here's the update. I've completed the dense layer implementation and written tests for all of it. It works really nicely. I've also been giving some thought to how I'm going to parse the MNIST data -- I think I'll just use mlpack's data::Load functionality to load the data into armadillo matrices and then let my CNN model take it from there. I really don't want to be writing 54 | code to parse a binary file into an armadillo matrix by myself. Also, I think in the gradient check I did for the conv layer backward pass, I checked only the gradient wrt input -- and completely forgot about the gradient wrt filters. I'm going to add that now. 55 | 56 | Next on the TODO list would be the implementation of softmax layer, max pooling layer and relu layer (in that order). 57 | 58 | Added gradient wrt filters check in the conv layer backward pass test. As expected, gradients are correct. I've also remembered that I need to add the update weights function in the dense layer, and write tests to check the updates. 59 | 60 | Added softmax layer and tests for forward and backward passes.
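
Since I keep going on about gradient checks, here's roughly what one looks like in isolation -- a standalone sketch using a toy loss rather than the actual layer code. The real tests do the same kind of thing, perturbing the layer inputs and filters by a small disturbance and comparing against the analytic gradients.

```
#include <algorithm>
#include <armadillo>
#include <iostream>

int main()
{
  // Toy loss L(x) = x.x, whose analytic gradient is 2x.
  auto loss = [](const arma::vec& x) { return arma::dot(x, x); };

  arma::vec x(5, arma::fill::randn);
  arma::vec analyticGrad = 2.0 * x;

  const double h = 0.5e-5;                     // small disturbance
  arma::vec numericGrad(x.n_elem);
  for (arma::uword i = 0; i < x.n_elem; ++i)
  {
    // Central difference: (L(x + h*e_i) - L(x - h*e_i)) / (2h).
    arma::vec xPlus = x, xMinus = x;
    xPlus(i) += h;
    xMinus(i) -= h;
    numericGrad(i) = (loss(xPlus) - loss(xMinus)) / (2.0 * h);
  }

  // Relative error should be tiny (around 1e-9) when the analytic gradient is right.
  double relErr = arma::norm(analyticGrad - numericGrad) /
      std::max(arma::norm(analyticGrad), arma::norm(numericGrad));
  std::cout << "relative error: " << relErr << std::endl;
  return 0;
}
```

The relative-error form is handy because it doesn't depend on the scale of the gradients themselves.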
61 | 62 | Okay, so yesterday was Sunday and I didn't do shit. I was at work all day today and after that I was waiting for the Google Summer of Code results to be announced (too excited to get any work done). Anyway, the results were a big let down - not that I was expecting a selection.. still, rejection hurts. I'm back at it with the MaxPooling layer implementation. The backward pass proved to be trickier than expected but I think I've got it right -- I'm going to write the tests for it now, and then 63 | we'll know for sure. 64 | 65 | Yep. It works. I guess only ReLU remains now. 66 | 67 | ReLU Done! All the components are done. I think I'll write some more tests to try out small networks before I create LeNet. 68 | 69 | Actually, one component still remains -- the loss layer. Imma have to implement cross entropy loss first. 70 | 71 | Implemented cross entropy loss along with tests. Now we can move to the integration tests. 72 | 73 | I went to sleep last night.. I've decided to skip office today because I'm still a little bummed at the GSoC rejection. Anyway, the silver lining is that all my components seem to be working - I just wrote a simple network and ran it on two training examples for 10 epochs and the loss decreases beautifully: 74 | ``` 75 | [DEBUG INTEGRATION TEST ] Epoch #0 Cross Entropy Loss: 0.482423 76 | [DEBUG INTEGRATION TEST ] Epoch #1 Cross Entropy Loss: 0.121352 77 | [DEBUG INTEGRATION TEST ] Epoch #2 Cross Entropy Loss: 0.0802145 78 | [DEBUG INTEGRATION TEST ] Epoch #3 Cross Entropy Loss: 0.0604326 79 | [DEBUG INTEGRATION TEST ] Epoch #4 Cross Entropy Loss: 0.0488966 80 | [DEBUG INTEGRATION TEST ] Epoch #5 Cross Entropy Loss: 0.0410873 81 | [DEBUG INTEGRATION TEST ] Epoch #6 Cross Entropy Loss: 0.0354383 82 | [DEBUG INTEGRATION TEST ] Epoch #7 Cross Entropy Loss: 0.0311572 83 | [DEBUG INTEGRATION TEST ] Epoch #8 Cross Entropy Loss: 0.0277985 84 | [DEBUG INTEGRATION TEST ] Epoch #9 Cross Entropy Loss: 0.0250919 85 | ``` 86 | 87 | I think I should document the code and make style fixes before I proceed with anything else. 88 | 89 | Okay, I've refactored the code and made a bunch of style fixes and added comments (sparingly). Also, I've added a cmake configuration to easily build everything and make things cross-platform. Now for the part that I've been putting off from the very beginning: parsing the binary MNIST data. 90 | 91 | I've just realized that the digit recognizer challenge on Kaggle has CSV datasets for digit recognition. Those might be easier to parse. 92 | 93 | Done with the data parsing module. Now for the big sausage - LeNet. Oh yeah, minor note -- anyone attempting to run this code will have to download the Kaggle dataset into a `data/` directory. 94 | 95 | Okay, so I've assembled the LeNet - but there seems to be a very strange issue.. The training loss decreases over epochs, so does the validation loss - all good, right? Wrong! The training and validation accuracies are also decreasing over epochs! FTW!!! Go home CNN, you're drunk! What is strange is that I can't seem to get the model to overfit on a smaller sub-dataset either. I think it's time to write another integration test. 96 | 97 | I might've made some headway into the issue - it looks like the input to the loss layer is very very close to a one-hot vector, which is causing infinities and negative infinities to appear. Need to find some way to make this numerically stable.
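
To make the failure mode concrete, here's a tiny standalone snippet (illustration only, not the project code) showing how a large-scale dense output collapses the softmax to a one-hot vector and blows up the cross-entropy gradient, and what the gradient of a fused softmax + cross-entropy layer would look like instead:

```
#include <armadillo>
#include <iostream>

int main()
{
  // A dense-layer output with a very large scale pushes the softmax to an
  // (almost) exact one-hot vector: the exponentials of the losing logits
  // underflow to zero.
  arma::vec logits = {400.0, -400.0, -400.0};
  arma::vec p = arma::exp(logits - logits.max());
  p /= arma::accu(p);                 // p is numerically (1, 0, 0)
  arma::vec y = {0, 0, 1};            // one-hot target

  // Gradient of the cross-entropy layer alone: -y / p. Dividing by the zero
  // entries of p produces -inf / NaN, which then poisons the backward pass.
  arma::vec gradSeparate = -(y / p);

  // Gradient of softmax and cross-entropy fused into one layer: p - y.
  // Every entry stays in [-1, 1] no matter how extreme the logits are.
  arma::vec gradFused = p - y;

  std::cout << "separate cross-entropy gradient: " << gradSeparate.t();
  std::cout << "fused softmax+cross-entropy gradient: " << gradFused.t();
  return 0;
}
```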
Okay, a little googling around has shown that if we combine the softmax and cross entropy layers then the backward gradient becomes numerically stable. So, we will do that now. 98 | 99 | It's not working at all. Need to start fresh. 100 | 101 | Okay, so I've written a few more integration tests and here is what I've found: 102 | - The backward pass through the dense layer was slightly incorrect. I'd forgotten to incorporate the upstreamGradient into the gradients wrt weights. 103 | - The dense layer was also missing biases. I've added these now. 104 | With these changes, I can train simple networks for: 105 | - learning the AND decision boundary 106 | - learning a single MNIST image 107 | - On a sample of 3000 MNIST images, a simple convnet (conv-relu-maxpool-dense-softmax-cross_entropy schema) can be trained with SGD to obtain the following results: 108 | ``` 109 | [DEBUG INTEGRATION TEST ] Size of training set: 2700 110 | [DEBUG INTEGRATION TEST ] Size of validation set: 300 111 | [DEBUG INTEGRATION TEST ] 112 | [DEBUG INTEGRATION TEST ] Average loss: 2.22893 113 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.406667 114 | [DEBUG INTEGRATION TEST ] 115 | [DEBUG INTEGRATION TEST ] Average loss: 1.33203 116 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.676667 117 | [DEBUG INTEGRATION TEST ] 118 | [DEBUG INTEGRATION TEST ] Average loss: 0.841367 119 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.753333 120 | [DEBUG INTEGRATION TEST ] 121 | [DEBUG INTEGRATION TEST ] Average loss: 0.584995 122 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.79 123 | [DEBUG INTEGRATION TEST ] 124 | [DEBUG INTEGRATION TEST ] Average loss: 0.44068 125 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.813333 126 | [DEBUG INTEGRATION TEST ] 127 | [DEBUG INTEGRATION TEST ] Average loss: 0.360519 128 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.81 129 | [DEBUG INTEGRATION TEST ] 130 | [DEBUG INTEGRATION TEST ] Average loss: 0.294253 131 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.84 132 | [DEBUG INTEGRATION TEST ] 133 | [DEBUG INTEGRATION TEST ] Average loss: 0.265645 134 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.83 135 | [DEBUG INTEGRATION TEST ] 136 | [DEBUG INTEGRATION TEST ] Average loss: 0.220504 137 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.863333 138 | [DEBUG INTEGRATION TEST ] 139 | [DEBUG INTEGRATION TEST ] Average loss: 0.164675 140 | [DEBUG INTEGRATION TEST ] Validation Accuracy: 0.863333 141 | ``` 142 | 143 | which is reassuring ... I think we should be good to go on LeNet now. 144 | 145 | Fuck yeah .. 
LeNet is working on the medium sized dataset: 146 | ``` 147 | [DEBUG LE NET ] Training data size: 2700 148 | [DEBUG LE NET ] Validation data size: 300 149 | [DEBUG LE NET ] Test data size: 10 150 | [DEBUG LE NET ] 151 | [DEBUG LE NET ] Loss after epoch #0: 0.578797 152 | [DEBUG LE NET ] Val accuracy: 0.886667 153 | [DEBUG LE NET ] 154 | [DEBUG LE NET ] Loss after epoch #1: 0.181674 155 | [DEBUG LE NET ] Val accuracy: 0.936667 156 | [DEBUG LE NET ] 157 | [DEBUG LE NET ] Loss after epoch #2: 0.155978 158 | [DEBUG LE NET ] Val accuracy: 0.913333 159 | [DEBUG LE NET ] 160 | [DEBUG LE NET ] Loss after epoch #3: 0.0978818 161 | [DEBUG LE NET ] Val accuracy: 0.956667 162 | [DEBUG LE NET ] 163 | [DEBUG LE NET ] Loss after epoch #4: 0.0800541 164 | [DEBUG LE NET ] Val accuracy: 0.953333 165 | [DEBUG LE NET ] 166 | [DEBUG LE NET ] Loss after epoch #5: 0.0567186 167 | [DEBUG LE NET ] Val accuracy: 0.936667 168 | [DEBUG LE NET ] 169 | [DEBUG LE NET ] Loss after epoch #6: 0.0514032 170 | [DEBUG LE NET ] Val accuracy: 0.916667 171 | [DEBUG LE NET ] 172 | [DEBUG LE NET ] Loss after epoch #7: 0.0396252 173 | [DEBUG LE NET ] Val accuracy: 0.926667 174 | [DEBUG LE NET ] 175 | [DEBUG LE NET ] Loss after epoch #8: 0.0444968 176 | [DEBUG LE NET ] Val accuracy: 0.933333 177 | [DEBUG LE NET ] 178 | [DEBUG LE NET ] Loss after epoch #9: 0.0350243 179 | [DEBUG LE NET ] Val accuracy: 0.93 180 | ``` 181 | For future reference -- these results were obtained with the following hyperparameter settings: 182 | - Learning rate: 0.05 183 | - Epochs: 10 184 | - Batch Size: 10 185 | - Train data: data_medium 186 | Oh, and there's one more thing.. I noticed earlier that the output of the dense layer is quite high -- of the order of 1e2. Clearly too high for the softmax to give meaningful outputs. So, I've scaled the input to the softmax down by a factor of 1e2. This is hacky, and I should probably figure out a cleaner way to do this. Maybe normalize the input differently?.. I think I might try making the input have zero mean and unit variance - but for now I think the scaling is fine. 187 | 188 | Now for the mother lode... the complete Kaggle dataset 189 | 190 | OMFG!!!! IT WOOORRKKSS!!!! 191 | It's completed 5 epochs: 192 | ``` 193 | [DEBUG LE NET ] Training data size: 37800 194 | [DEBUG LE NET ] Validation data size: 4200 195 | [DEBUG LE NET ] Test data size: 28000 196 | [DEBUG LE NET ] 197 | [DEBUG LE NET ] Loss after epoch #0: 0.189032 198 | [DEBUG LE NET ] Val accuracy: 0.960952 199 | [DEBUG LE NET ] 200 | [DEBUG LE NET ] Loss after epoch #1: 0.102551 201 | [DEBUG LE NET ] Val accuracy: 0.966905 202 | [DEBUG LE NET ] 203 | [DEBUG LE NET ] Loss after epoch #2: 0.0846397 204 | [DEBUG LE NET ] Val accuracy: 0.971905 205 | [DEBUG LE NET ] 206 | [DEBUG LE NET ] Loss after epoch #3: 0.0762915 207 | [DEBUG LE NET ] Val accuracy: 0.97119 208 | [DEBUG LE NET ] 209 | [DEBUG LE NET ] Loss after epoch #4: 0.0741992 210 | [DEBUG LE NET ] Val accuracy: 0.975714 211 | ``` 212 | I think that this is not bad at all for a handwritten CNN. It takes a long time to run (~20 minutes per epoch on my shitty machine), but then again - handwritten. I'm going to stop the execution now and save the results and make the debug output prettier. For future reference: I did not change the hyperparameters from the previous run -- only the dataset was expanded to the original size.
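
One last aside, going back to the dense-output scaling hack a few entries up -- here's a small standalone sketch (illustration only, with made-up numbers) of the alternative I mentioned: standardizing the dense layer's output to zero mean and unit variance before the softmax, instead of just dividing by 100:

```
#include <armadillo>
#include <iostream>

// Shift and scale a vector to zero mean and unit variance. The small epsilon
// guards against division by zero if the vector happens to be constant.
arma::vec standardize(const arma::vec& v)
{
  const double mu = arma::mean(v);
  const double sigma = arma::stddev(v);
  return (v - mu) / (sigma + 1e-8);
}

int main()
{
  // A made-up dense-layer output on the order of 1e2, like the one observed.
  arma::vec dOut = {120.0, -90.0, 45.0, 310.0, -150.0, 5.0, 60.0, -20.0, 80.0, -10.0};

  arma::vec scaled = dOut / 100.0;          // the current hack
  arma::vec standardized = standardize(dOut);

  std::cout << "scaled by 1e2: " << scaled.t();
  std::cout << "standardized:  " << standardized.t();
  return 0;
}
```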
213 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## CPP-CNN 2 | 3 | A C++ implementation of the popular LeNet convolutional neural network architecture. Currently it trains on the Kaggle Digit Recognizer challenge data and gives 0.973 accuracy on the leaderboard. At the time of writing this, I got a rank of 1414 using this model. The results CSV file can be found in the `best-results/` directory. 4 | 5 | I think that this is probably more for my own benefit than for anyone else - but I've still tried to make the code as readable as possible in case someone else finds this and wants to play around with it. 6 | 7 | ### Prerequisites for building and running the model 8 | 9 | You'll probably need 10 | - g++ >= 5.0.0 11 | - CMake >= 3.0.0 12 | - make >= 4.0 13 | - Armadillo >= 8.300.4 14 | - Boost unit test framework (Boost version >= 1.58) 15 | 16 | to run everything in this repo. I've only tried to run this on a Linux system (Ubuntu 16.04) -- but I don't see any obvious reason why it shouldn't work on other platforms as long as you have the dependencies installed. 17 | 18 | You will also need the Kaggle Digit Recognizer dataset - which can be downloaded from [here](https://www.kaggle.com/c/digit-recognizer/data). 19 | 20 | ### Building and Running the LeNet on the Digit Recognizer dataset 21 | 22 | 1. Clone this repository. `git clone https://github.com/plantsandbuildings/cpp-cnn` 23 | 2. `cd` into the project root (`cd cpp-cnn`) and create the build and data directories using `mkdir build data`. 24 | 3. Copy the Kaggle Digit Recognizer dataset into the `data` directory. The `data` directory should now contain two CSV files -- `train.csv` and `test.csv`. 25 | 4. `cd` into the build directory (`cd build`) and configure the build using `cmake ../`. This will generate a `Makefile` to build the project. 26 | 5. Run `make` to build the project. Binaries are written to `build/bin`. 27 | 6. Train the model on the Kaggle data using `bin/le_net`. 28 | 29 | The program will write the test predictions after each epoch of training into CSV files - `build/results_epoch_1.csv`, `build/results_epoch_2.csv` etc. These files can directly be uploaded to the [submission page](https://www.kaggle.com/c/digit-recognizer/submit) on Kaggle to view the scores. 30 | -------------------------------------------------------------------------------- /layers/convolution_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CONV_LAYER_HPP 2 | #define CONV_LAYER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define DEBUG false 11 | #define DEBUG_PREFIX "[DEBUG CONV LAYER ]\t" 12 | 13 | class ConvolutionLayer 14 | { 15 | public: 16 | ConvolutionLayer( 17 | size_t inputHeight, 18 | size_t inputWidth, 19 | size_t inputDepth, 20 | size_t filterHeight, 21 | size_t filterWidth, 22 | size_t horizontalStride, 23 | size_t verticalStride, 24 | size_t numFilters) : 25 | inputHeight(inputHeight), 26 | inputWidth(inputWidth), 27 | inputDepth(inputDepth), 28 | filterHeight(filterHeight), 29 | filterWidth(filterWidth), 30 | horizontalStride(horizontalStride), 31 | verticalStride(verticalStride), 32 | numFilters(numFilters) 33 | { 34 | // Initialize the filters.
35 | filters.resize(numFilters); 36 | for (size_t i=0; iinput = input; 93 | this->output = output; 94 | 95 | #if DEBUG 96 | std::cout 97 | << DEBUG_PREFIX << "---------------------------------------------" 98 | << std::endl 99 | << DEBUG_PREFIX << "FORWARD PASS DEBUG OUTPUT" 100 | << std::endl 101 | << DEBUG_PREFIX << "---------------------------------------------" 102 | << std::endl; 103 | 104 | // Print input. 105 | std::cout << DEBUG_PREFIX << std::endl; 106 | std::cout << DEBUG_PREFIX << "Input to conv layer:" << std::endl; 107 | for (size_t i=0; i filters) { this->filters = filters; } 246 | 247 | std::vector getFilters() { return this->filters; } 248 | 249 | arma::cube getGradientWrtInput() { return gradInput; } 250 | 251 | std::vector getGradientWrtFilters() { return gradFilters; } 252 | 253 | private: 254 | size_t inputHeight; 255 | size_t inputWidth; 256 | size_t inputDepth; 257 | size_t filterHeight; 258 | size_t filterWidth; 259 | size_t horizontalStride; 260 | size_t verticalStride; 261 | size_t numFilters; 262 | 263 | std::vector filters; 264 | 265 | double _getTruncNormalVal(double mean, double variance) 266 | { 267 | double stddev = sqrt(variance); 268 | arma::mat candidate = {3.0 * stddev}; 269 | while (std::abs(candidate[0] - mean) > 2.0 * stddev) 270 | candidate.randn(1, 1); 271 | return candidate[0]; 272 | } 273 | 274 | void _resetAccumulatedGradients() 275 | { 276 | accumulatedGradFilters.clear(); 277 | accumulatedGradFilters.resize(numFilters); 278 | for (size_t fidx=0; fidx gradFilters; 290 | std::vector accumulatedGradFilters; 291 | }; 292 | 293 | #undef DEBUG 294 | #undef DEBUG_PREFIX 295 | #endif 296 | -------------------------------------------------------------------------------- /layers/cross_entropy_loss_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CROSS_ENTROPY_LOSS_LAYER_HPP 2 | #define CROSS_ENTROPY_LOSS_LAYER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class CrossEntropyLossLayer 9 | { 10 | public: 11 | CrossEntropyLossLayer(size_t numInputs) : numInputs(numInputs) 12 | { 13 | // Nothing to do here. 14 | } 15 | 16 | double Forward(arma::vec& predictedDistribution, 17 | arma::vec& actualDistribution) 18 | { 19 | assert(predictedDistribution.n_elem == numInputs); 20 | assert(actualDistribution.n_elem == numInputs); 21 | 22 | // Cache the prdicted and actual labels -- these will be required in the 23 | // backward pass. 24 | this->predictedDistribution = predictedDistribution; 25 | this->actualDistribution = actualDistribution; 26 | 27 | // Compute the loss and cache that too. 
28 | this->loss = -arma::dot(actualDistribution, 29 | arma::log(predictedDistribution)); 30 | return this->loss; 31 | } 32 | 33 | void Backward() 34 | { 35 | gradientWrtPredictedDistribution = 36 | -(actualDistribution % (1/predictedDistribution)); 37 | } 38 | 39 | arma::vec getGradientWrtPredictedDistribution() 40 | { 41 | return gradientWrtPredictedDistribution; 42 | } 43 | 44 | private: 45 | size_t numInputs; 46 | arma::vec predictedDistribution; 47 | arma::vec actualDistribution; 48 | 49 | double loss; 50 | 51 | arma::vec gradientWrtPredictedDistribution; 52 | }; 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /layers/dense_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DENSE_LAYER_HPP 2 | #define DENSE_LAYER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define DEBUG false 10 | #define DEBUG_PREFIX "[DEBUG DENSE LAYER ]\t" 11 | 12 | class DenseLayer 13 | { 14 | public: 15 | DenseLayer(size_t inputHeight, 16 | size_t inputWidth, 17 | size_t inputDepth, 18 | size_t numOutputs) : 19 | inputHeight(inputHeight), 20 | inputWidth(inputWidth), 21 | inputDepth(inputDepth), 22 | numOutputs(numOutputs) 23 | { 24 | // Initialize the weights. 25 | weights = arma::zeros(numOutputs, inputHeight*inputWidth*inputDepth); 26 | weights.imbue( [&]() { return _getTruncNormalVal(0.0, 1.0); } ); 27 | 28 | // Initialize the biases 29 | biases = arma::zeros(numOutputs); 30 | 31 | // Reset accumulated gradients. 32 | _resetAccumulatedGradients(); 33 | } 34 | 35 | void Forward(arma::cube& input, arma::vec& output) 36 | { 37 | arma::vec flatInput = arma::vectorise(input); 38 | output = (weights * flatInput) + biases; 39 | 40 | this->input = input; 41 | this->output = output; 42 | } 43 | 44 | void Backward(arma::vec& upstreamGradient) 45 | { 46 | arma::vec gradInputVec = arma::zeros(inputHeight*inputWidth*inputDepth); 47 | for (size_t i=0; i<(inputHeight*inputWidth*inputDepth); i++) 48 | gradInputVec[i] = arma::dot(weights.col(i), upstreamGradient); 49 | arma::cube tmp((inputHeight*inputWidth*inputDepth), 1, 1); 50 | tmp.slice(0).col(0) = gradInputVec; 51 | gradInput = arma::reshape(tmp, inputHeight, inputWidth, inputDepth); 52 | 53 | accumulatedGradInput += gradInput; 54 | 55 | gradWeights = arma::zeros(arma::size(weights)); 56 | for (size_t i=0; iweights = weights; } 83 | 84 | void setBiases(arma::vec biases) { this->biases = biases; } 85 | 86 | private: 87 | size_t inputHeight; 88 | size_t inputWidth; 89 | size_t inputDepth; 90 | arma::cube input; 91 | 92 | size_t numOutputs; 93 | arma::vec output; 94 | 95 | arma::mat weights; 96 | arma::vec biases; 97 | 98 | arma::cube gradInput; 99 | arma::mat gradWeights; 100 | arma::vec gradBiases; 101 | 102 | arma::cube accumulatedGradInput; 103 | arma::mat accumulatedGradWeights; 104 | arma::vec accumulatedGradBiases; 105 | 106 | double _getTruncNormalVal(double mean, double variance) 107 | { 108 | double stddev = sqrt(variance); 109 | arma::mat candidate = {3.0 * stddev}; 110 | while (std::abs(candidate[0] - mean) > 2.0 * stddev) 111 | candidate.randn(1, 1); 112 | return candidate[0]; 113 | } 114 | 115 | void _resetAccumulatedGradients() 116 | { 117 | accumulatedGradInput = arma::zeros(inputHeight, inputWidth, inputDepth); 118 | accumulatedGradWeights = arma::zeros( 119 | numOutputs, 120 | inputHeight*inputWidth*inputDepth 121 | ); 122 | accumulatedGradBiases = arma::zeros(numOutputs); 123 | } 124 | }; 125 | 126 | #undef DEBUG 127 | #undef 
DEBUG_PREFIX 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /layers/max_pooling_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MAX_POOLING_LAYER_HPP 2 | #define MAX_POOLING_LAYER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define DEBUG false 9 | #define DEBUG_PREFIX "[DEBUG POOL LAYER ]\t" 10 | 11 | class MaxPoolingLayer 12 | { 13 | public: 14 | MaxPoolingLayer(size_t inputHeight, 15 | size_t inputWidth, 16 | size_t inputDepth, 17 | size_t poolingWindowHeight, 18 | size_t poolingWindowWidth, 19 | size_t verticalStride, 20 | size_t horizontalStride) : 21 | inputHeight(inputHeight), 22 | inputWidth(inputWidth), 23 | inputDepth(inputDepth), 24 | poolingWindowHeight(poolingWindowHeight), 25 | poolingWindowWidth(poolingWindowWidth), 26 | verticalStride(verticalStride), 27 | horizontalStride(horizontalStride) 28 | { 29 | // Nothing to do here. 30 | } 31 | 32 | void Forward(arma::cube& input, arma::cube& output) 33 | { 34 | assert((inputHeight - poolingWindowHeight)%verticalStride == 0); 35 | assert((inputWidth - poolingWindowWidth)%horizontalStride == 0); 36 | output = arma::zeros( 37 | (inputHeight - poolingWindowHeight)/verticalStride + 1, 38 | (inputWidth - poolingWindowWidth)/horizontalStride + 1, 39 | inputDepth 40 | ); 41 | for (size_t sidx = 0; sidx < inputDepth; sidx ++) 42 | { 43 | for (size_t ridx = 0; 44 | ridx <= inputHeight - poolingWindowHeight; 45 | ridx += verticalStride) 46 | { 47 | for (size_t cidx = 0; 48 | cidx <= inputWidth - poolingWindowWidth; 49 | cidx += horizontalStride) 50 | { 51 | output.slice(sidx)(ridx/verticalStride, cidx/horizontalStride) = 52 | input.slice(sidx).submat(ridx, 53 | cidx, 54 | ridx+poolingWindowHeight-1, 55 | cidx+poolingWindowWidth-1) 56 | .max(); 57 | } 58 | } 59 | } 60 | 61 | this->input = input; 62 | this->output = output; 63 | #if DEBUG 64 | std::cout 65 | << DEBUG_PREFIX << "---------------------------------------------" 66 | << std::endl 67 | << DEBUG_PREFIX << "FORWARD PASS DEBUG OUTPUT" 68 | << std::endl 69 | << DEBUG_PREFIX << "---------------------------------------------" 70 | << std::endl; 71 | std::cout << DEBUG_PREFIX << std::endl; 72 | std::cout 73 | << DEBUG_PREFIX << "Input to Max pooling layer:" 74 | << std::endl; 75 | for (size_t i=0; i 5 | #include 6 | 7 | class ReLULayer 8 | { 9 | public: 10 | ReLULayer(size_t inputHeight, 11 | size_t inputWidth, 12 | size_t inputDepth) : 13 | inputHeight(inputHeight), 14 | inputWidth(inputWidth), 15 | inputDepth(inputDepth) 16 | { 17 | // Nothing to do here. 18 | } 19 | 20 | void Forward(arma::cube& input, arma::cube& output) 21 | { 22 | output = arma::zeros(arma::size(input)); 23 | output = arma::max(input, output); 24 | this->input = input; 25 | this->output = output; 26 | } 27 | 28 | void Backward(arma::cube upstreamGradient) 29 | { 30 | gradientWrtInput = input; 31 | gradientWrtInput.transform( [](double val) { return val > 0? 
1 : 0; } ); 32 | gradientWrtInput = gradientWrtInput % upstreamGradient; 33 | } 34 | 35 | arma::cube getGradientWrtInput() { return gradientWrtInput; } 36 | 37 | private: 38 | size_t inputHeight; 39 | size_t inputWidth; 40 | size_t inputDepth; 41 | 42 | arma::cube input; 43 | arma::cube output; 44 | 45 | arma::cube gradientWrtInput; 46 | }; 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /layers/softmax_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SOFTMAX_LAYER_HPP 2 | #define SOFTMAX_LAYER_HPP 3 | 4 | #include 5 | #include 6 | 7 | class SoftmaxLayer 8 | { 9 | public: 10 | SoftmaxLayer(size_t numInputs) : 11 | numInputs(numInputs) 12 | { 13 | // Nothing to do here. 14 | } 15 | 16 | void Forward(arma::vec& input, arma::vec& output) 17 | { 18 | double sumExp = arma::accu(arma::exp(input - arma::max(input))); 19 | output = arma::exp(input - arma::max(input))/sumExp; 20 | 21 | this->input = input; 22 | this->output = output; 23 | } 24 | 25 | void Backward(arma::vec& upstreamGradient) 26 | { 27 | double sub = arma::dot(upstreamGradient, output); 28 | gradWrtInput = (upstreamGradient - sub) % output; 29 | } 30 | 31 | arma::vec getGradientWrtInput() { return gradWrtInput; } 32 | 33 | private: 34 | size_t numInputs; 35 | arma::vec input; 36 | arma::vec output; 37 | 38 | arma::vec gradWrtInput; 39 | }; 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /le_net.cpp: -------------------------------------------------------------------------------- 1 | #include "layers/convolution_layer.hpp" 2 | #include "layers/max_pooling_layer.hpp" 3 | #include "layers/relu_layer.hpp" 4 | #include "layers/dense_layer.hpp" 5 | #include "layers/softmax_layer.hpp" 6 | #include "layers/cross_entropy_loss_layer.hpp" 7 | #include "utils/mnist.hpp" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #define DEBUG true 17 | #define DEBUG_PREFIX "[DEBUG LE NET ]\t" 18 | 19 | int main(int argc, char ** argv) 20 | { 21 | // Read the Kaggle data 22 | MNISTData md("../data"); 23 | 24 | std::vector trainData = md.getTrainData(); 25 | std::vector trainLabels = md.getTrainLabels(); 26 | 27 | std::vector validationData = md.getValidationData(); 28 | std::vector validationLabels = md.getValidationLabels(); 29 | 30 | assert(trainData.size() == trainLabels.size()); 31 | assert(validationData.size() == validationLabels.size()); 32 | 33 | std::vector testData = md.getTestData(); 34 | 35 | #if DEBUG 36 | std::cout << DEBUG_PREFIX 37 | << "Training data size: " << trainData.size() << std::endl; 38 | std::cout << DEBUG_PREFIX 39 | << "Validation data size: " << validationData.size() << std::endl; 40 | std::cout << DEBUG_PREFIX 41 | << "Test data size: " << testData.size() << std::endl; 42 | std::cout << DEBUG_PREFIX << std::endl; 43 | #endif 44 | 45 | const size_t TRAIN_DATA_SIZE = trainData.size(); 46 | const size_t VALIDATION_DATA_SIZE = validationData.size(); 47 | const size_t TEST_DATA_SIZE = testData.size(); 48 | const double LEARNING_RATE = 0.05; 49 | const size_t EPOCHS = 10; 50 | const size_t BATCH_SIZE = 10; 51 | const size_t NUM_BATCHES = TRAIN_DATA_SIZE / BATCH_SIZE; 52 | 53 | // Define the network layers 54 | ConvolutionLayer c1( 55 | 28, 56 | 28, 57 | 1, 58 | 5, 59 | 5, 60 | 1, 61 | 1, 62 | 6); 63 | // Output is 24 x 24 x 6 64 | 65 | ReLULayer r1( 66 | 24, 67 | 24, 68 | 6); 69 | // Output is 24 x 24 x 6 70 | 71 | 
MaxPoolingLayer mp1( 72 | 24, 73 | 24, 74 | 6, 75 | 2, 76 | 2, 77 | 2, 78 | 2); 79 | // Output is 12 x 12 x 6 80 | 81 | ConvolutionLayer c2( 82 | 12, 83 | 12, 84 | 6, 85 | 5, 86 | 5, 87 | 1, 88 | 1, 89 | 16); 90 | // Output is 8 x 8 x 16 91 | 92 | ReLULayer r2( 93 | 8, 94 | 8, 95 | 16); 96 | // Output is 8 x 8 x 16 97 | 98 | MaxPoolingLayer mp2( 99 | 8, 100 | 8, 101 | 16, 102 | 2, 103 | 2, 104 | 2, 105 | 2); 106 | // Output is 4 x 4 x 16 107 | 108 | DenseLayer d( 109 | 4, 110 | 4, 111 | 16, 112 | 10); 113 | // Output is a vector of size 10 114 | 115 | SoftmaxLayer s(10); 116 | // Output is a vector of size 10 117 | 118 | CrossEntropyLossLayer l(10); 119 | 120 | // Initialize armadillo structures to store intermediate outputs (Ie. outputs 121 | // of hidden layers) 122 | arma::cube c1Out = arma::zeros(24, 24, 6); 123 | arma::cube r1Out = arma::zeros(24, 24, 6); 124 | arma::cube mp1Out = arma::zeros(12, 12, 6); 125 | arma::cube c2Out = arma::zeros(8, 8, 16); 126 | arma::cube r2Out = arma::zeros(8, 8, 16); 127 | arma::cube mp2Out = arma::zeros(4, 4, 16); 128 | arma::vec dOut = arma::zeros(10); 129 | arma::vec sOut = arma::zeros(10); 130 | 131 | // Initialize loss and cumulative loss. Cumulative loss totals loss over all 132 | // training examples in a minibatch. 133 | double loss = 0.0; 134 | double cumLoss = 0.0; 135 | 136 | 137 | for (size_t epoch = 0; epoch < EPOCHS; epoch++) 138 | { 139 | #if DEBUG 140 | std::cout << DEBUG_PREFIX << std::endl; 141 | std::cout << DEBUG_PREFIX << "Epoch # " << epoch << std::endl; 142 | #endif 143 | for (size_t batchIdx = 0; batchIdx < NUM_BATCHES; batchIdx++) 144 | { 145 | // Generate a random batch. 146 | arma::vec batch(BATCH_SIZE, arma::fill::randu); 147 | batch *= (TRAIN_DATA_SIZE - 1); 148 | 149 | for (size_t i = 0; i < BATCH_SIZE; i++) 150 | { 151 | // Forward pass 152 | c1.Forward(trainData[batch[i]], c1Out); 153 | r1.Forward(c1Out, r1Out); 154 | mp1.Forward(r1Out, mp1Out); 155 | c2.Forward(mp1Out, c2Out); 156 | r2.Forward(c2Out, r2Out); 157 | mp2.Forward(r2Out, mp2Out); 158 | d.Forward(mp2Out, dOut); 159 | dOut /= 100; 160 | s.Forward(dOut, sOut); 161 | 162 | // Compute the loss 163 | loss = l.Forward(sOut, trainLabels[batch[i]]); 164 | cumLoss += loss; 165 | 166 | // Backward pass 167 | l.Backward(); 168 | arma::vec gradWrtPredictedDistribution = 169 | l.getGradientWrtPredictedDistribution(); 170 | s.Backward(gradWrtPredictedDistribution); 171 | arma::vec gradWrtSIn = s.getGradientWrtInput(); 172 | d.Backward(gradWrtSIn); 173 | arma::cube gradWrtDIn = d.getGradientWrtInput(); 174 | mp2.Backward(gradWrtDIn); 175 | arma::cube gradWrtMP2In = mp2.getGradientWrtInput(); 176 | r2.Backward(gradWrtMP2In); 177 | arma::cube gradWrtR2In = r2.getGradientWrtInput(); 178 | c2.Backward(gradWrtR2In); 179 | arma::cube gradWrtC2In = c2.getGradientWrtInput(); 180 | mp1.Backward(gradWrtC2In); 181 | arma::cube gradWrtMP1In = mp1.getGradientWrtInput(); 182 | r1.Backward(gradWrtMP1In); 183 | arma::cube gradWrtR1In = r1.getGradientWrtInput(); 184 | c1.Backward(gradWrtR1In); 185 | arma::cube gradWrtC1In = c1.getGradientWrtInput(); 186 | } 187 | 188 | // Update params 189 | d.UpdateWeightsAndBiases(BATCH_SIZE, LEARNING_RATE); 190 | c1.UpdateFilterWeights(BATCH_SIZE, LEARNING_RATE); 191 | c2.UpdateFilterWeights(BATCH_SIZE, LEARNING_RATE); 192 | } 193 | 194 | #if DEBUG 195 | // Output loss on training dataset after each epoch 196 | std::cout << DEBUG_PREFIX << std::endl; 197 | std::cout << DEBUG_PREFIX << "Training loss: " 198 | << cumLoss / (BATCH_SIZE * NUM_BATCHES) << 
std::endl; 199 | #endif 200 | 201 | // Compute the training accuracy after epoch 202 | double correct = 0.0; 203 | for (size_t i = 0; i < TRAIN_DATA_SIZE; i++) 204 | { 205 | // Forward pass 206 | c1.Forward(trainData[i], c1Out); 207 | r1.Forward(c1Out, r1Out); 208 | mp1.Forward(r1Out, mp1Out); 209 | c2.Forward(mp1Out, c2Out); 210 | r2.Forward(c2Out, r2Out); 211 | mp2.Forward(r2Out, mp2Out); 212 | d.Forward(mp2Out, dOut); 213 | dOut /= 100; 214 | s.Forward(dOut, sOut); 215 | 216 | if (trainLabels[i].index_max() == sOut.index_max()) 217 | correct += 1.0; 218 | } 219 | 220 | #if DEBUG 221 | // Output accuracy on training dataset after each epoch 222 | std::cout << DEBUG_PREFIX 223 | << "Training accuracy: " << correct/TRAIN_DATA_SIZE << std::endl; 224 | #endif 225 | 226 | // Compute validation accuracy after epoch 227 | cumLoss = 0.0; 228 | correct = 0.0; 229 | for (size_t i = 0; i < VALIDATION_DATA_SIZE; i++) 230 | { 231 | // Forward pass 232 | c1.Forward(validationData[i], c1Out); 233 | r1.Forward(c1Out, r1Out); 234 | mp1.Forward(r1Out, mp1Out); 235 | c2.Forward(mp1Out, c2Out); 236 | r2.Forward(c2Out, r2Out); 237 | mp2.Forward(r2Out, mp2Out); 238 | d.Forward(mp2Out, dOut); 239 | dOut /= 100; 240 | s.Forward(dOut, sOut); 241 | 242 | cumLoss += l.Forward(sOut, validationLabels[i]); 243 | 244 | if (validationLabels[i].index_max() == sOut.index_max()) 245 | correct += 1.0; 246 | } 247 | 248 | #if DEBUG 249 | // Output validation loss after each epoch 250 | std::cout << DEBUG_PREFIX 251 | << "Validation loss: " << cumLoss / (BATCH_SIZE * NUM_BATCHES) 252 | << std::endl; 253 | 254 | // Output validation accuracy after each epoch 255 | std::cout << DEBUG_PREFIX 256 | << "Val accuracy: " << correct / VALIDATION_DATA_SIZE << std::endl; 257 | std::cout << DEBUG_PREFIX << std::endl; 258 | #endif 259 | 260 | // Reset cumulative loss and correct count 261 | cumLoss = 0.0; 262 | correct = 0.0; 263 | 264 | // Write results on test data to results csv 265 | std::fstream fout("results_epoch_" + std::to_string(epoch) + ".csv", 266 | std::ios::out); 267 | fout << "ImageId,Label" << std::endl; 268 | for (size_t i=0; i 5 | #include "../layers/convolution_layer.hpp" 6 | 7 | #define DEBUG false 8 | #define DEBUG_PREFIX "[CONV LAYER TESTS ]\t" 9 | 10 | BOOST_AUTO_TEST_CASE(ConstructorTest) 11 | { 12 | ConvolutionLayer c( 13 | 5, // Input height. 14 | 5, // Input width. 15 | 3, // Input depth. 16 | 2, // Filter height. 17 | 3, // Filter width. 18 | 1, // Horizontal stride. 19 | 1, // Vertical stride. 20 | 3); // Number of filters. 21 | } 22 | 23 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 24 | { 25 | arma::cube input(3, 3, 1, arma::fill::zeros); 26 | input.slice(0) = {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}}; 27 | 28 | arma::cube filter1(2, 2, 1, arma::fill::zeros); 29 | filter1.slice(0) = {{1, 0}, {0, 1}}; 30 | 31 | arma::cube filter2(2, 2, 1, arma::fill::zeros); 32 | filter2.slice(0) = {{0, 1}, {1, 0}}; 33 | 34 | std::vector filters; 35 | filters.push_back(filter1); 36 | filters.push_back(filter2); 37 | 38 | ConvolutionLayer c( 39 | 3, // Input height. 40 | 3, // Input width. 41 | 1, // Input depth. 42 | 2, // Filter width. 43 | 2, // Filter depth. 44 | 1, // Horizontal stride. 45 | 1, // Vertical stride. 46 | 2); // Number of filters. 
47 | 48 | c.setFilters(filters); 49 | 50 | arma::cube output; 51 | c.Forward(input, output); 52 | } 53 | 54 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 55 | { 56 | arma::cube input(3, 3, 1, arma::fill::zeros); 57 | input.slice(0) = {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}}; 58 | 59 | arma::cube filter1(2, 2, 1, arma::fill::zeros); 60 | filter1.slice(0) = {{1, 0}, {0, 1}}; 61 | 62 | arma::cube filter2(2, 2, 1, arma::fill::zeros); 63 | filter2.slice(0) = {{0, 1}, {1, 0}}; 64 | 65 | std::vector filters; 66 | filters.push_back(filter1); 67 | filters.push_back(filter2); 68 | 69 | ConvolutionLayer c( 70 | 3, // Input height. 71 | 3, // Input width. 72 | 1, // Input depth. 73 | 2, // Filter width. 74 | 2, // Filter depth. 75 | 1, // Horizontal stride. 76 | 1, // Vertical stride. 77 | 2); // Number of filters. 78 | 79 | c.setFilters(filters); 80 | 81 | arma::cube output; 82 | c.Forward(input, output); 83 | 84 | // For now, let the loss be the sum of all the output activations. Therefore, 85 | // the upstream gradient is all ones. 86 | arma::cube upstreamGradient(2, 2, 2, arma::fill::ones); 87 | 88 | c.Backward(upstreamGradient); 89 | 90 | arma::cube gradInput = c.getGradientWrtInput(); 91 | 92 | std::vector gradFilters = c.getGradientWrtFilters(); 93 | 94 | // Now compute approximate gradients. 95 | double disturbance = 0.5e-5; 96 | 97 | output = arma::zeros(arma::size(output)); 98 | arma::cube approxGradientWrtInput(arma::size(input), arma::fill::zeros); 99 | for (size_t i=0; i gradFilters = c.getGradientWrtFilters(); 156 | 157 | // Now compute approximate gradients. 158 | double disturbance = 0.5e-5; 159 | 160 | output = arma::zeros(arma::size(output)); 161 | arma::cube approxGradientWrtInput(arma::size(input), arma::fill::zeros); 162 | for (size_t i=0; i approxGradientWrtFilters(2); 197 | approxGradientWrtFilters[0] = arma::zeros(3, 5, 3); 198 | approxGradientWrtFilters[1] = arma::zeros(3, 5, 3); 199 | 200 | std::vector filters = c.getFilters(); 201 | 202 | for (size_t fidx=0; fidx<2; fidx++) 203 | { 204 | for (size_t idx=0; idx 5 | #include "../layers/cross_entropy_loss_layer.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 8 | { 9 | CrossEntropyLossLayer c(3); 10 | 11 | arma::vec predictedDistribution = {0.25, 0.25, 0.5}; 12 | arma::vec actualDistribution1 = {1, 0, 0}; 13 | arma::vec actualDistribution2 = {0, 0, 1}; 14 | 15 | double loss1 = c.Forward(predictedDistribution, actualDistribution1); 16 | double loss2 = c.Forward(predictedDistribution, actualDistribution2); 17 | 18 | BOOST_REQUIRE(loss1 > loss2); 19 | } 20 | 21 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 22 | { 23 | CrossEntropyLossLayer c(3); 24 | 25 | arma::vec predictedDistribution = {0.25, 0.25, 0.5}; 26 | arma::vec actualDistribution = {0, 0, 1}; 27 | 28 | double loss2 = c.Forward(predictedDistribution, actualDistribution); 29 | 30 | c.Backward(); 31 | 32 | arma::vec gradientWrtPredictedDistribution = 33 | c.getGradientWrtPredictedDistribution(); 34 | arma::vec approxGradient = arma::zeros(arma::size(predictedDistribution)); 35 | 36 | double disturbance = 0.5e-5; 37 | for (size_t i=0; i 5 | #include "../layers/dense_layer.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE(ConstructorTest) 8 | { 9 | DenseLayer d( 10 | 5, // Input height. 11 | 5, // Input width. 12 | 3, // Input depth. 13 | 10); // Number of outputs. 14 | } 15 | 16 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 17 | { 18 | DenseLayer d( 19 | 5, // Input height. 20 | 5, // Input width. 21 | 3, // Input depth. 22 | 10); // Number of outputs. 
23 | 24 | arma::cube input(5, 5, 3, arma::fill::randn); 25 | arma::vec output; 26 | 27 | d.Forward(input, output); 28 | } 29 | 30 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 31 | { 32 | DenseLayer d( 33 | 5, // Input height. 34 | 5, // Input width. 35 | 3, // Input depth. 36 | 10); // Number of outputs. 37 | 38 | arma::cube input(5, 5, 3, arma::fill::randn); 39 | arma::mat weights = d.getWeights(); 40 | arma::vec output; 41 | 42 | d.Forward(input, output); 43 | 44 | // Again, for now we loet the loss function be the sum of all output 45 | // activations. Therefore, the upstream gradient is all ones. 46 | arma::vec upstreamGradient = arma::ones(size(output)); 47 | 48 | d.Backward(upstreamGradient); 49 | 50 | arma::cube gradWrtInput = d.getGradientWrtInput(); 51 | arma::mat gradWrtWeights = d.getGradientWrtWeights(); 52 | 53 | arma::cube approxGradWrtInput = arma::zeros(size(input)); 54 | arma::mat approxGradWrtWeights = arma::zeros(size(weights)); 55 | 56 | double disturbance = 0.5e-5; 57 | for (size_t i=0; i 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #define DEBUG true 20 | #define DEBUG_PREFIX "[DEBUG INTEGRATION TEST ]\t" 21 | 22 | BOOST_AUTO_TEST_CASE(SimpleNetworkTest) 23 | { 24 | // Generate some dummy training data. 25 | std::vector trainData; 26 | 27 | arma::cube trainExample1(5, 7, 1); 28 | arma::mat pos(5, 7, arma::fill::zeros); 29 | pos.col(1) = arma::ones(5); 30 | trainExample1.slice(0) = pos; 31 | trainData.push_back(trainExample1); 32 | 33 | arma::cube trainExample2(5, 7, 1); 34 | arma::mat neg(5, 7, arma::fill::randn); 35 | neg = arma::normalise(neg); 36 | trainExample2.slice(0) = neg; 37 | trainData.push_back(trainExample2); 38 | 39 | std::vector trainLabels; 40 | 41 | arma::vec pos_ = {1, 0}; 42 | arma::vec neg_ = {0, 1}; 43 | trainLabels.push_back(pos_); 44 | trainLabels.push_back(neg_); 45 | 46 | // Define the network. 47 | ConvolutionLayer c( 48 | 5, 49 | 7, 50 | 1, 51 | 3, 52 | 2, 53 | 1, 54 | 2, 55 | 4); 56 | // Output dims: 2 x 6 x 4 57 | ReLULayer r(2, 6, 4); 58 | // Output dims: 2 x 6 x 4 59 | DenseLayer d( 60 | 2, 61 | 6, 62 | 4, 63 | 2); 64 | // Output is a vector of size 2 65 | SoftmaxLayer s(2); 66 | // Output is a vector of size 2 67 | CrossEntropyLossLayer l(2); 68 | 69 | arma::cube convOut; 70 | arma::cube reluOut; 71 | arma::vec denseOut; 72 | arma::vec softmaxOut; 73 | double loss; 74 | 75 | 76 | arma::vec gradWrtPredictedDistribution = 77 | l.getGradientWrtPredictedDistribution(); 78 | arma::vec gradWrtSoftmaxInput; 79 | arma::cube gradWrtDenseInput; 80 | arma::cube gradWrtReluInput; 81 | arma::cube gradWrtConvInput; 82 | for (size_t epoch=0; epoch<10; epoch++) 83 | { 84 | // Forward pass the first example. 85 | c.Forward(trainData[0], convOut); 86 | r.Forward(convOut, reluOut); 87 | d.Forward(reluOut, denseOut); 88 | s.Forward(denseOut, softmaxOut); 89 | loss += l.Forward(softmaxOut, trainLabels[0]); 90 | 91 | // Backward pass through the first example. 92 | l.Backward(); 93 | gradWrtPredictedDistribution = l.getGradientWrtPredictedDistribution(); 94 | s.Backward(gradWrtPredictedDistribution); 95 | gradWrtSoftmaxInput = s.getGradientWrtInput(); 96 | d.Backward(gradWrtSoftmaxInput); 97 | gradWrtDenseInput = d.getGradientWrtInput(); 98 | r.Backward(gradWrtDenseInput); 99 | gradWrtReluInput = r.getGradientWrtInput(); 100 | c.Backward(gradWrtReluInput); 101 | gradWrtConvInput = c.getGradientWrtInput(); 102 | 103 | // Forward pass the second example. 
104 | c.Forward(trainData[1], convOut); 105 | r.Forward(convOut, reluOut); 106 | d.Forward(reluOut, denseOut); 107 | s.Forward(denseOut, softmaxOut); 108 | loss += l.Forward(softmaxOut, trainLabels[1]); 109 | 110 | // Backward pass through the second example. 111 | l.Backward(); 112 | gradWrtPredictedDistribution = l.getGradientWrtPredictedDistribution(); 113 | s.Backward(gradWrtPredictedDistribution); 114 | gradWrtSoftmaxInput = s.getGradientWrtInput(); 115 | d.Backward(gradWrtSoftmaxInput); 116 | gradWrtDenseInput = d.getGradientWrtInput(); 117 | r.Backward(gradWrtDenseInput); 118 | gradWrtReluInput = r.getGradientWrtInput(); 119 | c.Backward(gradWrtReluInput); 120 | gradWrtConvInput = c.getGradientWrtInput(); 121 | 122 | // Update weights. 123 | d.UpdateWeightsAndBiases(2, 0.1); 124 | c.UpdateFilterWeights(2, 0.1); 125 | 126 | #if DEBUG 127 | std::cout << DEBUG_PREFIX << "Epoch #" << epoch 128 | << "\tCross Entropy Loss: " << loss << std::endl; 129 | #endif 130 | loss = 0.0; 131 | } 132 | #if DEBUG 133 | // Let us have a look at the peridctions 134 | c.Forward(trainData[0], convOut); 135 | r.Forward(convOut, reluOut); 136 | d.Forward(reluOut, denseOut); 137 | s.Forward(denseOut, softmaxOut); 138 | std::cout << DEBUG_PREFIX << softmaxOut.t(); 139 | c.Forward(trainData[1], convOut); 140 | r.Forward(convOut, reluOut); 141 | d.Forward(reluOut, denseOut); 142 | s.Forward(denseOut, softmaxOut); 143 | std::cout << DEBUG_PREFIX << softmaxOut.t(); 144 | #endif 145 | 146 | } 147 | 148 | BOOST_AUTO_TEST_CASE(SmallANDNetwork) 149 | { 150 | std::vector trainData(4, arma::cube(2, 1, 1, arma::fill::zeros)); 151 | trainData[1].slice(0).col(0) = arma::vec({1, 0}); 152 | trainData[2].slice(0).col(0) = arma::vec({0, 1}); 153 | trainData[3].slice(0).col(0) = arma::vec({1, 1}); 154 | 155 | std::vector trainLabels(4); 156 | trainLabels[0] = {1, 0}; 157 | trainLabels[1] = {1, 0}; 158 | trainLabels[2] = {1, 0}; 159 | trainLabels[3] = {0, 1}; 160 | 161 | DenseLayer d(2, 1, 1, 2); 162 | SoftmaxLayer s(2); 163 | CrossEntropyLossLayer l(2); 164 | 165 | arma::vec dOut = arma::zeros(2); 166 | arma::vec sOut = arma::zeros(2); 167 | double loss = 0.0; 168 | 169 | for (size_t epoch = 0; epoch < 1000; epoch ++) 170 | { 171 | loss = 0.0; 172 | for (size_t i=0; i<4; i++) 173 | { 174 | d.Forward(trainData[i], dOut); 175 | s.Forward(dOut, sOut); 176 | loss += l.Forward(sOut, trainLabels[i]); 177 | 178 | std::cout << DEBUG_PREFIX << std::endl; 179 | std::cout << DEBUG_PREFIX << "Input: " << trainData[i].slice(0).col(0).t(); 180 | std::cout << DEBUG_PREFIX << "Target: " << trainLabels[i].t(); 181 | std::cout << DEBUG_PREFIX << "Predicted: " << sOut.t(); 182 | 183 | l.Backward(); 184 | arma::vec gradWrtPredictedDistribution = l.getGradientWrtPredictedDistribution(); 185 | s.Backward(gradWrtPredictedDistribution); 186 | arma::vec gradWrtSIn = s.getGradientWrtInput(); 187 | d.Backward(gradWrtSIn); 188 | arma::vec gradWrtDin = d.getGradientWrtInput(); 189 | arma::mat gradWrtWeights = d.getGradientWrtWeights(); 190 | 191 | std::cout << DEBUG_PREFIX << "Gradient wrt weights:" << std::endl; 192 | std::cout << gradWrtWeights << std::endl; 193 | } 194 | std::cout << DEBUG_PREFIX << "Weights before update:" << std::endl; 195 | std::cout << d.getWeights() << std::endl; 196 | std::cout << DEBUG_PREFIX << "Biases before update:" << std::endl; 197 | std::cout << d.getBiases() << std::endl; 198 | d.UpdateWeightsAndBiases(4, 0.1); 199 | std::cout << DEBUG_PREFIX << "Weights after update:" << std::endl; 200 | std::cout << d.getWeights() << 
std::endl; 201 | std::cout << DEBUG_PREFIX << "Biases after update:" << std::endl; 202 | std::cout << d.getBiases() << std::endl; 203 | std::cout << DEBUG_PREFIX << "Loss after epoch #" << epoch << ": " << loss << std::endl; 204 | } 205 | // Now we check the predictions 206 | for (size_t i=0; i<4; i++) 207 | { 208 | d.Forward(trainData[i], dOut); 209 | s.Forward(dOut, sOut); 210 | 211 | std::cout << DEBUG_PREFIX << std::endl; 212 | std::cout << DEBUG_PREFIX << "Input: " << arma::vectorise(trainData[i]).t(); 213 | std::cout << DEBUG_PREFIX << "Prediction: " << sOut.t(); 214 | std::cout << DEBUG_PREFIX << std::endl; 215 | } 216 | } 217 | 218 | BOOST_AUTO_TEST_CASE(MNISTSmallDenseNetworkTest) 219 | { 220 | MNISTData md("../data_small"); 221 | 222 | std::vector trainData = md.getTrainData(); 223 | std::vector trainLabels = md.getTrainLabels(); 224 | 225 | std::vector validationData = md.getValidationData(); 226 | std::vector validationLabels = md.getValidationLabels(); 227 | 228 | const size_t TRAINING_DATA_SIZE = trainData.size(); 229 | const size_t VALIDATION_DATA_SIZE = validationData.size(); 230 | 231 | std::cout << "Training Data size: " << TRAINING_DATA_SIZE << std::endl; 232 | std::cout << "Validation Data size: " << VALIDATION_DATA_SIZE << std::endl; 233 | 234 | DenseLayer d(28, 28, 1, 10); 235 | SoftmaxLayer s(10); 236 | CrossEntropyLossLayer l(10); 237 | 238 | arma::vec dOut = arma::zeros(10); 239 | arma::vec sOut = arma::zeros(10); 240 | 241 | arma::mat oldWts = arma::zeros(10, 28*28*1); 242 | arma::mat newWts = arma::zeros(10, 28*28*1); 243 | 244 | arma::vec oldDOut = arma::zeros(10); 245 | arma::vec newDOut = arma::zeros(10); 246 | 247 | arma::vec oldSOut = arma::zeros(10); 248 | arma::vec newSOut = arma::zeros(10); 249 | 250 | // Forward pass the first training example. 
251 | for (size_t epoch = 0; epoch < 100; epoch++) 252 | { 253 | oldDOut = dOut; 254 | d.Forward(trainData[0], dOut); 255 | newDOut = dOut; 256 | BOOST_REQUIRE(!arma::approx_equal(oldDOut, newDOut, "absdiff", 0.0)); 257 | 258 | oldSOut = sOut; 259 | s.Forward(dOut, sOut); 260 | newSOut = sOut; 261 | BOOST_REQUIRE(!arma::approx_equal(oldSOut, newSOut, "absdiff", 0.0)); 262 | std::cout << DEBUG_PREFIX << "Old softmax output:" << std::endl; 263 | std::cout << oldSOut << std::endl; 264 | std::cout << DEBUG_PREFIX << "New softmax output:" << std::endl; 265 | std::cout << newSOut << std::endl; 266 | 267 | double loss = l.Forward(sOut, trainLabels[0]); 268 | 269 | // std::cout << DEBUG_PREFIX << "Input to dense layer:" << std::endl; 270 | // std::cout << trainData[0] << std::endl; 271 | 272 | // std::cout << DEBUG_PREFIX << "Weights of dense layer:" << std::endl; 273 | // std::cout << d.getWeights() << std::endl; 274 | 275 | // std::cout << DEBUG_PREFIX << "Output of dense layer:" << std::endl; 276 | // std::cout << sOut << std::endl; 277 | 278 | std::cout << DEBUG_PREFIX << "Loss: " << loss << std::endl; 279 | 280 | l.Backward(); 281 | arma::vec gradWrtPredictedDistribution = l.getGradientWrtPredictedDistribution(); 282 | 283 | // std::cout << DEBUG_PREFIX << "Gradient wrt predicted distribution:" << std::endl; 284 | // std::cout << gradWrtPredictedDistribution << std::endl; 285 | 286 | s.Backward(gradWrtPredictedDistribution); 287 | arma::vec gradWrtSIn = s.getGradientWrtInput(); 288 | 289 | // std::cout << DEBUG_PREFIX << "Gradient wrt softmax input:" << std::endl; 290 | // std::cout << gradWrtSIn << std::endl; 291 | 292 | d.Backward(gradWrtSIn); 293 | arma::mat gradWrtWts = d.getGradientWrtWeights(); 294 | 295 | // std::cout << DEBUG_PREFIX << "Gradient wrt dense weights:" << std::endl; 296 | // std::cout << gradWrtWts << std::endl; 297 | 298 | oldWts = d.getWeights(); 299 | d.UpdateWeightsAndBiases(1, 0.1); 300 | newWts = d.getWeights(); 301 | BOOST_REQUIRE(!arma::approx_equal(oldWts, newWts, "absdiff", 0.0)); 302 | } 303 | 304 | std::cout << DEBUG_PREFIX << std::endl; 305 | d.Forward(trainData[0], dOut); 306 | s.Forward(dOut, sOut); 307 | std::cout << DEBUG_PREFIX << "Actual output: " << trainLabels[0].t(); 308 | std::cout << DEBUG_PREFIX << "Predicted output: " << sOut.t(); 309 | } 310 | 311 | BOOST_AUTO_TEST_CASE(NowWereGettingSomewhereTest) 312 | { 313 | MNISTData md("../data_medium"); 314 | 315 | std::vector trainData = md.getTrainData(); 316 | std::vector trainLabels = md.getTrainLabels(); 317 | 318 | std::vector validationData = md.getValidationData(); 319 | std::vector validationLabels = md.getValidationLabels(); 320 | 321 | std::cout << DEBUG_PREFIX << "Size of training set: " << trainData.size() << std::endl; 322 | BOOST_REQUIRE_EQUAL(trainData.size(), trainLabels.size()); 323 | std::cout << DEBUG_PREFIX << "Size of validation set: " << validationData.size() << std::endl; 324 | BOOST_REQUIRE_EQUAL(validationData.size(), validationLabels.size()); 325 | 326 | // Define the network 327 | // conv - relu - maxpool - dense - softmax - loss 328 | 329 | ConvolutionLayer c( 330 | 28, 331 | 28, 332 | 1, 333 | 7, 334 | 7, 335 | 1, 336 | 1, 337 | 3); 338 | // Output is 22 x 22 x 3 339 | ReLULayer r( 340 | 22, 341 | 22, 342 | 3); 343 | // Output is 22 x 22 x 3 344 | MaxPoolingLayer m( 345 | 22, 346 | 22, 347 | 3, 348 | 2, 349 | 2, 350 | 2, 351 | 2); 352 | // Output is 11 x 11 x 3 353 | DenseLayer d( 354 | 11, 355 | 11, 356 | 3, 357 | 10); 358 | // Output is a vector of size 10 359 | 
SoftmaxLayer s(10); 360 | // Output is a vector of size 10 361 | CrossEntropyLossLayer l(10); 362 | 363 | arma::cube cOut = arma::zeros(22, 22, 3); 364 | arma::cube rOut = arma::zeros(22, 22, 3); 365 | arma::cube mOut = arma::zeros(11, 11, 3); 366 | arma::vec dOut = arma::zeros(10); 367 | arma::vec sOut = arma::zeros(10); 368 | double loss = 0.0; 369 | // We'll use stochastic gradient descent 370 | for (size_t epoch = 0; epoch < 10; epoch++) 371 | { 372 | double averageLoss = 0.0; 373 | for(size_t i=0; i -------------------------------------------------------------------------------- /tests/max_pooling_layer_test.cpp: -------------------------------------------------------------------------------- 4 | #include <boost/test/unit_test.hpp> 5 | #include "../layers/max_pooling_layer.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE(ConstructorTest) 8 | { 9 | MaxPoolingLayer mp( 10 | 7, // Input height. 11 | 5, // Input width. 12 | 4, // Input depth. 13 | 5, // Pooling window height. 14 | 3, // Pooling window width. 15 | 2, // Vertical stride. 16 | 2 // Horizontal stride. 17 | ); 18 | } 19 | 20 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 21 | { 22 | MaxPoolingLayer mp( 23 | 7, // Input height. 24 | 5, // Input width. 25 | 4, // Input depth. 26 | 5, // Pooling window height. 27 | 3, // Pooling window width. 28 | 2, // Vertical stride. 29 | 2 // Horizontal stride. 30 | ); 31 | 32 | arma::cube input(7, 5, 4, arma::fill::randn); 33 | arma::cube output; 34 | 35 | mp.Forward(input, output); 36 | } 37 | 38 | 39 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 40 | { 41 | MaxPoolingLayer mp( 42 | 7, // Input height. 43 | 5, // Input width. 44 | 4, // Input depth. 45 | 5, // Pooling window height. 46 | 3, // Pooling window width. 47 | 2, // Vertical stride. 48 | 2 // Horizontal stride. 49 | ); 50 | 51 | arma::cube input(7, 5, 4, arma::fill::randn); 52 | arma::cube output; 53 | 54 | mp.Forward(input, output); 55 | 56 | // Again, for now we let the loss function be the sum of all output 57 | // activations. Therefore, the upstream gradient is all ones.
58 | arma::cube upstreamGradient = arma::ones(size(output)); 59 | 60 | mp.Backward(upstreamGradient); 61 | 62 | arma::cube gradientWrtInput = mp.getGradientWrtInput(); 63 | 64 | arma::cube approxGradientWrtInput = arma::zeros(arma::size(input)); 65 | 66 | double disturbance = 0.5e-5; 67 | for (size_t i=0; i -------------------------------------------------------------------------------- /tests/mnist_test.cpp: -------------------------------------------------------------------------------- 6 | 7 | BOOST_AUTO_TEST_CASE(ConstructorTest) 8 | { 9 | MNISTData md("../data", 0.5); 10 | } 11 | 12 | -------------------------------------------------------------------------------- /tests/relu_layer_test.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE ReLULayerTests 2 | #define BOOST_TEST_DYN_LINK 3 | 4 | #include <boost/test/unit_test.hpp> 5 | #include "../layers/relu_layer.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE(ConstructorTest) 8 | { 9 | ReLULayer r(5, 7, 3); 10 | } 11 | 12 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 13 | { 14 | ReLULayer r(5, 7, 3); 15 | arma::cube input(5, 7, 3, arma::fill::randn); 16 | arma::cube output; 17 | 18 | r.Forward(input, output); 19 | BOOST_REQUIRE(arma::size(input) == arma::size(output)); 20 | } 21 | 22 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 23 | { 24 | ReLULayer r(5, 7, 3); 25 | arma::cube input(5, 7, 3, arma::fill::randn); 26 | arma::cube output; 27 | 28 | r.Forward(input, output); 29 | r.Backward(arma::ones(arma::size(output))); 30 | 31 | arma::cube gradientWrtInput = r.getGradientWrtInput(); 32 | 33 | arma::cube approxGradientWrtInput = arma::zeros(arma::size(input)); 34 | 35 | double disturbance = 0.5e-5; 36 | for (size_t i=0; i -------------------------------------------------------------------------------- /tests/softmax_layer_test.cpp: -------------------------------------------------------------------------------- 4 | #include <boost/test/unit_test.hpp> 5 | #include "../layers/softmax_layer.hpp" 6 | 7 | BOOST_AUTO_TEST_CASE(ForwardPassTest) 8 | { 9 | SoftmaxLayer s(3); 10 | arma::vec input(3, arma::fill::randn); 11 | arma::vec output; 12 | 13 | s.Forward(input, output); 14 | } 15 | 16 | BOOST_AUTO_TEST_CASE(BackwardPassTest) 17 | { 18 | SoftmaxLayer s(3); 19 | arma::vec input(3, arma::fill::randn); 20 | arma::vec output; 21 | 22 | s.Forward(input, output); 23 | 24 | arma::vec upstreamGradient = arma::ones(3); 25 | s.Backward(upstreamGradient); 26 | 27 | arma::vec gradWrtInput = s.getGradientWrtInput(); 28 | 29 | arma::vec approxGradWrtInput = arma::zeros(3); 30 | 31 | double disturbance = 0.5e-5; 32 | for (size_t i=0; i -------------------------------------------------------------------------------- /utils/mnist.hpp: -------------------------------------------------------------------------------- 4 | #include <armadillo> 5 | #include <cassert> 6 | #include <string> 7 | #include <vector> 8 | 9 | class MNISTData 10 | { 11 | public: 12 | MNISTData(std::string dataDir, double splitRatio = 0.9) 13 | { 14 | assert(splitRatio <= 1 && splitRatio >= 0); 15 | this->dataDir = dataDir; 16 | trainFile = dataDir + "/train.csv"; 17 | testFile = dataDir + "/test.csv"; 18 | 19 | arma::mat trainDataRaw; 20 | 21 | trainDataRaw.load(trainFile, arma::csv_ascii); 22 | trainDataRaw = trainDataRaw.submat(1, 0, trainDataRaw.n_rows - 1, trainDataRaw.n_cols - 1); 23 | 24 | int numExamples = trainDataRaw.n_rows; 25 | 26 | std::vector<arma::cube> trainDataAll; 27 | std::vector<arma::vec> trainLabelsAll; 28 | for (size_t idx=0; idx 42 | trainData = std::vector<arma::cube>(trainDataAll.begin(), 43 | trainDataAll.begin() + numExamples*splitRatio); 44 | trainLabels = std::vector<arma::vec>(trainLabelsAll.begin(), 45 | trainLabelsAll.begin() + numExamples*splitRatio); 46 | 47 | validationData = std::vector<arma::cube>(trainDataAll.begin() + numExamples*splitRatio, 48 | trainDataAll.end()); 49 | validationLabels = std::vector<arma::vec>(trainLabelsAll.begin() + numExamples*splitRatio, 50 | trainLabelsAll.end()); 51 | 52 | arma::mat testDataRaw; 53 | testDataRaw.load(testFile, arma::csv_ascii); 54 | testDataRaw = testDataRaw.submat(1, 0, testDataRaw.n_rows - 1, testDataRaw.n_cols - 1); 55 | for (size_t idx=0; idx 65 | std::vector<arma::cube> getTrainData() { return trainData; } 66 | 67 | std::vector<arma::cube> getValidationData() {
return validationData; } 68 | 69 | std::vector<arma::cube> getTestData() { return testData; } 70 | 71 | std::vector<arma::vec> getTrainLabels() { return trainLabels; } 72 | 73 | std::vector<arma::vec> getValidationLabels() { return validationLabels; } 74 | 75 | private: 76 | std::string dataDir; 77 | std::string trainFile; 78 | std::string testFile; 79 | 80 | std::vector<arma::cube> trainData; 81 | std::vector<arma::cube> validationData; 82 | std::vector<arma::cube> testData; 83 | 84 | std::vector<arma::vec> trainLabels; 85 | std::vector<arma::vec> validationLabels; 86 | }; 87 | 88 | #endif 89 | --------------------------------------------------------------------------------
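The backward-pass tests above all follow the same pattern: compute the analytic gradient via Backward() and getGradientWrtInput(), then compare it against a central-difference approximation of the gradient of a surrogate loss (the sum of all output activations). Below is a minimal sketch of such a check, written against only the Forward/Backward/getGradientWrtInput interface that appears in these tests; the surrogate loss, loop structure, and tolerance here are illustrative assumptions, not the repository's exact test code.

// Illustrative numerical gradient check (central differences) against the
// layer interface used in the tests above. Loop details and the tolerance
// are assumptions for this sketch, not the repository's original code.
#define BOOST_TEST_MODULE GradientCheckSketch
#define BOOST_TEST_DYN_LINK

#include <boost/test/unit_test.hpp>
#include "../layers/relu_layer.hpp"

BOOST_AUTO_TEST_CASE(NumericalGradientCheckSketch)
{
  ReLULayer r(5, 7, 3);
  arma::cube input(5, 7, 3, arma::fill::randn);
  arma::cube output;

  r.Forward(input, output);

  // Surrogate loss = sum of all outputs, so the upstream gradient is all ones.
  r.Backward(arma::ones(arma::size(output)));
  arma::cube analyticGradient = r.getGradientWrtInput();

  // Perturb each input element and re-run the forward pass to approximate
  // d(loss)/d(input) with central differences.
  arma::cube approxGradient = arma::zeros(arma::size(input));
  const double disturbance = 0.5e-5;
  for (size_t i = 0; i < input.n_elem; ++i)
  {
    arma::cube perturbed = input;

    perturbed(i) = input(i) + disturbance;
    r.Forward(perturbed, output);
    double lossPlus = arma::accu(output);

    perturbed(i) = input(i) - disturbance;
    r.Forward(perturbed, output);
    double lossMinus = arma::accu(output);

    approxGradient(i) = (lossPlus - lossMinus) / (2.0 * disturbance);
  }

  BOOST_REQUIRE(arma::approx_equal(analyticGradient, approxGradient, "absdiff", 1e-6));
}

The other backward-pass tests differ only in the layer being constructed and in whether the quantities are arma::cube or arma::vec.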