├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── examples ├── BasicMLP.cpp ├── IrisTest.cpp └── data │ └── iris_data.csv ├── nn ├── Net.h ├── layers │ ├── Dense.h │ ├── Layer.h │ ├── Layers.h │ ├── Relu.h │ └── Softmax.h ├── loss │ ├── CrossEntropy.h │ ├── HuberLoss.h │ ├── Losses.h │ └── MeanSquaredError.h ├── optimizers │ ├── AdamImpl.h │ ├── OptimizerImpl.h │ ├── Optimizers.h │ └── StochasticGradientDescentImpl.h └── utils │ └── WeightInitializers.h └── tests ├── LossTests.cpp └── NetTests.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | cmake-build-debug 3 | build -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | project(nn_cpp CXX) 3 | include_directories(nn) 4 | 5 | set(CMAKE_CXX_STANDARD 11) 6 | option(NNCPP_BUILD_TESTS "Whether to build the tests" ON) 7 | option(NNCPP_BUILD_EXAMPLES "Whether to build examples" ON) 8 | 9 | file(GLOB_RECURSE nn_sources nn/*.h) 10 | add_library(nn_cpp ${nn_sources}) 11 | set_target_properties(nn_cpp PROPERTIES LINKER_LANGUAGE CXX) 12 | target_include_directories(nn_cpp PUBLIC nn) 13 | 14 | find_package(Eigen3 REQUIRED) 15 | target_include_directories(nn_cpp PUBLIC ${EIGEN3_INCLUDE_DIR}) 16 | 17 | 18 | if (NNCPP_BUILD_TESTS) 19 | find_package(Boost COMPONENTS unit_test_framework) 20 | 21 | if (Boost_FOUND) 22 | enable_testing() 23 | 24 | add_executable(neural_net_test tests/NetTests.cpp) 25 | target_link_libraries(neural_net_test ${Boost_LIBRARIES} nn_cpp) 26 | add_test(NAME neural_net_test WORKING_DIRECTORY ${CMAKE_BINARY_DIR} COMMAND neural_net_test) 27 | 28 | add_executable(loss_test tests/LossTests.cpp) 29 | target_link_libraries(loss_test ${Boost_LIBRARIES} nn_cpp) 30 | add_test(NAME loss_test WORKING_DIRECTORY ${CMAKE_BINARY_DIR} COMMAND loss_test) 31 | endif() 32 | endif() 33 | 34 | if (NNCPP_BUILD_EXAMPLES) 35 | 36 | add_executable(basic_mlp examples/BasicMLP.cpp) 37 | target_link_libraries(basic_mlp nn_cpp) 38 | 39 | add_executable(iris_test examples/IrisTest.cpp) 40 | target_link_libraries(iris_test nn_cpp) 41 | endif() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Ben Caine 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
### nn_cpp - Minimalistic C++11 header-only Neural Network Library

Currently under active development; several features needed for a minimum viable product are still missing (see the Todo section below).

We make heavy use of the [Eigen::Tensor](https://bitbucket.org/eigen/eigen/src/default/unsupported/Eigen/CXX11/src/Tensor/README.md) library for computation.

The goal is a simple library for inlining small neural networks in modern C++ code without the overhead of a major framework.

### Usage Example

```c++
nn::Net<float> net;
net.add(new nn::Dense<float>(batchSize, inputSize, numHiddenNodes, useBias));
net.add(new nn::Relu<float>());
net.add(new nn::Dense<float>(batchSize, numHiddenNodes, outputSize, useBias));
net.add(new nn::Softmax<float>());

nn::CrossEntropyLoss<float> lossFunc;
net.registerOptimizer(new nn::Adam<float>(learningRate));

int numEpoch = 100;
for (int currentEpoch = 0; currentEpoch < numEpoch; ++currentEpoch) {
    auto result = net.forward<2, 2>(input);
    auto loss = lossFunc.loss(result, labels);

    net.backward(lossFunc.backward(result, labels));
    net.step();

    std::cout << "Epoch: " << currentEpoch << " Loss: " << loss << std::endl;
}
```

### Dependencies

```
Eigen 3.3.4
Boost (only needed for the unit tests and the Iris example)
```

### Todo
A fair amount of basic functionality is still missing:

- Saving and loading of weights
- Support for multi-core CPU and GPU execution
- Additional activation layers
- Additional loss layers
- Convolutions
- Additional optimizers beyond the included SGD and Adam
- Memory optimizations for inference-only (test mode) use
- Proper logging


### Building
Assuming you have the above dependencies installed, there is nothing to build if you simply want to use the library: it is header-only, so just include `nn/Net.h`.

If you want to run the tests, do the following:

```bash
mkdir build
cd build
cmake -D NNCPP_BUILD_TESTS=ON ..
make test
```

### Examples
Currently, there are two very simple examples of training an MLP on some data.

[BasicMLP.cpp](./examples/BasicMLP.cpp) - A two-class problem: classifying separable, uniformly distributed 2D data (donut vs. donut hole)

[IrisTest.cpp](./examples/IrisTest.cpp) - Classifying the Iris dataset with a simple MLP.

These are built if you do the following:

```bash
mkdir build
cd build
cmake -D NNCPP_BUILD_EXAMPLES=ON ..
make
```

To run them, simply execute the resulting binaries:
```bash
cd build
./iris_test

# Or, for the basic MLP test

./basic_mlp
```
--------------------------------------------------------------------------------
/examples/BasicMLP.cpp:
--------------------------------------------------------------------------------
/**
 * @file BasicMLP.cpp
 *
 * @brief A very basic MLP to test training a network
 *
 * @date 12/22/17
 * @author Ben Caine
 */
#include "../nn/Net.h"
#include "../nn/loss/CrossEntropy.h"
#include "../nn/loss/MeanSquaredError.h"
#include <cassert>
#include <cmath>
#include <fstream>
#include <iostream>
#include <random>
#include <vector>

struct ToyLabeledData {
    std::vector<std::pair<float, float>> data;
    std::vector<int> labels;

    int getSize() const {
        assert (labels.size() == data.size());
        return labels.size();
    }
};

ToyLabeledData generateCircleData(int numInnerPoints, int numOuterPoints) {
    float innerRadius = 1.0;
    float outerRadius = 3.0;
    int numTotalPoints = numInnerPoints + numOuterPoints;

    ToyLabeledData dataset;

    // Random generator
    std::random_device rd;
    std::mt19937 rng(rd());
    std::uniform_real_distribution<> uniformDist(-innerRadius, innerRadius);

    // Create inner circle data (class 0)
    while (dataset.data.size() < numInnerPoints) {
        auto x = static_cast<float>(uniformDist(rng));
        auto y = static_cast<float>(uniformDist(rng));
        dataset.data.emplace_back(std::pair<float, float>{x, y});
        dataset.labels.push_back(0);
    }

    // Update uniform dist to be from -outer to outer
    uniformDist = std::uniform_real_distribution<>(-outerRadius, outerRadius);
    auto getOuterVal = [&]() {
        float val = 0;
        // Reject ones inside the inner circle
        while (std::abs(val) < innerRadius) {
            val = static_cast<float>(uniformDist(rng));
        }
        return val;
    };

    // Fill the rest with outer circle points
    while (dataset.data.size() < numTotalPoints) {
        float x = getOuterVal();
        float y = getOuterVal();

        dataset.data.emplace_back(std::pair<float, float>{x, y});
        dataset.labels.push_back(1);
    }

    return dataset;
};

void writeDataset(const ToyLabeledData &data, const std::string &dataPath, const std::string &labelPath) {
    std::ofstream dataFile;
    std::ofstream labelFile;

    dataFile.open(dataPath);
    labelFile.open(labelPath);

    for (unsigned int ii = 0; ii < data.getSize(); ++ii) {
        dataFile << data.data[ii].first << ", " << data.data[ii].second << "\n";
        labelFile << data.labels[ii] << "\n";
    }

    dataFile.close();
    labelFile.close();
}

int main() {
    int firstClassSize = 50;
    int secondClassSize = 50;
    int batchSize = firstClassSize + secondClassSize;
    int inputSize = 2;
    int numClasses = 2;

    Eigen::Tensor<float, 2> inputData(batchSize, inputSize);
    Eigen::Tensor<float, 2> labels(batchSize, numClasses);
    inputData.setZero();
    labels.setZero();

    auto dataset = generateCircleData(firstClassSize, secondClassSize);
    int datasetSize = dataset.getSize();
    for (unsigned int ii = 0; ii < datasetSize; ++ii) {
        inputData(ii, 0) = dataset.data[ii].first;
        inputData(ii, 1) = dataset.data[ii].second;
        // Set up one hot encoding
        labels(ii, 0) = static_cast<float>(dataset.labels[ii] == 0);
        labels(ii, 1) = static_cast<float>(dataset.labels[ii] == 1);
    }

    int numHiddenNodes = 10;
    bool useBias = true;
    nn::Net<float> net;
    net.add(new nn::Dense<>(batchSize, inputSize, numHiddenNodes, useBias));
    net.add(new nn::Relu<>());
    net.add(new nn::Dense<>(batchSize, numHiddenNodes, numHiddenNodes, useBias));
    net.add(new nn::Relu<>());
    net.add(new nn::Dense<>(batchSize, numHiddenNodes, numClasses, useBias));
    net.add(new nn::Softmax<>());

    nn::CrossEntropyLoss<float> lossFunc;
    net.registerOptimizer(new nn::Adam<float>(0.01));

    int numEpoch = 100;
    for (unsigned int ii = 0; ii < numEpoch; ++ii) {
        // Forward
        auto result = net.forward<2, 2>(inputData);
        auto loss = lossFunc.loss(result, labels);
        auto accuracy = lossFunc.accuracy(result, labels);
        std::cout << "Epoch: " << ii << " Current loss: " << loss << " accuracy: " << accuracy << std::endl;

        // Backprop
        net.backward(lossFunc.backward(result, labels));
        net.step();
    }
    return 0;
}

--------------------------------------------------------------------------------
/examples/IrisTest.cpp:
--------------------------------------------------------------------------------
/**
 * @file IrisTest.cpp
 *
 * @brief A very basic MLP to classify Iris
 *
 * @date 12/26/17
 * @author Ben Caine
 */

#include "../nn/Net.h"
#include "../nn/loss/CrossEntropy.h"
#include <boost/algorithm/string.hpp>
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <vector>


const std::map<std::string, int> IRIS_TYPE_TO_INT {
        {"Iris-setosa", 0},
        {"Iris-versicolor", 1},
        {"Iris-virginica", 2}
};

struct IrisDataset {
    std::vector<std::vector<float>> data;
    std::vector<int> labels;
};

IrisDataset loadIrisDataset(const std::string &path = "../examples/data/iris_data.csv") {
    IrisDataset dataset;

    std::ifstream irisFile(path);
    std::string line;
    while (std::getline(irisFile, line, '\n')) {
        std::vector<std::string> values;

        // TODO: Replace boost or link properly in CMake. Can't figure out the canonical name
        // for algorithm/string
        boost::split(values, line, [](char c) {
            return c == ',';
        });

        if (values.size() < 5) {
            std::cout << "Found line with less than five elements, skipping" << std::endl;
            continue;
        }

        float sepalLength = std::stof(values[0]);
        float sepalWidth = std::stof(values[1]);
        float petalLength = std::stof(values[2]);
        float petalWidth = std::stof(values[3]);
        std::string labelName = values[4];

        auto labelIter = IRIS_TYPE_TO_INT.find(labelName);

        if (labelIter == IRIS_TYPE_TO_INT.end()) {
            std::cerr << "Unknown Iris type of: " << labelName << " please check dataset."
<< std::endl; 58 | exit(-1); 59 | } 60 | int labelInt = labelIter->second; 61 | dataset.data.push_back({sepalLength, sepalWidth, petalLength, petalWidth}); 62 | dataset.labels.push_back(labelInt); 63 | } 64 | 65 | return dataset; 66 | } 67 | 68 | int main() { 69 | auto dataset = loadIrisDataset(); 70 | 71 | // TODO: Split into training and test 72 | int batchSize = dataset.labels.size(); 73 | int numFeatures = dataset.data[0].size(); 74 | int numClasses = *std::max_element(dataset.labels.begin(), dataset.labels.end()) + 1; 75 | 76 | Eigen::Tensor input(batchSize, numFeatures); 77 | Eigen::Tensor labels(batchSize, numClasses); 78 | input.setZero(); 79 | labels.setZero(); 80 | 81 | for (unsigned int ii = 0; ii < batchSize; ++ii) { 82 | for (unsigned int feature = 0; feature < numFeatures; ++feature) { 83 | input(ii, feature) = dataset.data[ii][feature]; 84 | } 85 | 86 | labels(ii, dataset.labels[ii]) = 1.0; 87 | } 88 | 89 | int numHiddenNodes = 20; 90 | bool useBias = true; 91 | 92 | nn::Net net; 93 | net.add(new nn::Dense<>(batchSize, numFeatures, numHiddenNodes, useBias)); 94 | net.add(new nn::Relu<>()); 95 | net.add(new nn::Dense<>(batchSize, numHiddenNodes, numHiddenNodes, useBias)); 96 | net.add(new nn::Relu<>()); 97 | net.add(new nn::Dense<>(batchSize, numHiddenNodes, numClasses, useBias)); 98 | net.add(new nn::Softmax<>()); 99 | 100 | nn::CrossEntropyLoss lossFunc; 101 | net.registerOptimizer(new nn::Adam(0.01)); 102 | 103 | int numEpoch = 250; 104 | for (unsigned int ii = 0; ii < numEpoch; ++ii) { 105 | auto result = net.forward<2, 2>(input); 106 | 107 | float loss = lossFunc.loss(result, labels); 108 | float accuracy = lossFunc.accuracy(result, labels); 109 | std::cout << std::setprecision(5); 110 | std::cout << "Epoch: " << ii << " loss: " << loss << " accuracy: " << accuracy << std::endl; 111 | 112 | auto lossBack = lossFunc.backward(result, labels); 113 | net.backward(lossBack); 114 | net.step(); 115 | } 116 | 117 | return 0; 118 | } 119 | 120 | -------------------------------------------------------------------------------- /examples/data/iris_data.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 
4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 
6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | -------------------------------------------------------------------------------- /nn/Net.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Net.h 3 | * 4 | * @breif A basic Net class that provides an interface to a neural network 5 | * 6 | * @date 12/17/17 7 | * @author Ben Caine 8 | */ 9 | 10 | #ifndef NN_CPP_NET_H 11 | #define NN_CPP_NET_H 12 | 13 | #include "layers/Layers.h" 14 | #include "loss/Losses.h" 15 | #include "optimizers/Optimizers.h" 16 | 17 | #include 18 | #include 19 | 20 | 21 | namespace nn { 22 | 23 | /** 24 | * @brief A neural network class 25 | */ 26 | template 27 | class Net { 28 | public: 29 | /** 30 | * @brief Init a neural network wrapper 31 | */ 32 | Net() = default; 33 | 34 | template 35 | Eigen::Tensor forward(Eigen::Tensor input) { 36 | if (m_layers.empty()) { 37 | std::cerr << "No layers specified" << std::endl; 38 | return {}; 39 | } 40 | 41 | // TODO: How to ensure each forward call returns a lazily evaluated expression instead of a Tensor 42 | // That way we can use this to autogenerate the evaluation chain for efficiency. 43 | // Right now it seems to evaluate each layer individually. 44 | auto currentInput = input; 45 | for (const auto &layer : m_layers) { 46 | currentInput = layer->forward(currentInput); 47 | } 48 | return currentInput; 49 | } 50 | 51 | template 52 | void backward(Eigen::Tensor input) { 53 | if (!m_hasOptimizer) { 54 | std::cerr << "No registered optimizer" << std::endl; 55 | return; 56 | } 57 | 58 | if (m_layers.empty()) { 59 | std::cerr << "No layers specified" << std::endl; 60 | return; 61 | } 62 | 63 | auto accumulatedGrad = input; 64 | for (auto rit = m_layers.rbegin(); rit != m_layers.rend(); ++rit) { 65 | accumulatedGrad = (*rit)->backward(accumulatedGrad); 66 | } 67 | } 68 | 69 | void registerOptimizer(nn::StochasticGradientDescent *optimizer) { 70 | m_hasOptimizer = true; 71 | // TODO: Pulled this out of a private member var cause I can't supertype 72 | std::shared_ptr> optimizerPtr(optimizer); 73 | for (auto &layer : m_layers) { 74 | layer->registerOptimizer(optimizerPtr); 75 | } 76 | } 77 | 78 | void registerOptimizer(nn::Adam *optimizer) { 79 | m_hasOptimizer = true; 80 | // TODO: Pulled this out of a private member var cause I can't supertype 81 | std::shared_ptr> optimizerPtr(optimizer); 82 | for (auto &layer : m_layers) { 83 | layer->registerOptimizer(optimizerPtr); 84 | } 85 | } 86 | 87 | /** 88 | * @brief Update weights for each layer 89 | */ 90 | void step() { 91 | for (auto &layer : m_layers) { 92 | layer->step(); 93 | } 94 | } 95 | 96 | /** 97 | * @brief Add a layer to the neural network 98 | * @param layer [in]: A layer to add 99 | * @return A reference to *this for method chaining 100 | */ 101 | template 102 | Net& add(std::unique_ptr> layer) { 103 | m_layers.push_back(layer); 104 | return *this; 105 | } 106 | 107 | /** 108 | * Add a dense layer 109 | * @param denseLayer [in]: The dense layer to add 110 | * @return A reference to *this for method chaining 111 | */ 112 | template 113 | Net& add(Dense *denseLayer) { 114 | // 
Do shape checks here 115 | m_layers.push_back(std::unique_ptr>(denseLayer)); 116 | return *this; 117 | } 118 | 119 | /** 120 | * Add a relu layer 121 | * @param reluLayer [in]: The relu layer to add 122 | * @return A reference to *this for method chaining 123 | */ 124 | template 125 | Net& add(Relu *reluLayer) { 126 | m_layers.push_back(std::unique_ptr>(reluLayer)); 127 | return *this; 128 | } 129 | 130 | /** 131 | * Add a softmax layer 132 | * @param softmaxLayer [in]: The softmax layer to add 133 | * @return A reference to *this for method chaining 134 | */ 135 | template 136 | Net& add(Softmax *softmaxLayer) { 137 | m_layers.push_back(std::unique_ptr>(softmaxLayer)); 138 | return *this; 139 | } 140 | 141 | 142 | private: 143 | std::vector>> m_layers; ///< A vector of all our layers 144 | bool m_hasOptimizer; ///< An optimizer has been added to the net 145 | }; 146 | } 147 | 148 | #endif //NN_CPP_NET_H 149 | -------------------------------------------------------------------------------- /nn/layers/Dense.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Dense.h 3 | * 4 | * @breif A Fully connected (Dense) layer 5 | * 6 | * @date 12/17/17 7 | * @author Ben Caine 8 | */ 9 | 10 | #ifndef NN_CPP_DENSE_H 11 | #define NN_CPP_DENSE_H 12 | 13 | #include "layers/Layer.h" 14 | #include "utils/WeightInitializers.h" 15 | 16 | namespace nn { 17 | 18 | template 19 | class Dense : public Layer { 20 | public: 21 | /** 22 | * @brief Create a Dense layer 23 | * @param batchSize [in]: The batch size going through the network 24 | * @param inputDimension [in]: Expected input dimension 25 | * @param outputDimension [in]: The output dimensionality (number of neurons) 26 | * @param useBias [in]: Whether to use a bias term 27 | * @param weightInitializer [in]: The weight initializer scheme to use. Defaults to GlorotUniform 28 | */ 29 | explicit Dense(int batchSize, int inputDimension, int outputDimension, bool useBias, 30 | InitializationScheme weightInitializer = InitializationScheme::GlorotUniform); 31 | 32 | /** 33 | * @brief Return the name of the layer 34 | * @return The layer name 35 | */ 36 | const std::string& getName() { 37 | const static std::string name = "Dense"; 38 | return name; 39 | } 40 | 41 | /** 42 | * @brief Forward through the layer (compute the output) 43 | * @param input [in]: The input to the layer (either data or previous layer output) 44 | * @return The output of this layer 45 | */ 46 | Eigen::Tensor forward(const Eigen::Tensor &input); 47 | 48 | /** 49 | * @brief Compute the gradient (backward pass) of the layer 50 | * @param accumulatedGrad [in]: The input to the backwards pass. (from next layer) 51 | * @return The output of the backwards pass (sent to previous layer) 52 | */ 53 | Eigen::Tensor backward(const Eigen::Tensor &accumulatedGrad); 54 | 55 | /** 56 | * @brief Get the input shape 57 | * @return The input shape 58 | */ 59 | 60 | Eigen::array getOutputShape() { 61 | return m_outputShape; 62 | }; 63 | 64 | /** 65 | * @brief Update weights of the layer w.r.t. 
gradient 66 | */ 67 | void step(); 68 | 69 | // TODO: Find a nicer way then duplication for subtype optimizer factories 70 | /** 71 | * @brief Set up the optimizer for our weights 72 | */ 73 | void registerOptimizer(std::shared_ptr> optimizer); 74 | 75 | /** 76 | * @brief Set up the optimizer for our weights 77 | */ 78 | void registerOptimizer(std::shared_ptr> optimizer); 79 | 80 | private: 81 | Eigen::array m_outputShape; ///< The output shape of this layer 82 | Eigen::Tensor m_inputCache; ///< Cache the input to calculate gradient 83 | Eigen::Tensor m_weights; ///< Our weights of the layer 84 | Eigen::Tensor m_bias; ///< The bias weights if specified 85 | 86 | // Gradients 87 | Eigen::Tensor m_weightsGrad; ///< The gradient of the weights 88 | Eigen::Tensor m_biasGrad; ///< The gradient of the bias 89 | std::unique_ptr> m_weightOptimizer; ///< The optimizer of our weights 90 | std::unique_ptr> m_biasOptimizer; ///< The optimizer of our bias 91 | 92 | bool m_useBias; ///< Whether we use the bias 93 | }; 94 | 95 | template 96 | Dense::Dense(int batchSize, int inputDimension, int outputDimension, bool useBias, 97 | InitializationScheme weightInitializer): 98 | m_outputShape({batchSize, outputDimension}), 99 | m_useBias(useBias) 100 | { 101 | m_weights = getRandomWeights(inputDimension, outputDimension, weightInitializer); 102 | 103 | m_weightsGrad = Eigen::Tensor(inputDimension, outputDimension); 104 | m_weightsGrad.setZero(); 105 | 106 | if (useBias) { 107 | m_bias = getRandomWeights(1, outputDimension, weightInitializer); 108 | 109 | m_biasGrad = Eigen::Tensor(1, outputDimension); 110 | m_biasGrad.setZero(); 111 | } 112 | }; 113 | 114 | template 115 | Eigen::Tensor Dense::forward(const Eigen::Tensor &input) { 116 | assert(input.dimensions()[1] == m_weights.dimensions()[0] && 117 | "Dense::forward dimensions of input and weights do not match"); 118 | m_inputCache = input; 119 | 120 | Eigen::array, 1> productDims = { Eigen::IndexPair(1, 0) }; 121 | auto result = input.contract(m_weights, productDims); 122 | 123 | if (m_useBias) { 124 | // Copy the bias from (1, outputSize) to (inputBatchSize, outputDimension) 125 | return result + m_bias.broadcast(Eigen::array{input.dimensions()[0], 1}); 126 | } else { 127 | return result; 128 | } 129 | } 130 | 131 | template 132 | Eigen::Tensor Dense::backward(const Eigen::Tensor &accumulatedGrad) { 133 | assert(accumulatedGrad.dimensions()[0] == m_inputCache.dimensions()[0] && 134 | "Dense::backward dimensions of accumulatedGrad and inputCache do not match"); 135 | // m_inputCache is of shape (batchSize, inputDimension) 136 | // accumulatedGrad is of shape (batchSize, outputDimension) 137 | // So we want to contract along dimensions (0, 0), aka m_inputCache.T * accumulatedGrad 138 | // Where dimensions would be (inputDimension, batchSize) * (batchSize, outputDimension) 139 | static const Eigen::array, 1> transposeInput = { Eigen::IndexPair(0, 0) }; 140 | 141 | m_weightsGrad = m_inputCache.contract(accumulatedGrad, transposeInput); 142 | if (m_useBias) { 143 | m_biasGrad = accumulatedGrad.sum(Eigen::array{0}).eval().reshape(Eigen::array{1, m_outputShape[1]}); 144 | } 145 | 146 | // accumulatedGrad is of shape (batchSize, outputDimensions) 147 | // m_weights is of shape (inputDimensions, outputDimensions) 148 | // So we want to contract along dimensions (1, 1), which would be accumulatedGrad * m_weights.T 149 | // Where dimensions would be (batchSize, outputDimension) * (outputDimension, inputDimension) 150 | static const Eigen::array, 1> transposeWeights = 
{ Eigen::IndexPair(1, 1)}; 151 | return accumulatedGrad.contract(m_weights, transposeWeights); 152 | } 153 | 154 | template 155 | void Dense::step() { 156 | m_weights -= m_weightOptimizer->weightUpdate(m_weightsGrad); 157 | 158 | if (m_useBias) { 159 | m_bias -= m_biasOptimizer->weightUpdate(m_biasGrad); 160 | } 161 | } 162 | 163 | // TODO: Find a nicer way then duplication for subtype optimizer factories 164 | template 165 | void Dense::registerOptimizer(std::shared_ptr> optimizer) { 166 | m_weightOptimizer = std::move(optimizer->template createOptimizer()); 167 | 168 | if (m_useBias) { 169 | m_biasOptimizer = std::move(optimizer->template createOptimizer()); 170 | } 171 | } 172 | 173 | template 174 | void Dense::registerOptimizer(std::shared_ptr> optimizer) { 175 | m_weightOptimizer = std::move(optimizer->template createOptimizer()); 176 | 177 | if (m_useBias) { 178 | m_biasOptimizer = std::move(optimizer->template createOptimizer()); 179 | } 180 | 181 | } 182 | } 183 | #endif //NN_CPP_DENSE_H 184 | -------------------------------------------------------------------------------- /nn/layers/Layer.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Layer.h 3 | * 4 | * @breif A base class that provides an interface to a layer 5 | * 6 | * @date 12/17/17 7 | * @author Ben Caine 8 | */ 9 | 10 | #ifndef NN_CPP_LAYER_H 11 | #define NN_CPP_LAYER_H 12 | 13 | #include 14 | #include 15 | #include "optimizers/Optimizers.h" 16 | 17 | namespace nn { 18 | template 19 | class Layer { 20 | public: 21 | /** 22 | * @brief Return the name of the layer 23 | */ 24 | virtual const std::string& getName() = 0; 25 | 26 | /** 27 | * @brief Take an input tensor, perform an operation on it, and return a new tensor 28 | * @param input [in]: The input tensor (from the previous layer) 29 | * @return An output tensor, which is fed into the next layer 30 | */ 31 | virtual Eigen::Tensor forward(const Eigen::Tensor &input) = 0; 32 | 33 | /** 34 | * @brief Perform the backwards operation on the layer. 35 | * @param input [in]: The input tensor (from next layer) 36 | * @return The output tensor, which is fed into the previous layer 37 | */ 38 | virtual Eigen::Tensor backward(const Eigen::Tensor &output) = 0; 39 | 40 | /** 41 | * @brief Update the weights after a backwards pass 42 | */ 43 | virtual void step() = 0; 44 | 45 | // TODO: Need to find a clean way to inherit optimizers to reduce repetition 46 | // TODO: If anyone is reading this, and has ideas... 
please let me know virtual methods w/ templates is impossible :( 47 | /** 48 | * @brief Registers the optimizer with the layer 49 | * @param optimizer [in]: The optimizer to register 50 | */ 51 | virtual void registerOptimizer(std::shared_ptr> optimizer) = 0; 52 | 53 | /** 54 | * @brief Registers the optimizer with the layer 55 | * @param optimizer [in]: The optimizer to register 56 | */ 57 | virtual void registerOptimizer(std::shared_ptr> optimizer) = 0; 58 | }; 59 | } 60 | 61 | #endif //NN_CPP_LAYER_H 62 | -------------------------------------------------------------------------------- /nn/layers/Layers.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Layers.h 3 | * 4 | * @breif A header to easily import other layers 5 | * 6 | * @date 12/22/17 7 | * @author Ben Caine 8 | */ 9 | 10 | #ifndef NN_CPP_LAYERS_H 11 | #define NN_CPP_LAYERS_H 12 | 13 | #include "Layer.h" 14 | #include "Dense.h" 15 | #include "Softmax.h" 16 | #include "Relu.h" 17 | 18 | #endif //NN_CPP_LAYERS_H 19 | -------------------------------------------------------------------------------- /nn/layers/Relu.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Relu.h 3 | * 4 | * @breif A Relu layer 5 | * 6 | * @date 12/17/17 7 | * @author Ben Caine 8 | */ 9 | 10 | #ifndef NN_CPP_RELU_H 11 | #define NN_CPP_RELU_H 12 | 13 | #include "layers/Layer.h" 14 | 15 | namespace nn { 16 | template 17 | class Relu : public Layer { 18 | public: 19 | 20 | /** 21 | * @brief initialize Relu 22 | */ 23 | Relu() = default; 24 | 25 | /** 26 | * @brief Return the name of the layer 27 | * @return The layer name 28 | */ 29 | const std::string& getName() { 30 | const static std::string name = "Relu"; 31 | return name; 32 | } 33 | 34 | /** 35 | * @brief Forward through the layer (compute the output) 36 | * @param input [in]: The input tensor to apply Relu to 37 | * @return max(0, input) 38 | */ 39 | Eigen::Tensor forward(const Eigen::Tensor &input); 40 | 41 | /** 42 | * @brief Compute the gradient (backward pass) of the layer 43 | * @param accumulatedGrad [in]: The input to the backwards pass (from the next layer) 44 | * @return The output of the backwards pass (sent to the previous layer) 45 | */ 46 | Eigen::Tensor backward(const Eigen::Tensor &accumulatedGrad); 47 | 48 | /** 49 | * @brief Void function in relu 50 | */ 51 | void step() {} 52 | 53 | /** 54 | * @brief Void function in relu 55 | */ 56 | void registerOptimizer(std::shared_ptr> optimizer) {} 57 | 58 | /** 59 | * @brief Void function in relu 60 | */ 61 | void registerOptimizer(std::shared_ptr> optimizer) {} 62 | 63 | private: 64 | Eigen::Tensor m_output; ///< The output of the forward pass 65 | }; 66 | 67 | template 68 | Eigen::Tensor Relu::forward(const Eigen::Tensor &input) { 69 | m_output = input.cwiseMax(static_cast(0)); 70 | return m_output; 71 | }; 72 | 73 | template 74 | Eigen::Tensor Relu::backward(const Eigen::Tensor &accumulatedGrad) { 75 | // Could also check a cached input to Relu::forward, but since 76 | // the output is simply (x, 0), we can just check our already cached output. 
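// The ReLU derivative is 1 where the activation was positive and 0 elsewhere, so the
// incoming gradient is passed through at the positive entries and zeroed out everywhere else.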
77 | auto inputPositive = m_output > static_cast(0); 78 | return inputPositive.select(accumulatedGrad, accumulatedGrad.constant(0.0)); 79 | } 80 | } 81 | 82 | #endif //NN_CPP_RELU_H 83 | -------------------------------------------------------------------------------- /nn/layers/Softmax.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Softmax.h 3 | * 4 | * @breif A Softmax layer 5 | * 6 | * @date 12/17/17 7 | * @author Ben Caine 8 | */ 9 | #ifndef NN_CPP_SOFTMAX_H 10 | #define NN_CPP_SOFTMAX_H 11 | 12 | #include "layers/Layer.h" 13 | 14 | namespace nn { 15 | template 16 | class Softmax : public Layer { 17 | public: 18 | 19 | /** 20 | * @brief initialize Softmax 21 | */ 22 | Softmax() = default; 23 | 24 | /** 25 | * @brief Return the name of the layer 26 | * @return The layer name 27 | */ 28 | const std::string& getName() { 29 | const static std::string name = "Softmax"; 30 | return name; 31 | } 32 | 33 | /** 34 | * @brief Forward through the layer (compute the output) 35 | * @param input [in]: The input tensor to apply softmax to 36 | * @return 37 | */ 38 | Eigen::Tensor forward(const Eigen::Tensor &input); 39 | 40 | /** 41 | * @brief Compute the gradient (backwards pass) of the layer 42 | * @param accumulatedGrad [in]: The input tensor to the backwards pass (from the next layer). This should be one hot encoded labels 43 | * @return The output of the backwards pass (sent ot the previous layer) 44 | */ 45 | Eigen::Tensor backward(const Eigen::Tensor &accumulatedGrad); 46 | 47 | /** 48 | * @brief Update Weights (doesn't do anything w/ softmax) 49 | */ 50 | void step() {} 51 | 52 | /** 53 | * @brief Void function in softmax 54 | */ 55 | void registerOptimizer(std::shared_ptr> optimizer) {} 56 | 57 | /** 58 | * @brief Void function in softmax 59 | */ 60 | void registerOptimizer(std::shared_ptr> optimizer) {} 61 | 62 | private: 63 | Eigen::Tensor m_output; ///< The output of the forward pass 64 | }; 65 | 66 | template 67 | Eigen::Tensor Softmax::forward(const Eigen::Tensor &input) { 68 | int batchSize = input.dimensions()[0]; 69 | int classDims = input.dimensions()[1]; 70 | auto shiftedInput = input - input.maximum(Eigen::array{1}) 71 | .eval().reshape(Eigen::array{batchSize, 1}) 72 | .broadcast(Eigen::array{1, classDims}); 73 | 74 | auto exponentiated = shiftedInput.exp(); 75 | m_output = exponentiated * exponentiated.sum(Eigen::array{1}) 76 | .inverse().eval() 77 | .reshape(Eigen::array({batchSize, 1})) 78 | .broadcast(Eigen::array({1, classDims})); 79 | return m_output; 80 | } 81 | 82 | template 83 | Eigen::Tensor Softmax::backward(const Eigen::Tensor &accumulatedGrad) { 84 | const int batchSize = accumulatedGrad.dimensions()[0]; 85 | assert(batchSize == m_output.dimensions()[0] && "Dimensions of number of batches does not match"); 86 | return accumulatedGrad / accumulatedGrad.constant(batchSize); 87 | } 88 | } 89 | 90 | #endif //NN_CPP_SOFTMAX_H 91 | -------------------------------------------------------------------------------- /nn/loss/CrossEntropy.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file CrossEntropy.h 3 | * 4 | * @breif Cross Entropy loss 5 | * 6 | * @date 12/17/17 7 | * @author Ben Caine 8 | */ 9 | 10 | #ifndef NN_CPP_CROSSENTROPY_H 11 | #define NN_CPP_CROSSENTROPY_H 12 | 13 | #include 14 | 15 | namespace nn { 16 | template 17 | class CrossEntropyLoss { 18 | public: 19 | /** 20 | * @brief Create a cross entropy loss layer 21 | */ 22 | CrossEntropyLoss() = default; 23 | 
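// For one-hot labels y and predicted probabilities p, the loss computed below is
// L = -(1 / batchSize) * sum over samples and classes of y * log(p + eps),
// where eps is a small constant guarding against log(0).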
24 | /** 25 | * @brief Calculate the cross entropy loss 26 | * @param probabilities [in]: "Probabilities" as in 0-1 values output by a layer like Softmax 27 | * @param labels [in]: One hot encoded labels 28 | * @return The loss 29 | */ 30 | Dtype loss(const Eigen::Tensor &probabilities, const Eigen::Tensor &labels); 31 | 32 | /** 33 | * @brief Calculate the accuracy of our labels 34 | * @param probabilities [in]: "Probabilities" as in 0-1 values output by a layer like Softmax 35 | * @param labels [in]: One hot encoded labels 36 | * @return The total accuracy (num_correct / total) 37 | */ 38 | Dtype accuracy(const Eigen::Tensor &probabilities, const Eigen::Tensor &labels); 39 | 40 | /** 41 | * @brief Compute the gradient for Cross Entropy Loss 42 | * @param probabilities [in]: "Probabilities" as in 0-1 values output by a layer like Softmax 43 | * @param labels [in]: One hot encoded labels 44 | * @return The gradient of this loss layer 45 | */ 46 | Eigen::Tensor 47 | backward(const Eigen::Tensor &probabilities, const Eigen::Tensor &labels); 48 | }; 49 | 50 | template 51 | Dtype CrossEntropyLoss::loss(const Eigen::Tensor &probabilities, 52 | const Eigen::Tensor &labels) { 53 | int batchSize = probabilities.dimensions()[0]; 54 | 55 | // TODO: Do I need a stabilizing const here? 56 | static const Dtype stabilizingVal = 0.0001; 57 | Eigen::Tensor summedLoss = (labels * 58 | (probabilities.constant(stabilizingVal) + probabilities).log()).sum(); 59 | return (-1.0 / batchSize) * summedLoss(0); 60 | } 61 | 62 | template 63 | Dtype CrossEntropyLoss::accuracy(const Eigen::Tensor &probabilities, 64 | const Eigen::Tensor &labels) { 65 | assert(probabilities.dimensions()[0] == labels.dimensions()[0] && 66 | "CrossEntropy::accuracy dimensions did not match"); 67 | assert(probabilities.dimensions()[1] == labels.dimensions()[1] && 68 | "CrossEntropy::accuracy dimensions did not match"); 69 | 70 | auto batchSize = static_cast(labels.dimensions()[0]); 71 | 72 | // Argmax across dimension = 1 (so we get a column vector) 73 | Eigen::Tensor ifTensor = probabilities.argmax(1) == labels.argmax(1); 74 | Eigen::Tensor thenTensor(batchSize); 75 | auto result = ifTensor.select(thenTensor.constant(1.0), thenTensor.constant(0)); 76 | Eigen::Tensor count = result.sum(); 77 | return static_cast(count(0)) / batchSize; 78 | } 79 | 80 | template 81 | Eigen::Tensor CrossEntropyLoss::backward(const Eigen::Tensor &probabilities, 82 | const Eigen::Tensor &labels) { 83 | return probabilities - labels; 84 | } 85 | } 86 | #endif //NN_CPP_CROSSENTROPY_H 87 | -------------------------------------------------------------------------------- /nn/loss/HuberLoss.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file HuberLoss.h 3 | * 4 | * @breif Huber Loss 5 | * 6 | * @date 1/06/17 7 | * @author Ben Caine 8 | */ 9 | #ifndef NN_CPP_HUBERLOSS_H 10 | #define NN_CPP_HUBERLOSS_H 11 | 12 | #include 13 | 14 | namespace nn { 15 | template 16 | class HuberLoss { 17 | public: 18 | 19 | /** 20 | * @brief Initialize a SmoothL1Loss loss function 21 | */ 22 | explicit HuberLoss(Dtype threshold = 1.0): m_threshold(threshold) {} 23 | 24 | /** 25 | * @brief Compute the loss 26 | * @param predictions [in]: Predictions from the network 27 | * @param labels [in]: Labels to compute loss with 28 | * @return The loss as a scalar 29 | */ 30 | Dtype loss(const Eigen::Tensor &predictions, const Eigen::Tensor &labels); 31 | 32 | /** 33 | * @brief Compute the gradient of the loss given this data 34 | * @param 
predictions [in]: Predictions from the network 35 | * @param labels [in]: Labels from dataset 36 | * @return The gradient of the loss layer 37 | */ 38 | Eigen::Tensor 39 | backward(const Eigen::Tensor &predictions, const Eigen::Tensor &labels); 40 | 41 | private: 42 | Dtype m_threshold; ///< The threshold used to determine which part of the piecewise loss 43 | Eigen::Tensor m_cachedSwitchResults; ///< Whether abs(y - y_hat) <= m_threshold 44 | }; 45 | 46 | template 47 | Dtype HuberLoss::loss(const Eigen::Tensor &predictions, 48 | const Eigen::Tensor &labels) { 49 | assert(predictions.dimensions()[0] == labels.dimensions()[0] && 50 | "HuberLoss::loss dimensions don't match"); 51 | assert(predictions.dimensions()[1] == labels.dimensions()[1] && 52 | "HuberLoss::loss dimensions don't match"); 53 | int batchSize = predictions.dimensions()[0]; 54 | // Definition taken from: https://en.wikipedia.org/wiki/Huber_loss 55 | 56 | // Precalculate y_hat - y 57 | auto error = predictions - labels; 58 | auto absoluteError = error.abs(); 59 | 60 | // Set up our switch statement and cache it 61 | m_cachedSwitchResults = absoluteError <= m_threshold; 62 | 63 | // Calculate both terms for the huber loss 64 | auto lessThanThreshold = error.constant(0.5) * error.square(); 65 | auto moreThanThreshold = error.constant(m_threshold) * absoluteError - error.constant(0.5 * pow(m_threshold, 2)); 66 | 67 | // If abs(y_hat - y) <= threshold 68 | auto perItemLoss = m_cachedSwitchResults.select( 69 | lessThanThreshold, // Then use 0.5 * (y_hat - y)^2 70 | moreThanThreshold); // Else use thresh * |y_hat - y| - (0.5 * threshold^2) 71 | 72 | Eigen::Tensor sum = perItemLoss.sum(); 73 | // Sum and divide by N 74 | return sum(0) / batchSize; 75 | } 76 | 77 | template 78 | Eigen::Tensor HuberLoss::backward(const Eigen::Tensor &predictions, 79 | const Eigen::Tensor &labels) { 80 | 81 | auto error = predictions - labels; 82 | 83 | // Note: Grad of linear part of error is threshold * (error / abs(error)), which 84 | // simplifies to threshold * sign(error) 85 | auto errorPositiveOrZero = error >= static_cast(0); 86 | auto absoluteErrorGrad = errorPositiveOrZero.select(error.constant(m_threshold), error.constant(-m_threshold)); 87 | return m_cachedSwitchResults.select(error, absoluteErrorGrad); 88 | } 89 | } 90 | 91 | #endif //NN_CPP_SMOOTHL1LOSS_H 92 | -------------------------------------------------------------------------------- /nn/loss/Losses.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Losses.h 3 | * 4 | * @breif Convenience import function 5 | * 6 | * @date 1/06/17 7 | * @author Ben Caine 8 | */ 9 | 10 | #ifndef NN_CPP_LOSSES_H 11 | #define NN_CPP_LOSSES_H 12 | 13 | #include "CrossEntropy.h" 14 | #include "MeanSquaredError.h" 15 | #include "HuberLoss.h" 16 | 17 | #endif //NN_CPP_LOSSES_H 18 | -------------------------------------------------------------------------------- /nn/loss/MeanSquaredError.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file MeanSquaredError.h 3 | * 4 | * @breif Mean Squared Error loss 5 | * 6 | * @date 12/26/17 7 | * @author Ben Caine 8 | */ 9 | #ifndef NN_CPP_MEANSQUAREDERROR_H 10 | #define NN_CPP_MEANSQUAREDERROR_H 11 | 12 | #include 13 | 14 | namespace nn { 15 | template 16 | class MeanSquaredError { 17 | public: 18 | 19 | /** 20 | * @brief Initialize a Mean Squared Error loss function 21 | */ 22 | MeanSquaredError() = default; 23 | 24 | /** 25 | * @brief Compute the MSE loss 26 | * 
@param predictions [in]: Predictions from the network 27 | * @param labels [in]: Labels to compute loss with 28 | * @return The loss as a scalar 29 | */ 30 | Dtype loss(const Eigen::Tensor &predictions, const Eigen::Tensor &labels); 31 | 32 | /** 33 | * @brief Compute the gradient of Mean Squared Error given this data 34 | * @param predictions [in]: Predictions from the network 35 | * @param labels [in]: Labels from dataset 36 | * @return The gradient of the loss layer 37 | */ 38 | Eigen::Tensor 39 | backward(const Eigen::Tensor &predictions, const Eigen::Tensor &labels); 40 | }; 41 | 42 | template 43 | Dtype MeanSquaredError::loss(const Eigen::Tensor &predictions, 44 | const Eigen::Tensor &labels) { 45 | assert(predictions.dimensions()[0] == labels.dimensions()[0] && 46 | "MeanSquaredError::loss dimensions don't match"); 47 | assert(predictions.dimensions()[1] == labels.dimensions()[1] && 48 | "MeanSquaredError::loss dimensions don't match"); 49 | 50 | int batchSize = predictions.dimensions()[0]; 51 | 52 | Eigen::Tensor squaredSum = (predictions - labels).square().sum(); 53 | return squaredSum(0) / batchSize; 54 | } 55 | 56 | template 57 | Eigen::Tensor MeanSquaredError::backward(const Eigen::Tensor &predictions, 58 | const Eigen::Tensor &labels) { 59 | return predictions - labels; 60 | } 61 | } 62 | 63 | #endif //NN_CPP_MEANSQUAREDERROR_H 64 | -------------------------------------------------------------------------------- /nn/optimizers/AdamImpl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file AdamImpl.h 3 | * 4 | * @breif Adam Optimizer 5 | * 6 | * @date 1/06/17 7 | * @author Ben Caine 8 | */ 9 | 10 | #ifndef NN_CPP_ADAM_IMPL_H 11 | #define NN_CPP_ADAM_IMPL_H 12 | 13 | #include "OptimizerImpl.h" 14 | 15 | namespace nn { 16 | namespace internal { 17 | template 18 | class AdamImpl : public OptimizerImpl { 19 | public: 20 | 21 | /** 22 | * @brief Initialize our Adam Solver 23 | * @param learningRate [in]: Base learning rate 24 | * @param beta1 [in]: The first moment decay factor (default = 0.9) 25 | * @param beta2 [in]: The second moment decay factor (default = 0.999) 26 | * @param epsilon [in]: A stabilizing factor for division (default = 1e-8) 27 | */ 28 | explicit AdamImpl(Dtype learningRate, Dtype beta1, Dtype beta2, Dtype epsilon): 29 | m_learningRate(learningRate), m_beta1(beta1), m_beta2(beta2), m_epsilon(epsilon), 30 | m_isInitialized(false), m_currentTimestep(1) 31 | {} 32 | 33 | /** 34 | * @brief Get the update to apply to the weights 35 | * @param gradWeights [in]: Weights to update 36 | * @return The factor to update the weights by 37 | */ 38 | Eigen::Tensor weightUpdate(const Eigen::Tensor &gradWeights) { 39 | if (!m_isInitialized) { 40 | m_firstMoment = Eigen::Tensor(gradWeights.dimensions()); 41 | m_firstMoment.setZero(); 42 | 43 | m_secondMoment = Eigen::Tensor(gradWeights.dimensions()); 44 | m_secondMoment.setZero(); 45 | m_isInitialized = true; 46 | } 47 | 48 | // m_t = B_1 * m_(t-1) + (1 - B_1) * g_t 49 | m_firstMoment = m_firstMoment.constant(m_beta1) * m_firstMoment + 50 | gradWeights.constant(1 - m_beta1) * gradWeights; 51 | 52 | // v_t = B_2 * v_(t-1) + (1 - B_2) * g_t^2 53 | m_secondMoment = m_secondMoment.constant(m_beta2) * m_secondMoment + 54 | gradWeights.constant(1 - m_beta2) * gradWeights.square(); 55 | // 56 | // std::cout << "First moment: " << m_firstMoment << std::endl; 57 | // std::cout << "Second moment: " << m_secondMoment << std::endl; 58 | // std::cout << std::endl << std::endl << std::endl; 59 | 
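// Correct for the zero initialization of the moment estimates:
// m_hat = m_t / (1 - B_1^t), v_hat = v_t / (1 - B_2^t)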
60 | auto biasCorrectedFirstMoment = m_firstMoment / m_firstMoment.constant(1 - pow(m_beta1, m_currentTimestep)); 61 | auto biasCorrectedSecondMoment = m_secondMoment / m_secondMoment.constant(1 - pow(m_beta2, m_currentTimestep)); 62 | // 63 | // std::cout << "Bias corrected first: " << biasCorrectedFirstMoment << std::endl; 64 | // std::cout << "Bias corrected second: " << biasCorrectedSecondMoment << std::endl; 65 | // std::cout << std::endl << std::endl << std::endl; 66 | 67 | 68 | m_currentTimestep ++; 69 | // Return firstMoment * (learning_rate) / (sqrt(secondMoment) + epsilon) 70 | return biasCorrectedFirstMoment * ( 71 | (gradWeights.constant(m_learningRate) / 72 | (biasCorrectedSecondMoment.sqrt() + gradWeights.constant(m_epsilon)) 73 | )); 74 | }; 75 | 76 | private: 77 | Dtype m_learningRate; ///< The learning rate of our optimizer 78 | Dtype m_beta1; ///< Our B1 parameter (first moment decay) 79 | Dtype m_beta2; ///< Our B2 parameter (second moment decay) 80 | Dtype m_epsilon; ///< Stability factor 81 | 82 | bool m_isInitialized; ///< On our first iteration, set the first and second order gradients to zero 83 | size_t m_currentTimestep; ///< Our current timestep (iteration) 84 | 85 | // Our exponentially decaying average of past gradients 86 | Eigen::Tensor m_firstMoment; ///< Our m_t term that represents the first order gradient decay 87 | Eigen::Tensor m_secondMoment; ///< Our v_t term that represents the second order gradient decay 88 | }; 89 | } 90 | } 91 | 92 | #endif //NN_CPP_ADAM_IMPL_H 93 | -------------------------------------------------------------------------------- /nn/optimizers/OptimizerImpl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file BaseOptimizerImpl.h 3 | * 4 | * @breif A base optimizer impl. Impls update the weights, whereas 5 | * the base ones in Optimizers.h carry paramaters and create optimizer impls 6 | * 7 | * @date 1/06/17 8 | * @author Ben Caine 9 | */ 10 | 11 | #ifndef NN_CPP_OPTIMIZERIMPL_H 12 | #define NN_CPP_OPTIMIZERIMPL_H 13 | 14 | #include 15 | #include 16 | 17 | namespace nn { 18 | template 19 | class OptimizerImpl { 20 | public: 21 | virtual Eigen::Tensor weightUpdate(const Eigen::Tensor &weights) = 0; 22 | }; 23 | } 24 | 25 | #endif //NN_CPP_OPTIMIZERIMPL_H 26 | -------------------------------------------------------------------------------- /nn/optimizers/Optimizers.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Optimizers.h 3 | * 4 | * @breif Optimizer Constructor 5 | * 6 | * @date 1/06/17 7 | * @author Ben Caine 8 | */ 9 | 10 | #ifndef NN_CPP_OPTIMIZERS_H 11 | #define NN_CPP_OPTIMIZERS_H 12 | 13 | #include "StochasticGradientDescentImpl.h" 14 | #include "AdamImpl.h" 15 | #include 16 | 17 | namespace nn { 18 | /** 19 | * The current design is that you declare your optimizer in your main training area 20 | * and the Net class propagates this to all layers, which create their own Impls 21 | * with one Impl per weight. 
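* (Adam, for instance, keeps first and second moment estimates for every weight tensor, so each tensor needs its own optimizer state.)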
The design is geared towards more complex optimizers 22 | */ 23 | 24 | 25 | /** 26 | * @brief Factory method of SGD 27 | * 28 | * @tparam Dtype : The floating point type of the optimizer 29 | */ 30 | template 31 | class StochasticGradientDescent { 32 | public: 33 | /** 34 | * @brief Create a SGD factory w/ learning rate 35 | * @param learningRate [in]: Learning rate of SGD optimizer 36 | */ 37 | explicit StochasticGradientDescent(Dtype learningRate): 38 | m_learningRate(learningRate) {} 39 | 40 | /** 41 | * @brief Create an optimizer Impl for our given type 42 | * @tparam Dims [in]: The dimensionality of the tensor the optimizer will update 43 | * @return An optimizer impl that can update weights and keep track of state 44 | */ 45 | template 46 | std::unique_ptr> createOptimizer() const { 47 | return std::unique_ptr>(new internal::StochasticGradientDescentImpl(m_learningRate)); 48 | } 49 | 50 | private: 51 | Dtype m_learningRate; ///< The learning rate 52 | }; 53 | 54 | template 55 | class Adam { 56 | public: 57 | /** 58 | * @brief Create an Adam optimizer 59 | * @param learningRate [in]: Base learning rate 60 | * @param beta1 [in]: The first moment decay factor (default = 0.9) 61 | * @param beta2 [in]: The second moment decay factor (default = 0.999) 62 | * @param epsilon [in]: A stabilizing factor for division (default = 1e-8) 63 | */ 64 | explicit Adam(Dtype learningRate, Dtype beta1 = 0.9, Dtype beta2 = 0.999, Dtype epsilon = 1e-8): 65 | m_learningRate(learningRate), m_beta1(beta1), m_beta2(beta2), m_epsilon(epsilon) 66 | {} 67 | 68 | /** 69 | * Create an optimizer Impl for our given type 70 | * @tparam Dims [in]: The dimensionality of the tensor the optimizer will update 71 | * @return An optimizer impl that can update weights and keep track of state 72 | */ 73 | template 74 | std::unique_ptr> createOptimizer() const { 75 | return std::unique_ptr>(new internal::AdamImpl(m_learningRate, m_beta1, m_beta2, m_epsilon)); 76 | }; 77 | 78 | private: 79 | Dtype m_learningRate; ///< The learning rate of our optimizer 80 | Dtype m_beta1; ///< Our B1 parameter (first moment decay) 81 | Dtype m_beta2; ///< Our B2 parameter (second moment decay) 82 | Dtype m_epsilon; ///< Stability factor 83 | }; 84 | 85 | 86 | } 87 | 88 | #endif //NN_CPP_OPTIMIZERS_H 89 | -------------------------------------------------------------------------------- /nn/optimizers/StochasticGradientDescentImpl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file StochasticGradientDescent.h 3 | * 4 | * @breif Stochastic Gradient Descent 5 | * 6 | * @date 1/06/17 7 | * @author Ben Caine 8 | */ 9 | 10 | #ifndef NN_CPP_STOCHASTICGRADIENTDESCENT_IMPL_H 11 | #define NN_CPP_STOCHASTICGRADIENTDESCENT_IMPL_H 12 | 13 | #include "OptimizerImpl.h" 14 | 15 | namespace nn { 16 | namespace internal { 17 | template 18 | class StochasticGradientDescentImpl : public OptimizerImpl { 19 | public: 20 | 21 | // TODO: Add momentum 22 | /** 23 | * @brief Initialize our SGD Solver 24 | * @param learningRate [in]: The learning rate of SGD 25 | */ 26 | explicit StochasticGradientDescentImpl(Dtype learningRate): 27 | m_learningRate(learningRate) {} 28 | 29 | /** 30 | * @brief Get the update to apply to the weights 31 | * @param gradWeights [in]: Weights to update 32 | * @return The factor to update the weights by 33 | */ 34 | Eigen::Tensor weightUpdate(const Eigen::Tensor &gradWeights) { 35 | return gradWeights * gradWeights.constant(m_learningRate); 36 | }; 37 | 38 | private: 39 | Dtype m_learningRate; 
///< Our current learning rate 40 | }; 41 | } 42 | } 43 | 44 | #endif //NN_CPP_STOCHASTICGRADIENTDESCENT_IMPL_H 45 | -------------------------------------------------------------------------------- /nn/utils/WeightInitializers.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file WeightInitializers.h 3 | * 4 | * @breif A collection of helper functions to initialize weights 5 | * 6 | * @date 12/17/17 7 | * @author Ben Caine 8 | */ 9 | #ifndef NN_CPP_WEIGHTINITIALIZERS_H 10 | #define NN_CPP_WEIGHTINITIALIZERS_H 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | namespace nn { 17 | /** 18 | * @brief How to initialize the dense weights 19 | */ 20 | enum class InitializationScheme { 21 | GlorotUniform, 22 | GlorotNormal 23 | }; 24 | 25 | template 26 | class WeightDistribution { 27 | public: 28 | /** 29 | * @brief Create a weight distribution to draw from 30 | * @param scheme [in]: The scheme to initialize with 31 | * @param fanIn [in]: The fan in of the layer 32 | * @param fanOut [in]: The fan out of the layer 33 | */ 34 | explicit WeightDistribution(InitializationScheme scheme, int fanIn, int fanOut): 35 | m_scheme(scheme), 36 | m_randomNumberGenerator(std::random_device()()) 37 | { 38 | if (m_scheme == InitializationScheme::GlorotUniform) { 39 | Dtype limit = std::sqrt(6.0 / (fanIn + fanOut)); 40 | m_uniformDist.reset(new std::uniform_real_distribution(-limit, limit)); 41 | } else if (m_scheme == InitializationScheme::GlorotNormal) { 42 | Dtype std = std::sqrt(2.0 / (fanIn + fanOut)); 43 | m_normalDist.reset(new std::normal_distribution(0, std)); 44 | } 45 | } 46 | 47 | /** 48 | * @brief Get a value from the distribution 49 | * @return 50 | */ 51 | Dtype get() { 52 | if (m_scheme == InitializationScheme::GlorotUniform) { 53 | return (*m_uniformDist)(m_randomNumberGenerator); 54 | } else if (m_scheme == InitializationScheme::GlorotNormal) { 55 | return (*m_normalDist)(m_randomNumberGenerator); 56 | } else { 57 | std::cerr << "Tried to draw from distribution that is uninitialized" << std::endl; 58 | exit(-1); 59 | } 60 | } 61 | 62 | private: 63 | InitializationScheme m_scheme; ///< Our init scheme 64 | std::mt19937 m_randomNumberGenerator; ///< Our random number generator 65 | std::unique_ptr> m_uniformDist; ///< Our uniform distribution 66 | std::unique_ptr> m_normalDist; ///< Our normal distribution 67 | }; 68 | 69 | 70 | /** 71 | * @brief Initialize a tensor of dimension (input x output) with a specified scheme 72 | * @tparam Dtype [in]: Datatype of the tensor (float/double) 73 | * @param inputDimensions [in]: The input dimensions of the layer 74 | * @param outputDimensions [in]: The output dimensions of the layer 75 | * @param scheme [in]: Initialization Scheme 76 | * @return A randomly initialized tensor 77 | * 78 | * @note This function only exists because I can't seem to get Tensor.setRandom to work 79 | * with their builtins. 
/nn/utils/WeightInitializers.h:
--------------------------------------------------------------------------------
1 | /**
2 |  * @file WeightInitializers.h
3 |  *
4 |  * @brief A collection of helper functions to initialize weights
5 |  *
6 |  * @date 12/17/17
7 |  * @author Ben Caine
8 |  */
9 | #ifndef NN_CPP_WEIGHTINITIALIZERS_H
10 | #define NN_CPP_WEIGHTINITIALIZERS_H
11 | 
12 | #include <unsupported/Eigen/CXX11/Tensor>
13 | #include <memory>
14 | #include <random>
15 | 
16 | namespace nn {
17 |     /**
18 |      * @brief How to initialize the dense weights
19 |      */
20 |     enum class InitializationScheme {
21 |         GlorotUniform,
22 |         GlorotNormal
23 |     };
24 | 
25 |     template <typename Dtype>
26 |     class WeightDistribution {
27 |     public:
28 |         /**
29 |          * @brief Create a weight distribution to draw from
30 |          * @param scheme [in]: The scheme to initialize with
31 |          * @param fanIn [in]: The fan in of the layer
32 |          * @param fanOut [in]: The fan out of the layer
33 |          */
34 |         explicit WeightDistribution(InitializationScheme scheme, int fanIn, int fanOut):
35 |             m_scheme(scheme),
36 |             m_randomNumberGenerator(std::random_device()())
37 |         {
38 |             if (m_scheme == InitializationScheme::GlorotUniform) {
39 |                 Dtype limit = std::sqrt(6.0 / (fanIn + fanOut));
40 |                 m_uniformDist.reset(new std::uniform_real_distribution<Dtype>(-limit, limit));
41 |             } else if (m_scheme == InitializationScheme::GlorotNormal) {
42 |                 Dtype std = std::sqrt(2.0 / (fanIn + fanOut));
43 |                 m_normalDist.reset(new std::normal_distribution<Dtype>(0, std));
44 |             }
45 |         }
46 | 
47 |         /**
48 |          * @brief Get a value from the distribution
49 |          * @return A randomly drawn weight value
50 |          */
51 |         Dtype get() {
52 |             if (m_scheme == InitializationScheme::GlorotUniform) {
53 |                 return (*m_uniformDist)(m_randomNumberGenerator);
54 |             } else if (m_scheme == InitializationScheme::GlorotNormal) {
55 |                 return (*m_normalDist)(m_randomNumberGenerator);
56 |             } else {
57 |                 std::cerr << "Tried to draw from distribution that is uninitialized" << std::endl;
58 |                 exit(-1);
59 |             }
60 |         }
61 | 
62 |     private:
63 |         InitializationScheme m_scheme;                                         ///< Our init scheme
64 |         std::mt19937 m_randomNumberGenerator;                                  ///< Our random number generator
65 |         std::unique_ptr<std::uniform_real_distribution<Dtype>> m_uniformDist;  ///< Our uniform distribution
66 |         std::unique_ptr<std::normal_distribution<Dtype>> m_normalDist;         ///< Our normal distribution
67 |     };
68 | 
69 | 
70 |     /**
71 |      * @brief Initialize a tensor of dimension (input x output) with a specified scheme
72 |      * @tparam Dtype [in]: Datatype of the tensor (float/double)
73 |      * @param inputDimensions [in]: The input dimensions of the layer
74 |      * @param outputDimensions [in]: The output dimensions of the layer
75 |      * @param scheme [in]: Initialization scheme
76 |      * @return A randomly initialized tensor
77 |      *
78 |      * @note This function only exists because I can't seem to get Tensor.setRandom to work
79 |      * with their builtins. This is way, way less efficient, but is only called on creation of a new layer
80 |      */
81 |     template <typename Dtype>
82 |     Eigen::Tensor<Dtype, 2> getRandomWeights(int inputDimensions, int outputDimensions,
83 |                                              InitializationScheme scheme = InitializationScheme::GlorotUniform) {
84 |         Eigen::Tensor<Dtype, 2> weights(inputDimensions, outputDimensions);
85 |         weights.setZero();
86 | 
87 |         auto distribution = WeightDistribution<Dtype>(scheme, inputDimensions, outputDimensions);
88 |         for (int ii = 0; ii < inputDimensions; ++ii) {
89 |             for (int jj = 0; jj < outputDimensions; ++jj) {
90 |                 weights(ii, jj) = distribution.get();
91 |             }
92 |         }
93 |         return weights;
94 |     }
95 | }
96 | 
97 | #endif //NN_CPP_WEIGHTINITIALIZERS_H
98 | 
--------------------------------------------------------------------------------
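
Glorot/Xavier initialization scales the weight distribution by fan-in and fan-out; for the uniform variant every draw lies within ±sqrt(6 / (fanIn + fanOut)). A standalone sketch that calls `getRandomWeights` directly and checks that bound (illustrative only, not one of the repository's tests):

```c++
#include <cassert>
#include <cmath>
#include "utils/WeightInitializers.h"

int main() {
    const int fanIn = 784;
    const int fanOut = 100;
    Eigen::Tensor<float, 2> weights =
            nn::getRandomWeights<float>(fanIn, fanOut, nn::InitializationScheme::GlorotUniform);

    // Every GlorotUniform draw should fall inside [-limit, limit]
    const float limit = std::sqrt(6.0f / (fanIn + fanOut));
    for (int ii = 0; ii < fanIn; ++ii) {
        for (int jj = 0; jj < fanOut; ++jj) {
            assert(std::abs(weights(ii, jj)) <= limit);
        }
    }
    return 0;
}
```
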
/tests/LossTests.cpp:
--------------------------------------------------------------------------------
1 | /**
2 |  * @file LossTests.cpp
3 |  *
4 |  * @brief Tests for our loss functions
5 |  *
6 |  * @date 1/06/17
7 |  * @author Ben Caine
8 |  */
9 | 
10 | #define BOOST_TEST_DYN_LINK
11 | #define BOOST_TEST_MODULE LossTests
12 | 
13 | #include <boost/test/unit_test.hpp>
14 | #include <array>
15 | #include "loss/CrossEntropy.h"
16 | #include "loss/HuberLoss.h"
17 | #include "loss/MeanSquaredError.h"
18 | 
19 | BOOST_AUTO_TEST_CASE(test_cross_entropy_loss) {
20 |     // TODO: Really just checking compilation right now. Need to use TF/Pytorch/Numpy to generate test cases
21 |     nn::CrossEntropyLoss<float> lossFunction;
22 | 
23 |     int batchSize = 4;
24 |     int numClasses = 3;
25 |     Eigen::Tensor<float, 2> predictions(batchSize, numClasses);
26 |     predictions.setValues({
27 |         {0.1, 0.7, 0.2},
28 |         {0.9, 0.0, 0.1},
29 |         {0.0, 0.0, 1.0},
30 |         {0.3, 0.4, 0.3}
31 |     });
32 | 
33 |     Eigen::Tensor<float, 2> labels(batchSize, numClasses);
34 |     labels.setValues({
35 |         {0, 1, 0},
36 |         {1, 0, 0},
37 |         {1, 0, 0},
38 |         {0, 1, 0}
39 |     });
40 | 
41 |     auto loss = lossFunction.loss(predictions, labels);
42 |     auto accuracy = lossFunction.accuracy(predictions, labels);
43 |     auto backwardsResult = lossFunction.backward(predictions, labels);
44 |     BOOST_REQUIRE_MESSAGE(accuracy == 0.75, "Accuracy was not correct");
45 | }
46 | 
47 | BOOST_AUTO_TEST_CASE(test_mse_loss) {
48 |     nn::MeanSquaredError<float> lossFunction;
49 | 
50 |     const int batchSize = 4;
51 | 
52 |     Eigen::Tensor<float, 2> predictions(batchSize, 1);
53 |     predictions.setValues({{2}, {3}, {4}, {5}});
54 | 
55 |     Eigen::Tensor<float, 2> labels(batchSize, 1);
56 |     labels.setValues({{2}, {1}, {3}, {0}});
57 | 
58 |     // Expected squared error of each element:
59 |     // 0^2 + 2^2 + 1^2 + 5^2 = 0 + 4 + 1 + 25 = 30, and 30 / 4 = 7.5
60 | 
61 |     auto loss = lossFunction.loss(predictions, labels);
62 |     BOOST_REQUIRE_MESSAGE(loss == 7.5, "Loss not what was expected");
63 | 
64 |     auto backwardsResult = lossFunction.backward(predictions, labels);
65 |     std::array<float, batchSize> expectedBackwardsResults = {0, 2, 1, 5};
66 | 
67 |     for (int ii = 0; ii < batchSize; ++ii) {
68 |         BOOST_REQUIRE_CLOSE(backwardsResult(ii, 0), expectedBackwardsResults[ii], 1e-3);
69 |     }
70 | }
71 | 
72 | BOOST_AUTO_TEST_CASE(test_huber_loss) {
73 |     float threshold = 1.5;
74 |     nn::HuberLoss<float> lossFunction(threshold);
75 | 
76 |     const int batchSize = 4;
77 |     Eigen::Tensor<float, 2> predictions(batchSize, 1);
78 |     predictions.setValues({{2}, {3}, {4}, {5}});
79 | 
80 |     Eigen::Tensor<float, 2> labels(batchSize, 1);
81 |     labels.setValues({{2}, {1}, {3}, {0}});
82 | 
83 |     // Expected absolute error:
84 |     // [0, 2, 1, 5]
85 |     // If our threshold is 1.5, then two terms are squared loss and two are absolute
86 |     // So, we expect:
87 |     // 0.5 * 0^2 + (1.5 * 2 - 0.5 * 1.5^2) + 0.5 * 1^2 + (1.5 * 5 - 0.5 * 1.5^2)
88 |     // Which is:
89 |     // [0, 1.875, 0.5, 6.375], summing to 8.75, and 8.75 / 4 = 2.1875
90 | 
91 |     auto loss = lossFunction.loss(predictions, labels);
92 |     BOOST_REQUIRE_CLOSE(loss, 2.1875, 1e-3);
93 | 
94 |     auto backwardsResult = lossFunction.backward(predictions, labels);
95 | 
96 | 
97 |     std::array<float, batchSize> expectedBackwardsResults = {0, threshold, 1, threshold};
98 | 
99 |     for (int ii = 0; ii < batchSize; ++ii) {
100 |         BOOST_REQUIRE_CLOSE(backwardsResult(ii, 0), expectedBackwardsResults[ii], 1e-3);
101 |     }
102 | }
--------------------------------------------------------------------------------
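
For the cross-entropy TODO above, a reference value can be worked out by hand. Assuming the conventional categorical cross-entropy, loss = -(1/N) * sum(labels * log(predictions)), the fixture's expected loss is roughly 0.3446; whether `nn::CrossEntropyLoss` clips log(0) or adds an epsilon is an implementation detail to confirm before hard-coding the number:

```c++
#include <cmath>
#include <iostream>

int main() {
    // Probability assigned to the true class in each row of the test fixture
    const float trueClassProbs[4] = {0.7f, 0.9f, 1.0f, 0.4f};

    float loss = 0.0f;
    for (float p : trueClassProbs) {
        loss += -std::log(p);
    }
    loss /= 4.0f;

    // Prints ~0.344582, a candidate value for a BOOST_REQUIRE_CLOSE check
    std::cout << loss << std::endl;
    return 0;
}
```
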
/tests/NetTests.cpp:
--------------------------------------------------------------------------------
1 | /**
2 |  * @file NetTests.cpp
3 |  *
4 |  * @brief High level tests of the Net class
5 |  *
6 |  * @date 12/17/17
7 |  * @author Ben Caine
8 |  */
9 | 
10 | 
11 | #define BOOST_TEST_DYN_LINK
12 | #define BOOST_TEST_MODULE NetTests
13 | 
14 | #include <boost/test/unit_test.hpp>
15 | #include <chrono>
16 | #include "Net.h"
17 | 
18 | 
19 | BOOST_AUTO_TEST_CASE(test_relu) {
20 |     std::cout << "Testing Relu" << std::endl;
21 |     nn::Relu<float> relu;
22 | 
23 |     int dim1 = 1;
24 |     int dim2 = 10;
25 |     Eigen::Tensor<float, 2> input(dim1, dim2);
26 |     input.setRandom();
27 |     input = input * input.constant(-1.0f);
28 | 
29 |     Eigen::Tensor<float, 2> result = relu.forward(input);
30 |     for (int ii = 0; ii < dim1; ++ii) {
31 |         for (int jj = 0; jj < dim2; ++jj) {
32 |             BOOST_REQUIRE_MESSAGE(result(ii, jj) == 0, "Element in result does not equal zero");
33 |         }
34 |     }
35 | 
36 |     // Make a few elements positive
37 |     input(0, 5) = 10.0;
38 |     input(0, 3) = 150.0;
39 | 
40 |     result = relu.forward(input);
41 |     for (int ii = 0; ii < dim1; ++ii) {
42 |         for (int jj = 0; jj < dim2; ++jj) {
43 |             BOOST_REQUIRE_MESSAGE(result(ii, jj) >= 0, "Element in result is negative");
44 |         }
45 |     }
46 | }
47 | 
48 | BOOST_AUTO_TEST_CASE(test_relu_back) {
49 |     std::cout << "Testing Relu backwards" << std::endl;
50 |     nn::Relu<float> relu;
51 | 
52 |     int dim1 = 1;
53 |     int dim2 = 10;
54 |     Eigen::Tensor<float, 2> input(dim1, dim2);
55 |     input.setValues({{-10, -7, -5, -3, 0, 1, 3, 5, 7, 10}});
56 | 
57 |     Eigen::Tensor<float, 2> accumulatedGrad(dim1, dim2);
58 |     accumulatedGrad.setValues({{1, -4, 7, -10, 13, -16, 19, -22, 25, -28}});
59 | 
60 |     Eigen::Tensor<float, 2> forwardResult = relu.forward(input);
61 |     Eigen::Tensor<float, 2> backwardResult = relu.backward(accumulatedGrad);
62 | 
63 |     std::vector<float> expectedOutput({0, 0, 0, 0, 0, -16, 19, -22, 25, -28});
64 |     for (int ii = 0; ii < dim2; ++ii) {
65 |         BOOST_REQUIRE_MESSAGE(backwardResult(0, ii) == expectedOutput[ii], "Output of relu.backward did not match");
66 |     }
67 | }
68 | 
69 | BOOST_AUTO_TEST_CASE(test_softmax) {
70 |     std::cout << "Testing Softmax" << std::endl;
71 |     nn::Softmax<float> softmax;
72 | 
73 |     int inputBatchSize = 2;
74 |     Eigen::Tensor<float, 2> input(inputBatchSize, 2);
75 |     input.setValues({{5, 5},
76 |                      {-100, 100}});
77 | 
78 |     Eigen::Tensor<float, 2> result = softmax.forward(input);
79 | 
80 |     BOOST_REQUIRE_MESSAGE(result(0, 0) == 0.5, "Result(0, 0) did not match");
81 |     BOOST_REQUIRE_MESSAGE(result(0, 1) == 0.5, "Result(0, 1) did not match");
82 |     BOOST_REQUIRE_MESSAGE(result(1, 0) == 0, "Result(1, 0) did not match");
83 |     BOOST_REQUIRE_MESSAGE(result(1, 1) == 1, "Result(1, 1) did not match");
84 | 
85 | 
86 |     inputBatchSize = 1;
87 |     int inputSize = 100;
88 |     Eigen::Tensor<float, 2> input2(inputBatchSize, inputSize);
89 |     input2.setRandom();
90 | 
91 |     Eigen::Tensor<float, 2> result2 = softmax.forward(input2);
92 | 
93 |     float sum = 0;
94 |     for (int ii = 0; ii < inputSize; ++ii) {
95 |         sum += result2(0, ii);
96 |     }
97 | 
98 |     BOOST_REQUIRE_CLOSE(sum, 1.0, 1e-3);
99 | }
100 | 
101 | BOOST_AUTO_TEST_CASE(test_softmax_back) {
102 |     std::cout << "Testing Softmax backwards" << std::endl;
103 |     nn::Softmax<float> softmax;
104 | 
105 |     int inputBatchSize = 2;
106 |     Eigen::Tensor<float, 2> input(inputBatchSize, 2);
107 |     input.setValues({{5, 7},
108 |                      {-100, 100}});
109 | 
110 |     Eigen::Tensor<float, 2> result = softmax.forward(input);
111 | 
112 |     Eigen::Tensor<float, 2> labels(inputBatchSize, 2);
113 |     labels.setValues({{0, 1},
114 |                       {0, 1}});
115 | 
116 |     // Already has state from forward
117 |     Eigen::Tensor<float, 2> backwardsResult = softmax.backward(labels);
118 |     // TODO: Add actual tests. Test against TF/Pytorch?
119 | }
120 | 
121 | BOOST_AUTO_TEST_CASE(test_net1) {
122 |     std::cout << "Testing net creation" << std::endl;
123 |     nn::Net<float> net;
124 | 
125 |     // TODO: output of previous should match input of next. Can we auto-infer in some nice way?
126 |     int batchSize = 1;
127 |     net.add(new nn::Dense<float>(batchSize, 28 * 28, 100, true))
128 |        .add(new nn::Dense<float>(batchSize, 100, 100, true))
129 |        .add(new nn::Dense<float>(batchSize, 100, 10, true));
130 | 
131 |     Eigen::Tensor<float, 2> input(batchSize, 28 * 28);
132 |     input.setRandom();
133 |     Eigen::Tensor<float, 2> result = net.forward<2, 2>(input);
134 |     BOOST_REQUIRE_MESSAGE(result.dimensions()[0] == batchSize, "Result dimension 0 did not match batch size");
135 |     BOOST_REQUIRE_MESSAGE(result.dimensions()[1] == 10, "Result dimension 1 did not match last dense layer");
136 | }
137 | 
138 | BOOST_AUTO_TEST_CASE(test_net2) {
139 |     std::cout << "Testing net creation" << std::endl;
140 |     nn::Net<> net;
141 | 
142 |     int batchSize = 64;
143 |     int inputX = 28;
144 |     int inputY = 28;
145 |     int numClasses = 10;
146 |     bool useBias = true;
147 |     // Basic MLP for testing MNIST
148 |     net.add(new nn::Dense<>(batchSize, inputX * inputY, 100, useBias))
149 |        .add(new nn::Relu<>())
150 |        .add(new nn::Dense<>(batchSize, 100, 100, useBias))
151 |        .add(new nn::Relu<>())
152 |        .add(new nn::Dense<>(batchSize, 100, 10, useBias))
153 |        .add(new nn::Relu<>())
154 |        .add(new nn::Softmax<>());
155 | 
156 |     Eigen::Tensor<float, 2> input(batchSize, 28 * 28);
157 |     input.setRandom();
158 | 
159 |     auto startTime = std::chrono::system_clock::now();
160 |     Eigen::Tensor<float, 2> result = net.forward<2, 2>(input);
161 |     auto endTime = std::chrono::system_clock::now();
162 | 
163 |     std::chrono::duration<double> duration = endTime - startTime;
164 |     std::cout << "A single forward of size: [" << batchSize << ", 28, 28] took: " << duration.count() << "s"
165 |               << std::endl;
166 | 
167 |     Eigen::Tensor<float, 2> fakeLabels(batchSize, numClasses);
168 |     fakeLabels.setZero();
169 |     fakeLabels.setValues({{0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
170 |                           {0, 1, 0, 0, 0, 0, 0, 0, 0, 1}});
171 | 
172 |     net.backward<2>(fakeLabels);
173 | }
174 | 
175 | BOOST_AUTO_TEST_CASE(test_regression) {
176 |     std::cout << "Testing linear regression" << std::endl;
177 |     nn::Net<> net;
178 | 
179 |     net.add(new nn::Dense<>(1, 1, 10, true));
180 |     net.add(new nn::Relu<>());
181 |     net.add(new nn::Dense<>(1, 10, 1, true));
182 | 
183 |     Eigen::Tensor<float, 2> input(1, 1);
184 |     input.setRandom();
185 | 
186 |     auto startTime = std::chrono::system_clock::now();
187 |     auto result = net.forward<2, 2>(input);
188 |     auto endTime = std::chrono::system_clock::now();
189 | 
190 |     std::chrono::duration<double> duration = endTime - startTime;
191 |     std::cout << "Regression took: " << duration.count() << "s" << std::endl;
192 | }
--------------------------------------------------------------------------------