├── src ├── loss │ ├── Losses.h │ ├── MeanSquaredError.h │ └── CrossEntropy.h ├── layers │ ├── Layers.h │ ├── Layer.h │ ├── Relu.h │ ├── Softmax.h │ └── Dense.h ├── optimizers │ ├── OptimizerImpl.h │ ├── StochasticGradientDescentImpl.h │ ├── Optimizers.h │ └── AdamImpl.h ├── utils │ └── WeightInitializers.h └── Net.h ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md └── examples ├── IrisTest.cpp └── data └── iris_data.csv /src/loss/Losses.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "CrossEntropy.h" 4 | #include "MeanSquaredError.h" 5 | 6 | -------------------------------------------------------------------------------- /src/layers/Layers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Layer.h" 4 | #include "Dense.h" 5 | #include "Softmax.h" 6 | #include "Relu.h" 7 | 8 | -------------------------------------------------------------------------------- /src/optimizers/OptimizerImpl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace nn 7 | { 8 | template 9 | class OptimizerImpl 10 | { 11 | public: 12 | virtual Eigen::Tensor weightUpdate(const Eigen::Tensor &weights) = 0; 13 | }; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | # Editor generated files 35 | .idea/ 36 | .vscode/ 37 | .spyproject/ 38 | 39 | # Cmake 40 | build/ 41 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | project(Cpp-NN CXX) 3 | include_directories(src) 4 | 5 | set(CMAKE_CXX_STANDARD 11) 6 | option(CPP_NN_BUILD_EXAMPLE "Whether to build examples" ON) 7 | 8 | file(GLOB_RECURSE nn_sources src/*.h) 9 | add_library(Cpp-NN ${nn_sources}) 10 | set_target_properties(Cpp-NN PROPERTIES LINKER_LANGUAGE CXX) 11 | target_include_directories(Cpp-NN PUBLIC src) 12 | 13 | find_package(Eigen3 REQUIRED) 14 | find_package(Boost REQUIRED) 15 | 16 | target_include_directories(Cpp-NN PUBLIC ${EIGEN3_INCLUDE_DIR}) 17 | 18 | 19 | if (CPP_NN_BUILD_EXAMPLE) 20 | 21 | add_executable(iris_test examples/IrisTest.cpp) 22 | target_link_libraries(iris_test Cpp-NN) 23 | endif() -------------------------------------------------------------------------------- /src/optimizers/StochasticGradientDescentImpl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "OptimizerImpl.h" 4 | 5 | namespace nn 6 | { 7 | namespace internal 8 | { 9 | template 10 | class StochasticGradientDescentImpl : public OptimizerImpl 11 | { 12 | public: 13 | explicit StochasticGradientDescentImpl(Dtype learningRate) : m_learningRate(learningRate) {} 14 | 15 | Eigen::Tensor weightUpdate(const Eigen::Tensor &gradWeights) 16 | { 17 | return 
gradWeights * gradWeights.constant(m_learningRate); 18 | }; 19 | 20 | private: 21 | Dtype m_learningRate; 22 | }; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/layers/Layer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include "optimizers/Optimizers.h" 6 | 7 | namespace nn 8 | { 9 | template 10 | class Layer 11 | { 12 | public: 13 | virtual const std::string &getName() = 0; 14 | 15 | virtual Eigen::Tensor forward(const Eigen::Tensor &input) = 0; 16 | 17 | virtual Eigen::Tensor backward(const Eigen::Tensor &output) = 0; 18 | 19 | virtual void step() = 0; 20 | 21 | virtual void registerOptimizer(std::shared_ptr> optimizer) = 0; 22 | 23 | virtual void registerOptimizer(std::shared_ptr> optimizer) = 0; 24 | }; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Rohith 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/optimizers/Optimizers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "StochasticGradientDescentImpl.h" 4 | #include "AdamImpl.h" 5 | #include 6 | 7 | namespace nn 8 | { 9 | template 10 | class StochasticGradientDescent 11 | { 12 | public: 13 | explicit StochasticGradientDescent(Dtype learningRate) : m_learningRate(learningRate) {} 14 | 15 | template 16 | std::unique_ptr> createOptimizer() const 17 | { 18 | return std::unique_ptr>(new internal::StochasticGradientDescentImpl(m_learningRate)); 19 | } 20 | 21 | private: 22 | Dtype m_learningRate; 23 | }; 24 | 25 | template 26 | class Adam 27 | { 28 | public: 29 | explicit Adam(Dtype learningRate, Dtype beta1 = 0.9, Dtype beta2 = 0.999, Dtype epsilon = 1e-8) : m_learningRate(learningRate), m_beta1(beta1), m_beta2(beta2), m_epsilon(epsilon) 30 | { 31 | } 32 | 33 | template 34 | std::unique_ptr> createOptimizer() const 35 | { 36 | return std::unique_ptr>(new internal::AdamImpl(m_learningRate, m_beta1, m_beta2, m_epsilon)); 37 | }; 38 | 39 | private: 40 | Dtype m_learningRate; 41 | Dtype m_beta1; 42 | Dtype m_beta2; 43 | Dtype m_epsilon; 44 | }; 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/layers/Relu.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "layers/Layer.h" 4 | 5 | namespace nn 6 | { 7 | template 8 | class Relu : public Layer 9 | { 10 | public: 11 | Relu() = default; 12 | 13 | const std::string &getName() 14 | { 15 | const static std::string name = "Relu"; 16 | return name; 17 | } 18 | 19 | Eigen::Tensor forward(const Eigen::Tensor &input); 20 | 21 | Eigen::Tensor backward(const Eigen::Tensor &accumulatedGrad); 22 | 23 | void step() {} 24 | 25 | void registerOptimizer(std::shared_ptr> optimizer) {} 26 | 27 | void registerOptimizer(std::shared_ptr> optimizer) {} 28 | 29 | private: 30 | Eigen::Tensor m_output; 31 | }; 32 | 33 | template 34 | Eigen::Tensor Relu::forward(const Eigen::Tensor &input) 35 | { 36 | m_output = input.cwiseMax(static_cast(0)); 37 | return m_output; 38 | }; 39 | 40 | template 41 | Eigen::Tensor Relu::backward(const Eigen::Tensor &accumulatedGrad) 42 | { 43 | auto inputPositive = m_output > static_cast(0); 44 | return inputPositive.select(accumulatedGrad, accumulatedGrad.constant(0.0)); 45 | } 46 | } 47 | 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
# C++ NN 🧠
2 | A simple Neural Network library written in C++
3 | 4 | ## Installation 🚀 5 | 6 | #### Clone the repo 7 | 8 | ```sh 9 | git clone https://github.com/Rohith04MVK/Cpp-NN 10 | ``` 11 | 12 | #### Run the examples 13 | 14 | ```sh 15 | mkdir build 16 | cd build 17 | cmake -D CPP_NN_BUILD_EXAMPLE=ON .. 18 | make 19 | ``` 20 | 21 | ## The Structure of Networks 22 | ```cpp 23 | int numHiddenNodes = 20; 24 | bool useBias = true; 25 | 26 | nn::Net net; 27 | net.add(new nn::Dense<>(batchSize, numFeatures, numHiddenNodes, useBias)); 28 | net.add(new nn::Relu<>()); 29 | net.add(new nn::Dense<>(batchSize, numHiddenNodes, numHiddenNodes, useBias)); 30 | net.add(new nn::Relu<>()); 31 | net.add(new nn::Dense<>(batchSize, numHiddenNodes, numClasses, useBias)); 32 | net.add(new nn::Softmax<>()); 33 | 34 | nn::CrossEntropyLoss lossFunc; 35 | net.registerOptimizer(new nn::Adam(0.01)); 36 | ``` 37 | ## Training! 38 | ```cpp 39 | int numEpoch = 250; 40 | float loss_t, accuracy_t; 41 | for (unsigned int ii = 0; ii < numEpoch; ++ii) 42 | { 43 | auto result = net.forward<2, 2>(input); 44 | 45 | float loss = lossFunc.loss(result, labels); 46 | float accuracy = lossFunc.accuracy(result, labels); 47 | std::cout << std::setprecision(5); 48 | std::cout << "Epoch: " << ii << " loss: " << loss << " accuracy: " << accuracy << std::endl; 49 | 50 | auto lossBack = lossFunc.backward(result, labels); 51 | net.backward(lossBack); 52 | net.step(); 53 | loss_t = loss; 54 | accuracy_t = accuracy; 55 | } 56 | -------------------------------------------------------------------------------- /src/loss/MeanSquaredError.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace nn 6 | { 7 | template 8 | class MeanSquaredError 9 | { 10 | public: 11 | MeanSquaredError() = default; 12 | 13 | Dtype loss(const Eigen::Tensor &predictions, const Eigen::Tensor &labels); 14 | 15 | Eigen::Tensor 16 | backward(const Eigen::Tensor &predictions, const Eigen::Tensor &labels); 17 | }; 18 | 19 | template 20 | Dtype MeanSquaredError::loss(const Eigen::Tensor &predictions, 21 | const Eigen::Tensor &labels) 22 | { 23 | assert(predictions.dimensions()[0] == labels.dimensions()[0] && 24 | "MeanSquaredError::loss dimensions don't match"); 25 | assert(predictions.dimensions()[1] == labels.dimensions()[1] && 26 | "MeanSquaredError::loss dimensions don't match"); 27 | 28 | int batchSize = predictions.dimensions()[0]; 29 | 30 | Eigen::Tensor squaredSum = (predictions - labels).square().sum(); 31 | return squaredSum(0) / batchSize; 32 | } 33 | 34 | template 35 | Eigen::Tensor MeanSquaredError::backward(const Eigen::Tensor &predictions, 36 | const Eigen::Tensor &labels) 37 | { 38 | return predictions - labels; 39 | } 40 | } 41 | 42 | -------------------------------------------------------------------------------- /src/layers/Softmax.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "layers/Layer.h" 4 | 5 | namespace nn 6 | { 7 | template 8 | class Softmax : public Layer 9 | { 10 | public: 11 | Softmax() = default; 12 | 13 | const std::string &getName() 14 | { 15 | const static std::string name = "Softmax"; 16 | return name; 17 | } 18 | 19 | Eigen::Tensor forward(const Eigen::Tensor &input); 20 | 21 | Eigen::Tensor backward(const Eigen::Tensor &accumulatedGrad); 22 | 23 | void step() {} 24 | 25 | void registerOptimizer(std::shared_ptr> optimizer) {} 26 | 27 | void registerOptimizer(std::shared_ptr> optimizer) {} 28 | 29 | private: 30 | Eigen::Tensor m_output; 31 | }; 32 
| 33 | template 34 | Eigen::Tensor Softmax::forward(const Eigen::Tensor &input) 35 | { 36 | int batchSize = input.dimensions()[0]; 37 | int classDims = input.dimensions()[1]; 38 | auto shiftedInput = input - input.maximum(Eigen::array{1}) 39 | .eval() 40 | .reshape(Eigen::array{batchSize, 1}) 41 | .broadcast(Eigen::array{1, classDims}); 42 | 43 | auto exponentiated = shiftedInput.exp(); 44 | m_output = exponentiated * exponentiated.sum(Eigen::array{1}) 45 | .inverse() 46 | .eval() 47 | .reshape(Eigen::array({batchSize, 1})) 48 | .broadcast(Eigen::array({1, classDims})); 49 | return m_output; 50 | } 51 | 52 | template 53 | Eigen::Tensor Softmax::backward(const Eigen::Tensor &accumulatedGrad) 54 | { 55 | const int batchSize = accumulatedGrad.dimensions()[0]; 56 | assert(batchSize == m_output.dimensions()[0] && "Dimensions of number of batches does not match"); 57 | return accumulatedGrad / accumulatedGrad.constant(batchSize); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/optimizers/AdamImpl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "OptimizerImpl.h" 4 | 5 | namespace nn 6 | { 7 | namespace internal 8 | { 9 | template 10 | class AdamImpl : public OptimizerImpl 11 | { 12 | public: 13 | explicit AdamImpl(Dtype learningRate, Dtype beta1, Dtype beta2, Dtype epsilon) : m_learningRate(learningRate), m_beta1(beta1), m_beta2(beta2), m_epsilon(epsilon), 14 | m_isInitialized(false), m_currentTimestep(1) 15 | { 16 | } 17 | 18 | Eigen::Tensor weightUpdate(const Eigen::Tensor &gradWeights) 19 | { 20 | if (!m_isInitialized) 21 | { 22 | m_firstMoment = Eigen::Tensor(gradWeights.dimensions()); 23 | m_firstMoment.setZero(); 24 | 25 | m_secondMoment = Eigen::Tensor(gradWeights.dimensions()); 26 | m_secondMoment.setZero(); 27 | m_isInitialized = true; 28 | } 29 | 30 | m_firstMoment = m_firstMoment.constant(m_beta1) * m_firstMoment + 31 | gradWeights.constant(1 - m_beta1) * gradWeights; 32 | 33 | m_secondMoment = m_secondMoment.constant(m_beta2) * m_secondMoment + 34 | gradWeights.constant(1 - m_beta2) * gradWeights.square(); 35 | 36 | auto biasCorrectedFirstMoment = m_firstMoment / m_firstMoment.constant(1 - pow(m_beta1, m_currentTimestep)); 37 | auto biasCorrectedSecondMoment = m_secondMoment / m_secondMoment.constant(1 - pow(m_beta2, m_currentTimestep)); 38 | 39 | m_currentTimestep++; 40 | return biasCorrectedFirstMoment * ((gradWeights.constant(m_learningRate) / 41 | (biasCorrectedSecondMoment.sqrt() + gradWeights.constant(m_epsilon)))); 42 | }; 43 | 44 | private: 45 | Dtype m_learningRate; 46 | Dtype m_beta1; 47 | Dtype m_beta2; 48 | Dtype m_epsilon; 49 | 50 | bool m_isInitialized; 51 | size_t m_currentTimestep; 52 | 53 | Eigen::Tensor m_firstMoment; 54 | Eigen::Tensor m_secondMoment; 55 | }; 56 | } 57 | } 58 | 59 | -------------------------------------------------------------------------------- /src/loss/CrossEntropy.h: -------------------------------------------------------------------------------- 1 | /** 2 | *Cross-entropy loss is used when adjusting model weights during training 3 | */ 4 | #pragma once 5 | 6 | #include 7 | 8 | namespace nn 9 | { 10 | template 11 | class CrossEntropyLoss 12 | { 13 | public: 14 | CrossEntropyLoss() = default; 15 | 16 | Dtype loss(const Eigen::Tensor &probabilities, const Eigen::Tensor &labels); 17 | 18 | Dtype accuracy(const Eigen::Tensor &probabilities, const Eigen::Tensor &labels); 19 | 20 | Eigen::Tensor 21 | backward(const 
Eigen::Tensor &probabilities, const Eigen::Tensor &labels); 22 | }; 23 | 24 | template 25 | Dtype CrossEntropyLoss::loss(const Eigen::Tensor &probabilities, 26 | const Eigen::Tensor &labels) 27 | { 28 | int batchSize = probabilities.dimensions()[0]; 29 | 30 | static const Dtype stabilizingVal = 0.0001; 31 | Eigen::Tensor summedLoss = (labels * 32 | (probabilities.constant(stabilizingVal) + probabilities).log()) 33 | .sum(); 34 | return (-1.0 / batchSize) * summedLoss(0); 35 | } 36 | 37 | template 38 | Dtype CrossEntropyLoss::accuracy(const Eigen::Tensor &probabilities, 39 | const Eigen::Tensor &labels) 40 | { 41 | assert(probabilities.dimensions()[0] == labels.dimensions()[0] && 42 | "CrossEntropy::accuracy dimensions did not match"); 43 | assert(probabilities.dimensions()[1] == labels.dimensions()[1] && 44 | "CrossEntropy::accuracy dimensions did not match"); 45 | 46 | auto batchSize = static_cast(labels.dimensions()[0]); 47 | 48 | Eigen::Tensor ifTensor = probabilities.argmax(1) == labels.argmax(1); 49 | Eigen::Tensor thenTensor(batchSize); 50 | auto result = ifTensor.select(thenTensor.constant(1.0), thenTensor.constant(0)); 51 | Eigen::Tensor count = result.sum(); 52 | return static_cast(count(0)) / batchSize; 53 | } 54 | 55 | template 56 | Eigen::Tensor CrossEntropyLoss::backward(const Eigen::Tensor &probabilities, 57 | const Eigen::Tensor &labels) 58 | { 59 | return probabilities - labels; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/utils/WeightInitializers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace nn 8 | { 9 | enum class InitializationScheme 10 | { 11 | GlorotUniform, 12 | GlorotNormal 13 | }; 14 | 15 | template 16 | class WeightDistribution 17 | { 18 | public: 19 | explicit WeightDistribution(InitializationScheme scheme, int fanIn, int fanOut) : m_scheme(scheme), 20 | m_randomNumberGenerator(std::random_device()()) 21 | { 22 | if (m_scheme == InitializationScheme::GlorotUniform) 23 | { 24 | Dtype limit = std::sqrt(6.0 / (fanIn + fanOut)); 25 | m_uniformDist.reset(new std::uniform_real_distribution(-limit, limit)); 26 | } 27 | else if (m_scheme == InitializationScheme::GlorotNormal) 28 | { 29 | Dtype std = std::sqrt(2.0 / (fanIn + fanOut)); 30 | m_normalDist.reset(new std::normal_distribution(0, std)); 31 | } 32 | } 33 | 34 | Dtype get() 35 | { 36 | if (m_scheme == InitializationScheme::GlorotUniform) 37 | { 38 | return (*m_uniformDist)(m_randomNumberGenerator); 39 | } 40 | else if (m_scheme == InitializationScheme::GlorotNormal) 41 | { 42 | return (*m_normalDist)(m_randomNumberGenerator); 43 | } 44 | else 45 | { 46 | std::cerr << "Tried to draw from distribution that is uninitialized" << std::endl; 47 | exit(-1); 48 | } 49 | } 50 | 51 | private: 52 | InitializationScheme m_scheme; 53 | std::mt19937 m_randomNumberGenerator; 54 | std::unique_ptr> m_uniformDist; 55 | std::unique_ptr> m_normalDist; 56 | }; 57 | 58 | template 59 | Eigen::Tensor getRandomWeights(int inputDimensions, int outputDimensions, 60 | InitializationScheme scheme = InitializationScheme::GlorotUniform) 61 | { 62 | Eigen::Tensor weights(inputDimensions, outputDimensions); 63 | weights.setZero(); 64 | 65 | auto distribution = WeightDistribution(scheme, inputDimensions, outputDimensions); 66 | for (unsigned int ii = 0; ii < inputDimensions; ++ii) 67 | { 68 | for (unsigned int jj = 0; jj < outputDimensions; ++jj) 69 | { 70 | weights(ii, jj) = 
distribution.get(); 71 | } 72 | } 73 | return weights; 74 | }; 75 | } 76 | 77 | -------------------------------------------------------------------------------- /src/Net.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "layers/Layers.h" 4 | #include "loss/Losses.h" 5 | #include "optimizers/Optimizers.h" 6 | 7 | #include 8 | #include 9 | 10 | namespace nn 11 | { 12 | 13 | template 14 | class Net 15 | { 16 | public: 17 | Net() = default; 18 | 19 | template 20 | Eigen::Tensor forward(Eigen::Tensor input) 21 | { 22 | if (m_layers.empty()) 23 | { 24 | std::cerr << "No layers specified" << std::endl; 25 | return {}; 26 | } 27 | 28 | auto currentInput = input; 29 | for (const auto &layer : m_layers) 30 | { 31 | currentInput = layer->forward(currentInput); 32 | } 33 | return currentInput; 34 | } 35 | 36 | template 37 | void backward(Eigen::Tensor input) 38 | { 39 | if (!m_hasOptimizer) 40 | { 41 | std::cerr << "No registered optimizer" << std::endl; 42 | return; 43 | } 44 | 45 | if (m_layers.empty()) 46 | { 47 | std::cerr << "No layers specified" << std::endl; 48 | return; 49 | } 50 | 51 | auto accumulatedGrad = input; 52 | for (auto rit = m_layers.rbegin(); rit != m_layers.rend(); ++rit) 53 | { 54 | accumulatedGrad = (*rit)->backward(accumulatedGrad); 55 | } 56 | } 57 | 58 | void registerOptimizer(nn::StochasticGradientDescent *optimizer) 59 | { 60 | m_hasOptimizer = true; 61 | std::shared_ptr> optimizerPtr(optimizer); 62 | for (auto &layer : m_layers) 63 | { 64 | layer->registerOptimizer(optimizerPtr); 65 | } 66 | } 67 | 68 | void registerOptimizer(nn::Adam *optimizer) 69 | { 70 | m_hasOptimizer = true; 71 | std::shared_ptr> optimizerPtr(optimizer); 72 | for (auto &layer : m_layers) 73 | { 74 | layer->registerOptimizer(optimizerPtr); 75 | } 76 | } 77 | 78 | void step() 79 | { 80 | for (auto &layer : m_layers) 81 | { 82 | layer->step(); 83 | } 84 | } 85 | 86 | template 87 | Net &add(std::unique_ptr> layer) 88 | { 89 | m_layers.push_back(layer); 90 | return *this; 91 | } 92 | 93 | template 94 | Net &add(Dense *denseLayer) 95 | { 96 | // Do shape checks here 97 | m_layers.push_back(std::unique_ptr>(denseLayer)); 98 | return *this; 99 | } 100 | 101 | template 102 | Net &add(Relu *reluLayer) 103 | { 104 | m_layers.push_back(std::unique_ptr>(reluLayer)); 105 | return *this; 106 | } 107 | 108 | template 109 | Net &add(Softmax *softmaxLayer) 110 | { 111 | m_layers.push_back(std::unique_ptr>(softmaxLayer)); 112 | return *this; 113 | } 114 | 115 | private: 116 | std::vector>> m_layers; 117 | bool m_hasOptimizer; 118 | }; 119 | } 120 | 121 | -------------------------------------------------------------------------------- /examples/IrisTest.cpp: -------------------------------------------------------------------------------- 1 | #include "../src/Net.h" 2 | #include "../src/loss/CrossEntropy.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | const std::map IRIS_TYPE_TO_INT{ 9 | {"Iris-setosa", 0}, 10 | {"Iris-versicolor", 1}, 11 | {"Iris-virginica", 2}}; 12 | 13 | struct IrisDataset 14 | { 15 | std::vector> data; 16 | std::vector labels; 17 | }; 18 | 19 | IrisDataset loadIrisDataset(const std::string &path = "../examples/data/iris_data.csv") 20 | { 21 | IrisDataset dataset; 22 | 23 | std::ifstream irisFile(path); 24 | std::string line; 25 | while (std::getline(irisFile, line, '\n')) 26 | { 27 | std::vector values; 28 | 29 | boost::split(values, line, [](char c) 30 | { return c == ','; }); 31 | 32 | if (values.size() < 5) 33 | { 
34 | std::cout << "Found line with less than five elements, skipping" << std::endl; 35 | continue; 36 | } 37 | 38 | float sepalLength = std::stof(values[0]); 39 | float sepalWidth = std::stof(values[1]); 40 | float petalLength = std::stof(values[2]); 41 | float petalWidth = std::stof(values[3]); 42 | std::string labelName = values[4]; 43 | 44 | auto labelIter = IRIS_TYPE_TO_INT.find(labelName); 45 | 46 | if (labelIter == IRIS_TYPE_TO_INT.end()) 47 | { 48 | std::cerr << "Unknown Iris type of: " << labelName << " please check dataset." << std::endl; 49 | exit(-1); 50 | } 51 | int labelInt = labelIter->second; 52 | dataset.data.push_back({sepalLength, sepalWidth, petalLength, petalWidth}); 53 | dataset.labels.push_back(labelInt); 54 | } 55 | 56 | return dataset; 57 | } 58 | 59 | int main() 60 | { 61 | auto dataset = loadIrisDataset(); 62 | 63 | int batchSize = dataset.labels.size(); 64 | int numFeatures = dataset.data[0].size(); 65 | int numClasses = *std::max_element(dataset.labels.begin(), dataset.labels.end()) + 1; 66 | 67 | Eigen::Tensor input(batchSize, numFeatures); 68 | Eigen::Tensor labels(batchSize, numClasses); 69 | input.setZero(); 70 | labels.setZero(); 71 | 72 | for (unsigned int ii = 0; ii < batchSize; ++ii) 73 | { 74 | for (unsigned int feature = 0; feature < numFeatures; ++feature) 75 | { 76 | input(ii, feature) = dataset.data[ii][feature]; 77 | } 78 | 79 | labels(ii, dataset.labels[ii]) = 1.0; 80 | } 81 | 82 | int numHiddenNodes = 20; 83 | bool useBias = true; 84 | 85 | nn::Net net; 86 | net.add(new nn::Dense<>(batchSize, numFeatures, numHiddenNodes, useBias)); 87 | net.add(new nn::Relu<>()); 88 | net.add(new nn::Dense<>(batchSize, numHiddenNodes, numHiddenNodes, useBias)); 89 | net.add(new nn::Relu<>()); 90 | net.add(new nn::Dense<>(batchSize, numHiddenNodes, numClasses, useBias)); 91 | net.add(new nn::Softmax<>()); 92 | 93 | nn::CrossEntropyLoss lossFunc; 94 | net.registerOptimizer(new nn::Adam(0.01)); 95 | 96 | int numEpoch = 250; 97 | float loss_t, accuracy_t; 98 | for (unsigned int ii = 0; ii < numEpoch; ++ii) 99 | { 100 | auto result = net.forward<2, 2>(input); 101 | 102 | float loss = lossFunc.loss(result, labels); 103 | float accuracy = lossFunc.accuracy(result, labels); 104 | std::cout << std::setprecision(5); 105 | std::cout << "Epoch: " << ii << " loss: " << loss << " accuracy: " << accuracy << std::endl; 106 | 107 | auto lossBack = lossFunc.backward(result, labels); 108 | net.backward(lossBack); 109 | net.step(); 110 | loss_t = loss; 111 | accuracy_t = accuracy; 112 | } 113 | std::cout << "Final Loss: " << loss_t << std::endl; 114 | std::cout << "Final Accuracy: " << accuracy_t << std::endl; 115 | 116 | return 0; 117 | } 118 | -------------------------------------------------------------------------------- /examples/data/iris_data.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 
5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 
6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | -------------------------------------------------------------------------------- /src/layers/Dense.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "layers/Layer.h" 4 | #include "utils/WeightInitializers.h" 5 | 6 | namespace nn 7 | { 8 | 9 | template 10 | class Dense : public Layer 11 | { 12 | public: 13 | explicit Dense(int batchSize, int inputDimension, int outputDimension, bool useBias, 14 | InitializationScheme weightInitializer = InitializationScheme::GlorotUniform); 15 | 16 | const std::string &getName() 17 | { 18 | const static std::string name = "Dense"; 19 | return name; 20 | } 21 | 22 | Eigen::Tensor forward(const Eigen::Tensor &input); 23 | 24 | Eigen::Tensor backward(const Eigen::Tensor &accumulatedGrad); 25 | 26 | Eigen::array getOutputShape() 27 | { 28 | return m_outputShape; 29 | }; 30 | 31 | void step(); 32 | 33 | void registerOptimizer(std::shared_ptr> optimizer); 34 | 35 | void registerOptimizer(std::shared_ptr> optimizer); 36 | 37 | private: 38 | Eigen::array m_outputShape; ///< The output shape of this layer 39 | Eigen::Tensor m_inputCache; ///< Cache the input to calculate gradient 40 | Eigen::Tensor m_weights; ///< Our weights of the layer 41 | Eigen::Tensor m_bias; ///< The bias weights if specified 42 | 43 | // Gradients 44 | Eigen::Tensor m_weightsGrad; ///< The gradient of the weights 45 | Eigen::Tensor m_biasGrad; ///< The gradient of the bias 46 | std::unique_ptr> m_weightOptimizer; ///< The optimizer of our weights 47 | std::unique_ptr> m_biasOptimizer; ///< The optimizer of our bias 48 | 49 | bool m_useBias; ///< Whether we use the bias 50 | }; 51 | 52 | template 53 | Dense::Dense(int batchSize, int inputDimension, int outputDimension, bool useBias, 54 | InitializationScheme weightInitializer) : m_outputShape({batchSize, outputDimension}), 55 | m_useBias(useBias) 56 | { 57 | m_weights = getRandomWeights(inputDimension, outputDimension, weightInitializer); 58 | 59 | m_weightsGrad = Eigen::Tensor(inputDimension, outputDimension); 60 | m_weightsGrad.setZero(); 61 | 62 | if (useBias) 63 | { 64 | m_bias = getRandomWeights(1, outputDimension, weightInitializer); 65 | 66 | m_biasGrad = Eigen::Tensor(1, outputDimension); 67 | m_biasGrad.setZero(); 68 | } 69 | }; 70 | 71 | template 72 | Eigen::Tensor Dense::forward(const Eigen::Tensor &input) 73 | { 74 | 
assert(input.dimensions()[1] == m_weights.dimensions()[0] && 75 | "Dense::forward dimensions of input and weights do not match"); 76 | m_inputCache = input; 77 | 78 | Eigen::array, 1> productDims = {Eigen::IndexPair(1, 0)}; 79 | auto result = input.contract(m_weights, productDims); 80 | 81 | if (m_useBias) 82 | { 83 | return result + m_bias.broadcast(Eigen::array{input.dimensions()[0], 1}); 84 | } 85 | else 86 | { 87 | return result; 88 | } 89 | } 90 | 91 | template 92 | Eigen::Tensor Dense::backward(const Eigen::Tensor &accumulatedGrad) 93 | { 94 | assert(accumulatedGrad.dimensions()[0] == m_inputCache.dimensions()[0] && 95 | "Dense::backward dimensions of accumulatedGrad and inputCache do not match"); 96 | // m_inputCache is of shape (batchSize, inputDimension) 97 | // accumulatedGrad is of shape (batchSize, outputDimension) 98 | // So we want to contract along dimensions (0, 0), aka m_inputCache.T * accumulatedGrad 99 | // Where dimensions would be (inputDimension, batchSize) * (batchSize, outputDimension) 100 | static const Eigen::array, 1> transposeInput = {Eigen::IndexPair(0, 0)}; 101 | 102 | m_weightsGrad = m_inputCache.contract(accumulatedGrad, transposeInput); 103 | if (m_useBias) 104 | { 105 | m_biasGrad = accumulatedGrad.sum(Eigen::array{0}).eval().reshape(Eigen::array{1, m_outputShape[1]}); 106 | } 107 | 108 | static const Eigen::array, 1> transposeWeights = {Eigen::IndexPair(1, 1)}; 109 | return accumulatedGrad.contract(m_weights, transposeWeights); 110 | } 111 | 112 | template 113 | void Dense::step() 114 | { 115 | m_weights -= m_weightOptimizer->weightUpdate(m_weightsGrad); 116 | 117 | if (m_useBias) 118 | { 119 | m_bias -= m_biasOptimizer->weightUpdate(m_biasGrad); 120 | } 121 | } 122 | 123 | template 124 | void Dense::registerOptimizer(std::shared_ptr> optimizer) 125 | { 126 | m_weightOptimizer = std::move(optimizer->template createOptimizer()); 127 | 128 | if (m_useBias) 129 | { 130 | m_biasOptimizer = std::move(optimizer->template createOptimizer()); 131 | } 132 | } 133 | 134 | template 135 | void Dense::registerOptimizer(std::shared_ptr> optimizer) 136 | { 137 | m_weightOptimizer = std::move(optimizer->template createOptimizer()); 138 | 139 | if (m_useBias) 140 | { 141 | m_biasOptimizer = std::move(optimizer->template createOptimizer()); 142 | } 143 | } 144 | } 145 | --------------------------------------------------------------------------------
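
The `Dense::backward` comments above describe the weight-gradient computation as a tensor contraction. The following is a minimal, self-contained sketch — not a repository file, with made-up shapes and the `<Dtype, Dims>` template arguments written out — in case the `Eigen::IndexPair` semantics of `contract` are unclear:

```cpp
// Hypothetical, standalone illustration (not part of the repository).
// Requires Eigen's unsupported Tensor module, which the library already depends on.
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main()
{
    // Made-up shapes: inputCache is (batchSize=2, inputDim=3),
    // accumulatedGrad is (batchSize=2, outputDim=4).
    Eigen::Tensor<float, 2> inputCache(2, 3);
    Eigen::Tensor<float, 2> accumulatedGrad(2, 4);
    inputCache.setRandom();
    accumulatedGrad.setRandom();

    // Pairing dimension 0 of both operands contracts over the batch dimension,
    // i.e. inputCache^T * accumulatedGrad.
    Eigen::array<Eigen::IndexPair<int>, 1> transposeInput = {Eigen::IndexPair<int>(0, 0)};
    Eigen::Tensor<float, 2> weightsGrad = inputCache.contract(accumulatedGrad, transposeInput);

    // Prints "3 x 4": the (inputDim, outputDim) shape expected for m_weightsGrad.
    std::cout << weightsGrad.dimension(0) << " x " << weightsGrad.dimension(1) << std::endl;
    return 0;
}
```

Contracting dimension 0 of both tensors is equivalent to transposing the cached input before the matrix product, which is why the result lands in the (inputDim, outputDim) layout that `m_weights` uses.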