├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── get_mnist.sh
├── include
│   ├── Conv2d.h
│   ├── Dropout.h
│   ├── FullyConnected.h
│   ├── LRScheduler.h
│   ├── LinearLRScheduler.h
│   ├── MNISTDataLoader.h
│   ├── MaxPool.h
│   ├── Module.h
│   ├── NetworkModel.h
│   ├── OutputLayer.h
│   ├── ReLU.h
│   ├── Sigmoid.h
│   ├── SoftmaxClassifier.h
│   └── Tensor.h
└── src
    ├── Conv2d.cpp
    ├── Dropout.cpp
    ├── FullyConnected.cpp
    ├── LinearLRScheduler.cpp
    ├── MNISTDataLoader.cpp
    ├── MaxPool.cpp
    ├── NetworkModel.cpp
    ├── ReLU.cpp
    ├── Sigmoid.cpp
    ├── SoftmaxClassifier.cpp
    ├── Tensor.cpp
    └── main.cpp

/.gitignore:
--------------------------------------------------------------------------------
.idea/
cmake-build-debug/
cmake-build-release/
CMakeFiles/
data/
Makefile
cmake_install.cmake
CMakeCache.txt
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5)
project(neural_net_in_cpp)

set(CMAKE_CXX_STANDARD 11)
#set (CMAKE_CXX_FLAGS "-pg") -- for profiling

add_executable(neural_net_in_cpp src/main.cpp src/NetworkModel.cpp src/MNISTDataLoader.cpp src/Tensor.cpp src/FullyConnected.cpp src/Sigmoid.cpp src/SoftmaxClassifier.cpp src/Dropout.cpp src/ReLU.cpp src/Conv2d.cpp src/MaxPool.cpp src/LinearLRScheduler.cpp)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Lucas Tabelini Torres

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Neural Network in Pure C++

Simple modular implementation of a neural network in C++ using only the STL.

### Installation
Get the MNIST data set:

```sh
bash get_mnist.sh
```
Generate your Makefile:
```sh
cmake -DCMAKE_BUILD_TYPE=Release .
```
Build the code:
```sh
make
```
Run:
```sh
./neural_net_in_cpp data
```
The training should take about a minute and achieve ~97% accuracy.
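
### Model definition
Networks are assembled from `Module` objects; below is a sketch mirroring the model built in `src/main.cpp` (`NetworkModel` takes ownership of the layers, the classifier, and the scheduler, and deletes them in its destructor):

```cpp
std::vector<Module *> modules = {
        new Conv2d(1, 8, 3, 1, 0),            // in_channels, out_channels, kernel, stride, padding
        new MaxPool(2, 2),                    // window size, stride
        new ReLU(),
        new FullyConnected(8 * 13 * 13, 30),  // 1352 = flattened conv+pool output for 28x28 inputs
        new ReLU(),
        new FullyConnected(30, 10),
};
NetworkModel model(modules, new SoftmaxClassifier(), new LinearLRScheduler(0.2, -0.000005));
```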

### Todos
 - [x] Fully connected;
 - [x] Sigmoid;
 - [x] Dropout;
 - [x] ReLU;
 - [ ] Tanh;
 - [ ] Leaky ReLU;
 - [ ] Batch normalization;
 - [x] Convolutional layers;
 - [x] Max pooling;
 - [ ] Other optimizers (Adam, RMSProp, etc);
 - [x] Learning rate scheduler;
 - [ ] Plots;
 - [ ] Filter visualization;
 - [ ] CUDA?

License
----

MIT
--------------------------------------------------------------------------------
/get_mnist.sh:
--------------------------------------------------------------------------------
echo "Downloading MNIST data set..."
mkdir data

wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz

echo "Unpacking..."
gunzip train-images-idx3-ubyte.gz && mv train-images-idx3-ubyte data/
gunzip train-labels-idx1-ubyte.gz && mv train-labels-idx1-ubyte data/
gunzip t10k-images-idx3-ubyte.gz && mv t10k-images-idx3-ubyte data/
gunzip t10k-labels-idx1-ubyte.gz && mv t10k-labels-idx1-ubyte data/
--------------------------------------------------------------------------------
/include/Conv2d.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 14/04/19.
//

#ifndef NEURAL_NET_IN_CPP_CONV2D_H
#define NEURAL_NET_IN_CPP_CONV2D_H

#include "Module.h"

class Conv2d : public Module {
private:
    Tensor<double> input_;
    Tensor<double> product_;
    int stride, padding;
public:
    Tensor<double> kernels;
    Tensor<double> bias;

    Conv2d(int in_channels, int out_channels, int kernel_size, int stride, int padding, int seed = 0);

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chain_gradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
};


#endif //NEURAL_NET_IN_CPP_CONV2D_H
--------------------------------------------------------------------------------
/include/Dropout.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 11/04/19.
//

#ifndef NEURAL_NET_IN_CPP_DROPOUT_H
#define NEURAL_NET_IN_CPP_DROPOUT_H


#include "Module.h"

class Dropout : public Module {
private:
    double p_;
    int seed_;
    Tensor<double> product_;
    Tensor<double> dropout_;
public:
    explicit Dropout(double p = 0.5, int seed = 0);

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
};


#endif //NEURAL_NET_IN_CPP_DROPOUT_H
--------------------------------------------------------------------------------
/include/FullyConnected.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_FULLYCONNECTED_H
#define NEURAL_NET_IN_CPP_FULLYCONNECTED_H

#include "Module.h"
#include "Tensor.h"

/*
 * Fully Connected layer
 * Output: Mx + b
 */
class FullyConnected : public Module {
private:
    Tensor<double> weights;
    Tensor<double> bias;
    Tensor<double> input_;
    Tensor<double> product_;
    int input_dims[4];
    int input_num_dims;
public:
    FullyConnected(int input_size, int output_size, int seed = 0);

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
};


#endif //NEURAL_NET_IN_CPP_FULLYCONNECTED_H
--------------------------------------------------------------------------------
/include/LRScheduler.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 18/04/19.
//

#ifndef NEURAL_NET_IN_CPP_LRSCHEDULER_H
#define NEURAL_NET_IN_CPP_LRSCHEDULER_H


class LRScheduler {
public:
    double learning_rate;

    virtual void onIterationEnd(int iteration) = 0;

    virtual ~LRScheduler() = default; // needed: NetworkModel deletes schedulers through this base pointer
};


#endif //NEURAL_NET_IN_CPP_LRSCHEDULER_H
--------------------------------------------------------------------------------
/include/LinearLRScheduler.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 18/04/19.
//

#ifndef NEURAL_NET_IN_CPP_LINEARLRSCHEDULER_H
#define NEURAL_NET_IN_CPP_LINEARLRSCHEDULER_H

#include "LRScheduler.h"

class LinearLRScheduler : public LRScheduler {
public:
    double step;

    LinearLRScheduler(double initial_lr, double step);

    void onIterationEnd(int iteration) override;
};


#endif //NEURAL_NET_IN_CPP_LINEARLRSCHEDULER_H
--------------------------------------------------------------------------------
/include/MNISTDataLoader.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 05/04/19.
//

#ifndef NEURAL_NET_IN_CPP_MNISTDATALOADER_H
#define NEURAL_NET_IN_CPP_MNISTDATALOADER_H

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <utility>
#include <cstring>
#include <cerrno>
#include <cstdlib>
#include "Tensor.h"

/*
 * Utility to read MNIST data.
 */

class MNISTDataLoader {
private:
    std::vector<std::vector<std::vector<int>>> images_;
    std::vector<int> labels_;

    unsigned int batch_idx_ = 0;
    unsigned int batch_size_;
    unsigned int rows_ = 28, cols_ = 28, num_images_ = 0;

    /*
     * Loads MNIST's labels
     */
    void loadLabels(std::string const &path);

    /*
     * Converts an array of 4 bytes to an unsigned int
     */
    unsigned int bytesToUInt(const char *bytes);

    /*
     * Loads MNIST's image set
     */
    void loadImages(std::string const &path);

public:
    MNISTDataLoader(std::string const &imagesPath, std::string const &labelsPath, unsigned int batch_size);

    /*
     * Get the number of batches in the data set.
     */
    int getNumBatches();

//    void printImage(int idx);

    /*
     * Gets the next batch. The last batch of the data set may be smaller than the others.
     * Cycles back to the start, so it can be used indefinitely.
     */
    std::pair<Tensor<double>, std::vector<int>> nextBatch();
};

#endif //NEURAL_NET_IN_CPP_MNISTDATALOADER_H
--------------------------------------------------------------------------------
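
A usage sketch for the loader (paths as produced by `get_mnist.sh`; since `nextBatch()` wraps around at the end of the data set, callers use `getNumBatches()` to delimit epochs):

```cpp
MNISTDataLoader loader("data/train-images-idx3-ubyte", "data/train-labels-idx1-ubyte", 32);
int batches = loader.getNumBatches();  // 60000 / 32 = 1875 for the MNIST training set
std::pair<Tensor<double>, std::vector<int>> xy = loader.nextBatch();
// xy.first has dims {32, 1, 28, 28} with pixel values scaled to [0, 1];
// xy.second holds the 32 integer labels.
```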
/include/MaxPool.h:
--------------------------------------------------------------------------------
//
// Created by tabelini on 18/04/19.
//

#ifndef NEURAL_NET_IN_CPP_MAXPOOL_H
#define NEURAL_NET_IN_CPP_MAXPOOL_H


#include "Module.h"

class MaxPool : public Module {
private:
    Tensor<double> output_;
    Tensor<double> input_;
    Tensor<int> indexes;
    int stride_, size_;
public:
    explicit MaxPool(int size, int stride);

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
};


#endif //NEURAL_NET_IN_CPP_MAXPOOL_H
--------------------------------------------------------------------------------
/include/Module.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_MODULE_H
#define NEURAL_NET_IN_CPP_MODULE_H

#include "Tensor.h"

/*
 * Interface to be used as a building block for models
 */
class Module {
protected:
    bool isEval = false;
public:
    virtual Tensor<double> &forward(Tensor<double> &input) = 0;

    virtual Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) = 0;

    virtual void load(FILE *file_model) = 0;

    virtual void save(FILE *file_model) = 0;

    void train();

    void eval();

    virtual ~Module() = default;
};

inline void Module::eval() {
    this->isEval = true;
}

inline void Module::train() {
    this->isEval = false;
}


#endif //NEURAL_NET_IN_CPP_MODULE_H
--------------------------------------------------------------------------------
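
The README marks Tanh as a todo. For illustration only, a rough sketch of what a new activation could look like against this `Module` interface, written via the identity tanh(x) = 2·sigmoid(2x) − 1 so that it only needs operations `Tensor` already exposes (this class is not part of the repository):

```cpp
#include "Module.h"

class Tanh : public Module {
private:
    Tensor<double> product_;

    // Element count, derived from the public dims/num_dims fields.
    static int numElements(const Tensor<double> &t) {
        int n = 1;
        for (int i = 0; i < t.num_dims; ++i) n *= t.dims[i];
        return n;
    }

public:
    Tensor<double> &forward(Tensor<double> &input) override {
        // tanh(x) = 2 * sigmoid(2x) - 1
        product_ = (input * 2.0).sigmoid() * 2.0;
        for (int i = 0; i < numElements(product_); ++i) product_.add(i, -1.0);
        return product_;
    }

    Tensor<double> backprop(Tensor<double> chainGradient, double /*learning_rate*/) override {
        // d/dx tanh(x) = 1 - tanh(x)^2, with tanh(x) cached from forward()
        Tensor<double> prime = (product_ * product_) * -1.0;
        for (int i = 0; i < numElements(prime); ++i) prime.add(i, 1.0);
        return chainGradient * prime;
    }

    void load(FILE *) override {}

    void save(FILE *) override {}
};
```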
/include/NetworkModel.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_NETWORKMODEL_H
#define NEURAL_NET_IN_CPP_NETWORKMODEL_H

#include <vector>
#include <string>
#include "Tensor.h"
#include "Module.h"
#include "OutputLayer.h"
#include "LRScheduler.h"

/*
 * Train and test a neural network defined by Modules
 */
class NetworkModel {
private:
    std::vector<Module *> modules_;
    OutputLayer *output_layer_;
    LRScheduler *lr_scheduler_;
    int iteration = 0;
public:
    NetworkModel(std::vector<Module *> &modules, OutputLayer *output_layer, LRScheduler *lr_scheduler);

    double trainStep(Tensor<double> &x, std::vector<int> &y);

    Tensor<double> forward(Tensor<double> &x);

    std::vector<int> predict(Tensor<double> &x);

    void load(std::string path);

    void save(std::string path);

    virtual ~NetworkModel();

    void eval();
};


#endif //NEURAL_NET_IN_CPP_NETWORKMODEL_H
--------------------------------------------------------------------------------
/include/OutputLayer.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_OUTPUTLAYER_H
#define NEURAL_NET_IN_CPP_OUTPUTLAYER_H

#include <utility>
#include <vector>
#include "Tensor.h"

/*
 * Interface specific for model outputs
 */
class OutputLayer {
public:
    virtual Tensor<double> predict(Tensor<double> input) = 0;

    virtual std::pair<double, Tensor<double>> backprop(std::vector<int> ground_truth) = 0;

    virtual ~OutputLayer() = default;
};


#endif //NEURAL_NET_IN_CPP_OUTPUTLAYER_H
--------------------------------------------------------------------------------
/include/ReLU.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 11/04/19.
//

#ifndef NEURAL_NET_IN_CPP_RELU_H
#define NEURAL_NET_IN_CPP_RELU_H


#include "Tensor.h"
#include "Module.h"

class ReLU : public Module {
private:
    Tensor<double> input_;
    Tensor<double> product_;
public:
    ReLU();

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
};


#endif //NEURAL_NET_IN_CPP_RELU_H
--------------------------------------------------------------------------------
/include/Sigmoid.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_SIGMOID_H
#define NEURAL_NET_IN_CPP_SIGMOID_H

#include "Module.h"
#include "Tensor.h"

/*
 * Sigmoid activation layer
 * Output: 1.0 / (1.0 + exp(-x))
 */
class Sigmoid : public Module {
private:
    Tensor<double> input_;
    Tensor<double> product_;
public:
    Sigmoid();

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
//    ~Sigmoid();
};

#endif //NEURAL_NET_IN_CPP_SIGMOID_H
--------------------------------------------------------------------------------
/include/SoftmaxClassifier.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_SOFTMAXCLASSIFIER_H
#define NEURAL_NET_IN_CPP_SOFTMAXCLASSIFIER_H


#include "OutputLayer.h"

/*
 * Applies softmax and uses cross entropy as loss function
 */
class SoftmaxClassifier : public OutputLayer {
private:
    Tensor<double> output_;
public:
    Tensor<double> predict(Tensor<double> input) override;

    std::pair<double, Tensor<double>> backprop(std::vector<int> ground_truth) override;

    Tensor<double> crossEntropyPrime(Tensor<double> &output, std::vector<int> &y);

    double crossEntropy(Tensor<double> &y_hat, std::vector<int> &y);
};


#endif //NEURAL_NET_IN_CPP_SOFTMAXCLASSIFIER_H
--------------------------------------------------------------------------------
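
For reference, `SoftmaxClassifier` implements the usual softmax/cross-entropy pairing, whose combined gradient simplifies to (p − y) / N for softmax output p, one-hot target y, and batch size N: `crossEntropyPrime` in `src/SoftmaxClassifier.cpp` copies the softmax output, subtracts 1 at each sample's true class, and divides by the batch size.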
/include/Tensor.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 12/04/19.
//

#ifndef NEURAL_NET_IN_CPP_TENSOR_H
#define NEURAL_NET_IN_CPP_TENSOR_H

#include <cassert>
#include <cmath>
#include <cstdio>
#include <iostream>
#include <random>
#include <vector>

/*
 * Tensor class - Supports from 1 to 4 dimensions
 */
template<typename T>
class Tensor {
private:
    T *data_ = nullptr; // TODO: create a storage class to share data between tensors with different views?
    int size_ = -1; // -1 means the size is undefined
public:
    int num_dims = 0;
    int dims[4]{}; // Max tensor dimensions is 4 (could be unlimited, but this makes the implementation simpler)

    Tensor() = default;

    Tensor(int num_dims, int const *dims);

    void view(int new_num_dims, int *new_dims);

    void zero();

    T get(int i); // 1d tensor
    T get(int i, int j); // 2d tensor
    T get(int i, int j, int k); // 3d tensor
    T get(int i, int j, int k, int l); // 4d tensor

    void set(int i, T value);

    void set(int i, int j, T value);

    void set(int i, int j, int k, T value);

    void set(int i, int j, int k, int l, T value);

    void add(int i, T value);

    void add(int i, int j, int k, int l, T value);

    /*
     * Matrix multiplication
     */
    Tensor<T> matmul(Tensor<T> other);

    /*
     * 2D Convolution
     */
    Tensor<T> convolve2d(Tensor<T> kernels, int stride, int padding, Tensor<T> bias);

    /*
     * Returns the transpose
     */
    Tensor<T> matrixTranspose();

    Tensor<T> relu();

    Tensor<T> sigmoid();

    void dropout(std::default_random_engine generator, std::uniform_real_distribution<> distribution, double p);

    /*
     * Returns the derivative of the sigmoid function
     */
    Tensor<T> sigmoidPrime();

    Tensor<T> softmax();

    /*
     * Sums every element
     */
    T sum();

    Tensor<T> reluPrime();
//
//    Tensor<T> crossEntropyPrime(Tensor<T> &output, std::vector<int> const &y);
//
//    std::vector<T> sumColumns();

    /*
     * Sum of two 2d tensors
     */
    Tensor<T> operator+(Tensor<T> &other);

    /*
     * Element-wise multiplication of two 2d tensors
     */
    Tensor<T> operator*(Tensor<T> other);

    /*
     * Multiplies every element of the tensor by a value
     */
    Tensor<T> operator*(T multiplier);

    /*
     * Divides every element of the tensor by a value
     */
    Tensor<T> operator/(T divisor);

    /*
     * Element-wise in-place subtraction of two tensors
     */
    Tensor<T> operator-=(Tensor<T> difference);

    /*
     * Sums the values in each column
     */
    Tensor<T> columnWiseSum();

    Tensor<T> channelWiseSum();

    /*
     * Initializes a tensor's values from a distribution
     */
    void randn(std::default_random_engine generator, std::normal_distribution<double> distribution, double multiplier);

    /*
     * Prints the tensor's data
     */
    void print();

    Tensor<T> &operator=(const Tensor<T> &other);

    Tensor(const Tensor<T> &other);

    virtual ~Tensor();
};


#endif //NEURAL_NET_IN_CPP_TENSOR_H
--------------------------------------------------------------------------------
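
Storage is a single flat, row-major buffer; `get`/`set` do plain stride arithmetic (see `src/Tensor.cpp`). A small worked example:

```cpp
int dims[] = {2, 3};       // 2 rows, 3 columns
Tensor<double> t(2, dims);
t.zero();
t.set(1, 2, 5.0);          // row 1, col 2 -> flat offset 2 + 1 * 3 = 5
double v = t.get(1, 2);    // 5.0; the underlying buffer is {0, 0, 0, 0, 0, 5}
```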
/src/Conv2d.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 14/04/19.
//

#include "../include/Conv2d.h"

Conv2d::Conv2d(int in_channels, int out_channels, int kernel_size, int stride, int padding, int seed) {
    std::default_random_engine generator(seed);
    std::normal_distribution<double> distribution(0.0, 1.0);

    int kernel_dims[] = {out_channels, in_channels, kernel_size, kernel_size};
    kernels = Tensor<double>(4, kernel_dims);
    kernels.randn(generator, distribution, sqrt(2.0 / (kernel_size * kernel_size * out_channels)));

    int bias_dims[] = {out_channels};
    bias = Tensor<double>(1, bias_dims);
    bias.randn(generator, distribution, 0);

    this->stride = stride;
    this->padding = padding;
}

Tensor<double> &Conv2d::forward(Tensor<double> &input) {
    input_ = input;
    product_ = input.convolve2d(kernels, stride, padding, bias);

    return product_;
}

Tensor<double> Conv2d::backprop(Tensor<double> chain_gradient, double learning_rate) {
    Tensor<double> kernels_gradient(kernels.num_dims, kernels.dims);
    Tensor<double> input_gradient(input_.num_dims, input_.dims);
    Tensor<double> bias_gradient(1, bias.dims);
    kernels_gradient.zero();
    input_gradient.zero();
    bias_gradient.zero();

    // backprop convolution -- not using Tensor::convolve2d for efficiency
    for (int i = 0; i < input_.dims[0]; ++i) { // for each batch img
        for (int f = 0; f < kernels.dims[0]; f++) { // for each filter
            int x = -padding;
            for (int cx = 0; cx < chain_gradient.dims[2]; x += stride, cx++) { // for each x in the chain gradient
                int y = -padding;
                for (int cy = 0; cy < chain_gradient.dims[3]; y += stride, cy++) { // for each y in the chain gradient
                    double chain_grad = chain_gradient.get(i, f, cx, cy);
                    for (int fx = 0; fx < kernels.dims[2]; fx++) { // for each x in the filter
                        int ix = x + fx; // input x
                        if (ix >= 0 && ix < input_.dims[2]) {
                            for (int fy = 0; fy < kernels.dims[3]; fy++) { // for each y in the filter
                                int iy = y + fy; // input y
                                if (iy >= 0 && iy < input_.dims[3]) {
                                    for (int fc = 0; fc < kernels.dims[1]; fc++) { // for each channel in the filter
                                        kernels_gradient.add(f, fc, fx, fy, input_.get(i, fc, ix, iy) * chain_grad);
                                        input_gradient.add(i, fc, ix, iy, kernels.get(f, fc, fx, fy) * chain_grad);
                                    }
                                }
                            }
                        }
                    }
                    bias_gradient.add(f, chain_grad);
                }
            }
        }
    }
    kernels -= kernels_gradient * learning_rate;
    bias -= bias_gradient * learning_rate;

    return input_gradient;
}
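
// Note: the updates above ("kernels -= kernels_gradient * learning_rate") are
// plain SGD applied in-place during backprop, i.e. theta <- theta - lr * dL/dtheta,
// with the learning rate supplied per iteration by the LRScheduler through
// NetworkModel::trainStep. load()/save() below serialize the kernels and bias
// as whitespace-separated decimal text.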

void Conv2d::load(FILE *file_model) {
    double value;
    for (int i = 0; i < kernels.dims[0]; ++i) {
        for (int j = 0; j < kernels.dims[1]; ++j) {
            for (int k = 0; k < kernels.dims[2]; ++k) {
                for (int l = 0; l < kernels.dims[3]; ++l) {
                    int read = fscanf(file_model, "%lf", &value); // NOLINT(cert-err34-c)
                    if (read != 1) throw std::runtime_error("Invalid model file");
                    kernels.set(i, j, k, l, value);
                }
            }
        }
    }
    for (int m = 0; m < bias.dims[0]; ++m) {
        int read = fscanf(file_model, "%lf", &value); // NOLINT(cert-err34-c)
        if (read != 1) throw std::runtime_error("Invalid model file");
        bias.set(m, value);
    }
}

void Conv2d::save(FILE *file_model) {
    for (int i = 0; i < kernels.dims[0]; ++i) {
        for (int j = 0; j < kernels.dims[1]; ++j) {
            for (int k = 0; k < kernels.dims[2]; ++k) {
                for (int l = 0; l < kernels.dims[3]; ++l) {
                    fprintf(file_model, "%.18lf ", kernels.get(i, j, k, l));
                }
            }
        }
    }
    for (int m = 0; m < bias.dims[0]; ++m) {
        fprintf(file_model, "%.18lf ", bias.get(m));
    }
}
--------------------------------------------------------------------------------
/src/Dropout.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 11/04/19.
//

#include "../include/Dropout.h"
#include "../include/Tensor.h"

Dropout::Dropout(double p, int seed) {
    p_ = p;
    seed_ = seed;
}

Tensor<double> &Dropout::forward(Tensor<double> &input) {
//    if (isEval) {
//        return input;
//    }

    dropout_ = Tensor<double>(input.num_dims, input.dims);
    std::default_random_engine generator(seed_);
    std::uniform_real_distribution<> distribution(0., 1.);

    dropout_.dropout(generator, distribution, p_);
    product_ = input * dropout_;
    return product_;
}

Tensor<double> Dropout::backprop(Tensor<double> chainGradient, double learning_rate) {
    return chainGradient * dropout_;
}

void Dropout::load(FILE *file_model) {

}

void Dropout::save(FILE *file_model) {

}
--------------------------------------------------------------------------------
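
The mask built by `Tensor::dropout` is the "inverted dropout" formulation: each entry is `(rand < p) / p`, so a unit is kept with probability `p` (note that `p` is a keep-probability here, where some frameworks use a drop-probability) and scaled by `1/p`. The expected value of each mask entry is p·(1/p) + (1 − p)·0 = 1, so activations keep their scale and no rescaling is needed at inference time.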
/src/FullyConnected.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#include <algorithm>
#include <cmath>
#include "../include/FullyConnected.h"
#include "../include/Tensor.h"

FullyConnected::FullyConnected(int input_size, int output_size, int seed) {
    std::default_random_engine generator(seed);
    std::normal_distribution<double> distribution(0.0, 1.0);
    int weights_dims[] = {input_size, output_size};
    weights = Tensor<double>(2, weights_dims);
    weights.randn(generator, distribution, sqrt(2.0 / input_size));
    int bias_dims[] = {output_size};
    bias = Tensor<double>(1, bias_dims);
    bias.randn(generator, distribution, 0);
}


Tensor<double> &FullyConnected::forward(Tensor<double> &input) {
    input_num_dims = input.num_dims;
    std::copy(input.dims, input.dims + input.num_dims, input_dims);
    if (input.num_dims != 2) {
        // flatten tensor
        int flatten_size = 1;
        for (int i = 1; i < input.num_dims; ++i) {
            flatten_size *= input.dims[i];
        }
        int dims[] = {input.dims[0], flatten_size};
        input.view(2, dims);
    }
    input_ = input;
    product_ = input.matmul(weights) + bias;

    return product_;
}

Tensor<double> FullyConnected::backprop(Tensor<double> chainGradient, double learning_rate) {
    Tensor<double> weightGradient = input_.matrixTranspose().matmul(chainGradient);
    Tensor<double> biasGradient = chainGradient.columnWiseSum();
    chainGradient = chainGradient.matmul(weights.matrixTranspose());
    chainGradient.view(input_num_dims, input_dims);
    weights -= weightGradient * learning_rate;
    bias -= biasGradient * learning_rate;
    return chainGradient;
}

void FullyConnected::load(FILE *file_model) {
    double value;
    for (int i = 0; i < weights.dims[0]; ++i) {
        for (int j = 0; j < weights.dims[1]; ++j) {
            int read = fscanf(file_model, "%lf", &value); // NOLINT(cert-err34-c)
            if (read != 1) throw std::runtime_error("Invalid model file");
            weights.set(i, j, value);
        }
    }

    for (int i = 0; i < bias.dims[0]; ++i) {
        int read = fscanf(file_model, "%lf", &value); // NOLINT(cert-err34-c)
        if (read != 1) throw std::runtime_error("Invalid model file");
        bias.set(i, value);
    }
}

void FullyConnected::save(FILE *file_model) {
    for (int i = 0; i < weights.dims[0]; ++i) {
        for (int j = 0; j < weights.dims[1]; ++j) {
            fprintf(file_model, "%.18lf ", weights.get(i, j));
        }
    }

    for (int i = 0; i < bias.dims[0]; ++i) {
        fprintf(file_model, "%.18lf ", bias.get(i));
    }
}

//FullyConnected::~FullyConnected() {
//}
--------------------------------------------------------------------------------
/src/LinearLRScheduler.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 18/04/19.
//

#include "../include/LinearLRScheduler.h"


LinearLRScheduler::LinearLRScheduler(double initial_lr, double step) {
    learning_rate = initial_lr;
    this->step = step;
}

void LinearLRScheduler::onIterationEnd(int iteration) {
    learning_rate += step;
}
--------------------------------------------------------------------------------
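
Despite living behind the generic `LRScheduler` interface, the linear scheduler simply adds `step` once per iteration; `src/main.cpp` passes a negative step (−0.000005) starting from 0.2, so over one epoch of 1875 iterations the rate decays linearly to 0.2 − 1875 × 0.000005 ≈ 0.1906.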
/src/MNISTDataLoader.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 05/04/19.
//

#include "../include/MNISTDataLoader.h"
#include "../include/Tensor.h"


MNISTDataLoader::MNISTDataLoader(std::string const &imagesPath, std::string const &labelsPath,
                                 unsigned int batch_size) {
    this->batch_size_ = batch_size;
    loadImages(imagesPath);
    loadLabels(labelsPath);
}

unsigned int MNISTDataLoader::bytesToUInt(const char *bytes) {
    return ((unsigned char) bytes[0] << 24) | ((unsigned char) bytes[1] << 16) |
           ((unsigned char) bytes[2] << 8) | ((unsigned char) bytes[3] << 0);
}

void MNISTDataLoader::loadImages(std::string const &path) {
    // Info about the dataset's file format can be found at http://yann.lecun.com/exdb/mnist/
    std::ifstream file(path, std::ios::binary | std::ios::in);
    if (!file) {
        std::cerr << "Error: " << strerror(errno);
        exit(1);
    }
    file.clear();
    char bytes[4];
    file.read(bytes, 4); // magic number
    file.read(bytes, 4);
    num_images_ = bytesToUInt(bytes);
    file.read(bytes, 4);
    rows_ = bytesToUInt(bytes);
    file.read(bytes, 4);
    cols_ = bytesToUInt(bytes);

//    num_images_ = 64;

    images_.resize(num_images_);
    char byte;
    for (int i = 0; i < num_images_; ++i) {
        images_[i].resize(rows_);
        for (int j = 0; j < rows_; ++j) {
            images_[i][j].resize(cols_);
            for (int k = 0; k < cols_; ++k) {
                file.read(&byte, 1);
                images_[i][j][k] = (unsigned char) (byte & 0xff);
            }
        }
    }
}

int MNISTDataLoader::getNumBatches() {
    if (num_images_ % batch_size_ == 0) {
        return num_images_ / batch_size_;
    } else {
        return (num_images_ / batch_size_) + 1;
    }
}

void MNISTDataLoader::loadLabels(std::string const &path) {
    std::ifstream file(path, std::ios::binary | std::ios::in);
    if (!file) {
        std::cerr << "Error: " << strerror(errno);
    }
    file.clear();
    char bytes[4];
    file.read(bytes, 4); // magic number
    file.read(bytes, 4);
    num_images_ = bytesToUInt(bytes);

    labels_.resize(num_images_);
    char byte;
    for (int i = 0; i < num_images_; ++i) {
        file.read(&byte, 1);
        labels_[i] = (byte & 0xff);
    }
}

//void MNISTDataLoader::printImage(int idx) {
//    for (int i = 0; i < rows_; ++i) {
//        for (int j = 0; j < cols_; ++j) {
//            if (images_[idx][i][j] > 127) {
//                printf("%c", 219);
//            } else {
//                printf(" ");
//            }
//        }
//        printf("\n");
//    }
//    printf("Label: %d\n", labels_[idx]);
//}

std::pair<Tensor<double>, std::vector<int>> MNISTDataLoader::nextBatch() {
    std::pair<Tensor<double>, std::vector<int>> batchXY;
    int imgsMissing = num_images_ - batch_idx_;
    int size = imgsMissing > batch_size_ ? batch_size_ : imgsMissing;
    int dims[] = {size, 1, (int) rows_, (int) cols_};
    Tensor<double> tensorImgs(4, dims);
    std::vector<int> vecLabels;
    for (int i = 0; i < size; ++i) {
        for (int j = 0; j < rows_; ++j) {
            for (int k = 0; k < cols_; ++k) {
                tensorImgs.set(i, 0, j, k, ((double) (images_[batch_idx_ + i][j][k])) / 255.0);
            }
        }
        vecLabels.push_back(labels_[batch_idx_ + i]);
    }
    batch_idx_ += size;
    if (batch_idx_ == num_images_) {
        batch_idx_ = 0;
    }
    batchXY.first = tensorImgs;
    batchXY.second = vecLabels;
    return batchXY;
}
--------------------------------------------------------------------------------
/src/MaxPool.cpp:
--------------------------------------------------------------------------------
//
// Created by tabelini on 18/04/19.
//

#include "../include/MaxPool.h"

MaxPool::MaxPool(int size, int stride) {
    size_ = size;
    stride_ = stride;
}

Tensor<double> &MaxPool::forward(Tensor<double> &input) {
    int w = ((input.dims[3] - (size_ - 1) - 1) / stride_) + 1;
    int h = ((input.dims[2] - (size_ - 1) - 1) / stride_) + 1;
    int dims[] = {input.dims[0], input.dims[1], h, w};
    output_ = Tensor<double>(4, dims);
    indexes = Tensor<int>(4, dims);
    for (int i = 0; i < input.dims[0]; ++i) { // for each batch image
        for (int j = 0; j < input.dims[1]; ++j) { // for each image channel
            for (int k = 0; k < dims[2]; ++k) { // for each output y
                for (int l = 0; l < dims[3]; ++l) { // for each output x
                    double max = -999999999; // -infinity
                    int index = 0;
                    for (int m = 0; m < size_; ++m) {
                        for (int n = 0; n < size_; ++n) {
                            int input_y = k * stride_ + m;
                            int input_x = l * stride_ + n;
                            double value = input.get(i, j, input_y, input_x);
                            if (value > max) {
                                index = m * size_ + n;
                                max = value;
                            }
                        }
                    }
                    output_.set(i, j, k, l, max);
                    indexes.set(i, j, k, l, index);
                }
            }
        }
    }
    input_ = input;

    return output_;
}

Tensor<double> MaxPool::backprop(Tensor<double> chainGradient, double learning_rate) {
    Tensor<double> input_gradient(input_.num_dims, input_.dims);
    input_gradient.zero();

    for (int i = 0; i < input_.dims[0]; ++i) { // for each batch image
        for (int j = 0; j < input_.dims[1]; ++j) { // for each image channel
            for (int k = 0; k < output_.dims[2]; ++k) { // for each output y
                for (int l = 0; l < output_.dims[3]; ++l) { // for each output x
                    double chain_grad = chainGradient.get(i, j, k, l);
                    int index = indexes.get(i, j, k, l);
                    int m = index / size_;
                    int n = index % size_;
                    int input_y = k * stride_ + m;
                    int input_x = l * stride_ + n;
                    input_gradient.set(i, j, input_y, input_x, chain_grad);
                }
            }
        }
    }

    return input_gradient;
}

void MaxPool::load(FILE *file_model) {

}

void MaxPool::save(FILE *file_model) {

}
--------------------------------------------------------------------------------
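
`MaxPool::forward` records in `indexes` the flattened within-window offset of each maximum (`m * size_ + n`); `backprop` decodes that offset and routes each incoming gradient back to exactly that input position, leaving the rest of the window at zero.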
/src/NetworkModel.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#include <stdexcept>
#include "../include/NetworkModel.h"
#include "../include/LRScheduler.h"
#include "../include/Tensor.h"

using namespace std;

NetworkModel::NetworkModel(std::vector<Module *> &modules, OutputLayer *output_layer, LRScheduler *lr_scheduler) {
    modules_ = modules;
    lr_scheduler_ = lr_scheduler;
    output_layer_ = output_layer;
}

double NetworkModel::trainStep(Tensor<double> &x, vector<int> &y) {
    // Forward
    Tensor<double> output = forward(x);

    // Backprop
    pair<double, Tensor<double>> loss_and_cost_gradient = output_layer_->backprop(y);
    Tensor<double> chain_gradient = loss_and_cost_gradient.second;
    for (int i = (int) modules_.size() - 1; i >= 0; --i) {
        chain_gradient = modules_[i]->backprop(chain_gradient, lr_scheduler_->learning_rate);
    }
    ++iteration;
    lr_scheduler_->onIterationEnd(iteration);
    // Return loss
    return loss_and_cost_gradient.first;
}

Tensor<double> NetworkModel::forward(Tensor<double> &x) {
    for (auto &module : modules_) {
        x = module->forward(x);
    }
    return output_layer_->predict(x);
}

std::vector<int> NetworkModel::predict(Tensor<double> &x) {
    Tensor<double> output = forward(x);
    std::vector<int> predictions;
    for (int i = 0; i < output.dims[0]; ++i) {
        int argmax = -1;
        double max = -1;
        for (int j = 0; j < output.dims[1]; ++j) {
            if (output.get(i, j) > max) {
                max = output.get(i, j);
                argmax = j;
            }
        }
        predictions.push_back(argmax);
    }

    return predictions;
}

void NetworkModel::load(std::string path) {
    FILE *model_file = fopen(path.c_str(), "r");
    if (!model_file) {
        throw std::runtime_error("Error reading model file.");
    }
    for (auto &module : modules_) {
        module->load(model_file);
    }
    fclose(model_file);
}

void NetworkModel::save(std::string path) {
    FILE *model_file = fopen(path.c_str(), "w");
    if (!model_file) {
        throw std::runtime_error("Error writing model file.");
    }
    for (auto &module : modules_) {
        module->save(model_file);
    }
    fclose(model_file);
}

NetworkModel::~NetworkModel() {
    for (auto &module : modules_) {
        delete module;
    }
    delete output_layer_;
    delete lr_scheduler_;
}

void NetworkModel::eval() {
    for (auto &module : modules_) {
        module->eval();
    }
}
--------------------------------------------------------------------------------
/src/ReLU.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 11/04/19.
//

#include "../include/ReLU.h"

ReLU::ReLU() = default;

Tensor<double> &ReLU::forward(Tensor<double> &input) {
    input_ = input;
    product_ = input.relu();

    return product_;
}

Tensor<double> ReLU::backprop(Tensor<double> chainGradient, double learning_rate) {
    return chainGradient * input_.reluPrime();
}

void ReLU::load(FILE *file_model) {

}

void ReLU::save(FILE *file_model) {

}
--------------------------------------------------------------------------------
/src/Sigmoid.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#include "../include/Sigmoid.h"

Sigmoid::Sigmoid() = default;


Tensor<double> &Sigmoid::forward(Tensor<double> &input) {
    input_ = input;
    product_ = input.sigmoid();

    return product_;
}

Tensor<double> Sigmoid::backprop(Tensor<double> chainGradient, double learning_rate) {
    return chainGradient * input_.sigmoidPrime();
}

void Sigmoid::load(FILE *file_model) {

}

void Sigmoid::save(FILE *file_model) {

}

//Sigmoid::~Sigmoid() {
//}
--------------------------------------------------------------------------------
/src/SoftmaxClassifier.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#include "../include/SoftmaxClassifier.h"
#include "../include/Tensor.h"

Tensor<double> SoftmaxClassifier::predict(Tensor<double> input) {
    output_ = input.softmax();
    return output_;
}

std::pair<double, Tensor<double>> SoftmaxClassifier::backprop(std::vector<int> ground_truth) {
    double loss = crossEntropy(output_, ground_truth);
    Tensor<double> gradient = crossEntropyPrime(output_, ground_truth);

    return std::make_pair(loss, gradient);
}


Tensor<double> SoftmaxClassifier::crossEntropyPrime(Tensor<double> &output, std::vector<int> &y) {
    Tensor<double> prime = output;
    for (int i = 0; i < y.size(); ++i) {
        prime.set(i, y[i], prime.get(i, y[i]) - 1);
    }

    return prime / output.dims[0];
}


double SoftmaxClassifier::crossEntropy(Tensor<double> &y_hat, std::vector<int> &y) {
    double total = 0;
    for (int i = 0; i < y.size(); ++i) {
        double x = y_hat.get(i, y[i]);
        // Clamps the probability to a minimum value to avoid log(0)
        total += -log(x < 0.0000000001 ? 0.0000000001 : x);
    }

    return total / y.size(); // batch-wise mean
}
--------------------------------------------------------------------------------
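
The loss computed above is the batch-mean cross entropy L = -(1/N) * sum_i log p_i[y_i], with each probability clamped at 1e-10 so a confidently wrong prediction cannot produce log(0).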
/src/Tensor.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 12/04/19.
//

#include "../include/Tensor.h"
#include <algorithm> // std::copy
#include <cstring> // memset


template class Tensor<int>;

template class Tensor<float>;

template class Tensor<double>;


template<typename T>
void Tensor<T>::zero() {
    memset(data_, 0, sizeof(T) * size_);
}

template<typename T>
T Tensor<T>::get(int i, int j) {
    assert(num_dims == 2);
    return data_[j + i * dims[1]];
}

template<typename T>
T Tensor<T>::get(int i) {
    assert(num_dims == 1);
    return data_[i];
}

template<typename T>
void Tensor<T>::set(int i, int j, T value) {
    assert(num_dims == 2);
    data_[j + i * dims[1]] = value;
}

template<typename T>
void Tensor<T>::set(int i, T value) {
    data_[i] = value;
}


template<typename T>
void Tensor<T>::add(int i, T value) {
    data_[i] += value;
}

template<typename T>
void Tensor<T>::view(int new_num_dims, int *new_dims) {
    assert(new_num_dims > 0 && new_num_dims <= 4);
    this->num_dims = new_num_dims;
    std::copy(new_dims, new_dims + new_num_dims, this->dims);
}

template<typename T>
Tensor<T>::Tensor(int num_dims, int const *dims) {
    assert(num_dims > 0 && num_dims <= 4);
    int size = 1;
    for (int i = 0; i < num_dims; ++i) {
        size *= dims[i];
        this->dims[i] = dims[i];
    }
    size_ = size;
//    std::shared_ptr<T> data_sp(new T[size_]);
    T *data_sp = new T[size_];
    data_ = data_sp;
    this->num_dims = num_dims;
}

template<typename T>
T Tensor<T>::get(int i, int j, int k) {
    assert(num_dims == 3);
    return data_[k + j * dims[2] + i * dims[1] * dims[2]];
}

template<typename T>
T Tensor<T>::get(int i, int j, int k, int l) {
    assert(num_dims == 4);
    return data_[l + k * dims[3] + j * dims[2] * dims[3] + i * dims[1] * dims[2] * dims[3]];
}

template<typename T>
void Tensor<T>::set(int i, int j, int k, T value) {
    assert(num_dims == 3);
    data_[k + j * dims[2] + i * dims[1] * dims[2]] = value;
}

template<typename T>
void Tensor<T>::set(int i, int j, int k, int l, T value) {
    assert(num_dims == 4);
    data_[l + k * dims[3] + j * dims[2] * dims[3] + i * dims[1] * dims[2] * dims[3]] = value;
}

template<typename T>
void Tensor<T>::add(int i, int j, int k, int l, T value) {
    assert(num_dims == 4);
    data_[l + k * dims[3] + j * dims[2] * dims[3] + i * dims[1] * dims[2] * dims[3]] += value;
}

template<typename T>
Tensor<T>::Tensor(const Tensor<T> &other) : size_(other.size_), num_dims(other.num_dims),
                                            data_(new T[other.size_]) {
    std::copy(other.data_, other.data_ + size_, data_);
    std::copy(other.dims, other.dims + 4, dims);
}

template<typename T>
Tensor<T>::~Tensor() {
    delete[] data_;
}


template<typename T>
Tensor<T> Tensor<T>::matmul(Tensor<T> other) {
    assert(num_dims == 2 && other.num_dims == 2);
    assert(dims[1] == other.dims[0]);

    int new_dims[] = {dims[0], other.dims[1]};
    Tensor<T> product(2, new_dims);
    for (int i = 0; i < this->dims[0]; ++i) {
        for (int j = 0; j < other.dims[1]; ++j) {
            T value = 0;
            for (int k = 0; k < other.dims[0]; ++k) {
                value += this->get(i, k) * other.get(k, j);
            }
            product.set(i, j, value);
        }
    }
    return product;
}

template<typename T>
Tensor<T> Tensor<T>::matrixTranspose() {
    assert(num_dims == 2);
    int new_dims[] = {dims[1], dims[0]};
    Tensor<T> transpose(num_dims, new_dims);
    for (int i = 0; i < dims[0]; ++i) {
        for (int j = 0; j < dims[1]; ++j) {
            transpose.set(j, i, get(i, j));
        }
    }

    return transpose;
}


template<typename T>
Tensor<T> Tensor<T>::relu() {
    Tensor<T> result(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        T x = data_[i];
        result.data_[i] = x > 0 ? x : 0;
    }

    return result;
}

template<typename T>
T sigmoid(T x) {
    return 1.0 / (1.0 + exp(-x));
}

template<typename T>
Tensor<T> Tensor<T>::sigmoid() {
    Tensor<T> result(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        T x = data_[i];
        result.data_[i] = ::sigmoid(x);
    }

    return result;
}

template<typename T>
T sigmoidPrime(T x) {
    return sigmoid(x) * (1.0 - sigmoid(x));
}

template<typename T>
Tensor<T> Tensor<T>::sigmoidPrime() {
    Tensor<T> result(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        T x = data_[i];
        result.data_[i] = ::sigmoidPrime(x);
    }

    return result;
}

template<typename T>
T Tensor<T>::sum() {
    T total = 0;
    for (int i = 0; i < size_; ++i) {
        total += data_[i];
    }
    return total;
}

template<typename T>
Tensor<T> Tensor<T>::softmax() {
    assert(num_dims == 2);
    // Softmax with max trick to avoid overflows
    int rows = dims[0], cols = dims[1];
    Tensor<T> probabilities(2, dims);
    for (int i = 0; i < rows; ++i) {
        T row_max = -1; // useless value so my IDE stops screaming at me, will always be replaced
        for (int j = 0; j < cols; ++j) {
            if (j == 0 || get(i, j) > row_max) {
                row_max = get(i, j);
            }
        }

        T denominator = 0;
        for (int j = 0; j < cols; ++j) {
            denominator += exp(get(i, j) - row_max);
        }

        for (int j = 0; j < cols; ++j) {
            probabilities.set(i, j, exp(get(i, j) - row_max) / denominator);
        }
    }
    return probabilities;
}
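
// Note on the row max above: subtracting the per-row maximum before exp() is the
// standard stabilization trick. Since softmax(x + c) == softmax(x) for any
// constant c, the result is mathematically unchanged, but the largest argument
// to exp() becomes 0, so exp() cannot overflow for large logits.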

template<typename T>
Tensor<T> Tensor<T>::reluPrime() {
    Tensor<T> prime(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        prime.data_[i] = data_[i] > 0 ? 1 : 0;
    }
    return prime;
}

template<typename T>
Tensor<T> Tensor<T>::operator+(Tensor<T> &other) {
    if (other.num_dims == 1 && other.size_ == this->dims[1] && num_dims == 2) {
        // if other is a 1d tensor and this is a 2d tensor
        Tensor<T> sum(num_dims, dims);
        for (int k = 0; k < this->dims[0]; ++k) {
            for (int j = 0; j < this->dims[1]; ++j) {
                sum.set(k, j, get(k, j) + other.get(j));
            }
        }

        return sum;
    } else if (other.num_dims == num_dims && other.size_ == size_) {
        Tensor<T> sum(num_dims, dims);
        for (int i = 0; i < size_; ++i) {
            sum.data_[i] = data_[i] + other.data_[i];
        }
        return sum;
    }
    throw std::logic_error("Undefined sum");
}


template<typename T>
Tensor<T> Tensor<T>::operator*(Tensor<T> other) {
    assert(size_ == other.size_);
    Tensor<T> product(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        product.data_[i] = data_[i] * other.data_[i];
    }
    return product;
}

template<typename T>
Tensor<T> Tensor<T>::operator*(T multiplier) {
    Tensor<T> product(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        product.data_[i] = data_[i] * multiplier;
    }
    return product;
}

template<typename T>
Tensor<T> Tensor<T>::operator/(T divisor) {
    Tensor<T> quotient(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        quotient.data_[i] = data_[i] / divisor;
    }
    return quotient;
}

template<typename T>
Tensor<T> Tensor<T>::operator-=(Tensor<T> difference) {
    assert(size_ == difference.size_);
    for (int i = 0; i < size_; ++i) {
        data_[i] = data_[i] - difference.data_[i];
    }
    return *this;
}

template<typename T>
Tensor<T> Tensor<T>::columnWiseSum() {
    assert(num_dims == 2);
    int rows = dims[0], cols = dims[1];
    int sum_dims[] = {cols};
    Tensor<T> sum(1, sum_dims);
    for (int i = 0; i < cols; ++i) {
        T total = 0;
        for (int j = 0; j < rows; ++j) {
            total += get(j, i);
        }
        sum.set(i, total);
    }
    return sum;
}

template<>
void
Tensor<double>::randn(std::default_random_engine generator, std::normal_distribution<double> distribution,
                      double multiplier) {
    for (int i = 0; i < size_; ++i) {
        data_[i] = distribution(generator) * multiplier;
    }
}

template<>
void Tensor<double>::print() {
    if (num_dims == 2) {
        int rows = dims[0], cols = dims[1];
        std::cout << "Tensor2D (" << rows << ", " << cols << ")\n[";
        for (int i = 0; i < rows; ++i) {
            if (i != 0) std::cout << " ";
            std::cout << "[";
            for (int j = 0; j < cols; ++j) {
                if (j == (cols - 1)) {
                    printf("%.18lf", get(i, j));
                } else {
                    printf("%.18lf ", get(i, j));
                }
            }
            if (i == (rows - 1)) {
                std::cout << "]]\n";
            } else {
                std::cout << "]\n";
            }
        }
    } else {
        printf("Tensor%dd (", num_dims);
        for (int i = 0; i < num_dims; ++i) {
            printf("%d", dims[i]);
            if (i != (num_dims - 1)) {
                printf(",");
            }
        }
        printf(")\n[");
        for (int j = 0; j < size_; ++j) {
            printf("%lf ", data_[j]);
        }
        printf("]\n");
    }
}

template<typename T>
Tensor<T> &Tensor<T>::operator=(const Tensor<T> &other) {
    if (this != &other) {
        T *new_data = new T[other.size_];
        std::copy(other.data_, other.data_ + other.size_, new_data);
        if (size_ != -1) {
            delete[] data_;
        }
        size_ = other.size_;
        std::copy(other.dims, other.dims + 4, dims);
        num_dims = other.num_dims;
        data_ = new_data;
    }

    return *this;
}

template<typename T>
void Tensor<T>::dropout(std::default_random_engine generator, std::uniform_real_distribution<> distribution, double p) {
    for (int i = 0; i < size_; ++i) {
        data_[i] = (distribution(generator) < p) / p;
    }
}

template<typename T>
Tensor<T> Tensor<T>::convolve2d(Tensor<T> kernels, int stride, int padding, Tensor<T> bias) {
    assert(kernels.dims[1] == dims[1]);
    int w = ((dims[3] + 2 * padding - (kernels.dims[3] - 1) - 1) / stride) + 1;
    int h = ((dims[2] + 2 * padding - (kernels.dims[2] - 1) - 1) / stride) + 1;
    int result_dims[] = {dims[0], kernels.dims[0], h, w};
    Tensor<T> output(4, result_dims);
    for (int i = 0; i < dims[0]; ++i) { // for each image in the batch
        for (int j = 0; j < kernels.dims[0]; ++j) { // for each output volume
            for (int k = 0; k < h; ++k) { // for each vertical position k in the output volume
                for (int l = 0; l < w; ++l) { // for each horizontal position l in the output volume
                    int im_si = stride * k - padding;
                    int im_sj = stride * l - padding;
                    T total = 0;
                    for (int m = 0; m < kernels.dims[1]; ++m) { // for each filter channel
                        for (int n = 0; n < kernels.dims[2]; ++n) {
                            for (int o = 0; o < kernels.dims[3]; ++o) {
                                int x = im_si + n, y = im_sj + o;
                                if (x < 0 || x >= dims[2] || y < 0 || y >= dims[3])
                                    continue; // padding region, skip (contributes 0)
                                T a = get(i, m, x, y);
                                T b = kernels.get(j, m, n, o);
                                total += a * b;
                            }
                        }
                    }
                    output.set(i, j, k, l, total + bias.get(j));
                }
            }
        }
    }
    return output;
}
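
// Output spatial size above follows the usual convolution arithmetic:
//     out = (in + 2 * padding - kernel_size) / stride + 1
// For the network in src/main.cpp: 28x28 inputs with a 3x3 kernel, stride 1 and
// no padding give 26x26; MaxPool(2, 2) then yields 13x13, and with 8 filters the
// flattened size is 8 * 13 * 13 = 1352 -- the input size of the first
// FullyConnected layer.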
"); 29 | fflush(stdout); 30 | MNISTDataLoader train_loader(data_path + "/train-images-idx3-ubyte", data_path + "/train-labels-idx1-ubyte", 32); 31 | printf("Loaded.\n"); 32 | 33 | int seed = 0; 34 | vector modules = {new Conv2d(1, 8, 3, 1, 0, seed), new MaxPool(2, 2), new ReLU(), new FullyConnected(1352, 30, seed), new ReLU(), 35 | new FullyConnected(30, 10, seed)}; 36 | auto lr_sched = new LinearLRScheduler(0.2, -0.000005); 37 | NetworkModel model = NetworkModel(modules, new SoftmaxClassifier(), lr_sched); 38 | // model.load("network.txt"); 39 | 40 | int epochs = 1; 41 | printf("Training for %d epoch(s).\n", epochs); 42 | // Train network 43 | int num_train_batches = train_loader.getNumBatches(); 44 | for (int k = 0; k < epochs; ++k) { 45 | printf("Epoch %d\n", k + 1); 46 | for (int i = 0; i < num_train_batches; ++i) { 47 | pair, vector > xy = train_loader.nextBatch(); 48 | double loss = model.trainStep(xy.first, xy.second); 49 | if ((i + 1) % 10 == 0) { 50 | printf("\rIteration %d/%d - Batch Loss: %.4lf", i + 1, num_train_batches, loss); 51 | fflush(stdout); 52 | } 53 | } 54 | printf("\n"); 55 | } 56 | // Save weights 57 | model.save("network.txt"); 58 | 59 | printf("Loading testing set... "); 60 | fflush(stdout); 61 | MNISTDataLoader test_loader(data_path + "/t10k-images-idx3-ubyte", data_path + "/t10k-labels-idx1-ubyte", 32); 62 | printf("Loaded.\n"); 63 | 64 | model.eval(); 65 | 66 | // Test and measure accuracy 67 | int hits = 0; 68 | int total = 0; 69 | printf("Testing...\n"); 70 | int num_test_batches = test_loader.getNumBatches(); 71 | for (int i = 0; i < num_test_batches; ++i) { 72 | if ((i + 1) % 10 == 0 || i == (num_test_batches - 1)) { 73 | printf("\rIteration %d/%d", i + 1, num_test_batches); 74 | fflush(stdout); 75 | } 76 | pair, vector > xy = test_loader.nextBatch(); 77 | vector predictions = model.predict(xy.first); 78 | for (int j = 0; j < predictions.size(); ++j) { 79 | if (predictions[j] == xy.second[j]) { 80 | hits++; 81 | } 82 | } 83 | total += xy.second.size(); 84 | } 85 | printf("\n"); 86 | 87 | printf("Accuracy: %.2f%% (%d/%d)\n", ((double) hits * 100) / total, hits, total); 88 | 89 | return 0; 90 | } --------------------------------------------------------------------------------