├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── get_mnist.sh
├── include
│   ├── Conv2d.h
│   ├── Dropout.h
│   ├── FullyConnected.h
│   ├── LRScheduler.h
│   ├── LinearLRScheduler.h
│   ├── MNISTDataLoader.h
│   ├── MaxPool.h
│   ├── Module.h
│   ├── NetworkModel.h
│   ├── OutputLayer.h
│   ├── ReLU.h
│   ├── Sigmoid.h
│   ├── SoftmaxClassifier.h
│   └── Tensor.h
└── src
    ├── Conv2d.cpp
    ├── Dropout.cpp
    ├── FullyConnected.cpp
    ├── LinearLRScheduler.cpp
    ├── MNISTDataLoader.cpp
    ├── MaxPool.cpp
    ├── NetworkModel.cpp
    ├── ReLU.cpp
    ├── Sigmoid.cpp
    ├── SoftmaxClassifier.cpp
    ├── Tensor.cpp
    └── main.cpp

/.gitignore:
--------------------------------------------------------------------------------
.idea/
cmake-build-debug/
cmake-build-release/
CMakeFiles/
data/
Makefile
cmake_install.cmake
CMakeCache.txt
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5)
project(neural_net_in_cpp)

set(CMAKE_CXX_STANDARD 11)
#set (CMAKE_CXX_FLAGS "-pg") -- for profiling

add_executable(neural_net_in_cpp src/main.cpp src/NetworkModel.cpp src/MNISTDataLoader.cpp src/Tensor.cpp src/FullyConnected.cpp src/Sigmoid.cpp src/SoftmaxClassifier.cpp src/Dropout.cpp src/ReLU.cpp src/Conv2d.cpp src/MaxPool.cpp src/LinearLRScheduler.cpp)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Lucas Tabelini Torres

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Neural Network in Pure C++

Simple modular implementation of a neural network in C++ using only the STL.

### Installation
Get the MNIST data set:

```sh
bash get_mnist.sh
```
Generate your Makefile:
```sh
cmake -DCMAKE_BUILD_TYPE=Release .
```
Build the code:
```sh
make
```
Run:
```sh
./neural_net_in_cpp data
```
The training should take about a minute and achieve ~97% accuracy.
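
### Model definition
Networks are assembled from `Module` objects; below is a sketch mirroring the model built in `src/main.cpp` (`NetworkModel` takes ownership of the layers, the classifier, and the scheduler, and deletes them in its destructor):

```cpp
std::vector<Module *> modules = {
        new Conv2d(1, 8, 3, 1, 0),            // in_channels, out_channels, kernel, stride, padding
        new MaxPool(2, 2),                    // window size, stride
        new ReLU(),
        new FullyConnected(8 * 13 * 13, 30),  // 1352 = flattened conv+pool output for 28x28 inputs
        new ReLU(),
        new FullyConnected(30, 10),
};
NetworkModel model(modules, new SoftmaxClassifier(), new LinearLRScheduler(0.2, -0.000005));
```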

### Todos
 - [x] Fully connected;
 - [x] Sigmoid;
 - [x] Dropout;
 - [x] ReLU;
 - [ ] Tanh;
 - [ ] Leaky ReLU;
 - [ ] Batch normalization;
 - [x] Convolutional layers;
 - [x] Max pooling;
 - [ ] Other optimizers (Adam, RMSProp, etc);
 - [x] Learning rate scheduler;
 - [ ] Plots;
 - [ ] Filter visualization;
 - [ ] CUDA?

License
----

MIT
--------------------------------------------------------------------------------
/get_mnist.sh:
--------------------------------------------------------------------------------
echo "Downloading MNIST data set..."
mkdir data

wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz

echo "Unpacking..."
gunzip train-images-idx3-ubyte.gz && mv train-images-idx3-ubyte data/
gunzip train-labels-idx1-ubyte.gz && mv train-labels-idx1-ubyte data/
gunzip t10k-images-idx3-ubyte.gz && mv t10k-images-idx3-ubyte data/
gunzip t10k-labels-idx1-ubyte.gz && mv t10k-labels-idx1-ubyte data/
--------------------------------------------------------------------------------
/include/Conv2d.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 14/04/19.
//

#ifndef NEURAL_NET_IN_CPP_CONV2D_H
#define NEURAL_NET_IN_CPP_CONV2D_H

#include "Module.h"

class Conv2d : public Module {
private:
    Tensor<double> input_;
    Tensor<double> product_;
    int stride, padding;
public:
    Tensor<double> kernels;
    Tensor<double> bias;

    Conv2d(int in_channels, int out_channels, int kernel_size, int stride, int padding, int seed = 0);

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chain_gradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
};


#endif //NEURAL_NET_IN_CPP_CONV2D_H
--------------------------------------------------------------------------------
/include/Dropout.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 11/04/19.
//

#ifndef NEURAL_NET_IN_CPP_DROPOUT_H
#define NEURAL_NET_IN_CPP_DROPOUT_H


#include "Module.h"

class Dropout : public Module {
private:
    double p_;
    int seed_;
    Tensor<double> product_;
    Tensor<double> dropout_;
public:
    explicit Dropout(double p = 0.5, int seed = 0);

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
};


#endif //NEURAL_NET_IN_CPP_DROPOUT_H
--------------------------------------------------------------------------------
/include/FullyConnected.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_FULLYCONNECTED_H
#define NEURAL_NET_IN_CPP_FULLYCONNECTED_H

#include "Module.h"
#include "Tensor.h"

/*
 * Fully Connected layer
 * Output: Mx + b
 */
class FullyConnected : public Module {
private:
    Tensor<double> weights;
    Tensor<double> bias;
    Tensor<double> input_;
    Tensor<double> product_;
    int input_dims[4];
    int input_num_dims;
public:
    FullyConnected(int input_size, int output_size, int seed = 0);

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
};


#endif //NEURAL_NET_IN_CPP_FULLYCONNECTED_H
--------------------------------------------------------------------------------
/include/LRScheduler.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 18/04/19.
//

#ifndef NEURAL_NET_IN_CPP_LRSCHEDULER_H
#define NEURAL_NET_IN_CPP_LRSCHEDULER_H


class LRScheduler {
public:
    double learning_rate;

    virtual void onIterationEnd(int iteration) = 0;

    virtual ~LRScheduler() = default; // needed: NetworkModel deletes schedulers through this base pointer
};


#endif //NEURAL_NET_IN_CPP_LRSCHEDULER_H
--------------------------------------------------------------------------------
/include/LinearLRScheduler.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 18/04/19.
//

#ifndef NEURAL_NET_IN_CPP_LINEARLRSCHEDULER_H
#define NEURAL_NET_IN_CPP_LINEARLRSCHEDULER_H

#include "LRScheduler.h"

class LinearLRScheduler : public LRScheduler {
public:
    double step;

    LinearLRScheduler(double initial_lr, double step);

    void onIterationEnd(int iteration) override;
};


#endif //NEURAL_NET_IN_CPP_LINEARLRSCHEDULER_H
--------------------------------------------------------------------------------
/include/MNISTDataLoader.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 05/04/19.
//

#ifndef NEURAL_NET_IN_CPP_MNISTDATALOADER_H
#define NEURAL_NET_IN_CPP_MNISTDATALOADER_H

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <utility>
#include <cstring>
#include <cerrno>
#include <cstdlib>
#include "Tensor.h"

/*
 * Utility to read MNIST data.
 */

class MNISTDataLoader {
private:
    std::vector<std::vector<std::vector<int>>> images_;
    std::vector<int> labels_;

    unsigned int batch_idx_ = 0;
    unsigned int batch_size_;
    unsigned int rows_ = 28, cols_ = 28, num_images_ = 0;

    /*
     * Loads MNIST's labels
     */
    void loadLabels(std::string const &path);

    /*
     * Converts an array of 4 bytes to an unsigned int
     */
    unsigned int bytesToUInt(const char *bytes);

    /*
     * Loads MNIST's image set
     */
    void loadImages(std::string const &path);

public:
    MNISTDataLoader(std::string const &imagesPath, std::string const &labelsPath, unsigned int batch_size);

    /*
     * Get the number of batches in the data set.
     */
    int getNumBatches();

//    void printImage(int idx);

    /*
     * Gets the next batch. The last batch of the data set may be smaller than the others.
     * Cycles back to the start, so it can be used indefinitely.
     */
    std::pair<Tensor<double>, std::vector<int>> nextBatch();
};

#endif //NEURAL_NET_IN_CPP_MNISTDATALOADER_H
--------------------------------------------------------------------------------
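
A usage sketch for the loader (paths as produced by `get_mnist.sh`; since `nextBatch()` wraps around at the end of the data set, callers use `getNumBatches()` to delimit epochs):

```cpp
MNISTDataLoader loader("data/train-images-idx3-ubyte", "data/train-labels-idx1-ubyte", 32);
int batches = loader.getNumBatches();  // 60000 / 32 = 1875 for the MNIST training set
std::pair<Tensor<double>, std::vector<int>> xy = loader.nextBatch();
// xy.first has dims {32, 1, 28, 28} with pixel values scaled to [0, 1];
// xy.second holds the 32 integer labels.
```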
/include/MaxPool.h:
--------------------------------------------------------------------------------
//
// Created by tabelini on 18/04/19.
//

#ifndef NEURAL_NET_IN_CPP_MAXPOOL_H
#define NEURAL_NET_IN_CPP_MAXPOOL_H


#include "Module.h"

class MaxPool : public Module {
private:
    Tensor<double> output_;
    Tensor<double> input_;
    Tensor<int> indexes;
    int stride_, size_;
public:
    explicit MaxPool(int size, int stride);

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
};


#endif //NEURAL_NET_IN_CPP_MAXPOOL_H
--------------------------------------------------------------------------------
/include/Module.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_MODULE_H
#define NEURAL_NET_IN_CPP_MODULE_H

#include "Tensor.h"

/*
 * Interface to be used as a building block for models
 */
class Module {
protected:
    bool isEval = false;
public:
    virtual Tensor<double> &forward(Tensor<double> &input) = 0;

    virtual Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) = 0;

    virtual void load(FILE *file_model) = 0;

    virtual void save(FILE *file_model) = 0;

    void train();

    void eval();

    virtual ~Module() = default;
};

inline void Module::eval() {
    this->isEval = true;
}

inline void Module::train() {
    this->isEval = false;
}


#endif //NEURAL_NET_IN_CPP_MODULE_H
--------------------------------------------------------------------------------
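
The README marks Tanh as a todo. For illustration only, a rough sketch of what a new activation could look like against this `Module` interface, written via the identity tanh(x) = 2·sigmoid(2x) − 1 so that it only needs operations `Tensor` already exposes (this class is not part of the repository):

```cpp
#include "Module.h"

class Tanh : public Module {
private:
    Tensor<double> product_;

    // Element count, derived from the public dims/num_dims fields.
    static int numElements(const Tensor<double> &t) {
        int n = 1;
        for (int i = 0; i < t.num_dims; ++i) n *= t.dims[i];
        return n;
    }

public:
    Tensor<double> &forward(Tensor<double> &input) override {
        // tanh(x) = 2 * sigmoid(2x) - 1
        product_ = (input * 2.0).sigmoid() * 2.0;
        for (int i = 0; i < numElements(product_); ++i) product_.add(i, -1.0);
        return product_;
    }

    Tensor<double> backprop(Tensor<double> chainGradient, double /*learning_rate*/) override {
        // d/dx tanh(x) = 1 - tanh(x)^2, with tanh(x) cached from forward()
        Tensor<double> prime = (product_ * product_) * -1.0;
        for (int i = 0; i < numElements(prime); ++i) prime.add(i, 1.0);
        return chainGradient * prime;
    }

    void load(FILE *) override {}

    void save(FILE *) override {}
};
```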
/include/NetworkModel.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_NETWORKMODEL_H
#define NEURAL_NET_IN_CPP_NETWORKMODEL_H

#include <vector>
#include <string>
#include "Tensor.h"
#include "Module.h"
#include "OutputLayer.h"
#include "LRScheduler.h"

/*
 * Train and test a neural network defined by Modules
 */
class NetworkModel {
private:
    std::vector<Module *> modules_;
    OutputLayer *output_layer_;
    LRScheduler *lr_scheduler_;
    int iteration = 0;
public:
    NetworkModel(std::vector<Module *> &modules, OutputLayer *output_layer, LRScheduler *lr_scheduler);

    double trainStep(Tensor<double> &x, std::vector<int> &y);

    Tensor<double> forward(Tensor<double> &x);

    std::vector<int> predict(Tensor<double> &x);

    void load(std::string path);

    void save(std::string path);

    virtual ~NetworkModel();

    void eval();
};


#endif //NEURAL_NET_IN_CPP_NETWORKMODEL_H
--------------------------------------------------------------------------------
/include/OutputLayer.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_OUTPUTLAYER_H
#define NEURAL_NET_IN_CPP_OUTPUTLAYER_H

#include <utility>
#include <vector>
#include "Tensor.h"

/*
 * Interface specific for model outputs
 */
class OutputLayer {
public:
    virtual Tensor<double> predict(Tensor<double> input) = 0;

    virtual std::pair<double, Tensor<double>> backprop(std::vector<int> ground_truth) = 0;

    virtual ~OutputLayer() = default;
};


#endif //NEURAL_NET_IN_CPP_OUTPUTLAYER_H
--------------------------------------------------------------------------------
/include/ReLU.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 11/04/19.
//

#ifndef NEURAL_NET_IN_CPP_RELU_H
#define NEURAL_NET_IN_CPP_RELU_H


#include "Tensor.h"
#include "Module.h"

class ReLU : public Module {
private:
    Tensor<double> input_;
    Tensor<double> product_;
public:
    ReLU();

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
};


#endif //NEURAL_NET_IN_CPP_RELU_H
--------------------------------------------------------------------------------
/include/Sigmoid.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_SIGMOID_H
#define NEURAL_NET_IN_CPP_SIGMOID_H

#include "Module.h"
#include "Tensor.h"

/*
 * Sigmoid activation layer
 * Output: 1.0 / (1.0 + exp(-x))
 */
class Sigmoid : public Module {
private:
    Tensor<double> input_;
    Tensor<double> product_;
public:
    Sigmoid();

    Tensor<double> &forward(Tensor<double> &input) override;

    Tensor<double> backprop(Tensor<double> chainGradient, double learning_rate) override;

    void load(FILE *file_model) override;

    void save(FILE *file_model) override;
//    ~Sigmoid();
};

#endif //NEURAL_NET_IN_CPP_SIGMOID_H
--------------------------------------------------------------------------------
/include/SoftmaxClassifier.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#ifndef NEURAL_NET_IN_CPP_SOFTMAXCLASSIFIER_H
#define NEURAL_NET_IN_CPP_SOFTMAXCLASSIFIER_H


#include "OutputLayer.h"

/*
 * Applies softmax and uses cross entropy as loss function
 */
class SoftmaxClassifier : public OutputLayer {
private:
    Tensor<double> output_;
public:
    Tensor<double> predict(Tensor<double> input) override;

    std::pair<double, Tensor<double>> backprop(std::vector<int> ground_truth) override;

    Tensor<double> crossEntropyPrime(Tensor<double> &output, std::vector<int> &y);

    double crossEntropy(Tensor<double> &y_hat, std::vector<int> &y);
};


#endif //NEURAL_NET_IN_CPP_SOFTMAXCLASSIFIER_H
--------------------------------------------------------------------------------
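
For reference, `SoftmaxClassifier` implements the usual softmax/cross-entropy pairing, whose combined gradient simplifies to (p − y) / N for softmax output p, one-hot target y, and batch size N: `crossEntropyPrime` in `src/SoftmaxClassifier.cpp` copies the softmax output, subtracts 1 at each sample's true class, and divides by the batch size.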
/include/Tensor.h:
--------------------------------------------------------------------------------
//
// Created by lucas on 12/04/19.
//

#ifndef NEURAL_NET_IN_CPP_TENSOR_H
#define NEURAL_NET_IN_CPP_TENSOR_H

#include <cassert>
#include <cmath>
#include <cstdio>
#include <iostream>
#include <random>
#include <vector>

/*
 * Tensor class - Supports from 1 to 4 dimensions
 */
template<typename T>
class Tensor {
private:
    T *data_ = nullptr; // TODO: create a storage class to share data between tensors with different views?
    int size_ = -1; // -1 means the size is undefined
public:
    int num_dims = 0;
    int dims[4]{}; // Max tensor dimensions is 4 (could be unlimited, but this makes the implementation simpler)

    Tensor() = default;

    Tensor(int num_dims, int const *dims);

    void view(int new_num_dims, int *new_dims);

    void zero();

    T get(int i); // 1d tensor
    T get(int i, int j); // 2d tensor
    T get(int i, int j, int k); // 3d tensor
    T get(int i, int j, int k, int l); // 4d tensor

    void set(int i, T value);

    void set(int i, int j, T value);

    void set(int i, int j, int k, T value);

    void set(int i, int j, int k, int l, T value);

    void add(int i, T value);

    void add(int i, int j, int k, int l, T value);

    /*
     * Matrix multiplication
     */
    Tensor<T> matmul(Tensor<T> other);

    /*
     * 2D Convolution
     */
    Tensor<T> convolve2d(Tensor<T> kernels, int stride, int padding, Tensor<T> bias);

    /*
     * Returns the transpose
     */
    Tensor<T> matrixTranspose();

    Tensor<T> relu();

    Tensor<T> sigmoid();

    void dropout(std::default_random_engine generator, std::uniform_real_distribution<> distribution, double p);

    /*
     * Returns the derivative of the sigmoid function
     */
    Tensor<T> sigmoidPrime();

    Tensor<T> softmax();

    /*
     * Sums every element
     */
    T sum();

    Tensor<T> reluPrime();
//
//    Tensor<T> crossEntropyPrime(Tensor<T> &output, std::vector<int> const &y);
//
//    std::vector<T> sumColumns();

    /*
     * Sum of two 2d tensors
     */
    Tensor<T> operator+(Tensor<T> &other);

    /*
     * Element-wise multiplication of two 2d tensors
     */
    Tensor<T> operator*(Tensor<T> other);

    /*
     * Multiplies every element of the tensor by a value
     */
    Tensor<T> operator*(T multiplier);

    /*
     * Divides every element of the tensor by a value
     */
    Tensor<T> operator/(T divisor);

    /*
     * Element-wise in-place subtraction of two tensors
     */
    Tensor<T> operator-=(Tensor<T> difference);

    /*
     * Sums the values in each column
     */
    Tensor<T> columnWiseSum();

    Tensor<T> channelWiseSum();

    /*
     * Initializes a tensor's values from a distribution
     */
    void randn(std::default_random_engine generator, std::normal_distribution<double> distribution, double multiplier);

    /*
     * Prints the tensor's data
     */
    void print();

    Tensor<T> &operator=(const Tensor<T> &other);

    Tensor(const Tensor<T> &other);

    virtual ~Tensor();
};


#endif //NEURAL_NET_IN_CPP_TENSOR_H
--------------------------------------------------------------------------------
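
Storage is a single flat, row-major buffer; `get`/`set` do plain stride arithmetic (see `src/Tensor.cpp`). A small worked example:

```cpp
int dims[] = {2, 3};       // 2 rows, 3 columns
Tensor<double> t(2, dims);
t.zero();
t.set(1, 2, 5.0);          // row 1, col 2 -> flat offset 2 + 1 * 3 = 5
double v = t.get(1, 2);    // 5.0; the underlying buffer is {0, 0, 0, 0, 0, 5}
```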
/src/Conv2d.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 14/04/19.
//

#include "../include/Conv2d.h"

Conv2d::Conv2d(int in_channels, int out_channels, int kernel_size, int stride, int padding, int seed) {
    std::default_random_engine generator(seed);
    std::normal_distribution<double> distribution(0.0, 1.0);

    int kernel_dims[] = {out_channels, in_channels, kernel_size, kernel_size};
    kernels = Tensor<double>(4, kernel_dims);
    kernels.randn(generator, distribution, sqrt(2.0 / (kernel_size * kernel_size * out_channels)));

    int bias_dims[] = {out_channels};
    bias = Tensor<double>(1, bias_dims);
    bias.randn(generator, distribution, 0);

    this->stride = stride;
    this->padding = padding;
}

Tensor<double> &Conv2d::forward(Tensor<double> &input) {
    input_ = input;
    product_ = input.convolve2d(kernels, stride, padding, bias);

    return product_;
}

Tensor<double> Conv2d::backprop(Tensor<double> chain_gradient, double learning_rate) {
    Tensor<double> kernels_gradient(kernels.num_dims, kernels.dims);
    Tensor<double> input_gradient(input_.num_dims, input_.dims);
    Tensor<double> bias_gradient(1, bias.dims);
    kernels_gradient.zero();
    input_gradient.zero();
    bias_gradient.zero();

    // backprop convolution -- not using Tensor::convolve2d for efficiency
    for (int i = 0; i < input_.dims[0]; ++i) { // for each batch img
        for (int f = 0; f < kernels.dims[0]; f++) { // for each filter
            int x = -padding;
            for (int cx = 0; cx < chain_gradient.dims[2]; x += stride, cx++) { // for each x in the chain gradient
                int y = -padding;
                for (int cy = 0; cy < chain_gradient.dims[3]; y += stride, cy++) { // for each y in the chain gradient
                    double chain_grad = chain_gradient.get(i, f, cx, cy);
                    for (int fx = 0; fx < kernels.dims[2]; fx++) { // for each x in the filter
                        int ix = x + fx; // input x
                        if (ix >= 0 && ix < input_.dims[2]) {
                            for (int fy = 0; fy < kernels.dims[3]; fy++) { // for each y in the filter
                                int iy = y + fy; // input y
                                if (iy >= 0 && iy < input_.dims[3]) {
                                    for (int fc = 0; fc < kernels.dims[1]; fc++) { // for each channel in the filter
                                        kernels_gradient.add(f, fc, fx, fy, input_.get(i, fc, ix, iy) * chain_grad);
                                        input_gradient.add(i, fc, ix, iy, kernels.get(f, fc, fx, fy) * chain_grad);
                                    }
                                }
                            }
                        }
                    }
                    bias_gradient.add(f, chain_grad);
                }
            }
        }
    }
    kernels -= kernels_gradient * learning_rate;
    bias -= bias_gradient * learning_rate;

    return input_gradient;
}
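
// Note: the updates above ("kernels -= kernels_gradient * learning_rate") are
// plain SGD applied in-place during backprop, i.e. theta <- theta - lr * dL/dtheta,
// with the learning rate supplied per iteration by the LRScheduler through
// NetworkModel::trainStep. load()/save() below serialize the kernels and bias
// as whitespace-separated decimal text.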

void Conv2d::load(FILE *file_model) {
    double value;
    for (int i = 0; i < kernels.dims[0]; ++i) {
        for (int j = 0; j < kernels.dims[1]; ++j) {
            for (int k = 0; k < kernels.dims[2]; ++k) {
                for (int l = 0; l < kernels.dims[3]; ++l) {
                    int read = fscanf(file_model, "%lf", &value); // NOLINT(cert-err34-c)
                    if (read != 1) throw std::runtime_error("Invalid model file");
                    kernels.set(i, j, k, l, value);
                }
            }
        }
    }
    for (int m = 0; m < bias.dims[0]; ++m) {
        int read = fscanf(file_model, "%lf", &value); // NOLINT(cert-err34-c)
        if (read != 1) throw std::runtime_error("Invalid model file");
        bias.set(m, value);
    }
}

void Conv2d::save(FILE *file_model) {
    for (int i = 0; i < kernels.dims[0]; ++i) {
        for (int j = 0; j < kernels.dims[1]; ++j) {
            for (int k = 0; k < kernels.dims[2]; ++k) {
                for (int l = 0; l < kernels.dims[3]; ++l) {
                    fprintf(file_model, "%.18lf ", kernels.get(i, j, k, l));
                }
            }
        }
    }
    for (int m = 0; m < bias.dims[0]; ++m) {
        fprintf(file_model, "%.18lf ", bias.get(m));
    }
}
--------------------------------------------------------------------------------
/src/Dropout.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 11/04/19.
//

#include "../include/Dropout.h"
#include "../include/Tensor.h"

Dropout::Dropout(double p, int seed) {
    p_ = p;
    seed_ = seed;
}

Tensor<double> &Dropout::forward(Tensor<double> &input) {
//    if (isEval) {
//        return input;
//    }

    dropout_ = Tensor<double>(input.num_dims, input.dims);
    std::default_random_engine generator(seed_);
    std::uniform_real_distribution<> distribution(0., 1.);

    dropout_.dropout(generator, distribution, p_);
    product_ = input * dropout_;
    return product_;
}

Tensor<double> Dropout::backprop(Tensor<double> chainGradient, double learning_rate) {
    return chainGradient * dropout_;
}

void Dropout::load(FILE *file_model) {

}

void Dropout::save(FILE *file_model) {

}
--------------------------------------------------------------------------------
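
The mask built by `Tensor::dropout` is the "inverted dropout" formulation: each entry is `(rand < p) / p`, so a unit is kept with probability `p` (note that `p` is a keep-probability here, where some frameworks use a drop-probability) and scaled by `1/p`. The expected value of each mask entry is p·(1/p) + (1 − p)·0 = 1, so activations keep their scale and no rescaling is needed at inference time.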
/src/FullyConnected.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#include <algorithm>
#include <cmath>
#include "../include/FullyConnected.h"
#include "../include/Tensor.h"

FullyConnected::FullyConnected(int input_size, int output_size, int seed) {
    std::default_random_engine generator(seed);
    std::normal_distribution<double> distribution(0.0, 1.0);
    int weights_dims[] = {input_size, output_size};
    weights = Tensor<double>(2, weights_dims);
    weights.randn(generator, distribution, sqrt(2.0 / input_size));
    int bias_dims[] = {output_size};
    bias = Tensor<double>(1, bias_dims);
    bias.randn(generator, distribution, 0);
}


Tensor<double> &FullyConnected::forward(Tensor<double> &input) {
    input_num_dims = input.num_dims;
    std::copy(input.dims, input.dims + input.num_dims, input_dims);
    if (input.num_dims != 2) {
        // flatten tensor
        int flatten_size = 1;
        for (int i = 1; i < input.num_dims; ++i) {
            flatten_size *= input.dims[i];
        }
        int dims[] = {input.dims[0], flatten_size};
        input.view(2, dims);
    }
    input_ = input;
    product_ = input.matmul(weights) + bias;

    return product_;
}

Tensor<double> FullyConnected::backprop(Tensor<double> chainGradient, double learning_rate) {
    Tensor<double> weightGradient = input_.matrixTranspose().matmul(chainGradient);
    Tensor<double> biasGradient = chainGradient.columnWiseSum();
    chainGradient = chainGradient.matmul(weights.matrixTranspose());
    chainGradient.view(input_num_dims, input_dims);
    weights -= weightGradient * learning_rate;
    bias -= biasGradient * learning_rate;
    return chainGradient;
}

void FullyConnected::load(FILE *file_model) {
    double value;
    for (int i = 0; i < weights.dims[0]; ++i) {
        for (int j = 0; j < weights.dims[1]; ++j) {
            int read = fscanf(file_model, "%lf", &value); // NOLINT(cert-err34-c)
            if (read != 1) throw std::runtime_error("Invalid model file");
            weights.set(i, j, value);
        }
    }

    for (int i = 0; i < bias.dims[0]; ++i) {
        int read = fscanf(file_model, "%lf", &value); // NOLINT(cert-err34-c)
        if (read != 1) throw std::runtime_error("Invalid model file");
        bias.set(i, value);
    }
}

void FullyConnected::save(FILE *file_model) {
    for (int i = 0; i < weights.dims[0]; ++i) {
        for (int j = 0; j < weights.dims[1]; ++j) {
            fprintf(file_model, "%.18lf ", weights.get(i, j));
        }
    }

    for (int i = 0; i < bias.dims[0]; ++i) {
        fprintf(file_model, "%.18lf ", bias.get(i));
    }
}

//FullyConnected::~FullyConnected() {
//}
--------------------------------------------------------------------------------
/src/LinearLRScheduler.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 18/04/19.
//

#include "../include/LinearLRScheduler.h"


LinearLRScheduler::LinearLRScheduler(double initial_lr, double step) {
    learning_rate = initial_lr;
    this->step = step;
}

void LinearLRScheduler::onIterationEnd(int iteration) {
    learning_rate += step;
}
--------------------------------------------------------------------------------
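
Despite living behind the generic `LRScheduler` interface, the linear scheduler simply adds `step` once per iteration; `src/main.cpp` passes a negative step (−0.000005) starting from 0.2, so over one epoch of 1875 iterations the rate decays linearly to 0.2 − 1875 × 0.000005 ≈ 0.1906.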
/src/MNISTDataLoader.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 05/04/19.
//

#include "../include/MNISTDataLoader.h"
#include "../include/Tensor.h"


MNISTDataLoader::MNISTDataLoader(std::string const &imagesPath, std::string const &labelsPath,
                                 unsigned int batch_size) {
    this->batch_size_ = batch_size;
    loadImages(imagesPath);
    loadLabels(labelsPath);
}

unsigned int MNISTDataLoader::bytesToUInt(const char *bytes) {
    return ((unsigned char) bytes[0] << 24) | ((unsigned char) bytes[1] << 16) |
           ((unsigned char) bytes[2] << 8) | ((unsigned char) bytes[3] << 0);
}

void MNISTDataLoader::loadImages(std::string const &path) {
    // Info about the dataset's file format can be found at http://yann.lecun.com/exdb/mnist/
    std::ifstream file(path, std::ios::binary | std::ios::in);
    if (!file) {
        std::cerr << "Error: " << strerror(errno);
        exit(1);
    }
    file.clear();
    char bytes[4];
    file.read(bytes, 4); // magic number
    file.read(bytes, 4);
    num_images_ = bytesToUInt(bytes);
    file.read(bytes, 4);
    rows_ = bytesToUInt(bytes);
    file.read(bytes, 4);
    cols_ = bytesToUInt(bytes);

//    num_images_ = 64;

    images_.resize(num_images_);
    char byte;
    for (int i = 0; i < num_images_; ++i) {
        images_[i].resize(rows_);
        for (int j = 0; j < rows_; ++j) {
            images_[i][j].resize(cols_);
            for (int k = 0; k < cols_; ++k) {
                file.read(&byte, 1);
                images_[i][j][k] = (unsigned char) (byte & 0xff);
            }
        }
    }
}

int MNISTDataLoader::getNumBatches() {
    if (num_images_ % batch_size_ == 0) {
        return num_images_ / batch_size_;
    } else {
        return (num_images_ / batch_size_) + 1;
    }
}

void MNISTDataLoader::loadLabels(std::string const &path) {
    std::ifstream file(path, std::ios::binary | std::ios::in);
    if (!file) {
        std::cerr << "Error: " << strerror(errno);
    }
    file.clear();
    char bytes[4];
    file.read(bytes, 4); // magic number
    file.read(bytes, 4);
    num_images_ = bytesToUInt(bytes);

    labels_.resize(num_images_);
    char byte;
    for (int i = 0; i < num_images_; ++i) {
        file.read(&byte, 1);
        labels_[i] = (byte & 0xff);
    }
}

//void MNISTDataLoader::printImage(int idx) {
//    for (int i = 0; i < rows_; ++i) {
//        for (int j = 0; j < cols_; ++j) {
//            if (images_[idx][i][j] > 127) {
//                printf("%c", 219);
//            } else {
//                printf(" ");
//            }
//        }
//        printf("\n");
//    }
//    printf("Label: %d\n", labels_[idx]);
//}

std::pair<Tensor<double>, std::vector<int>> MNISTDataLoader::nextBatch() {
    std::pair<Tensor<double>, std::vector<int>> batchXY;
    int imgsMissing = num_images_ - batch_idx_;
    int size = imgsMissing > batch_size_ ? batch_size_ : imgsMissing;
    int dims[] = {size, 1, (int) rows_, (int) cols_};
    Tensor<double> tensorImgs(4, dims);
    std::vector<int> vecLabels;
    for (int i = 0; i < size; ++i) {
        for (int j = 0; j < rows_; ++j) {
            for (int k = 0; k < cols_; ++k) {
                tensorImgs.set(i, 0, j, k, ((double) (images_[batch_idx_ + i][j][k])) / 255.0);
            }
        }
        vecLabels.push_back(labels_[batch_idx_ + i]);
    }
    batch_idx_ += size;
    if (batch_idx_ == num_images_) {
        batch_idx_ = 0;
    }
    batchXY.first = tensorImgs;
    batchXY.second = vecLabels;
    return batchXY;
}
--------------------------------------------------------------------------------
/src/MaxPool.cpp:
--------------------------------------------------------------------------------
//
// Created by tabelini on 18/04/19.
//

#include "../include/MaxPool.h"

MaxPool::MaxPool(int size, int stride) {
    size_ = size;
    stride_ = stride;
}

Tensor<double> &MaxPool::forward(Tensor<double> &input) {
    int w = ((input.dims[3] - (size_ - 1) - 1) / stride_) + 1;
    int h = ((input.dims[2] - (size_ - 1) - 1) / stride_) + 1;
    int dims[] = {input.dims[0], input.dims[1], h, w};
    output_ = Tensor<double>(4, dims);
    indexes = Tensor<int>(4, dims);
    for (int i = 0; i < input.dims[0]; ++i) { // for each batch image
        for (int j = 0; j < input.dims[1]; ++j) { // for each image channel
            for (int k = 0; k < dims[2]; ++k) { // for each output y
                for (int l = 0; l < dims[3]; ++l) { // for each output x
                    double max = -999999999; // -infinity
                    int index = 0;
                    for (int m = 0; m < size_; ++m) {
                        for (int n = 0; n < size_; ++n) {
                            int input_y = k * stride_ + m;
                            int input_x = l * stride_ + n;
                            double value = input.get(i, j, input_y, input_x);
                            if (value > max) {
                                index = m * size_ + n;
                                max = value;
                            }
                        }
                    }
                    output_.set(i, j, k, l, max);
                    indexes.set(i, j, k, l, index);
                }
            }
        }
    }
    input_ = input;

    return output_;
}

Tensor<double> MaxPool::backprop(Tensor<double> chainGradient, double learning_rate) {
    Tensor<double> input_gradient(input_.num_dims, input_.dims);
    input_gradient.zero();

    for (int i = 0; i < input_.dims[0]; ++i) { // for each batch image
        for (int j = 0; j < input_.dims[1]; ++j) { // for each image channel
            for (int k = 0; k < output_.dims[2]; ++k) { // for each output y
                for (int l = 0; l < output_.dims[3]; ++l) { // for each output x
                    double chain_grad = chainGradient.get(i, j, k, l);
                    int index = indexes.get(i, j, k, l);
                    int m = index / size_;
                    int n = index % size_;
                    int input_y = k * stride_ + m;
                    int input_x = l * stride_ + n;
                    input_gradient.set(i, j, input_y, input_x, chain_grad);
                }
            }
        }
    }

    return input_gradient;
}

void MaxPool::load(FILE *file_model) {

}

void MaxPool::save(FILE *file_model) {

}
--------------------------------------------------------------------------------
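
`MaxPool::forward` records in `indexes` the flattened within-window offset of each maximum (`m * size_ + n`); `backprop` decodes that offset and routes each incoming gradient back to exactly that input position, leaving the rest of the window at zero.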
/src/NetworkModel.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#include <stdexcept>
#include "../include/NetworkModel.h"
#include "../include/LRScheduler.h"
#include "../include/Tensor.h"

using namespace std;

NetworkModel::NetworkModel(std::vector<Module *> &modules, OutputLayer *output_layer, LRScheduler *lr_scheduler) {
    modules_ = modules;
    lr_scheduler_ = lr_scheduler;
    output_layer_ = output_layer;
}

double NetworkModel::trainStep(Tensor<double> &x, vector<int> &y) {
    // Forward
    Tensor<double> output = forward(x);

    // Backprop
    pair<double, Tensor<double>> loss_and_cost_gradient = output_layer_->backprop(y);
    Tensor<double> chain_gradient = loss_and_cost_gradient.second;
    for (int i = (int) modules_.size() - 1; i >= 0; --i) {
        chain_gradient = modules_[i]->backprop(chain_gradient, lr_scheduler_->learning_rate);
    }
    ++iteration;
    lr_scheduler_->onIterationEnd(iteration);
    // Return loss
    return loss_and_cost_gradient.first;
}

Tensor<double> NetworkModel::forward(Tensor<double> &x) {
    for (auto &module : modules_) {
        x = module->forward(x);
    }
    return output_layer_->predict(x);
}

std::vector<int> NetworkModel::predict(Tensor<double> &x) {
    Tensor<double> output = forward(x);
    std::vector<int> predictions;
    for (int i = 0; i < output.dims[0]; ++i) {
        int argmax = -1;
        double max = -1;
        for (int j = 0; j < output.dims[1]; ++j) {
            if (output.get(i, j) > max) {
                max = output.get(i, j);
                argmax = j;
            }
        }
        predictions.push_back(argmax);
    }

    return predictions;
}

void NetworkModel::load(std::string path) {
    FILE *model_file = fopen(path.c_str(), "r");
    if (!model_file) {
        throw std::runtime_error("Error reading model file.");
    }
    for (auto &module : modules_) {
        module->load(model_file);
    }
    fclose(model_file);
}

void NetworkModel::save(std::string path) {
    FILE *model_file = fopen(path.c_str(), "w");
    if (!model_file) {
        throw std::runtime_error("Error writing model file.");
    }
    for (auto &module : modules_) {
        module->save(model_file);
    }
    fclose(model_file);
}

NetworkModel::~NetworkModel() {
    for (auto &module : modules_) {
        delete module;
    }
    delete output_layer_;
    delete lr_scheduler_;
}

void NetworkModel::eval() {
    for (auto &module : modules_) {
        module->eval();
    }
}
--------------------------------------------------------------------------------
/src/ReLU.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 11/04/19.
//

#include "../include/ReLU.h"

ReLU::ReLU() = default;

Tensor<double> &ReLU::forward(Tensor<double> &input) {
    input_ = input;
    product_ = input.relu();

    return product_;
}

Tensor<double> ReLU::backprop(Tensor<double> chainGradient, double learning_rate) {
    return chainGradient * input_.reluPrime();
}

void ReLU::load(FILE *file_model) {

}

void ReLU::save(FILE *file_model) {

}
--------------------------------------------------------------------------------
/src/Sigmoid.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#include "../include/Sigmoid.h"

Sigmoid::Sigmoid() = default;


Tensor<double> &Sigmoid::forward(Tensor<double> &input) {
    input_ = input;
    product_ = input.sigmoid();

    return product_;
}

Tensor<double> Sigmoid::backprop(Tensor<double> chainGradient, double learning_rate) {
    return chainGradient * input_.sigmoidPrime();
}

void Sigmoid::load(FILE *file_model) {

}

void Sigmoid::save(FILE *file_model) {

}

//Sigmoid::~Sigmoid() {
//}
--------------------------------------------------------------------------------
/src/SoftmaxClassifier.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 10/04/19.
//

#include "../include/SoftmaxClassifier.h"
#include "../include/Tensor.h"

Tensor<double> SoftmaxClassifier::predict(Tensor<double> input) {
    output_ = input.softmax();
    return output_;
}

std::pair<double, Tensor<double>> SoftmaxClassifier::backprop(std::vector<int> ground_truth) {
    double loss = crossEntropy(output_, ground_truth);
    Tensor<double> gradient = crossEntropyPrime(output_, ground_truth);

    return std::make_pair(loss, gradient);
}


Tensor<double> SoftmaxClassifier::crossEntropyPrime(Tensor<double> &output, std::vector<int> &y) {
    Tensor<double> prime = output;
    for (int i = 0; i < y.size(); ++i) {
        prime.set(i, y[i], prime.get(i, y[i]) - 1);
    }

    return prime / output.dims[0];
}


double SoftmaxClassifier::crossEntropy(Tensor<double> &y_hat, std::vector<int> &y) {
    double total = 0;
    for (int i = 0; i < y.size(); ++i) {
        double x = y_hat.get(i, y[i]);
        // Clamps the probability to a minimum value to avoid log(0)
        total += -log(x < 0.0000000001 ? 0.0000000001 : x);
    }

    return total / y.size(); // batch-wise mean
}
--------------------------------------------------------------------------------
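
The loss computed above is the batch-mean cross entropy L = -(1/N) * sum_i log p_i[y_i], with each probability clamped at 1e-10 so a confidently wrong prediction cannot produce log(0).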
/src/Tensor.cpp:
--------------------------------------------------------------------------------
//
// Created by lucas on 12/04/19.
//

#include "../include/Tensor.h"
#include <algorithm> // std::copy
#include <cstring> // memset


template class Tensor<int>;

template class Tensor<float>;

template class Tensor<double>;


template<typename T>
void Tensor<T>::zero() {
    memset(data_, 0, sizeof(T) * size_);
}

template<typename T>
T Tensor<T>::get(int i, int j) {
    assert(num_dims == 2);
    return data_[j + i * dims[1]];
}

template<typename T>
T Tensor<T>::get(int i) {
    assert(num_dims == 1);
    return data_[i];
}

template<typename T>
void Tensor<T>::set(int i, int j, T value) {
    assert(num_dims == 2);
    data_[j + i * dims[1]] = value;
}

template<typename T>
void Tensor<T>::set(int i, T value) {
    data_[i] = value;
}


template<typename T>
void Tensor<T>::add(int i, T value) {
    data_[i] += value;
}

template<typename T>
void Tensor<T>::view(int new_num_dims, int *new_dims) {
    assert(new_num_dims > 0 && new_num_dims <= 4);
    this->num_dims = new_num_dims;
    std::copy(new_dims, new_dims + new_num_dims, this->dims);
}

template<typename T>
Tensor<T>::Tensor(int num_dims, int const *dims) {
    assert(num_dims > 0 && num_dims <= 4);
    int size = 1;
    for (int i = 0; i < num_dims; ++i) {
        size *= dims[i];
        this->dims[i] = dims[i];
    }
    size_ = size;
//    std::shared_ptr<T> data_sp(new T[size_]);
    T *data_sp = new T[size_];
    data_ = data_sp;
    this->num_dims = num_dims;
}

template<typename T>
T Tensor<T>::get(int i, int j, int k) {
    assert(num_dims == 3);
    return data_[k + j * dims[2] + i * dims[1] * dims[2]];
}

template<typename T>
T Tensor<T>::get(int i, int j, int k, int l) {
    assert(num_dims == 4);
    return data_[l + k * dims[3] + j * dims[2] * dims[3] + i * dims[1] * dims[2] * dims[3]];
}

template<typename T>
void Tensor<T>::set(int i, int j, int k, T value) {
    assert(num_dims == 3);
    data_[k + j * dims[2] + i * dims[1] * dims[2]] = value;
}

template<typename T>
void Tensor<T>::set(int i, int j, int k, int l, T value) {
    assert(num_dims == 4);
    data_[l + k * dims[3] + j * dims[2] * dims[3] + i * dims[1] * dims[2] * dims[3]] = value;
}

template<typename T>
void Tensor<T>::add(int i, int j, int k, int l, T value) {
    assert(num_dims == 4);
    data_[l + k * dims[3] + j * dims[2] * dims[3] + i * dims[1] * dims[2] * dims[3]] += value;
}

template<typename T>
Tensor<T>::Tensor(const Tensor<T> &other) : size_(other.size_), num_dims(other.num_dims),
                                            data_(new T[other.size_]) {
    std::copy(other.data_, other.data_ + size_, data_);
    std::copy(other.dims, other.dims + 4, dims);
}

template<typename T>
Tensor<T>::~Tensor() {
    delete[] data_;
}


template<typename T>
Tensor<T> Tensor<T>::matmul(Tensor<T> other) {
    assert(num_dims == 2 && other.num_dims == 2);
    assert(dims[1] == other.dims[0]);

    int new_dims[] = {dims[0], other.dims[1]};
    Tensor<T> product(2, new_dims);
    for (int i = 0; i < this->dims[0]; ++i) {
        for (int j = 0; j < other.dims[1]; ++j) {
            T value = 0;
            for (int k = 0; k < other.dims[0]; ++k) {
                value += this->get(i, k) * other.get(k, j);
            }
            product.set(i, j, value);
        }
    }
    return product;
}

template<typename T>
Tensor<T> Tensor<T>::matrixTranspose() {
    assert(num_dims == 2);
    int new_dims[] = {dims[1], dims[0]};
    Tensor<T> transpose(num_dims, new_dims);
    for (int i = 0; i < dims[0]; ++i) {
        for (int j = 0; j < dims[1]; ++j) {
            transpose.set(j, i, get(i, j));
        }
    }

    return transpose;
}


template<typename T>
Tensor<T> Tensor<T>::relu() {
    Tensor<T> result(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        T x = data_[i];
        result.data_[i] = x > 0 ? x : 0;
    }

    return result;
}

template<typename T>
T sigmoid(T x) {
    return 1.0 / (1.0 + exp(-x));
}

template<typename T>
Tensor<T> Tensor<T>::sigmoid() {
    Tensor<T> result(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        T x = data_[i];
        result.data_[i] = ::sigmoid(x);
    }

    return result;
}

template<typename T>
T sigmoidPrime(T x) {
    return sigmoid(x) * (1.0 - sigmoid(x));
}

template<typename T>
Tensor<T> Tensor<T>::sigmoidPrime() {
    Tensor<T> result(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        T x = data_[i];
        result.data_[i] = ::sigmoidPrime(x);
    }

    return result;
}

template<typename T>
T Tensor<T>::sum() {
    T total = 0;
    for (int i = 0; i < size_; ++i) {
        total += data_[i];
    }
    return total;
}

template<typename T>
Tensor<T> Tensor<T>::softmax() {
    assert(num_dims == 2);
    // Softmax with max trick to avoid overflows
    int rows = dims[0], cols = dims[1];
    Tensor<T> probabilities(2, dims);
    for (int i = 0; i < rows; ++i) {
        T row_max = -1; // useless value so my IDE stops screaming at me, will always be replaced
        for (int j = 0; j < cols; ++j) {
            if (j == 0 || get(i, j) > row_max) {
                row_max = get(i, j);
            }
        }

        T denominator = 0;
        for (int j = 0; j < cols; ++j) {
            denominator += exp(get(i, j) - row_max);
        }

        for (int j = 0; j < cols; ++j) {
            probabilities.set(i, j, exp(get(i, j) - row_max) / denominator);
        }
    }
    return probabilities;
}
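
// Note on the row max above: subtracting the per-row maximum before exp() is the
// standard stabilization trick. Since softmax(x + c) == softmax(x) for any
// constant c, the result is mathematically unchanged, but the largest argument
// to exp() becomes 0, so exp() cannot overflow for large logits.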

template<typename T>
Tensor<T> Tensor<T>::reluPrime() {
    Tensor<T> prime(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        prime.data_[i] = data_[i] > 0 ? 1 : 0;
    }
    return prime;
}

template<typename T>
Tensor<T> Tensor<T>::operator+(Tensor<T> &other) {
    if (other.num_dims == 1 && other.size_ == this->dims[1] && num_dims == 2) {
        // if other is a 1d tensor and this is a 2d tensor
        Tensor<T> sum(num_dims, dims);
        for (int k = 0; k < this->dims[0]; ++k) {
            for (int j = 0; j < this->dims[1]; ++j) {
                sum.set(k, j, get(k, j) + other.get(j));
            }
        }

        return sum;
    } else if (other.num_dims == num_dims && other.size_ == size_) {
        Tensor<T> sum(num_dims, dims);
        for (int i = 0; i < size_; ++i) {
            sum.data_[i] = data_[i] + other.data_[i];
        }
        return sum;
    }
    throw std::logic_error("Undefined sum");
}


template<typename T>
Tensor<T> Tensor<T>::operator*(Tensor<T> other) {
    assert(size_ == other.size_);
    Tensor<T> product(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        product.data_[i] = data_[i] * other.data_[i];
    }
    return product;
}

template<typename T>
Tensor<T> Tensor<T>::operator*(T multiplier) {
    Tensor<T> product(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        product.data_[i] = data_[i] * multiplier;
    }
    return product;
}

template<typename T>
Tensor<T> Tensor<T>::operator/(T divisor) {
    Tensor<T> quotient(num_dims, dims);
    for (int i = 0; i < size_; ++i) {
        quotient.data_[i] = data_[i] / divisor;
    }
    return quotient;
}

template<typename T>
Tensor<T> Tensor<T>::operator-=(Tensor<T> difference) {
    assert(size_ == difference.size_);
    for (int i = 0; i < size_; ++i) {
        data_[i] = data_[i] - difference.data_[i];
    }
    return *this;
}

template<typename T>
Tensor<T> Tensor<T>::columnWiseSum() {
    assert(num_dims == 2);
    int rows = dims[0], cols = dims[1];
    int sum_dims[] = {cols};
    Tensor<T> sum(1, sum_dims);
    for (int i = 0; i < cols; ++i) {
        T total = 0;
        for (int j = 0; j < rows; ++j) {
            total += get(j, i);
        }
        sum.set(i, total);
    }
    return sum;
}

template<>
void
Tensor<double>::randn(std::default_random_engine generator, std::normal_distribution<double> distribution,
                      double multiplier) {
    for (int i = 0; i < size_; ++i) {
        data_[i] = distribution(generator) * multiplier;
    }
}

template<>
void Tensor<double>::print() {
    if (num_dims == 2) {
        int rows = dims[0], cols = dims[1];
        std::cout << "Tensor2D (" << rows << ", " << cols << ")\n[";
        for (int i = 0; i < rows; ++i) {
            if (i != 0) std::cout << " ";
            std::cout << "[";
            for (int j = 0; j < cols; ++j) {
                if (j == (cols - 1)) {
                    printf("%.18lf", get(i, j));
                } else {
                    printf("%.18lf ", get(i, j));
                }
            }
            if (i == (rows - 1)) {
                std::cout << "]]\n";
            } else {
                std::cout << "]\n";
            }
        }
    } else {
        printf("Tensor%dd (", num_dims);
        for (int i = 0; i < num_dims; ++i) {
            printf("%d", dims[i]);
            if (i != (num_dims - 1)) {
                printf(",");
            }
        }
        printf(")\n[");
        for (int j = 0; j < size_; ++j) {
            printf("%lf ", data_[j]);
        }
        printf("]\n");
    }
}

template<typename T>
Tensor<T> &Tensor<T>::operator=(const Tensor<T> &other) {
    if (this != &other) {
        T *new_data = new T[other.size_];
        std::copy(other.data_, other.data_ + other.size_, new_data);
        if (size_ != -1) {
            delete[] data_;
        }
        size_ = other.size_;
        std::copy(other.dims, other.dims + 4, dims);
        num_dims = other.num_dims;
        data_ = new_data;
    }

    return *this;
}

template<typename T>
void Tensor<T>::dropout(std::default_random_engine generator, std::uniform_real_distribution<> distribution, double p) {
    for (int i = 0; i < size_; ++i) {
        data_[i] = (distribution(generator) < p) / p;
    }
}

template<typename T>
Tensor<T> Tensor<T>::convolve2d(Tensor<T> kernels, int stride, int padding, Tensor<T> bias) {
    assert(kernels.dims[1] == dims[1]);
    int w = ((dims[3] + 2 * padding - (kernels.dims[3] - 1) - 1) / stride) + 1;
    int h = ((dims[2] + 2 * padding - (kernels.dims[2] - 1) - 1) / stride) + 1;
    int result_dims[] = {dims[0], kernels.dims[0], h, w};
    Tensor<T> output(4, result_dims);
    for (int i = 0; i < dims[0]; ++i) { // for each image in the batch
        for (int j = 0; j < kernels.dims[0]; ++j) { // for each output volume
            for (int k = 0; k < h; ++k) { // for each vertical position k in the output volume
                for (int l = 0; l < w; ++l) { // for each horizontal position l in the output volume
                    int im_si = stride * k - padding;
                    int im_sj = stride * l - padding;
                    T total = 0;
                    for (int m = 0; m < kernels.dims[1]; ++m) { // for each filter channel
                        for (int n = 0; n < kernels.dims[2]; ++n) {
                            for (int o = 0; o < kernels.dims[3]; ++o) {
                                int x = im_si + n, y = im_sj + o;
                                if (x < 0 || x >= dims[2] || y < 0 || y >= dims[3])
                                    continue; // padding region, skip (contributes 0)
                                T a = get(i, m, x, y);
                                T b = kernels.get(j, m, n, o);
                                total += a * b;
                            }
                        }
                    }
                    output.set(i, j, k, l, total + bias.get(j));
                }
            }
        }
    }
    return output;
}
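
// Output spatial size above follows the usual convolution arithmetic:
//     out = (in + 2 * padding - kernel_size) / stride + 1
// For the network in src/main.cpp: 28x28 inputs with a 3x3 kernel, stride 1 and
// no padding give 26x26; MaxPool(2, 2) then yields 13x13, and with 8 filters the
// flattened size is 8 * 13 * 13 = 1352 -- the input size of the first
// FullyConnected layer.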
"); 29 | fflush(stdout); 30 | MNISTDataLoader train_loader(data_path + "/train-images-idx3-ubyte", data_path + "/train-labels-idx1-ubyte", 32); 31 | printf("Loaded.\n"); 32 | 33 | int seed = 0; 34 | vector modules = {new Conv2d(1, 8, 3, 1, 0, seed), new MaxPool(2, 2), new ReLU(), new FullyConnected(1352, 30, seed), new ReLU(), 35 | new FullyConnected(30, 10, seed)}; 36 | auto lr_sched = new LinearLRScheduler(0.2, -0.000005); 37 | NetworkModel model = NetworkModel(modules, new SoftmaxClassifier(), lr_sched); 38 | // model.load("network.txt"); 39 | 40 | int epochs = 1; 41 | printf("Training for %d epoch(s).\n", epochs); 42 | // Train network 43 | int num_train_batches = train_loader.getNumBatches(); 44 | for (int k = 0; k < epochs; ++k) { 45 | printf("Epoch %d\n", k + 1); 46 | for (int i = 0; i < num_train_batches; ++i) { 47 | pair, vector > xy = train_loader.nextBatch(); 48 | double loss = model.trainStep(xy.first, xy.second); 49 | if ((i + 1) % 10 == 0) { 50 | printf("\rIteration %d/%d - Batch Loss: %.4lf", i + 1, num_train_batches, loss); 51 | fflush(stdout); 52 | } 53 | } 54 | printf("\n"); 55 | } 56 | // Save weights 57 | model.save("network.txt"); 58 | 59 | printf("Loading testing set... "); 60 | fflush(stdout); 61 | MNISTDataLoader test_loader(data_path + "/t10k-images-idx3-ubyte", data_path + "/t10k-labels-idx1-ubyte", 32); 62 | printf("Loaded.\n"); 63 | 64 | model.eval(); 65 | 66 | // Test and measure accuracy 67 | int hits = 0; 68 | int total = 0; 69 | printf("Testing...\n"); 70 | int num_test_batches = test_loader.getNumBatches(); 71 | for (int i = 0; i < num_test_batches; ++i) { 72 | if ((i + 1) % 10 == 0 || i == (num_test_batches - 1)) { 73 | printf("\rIteration %d/%d", i + 1, num_test_batches); 74 | fflush(stdout); 75 | } 76 | pair, vector > xy = test_loader.nextBatch(); 77 | vector predictions = model.predict(xy.first); 78 | for (int j = 0; j < predictions.size(); ++j) { 79 | if (predictions[j] == xy.second[j]) { 80 | hits++; 81 | } 82 | } 83 | total += xy.second.size(); 84 | } 85 | printf("\n"); 86 | 87 | printf("Accuracy: %.2f%% (%d/%d)\n", ((double) hits * 100) / total, hits, total); 88 | 89 | return 0; 90 | } --------------------------------------------------------------------------------