├── .gitignore
├── CMakeLists.txt
├── FindEigen3.cmake
├── LICENSE
├── README.md
├── mnist.cpp
├── nn.cpp
├── nn.h
├── nntest.cpp
└── tutorial.cpp

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.*
build
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
IF(COMMAND CMAKE_POLICY)
  CMAKE_POLICY(SET CMP0003 OLD)
ENDIF(COMMAND CMAKE_POLICY)

PROJECT( nn )

SET(CMAKE_BUILD_TYPE "Release")
SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR})
SET(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR} CACHE INTERNAL "Prefix prepended to install directories" FORCE)

# color definitions
IF(NOT WIN32)
  STRING(ASCII 27 Esc)
  SET(ColourReset "${Esc}[m")
  SET(BoldRed "${Esc}[1;31m")
  SET(BoldGreen "${Esc}[1;32m")
  SET(BoldMagenta "${Esc}[1;35m")
ENDIF()

FIND_PACKAGE(OpenMP QUIET)

FIND_PACKAGE(Eigen3 REQUIRED)

INCLUDE_DIRECTORIES(${EIGEN3_INCLUDE_DIR})

ADD_LIBRARY(nn nn.cpp)

ADD_EXECUTABLE(tutorial tutorial.cpp)
TARGET_LINK_LIBRARIES(tutorial nn)

ADD_EXECUTABLE(mnist mnist.cpp)
TARGET_LINK_LIBRARIES(mnist nn)

MESSAGE("")

# ----- OpenMP -----
IF (OPENMP_FOUND)

  SET (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
  SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  MESSAGE("${BoldGreen}nn will be compiled with OpenMP support.${ColourReset}")

ELSE()

  MESSAGE("${BoldMagenta}nn will run in single core mode since OpenMP was not found.${ColourReset}")

ENDIF()

# ----- Testing -----
OPTION(WITH_GTEST "Download and compile unit tests using googletest." OFF)

IF (${WITH_GTEST})

  ENABLE_TESTING()

  INCLUDE(ExternalProject)

  # ----- Download and build gtest -----
  ExternalProject_Add(googletest
    SVN_REPOSITORY "http://googletest.googlecode.com/svn/tags/release-1.7.0"
    UPDATE_COMMAND ""
    INSTALL_COMMAND ""
  )

  INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/googletest-prefix/src/googletest/include)
  LINK_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/googletest-prefix/src/googletest-build)

  ADD_EXECUTABLE(nntest nntest.cpp)
  TARGET_LINK_LIBRARIES(nntest nn gtest gtest_main pthread)

  MESSAGE("${BoldGreen}Compiling tests using googletest.${ColourReset}")
  ADD_TEST( nntest nntest )

ELSE()

  MESSAGE("${BoldMagenta}Tests will not be compiled. Run cmake with -DWITH_GTEST=ON to compile unit tests.${ColourReset}")

ENDIF()

MESSAGE("")
--------------------------------------------------------------------------------
/FindEigen3.cmake:
--------------------------------------------------------------------------------
# - Try to find Eigen3 lib
#
# This module supports requiring a minimum version, e.g. you can do
#   find_package(Eigen3 3.1.2)
# to require version 3.1.2 or newer of Eigen3.
#
# Once done this will define
#
#  EIGEN3_FOUND - system has eigen lib with correct version
#  EIGEN3_INCLUDE_DIR - the eigen include directory
#  EIGEN3_VERSION - eigen version

# Copyright (c) 2006, 2007 Montel Laurent,
# Copyright (c) 2008, 2009 Gael Guennebaud,
# Copyright (c) 2009 Benoit Jacob
# Redistribution and use is allowed according to the terms of the 2-clause BSD license.

if(NOT Eigen3_FIND_VERSION)
  if(NOT Eigen3_FIND_VERSION_MAJOR)
    set(Eigen3_FIND_VERSION_MAJOR 2)
  endif(NOT Eigen3_FIND_VERSION_MAJOR)
  if(NOT Eigen3_FIND_VERSION_MINOR)
    set(Eigen3_FIND_VERSION_MINOR 91)
  endif(NOT Eigen3_FIND_VERSION_MINOR)
  if(NOT Eigen3_FIND_VERSION_PATCH)
    set(Eigen3_FIND_VERSION_PATCH 0)
  endif(NOT Eigen3_FIND_VERSION_PATCH)

  set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}")
endif(NOT Eigen3_FIND_VERSION)

macro(_eigen3_check_version)
  file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header)

  string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}")
  set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}")
  string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}")
  set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}")
  string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}")
  set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}")

  set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION})
  if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
    set(EIGEN3_VERSION_OK FALSE)
  else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
    set(EIGEN3_VERSION_OK TRUE)
  endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})

  if(NOT EIGEN3_VERSION_OK)

    message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, "
                   "but at least version ${Eigen3_FIND_VERSION} is required")
  endif(NOT EIGEN3_VERSION_OK)
endmacro(_eigen3_check_version)

if (EIGEN3_INCLUDE_DIR)

  # in cache already
  _eigen3_check_version()
  set(EIGEN3_FOUND ${EIGEN3_VERSION_OK})

else (EIGEN3_INCLUDE_DIR)

  find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library
      PATHS
      ${CMAKE_INSTALL_PREFIX}/include
      ${KDE4_INCLUDE_DIR}
      PATH_SUFFIXES eigen3 eigen
    )

  if(EIGEN3_INCLUDE_DIR)
    _eigen3_check_version()
  endif(EIGEN3_INCLUDE_DIR)

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK)

  mark_as_advanced(EIGEN3_INCLUDE_DIR)

endif(EIGEN3_INCLUDE_DIR)

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright (c) 2014, Manuel Blum
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the copyright holder nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Multi-layer perceptrons using RPROP
===================================

**nn** is a lightweight neural network library that uses resilient backpropagation (RPROP) to adapt the weights.

Installation
------------

**nn** has been tested on Ubuntu, Arch Linux and macOS.

* install [CMake](http://www.cmake.org/), [Eigen3](http://eigen.tuxfamily.org/) and [subversion](http://subversion.tigris.org/). On Ubuntu this is done as follows:

        sudo apt-get install cmake subversion libeigen3-dev

* clone the **nn** repository or download it [here](https://bitbucket.org/mblum/nn/get/master.tar.gz)
* change to the **nn** directory and create a build folder

        cd path/to/nn
        mkdir build

* run cmake from within the build folder and compile the library using make

        cd build
        cmake ..
        make

* run the example code

        ./tutorial

* to compile unit tests for **nn**, run cmake with the option `-DWITH_GTEST=ON`

        cmake .. -DWITH_GTEST=ON
        make
        make test


License
-------

**nn** is free software, licensed under the BSD license. A copy of this license is distributed with the software.

Usage of the library
--------------------

The source code for this tutorial can be found in `tutorial.cpp`.

### Preparing your data

Organize your training inputs into an *(m x n_input)* matrix. Each row of this matrix corresponds to a training sample and each column to a feature. Prepare a second matrix of size *(m x n_output)* containing the target values, where *n_output* is the dimensionality of the output.

    matrix_t X(m, n_input);
    matrix_t Y(m, n_output);

    // fill with data

### Initializing the neural network

This neural network implementation only supports fully connected feed-forward multi-layer perceptrons (MLPs) with sigmoidal activation functions. The neurons are organized into *k* layers: one input layer, one output layer and an arbitrary number of hidden layers. Each neuron has outgoing connections to all neurons in the subsequent layer. The number of neurons in the input and output layers is given by the dimensionality of the training data. After specifying the network topology you can create the `NeuralNet` object; the weights are initialized randomly.

    Eigen::VectorXi topo(k);
    topo << n_input, n1, n2, ..., n_output;

    // initialize a neural network with the given topology
    NeuralNet nn(topo);

### Scaling the data

When working with MLPs you should always scale your data so that all features lie in the same range and the output values are between 0 and 1. You can do this by passing your training data to the `autoscale` function, which computes a suitable mapping. Once computed, the mapping is applied automatically, so you only have to do this once. To reset the scaling parameters to their default values, call `autoscale_reset`.

    nn.autoscale(X, Y);

### Training the network

Alternate between computing the quadratic loss of the MLP and adapting the parameters until the loss converges. You can also specify a regularization parameter *lambda*, which penalizes large weights and thereby helps to avoid overfitting.

    for (int i = 0; i < max_steps; ++i) {
      err = nn.loss(X, Y, lambda);
      nn.rprop();
    }
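
The loop above simply runs for a fixed number of steps. If you prefer to stop as soon as the loss has converged, one possible variant is to track the change of the loss between iterations, as sketched below. The tolerance `1e-8` and the variable names are illustrative assumptions, not part of the library API, and the snippet additionally needs the standard headers `<cmath>` and `<limits>`.

    // illustrative convergence check; the tolerance is an arbitrary example value
    F_TYPE prev_err = std::numeric_limits<F_TYPE>::max();
    for (int i = 0; i < max_steps; ++i) {
      F_TYPE err = nn.loss(X, Y, lambda);          // quadratic loss incl. regularization
      if (std::abs(prev_err - err) < 1e-8) break;  // loss no longer improves
      prev_err = err;
      nn.rprop();                                  // one RPROP weight update
    }
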
### Making predictions

Once you have trained a model, you can make predictions on new data by passing it through the network and reading the activations of the output layer.

    nn.forward_pass(X_test);
    matrix_t Y_test = nn.get_activation();

### Reading and writing models to disk

You can read and write MLPs to binary files.

    // write model to disk
    nn.write(filename);

    // read model from disk
    NeuralNet nn(filename);

### Changing the floating point precision

**nn** uses double precision floats by default. You can change this behaviour in the file `nn.h`.

    #define F_TYPE double

### MNIST dataset

In order to test **nn** on the MNIST dataset, download the dataset from [here](http://yann.lecun.com/exdb/mnist/) and run the `mnist` tool.

    ./mnist path/to/data

The tool trains an MLP with two hidden layers of 300 and 100 neurons, respectively, giving 266,610 weights in total (784·300 + 300·100 + 100·10 connection weights plus 410 bias terms). With this setup, error rates below 5% are achieved on the test dataset.

### Making **nn** run in parallel

Some algorithms of the Eigen library can exploit the multiple cores present in your hardware. This happens automatically if your compiler supports it. You can control the number of threads by setting the OpenMP environment variable `OMP_NUM_THREADS`.

    OMP_NUM_THREADS=n ./my_program

### Using **nn** in your own project

Just copy `nn.h` and `nn.cpp` into your workspace, make sure that the `Eigen` headers are found, and start coding!
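
Putting the pieces of this tutorial together, a minimal program could look roughly like the sketch below. It only uses the calls introduced above; the 2-5-1 topology, the number of samples, the number of training steps, the value of *lambda* and the file name `model.dat` are made-up example values. See `tutorial.cpp` for the complete, authoritative example.

    #include "nn.h"

    int main() {
      // toy problem: 100 samples, 2 input features, 1 output (illustrative sizes)
      const int m = 100;
      matrix_t X(m, 2);
      matrix_t Y(m, 1);
      // ... fill X and Y with your data ...

      // topology: 2 inputs, one hidden layer with 5 neurons, 1 output
      Eigen::VectorXi topo(3);
      topo << 2, 5, 1;
      NeuralNet nn(topo);

      // scale inputs and targets once
      nn.autoscale(X, Y);

      // train with RPROP, using a small regularization parameter
      for (int i = 0; i < 1000; ++i) {
        nn.loss(X, Y, 1e-5);
        nn.rprop();
      }

      // predict (here simply on the training inputs) and save the model
      nn.forward_pass(X);
      matrix_t prediction = nn.get_activation();
      nn.write("model.dat");

      return 0;
    }
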
--------------------------------------------------------------------------------
/mnist.cpp:
--------------------------------------------------------------------------------
// Copyright (c) 2013, Manuel Blum
// All rights reserved.

// Define this symbol to enable runtime tests for allocations
//#define EIGEN_RUNTIME_NO_MALLOC

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "nn.h"

// reverse the byte order of a 32-bit integer (the MNIST files are big-endian)
inline void swap(int &val)
{
  val = (val<<24) | ((val<<8) & 0x00ff0000) | ((val>>8) & 0x0000ff00) | (val>>24);
}

matrix_t read_mnist_images(std::string filename)
{
  matrix_t X;
  std::ifstream fs(filename.c_str(), std::ios::binary);
  if(fs) {
    int magic_number, num_images, num_rows, num_columns;
    fs.read((char*)&magic_number, sizeof(magic_number));
    fs.read((char*)&num_images, sizeof(num_images));
    fs.read((char*)&num_rows, sizeof(num_rows));
    fs.read((char*)&num_columns, sizeof(num_columns));
    // 2051 is the magic number of the MNIST image files; if it does not match,
    // the header was read with the wrong byte order and must be swapped
    if (magic_number != 2051) {
      swap(magic_number);
      swap(num_images);
      swap(num_rows);
      swap(num_columns);
    }

    X = matrix_t::Zero(num_images, num_rows*num_columns);

    for (size_t i=0; i
--------------------------------------------------------------------------------
/nn.cpp:
--------------------------------------------------------------------------------
// Copyright (c) 2013, Manuel Blum
// All rights reserved.

#include <cassert>
#include <cmath>
#include <fstream>

#include "nn.h"


// default parameters for Rprop
const RpropParams NeuralNet::p = {0.1, 50, 1e-6, 0.5, 1.2};

NeuralNet::NeuralNet(Eigen::VectorXi &topology) {
  assert(topology.size() > 1);
  init_layer(topology);
  init_weights(0.5);
  autoscale_reset();
}

NeuralNet::NeuralNet(const char *filename) {
  std::ifstream fs(filename, std::ios::in | std::ios::binary);
  if (fs) {
    // number of layers
    int num_layers;
    fs.read((char *)&num_layers, sizeof(int));
    Eigen::VectorXi topology(num_layers);
    // topology
    fs.read((char *) topology.data(), topology.rows() * sizeof(int));
    init_layer(topology);
    autoscale_reset();
    // scaling parameters
    fs.read((char *) Xscale.data(), Xscale.size() * sizeof(F_TYPE));
    fs.read((char *) Xshift.data(), Xshift.size() * sizeof(F_TYPE));
    fs.read((char *) Yscale.data(), Yscale.size() * sizeof(F_TYPE));
    fs.read((char *) Yshift.data(), Yshift.size() * sizeof(F_TYPE));
    // weights
    for (int i=1; i