├── .gitignore ├── CNN ├── CNN.cpp ├── CNN.h ├── Makefile └── main.cpp ├── ConvolveLayer ├── ConvolveLayer.cpp ├── ConvolveLayer.h ├── Makefile └── main.cpp ├── GPUMat ├── GPUMat.cpp ├── GPUMat.h ├── GPU_Math_Func.cu ├── GPU_Math_Func.h ├── Makefile ├── caffe_references │ ├── GPUMat.cpp │ ├── GPUMat.h │ ├── device_alternate[1].hpp │ ├── math_functions[1].cu │ ├── math_functions[1].hpp │ └── syncedmem[1].cpp ├── cublastest │ ├── GPUMat.cpp │ ├── GPUMat.h │ ├── Makefile │ ├── armatest │ │ ├── armatest.sdf │ │ ├── armatest.sln │ │ ├── armatest │ │ │ ├── armatest.vcxproj.filters │ │ │ └── armatest.vcxproj.user │ │ └── x64 │ │ │ └── Debug │ │ │ └── armatest.ilk │ ├── cublastest │ │ ├── cublastest.sdf │ │ ├── cublastest.sln │ │ ├── cublastest │ │ │ └── Debug │ │ │ │ └── kernel.cu.deps │ │ └── x64 │ │ │ └── Debug │ │ │ └── cublastest.ilk │ ├── main.cpp │ ├── main.cu │ └── test ├── device_common.cpp ├── device_common.h ├── gpumat │ ├── .cproject │ └── .project ├── main.cu └── test_GPUMat.cpp ├── LSTM ├── LSTMLayer.cpp ├── LSTMLayer.h ├── Makefile ├── main.cpp ├── test └── testdata.dat ├── Makefile ├── MatArray ├── Makefile ├── MatArray.h └── main.cpp ├── PoolLayer ├── Makefile ├── PoolLayer.cpp ├── PoolLayer.h └── main.cpp ├── RBM ├── Makefile ├── ProgramArgs.cpp ├── ProgramArgs.h ├── RBM.cpp ├── RBM.h ├── mainSDA.cpp └── run.txt ├── README.md ├── include ├── ActivationFunc.h ├── ActivationLayer.h ├── BaseLayer.h ├── BaseModel.h ├── DeepLearning.pb.h ├── ElementMultiAddLayer.h ├── ElementwiseLayer.h ├── ElmanRL.h ├── Globals.h ├── Initializer.h ├── Layer.h ├── Layer_binaryOp.h ├── Layer_unitaryOp.h ├── LinearAdditionLayer.h ├── MultiAddLayer.h ├── MultiLayerPerceptron.h ├── Net.h ├── RNN.h ├── RecurrLayer.h ├── Util.h ├── common.h ├── io.h └── optimization.h ├── plotting ├── Driver.py └── Util.py └── src ├── ActivationLayer.cpp ├── BaseLayer.cpp ├── DeepLearning.pb.cc ├── ElementMultiAddLayer.cpp ├── ElementwiseLayer.cpp ├── ElmanRL.cpp ├── LBFGS.cpp ├── LinearAdditionLayer.cpp ├── MultiAddLayer.cpp ├── MultiLayerPerceptron.cpp ├── Proto ├── DeepLearning.proto ├── Makefile ├── generateFile.sh └── test ├── RNN.cpp ├── RecurrLayer.cpp ├── SteepDescent.cpp ├── Util.cpp ├── io.cpp └── test ├── BaseLayer ├── Makefile ├── test_BaseLayer └── test_BaseLayer.cpp ├── ElmanRL ├── Makefile ├── RLtest.prototxt └── test ├── GRNN ├── GRNN.cpp ├── GRNN.h └── Makefile ├── IO ├── Makefile ├── net.prototxt ├── qsolver.prototxt ├── test └── test_IO.cpp ├── IOtest ├── DeepLearning.pb.cc ├── DeepLearning.pb.h ├── Makefile └── net.prototxt ├── Makefile.common ├── MultiLayerPerceptron ├── Makefile ├── net.prototxt ├── result.xt ├── runningparameters │ ├── sin(5x)+exp(5x) │ │ ├── net.prototxt │ │ └── readme │ └── sin(5x) │ │ ├── net.prototxt │ │ └── readme ├── testSimple.prototxt ├── test_funcApprox.cpp └── test_mlp.cpp ├── MultiThreadArmaMat └── Makefile ├── NN-RL ├── Makefile ├── Model_PoleFull.cpp ├── Model_PoleFull.h ├── Model_PoleSimple.cpp ├── Model_PoleSimple.h ├── NN_RLSolverBase.cpp ├── NN_RLSolverBase.h ├── NN_RLSolverMLP.cpp ├── NN_RLSolverMLP.h ├── NN_RLSolverMultiMLP.cpp ├── NN_RLSolverMultiMLP.h ├── NN_RLSolverRNN.cpp ├── NN_RLSolverRNN.h ├── NN_RL_Driver.cpp ├── Qtableresult │ ├── qtable1.tif │ ├── qtable2.tif │ ├── qtable3.tif │ └── qtableAction.tif ├── RLSolverBase.h ├── RLSolver_2DTable.cpp ├── RLSolver_2DTable.h ├── elman.prototxt ├── mlp.prototxt ├── multimlp.prototxt ├── net.prototxt ├── plotQMap.m ├── plotQtable.m ├── qsolver.prototxt └── rnn.prototxt ├── Optimization ├── Makefile └── main.cpp ├── 
RNN ├── Makefile ├── RLtest2.prototxt ├── gradcheck.prototxt ├── net.prototxt ├── test ├── testIntermediate.prototxt ├── testSimple.prototxt ├── test_RNN.cpp └── testlittleTimer.prototxt ├── RNNtestRLSet └── prediction ├── Trainer ├── Makefile ├── Trainer.cpp ├── Trainer.h ├── net.prototxt ├── test └── test_trainer.cpp ├── TwolayerPerceptron ├── Makefile ├── MultiLayerPerceptron.cpp ├── MultiLayerPerceptron.h └── mainSDA.cpp ├── Util ├── Makefile ├── test └── test_Utils.cpp ├── arma ├── Makefile └── example1.cpp └── example1.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | # PDFs 31 | *.pdf 32 | *.lyx 33 | 34 | # data 35 | *.dat 36 | *.txt 37 | *.bin 38 | *image* 39 | *.JPG 40 | *.png 41 | *.log 42 | *.zip 43 | #other 44 | *.orig 45 | /CppApplication_1/build/ 46 | /CppApplication_1/dist/ 47 | *~ 48 | nbproject/ 49 | *.swp 50 | *.pyc 51 | *data* 52 | MNIST 53 | test_* 54 | *.tlog 55 | *.idb 56 | *.pdb 57 | *.suo 58 | *.vcxproj 59 | -------------------------------------------------------------------------------- /CNN/CNN.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../BaseLayer/BaseLayer.h" 4 | #include "../ConvolveLayer/ConvolveLayer.h" 5 | #include "../PoolLayer/PoolLayer.h" 6 | #include "../Optimization/optimization.h" 7 | 8 | namespace NeuralNet{ 9 | 10 | struct TrainingPara { 11 | 12 | TrainingPara(double eps0=1e-6, int NEpoch0 = 500, 13 | int miniBatchSize0 = 10, double alpha0 = 0.1, int save = 50, bool load = false): 14 | eps(eps0),NEpoch(NEpoch0), 15 | miniBatchSize(miniBatchSize0), alpha(alpha0), saveFrequency(save), loadFlag(load) {} 16 | double eps; 17 | int NEpoch; 18 | int miniBatchSize; 19 | double alpha; 20 | int saveFrequency; 21 | bool loadFlag; 22 | // Method method; 23 | void print() const { 24 | 25 | std::cout << eps << "\t"; 26 | std::cout << NEpoch << "\t"; 27 | std::cout << miniBatchSize << "\t"; 28 | std::cout << alpha << std::endl; 29 | 30 | } 31 | }; 32 | 33 | 34 | class CNN { 35 | friend class CNNTrainer; 36 | public: 37 | CNN(){}; 38 | CNN(std::shared_ptr trainingX0, std::shared_ptr trainingY0, int nChanel0, TrainingPara trainingPara0); 39 | void train(); 40 | void setTrainingData(std::shared_ptr trainingX0, std::shared_ptr trainingY0, int nChanel0); 41 | void feedForward(std::shared_ptr); 42 | void backProp(std::shared_ptr); 43 | void test(std::shared_ptr testX0, std::shared_ptr testY0); 44 | double calLayerError(std::shared_ptr delta); 45 | void calNumericGrad(std::shared_ptr, std::shared_ptr); 46 | void calGrad(std::shared_ptr trainingX); 47 | void vectoriseGrad(arma::vec &grad); 48 | void deVectoriseWeight(arma::vec &x); 49 | void vectoriseWeight(arma::vec &x); 50 | void saveWeight(std::string str = "cnn_weights.dat"); 51 | void loadWeight(std::string str = "cnn_weights.dat"); 52 | 53 | bool testGrad; 54 | std::vector poolLayers; 55 | std::vector convoLayers; 56 | std::vector FCLayers; 57 | int numInstance; 58 | std::shared_ptr trainingX; 59 | std::shared_ptr trainingY, output; 60 | int nChanel; 61 | TrainingPara trainingPara; 62 | int inputDim_x, inputDim_y; 63 | 
int outputDim; 64 | int numFCLayers; 65 | int numCLayers; 66 | int totalDim; 67 | }; 68 | 69 | class CNNTrainer:public Optimization::ObjectFunc{ 70 | public: 71 | CNNTrainer(CNN &CNN); 72 | void gradientChecking(); 73 | virtual double operator()(arma::vec &x, arma::vec &grad); 74 | // std::shared_ptr x_init; 75 | private: 76 | CNN &cnn; 77 | }; 78 | 79 | } -------------------------------------------------------------------------------- /CNN/Makefile: -------------------------------------------------------------------------------- 1 | # this make file now is dynamically linking 2 | #OPTFLAGS = -O3 -march=nocona -mfpmath=sse -msse3 -Wuninitialized -flto 3 | #MKL_INCLUDE = /opt/intel/mkl/include/ 4 | #MKL_INCLUDE = /opt/intel/composer_xe_2013.0.079/mkl/include/ 5 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 6 | 7 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 8 | CPP = g++ 9 | #MKLROOT = /opt/intel/mkl 10 | #MKLROOT = /opt/intel/composer_xe_2013.0.079/mkl 11 | MKLROOT = /opt/intel/composerxe/mkl 12 | MKLLINKFLAGS = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread 13 | MKL_DL_LINKFLAGS = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm 14 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include/armadillo 15 | ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -llapack -lopenblas 16 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 17 | CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) -I/opt/boost/boost_1_57_0 -c -D__LINUX -DARMA_NO_DEBUG -DDEBUG -g3 18 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program -march=native 19 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 20 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl #$(LINKOPTFLAGS) 21 | #LINKFLAGS = 22 | #ODIR=obj 23 | ODIR = 24 | 25 | OBJ = main.o PoolLayer.o ConvolveLayer.o BaseLayer.o CNN.o load_cifar10.o LBFGS.o 26 | 27 | test : $(OBJ) 28 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 29 | 30 | BaseLayer.o : ../BaseLayer/BaseLayer.cpp ../BaseLayer/BaseLayer.h 31 | $(CPP) -c $(CXXFLAGS) $< 32 | 33 | PoolLayer.o : ../PoolLayer/PoolLayer.cpp ../PoolLayer/PoolLayer.h 34 | $(CPP) -c $(CXXFLAGS) $< 35 | 36 | ConvolveLayer.o : ../ConvolveLayer/ConvolveLayer.cpp ../ConvolveLayer/ConvolveLayer.h 37 | $(CPP) -c $(CXXFLAGS) $< 38 | 39 | load_cifar10.o : ../cifar-10/load_cifar10.cpp 40 | $(CPP) -c $(CXXFLAGS) $< 41 | 42 | LBFGS.o : ../Optimization/LBFGS.cpp ../Optimization/optimization.h 43 | $(CPP) -c $(CXXFLAGS) $< 44 | #%.o : %.cpp 45 | # $(CPP) -c $(CXXFLAGS) 46 | 47 | 48 | clean: 49 | rm -f *.o *~ 50 | -------------------------------------------------------------------------------- /ConvolveLayer/ConvolveLayer.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../MatArray/MatArray.h" 4 | 5 | 6 | struct ConvolveLayer { 7 | enum ActivationType {ReLU, tanh, sigmoid, linear}; 8 | ConvolveLayer(int numFilters0, int filterDim0_x, int filterDim0_y, int stride0, ActivationType type0); 9 | void activateUp(std::shared_ptr); 10 | // upate the parameters and propgate the error down for the lower layer 11 | void updatePara(std::shared_ptr delta_upper, double learningRate); 12 | void calGrad(std::shared_ptr delta_upper); 13 | void calGrad_matrixMethod(std::shared_ptr delta_upper); 14 | void initializeWeight(); 15 | 
void setInputDim(int, int, int); 16 | void propError(std::shared_ptr delta_upper); 17 | void vectoriseGrad(double *ptr, size_t offset); 18 | void deVectoriseWeight(double *ptr, size_t offset); 19 | void vectoriseWeight(double *ptr, size_t offset); 20 | void convolve_naive(std::shared_ptr input); 21 | void im2col(std::shared_ptr input, std::shared_ptr &output); 22 | void col2im(std::shared_ptr input, std::shared_ptr &output); 23 | void convolve_matrixMethod(std::shared_ptr input); 24 | 25 | int numFilters; 26 | // every filter is a 4D cube 27 | // MatArray::Mat2DArray_ptr filters, grad_W; 28 | Tensor_4D::ptr filters, grad_W; 29 | std::shared_ptr delta_out, input, output; 30 | std::shared_ptr B, grad_B; 31 | std::shared_ptr filters2D, input2D, grad_W2D; 32 | 33 | int filterDim_x, filterDim_y; 34 | int inputDim_x; 35 | int inputDim_y; 36 | int inputDim_z; 37 | int inputSize; 38 | int outputSize; 39 | int outputDim_x, outputDim_y, outputDim_z; 40 | int stride; 41 | int W_size, B_size, totalSize; 42 | ActivationType type; 43 | }; -------------------------------------------------------------------------------- /ConvolveLayer/Makefile: -------------------------------------------------------------------------------- 1 | # this make file now is dynamically linking 2 | #OPTFLAGS = -O3 -march=nocona -mfpmath=sse -msse3 -Wuninitialized -flto 3 | #MKL_INCLUDE = /opt/intel/mkl/include/ 4 | #MKL_INCLUDE = /opt/intel/composer_xe_2013.0.079/mkl/include/ 5 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 6 | 7 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 8 | CPP = g++ 9 | #MKLROOT = /opt/intel/mkl 10 | #MKLROOT = /opt/intel/composer_xe_2013.0.079/mkl 11 | MKLROOT = /opt/intel/composerxe/mkl 12 | MKLLINKFLAGS = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread 13 | MKL_DL_LINKFLAGS = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm 14 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include/armadillo 15 | ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -llapack -lopenblas 16 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 17 | CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) -I/opt/boost/boost_1_57_0 -c -DDEBUG -D__LINUX -g3 18 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program 19 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 20 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl 21 | #LINKFLAGS = 22 | #ODIR=obj 23 | ODIR = 24 | 25 | OBJ = main.o PoolLayer.o 26 | 27 | test : $(OBJ) 28 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 29 | 30 | #%.o : %.cpp 31 | # $(CPP) -c $(CXXFLAGS) 32 | 33 | 34 | clean: 35 | rm -f *.o *~ 36 | -------------------------------------------------------------------------------- /ConvolveLayer/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "ConvolveLayer.h" 10 | 11 | 12 | void loadData_MNIST(std::shared_ptr X, 13 | std::shared_ptr Y); 14 | 15 | int main(int argc, char *argv[]) { 16 | std::shared_ptr DataX(new arma::mat); 17 | std::shared_ptr DataY(new arma::mat); 18 | std::shared_ptr trainDataX(new arma::mat); 19 | std::shared_ptr trainDataY(new arma::mat); 20 | std::shared_ptr testDataX(new arma::mat); 21 | std::shared_ptr testDataY(new arma::mat); 
22 | std::shared_ptr ValidationDataX(new arma::mat); 23 | std::shared_ptr ValidationDataY(new arma::mat); 24 | 25 | loadData_MNIST(DataX,DataY); 26 | 27 | int ntrain = 1000; 28 | int ntest = 100; 29 | // now I split data into train, test, and validation 30 | trainDataX = std::make_shared(DataX->cols(0,ntrain-1)); 31 | trainDataY = std::make_shared(DataY->cols(0,ntrain-1)); 32 | testDataX = std::make_shared(DataX->cols(ntrain,ntrain+ntest-1)); 33 | testDataY = std::make_shared(DataY->cols(ntrain,ntrain+ntest-1)); 34 | 35 | DataX.reset(); 36 | DataY.reset(); 37 | 38 | 39 | 40 | int inputDim = trainDataX->n_cols; 41 | int outputDim = trainDataY->n_cols; 42 | std::cout << inputDim << std::endl; 43 | std::cout << outputDim << std::endl; 44 | std::cout << trainDataX->n_rows << std::endl; 45 | std::cout << trainDataY->n_rows << std::endl; 46 | 47 | 48 | int numLayers = 2; 49 | std::vector dimensions; 50 | 51 | dimensions.push_back(784); 52 | dimensions.push_back(100); 53 | dimensions.push_back(50); 54 | 55 | 56 | } 57 | 58 | 59 | void loadData_MNIST(std::shared_ptr X, 60 | std::shared_ptr Y) { 61 | 62 | std::string filename_base("../MNIST/data"); 63 | std::string filename; 64 | char tag[50]; 65 | char x; 66 | int count; 67 | int numFiles = 10; 68 | int featSize = 28*28; 69 | int labelSize = 10; 70 | int numSamples = 1000; 71 | X->set_size(featSize,numFiles*numSamples); 72 | Y->set_size(labelSize, numFiles*numSamples); 73 | Y->fill(0); 74 | 75 | 76 | for (int i = 0 ; i < numFiles ; i++) { 77 | sprintf(tag,"%d",i); 78 | filename=filename_base+(std::string)tag; 79 | std::cout << filename << std::endl; 80 | std::ifstream infile; 81 | infile.open(filename,std::ios::binary | std::ios::in); 82 | if (infile.is_open()) { 83 | 84 | for (int j = 0 ; j < numSamples ; j++) { 85 | 86 | for (int k =0 ; k n_rows = row0; 7 | this->n_cols = col0; 8 | this->n_elem = row0 * col0; 9 | CUDA_CHECK(cudaMalloc((void **)&_data_GPU ,this->n_elem * sizeof(*_data_GPU))); 10 | } 11 | #if 0 12 | GPUMat& GPUMat::copyFromCPU(const GPUMat& rhs){ 13 | // Check for self-assignment! 14 | if (this != &rhs) { 15 | delete _data_CPU; 16 | cudaFree(_data_GPU); 17 | cudaStat = cudaMalloc((void **)&_data_GPU ,n_elem*sizeof(double)); 18 | cudaStat = cublasDcopy(handle, n_elem, rhs.memptr_GPU(),1, _data_GPU,1); 19 | // Deallocate, allocate new space, copy values... 20 | } 21 | // 1. Deallocate any memory that MyClass is using internally 22 | // 2. Allocate some memory to hold the contents of rhs 23 | // 3. Copy the values from rhs into this instance 24 | // 4. Return *this 25 | return *this; 26 | } 27 | #endif 28 | GPUMat& GPUMat::operator=(const GPUMat& rhs){ 29 | // Check for self-assignment! 30 | if (this != &rhs) { 31 | delete _data_CPU; 32 | cudaFree(_data_GPU); 33 | _data_CPU = (double *)malloc(rhs.n_elem * sizeof(double)); 34 | const double *p = rhs.memptr_CPU(); 35 | for (int i = 0; i < n_elem; i++) { 36 | _data_CPU[i] = *(p+i); 37 | } 38 | 39 | this->syncToGPU(); 40 | // Deallocate, allocate new space, copy values... 41 | } 42 | // 1. Deallocate any memory that MyClass is using internally 43 | // 2. Allocate some memory to hold the contents of rhs 44 | // 3. Copy the values from rhs into this instance 45 | // 4. 
Return *this 46 | return *this; 47 | } 48 | 49 | GPUMat& GPUMat::st(){ 50 | this->transposeFlag=((this->transposeFlag==false)?true:false); 51 | return *this; 52 | } 53 | 54 | GPUMat& GPUMat::operator+=(const GPUMat& rhs){ 55 | 56 | ASSERT(this->n_elem==rhs.n_elem, "number of elements not equal for addition"); 57 | gpu_add(n_elem, this->memptr_GPU(), rhs.memptr_GPU(), this->memptr_GPU()); 58 | return *this; 59 | } 60 | 61 | 62 | const GPUMat GPUMat::operator+(const GPUMat& rhs) const{ 63 | GPUMat result = *this; // Make a copy of myself. Same as MyClass result(*this); 64 | result += rhs; // Use += to add other to the copy. 65 | return result; // All done! 66 | 67 | } 68 | 69 | void GPUMat::ones() { 70 | gpu_set(this->n_elem, 1.0, this->memptr_GPU()); 71 | } 72 | 73 | void GPUMat::zeros(){ 74 | gpu_set(this->n_elem, 0.0, this->memptr_GPU()); 75 | } 76 | 77 | void GPUMat::print(std::string str) { 78 | this->syncToCPU(); 79 | std::cout << str << std::endl; 80 | for (int i = 0; i < this->n_rows; i++){ 81 | for (int j = 0; j < this->n_cols; j++){ 82 | std::cout << _data_CPU[j*n_rows + i] << "\t"; 83 | 84 | } 85 | std::cout << std::endl; 86 | 87 | } 88 | } 89 | 90 | -------------------------------------------------------------------------------- /GPUMat/GPUMat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "cublas_v2.h" 7 | /* notes on the design of the GPUMat 8 | 1) the synchronization to the CPU is lazy 9 | 10 | */ 11 | class GPUMat{ 12 | private: 13 | double *_data_CPU, *_data_GPU; 14 | 15 | public: 16 | enum MemLocation {CPU_GPU, GPU_ONLY, CPU_ONLY}; 17 | GPUMat(){} 18 | GPUMat(int row0, int col0); 19 | ~GPUMat(){ 20 | delete _data_CPU; 21 | cudaFree((void *)_data_GPU); 22 | // cublasDestroy(handle); 23 | } 24 | void syncToGPU(){ 25 | // if(!_data_CPU) _data_CPU = (double *) malloc(n_elem * sizeof(double)); 26 | // if( loc==CPU_ONLY) loc = CPU_GPU; 27 | cudaMalloc((void **)&_data_GPU ,n_elem*sizeof(double)); 28 | cublasSetMatrix (n_rows,n_cols, sizeof(double) ,_data_CPU,n_rows,_data_GPU ,n_cols); //a -> d_a 29 | } 30 | void syncToCPU(){ 31 | // if(!_data_CPU) _data_CPU = (double *) malloc(n_elem * sizeof(double)); 32 | // if( loc==CPU_ONLY) loc = CPU_GPU; 33 | _data_CPU = (double *) malloc(n_elem * sizeof(double)); 34 | cublasGetMatrix (n_rows, n_cols, sizeof(double) ,_data_GPU ,n_rows,_data_CPU,n_rows); 35 | } 36 | void zeros(); 37 | void ones(); 38 | void randu(); 39 | void randn(); 40 | double* memptr_CPU(){return _data_CPU;} 41 | double* memptr_GPU(){return _data_GPU;} 42 | double* memptr_CPU() const {return _data_CPU;} 43 | double* memptr_GPU() const {return _data_GPU;} 44 | GPUMat& st(); 45 | GPUMat& operator=(const GPUMat& rhs); 46 | GPUMat& operator+=(const GPUMat& rhs); 47 | GPUMat& operator-=(const GPUMat& rhs); 48 | GPUMat& operator*=(const GPUMat& rhs); 49 | GPUMat& operator*=(const double scal); 50 | GPUMat& operator%=(const GPUMat& rhs); 51 | const GPUMat operator+(const GPUMat& rhs) const; 52 | const GPUMat operator-(const GPUMat& rhs) const; 53 | const GPUMat operator*(const GPUMat& rhs) const; 54 | const GPUMat operator%(const GPUMat& rhs) const; 55 | void print(std::string str=""); 56 | 57 | int n_rows, n_cols, n_elem; 58 | bool transposeFlag; 59 | MemLocation loc; 60 | // cudaError_t cudaStat ; // cudaMalloc status 61 | // cublasStatus_t stat ; // CUBLAS functions status 62 | // cublasHandle_t handle ; // CUBLAS context 63 | }; 64 | 
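A minimal usage sketch of the lazy CPU/GPU synchronization described in the design notes at the top of GPUMat.h, patterned after test_GPUMat.cpp; it only touches members declared above (the constructor, ones(), operator+=, print()) and assumes GPUEnv::GetInstance() from device_common.h has already created the shared cuBLAS handle. It is a hypothetical driver, not a file in the repository:

    // hypothetical driver (not part of the repository), assuming GPUEnv/GPUMat as declared above
    #include "device_common.h"
    #include "GPUMat.h"

    int main() {
        GPUEnv::GetInstance();       // set up the shared cuBLAS handle first, as test_GPUMat.cpp does

        GPUMat a(4, 4), b(4, 4);     // constructor cudaMalloc's the device buffers
        a.ones();                    // gpu_set fills the GPU copy directly
        b.ones();
        a += b;                      // gpu_add runs on the device; no host copy exists yet

        a.print("a = ones + ones");  // print() calls syncToCPU(), so the host copy is
                                     // materialized only when it is actually read
        return 0;
    }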
-------------------------------------------------------------------------------- /GPUMat/GPU_Math_Func.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include // for std::fabs and std::signbit 4 | #include "cblas.h" 5 | #include "device_common.h" 6 | 7 | 8 | 9 | // Decaf gpu gemm provides an interface that is almost the same as the cpu 10 | // gemm function - following the c convention and calling the fortran-order 11 | // gpu code under the hood. 12 | void gpu_gemm(const CBLAS_TRANSPOSE TransA, 13 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 14 | const double alpha, const double* A, const double* B, const double beta, 15 | double* C); 16 | 17 | void gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, 18 | const double alpha, const double* A, const double* x, const double beta, 19 | double* y); 20 | 21 | void gpu_axpy(const int N, const double alpha, const double* X, 22 | double* Y); 23 | 24 | void gpu_axpby(const int N, const double alpha, const double* X, 25 | const double beta, double* Y); 26 | 27 | void gpu_memcpy(const size_t N, const void *X, void *Y); 28 | 29 | void gpu_set(const int N, const double alpha, double *X); 30 | 31 | inline void gpu_memset(const size_t N, const int alpha, void* X) { 32 | CUDA_CHECK(cudaMemset(X, alpha, N)); // NOLINT(caffe/alt_fn) 33 | } 34 | 35 | void gpu_add_scalar(const int N, const double alpha, double *X); 36 | 37 | void gpu_scal(const int N, const double alpha, double *X); 38 | 39 | void gpu_add(const int N, const double* a, const double* b, double* y); 40 | 41 | void gpu_selfAdd(const int N, const double* a, const double* b, double* y); 42 | 43 | void gpu_sub(const int N, const double* a, const double* b, double* y); 44 | 45 | void gpu_mul(const int N, const double* a, const double* b, double* y); 46 | 47 | void gpu_abs(const int n, const double* a, double* y); 48 | 49 | template 50 | void gpu_transform(const int n, const double* a, functor y); 51 | 52 | // gpu_rng_uniform with two arguments generates integers in the range 53 | // [0, UINT_MAX]. 54 | void gpu_rng_uniform(const int n, unsigned int* r); 55 | 56 | // gpu_rng_uniform with four arguments generates floats in the range 57 | // (a, b] (strictly greater than a, less than or equal to b) due to the 58 | // specification of curandGenerateUniform. With a = 0, b = 1, just calls 59 | // curandGenerateUniform; with other limits will shift and scale the outputs 60 | // appropriately after calling curandGenerateUniform. 
61 | 62 | void gpu_rng_uniform(const int n, const double a, const double b, double* r); 63 | 64 | void gpu_rng_gaussian(const int n, const double mu, const double sigma, 65 | double* r); 66 | 67 | void gpu_rng_bernoulli(const int n, const double p, int* r); 68 | 69 | void gpu_dot(const int n, const double* x, const double* y, double* out); 70 | 71 | -------------------------------------------------------------------------------- /GPUMat/Makefile: -------------------------------------------------------------------------------- 1 | CPP_CUDA = nvcc 2 | CPP = nvcc 3 | CXXFLAGS = --std=c++11 -I/opt/boost_1_57_0 4 | #nvcc mmul_1.cu -lcublas -lcurand -o mmul_1 5 | LINKFLAGS = -lcublas -lcurand -L~/workspace/libs/gtest-1.7.0/mybuilds -lgtest 6 | 7 | OBJ = test_GPUMat.o GPUMat.o GPU_Math_Func.o device_common.o 8 | 9 | all: test 10 | 11 | test : $(OBJ) 12 | $(CPP) -o $@ $(OBJ) $(LINKFLAGS) 13 | 14 | GPUMat.o : GPUMat.cpp 15 | $(CPP) -c $(CXXFLAGS) $@ $^ 16 | 17 | GPU_Math_Func.o : GPU_Math_Func.cu 18 | $(CPP_CUDA) -c $(CXXFLAGS) $@ $^ 19 | test_GPUMat.o : test_GPUMat.cpp 20 | $(CPP) -c $(CXXFLAGS) $@ $^ 21 | device_common.o : device_common.cpp 22 | $(CPP) -c $(CXXFLAGS) $< -o $@ 23 | #%.o : %.cpp 24 | # $(CPP) -c $(CXXFLAGS) 25 | 26 | 27 | clean: 28 | rm -f *.o *~ test 29 | -------------------------------------------------------------------------------- /GPUMat/caffe_references/GPUMat.cpp: -------------------------------------------------------------------------------- 1 | 2 | //copy on CPU 3 | //copy on GPU 4 | GPUMat& GPUMat::copyFromCPU(const GPUMat& rhs){ 5 | // Check for self-assignment! 6 | if (this != &rhs) { 7 | delete _data_CPU; 8 | cudaFree(_data_GPU); 9 | cudaStat = cudaMalloc((void **)&_data_GPU ,n_elem*sizeof(double)); 10 | cudaStat = cublasDcopy(handle, n_elem, rhs.memptr_GPU(),1, _data_GPU,1); 11 | // Deallocate, allocate new space, copy values... 12 | } 13 | // 1. Deallocate any memory that MyClass is using internally 14 | // 2. Allocate some memory to hold the contents of rhs 15 | // 3. Copy the values from rhs into this instance 16 | // 4. Return *this 17 | return *this; 18 | } 19 | 20 | GPUMat& GPUMat::operator=(const GPUMat& rhs){ 21 | // Check for self-assignment! 22 | if (this != &rhs) { 23 | delete _data_CPU; 24 | cudaFree(_data_GPU); 25 | _data_CPU = (double *)malloc(rhs.n_elem * sizeof(double)); 26 | double *p = rhs.memptr(); 27 | for (int i = 0; i < n_elem; i++) { 28 | _data_CPU[i] = *(p+i); 29 | } 30 | 31 | this->syncToGPU(); 32 | // Deallocate, allocate new space, copy values... 33 | } 34 | // 1. Deallocate any memory that MyClass is using internally 35 | // 2. Allocate some memory to hold the contents of rhs 36 | // 3. Copy the values from rhs into this instance 37 | // 4. Return *this 38 | return *this; 39 | } 40 | 41 | GPUMat& GPUMat::st(){ 42 | this->transposeFlag=((this->transposeFlag==false)?true:false); 43 | return *this; 44 | } 45 | 46 | GPUMat& GPUMat::operator+=(const GPUMat& rhs){ 47 | 48 | double scale = 1; 49 | stat=cublasDaxpy(handle,n_elem,&scale,rhs.memptr_GPU(),1,this->memptr_GPU,1); 50 | return *this; 51 | } 52 | 53 | 54 | const GPUMat GPUMat::operator+(const GPUMat& rhs) const{ 55 | GPUMat result = *this; // Make a copy of myself. Same as MyClass result(*this); 56 | result += other; // Use += to add other to the copy. 57 | return result; // All done! 
58 | 59 | } 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /GPUMat/caffe_references/GPUMat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "cublas_v2.h" 4 | 5 | class GPUMat{ 6 | private: 7 | double* _data_CPU, _data_GPU; 8 | 9 | public: 10 | enum MemLocation {CPU_GPU, GPU_ONLY, CPU_ONLY} 11 | GPUMat(); 12 | GPUMat(int row0, int col0); 13 | ~GPUMat(){ 14 | delete _data_CPU; 15 | cudaFree(_data_GPU); 16 | cublasDestroy(handle); 17 | } 18 | void syncToGPU(){ 19 | if(!_data_CPU) _data_CPU = (double *) malloc(n_elems * sizeof(double)); 20 | if( loc==CPU_ONLY) loc = CPU_GPU; 21 | cudaStat = cudaMalloc((void **)&_data_GPU ,n_elem*sizeof(double)); 22 | stat = cublasSetMatrix (n_rows,n_clos, sizeof(double) ,_data_CPU,n_rows,_data_GPU ,n_cols); //a -> d_a 23 | } 24 | void syncToCPU(){ 25 | if(!_data_CPU) _data_CPU = (double *) malloc(n_elems * sizeof(double)); 26 | if( loc==CPU_ONLY) loc = CPU_GPU; 27 | stat = cublasGetMatrix (n_rows, n_cols, sizeof(double) ,_data_GPU ,n_rows,_data_CPU,n_rows); 28 | } 29 | void zeros(); 30 | void ones(); 31 | double* memptr_CPU(){return _data_CPU;} 32 | double* memptr_GPU(){return _data_CPU;} 33 | GPUMat& st(); 34 | GPUMat& operator=(const GPUMat& rhs); 35 | GPUMat& operator+=(const GPUMat& rhs); 36 | GPUMat& operator-=(const GPUMat& rhs); 37 | GPUMat& operator*=(const GPUMat& rhs); 38 | GPUMat& operator*=(const double scal); 39 | GPUMat& operator%=(const GPUMat& rhs); 40 | const GPUMat operator+(const GPUMat& rhs) const; 41 | const GPUMat operator-(const GPUMat& rhs) const; 42 | const GPUMat operator*(const GPUMat& rhs) const; 43 | const GPUMat operator%(const GPUMat& rhs) const; 44 | 45 | int n_rows, n_cols, n_elems; 46 | bool transposeFlag; 47 | cudaError_t cudaStat ; // cudaMalloc status 48 | cublasStatus_t stat ; // CUBLAS functions status 49 | cublasHandle_t handle ; // CUBLAS context 50 | } 51 | -------------------------------------------------------------------------------- /GPUMat/caffe_references/device_alternate[1].hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE_UTIL_DEVICE_ALTERNATE_H_ 2 | #define CAFFE_UTIL_DEVICE_ALTERNATE_H_ 3 | 4 | #ifdef CPU_ONLY // CPU-only Caffe. 5 | 6 | #include 7 | 8 | // Stub out GPU calls as unavailable. 9 | 10 | #define NO_GPU LOG(FATAL) << "Cannot use GPU in CPU-only Caffe: check mode." 11 | 12 | #define STUB_GPU(classname) \ 13 | template \ 14 | void classname::Forward_gpu(const vector*>& bottom, \ 15 | const vector*>& top) { NO_GPU; } \ 16 | template \ 17 | void classname::Backward_gpu(const vector*>& top, \ 18 | const vector& propagate_down, \ 19 | const vector*>& bottom) { NO_GPU; } \ 20 | 21 | #define STUB_GPU_FORWARD(classname, funcname) \ 22 | template \ 23 | void classname::funcname##_##gpu(const vector*>& bottom, \ 24 | const vector*>& top) { NO_GPU; } \ 25 | 26 | #define STUB_GPU_BACKWARD(classname, funcname) \ 27 | template \ 28 | void classname::funcname##_##gpu(const vector*>& top, \ 29 | const vector& propagate_down, \ 30 | const vector*>& bottom) { NO_GPU; } \ 31 | 32 | #else // Normal GPU + CPU Caffe. 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include // cuda driver types 39 | #ifdef USE_CUDNN // cuDNN acceleration library. 40 | #include "caffe/util/cudnn.hpp" 41 | #endif 42 | 43 | // 44 | // CUDA macros 45 | // 46 | 47 | // CUDA: various checks for different function calls. 
48 | #define CUDA_CHECK(condition) \ 49 | /* Code block avoids redefinition of cudaError_t error */ \ 50 | do { \ 51 | cudaError_t error = condition; \ 52 | CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ 53 | } while (0) 54 | 55 | #define CUBLAS_CHECK(condition) \ 56 | do { \ 57 | cublasStatus_t status = condition; \ 58 | CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \ 59 | << caffe::cublasGetErrorString(status); \ 60 | } while (0) 61 | 62 | #define CURAND_CHECK(condition) \ 63 | do { \ 64 | curandStatus_t status = condition; \ 65 | CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " " \ 66 | << caffe::curandGetErrorString(status); \ 67 | } while (0) 68 | 69 | // CUDA: grid stride looping 70 | #define CUDA_KERNEL_LOOP(i, n) \ 71 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 72 | i < (n); \ 73 | i += blockDim.x * gridDim.x) 74 | 75 | // CUDA: check for error after kernel execution and exit loudly if there is one. 76 | #define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError()) 77 | 78 | namespace caffe { 79 | 80 | // CUDA: library error reporting. 81 | const char* cublasGetErrorString(cublasStatus_t error); 82 | const char* curandGetErrorString(curandStatus_t error); 83 | 84 | // CUDA: thread number configuration. 85 | // Use 1024 threads per block, which requires cuda sm_2x or above, 86 | // or fall back to attempt compatibility (best of luck to you). 87 | #if __CUDA_ARCH__ >= 200 88 | const int CAFFE_CUDA_NUM_THREADS = 1024; 89 | #else 90 | const int CAFFE_CUDA_NUM_THREADS = 512; 91 | #endif 92 | 93 | // CUDA: number of blocks for threads. 94 | inline int CAFFE_GET_BLOCKS(const int N) { 95 | return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; 96 | } 97 | 98 | } // namespace caffe 99 | 100 | #endif // CPU_ONLY 101 | 102 | #endif // CAFFE_UTIL_DEVICE_ALTERNATE_H_ 103 | -------------------------------------------------------------------------------- /GPUMat/caffe_references/syncedmem[1].cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "caffe/common.hpp" 4 | #include "caffe/syncedmem.hpp" 5 | #include "caffe/util/math_functions.hpp" 6 | 7 | namespace caffe { 8 | 9 | SyncedMemory::~SyncedMemory() { 10 | if (cpu_ptr_ && own_cpu_data_) { 11 | CaffeFreeHost(cpu_ptr_); 12 | } 13 | 14 | #ifndef CPU_ONLY 15 | if (gpu_ptr_) { 16 | CUDA_CHECK(cudaFree(gpu_ptr_)); 17 | } 18 | #endif // CPU_ONLY 19 | } 20 | 21 | inline void SyncedMemory::to_cpu() { 22 | switch (head_) { 23 | case UNINITIALIZED: 24 | CaffeMallocHost(&cpu_ptr_, size_); 25 | caffe_memset(size_, 0, cpu_ptr_); 26 | head_ = HEAD_AT_CPU; 27 | own_cpu_data_ = true; 28 | break; 29 | case HEAD_AT_GPU: 30 | #ifndef CPU_ONLY 31 | if (cpu_ptr_ == NULL) { 32 | CaffeMallocHost(&cpu_ptr_, size_); 33 | own_cpu_data_ = true; 34 | } 35 | caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_); 36 | head_ = SYNCED; 37 | #else 38 | NO_GPU; 39 | #endif 40 | break; 41 | case HEAD_AT_CPU: 42 | case SYNCED: 43 | break; 44 | } 45 | } 46 | 47 | inline void SyncedMemory::to_gpu() { 48 | #ifndef CPU_ONLY 49 | switch (head_) { 50 | case UNINITIALIZED: 51 | CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); 52 | caffe_gpu_memset(size_, 0, gpu_ptr_); 53 | head_ = HEAD_AT_GPU; 54 | break; 55 | case HEAD_AT_CPU: 56 | if (gpu_ptr_ == NULL) { 57 | CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); 58 | } 59 | caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_); 60 | head_ = SYNCED; 61 | break; 62 | case HEAD_AT_GPU: 63 | case SYNCED: 64 | break; 65 | } 66 | #else 67 | NO_GPU; 68 | #endif 69 | } 70 | 
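// Annotation (not in the upstream Caffe source): to_cpu()/to_gpu() above are the state
// machine behind the "lazy" synchronization that GPUMat.h's design notes refer to.
// head_ records where the freshest copy lives (UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU,
// SYNCED); the accessors below copy between host and device only when the requested side
// is stale, and the mutable_* variants re-mark their side as the authoritative one.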
71 | const void* SyncedMemory::cpu_data() { 72 | to_cpu(); 73 | return (const void*)cpu_ptr_; 74 | } 75 | 76 | void SyncedMemory::set_cpu_data(void* data) { 77 | CHECK(data); 78 | if (own_cpu_data_) { 79 | CaffeFreeHost(cpu_ptr_); 80 | } 81 | cpu_ptr_ = data; 82 | head_ = HEAD_AT_CPU; 83 | own_cpu_data_ = false; 84 | } 85 | 86 | const void* SyncedMemory::gpu_data() { 87 | #ifndef CPU_ONLY 88 | to_gpu(); 89 | return (const void*)gpu_ptr_; 90 | #else 91 | NO_GPU; 92 | #endif 93 | } 94 | 95 | void* SyncedMemory::mutable_cpu_data() { 96 | to_cpu(); 97 | head_ = HEAD_AT_CPU; 98 | return cpu_ptr_; 99 | } 100 | 101 | void* SyncedMemory::mutable_gpu_data() { 102 | #ifndef CPU_ONLY 103 | to_gpu(); 104 | head_ = HEAD_AT_GPU; 105 | return gpu_ptr_; 106 | #else 107 | NO_GPU; 108 | #endif 109 | } 110 | 111 | 112 | } // namespace caffe 113 | 114 | -------------------------------------------------------------------------------- /GPUMat/cublastest/GPUMat.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "cublas_v2.h" 5 | 6 | double vec_dot_vec(double *y, double *x, int n){ 7 | 8 | 9 | return 0; 10 | } 11 | 12 | void mat_add_mat(const double *x, double *y, double scalar, int n){ 13 | cudaError_t cudaStat ; // cudaMalloc status 14 | cublasStatus_t stat ; // CUBLAS functions status 15 | cublasHandle_t handle ; // CUBLAS context 16 | // on the device 17 | double *d_x; // d_x - x on the device 18 | double *d_y; // d_y - y on the device 19 | 20 | cudaStat = cudaMalloc (( void **)& d_x, n*sizeof(*x)); // device 21 | // memory alloc for x 22 | cudaStat = cudaMalloc (( void **)& d_y, n*sizeof(*y)); // device 23 | // memory alloc for y 24 | stat = cublasCreate (& handle ); // initialize CUBLAS context 25 | stat = cublasSetVector (n, sizeof (*x), x ,1 ,d_x, 1); // cp x- >d_x 26 | stat = cublasSetVector (n, sizeof (*y), y ,1 ,d_y, 1); // cp y- >d_y 27 | 28 | stat=cublasDaxpy(handle,n,&scalar,d_x,1,d_y,1); 29 | 30 | 31 | cudaFree (d_x ); // free device memory 32 | cudaFree (d_y ); // free device memory 33 | cublasDestroy ( handle ); // destroy CUBLAS context 34 | 35 | } 36 | 37 | 38 | void mat_prod_mat(const double* a, cublasOperation_t op_a, const double* b, cublasOperation_t op_b, double*c, int m, int n, int k){ 39 | 40 | cudaError_t cudaStat ; // cudaMalloc status 41 | cublasStatus_t stat ; // CUBLAS functions status 42 | cublasHandle_t handle ; // CUBLAS context 43 | 44 | // on the device 45 | double* d_a; // d_a - a on the device 46 | double* d_b; // d_b - b on the device 47 | double* d_c; // d_c - c on the device 48 | cudaStat = cudaMalloc((void **)&d_a ,m*k*sizeof(*a)); // device 49 | // memory alloc for a 50 | cudaStat = cudaMalloc((void **)&d_b ,k*n*sizeof(*b)); // device 51 | // memory alloc for b 52 | cudaStat = cudaMalloc((void **)&d_c ,m*n*sizeof(*c)); // device 53 | // memory alloc for c 54 | stat = cublasCreate(&handle); // initialize CUBLAS context 55 | // copy matrices from the host to the device 56 | stat = cublasSetMatrix (m,k, sizeof(*a) ,a,m,d_a ,m); //a -> d_a 57 | stat = cublasSetMatrix (k,n, sizeof(*b) ,b,k,d_b ,k); //b -> d_b 58 | stat = cublasSetMatrix (m,n, sizeof(*c) ,c,m,d_c ,m); //c -> d_c 59 | double al=1.0; 60 | double bet=1.0; 61 | // matrix - matrix multiplication : d_c = al*d_a *d_b + bet *d_c 62 | // d_a -mxk matrix , d_b -kxn matrix , d_c -mxn matrix ; 63 | // al ,bet -scalars 64 | stat=cublasDgemm(handle,op_a,op_b,m,n,k,&al,d_a,m,d_b,k,&bet,d_c,m); 65 | 66 | stat = cublasGetMatrix (m, n, 
sizeof(*c) ,d_c ,m,c,m); // cp d_c - >c 67 | 68 | cudaFree (d_a ); // free device memory 69 | cudaFree (d_b ); // free device memory 70 | cudaFree (d_c ); // free device memory 71 | cublasDestroy ( handle ); // destroy CUBLAS context 72 | 73 | } 74 | -------------------------------------------------------------------------------- /GPUMat/cublastest/GPUMat.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "cublas_v2.h" 5 | 6 | 7 | 8 | namespace NeuralNet{ 9 | namespace GPUMat{ 10 | 11 | // multiply the vector d_x by the scalar al and add to d_y 12 | // d_y = al*d_x + d_y , d_x ,d_y - n- vectors ; al - scalar 13 | // void vec_add_vec(double *y, double *x, double scalar, int n); 14 | double vec_dot_vec(double *y, double *x, int n); 15 | //3.3.2 16 | void mat_prod_vec(double *mat, int mat_m, int mat_n, cublasOperation_t op, double *vec, double *result); 17 | 18 | 19 | void mat_prod_mat(double* a, cublasOperation_t op_a, double* b, cublasOperation_t op_b, double*c, int m, int n, int k); 20 | 21 | 22 | void mat_add_mat(double* y, double* x, double scalar, int n); 23 | 24 | void mat_elem_prod_mat(); 25 | 26 | double mat_norm2(double *x, int n); 27 | 28 | 29 | 30 | // template 31 | // transform(double *x, Func func, int n); 32 | 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /GPUMat/cublastest/Makefile: -------------------------------------------------------------------------------- 1 | CPP = nvcc 2 | CXXFLAGS = --std=c++11 -I/home/yuguangyang/Downloads/armadillo-5.100.2/include -DARMA_DONT_USE_WRAPPER 3 | #nvcc mmul_1.cu -lcublas -lcurand -o mmul_1 4 | LINKFLAGS = -lcublas -lcurand -lblas -llapack 5 | 6 | OBJ = main.o 7 | 8 | all: test 9 | 10 | test : $(OBJ) 11 | $(CPP) -o $@ $(OBJ) $(LINKFLAGS) 12 | 13 | main.o : main.cpp 14 | $(CPP) -c $(CXXFLAGS) $@ $^ 15 | 16 | GPUMat.o : GPUMat.cpp 17 | $(CPP) -c $@ $^ 18 | 19 | #%.o : %.cpp 20 | # $(CPP) -c $(CXXFLAGS) 21 | 22 | 23 | clean: 24 | rm -f *.o *~ 25 | -------------------------------------------------------------------------------- /GPUMat/cublastest/armatest/armatest.sdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/GPUMat/cublastest/armatest/armatest.sdf -------------------------------------------------------------------------------- /GPUMat/cublastest/armatest/armatest.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.31101.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "armatest", "armatest\armatest.vcxproj", "{19FAFC51-3297-45A1-86A9-E5C33A38ECEA}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Debug|Win32.ActiveCfg = Release|Win32 17 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Debug|Win32.Build.0 = Release|Win32 18 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Debug|x64.ActiveCfg = Debug|x64 19 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Debug|x64.Build.0 = Debug|x64 20 
| {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Release|Win32.ActiveCfg = Release|Win32 21 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Release|Win32.Build.0 = Release|Win32 22 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Release|x64.ActiveCfg = Release|x64 23 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Release|x64.Build.0 = Release|x64 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /GPUMat/cublastest/armatest/armatest/armatest.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | -------------------------------------------------------------------------------- /GPUMat/cublastest/armatest/armatest/armatest.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | PATH=C:\Users\yuguangyang\Downloads\armadillo-5.300.4\armadillo-5.300.4\examples\lib_win64;%PATH% 5 | WindowsLocalDebugger 6 | 7 | -------------------------------------------------------------------------------- /GPUMat/cublastest/armatest/x64/Debug/armatest.ilk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/GPUMat/cublastest/armatest/x64/Debug/armatest.ilk -------------------------------------------------------------------------------- /GPUMat/cublastest/cublastest/cublastest.sdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/GPUMat/cublastest/cublastest/cublastest.sdf -------------------------------------------------------------------------------- /GPUMat/cublastest/cublastest/cublastest.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.31101.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cublastest", "cublastest\cublastest.vcxproj", "{CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Debug|Win32.Build.0 = Debug|Win32 18 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Debug|x64.ActiveCfg = Debug|x64 19 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Debug|x64.Build.0 = Debug|x64 20 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Release|Win32.ActiveCfg = Release|Win32 21 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Release|Win32.Build.0 = Release|Win32 22 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Release|x64.ActiveCfg = 
Release|x64 23 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Release|x64.Build.0 = Release|x64 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /GPUMat/cublastest/cublastest/x64/Debug/cublastest.ilk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/GPUMat/cublastest/cublastest/x64/Debug/cublastest.ilk -------------------------------------------------------------------------------- /GPUMat/cublastest/main.cpp: -------------------------------------------------------------------------------- 1 | //#include 2 | #include 3 | #include 4 | #include "cublas_v2.h" 5 | #include 6 | //#include "GPUMat.h" 7 | #define N 10 8 | void mat_prod_mat(const double* a, cublasOperation_t op_a, const double* b, cublasOperation_t op_b, double*c, int m, int n, int k); 9 | 10 | 11 | 12 | 13 | 14 | int main(){ 15 | 16 | #if 0 17 | // arma::mat a; 18 | arma::mat a(N, N, arma::fill::randu); 19 | arma::mat b(N, N, arma::fill::randu); 20 | arma::mat c = a * b; 21 | 22 | c.save("armaresult.txt",arma::raw_ascii); 23 | 24 | #endif 25 | double *a; 26 | double *b, *c; 27 | a = (double *)malloc(N*N*sizeof(double)); 28 | b = (double *)malloc(N*N*sizeof(double)); 29 | c = (double *)malloc(N*N*sizeof(double)); 30 | 31 | for (int i = 0; i < N*N; i++){ 32 | a[i] = 1.0* i / (N*N); 33 | b[i] = -a[i]; 34 | } 35 | 36 | #if 1 37 | mat_prod_mat(a, CUBLAS_OP_N, b, CUBLAS_OP_N, c, N, N, N); 38 | for (int i = 0; i < N*N; i++){ 39 | std::cout << c[i] << std::endl; 40 | } 41 | 42 | // c.save("gpuresult.txt", arma::raw_ascii); 43 | // double *aa = nullptr; 44 | // double *bb = nullptr; 45 | // double *cc = nullptr; 46 | 47 | // std::swap(aa,a.memptr()); 48 | // std::swap(bb,b.memptr()); 49 | // std::swap(cc,c.memptr()); 50 | #endif 51 | return 0; 52 | } 53 | 54 | void mat_prod_mat(const double* a, cublasOperation_t op_a, const double* b, cublasOperation_t op_b, double*c, int m, int n, int k){ 55 | 56 | cudaError_t cudaStat ; // cudaMalloc status 57 | cublasStatus_t stat ; // CUBLAS functions status 58 | cublasHandle_t handle ; // CUBLAS context 59 | 60 | // on the device 61 | double* d_a; // d_a - a on the device 62 | double* d_b; // d_b - b on the device 63 | double* d_c; // d_c - c on the device 64 | cudaStat = cudaMalloc((void **)&d_a ,m*k*sizeof(*a)); // device 65 | // memory alloc for a 66 | cudaStat = cudaMalloc((void **)&d_b ,k*n*sizeof(*b)); // device 67 | // memory alloc for b 68 | cudaStat = cudaMalloc((void **)&d_c ,m*n*sizeof(*c)); // device 69 | // memory alloc for c 70 | stat = cublasCreate(&handle); // initialize CUBLAS context 71 | // copy matrices from the host to the device 72 | stat = cublasSetMatrix (m,k, sizeof(*a) ,a,m,d_a ,m); //a -> d_a 73 | stat = cublasSetMatrix (k,n, sizeof(*b) ,b,k,d_b ,k); //b -> d_b 74 | stat = cublasSetMatrix (m,n, sizeof(*c) ,c,m,d_c ,m); //c -> d_c 75 | double al=1.0; 76 | double bet=0.0; 77 | // matrix - matrix multiplication : d_c = al*d_a *d_b + bet *d_c 78 | // d_a -mxk matrix , d_b -kxn matrix , d_c -mxn matrix ; 79 | // al ,bet -scalars 80 | stat=cublasDgemm(handle,op_a,op_b,m,n,k,&al,d_a,m,d_b,k,&bet,d_c,m); 81 | 82 | stat = cublasGetMatrix (m, n, sizeof(*c) ,d_c ,m,c,m); // cp d_c - >c 83 | 84 | cudaFree (d_a ); // free device memory 85 | cudaFree (d_b ); // free device memory 86 
| cudaFree (d_c ); // free device memory 87 | cublasDestroy ( handle ); // destroy CUBLAS context 88 | 89 | } 90 | -------------------------------------------------------------------------------- /GPUMat/cublastest/main.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include "GPUMat.h" 3 | 4 | 5 | #define N 10 6 | using namespace NeuralNet; 7 | 8 | 9 | int main(){ 10 | 11 | // arma::mat a; 12 | arma::mat a(N, N, arma::fill::randu); 13 | arma::mat b(N, N, arma::fill::randu); 14 | arma::mat c = a * b; 15 | 16 | c.save("armaresult.txt",arma::raw_ascii); 17 | 18 | 19 | 20 | #if 0 21 | GPUMat::mat_prod_mat(a.memptr(), CUBLAS_OP_N, b.memptr(), CUBLAS_OP_N, c.memptr(), N, N, N); 22 | 23 | 24 | c.save("gpuresult.txt", arma::raw_ascii); 25 | double *aa = nullptr; 26 | double *bb = nullptr; 27 | double *cc = nullptr; 28 | 29 | std::swap(aa,a.memptr()); 30 | std::swap(bb,b.memptr()); 31 | std::swap(cc,c.memptr()); 32 | #endif 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /GPUMat/cublastest/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/GPUMat/cublastest/test -------------------------------------------------------------------------------- /GPUMat/device_common.cpp: -------------------------------------------------------------------------------- 1 | #include "device_common.h" 2 | 3 | std::shared_ptr GPUEnv::singleton_; 4 | 5 | GPUEnv::GPUEnv() 6 | : cublas_handle_(NULL), curand_generator_(NULL){ 7 | // Try to create a cublas handler, and report an error if failed (but we will 8 | // keep the program running as one might just want to run CPU code). 9 | if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) { 10 | std::cerr << "Cannot create Cublas handle. Cublas won't be available."; 11 | } 12 | #if 0 13 | // Try to create a curand handler. 14 | if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT) 15 | != CURAND_STATUS_SUCCESS || 16 | curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen()) 17 | != CURAND_STATUS_SUCCESS) { 18 | std::cerr << "Cannot create Curand generator. 
Curand won't be available."; 19 | } 20 | #endif 21 | } 22 | 23 | GPUEnv::~GPUEnv() { 24 | if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_)); 25 | #if 0 26 | if (curand_generator_) { 27 | CURAND_CHECK(curandDestroyGenerator(curand_generator_)); 28 | } 29 | #endif 30 | } 31 | 32 | 33 | void GPUEnv::DeviceQuery() { 34 | cudaDeviceProp prop; 35 | int device; 36 | if (cudaSuccess != cudaGetDevice(&device)) { 37 | printf("No cuda device present.\n"); 38 | return; 39 | } 40 | CUDA_CHECK(cudaGetDeviceProperties(&prop, device)); 41 | ofstream os; 42 | os.open("GPU_info.log"); 43 | os << "Device id: " << device; 44 | os << "Major revision number: " << prop.major; 45 | os << "Minor revision number: " << prop.minor; 46 | os << "Name: " << prop.name; 47 | os << "Total global memory: " << prop.totalGlobalMem; 48 | os << "Total shared memory per block: " << prop.sharedMemPerBlock; 49 | os << "Total registers per block: " << prop.regsPerBlock; 50 | os << "Warp size: " << prop.warpSize; 51 | os << "Maximum memory pitch: " << prop.memPitch; 52 | os << "Maximum threads per block: " << prop.maxThreadsPerBlock; 53 | os << "Maximum dimension of block: " 54 | << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", " 55 | << prop.maxThreadsDim[2]; 56 | os << "Maximum dimension of grid: " 57 | << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", " 58 | << prop.maxGridSize[2]; 59 | os << "Clock rate: " << prop.clockRate; 60 | os << "Total constant memory: " << prop.totalConstMem; 61 | os << "Texture alignment: " << prop.textureAlignment; 62 | os << "Concurrent copy and execution: " 63 | << (prop.deviceOverlap ? "Yes" : "No"); 64 | os << "Number of multiprocessors: " << prop.multiProcessorCount; 65 | os << "Kernel execution timeout: " 66 | << (prop.kernelExecTimeoutEnabled ? "Yes" : "No"); 67 | return; 68 | } 69 | 70 | #if 0 71 | void GPUEnv::set_random_seed(const unsigned int seed) { 72 | // Curand seed 73 | static bool g_curand_availability_logged = false; 74 | if (Get().curand_generator_) { 75 | CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(), 76 | seed)); 77 | CURAND_CHECK(curandSetGeneratorOffset(curand_generator(), 0)); 78 | } else { 79 | if (!g_curand_availability_logged) { 80 | std::cerr << 81 | "Curand not available. 
Skipping setting the curand seed."; 82 | g_curand_availability_logged = true; 83 | } 84 | } 85 | // RNG seed 86 | Get().random_generator_.reset(new RNG(seed)); 87 | } 88 | 89 | class GPUEnv::RNG::Generator { 90 | public: 91 | Generator() : rng_(new GPUEnv::rng_t(cluster_seedgen())) {} 92 | explicit Generator(unsigned int seed) : rng_(new GPUEnv::rng_t(seed)) {} 93 | GPUEnv::rng_t* rng() { return rng_.get(); } 94 | private: 95 | shared_ptr rng_; 96 | }; 97 | 98 | GPUEnv::RNG::RNG() : generator_(new Generator()) { } 99 | 100 | GPUEnv::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { } 101 | 102 | GPUEnv::RNG& GPUEnv::RNG::operator=(const RNG& other) { 103 | generator_.reset(other.generator_.get()); 104 | return *this; 105 | } 106 | 107 | void* GPUEnv::RNG::generator() { 108 | return static_cast(generator_->rng()); 109 | } 110 | #endif 111 | -------------------------------------------------------------------------------- /GPUMat/gpumat/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | gpumat 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 10 | clean,full,incremental, 11 | 12 | 13 | 14 | 15 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 16 | full,incremental, 17 | 18 | 19 | 20 | 21 | 22 | org.eclipse.cdt.core.cnature 23 | org.eclipse.cdt.core.ccnature 24 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 25 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 26 | 27 | 28 | -------------------------------------------------------------------------------- /GPUMat/main.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | int main(){ 4 | 5 | return 0; 6 | } -------------------------------------------------------------------------------- /GPUMat/test_GPUMat.cpp: -------------------------------------------------------------------------------- 1 | #include "device_common.h" 2 | #include "GPUMat.h" 3 | #include "gtest/gtest.h" 4 | 5 | TEST(GPUMATTest, selfAdd){ 6 | GPUMat g1(5,5); 7 | GPUMat g2(5,5); 8 | 9 | } 10 | 11 | int main(){ 12 | 13 | GPUEnv::GetInstance(); 14 | 15 | GPUMat g(5,5); 16 | 17 | g.ones(); 18 | g.print(); 19 | 20 | 21 | return 0; 22 | 23 | } 24 | -------------------------------------------------------------------------------- /LSTM/LSTMLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "BaseLayer.h" 7 | #include "ElementwiseLayer.h" 8 | #include "LinearAdditionLayer.h" 9 | #include "ActivationLayer.h" 10 | #include "BaseLayer_LSTM.h" 11 | 12 | namespace NeuralNet { 13 | 14 | class RNN_LSTM { 15 | 16 | public: 17 | RNN_LSTM(int numHiddenLayers0, int hiddenLayerInputDim0, 18 | int hiddenLayerOutputDim0, int inputDim0, int outputDim0, 19 | std::shared_ptr trainingX0, std::shared_ptr trainingY0); 20 | void forward(); 21 | void backward(); 22 | void train(); 23 | void savePara(std::string filename); // try to save all the parameters in the LSTM for further use 24 | void test(); 25 | void calNumericGrad(); 26 | BaseLayer_LSTM* getOutputLayer(){ return netOutputLayer;} 27 | private: 28 | std::vector inGateLayers, forgetGateLayers, outputGateLayers, informationLayers, cellStateLayers; 29 | std::vector outputElementLayers, forgetElementGateLayers, inputElementGateLayers; 30 | std::vector cellLinearAdditionLayers; 31 | std::vector cellStateActivationLayers; 32 | // std::vector layerOutput_prev, cellState_prev; 33 | 
BaseLayer_LSTM* netOutputLayer; 34 | std::shared_ptr trainingY, trainingX; 35 | int numHiddenLayers, hiddenLayerInputDim, hiddenLayerOutputDim; 36 | int rnnInputDim, rnnOutputDim; 37 | 38 | 39 | }; 40 | 41 | } 42 | 43 | 44 | -------------------------------------------------------------------------------- /LSTM/Makefile: -------------------------------------------------------------------------------- 1 | DEEPLEARNING_PATH=-L/home/yuguangyang/Dropbox/DeepLearningPackage/YangCopy/DeepLearning/src/lib 2 | DEEPLEARNING_INCLUDE=-I/home/yuguangyang/Dropbox/DeepLearningPackage/YangCopy/DeepLearning/include 3 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 4 | 5 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 6 | CPP = g++ 7 | ARMA_INCLUDE=-I/home/yuguangyang/Downloads/armadillo-5.100.2/include 8 | ARMA_LINKFLAGS=-L/usr/lib -llapack -lblas 9 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 10 | CXXFLAGS = -std=c++0x $(ARMA_INCLUDE) $(DEEPLEARNING_INCLUDE) -I/opt/boost/boost_1_57_0 -c -D__LINUX -DDEBUG -g3 -DARMA_DONT_USE_WRAPPER -I/usr/local/include 11 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program -march=native 12 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 13 | LINK_DL_FLAGS = $(DEEPLEARNING_PATH) -L/usr/local/lib $(ARMA_LINKFLAGS) -ldeeplearning -ldl #$(LINKOPTFLAGS) 14 | #LINKFLAGS = 15 | #ODIR=obj 16 | ODIR = 17 | 18 | OBJ = main.o LSTMLayer.o 19 | 20 | test : $(OBJ) 21 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 22 | 23 | LSTMLayer.o: LSTMLayer.cpp 24 | $(CPP) -c $(CXXFLAGS) $< 25 | 26 | 27 | clean: 28 | rm -f *.o *~ 29 | -------------------------------------------------------------------------------- /LSTM/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/LSTM/test -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CPP = g++ 2 | ARMA_INCLUDE=-I/home/yuguangyang/Downloads/armadillo-5.100.2/include 3 | ARMA_LINKFLAGS=-llapack -lblas 4 | CXXFLAGS = -std=c++0x $(ARMA_INCLUDE) -I./include -I/usr/local/include -I/opt/boost/boost_1_57_0 -D__LINUX -DARMA_DONT_USE_WRAAPER 5 | DEBUGFLAG=-DDEBUG -g3 6 | RELEASEFLAG= -O3 -march=native -DARMA_NO_DEBUG 7 | CXXFLAGS += $(RELEASEFLAG) 8 | SRCS1 = $(wildcard src/*.cpp) 9 | OBJ1 = $(SRCS1:%.cpp=%.o) 10 | SRCS2 = $(wildcard src/*.cc) 11 | OBJ2 = $(SRCS2:%.cc=%.o) 12 | #SRCS3=$(wildcard src/*.c) 13 | #OBJ3 = $(SRCS3:.c=.o) 14 | OBJ = $(OBJ1) $(OBJ2) $(OBJ3) 15 | 16 | 17 | # Specify extensions of files to delete when cleaning 18 | CLEANEXTS = o a 19 | 20 | # Specify the target file and the install directory 21 | OUTPUTFILE = libdeeplearning.a 22 | INSTALLDIR = src/lib 23 | 24 | $(OUTPUTFILE) : $(OBJ) 25 | ar ru $@ $^ 26 | ranlib $@ 27 | 28 | %.o : src/%.cpp 29 | $(CPP) -c $(CXXFLAGS) $^ 30 | 31 | %.o : src/%.cc 32 | $(CPP) -c $(CXXFLAGS) $^ 33 | 34 | 35 | listfile: 36 | echo $(OBJ) 37 | 38 | clean: 39 | for file in $(CLEANEXTS); do rm -f src/*.$$file; done 40 | 41 | install: 42 | mkdir -p $(INSTALLDIR) 43 | cp -p $(OUTPUTFILE) $(INSTALLDIR) 44 | -------------------------------------------------------------------------------- /MatArray/Makefile: -------------------------------------------------------------------------------- 1 | # this make file now is dynamically linking 2 | #OPTFLAGS = -O3 -march=nocona -mfpmath=sse -msse3 
-Wuninitialized -flto 3 | #MKL_INCLUDE = /opt/intel/mkl/include/ 4 | #MKL_INCLUDE = /opt/intel/composer_xe_2013.0.079/mkl/include/ 5 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 6 | 7 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 8 | CPP = g++ 9 | #MKLROOT = /opt/intel/mkl 10 | #MKLROOT = /opt/intel/composer_xe_2013.0.079/mkl 11 | MKLROOT = /opt/intel/composerxe/mkl 12 | MKLLINKFLAGS = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread 13 | MKL_DL_LINKFLAGS = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm 14 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include/armadillo 15 | ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -llapack -lopenblas 16 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 17 | CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) -I/opt/boost/boost_1_57_0 -c -DDEBUG -D__LINUX -g3 18 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program 19 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 20 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl 21 | #LINKFLAGS = 22 | #ODIR=obj 23 | ODIR = 24 | 25 | OBJ = main.o 26 | 27 | test : $(OBJ) 28 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 29 | 30 | #%.o : %.cpp 31 | # $(CPP) -c $(CXXFLAGS) 32 | 33 | 34 | clean: 35 | rm -f *.o *~ 36 | 37 | build-tests: 38 | 39 | -------------------------------------------------------------------------------- /MatArray/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "MatArray.h" 10 | 11 | 12 | 13 | int main(int argc, char *argv[]) { 14 | MatArray::Mat1DArray_ptr matArr = MatArray::build(5); 15 | 16 | for (int i = 0 ; i < 5; i++) { 17 | (*matArr)[i].randu(5,5); 18 | (*matArr)[i].print("1D"); 19 | } 20 | 21 | MatArray::Mat2DArray_ptr mat2DArr = MatArray::build(2,2); 22 | 23 | for (int i = 0 ; i < 2; i++) { 24 | for (int j = 0; j < 2; j++) { 25 | (*mat2DArr)[i][j].randu(5,5); 26 | (*mat2DArr)[i][j].print("2D"); 27 | } 28 | } 29 | 30 | // here I try to test Tensor_4D 31 | Tensor_4D tensor(2,3,4,5); 32 | 33 | assert(2==tensor.dim1()); 34 | assert(3==tensor.dim2()); 35 | assert(4==tensor.dim3()); 36 | assert(5==tensor.dim4()); 37 | assert(120==tensor.size()); 38 | 39 | tensor.fill_randn(); 40 | tensor.print(); 41 | tensor.fill_zeros(); 42 | tensor.print(); 43 | 44 | arma::vec v(20,arma::fill::randn); 45 | Tensor_4D tensor2(v.memptr(), 20, 1,1,4,5); 46 | assert(1==tensor2.dim1()); 47 | assert(1==tensor2.dim2()); 48 | assert(4==tensor2.dim3()); 49 | assert(5==tensor2.dim4()); 50 | assert(20==tensor2.size()); 51 | 52 | v.print("arma::v"); 53 | tensor2.print(); 54 | 55 | Tensor_4D tensor3(v.memptr(), 20, 1,1,4,5,true); 56 | tensor3.fill_zeros(); 57 | v.print("arma::v"); 58 | 59 | tensor2.substract(tensor3,1.0); 60 | tensor2.print(); 61 | 62 | tensor3.substract(tensor2,1.0); 63 | tensor3.print(); 64 | 65 | Tensor_4D t4(1,2,3,4); 66 | int count = 0; 67 | for (int i = 0; i < t4.dim4(); i++){ 68 | for (int j = 0; j < t4.dim3(); j++){ 69 | for (int k = 0; k < t4.dim2(); k++){ 70 | for (int m = 0; m < t4.dim1(); m++){ 71 | t4(m,k,j,i) = count++; 72 | } 73 | } 74 | 75 | } 76 | } 77 | 78 | 79 | arma::vec v2(t4.getPtr(),t4.size()); 80 | 81 | 82 | t4.print(); 83 | 84 | for (int i= 0; i < t4.size(); 
i++) 85 | t4(i) -= i; 86 | 87 | t4.print(); 88 | 89 | v2.print("arma::v2"); 90 | 91 | 92 | Tensor_4D t5(2,2,2,2); 93 | 94 | t5.fill_randu(); 95 | 96 | 97 | t5.print(); 98 | 99 | t5.transform([](double val){return val-0.5;}); 100 | 101 | t5.print(); 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | } 110 | 111 | -------------------------------------------------------------------------------- /PoolLayer/Makefile: -------------------------------------------------------------------------------- 1 | # this make file now is dynamically linking 2 | #OPTFLAGS = -O3 -march=nocona -mfpmath=sse -msse3 -Wuninitialized -flto 3 | #MKL_INCLUDE = /opt/intel/mkl/include/ 4 | #MKL_INCLUDE = /opt/intel/composer_xe_2013.0.079/mkl/include/ 5 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 6 | 7 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 8 | CPP = g++ 9 | #MKLROOT = /opt/intel/mkl 10 | #MKLROOT = /opt/intel/composer_xe_2013.0.079/mkl 11 | MKLROOT = /opt/intel/composerxe/mkl 12 | MKLLINKFLAGS = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread 13 | MKL_DL_LINKFLAGS = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm 14 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include/armadillo 15 | ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -llapack -lopenblas 16 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 17 | CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) -I/opt/boost/boost_1_57_0 -c -DDEBUG -D__LINUX -g3 18 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program 19 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 20 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl 21 | #LINKFLAGS = 22 | #ODIR=obj 23 | ODIR = 24 | 25 | OBJ = main.o PoolLayer.o 26 | 27 | test : $(OBJ) 28 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 29 | 30 | #%.o : %.cpp 31 | # $(CPP) -c $(CXXFLAGS) 32 | 33 | 34 | clean: 35 | rm -f *.o *~ 36 | -------------------------------------------------------------------------------- /PoolLayer/PoolLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "PoolLayer.h" 2 | using namespace NeuralNet; 3 | 4 | PoolLayer::PoolLayer(int poolDim0_x, int poolDim0_y, Type type0) { 5 | poolDim_x = poolDim0_x; 6 | poolDim_y = poolDim0_y; 7 | type = type0; 8 | } 9 | 10 | void PoolLayer::setInputDim(int inputDim0_x, int inputDim0_y, int inputDim0_z){ 11 | 12 | inputDim_x = inputDim0_x; 13 | inputDim_y = inputDim0_y; 14 | inputDim_z = inputDim0_z; 15 | inputSize = inputDim_x * inputDim_y * inputDim_z; 16 | outputDim_x = inputDim0_x / poolDim_x; 17 | outputDim_y = inputDim0_y / poolDim_y; 18 | outputDim_z = inputDim0_z; 19 | outputSize = outputDim_x * outputDim_y * outputDim_z; 20 | } 21 | 22 | void PoolLayer::activateUp(std::shared_ptr input0) { 23 | input = input0; 24 | int maxIdx1, maxIdx2; 25 | int inputInstance = input->n_slices / inputDim_z; 26 | output = std::make_shared(outputDim_x,outputDim_y, outputDim_z*inputInstance,arma::fill::zeros); 27 | maxIdx_x = std::make_shared>(outputDim_x,outputDim_y, outputDim_z*inputInstance); 28 | maxIdx_y = std::make_shared>(outputDim_x,outputDim_y, outputDim_z*inputInstance); 29 | 30 | if (type == mean) { 31 | for (int d = 0; d < outputDim_z * inputInstance; d++) { 32 | for (int i = 0; i < outputDim_x; i++) { 33 | for (int j = 0; j < outputDim_y; j++) { 34 | 
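                    // Mean pooling: output cell (i,j) of slice d accumulates the
                    // poolDim_x x poolDim_y input window starting at (i*poolDim_x, j*poolDim_y);
                    // the whole slice is divided by the window area once the loops finish.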
for (int m = i * poolDim_x; m < (i + 1) * poolDim_x; m++) { 35 | for (int n = j * poolDim_y; n < (j + 1) * poolDim_y; n++) { 36 | (*output)(i,j,d) += (*input)(m,n,d); 37 | } 38 | } 39 | } 40 | } 41 | (*output).slice(d) /= (1.0 * poolDim_x * poolDim_y); 42 | } 43 | } else if (type == max) { 44 | (*output).zeros(); 45 | for (int d = 0; d < outputDim_z * inputInstance; d++) { 46 | for (int i = 0; i < outputDim_x; i++) { 47 | for (int j = 0; j < outputDim_y; j++) { 48 | double maxtemp = 0.0; 49 | maxIdx1 = 0; 50 | maxIdx2 = 0; 51 | for (int m = i * poolDim_x; m < (i + 1) * poolDim_x; m++) { 52 | for (int n = j * poolDim_y; n < (j + 1) * poolDim_y; n++) { 53 | if (maxtemp < (*input)(m,n,d) ) { 54 | maxtemp = (*input)(m,n,d); 55 | maxIdx1 = m; 56 | maxIdx2 = n; 57 | } 58 | } 59 | } 60 | (*output)(i,j,d) = maxtemp; 61 | (*maxIdx_x)(i,j,d) = maxIdx1; 62 | (*maxIdx_y)(i,j,d) = maxIdx2; 63 | } 64 | } 65 | } 66 | } 67 | } 68 | 69 | void PoolLayer::upSampling(std::shared_ptr delta_in) { 70 | int inputInstance = delta_in->n_slices / inputDim_z; 71 | delta_out = std::make_shared(inputDim_x,inputDim_y, inputDim_z * inputInstance, arma::fill::zeros); 72 | if (type == mean) { 73 | for (int d = 0; d < inputDim_z * inputInstance; d++) { 74 | for (int i = 0; i < inputDim_x; i++) { 75 | for (int j = 0; j < inputDim_y; j++) { 76 | (*delta_out)(i,j,d) = (*delta_in)(i/poolDim_x,j/poolDim_y,d); 77 | } 78 | } 79 | } 80 | (*delta_out) /= (1.0 * poolDim_x * poolDim_y); 81 | } else if(type == max) { 82 | for (int d = 0; d < outputDim_z * inputInstance; d++) { 83 | for (int i = 0; i < outputDim_x; i++) { 84 | for (int j = 0; j < outputDim_y; j++) { 85 | (*delta_out)((*maxIdx_x)(i,j,d),(*maxIdx_y)(i,j,d),d) = (*delta_in)(i,j,d); 86 | } 87 | } 88 | } 89 | } 90 | 91 | } -------------------------------------------------------------------------------- /PoolLayer/PoolLayer.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../MatArray/MatArray.h" 4 | 5 | namespace NeuralNet{ 6 | 7 | struct PoolLayer { 8 | enum Type { mean, max}; 9 | PoolLayer() {} 10 | PoolLayer(int poolDim_x, int poolDim_y, Type type0); 11 | void setInputDim(int, int, int); 12 | void activateUp(std::shared_ptr input0); 13 | void upSampling(std::shared_ptr detla_in); 14 | std::shared_ptr input; 15 | std::shared_ptr output; 16 | std::shared_ptr> maxIdx_x, maxIdx_y; 17 | std::shared_ptr detla_in; 18 | std::shared_ptr delta_out; 19 | Type type; 20 | int poolDim_x, poolDim_y; 21 | int inputDim_x; 22 | int inputDim_y; 23 | int inputDim_z; 24 | int outputDim_x, outputDim_y, outputDim_z; 25 | int inputSize, outputSize; 26 | }; 27 | 28 | } -------------------------------------------------------------------------------- /PoolLayer/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "PoolLayer.h" 10 | #include "../MatArray/MatArray.h" 11 | 12 | void loadData_MNIST(std::shared_ptr X, 13 | std::shared_ptr Y); 14 | 15 | int main(int argc, char *argv[]) { 16 | std::shared_ptr DataX(new arma::mat); 17 | std::shared_ptr DataY(new arma::mat); 18 | std::shared_ptr trainDataX(new arma::mat); 19 | std::shared_ptr trainDataY(new arma::mat); 20 | std::shared_ptr testDataX(new arma::mat); 21 | std::shared_ptr testDataY(new arma::mat); 22 | std::shared_ptr ValidationDataX(new arma::mat); 23 | std::shared_ptr ValidationDataY(new arma::mat); 24 | 25 | 
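    // Test driver: load the raw MNIST dump (one sample per row, 784 features),
    // keep a tiny train/test split, reshape each sample into a 28x28 slice,
    // and feed the resulting cube to a 4x4 mean PoolLayer below.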
loadData_MNIST(DataX,DataY); 26 | 27 | int ntrain = 2; 28 | int ntest = 100; 29 | // now I split data into train, test, and validation 30 | trainDataX = std::make_shared(DataX->rows(0,ntrain-1)); 31 | trainDataY = std::make_shared(DataY->rows(0,ntrain-1)); 32 | testDataX = std::make_shared(DataX->rows(ntrain,ntrain+ntest-1)); 33 | testDataY = std::make_shared(DataY->rows(ntrain,ntrain+ntest-1)); 34 | 35 | std::shared_ptr trainDataX2D(new arma::cube(28,28,ntrain)); 36 | MatArray::Mat1DArray_ptr trainDataX2D2 = MatArray::build(ntrain); 37 | 38 | for (int i = 0 ; i < ntrain; i++) { 39 | (*trainDataX2D2)[i].set_size(28,28); 40 | for(int j = 0; j < 28; j++) { 41 | for( int k = 0; k < 28; k++) { 42 | (*trainDataX2D)(j,k,i) = trainDataX->at(i,28*j+k); 43 | (*trainDataX2D2)[i](j,k) = trainDataX->at(i,28*j+k); 44 | } 45 | } 46 | (*trainDataX2D2)[i].print(); 47 | } 48 | 49 | trainDataX2D->save("cube.dat",arma::raw_ascii); 50 | DataX.reset(); 51 | DataY.reset(); 52 | 53 | PoolLayer pl(4,4, PoolLayer::mean, trainDataX2D); 54 | pl.activateUp(); 55 | // pl.outputX->save("outputcube_mean.dat", arma::raw_ascii); 56 | /* 57 | int inputDim = trainDataX->n_cols; 58 | int outputDim = trainDataY->n_cols; 59 | std::cout << inputDim << std::endl; 60 | std::cout << outputDim << std::endl; 61 | std::cout << trainDataX->n_rows << std::endl; 62 | std::cout << trainDataY->n_rows << std::endl; 63 | 64 | 65 | int numLayers = 2; 66 | std::vector dimensions; 67 | 68 | dimensions.push_back(784); 69 | dimensions.push_back(100); 70 | dimensions.push_back(50); 71 | 72 | bool trainFlag = true; 73 | bool testFlag = false; 74 | RBM::PreTrainPara trainingPara(1e-6, 10, 10, 0.1); 75 | trainingPara.print(); 76 | std::string filename = "pretrain"; 77 | std::shared_ptr trainDataXBin(new arma::umat(trainDataX->n_rows,trainDataX->n_cols)); 78 | *trainDataXBin = (*trainDataX) < 0.5; 79 | StackedRBM SRbm(numLayers, dimensions, trainDataXBin, trainingPara); 80 | 81 | if (trainFlag) { 82 | SRbm.preTrain(filename); 83 | } 84 | /* 85 | if (testFlag){ 86 | if (!trainFlag) rbm.loadTrainResult(filename); 87 | testDataX->save("testSample.dat",arma::raw_ascii); 88 | rbm.TestViaReconstruct(testDataX); 89 | } 90 | */ 91 | 92 | } 93 | 94 | 95 | void loadData_MNIST(std::shared_ptr X, 96 | std::shared_ptr Y) { 97 | 98 | std::string filename_base("../MNIST/data"); 99 | std::string filename; 100 | char tag[50]; 101 | char x; 102 | int count; 103 | int numFiles = 10; 104 | int featSize = 28*28; 105 | int labelSize = 10; 106 | int numSamples = 1000; 107 | X->set_size(numFiles*numSamples,featSize); 108 | Y->set_size(numFiles*numSamples,labelSize); 109 | Y->fill(0); 110 | 111 | 112 | for (int i = 0 ; i < numFiles ; i++) { 113 | sprintf(tag,"%d",i); 114 | filename=filename_base+(std::string)tag; 115 | std::cout << filename << std::endl; 116 | std::ifstream infile; 117 | infile.open(filename,std::ios::binary | std::ios::in); 118 | if (infile.is_open()) { 119 | 120 | for (int j = 0 ; j < numSamples ; j++) { 121 | 122 | for (int k =0 ; k 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | struct ProgramArgs { 11 | ProgramArgs(int argc, char *argv[]); 12 | void LoadFromFile(const string & argsFilename); 13 | void ParseArg(string argAndVal); 14 | int ntrain, ntest, saveFrequency, inputDim, hiddenDim,nEpoch; 15 | double learningRate, eps, momentum, miniBatchSize, learningRateDecay, dropOutRate, L2Decay; 16 | string dataPath; 17 | bool dropOutFlag; 18 | }; 19 | 
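RBM.h, dumped next, exposes propUp / reconstructVisible / reconstructHiddenProb together with momentum-style gradient buffers. For orientation only, a minimal one-step contrastive divergence (CD-1) update that such primitives typically support is sketched below; every name here (cd1_step, logistic, the argument layout) is illustrative and is not the package's RBM.cpp, which additionally handles mini-batches, momentum, dropout and L2 decay (see PreTrainPara).

```cpp
// Illustrative CD-1 step with Armadillo; NOT the package's implementation.
#include <armadillo>
#include <cmath>

static arma::mat logistic(arma::mat x) {
    x.transform([](double v) { return 1.0 / (1.0 + std::exp(-v)); });
    return x;
}

// W: hidden x visible, B: hidden bias, A: visible bias, v0: visible x batch
void cd1_step(arma::mat& W, arma::vec& A, arma::vec& B, const arma::mat& v0, double alpha) {
    arma::mat h0 = logistic(W * v0 + arma::repmat(B, 1, v0.n_cols));      // propUp
    arma::mat v1 = logistic(W.t() * h0 + arma::repmat(A, 1, v0.n_cols));  // reconstructVisible
    arma::mat h1 = logistic(W * v1 + arma::repmat(B, 1, v0.n_cols));      // reconstructHiddenProb
    W += alpha * (h0 * v0.t() - h1 * v1.t()) / v0.n_cols;  // <v h>_data - <v h>_recon
    B += alpha * arma::mean(h0 - h1, 1);
    A += alpha * arma::mean(v0 - v1, 1);
}
```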
-------------------------------------------------------------------------------- /RBM/RBM.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "../Utils/Util.h" 7 | 8 | namespace NeuralNet{ 9 | 10 | class RBM { 11 | public: 12 | struct PreTrainPara { 13 | 14 | PreTrainPara(double eps0=1e-6, int NEpoch0 = 500, 15 | int miniBatchSize0 = 10, double alpha0 = 0.01, 16 | double momentum0 = 0.9, int saveFreq0 = 50, 17 | double learningRateDecay0 = 1.0, bool dropOutFlag0 = false, 18 | double dropOutRate0 = 0.3, double L2Decay0 = 0.0002): 19 | eps(eps0),NEpoch(NEpoch0), miniBatchSize(miniBatchSize0), 20 | alpha(alpha0), momentum(momentum0), saveFrequency(saveFreq0), 21 | learningRateDecay(learningRateDecay0), dropOutFlag(dropOutFlag0), 22 | dropOutRate(dropOutRate0), L2Decay(L2Decay0){} 23 | double eps; 24 | int NEpoch; 25 | int miniBatchSize; 26 | double alpha; 27 | double momentum; 28 | int saveFrequency; 29 | double learningRateDecay; 30 | bool dropOutFlag; 31 | double dropOutRate; 32 | double L2Decay; 33 | void print() const; 34 | }; 35 | 36 | 37 | RBM(int visibleDim, int hiddenDim, RBM::PreTrainPara preTrainPara0); 38 | RBM(int visibleDim, int hiddenDim, std::shared_ptr trainingX0, RBM::PreTrainPara preTrainPara0); 39 | void train(); 40 | void saveTrainResult(std::string filename); 41 | void loadTrainResult(std::string filename); 42 | void initializeWeight(); 43 | void propUp(std::shared_ptr); 44 | void reconstructVisible(); 45 | void reconstructHiddenProb(); 46 | double calReconstructError(std::shared_ptr inputX); 47 | double calEnergy(std::shared_ptr inputX) const; 48 | void TestViaReconstruct(std::shared_ptr testDataX); 49 | int inputDim; 50 | int outputDim; 51 | int numInstance; 52 | Random_Bernoulli *randomGen; 53 | std::shared_ptr inputX, W , outputY, H_reconstructProb, grad_W, grad_W_old; 54 | std::shared_ptr H,V, V_reconstruct; 55 | std::shared_ptr A, B, grad_B, grad_B_old, grad_A, grad_A_old; 56 | RBM::PreTrainPara trainingPara; 57 | 58 | }; 59 | 60 | } -------------------------------------------------------------------------------- /RBM/mainSDA.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "RBM.h" 9 | #include "ProgramArgs.h" 10 | 11 | using namespace NeuralNet; 12 | 13 | void loadData_MNIST(std::shared_ptr X, 14 | std::shared_ptr Y, const std::string); 15 | 16 | int main(int argc, char *argv[]) { 17 | std::shared_ptr DataX(new arma::mat); 18 | std::shared_ptr DataY(new arma::mat); 19 | std::shared_ptr trainDataX(new arma::mat); 20 | std::shared_ptr trainDataY(new arma::mat); 21 | std::shared_ptr testDataX(new arma::mat); 22 | std::shared_ptr testDataY(new arma::mat); 23 | std::shared_ptr ValidationDataX(new arma::mat); 24 | std::shared_ptr ValidationDataY(new arma::mat); 25 | 26 | ProgramArgs progArgs(argc, argv); 27 | 28 | loadData_MNIST(DataX,DataY, progArgs.dataPath); 29 | 30 | int ntrain = progArgs.ntrain; 31 | int ntest = progArgs.ntest; 32 | int hiddenDim = progArgs.hiddenDim; 33 | int inputDim = progArgs.inputDim; 34 | 35 | RBM::PreTrainPara trainingPara(progArgs.eps, progArgs.nEpoch, progArgs.miniBatchSize, 36 | progArgs.learningRate, progArgs.momentum, progArgs.saveFrequency, progArgs.learningRateDecay, 37 | progArgs.dropOutFlag, progArgs.dropOutRate); 38 | // now I split data into train, test, and validation 39 | 
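    // (this loader stores one sample per column -- DataX is featSize x N -- so the
    //  split below slices column ranges rather than rows)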
trainDataX = std::make_shared(DataX->cols(0,ntrain-1)); 40 | trainDataY = std::make_shared(DataY->cols(0,ntrain-1)); 41 | testDataX = std::make_shared(DataX->cols(ntrain,ntrain+ntest-1)); 42 | testDataY = std::make_shared(DataY->cols(ntrain,ntrain+ntest-1)); 43 | 44 | DataX.reset(); 45 | DataY.reset(); 46 | 47 | 48 | 49 | std::cout << trainDataX->n_cols << std::endl; 50 | 51 | trainingPara.print(); 52 | 53 | bool trainFlag = true; 54 | bool testFlag = true; 55 | 56 | std::string filename = "pretrain_final"; 57 | std::shared_ptr trainDataXBin(new arma::umat(trainDataX->n_rows,trainDataX->n_cols)); 58 | *trainDataXBin = (*trainDataX) > 0.5; 59 | RBM rbm(inputDim, hiddenDim, trainDataXBin, trainingPara); 60 | 61 | if (trainFlag) { 62 | rbm.train(); 63 | rbm.saveTrainResult(filename); 64 | } 65 | 66 | if (testFlag) { 67 | if (!trainFlag) rbm.loadTrainResult(filename); 68 | testDataX->save("testSample.dat",arma::raw_ascii); 69 | rbm.TestViaReconstruct(testDataX); 70 | } 71 | } 72 | 73 | 74 | 75 | void loadData_MNIST(std::shared_ptr X, 76 | std::shared_ptr Y,const std::string filepath) { 77 | 78 | std::string filename_base(filepath); 79 | std::string filename; 80 | char tag[50]; 81 | char x; 82 | int count; 83 | int numFiles = 10; 84 | int featSize = 28*28; 85 | int labelSize = 10; 86 | int numSamples = 1000; 87 | X->set_size(featSize,numFiles*numSamples); 88 | Y->set_size(labelSize, numFiles*numSamples); 89 | Y->fill(0); 90 | 91 | 92 | for (int i = 0 ; i < numFiles ; i++) { 93 | sprintf(tag,"%d",i); 94 | filename=filename_base+(std::string)tag; 95 | std::cout << filename << std::endl; 96 | std::ifstream infile; 97 | infile.open(filename,std::ios::binary | std::ios::in); 98 | if (infile.is_open()) { 99 | 100 | for (int j = 0 ; j < numSamples ; j++) { 101 | 102 | for (int k =0 ; k 4.8 6 | #### Armadillo linear algebra library [Link] (http://arma.sourceforge.net/) 7 | #### Cuda toolkit [link] (https://developer.nvidia.com/cuda-toolkit) 8 | #### Boost [link] (http://www.boost.org/ ) 9 | #### Gtest [link] (https://code.google.com/p/googletest/) 10 | #### Google protocol buffer [link] (https://developers.google.com/protocol-buffers/) 11 | 12 | -------------------------------------------------------------------------------- /include/ActivationFunc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | 5 | namespace NeuralNet{ 6 | enum ActivationType {softmax, sigmoid, linear, tanh, ReLU}; 7 | 8 | inline ActivationType GetActivationType(DeepLearning::NeuralNetParameter_ActivationType type){ 9 | switch (type) { 10 | case DeepLearning::NeuralNetParameter_ActivationType_sigmoid: 11 | return sigmoid; 12 | break; 13 | case DeepLearning::NeuralNetParameter_ActivationType_linear: 14 | return linear; 15 | break; 16 | case DeepLearning::NeuralNetParameter_ActivationType_tanh: 17 | return tanh; 18 | break; 19 | case DeepLearning::NeuralNetParameter_ActivationType_softmax: 20 | return softmax; 21 | break; 22 | case DeepLearning::NeuralNetParameter_ActivationType_ReLU: 23 | return ReLU; 24 | break; 25 | default: 26 | std::cerr << "invalid activation type" << std::endl; 27 | exit(1); 28 | break; 29 | } 30 | } 31 | 32 | inline void ApplyActivation(std::shared_ptr output, ActivationType actType){ 33 | std::shared_ptr &p=output; 34 | arma::mat maxVal = arma::max(*p,0); 35 | arma::mat sumVal; 36 | switch(actType) { 37 | case softmax: 38 | for (int i = 0; i < p->n_cols; i++) { 39 | p->col(i) -= maxVal(i); 40 | } 41 | (*p).transform([](double 
val) { 42 | return exp(val); 43 | }); 44 | 45 | sumVal = arma::sum(*p, 0); 46 | for (int i = 0; i < p->n_cols; i++) { 47 | p->col(i) /= sumVal(i); 48 | } 49 | break; 50 | case sigmoid: 51 | (*p).transform([](double val) { 52 | return 1.0 / (1.0 + exp(-val)); 53 | }); 54 | break; 55 | case linear: 56 | break; 57 | case ReLU: 58 | p->transform([](double val) { 59 | return val > 0 ? val : 0; 60 | }); 61 | break; 62 | case tanh: 63 | p->transform([](double val){return std::tanh(val);}); 64 | break; 65 | default: 66 | std::cerr << "invalid activation type" << std::endl; 67 | break; 68 | } 69 | } 70 | inline void GetActivationGradient(std::shared_ptr in, std::shared_ptr out, ActivationType actType){ 71 | 72 | if (actType == softmax) { 73 | out->ones(in->n_rows,in->n_cols); 74 | } else if (actType == sigmoid ) { 75 | *out = (1 - (*in)) % (*in); 76 | } else if ( actType == tanh) { 77 | *out = (1 - (*in) % (*in)); 78 | } else if ( actType == linear) { 79 | out->ones(in->n_rows,in->n_cols); 80 | } else if(actType == ReLU){ 81 | *out = *in; 82 | out->transform([](double val) {return val > 0 ? 1.0: 0 ;}); 83 | } 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /include/ActivationLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet { 5 | 6 | struct ActivationLayer: public Layer_unitaryOp { 7 | 8 | ActivationLayer(ActivationType actType0) { 9 | actType = actType0; 10 | }; 11 | ActivationType actType; 12 | virtual void activateUp(); 13 | virtual void calGrad(std::shared_ptr delta_in); 14 | }; 15 | 16 | } 17 | -------------------------------------------------------------------------------- /include/BaseLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | namespace NeuralNet{ 4 | 5 | struct BaseLayer: public Layer_unitaryOp { 6 | BaseLayer() {} 7 | BaseLayer(int inputDim0, int outputDim0, ActivationType actType0, std::shared_ptr init_w = nullptr, 8 | std::shared_ptr init_B = nullptr, bool dropout = false, double dropr=0.3); 9 | /* save weights of the layers 10 | */ 11 | virtual void save(std::string filename = "BaseLayer"); 12 | virtual void load(std::string filename = "BaseLayer"); 13 | /* given the input matrix, perform 14 | outputY = sigma (W*input + B), sigma is the activation function 15 | */ 16 | virtual void activateUp(); 17 | void activateUp(std::shared_ptr input); 18 | /* 19 | given the error propogated from upper layers, update the W and B using gradient descent 20 | */ 21 | void updatePara(std::shared_ptr delta_in, double learningRate); 22 | /* 23 | calculate the gradient and propogate the error but not update W and B 24 | */ 25 | virtual void calGrad(std::shared_ptr delta_in); 26 | virtual void calGrad(std::shared_ptr delta_in, int t); 27 | void accumulateGrad(std::shared_ptr delta_in); 28 | virtual void accumulateGrad(std::shared_ptr delta_in, int t); 29 | void updatePara_accu(double learningRate); 30 | 31 | /* randomly initialize weight and bias*/ 32 | void initializeWeight(); 33 | 34 | int W_size, B_size, totalSize; 35 | /* weight and bias for this layer*/ 36 | std::shared_ptr W, B; 37 | std::shared_ptr grad_W, grad_W_accu, grad_B, grad_B_accu; 38 | /* the error propogated from lower layers*/ 39 | bool dropOutFlag; 40 | double dropOutRate; 41 | std::shared_ptr initializer_W, initializer_B; 42 | arma::mat dropOutMat; 43 | ActivationType actType; 44 | // 
extract out the specific input or output at time point t during backpropagation 45 | // to calculate the gradient 46 | std::shared_ptr getInputMemory(int t); 47 | std::shared_ptr getOutputMemory(int t); 48 | void clearAccuGrad(); 49 | 50 | Random_Bernoulli *randomGen; 51 | void vectoriseGrad(std::shared_ptr V); 52 | void deVectoriseWeight(std::shared_ptr V); 53 | void vectoriseWeight(std::shared_ptr V); 54 | void vectoriseGrad(double *ptr, size_t offset); 55 | void deVectoriseWeight(double *ptr, size_t offset); 56 | void vectoriseWeight(double *ptr, size_t offset); 57 | void fill_Bernoulli(double *, int size); 58 | 59 | 60 | }; 61 | 62 | } 63 | -------------------------------------------------------------------------------- /include/BaseModel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | namespace ReinforcementLearning { 4 | 5 | typedef std::vector State; 6 | 7 | class BaseModel { 8 | public: 9 | virtual ~BaseModel(){} 10 | virtual void run(int action) = 0; 11 | virtual void run(int action, int steps){ 12 | for (int i = 0; i < steps; i++){ 13 | run(action); 14 | } 15 | }; 16 | virtual State getCurrState() { 17 | return currState; 18 | } 19 | virtual void createInitialState() = 0; 20 | virtual int getNumActions(){ return numActions;} 21 | virtual double getRewards() {} 22 | virtual bool terminate() {} 23 | protected: 24 | State currState, prevState; 25 | int numActions; 26 | int stateDim; 27 | }; 28 | 29 | struct Experience{ 30 | State oldState, newState; 31 | int action; 32 | double reward; 33 | Experience(State old0, State new0, int a0, double c0): 34 | oldState(old0),newState(new0), action(a0), reward(c0) 35 | {} 36 | }; 37 | 38 | } 39 | -------------------------------------------------------------------------------- /include/ElementMultiAddLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | namespace NeuralNet { 4 | 5 | struct ElementMultiAddLayer : public Layer_binaryOp { 6 | ElementMultiAddLayer(); 7 | virtual ~ElementMultiAddLayer(){} 8 | virtual void activateUp(); 9 | virtual void calGrad(std::shared_ptr delta_in); 10 | virtual void calGrad(std::shared_ptr delta_in, int t); 11 | void saveWeightMem(); 12 | std::shared_ptr W_one, W_two; 13 | std::shared_ptr grad_W_one, grad_W_two; 14 | std::vector> W_one_mem, W_two_mem; 15 | 16 | }; 17 | } 18 | -------------------------------------------------------------------------------- /include/ElementwiseLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet { 5 | 6 | struct ElementwiseLayer: public Layer_binaryOp { 7 | 8 | ElementwiseLayer() { 9 | //we only need to assign memory to the output 10 | delta_outOne = std::make_shared(); 11 | delta_outTwo = std::make_shared(); 12 | output = std::make_shared(); 13 | }; 14 | virtual void activateUp(); 15 | virtual void calGrad(std::shared_ptr delta_in, int timePoint); 16 | virtual void calGrad(std::shared_ptr delta_in); 17 | }; 18 | 19 | } 20 | -------------------------------------------------------------------------------- /include/ElmanRL.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "RNN.h" 7 | #include "common.h" 8 | namespace NeuralNet { 9 | 10 | class ElmanRL: public RNN { 11 | 12 | public: 13 | 
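    // Elman-style recurrent network specialised for the RL solvers: the first
    // rnnInputDim rows of each input column feed the recurrent stack, while the
    // remaining rows (the action part) are joined onto the top recurrent output
    // before the feed-forward base layers (see forwardInTime in src/ElmanRL.cpp).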
ElmanRL(DeepLearning::NeuralNetParameter); 14 | virtual ~ElmanRL(){} 15 | 16 | // implementing methods required by Net interface 17 | virtual arma::mat forwardInTime(std::shared_ptr x); 18 | void backward(); 19 | }; 20 | } 21 | 22 | 23 | -------------------------------------------------------------------------------- /include/Globals.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | template 15 | void Swap(T a, T b) { 16 | T t = a; 17 | a = b; 18 | b = t; 19 | } 20 | 21 | #define ASSERT(TST) ( (TST) ? (void)0 : (std::cerr << __FILE__ "(" << __LINE__ << "): Assertion failed " #TST << std::endl,abort()) ) 22 | 23 | static const double INFTY = std::numeric_limits::infinity(); 24 | 25 | static const double NaN = std::numeric_limits::quiet_NaN(); 26 | 27 | static const double TOL = pow(std::numeric_limits::epsilon(), (double)1.0 / 3); 28 | 29 | static bool IsClose(double a, double b) { 30 | return abs(a - b) < TOL; 31 | } 32 | 33 | static bool IsNaN(double x) { return boost::math::isnan(x); } 34 | 35 | static bool IsInf(double x) { return boost::math::isinf(x); } 36 | 37 | static bool IsDangerous(double x) { return IsNaN(x) || IsInf(x); } 38 | 39 | static double LogSum(double x, double y) { 40 | double d = x - y; 41 | if (d < -30) return y; 42 | else if (d > 30) return x; 43 | else if (d > 0) return x + log(1.0 + exp(-d)); 44 | else return y + log(1.0 + exp(d)); 45 | } 46 | 47 | static double Logistic(double x) { 48 | if (x < -30) return 0; 49 | else if (x > 30) return 1; 50 | else return 1.0 / (1.0 + exp(-x)); 51 | } 52 | 53 | static double LogLoss(double x) { 54 | if (x < -30) return -x; 55 | else if (x > 30) return 0; 56 | else return log(1 + exp(-x)); 57 | } 58 | 59 | template 60 | void Serialize(const C & c, const string & filename) { 61 | ofstream outStream(filename, ios::out|ios::binary); 62 | if (!outStream.is_open()) { 63 | cout << "Couldn't open serialized file " << filename.c_str() << endl; 64 | exit(1); 65 | } 66 | 67 | c.Serialize(outStream); 68 | 69 | outStream.close(); 70 | } 71 | 72 | template 73 | void Deserialize(C & c, const string & filename) { 74 | ifstream inStream(filename, ios::in|ios::binary); 75 | if (!inStream.is_open()) { 76 | cout << "Couldn't open serialized file " << filename.c_str() << endl; 77 | exit(1); 78 | } 79 | 80 | c.Deserialize(inStream); 81 | 82 | inStream.close(); 83 | } 84 | -------------------------------------------------------------------------------- /include/Initializer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace NeuralNet{ 6 | class Initializer{ 7 | public: 8 | virtual ~Initializer() {} 9 | virtual void applyInitialization(std::shared_ptr) = 0; 10 | }; 11 | 12 | class Initializer_normal: public Initializer{ 13 | public: 14 | Initializer_normal(double std0, double mean0):std(std0), mean(mean0){} 15 | virtual ~Initializer_normal(){} 16 | virtual void applyInitialization(std::shared_ptr m){ 17 | m->randn(); 18 | m->transform([&](double val){ return val*std + mean;}); 19 | } 20 | private: 21 | double std, mean; 22 | 23 | }; 24 | 25 | class Initializer_zero: public Initializer{ 26 | public: 27 | Initializer_zero(){} 28 | virtual ~Initializer_zero(){} 29 | virtual void applyInitialization(std::shared_ptr m){ 30 | m->zeros(); 31 | } 32 | }; 33 | 34 | class 
Initializer_identity: public Initializer{ 35 | public: 36 | Initializer_identity(){} 37 | virtual ~Initializer_identity(){} 38 | virtual void applyInitialization(std::shared_ptr m){ 39 | m->eye(); 40 | } 41 | }; 42 | class Initializer_glorot_uniform: public Initializer{ 43 | public: 44 | Initializer_glorot_uniform(){} 45 | virtual ~Initializer_glorot_uniform(){} 46 | virtual void applyInitialization(std::shared_ptr W){ 47 | int inputDim = W->n_cols; 48 | int outputDim = W->n_rows; 49 | W->randu(); 50 | (*W) -= 0.5; 51 | (*W) *=sqrt(6.0/(inputDim+outputDim)); 52 | } 53 | }; 54 | 55 | class InitializerBuilder{ 56 | public: 57 | inline static std::shared_ptr GetInitializer(const DeepLearning::NeuralNetInitializerParameter para){ 58 | switch (para.initializertype()) { 59 | case DeepLearning::NeuralNetInitializerParameter_InitializerType_normal: 60 | return std::shared_ptr(new Initializer_normal(para.normal_std(), para.normal_mean())); 61 | break; 62 | case DeepLearning::NeuralNetInitializerParameter_InitializerType_zero: 63 | return std::shared_ptr(new Initializer_zero); 64 | break; 65 | case DeepLearning::NeuralNetInitializerParameter_InitializerType_identity: 66 | return std::shared_ptr(new Initializer_identity); 67 | break; 68 | case DeepLearning::NeuralNetInitializerParameter_InitializerType_glorot_uniform: 69 | return std::shared_ptr(new Initializer_glorot_uniform); 70 | break; 71 | default: 72 | break; 73 | } 74 | } 75 | }; 76 | 77 | 78 | } 79 | 80 | 81 | -------------------------------------------------------------------------------- /include/Layer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet{ 5 | 6 | struct Layer{ 7 | public: 8 | virtual ~Layer(){} 9 | Layer(){} 10 | Layer(int outputDim0):outputDim(outputDim0){} 11 | virtual void activateUp() = 0; 12 | virtual void calGrad(std::shared_ptr delta_in) = 0; 13 | virtual void calGrad(std::shared_ptr delta_in, int t) {} 14 | virtual void save(std::string filename){} 15 | virtual void load(std::string filename){} 16 | virtual void initializeWeight(){} 17 | virtual std::shared_ptr getOutput(){ return output;} 18 | int outputDim; 19 | std::shared_ptr output; 20 | std::vector> outputMem; 21 | virtual void saveOutputMemory(); 22 | }; 23 | 24 | inline void Layer::saveOutputMemory(){ 25 | outputMem.push_back(std::shared_ptr(new arma::mat(*output))); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /include/Layer_binaryOp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet{ 5 | 6 | struct Layer_binaryOp : public Layer{ 7 | public: 8 | virtual ~Layer_binaryOp(){} 9 | Layer_binaryOp(){} 10 | Layer_binaryOp(int inputOneDim0, int inputTwoDim0, int outputDim0):Layer(outputDim0), inputOneDim(inputOneDim0), inputTwoDim(inputTwoDim0){} 11 | // save inputs at all time points during the LSTM forward pass 12 | virtual void saveInputMemory(); 13 | virtual void setInputOne(std::shared_ptr input0){ inputOne = input0;} 14 | virtual void setInputTwo(std::shared_ptr input0){ inputTwo = input0;} 15 | virtual std::shared_ptr getDelta_outOne() {return delta_outOne;} 16 | virtual std::shared_ptr getDelta_outTwo() {return delta_outTwo;} 17 | 18 | std::shared_ptr inputOne, inputTwo; 19 | std::shared_ptr delta_outOne, delta_outTwo; 20 | int inputOneDim, inputTwoDim; 21 | std::vector> inputOneMem, inputTwoMem; 22 | 23 
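    // inputOneMem / inputTwoMem cache the per-time-step inputs pushed by saveInputMemory(),
    // so that calGrad(delta_in, t) can be evaluated against the right snapshot during
    // backpropagation through time.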
| }; 24 | 25 | inline void Layer_binaryOp::saveInputMemory(){ 26 | inputOneMem.push_back(inputOne); 27 | inputTwoMem.push_back(inputTwo); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /include/Layer_unitaryOp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet{ 5 | 6 | struct Layer_unitaryOp : public Layer{ 7 | public: 8 | virtual ~Layer_unitaryOp(){} 9 | Layer_unitaryOp(){} 10 | Layer_unitaryOp(int inputDim0, int outputDim0):Layer(outputDim0), inputDim(inputDim0){} 11 | // save inputs at all time points during the LSTM forward pass 12 | virtual void saveInputMemory(); 13 | virtual void setInput(std::shared_ptr input0){ input = input0;} 14 | virtual std::shared_ptr getDelta_out() {return delta_out;} 15 | std::shared_ptr input; 16 | std::shared_ptr delta_out; 17 | int inputDim; 18 | std::vector> inputMem; 19 | 20 | }; 21 | 22 | inline void Layer_unitaryOp::saveInputMemory(){ 23 | inputMem.push_back(input); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /include/LinearAdditionLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet { 5 | 6 | struct LinearAdditionLayer: public Layer_binaryOp { 7 | 8 | LinearAdditionLayer() { 9 | output = std::make_shared(); 10 | } 11 | virtual void activateUp(); 12 | virtual void calGrad(std::shared_ptr delta_in); 13 | 14 | 15 | }; 16 | 17 | } 18 | -------------------------------------------------------------------------------- /include/MultiAddLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | namespace NeuralNet { 4 | 5 | struct MultiAddLayer : public Layer_binaryOp { 6 | MultiAddLayer(){} 7 | MultiAddLayer(int in1, int in2, int out, ActivationType actType0, 8 | std::shared_ptr init_W_one, std::shared_ptr init_W_two, 9 | std::shared_ptr init_B); 10 | virtual ~MultiAddLayer(){} 11 | virtual void activateUp(); 12 | virtual void save(std::string filename = "MultiAddLayer"); 13 | virtual void load(std::string filename = "MultiAddLayer"); 14 | virtual void calGrad(std::shared_ptr delta_in); 15 | virtual void calGrad(std::shared_ptr delta_in, int t); 16 | virtual void initializeWeight(); 17 | void accumulateGrad(std::shared_ptr delta_in, int t); 18 | void clearAccuGrad(); 19 | ActivationType actType; 20 | std::shared_ptr W_one, W_two, B; 21 | std::shared_ptr grad_W_one, grad_W_two, grad_B; 22 | std::shared_ptr grad_W_one_accu, grad_W_two_accu, grad_B_accu; 23 | std::shared_ptr initializer_W_one, initializer_W_two, initializer_B; 24 | 25 | 26 | }; 27 | } 28 | -------------------------------------------------------------------------------- /include/MultiLayerPerceptron.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "common.h" 4 | #include "BaseLayer.h" 5 | #include "optimization.h" 6 | #include "DeepLearning.pb.h" 7 | #include "Net.h" 8 | namespace NeuralNet { 9 | 10 | class MultiLayerPerceptron : public Net { 11 | public: 12 | MultiLayerPerceptron(DeepLearning::NeuralNetParameter); 13 | virtual ~MultiLayerPerceptron() { 14 | } 15 | void train(); 16 | void initialize(); 17 | /* forward pass*/ 18 | void feedForward(std::shared_ptr); 19 | /* back propogate the error to update the parameters*/ 20 | 
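    /* (the delta passed in is the error at the network output; each layer's calGrad turns it
       into the delta for the layer below. An external trainer can drive the same machinery
       through the Net interface: forward() -> calGradient() -> netGradients() -> applyUpdates().) */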
void backProp(std::shared_ptr, double learningRate); 21 | void backProp(std::shared_ptr); 22 | void test(std::shared_ptr trainingX, std::shared_ptr trainingY); 23 | /* calculate the numerical gradient for testing*/ 24 | void calNumericGrad(std::shared_ptr trainingX, std::shared_ptr trainingY); 25 | void vectoriseGrad(arma::vec &grad); 26 | void deVectoriseWeight(arma::vec &x); 27 | void vectoriseWeight(arma::vec &x); 28 | void calLoss(std::shared_ptr delta); 29 | virtual void forward(); 30 | virtual void applyUpdates(std::vector>); 31 | virtual void calGradient(); 32 | virtual double getLoss(); 33 | virtual void save(std::string filename); 34 | virtual void load(std::string filename); 35 | virtual std::shared_ptr netOutput() { 36 | return netOutput_; 37 | } 38 | private: 39 | int numLayers; 40 | int numInstance; 41 | bool testGrad; 42 | double error; 43 | /**the collection of Base layers*/ 44 | std::vector layers; 45 | /* dimension parameters for each layer*/ 46 | std::vector dimensions; 47 | /* network output*/ 48 | std::shared_ptr netOutput_; 49 | int totalDim; 50 | 51 | }; 52 | 53 | class MLPTrainer : public Optimization::ObjectFunc { 54 | public: 55 | MLPTrainer(MultiLayerPerceptron &MLP); 56 | 57 | ~MLPTrainer() { 58 | } 59 | virtual double operator()(arma::vec &x, arma::vec &grad); 60 | // std::shared_ptr x_init; 61 | private: 62 | MultiLayerPerceptron &MLP; 63 | }; 64 | } 65 | -------------------------------------------------------------------------------- /include/Net.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet{ 5 | class Net { 6 | public: 7 | virtual ~Net() { } 8 | virtual void applyUpdates(std::vector>) = 0; 9 | virtual std::vector> netGradients() { return netGradVector;} 10 | virtual void setTrainingSamples(std::shared_ptr X, std::shared_ptr Y){ trainingX = X; trainingY = Y;} 11 | virtual void calGradient() = 0; 12 | virtual double getLoss() = 0; 13 | virtual void save(std::string filename) = 0; 14 | virtual void load(std::string filename) = 0; 15 | virtual void forward() = 0; 16 | virtual std::shared_ptr netOutput() = 0; 17 | // the following are RNN specific 18 | virtual void resetNetState(){} 19 | virtual arma::mat forwardInTime(std::shared_ptr x){} 20 | //virtual std::shared_ptr netOutputAtTime(int time){return 0;} 21 | virtual void zeroTime(){} 22 | virtual void updateInternalState(){} 23 | virtual void resetWeight(){} 24 | protected: 25 | DeepLearning::NeuralNetParameter neuralNetPara; 26 | std::vector> netGradVector; 27 | std::shared_ptr trainingX, trainingY; 28 | }; 29 | 30 | } 31 | -------------------------------------------------------------------------------- /include/RNN.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "BaseLayer.h" 7 | #include "RecurrLayer.h" 8 | #include "common.h" 9 | namespace NeuralNet { 10 | 11 | class RNN: public Net { 12 | 13 | public: 14 | RNN(DeepLearning::NeuralNetParameter); 15 | virtual ~RNN(){} 16 | 17 | // implementing methods required by Net interface 18 | virtual void forward(); 19 | virtual void applyUpdates(std::vector>); 20 | virtual void calGradient(); 21 | virtual double getLoss(); 22 | virtual void save(std::string filename); 23 | virtual void load(std::string filename); 24 | virtual std::shared_ptr netOutput(); 25 | // virtual std::shared_ptr netOutputAtTime(int time); 26 | virtual arma::mat 
forwardInTime(std::shared_ptr x); 27 | virtual void resetNetState(); 28 | virtual void updateInternalState(); 29 | virtual void resetWeight(); 30 | virtual void zeroTime(); 31 | 32 | 33 | virtual void backward(); 34 | virtual void calNumericGrad(); 35 | virtual void saveLayerInputOutput(); 36 | virtual int getTime(); 37 | virtual void setTime(int t); 38 | virtual BaseLayer getOutputLayer(){return baseLayers[numBaseLayers - 1];} 39 | virtual std::vector getRecurrLayers(){ return recurrLayers;} 40 | protected: 41 | void fillNetGradVector(); 42 | std::shared_ptr netOutput_; 43 | std::vector recurrLayers; 44 | std::vector baseLayers; 45 | int numRecurrLayers, recurrLayerInputDim, recurrLayerOutputDim, numBaseLayers; 46 | int rnnInputDim, rnnOutputDim; 47 | int time; 48 | }; 49 | } 50 | 51 | 52 | -------------------------------------------------------------------------------- /include/RecurrLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | #include "MultiAddLayer.h" 4 | namespace NeuralNet { 5 | 6 | struct RecurrLayer : public MultiAddLayer { 7 | RecurrLayer(){} 8 | RecurrLayer(int in1, int in2, int out, ActivationType actType0, 9 | std::shared_ptr init_W_one, std::shared_ptr init_W_two, 10 | std::shared_ptr init_B); 11 | virtual ~RecurrLayer(){} 12 | void savePrevOutput(); 13 | void savePrevDeltaOutOne(); 14 | std::shared_ptr getPrevOutput(){ return output_prev;} 15 | std::shared_ptr getPrevDeltaOutOne() { return delta_outOne_prev;} 16 | std::shared_ptr output_prev; 17 | std::shared_ptr delta_outOne_prev; 18 | }; 19 | } 20 | -------------------------------------------------------------------------------- /include/Util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | namespace NeuralNet{ 7 | template 8 | struct Random_Bernoulli{ 9 | // std::random_device rd; 10 | std::mt19937 gen; 11 | std::bernoulli_distribution *d; 12 | 13 | Random_Bernoulli(double p){ 14 | d = new std::bernoulli_distribution(p); 15 | } 16 | 17 | double next(){ 18 | if((*d)(gen)) return 1.0; 19 | else return 0.0; 20 | } 21 | 22 | void modifier(T *p, int size){ 23 | for (int i = 0; i < size; i++){ 24 | // perform "drop" 25 | if((*d)(gen)) 26 | *(p+i) = (T)(0); 27 | } 28 | } 29 | }; 30 | 31 | class RandomStream{ 32 | private: 33 | std::shared_ptr genPtr; 34 | std::shared_ptr> randomPtr_unitformReal; 35 | std::shared_ptr> randomPtr_unitformInt; 36 | public: 37 | RandomStream(){ 38 | 39 | std::random_device rd; 40 | genPtr = std::make_shared(rd()); 41 | randomPtr_unitformReal = std::make_shared>(0.0, 1.0); 42 | } 43 | RandomStream(int low , int high){ 44 | 45 | std::random_device rd; 46 | genPtr = std::make_shared(rd()); 47 | 48 | randomPtr_unitformReal = std::make_shared>(0.0, 1.0); 49 | randomPtr_unitformInt = std::make_shared>(low, high); 50 | } 51 | double nextDou(){return (*randomPtr_unitformReal)(*genPtr);} 52 | int nextInt(){return (*randomPtr_unitformInt)(*genPtr);} 53 | }; 54 | 55 | void loadData_MNIST(std::shared_ptr X, std::shared_ptr Y, std::string filename); 56 | } 57 | 58 | 59 | -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "io.h" 11 | #include 
"DeepLearning.pb.h" 12 | #include "Util.h" 13 | #include "Initializer.h" 14 | #include "ActivationFunc.h" 15 | #include "Layer.h" 16 | #include "Layer_unitaryOp.h" 17 | #include "Layer_binaryOp.h" 18 | #include "Net.h" 19 | #include 20 | -------------------------------------------------------------------------------- /include/io.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using google::protobuf::io::FileInputStream; 14 | using google::protobuf::io::FileOutputStream; 15 | using google::protobuf::io::ZeroCopyInputStream; 16 | using google::protobuf::io::CodedInputStream; 17 | using google::protobuf::io::ZeroCopyOutputStream; 18 | using google::protobuf::io::CodedOutputStream; 19 | using google::protobuf::Message; 20 | namespace DeepLearning{ 21 | inline bool ReadProtoFromTextFile(const char* filename, Message* proto) { 22 | int fd = open(filename, O_RDONLY); 23 | // CHECK_NE(fd, -1) << "File not found: " << filename; 24 | FileInputStream* input = new FileInputStream(fd); 25 | bool success = google::protobuf::TextFormat::Parse(input, proto); 26 | delete input; 27 | // close(fd); 28 | return success; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /include/optimization.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "Globals.h" 8 | 9 | namespace Optimization{ 10 | 11 | struct ObjectFunc{ 12 | ObjectFunc(int dim0 = 0):dim(dim0){} 13 | ~ObjectFunc(){} 14 | int dim; 15 | std::shared_ptr x_init; 16 | virtual double operator()(arma::vec &x, arma::vec &grad) = 0; 17 | }; 18 | 19 | class LBFGS{ 20 | // typedef double (* evaluateFunc)(const arma::vec x, arma::vec grad, const int n); 21 | public: 22 | enum LineSearch {Wolfe, Armijo, MoreThuente}; 23 | struct LBFGS_param{ 24 | int maxIter; 25 | int memoryLimit; 26 | int maxLineSearch; 27 | double maxStepSize; 28 | double minStepSize; 29 | int saveFrequency; 30 | std::string saveFileName; 31 | LBFGS_param(int, int, int, std::string);}; 32 | struct PointValueDeriv { 33 | double step, value, deriv; 34 | PointValueDeriv(double step0 = NaN, double value0 = NaN, double deriv0 = NaN) : 35 | step(step0), value(value0), deriv(deriv0) { } 36 | }; 37 | LBFGS(ObjectFunc &func, LBFGS_param param0, LineSearch method); 38 | void calDirection(); 39 | void updateParam(); 40 | void calStepLength_Armijo(); 41 | void calStepLength_Wolfe(); 42 | void calStepLength_MoreThuente(); 43 | bool converge(); 44 | void minimize(); 45 | void saveWeight(std::string str); 46 | double cubicInterp(const LBFGS::PointValueDeriv& p0, const LBFGS::PointValueDeriv& p1); 47 | ObjectFunc &calValGrad; 48 | LBFGS_param param; 49 | double maxIter; 50 | double step; 51 | double currValue; 52 | int memoryLimit; 53 | LineSearch lineSearchMethod; 54 | // s_{k-1} = x_k - x_{k-1} 55 | // y_{k-1} = (grad_k - grad_{k-1}) 56 | std::deque s_list, y_list; 57 | // rho_k =1.0 /(y_k^T * s_k) 58 | std::deque rho_list; 59 | std::vector alpha_list; 60 | arma::vec direction; 61 | arma::vec grad, x, x_init, x_new, grad_new; 62 | }; 63 | 64 | class SteepDescent{ 65 | public: 66 | struct SteepDescent_param{ 67 | SteepDescent_param(double eps0, double step0, int maxIter0): 68 | eps(eps0), step(step0), maxIter(maxIter0){} 69 | double eps; 70 | double step; 71 | 
int maxIter;}; 72 | SteepDescent(ObjectFunc &func, SteepDescent_param param0); 73 | void minimize(); 74 | private: 75 | // bool converged(); 76 | double eps; 77 | double step; 78 | int maxIter; 79 | arma::vec grad, grad_new, x, x_new; 80 | double currValue; 81 | SteepDescent_param param; 82 | ObjectFunc &calValGrad; 83 | 84 | }; 85 | 86 | 87 | 88 | 89 | } -------------------------------------------------------------------------------- /plotting/Driver.py: -------------------------------------------------------------------------------- 1 | from Util import tile_raster_images 2 | import numpy as np 3 | try: 4 | import PIL.Image as Image 5 | except ImportError: 6 | import Image 7 | 8 | 9 | data = np.genfromtxt("../RBM/reconstruct.dat") 10 | 11 | 12 | image = Image.fromarray(tile_raster_images(X=data, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) 13 | image.show() 14 | image.save('reconstruct.png') 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/ActivationLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "ActivationLayer.h" 2 | 3 | namespace NeuralNet{ 4 | 5 | 6 | void ActivationLayer::activateUp(){ 7 | output = input; 8 | ApplyActivation(input, actType); 9 | }; 10 | 11 | void ActivationLayer::calGrad(std::shared_ptr delta_in){ 12 | delta_out = delta_in; 13 | GetActivationGradient(delta_in, delta_out, actType); 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /src/ElementMultiAddLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "ElementMultiAddLayer.h" 2 | 3 | using namespace NeuralNet; 4 | 5 | ElementMultiAddLayer::ElementMultiAddLayer(){ 6 | grad_W_one = std::make_shared(); 7 | grad_W_two = std::make_shared(); 8 | delta_outOne = std::make_shared(); 9 | delta_outTwo = std::make_shared(); 10 | output = std::make_shared(); 11 | } 12 | 13 | void ElementMultiAddLayer::activateUp(){ 14 | *output = (*W_one) % (*inputOne) + (*W_two) % (*inputTwo); 15 | } 16 | 17 | 18 | void ElementMultiAddLayer::calGrad(std::shared_ptr delta_in){ 19 | 20 | *grad_W_one = (*inputOne); 21 | *grad_W_two = (*inputTwo); 22 | 23 | (*delta_outOne) = *W_one; 24 | (*delta_outTwo) = *W_two; 25 | } 26 | 27 | void ElementMultiAddLayer::calGrad(std::shared_ptr delta_in, int t){ 28 | 29 | grad_W_one = inputOneMem[t]; 30 | grad_W_two = inputTwoMem[t]; 31 | 32 | delta_outOne = W_one_mem[t]; 33 | delta_outTwo = W_two_mem[t]; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /src/ElementwiseLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "ElementwiseLayer.h" 2 | 3 | namespace NeuralNet{ 4 | 5 | 6 | void ElementwiseLayer::activateUp(){ 7 | // elementwise product 8 | (*output) = (*inputOne) % (*inputTwo); 9 | }; 10 | 11 | void ElementwiseLayer::calGrad(std::shared_ptr delta_in){ 12 | (*delta_outOne) = (*inputTwo) % (*delta_in); 13 | (*delta_outTwo) = (*inputOne) % (*delta_in); 14 | 15 | } 16 | 17 | void ElementwiseLayer::calGrad(std::shared_ptr delta_in, int timePoint){ 18 | (*delta_outOne) = (*inputTwoMem[timePoint]) % (*delta_in); 19 | (*delta_outTwo) = (*inputOneMem[timePoint]) % (*delta_in); 20 | 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /src/ElmanRL.cpp: 
-------------------------------------------------------------------------------- 1 | #include "ElmanRL.h" 2 | 3 | using namespace NeuralNet; 4 | using namespace DeepLearning; 5 | 6 | ElmanRL::ElmanRL(NeuralNetParameter neuralNetPara0):RNN(neuralNetPara0){ 7 | } 8 | 9 | arma::mat ElmanRL::forwardInTime(std::shared_ptr input) { 10 | std::shared_ptr commonInput(new arma::mat); 11 | if (this->time == 0) { 12 | for (int l = 0; l < numRecurrLayers; l++) { 13 | (recurrLayers[l].getPrevOutput())->zeros(recurrLayerOutputDim, 1); 14 | recurrLayers[l].inputOneMem.clear(); 15 | recurrLayers[l].inputTwoMem.clear(); 16 | recurrLayers[l].outputMem.clear(); 17 | } 18 | for (int l = 0; l < numBaseLayers; l++) { 19 | baseLayers[l].inputMem.clear(); 20 | baseLayers[l].outputMem.clear(); 21 | } 22 | } 23 | for (int l = 0; l < numRecurrLayers; l++) { 24 | recurrLayers[l].inputOne = std::shared_ptr(new arma::mat(*(recurrLayers[l].getPrevOutput()))); 25 | 26 | if (l == 0) { 27 | 28 | recurrLayers[l].inputTwo = std::shared_ptr(new arma::mat(input->rows(0, rnnInputDim - 1))); 29 | } else { 30 | recurrLayers[l].inputTwo = std::shared_ptr(new arma::mat(*(recurrLayers[l - 1].output))); 31 | } 32 | recurrLayers[l].activateUp(); 33 | #if 0 34 | recurrLayers[l].W_one->print("W_one"); 35 | recurrLayers[l].W_two->print("W_two"); 36 | recurrLayers[l].B->print("B"); 37 | recurrLayers[l].inputOne->print("input one"); 38 | recurrLayers[l].inputTwo->print("input two"); 39 | recurrLayers[l].output->print("output"); 40 | #endif 41 | } 42 | 43 | for (int l = 0; l < numBaseLayers; l++) { 44 | if (l == 0) { 45 | arma::mat action(input->rows(rnnInputDim, input->n_rows - 1)); 46 | *commonInput = arma::join_cols(*(recurrLayers[numRecurrLayers-1].output), action); 47 | baseLayers[l].input = commonInput; 48 | } else { 49 | baseLayers[l].input = baseLayers[l - 1].output; 50 | } 51 | baseLayers[l].activateUp(); 52 | } 53 | return *(baseLayers[numBaseLayers - 1].output); 54 | } 55 | 56 | void ElmanRL::backward() { 57 | 58 | std::shared_ptr delta(new arma::mat); 59 | for (int l = 0; l < numRecurrLayers; l++) { 60 | recurrLayers[l].clearAccuGrad(); 61 | } 62 | for (int l = 0; l < numBaseLayers; l++) { 63 | baseLayers[l].clearAccuGrad(); 64 | } 65 | int T = trainingY->n_cols; 66 | for (int t = T - 1; t >= 0; t--){ 67 | // the top most layer from target - network's output 68 | *delta = *(baseLayers[numBaseLayers - 1].outputMem[t]) - trainingY->col(t); 69 | for (int l = numBaseLayers - 1; l >=0; l--) { 70 | baseLayers[l].accumulateGrad(delta, t); 71 | *delta = *(baseLayers[l].delta_out); 72 | } 73 | for (int l = numRecurrLayers - 1; l >= 0; l--){ 74 | // delta error from the same time, propagate from upper layer to lower layer 75 | if (l == numRecurrLayers - 1){ 76 | *delta = baseLayers[0].delta_out->rows(0, recurrLayerOutputDim - 1); 77 | }else{ 78 | *delta = *(recurrLayers[l+1].delta_outTwo); 79 | } 80 | 81 | if (t < T - 1) { 82 | *delta += *(recurrLayers[l].getPrevDeltaOutOne()); 83 | } 84 | // so far, the generated delta error is for the output h of each layer at each time 85 | recurrLayers[l].accumulateGrad(delta, t); 86 | recurrLayers[l].savePrevDeltaOutOne(); 87 | } 88 | } 89 | } 90 | 91 | -------------------------------------------------------------------------------- /src/LinearAdditionLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "LinearAdditionLayer.h" 2 | 3 | namespace NeuralNet{ 4 | 5 | 6 | void LinearAdditionLayer::activateUp(){ 7 | (*output) = (*inputOne) + (*inputTwo); 8 
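    // element-wise sum of the two inputs; calGrad below simply forwards delta_in to both
    // branches, since d(output)/d(inputOne) and d(output)/d(inputTwo) are both the identity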
| }; 9 | 10 | void LinearAdditionLayer::calGrad(std::shared_ptr delta_in){ 11 | delta_outOne = delta_in; 12 | delta_outTwo = delta_in; 13 | } 14 | 15 | } 16 | 17 | -------------------------------------------------------------------------------- /src/MultiAddLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "MultiAddLayer.h" 2 | 3 | using namespace NeuralNet; 4 | 5 | MultiAddLayer::MultiAddLayer(int in1, int in2, int out, ActivationType actType0, std::shared_ptr init_W_one, std::shared_ptr init_W_two, 6 | std::shared_ptr init_B): 7 | Layer_binaryOp(in1,in2,out),actType(actType0), 8 | initializer_W_one(init_W_one), initializer_W_two(init_W_two), 9 | initializer_B(init_B){ 10 | initializeWeight(); 11 | grad_W_one = std::make_shared(); 12 | grad_W_two = std::make_shared(); 13 | grad_B = std::make_shared(); 14 | grad_W_one_accu = std::make_shared(outputDim,inputOneDim, arma::fill::zeros); 15 | grad_W_two_accu = std::make_shared(outputDim,inputTwoDim, arma::fill::zeros); 16 | grad_B_accu = std::make_shared(outputDim,1, arma::fill::zeros); 17 | delta_outOne = std::make_shared(); 18 | delta_outTwo = std::make_shared(); 19 | output = std::make_shared(); 20 | 21 | } 22 | 23 | void MultiAddLayer::activateUp(){ 24 | *output = (*W_one) * (*inputOne) + (*W_two) * (*inputTwo); 25 | for (int i = 0; i < output->n_cols; i++) output->col(i) += *B; 26 | ApplyActivation(output, actType); 27 | } 28 | 29 | void MultiAddLayer::initializeWeight(){ 30 | 31 | W_one = std::make_shared(outputDim, inputOneDim); 32 | W_two = std::make_shared(outputDim, inputTwoDim); 33 | B = std::make_shared(outputDim, 1); 34 | 35 | if (initializer_W_one == nullptr || initializer_W_two == nullptr ||initializer_B == nullptr) { 36 | std::cerr << "initializer is null!" 
<< std::endl; 37 | exit(1); 38 | } else { 39 | initializer_W_one->applyInitialization(W_one); 40 | initializer_W_two->applyInitialization(W_two); 41 | initializer_B->applyInitialization(B); 42 | } 43 | } 44 | 45 | void MultiAddLayer::calGrad(std::shared_ptr delta_in){ 46 | //for delta: each column is the delta of a sample 47 | std::shared_ptr deriv(new arma::mat); 48 | GetActivationGradient(output, deriv, this->actType); 49 | arma::mat delta; 50 | 51 | delta = (*delta_in) % (*deriv); 52 | *grad_B = arma::sum(delta,1); 53 | *grad_W_one = delta * (*inputOne).st(); 54 | *grad_W_two = delta * (*inputTwo).st(); 55 | 56 | (*delta_outOne) = W_one->st() * (delta); 57 | (*delta_outTwo) = W_two->st() * (delta); 58 | } 59 | 60 | void MultiAddLayer::calGrad(std::shared_ptr delta_in, int t){ 61 | std::shared_ptr deriv(new arma::mat); 62 | GetActivationGradient(outputMem[t], deriv, this->actType); 63 | arma::mat delta; 64 | 65 | delta = (*delta_in) % (*deriv); 66 | *grad_B = arma::sum(delta,1); 67 | *grad_W_one = delta * (*inputOneMem[t]).st(); 68 | *grad_W_two = delta * (*inputTwoMem[t]).st(); 69 | 70 | (*delta_outOne) = W_one->st() * (delta); 71 | (*delta_outTwo) = W_two->st() * (delta); 72 | } 73 | 74 | void MultiAddLayer::save(std::string filename) { 75 | W_one->save(filename+"_W_one.dat",arma::raw_ascii); 76 | W_two->save(filename+"_W_two.dat",arma::raw_ascii); 77 | B->save(filename+"_B.dat",arma::raw_ascii); 78 | } 79 | void MultiAddLayer::load(std::string filename) { 80 | W_one->load(filename+"_W_one.dat",arma::raw_ascii); 81 | W_two->load(filename+"_W_two.dat",arma::raw_ascii); 82 | B->load(filename+"_B.dat",arma::raw_ascii); 83 | } 84 | 85 | void MultiAddLayer::accumulateGrad(std::shared_ptr delta_in, int t) { 86 | calGrad(delta_in, t); 87 | *grad_B_accu += *grad_B; 88 | *grad_W_one_accu += *grad_W_one; 89 | *grad_W_two_accu += *grad_W_two; 90 | } 91 | 92 | void MultiAddLayer::clearAccuGrad(){ 93 | (*grad_B_accu).zeros(); 94 | (*grad_W_one_accu).zeros(); 95 | (*grad_W_two_accu).zeros(); 96 | } -------------------------------------------------------------------------------- /src/Proto/DeepLearning.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | package DeepLearning; 3 | message NeuralNetParameter { 4 | optional string name = 1; 5 | optional string type = 2; 6 | enum ActivationType {sigmoid = 1; tanh = 2; linear = 3; softmax = 4; ReLU = 5;} 7 | repeated LayerStructParameter layerStruct = 100; 8 | optional NeuralNetTrainingParameter neuralNetTrainingParameter= 101; 9 | optional RNNStructParameter rnnStruct = 102; 10 | 11 | } 12 | 13 | message ReinforcementLearningParameter{ 14 | optional QLearningSolverParameter qLearningSolverParameter= 100; 15 | } 16 | 17 | message LayerStructParameter { 18 | optional int32 inputDim = 1; 19 | optional int32 outputDim = 2; 20 | optional NeuralNetParameter.ActivationType activationType = 3; 21 | optional string name = 4; 22 | optional string type = 5; 23 | optional NeuralNetInitializerParameter init_W = 103; 24 | optional NeuralNetInitializerParameter init_B = 104; 25 | optional NeuralNetInitializerParameter init_W_one = 105; 26 | optional NeuralNetInitializerParameter init_W_two = 106; 27 | } 28 | 29 | message RNNStructParameter{ 30 | optional int32 numRecurrLayers = 1; 31 | optional int32 recurrLayerInputDim = 2; 32 | optional int32 recurrLayerOutputDim = 3; 33 | optional int32 inputDim = 4; 34 | optional NeuralNetParameter.ActivationType activationType = 6; 35 | optional 
NeuralNetInitializerParameter init_W_one = 103; 36 | optional NeuralNetInitializerParameter init_B = 104; 37 | optional NeuralNetInitializerParameter init_W_two = 105; 38 | } 39 | 40 | message NeuralNetInitializerParameter{ 41 | enum InitializerType {custom = 1; identity = 2; zero = 3; normal = 4; glorot_uniform = 5; IRNN = 6; orthogonal = 7;} 42 | optional double normal_std = 1; 43 | optional double normal_mean = 2; 44 | optional InitializerType initializerType = 3; 45 | } 46 | 47 | message NeuralNetTrainingParameter { 48 | enum TrainerType {SGD = 1; RMSProp = 2; SGDRNN=3;} 49 | optional double learningRate = 1; 50 | optional int32 maxIter = 2; 51 | optional int32 miniBatchSize = 3; 52 | optional int32 NEpoch = 4; 53 | optional double epi = 5 [default = 1e-6]; 54 | optional TrainerType trainerType = 6 [default = SGD]; 55 | optional double decayRate = 7 [default = 10]; 56 | optional double momentum = 8 [default = 0.9]; 57 | optional bool verbose = 101 [default = true]; 58 | optional int32 printInfoFrequency = 10 [default = 1]; 59 | optional bool clipFlag = 11 [default = false]; 60 | optional double clipThreshold = 12 [default = 1]; 61 | optional double RMSProp_rho = 13 [default = 0.9]; 62 | optional bool showGradNorm = 102 [default = false]; 63 | optional bool RNNScanFlag = 103 [default = false]; 64 | optional int32 RNNScanStep = 104 [default = 1]; 65 | optional int32 RNNTruncateLength = 105 [default = 10]; 66 | } 67 | 68 | message QLearningSolverParameter{ 69 | optional int32 numTrainingEpisodes = 1; 70 | optional double learningRate = 2 [default = 0.1]; 71 | optional double epsilon = 3 [default = 0.95]; 72 | optional int32 EpisodeLength = 4; 73 | optional double discount = 5 [default = 0.95]; 74 | optional int32 numEpisodesBeforeTraining = 6; 75 | optional int32 QTableOutputInterval = 7; 76 | optional int32 controlInterval = 8 [default = 1]; 77 | 78 | 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/Proto/Makefile: -------------------------------------------------------------------------------- 1 | CC = g++ 2 | CFLAGS = -std=c++0x 3 | LFLAGS = -L/usr/local/lib -lprotobuf 4 | #all:vector_ser 5 | 6 | test: test.o DeepLearning.pb.o 7 | $(CC) -o $@ $^ $(LFLAGS) 8 | 9 | %.o : %.cpp 10 | $(CC) -c $(CFLAGS) $^ 11 | 12 | %.o : %.cc 13 | $(CC) -c $(CFLAGS) $^ 14 | 15 | #vector_ser.o:vector_ser.cpp 16 | # $(CC) -c $(CFLAGS) vector_ser.cpp 17 | 18 | 19 | clean: 20 | rm *.o test 21 | -------------------------------------------------------------------------------- /src/Proto/generateFile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | protoc DeepLearning.proto --cpp_out=. 
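# protoc emits DeepLearning.pb.h and DeepLearning.pb.cc next to the .proto; the two mv commands
# below drop them into include/ and src/ where the rest of the build looks for them (see
# DEEPLEARNING_INCLUDE in src/test/Makefile.common). Re-run this script whenever the .proto changes,
# using the same protobuf release that the tests link against via -lprotobuf.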
4 | mv DeepLearning.pb.h ../../include 5 | mv DeepLearning.pb.cc ../ 6 | -------------------------------------------------------------------------------- /src/Proto/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/Proto/test -------------------------------------------------------------------------------- /src/RecurrLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "RecurrLayer.h" 2 | 3 | using namespace NeuralNet; 4 | 5 | RecurrLayer::RecurrLayer(int in1, int in2, int out, ActivationType actType0, std::shared_ptr init_W_one, std::shared_ptr init_W_two, 6 | std::shared_ptr init_B): 7 | MultiAddLayer(in1,in2,out,actType0,init_W_one,init_W_two,init_B){ 8 | 9 | output_prev = std::make_shared(); 10 | delta_outOne_prev = std::make_shared(); 11 | } 12 | 13 | void RecurrLayer::savePrevOutput(){ 14 | *output_prev = *output; 15 | } 16 | void RecurrLayer::savePrevDeltaOutOne(){ 17 | *delta_outOne_prev = *delta_outOne; 18 | } 19 | -------------------------------------------------------------------------------- /src/SteepDescent.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "optimization.h" 3 | #include 4 | 5 | using namespace Optimization; 6 | 7 | SteepDescent::SteepDescent(ObjectFunc &func, SteepDescent_param param0): 8 | calValGrad(func), param(param0){ 9 | 10 | maxIter = param.maxIter; 11 | eps = param.eps; 12 | step = param.step; 13 | 14 | x.randn(calValGrad.dim); 15 | 16 | currValue = calValGrad(x, grad); 17 | } 18 | 19 | 20 | void SteepDescent::minimize() { 21 | // arma::vec Grad(inputDim); 22 | int iter = 0; 23 | // if( !quiet ) { 24 | std::cout << "Gradient Descent Starts !" 
<< std::endl; 25 | std::cout << "maxIter:" << maxIter << std::endl; 26 | // std::cout << "alpha:" << alpha << std::endl; 27 | // } 28 | while( iter < maxIter) { 29 | 30 | x_new = x - step * grad; 31 | double currValue = calValGrad(x_new,grad_new); 32 | // if( !quiet ) { 33 | std::cout << "iter:" << iter << "\t" ; 34 | double gradNorm = arma::norm(grad_new); 35 | std::cout << "current gradient norm is:" << gradNorm << std::endl; 36 | std::cout << "current value is:" << currValue << std::endl; 37 | // } 38 | x = x_new; 39 | grad = grad_new; 40 | 41 | if ( gradNorm < eps) break; 42 | iter++; 43 | 44 | } 45 | 46 | 47 | } 48 | 49 | /* 50 | bool GradDescent::converged() { 51 | arma::vec diff; 52 | diff = newX - oldX; 53 | return arma::norm(diff) < eps; 54 | } 55 | */ 56 | 57 | //GradDescent::~GradDescent(){} -------------------------------------------------------------------------------- /src/Util.cpp: -------------------------------------------------------------------------------- 1 | #include "Util.h" 2 | 3 | namespace NeuralNet{ 4 | void loadData_MNIST(std::shared_ptr X, std::shared_ptr Y, std::string filename0){ 5 | 6 | std::string filename_base(filename0); 7 | std::string filename; 8 | char tag[50]; 9 | char x; 10 | int count; 11 | int numFiles = 10; 12 | int featSize = 28*28; 13 | int labelSize = 10; 14 | int numSamples = 1000; 15 | X->set_size(featSize, numFiles*numSamples); 16 | Y->set_size(labelSize, numFiles*numSamples); 17 | Y->fill(0); 18 | 19 | 20 | for (int i = 0 ; i < numFiles ; i++) { 21 | sprintf(tag,"%d",i); 22 | filename=filename_base+(std::string)tag; 23 | std::cout << filename << std::endl; 24 | std::ifstream infile; 25 | infile.open(filename,std::ios::binary | std::ios::in); 26 | if (infile.is_open()) { 27 | 28 | for (int j = 0 ; j < numSamples ; j++) { 29 | 30 | for (int k =0 ; k 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "DeepLearning.pb.h" 12 | 13 | using google::protobuf::io::FileInputStream; 14 | using google::protobuf::io::FileOutputStream; 15 | using google::protobuf::io::ZeroCopyInputStream; 16 | using google::protobuf::io::CodedInputStream; 17 | using google::protobuf::io::ZeroCopyOutputStream; 18 | using google::protobuf::io::CodedOutputStream; 19 | using google::protobuf::Message; 20 | 21 | bool ReadProtoFromTextFile(const char* filename, Message* proto) { 22 | int fd = open(filename, O_RDONLY); 23 | // CHECK_NE(fd, -1) << "File not found: " << filename; 24 | FileInputStream* input = new FileInputStream(fd); 25 | bool success = google::protobuf::TextFormat::Parse(input, proto); 26 | delete input; 27 | // close(fd); 28 | return success; 29 | } 30 | 31 | -------------------------------------------------------------------------------- /src/test/BaseLayer/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | test_BaseLayer: test_BaseLayer.o 4 | $(CXX) -o $@ $^ $(LDFLAG) 5 | 6 | %.o:%.cpp 7 | $(CXX) -c $(CXXFLAGS) $^ 8 | 9 | 10 | clean: 11 | rm test_BaseLayer *.o 12 | -------------------------------------------------------------------------------- /src/test/BaseLayer/test_BaseLayer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/BaseLayer/test_BaseLayer -------------------------------------------------------------------------------- 
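A minimal usage sketch of the ReadProtoFromTextFile helper shown above (this driver is not part of the repository; it assumes the helper is declared through include/common.h, as the test programs later in src/test/ suggest):

#include <iostream>
#include "common.h"   // assumed to declare ReadProtoFromTextFile and pull in DeepLearning.pb.h

int main(int argc, char* argv[]) {
    if (argc < 2) return 1;
    DeepLearning::NeuralNetParameter net;
    // parse a text-format config such as src/test/IO/net.prototxt into the message
    if (!ReadProtoFromTextFile(argv[1], &net)) {
        std::cerr << "failed to parse " << argv[1] << std::endl;
        return 1;
    }
    std::cout << "parsed " << net.layerstruct_size() << " layerStruct blocks" << std::endl;
    return 0;
}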
/src/test/BaseLayer/test_BaseLayer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "BaseLayer.h" 9 | #include "gtest/gtest.h" 10 | using namespace NeuralNet; 11 | 12 | 13 | TEST(BaseLayerTest, fillBernoulli){ 14 | 15 | BaseLayer layer(100,10,BaseLayer::sigmoid,true,0.5); 16 | EXPECT_EQ(layer.dropOutRate,0.5); 17 | // EXPECT_TRUE(layer.dropOutFlag); 18 | layer.B.print(); 19 | layer.fill_Bernoulli(layer.B.memptr(),layer.B_size); 20 | layer.B.print(); 21 | 22 | } 23 | 24 | 25 | 26 | int main(int argc, char *argv[]) { 27 | std::shared_ptr trainDataX(new arma::mat); 28 | std::shared_ptr trainDataY(new arma::mat); 29 | testing::InitGoogleTest(&argc, argv); 30 | return RUN_ALL_TESTS(); 31 | } 32 | 33 | -------------------------------------------------------------------------------- /src/test/ElmanRL/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../Makefile.common 3 | 4 | OBJ = test_ElmanRL.o Trainer.o 5 | 6 | test : $(OBJ) 7 | $(CXX) -o $@ $(OBJ) $(LDFLAG) 8 | Trainer.o : ../Trainer/Trainer.cpp 9 | 10 | $(CXX) -c $(CXXFLAGS) $^ 11 | clean: 12 | rm -f *.o *~ 13 | -------------------------------------------------------------------------------- /src/test/ElmanRL/RLtest.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 2 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 2 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 9 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: RMSProp 35 | learningRate: 0.2 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 2500 40 | showGradNorm: false 41 | RNNScanFlag: false 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/test/ElmanRL/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/ElmanRL/test -------------------------------------------------------------------------------- /src/test/GRNN/GRNN.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "BaseLayer.h" 7 | #include "MultiAddLayer.h" 8 | #include "ElementMultiAddLayer.h" 9 | #include "common.h" 10 | namespace NeuralNet { 11 | 12 | class RNN: public Net { 13 | 14 | public: 15 | RNN(int numHiddenLayers0, int hiddenLayerInputDim0, 16 | int hiddenLayerOutputDim0, int inputDim0, int outputDim0, 17 | std::shared_ptr trainingX0, std::shared_ptr trainingY0); 18 | RNN(DeepLearning::NeuralNetParameter); 19 | 20 | void backward(); 21 | void updatePara(); 22 | void train(); 23 | void test(); 24 | void calNumericGrad(); 25 | 26 | // implementing methods required by Net interface 27 | virtual void forward(); 28 | virtual void 
setTrainingSamples(std::shared_ptr X, std::shared_ptr Y); 29 | virtual void applyUpdates(std::vector>); 30 | virtual void calGradient(); 31 | virtual std::vector> netGradients(); 32 | virtual double getLoss(); 33 | virtual void save(std::string filename); 34 | virtual void load(std::string filename); 35 | virtual std::shared_ptr netOutput(); 36 | virtual std::shared_ptr netOutputAtTime(int time); 37 | virtual arma::mat forwardInTime(std::shared_ptr x); 38 | virtual int getTime(); 39 | virtual void setTime(int t); 40 | virtual void updateInternalState(); 41 | virtual void saveLayerInputOutput(); 42 | std::shared_ptr getOutputLayer(){return netOutputLayer;} 43 | private: 44 | void fillNetGradVector(); 45 | DeepLearning::NeuralNetParameter neuralNetPara; 46 | double learningRate = 0.1; 47 | /* network gradients*/ 48 | std::vector> netGradVector; 49 | std::shared_ptr netOutput_; 50 | std::vector hiddenStateLayers, updateGateLayers, resetGateLayers; 51 | std::vector hiddenOutputLayers; 52 | std::shared_ptr netOutputLayer; 53 | std::shared_ptr trainingY, trainingX; 54 | std::vector> outputLayers_prev_output; 55 | int numHiddenLayers, hiddenLayerInputDim, hiddenLayerOutputDim; 56 | int rnnInputDim, rnnOutputDim; 57 | int time; 58 | }; 59 | } 60 | 61 | 62 | -------------------------------------------------------------------------------- /src/test/GRNN/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | test_MALayer: test_MALayer.o MultiAddLayer.o 4 | $(CXX) -o $@ $^ $(LDFLAG) 5 | %.o:%.cpp 6 | $(CXX) -c $(CXXFLAGS) $^ 7 | 8 | 9 | clean: 10 | rm *.o 11 | -------------------------------------------------------------------------------- /src/test/IO/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../Makefile.common 3 | 4 | 5 | 6 | OBJ = test_IO.o 7 | 8 | test_IO : $(OBJ) 9 | $(CXX) -o test $(OBJ) $(LDFLAG) 10 | 11 | clean: 12 | rm -f *.o *~ 13 | -------------------------------------------------------------------------------- /src/test/IO/net.prototxt: -------------------------------------------------------------------------------- 1 | layerStruct{ 2 | name: "BaseLayer1" 3 | inputDim: 10 4 | outputDim: 20 5 | activationType: sigmoid 6 | } 7 | 8 | layerStruct{ 9 | name: "BaseLayer2" 10 | inputDim: 10 11 | outputDim: 20 12 | activationType: sigmoid 13 | } 14 | 15 | neuralNetTrainingParameter{ 16 | learningRate: 0.1 17 | miniBatchSize: 10 18 | NEpoch: 200 19 | } 20 | 21 | rnnStruct{ 22 | numHiddenLayers: 1 23 | hiddenLayerInputDim: 2 24 | hiddenLayerOutputDim: 6 25 | inputDim: 4 26 | outputDim: 5 27 | } 28 | -------------------------------------------------------------------------------- /src/test/IO/qsolver.prototxt: -------------------------------------------------------------------------------- 1 | 2 | 3 | qLearningSolverParameter{ 4 | numTrainingEpisodes: 300 5 | epsilon: 0.95; 6 | EpisodeLength: 150 7 | discount: 0.95 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /src/test/IO/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/IO/test -------------------------------------------------------------------------------- /src/test/IO/test_IO.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "common.h" 3 | using 
namespace DeepLearning; 4 | int main(int argc, char *argv[]){ 5 | 6 | NeuralNetParameter message; 7 | RNNStructParameter submessage; 8 | if (argc == 2){ 9 | 10 | ReadProtoFromTextFile(argv[1], &message); 11 | std::cout << message.layerstruct_size() << std::endl; 12 | for (int i = 0 ; i < message.layerstruct_size(); i++ ){ 13 | if (message.layerstruct(i).has_name()) 14 | std::cout << message.layerstruct(i).name() << std::endl; 15 | if (message.layerstruct(i).has_activationtype()){ 16 | std::cout << message.layerstruct(i).activationtype() << std::endl; 17 | if( message.layerstruct(i).activationtype() == LayerStructParameter_ActivationType_sigmoid) 18 | std::cout << "good" << std::endl; 19 | } 20 | } 21 | 22 | std::cout << message.neuralnettrainingparameter().learningrate() << std::endl; 23 | std::cout << message.neuralnettrainingparameter().minibatchsize()<< std::endl; 24 | std::cout << message.neuralnettrainingparameter().nepoch() << std::endl; 25 | std::cout << message.neuralnettrainingparameter().epi() << std::endl; 26 | std::cout << message.neuralnettrainingparameter().trainertype() << std::endl; 27 | 28 | std::cout << std::endl; 29 | 30 | std::cout << "test Kai message" << std::endl; 31 | std::cout << message.rnnstruct().numhiddenlayers() << std::endl; 32 | std::cout << message.rnnstruct().hiddenlayeroutputdim() << std::endl; 33 | 34 | submessage = message.rnnstruct(); 35 | 36 | std::cout << "test sub message" << std::endl; 37 | std::cout << submessage.numhiddenlayers() << std::endl; 38 | std::cout << submessage.hiddenlayeroutputdim() << std::endl; 39 | 40 | 41 | } 42 | 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /src/test/IOtest/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../Makefile.common 3 | 4 | 5 | 6 | OBJ = test_IO.o 7 | 8 | test_IO : $(OBJ) 9 | $(CXX) -o test $(OBJ) $(LDFLAG) 10 | 11 | clean: 12 | rm -f *.o *~ 13 | -------------------------------------------------------------------------------- /src/test/IOtest/net.prototxt: -------------------------------------------------------------------------------- 1 | layerStruct{ 2 | name: "BaseLayer1" 3 | inputDim: 10 4 | outputDim: 20 5 | activationType: sigmoid 6 | } 7 | 8 | layerStruct{ 9 | name: "BaseLayer2" 10 | inputDim: 10 11 | outputDim: 20 12 | activationType: sigmoid 13 | } 14 | 15 | neuralNetTrainingParameter{ 16 | learningRate: 0.1 17 | miniBatchSize: 10 18 | NEpoch: 200 19 | } 20 | 21 | rnnStruct{ 22 | numHiddenLayers: 1 23 | hiddenLayerInputDim: 2 24 | hiddenLayerOutputDim: 6 25 | inputDim: 4 26 | outputDim: 5 27 | } 28 | -------------------------------------------------------------------------------- /src/test/Makefile.common: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CXX = g++ 3 | 4 | HOME=/home/yuguangyang/ 5 | ARMA_INCLUDE=-I$(HOME)Downloads/armadillo-5.100.2/include 6 | DEEPLEARNING_INCLUDE=-I../../../include 7 | GTEST_INCLUDE=-I$(HOME)workspace/libs/gtest-1.7.0/include 8 | BOOST_INCLUDE=-I/opt/boost/boost_1_57_0 9 | PROTO_INCLUDE=-I/usr/local/include 10 | 11 | GTEST_PATH=-L$(HOME)workspace/libs/gtest-1.7.0/mybuilds 12 | DEEPLEARNING_PATH=-L../../lib 13 | PROTO_PATH=-L/usr/local/lib 14 | 15 | DEBUGFLAG=-DDEBUG -g3 16 | RELEASEFLAG= -O3 -march=native -DARMA_NO_DEBUG 17 | CXXFLAGS= -std=c++0x $(ARMA_INCLUDE) $(DEEPLEARNING_INCLUDE) $(GTEST_INCLUDE) $(BOOST_INCLUDE) $(PROTO_INCLUDE) -D__LINUX -DARMA_DONT_USE_WRAPPER 18 | #CXXFLAGS += $(DEBUGFLAG) 19 | #CXXFLAGS += $(RELEASEFLAG) 20 | LINKOPTFLAGS= -O3 -flto=4 -fwhole-program 21 | LDFLAG=$(DEEPLEARNING_PATH) $(GTEST_PATH) $(PROTO_PATH) -L/opt/OpenBLAS/lib -ldeeplearning -llapack -lopenblas -lprotobuf -pthread 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | test_mlp: test_funcApprox.o Trainer.o 4 | $(CXX) -o $@ $^ $(LDFLAG) 5 | %.o:%.cpp 6 | $(CXX) -c $(CXXFLAGS) $^ 7 | Trainer.o:../Trainer/Trainer.cpp 8 | $(CXX) -c $(CXXFLAGS) $^ 9 | 10 | clean: 11 | rm *.o 12 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/net.prototxt: -------------------------------------------------------------------------------- 1 | layerStruct{ 2 | name: "BaseLayer1" 3 | inputDim: 1 4 | outputDim: 40 5 | activationType: sigmoid 6 | } 7 | 8 | layerStruct{ 9 | name: "BaseLayer1" 10 | inputDim: 40 11 | outputDim: 20 12 | activationType: sigmoid 13 | } 14 | 15 | layerStruct{ 16 | name: "BaseLayer2" 17 | inputDim: 20 18 | outputDim: 1 19 | activationType: linear 20 | } 21 | 22 | neuralNetTrainingParameter{ 23 | trainerType: RMSProp 24 | learningRate: 0.01 25 | miniBatchSize: 100 26 | NEpoch: 20000 27 | decayRate: 100000 28 | } 29 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/runningparameters/sin(5x)+exp(5x)/net.prototxt: -------------------------------------------------------------------------------- 1 | //This is for training non-linear function y = sin(5x)+exp(5x), while x is rescaled to 0 to 1 2 | 3 | layerStruct{ 4 | name: "BaseLayer1" 5 | inputDim: 1 6 | outputDim: 40 7 | activationType: sigmoid 8 | } 9 | 10 | layerStruct{ 11 | name: "BaseLayer1" 12 | inputDim: 40 13 | outputDim: 20 14 | activationType: sigmoid 15 | } 16 | 17 | layerStruct{ 18 | name: "BaseLayer2" 19 | inputDim: 20 20 | outputDim: 1 21 | activationType: linear 22 | } 23 | 24 | neuralNetTrainingParameter{ 25 | learningRate: 0.1 26 | miniBatchSize: 100 27 | NEpoch: 20000 28 | } 29 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/runningparameters/sin(5x)+exp(5x)/readme: -------------------------------------------------------------------------------- 1 | //This is for training non-linear function y = sin(5x)+exp(5x), while x is rescaled to 0 to 1 2 | 3 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/runningparameters/sin(5x)/net.prototxt: -------------------------------------------------------------------------------- 1 | //This is for training non-linear function y = 5*sin(x), while x is rescaled to 0 to 1 2 | 3 | layerStruct{ 4 | name: "BaseLayer1" 5 | inputDim: 1 6 | outputDim: 40 7 | activationType: sigmoid 8 | } 9 | 10 | layerStruct{ 11 | name: "BaseLayer1" 12 | inputDim: 40 13 | outputDim: 20 14 | activationType: sigmoid 15 | } 16 | 17 | layerStruct{ 18 | name: "BaseLayer2" 19 | inputDim: 20 20 | outputDim: 1 21 | activationType: linear 22 | } 23 | 24 | neuralNetTrainingParameter{ 25 | learningRate: 0.1 26 | miniBatchSize: 100 27 | NEpoch: 20000 28 | } 29 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/runningparameters/sin(5x)/readme:
-------------------------------------------------------------------------------- 1 | //This is for training non-linear function y = 5*sin(x), while x is rescaled to 0 to 1 2 | 3 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/testSimple.prototxt: -------------------------------------------------------------------------------- 1 | layerStruct{ 2 | name: "BaseLayer1" 3 | inputDim: 1 4 | outputDim: 10 5 | activationType: tanh 6 | init_W{ 7 | initializerType: glorot_uniform 8 | } 9 | init_B{ 10 | initializerType: zero 11 | } 12 | } 13 | 14 | layerStruct{ 15 | name: "BaseLayer1" 16 | inputDim: 10 17 | outputDim: 1 18 | activationType: linear 19 | init_W{ 20 | initializerType: glorot_uniform 21 | } 22 | init_B{ 23 | initializerType: zero 24 | } 25 | } 26 | 27 | neuralNetTrainingParameter{ 28 | trainerType: RMSProp 29 | learningRate: 0.15 30 | miniBatchSize: 10 31 | NEpoch: 1000 32 | decayRate: 100 33 | momentum: 0.99 34 | } 35 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/test_funcApprox.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include "MultiLayerPerceptron.h" 3 | #include "../Trainer/Trainer.h" 4 | using namespace NeuralNet; 5 | using namespace DeepLearning; 6 | 7 | void testComplex(char* filename); 8 | void testSimple(char* filename); 9 | 10 | int main(int argc, char** argv) { 11 | 12 | if (argc < 2) exit(1); 13 | 14 | testSimple(argv[1]); 15 | 16 | 17 | return 0; 18 | } 19 | 20 | 21 | void testSimple(char* filename){ 22 | std::shared_ptr X(new arma::mat(1,10)); 23 | std::shared_ptr Y(new arma::mat(1,10)); 24 | 25 | for (int i = 0; i < X->n_elem; i++){ 26 | X->at(i) = i; 27 | } 28 | 29 | double xmin = X->min(); 30 | double xmax = X->max(); 31 | X->transform([&](double x){return x/(xmax - xmin)-0.5;}); 32 | Y->ones(); 33 | *Y = (*X); 34 | Y->transform([](double val){return sin(val);}); 35 | 36 | NeuralNetParameter nnpara; 37 | ReadProtoFromTextFile(filename, &nnpara); 38 | // nnpara.neuralnettrainingparameter().set_minibatchsize(X->n_elem); 39 | std::shared_ptr mlp(new MultiLayerPerceptron(nnpara)); 40 | std::shared_ptr trainer(TrainerBuilder::GetTrainer(mlp, nnpara)); 41 | 42 | mlp->setTrainingSamples(X,nullptr); 43 | mlp->forward(); 44 | (mlp->netOutput())->print(); 45 | trainer->setTrainingSamples(X, Y); 46 | trainer->train(); 47 | Y->save("target.dat",arma::raw_ascii); 48 | mlp->netOutput()->save("trainingResult.dat",arma::raw_ascii); 49 | Y->print(); 50 | (mlp->netOutput())->print(); 51 | 52 | } 53 | 54 | void testComplex(char* filename){ 55 | std::shared_ptr X(new arma::mat(1,100)); 56 | std::shared_ptr Y(new arma::mat(1,100)); 57 | 58 | for (int i = 0; i < X->n_elem; i++){ 59 | X->at(i) = i; 60 | } 61 | 62 | double xmin = X->min(); 63 | double xmax = X->max(); 64 | X->transform([&](double x){return x/(xmax - xmin);}); 65 | Y->ones(); 66 | *Y = 5*(*X); 67 | Y->transform([](double val){return sin(4*val);}); 68 | 69 | NeuralNetParameter nnpara; 70 | ReadProtoFromTextFile(filename, &nnpara); 71 | // nnpara.neuralnettrainingparameter().set_minibatchsize(X->n_elem); 72 | std::shared_ptr mlp(new MultiLayerPerceptron(nnpara)); 73 | std::shared_ptr trainer(TrainerBuilder::GetTrainer(mlp, nnpara)); 74 | 75 | mlp->setTrainingSamples(X,nullptr); 76 | mlp->forward(); 77 | (mlp->netOutput())->print(); 78 | trainer->setTrainingSamples(X, Y); 79 | trainer->train(); 80 | 
Y->save("target.dat",arma::raw_ascii); 81 | mlp->netOutput()->save("trainingResult.dat",arma::raw_ascii); 82 | 83 | 84 | } -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/test_mlp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "MultiLayerPerceptron.h" 10 | #include "optimization.h" 11 | #include "Util.h" 12 | 13 | using namespace NeuralNet; 14 | 15 | 16 | int main(int argc, char *argv[]) { 17 | if (argc < 2) exit(1); 18 | std::shared_ptr DataX(new arma::mat); 19 | std::shared_ptr DataY(new arma::mat); 20 | std::shared_ptr trainDataX(new arma::mat); 21 | std::shared_ptr trainDataY(new arma::mat); 22 | std::shared_ptr testDataX(new arma::mat); 23 | std::shared_ptr testDataY(new arma::mat); 24 | std::shared_ptr ValidationDataX(new arma::mat); 25 | std::shared_ptr ValidationDataY(new arma::mat); 26 | 27 | loadData_MNIST(DataX,DataY,(std::string)argv[1]); 28 | 29 | int ntrain =2000; 30 | int ntest = 1000; 31 | // now I split data into train, test, and validation 32 | trainDataX = std::make_shared(DataX->cols(0,ntrain-1)); 33 | trainDataY = std::make_shared(DataY->cols(0,ntrain-1)); 34 | testDataX = std::make_shared(DataX->cols(ntrain,ntrain+ntest-1)); 35 | testDataY = std::make_shared(DataY->cols(ntrain,ntrain+ntest-1)); 36 | 37 | 38 | int inputDim = trainDataX->n_cols; 39 | int outputDim = trainDataY->n_cols; 40 | trainDataX->save("trainingSamples.txt",arma::raw_ascii); 41 | TrainingPara_MLP trainingPara(1e-6,100, 10, 0.25); 42 | trainingPara.print(); 43 | std::vector dimensions = {784,100,10}; 44 | MultiLayerPerceptron mlp(2, dimensions, trainDataX, trainDataY, trainingPara); 45 | bool LBFGS_flag = false; 46 | if (LBFGS_flag){ 47 | MLPTrainer mlpTrainer(mlp); 48 | Optimization::LBFGS::LBFGS_param param(100,20, 50 , "result.txt"); 49 | Optimization::LBFGS lbfgs_opt(mlpTrainer,param, Optimization::LBFGS::Wolfe); 50 | lbfgs_opt.minimize(); 51 | } else{ 52 | mlp.train(); 53 | } 54 | mlp.test(testDataX,testDataY); 55 | return 0; 56 | } 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /src/test/MultiThreadArmaMat/Makefile: -------------------------------------------------------------------------------- 1 | HOME=/home/yuguangyang/ 2 | ARMA_INCLUDE=-I$(HOME)Downloads/armadillo-5.100.2/include 3 | # You may need to edit this file to reflect the type and capabilities of your system. 4 | # The defaults are for a Linux system and may need to be changed for other systems (eg. Mac OS X). 5 | 6 | 7 | CXX=g++ 8 | 9 | #CXX=CC 10 | ## When using the Sun Studio compiler 11 | 12 | ## If you've installed Armadillo's headers manually, you may need to tell the compiler where they are. 
13 | ## For example, change ../include to /usr/local/include 14 | 15 | 16 | LIB_FLAGS = -L/opt/OpenBLAS/lib -lopenblas -llapack 17 | #LIB_FLAGS = -lopenblas -llapack 18 | #LIB_FLAGS = -framework Accelerate 19 | #LIB_FLAGS = -library=sunperf 20 | 21 | ## NOTE: on Ubuntu and Debian based systems you may need to add -lgfortran to LIB_FLAGS 22 | ## NOTE: if you're using Mac OS, use the line with -framework Accelerate 23 | ## NOTE: if you're using the Sun Studio compiler, use the line with -library=sunperf 24 | 25 | 26 | OPT = -O2 27 | ## As the Armadillo library uses recursive templates, compilation times depend on the level of optimisation: 28 | ## 29 | ## -O0: quick compilation, but the resulting program will be slow 30 | ## -O1: good trade-off between compilation time and execution speed 31 | ## -O2: produces programs which have almost all possible speedups, but compilation takes longer 32 | ## -O3: enables auto vectorisation when using gcc 33 | 34 | #OPT = -xO4 -xannotate=no 35 | ## When using the Sun Studio compiler 36 | 37 | 38 | #EXTRA_OPT = -fwhole-program 39 | ## Uncomment the above line if you're compiling all source files into one program in a single hit 40 | 41 | 42 | #DEBUG = -DARMA_EXTRA_DEBUG 43 | ## Uncomment the above line to enable low-level debugging. 44 | ## Lots of debugging information will be printed when a compiled program is run. 45 | ## Please enable this option when reporting bugs. 46 | 47 | 48 | #FINAL = -DARMA_NO_DEBUG 49 | ## Uncomment the above line to disable Armadillo's checks. 50 | ## Not recommended unless your code has been first thoroughly tested! 51 | 52 | 53 | CXXFLAGS = $(ARMA_INCLUDE) -I/opt/OpenBLAS/include $(DEBUG) $(FINAL) $(OPT) $(EXTRA_OPT) 54 | 55 | all: test_arma 56 | 57 | test_arma: test_arma.cpp 58 | $(CXX) $(CXXFLAGS) -o $@ $< $(LIB_FLAGS) 59 | 60 | 61 | .PHONY: clean 62 | 63 | clean: 64 | rm -f test_arma 65 | 66 | -------------------------------------------------------------------------------- /src/test/NN-RL/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | all: test_NNRL.exe 4 | 5 | OBJ = NN_RL_Driver.o Model_PoleSimple.o Model_PoleFull.o Trainer.o NN_RLSolverBase.o NN_RLSolverMLP.o NN_RLSolverMultiMLP.o NN_RLSolverRNN.o RLSolver_2DTable.o 6 | 7 | test_NNRL.exe: $(OBJ) 8 | $(CXX) -o $@ $^ $(LDFLAG) 9 | 10 | 11 | 12 | 13 | 14 | %.o:%.cpp 15 | $(CXX) -c $(CXXFLAGS) $(DEBUGFLAG) $^ 16 | 17 | Trainer.o: ../Trainer/Trainer.cpp 18 | $(CXX) -c $(CXXFLAGS) $^ 19 | 20 | clean: 21 | rm *.o *.exe 22 | -------------------------------------------------------------------------------- /src/test/NN-RL/Model_PoleFull.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "Model_PoleFull.h" 6 | using namespace ReinforcementLearning; 7 | // this model is from paper THE POLE BALANCING PROBLEM A Benchmark Control Theory Problem 8 | Model_PoleFull::Model_PoleFull(double dt0) { 9 | currState.resize(2); 10 | prevState.resize(2); 11 | dt = dt0; 12 | stateDim = 2; 13 | hiddenCurrState.resize(4); 14 | hiddenPrevState.resize(4); 15 | 16 | randNoise = std::make_shared(-1, 1); 17 | numActions = 3; 18 | } 19 | 20 | void Model_PoleFull::run(int action) { 21 | double force; 22 | double accer_theta; 23 | double accer_x; 24 | switch (action) { 25 | case 0: 26 | force = -10 + randNoise->nextInt(); 27 | break; 28 | case 1: 29 | force = 10 + randNoise->nextInt(); 30 | break; 31 | case 2: 32 | force = 
randNoise->nextInt(); 33 | break; 34 | default:break; 35 | } 36 | double l = 0.5; 37 | double massSum = 1.1; 38 | double massRatio = 1.0 / 11.0; 39 | double &theta = hiddenCurrState[0]; 40 | double &theta_v = hiddenCurrState[1]; 41 | double &x = hiddenCurrState[2]; 42 | double &x_v = hiddenCurrState[3]; 43 | 44 | accer_theta = 9.8 * sin(theta) - l * massRatio * 0.5 * pow(theta_v, 2.0) * sin(2.0 * theta) / 2.0 - cos(theta) * force / massSum; 45 | accer_theta /= (4.0 * l / 3.0 - 0.1 * massRatio * l * cos(theta) * cos(theta)); 46 | theta += theta_v * dt; 47 | if (theta > M_PI) theta -= 2.0*M_PI; 48 | if (theta < -M_PI) theta += 2.0*M_PI; 49 | theta_v += accer_theta * dt; 50 | 51 | accer_x = force / massSum + massRatio * l * (pow(theta_v, 2.0) * sin(theta) - accer_theta * cos(theta)); 52 | x += x_v * dt; 53 | x_v += accer_x * dt; 54 | 55 | currState[0] = hiddenCurrState[0]; 56 | currState[1] = hiddenCurrState[2]; 57 | } 58 | 59 | void Model_PoleFull::createInitialState() { 60 | this->hiddenCurrState[0] = (randNoise->nextDou() - 0.5) * 0.0 * M_PI; 61 | this->hiddenCurrState[1] = 0.0; 62 | this->hiddenCurrState[2] = (randNoise->nextDou() - 0.5) * 0.0; 63 | this->hiddenCurrState[3] = 0.0; 64 | this->currState[0] = this->hiddenCurrState[0]; 65 | this->currState[1] = this->hiddenCurrState[2]; 66 | } 67 | 68 | -------------------------------------------------------------------------------- /src/test/NN-RL/Model_PoleFull.h: -------------------------------------------------------------------------------- 1 | /* 2 | This model is the Inverted Pendulum problem found in the paper 3 | "lease-squared policy iterations" 4 | */ 5 | #pragma once 6 | #include 7 | #include 8 | #include 9 | #include "Util.h" 10 | #include "BaseModel.h" 11 | 12 | using namespace NeuralNet; 13 | namespace ReinforcementLearning { 14 | class Model_PoleFull: public BaseModel{ 15 | public: 16 | Model_PoleFull(double dt0); 17 | ~Model_PoleFull(){} 18 | virtual void run(int action); 19 | virtual void createInitialState(); 20 | private: 21 | 22 | State hiddenCurrState, hiddenPrevState; 23 | std::shared_ptr randNoise; 24 | double dt; 25 | }; 26 | } -------------------------------------------------------------------------------- /src/test/NN-RL/Model_PoleSimple.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "Model_PoleSimple.h" 6 | using namespace ReinforcementLearning; 7 | // this model is from paper Lease-squares policy iteration 8 | Model_PoleSimple::Model_PoleSimple(double dt0) { 9 | currState.resize(2); 10 | prevState.resize(2); 11 | dt = dt0; 12 | stateDim = 2; 13 | randNoise = std::make_shared(-10, 10); 14 | numActions = 3; 15 | } 16 | 17 | void Model_PoleSimple::run(int action) { 18 | double force; 19 | double accer; 20 | switch (action) { 21 | case 0: 22 | force = -50 + randNoise->nextInt(); 23 | break; 24 | case 1: 25 | force = 50 + randNoise->nextInt(); 26 | break; 27 | case 2: 28 | force = randNoise->nextInt(); 29 | break; 30 | default:break; 31 | } 32 | double &theta = currState[0]; 33 | double &theta_v = currState[1]; 34 | // we assume l = 0.5 35 | // mp/(mc+mp) = 0.1, mp = 1, mc = 9) 36 | accer = 9.8 * sin(theta) - 0.1 * 2.0 * 0.5 * pow(theta_v, 2.0) * sin(2.0 * theta) / 2.0 - 0.1 * cos(theta) * force; 37 | accer /= (4.0 * 0.5 / 3.0 - 0.1 * 0.5 * cos(theta) * cos(theta) ); 38 | prevState = currState; 39 | theta += theta_v * dt; 40 | if (theta > M_PI) theta -= 2.0*M_PI; 41 | if (theta < -M_PI) theta += 2.0*M_PI; 42 | 
theta_v += accer * dt; 43 | } 44 | 45 | void Model_PoleSimple::createInitialState() { 46 | this->currState[0] = (randNoise->nextDou() - 0.5) * 0.2 * M_PI; 47 | this->currState[1] = 0.0; 48 | } 49 | 50 | 51 | double Model_PoleSimple::getRewards() const { 52 | if (this->terminate()) { 53 | return -1.0; 54 | } else { 55 | return 0.0; 56 | } 57 | } 58 | 59 | bool Model_PoleSimple::terminate() const { 60 | return (currState[0] < -0.5 * M_PI || currState[0] > 0.5 * M_PI); 61 | } -------------------------------------------------------------------------------- /src/test/NN-RL/Model_PoleSimple.h: -------------------------------------------------------------------------------- 1 | /* 2 | This model is the Inverted Pendulum problem found in the paper 3 | "lease-squared policy iterations" 4 | */ 5 | #pragma once 6 | #include 7 | #include 8 | #include 9 | #include "Util.h" 10 | #include "BaseModel.h" 11 | 12 | using namespace NeuralNet; 13 | namespace ReinforcementLearning { 14 | class Model_PoleSimple: public BaseModel{ 15 | public: 16 | Model_PoleSimple(double dt0); 17 | ~Model_PoleSimple(){} 18 | virtual void run(int action); 19 | virtual void createInitialState(); 20 | virtual double getRewards() const; 21 | virtual bool terminate() const; 22 | private: 23 | std::shared_ptr randNoise; 24 | double dt; 25 | }; 26 | } -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverBase.cpp: -------------------------------------------------------------------------------- 1 | #include "NN_RLSolverBase.h" 2 | 3 | using namespace ReinforcementLearning; 4 | using namespace NeuralNet; 5 | using namespace DeepLearning; 6 | NN_RLSolverBase::NN_RLSolverBase(std::shared_ptr m, 7 | std::shared_ptr net0, 8 | std::shared_ptr trainer0, 9 | int Dim, DeepLearning::QLearningSolverParameter para): 10 | RLSolverBase(m,Dim,para), net(net0), trainer(trainer0){ 11 | netInputDim = stateDim + 1; 12 | } 13 | 14 | void NN_RLSolverBase::getMaxQ(const State& S, double* Q, int* action) { 15 | double maxQ; 16 | int a = 0; 17 | maxQ = -std::numeric_limits::max(); 18 | for (int j = 0; j < model->getNumActions(); j++) { 19 | double tempQ = this->calQ(S, j); 20 | // std::cout << tempQ << std::endl; 21 | if (maxQ < tempQ) { 22 | maxQ = tempQ; 23 | a = j; 24 | } 25 | } 26 | *Q = maxQ; 27 | *action = a; 28 | return; 29 | } 30 | 31 | -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverBase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "BaseModel.h" 4 | #include "common.h" 5 | #include "Net.h" 6 | #include "RLSolverBase.h" 7 | #include "../Trainer/Trainer.h" 8 | 9 | namespace ReinforcementLearning { 10 | 11 | class NN_RLSolverBase: public RLSolverBase { 12 | public: 13 | NN_RLSolverBase(std::shared_ptr m, 14 | std::shared_ptr net0, 15 | std::shared_ptr trainer0, int Dim, DeepLearning::QLearningSolverParameter para); 16 | virtual ~NN_RLSolverBase(){} 17 | virtual void train() = 0; 18 | virtual void test(){} 19 | virtual void generateTrainingSample(std::shared_ptr trainingSampleX, std::shared_ptr trainingSampleY)=0; 20 | virtual void generateExperience() = 0; 21 | virtual double calQ(const State& S, int action) const = 0; 22 | virtual void getMaxQ(const State& S,double* Q, int* action); 23 | virtual double getRewards(const State& newS) const = 0; 24 | virtual bool terminate(const State& S) const = 0; 25 | virtual void setNormalizationConst() = 0; 26 | protected: 
27 | int netInputDim; 28 | std::shared_ptr net; 29 | std::shared_ptr trainer; 30 | State state_norm; 31 | double action_norm; 32 | 33 | }; 34 | } 35 | -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverMLP.cpp: -------------------------------------------------------------------------------- 1 | #include "NN_RLSolverMLP.h" 2 | 3 | using namespace ReinforcementLearning; 4 | using namespace NeuralNet; 5 | NN_RLSolverMLP::NN_RLSolverMLP(std::shared_ptr m, 6 | std::shared_ptr net0, 7 | std::shared_ptr trainer0, 8 | int Dim, DeepLearning::QLearningSolverParameter para): 9 | NN_RLSolverBase(m,net0,trainer0,Dim,para){ 10 | this->setNormalizationConst(); 11 | } 12 | 13 | void NN_RLSolverMLP::setNormalizationConst(){ 14 | state_norm.resize(stateDim+1); 15 | state_norm[0] = M_PI; 16 | state_norm[1] = 20.0; 17 | state_norm[2] = model->getNumActions()-1; 18 | } 19 | 20 | double NN_RLSolverMLP::calQ(const State& S, int action) const { 21 | std::shared_ptr inputTemp(new arma::mat(netInputDim, 1)); 22 | for (int k = 0; k < stateDim; k++) 23 | inputTemp->at(k) = S[k] / this->state_norm[k]; 24 | inputTemp->at(stateDim) = action / state_norm[stateDim] - 0.5; 25 | net->setTrainingSamples(inputTemp, nullptr); 26 | net->forward(); 27 | double tempQ = arma::as_scalar(*(net->netOutput())); 28 | return tempQ; 29 | } 30 | 31 | void NN_RLSolverMLP::train(){ 32 | std::shared_ptr trainingSampleX(new arma::mat); 33 | std::shared_ptr trainingSampleY(new arma::mat); 34 | std::shared_ptr prediction; 35 | int maxIter = trainingPara.numtrainingepisodes(); 36 | for (int iter = 0; iter < maxIter; iter++){ 37 | std::cout << "RLsolver iteration: " << iter << std::endl; 38 | this->generateExperience(); 39 | if (iter > 20) { 40 | this->generateTrainingSample(trainingSampleX, trainingSampleY); 41 | trainingSampleX->save("X.dat", arma::raw_ascii); 42 | trainingSampleY->save("Y.dat", arma::raw_ascii); 43 | trainer->setTrainingSamples(trainingSampleX, trainingSampleY); 44 | trainer->train(); 45 | prediction = trainer->predict(trainingSampleX); 46 | prediction->save("prediction.dat", arma::raw_ascii); 47 | std::cout << "average duration " << experienceSet.size() / 1.0 / iter << std::endl; 48 | } 49 | } 50 | } 51 | 52 | void NN_RLSolverMLP::generateTrainingSample(std::shared_ptr trainingX, std::shared_ptr trainingY){ 53 | trainingX->set_size(netInputDim, experienceSet.size()); 54 | trainingY->set_size(1, experienceSet.size()); 55 | double maxQ; 56 | int action; 57 | std::shared_ptr inputTemp(new arma::mat(netInputDim, 1)); 58 | for (int i = 0; i < this->experienceSet.size(); i++) { 59 | this->getMaxQ(experienceSet[i].newState,&maxQ,&action); 60 | std::cout << "maxQ:" <stateDim; k++) 64 | inputTemp->at(k) = experienceSet[i].oldState[k] / this->state_norm[k]; 65 | inputTemp->at(stateDim) = experienceSet[i].action / state_norm[stateDim] - 0.5; 66 | 67 | trainingX->col(i) = *inputTemp; 68 | trainingY->at(i) = targetQ; 69 | } 70 | } 71 | 72 | void NN_RLSolverMLP::generateExperience(){ 73 | double maxQ; 74 | int action; 75 | double epi = trainingPara.epsilon(); 76 | arma::mat outputTemp(1,1); 77 | std::shared_ptr inputTemp(new arma::mat(netInputDim, 1)); 78 | model->createInitialState(); 79 | int i; 80 | for(i = 0; i < trainingPara.episodelength(); i++){ 81 | if( this->terminate(model->getCurrState()) ) { 82 | break; 83 | } 84 | State oldState = model->getCurrState(); 85 | if (randChoice->nextDou()< epi){ 86 | this->getMaxQ(oldState, &maxQ, &action); 87 | } else { 88 | action = 
randChoice->nextInt(); 89 | } 90 | model->run(action); 91 | State currState = model->getCurrState(); 92 | double r = this->getRewards(currState); 93 | oldState.shrink_to_fit(); 94 | currState.shrink_to_fit(); 95 | this->experienceSet.push_back(Experience(oldState,currState, action, r)); 96 | } 97 | std::cout << "duration " << i << std::endl; 98 | } 99 | 100 | double NN_RLSolverMLP::getRewards(const State &newS) const{ 101 | if (this->terminate(newS)){ 102 | return -1.0; 103 | } else { 104 | return 0.0; 105 | } 106 | } 107 | bool NN_RLSolverMLP::terminate(const State& S) const { 108 | return (S[0] < - 0.5* M_PI || S[0] > 0.5 * M_PI); 109 | } 110 | -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverMLP.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "BaseModel.h" 4 | #include "Model_PoleSimple.h" 5 | #include "NN_RLSolverBase.h" 6 | #include "Net.h" 7 | #include "../Trainer/Trainer.h" 8 | 9 | namespace ReinforcementLearning { 10 | class NN_RLSolverMLP: public NN_RLSolverBase { 11 | public: 12 | NN_RLSolverMLP(std::shared_ptr m, 13 | std::shared_ptr net0, 14 | std::shared_ptr trainer0, 15 | int Dim, DeepLearning::QLearningSolverParameter para); 16 | virtual ~NN_RLSolverMLP(){} 17 | virtual void train(); 18 | virtual void generateTrainingSample(std::shared_ptr trainingX, std::shared_ptr trainingY); 19 | virtual void generateExperience(); 20 | virtual double getRewards(const State& newS) const; 21 | virtual bool terminate(const State& S) const; 22 | virtual void setNormalizationConst(); 23 | virtual double calQ(const State& S, int action) const; 24 | virtual void test(){} 25 | protected: 26 | std::vector experienceSet; 27 | private: 28 | std::vector durationVec; 29 | }; 30 | } 31 | -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverMultiMLP.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "BaseModel.h" 4 | #include "Model_PoleSimple.h" 5 | #include "NN_RLSolverBase.h" 6 | #include "NN_RLSolverMLP.h" 7 | #include "Net.h" 8 | #include "../Trainer/Trainer.h" 9 | 10 | namespace ReinforcementLearning { 11 | class NN_RLSolverMultiMLP: public NN_RLSolverMLP { 12 | public: 13 | NN_RLSolverMultiMLP(std::shared_ptr m, 14 | std::vector> net0, 15 | std::shared_ptr trainer0, 16 | int Dim, DeepLearning::QLearningSolverParameter para); 17 | virtual ~NN_RLSolverMultiMLP(){} 18 | virtual void train(); 19 | virtual void generateTrainingSample(); 20 | virtual double calQ(const State& S, int action) const; 21 | void outputPolicy(); 22 | private: 23 | void outputQ(int i); 24 | int numActions; 25 | std::vector> nets; 26 | std::vector> trainingSampleXs, trainingSampleYs; 27 | std::vector durationVec; 28 | 29 | // parameters to output the Q value, for checking purpose 30 | int n_rows; 31 | int n_cols; 32 | double dx1; 33 | double dx2; 34 | double minx1; 35 | double minx2; 36 | 37 | 38 | }; 39 | } 40 | -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverRNN.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "BaseModel.h" 4 | #include "Model_PoleSimple.h" 5 | #include "NN_RLSolverMLP.h" 6 | #include "Net.h" 7 | #include "../Trainer/Trainer.h" 8 | 9 | namespace ReinforcementLearning { 10 | class NN_RLSolverRNN: public 
NN_RLSolverMLP { 11 | public: 12 | NN_RLSolverRNN(std::shared_ptr m, 13 | std::shared_ptr net0, 14 | std::shared_ptr trainer0, 15 | int Dim, DeepLearning::QLearningSolverParameter para); 16 | virtual ~NN_RLSolverRNN(){} 17 | virtual void train(); 18 | virtual void generateExperience(); 19 | virtual void generateTrainingSampleVec(std::vector>& trainingSampleX, 20 | std::vector>& trainingSampleY); 21 | virtual bool terminate(const State& S) const; 22 | virtual void setNormalizationConst(); 23 | virtual double calQ(const State& S, int action) const; 24 | virtual void test(); 25 | void outputTraining(std::vector> &trainingXVec,std::string); 26 | protected: 27 | std::vector durationVec; 28 | std::vector> trainingXVec, trainingYVec; 29 | std::vector> experienceSeqVec; 30 | }; 31 | } 32 | -------------------------------------------------------------------------------- /src/test/NN-RL/Qtableresult/qtable1.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/NN-RL/Qtableresult/qtable1.tif -------------------------------------------------------------------------------- /src/test/NN-RL/Qtableresult/qtable2.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/NN-RL/Qtableresult/qtable2.tif -------------------------------------------------------------------------------- /src/test/NN-RL/Qtableresult/qtable3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/NN-RL/Qtableresult/qtable3.tif -------------------------------------------------------------------------------- /src/test/NN-RL/Qtableresult/qtableAction.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/NN-RL/Qtableresult/qtableAction.tif -------------------------------------------------------------------------------- /src/test/NN-RL/RLSolverBase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | #include "BaseModel.h" 4 | using namespace NeuralNet; 5 | namespace ReinforcementLearning { 6 | 7 | class RLSolverBase { 8 | public: 9 | 10 | RLSolverBase(std::shared_ptr m, int Dim, DeepLearning::QLearningSolverParameter para) { 11 | trainingPara = para; 12 | model = m; 13 | stateDim = Dim; 14 | randChoice = std::make_shared(0, model->getNumActions() - 1); 15 | } 16 | 17 | virtual ~RLSolverBase() { 18 | } 19 | virtual void train() = 0; 20 | virtual double getRewards(const State& newS) const {return 0.0;}; 21 | protected: 22 | int stateDim; 23 | std::shared_ptr model; 24 | std::shared_ptr randChoice; 25 | DeepLearning::QLearningSolverParameter trainingPara; 26 | 27 | }; 28 | } 29 | -------------------------------------------------------------------------------- /src/test/NN-RL/RLSolver_2DTable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "common.h" 4 | #include "RLSolverBase.h" 5 | 6 | 7 | namespace ReinforcementLearning { 8 | 9 | class RLSolver_2DTable : RLSolverBase{ 10 | public: 11 | RLSolver_2DTable(std::shared_ptr 
m, int Dim, 12 | DeepLearning::QLearningSolverParameter para, int n_row0, int n_col0, 13 | double dx, double dy, double min_x, double min_y); 14 | 15 | virtual ~RLSolver_2DTable() {} 16 | virtual void train(); 17 | virtual void test(); 18 | void replayExperience(); 19 | virtual void updateQ(Experience); 20 | virtual void getMaxQ(const State& S, double* Q, int* action) const; 21 | arma::cube& getQTable(){return QTable;} 22 | virtual void loadQTable(std::string filetag); 23 | private: 24 | void outputPolicy(); 25 | void outputQ(std::string filename); 26 | void writeTrajectory(int iter, std::ostream &os, int action, State state, double reward) const; 27 | std::pair stateToIndex(const State & S) const; 28 | arma::cube QTable; 29 | int n_rows, n_cols, numActions; 30 | double dx1, dx2, minx1, minx2; 31 | arma::Mat count; 32 | std::vector experienceVec; 33 | }; 34 | } 35 | -------------------------------------------------------------------------------- /src/test/NN-RL/elman.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 1 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 2 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: zero 9 | normal_std: 0.1 10 | normal_mean:0.1 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 9 23 | outputDim: 50 24 | activationType: tanh 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | layerStruct{ 34 | name: "BaseLayer2" 35 | inputDim: 50 36 | outputDim: 1 37 | activationType: linear 38 | init_W{ 39 | initializerType: glorot_uniform 40 | } 41 | init_B { 42 | initializerType: zero 43 | } 44 | } 45 | 46 | neuralNetTrainingParameter{ 47 | trainerType: SGDRNN 48 | learningRate: 0.01 49 | miniBatchSize: 10 50 | NEpoch: 5000 51 | momentum: 0.95 52 | decayRate: 1000 53 | showGradNorm: false 54 | RNNScanFlag: false 55 | RNNScanStep: 5 56 | RNNTruncateLength: 10 57 | } 58 | 59 | -------------------------------------------------------------------------------- /src/test/NN-RL/mlp.prototxt: -------------------------------------------------------------------------------- 1 | 2 | layerStruct{ 3 | name: "BaseLayer1" 4 | inputDim: 3 5 | outputDim: 10 6 | activationType: tanh 7 | init_W{ 8 | initializerType: normal 9 | normal_mean: 0 10 | normal_std: 0.01 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | } 16 | 17 | layerStruct{ 18 | name: "BaseLayer3" 19 | inputDim: 10 20 | outputDim: 1 21 | activationType: linear 22 | init_W{ 23 | initializerType: normal 24 | normal_mean: 0 25 | normal_std: 0.01 26 | } 27 | init_B { 28 | initializerType: zero 29 | } 30 | } 31 | 32 | neuralNetTrainingParameter{ 33 | trainerType: RMSProp 34 | learningRate: 0.01 35 | miniBatchSize: 100 36 | NEpoch: 100 37 | momentum: 0.90 38 | decayRate: 2000 39 | printInfoFrequency: 10 40 | verbose: true 41 | } 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/test/NN-RL/multimlp.prototxt: -------------------------------------------------------------------------------- 1 | 2 | layerStruct{ 3 | name: "BaseLayer1" 4 | inputDim: 2 5 | outputDim: 10 6 | activationType: tanh 7 | init_W{ 8 | initializerType: glorot_uniform 9 | normal_mean: 0 10 | normal_std: 0.1 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | } 16 | 17 | 
layerStruct{ 18 | name: "BaseLayer1" 19 | inputDim: 10 20 | outputDim: 20 21 | activationType: tanh 22 | init_W{ 23 | initializerType: glorot_uniform 24 | normal_mean: 0 25 | normal_std: 0.1 26 | } 27 | init_B { 28 | initializerType: zero 29 | } 30 | } 31 | layerStruct{ 32 | name: "BaseLayer3" 33 | inputDim: 20 34 | outputDim: 1 35 | activationType: linear 36 | init_W{ 37 | initializerType: glorot_uniform 38 | normal_mean: 0 39 | normal_std: 0.1 40 | } 41 | init_B { 42 | initializerType: zero 43 | } 44 | } 45 | 46 | neuralNetTrainingParameter{ 47 | trainerType: SGD 48 | learningRate: 5 49 | miniBatchSize: 300 50 | NEpoch: 5000 51 | momentum: 0.9 52 | decayRate: 2000 53 | printInfoFrequency: 300 54 | verbose: true 55 | } 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /src/test/NN-RL/net.prototxt: -------------------------------------------------------------------------------- 1 | 2 | layerStruct{ 3 | name: "BaseLayer1" 4 | inputDim: 3 5 | outputDim: 10 6 | activationType: sigmoid 7 | init_W{ 8 | initializerType: glorot_uniform 9 | } 10 | init_B { 11 | initializerType: zero 12 | } 13 | } 14 | 15 | layerStruct{ 16 | name: "BaseLayer2" 17 | inputDim: 10 18 | outputDim: 10 19 | activationType: sigmoid 20 | init_W{ 21 | initializerType: glorot_uniform 22 | } 23 | init_B { 24 | initializerType: zero 25 | } 26 | } 27 | 28 | layerStruct{ 29 | name: "BaseLayer3" 30 | inputDim: 10 31 | outputDim: 1 32 | activationType: linear 33 | init_W{ 34 | initializerType: glorot_uniform 35 | } 36 | init_B { 37 | initializerType: zero 38 | } 39 | } 40 | 41 | neuralNetTrainingParameter{ 42 | 43 | learningRate: 1 44 | miniBatchSize: 10 45 | NEpoch: 500 46 | momentum: 0.9 47 | decayRate: 100 48 | printInfoFrequency: 100 49 | verbose: false 50 | } 51 | -------------------------------------------------------------------------------- /src/test/NN-RL/plotQMap.m: -------------------------------------------------------------------------------- 1 | clear all 2 | close all 3 | 4 | for i = 0 : 2 5 | filename = strcat('QMap',num2str(i),'.dat'); 6 | data{i+1} = load(filename); 7 | figure(i + 1) 8 | imagesc(data{i+1}); 9 | colorbar; 10 | end 11 | 12 | data{4} = load('actionMapNN.dat'); 13 | figure(5) 14 | imagesc(data{4}); 15 | colorbar; -------------------------------------------------------------------------------- /src/test/NN-RL/plotQtable.m: -------------------------------------------------------------------------------- 1 | clear all 2 | close all 3 | 4 | for i = 0 : 2 5 | filename = strcat('QTableFinal',num2str(i),'.dat'); 6 | data{i+1} = load(filename); 7 | figure(i + 1) 8 | imagesc(data{i+1}); 9 | colorbar; 10 | end 11 | 12 | data{4} = load('actionMap.dat'); 13 | figure(5) 14 | imagesc(data{4}); 15 | colorbar; -------------------------------------------------------------------------------- /src/test/NN-RL/qsolver.prototxt: -------------------------------------------------------------------------------- 1 | qLearningSolverParameter{ 2 | learningRate: 0.1 3 | discount: 0.95 4 | numTrainingEpisodes: 200 5 | epsilon: 0.2 6 | EpisodeLength: 1000 7 | QTableOutputInterval: 100 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /src/test/NN-RL/rnn.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numHiddenLayers: 2 3 | hiddenLayerInputDim: 20 4 | hiddenLayerOutputDim: 20 5 | inputDim: 3 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | 
normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 20 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: SGDRNN 35 | learningRate: 0.05 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.90 39 | decayRate: 2500 40 | showGradNorm: false 41 | RNNScanFlag: false 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | -------------------------------------------------------------------------------- /src/test/Optimization/Makefile: -------------------------------------------------------------------------------- 1 | CPP = g++ 2 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include 3 | ARMA_LINKFLAGS= -lblas -llapack 4 | CXXFLAGS = -std=c++0x -I$(ARMA_INCLUDE) -c -DARMA_DONT_USE_WRAPPER 5 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program 6 | LINKFLAGS = -static $(LINKOPTFLAGS) 7 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl 8 | 9 | 10 | 11 | OBJ = main.o LBFGS.o SteepDescent.o 12 | 13 | test : $(OBJ) 14 | $(CPP) -o test $(OBJ) $(LINKFLAGS) 15 | 16 | clean: 17 | rm -f *.o *~ 18 | -------------------------------------------------------------------------------- /src/test/RNN/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../Makefile.common 3 | 4 | OBJ = test_RNN.o Trainer.o 5 | 6 | test : $(OBJ) 7 | $(CXX) -o $@ $(OBJ) $(LDFLAG) 8 | Trainer.o : ../Trainer/Trainer.cpp 9 | 10 | $(CXX) -c $(CXXFLAGS) $^ 11 | clean: 12 | rm -f *.o *~ 13 | -------------------------------------------------------------------------------- /src/test/RNN/RLtest2.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 2 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 3 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 8 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: RMSProp 35 | learningRate: 0.2 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 2500 40 | showGradNorm: false 41 | RNNScanFlag: false 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/test/RNN/gradcheck.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numHiddenLayers: 2 3 | hiddenLayerInputDim: 8 4 | hiddenLayerOutputDim: 8 5 | inputDim: 1 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.01 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: glorot_normal 14 | } 15 | init_W_two{ 16 | initializerType: glorot_normal 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 8 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_normal 27 | } 28 | init_B { 29 | initializerType: 
zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | learningRate: 0.01 35 | miniBatchSize: 100 36 | NEpoch: 5000 37 | momentum: 0.9 38 | decayRate: 10000 39 | } 40 | 41 | 42 | -------------------------------------------------------------------------------- /src/test/RNN/net.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numHiddenLayers: 3 3 | hiddenLayerInputDim: 8 4 | hiddenLayerOutputDim: 8 5 | inputDim: 1 6 | outputDim: 1 7 | activationType: tanh 8 | init_W_one{ 9 | initializerType: normal 10 | normal_std: 0.01 11 | normal_mean:0 12 | } 13 | init_B { 14 | initializerType: zero 15 | } 16 | init_W_two{ 17 | initializerType: glorot_normal 18 | } 19 | } 20 | 21 | layerStruct{ 22 | name: "BaseLayer2" 23 | inputDim: 8 24 | outputDim: 1 25 | activationType: linear 26 | init_W{ 27 | initializerType: glorot_normal 28 | } 29 | init_B { 30 | initializerType: zero 31 | } 32 | } 33 | 34 | neuralNetTrainingParameter{ 35 | learningRate: 0.01 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 10000 40 | } 41 | 42 | 43 | -------------------------------------------------------------------------------- /src/test/RNN/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/RNN/test -------------------------------------------------------------------------------- /src/test/RNN/testIntermediate.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 2 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 1 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 8 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: RMSProp 35 | learningRate: 0.1 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 500 40 | showGradNorm: false 41 | RNNScanFlag: true 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/test/RNN/testSimple.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 1 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 1 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 8 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: RMSProp 35 | learningRate: 0.1 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 500 40 | showGradNorm: false 41 | RNNScanFlag: true 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | 47 | 
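Note on the RNN configs above (testSimple.prototxt, testIntermediate.prototxt, RLtest2.prototxt): each follows the same pattern as the MLP configs — an rnnStruct block, a final linear layerStruct, and a neuralNetTrainingParameter block that selects the trainer (RMSProp or SGDRNN). A driver for such a file would mirror src/test/Trainer/test_trainer.cpp shown further down. The sketch below is an illustration only: the RNN(message) constructor, its compatibility with TrainerBuilder::GetTrainer, and the toy 1-D sample shapes are assumptions by analogy with the MLP test, not code taken from the repository.

#include "common.h"
#include "RNN.h"
#include "Trainer.h"

using namespace NeuralNet;
using namespace DeepLearning;

int main(int argc, char* argv[]) {
    if (argc < 2) return 1;

    // Parse e.g. testSimple.prototxt into the generated protobuf message.
    NeuralNetParameter message;
    ReadProtoFromTextFile(argv[1], &message);

    // Toy 1-D sequence data: one row (inputDim: 1 in the configs above), 100 time steps.
    std::shared_ptr<arma::mat> trainX(new arma::mat(arma::randu<arma::mat>(1, 100)));
    std::shared_ptr<arma::mat> trainY(new arma::mat(arma::randu<arma::mat>(1, 100)));

    // Assumed: RNN can be built directly from the message, analogous to MultiLayerPerceptron(message).
    std::shared_ptr<RNN> rnn(new RNN(message));

    // Trainer type and hyperparameters (learningRate, NEpoch, RNNTruncateLength, ...)
    // come from the neuralNetTrainingParameter block of the same config file.
    std::shared_ptr<Trainer> trainer(TrainerBuilder::GetTrainer(rnn, message));
    trainer->setTrainingSamples(trainX, trainY);
    trainer->train();
    return 0;
}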
-------------------------------------------------------------------------------- /src/test/RNN/testlittleTimer.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 2 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 2 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 8 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: RMSProp 35 | learningRate: 0.2 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 2500 40 | showGradNorm: false 41 | RNNScanFlag: true 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/test/RNNtestRLSet/prediction: -------------------------------------------------------------------------------- 1 | -0.4582 -0.0240 -0.1143 -0.1255 -0.1676 -0.1394 -0.1225 -0.0605 0.0110 0.1483 0.0870 -0.3770 -0.4164 -0.4211 -0.4212 -0.4210 -0.4209 -0.4207 -0.4206 -0.4204 -0.4202 -0.4200 -0.4198 -0.4195 -0.4192 -0.4190 -0.4186 -0.4183 -0.4179 -0.4175 -0.4170 -0.4165 -0.4159 -0.4153 -0.4146 -0.4138 -0.4129 -0.4119 -0.4108 -0.4096 -0.4082 -0.4066 -0.4047 -0.4026 2 | 0.0896 0.0588 0.0238 0.0247 0.0246 0.0245 0.0244 0.0242 0.0240 -0.0119 0.0179 0.0223 0.0227 0.0222 0.0217 0.0211 0.0204 0.0194 0.0183 0.0170 0.0153 0.0132 0.0106 -0.9894 3 | -------------------------------------------------------------------------------- /src/test/Trainer/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../Makefile.common 3 | 4 | OBJ = test_trainer.o Trainer.o MultiLayerPerceptron.o 5 | 6 | test_IO : $(OBJ) 7 | $(CXX) -o test $(OBJ) $(LDFLAG) 8 | MultiLayerPerceptron.o : ../MultiLayerPerceptron/MultiLayerPerceptron.cpp 9 | $(CXX) -c $(CXXFLAGS) $^ 10 | clean: 11 | rm -f *.o *~ 12 | -------------------------------------------------------------------------------- /src/test/Trainer/net.prototxt: -------------------------------------------------------------------------------- 1 | layerStruct{ 2 | name: "BaseLayer1" 3 | inputDim: 3 4 | outputDim: 8 5 | activationType: sigmoid 6 | } 7 | 8 | layerStruct{ 9 | name: "BaseLayer2" 10 | inputDim: 8 11 | outputDim: 1 12 | activationType: sigmoid 13 | } 14 | 15 | neuralNetTrainingParameter{ 16 | learningRate: 0.1 17 | miniBatchSize: 10 18 | NEpoch: 20 19 | momentum: 0.9 20 | decayRate: 1000 21 | } 22 | -------------------------------------------------------------------------------- /src/test/Trainer/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/Trainer/test -------------------------------------------------------------------------------- /src/test/Trainer/test_trainer.cpp: -------------------------------------------------------------------------------- 1 | #include "Trainer.h" 2 | #include "common.h" 3 | #include "../MultiLayerPerceptron/MultiLayerPerceptron.h" 4 | 5 | using namespace NeuralNet; 6 | using namespace DeepLearning; 7 | int main(int argc, 
char* argv[]){ 8 | 9 | if (argc < 2) exit(1); 10 | 11 | NeuralNetParameter message; 12 | ReadProtoFromTextFile(argv[1], &message); 13 | 14 | std::shared_ptr DataX(new arma::mat); 15 | std::shared_ptr DataY(new arma::mat); 16 | std::shared_ptr trainDataX(new arma::mat); 17 | std::shared_ptr trainDataY(new arma::mat); 18 | std::shared_ptr testDataX(new arma::mat); 19 | std::shared_ptr testDataY(new arma::mat); 20 | std::shared_ptr ValidationDataX(new arma::mat); 21 | std::shared_ptr ValidationDataY(new arma::mat); 22 | 23 | loadData_MNIST(DataX,DataY,(std::string)argv[2]); 24 | 25 | int ntrain =2000; 26 | int ntest = 1000; 27 | // now I split data into train, test, and validation 28 | trainDataX = std::make_shared(DataX->cols(0,ntrain-1)); 29 | trainDataY = std::make_shared(DataY->cols(0,ntrain-1)); 30 | testDataX = std::make_shared(DataX->cols(ntrain,ntrain+ntest-1)); 31 | testDataY = std::make_shared(DataY->cols(ntrain,ntrain+ntest-1)); 32 | 33 | 34 | std::shared_ptr mlp(new MultiLayerPerceptron(message)); 35 | std::shared_ptr trainer( TrainerBuilder::GetTrainer(mlp,message)); 36 | trainer->setTrainingSamples(trainDataX, trainDataY); 37 | trainer->train(); 38 | return 0; 39 | } -------------------------------------------------------------------------------- /src/test/TwolayerPerceptron/Makefile: -------------------------------------------------------------------------------- 1 | # this make file now is dynamically linking 2 | #OPTFLAGS = -O3 -march=nocona -mfpmath=sse -msse3 -Wuninitialized -flto 3 | #MKL_INCLUDE = /opt/intel/mkl/include/ 4 | #MKL_INCLUDE = /opt/intel/composer_xe_2013.0.079/mkl/include/ 5 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 6 | 7 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 8 | CPP = g++ 9 | #MKLROOT = /opt/intel/mkl 10 | #MKLROOT = /opt/intel/composer_xe_2013.0.079/mkl 11 | MKLROOT = /opt/intel/composerxe/mkl 12 | MKLLINKFLAGS = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread 13 | MKL_DL_LINKFLAGS = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm 14 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include/armadillo 15 | ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -llapack -lopenblas 16 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 17 | CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) -I/opt/boost/boost_1_57_0 -c -DDEBUG -D__LINUX -g3 18 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program 19 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 20 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl 21 | #LINKFLAGS = 22 | #ODIR=obj 23 | ODIR = 24 | 25 | 26 | OBJ = mainSDA.o MultiLayerPerceptron.o BaseLayer.o 27 | 28 | test : $(OBJ) 29 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 30 | 31 | BaseLayer.o : ../BaseLayer/BaseLayer.cpp ../BaseLayer/BaseLayer.h 32 | $(CPP) -c $(CXXFLAGS) $< 33 | 34 | clean: 35 | rm -f *.o *~ 36 | -------------------------------------------------------------------------------- /src/test/TwolayerPerceptron/MultiLayerPerceptron.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "MultiLayerPerceptron.h" 3 | 4 | MultiLayerPerceptron::MultiLayerPerceptron(int inputDim0, int outputDim0, int hiddenDim0, std::shared_ptr trainingX0, 5 | std::shared_ptr trainingY0, TrainingPara trainingPara0) { 6 | 7 | 8 | 
inputDim = inputDim0; 9 | hiddenDim = hiddenDim0; 10 | outputDim = outputDim0; 11 | numLayers = 2; 12 | trainingX = trainingX0; 13 | trainingY = trainingY0; 14 | numInstance = trainingX->n_rows; 15 | trainingPara = trainingPara0; 16 | 17 | layers.push_back(BaseLayer(inputDim,hiddenDim,BaseLayer::sigmoid)); 18 | layers.push_back(BaseLayer(hiddenDim,outputDim,BaseLayer::softmax)); 19 | // layers[0].W.print("layer 0 W"); 20 | // layers[0].B.print("layer 0 B"); 21 | // layers[1].W.print("layer 1 W"); 22 | // layers[1].B.print("layer 1 B"); 23 | } 24 | 25 | 26 | void MultiLayerPerceptron::train() { 27 | // Here I used stochastic gradient descent 28 | // first do the forward propagate 29 | trainingPara.print(); 30 | int ntimes = numInstance / trainingPara.miniBatchSize; 31 | std::shared_ptr subInputX, subInputY; 32 | double errorTotal; 33 | int size = trainingPara.miniBatchSize; 34 | double alpha = trainingPara.alpha / size; 35 | for(int epoch = 0; epoch < trainingPara.NEpoch; epoch++) { 36 | std::cout << epoch << std::endl; 37 | errorTotal = 0.0; 38 | for (int i = 0; i < ntimes; i++) { 39 | // first do the propogation 40 | subInputX = std::make_shared(trainingX->rows(i*size,(i+1)*size-1)); 41 | subInputY = std::make_shared(trainingY->rows(i*size,(i+1)*size-1)); 42 | 43 | layers[0].inputX = subInputX; 44 | layers[0].activateUp(subInputX); 45 | layers[1].inputX = layers[0].outputY; 46 | layers[1].activateUp(layers[1].inputX); 47 | // layers[0].outputY->print("layer0 outputY:"); 48 | // layers[1].outputY->print("layer1 outputY:"); 49 | // std::shared_ptr predictY = layers[1].outputY; 50 | arma::mat sigmoid_deriv2 = (*(layers[1].outputY)) % (1-*(layers[1].outputY)); 51 | arma::mat delta2 = ((-*subInputY + *(layers[1].outputY)).st()) % sigmoid_deriv2.st(); 52 | arma::mat grad1 = delta2 * (*(layers[1].inputX)); 53 | arma::vec deltaSum2 = arma::sum(delta2,1); 54 | 55 | arma::mat errortemp = (-*subInputY + *(layers[1].outputY)).st(); 56 | // errortemp.print(); 57 | arma::vec error = arma::sum(errortemp,1); 58 | // error.print(); 59 | // deltaSum2.print(); 60 | errorTotal += arma::as_scalar(error.st() * error); 61 | *(layers[1].W) -= alpha*grad1; 62 | *(layers[1].B) -= alpha*deltaSum2; 63 | 64 | 65 | // delta0 should have the dimension of hidden Dimension 66 | arma::mat sigmoid_deriv1 = (*(layers[0].outputY)) % (1-*(layers[0].outputY)); 67 | arma::mat delta1 = ( (layers[1].W)->st() * delta2) % sigmoid_deriv1.st(); 68 | arma::mat grad0 = delta1 * (*(layers[0].inputX)); 69 | arma::vec deltaSum1 = arma::sum(delta1,1); 70 | *(layers[0].W) -= alpha*grad0; 71 | *(layers[0].B) -= alpha*deltaSum1; 72 | 73 | } 74 | std::cout << "error is: " << errorTotal << std::endl; 75 | } 76 | 77 | // layers[1].outputY->print("final prediction"); 78 | } 79 | 80 | //if(converge(W_aug_old,W_aug_new)) break; 81 | 82 | 83 | void MultiLayerPerceptron::test(std::shared_ptr trainingX,std::shared_ptr trainingY) { 84 | layers[0].inputX = trainingX; 85 | layers[0].activateUp(trainingX); 86 | layers[1].inputX = layers[0].outputY; 87 | layers[1].activateUp(layers[1].inputX); 88 | layers[1].outputY->save("testoutput.txt",arma::raw_ascii); 89 | 90 | } 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /src/test/TwolayerPerceptron/MultiLayerPerceptron.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "../BaseLayer/BaseLayer.h" 4 | 5 | 6 | struct TrainingPara { 7 | 8 | TrainingPara(double eps0=1e-6, int NEpoch0 
= 500, 9 | int miniBatchSize0 = 10, double alpha0 = 0.1): 10 | eps(eps0),NEpoch(NEpoch0), 11 | miniBatchSize(miniBatchSize0), alpha(alpha0) {} 12 | 13 | 14 | double eps; 15 | int NEpoch; 16 | int miniBatchSize; 17 | double alpha; 18 | // Method method; 19 | void print() const { 20 | 21 | std::cout << eps << "\t"; 22 | std::cout << NEpoch << "\t"; 23 | std::cout << miniBatchSize << "\t"; 24 | std::cout << alpha << std::endl; 25 | 26 | } 27 | }; 28 | 29 | 30 | 31 | class MultiLayerPerceptron { 32 | public: 33 | MultiLayerPerceptron(int inputDim0, int outputDim0, int hiddenDim0, std::shared_ptr trainingX0, 34 | std::shared_ptr trainingY0, TrainingPara trainingPara); 35 | 36 | void train(); 37 | void initialize(); 38 | void test(std::shared_ptr trainingX,std::shared_ptr trainingY); 39 | private: 40 | bool converge(); 41 | TrainingPara trainingPara; 42 | int numLayers; 43 | int inputDim; 44 | int hiddenDim; 45 | int outputDim; 46 | int numInstance; 47 | std::vector layers; 48 | std::shared_ptr trainingX; 49 | std::shared_ptr trainingY; 50 | 51 | 52 | }; -------------------------------------------------------------------------------- /src/test/TwolayerPerceptron/mainSDA.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "MultiLayerPerceptron.h" 9 | 10 | 11 | 12 | 13 | void loadData_MNIST(std::shared_ptr X, 14 | std::shared_ptr Y); 15 | 16 | int main(int argc, char *argv[]) { 17 | std::shared_ptr trainDataX(new arma::mat); 18 | std::shared_ptr trainDataY(new arma::mat); 19 | loadData_MNIST(trainDataX,trainDataY); 20 | 21 | int inputDim = trainDataX->n_cols; 22 | int outputDim = trainDataY->n_cols; 23 | int hiddenDim = 100; 24 | std::cout << inputDim << std::endl; 25 | std::cout << outputDim << std::endl; 26 | std::cout << trainDataX->n_rows << std::endl; 27 | std::cout << trainDataY->n_rows << std::endl; 28 | trainDataX->save("trainingSamples.txt",arma::raw_ascii); 29 | TrainingPara trainingPara(1e-6,100, 10, 0.5); 30 | trainingPara.print(); 31 | MultiLayerPerceptron mlp(inputDim, outputDim, hiddenDim, trainDataX, trainDataY, trainingPara); 32 | 33 | mlp.train(); 34 | 35 | mlp.test(trainDataX,trainDataY); 36 | // after training i do some testing 37 | 38 | } 39 | 40 | 41 | void loadData_MNIST(std::shared_ptr X, 42 | std::shared_ptr Y) { 43 | 44 | std::string filename_base("../MNIST/data"); 45 | std::string filename; 46 | char tag[50]; 47 | char x; 48 | int count; 49 | int numFiles = 10; 50 | int featSize = 28*28; 51 | int labelSize = 10; 52 | int numSamples = 100; 53 | X->set_size(numFiles*numSamples,featSize); 54 | Y->set_size(numFiles*numSamples,labelSize); 55 | Y->fill(0); 56 | // std::cout << Y.Len() << std::endl; 57 | // std::cout << X.NumR() << std::endl; 58 | // std::cout << X.NumC() << std::endl; 59 | 60 | for (int i = 0 ; i < numFiles ; i++) { 61 | sprintf(tag,"%d",i); 62 | filename=filename_base+(std::string)tag; 63 | std::cout << filename << std::endl; 64 | std::ifstream infile; 65 | infile.open(filename,std::ios::binary | std::ios::in); 66 | if (infile.is_open()) { 67 | 68 | for (int j = 0 ; j < numSamples ; j++) { 69 | 70 | for (int k =0 ; k 2 | #include "Util.h" 3 | 4 | 5 | using namespace NeuralNet; 6 | 7 | int main(){ 8 | 9 | Random_Bernoulli r(0.5); 10 | 11 | for(int i = 0; i < 100; i++) 12 | std::cout << r.next() << std::endl; 13 | 14 | 15 | Random_Bernoulli r2(0.5); 16 | 17 | int p[25]; 18 | for(int i = 0; i < 25; i++ ){ 19 | p[i] = 1; 20 | } 
21 | 22 | r2.modifier(p,25); 23 | std::cout << "second" << std::endl; 24 | for(int i = 0; i < 25; i++) 25 | std::cout << p[i] << std::endl; 26 | 27 | 28 | Random_Bernoulli r3(0.5); 29 | 30 | unsigned long long p2[25]; 31 | for(int i = 0; i < 25; i++ ){ 32 | p2[i] = 1; 33 | } 34 | 35 | r3.modifier(p2,25); 36 | std::cout << "third" << std::endl; 37 | for(int i = 0; i < 25; i++) 38 | std::cout << p2[i] << std::endl; 39 | 40 | std::shared_ptr X, Y; 41 | std::string filename=""; 42 | 43 | loadData_MNIST(X, Y, filename); 44 | return 0; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /src/test/arma/Makefile: -------------------------------------------------------------------------------- 1 | # You may need to edit this file to reflect the type and capabilities of your system. 2 | # The defaults are for a Linux system and may need to be changed for other systems (eg. Mac OS X). 3 | 4 | 5 | CXX=g++ 6 | 7 | #CXX=CC 8 | ## When using the Sun Studio compiler 9 | 10 | 11 | ARMA_INCLUDE_FLAG = -I ../include 12 | ## If you've installed Armadillo's headers manually, you may need to tell the compiler where they are. 13 | ## For example, change ../include to /usr/local/include 14 | 15 | 16 | LIB_FLAGS = -lblas -llapack 17 | #LIB_FLAGS = -lopenblas -llapack 18 | #LIB_FLAGS = -framework Accelerate 19 | #LIB_FLAGS = -library=sunperf 20 | 21 | ## NOTE: on Ubuntu and Debian based systems you may need to add -lgfortran to LIB_FLAGS 22 | ## NOTE: if you're using Mac OS, use the line with -framework Accelerate 23 | ## NOTE: if you're using the Sun Studio compiler, use the line with -library=sunperf 24 | 25 | 26 | OPT = -O2 27 | ## As the Armadillo library uses recursive templates, compilation times depend on the level of optimisation: 28 | ## 29 | ## -O0: quick compilation, but the resulting program will be slow 30 | ## -O1: good trade-off between compilation time and execution speed 31 | ## -O2: produces programs which have almost all possible speedups, but compilation takes longer 32 | ## -O3: enables auto vectorisation when using gcc 33 | 34 | #OPT = -xO4 -xannotate=no 35 | ## When using the Sun Studio compiler 36 | 37 | 38 | #EXTRA_OPT = -fwhole-program 39 | ## Uncomment the above line if you're compiling all source files into one program in a single hit 40 | 41 | 42 | #DEBUG = -DARMA_EXTRA_DEBUG 43 | ## Uncomment the above line to enable low-level debugging. 44 | ## Lots of debugging information will be printed when a compiled program is run. 45 | ## Please enable this option when reporting bugs. 46 | 47 | 48 | #FINAL = -DARMA_NO_DEBUG 49 | ## Uncomment the above line to disable Armadillo's checks. 50 | ## Not recommended unless your code has been first thoroughly tested! 
51 | 52 | 53 | CXXFLAGS = $(ARMA_INCLUDE_FLAG) $(DEBUG) $(FINAL) $(OPT) $(EXTRA_OPT) 54 | 55 | all: example1 56 | 57 | example1: example1.cpp 58 | $(CXX) $(CXXFLAGS) -o $@ $< $(LIB_FLAGS) 59 | 60 | 61 | .PHONY: clean 62 | 63 | clean: 64 | rm -f example1 65 | 66 | -------------------------------------------------------------------------------- /src/test/arma/example1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std; 5 | using namespace arma; 6 | 7 | 8 | int 9 | main(int argc, char** argv) 10 | { 11 | cout << "Armadillo version: " << arma_version::as_string() << endl; 12 | 13 | mat A(2,3); // directly specify the matrix size (elements are uninitialised) 14 | 15 | cout << "A.n_rows: " << A.n_rows << endl; // .n_rows and .n_cols are read only 16 | cout << "A.n_cols: " << A.n_cols << endl; 17 | 18 | A(1,2) = 456.0; // directly access an element (indexing starts at 0) 19 | A.print("A:"); 20 | 21 | 22 | A = 5.0; // scalars are treated as a 1x1 matrix 23 | A.print("A:"); 24 | 25 | 26 | A.set_size(4,5); // change the size (data is not preserved) 27 | 28 | A.fill(5.0); // set all elements to a particular value 29 | A.print("A:"); 30 | 31 | // endr indicates "end of row" 32 | A << 0.165300 << 0.454037 << 0.995795 << 0.124098 << 0.047084 << endr 33 | << 0.688782 << 0.036549 << 0.552848 << 0.937664 << 0.866401 << endr 34 | << 0.348740 << 0.479388 << 0.506228 << 0.145673 << 0.491547 << endr 35 | << 0.148678 << 0.682258 << 0.571154 << 0.874724 << 0.444632 << endr 36 | << 0.245726 << 0.595218 << 0.409327 << 0.367827 << 0.385736 << endr; 37 | 38 | A.print("A:"); 39 | 40 | // determinant 41 | cout << "det(A): " << det(A) << endl; 42 | 43 | // inverse 44 | cout << "inv(A): " << endl << inv(A) << endl; 45 | 46 | // save matrix as a file 47 | A.save("A.txt", raw_ascii); 48 | 49 | // load from file 50 | mat B; 51 | B.load("A.txt"); 52 | 53 | // submatrices 54 | cout << "B( span(0,2), span(3,4) ):" << endl << B( span(0,2), span(3,4) ) << endl; 55 | 56 | cout << "B.row(0): " << endl << B.row(0) << endl; 57 | 58 | cout << "B.col(1): " << endl << B.col(1) << endl; 59 | 60 | // transpose 61 | cout << "B.t(): " << endl << B.t() << endl; 62 | 63 | // maximum from each column (traverse along rows) 64 | cout << "max(B): " << endl << max(B) << endl; 65 | 66 | // maximum from each row (traverse along columns) 67 | cout << "max(B,1): " << endl << max(B,1) << endl; 68 | 69 | // maximum value in B 70 | cout << "max(max(B)) = " << max(max(B)) << endl; 71 | 72 | // sum of each column (traverse along rows) 73 | cout << "sum(B): " << endl << sum(B) << endl; 74 | 75 | // sum of each row (traverse along columns) 76 | cout << "sum(B,1) =" << endl << sum(B,1) << endl; 77 | 78 | // sum of all elements 79 | cout << "accu(B): " << accu(B) << endl; 80 | 81 | // trace = sum along diagonal 82 | cout << "trace(B): " << trace(B) << endl; 83 | 84 | // generate the identity matrix 85 | mat C = eye(4,4); 86 | 87 | // random matrix with values uniformly distributed in the [0,1] interval 88 | mat D = randu(4,4); 89 | D.print("D:"); 90 | 91 | // row vectors are treated like a matrix with one row 92 | rowvec r; 93 | r << 0.59119 << 0.77321 << 0.60275 << 0.35887 << 0.51683; 94 | r.print("r:"); 95 | 96 | // column vectors are treated like a matrix with one column 97 | colvec q; 98 | q << 0.14333 << 0.59478 << 0.14481 << 0.58558 << 0.60809; 99 | q.print("q:"); 100 | 101 | // dot or inner product 102 | cout << "as_scalar(r*q): " << as_scalar(r*q) << endl; 103 | 
104 | // outer product 105 | cout << "q*r: " << endl << q*r << endl; 106 | 107 | // multiply-and-accumulate operation (no temporary matrices are created) 108 | cout << "accu(A % B) = " << accu(A % B) << endl; 109 | 110 | // example of a compound operation 111 | B += 2.0 * A.t(); 112 | B.print("B:"); 113 | 114 | // imat specifies an integer matrix 115 | imat AA; 116 | imat BB; 117 | 118 | AA << 1 << 2 << 3 << endr << 4 << 5 << 6 << endr << 7 << 8 << 9; 119 | BB << 3 << 2 << 1 << endr << 6 << 5 << 4 << endr << 9 << 8 << 7; 120 | 121 | // comparison of matrices (element-wise); output of a relational operator is a umat 122 | umat ZZ = (AA >= BB); 123 | ZZ.print("ZZ:"); 124 | 125 | // cubes ("3D matrices") 126 | cube Q( B.n_rows, B.n_cols, 2 ); 127 | 128 | Q.slice(0) = B; 129 | Q.slice(1) = 2.0 * B; 130 | 131 | Q.print("Q:"); 132 | 133 | // 2D field of arbitrary length row vectors (fields can also store abitrary objects, eg. instances of std::string) 134 | field xyz(3,2); 135 | 136 | xyz(0,0) = randu(1,2); 137 | xyz(1,0) = randu(1,3); 138 | xyz(2,0) = randu(1,4); 139 | xyz(0,1) = randu(1,5); 140 | xyz(1,1) = randu(1,6); 141 | xyz(2,1) = randu(1,7); 142 | 143 | cout << "xyz:" << endl; 144 | cout << xyz << endl; 145 | 146 | return 0; 147 | } 148 | 149 | -------------------------------------------------------------------------------- /src/test/example1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std; 5 | using namespace arma; 6 | 7 | 8 | int 9 | main(int argc, char** argv) 10 | { 11 | cout << "Armadillo version: " << arma_version::as_string() << endl; 12 | 13 | mat A(2,3); // directly specify the matrix size (elements are uninitialised) 14 | 15 | cout << "A.n_rows: " << A.n_rows << endl; // .n_rows and .n_cols are read only 16 | cout << "A.n_cols: " << A.n_cols << endl; 17 | 18 | A(1,2) = 456.0; // directly access an element (indexing starts at 0) 19 | A.print("A:"); 20 | 21 | 22 | A = 5.0; // scalars are treated as a 1x1 matrix 23 | A.print("A:"); 24 | 25 | 26 | A.set_size(4,5); // change the size (data is not preserved) 27 | 28 | A.fill(5.0); // set all elements to a particular value 29 | A.print("A:"); 30 | 31 | // endr indicates "end of row" 32 | A << 0.165300 << 0.454037 << 0.995795 << 0.124098 << 0.047084 << endr 33 | << 0.688782 << 0.036549 << 0.552848 << 0.937664 << 0.866401 << endr 34 | << 0.348740 << 0.479388 << 0.506228 << 0.145673 << 0.491547 << endr 35 | << 0.148678 << 0.682258 << 0.571154 << 0.874724 << 0.444632 << endr 36 | << 0.245726 << 0.595218 << 0.409327 << 0.367827 << 0.385736 << endr; 37 | 38 | A.print("A:"); 39 | /* 40 | // determinant 41 | //cout << "det(A): " << det(A) << endl; 42 | 43 | // inverse 44 | // cout << "inv(A): " << endl << inv(A) << endl; 45 | */ 46 | // save matrix as a file 47 | A.save("A.txt", raw_ascii); 48 | 49 | // load from file 50 | mat B; 51 | B.load("A.txt"); 52 | 53 | // submatrices 54 | cout << "B( span(0,2), span(3,4) ):" << endl << B( span(0,2), span(3,4) ) << endl; 55 | 56 | cout << "B.row(0): " << endl << B.row(0) << endl; 57 | 58 | cout << "B.col(1): " << endl << B.col(1) << endl; 59 | 60 | // transpose 61 | cout << "B.t(): " << endl << B.t() << endl; 62 | 63 | // maximum from each column (traverse along rows) 64 | cout << "max(B): " << endl << max(B) << endl; 65 | 66 | // maximum from each row (traverse along columns) 67 | cout << "max(B,1): " << endl << max(B,1) << endl; 68 | 69 | // maximum value in B 70 | cout << "max(max(B)) = " << max(max(B)) << 
endl; 71 | 72 | // sum of each column (traverse along rows) 73 | cout << "sum(B): " << endl << sum(B) << endl; 74 | 75 | // sum of each row (traverse along columns) 76 | cout << "sum(B,1) =" << endl << sum(B,1) << endl; 77 | 78 | // sum of all elements 79 | cout << "accu(B): " << accu(B) << endl; 80 | 81 | // trace = sum along diagonal 82 | cout << "trace(B): " << trace(B) << endl; 83 | 84 | // generate the identity matrix 85 | mat C = eye(4,4); 86 | 87 | // random matrix with values uniformly distributed in the [0,1] interval 88 | mat D = randu(4,4); 89 | D.print("D:"); 90 | 91 | // row vectors are treated like a matrix with one row 92 | rowvec r; 93 | r << 0.59119 << 0.77321 << 0.60275 << 0.35887 << 0.51683; 94 | r.print("r:"); 95 | 96 | // column vectors are treated like a matrix with one column 97 | colvec q; 98 | q << 0.14333 << 0.59478 << 0.14481 << 0.58558 << 0.60809; 99 | q.print("q:"); 100 | 101 | // dot or inner product 102 | cout << "as_scalar(r*q): " << as_scalar(r*q) << endl; 103 | 104 | // outer product 105 | cout << "q*r: " << endl << q*r << endl; 106 | 107 | // multiply-and-accumulate operation (no temporary matrices are created) 108 | cout << "accu(A % B) = " << accu(A % B) << endl; 109 | 110 | // example of a compound operation 111 | B += 2.0 * A.t(); 112 | B.print("B:"); 113 | 114 | // imat specifies an integer matrix 115 | imat AA; 116 | imat BB; 117 | 118 | AA << 1 << 2 << 3 << endr << 4 << 5 << 6 << endr << 7 << 8 << 9; 119 | BB << 3 << 2 << 1 << endr << 6 << 5 << 4 << endr << 9 << 8 << 7; 120 | 121 | // comparison of matrices (element-wise); output of a relational operator is a umat 122 | umat ZZ = (AA >= BB); 123 | ZZ.print("ZZ:"); 124 | 125 | // cubes ("3D matrices") 126 | cube Q( B.n_rows, B.n_cols, 2 ); 127 | 128 | Q.slice(0) = B; 129 | Q.slice(1) = 2.0 * B; 130 | 131 | Q.print("Q:"); 132 | /* 133 | // 2D field of arbitrary length row vectors (fields can also store abitrary objects, eg. instances of std::string) 134 | field xyz(3,2); 135 | 136 | xyz(0,0) = randu(1,2); 137 | xyz(1,0) = randu(1,3); 138 | xyz(2,0) = randu(1,4); 139 | xyz(0,1) = randu(1,5); 140 | xyz(1,1) = randu(1,6); 141 | xyz(2,1) = randu(1,7); 142 | 143 | cout << "xyz:" << endl; 144 | cout << xyz << endl; 145 | */ 146 | return 0; 147 | } 148 | 149 | --------------------------------------------------------------------------------
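As a small companion to example1.cpp above (not a file in this repository), the same Armadillo API can also solve a dense linear system directly; solve() is generally preferred over forming inv(A) explicitly, and it relies on the LAPACK backend already linked via -llapack in the arma Makefile. The matrix values below are arbitrary illustrations.

#include <iostream>
#include <armadillo>

using namespace arma;

int main() {
    // A well-conditioned 3x3 system A*x = b (diagonal shift keeps A invertible).
    mat A = randu<mat>(3, 3) + 3.0 * eye<mat>(3, 3);
    vec b = randu<vec>(3);

    // solve() factorises A instead of computing inv(A)*b; faster and more accurate.
    vec x = solve(A, b);
    x.print("x:");

    // Residual should be close to zero.
    vec r = A * x - b;
    r.print("residual:");

    std::cout << "det(A): " << det(A) << std::endl;
    return 0;
}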