├── .gitignore ├── CNN ├── CNN.cpp ├── CNN.h ├── Makefile └── main.cpp ├── ConvolveLayer ├── ConvolveLayer.cpp ├── ConvolveLayer.h ├── Makefile └── main.cpp ├── GPUMat ├── GPUMat.cpp ├── GPUMat.h ├── GPU_Math_Func.cu ├── GPU_Math_Func.h ├── Makefile ├── caffe_references │ ├── GPUMat.cpp │ ├── GPUMat.h │ ├── device_alternate[1].hpp │ ├── math_functions[1].cu │ ├── math_functions[1].hpp │ └── syncedmem[1].cpp ├── cublastest │ ├── GPUMat.cpp │ ├── GPUMat.h │ ├── Makefile │ ├── armatest │ │ ├── armatest.sdf │ │ ├── armatest.sln │ │ ├── armatest │ │ │ ├── armatest.vcxproj.filters │ │ │ └── armatest.vcxproj.user │ │ └── x64 │ │ │ └── Debug │ │ │ └── armatest.ilk │ ├── cublastest │ │ ├── cublastest.sdf │ │ ├── cublastest.sln │ │ ├── cublastest │ │ │ └── Debug │ │ │ │ └── kernel.cu.deps │ │ └── x64 │ │ │ └── Debug │ │ │ └── cublastest.ilk │ ├── main.cpp │ ├── main.cu │ └── test ├── device_common.cpp ├── device_common.h ├── gpumat │ ├── .cproject │ └── .project ├── main.cu └── test_GPUMat.cpp ├── LSTM ├── LSTMLayer.cpp ├── LSTMLayer.h ├── Makefile ├── main.cpp ├── test └── testdata.dat ├── Makefile ├── MatArray ├── Makefile ├── MatArray.h └── main.cpp ├── PoolLayer ├── Makefile ├── PoolLayer.cpp ├── PoolLayer.h └── main.cpp ├── RBM ├── Makefile ├── ProgramArgs.cpp ├── ProgramArgs.h ├── RBM.cpp ├── RBM.h ├── mainSDA.cpp └── run.txt ├── README.md ├── include ├── ActivationFunc.h ├── ActivationLayer.h ├── BaseLayer.h ├── BaseModel.h ├── DeepLearning.pb.h ├── ElementMultiAddLayer.h ├── ElementwiseLayer.h ├── ElmanRL.h ├── Globals.h ├── Initializer.h ├── Layer.h ├── Layer_binaryOp.h ├── Layer_unitaryOp.h ├── LinearAdditionLayer.h ├── MultiAddLayer.h ├── MultiLayerPerceptron.h ├── Net.h ├── RNN.h ├── RecurrLayer.h ├── Util.h ├── common.h ├── io.h └── optimization.h ├── plotting ├── Driver.py └── Util.py └── src ├── ActivationLayer.cpp ├── BaseLayer.cpp ├── DeepLearning.pb.cc ├── ElementMultiAddLayer.cpp ├── ElementwiseLayer.cpp ├── ElmanRL.cpp ├── LBFGS.cpp ├── LinearAdditionLayer.cpp ├── MultiAddLayer.cpp ├── MultiLayerPerceptron.cpp ├── Proto ├── DeepLearning.proto ├── Makefile ├── generateFile.sh └── test ├── RNN.cpp ├── RecurrLayer.cpp ├── SteepDescent.cpp ├── Util.cpp ├── io.cpp └── test ├── BaseLayer ├── Makefile ├── test_BaseLayer └── test_BaseLayer.cpp ├── ElmanRL ├── Makefile ├── RLtest.prototxt └── test ├── GRNN ├── GRNN.cpp ├── GRNN.h └── Makefile ├── IO ├── Makefile ├── net.prototxt ├── qsolver.prototxt ├── test └── test_IO.cpp ├── IOtest ├── DeepLearning.pb.cc ├── DeepLearning.pb.h ├── Makefile └── net.prototxt ├── Makefile.common ├── MultiLayerPerceptron ├── Makefile ├── net.prototxt ├── result.xt ├── runningparameters │ ├── sin(5x)+exp(5x) │ │ ├── net.prototxt │ │ └── readme │ └── sin(5x) │ │ ├── net.prototxt │ │ └── readme ├── testSimple.prototxt ├── test_funcApprox.cpp └── test_mlp.cpp ├── MultiThreadArmaMat └── Makefile ├── NN-RL ├── Makefile ├── Model_PoleFull.cpp ├── Model_PoleFull.h ├── Model_PoleSimple.cpp ├── Model_PoleSimple.h ├── NN_RLSolverBase.cpp ├── NN_RLSolverBase.h ├── NN_RLSolverMLP.cpp ├── NN_RLSolverMLP.h ├── NN_RLSolverMultiMLP.cpp ├── NN_RLSolverMultiMLP.h ├── NN_RLSolverRNN.cpp ├── NN_RLSolverRNN.h ├── NN_RL_Driver.cpp ├── Qtableresult │ ├── qtable1.tif │ ├── qtable2.tif │ ├── qtable3.tif │ └── qtableAction.tif ├── RLSolverBase.h ├── RLSolver_2DTable.cpp ├── RLSolver_2DTable.h ├── elman.prototxt ├── mlp.prototxt ├── multimlp.prototxt ├── net.prototxt ├── plotQMap.m ├── plotQtable.m ├── qsolver.prototxt └── rnn.prototxt ├── Optimization ├── Makefile └── main.cpp ├── 
RNN ├── Makefile ├── RLtest2.prototxt ├── gradcheck.prototxt ├── net.prototxt ├── test ├── testIntermediate.prototxt ├── testSimple.prototxt ├── test_RNN.cpp └── testlittleTimer.prototxt ├── RNNtestRLSet └── prediction ├── Trainer ├── Makefile ├── Trainer.cpp ├── Trainer.h ├── net.prototxt ├── test └── test_trainer.cpp ├── TwolayerPerceptron ├── Makefile ├── MultiLayerPerceptron.cpp ├── MultiLayerPerceptron.h └── mainSDA.cpp ├── Util ├── Makefile ├── test └── test_Utils.cpp ├── arma ├── Makefile └── example1.cpp └── example1.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | # PDFs 31 | *.pdf 32 | *.lyx 33 | 34 | # data 35 | *.dat 36 | *.txt 37 | *.bin 38 | *image* 39 | *.JPG 40 | *.png 41 | *.log 42 | *.zip 43 | #other 44 | *.orig 45 | /CppApplication_1/build/ 46 | /CppApplication_1/dist/ 47 | *~ 48 | nbproject/ 49 | *.swp 50 | *.pyc 51 | *data* 52 | MNIST 53 | test_* 54 | *.tlog 55 | *.idb 56 | *.pdb 57 | *.suo 58 | *.vcxproj 59 | -------------------------------------------------------------------------------- /CNN/CNN.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../BaseLayer/BaseLayer.h" 4 | #include "../ConvolveLayer/ConvolveLayer.h" 5 | #include "../PoolLayer/PoolLayer.h" 6 | #include "../Optimization/optimization.h" 7 | 8 | namespace NeuralNet{ 9 | 10 | struct TrainingPara { 11 | 12 | TrainingPara(double eps0=1e-6, int NEpoch0 = 500, 13 | int miniBatchSize0 = 10, double alpha0 = 0.1, int save = 50, bool load = false): 14 | eps(eps0),NEpoch(NEpoch0), 15 | miniBatchSize(miniBatchSize0), alpha(alpha0), saveFrequency(save), loadFlag(load) {} 16 | double eps; 17 | int NEpoch; 18 | int miniBatchSize; 19 | double alpha; 20 | int saveFrequency; 21 | bool loadFlag; 22 | // Method method; 23 | void print() const { 24 | 25 | std::cout << eps << "\t"; 26 | std::cout << NEpoch << "\t"; 27 | std::cout << miniBatchSize << "\t"; 28 | std::cout << alpha << std::endl; 29 | 30 | } 31 | }; 32 | 33 | 34 | class CNN { 35 | friend class CNNTrainer; 36 | public: 37 | CNN(){}; 38 | CNN(std::shared_ptr trainingX0, std::shared_ptr trainingY0, int nChanel0, TrainingPara trainingPara0); 39 | void train(); 40 | void setTrainingData(std::shared_ptr trainingX0, std::shared_ptr trainingY0, int nChanel0); 41 | void feedForward(std::shared_ptr); 42 | void backProp(std::shared_ptr); 43 | void test(std::shared_ptr testX0, std::shared_ptr testY0); 44 | double calLayerError(std::shared_ptr delta); 45 | void calNumericGrad(std::shared_ptr, std::shared_ptr); 46 | void calGrad(std::shared_ptr trainingX); 47 | void vectoriseGrad(arma::vec &grad); 48 | void deVectoriseWeight(arma::vec &x); 49 | void vectoriseWeight(arma::vec &x); 50 | void saveWeight(std::string str = "cnn_weights.dat"); 51 | void loadWeight(std::string str = "cnn_weights.dat"); 52 | 53 | bool testGrad; 54 | std::vector poolLayers; 55 | std::vector convoLayers; 56 | std::vector FCLayers; 57 | int numInstance; 58 | std::shared_ptr trainingX; 59 | std::shared_ptr trainingY, output; 60 | int nChanel; 61 | TrainingPara trainingPara; 62 | int inputDim_x, inputDim_y; 63 | 
int outputDim; 64 | int numFCLayers; 65 | int numCLayers; 66 | int totalDim; 67 | }; 68 | 69 | class CNNTrainer:public Optimization::ObjectFunc{ 70 | public: 71 | CNNTrainer(CNN &CNN); 72 | void gradientChecking(); 73 | virtual double operator()(arma::vec &x, arma::vec &grad); 74 | // std::shared_ptr x_init; 75 | private: 76 | CNN &cnn; 77 | }; 78 | 79 | } -------------------------------------------------------------------------------- /CNN/Makefile: -------------------------------------------------------------------------------- 1 | # this make file now is dynamically linking 2 | #OPTFLAGS = -O3 -march=nocona -mfpmath=sse -msse3 -Wuninitialized -flto 3 | #MKL_INCLUDE = /opt/intel/mkl/include/ 4 | #MKL_INCLUDE = /opt/intel/composer_xe_2013.0.079/mkl/include/ 5 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 6 | 7 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 8 | CPP = g++ 9 | #MKLROOT = /opt/intel/mkl 10 | #MKLROOT = /opt/intel/composer_xe_2013.0.079/mkl 11 | MKLROOT = /opt/intel/composerxe/mkl 12 | MKLLINKFLAGS = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread 13 | MKL_DL_LINKFLAGS = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm 14 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include/armadillo 15 | ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -llapack -lopenblas 16 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 17 | CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) -I/opt/boost/boost_1_57_0 -c -D__LINUX -DARMA_NO_DEBUG -DDEBUG -g3 18 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program -march=native 19 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 20 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl #$(LINKOPTFLAGS) 21 | #LINKFLAGS = 22 | #ODIR=obj 23 | ODIR = 24 | 25 | OBJ = main.o PoolLayer.o ConvolveLayer.o BaseLayer.o CNN.o load_cifar10.o LBFGS.o 26 | 27 | test : $(OBJ) 28 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 29 | 30 | BaseLayer.o : ../BaseLayer/BaseLayer.cpp ../BaseLayer/BaseLayer.h 31 | $(CPP) -c $(CXXFLAGS) $< 32 | 33 | PoolLayer.o : ../PoolLayer/PoolLayer.cpp ../PoolLayer/PoolLayer.h 34 | $(CPP) -c $(CXXFLAGS) $< 35 | 36 | ConvolveLayer.o : ../ConvolveLayer/ConvolveLayer.cpp ../ConvolveLayer/ConvolveLayer.h 37 | $(CPP) -c $(CXXFLAGS) $< 38 | 39 | load_cifar10.o : ../cifar-10/load_cifar10.cpp 40 | $(CPP) -c $(CXXFLAGS) $< 41 | 42 | LBFGS.o : ../Optimization/LBFGS.cpp ../Optimization/optimization.h 43 | $(CPP) -c $(CXXFLAGS) $< 44 | #%.o : %.cpp 45 | # $(CPP) -c $(CXXFLAGS) 46 | 47 | 48 | clean: 49 | rm -f *.o *~ 50 | -------------------------------------------------------------------------------- /ConvolveLayer/ConvolveLayer.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../MatArray/MatArray.h" 4 | 5 | 6 | struct ConvolveLayer { 7 | enum ActivationType {ReLU, tanh, sigmoid, linear}; 8 | ConvolveLayer(int numFilters0, int filterDim0_x, int filterDim0_y, int stride0, ActivationType type0); 9 | void activateUp(std::shared_ptr); 10 | // upate the parameters and propgate the error down for the lower layer 11 | void updatePara(std::shared_ptr delta_upper, double learningRate); 12 | void calGrad(std::shared_ptr delta_upper); 13 | void calGrad_matrixMethod(std::shared_ptr delta_upper); 14 | void initializeWeight(); 15 | 
void setInputDim(int, int, int); 16 | void propError(std::shared_ptr delta_upper); 17 | void vectoriseGrad(double *ptr, size_t offset); 18 | void deVectoriseWeight(double *ptr, size_t offset); 19 | void vectoriseWeight(double *ptr, size_t offset); 20 | void convolve_naive(std::shared_ptr input); 21 | void im2col(std::shared_ptr input, std::shared_ptr &output); 22 | void col2im(std::shared_ptr input, std::shared_ptr &output); 23 | void convolve_matrixMethod(std::shared_ptr input); 24 | 25 | int numFilters; 26 | // every filter is a 4D cube 27 | // MatArray::Mat2DArray_ptr filters, grad_W; 28 | Tensor_4D::ptr filters, grad_W; 29 | std::shared_ptr delta_out, input, output; 30 | std::shared_ptr B, grad_B; 31 | std::shared_ptr filters2D, input2D, grad_W2D; 32 | 33 | int filterDim_x, filterDim_y; 34 | int inputDim_x; 35 | int inputDim_y; 36 | int inputDim_z; 37 | int inputSize; 38 | int outputSize; 39 | int outputDim_x, outputDim_y, outputDim_z; 40 | int stride; 41 | int W_size, B_size, totalSize; 42 | ActivationType type; 43 | }; -------------------------------------------------------------------------------- /ConvolveLayer/Makefile: -------------------------------------------------------------------------------- 1 | # this make file now is dynamically linking 2 | #OPTFLAGS = -O3 -march=nocona -mfpmath=sse -msse3 -Wuninitialized -flto 3 | #MKL_INCLUDE = /opt/intel/mkl/include/ 4 | #MKL_INCLUDE = /opt/intel/composer_xe_2013.0.079/mkl/include/ 5 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 6 | 7 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 8 | CPP = g++ 9 | #MKLROOT = /opt/intel/mkl 10 | #MKLROOT = /opt/intel/composer_xe_2013.0.079/mkl 11 | MKLROOT = /opt/intel/composerxe/mkl 12 | MKLLINKFLAGS = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread 13 | MKL_DL_LINKFLAGS = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm 14 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include/armadillo 15 | ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -llapack -lopenblas 16 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 17 | CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) -I/opt/boost/boost_1_57_0 -c -DDEBUG -D__LINUX -g3 18 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program 19 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 20 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl 21 | #LINKFLAGS = 22 | #ODIR=obj 23 | ODIR = 24 | 25 | OBJ = main.o PoolLayer.o 26 | 27 | test : $(OBJ) 28 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 29 | 30 | #%.o : %.cpp 31 | # $(CPP) -c $(CXXFLAGS) 32 | 33 | 34 | clean: 35 | rm -f *.o *~ 36 | -------------------------------------------------------------------------------- /ConvolveLayer/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "ConvolveLayer.h" 10 | 11 | 12 | void loadData_MNIST(std::shared_ptr X, 13 | std::shared_ptr Y); 14 | 15 | int main(int argc, char *argv[]) { 16 | std::shared_ptr DataX(new arma::mat); 17 | std::shared_ptr DataY(new arma::mat); 18 | std::shared_ptr trainDataX(new arma::mat); 19 | std::shared_ptr trainDataY(new arma::mat); 20 | std::shared_ptr testDataX(new arma::mat); 21 | std::shared_ptr testDataY(new arma::mat); 
22 | std::shared_ptr ValidationDataX(new arma::mat); 23 | std::shared_ptr ValidationDataY(new arma::mat); 24 | 25 | loadData_MNIST(DataX,DataY); 26 | 27 | int ntrain = 1000; 28 | int ntest = 100; 29 | // now I split data into train, test, and validation 30 | trainDataX = std::make_shared(DataX->cols(0,ntrain-1)); 31 | trainDataY = std::make_shared(DataY->cols(0,ntrain-1)); 32 | testDataX = std::make_shared(DataX->cols(ntrain,ntrain+ntest-1)); 33 | testDataY = std::make_shared(DataY->cols(ntrain,ntrain+ntest-1)); 34 | 35 | DataX.reset(); 36 | DataY.reset(); 37 | 38 | 39 | 40 | int inputDim = trainDataX->n_cols; 41 | int outputDim = trainDataY->n_cols; 42 | std::cout << inputDim << std::endl; 43 | std::cout << outputDim << std::endl; 44 | std::cout << trainDataX->n_rows << std::endl; 45 | std::cout << trainDataY->n_rows << std::endl; 46 | 47 | 48 | int numLayers = 2; 49 | std::vector dimensions; 50 | 51 | dimensions.push_back(784); 52 | dimensions.push_back(100); 53 | dimensions.push_back(50); 54 | 55 | 56 | } 57 | 58 | 59 | void loadData_MNIST(std::shared_ptr X, 60 | std::shared_ptr Y) { 61 | 62 | std::string filename_base("../MNIST/data"); 63 | std::string filename; 64 | char tag[50]; 65 | char x; 66 | int count; 67 | int numFiles = 10; 68 | int featSize = 28*28; 69 | int labelSize = 10; 70 | int numSamples = 1000; 71 | X->set_size(featSize,numFiles*numSamples); 72 | Y->set_size(labelSize, numFiles*numSamples); 73 | Y->fill(0); 74 | 75 | 76 | for (int i = 0 ; i < numFiles ; i++) { 77 | sprintf(tag,"%d",i); 78 | filename=filename_base+(std::string)tag; 79 | std::cout << filename << std::endl; 80 | std::ifstream infile; 81 | infile.open(filename,std::ios::binary | std::ios::in); 82 | if (infile.is_open()) { 83 | 84 | for (int j = 0 ; j < numSamples ; j++) { 85 | 86 | for (int k =0 ; k n_rows = row0; 7 | this->n_cols = col0; 8 | this->n_elem = row0 * col0; 9 | CUDA_CHECK(cudaMalloc((void **)&_data_GPU ,this->n_elem * sizeof(*_data_GPU))); 10 | } 11 | #if 0 12 | GPUMat& GPUMat::copyFromCPU(const GPUMat& rhs){ 13 | // Check for self-assignment! 14 | if (this != &rhs) { 15 | delete _data_CPU; 16 | cudaFree(_data_GPU); 17 | cudaStat = cudaMalloc((void **)&_data_GPU ,n_elem*sizeof(double)); 18 | cudaStat = cublasDcopy(handle, n_elem, rhs.memptr_GPU(),1, _data_GPU,1); 19 | // Deallocate, allocate new space, copy values... 20 | } 21 | // 1. Deallocate any memory that MyClass is using internally 22 | // 2. Allocate some memory to hold the contents of rhs 23 | // 3. Copy the values from rhs into this instance 24 | // 4. Return *this 25 | return *this; 26 | } 27 | #endif 28 | GPUMat& GPUMat::operator=(const GPUMat& rhs){ 29 | // Check for self-assignment! 30 | if (this != &rhs) { 31 | delete _data_CPU; 32 | cudaFree(_data_GPU); 33 | _data_CPU = (double *)malloc(rhs.n_elem * sizeof(double)); 34 | const double *p = rhs.memptr_CPU(); 35 | for (int i = 0; i < n_elem; i++) { 36 | _data_CPU[i] = *(p+i); 37 | } 38 | 39 | this->syncToGPU(); 40 | // Deallocate, allocate new space, copy values... 41 | } 42 | // 1. Deallocate any memory that MyClass is using internally 43 | // 2. Allocate some memory to hold the contents of rhs 44 | // 3. Copy the values from rhs into this instance 45 | // 4. 
Return *this 46 | return *this; 47 | } 48 | 49 | GPUMat& GPUMat::st(){ 50 | this->transposeFlag=((this->transposeFlag==false)?true:false); 51 | return *this; 52 | } 53 | 54 | GPUMat& GPUMat::operator+=(const GPUMat& rhs){ 55 | 56 | ASSERT(this->n_elem==rhs.n_elem, "number of elements not equal for addition"); 57 | gpu_add(n_elem, this->memptr_GPU(), rhs.memptr_GPU(), this->memptr_GPU()); 58 | return *this; 59 | } 60 | 61 | 62 | const GPUMat GPUMat::operator+(const GPUMat& rhs) const{ 63 | GPUMat result = *this; // Make a copy of myself. Same as MyClass result(*this); 64 | result += rhs; // Use += to add other to the copy. 65 | return result; // All done! 66 | 67 | } 68 | 69 | void GPUMat::ones() { 70 | gpu_set(this->n_elem, 1.0, this->memptr_GPU()); 71 | } 72 | 73 | void GPUMat::zeros(){ 74 | gpu_set(this->n_elem, 0.0, this->memptr_GPU()); 75 | } 76 | 77 | void GPUMat::print(std::string str) { 78 | this->syncToCPU(); 79 | std::cout << str << std::endl; 80 | for (int i = 0; i < this->n_rows; i++){ 81 | for (int j = 0; j < this->n_cols; j++){ 82 | std::cout << _data_CPU[j*n_rows + i] << "\t"; 83 | 84 | } 85 | std::cout << std::endl; 86 | 87 | } 88 | } 89 | 90 | -------------------------------------------------------------------------------- /GPUMat/GPUMat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "cublas_v2.h" 7 | /* notes on the design of the GPUMat 8 | 1) the synchronization to the CPU is lazy 9 | 10 | */ 11 | class GPUMat{ 12 | private: 13 | double *_data_CPU, *_data_GPU; 14 | 15 | public: 16 | enum MemLocation {CPU_GPU, GPU_ONLY, CPU_ONLY}; 17 | GPUMat(){} 18 | GPUMat(int row0, int col0); 19 | ~GPUMat(){ 20 | delete _data_CPU; 21 | cudaFree((void *)_data_GPU); 22 | // cublasDestroy(handle); 23 | } 24 | void syncToGPU(){ 25 | // if(!_data_CPU) _data_CPU = (double *) malloc(n_elem * sizeof(double)); 26 | // if( loc==CPU_ONLY) loc = CPU_GPU; 27 | cudaMalloc((void **)&_data_GPU ,n_elem*sizeof(double)); 28 | cublasSetMatrix (n_rows,n_cols, sizeof(double) ,_data_CPU,n_rows,_data_GPU ,n_cols); //a -> d_a 29 | } 30 | void syncToCPU(){ 31 | // if(!_data_CPU) _data_CPU = (double *) malloc(n_elem * sizeof(double)); 32 | // if( loc==CPU_ONLY) loc = CPU_GPU; 33 | _data_CPU = (double *) malloc(n_elem * sizeof(double)); 34 | cublasGetMatrix (n_rows, n_cols, sizeof(double) ,_data_GPU ,n_rows,_data_CPU,n_rows); 35 | } 36 | void zeros(); 37 | void ones(); 38 | void randu(); 39 | void randn(); 40 | double* memptr_CPU(){return _data_CPU;} 41 | double* memptr_GPU(){return _data_GPU;} 42 | double* memptr_CPU() const {return _data_CPU;} 43 | double* memptr_GPU() const {return _data_GPU;} 44 | GPUMat& st(); 45 | GPUMat& operator=(const GPUMat& rhs); 46 | GPUMat& operator+=(const GPUMat& rhs); 47 | GPUMat& operator-=(const GPUMat& rhs); 48 | GPUMat& operator*=(const GPUMat& rhs); 49 | GPUMat& operator*=(const double scal); 50 | GPUMat& operator%=(const GPUMat& rhs); 51 | const GPUMat operator+(const GPUMat& rhs) const; 52 | const GPUMat operator-(const GPUMat& rhs) const; 53 | const GPUMat operator*(const GPUMat& rhs) const; 54 | const GPUMat operator%(const GPUMat& rhs) const; 55 | void print(std::string str=""); 56 | 57 | int n_rows, n_cols, n_elem; 58 | bool transposeFlag; 59 | MemLocation loc; 60 | // cudaError_t cudaStat ; // cudaMalloc status 61 | // cublasStatus_t stat ; // CUBLAS functions status 62 | // cublasHandle_t handle ; // CUBLAS context 63 | }; 64 | 
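A minimal usage sketch of the lazy CPU/GPU synchronization described in the design notes at the top of GPUMat.h, patterned after test_GPUMat.cpp; it only touches members declared above (the constructor, ones(), operator+=, print()) and assumes GPUEnv::GetInstance() from device_common.h has already created the shared cuBLAS handle. It is a hypothetical driver, not a file in the repository:

    // hypothetical driver (not part of the repository), assuming GPUEnv/GPUMat as declared above
    #include "device_common.h"
    #include "GPUMat.h"

    int main() {
        GPUEnv::GetInstance();       // set up the shared cuBLAS handle first, as test_GPUMat.cpp does

        GPUMat a(4, 4), b(4, 4);     // constructor cudaMalloc's the device buffers
        a.ones();                    // gpu_set fills the GPU copy directly
        b.ones();
        a += b;                      // gpu_add runs on the device; no host copy exists yet

        a.print("a = ones + ones");  // print() calls syncToCPU(), so the host copy is
                                     // materialized only when it is actually read
        return 0;
    }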
-------------------------------------------------------------------------------- /GPUMat/GPU_Math_Func.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include // for std::fabs and std::signbit 4 | #include "cblas.h" 5 | #include "device_common.h" 6 | 7 | 8 | 9 | // Decaf gpu gemm provides an interface that is almost the same as the cpu 10 | // gemm function - following the c convention and calling the fortran-order 11 | // gpu code under the hood. 12 | void gpu_gemm(const CBLAS_TRANSPOSE TransA, 13 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 14 | const double alpha, const double* A, const double* B, const double beta, 15 | double* C); 16 | 17 | void gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, 18 | const double alpha, const double* A, const double* x, const double beta, 19 | double* y); 20 | 21 | void gpu_axpy(const int N, const double alpha, const double* X, 22 | double* Y); 23 | 24 | void gpu_axpby(const int N, const double alpha, const double* X, 25 | const double beta, double* Y); 26 | 27 | void gpu_memcpy(const size_t N, const void *X, void *Y); 28 | 29 | void gpu_set(const int N, const double alpha, double *X); 30 | 31 | inline void gpu_memset(const size_t N, const int alpha, void* X) { 32 | CUDA_CHECK(cudaMemset(X, alpha, N)); // NOLINT(caffe/alt_fn) 33 | } 34 | 35 | void gpu_add_scalar(const int N, const double alpha, double *X); 36 | 37 | void gpu_scal(const int N, const double alpha, double *X); 38 | 39 | void gpu_add(const int N, const double* a, const double* b, double* y); 40 | 41 | void gpu_selfAdd(const int N, const double* a, const double* b, double* y); 42 | 43 | void gpu_sub(const int N, const double* a, const double* b, double* y); 44 | 45 | void gpu_mul(const int N, const double* a, const double* b, double* y); 46 | 47 | void gpu_abs(const int n, const double* a, double* y); 48 | 49 | template 50 | void gpu_transform(const int n, const double* a, functor y); 51 | 52 | // gpu_rng_uniform with two arguments generates integers in the range 53 | // [0, UINT_MAX]. 54 | void gpu_rng_uniform(const int n, unsigned int* r); 55 | 56 | // gpu_rng_uniform with four arguments generates floats in the range 57 | // (a, b] (strictly greater than a, less than or equal to b) due to the 58 | // specification of curandGenerateUniform. With a = 0, b = 1, just calls 59 | // curandGenerateUniform; with other limits will shift and scale the outputs 60 | // appropriately after calling curandGenerateUniform. 
61 | 62 | void gpu_rng_uniform(const int n, const double a, const double b, double* r); 63 | 64 | void gpu_rng_gaussian(const int n, const double mu, const double sigma, 65 | double* r); 66 | 67 | void gpu_rng_bernoulli(const int n, const double p, int* r); 68 | 69 | void gpu_dot(const int n, const double* x, const double* y, double* out); 70 | 71 | -------------------------------------------------------------------------------- /GPUMat/Makefile: -------------------------------------------------------------------------------- 1 | CPP_CUDA = nvcc 2 | CPP = nvcc 3 | CXXFLAGS = --std=c++11 -I/opt/boost_1_57_0 4 | #nvcc mmul_1.cu -lcublas -lcurand -o mmul_1 5 | LINKFLAGS = -lcublas -lcurand -L~/workspace/libs/gtest-1.7.0/mybuilds -lgtest 6 | 7 | OBJ = test_GPUMat.o GPUMat.o GPU_Math_Func.o device_common.o 8 | 9 | all: test 10 | 11 | test : $(OBJ) 12 | $(CPP) -o $@ $(OBJ) $(LINKFLAGS) 13 | 14 | GPUMat.o : GPUMat.cpp 15 | $(CPP) -c $(CXXFLAGS) $@ $^ 16 | 17 | GPU_Math_Func.o : GPU_Math_Func.cu 18 | $(CPP_CUDA) -c $(CXXFLAGS) $@ $^ 19 | test_GPUMat.o : test_GPUMat.cpp 20 | $(CPP) -c $(CXXFLAGS) $@ $^ 21 | device_common.o : device_common.cpp 22 | $(CPP) -c $(CXXFLAGS) $< -o $@ 23 | #%.o : %.cpp 24 | # $(CPP) -c $(CXXFLAGS) 25 | 26 | 27 | clean: 28 | rm -f *.o *~ test 29 | -------------------------------------------------------------------------------- /GPUMat/caffe_references/GPUMat.cpp: -------------------------------------------------------------------------------- 1 | 2 | //copy on CPU 3 | //copy on GPU 4 | GPUMat& GPUMat::copyFromCPU(const GPUMat& rhs){ 5 | // Check for self-assignment! 6 | if (this != &rhs) { 7 | delete _data_CPU; 8 | cudaFree(_data_GPU); 9 | cudaStat = cudaMalloc((void **)&_data_GPU ,n_elem*sizeof(double)); 10 | cudaStat = cublasDcopy(handle, n_elem, rhs.memptr_GPU(),1, _data_GPU,1); 11 | // Deallocate, allocate new space, copy values... 12 | } 13 | // 1. Deallocate any memory that MyClass is using internally 14 | // 2. Allocate some memory to hold the contents of rhs 15 | // 3. Copy the values from rhs into this instance 16 | // 4. Return *this 17 | return *this; 18 | } 19 | 20 | GPUMat& GPUMat::operator=(const GPUMat& rhs){ 21 | // Check for self-assignment! 22 | if (this != &rhs) { 23 | delete _data_CPU; 24 | cudaFree(_data_GPU); 25 | _data_CPU = (double *)malloc(rhs.n_elem * sizeof(double)); 26 | double *p = rhs.memptr(); 27 | for (int i = 0; i < n_elem; i++) { 28 | _data_CPU[i] = *(p+i); 29 | } 30 | 31 | this->syncToGPU(); 32 | // Deallocate, allocate new space, copy values... 33 | } 34 | // 1. Deallocate any memory that MyClass is using internally 35 | // 2. Allocate some memory to hold the contents of rhs 36 | // 3. Copy the values from rhs into this instance 37 | // 4. Return *this 38 | return *this; 39 | } 40 | 41 | GPUMat& GPUMat::st(){ 42 | this->transposeFlag=((this->transposeFlag==false)?true:false); 43 | return *this; 44 | } 45 | 46 | GPUMat& GPUMat::operator+=(const GPUMat& rhs){ 47 | 48 | double scale = 1; 49 | stat=cublasDaxpy(handle,n_elem,&scale,rhs.memptr_GPU(),1,this->memptr_GPU,1); 50 | return *this; 51 | } 52 | 53 | 54 | const GPUMat GPUMat::operator+(const GPUMat& rhs) const{ 55 | GPUMat result = *this; // Make a copy of myself. Same as MyClass result(*this); 56 | result += other; // Use += to add other to the copy. 57 | return result; // All done! 
58 | 59 | } 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /GPUMat/caffe_references/GPUMat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "cublas_v2.h" 4 | 5 | class GPUMat{ 6 | private: 7 | double* _data_CPU, _data_GPU; 8 | 9 | public: 10 | enum MemLocation {CPU_GPU, GPU_ONLY, CPU_ONLY} 11 | GPUMat(); 12 | GPUMat(int row0, int col0); 13 | ~GPUMat(){ 14 | delete _data_CPU; 15 | cudaFree(_data_GPU); 16 | cublasDestroy(handle); 17 | } 18 | void syncToGPU(){ 19 | if(!_data_CPU) _data_CPU = (double *) malloc(n_elems * sizeof(double)); 20 | if( loc==CPU_ONLY) loc = CPU_GPU; 21 | cudaStat = cudaMalloc((void **)&_data_GPU ,n_elem*sizeof(double)); 22 | stat = cublasSetMatrix (n_rows,n_clos, sizeof(double) ,_data_CPU,n_rows,_data_GPU ,n_cols); //a -> d_a 23 | } 24 | void syncToCPU(){ 25 | if(!_data_CPU) _data_CPU = (double *) malloc(n_elems * sizeof(double)); 26 | if( loc==CPU_ONLY) loc = CPU_GPU; 27 | stat = cublasGetMatrix (n_rows, n_cols, sizeof(double) ,_data_GPU ,n_rows,_data_CPU,n_rows); 28 | } 29 | void zeros(); 30 | void ones(); 31 | double* memptr_CPU(){return _data_CPU;} 32 | double* memptr_GPU(){return _data_CPU;} 33 | GPUMat& st(); 34 | GPUMat& operator=(const GPUMat& rhs); 35 | GPUMat& operator+=(const GPUMat& rhs); 36 | GPUMat& operator-=(const GPUMat& rhs); 37 | GPUMat& operator*=(const GPUMat& rhs); 38 | GPUMat& operator*=(const double scal); 39 | GPUMat& operator%=(const GPUMat& rhs); 40 | const GPUMat operator+(const GPUMat& rhs) const; 41 | const GPUMat operator-(const GPUMat& rhs) const; 42 | const GPUMat operator*(const GPUMat& rhs) const; 43 | const GPUMat operator%(const GPUMat& rhs) const; 44 | 45 | int n_rows, n_cols, n_elems; 46 | bool transposeFlag; 47 | cudaError_t cudaStat ; // cudaMalloc status 48 | cublasStatus_t stat ; // CUBLAS functions status 49 | cublasHandle_t handle ; // CUBLAS context 50 | } 51 | -------------------------------------------------------------------------------- /GPUMat/caffe_references/device_alternate[1].hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE_UTIL_DEVICE_ALTERNATE_H_ 2 | #define CAFFE_UTIL_DEVICE_ALTERNATE_H_ 3 | 4 | #ifdef CPU_ONLY // CPU-only Caffe. 5 | 6 | #include 7 | 8 | // Stub out GPU calls as unavailable. 9 | 10 | #define NO_GPU LOG(FATAL) << "Cannot use GPU in CPU-only Caffe: check mode." 11 | 12 | #define STUB_GPU(classname) \ 13 | template \ 14 | void classname::Forward_gpu(const vector*>& bottom, \ 15 | const vector*>& top) { NO_GPU; } \ 16 | template \ 17 | void classname::Backward_gpu(const vector*>& top, \ 18 | const vector& propagate_down, \ 19 | const vector*>& bottom) { NO_GPU; } \ 20 | 21 | #define STUB_GPU_FORWARD(classname, funcname) \ 22 | template \ 23 | void classname::funcname##_##gpu(const vector*>& bottom, \ 24 | const vector*>& top) { NO_GPU; } \ 25 | 26 | #define STUB_GPU_BACKWARD(classname, funcname) \ 27 | template \ 28 | void classname::funcname##_##gpu(const vector*>& top, \ 29 | const vector& propagate_down, \ 30 | const vector*>& bottom) { NO_GPU; } \ 31 | 32 | #else // Normal GPU + CPU Caffe. 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include // cuda driver types 39 | #ifdef USE_CUDNN // cuDNN acceleration library. 40 | #include "caffe/util/cudnn.hpp" 41 | #endif 42 | 43 | // 44 | // CUDA macros 45 | // 46 | 47 | // CUDA: various checks for different function calls. 
48 | #define CUDA_CHECK(condition) \ 49 | /* Code block avoids redefinition of cudaError_t error */ \ 50 | do { \ 51 | cudaError_t error = condition; \ 52 | CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ 53 | } while (0) 54 | 55 | #define CUBLAS_CHECK(condition) \ 56 | do { \ 57 | cublasStatus_t status = condition; \ 58 | CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \ 59 | << caffe::cublasGetErrorString(status); \ 60 | } while (0) 61 | 62 | #define CURAND_CHECK(condition) \ 63 | do { \ 64 | curandStatus_t status = condition; \ 65 | CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " " \ 66 | << caffe::curandGetErrorString(status); \ 67 | } while (0) 68 | 69 | // CUDA: grid stride looping 70 | #define CUDA_KERNEL_LOOP(i, n) \ 71 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 72 | i < (n); \ 73 | i += blockDim.x * gridDim.x) 74 | 75 | // CUDA: check for error after kernel execution and exit loudly if there is one. 76 | #define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError()) 77 | 78 | namespace caffe { 79 | 80 | // CUDA: library error reporting. 81 | const char* cublasGetErrorString(cublasStatus_t error); 82 | const char* curandGetErrorString(curandStatus_t error); 83 | 84 | // CUDA: thread number configuration. 85 | // Use 1024 threads per block, which requires cuda sm_2x or above, 86 | // or fall back to attempt compatibility (best of luck to you). 87 | #if __CUDA_ARCH__ >= 200 88 | const int CAFFE_CUDA_NUM_THREADS = 1024; 89 | #else 90 | const int CAFFE_CUDA_NUM_THREADS = 512; 91 | #endif 92 | 93 | // CUDA: number of blocks for threads. 94 | inline int CAFFE_GET_BLOCKS(const int N) { 95 | return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; 96 | } 97 | 98 | } // namespace caffe 99 | 100 | #endif // CPU_ONLY 101 | 102 | #endif // CAFFE_UTIL_DEVICE_ALTERNATE_H_ 103 | -------------------------------------------------------------------------------- /GPUMat/caffe_references/syncedmem[1].cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "caffe/common.hpp" 4 | #include "caffe/syncedmem.hpp" 5 | #include "caffe/util/math_functions.hpp" 6 | 7 | namespace caffe { 8 | 9 | SyncedMemory::~SyncedMemory() { 10 | if (cpu_ptr_ && own_cpu_data_) { 11 | CaffeFreeHost(cpu_ptr_); 12 | } 13 | 14 | #ifndef CPU_ONLY 15 | if (gpu_ptr_) { 16 | CUDA_CHECK(cudaFree(gpu_ptr_)); 17 | } 18 | #endif // CPU_ONLY 19 | } 20 | 21 | inline void SyncedMemory::to_cpu() { 22 | switch (head_) { 23 | case UNINITIALIZED: 24 | CaffeMallocHost(&cpu_ptr_, size_); 25 | caffe_memset(size_, 0, cpu_ptr_); 26 | head_ = HEAD_AT_CPU; 27 | own_cpu_data_ = true; 28 | break; 29 | case HEAD_AT_GPU: 30 | #ifndef CPU_ONLY 31 | if (cpu_ptr_ == NULL) { 32 | CaffeMallocHost(&cpu_ptr_, size_); 33 | own_cpu_data_ = true; 34 | } 35 | caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_); 36 | head_ = SYNCED; 37 | #else 38 | NO_GPU; 39 | #endif 40 | break; 41 | case HEAD_AT_CPU: 42 | case SYNCED: 43 | break; 44 | } 45 | } 46 | 47 | inline void SyncedMemory::to_gpu() { 48 | #ifndef CPU_ONLY 49 | switch (head_) { 50 | case UNINITIALIZED: 51 | CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); 52 | caffe_gpu_memset(size_, 0, gpu_ptr_); 53 | head_ = HEAD_AT_GPU; 54 | break; 55 | case HEAD_AT_CPU: 56 | if (gpu_ptr_ == NULL) { 57 | CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); 58 | } 59 | caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_); 60 | head_ = SYNCED; 61 | break; 62 | case HEAD_AT_GPU: 63 | case SYNCED: 64 | break; 65 | } 66 | #else 67 | NO_GPU; 68 | #endif 69 | } 70 | 
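// Annotation (not in the upstream Caffe source): to_cpu()/to_gpu() above are the state
// machine behind the "lazy" synchronization that GPUMat.h's design notes refer to.
// head_ records where the freshest copy lives (UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU,
// SYNCED); the accessors below copy between host and device only when the requested side
// is stale, and the mutable_* variants re-mark their side as the authoritative one.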
71 | const void* SyncedMemory::cpu_data() { 72 | to_cpu(); 73 | return (const void*)cpu_ptr_; 74 | } 75 | 76 | void SyncedMemory::set_cpu_data(void* data) { 77 | CHECK(data); 78 | if (own_cpu_data_) { 79 | CaffeFreeHost(cpu_ptr_); 80 | } 81 | cpu_ptr_ = data; 82 | head_ = HEAD_AT_CPU; 83 | own_cpu_data_ = false; 84 | } 85 | 86 | const void* SyncedMemory::gpu_data() { 87 | #ifndef CPU_ONLY 88 | to_gpu(); 89 | return (const void*)gpu_ptr_; 90 | #else 91 | NO_GPU; 92 | #endif 93 | } 94 | 95 | void* SyncedMemory::mutable_cpu_data() { 96 | to_cpu(); 97 | head_ = HEAD_AT_CPU; 98 | return cpu_ptr_; 99 | } 100 | 101 | void* SyncedMemory::mutable_gpu_data() { 102 | #ifndef CPU_ONLY 103 | to_gpu(); 104 | head_ = HEAD_AT_GPU; 105 | return gpu_ptr_; 106 | #else 107 | NO_GPU; 108 | #endif 109 | } 110 | 111 | 112 | } // namespace caffe 113 | 114 | -------------------------------------------------------------------------------- /GPUMat/cublastest/GPUMat.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "cublas_v2.h" 5 | 6 | double vec_dot_vec(double *y, double *x, int n){ 7 | 8 | 9 | return 0; 10 | } 11 | 12 | void mat_add_mat(const double *x, double *y, double scalar, int n){ 13 | cudaError_t cudaStat ; // cudaMalloc status 14 | cublasStatus_t stat ; // CUBLAS functions status 15 | cublasHandle_t handle ; // CUBLAS context 16 | // on the device 17 | double *d_x; // d_x - x on the device 18 | double *d_y; // d_y - y on the device 19 | 20 | cudaStat = cudaMalloc (( void **)& d_x, n*sizeof(*x)); // device 21 | // memory alloc for x 22 | cudaStat = cudaMalloc (( void **)& d_y, n*sizeof(*y)); // device 23 | // memory alloc for y 24 | stat = cublasCreate (& handle ); // initialize CUBLAS context 25 | stat = cublasSetVector (n, sizeof (*x), x ,1 ,d_x, 1); // cp x- >d_x 26 | stat = cublasSetVector (n, sizeof (*y), y ,1 ,d_y, 1); // cp y- >d_y 27 | 28 | stat=cublasDaxpy(handle,n,&scalar,d_x,1,d_y,1); 29 | 30 | 31 | cudaFree (d_x ); // free device memory 32 | cudaFree (d_y ); // free device memory 33 | cublasDestroy ( handle ); // destroy CUBLAS context 34 | 35 | } 36 | 37 | 38 | void mat_prod_mat(const double* a, cublasOperation_t op_a, const double* b, cublasOperation_t op_b, double*c, int m, int n, int k){ 39 | 40 | cudaError_t cudaStat ; // cudaMalloc status 41 | cublasStatus_t stat ; // CUBLAS functions status 42 | cublasHandle_t handle ; // CUBLAS context 43 | 44 | // on the device 45 | double* d_a; // d_a - a on the device 46 | double* d_b; // d_b - b on the device 47 | double* d_c; // d_c - c on the device 48 | cudaStat = cudaMalloc((void **)&d_a ,m*k*sizeof(*a)); // device 49 | // memory alloc for a 50 | cudaStat = cudaMalloc((void **)&d_b ,k*n*sizeof(*b)); // device 51 | // memory alloc for b 52 | cudaStat = cudaMalloc((void **)&d_c ,m*n*sizeof(*c)); // device 53 | // memory alloc for c 54 | stat = cublasCreate(&handle); // initialize CUBLAS context 55 | // copy matrices from the host to the device 56 | stat = cublasSetMatrix (m,k, sizeof(*a) ,a,m,d_a ,m); //a -> d_a 57 | stat = cublasSetMatrix (k,n, sizeof(*b) ,b,k,d_b ,k); //b -> d_b 58 | stat = cublasSetMatrix (m,n, sizeof(*c) ,c,m,d_c ,m); //c -> d_c 59 | double al=1.0; 60 | double bet=1.0; 61 | // matrix - matrix multiplication : d_c = al*d_a *d_b + bet *d_c 62 | // d_a -mxk matrix , d_b -kxn matrix , d_c -mxn matrix ; 63 | // al ,bet -scalars 64 | stat=cublasDgemm(handle,op_a,op_b,m,n,k,&al,d_a,m,d_b,k,&bet,d_c,m); 65 | 66 | stat = cublasGetMatrix (m, n, 
sizeof(*c) ,d_c ,m,c,m); // cp d_c - >c 67 | 68 | cudaFree (d_a ); // free device memory 69 | cudaFree (d_b ); // free device memory 70 | cudaFree (d_c ); // free device memory 71 | cublasDestroy ( handle ); // destroy CUBLAS context 72 | 73 | } 74 | -------------------------------------------------------------------------------- /GPUMat/cublastest/GPUMat.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "cublas_v2.h" 5 | 6 | 7 | 8 | namespace NeuralNet{ 9 | namespace GPUMat{ 10 | 11 | // multiply the vector d_x by the scalar al and add to d_y 12 | // d_y = al*d_x + d_y , d_x ,d_y - n- vectors ; al - scalar 13 | // void vec_add_vec(double *y, double *x, double scalar, int n); 14 | double vec_dot_vec(double *y, double *x, int n); 15 | //3.3.2 16 | void mat_prod_vec(double *mat, int mat_m, int mat_n, cublasOperation_t op, double *vec, double *result); 17 | 18 | 19 | void mat_prod_mat(double* a, cublasOperation_t op_a, double* b, cublasOperation_t op_b, double*c, int m, int n, int k); 20 | 21 | 22 | void mat_add_mat(double* y, double* x, double scalar, int n); 23 | 24 | void mat_elem_prod_mat(); 25 | 26 | double mat_norm2(double *x, int n); 27 | 28 | 29 | 30 | // template 31 | // transform(double *x, Func func, int n); 32 | 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /GPUMat/cublastest/Makefile: -------------------------------------------------------------------------------- 1 | CPP = nvcc 2 | CXXFLAGS = --std=c++11 -I/home/yuguangyang/Downloads/armadillo-5.100.2/include -DARMA_DONT_USE_WRAPPER 3 | #nvcc mmul_1.cu -lcublas -lcurand -o mmul_1 4 | LINKFLAGS = -lcublas -lcurand -lblas -llapack 5 | 6 | OBJ = main.o 7 | 8 | all: test 9 | 10 | test : $(OBJ) 11 | $(CPP) -o $@ $(OBJ) $(LINKFLAGS) 12 | 13 | main.o : main.cpp 14 | $(CPP) -c $(CXXFLAGS) $@ $^ 15 | 16 | GPUMat.o : GPUMat.cpp 17 | $(CPP) -c $@ $^ 18 | 19 | #%.o : %.cpp 20 | # $(CPP) -c $(CXXFLAGS) 21 | 22 | 23 | clean: 24 | rm -f *.o *~ 25 | -------------------------------------------------------------------------------- /GPUMat/cublastest/armatest/armatest.sdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/GPUMat/cublastest/armatest/armatest.sdf -------------------------------------------------------------------------------- /GPUMat/cublastest/armatest/armatest.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.31101.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "armatest", "armatest\armatest.vcxproj", "{19FAFC51-3297-45A1-86A9-E5C33A38ECEA}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Debug|Win32.ActiveCfg = Release|Win32 17 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Debug|Win32.Build.0 = Release|Win32 18 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Debug|x64.ActiveCfg = Debug|x64 19 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Debug|x64.Build.0 = Debug|x64 20 
| {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Release|Win32.ActiveCfg = Release|Win32 21 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Release|Win32.Build.0 = Release|Win32 22 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Release|x64.ActiveCfg = Release|x64 23 | {19FAFC51-3297-45A1-86A9-E5C33A38ECEA}.Release|x64.Build.0 = Release|x64 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /GPUMat/cublastest/armatest/armatest/armatest.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | -------------------------------------------------------------------------------- /GPUMat/cublastest/armatest/armatest/armatest.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | PATH=C:\Users\yuguangyang\Downloads\armadillo-5.300.4\armadillo-5.300.4\examples\lib_win64;%PATH% 5 | WindowsLocalDebugger 6 | 7 | -------------------------------------------------------------------------------- /GPUMat/cublastest/armatest/x64/Debug/armatest.ilk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/GPUMat/cublastest/armatest/x64/Debug/armatest.ilk -------------------------------------------------------------------------------- /GPUMat/cublastest/cublastest/cublastest.sdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/GPUMat/cublastest/cublastest/cublastest.sdf -------------------------------------------------------------------------------- /GPUMat/cublastest/cublastest/cublastest.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.31101.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cublastest", "cublastest\cublastest.vcxproj", "{CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Debug|Win32.Build.0 = Debug|Win32 18 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Debug|x64.ActiveCfg = Debug|x64 19 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Debug|x64.Build.0 = Debug|x64 20 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Release|Win32.ActiveCfg = Release|Win32 21 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Release|Win32.Build.0 = Release|Win32 22 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Release|x64.ActiveCfg = 
Release|x64 23 | {CD921E7B-5BDA-4DB0-909E-6A8D3E3FD517}.Release|x64.Build.0 = Release|x64 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /GPUMat/cublastest/cublastest/x64/Debug/cublastest.ilk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/GPUMat/cublastest/cublastest/x64/Debug/cublastest.ilk -------------------------------------------------------------------------------- /GPUMat/cublastest/main.cpp: -------------------------------------------------------------------------------- 1 | //#include 2 | #include 3 | #include 4 | #include "cublas_v2.h" 5 | #include 6 | //#include "GPUMat.h" 7 | #define N 10 8 | void mat_prod_mat(const double* a, cublasOperation_t op_a, const double* b, cublasOperation_t op_b, double*c, int m, int n, int k); 9 | 10 | 11 | 12 | 13 | 14 | int main(){ 15 | 16 | #if 0 17 | // arma::mat a; 18 | arma::mat a(N, N, arma::fill::randu); 19 | arma::mat b(N, N, arma::fill::randu); 20 | arma::mat c = a * b; 21 | 22 | c.save("armaresult.txt",arma::raw_ascii); 23 | 24 | #endif 25 | double *a; 26 | double *b, *c; 27 | a = (double *)malloc(N*N*sizeof(double)); 28 | b = (double *)malloc(N*N*sizeof(double)); 29 | c = (double *)malloc(N*N*sizeof(double)); 30 | 31 | for (int i = 0; i < N*N; i++){ 32 | a[i] = 1.0* i / (N*N); 33 | b[i] = -a[i]; 34 | } 35 | 36 | #if 1 37 | mat_prod_mat(a, CUBLAS_OP_N, b, CUBLAS_OP_N, c, N, N, N); 38 | for (int i = 0; i < N*N; i++){ 39 | std::cout << c[i] << std::endl; 40 | } 41 | 42 | // c.save("gpuresult.txt", arma::raw_ascii); 43 | // double *aa = nullptr; 44 | // double *bb = nullptr; 45 | // double *cc = nullptr; 46 | 47 | // std::swap(aa,a.memptr()); 48 | // std::swap(bb,b.memptr()); 49 | // std::swap(cc,c.memptr()); 50 | #endif 51 | return 0; 52 | } 53 | 54 | void mat_prod_mat(const double* a, cublasOperation_t op_a, const double* b, cublasOperation_t op_b, double*c, int m, int n, int k){ 55 | 56 | cudaError_t cudaStat ; // cudaMalloc status 57 | cublasStatus_t stat ; // CUBLAS functions status 58 | cublasHandle_t handle ; // CUBLAS context 59 | 60 | // on the device 61 | double* d_a; // d_a - a on the device 62 | double* d_b; // d_b - b on the device 63 | double* d_c; // d_c - c on the device 64 | cudaStat = cudaMalloc((void **)&d_a ,m*k*sizeof(*a)); // device 65 | // memory alloc for a 66 | cudaStat = cudaMalloc((void **)&d_b ,k*n*sizeof(*b)); // device 67 | // memory alloc for b 68 | cudaStat = cudaMalloc((void **)&d_c ,m*n*sizeof(*c)); // device 69 | // memory alloc for c 70 | stat = cublasCreate(&handle); // initialize CUBLAS context 71 | // copy matrices from the host to the device 72 | stat = cublasSetMatrix (m,k, sizeof(*a) ,a,m,d_a ,m); //a -> d_a 73 | stat = cublasSetMatrix (k,n, sizeof(*b) ,b,k,d_b ,k); //b -> d_b 74 | stat = cublasSetMatrix (m,n, sizeof(*c) ,c,m,d_c ,m); //c -> d_c 75 | double al=1.0; 76 | double bet=0.0; 77 | // matrix - matrix multiplication : d_c = al*d_a *d_b + bet *d_c 78 | // d_a -mxk matrix , d_b -kxn matrix , d_c -mxn matrix ; 79 | // al ,bet -scalars 80 | stat=cublasDgemm(handle,op_a,op_b,m,n,k,&al,d_a,m,d_b,k,&bet,d_c,m); 81 | 82 | stat = cublasGetMatrix (m, n, sizeof(*c) ,d_c ,m,c,m); // cp d_c - >c 83 | 84 | cudaFree (d_a ); // free device memory 85 | cudaFree (d_b ); // free device memory 86 
| cudaFree (d_c ); // free device memory 87 | cublasDestroy ( handle ); // destroy CUBLAS context 88 | 89 | } 90 | -------------------------------------------------------------------------------- /GPUMat/cublastest/main.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include "GPUMat.h" 3 | 4 | 5 | #define N 10 6 | using namespace NeuralNet; 7 | 8 | 9 | int main(){ 10 | 11 | // arma::mat a; 12 | arma::mat a(N, N, arma::fill::randu); 13 | arma::mat b(N, N, arma::fill::randu); 14 | arma::mat c = a * b; 15 | 16 | c.save("armaresult.txt",arma::raw_ascii); 17 | 18 | 19 | 20 | #if 0 21 | GPUMat::mat_prod_mat(a.memptr(), CUBLAS_OP_N, b.memptr(), CUBLAS_OP_N, c.memptr(), N, N, N); 22 | 23 | 24 | c.save("gpuresult.txt", arma::raw_ascii); 25 | double *aa = nullptr; 26 | double *bb = nullptr; 27 | double *cc = nullptr; 28 | 29 | std::swap(aa,a.memptr()); 30 | std::swap(bb,b.memptr()); 31 | std::swap(cc,c.memptr()); 32 | #endif 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /GPUMat/cublastest/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/GPUMat/cublastest/test -------------------------------------------------------------------------------- /GPUMat/device_common.cpp: -------------------------------------------------------------------------------- 1 | #include "device_common.h" 2 | 3 | std::shared_ptr GPUEnv::singleton_; 4 | 5 | GPUEnv::GPUEnv() 6 | : cublas_handle_(NULL), curand_generator_(NULL){ 7 | // Try to create a cublas handler, and report an error if failed (but we will 8 | // keep the program running as one might just want to run CPU code). 9 | if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) { 10 | std::cerr << "Cannot create Cublas handle. Cublas won't be available."; 11 | } 12 | #if 0 13 | // Try to create a curand handler. 14 | if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT) 15 | != CURAND_STATUS_SUCCESS || 16 | curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen()) 17 | != CURAND_STATUS_SUCCESS) { 18 | std::cerr << "Cannot create Curand generator. 
Curand won't be available."; 19 | } 20 | #endif 21 | } 22 | 23 | GPUEnv::~GPUEnv() { 24 | if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_)); 25 | #if 0 26 | if (curand_generator_) { 27 | CURAND_CHECK(curandDestroyGenerator(curand_generator_)); 28 | } 29 | #endif 30 | } 31 | 32 | 33 | void GPUEnv::DeviceQuery() { 34 | cudaDeviceProp prop; 35 | int device; 36 | if (cudaSuccess != cudaGetDevice(&device)) { 37 | printf("No cuda device present.\n"); 38 | return; 39 | } 40 | CUDA_CHECK(cudaGetDeviceProperties(&prop, device)); 41 | ofstream os; 42 | os.open("GPU_info.log"); 43 | os << "Device id: " << device; 44 | os << "Major revision number: " << prop.major; 45 | os << "Minor revision number: " << prop.minor; 46 | os << "Name: " << prop.name; 47 | os << "Total global memory: " << prop.totalGlobalMem; 48 | os << "Total shared memory per block: " << prop.sharedMemPerBlock; 49 | os << "Total registers per block: " << prop.regsPerBlock; 50 | os << "Warp size: " << prop.warpSize; 51 | os << "Maximum memory pitch: " << prop.memPitch; 52 | os << "Maximum threads per block: " << prop.maxThreadsPerBlock; 53 | os << "Maximum dimension of block: " 54 | << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", " 55 | << prop.maxThreadsDim[2]; 56 | os << "Maximum dimension of grid: " 57 | << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", " 58 | << prop.maxGridSize[2]; 59 | os << "Clock rate: " << prop.clockRate; 60 | os << "Total constant memory: " << prop.totalConstMem; 61 | os << "Texture alignment: " << prop.textureAlignment; 62 | os << "Concurrent copy and execution: " 63 | << (prop.deviceOverlap ? "Yes" : "No"); 64 | os << "Number of multiprocessors: " << prop.multiProcessorCount; 65 | os << "Kernel execution timeout: " 66 | << (prop.kernelExecTimeoutEnabled ? "Yes" : "No"); 67 | return; 68 | } 69 | 70 | #if 0 71 | void GPUEnv::set_random_seed(const unsigned int seed) { 72 | // Curand seed 73 | static bool g_curand_availability_logged = false; 74 | if (Get().curand_generator_) { 75 | CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(), 76 | seed)); 77 | CURAND_CHECK(curandSetGeneratorOffset(curand_generator(), 0)); 78 | } else { 79 | if (!g_curand_availability_logged) { 80 | std::cerr << 81 | "Curand not available. 
Skipping setting the curand seed."; 82 | g_curand_availability_logged = true; 83 | } 84 | } 85 | // RNG seed 86 | Get().random_generator_.reset(new RNG(seed)); 87 | } 88 | 89 | class GPUEnv::RNG::Generator { 90 | public: 91 | Generator() : rng_(new GPUEnv::rng_t(cluster_seedgen())) {} 92 | explicit Generator(unsigned int seed) : rng_(new GPUEnv::rng_t(seed)) {} 93 | GPUEnv::rng_t* rng() { return rng_.get(); } 94 | private: 95 | shared_ptr rng_; 96 | }; 97 | 98 | GPUEnv::RNG::RNG() : generator_(new Generator()) { } 99 | 100 | GPUEnv::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { } 101 | 102 | GPUEnv::RNG& GPUEnv::RNG::operator=(const RNG& other) { 103 | generator_.reset(other.generator_.get()); 104 | return *this; 105 | } 106 | 107 | void* GPUEnv::RNG::generator() { 108 | return static_cast(generator_->rng()); 109 | } 110 | #endif 111 | -------------------------------------------------------------------------------- /GPUMat/gpumat/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | gpumat 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 10 | clean,full,incremental, 11 | 12 | 13 | 14 | 15 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 16 | full,incremental, 17 | 18 | 19 | 20 | 21 | 22 | org.eclipse.cdt.core.cnature 23 | org.eclipse.cdt.core.ccnature 24 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 25 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 26 | 27 | 28 | -------------------------------------------------------------------------------- /GPUMat/main.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | int main(){ 4 | 5 | return 0; 6 | } -------------------------------------------------------------------------------- /GPUMat/test_GPUMat.cpp: -------------------------------------------------------------------------------- 1 | #include "device_common.h" 2 | #include "GPUMat.h" 3 | #include "gtest/gtest.h" 4 | 5 | TEST(GPUMATTest, selfAdd){ 6 | GPUMat g1(5,5); 7 | GPUMat g2(5,5); 8 | 9 | } 10 | 11 | int main(){ 12 | 13 | GPUEnv::GetInstance(); 14 | 15 | GPUMat g(5,5); 16 | 17 | g.ones(); 18 | g.print(); 19 | 20 | 21 | return 0; 22 | 23 | } 24 | -------------------------------------------------------------------------------- /LSTM/LSTMLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "BaseLayer.h" 7 | #include "ElementwiseLayer.h" 8 | #include "LinearAdditionLayer.h" 9 | #include "ActivationLayer.h" 10 | #include "BaseLayer_LSTM.h" 11 | 12 | namespace NeuralNet { 13 | 14 | class RNN_LSTM { 15 | 16 | public: 17 | RNN_LSTM(int numHiddenLayers0, int hiddenLayerInputDim0, 18 | int hiddenLayerOutputDim0, int inputDim0, int outputDim0, 19 | std::shared_ptr trainingX0, std::shared_ptr trainingY0); 20 | void forward(); 21 | void backward(); 22 | void train(); 23 | void savePara(std::string filename); // try to save all the parameters in the LSTM for further use 24 | void test(); 25 | void calNumericGrad(); 26 | BaseLayer_LSTM* getOutputLayer(){ return netOutputLayer;} 27 | private: 28 | std::vector inGateLayers, forgetGateLayers, outputGateLayers, informationLayers, cellStateLayers; 29 | std::vector outputElementLayers, forgetElementGateLayers, inputElementGateLayers; 30 | std::vector cellLinearAdditionLayers; 31 | std::vector cellStateActivationLayers; 32 | // std::vector layerOutput_prev, cellState_prev; 33 | 
BaseLayer_LSTM* netOutputLayer; 34 | std::shared_ptr trainingY, trainingX; 35 | int numHiddenLayers, hiddenLayerInputDim, hiddenLayerOutputDim; 36 | int rnnInputDim, rnnOutputDim; 37 | 38 | 39 | }; 40 | 41 | } 42 | 43 | 44 | -------------------------------------------------------------------------------- /LSTM/Makefile: -------------------------------------------------------------------------------- 1 | DEEPLEARNING_PATH=-L/home/yuguangyang/Dropbox/DeepLearningPackage/YangCopy/DeepLearning/src/lib 2 | DEEPLEARNING_INCLUDE=-I/home/yuguangyang/Dropbox/DeepLearningPackage/YangCopy/DeepLearning/include 3 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 4 | 5 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 6 | CPP = g++ 7 | ARMA_INCLUDE=-I/home/yuguangyang/Downloads/armadillo-5.100.2/include 8 | ARMA_LINKFLAGS=-L/usr/lib -llapack -lblas 9 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 10 | CXXFLAGS = -std=c++0x $(ARMA_INCLUDE) $(DEEPLEARNING_INCLUDE) -I/opt/boost/boost_1_57_0 -c -D__LINUX -DDEBUG -g3 -DARMA_DONT_USE_WRAPPER -I/usr/local/include 11 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program -march=native 12 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 13 | LINK_DL_FLAGS = $(DEEPLEARNING_PATH) -L/usr/local/lib $(ARMA_LINKFLAGS) -ldeeplearning -ldl #$(LINKOPTFLAGS) 14 | #LINKFLAGS = 15 | #ODIR=obj 16 | ODIR = 17 | 18 | OBJ = main.o LSTMLayer.o 19 | 20 | test : $(OBJ) 21 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 22 | 23 | LSTMLayer.o: LSTMLayer.cpp 24 | $(CPP) -c $(CXXFLAGS) $< 25 | 26 | 27 | clean: 28 | rm -f *.o *~ 29 | -------------------------------------------------------------------------------- /LSTM/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/LSTM/test -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CPP = g++ 2 | ARMA_INCLUDE=-I/home/yuguangyang/Downloads/armadillo-5.100.2/include 3 | ARMA_LINKFLAGS=-llapack -lblas 4 | CXXFLAGS = -std=c++0x $(ARMA_INCLUDE) -I./include -I/usr/local/include -I/opt/boost/boost_1_57_0 -D__LINUX -DARMA_DONT_USE_WRAAPER 5 | DEBUGFLAG=-DDEBUG -g3 6 | RELEASEFLAG= -O3 -march=native -DARMA_NO_DEBUG 7 | CXXFLAGS += $(RELEASEFLAG) 8 | SRCS1 = $(wildcard src/*.cpp) 9 | OBJ1 = $(SRCS1:%.cpp=%.o) 10 | SRCS2 = $(wildcard src/*.cc) 11 | OBJ2 = $(SRCS2:%.cc=%.o) 12 | #SRCS3=$(wildcard src/*.c) 13 | #OBJ3 = $(SRCS3:.c=.o) 14 | OBJ = $(OBJ1) $(OBJ2) $(OBJ3) 15 | 16 | 17 | # Specify extensions of files to delete when cleaning 18 | CLEANEXTS = o a 19 | 20 | # Specify the target file and the install directory 21 | OUTPUTFILE = libdeeplearning.a 22 | INSTALLDIR = src/lib 23 | 24 | $(OUTPUTFILE) : $(OBJ) 25 | ar ru $@ $^ 26 | ranlib $@ 27 | 28 | %.o : src/%.cpp 29 | $(CPP) -c $(CXXFLAGS) $^ 30 | 31 | %.o : src/%.cc 32 | $(CPP) -c $(CXXFLAGS) $^ 33 | 34 | 35 | listfile: 36 | echo $(OBJ) 37 | 38 | clean: 39 | for file in $(CLEANEXTS); do rm -f src/*.$$file; done 40 | 41 | install: 42 | mkdir -p $(INSTALLDIR) 43 | cp -p $(OUTPUTFILE) $(INSTALLDIR) 44 | -------------------------------------------------------------------------------- /MatArray/Makefile: -------------------------------------------------------------------------------- 1 | # this make file now is dynamically linking 2 | #OPTFLAGS = -O3 -march=nocona -mfpmath=sse -msse3 
-Wuninitialized -flto 3 | #MKL_INCLUDE = /opt/intel/mkl/include/ 4 | #MKL_INCLUDE = /opt/intel/composer_xe_2013.0.079/mkl/include/ 5 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 6 | 7 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 8 | CPP = g++ 9 | #MKLROOT = /opt/intel/mkl 10 | #MKLROOT = /opt/intel/composer_xe_2013.0.079/mkl 11 | MKLROOT = /opt/intel/composerxe/mkl 12 | MKLLINKFLAGS = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread 13 | MKL_DL_LINKFLAGS = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm 14 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include/armadillo 15 | ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -llapack -lopenblas 16 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 17 | CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) -I/opt/boost/boost_1_57_0 -c -DDEBUG -D__LINUX -g3 18 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program 19 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 20 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl 21 | #LINKFLAGS = 22 | #ODIR=obj 23 | ODIR = 24 | 25 | OBJ = main.o 26 | 27 | test : $(OBJ) 28 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 29 | 30 | #%.o : %.cpp 31 | # $(CPP) -c $(CXXFLAGS) 32 | 33 | 34 | clean: 35 | rm -f *.o *~ 36 | 37 | build-tests: 38 | 39 | -------------------------------------------------------------------------------- /MatArray/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "MatArray.h" 10 | 11 | 12 | 13 | int main(int argc, char *argv[]) { 14 | MatArray::Mat1DArray_ptr matArr = MatArray::build(5); 15 | 16 | for (int i = 0 ; i < 5; i++) { 17 | (*matArr)[i].randu(5,5); 18 | (*matArr)[i].print("1D"); 19 | } 20 | 21 | MatArray::Mat2DArray_ptr mat2DArr = MatArray::build(2,2); 22 | 23 | for (int i = 0 ; i < 2; i++) { 24 | for (int j = 0; j < 2; j++) { 25 | (*mat2DArr)[i][j].randu(5,5); 26 | (*mat2DArr)[i][j].print("2D"); 27 | } 28 | } 29 | 30 | // here I try to test Tensor_4D 31 | Tensor_4D tensor(2,3,4,5); 32 | 33 | assert(2==tensor.dim1()); 34 | assert(3==tensor.dim2()); 35 | assert(4==tensor.dim3()); 36 | assert(5==tensor.dim4()); 37 | assert(120==tensor.size()); 38 | 39 | tensor.fill_randn(); 40 | tensor.print(); 41 | tensor.fill_zeros(); 42 | tensor.print(); 43 | 44 | arma::vec v(20,arma::fill::randn); 45 | Tensor_4D tensor2(v.memptr(), 20, 1,1,4,5); 46 | assert(1==tensor2.dim1()); 47 | assert(1==tensor2.dim2()); 48 | assert(4==tensor2.dim3()); 49 | assert(5==tensor2.dim4()); 50 | assert(20==tensor2.size()); 51 | 52 | v.print("arma::v"); 53 | tensor2.print(); 54 | 55 | Tensor_4D tensor3(v.memptr(), 20, 1,1,4,5,true); 56 | tensor3.fill_zeros(); 57 | v.print("arma::v"); 58 | 59 | tensor2.substract(tensor3,1.0); 60 | tensor2.print(); 61 | 62 | tensor3.substract(tensor2,1.0); 63 | tensor3.print(); 64 | 65 | Tensor_4D t4(1,2,3,4); 66 | int count = 0; 67 | for (int i = 0; i < t4.dim4(); i++){ 68 | for (int j = 0; j < t4.dim3(); j++){ 69 | for (int k = 0; k < t4.dim2(); k++){ 70 | for (int m = 0; m < t4.dim1(); m++){ 71 | t4(m,k,j,i) = count++; 72 | } 73 | } 74 | 75 | } 76 | } 77 | 78 | 79 | arma::vec v2(t4.getPtr(),t4.size()); 80 | 81 | 82 | t4.print(); 83 | 84 | for (int i= 0; i < t4.size(); 
i++) 85 | t4(i) -= i; 86 | 87 | t4.print(); 88 | 89 | v2.print("arma::v2"); 90 | 91 | 92 | Tensor_4D t5(2,2,2,2); 93 | 94 | t5.fill_randu(); 95 | 96 | 97 | t5.print(); 98 | 99 | t5.transform([](double val){return val-0.5;}); 100 | 101 | t5.print(); 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | } 110 | 111 | -------------------------------------------------------------------------------- /PoolLayer/Makefile: -------------------------------------------------------------------------------- 1 | # this make file now is dynamically linking 2 | #OPTFLAGS = -O3 -march=nocona -mfpmath=sse -msse3 -Wuninitialized -flto 3 | #MKL_INCLUDE = /opt/intel/mkl/include/ 4 | #MKL_INCLUDE = /opt/intel/composer_xe_2013.0.079/mkl/include/ 5 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 6 | 7 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 8 | CPP = g++ 9 | #MKLROOT = /opt/intel/mkl 10 | #MKLROOT = /opt/intel/composer_xe_2013.0.079/mkl 11 | MKLROOT = /opt/intel/composerxe/mkl 12 | MKLLINKFLAGS = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread 13 | MKL_DL_LINKFLAGS = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm 14 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include/armadillo 15 | ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -llapack -lopenblas 16 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 17 | CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) -I/opt/boost/boost_1_57_0 -c -DDEBUG -D__LINUX -g3 18 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program 19 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 20 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl 21 | #LINKFLAGS = 22 | #ODIR=obj 23 | ODIR = 24 | 25 | OBJ = main.o PoolLayer.o 26 | 27 | test : $(OBJ) 28 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 29 | 30 | #%.o : %.cpp 31 | # $(CPP) -c $(CXXFLAGS) 32 | 33 | 34 | clean: 35 | rm -f *.o *~ 36 | -------------------------------------------------------------------------------- /PoolLayer/PoolLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "PoolLayer.h" 2 | using namespace NeuralNet; 3 | 4 | PoolLayer::PoolLayer(int poolDim0_x, int poolDim0_y, Type type0) { 5 | poolDim_x = poolDim0_x; 6 | poolDim_y = poolDim0_y; 7 | type = type0; 8 | } 9 | 10 | void PoolLayer::setInputDim(int inputDim0_x, int inputDim0_y, int inputDim0_z){ 11 | 12 | inputDim_x = inputDim0_x; 13 | inputDim_y = inputDim0_y; 14 | inputDim_z = inputDim0_z; 15 | inputSize = inputDim_x * inputDim_y * inputDim_z; 16 | outputDim_x = inputDim0_x / poolDim_x; 17 | outputDim_y = inputDim0_y / poolDim_y; 18 | outputDim_z = inputDim0_z; 19 | outputSize = outputDim_x * outputDim_y * outputDim_z; 20 | } 21 | 22 | void PoolLayer::activateUp(std::shared_ptr input0) { 23 | input = input0; 24 | int maxIdx1, maxIdx2; 25 | int inputInstance = input->n_slices / inputDim_z; 26 | output = std::make_shared(outputDim_x,outputDim_y, outputDim_z*inputInstance,arma::fill::zeros); 27 | maxIdx_x = std::make_shared>(outputDim_x,outputDim_y, outputDim_z*inputInstance); 28 | maxIdx_y = std::make_shared>(outputDim_x,outputDim_y, outputDim_z*inputInstance); 29 | 30 | if (type == mean) { 31 | for (int d = 0; d < outputDim_z * inputInstance; d++) { 32 | for (int i = 0; i < outputDim_x; i++) { 33 | for (int j = 0; j < outputDim_y; j++) { 34 | 
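                    // Mean pooling: output cell (i,j) of slice d accumulates the
                    // poolDim_x x poolDim_y input window starting at (i*poolDim_x, j*poolDim_y);
                    // the whole slice is divided by the window area once the loops finish.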
for (int m = i * poolDim_x; m < (i + 1) * poolDim_x; m++) { 35 | for (int n = j * poolDim_y; n < (j + 1) * poolDim_y; n++) { 36 | (*output)(i,j,d) += (*input)(m,n,d); 37 | } 38 | } 39 | } 40 | } 41 | (*output).slice(d) /= (1.0 * poolDim_x * poolDim_y); 42 | } 43 | } else if (type == max) { 44 | (*output).zeros(); 45 | for (int d = 0; d < outputDim_z * inputInstance; d++) { 46 | for (int i = 0; i < outputDim_x; i++) { 47 | for (int j = 0; j < outputDim_y; j++) { 48 | double maxtemp = 0.0; 49 | maxIdx1 = 0; 50 | maxIdx2 = 0; 51 | for (int m = i * poolDim_x; m < (i + 1) * poolDim_x; m++) { 52 | for (int n = j * poolDim_y; n < (j + 1) * poolDim_y; n++) { 53 | if (maxtemp < (*input)(m,n,d) ) { 54 | maxtemp = (*input)(m,n,d); 55 | maxIdx1 = m; 56 | maxIdx2 = n; 57 | } 58 | } 59 | } 60 | (*output)(i,j,d) = maxtemp; 61 | (*maxIdx_x)(i,j,d) = maxIdx1; 62 | (*maxIdx_y)(i,j,d) = maxIdx2; 63 | } 64 | } 65 | } 66 | } 67 | } 68 | 69 | void PoolLayer::upSampling(std::shared_ptr delta_in) { 70 | int inputInstance = delta_in->n_slices / inputDim_z; 71 | delta_out = std::make_shared(inputDim_x,inputDim_y, inputDim_z * inputInstance, arma::fill::zeros); 72 | if (type == mean) { 73 | for (int d = 0; d < inputDim_z * inputInstance; d++) { 74 | for (int i = 0; i < inputDim_x; i++) { 75 | for (int j = 0; j < inputDim_y; j++) { 76 | (*delta_out)(i,j,d) = (*delta_in)(i/poolDim_x,j/poolDim_y,d); 77 | } 78 | } 79 | } 80 | (*delta_out) /= (1.0 * poolDim_x * poolDim_y); 81 | } else if(type == max) { 82 | for (int d = 0; d < outputDim_z * inputInstance; d++) { 83 | for (int i = 0; i < outputDim_x; i++) { 84 | for (int j = 0; j < outputDim_y; j++) { 85 | (*delta_out)((*maxIdx_x)(i,j,d),(*maxIdx_y)(i,j,d),d) = (*delta_in)(i,j,d); 86 | } 87 | } 88 | } 89 | } 90 | 91 | } -------------------------------------------------------------------------------- /PoolLayer/PoolLayer.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../MatArray/MatArray.h" 4 | 5 | namespace NeuralNet{ 6 | 7 | struct PoolLayer { 8 | enum Type { mean, max}; 9 | PoolLayer() {} 10 | PoolLayer(int poolDim_x, int poolDim_y, Type type0); 11 | void setInputDim(int, int, int); 12 | void activateUp(std::shared_ptr input0); 13 | void upSampling(std::shared_ptr detla_in); 14 | std::shared_ptr input; 15 | std::shared_ptr output; 16 | std::shared_ptr> maxIdx_x, maxIdx_y; 17 | std::shared_ptr detla_in; 18 | std::shared_ptr delta_out; 19 | Type type; 20 | int poolDim_x, poolDim_y; 21 | int inputDim_x; 22 | int inputDim_y; 23 | int inputDim_z; 24 | int outputDim_x, outputDim_y, outputDim_z; 25 | int inputSize, outputSize; 26 | }; 27 | 28 | } -------------------------------------------------------------------------------- /PoolLayer/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "PoolLayer.h" 10 | #include "../MatArray/MatArray.h" 11 | 12 | void loadData_MNIST(std::shared_ptr X, 13 | std::shared_ptr Y); 14 | 15 | int main(int argc, char *argv[]) { 16 | std::shared_ptr DataX(new arma::mat); 17 | std::shared_ptr DataY(new arma::mat); 18 | std::shared_ptr trainDataX(new arma::mat); 19 | std::shared_ptr trainDataY(new arma::mat); 20 | std::shared_ptr testDataX(new arma::mat); 21 | std::shared_ptr testDataY(new arma::mat); 22 | std::shared_ptr ValidationDataX(new arma::mat); 23 | std::shared_ptr ValidationDataY(new arma::mat); 24 | 25 | 
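    // Test driver: load the raw MNIST dump (one sample per row, 784 features),
    // keep a tiny train/test split, reshape each sample into a 28x28 slice,
    // and feed the resulting cube to a 4x4 mean PoolLayer below.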
loadData_MNIST(DataX,DataY); 26 | 27 | int ntrain = 2; 28 | int ntest = 100; 29 | // now I split data into train, test, and validation 30 | trainDataX = std::make_shared(DataX->rows(0,ntrain-1)); 31 | trainDataY = std::make_shared(DataY->rows(0,ntrain-1)); 32 | testDataX = std::make_shared(DataX->rows(ntrain,ntrain+ntest-1)); 33 | testDataY = std::make_shared(DataY->rows(ntrain,ntrain+ntest-1)); 34 | 35 | std::shared_ptr trainDataX2D(new arma::cube(28,28,ntrain)); 36 | MatArray::Mat1DArray_ptr trainDataX2D2 = MatArray::build(ntrain); 37 | 38 | for (int i = 0 ; i < ntrain; i++) { 39 | (*trainDataX2D2)[i].set_size(28,28); 40 | for(int j = 0; j < 28; j++) { 41 | for( int k = 0; k < 28; k++) { 42 | (*trainDataX2D)(j,k,i) = trainDataX->at(i,28*j+k); 43 | (*trainDataX2D2)[i](j,k) = trainDataX->at(i,28*j+k); 44 | } 45 | } 46 | (*trainDataX2D2)[i].print(); 47 | } 48 | 49 | trainDataX2D->save("cube.dat",arma::raw_ascii); 50 | DataX.reset(); 51 | DataY.reset(); 52 | 53 | PoolLayer pl(4,4, PoolLayer::mean, trainDataX2D); 54 | pl.activateUp(); 55 | // pl.outputX->save("outputcube_mean.dat", arma::raw_ascii); 56 | /* 57 | int inputDim = trainDataX->n_cols; 58 | int outputDim = trainDataY->n_cols; 59 | std::cout << inputDim << std::endl; 60 | std::cout << outputDim << std::endl; 61 | std::cout << trainDataX->n_rows << std::endl; 62 | std::cout << trainDataY->n_rows << std::endl; 63 | 64 | 65 | int numLayers = 2; 66 | std::vector dimensions; 67 | 68 | dimensions.push_back(784); 69 | dimensions.push_back(100); 70 | dimensions.push_back(50); 71 | 72 | bool trainFlag = true; 73 | bool testFlag = false; 74 | RBM::PreTrainPara trainingPara(1e-6, 10, 10, 0.1); 75 | trainingPara.print(); 76 | std::string filename = "pretrain"; 77 | std::shared_ptr trainDataXBin(new arma::umat(trainDataX->n_rows,trainDataX->n_cols)); 78 | *trainDataXBin = (*trainDataX) < 0.5; 79 | StackedRBM SRbm(numLayers, dimensions, trainDataXBin, trainingPara); 80 | 81 | if (trainFlag) { 82 | SRbm.preTrain(filename); 83 | } 84 | /* 85 | if (testFlag){ 86 | if (!trainFlag) rbm.loadTrainResult(filename); 87 | testDataX->save("testSample.dat",arma::raw_ascii); 88 | rbm.TestViaReconstruct(testDataX); 89 | } 90 | */ 91 | 92 | } 93 | 94 | 95 | void loadData_MNIST(std::shared_ptr X, 96 | std::shared_ptr Y) { 97 | 98 | std::string filename_base("../MNIST/data"); 99 | std::string filename; 100 | char tag[50]; 101 | char x; 102 | int count; 103 | int numFiles = 10; 104 | int featSize = 28*28; 105 | int labelSize = 10; 106 | int numSamples = 1000; 107 | X->set_size(numFiles*numSamples,featSize); 108 | Y->set_size(numFiles*numSamples,labelSize); 109 | Y->fill(0); 110 | 111 | 112 | for (int i = 0 ; i < numFiles ; i++) { 113 | sprintf(tag,"%d",i); 114 | filename=filename_base+(std::string)tag; 115 | std::cout << filename << std::endl; 116 | std::ifstream infile; 117 | infile.open(filename,std::ios::binary | std::ios::in); 118 | if (infile.is_open()) { 119 | 120 | for (int j = 0 ; j < numSamples ; j++) { 121 | 122 | for (int k =0 ; k 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | struct ProgramArgs { 11 | ProgramArgs(int argc, char *argv[]); 12 | void LoadFromFile(const string & argsFilename); 13 | void ParseArg(string argAndVal); 14 | int ntrain, ntest, saveFrequency, inputDim, hiddenDim,nEpoch; 15 | double learningRate, eps, momentum, miniBatchSize, learningRateDecay, dropOutRate, L2Decay; 16 | string dataPath; 17 | bool dropOutFlag; 18 | }; 19 | 
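RBM.h, dumped next, exposes propUp / reconstructVisible / reconstructHiddenProb together with momentum-style gradient buffers. For orientation only, a minimal one-step contrastive divergence (CD-1) update that such primitives typically support is sketched below; every name here (cd1_step, logistic, the argument layout) is illustrative and is not the package's RBM.cpp, which additionally handles mini-batches, momentum, dropout and L2 decay (see PreTrainPara).

```cpp
// Illustrative CD-1 step with Armadillo; NOT the package's implementation.
#include <armadillo>
#include <cmath>

static arma::mat logistic(arma::mat x) {
    x.transform([](double v) { return 1.0 / (1.0 + std::exp(-v)); });
    return x;
}

// W: hidden x visible, B: hidden bias, A: visible bias, v0: visible x batch
void cd1_step(arma::mat& W, arma::vec& A, arma::vec& B, const arma::mat& v0, double alpha) {
    arma::mat h0 = logistic(W * v0 + arma::repmat(B, 1, v0.n_cols));      // propUp
    arma::mat v1 = logistic(W.t() * h0 + arma::repmat(A, 1, v0.n_cols));  // reconstructVisible
    arma::mat h1 = logistic(W * v1 + arma::repmat(B, 1, v0.n_cols));      // reconstructHiddenProb
    W += alpha * (h0 * v0.t() - h1 * v1.t()) / v0.n_cols;  // <v h>_data - <v h>_recon
    B += alpha * arma::mean(h0 - h1, 1);
    A += alpha * arma::mean(v0 - v1, 1);
}
```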
-------------------------------------------------------------------------------- /RBM/RBM.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "../Utils/Util.h" 7 | 8 | namespace NeuralNet{ 9 | 10 | class RBM { 11 | public: 12 | struct PreTrainPara { 13 | 14 | PreTrainPara(double eps0=1e-6, int NEpoch0 = 500, 15 | int miniBatchSize0 = 10, double alpha0 = 0.01, 16 | double momentum0 = 0.9, int saveFreq0 = 50, 17 | double learningRateDecay0 = 1.0, bool dropOutFlag0 = false, 18 | double dropOutRate0 = 0.3, double L2Decay0 = 0.0002): 19 | eps(eps0),NEpoch(NEpoch0), miniBatchSize(miniBatchSize0), 20 | alpha(alpha0), momentum(momentum0), saveFrequency(saveFreq0), 21 | learningRateDecay(learningRateDecay0), dropOutFlag(dropOutFlag0), 22 | dropOutRate(dropOutRate0), L2Decay(L2Decay0){} 23 | double eps; 24 | int NEpoch; 25 | int miniBatchSize; 26 | double alpha; 27 | double momentum; 28 | int saveFrequency; 29 | double learningRateDecay; 30 | bool dropOutFlag; 31 | double dropOutRate; 32 | double L2Decay; 33 | void print() const; 34 | }; 35 | 36 | 37 | RBM(int visibleDim, int hiddenDim, RBM::PreTrainPara preTrainPara0); 38 | RBM(int visibleDim, int hiddenDim, std::shared_ptr trainingX0, RBM::PreTrainPara preTrainPara0); 39 | void train(); 40 | void saveTrainResult(std::string filename); 41 | void loadTrainResult(std::string filename); 42 | void initializeWeight(); 43 | void propUp(std::shared_ptr); 44 | void reconstructVisible(); 45 | void reconstructHiddenProb(); 46 | double calReconstructError(std::shared_ptr inputX); 47 | double calEnergy(std::shared_ptr inputX) const; 48 | void TestViaReconstruct(std::shared_ptr testDataX); 49 | int inputDim; 50 | int outputDim; 51 | int numInstance; 52 | Random_Bernoulli *randomGen; 53 | std::shared_ptr inputX, W , outputY, H_reconstructProb, grad_W, grad_W_old; 54 | std::shared_ptr H,V, V_reconstruct; 55 | std::shared_ptr A, B, grad_B, grad_B_old, grad_A, grad_A_old; 56 | RBM::PreTrainPara trainingPara; 57 | 58 | }; 59 | 60 | } -------------------------------------------------------------------------------- /RBM/mainSDA.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "RBM.h" 9 | #include "ProgramArgs.h" 10 | 11 | using namespace NeuralNet; 12 | 13 | void loadData_MNIST(std::shared_ptr X, 14 | std::shared_ptr Y, const std::string); 15 | 16 | int main(int argc, char *argv[]) { 17 | std::shared_ptr DataX(new arma::mat); 18 | std::shared_ptr DataY(new arma::mat); 19 | std::shared_ptr trainDataX(new arma::mat); 20 | std::shared_ptr trainDataY(new arma::mat); 21 | std::shared_ptr testDataX(new arma::mat); 22 | std::shared_ptr testDataY(new arma::mat); 23 | std::shared_ptr ValidationDataX(new arma::mat); 24 | std::shared_ptr ValidationDataY(new arma::mat); 25 | 26 | ProgramArgs progArgs(argc, argv); 27 | 28 | loadData_MNIST(DataX,DataY, progArgs.dataPath); 29 | 30 | int ntrain = progArgs.ntrain; 31 | int ntest = progArgs.ntest; 32 | int hiddenDim = progArgs.hiddenDim; 33 | int inputDim = progArgs.inputDim; 34 | 35 | RBM::PreTrainPara trainingPara(progArgs.eps, progArgs.nEpoch, progArgs.miniBatchSize, 36 | progArgs.learningRate, progArgs.momentum, progArgs.saveFrequency, progArgs.learningRateDecay, 37 | progArgs.dropOutFlag, progArgs.dropOutRate); 38 | // now I split data into train, test, and validation 39 | 
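    // (this loader stores one sample per column -- DataX is featSize x N -- so the
    //  split below slices column ranges rather than rows)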
trainDataX = std::make_shared(DataX->cols(0,ntrain-1)); 40 | trainDataY = std::make_shared(DataY->cols(0,ntrain-1)); 41 | testDataX = std::make_shared(DataX->cols(ntrain,ntrain+ntest-1)); 42 | testDataY = std::make_shared(DataY->cols(ntrain,ntrain+ntest-1)); 43 | 44 | DataX.reset(); 45 | DataY.reset(); 46 | 47 | 48 | 49 | std::cout << trainDataX->n_cols << std::endl; 50 | 51 | trainingPara.print(); 52 | 53 | bool trainFlag = true; 54 | bool testFlag = true; 55 | 56 | std::string filename = "pretrain_final"; 57 | std::shared_ptr trainDataXBin(new arma::umat(trainDataX->n_rows,trainDataX->n_cols)); 58 | *trainDataXBin = (*trainDataX) > 0.5; 59 | RBM rbm(inputDim, hiddenDim, trainDataXBin, trainingPara); 60 | 61 | if (trainFlag) { 62 | rbm.train(); 63 | rbm.saveTrainResult(filename); 64 | } 65 | 66 | if (testFlag) { 67 | if (!trainFlag) rbm.loadTrainResult(filename); 68 | testDataX->save("testSample.dat",arma::raw_ascii); 69 | rbm.TestViaReconstruct(testDataX); 70 | } 71 | } 72 | 73 | 74 | 75 | void loadData_MNIST(std::shared_ptr X, 76 | std::shared_ptr Y,const std::string filepath) { 77 | 78 | std::string filename_base(filepath); 79 | std::string filename; 80 | char tag[50]; 81 | char x; 82 | int count; 83 | int numFiles = 10; 84 | int featSize = 28*28; 85 | int labelSize = 10; 86 | int numSamples = 1000; 87 | X->set_size(featSize,numFiles*numSamples); 88 | Y->set_size(labelSize, numFiles*numSamples); 89 | Y->fill(0); 90 | 91 | 92 | for (int i = 0 ; i < numFiles ; i++) { 93 | sprintf(tag,"%d",i); 94 | filename=filename_base+(std::string)tag; 95 | std::cout << filename << std::endl; 96 | std::ifstream infile; 97 | infile.open(filename,std::ios::binary | std::ios::in); 98 | if (infile.is_open()) { 99 | 100 | for (int j = 0 ; j < numSamples ; j++) { 101 | 102 | for (int k =0 ; k 4.8 6 | #### Armadillo linear algebra library [Link] (http://arma.sourceforge.net/) 7 | #### Cuda toolkit [link] (https://developer.nvidia.com/cuda-toolkit) 8 | #### Boost [link] (http://www.boost.org/ ) 9 | #### Gtest [link] (https://code.google.com/p/googletest/) 10 | #### Google protocol buffer [link] (https://developers.google.com/protocol-buffers/) 11 | 12 | -------------------------------------------------------------------------------- /include/ActivationFunc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | 5 | namespace NeuralNet{ 6 | enum ActivationType {softmax, sigmoid, linear, tanh, ReLU}; 7 | 8 | inline ActivationType GetActivationType(DeepLearning::NeuralNetParameter_ActivationType type){ 9 | switch (type) { 10 | case DeepLearning::NeuralNetParameter_ActivationType_sigmoid: 11 | return sigmoid; 12 | break; 13 | case DeepLearning::NeuralNetParameter_ActivationType_linear: 14 | return linear; 15 | break; 16 | case DeepLearning::NeuralNetParameter_ActivationType_tanh: 17 | return tanh; 18 | break; 19 | case DeepLearning::NeuralNetParameter_ActivationType_softmax: 20 | return softmax; 21 | break; 22 | case DeepLearning::NeuralNetParameter_ActivationType_ReLU: 23 | return ReLU; 24 | break; 25 | default: 26 | std::cerr << "invalid activation type" << std::endl; 27 | exit(1); 28 | break; 29 | } 30 | } 31 | 32 | inline void ApplyActivation(std::shared_ptr output, ActivationType actType){ 33 | std::shared_ptr &p=output; 34 | arma::mat maxVal = arma::max(*p,0); 35 | arma::mat sumVal; 36 | switch(actType) { 37 | case softmax: 38 | for (int i = 0; i < p->n_cols; i++) { 39 | p->col(i) -= maxVal(i); 40 | } 41 | (*p).transform([](double 
val) { 42 | return exp(val); 43 | }); 44 | 45 | sumVal = arma::sum(*p, 0); 46 | for (int i = 0; i < p->n_cols; i++) { 47 | p->col(i) /= sumVal(i); 48 | } 49 | break; 50 | case sigmoid: 51 | (*p).transform([](double val) { 52 | return 1.0 / (1.0 + exp(-val)); 53 | }); 54 | break; 55 | case linear: 56 | break; 57 | case ReLU: 58 | p->transform([](double val) { 59 | return val > 0 ? val : 0; 60 | }); 61 | break; 62 | case tanh: 63 | p->transform([](double val){return std::tanh(val);}); 64 | break; 65 | default: 66 | std::cerr << "invalid activation type" << std::endl; 67 | break; 68 | } 69 | } 70 | inline void GetActivationGradient(std::shared_ptr in, std::shared_ptr out, ActivationType actType){ 71 | 72 | if (actType == softmax) { 73 | out->ones(in->n_rows,in->n_cols); 74 | } else if (actType == sigmoid ) { 75 | *out = (1 - (*in)) % (*in); 76 | } else if ( actType == tanh) { 77 | *out = (1 - (*in) % (*in)); 78 | } else if ( actType == linear) { 79 | out->ones(in->n_rows,in->n_cols); 80 | } else if(actType == ReLU){ 81 | *out = *in; 82 | out->transform([](double val) {return val > 0 ? 1.0: 0 ;}); 83 | } 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /include/ActivationLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet { 5 | 6 | struct ActivationLayer: public Layer_unitaryOp { 7 | 8 | ActivationLayer(ActivationType actType0) { 9 | actType = actType0; 10 | }; 11 | ActivationType actType; 12 | virtual void activateUp(); 13 | virtual void calGrad(std::shared_ptr delta_in); 14 | }; 15 | 16 | } 17 | -------------------------------------------------------------------------------- /include/BaseLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | namespace NeuralNet{ 4 | 5 | struct BaseLayer: public Layer_unitaryOp { 6 | BaseLayer() {} 7 | BaseLayer(int inputDim0, int outputDim0, ActivationType actType0, std::shared_ptr init_w = nullptr, 8 | std::shared_ptr init_B = nullptr, bool dropout = false, double dropr=0.3); 9 | /* save weights of the layers 10 | */ 11 | virtual void save(std::string filename = "BaseLayer"); 12 | virtual void load(std::string filename = "BaseLayer"); 13 | /* given the input matrix, perform 14 | outputY = sigma (W*input + B), sigma is the activation function 15 | */ 16 | virtual void activateUp(); 17 | void activateUp(std::shared_ptr input); 18 | /* 19 | given the error propogated from upper layers, update the W and B using gradient descent 20 | */ 21 | void updatePara(std::shared_ptr delta_in, double learningRate); 22 | /* 23 | calculate the gradient and propogate the error but not update W and B 24 | */ 25 | virtual void calGrad(std::shared_ptr delta_in); 26 | virtual void calGrad(std::shared_ptr delta_in, int t); 27 | void accumulateGrad(std::shared_ptr delta_in); 28 | virtual void accumulateGrad(std::shared_ptr delta_in, int t); 29 | void updatePara_accu(double learningRate); 30 | 31 | /* randomly initialize weight and bias*/ 32 | void initializeWeight(); 33 | 34 | int W_size, B_size, totalSize; 35 | /* weight and bias for this layer*/ 36 | std::shared_ptr W, B; 37 | std::shared_ptr grad_W, grad_W_accu, grad_B, grad_B_accu; 38 | /* the error propogated from lower layers*/ 39 | bool dropOutFlag; 40 | double dropOutRate; 41 | std::shared_ptr initializer_W, initializer_B; 42 | arma::mat dropOutMat; 43 | ActivationType actType; 44 | // 
extract out the specific input or output at time point t during backpropagation 45 | // to calculate the gradient 46 | std::shared_ptr getInputMemory(int t); 47 | std::shared_ptr getOutputMemory(int t); 48 | void clearAccuGrad(); 49 | 50 | Random_Bernoulli *randomGen; 51 | void vectoriseGrad(std::shared_ptr V); 52 | void deVectoriseWeight(std::shared_ptr V); 53 | void vectoriseWeight(std::shared_ptr V); 54 | void vectoriseGrad(double *ptr, size_t offset); 55 | void deVectoriseWeight(double *ptr, size_t offset); 56 | void vectoriseWeight(double *ptr, size_t offset); 57 | void fill_Bernoulli(double *, int size); 58 | 59 | 60 | }; 61 | 62 | } 63 | -------------------------------------------------------------------------------- /include/BaseModel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | namespace ReinforcementLearning { 4 | 5 | typedef std::vector State; 6 | 7 | class BaseModel { 8 | public: 9 | virtual ~BaseModel(){} 10 | virtual void run(int action) = 0; 11 | virtual void run(int action, int steps){ 12 | for (int i = 0; i < steps; i++){ 13 | run(action); 14 | } 15 | }; 16 | virtual State getCurrState() { 17 | return currState; 18 | } 19 | virtual void createInitialState() = 0; 20 | virtual int getNumActions(){ return numActions;} 21 | virtual double getRewards() {} 22 | virtual bool terminate() {} 23 | protected: 24 | State currState, prevState; 25 | int numActions; 26 | int stateDim; 27 | }; 28 | 29 | struct Experience{ 30 | State oldState, newState; 31 | int action; 32 | double reward; 33 | Experience(State old0, State new0, int a0, double c0): 34 | oldState(old0),newState(new0), action(a0), reward(c0) 35 | {} 36 | }; 37 | 38 | } 39 | -------------------------------------------------------------------------------- /include/ElementMultiAddLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | namespace NeuralNet { 4 | 5 | struct ElementMultiAddLayer : public Layer_binaryOp { 6 | ElementMultiAddLayer(); 7 | virtual ~ElementMultiAddLayer(){} 8 | virtual void activateUp(); 9 | virtual void calGrad(std::shared_ptr delta_in); 10 | virtual void calGrad(std::shared_ptr delta_in, int t); 11 | void saveWeightMem(); 12 | std::shared_ptr W_one, W_two; 13 | std::shared_ptr grad_W_one, grad_W_two; 14 | std::vector> W_one_mem, W_two_mem; 15 | 16 | }; 17 | } 18 | -------------------------------------------------------------------------------- /include/ElementwiseLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet { 5 | 6 | struct ElementwiseLayer: public Layer_binaryOp { 7 | 8 | ElementwiseLayer() { 9 | //we only need to assign memory to the output 10 | delta_outOne = std::make_shared(); 11 | delta_outTwo = std::make_shared(); 12 | output = std::make_shared(); 13 | }; 14 | virtual void activateUp(); 15 | virtual void calGrad(std::shared_ptr delta_in, int timePoint); 16 | virtual void calGrad(std::shared_ptr delta_in); 17 | }; 18 | 19 | } 20 | -------------------------------------------------------------------------------- /include/ElmanRL.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "RNN.h" 7 | #include "common.h" 8 | namespace NeuralNet { 9 | 10 | class ElmanRL: public RNN { 11 | 12 | public: 13 | 
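    // Elman-style recurrent network specialised for the RL solvers: the first
    // rnnInputDim rows of each input column feed the recurrent stack, while the
    // remaining rows (the action part) are joined onto the top recurrent output
    // before the feed-forward base layers (see forwardInTime in src/ElmanRL.cpp).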
ElmanRL(DeepLearning::NeuralNetParameter); 14 | virtual ~ElmanRL(){} 15 | 16 | // implementing methods required by Net interface 17 | virtual arma::mat forwardInTime(std::shared_ptr x); 18 | void backward(); 19 | }; 20 | } 21 | 22 | 23 | -------------------------------------------------------------------------------- /include/Globals.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | template 15 | void Swap(T a, T b) { 16 | T t = a; 17 | a = b; 18 | b = t; 19 | } 20 | 21 | #define ASSERT(TST) ( (TST) ? (void)0 : (std::cerr << __FILE__ "(" << __LINE__ << "): Assertion failed " #TST << std::endl,abort()) ) 22 | 23 | static const double INFTY = std::numeric_limits::infinity(); 24 | 25 | static const double NaN = std::numeric_limits::quiet_NaN(); 26 | 27 | static const double TOL = pow(std::numeric_limits::epsilon(), (double)1.0 / 3); 28 | 29 | static bool IsClose(double a, double b) { 30 | return abs(a - b) < TOL; 31 | } 32 | 33 | static bool IsNaN(double x) { return boost::math::isnan(x); } 34 | 35 | static bool IsInf(double x) { return boost::math::isinf(x); } 36 | 37 | static bool IsDangerous(double x) { return IsNaN(x) || IsInf(x); } 38 | 39 | static double LogSum(double x, double y) { 40 | double d = x - y; 41 | if (d < -30) return y; 42 | else if (d > 30) return x; 43 | else if (d > 0) return x + log(1.0 + exp(-d)); 44 | else return y + log(1.0 + exp(d)); 45 | } 46 | 47 | static double Logistic(double x) { 48 | if (x < -30) return 0; 49 | else if (x > 30) return 1; 50 | else return 1.0 / (1.0 + exp(-x)); 51 | } 52 | 53 | static double LogLoss(double x) { 54 | if (x < -30) return -x; 55 | else if (x > 30) return 0; 56 | else return log(1 + exp(-x)); 57 | } 58 | 59 | template 60 | void Serialize(const C & c, const string & filename) { 61 | ofstream outStream(filename, ios::out|ios::binary); 62 | if (!outStream.is_open()) { 63 | cout << "Couldn't open serialized file " << filename.c_str() << endl; 64 | exit(1); 65 | } 66 | 67 | c.Serialize(outStream); 68 | 69 | outStream.close(); 70 | } 71 | 72 | template 73 | void Deserialize(C & c, const string & filename) { 74 | ifstream inStream(filename, ios::in|ios::binary); 75 | if (!inStream.is_open()) { 76 | cout << "Couldn't open serialized file " << filename.c_str() << endl; 77 | exit(1); 78 | } 79 | 80 | c.Deserialize(inStream); 81 | 82 | inStream.close(); 83 | } 84 | -------------------------------------------------------------------------------- /include/Initializer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace NeuralNet{ 6 | class Initializer{ 7 | public: 8 | virtual ~Initializer() {} 9 | virtual void applyInitialization(std::shared_ptr) = 0; 10 | }; 11 | 12 | class Initializer_normal: public Initializer{ 13 | public: 14 | Initializer_normal(double std0, double mean0):std(std0), mean(mean0){} 15 | virtual ~Initializer_normal(){} 16 | virtual void applyInitialization(std::shared_ptr m){ 17 | m->randn(); 18 | m->transform([&](double val){ return val*std + mean;}); 19 | } 20 | private: 21 | double std, mean; 22 | 23 | }; 24 | 25 | class Initializer_zero: public Initializer{ 26 | public: 27 | Initializer_zero(){} 28 | virtual ~Initializer_zero(){} 29 | virtual void applyInitialization(std::shared_ptr m){ 30 | m->zeros(); 31 | } 32 | }; 33 | 34 | class 
Initializer_identity: public Initializer{ 35 | public: 36 | Initializer_identity(){} 37 | virtual ~Initializer_identity(){} 38 | virtual void applyInitialization(std::shared_ptr m){ 39 | m->eye(); 40 | } 41 | }; 42 | class Initializer_glorot_uniform: public Initializer{ 43 | public: 44 | Initializer_glorot_uniform(){} 45 | virtual ~Initializer_glorot_uniform(){} 46 | virtual void applyInitialization(std::shared_ptr W){ 47 | int inputDim = W->n_cols; 48 | int outputDim = W->n_rows; 49 | W->randu(); 50 | (*W) -= 0.5; 51 | (*W) *=sqrt(6.0/(inputDim+outputDim)); 52 | } 53 | }; 54 | 55 | class InitializerBuilder{ 56 | public: 57 | inline static std::shared_ptr GetInitializer(const DeepLearning::NeuralNetInitializerParameter para){ 58 | switch (para.initializertype()) { 59 | case DeepLearning::NeuralNetInitializerParameter_InitializerType_normal: 60 | return std::shared_ptr(new Initializer_normal(para.normal_std(), para.normal_mean())); 61 | break; 62 | case DeepLearning::NeuralNetInitializerParameter_InitializerType_zero: 63 | return std::shared_ptr(new Initializer_zero); 64 | break; 65 | case DeepLearning::NeuralNetInitializerParameter_InitializerType_identity: 66 | return std::shared_ptr(new Initializer_identity); 67 | break; 68 | case DeepLearning::NeuralNetInitializerParameter_InitializerType_glorot_uniform: 69 | return std::shared_ptr(new Initializer_glorot_uniform); 70 | break; 71 | default: 72 | break; 73 | } 74 | } 75 | }; 76 | 77 | 78 | } 79 | 80 | 81 | -------------------------------------------------------------------------------- /include/Layer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet{ 5 | 6 | struct Layer{ 7 | public: 8 | virtual ~Layer(){} 9 | Layer(){} 10 | Layer(int outputDim0):outputDim(outputDim0){} 11 | virtual void activateUp() = 0; 12 | virtual void calGrad(std::shared_ptr delta_in) = 0; 13 | virtual void calGrad(std::shared_ptr delta_in, int t) {} 14 | virtual void save(std::string filename){} 15 | virtual void load(std::string filename){} 16 | virtual void initializeWeight(){} 17 | virtual std::shared_ptr getOutput(){ return output;} 18 | int outputDim; 19 | std::shared_ptr output; 20 | std::vector> outputMem; 21 | virtual void saveOutputMemory(); 22 | }; 23 | 24 | inline void Layer::saveOutputMemory(){ 25 | outputMem.push_back(std::shared_ptr(new arma::mat(*output))); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /include/Layer_binaryOp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet{ 5 | 6 | struct Layer_binaryOp : public Layer{ 7 | public: 8 | virtual ~Layer_binaryOp(){} 9 | Layer_binaryOp(){} 10 | Layer_binaryOp(int inputOneDim0, int inputTwoDim0, int outputDim0):Layer(outputDim0), inputOneDim(inputOneDim0), inputTwoDim(inputTwoDim0){} 11 | // save inputs at all time points during the LSTM forward pass 12 | virtual void saveInputMemory(); 13 | virtual void setInputOne(std::shared_ptr input0){ inputOne = input0;} 14 | virtual void setInputTwo(std::shared_ptr input0){ inputTwo = input0;} 15 | virtual std::shared_ptr getDelta_outOne() {return delta_outOne;} 16 | virtual std::shared_ptr getDelta_outTwo() {return delta_outTwo;} 17 | 18 | std::shared_ptr inputOne, inputTwo; 19 | std::shared_ptr delta_outOne, delta_outTwo; 20 | int inputOneDim, inputTwoDim; 21 | std::vector> inputOneMem, inputTwoMem; 22 | 23 
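    // inputOneMem / inputTwoMem cache the per-time-step inputs pushed by saveInputMemory(),
    // so that calGrad(delta_in, t) can be evaluated against the right snapshot during
    // backpropagation through time.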
| }; 24 | 25 | inline void Layer_binaryOp::saveInputMemory(){ 26 | inputOneMem.push_back(inputOne); 27 | inputTwoMem.push_back(inputTwo); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /include/Layer_unitaryOp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet{ 5 | 6 | struct Layer_unitaryOp : public Layer{ 7 | public: 8 | virtual ~Layer_unitaryOp(){} 9 | Layer_unitaryOp(){} 10 | Layer_unitaryOp(int inputDim0, int outputDim0):Layer(outputDim0), inputDim(inputDim0){} 11 | // save inputs at all time points during the LSTM forward pass 12 | virtual void saveInputMemory(); 13 | virtual void setInput(std::shared_ptr input0){ input = input0;} 14 | virtual std::shared_ptr getDelta_out() {return delta_out;} 15 | std::shared_ptr input; 16 | std::shared_ptr delta_out; 17 | int inputDim; 18 | std::vector> inputMem; 19 | 20 | }; 21 | 22 | inline void Layer_unitaryOp::saveInputMemory(){ 23 | inputMem.push_back(input); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /include/LinearAdditionLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet { 5 | 6 | struct LinearAdditionLayer: public Layer_binaryOp { 7 | 8 | LinearAdditionLayer() { 9 | output = std::make_shared(); 10 | } 11 | virtual void activateUp(); 12 | virtual void calGrad(std::shared_ptr delta_in); 13 | 14 | 15 | }; 16 | 17 | } 18 | -------------------------------------------------------------------------------- /include/MultiAddLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | namespace NeuralNet { 4 | 5 | struct MultiAddLayer : public Layer_binaryOp { 6 | MultiAddLayer(){} 7 | MultiAddLayer(int in1, int in2, int out, ActivationType actType0, 8 | std::shared_ptr init_W_one, std::shared_ptr init_W_two, 9 | std::shared_ptr init_B); 10 | virtual ~MultiAddLayer(){} 11 | virtual void activateUp(); 12 | virtual void save(std::string filename = "MultiAddLayer"); 13 | virtual void load(std::string filename = "MultiAddLayer"); 14 | virtual void calGrad(std::shared_ptr delta_in); 15 | virtual void calGrad(std::shared_ptr delta_in, int t); 16 | virtual void initializeWeight(); 17 | void accumulateGrad(std::shared_ptr delta_in, int t); 18 | void clearAccuGrad(); 19 | ActivationType actType; 20 | std::shared_ptr W_one, W_two, B; 21 | std::shared_ptr grad_W_one, grad_W_two, grad_B; 22 | std::shared_ptr grad_W_one_accu, grad_W_two_accu, grad_B_accu; 23 | std::shared_ptr initializer_W_one, initializer_W_two, initializer_B; 24 | 25 | 26 | }; 27 | } 28 | -------------------------------------------------------------------------------- /include/MultiLayerPerceptron.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "common.h" 4 | #include "BaseLayer.h" 5 | #include "optimization.h" 6 | #include "DeepLearning.pb.h" 7 | #include "Net.h" 8 | namespace NeuralNet { 9 | 10 | class MultiLayerPerceptron : public Net { 11 | public: 12 | MultiLayerPerceptron(DeepLearning::NeuralNetParameter); 13 | virtual ~MultiLayerPerceptron() { 14 | } 15 | void train(); 16 | void initialize(); 17 | /* forward pass*/ 18 | void feedForward(std::shared_ptr); 19 | /* back propogate the error to update the parameters*/ 20 | 
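    /* (the delta passed in is the error at the network output; each layer's calGrad turns it
       into the delta for the layer below. An external trainer can drive the same machinery
       through the Net interface: forward() -> calGradient() -> netGradients() -> applyUpdates().) */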
void backProp(std::shared_ptr, double learningRate); 21 | void backProp(std::shared_ptr); 22 | void test(std::shared_ptr trainingX, std::shared_ptr trainingY); 23 | /* calculate the numerical gradient for testing*/ 24 | void calNumericGrad(std::shared_ptr trainingX, std::shared_ptr trainingY); 25 | void vectoriseGrad(arma::vec &grad); 26 | void deVectoriseWeight(arma::vec &x); 27 | void vectoriseWeight(arma::vec &x); 28 | void calLoss(std::shared_ptr delta); 29 | virtual void forward(); 30 | virtual void applyUpdates(std::vector>); 31 | virtual void calGradient(); 32 | virtual double getLoss(); 33 | virtual void save(std::string filename); 34 | virtual void load(std::string filename); 35 | virtual std::shared_ptr netOutput() { 36 | return netOutput_; 37 | } 38 | private: 39 | int numLayers; 40 | int numInstance; 41 | bool testGrad; 42 | double error; 43 | /**the collection of Base layers*/ 44 | std::vector layers; 45 | /* dimension parameters for each layer*/ 46 | std::vector dimensions; 47 | /* network output*/ 48 | std::shared_ptr netOutput_; 49 | int totalDim; 50 | 51 | }; 52 | 53 | class MLPTrainer : public Optimization::ObjectFunc { 54 | public: 55 | MLPTrainer(MultiLayerPerceptron &MLP); 56 | 57 | ~MLPTrainer() { 58 | } 59 | virtual double operator()(arma::vec &x, arma::vec &grad); 60 | // std::shared_ptr x_init; 61 | private: 62 | MultiLayerPerceptron &MLP; 63 | }; 64 | } 65 | -------------------------------------------------------------------------------- /include/Net.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | 4 | namespace NeuralNet{ 5 | class Net { 6 | public: 7 | virtual ~Net() { } 8 | virtual void applyUpdates(std::vector>) = 0; 9 | virtual std::vector> netGradients() { return netGradVector;} 10 | virtual void setTrainingSamples(std::shared_ptr X, std::shared_ptr Y){ trainingX = X; trainingY = Y;} 11 | virtual void calGradient() = 0; 12 | virtual double getLoss() = 0; 13 | virtual void save(std::string filename) = 0; 14 | virtual void load(std::string filename) = 0; 15 | virtual void forward() = 0; 16 | virtual std::shared_ptr netOutput() = 0; 17 | // the following are RNN specific 18 | virtual void resetNetState(){} 19 | virtual arma::mat forwardInTime(std::shared_ptr x){} 20 | //virtual std::shared_ptr netOutputAtTime(int time){return 0;} 21 | virtual void zeroTime(){} 22 | virtual void updateInternalState(){} 23 | virtual void resetWeight(){} 24 | protected: 25 | DeepLearning::NeuralNetParameter neuralNetPara; 26 | std::vector> netGradVector; 27 | std::shared_ptr trainingX, trainingY; 28 | }; 29 | 30 | } 31 | -------------------------------------------------------------------------------- /include/RNN.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "BaseLayer.h" 7 | #include "RecurrLayer.h" 8 | #include "common.h" 9 | namespace NeuralNet { 10 | 11 | class RNN: public Net { 12 | 13 | public: 14 | RNN(DeepLearning::NeuralNetParameter); 15 | virtual ~RNN(){} 16 | 17 | // implementing methods required by Net interface 18 | virtual void forward(); 19 | virtual void applyUpdates(std::vector>); 20 | virtual void calGradient(); 21 | virtual double getLoss(); 22 | virtual void save(std::string filename); 23 | virtual void load(std::string filename); 24 | virtual std::shared_ptr netOutput(); 25 | // virtual std::shared_ptr netOutputAtTime(int time); 26 | virtual arma::mat 
forwardInTime(std::shared_ptr x); 27 | virtual void resetNetState(); 28 | virtual void updateInternalState(); 29 | virtual void resetWeight(); 30 | virtual void zeroTime(); 31 | 32 | 33 | virtual void backward(); 34 | virtual void calNumericGrad(); 35 | virtual void saveLayerInputOutput(); 36 | virtual int getTime(); 37 | virtual void setTime(int t); 38 | virtual BaseLayer getOutputLayer(){return baseLayers[numBaseLayers - 1];} 39 | virtual std::vector getRecurrLayers(){ return recurrLayers;} 40 | protected: 41 | void fillNetGradVector(); 42 | std::shared_ptr netOutput_; 43 | std::vector recurrLayers; 44 | std::vector baseLayers; 45 | int numRecurrLayers, recurrLayerInputDim, recurrLayerOutputDim, numBaseLayers; 46 | int rnnInputDim, rnnOutputDim; 47 | int time; 48 | }; 49 | } 50 | 51 | 52 | -------------------------------------------------------------------------------- /include/RecurrLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | #include "MultiAddLayer.h" 4 | namespace NeuralNet { 5 | 6 | struct RecurrLayer : public MultiAddLayer { 7 | RecurrLayer(){} 8 | RecurrLayer(int in1, int in2, int out, ActivationType actType0, 9 | std::shared_ptr init_W_one, std::shared_ptr init_W_two, 10 | std::shared_ptr init_B); 11 | virtual ~RecurrLayer(){} 12 | void savePrevOutput(); 13 | void savePrevDeltaOutOne(); 14 | std::shared_ptr getPrevOutput(){ return output_prev;} 15 | std::shared_ptr getPrevDeltaOutOne() { return delta_outOne_prev;} 16 | std::shared_ptr output_prev; 17 | std::shared_ptr delta_outOne_prev; 18 | }; 19 | } 20 | -------------------------------------------------------------------------------- /include/Util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | namespace NeuralNet{ 7 | template 8 | struct Random_Bernoulli{ 9 | // std::random_device rd; 10 | std::mt19937 gen; 11 | std::bernoulli_distribution *d; 12 | 13 | Random_Bernoulli(double p){ 14 | d = new std::bernoulli_distribution(p); 15 | } 16 | 17 | double next(){ 18 | if((*d)(gen)) return 1.0; 19 | else return 0.0; 20 | } 21 | 22 | void modifier(T *p, int size){ 23 | for (int i = 0; i < size; i++){ 24 | // perform "drop" 25 | if((*d)(gen)) 26 | *(p+i) = (T)(0); 27 | } 28 | } 29 | }; 30 | 31 | class RandomStream{ 32 | private: 33 | std::shared_ptr genPtr; 34 | std::shared_ptr> randomPtr_unitformReal; 35 | std::shared_ptr> randomPtr_unitformInt; 36 | public: 37 | RandomStream(){ 38 | 39 | std::random_device rd; 40 | genPtr = std::make_shared(rd()); 41 | randomPtr_unitformReal = std::make_shared>(0.0, 1.0); 42 | } 43 | RandomStream(int low , int high){ 44 | 45 | std::random_device rd; 46 | genPtr = std::make_shared(rd()); 47 | 48 | randomPtr_unitformReal = std::make_shared>(0.0, 1.0); 49 | randomPtr_unitformInt = std::make_shared>(low, high); 50 | } 51 | double nextDou(){return (*randomPtr_unitformReal)(*genPtr);} 52 | int nextInt(){return (*randomPtr_unitformInt)(*genPtr);} 53 | }; 54 | 55 | void loadData_MNIST(std::shared_ptr X, std::shared_ptr Y, std::string filename); 56 | } 57 | 58 | 59 | -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "io.h" 11 | #include 
"DeepLearning.pb.h" 12 | #include "Util.h" 13 | #include "Initializer.h" 14 | #include "ActivationFunc.h" 15 | #include "Layer.h" 16 | #include "Layer_unitaryOp.h" 17 | #include "Layer_binaryOp.h" 18 | #include "Net.h" 19 | #include 20 | -------------------------------------------------------------------------------- /include/io.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using google::protobuf::io::FileInputStream; 14 | using google::protobuf::io::FileOutputStream; 15 | using google::protobuf::io::ZeroCopyInputStream; 16 | using google::protobuf::io::CodedInputStream; 17 | using google::protobuf::io::ZeroCopyOutputStream; 18 | using google::protobuf::io::CodedOutputStream; 19 | using google::protobuf::Message; 20 | namespace DeepLearning{ 21 | inline bool ReadProtoFromTextFile(const char* filename, Message* proto) { 22 | int fd = open(filename, O_RDONLY); 23 | // CHECK_NE(fd, -1) << "File not found: " << filename; 24 | FileInputStream* input = new FileInputStream(fd); 25 | bool success = google::protobuf::TextFormat::Parse(input, proto); 26 | delete input; 27 | // close(fd); 28 | return success; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /include/optimization.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "Globals.h" 8 | 9 | namespace Optimization{ 10 | 11 | struct ObjectFunc{ 12 | ObjectFunc(int dim0 = 0):dim(dim0){} 13 | ~ObjectFunc(){} 14 | int dim; 15 | std::shared_ptr x_init; 16 | virtual double operator()(arma::vec &x, arma::vec &grad) = 0; 17 | }; 18 | 19 | class LBFGS{ 20 | // typedef double (* evaluateFunc)(const arma::vec x, arma::vec grad, const int n); 21 | public: 22 | enum LineSearch {Wolfe, Armijo, MoreThuente}; 23 | struct LBFGS_param{ 24 | int maxIter; 25 | int memoryLimit; 26 | int maxLineSearch; 27 | double maxStepSize; 28 | double minStepSize; 29 | int saveFrequency; 30 | std::string saveFileName; 31 | LBFGS_param(int, int, int, std::string);}; 32 | struct PointValueDeriv { 33 | double step, value, deriv; 34 | PointValueDeriv(double step0 = NaN, double value0 = NaN, double deriv0 = NaN) : 35 | step(step0), value(value0), deriv(deriv0) { } 36 | }; 37 | LBFGS(ObjectFunc &func, LBFGS_param param0, LineSearch method); 38 | void calDirection(); 39 | void updateParam(); 40 | void calStepLength_Armijo(); 41 | void calStepLength_Wolfe(); 42 | void calStepLength_MoreThuente(); 43 | bool converge(); 44 | void minimize(); 45 | void saveWeight(std::string str); 46 | double cubicInterp(const LBFGS::PointValueDeriv& p0, const LBFGS::PointValueDeriv& p1); 47 | ObjectFunc &calValGrad; 48 | LBFGS_param param; 49 | double maxIter; 50 | double step; 51 | double currValue; 52 | int memoryLimit; 53 | LineSearch lineSearchMethod; 54 | // s_{k-1} = x_k - x_{k-1} 55 | // y_{k-1} = (grad_k - grad_{k-1}) 56 | std::deque s_list, y_list; 57 | // rho_k =1.0 /(y_k^T * s_k) 58 | std::deque rho_list; 59 | std::vector alpha_list; 60 | arma::vec direction; 61 | arma::vec grad, x, x_init, x_new, grad_new; 62 | }; 63 | 64 | class SteepDescent{ 65 | public: 66 | struct SteepDescent_param{ 67 | SteepDescent_param(double eps0, double step0, int maxIter0): 68 | eps(eps0), step(step0), maxIter(maxIter0){} 69 | double eps; 70 | double step; 71 | 
int maxIter;}; 72 | SteepDescent(ObjectFunc &func, SteepDescent_param param0); 73 | void minimize(); 74 | private: 75 | // bool converged(); 76 | double eps; 77 | double step; 78 | int maxIter; 79 | arma::vec grad, grad_new, x, x_new; 80 | double currValue; 81 | SteepDescent_param param; 82 | ObjectFunc &calValGrad; 83 | 84 | }; 85 | 86 | 87 | 88 | 89 | } -------------------------------------------------------------------------------- /plotting/Driver.py: -------------------------------------------------------------------------------- 1 | from Util import tile_raster_images 2 | import numpy as np 3 | try: 4 | import PIL.Image as Image 5 | except ImportError: 6 | import Image 7 | 8 | 9 | data = np.genfromtxt("../RBM/reconstruct.dat") 10 | 11 | 12 | image = Image.fromarray(tile_raster_images(X=data, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) 13 | image.show() 14 | image.save('reconstruct.png') 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/ActivationLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "ActivationLayer.h" 2 | 3 | namespace NeuralNet{ 4 | 5 | 6 | void ActivationLayer::activateUp(){ 7 | output = input; 8 | ApplyActivation(input, actType); 9 | }; 10 | 11 | void ActivationLayer::calGrad(std::shared_ptr delta_in){ 12 | delta_out = delta_in; 13 | GetActivationGradient(delta_in, delta_out, actType); 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /src/ElementMultiAddLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "ElementMultiAddLayer.h" 2 | 3 | using namespace NeuralNet; 4 | 5 | ElementMultiAddLayer::ElementMultiAddLayer(){ 6 | grad_W_one = std::make_shared(); 7 | grad_W_two = std::make_shared(); 8 | delta_outOne = std::make_shared(); 9 | delta_outTwo = std::make_shared(); 10 | output = std::make_shared(); 11 | } 12 | 13 | void ElementMultiAddLayer::activateUp(){ 14 | *output = (*W_one) % (*inputOne) + (*W_two) % (*inputTwo); 15 | } 16 | 17 | 18 | void ElementMultiAddLayer::calGrad(std::shared_ptr delta_in){ 19 | 20 | *grad_W_one = (*inputOne); 21 | *grad_W_two = (*inputTwo); 22 | 23 | (*delta_outOne) = *W_one; 24 | (*delta_outTwo) = *W_two; 25 | } 26 | 27 | void ElementMultiAddLayer::calGrad(std::shared_ptr delta_in, int t){ 28 | 29 | grad_W_one = inputOneMem[t]; 30 | grad_W_two = inputTwoMem[t]; 31 | 32 | delta_outOne = W_one_mem[t]; 33 | delta_outTwo = W_two_mem[t]; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /src/ElementwiseLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "ElementwiseLayer.h" 2 | 3 | namespace NeuralNet{ 4 | 5 | 6 | void ElementwiseLayer::activateUp(){ 7 | // elementwise product 8 | (*output) = (*inputOne) % (*inputTwo); 9 | }; 10 | 11 | void ElementwiseLayer::calGrad(std::shared_ptr delta_in){ 12 | (*delta_outOne) = (*inputTwo) % (*delta_in); 13 | (*delta_outTwo) = (*inputOne) % (*delta_in); 14 | 15 | } 16 | 17 | void ElementwiseLayer::calGrad(std::shared_ptr delta_in, int timePoint){ 18 | (*delta_outOne) = (*inputTwoMem[timePoint]) % (*delta_in); 19 | (*delta_outTwo) = (*inputOneMem[timePoint]) % (*delta_in); 20 | 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /src/ElmanRL.cpp: 
-------------------------------------------------------------------------------- 1 | #include "ElmanRL.h" 2 | 3 | using namespace NeuralNet; 4 | using namespace DeepLearning; 5 | 6 | ElmanRL::ElmanRL(NeuralNetParameter neuralNetPara0):RNN(neuralNetPara0){ 7 | } 8 | 9 | arma::mat ElmanRL::forwardInTime(std::shared_ptr input) { 10 | std::shared_ptr commonInput(new arma::mat); 11 | if (this->time == 0) { 12 | for (int l = 0; l < numRecurrLayers; l++) { 13 | (recurrLayers[l].getPrevOutput())->zeros(recurrLayerOutputDim, 1); 14 | recurrLayers[l].inputOneMem.clear(); 15 | recurrLayers[l].inputTwoMem.clear(); 16 | recurrLayers[l].outputMem.clear(); 17 | } 18 | for (int l = 0; l < numBaseLayers; l++) { 19 | baseLayers[l].inputMem.clear(); 20 | baseLayers[l].outputMem.clear(); 21 | } 22 | } 23 | for (int l = 0; l < numRecurrLayers; l++) { 24 | recurrLayers[l].inputOne = std::shared_ptr(new arma::mat(*(recurrLayers[l].getPrevOutput()))); 25 | 26 | if (l == 0) { 27 | 28 | recurrLayers[l].inputTwo = std::shared_ptr(new arma::mat(input->rows(0, rnnInputDim - 1))); 29 | } else { 30 | recurrLayers[l].inputTwo = std::shared_ptr(new arma::mat(*(recurrLayers[l - 1].output))); 31 | } 32 | recurrLayers[l].activateUp(); 33 | #if 0 34 | recurrLayers[l].W_one->print("W_one"); 35 | recurrLayers[l].W_two->print("W_two"); 36 | recurrLayers[l].B->print("B"); 37 | recurrLayers[l].inputOne->print("input one"); 38 | recurrLayers[l].inputTwo->print("input two"); 39 | recurrLayers[l].output->print("output"); 40 | #endif 41 | } 42 | 43 | for (int l = 0; l < numBaseLayers; l++) { 44 | if (l == 0) { 45 | arma::mat action(input->rows(rnnInputDim, input->n_rows - 1)); 46 | *commonInput = arma::join_cols(*(recurrLayers[numRecurrLayers-1].output), action); 47 | baseLayers[l].input = commonInput; 48 | } else { 49 | baseLayers[l].input = baseLayers[l - 1].output; 50 | } 51 | baseLayers[l].activateUp(); 52 | } 53 | return *(baseLayers[numBaseLayers - 1].output); 54 | } 55 | 56 | void ElmanRL::backward() { 57 | 58 | std::shared_ptr delta(new arma::mat); 59 | for (int l = 0; l < numRecurrLayers; l++) { 60 | recurrLayers[l].clearAccuGrad(); 61 | } 62 | for (int l = 0; l < numBaseLayers; l++) { 63 | baseLayers[l].clearAccuGrad(); 64 | } 65 | int T = trainingY->n_cols; 66 | for (int t = T - 1; t >= 0; t--){ 67 | // the top most layer from target - network's output 68 | *delta = *(baseLayers[numBaseLayers - 1].outputMem[t]) - trainingY->col(t); 69 | for (int l = numBaseLayers - 1; l >=0; l--) { 70 | baseLayers[l].accumulateGrad(delta, t); 71 | *delta = *(baseLayers[l].delta_out); 72 | } 73 | for (int l = numRecurrLayers - 1; l >= 0; l--){ 74 | // delta error from the same time, propagate from upper layer to lower layer 75 | if (l == numRecurrLayers - 1){ 76 | *delta = baseLayers[0].delta_out->rows(0, recurrLayerOutputDim - 1); 77 | }else{ 78 | *delta = *(recurrLayers[l+1].delta_outTwo); 79 | } 80 | 81 | if (t < T - 1) { 82 | *delta += *(recurrLayers[l].getPrevDeltaOutOne()); 83 | } 84 | // so far, the generated delta error is for the output h of each layer at each time 85 | recurrLayers[l].accumulateGrad(delta, t); 86 | recurrLayers[l].savePrevDeltaOutOne(); 87 | } 88 | } 89 | } 90 | 91 | -------------------------------------------------------------------------------- /src/LinearAdditionLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "LinearAdditionLayer.h" 2 | 3 | namespace NeuralNet{ 4 | 5 | 6 | void LinearAdditionLayer::activateUp(){ 7 | (*output) = (*inputOne) + (*inputTwo); 8 
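    // element-wise sum of the two inputs; calGrad below simply forwards delta_in to both
    // branches, since d(output)/d(inputOne) and d(output)/d(inputTwo) are both the identity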
| }; 9 | 10 | void LinearAdditionLayer::calGrad(std::shared_ptr delta_in){ 11 | delta_outOne = delta_in; 12 | delta_outTwo = delta_in; 13 | } 14 | 15 | } 16 | 17 | -------------------------------------------------------------------------------- /src/MultiAddLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "MultiAddLayer.h" 2 | 3 | using namespace NeuralNet; 4 | 5 | MultiAddLayer::MultiAddLayer(int in1, int in2, int out, ActivationType actType0, std::shared_ptr init_W_one, std::shared_ptr init_W_two, 6 | std::shared_ptr init_B): 7 | Layer_binaryOp(in1,in2,out),actType(actType0), 8 | initializer_W_one(init_W_one), initializer_W_two(init_W_two), 9 | initializer_B(init_B){ 10 | initializeWeight(); 11 | grad_W_one = std::make_shared(); 12 | grad_W_two = std::make_shared(); 13 | grad_B = std::make_shared(); 14 | grad_W_one_accu = std::make_shared(outputDim,inputOneDim, arma::fill::zeros); 15 | grad_W_two_accu = std::make_shared(outputDim,inputTwoDim, arma::fill::zeros); 16 | grad_B_accu = std::make_shared(outputDim,1, arma::fill::zeros); 17 | delta_outOne = std::make_shared(); 18 | delta_outTwo = std::make_shared(); 19 | output = std::make_shared(); 20 | 21 | } 22 | 23 | void MultiAddLayer::activateUp(){ 24 | *output = (*W_one) * (*inputOne) + (*W_two) * (*inputTwo); 25 | for (int i = 0; i < output->n_cols; i++) output->col(i) += *B; 26 | ApplyActivation(output, actType); 27 | } 28 | 29 | void MultiAddLayer::initializeWeight(){ 30 | 31 | W_one = std::make_shared(outputDim, inputOneDim); 32 | W_two = std::make_shared(outputDim, inputTwoDim); 33 | B = std::make_shared(outputDim, 1); 34 | 35 | if (initializer_W_one == nullptr || initializer_W_two == nullptr ||initializer_B == nullptr) { 36 | std::cerr << "initializer is null!" 
<< std::endl; 37 | exit(1); 38 | } else { 39 | initializer_W_one->applyInitialization(W_one); 40 | initializer_W_two->applyInitialization(W_two); 41 | initializer_B->applyInitialization(B); 42 | } 43 | } 44 | 45 | void MultiAddLayer::calGrad(std::shared_ptr delta_in){ 46 | //for delta: each column is the delta of a sample 47 | std::shared_ptr deriv(new arma::mat); 48 | GetActivationGradient(output, deriv, this->actType); 49 | arma::mat delta; 50 | 51 | delta = (*delta_in) % (*deriv); 52 | *grad_B = arma::sum(delta,1); 53 | *grad_W_one = delta * (*inputOne).st(); 54 | *grad_W_two = delta * (*inputTwo).st(); 55 | 56 | (*delta_outOne) = W_one->st() * (delta); 57 | (*delta_outTwo) = W_two->st() * (delta); 58 | } 59 | 60 | void MultiAddLayer::calGrad(std::shared_ptr delta_in, int t){ 61 | std::shared_ptr deriv(new arma::mat); 62 | GetActivationGradient(outputMem[t], deriv, this->actType); 63 | arma::mat delta; 64 | 65 | delta = (*delta_in) % (*deriv); 66 | *grad_B = arma::sum(delta,1); 67 | *grad_W_one = delta * (*inputOneMem[t]).st(); 68 | *grad_W_two = delta * (*inputTwoMem[t]).st(); 69 | 70 | (*delta_outOne) = W_one->st() * (delta); 71 | (*delta_outTwo) = W_two->st() * (delta); 72 | } 73 | 74 | void MultiAddLayer::save(std::string filename) { 75 | W_one->save(filename+"_W_one.dat",arma::raw_ascii); 76 | W_two->save(filename+"_W_two.dat",arma::raw_ascii); 77 | B->save(filename+"_B.dat",arma::raw_ascii); 78 | } 79 | void MultiAddLayer::load(std::string filename) { 80 | W_one->load(filename+"_W_one.dat",arma::raw_ascii); 81 | W_two->load(filename+"_W_two.dat",arma::raw_ascii); 82 | B->load(filename+"_B.dat",arma::raw_ascii); 83 | } 84 | 85 | void MultiAddLayer::accumulateGrad(std::shared_ptr delta_in, int t) { 86 | calGrad(delta_in, t); 87 | *grad_B_accu += *grad_B; 88 | *grad_W_one_accu += *grad_W_one; 89 | *grad_W_two_accu += *grad_W_two; 90 | } 91 | 92 | void MultiAddLayer::clearAccuGrad(){ 93 | (*grad_B_accu).zeros(); 94 | (*grad_W_one_accu).zeros(); 95 | (*grad_W_two_accu).zeros(); 96 | } -------------------------------------------------------------------------------- /src/Proto/DeepLearning.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | package DeepLearning; 3 | message NeuralNetParameter { 4 | optional string name = 1; 5 | optional string type = 2; 6 | enum ActivationType {sigmoid = 1; tanh = 2; linear = 3; softmax = 4; ReLU = 5;} 7 | repeated LayerStructParameter layerStruct = 100; 8 | optional NeuralNetTrainingParameter neuralNetTrainingParameter= 101; 9 | optional RNNStructParameter rnnStruct = 102; 10 | 11 | } 12 | 13 | message ReinforcementLearningParameter{ 14 | optional QLearningSolverParameter qLearningSolverParameter= 100; 15 | } 16 | 17 | message LayerStructParameter { 18 | optional int32 inputDim = 1; 19 | optional int32 outputDim = 2; 20 | optional NeuralNetParameter.ActivationType activationType = 3; 21 | optional string name = 4; 22 | optional string type = 5; 23 | optional NeuralNetInitializerParameter init_W = 103; 24 | optional NeuralNetInitializerParameter init_B = 104; 25 | optional NeuralNetInitializerParameter init_W_one = 105; 26 | optional NeuralNetInitializerParameter init_W_two = 106; 27 | } 28 | 29 | message RNNStructParameter{ 30 | optional int32 numRecurrLayers = 1; 31 | optional int32 recurrLayerInputDim = 2; 32 | optional int32 recurrLayerOutputDim = 3; 33 | optional int32 inputDim = 4; 34 | optional NeuralNetParameter.ActivationType activationType = 6; 35 | optional 
NeuralNetInitializerParameter init_W_one = 103; 36 | optional NeuralNetInitializerParameter init_B = 104; 37 | optional NeuralNetInitializerParameter init_W_two = 105; 38 | } 39 | 40 | message NeuralNetInitializerParameter{ 41 | enum InitializerType {custom = 1; identity = 2; zero = 3; normal = 4; glorot_uniform = 5; IRNN = 6; orthogonal = 7;} 42 | optional double normal_std = 1; 43 | optional double normal_mean = 2; 44 | optional InitializerType initializerType = 3; 45 | } 46 | 47 | message NeuralNetTrainingParameter { 48 | enum TrainerType {SGD = 1; RMSProp = 2; SGDRNN=3;} 49 | optional double learningRate = 1; 50 | optional int32 maxIter = 2; 51 | optional int32 miniBatchSize = 3; 52 | optional int32 NEpoch = 4; 53 | optional double epi = 5 [default = 1e-6]; 54 | optional TrainerType trainerType = 6 [default = SGD]; 55 | optional double decayRate = 7 [default = 10]; 56 | optional double momentum = 8 [default = 0.9]; 57 | optional bool verbose = 101 [default = true]; 58 | optional int32 printInfoFrequency = 10 [default = 1]; 59 | optional bool clipFlag = 11 [default = false]; 60 | optional double clipThreshold = 12 [default = 1]; 61 | optional double RMSProp_rho = 13 [default = 0.9]; 62 | optional bool showGradNorm = 102 [default = false]; 63 | optional bool RNNScanFlag = 103 [default = false]; 64 | optional int32 RNNScanStep = 104 [default = 1]; 65 | optional int32 RNNTruncateLength = 105 [default = 10]; 66 | } 67 | 68 | message QLearningSolverParameter{ 69 | optional int32 numTrainingEpisodes = 1; 70 | optional double learningRate = 2 [default = 0.1]; 71 | optional double epsilon = 3 [default = 0.95]; 72 | optional int32 EpisodeLength = 4; 73 | optional double discount = 5 [default = 0.95]; 74 | optional int32 numEpisodesBeforeTraining = 6; 75 | optional int32 QTableOutputInterval = 7; 76 | optional int32 controlInterval = 8 [default = 1]; 77 | 78 | 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/Proto/Makefile: -------------------------------------------------------------------------------- 1 | CC = g++ 2 | CFLAGS = -std=c++0x 3 | LFLAGS = -L/usr/local/lib -lprotobuf 4 | #all:vector_ser 5 | 6 | test: test.o DeepLearning.pb.o 7 | $(CC) -o $@ $^ $(LFLAGS) 8 | 9 | %.o : %.cpp 10 | $(CC) -c $(CFLAGS) $^ 11 | 12 | %.o : %.cc 13 | $(CC) -c $(CFLAGS) $^ 14 | 15 | #vector_ser.o:vector_ser.cpp 16 | # $(CC) -c $(CFLAGS) vector_ser.cpp 17 | 18 | 19 | clean: 20 | rm *.o test 21 | -------------------------------------------------------------------------------- /src/Proto/generateFile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | protoc DeepLearning.proto --cpp_out=. 
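# protoc emits DeepLearning.pb.h and DeepLearning.pb.cc next to the .proto; the two mv commands
# below drop them into include/ and src/ where the rest of the build looks for them (see
# DEEPLEARNING_INCLUDE in src/test/Makefile.common). Re-run this script whenever the .proto changes,
# using the same protobuf release that the tests link against via -lprotobuf.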
4 | mv DeepLearning.pb.h ../../include 5 | mv DeepLearning.pb.cc ../ 6 | -------------------------------------------------------------------------------- /src/Proto/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/Proto/test -------------------------------------------------------------------------------- /src/RecurrLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "RecurrLayer.h" 2 | 3 | using namespace NeuralNet; 4 | 5 | RecurrLayer::RecurrLayer(int in1, int in2, int out, ActivationType actType0, std::shared_ptr init_W_one, std::shared_ptr init_W_two, 6 | std::shared_ptr init_B): 7 | MultiAddLayer(in1,in2,out,actType0,init_W_one,init_W_two,init_B){ 8 | 9 | output_prev = std::make_shared(); 10 | delta_outOne_prev = std::make_shared(); 11 | } 12 | 13 | void RecurrLayer::savePrevOutput(){ 14 | *output_prev = *output; 15 | } 16 | void RecurrLayer::savePrevDeltaOutOne(){ 17 | *delta_outOne_prev = *delta_outOne; 18 | } 19 | -------------------------------------------------------------------------------- /src/SteepDescent.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "optimization.h" 3 | #include 4 | 5 | using namespace Optimization; 6 | 7 | SteepDescent::SteepDescent(ObjectFunc &func, SteepDescent_param param0): 8 | calValGrad(func), param(param0){ 9 | 10 | maxIter = param.maxIter; 11 | eps = param.eps; 12 | step = param.step; 13 | 14 | x.randn(calValGrad.dim); 15 | 16 | currValue = calValGrad(x, grad); 17 | } 18 | 19 | 20 | void SteepDescent::minimize() { 21 | // arma::vec Grad(inputDim); 22 | int iter = 0; 23 | // if( !quiet ) { 24 | std::cout << "Gradient Descent Starts !" 
<< std::endl; 25 | std::cout << "maxIter:" << maxIter << std::endl; 26 | // std::cout << "alpha:" << alpha << std::endl; 27 | // } 28 | while( iter < maxIter) { 29 | 30 | x_new = x - step * grad; 31 | double currValue = calValGrad(x_new,grad_new); 32 | // if( !quiet ) { 33 | std::cout << "iter:" << iter << "\t" ; 34 | double gradNorm = arma::norm(grad_new); 35 | std::cout << "current gradient norm is:" << gradNorm << std::endl; 36 | std::cout << "current value is:" << currValue << std::endl; 37 | // } 38 | x = x_new; 39 | grad = grad_new; 40 | 41 | if ( gradNorm < eps) break; 42 | iter++; 43 | 44 | } 45 | 46 | 47 | } 48 | 49 | /* 50 | bool GradDescent::converged() { 51 | arma::vec diff; 52 | diff = newX - oldX; 53 | return arma::norm(diff) < eps; 54 | } 55 | */ 56 | 57 | //GradDescent::~GradDescent(){} -------------------------------------------------------------------------------- /src/Util.cpp: -------------------------------------------------------------------------------- 1 | #include "Util.h" 2 | 3 | namespace NeuralNet{ 4 | void loadData_MNIST(std::shared_ptr X, std::shared_ptr Y, std::string filename0){ 5 | 6 | std::string filename_base(filename0); 7 | std::string filename; 8 | char tag[50]; 9 | char x; 10 | int count; 11 | int numFiles = 10; 12 | int featSize = 28*28; 13 | int labelSize = 10; 14 | int numSamples = 1000; 15 | X->set_size(featSize, numFiles*numSamples); 16 | Y->set_size(labelSize, numFiles*numSamples); 17 | Y->fill(0); 18 | 19 | 20 | for (int i = 0 ; i < numFiles ; i++) { 21 | sprintf(tag,"%d",i); 22 | filename=filename_base+(std::string)tag; 23 | std::cout << filename << std::endl; 24 | std::ifstream infile; 25 | infile.open(filename,std::ios::binary | std::ios::in); 26 | if (infile.is_open()) { 27 | 28 | for (int j = 0 ; j < numSamples ; j++) { 29 | 30 | for (int k =0 ; k 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "DeepLearning.pb.h" 12 | 13 | using google::protobuf::io::FileInputStream; 14 | using google::protobuf::io::FileOutputStream; 15 | using google::protobuf::io::ZeroCopyInputStream; 16 | using google::protobuf::io::CodedInputStream; 17 | using google::protobuf::io::ZeroCopyOutputStream; 18 | using google::protobuf::io::CodedOutputStream; 19 | using google::protobuf::Message; 20 | 21 | bool ReadProtoFromTextFile(const char* filename, Message* proto) { 22 | int fd = open(filename, O_RDONLY); 23 | // CHECK_NE(fd, -1) << "File not found: " << filename; 24 | FileInputStream* input = new FileInputStream(fd); 25 | bool success = google::protobuf::TextFormat::Parse(input, proto); 26 | delete input; 27 | // close(fd); 28 | return success; 29 | } 30 | 31 | -------------------------------------------------------------------------------- /src/test/BaseLayer/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | test_BaseLayer: test_BaseLayer.o 4 | $(CXX) -o $@ $^ $(LDFLAG) 5 | 6 | %.o:%.cpp 7 | $(CXX) -c $(CXXFLAGS) $^ 8 | 9 | 10 | clean: 11 | rm test_BaseLayer *.o 12 | -------------------------------------------------------------------------------- /src/test/BaseLayer/test_BaseLayer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/BaseLayer/test_BaseLayer -------------------------------------------------------------------------------- 
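A minimal usage sketch of the ReadProtoFromTextFile helper shown above (this driver is not part of the repository; it assumes the helper is declared through include/common.h, as the test programs later in src/test/ suggest):

#include <iostream>
#include "common.h"   // assumed to declare ReadProtoFromTextFile and pull in DeepLearning.pb.h

int main(int argc, char* argv[]) {
    if (argc < 2) return 1;
    DeepLearning::NeuralNetParameter net;
    // parse a text-format config such as src/test/IO/net.prototxt into the message
    if (!ReadProtoFromTextFile(argv[1], &net)) {
        std::cerr << "failed to parse " << argv[1] << std::endl;
        return 1;
    }
    std::cout << "parsed " << net.layerstruct_size() << " layerStruct blocks" << std::endl;
    return 0;
}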
/src/test/BaseLayer/test_BaseLayer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "BaseLayer.h" 9 | #include "gtest/gtest.h" 10 | using namespace NeuralNet; 11 | 12 | 13 | TEST(BaseLayerTest, fillBernoulli){ 14 | 15 | BaseLayer layer(100,10,BaseLayer::sigmoid,true,0.5); 16 | EXPECT_EQ(layer.dropOutRate,0.5); 17 | // EXPECT_TRUE(layer.dropOutFlag); 18 | layer.B.print(); 19 | layer.fill_Bernoulli(layer.B.memptr(),layer.B_size); 20 | layer.B.print(); 21 | 22 | } 23 | 24 | 25 | 26 | int main(int argc, char *argv[]) { 27 | std::shared_ptr trainDataX(new arma::mat); 28 | std::shared_ptr trainDataY(new arma::mat); 29 | testing::InitGoogleTest(&argc, argv); 30 | return RUN_ALL_TESTS(); 31 | } 32 | 33 | -------------------------------------------------------------------------------- /src/test/ElmanRL/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../Makefile.common 3 | 4 | OBJ = test_ElmanRL.o Trainer.o 5 | 6 | test : $(OBJ) 7 | $(CXX) -o $@ $(OBJ) $(LDFLAG) 8 | Trainer.o : ../Trainer/Trainer.cpp 9 | 10 | $(CXX) -c $(CXXFLAGS) $^ 11 | clean: 12 | rm -f *.o *~ 13 | -------------------------------------------------------------------------------- /src/test/ElmanRL/RLtest.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 2 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 2 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 9 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: RMSProp 35 | learningRate: 0.2 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 2500 40 | showGradNorm: false 41 | RNNScanFlag: false 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/test/ElmanRL/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/ElmanRL/test -------------------------------------------------------------------------------- /src/test/GRNN/GRNN.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "BaseLayer.h" 7 | #include "MultiAddLayer.h" 8 | #include "ElementMultiAddLayer.h" 9 | #include "common.h" 10 | namespace NeuralNet { 11 | 12 | class RNN: public Net { 13 | 14 | public: 15 | RNN(int numHiddenLayers0, int hiddenLayerInputDim0, 16 | int hiddenLayerOutputDim0, int inputDim0, int outputDim0, 17 | std::shared_ptr trainingX0, std::shared_ptr trainingY0); 18 | RNN(DeepLearning::NeuralNetParameter); 19 | 20 | void backward(); 21 | void updatePara(); 22 | void train(); 23 | void test(); 24 | void calNumericGrad(); 25 | 26 | // implementing methods required by Net interface 27 | virtual void forward(); 28 | virtual void 
setTrainingSamples(std::shared_ptr X, std::shared_ptr Y); 29 | virtual void applyUpdates(std::vector>); 30 | virtual void calGradient(); 31 | virtual std::vector> netGradients(); 32 | virtual double getLoss(); 33 | virtual void save(std::string filename); 34 | virtual void load(std::string filename); 35 | virtual std::shared_ptr netOutput(); 36 | virtual std::shared_ptr netOutputAtTime(int time); 37 | virtual arma::mat forwardInTime(std::shared_ptr x); 38 | virtual int getTime(); 39 | virtual void setTime(int t); 40 | virtual void updateInternalState(); 41 | virtual void saveLayerInputOutput(); 42 | std::shared_ptr getOutputLayer(){return netOutputLayer;} 43 | private: 44 | void fillNetGradVector(); 45 | DeepLearning::NeuralNetParameter neuralNetPara; 46 | double learningRate = 0.1; 47 | /* network gradients*/ 48 | std::vector> netGradVector; 49 | std::shared_ptr netOutput_; 50 | std::vector hiddenStateLayers, updateGateLayers, resetGateLayers; 51 | std::vector hiddenOutputLayers; 52 | std::shared_ptr netOutputLayer; 53 | std::shared_ptr trainingY, trainingX; 54 | std::vector> outputLayers_prev_output; 55 | int numHiddenLayers, hiddenLayerInputDim, hiddenLayerOutputDim; 56 | int rnnInputDim, rnnOutputDim; 57 | int time; 58 | }; 59 | } 60 | 61 | 62 | -------------------------------------------------------------------------------- /src/test/GRNN/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | test_MALayer: test_MALayer.o MultiAddLayer.o 4 | $(CXX) -o $@ $^ $(LDFLAG) 5 | %.o:%.cpp 6 | $(CXX) -c $(CXXFLAGS) $^ 7 | 8 | 9 | clean: 10 | rm *.o 11 | -------------------------------------------------------------------------------- /src/test/IO/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../Makefile.common 3 | 4 | 5 | 6 | OBJ = test_IO.o 7 | 8 | test_IO : $(OBJ) 9 | $(CXX) -o test $(OBJ) $(LDFLAG) 10 | 11 | clean: 12 | rm -f *.o *~ 13 | -------------------------------------------------------------------------------- /src/test/IO/net.prototxt: -------------------------------------------------------------------------------- 1 | layerStruct{ 2 | name: "BaseLayer1" 3 | inputDim: 10 4 | outputDim: 20 5 | activationType: sigmoid 6 | } 7 | 8 | layerStruct{ 9 | name: "BaseLayer2" 10 | inputDim: 10 11 | outputDim: 20 12 | activationType: sigmoid 13 | } 14 | 15 | neuralNetTrainingParameter{ 16 | learningRate: 0.1 17 | miniBatchSize: 10 18 | NEpoch: 200 19 | } 20 | 21 | rnnStruct{ 22 | numHiddenLayers: 1 23 | hiddenLayerInputDim: 2 24 | hiddenLayerOutputDim: 6 25 | inputDim: 4 26 | outputDim: 5 27 | } 28 | -------------------------------------------------------------------------------- /src/test/IO/qsolver.prototxt: -------------------------------------------------------------------------------- 1 | 2 | 3 | qLearningSolverParameter{ 4 | numTrainingEpisodes: 300 5 | epsilon: 0.95; 6 | EpisodeLength: 150 7 | discount: 0.95 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /src/test/IO/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/IO/test -------------------------------------------------------------------------------- /src/test/IO/test_IO.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "common.h" 3 | using 
namespace DeepLearning; 4 | int main(int argc, char *argv[]){ 5 | 6 | NeuralNetParameter message; 7 | RNNStructParameter submessage; 8 | if (argc == 2){ 9 | 10 | ReadProtoFromTextFile(argv[1], &message); 11 | std::cout << message.layerstruct_size() << std::endl; 12 | for (int i = 0 ; i < message.layerstruct_size(); i++ ){ 13 | if (message.layerstruct(i).has_name()) 14 | std::cout << message.layerstruct(i).name() << std::endl; 15 | if (message.layerstruct(i).has_activationtype()){ 16 | std::cout << message.layerstruct(i).activationtype() << std::endl; 17 | if( message.layerstruct(i).activationtype() == LayerStructParameter_ActivationType_sigmoid) 18 | std::cout << "good" << std::endl; 19 | } 20 | } 21 | 22 | std::cout << message.neuralnettrainingparameter().learningrate() << std::endl; 23 | std::cout << message.neuralnettrainingparameter().minibatchsize()<< std::endl; 24 | std::cout << message.neuralnettrainingparameter().nepoch() << std::endl; 25 | std::cout << message.neuralnettrainingparameter().epi() << std::endl; 26 | std::cout << message.neuralnettrainingparameter().trainertype() << std::endl; 27 | 28 | std::cout << std::endl; 29 | 30 | std::cout << "test Kai message" << std::endl; 31 | std::cout << message.rnnstruct().numhiddenlayers() << std::endl; 32 | std::cout << message.rnnstruct().hiddenlayeroutputdim() << std::endl; 33 | 34 | submessage = message.rnnstruct(); 35 | 36 | std::cout << "test sub message" << std::endl; 37 | std::cout << submessage.numhiddenlayers() << std::endl; 38 | std::cout << submessage.hiddenlayeroutputdim() << std::endl; 39 | 40 | 41 | } 42 | 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /src/test/IOtest/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../Makefile.common 3 | 4 | 5 | 6 | OBJ = test_IO.o 7 | 8 | test_IO : $(OBJ) 9 | $(CXX) -o test $(OBJ) $(LDFLAG) 10 | 11 | clean: 12 | rm -f *.o *~ 13 | -------------------------------------------------------------------------------- /src/test/IOtest/net.prototxt: -------------------------------------------------------------------------------- 1 | layerStruct{ 2 | name: "BaseLayer1" 3 | inputDim: 10 4 | outputDim: 20 5 | activationType: sigmoid 6 | } 7 | 8 | layerStruct{ 9 | name: "BaseLayer2" 10 | inputDim: 10 11 | outputDim: 20 12 | activationType: sigmoid 13 | } 14 | 15 | neuralNetTrainingParameter{ 16 | learningRate: 0.1 17 | miniBatchSize: 10 18 | NEpoch: 200 19 | } 20 | 21 | rnnStruct{ 22 | numHiddenLayers: 1 23 | hiddenLayerInputDim: 2 24 | hiddenLayerOutputDim: 6 25 | inputDim: 4 26 | outputDim: 5 27 | } 28 | -------------------------------------------------------------------------------- /src/test/Makefile.common: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CXX = g++ 3 | 4 | HOME=/home/yuguangyang/ 5 | ARMA_INCLUDE=-I$(HOME)Downloads/armadillo-5.100.2/include 6 | DEEPLEARNING_INCLUDE=-I../../../include 7 | GTEST_INCLUDE=-I$(HOME)workspace/libs/gtest-1.7.0/include 8 | BOOST_INCLUDE=-I/opt/boost/boost_1_57_0 9 | PROTO_INCLUDE=-I/usr/local/include 10 | 11 | GTEST_PATH=-L$(HOME)workspace/libs/gtest-1.7.0/mybuilds 12 | DEEPLEARNING_PATH=-L../../lib 13 | PROTO_PATH=-L/usr/local/lib 14 | 15 | DEBUGFLAG=-DDEBUG -g3 16 | RELEASEFLAG= -O3 -march=native -DARMA_NO_DEBUG 17 | CXXFLAGS= -std=c++0x $(ARMA_INCLUDE) $(DEEPLEARNING_INCLUDE) $(GTEST_INCLUDE) $(BOOST_INCLUDE) $(PROTO_INCLUDE) -D__LINUX -DARMA_DONT_USE_WRAPPER 18 | #CXXFLAGS += $(DEBUGFLAG) 19 | #CXXFLAGS += $(RELEASEFLAG) 20 | LINKOPTFLAGS= -O3 -flto=4 -fwhole-program 21 | LDFLAG=$(DEEPLEARNING_PATH) $(GTEST_PATH) $(PROTO_PATH) -L/opt/OpenBLAS/lib -ldeeplearning -llapack -lopenblas -lprotobuf -pthread 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | test_mlp: test_funcApprox.o Trainer.o 4 | $(CXX) -o $@ $^ $(LDFLAG) 5 | %.o:%.cpp 6 | $(CXX) -c $(CXXFLAGS) $^ 7 | Trainer.o:../Trainer/Trainer.cpp 8 | $(CXX) -c $(CXXFLAGS) $^ 9 | 10 | clean: 11 | rm *.o 12 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/net.prototxt: -------------------------------------------------------------------------------- 1 | layerStruct{ 2 | name: "BaseLayer1" 3 | inputDim: 1 4 | outputDim: 40 5 | activationType: sigmoid 6 | } 7 | 8 | layerStruct{ 9 | name: "BaseLayer1" 10 | inputDim: 40 11 | outputDim: 20 12 | activationType: sigmoid 13 | } 14 | 15 | layerStruct{ 16 | name: "BaseLayer2" 17 | inputDim: 20 18 | outputDim: 1 19 | activationType: linear 20 | } 21 | 22 | neuralNetTrainingParameter{ 23 | trainerType: RMSProp 24 | learningRate: 0.01 25 | miniBatchSize: 100 26 | NEpoch: 20000 27 | decayRate: 100000 28 | } 29 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/runningparameters/sin(5x)+exp(5x)/net.prototxt: -------------------------------------------------------------------------------- 1 | //This is for training non-linear function y = sin(5x)+exp(5x), while x is rescaled to 0 to 1 2 | 3 | layerStruct{ 4 | name: "BaseLayer1" 5 | inputDim: 1 6 | outputDim: 40 7 | activationType: sigmoid 8 | } 9 | 10 | layerStruct{ 11 | name: "BaseLayer1" 12 | inputDim: 40 13 | outputDim: 20 14 | activationType: sigmoid 15 | } 16 | 17 | layerStruct{ 18 | name: "BaseLayer2" 19 | inputDim: 20 20 | outputDim: 1 21 | activationType: linear 22 | } 23 | 24 | neuralNetTrainingParameter{ 25 | learningRate: 0.1 26 | miniBatchSize: 100 27 | NEpoch: 20000 28 | } 29 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/runningparameters/sin(5x)+exp(5x)/readme: -------------------------------------------------------------------------------- 1 | //This is for training non-linear function y = sin(5x)+exp(5x), while x is rescaled to 0 to 1 2 | 3 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/runningparameters/sin(5x)/net.prototxt: -------------------------------------------------------------------------------- 1 | //This is for training non-linear function y = 5*sin(x), while x is rescaled to 0 to 1 2 | 3 | layerStruct{ 4 | name: "BaseLayer1" 5 | inputDim: 1 6 | outputDim: 40 7 | activationType: sigmoid 8 | } 9 | 10 | layerStruct{ 11 | name: "BaseLayer1" 12 | inputDim: 40 13 | outputDim: 20 14 | activationType: sigmoid 15 | } 16 | 17 | layerStruct{ 18 | name: "BaseLayer2" 19 | inputDim: 20 20 | outputDim: 1 21 | activationType: linear 22 | } 23 | 24 | neuralNetTrainingParameter{ 25 | learningRate: 0.1 26 | miniBatchSize: 100 27 | NEpoch: 20000 28 | } 29 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/runningparameters/sin(5x)/readme:
-------------------------------------------------------------------------------- 1 | //This is for training non-linear function y = 5*sin(x), while x is rescaled to 0 to 1 2 | 3 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/testSimple.prototxt: -------------------------------------------------------------------------------- 1 | layerStruct{ 2 | name: "BaseLayer1" 3 | inputDim: 1 4 | outputDim: 10 5 | activationType: tanh 6 | init_W{ 7 | initializerType: glorot_uniform 8 | } 9 | init_B{ 10 | initializerType: zero 11 | } 12 | } 13 | 14 | layerStruct{ 15 | name: "BaseLayer1" 16 | inputDim: 10 17 | outputDim: 1 18 | activationType: linear 19 | init_W{ 20 | initializerType: glorot_uniform 21 | } 22 | init_B{ 23 | initializerType: zero 24 | } 25 | } 26 | 27 | neuralNetTrainingParameter{ 28 | trainerType: RMSProp 29 | learningRate: 0.15 30 | miniBatchSize: 10 31 | NEpoch: 1000 32 | decayRate: 100 33 | momentum: 0.99 34 | } 35 | -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/test_funcApprox.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include "MultiLayerPerceptron.h" 3 | #include "../Trainer/Trainer.h" 4 | using namespace NeuralNet; 5 | using namespace DeepLearning; 6 | 7 | void testComplex(char* filename); 8 | void testSimple(char* filename); 9 | 10 | int main(int argc, char** argv) { 11 | 12 | if (argc < 2) exit(1); 13 | 14 | testSimple(argv[1]); 15 | 16 | 17 | return 0; 18 | } 19 | 20 | 21 | void testSimple(char* filename){ 22 | std::shared_ptr X(new arma::mat(1,10)); 23 | std::shared_ptr Y(new arma::mat(1,10)); 24 | 25 | for (int i = 0; i < X->n_elem; i++){ 26 | X->at(i) = i; 27 | } 28 | 29 | double xmin = X->min(); 30 | double xmax = X->max(); 31 | X->transform([&](double x){return x/(xmax - xmin)-0.5;}); 32 | Y->ones(); 33 | *Y = (*X); 34 | Y->transform([](double val){return sin(val);}); 35 | 36 | NeuralNetParameter nnpara; 37 | ReadProtoFromTextFile(filename, &nnpara); 38 | // nnpara.neuralnettrainingparameter().set_minibatchsize(X->n_elem); 39 | std::shared_ptr mlp(new MultiLayerPerceptron(nnpara)); 40 | std::shared_ptr trainer(TrainerBuilder::GetTrainer(mlp, nnpara)); 41 | 42 | mlp->setTrainingSamples(X,nullptr); 43 | mlp->forward(); 44 | (mlp->netOutput())->print(); 45 | trainer->setTrainingSamples(X, Y); 46 | trainer->train(); 47 | Y->save("target.dat",arma::raw_ascii); 48 | mlp->netOutput()->save("trainingResult.dat",arma::raw_ascii); 49 | Y->print(); 50 | (mlp->netOutput())->print(); 51 | 52 | } 53 | 54 | void testComplex(char* filename){ 55 | std::shared_ptr X(new arma::mat(1,100)); 56 | std::shared_ptr Y(new arma::mat(1,100)); 57 | 58 | for (int i = 0; i < X->n_elem; i++){ 59 | X->at(i) = i; 60 | } 61 | 62 | double xmin = X->min(); 63 | double xmax = X->max(); 64 | X->transform([&](double x){return x/(xmax - xmin);}); 65 | Y->ones(); 66 | *Y = 5*(*X); 67 | Y->transform([](double val){return sin(4*val);}); 68 | 69 | NeuralNetParameter nnpara; 70 | ReadProtoFromTextFile(filename, &nnpara); 71 | // nnpara.neuralnettrainingparameter().set_minibatchsize(X->n_elem); 72 | std::shared_ptr mlp(new MultiLayerPerceptron(nnpara)); 73 | std::shared_ptr trainer(TrainerBuilder::GetTrainer(mlp, nnpara)); 74 | 75 | mlp->setTrainingSamples(X,nullptr); 76 | mlp->forward(); 77 | (mlp->netOutput())->print(); 78 | trainer->setTrainingSamples(X, Y); 79 | trainer->train(); 80 | 
Y->save("target.dat",arma::raw_ascii); 81 | mlp->netOutput()->save("trainingResult.dat",arma::raw_ascii); 82 | 83 | 84 | } -------------------------------------------------------------------------------- /src/test/MultiLayerPerceptron/test_mlp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "MultiLayerPerceptron.h" 10 | #include "optimization.h" 11 | #include "Util.h" 12 | 13 | using namespace NeuralNet; 14 | 15 | 16 | int main(int argc, char *argv[]) { 17 | if (argc < 2) exit(1); 18 | std::shared_ptr DataX(new arma::mat); 19 | std::shared_ptr DataY(new arma::mat); 20 | std::shared_ptr trainDataX(new arma::mat); 21 | std::shared_ptr trainDataY(new arma::mat); 22 | std::shared_ptr testDataX(new arma::mat); 23 | std::shared_ptr testDataY(new arma::mat); 24 | std::shared_ptr ValidationDataX(new arma::mat); 25 | std::shared_ptr ValidationDataY(new arma::mat); 26 | 27 | loadData_MNIST(DataX,DataY,(std::string)argv[1]); 28 | 29 | int ntrain =2000; 30 | int ntest = 1000; 31 | // now I split data into train, test, and validation 32 | trainDataX = std::make_shared(DataX->cols(0,ntrain-1)); 33 | trainDataY = std::make_shared(DataY->cols(0,ntrain-1)); 34 | testDataX = std::make_shared(DataX->cols(ntrain,ntrain+ntest-1)); 35 | testDataY = std::make_shared(DataY->cols(ntrain,ntrain+ntest-1)); 36 | 37 | 38 | int inputDim = trainDataX->n_cols; 39 | int outputDim = trainDataY->n_cols; 40 | trainDataX->save("trainingSamples.txt",arma::raw_ascii); 41 | TrainingPara_MLP trainingPara(1e-6,100, 10, 0.25); 42 | trainingPara.print(); 43 | std::vector dimensions = {784,100,10}; 44 | MultiLayerPerceptron mlp(2, dimensions, trainDataX, trainDataY, trainingPara); 45 | bool LBFGS_flag = false; 46 | if (LBFGS_flag){ 47 | MLPTrainer mlpTrainer(mlp); 48 | Optimization::LBFGS::LBFGS_param param(100,20, 50 , "result.txt"); 49 | Optimization::LBFGS lbfgs_opt(mlpTrainer,param, Optimization::LBFGS::Wolfe); 50 | lbfgs_opt.minimize(); 51 | } else{ 52 | mlp.train(); 53 | } 54 | mlp.test(testDataX,testDataY); 55 | return 0; 56 | } 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /src/test/MultiThreadArmaMat/Makefile: -------------------------------------------------------------------------------- 1 | HOME=/home/yuguangyang/ 2 | ARMA_INCLUDE=-I$(HOME)Downloads/armadillo-5.100.2/include 3 | # You may need to edit this file to reflect the type and capabilities of your system. 4 | # The defaults are for a Linux system and may need to be changed for other systems (eg. Mac OS X). 5 | 6 | 7 | CXX=g++ 8 | 9 | #CXX=CC 10 | ## When using the Sun Studio compiler 11 | 12 | ## If you've installed Armadillo's headers manually, you may need to tell the compiler where they are. 
13 | ## For example, change ../include to /usr/local/include 14 | 15 | 16 | LIB_FLAGS = -L/opt/OpenBLAS/lib -lopenblas -llapack 17 | #LIB_FLAGS = -lopenblas -llapack 18 | #LIB_FLAGS = -framework Accelerate 19 | #LIB_FLAGS = -library=sunperf 20 | 21 | ## NOTE: on Ubuntu and Debian based systems you may need to add -lgfortran to LIB_FLAGS 22 | ## NOTE: if you're using Mac OS, use the line with -framework Accelerate 23 | ## NOTE: if you're using the Sun Studio compiler, use the line with -library=sunperf 24 | 25 | 26 | OPT = -O2 27 | ## As the Armadillo library uses recursive templates, compilation times depend on the level of optimisation: 28 | ## 29 | ## -O0: quick compilation, but the resulting program will be slow 30 | ## -O1: good trade-off between compilation time and execution speed 31 | ## -O2: produces programs which have almost all possible speedups, but compilation takes longer 32 | ## -O3: enables auto vectorisation when using gcc 33 | 34 | #OPT = -xO4 -xannotate=no 35 | ## When using the Sun Studio compiler 36 | 37 | 38 | #EXTRA_OPT = -fwhole-program 39 | ## Uncomment the above line if you're compiling all source files into one program in a single hit 40 | 41 | 42 | #DEBUG = -DARMA_EXTRA_DEBUG 43 | ## Uncomment the above line to enable low-level debugging. 44 | ## Lots of debugging information will be printed when a compiled program is run. 45 | ## Please enable this option when reporting bugs. 46 | 47 | 48 | #FINAL = -DARMA_NO_DEBUG 49 | ## Uncomment the above line to disable Armadillo's checks. 50 | ## Not recommended unless your code has been first thoroughly tested! 51 | 52 | 53 | CXXFLAGS = $(ARMA_INCLUDE) -I/opt/OpenBLAS/include $(DEBUG) $(FINAL) $(OPT) $(EXTRA_OPT) 54 | 55 | all: test_arma 56 | 57 | test_arma: test_arma.cpp 58 | $(CXX) $(CXXFLAGS) -o $@ $< $(LIB_FLAGS) 59 | 60 | 61 | .PHONY: clean 62 | 63 | clean: 64 | rm -f test_arma 65 | 66 | -------------------------------------------------------------------------------- /src/test/NN-RL/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | all: test_NNRL.exe 4 | 5 | OBJ = NN_RL_Driver.o Model_PoleSimple.o Model_PoleFull.o Trainer.o NN_RLSolverBase.o NN_RLSolverMLP.o NN_RLSolverMultiMLP.o NN_RLSolverRNN.o RLSolver_2DTable.o 6 | 7 | test_NNRL.exe: $(OBJ) 8 | $(CXX) -o $@ $^ $(LDFLAG) 9 | 10 | 11 | 12 | 13 | 14 | %.o:%.cpp 15 | $(CXX) -c $(CXXFLAGS) $(DEBUGFLAG) $^ 16 | 17 | Trainer.o: ../Trainer/Trainer.cpp 18 | $(CXX) -c $(CXXFLAGS) $^ 19 | 20 | clean: 21 | rm *.o *.exe 22 | -------------------------------------------------------------------------------- /src/test/NN-RL/Model_PoleFull.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "Model_PoleFull.h" 6 | using namespace ReinforcementLearning; 7 | // this model is from paper THE POLE BALANCING PROBLEM A Benchmark Control Theory Problem 8 | Model_PoleFull::Model_PoleFull(double dt0) { 9 | currState.resize(2); 10 | prevState.resize(2); 11 | dt = dt0; 12 | stateDim = 2; 13 | hiddenCurrState.resize(4); 14 | hiddenPrevState.resize(4); 15 | 16 | randNoise = std::make_shared(-1, 1); 17 | numActions = 3; 18 | } 19 | 20 | void Model_PoleFull::run(int action) { 21 | double force; 22 | double accer_theta; 23 | double accer_x; 24 | switch (action) { 25 | case 0: 26 | force = -10 + randNoise->nextInt(); 27 | break; 28 | case 1: 29 | force = 10 + randNoise->nextInt(); 30 | break; 31 | case 2: 32 | force = 
randNoise->nextInt(); 33 | break; 34 | default:break; 35 | } 36 | double l = 0.5; 37 | double massSum = 1.1; 38 | double massRatio = 1.0 / 11.0; 39 | double &theta = hiddenCurrState[0]; 40 | double &theta_v = hiddenCurrState[1]; 41 | double &x = hiddenCurrState[2]; 42 | double &x_v = hiddenCurrState[3]; 43 | 44 | accer_theta = 9.8 * sin(theta) - l * massRatio * 0.5 * pow(theta_v, 2.0) * sin(2.0 * theta) / 2.0 - cos(theta) * force / massSum; 45 | accer_theta /= (4.0 * l / 3.0 - 0.1 * massRatio * l * cos(theta) * cos(theta)); 46 | theta += theta_v * dt; 47 | if (theta > M_PI) theta -= 2.0*M_PI; 48 | if (theta < -M_PI) theta += 2.0*M_PI; 49 | theta_v += accer_theta * dt; 50 | 51 | accer_x = force / massSum + massRatio * l * (pow(theta_v, 2.0) * sin(theta) - accer_theta * cos(theta)); 52 | x += x_v * dt; 53 | x_v += accer_x * dt; 54 | 55 | currState[0] = hiddenCurrState[0]; 56 | currState[1] = hiddenCurrState[2]; 57 | } 58 | 59 | void Model_PoleFull::createInitialState() { 60 | this->hiddenCurrState[0] = (randNoise->nextDou() - 0.5) * 0.0 * M_PI; 61 | this->hiddenCurrState[1] = 0.0; 62 | this->hiddenCurrState[2] = (randNoise->nextDou() - 0.5) * 0.0; 63 | this->hiddenCurrState[3] = 0.0; 64 | this->currState[0] = this->hiddenCurrState[0]; 65 | this->currState[1] = this->hiddenCurrState[2]; 66 | } 67 | 68 | -------------------------------------------------------------------------------- /src/test/NN-RL/Model_PoleFull.h: -------------------------------------------------------------------------------- 1 | /* 2 | This model is the Inverted Pendulum problem found in the paper 3 | "lease-squared policy iterations" 4 | */ 5 | #pragma once 6 | #include 7 | #include 8 | #include 9 | #include "Util.h" 10 | #include "BaseModel.h" 11 | 12 | using namespace NeuralNet; 13 | namespace ReinforcementLearning { 14 | class Model_PoleFull: public BaseModel{ 15 | public: 16 | Model_PoleFull(double dt0); 17 | ~Model_PoleFull(){} 18 | virtual void run(int action); 19 | virtual void createInitialState(); 20 | private: 21 | 22 | State hiddenCurrState, hiddenPrevState; 23 | std::shared_ptr randNoise; 24 | double dt; 25 | }; 26 | } -------------------------------------------------------------------------------- /src/test/NN-RL/Model_PoleSimple.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "Model_PoleSimple.h" 6 | using namespace ReinforcementLearning; 7 | // this model is from paper Lease-squares policy iteration 8 | Model_PoleSimple::Model_PoleSimple(double dt0) { 9 | currState.resize(2); 10 | prevState.resize(2); 11 | dt = dt0; 12 | stateDim = 2; 13 | randNoise = std::make_shared(-10, 10); 14 | numActions = 3; 15 | } 16 | 17 | void Model_PoleSimple::run(int action) { 18 | double force; 19 | double accer; 20 | switch (action) { 21 | case 0: 22 | force = -50 + randNoise->nextInt(); 23 | break; 24 | case 1: 25 | force = 50 + randNoise->nextInt(); 26 | break; 27 | case 2: 28 | force = randNoise->nextInt(); 29 | break; 30 | default:break; 31 | } 32 | double &theta = currState[0]; 33 | double &theta_v = currState[1]; 34 | // we assume l = 0.5 35 | // mp/(mc+mp) = 0.1, mp = 1, mc = 9) 36 | accer = 9.8 * sin(theta) - 0.1 * 2.0 * 0.5 * pow(theta_v, 2.0) * sin(2.0 * theta) / 2.0 - 0.1 * cos(theta) * force; 37 | accer /= (4.0 * 0.5 / 3.0 - 0.1 * 0.5 * cos(theta) * cos(theta) ); 38 | prevState = currState; 39 | theta += theta_v * dt; 40 | if (theta > M_PI) theta -= 2.0*M_PI; 41 | if (theta < -M_PI) theta += 2.0*M_PI; 42 | 
theta_v += accer * dt; 43 | } 44 | 45 | void Model_PoleSimple::createInitialState() { 46 | this->currState[0] = (randNoise->nextDou() - 0.5) * 0.2 * M_PI; 47 | this->currState[1] = 0.0; 48 | } 49 | 50 | 51 | double Model_PoleSimple::getRewards() const { 52 | if (this->terminate()) { 53 | return -1.0; 54 | } else { 55 | return 0.0; 56 | } 57 | } 58 | 59 | bool Model_PoleSimple::terminate() const { 60 | return (currState[0] < -0.5 * M_PI || currState[0] > 0.5 * M_PI); 61 | } -------------------------------------------------------------------------------- /src/test/NN-RL/Model_PoleSimple.h: -------------------------------------------------------------------------------- 1 | /* 2 | This model is the Inverted Pendulum problem found in the paper 3 | "lease-squared policy iterations" 4 | */ 5 | #pragma once 6 | #include 7 | #include 8 | #include 9 | #include "Util.h" 10 | #include "BaseModel.h" 11 | 12 | using namespace NeuralNet; 13 | namespace ReinforcementLearning { 14 | class Model_PoleSimple: public BaseModel{ 15 | public: 16 | Model_PoleSimple(double dt0); 17 | ~Model_PoleSimple(){} 18 | virtual void run(int action); 19 | virtual void createInitialState(); 20 | virtual double getRewards() const; 21 | virtual bool terminate() const; 22 | private: 23 | std::shared_ptr randNoise; 24 | double dt; 25 | }; 26 | } -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverBase.cpp: -------------------------------------------------------------------------------- 1 | #include "NN_RLSolverBase.h" 2 | 3 | using namespace ReinforcementLearning; 4 | using namespace NeuralNet; 5 | using namespace DeepLearning; 6 | NN_RLSolverBase::NN_RLSolverBase(std::shared_ptr m, 7 | std::shared_ptr net0, 8 | std::shared_ptr trainer0, 9 | int Dim, DeepLearning::QLearningSolverParameter para): 10 | RLSolverBase(m,Dim,para), net(net0), trainer(trainer0){ 11 | netInputDim = stateDim + 1; 12 | } 13 | 14 | void NN_RLSolverBase::getMaxQ(const State& S, double* Q, int* action) { 15 | double maxQ; 16 | int a = 0; 17 | maxQ = -std::numeric_limits::max(); 18 | for (int j = 0; j < model->getNumActions(); j++) { 19 | double tempQ = this->calQ(S, j); 20 | // std::cout << tempQ << std::endl; 21 | if (maxQ < tempQ) { 22 | maxQ = tempQ; 23 | a = j; 24 | } 25 | } 26 | *Q = maxQ; 27 | *action = a; 28 | return; 29 | } 30 | 31 | -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverBase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "BaseModel.h" 4 | #include "common.h" 5 | #include "Net.h" 6 | #include "RLSolverBase.h" 7 | #include "../Trainer/Trainer.h" 8 | 9 | namespace ReinforcementLearning { 10 | 11 | class NN_RLSolverBase: public RLSolverBase { 12 | public: 13 | NN_RLSolverBase(std::shared_ptr m, 14 | std::shared_ptr net0, 15 | std::shared_ptr trainer0, int Dim, DeepLearning::QLearningSolverParameter para); 16 | virtual ~NN_RLSolverBase(){} 17 | virtual void train() = 0; 18 | virtual void test(){} 19 | virtual void generateTrainingSample(std::shared_ptr trainingSampleX, std::shared_ptr trainingSampleY)=0; 20 | virtual void generateExperience() = 0; 21 | virtual double calQ(const State& S, int action) const = 0; 22 | virtual void getMaxQ(const State& S,double* Q, int* action); 23 | virtual double getRewards(const State& newS) const = 0; 24 | virtual bool terminate(const State& S) const = 0; 25 | virtual void setNormalizationConst() = 0; 26 | protected: 
27 | int netInputDim; 28 | std::shared_ptr net; 29 | std::shared_ptr trainer; 30 | State state_norm; 31 | double action_norm; 32 | 33 | }; 34 | } 35 | -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverMLP.cpp: -------------------------------------------------------------------------------- 1 | #include "NN_RLSolverMLP.h" 2 | 3 | using namespace ReinforcementLearning; 4 | using namespace NeuralNet; 5 | NN_RLSolverMLP::NN_RLSolverMLP(std::shared_ptr m, 6 | std::shared_ptr net0, 7 | std::shared_ptr trainer0, 8 | int Dim, DeepLearning::QLearningSolverParameter para): 9 | NN_RLSolverBase(m,net0,trainer0,Dim,para){ 10 | this->setNormalizationConst(); 11 | } 12 | 13 | void NN_RLSolverMLP::setNormalizationConst(){ 14 | state_norm.resize(stateDim+1); 15 | state_norm[0] = M_PI; 16 | state_norm[1] = 20.0; 17 | state_norm[2] = model->getNumActions()-1; 18 | } 19 | 20 | double NN_RLSolverMLP::calQ(const State& S, int action) const { 21 | std::shared_ptr inputTemp(new arma::mat(netInputDim, 1)); 22 | for (int k = 0; k < stateDim; k++) 23 | inputTemp->at(k) = S[k] / this->state_norm[k]; 24 | inputTemp->at(stateDim) = action / state_norm[stateDim] - 0.5; 25 | net->setTrainingSamples(inputTemp, nullptr); 26 | net->forward(); 27 | double tempQ = arma::as_scalar(*(net->netOutput())); 28 | return tempQ; 29 | } 30 | 31 | void NN_RLSolverMLP::train(){ 32 | std::shared_ptr trainingSampleX(new arma::mat); 33 | std::shared_ptr trainingSampleY(new arma::mat); 34 | std::shared_ptr prediction; 35 | int maxIter = trainingPara.numtrainingepisodes(); 36 | for (int iter = 0; iter < maxIter; iter++){ 37 | std::cout << "RLsolver iteration: " << iter << std::endl; 38 | this->generateExperience(); 39 | if (iter > 20) { 40 | this->generateTrainingSample(trainingSampleX, trainingSampleY); 41 | trainingSampleX->save("X.dat", arma::raw_ascii); 42 | trainingSampleY->save("Y.dat", arma::raw_ascii); 43 | trainer->setTrainingSamples(trainingSampleX, trainingSampleY); 44 | trainer->train(); 45 | prediction = trainer->predict(trainingSampleX); 46 | prediction->save("prediction.dat", arma::raw_ascii); 47 | std::cout << "average duration " << experienceSet.size() / 1.0 / iter << std::endl; 48 | } 49 | } 50 | } 51 | 52 | void NN_RLSolverMLP::generateTrainingSample(std::shared_ptr trainingX, std::shared_ptr trainingY){ 53 | trainingX->set_size(netInputDim, experienceSet.size()); 54 | trainingY->set_size(1, experienceSet.size()); 55 | double maxQ; 56 | int action; 57 | std::shared_ptr inputTemp(new arma::mat(netInputDim, 1)); 58 | for (int i = 0; i < this->experienceSet.size(); i++) { 59 | this->getMaxQ(experienceSet[i].newState,&maxQ,&action); 60 | std::cout << "maxQ:" <stateDim; k++) 64 | inputTemp->at(k) = experienceSet[i].oldState[k] / this->state_norm[k]; 65 | inputTemp->at(stateDim) = experienceSet[i].action / state_norm[stateDim] - 0.5; 66 | 67 | trainingX->col(i) = *inputTemp; 68 | trainingY->at(i) = targetQ; 69 | } 70 | } 71 | 72 | void NN_RLSolverMLP::generateExperience(){ 73 | double maxQ; 74 | int action; 75 | double epi = trainingPara.epsilon(); 76 | arma::mat outputTemp(1,1); 77 | std::shared_ptr inputTemp(new arma::mat(netInputDim, 1)); 78 | model->createInitialState(); 79 | int i; 80 | for(i = 0; i < trainingPara.episodelength(); i++){ 81 | if( this->terminate(model->getCurrState()) ) { 82 | break; 83 | } 84 | State oldState = model->getCurrState(); 85 | if (randChoice->nextDou()< epi){ 86 | this->getMaxQ(oldState, &maxQ, &action); 87 | } else { 88 | action = 
randChoice->nextInt(); 89 | } 90 | model->run(action); 91 | State currState = model->getCurrState(); 92 | double r = this->getRewards(currState); 93 | oldState.shrink_to_fit(); 94 | currState.shrink_to_fit(); 95 | this->experienceSet.push_back(Experience(oldState,currState, action, r)); 96 | } 97 | std::cout << "duration " << i << std::endl; 98 | } 99 | 100 | double NN_RLSolverMLP::getRewards(const State &newS) const{ 101 | if (this->terminate(newS)){ 102 | return -1.0; 103 | } else { 104 | return 0.0; 105 | } 106 | } 107 | bool NN_RLSolverMLP::terminate(const State& S) const { 108 | return (S[0] < - 0.5* M_PI || S[0] > 0.5 * M_PI); 109 | } 110 | -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverMLP.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "BaseModel.h" 4 | #include "Model_PoleSimple.h" 5 | #include "NN_RLSolverBase.h" 6 | #include "Net.h" 7 | #include "../Trainer/Trainer.h" 8 | 9 | namespace ReinforcementLearning { 10 | class NN_RLSolverMLP: public NN_RLSolverBase { 11 | public: 12 | NN_RLSolverMLP(std::shared_ptr m, 13 | std::shared_ptr net0, 14 | std::shared_ptr trainer0, 15 | int Dim, DeepLearning::QLearningSolverParameter para); 16 | virtual ~NN_RLSolverMLP(){} 17 | virtual void train(); 18 | virtual void generateTrainingSample(std::shared_ptr trainingX, std::shared_ptr trainingY); 19 | virtual void generateExperience(); 20 | virtual double getRewards(const State& newS) const; 21 | virtual bool terminate(const State& S) const; 22 | virtual void setNormalizationConst(); 23 | virtual double calQ(const State& S, int action) const; 24 | virtual void test(){} 25 | protected: 26 | std::vector experienceSet; 27 | private: 28 | std::vector durationVec; 29 | }; 30 | } 31 | -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverMultiMLP.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "BaseModel.h" 4 | #include "Model_PoleSimple.h" 5 | #include "NN_RLSolverBase.h" 6 | #include "NN_RLSolverMLP.h" 7 | #include "Net.h" 8 | #include "../Trainer/Trainer.h" 9 | 10 | namespace ReinforcementLearning { 11 | class NN_RLSolverMultiMLP: public NN_RLSolverMLP { 12 | public: 13 | NN_RLSolverMultiMLP(std::shared_ptr m, 14 | std::vector> net0, 15 | std::shared_ptr trainer0, 16 | int Dim, DeepLearning::QLearningSolverParameter para); 17 | virtual ~NN_RLSolverMultiMLP(){} 18 | virtual void train(); 19 | virtual void generateTrainingSample(); 20 | virtual double calQ(const State& S, int action) const; 21 | void outputPolicy(); 22 | private: 23 | void outputQ(int i); 24 | int numActions; 25 | std::vector> nets; 26 | std::vector> trainingSampleXs, trainingSampleYs; 27 | std::vector durationVec; 28 | 29 | // parameters to output the Q value, for checking purpose 30 | int n_rows; 31 | int n_cols; 32 | double dx1; 33 | double dx2; 34 | double minx1; 35 | double minx2; 36 | 37 | 38 | }; 39 | } 40 | -------------------------------------------------------------------------------- /src/test/NN-RL/NN_RLSolverRNN.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "BaseModel.h" 4 | #include "Model_PoleSimple.h" 5 | #include "NN_RLSolverMLP.h" 6 | #include "Net.h" 7 | #include "../Trainer/Trainer.h" 8 | 9 | namespace ReinforcementLearning { 10 | class NN_RLSolverRNN: public 
NN_RLSolverMLP { 11 | public: 12 | NN_RLSolverRNN(std::shared_ptr m, 13 | std::shared_ptr net0, 14 | std::shared_ptr trainer0, 15 | int Dim, DeepLearning::QLearningSolverParameter para); 16 | virtual ~NN_RLSolverRNN(){} 17 | virtual void train(); 18 | virtual void generateExperience(); 19 | virtual void generateTrainingSampleVec(std::vector>& trainingSampleX, 20 | std::vector>& trainingSampleY); 21 | virtual bool terminate(const State& S) const; 22 | virtual void setNormalizationConst(); 23 | virtual double calQ(const State& S, int action) const; 24 | virtual void test(); 25 | void outputTraining(std::vector> &trainingXVec,std::string); 26 | protected: 27 | std::vector durationVec; 28 | std::vector> trainingXVec, trainingYVec; 29 | std::vector> experienceSeqVec; 30 | }; 31 | } 32 | -------------------------------------------------------------------------------- /src/test/NN-RL/Qtableresult/qtable1.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/NN-RL/Qtableresult/qtable1.tif -------------------------------------------------------------------------------- /src/test/NN-RL/Qtableresult/qtable2.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/NN-RL/Qtableresult/qtable2.tif -------------------------------------------------------------------------------- /src/test/NN-RL/Qtableresult/qtable3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/NN-RL/Qtableresult/qtable3.tif -------------------------------------------------------------------------------- /src/test/NN-RL/Qtableresult/qtableAction.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/NN-RL/Qtableresult/qtableAction.tif -------------------------------------------------------------------------------- /src/test/NN-RL/RLSolverBase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.h" 3 | #include "BaseModel.h" 4 | using namespace NeuralNet; 5 | namespace ReinforcementLearning { 6 | 7 | class RLSolverBase { 8 | public: 9 | 10 | RLSolverBase(std::shared_ptr m, int Dim, DeepLearning::QLearningSolverParameter para) { 11 | trainingPara = para; 12 | model = m; 13 | stateDim = Dim; 14 | randChoice = std::make_shared(0, model->getNumActions() - 1); 15 | } 16 | 17 | virtual ~RLSolverBase() { 18 | } 19 | virtual void train() = 0; 20 | virtual double getRewards(const State& newS) const {return 0.0;}; 21 | protected: 22 | int stateDim; 23 | std::shared_ptr model; 24 | std::shared_ptr randChoice; 25 | DeepLearning::QLearningSolverParameter trainingPara; 26 | 27 | }; 28 | } 29 | -------------------------------------------------------------------------------- /src/test/NN-RL/RLSolver_2DTable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "common.h" 4 | #include "RLSolverBase.h" 5 | 6 | 7 | namespace ReinforcementLearning { 8 | 9 | class RLSolver_2DTable : RLSolverBase{ 10 | public: 11 | RLSolver_2DTable(std::shared_ptr 
m, int Dim, 12 | DeepLearning::QLearningSolverParameter para, int n_row0, int n_col0, 13 | double dx, double dy, double min_x, double min_y); 14 | 15 | virtual ~RLSolver_2DTable() {} 16 | virtual void train(); 17 | virtual void test(); 18 | void replayExperience(); 19 | virtual void updateQ(Experience); 20 | virtual void getMaxQ(const State& S, double* Q, int* action) const; 21 | arma::cube& getQTable(){return QTable;} 22 | virtual void loadQTable(std::string filetag); 23 | private: 24 | void outputPolicy(); 25 | void outputQ(std::string filename); 26 | void writeTrajectory(int iter, std::ostream &os, int action, State state, double reward) const; 27 | std::pair stateToIndex(const State & S) const; 28 | arma::cube QTable; 29 | int n_rows, n_cols, numActions; 30 | double dx1, dx2, minx1, minx2; 31 | arma::Mat count; 32 | std::vector experienceVec; 33 | }; 34 | } 35 | -------------------------------------------------------------------------------- /src/test/NN-RL/elman.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 1 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 2 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: zero 9 | normal_std: 0.1 10 | normal_mean:0.1 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 9 23 | outputDim: 50 24 | activationType: tanh 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | layerStruct{ 34 | name: "BaseLayer2" 35 | inputDim: 50 36 | outputDim: 1 37 | activationType: linear 38 | init_W{ 39 | initializerType: glorot_uniform 40 | } 41 | init_B { 42 | initializerType: zero 43 | } 44 | } 45 | 46 | neuralNetTrainingParameter{ 47 | trainerType: SGDRNN 48 | learningRate: 0.01 49 | miniBatchSize: 10 50 | NEpoch: 5000 51 | momentum: 0.95 52 | decayRate: 1000 53 | showGradNorm: false 54 | RNNScanFlag: false 55 | RNNScanStep: 5 56 | RNNTruncateLength: 10 57 | } 58 | 59 | -------------------------------------------------------------------------------- /src/test/NN-RL/mlp.prototxt: -------------------------------------------------------------------------------- 1 | 2 | layerStruct{ 3 | name: "BaseLayer1" 4 | inputDim: 3 5 | outputDim: 10 6 | activationType: tanh 7 | init_W{ 8 | initializerType: normal 9 | normal_mean: 0 10 | normal_std: 0.01 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | } 16 | 17 | layerStruct{ 18 | name: "BaseLayer3" 19 | inputDim: 10 20 | outputDim: 1 21 | activationType: linear 22 | init_W{ 23 | initializerType: normal 24 | normal_mean: 0 25 | normal_std: 0.01 26 | } 27 | init_B { 28 | initializerType: zero 29 | } 30 | } 31 | 32 | neuralNetTrainingParameter{ 33 | trainerType: RMSProp 34 | learningRate: 0.01 35 | miniBatchSize: 100 36 | NEpoch: 100 37 | momentum: 0.90 38 | decayRate: 2000 39 | printInfoFrequency: 10 40 | verbose: true 41 | } 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/test/NN-RL/multimlp.prototxt: -------------------------------------------------------------------------------- 1 | 2 | layerStruct{ 3 | name: "BaseLayer1" 4 | inputDim: 2 5 | outputDim: 10 6 | activationType: tanh 7 | init_W{ 8 | initializerType: glorot_uniform 9 | normal_mean: 0 10 | normal_std: 0.1 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | } 16 | 17 | 
layerStruct{ 18 | name: "BaseLayer1" 19 | inputDim: 10 20 | outputDim: 20 21 | activationType: tanh 22 | init_W{ 23 | initializerType: glorot_uniform 24 | normal_mean: 0 25 | normal_std: 0.1 26 | } 27 | init_B { 28 | initializerType: zero 29 | } 30 | } 31 | layerStruct{ 32 | name: "BaseLayer3" 33 | inputDim: 20 34 | outputDim: 1 35 | activationType: linear 36 | init_W{ 37 | initializerType: glorot_uniform 38 | normal_mean: 0 39 | normal_std: 0.1 40 | } 41 | init_B { 42 | initializerType: zero 43 | } 44 | } 45 | 46 | neuralNetTrainingParameter{ 47 | trainerType: SGD 48 | learningRate: 5 49 | miniBatchSize: 300 50 | NEpoch: 5000 51 | momentum: 0.9 52 | decayRate: 2000 53 | printInfoFrequency: 300 54 | verbose: true 55 | } 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /src/test/NN-RL/net.prototxt: -------------------------------------------------------------------------------- 1 | 2 | layerStruct{ 3 | name: "BaseLayer1" 4 | inputDim: 3 5 | outputDim: 10 6 | activationType: sigmoid 7 | init_W{ 8 | initializerType: glorot_uniform 9 | } 10 | init_B { 11 | initializerType: zero 12 | } 13 | } 14 | 15 | layerStruct{ 16 | name: "BaseLayer2" 17 | inputDim: 10 18 | outputDim: 10 19 | activationType: sigmoid 20 | init_W{ 21 | initializerType: glorot_uniform 22 | } 23 | init_B { 24 | initializerType: zero 25 | } 26 | } 27 | 28 | layerStruct{ 29 | name: "BaseLayer3" 30 | inputDim: 10 31 | outputDim: 1 32 | activationType: linear 33 | init_W{ 34 | initializerType: glorot_uniform 35 | } 36 | init_B { 37 | initializerType: zero 38 | } 39 | } 40 | 41 | neuralNetTrainingParameter{ 42 | 43 | learningRate: 1 44 | miniBatchSize: 10 45 | NEpoch: 500 46 | momentum: 0.9 47 | decayRate: 100 48 | printInfoFrequency: 100 49 | verbose: false 50 | } 51 | -------------------------------------------------------------------------------- /src/test/NN-RL/plotQMap.m: -------------------------------------------------------------------------------- 1 | clear all 2 | close all 3 | 4 | for i = 0 : 2 5 | filename = strcat('QMap',num2str(i),'.dat'); 6 | data{i+1} = load(filename); 7 | figure(i + 1) 8 | imagesc(data{i+1}); 9 | colorbar; 10 | end 11 | 12 | data{4} = load('actionMapNN.dat'); 13 | figure(5) 14 | imagesc(data{4}); 15 | colorbar; -------------------------------------------------------------------------------- /src/test/NN-RL/plotQtable.m: -------------------------------------------------------------------------------- 1 | clear all 2 | close all 3 | 4 | for i = 0 : 2 5 | filename = strcat('QTableFinal',num2str(i),'.dat'); 6 | data{i+1} = load(filename); 7 | figure(i + 1) 8 | imagesc(data{i+1}); 9 | colorbar; 10 | end 11 | 12 | data{4} = load('actionMap.dat'); 13 | figure(5) 14 | imagesc(data{4}); 15 | colorbar; -------------------------------------------------------------------------------- /src/test/NN-RL/qsolver.prototxt: -------------------------------------------------------------------------------- 1 | qLearningSolverParameter{ 2 | learningRate: 0.1 3 | discount: 0.95 4 | numTrainingEpisodes: 200 5 | epsilon: 0.2 6 | EpisodeLength: 1000 7 | QTableOutputInterval: 100 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /src/test/NN-RL/rnn.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numHiddenLayers: 2 3 | hiddenLayerInputDim: 20 4 | hiddenLayerOutputDim: 20 5 | inputDim: 3 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | 
normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 20 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: SGDRNN 35 | learningRate: 0.05 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.90 39 | decayRate: 2500 40 | showGradNorm: false 41 | RNNScanFlag: false 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | -------------------------------------------------------------------------------- /src/test/Optimization/Makefile: -------------------------------------------------------------------------------- 1 | CPP = g++ 2 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include 3 | ARMA_LINKFLAGS= -lblas -llapack 4 | CXXFLAGS = -std=c++0x -I$(ARMA_INCLUDE) -c -DARMA_DONT_USE_WRAPPER 5 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program 6 | LINKFLAGS = -static $(LINKOPTFLAGS) 7 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl 8 | 9 | 10 | 11 | OBJ = main.o LBFGS.o SteepDescent.o 12 | 13 | test : $(OBJ) 14 | $(CPP) -o test $(OBJ) $(LINKFLAGS) 15 | 16 | clean: 17 | rm -f *.o *~ 18 | -------------------------------------------------------------------------------- /src/test/RNN/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../Makefile.common 3 | 4 | OBJ = test_RNN.o Trainer.o 5 | 6 | test : $(OBJ) 7 | $(CXX) -o $@ $(OBJ) $(LDFLAG) 8 | Trainer.o : ../Trainer/Trainer.cpp 9 | 10 | $(CXX) -c $(CXXFLAGS) $^ 11 | clean: 12 | rm -f *.o *~ 13 | -------------------------------------------------------------------------------- /src/test/RNN/RLtest2.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 2 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 3 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 8 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: RMSProp 35 | learningRate: 0.2 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 2500 40 | showGradNorm: false 41 | RNNScanFlag: false 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/test/RNN/gradcheck.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numHiddenLayers: 2 3 | hiddenLayerInputDim: 8 4 | hiddenLayerOutputDim: 8 5 | inputDim: 1 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.01 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: glorot_normal 14 | } 15 | init_W_two{ 16 | initializerType: glorot_normal 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 8 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_normal 27 | } 28 | init_B { 29 | initializerType: 
zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | learningRate: 0.01 35 | miniBatchSize: 100 36 | NEpoch: 5000 37 | momentum: 0.9 38 | decayRate: 10000 39 | } 40 | 41 | 42 | -------------------------------------------------------------------------------- /src/test/RNN/net.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numHiddenLayers: 3 3 | hiddenLayerInputDim: 8 4 | hiddenLayerOutputDim: 8 5 | inputDim: 1 6 | outputDim: 1 7 | activationType: tanh 8 | init_W_one{ 9 | initializerType: normal 10 | normal_std: 0.01 11 | normal_mean:0 12 | } 13 | init_B { 14 | initializerType: zero 15 | } 16 | init_W_two{ 17 | initializerType: glorot_normal 18 | } 19 | } 20 | 21 | layerStruct{ 22 | name: "BaseLayer2" 23 | inputDim: 8 24 | outputDim: 1 25 | activationType: linear 26 | init_W{ 27 | initializerType: glorot_normal 28 | } 29 | init_B { 30 | initializerType: zero 31 | } 32 | } 33 | 34 | neuralNetTrainingParameter{ 35 | learningRate: 0.01 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 10000 40 | } 41 | 42 | 43 | -------------------------------------------------------------------------------- /src/test/RNN/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/RNN/test -------------------------------------------------------------------------------- /src/test/RNN/testIntermediate.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 2 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 1 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 8 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: RMSProp 35 | learningRate: 0.1 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 500 40 | showGradNorm: false 41 | RNNScanFlag: true 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/test/RNN/testSimple.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 1 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 1 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 8 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: RMSProp 35 | learningRate: 0.1 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 500 40 | showGradNorm: false 41 | RNNScanFlag: true 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | 47 | 
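Note on the RNN configs above (testSimple.prototxt, testIntermediate.prototxt, RLtest2.prototxt): each follows the same pattern as the MLP configs — an rnnStruct block, a final linear layerStruct, and a neuralNetTrainingParameter block that selects the trainer (RMSProp or SGDRNN). A driver for such a file would mirror src/test/Trainer/test_trainer.cpp shown further down. The sketch below is an illustration only: the RNN(message) constructor, its compatibility with TrainerBuilder::GetTrainer, and the toy 1-D sample shapes are assumptions by analogy with the MLP test, not code taken from the repository.

#include "common.h"
#include "RNN.h"
#include "Trainer.h"

using namespace NeuralNet;
using namespace DeepLearning;

int main(int argc, char* argv[]) {
    if (argc < 2) return 1;

    // Parse e.g. testSimple.prototxt into the generated protobuf message.
    NeuralNetParameter message;
    ReadProtoFromTextFile(argv[1], &message);

    // Toy 1-D sequence data: one row (inputDim: 1 in the configs above), 100 time steps.
    std::shared_ptr<arma::mat> trainX(new arma::mat(arma::randu<arma::mat>(1, 100)));
    std::shared_ptr<arma::mat> trainY(new arma::mat(arma::randu<arma::mat>(1, 100)));

    // Assumed: RNN can be built directly from the message, analogous to MultiLayerPerceptron(message).
    std::shared_ptr<RNN> rnn(new RNN(message));

    // Trainer type and hyperparameters (learningRate, NEpoch, RNNTruncateLength, ...)
    // come from the neuralNetTrainingParameter block of the same config file.
    std::shared_ptr<Trainer> trainer(TrainerBuilder::GetTrainer(rnn, message));
    trainer->setTrainingSamples(trainX, trainY);
    trainer->train();
    return 0;
}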
-------------------------------------------------------------------------------- /src/test/RNN/testlittleTimer.prototxt: -------------------------------------------------------------------------------- 1 | rnnStruct{ 2 | numRecurrLayers: 2 3 | recurrLayerInputDim: 8 4 | recurrLayerOutputDim: 8 5 | inputDim: 2 6 | activationType: tanh 7 | init_W_one{ 8 | initializerType: normal 9 | normal_std: 0.2 10 | normal_mean:0 11 | } 12 | init_B { 13 | initializerType: zero 14 | } 15 | init_W_two{ 16 | initializerType: glorot_uniform 17 | } 18 | } 19 | 20 | layerStruct{ 21 | name: "BaseLayer2" 22 | inputDim: 8 23 | outputDim: 1 24 | activationType: linear 25 | init_W{ 26 | initializerType: glorot_uniform 27 | } 28 | init_B { 29 | initializerType: zero 30 | } 31 | } 32 | 33 | neuralNetTrainingParameter{ 34 | trainerType: RMSProp 35 | learningRate: 0.2 36 | miniBatchSize: 100 37 | NEpoch: 5000 38 | momentum: 0.9 39 | decayRate: 2500 40 | showGradNorm: false 41 | RNNScanFlag: true 42 | RNNScanStep: 5 43 | RNNTruncateLength: 10 44 | } 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/test/RNNtestRLSet/prediction: -------------------------------------------------------------------------------- 1 | -0.4582 -0.0240 -0.1143 -0.1255 -0.1676 -0.1394 -0.1225 -0.0605 0.0110 0.1483 0.0870 -0.3770 -0.4164 -0.4211 -0.4212 -0.4210 -0.4209 -0.4207 -0.4206 -0.4204 -0.4202 -0.4200 -0.4198 -0.4195 -0.4192 -0.4190 -0.4186 -0.4183 -0.4179 -0.4175 -0.4170 -0.4165 -0.4159 -0.4153 -0.4146 -0.4138 -0.4129 -0.4119 -0.4108 -0.4096 -0.4082 -0.4066 -0.4047 -0.4026 2 | 0.0896 0.0588 0.0238 0.0247 0.0246 0.0245 0.0244 0.0242 0.0240 -0.0119 0.0179 0.0223 0.0227 0.0222 0.0217 0.0211 0.0204 0.0194 0.0183 0.0170 0.0153 0.0132 0.0106 -0.9894 3 | -------------------------------------------------------------------------------- /src/test/Trainer/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../Makefile.common 3 | 4 | OBJ = test_trainer.o Trainer.o MultiLayerPerceptron.o 5 | 6 | test_IO : $(OBJ) 7 | $(CXX) -o test $(OBJ) $(LDFLAG) 8 | MultiLayerPerceptron.o : ../MultiLayerPerceptron/MultiLayerPerceptron.cpp 9 | $(CXX) -c $(CXXFLAGS) $^ 10 | clean: 11 | rm -f *.o *~ 12 | -------------------------------------------------------------------------------- /src/test/Trainer/net.prototxt: -------------------------------------------------------------------------------- 1 | layerStruct{ 2 | name: "BaseLayer1" 3 | inputDim: 3 4 | outputDim: 8 5 | activationType: sigmoid 6 | } 7 | 8 | layerStruct{ 9 | name: "BaseLayer2" 10 | inputDim: 8 11 | outputDim: 1 12 | activationType: sigmoid 13 | } 14 | 15 | neuralNetTrainingParameter{ 16 | learningRate: 0.1 17 | miniBatchSize: 10 18 | NEpoch: 20 19 | momentum: 0.9 20 | decayRate: 1000 21 | } 22 | -------------------------------------------------------------------------------- /src/test/Trainer/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepIntelligence/DeepLearning/ad6f5594bf57a53f5b7944b48d73c786c1114e83/src/test/Trainer/test -------------------------------------------------------------------------------- /src/test/Trainer/test_trainer.cpp: -------------------------------------------------------------------------------- 1 | #include "Trainer.h" 2 | #include "common.h" 3 | #include "../MultiLayerPerceptron/MultiLayerPerceptron.h" 4 | 5 | using namespace NeuralNet; 6 | using namespace DeepLearning; 7 | int main(int argc, 
char* argv[]){ 8 | 9 | if (argc < 2) exit(1); 10 | 11 | NeuralNetParameter message; 12 | ReadProtoFromTextFile(argv[1], &message); 13 | 14 | std::shared_ptr DataX(new arma::mat); 15 | std::shared_ptr DataY(new arma::mat); 16 | std::shared_ptr trainDataX(new arma::mat); 17 | std::shared_ptr trainDataY(new arma::mat); 18 | std::shared_ptr testDataX(new arma::mat); 19 | std::shared_ptr testDataY(new arma::mat); 20 | std::shared_ptr ValidationDataX(new arma::mat); 21 | std::shared_ptr ValidationDataY(new arma::mat); 22 | 23 | loadData_MNIST(DataX,DataY,(std::string)argv[2]); 24 | 25 | int ntrain =2000; 26 | int ntest = 1000; 27 | // now I split data into train, test, and validation 28 | trainDataX = std::make_shared(DataX->cols(0,ntrain-1)); 29 | trainDataY = std::make_shared(DataY->cols(0,ntrain-1)); 30 | testDataX = std::make_shared(DataX->cols(ntrain,ntrain+ntest-1)); 31 | testDataY = std::make_shared(DataY->cols(ntrain,ntrain+ntest-1)); 32 | 33 | 34 | std::shared_ptr mlp(new MultiLayerPerceptron(message)); 35 | std::shared_ptr trainer( TrainerBuilder::GetTrainer(mlp,message)); 36 | trainer->setTrainingSamples(trainDataX, trainDataY); 37 | trainer->train(); 38 | return 0; 39 | } -------------------------------------------------------------------------------- /src/test/TwolayerPerceptron/Makefile: -------------------------------------------------------------------------------- 1 | # this make file now is dynamically linking 2 | #OPTFLAGS = -O3 -march=nocona -mfpmath=sse -msse3 -Wuninitialized -flto 3 | #MKL_INCLUDE = /opt/intel/mkl/include/ 4 | #MKL_INCLUDE = /opt/intel/composer_xe_2013.0.079/mkl/include/ 5 | MKL_INCLUDE = /opt/intel/composerxe/mkl/include 6 | 7 | #CFLAGS = -std=c++0x -I/g/ssli/software/pkgs/boost_1_49_0 -c -DNDEBUG -D__LINUX $(OPTFLAGS) 8 | CPP = g++ 9 | #MKLROOT = /opt/intel/mkl 10 | #MKLROOT = /opt/intel/composer_xe_2013.0.079/mkl 11 | MKLROOT = /opt/intel/composerxe/mkl 12 | MKLLINKFLAGS = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread 13 | MKL_DL_LINKFLAGS = -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm 14 | ARMA_INCLUDE=-I~/Downloads/armadillo-5.100.2/include/armadillo 15 | ARMA_LINKFLAGS=-L/usr/lib -L/opt/intel/mkl/lib/intel64 -larmadillo -lmkl_rt -llapack -lopenblas 16 | #MKLLINKFLAGS = -Wl,--start-group -Wl,--end-group -lpthread 17 | CXXFLAGS = -std=c++0x -I$(MKL_INCLUDE) $(ARMA_INCLUDE) -I/opt/boost/boost_1_57_0 -c -DDEBUG -D__LINUX -g3 18 | LINKOPTFLAGS = -O3 -flto=4 -fwhole-program 19 | #LINKFLAGS = -static $(LINKOPTFLAGS) $(MKLLINKFLAGS) -ldl 20 | LINK_DL_FLAGS = $(MKL_DL_LINKFLAGS) $(ARMA_LINKFLAGS) -ldl 21 | #LINKFLAGS = 22 | #ODIR=obj 23 | ODIR = 24 | 25 | 26 | OBJ = mainSDA.o MultiLayerPerceptron.o BaseLayer.o 27 | 28 | test : $(OBJ) 29 | $(CPP) -o test $(OBJ) $(LINK_DL_FLAGS) 30 | 31 | BaseLayer.o : ../BaseLayer/BaseLayer.cpp ../BaseLayer/BaseLayer.h 32 | $(CPP) -c $(CXXFLAGS) $< 33 | 34 | clean: 35 | rm -f *.o *~ 36 | -------------------------------------------------------------------------------- /src/test/TwolayerPerceptron/MultiLayerPerceptron.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "MultiLayerPerceptron.h" 3 | 4 | MultiLayerPerceptron::MultiLayerPerceptron(int inputDim0, int outputDim0, int hiddenDim0, std::shared_ptr trainingX0, 5 | std::shared_ptr trainingY0, TrainingPara trainingPara0) { 6 | 7 | 8 | 
inputDim = inputDim0; 9 | hiddenDim = hiddenDim0; 10 | outputDim = outputDim0; 11 | numLayers = 2; 12 | trainingX = trainingX0; 13 | trainingY = trainingY0; 14 | numInstance = trainingX->n_rows; 15 | trainingPara = trainingPara0; 16 | 17 | layers.push_back(BaseLayer(inputDim,hiddenDim,BaseLayer::sigmoid)); 18 | layers.push_back(BaseLayer(hiddenDim,outputDim,BaseLayer::softmax)); 19 | // layers[0].W.print("layer 0 W"); 20 | // layers[0].B.print("layer 0 B"); 21 | // layers[1].W.print("layer 1 W"); 22 | // layers[1].B.print("layer 1 B"); 23 | } 24 | 25 | 26 | void MultiLayerPerceptron::train() { 27 | // Here I used stochastic gradient descent 28 | // first do the forward propagate 29 | trainingPara.print(); 30 | int ntimes = numInstance / trainingPara.miniBatchSize; 31 | std::shared_ptr subInputX, subInputY; 32 | double errorTotal; 33 | int size = trainingPara.miniBatchSize; 34 | double alpha = trainingPara.alpha / size; 35 | for(int epoch = 0; epoch < trainingPara.NEpoch; epoch++) { 36 | std::cout << epoch << std::endl; 37 | errorTotal = 0.0; 38 | for (int i = 0; i < ntimes; i++) { 39 | // first do the propogation 40 | subInputX = std::make_shared(trainingX->rows(i*size,(i+1)*size-1)); 41 | subInputY = std::make_shared(trainingY->rows(i*size,(i+1)*size-1)); 42 | 43 | layers[0].inputX = subInputX; 44 | layers[0].activateUp(subInputX); 45 | layers[1].inputX = layers[0].outputY; 46 | layers[1].activateUp(layers[1].inputX); 47 | // layers[0].outputY->print("layer0 outputY:"); 48 | // layers[1].outputY->print("layer1 outputY:"); 49 | // std::shared_ptr predictY = layers[1].outputY; 50 | arma::mat sigmoid_deriv2 = (*(layers[1].outputY)) % (1-*(layers[1].outputY)); 51 | arma::mat delta2 = ((-*subInputY + *(layers[1].outputY)).st()) % sigmoid_deriv2.st(); 52 | arma::mat grad1 = delta2 * (*(layers[1].inputX)); 53 | arma::vec deltaSum2 = arma::sum(delta2,1); 54 | 55 | arma::mat errortemp = (-*subInputY + *(layers[1].outputY)).st(); 56 | // errortemp.print(); 57 | arma::vec error = arma::sum(errortemp,1); 58 | // error.print(); 59 | // deltaSum2.print(); 60 | errorTotal += arma::as_scalar(error.st() * error); 61 | *(layers[1].W) -= alpha*grad1; 62 | *(layers[1].B) -= alpha*deltaSum2; 63 | 64 | 65 | // delta0 should have the dimension of hidden Dimension 66 | arma::mat sigmoid_deriv1 = (*(layers[0].outputY)) % (1-*(layers[0].outputY)); 67 | arma::mat delta1 = ( (layers[1].W)->st() * delta2) % sigmoid_deriv1.st(); 68 | arma::mat grad0 = delta1 * (*(layers[0].inputX)); 69 | arma::vec deltaSum1 = arma::sum(delta1,1); 70 | *(layers[0].W) -= alpha*grad0; 71 | *(layers[0].B) -= alpha*deltaSum1; 72 | 73 | } 74 | std::cout << "error is: " << errorTotal << std::endl; 75 | } 76 | 77 | // layers[1].outputY->print("final prediction"); 78 | } 79 | 80 | //if(converge(W_aug_old,W_aug_new)) break; 81 | 82 | 83 | void MultiLayerPerceptron::test(std::shared_ptr trainingX,std::shared_ptr trainingY) { 84 | layers[0].inputX = trainingX; 85 | layers[0].activateUp(trainingX); 86 | layers[1].inputX = layers[0].outputY; 87 | layers[1].activateUp(layers[1].inputX); 88 | layers[1].outputY->save("testoutput.txt",arma::raw_ascii); 89 | 90 | } 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /src/test/TwolayerPerceptron/MultiLayerPerceptron.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "../BaseLayer/BaseLayer.h" 4 | 5 | 6 | struct TrainingPara { 7 | 8 | TrainingPara(double eps0=1e-6, int NEpoch0 
= 500, 9 | int miniBatchSize0 = 10, double alpha0 = 0.1): 10 | eps(eps0),NEpoch(NEpoch0), 11 | miniBatchSize(miniBatchSize0), alpha(alpha0) {} 12 | 13 | 14 | double eps; 15 | int NEpoch; 16 | int miniBatchSize; 17 | double alpha; 18 | // Method method; 19 | void print() const { 20 | 21 | std::cout << eps << "\t"; 22 | std::cout << NEpoch << "\t"; 23 | std::cout << miniBatchSize << "\t"; 24 | std::cout << alpha << std::endl; 25 | 26 | } 27 | }; 28 | 29 | 30 | 31 | class MultiLayerPerceptron { 32 | public: 33 | MultiLayerPerceptron(int inputDim0, int outputDim0, int hiddenDim0, std::shared_ptr trainingX0, 34 | std::shared_ptr trainingY0, TrainingPara trainingPara); 35 | 36 | void train(); 37 | void initialize(); 38 | void test(std::shared_ptr trainingX,std::shared_ptr trainingY); 39 | private: 40 | bool converge(); 41 | TrainingPara trainingPara; 42 | int numLayers; 43 | int inputDim; 44 | int hiddenDim; 45 | int outputDim; 46 | int numInstance; 47 | std::vector layers; 48 | std::shared_ptr trainingX; 49 | std::shared_ptr trainingY; 50 | 51 | 52 | }; -------------------------------------------------------------------------------- /src/test/TwolayerPerceptron/mainSDA.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "MultiLayerPerceptron.h" 9 | 10 | 11 | 12 | 13 | void loadData_MNIST(std::shared_ptr X, 14 | std::shared_ptr Y); 15 | 16 | int main(int argc, char *argv[]) { 17 | std::shared_ptr trainDataX(new arma::mat); 18 | std::shared_ptr trainDataY(new arma::mat); 19 | loadData_MNIST(trainDataX,trainDataY); 20 | 21 | int inputDim = trainDataX->n_cols; 22 | int outputDim = trainDataY->n_cols; 23 | int hiddenDim = 100; 24 | std::cout << inputDim << std::endl; 25 | std::cout << outputDim << std::endl; 26 | std::cout << trainDataX->n_rows << std::endl; 27 | std::cout << trainDataY->n_rows << std::endl; 28 | trainDataX->save("trainingSamples.txt",arma::raw_ascii); 29 | TrainingPara trainingPara(1e-6,100, 10, 0.5); 30 | trainingPara.print(); 31 | MultiLayerPerceptron mlp(inputDim, outputDim, hiddenDim, trainDataX, trainDataY, trainingPara); 32 | 33 | mlp.train(); 34 | 35 | mlp.test(trainDataX,trainDataY); 36 | // after training i do some testing 37 | 38 | } 39 | 40 | 41 | void loadData_MNIST(std::shared_ptr X, 42 | std::shared_ptr Y) { 43 | 44 | std::string filename_base("../MNIST/data"); 45 | std::string filename; 46 | char tag[50]; 47 | char x; 48 | int count; 49 | int numFiles = 10; 50 | int featSize = 28*28; 51 | int labelSize = 10; 52 | int numSamples = 100; 53 | X->set_size(numFiles*numSamples,featSize); 54 | Y->set_size(numFiles*numSamples,labelSize); 55 | Y->fill(0); 56 | // std::cout << Y.Len() << std::endl; 57 | // std::cout << X.NumR() << std::endl; 58 | // std::cout << X.NumC() << std::endl; 59 | 60 | for (int i = 0 ; i < numFiles ; i++) { 61 | sprintf(tag,"%d",i); 62 | filename=filename_base+(std::string)tag; 63 | std::cout << filename << std::endl; 64 | std::ifstream infile; 65 | infile.open(filename,std::ios::binary | std::ios::in); 66 | if (infile.is_open()) { 67 | 68 | for (int j = 0 ; j < numSamples ; j++) { 69 | 70 | for (int k =0 ; k 2 | #include "Util.h" 3 | 4 | 5 | using namespace NeuralNet; 6 | 7 | int main(){ 8 | 9 | Random_Bernoulli r(0.5); 10 | 11 | for(int i = 0; i < 100; i++) 12 | std::cout << r.next() << std::endl; 13 | 14 | 15 | Random_Bernoulli r2(0.5); 16 | 17 | int p[25]; 18 | for(int i = 0; i < 25; i++ ){ 19 | p[i] = 1; 20 | } 
21 | 22 | r2.modifier(p,25); 23 | std::cout << "second" << std::endl; 24 | for(int i = 0; i < 25; i++) 25 | std::cout << p[i] << std::endl; 26 | 27 | 28 | Random_Bernoulli r3(0.5); 29 | 30 | unsigned long long p2[25]; 31 | for(int i = 0; i < 25; i++ ){ 32 | p2[i] = 1; 33 | } 34 | 35 | r3.modifier(p2,25); 36 | std::cout << "third" << std::endl; 37 | for(int i = 0; i < 25; i++) 38 | std::cout << p2[i] << std::endl; 39 | 40 | std::shared_ptr X, Y; 41 | std::string filename=""; 42 | 43 | loadData_MNIST(X, Y, filename); 44 | return 0; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /src/test/arma/Makefile: -------------------------------------------------------------------------------- 1 | # You may need to edit this file to reflect the type and capabilities of your system. 2 | # The defaults are for a Linux system and may need to be changed for other systems (eg. Mac OS X). 3 | 4 | 5 | CXX=g++ 6 | 7 | #CXX=CC 8 | ## When using the Sun Studio compiler 9 | 10 | 11 | ARMA_INCLUDE_FLAG = -I ../include 12 | ## If you've installed Armadillo's headers manually, you may need to tell the compiler where they are. 13 | ## For example, change ../include to /usr/local/include 14 | 15 | 16 | LIB_FLAGS = -lblas -llapack 17 | #LIB_FLAGS = -lopenblas -llapack 18 | #LIB_FLAGS = -framework Accelerate 19 | #LIB_FLAGS = -library=sunperf 20 | 21 | ## NOTE: on Ubuntu and Debian based systems you may need to add -lgfortran to LIB_FLAGS 22 | ## NOTE: if you're using Mac OS, use the line with -framework Accelerate 23 | ## NOTE: if you're using the Sun Studio compiler, use the line with -library=sunperf 24 | 25 | 26 | OPT = -O2 27 | ## As the Armadillo library uses recursive templates, compilation times depend on the level of optimisation: 28 | ## 29 | ## -O0: quick compilation, but the resulting program will be slow 30 | ## -O1: good trade-off between compilation time and execution speed 31 | ## -O2: produces programs which have almost all possible speedups, but compilation takes longer 32 | ## -O3: enables auto vectorisation when using gcc 33 | 34 | #OPT = -xO4 -xannotate=no 35 | ## When using the Sun Studio compiler 36 | 37 | 38 | #EXTRA_OPT = -fwhole-program 39 | ## Uncomment the above line if you're compiling all source files into one program in a single hit 40 | 41 | 42 | #DEBUG = -DARMA_EXTRA_DEBUG 43 | ## Uncomment the above line to enable low-level debugging. 44 | ## Lots of debugging information will be printed when a compiled program is run. 45 | ## Please enable this option when reporting bugs. 46 | 47 | 48 | #FINAL = -DARMA_NO_DEBUG 49 | ## Uncomment the above line to disable Armadillo's checks. 50 | ## Not recommended unless your code has been first thoroughly tested! 
51 | 52 | 53 | CXXFLAGS = $(ARMA_INCLUDE_FLAG) $(DEBUG) $(FINAL) $(OPT) $(EXTRA_OPT) 54 | 55 | all: example1 56 | 57 | example1: example1.cpp 58 | $(CXX) $(CXXFLAGS) -o $@ $< $(LIB_FLAGS) 59 | 60 | 61 | .PHONY: clean 62 | 63 | clean: 64 | rm -f example1 65 | 66 | -------------------------------------------------------------------------------- /src/test/arma/example1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std; 5 | using namespace arma; 6 | 7 | 8 | int 9 | main(int argc, char** argv) 10 | { 11 | cout << "Armadillo version: " << arma_version::as_string() << endl; 12 | 13 | mat A(2,3); // directly specify the matrix size (elements are uninitialised) 14 | 15 | cout << "A.n_rows: " << A.n_rows << endl; // .n_rows and .n_cols are read only 16 | cout << "A.n_cols: " << A.n_cols << endl; 17 | 18 | A(1,2) = 456.0; // directly access an element (indexing starts at 0) 19 | A.print("A:"); 20 | 21 | 22 | A = 5.0; // scalars are treated as a 1x1 matrix 23 | A.print("A:"); 24 | 25 | 26 | A.set_size(4,5); // change the size (data is not preserved) 27 | 28 | A.fill(5.0); // set all elements to a particular value 29 | A.print("A:"); 30 | 31 | // endr indicates "end of row" 32 | A << 0.165300 << 0.454037 << 0.995795 << 0.124098 << 0.047084 << endr 33 | << 0.688782 << 0.036549 << 0.552848 << 0.937664 << 0.866401 << endr 34 | << 0.348740 << 0.479388 << 0.506228 << 0.145673 << 0.491547 << endr 35 | << 0.148678 << 0.682258 << 0.571154 << 0.874724 << 0.444632 << endr 36 | << 0.245726 << 0.595218 << 0.409327 << 0.367827 << 0.385736 << endr; 37 | 38 | A.print("A:"); 39 | 40 | // determinant 41 | cout << "det(A): " << det(A) << endl; 42 | 43 | // inverse 44 | cout << "inv(A): " << endl << inv(A) << endl; 45 | 46 | // save matrix as a file 47 | A.save("A.txt", raw_ascii); 48 | 49 | // load from file 50 | mat B; 51 | B.load("A.txt"); 52 | 53 | // submatrices 54 | cout << "B( span(0,2), span(3,4) ):" << endl << B( span(0,2), span(3,4) ) << endl; 55 | 56 | cout << "B.row(0): " << endl << B.row(0) << endl; 57 | 58 | cout << "B.col(1): " << endl << B.col(1) << endl; 59 | 60 | // transpose 61 | cout << "B.t(): " << endl << B.t() << endl; 62 | 63 | // maximum from each column (traverse along rows) 64 | cout << "max(B): " << endl << max(B) << endl; 65 | 66 | // maximum from each row (traverse along columns) 67 | cout << "max(B,1): " << endl << max(B,1) << endl; 68 | 69 | // maximum value in B 70 | cout << "max(max(B)) = " << max(max(B)) << endl; 71 | 72 | // sum of each column (traverse along rows) 73 | cout << "sum(B): " << endl << sum(B) << endl; 74 | 75 | // sum of each row (traverse along columns) 76 | cout << "sum(B,1) =" << endl << sum(B,1) << endl; 77 | 78 | // sum of all elements 79 | cout << "accu(B): " << accu(B) << endl; 80 | 81 | // trace = sum along diagonal 82 | cout << "trace(B): " << trace(B) << endl; 83 | 84 | // generate the identity matrix 85 | mat C = eye(4,4); 86 | 87 | // random matrix with values uniformly distributed in the [0,1] interval 88 | mat D = randu(4,4); 89 | D.print("D:"); 90 | 91 | // row vectors are treated like a matrix with one row 92 | rowvec r; 93 | r << 0.59119 << 0.77321 << 0.60275 << 0.35887 << 0.51683; 94 | r.print("r:"); 95 | 96 | // column vectors are treated like a matrix with one column 97 | colvec q; 98 | q << 0.14333 << 0.59478 << 0.14481 << 0.58558 << 0.60809; 99 | q.print("q:"); 100 | 101 | // dot or inner product 102 | cout << "as_scalar(r*q): " << as_scalar(r*q) << endl; 103 | 
104 | // outer product 105 | cout << "q*r: " << endl << q*r << endl; 106 | 107 | // multiply-and-accumulate operation (no temporary matrices are created) 108 | cout << "accu(A % B) = " << accu(A % B) << endl; 109 | 110 | // example of a compound operation 111 | B += 2.0 * A.t(); 112 | B.print("B:"); 113 | 114 | // imat specifies an integer matrix 115 | imat AA; 116 | imat BB; 117 | 118 | AA << 1 << 2 << 3 << endr << 4 << 5 << 6 << endr << 7 << 8 << 9; 119 | BB << 3 << 2 << 1 << endr << 6 << 5 << 4 << endr << 9 << 8 << 7; 120 | 121 | // comparison of matrices (element-wise); output of a relational operator is a umat 122 | umat ZZ = (AA >= BB); 123 | ZZ.print("ZZ:"); 124 | 125 | // cubes ("3D matrices") 126 | cube Q( B.n_rows, B.n_cols, 2 ); 127 | 128 | Q.slice(0) = B; 129 | Q.slice(1) = 2.0 * B; 130 | 131 | Q.print("Q:"); 132 | 133 | // 2D field of arbitrary length row vectors (fields can also store abitrary objects, eg. instances of std::string) 134 | field xyz(3,2); 135 | 136 | xyz(0,0) = randu(1,2); 137 | xyz(1,0) = randu(1,3); 138 | xyz(2,0) = randu(1,4); 139 | xyz(0,1) = randu(1,5); 140 | xyz(1,1) = randu(1,6); 141 | xyz(2,1) = randu(1,7); 142 | 143 | cout << "xyz:" << endl; 144 | cout << xyz << endl; 145 | 146 | return 0; 147 | } 148 | 149 | -------------------------------------------------------------------------------- /src/test/example1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std; 5 | using namespace arma; 6 | 7 | 8 | int 9 | main(int argc, char** argv) 10 | { 11 | cout << "Armadillo version: " << arma_version::as_string() << endl; 12 | 13 | mat A(2,3); // directly specify the matrix size (elements are uninitialised) 14 | 15 | cout << "A.n_rows: " << A.n_rows << endl; // .n_rows and .n_cols are read only 16 | cout << "A.n_cols: " << A.n_cols << endl; 17 | 18 | A(1,2) = 456.0; // directly access an element (indexing starts at 0) 19 | A.print("A:"); 20 | 21 | 22 | A = 5.0; // scalars are treated as a 1x1 matrix 23 | A.print("A:"); 24 | 25 | 26 | A.set_size(4,5); // change the size (data is not preserved) 27 | 28 | A.fill(5.0); // set all elements to a particular value 29 | A.print("A:"); 30 | 31 | // endr indicates "end of row" 32 | A << 0.165300 << 0.454037 << 0.995795 << 0.124098 << 0.047084 << endr 33 | << 0.688782 << 0.036549 << 0.552848 << 0.937664 << 0.866401 << endr 34 | << 0.348740 << 0.479388 << 0.506228 << 0.145673 << 0.491547 << endr 35 | << 0.148678 << 0.682258 << 0.571154 << 0.874724 << 0.444632 << endr 36 | << 0.245726 << 0.595218 << 0.409327 << 0.367827 << 0.385736 << endr; 37 | 38 | A.print("A:"); 39 | /* 40 | // determinant 41 | //cout << "det(A): " << det(A) << endl; 42 | 43 | // inverse 44 | // cout << "inv(A): " << endl << inv(A) << endl; 45 | */ 46 | // save matrix as a file 47 | A.save("A.txt", raw_ascii); 48 | 49 | // load from file 50 | mat B; 51 | B.load("A.txt"); 52 | 53 | // submatrices 54 | cout << "B( span(0,2), span(3,4) ):" << endl << B( span(0,2), span(3,4) ) << endl; 55 | 56 | cout << "B.row(0): " << endl << B.row(0) << endl; 57 | 58 | cout << "B.col(1): " << endl << B.col(1) << endl; 59 | 60 | // transpose 61 | cout << "B.t(): " << endl << B.t() << endl; 62 | 63 | // maximum from each column (traverse along rows) 64 | cout << "max(B): " << endl << max(B) << endl; 65 | 66 | // maximum from each row (traverse along columns) 67 | cout << "max(B,1): " << endl << max(B,1) << endl; 68 | 69 | // maximum value in B 70 | cout << "max(max(B)) = " << max(max(B)) << 
endl; 71 | 72 | // sum of each column (traverse along rows) 73 | cout << "sum(B): " << endl << sum(B) << endl; 74 | 75 | // sum of each row (traverse along columns) 76 | cout << "sum(B,1) =" << endl << sum(B,1) << endl; 77 | 78 | // sum of all elements 79 | cout << "accu(B): " << accu(B) << endl; 80 | 81 | // trace = sum along diagonal 82 | cout << "trace(B): " << trace(B) << endl; 83 | 84 | // generate the identity matrix 85 | mat C = eye(4,4); 86 | 87 | // random matrix with values uniformly distributed in the [0,1] interval 88 | mat D = randu(4,4); 89 | D.print("D:"); 90 | 91 | // row vectors are treated like a matrix with one row 92 | rowvec r; 93 | r << 0.59119 << 0.77321 << 0.60275 << 0.35887 << 0.51683; 94 | r.print("r:"); 95 | 96 | // column vectors are treated like a matrix with one column 97 | colvec q; 98 | q << 0.14333 << 0.59478 << 0.14481 << 0.58558 << 0.60809; 99 | q.print("q:"); 100 | 101 | // dot or inner product 102 | cout << "as_scalar(r*q): " << as_scalar(r*q) << endl; 103 | 104 | // outer product 105 | cout << "q*r: " << endl << q*r << endl; 106 | 107 | // multiply-and-accumulate operation (no temporary matrices are created) 108 | cout << "accu(A % B) = " << accu(A % B) << endl; 109 | 110 | // example of a compound operation 111 | B += 2.0 * A.t(); 112 | B.print("B:"); 113 | 114 | // imat specifies an integer matrix 115 | imat AA; 116 | imat BB; 117 | 118 | AA << 1 << 2 << 3 << endr << 4 << 5 << 6 << endr << 7 << 8 << 9; 119 | BB << 3 << 2 << 1 << endr << 6 << 5 << 4 << endr << 9 << 8 << 7; 120 | 121 | // comparison of matrices (element-wise); output of a relational operator is a umat 122 | umat ZZ = (AA >= BB); 123 | ZZ.print("ZZ:"); 124 | 125 | // cubes ("3D matrices") 126 | cube Q( B.n_rows, B.n_cols, 2 ); 127 | 128 | Q.slice(0) = B; 129 | Q.slice(1) = 2.0 * B; 130 | 131 | Q.print("Q:"); 132 | /* 133 | // 2D field of arbitrary length row vectors (fields can also store abitrary objects, eg. instances of std::string) 134 | field xyz(3,2); 135 | 136 | xyz(0,0) = randu(1,2); 137 | xyz(1,0) = randu(1,3); 138 | xyz(2,0) = randu(1,4); 139 | xyz(0,1) = randu(1,5); 140 | xyz(1,1) = randu(1,6); 141 | xyz(2,1) = randu(1,7); 142 | 143 | cout << "xyz:" << endl; 144 | cout << xyz << endl; 145 | */ 146 | return 0; 147 | } 148 | 149 | --------------------------------------------------------------------------------
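As a small companion to example1.cpp above (not a file in this repository), the same Armadillo API can also solve a dense linear system directly; solve() is generally preferred over forming inv(A) explicitly, and it relies on the LAPACK backend already linked via -llapack in the arma Makefile. The matrix values below are arbitrary illustrations.

#include <iostream>
#include <armadillo>

using namespace arma;

int main() {
    // A well-conditioned 3x3 system A*x = b (diagonal shift keeps A invertible).
    mat A = randu<mat>(3, 3) + 3.0 * eye<mat>(3, 3);
    vec b = randu<vec>(3);

    // solve() factorises A instead of computing inv(A)*b; faster and more accurate.
    vec x = solve(A, b);
    x.print("x:");

    // Residual should be close to zero.
    vec r = A * x - b;
    r.print("residual:");

    std::cout << "det(A): " << det(A) << std::endl;
    return 0;
}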