├── saveData ├── saveNetWork.h └── saveNetWork.cpp ├── readData ├── readNetWork.h ├── readCifar10Data.h ├── readMnistData.h ├── readCifar100.h ├── readNetWork.cpp ├── readCifar10Data.cpp ├── readCifar100.cpp └── readMnistData.cpp ├── examples ├── cifar100 │ ├── cifar100.h │ └── cifar100.cpp ├── mnist │ ├── mnist.h │ └── mnist.cpp ├── dynamic_g_model │ ├── dynamic_g_entry.hpp │ └── dynamic_g_entry.cpp └── cifar10 │ ├── cifar-10.h │ └── cifar-10.cpp ├── dataAugmentation ├── dataProcess.h └── dataProcess.cu ├── layers ├── ShareLayer.cu ├── VoteLayer.h ├── ShareLayer.h ├── InceptionLayer.h ├── ActivationLayer.h ├── LRNLayer.h ├── DropOutLayer.h ├── DataLayer.h ├── PoolLayer.h ├── HiddenLayer.h ├── SoftMaxLayer.h ├── LayersBase.h ├── InceptionLayer.cu ├── ConvLayer.h ├── VoteLayer.cu ├── LayersBase.cpp ├── DataLayer.cu ├── DropOutLayer.cu ├── LRNLayer.cu └── ActivationLayer.cu ├── data ├── cifar100 │ └── get_cifar100.sh ├── cifar-10 │ └── get_cifar10.sh └── mnist │ └── get_mnist.sh ├── tests ├── test_layer.h └── test_layer.cpp ├── columnNet.cuh ├── .gitigore ├── main.cpp ├── common ├── MemoryMonitor.h ├── cuMatrixVector.h ├── MemoryMonitor.cpp ├── utility.cuh ├── cuBaseVector.h ├── checkError.h ├── cuMatrix.h └── utility.cu ├── composite ├── Concat.h ├── Inception.h ├── Concat.cu ├── Inception.cpp ├── NodeFission.cpp └── NodeFission.h ├── CMakeLists.txt ├── profile ├── dynamic_g_profile │ ├── dynamic_begin │ │ ├── MnistConfig.txt │ │ ├── Cifar10Config.txt │ │ └── Cifar100Config.txt │ └── dynamic_end │ │ ├── MnistConfig.txt │ │ ├── Cifar10Config.txt │ │ └── Cifar100Config.txt ├── MnistConfig.txt ├── Cifar10Config.txt └── Cifar100Config.txt ├── README.md ├── cuDNN_netWork.h └── cuDNN_netWork.cpp /saveData/saveNetWork.h: -------------------------------------------------------------------------------- 1 | /* 2 | * saveNetWork.h 3 | * 4 | * Created on: Jan 9, 2016 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef SAVENETWORK_H_ 9 | #define SAVENETWORK_H_ 10 | 11 | #include 12 | #include"../config/config.h" 13 | #include"../layers/LayersBase.h" 14 | 15 | void saveNetWork(); 16 | 17 | #endif /* SAVENETWORK_H_ */ 18 | -------------------------------------------------------------------------------- /readData/readNetWork.h: -------------------------------------------------------------------------------- 1 | /* 2 | * readNetWork.h 3 | * 4 | * Created on: Jan 10, 2016 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef READNETWORK_H_ 9 | #define READNETWORK_H_ 10 | 11 | #include 12 | #include"../config/config.h" 13 | #include"../layers/LayersBase.h" 14 | 15 | void readNetWork(); 16 | 17 | 18 | 19 | #endif /* READNETWORK_H_ */ 20 | -------------------------------------------------------------------------------- /examples/cifar100/cifar100.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: cifar100.h 3 | > Author: TDX 4 | > Mail: sa614149@mail.ustc.edu.cn 5 | > Created Time: 2017年05月11日 星期四 14时54分06秒 6 | ************************************************************************/ 7 | 8 | #ifndef _CIFAR100_H 9 | #define _CIFAR100_H 10 | 11 | void runCifar100(); 12 | #endif 13 | -------------------------------------------------------------------------------- /dataAugmentation/dataProcess.h: -------------------------------------------------------------------------------- 1 | /* 2 | * dataProcess.h 3 | * 4 | * Created on: Apr 23, 2016 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef DATAPROCESS_H_ 9 | #define 
DATAPROCESS_H_ 10 | 11 | 12 | #include"../common/cuMatrixVector.h" 13 | #include"../common/cuMatrix.h" 14 | 15 | 16 | void dataProcessing(cuMatrixVector& tranSetX, cuMatrixVector& testSetX); 17 | 18 | 19 | #endif /* DATAPROCESS_H_ */ 20 | -------------------------------------------------------------------------------- /examples/mnist/mnist.h: -------------------------------------------------------------------------------- 1 | /* 2 | * mnist.h 3 | * 4 | * Created on: Mar 16, 2016 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef MNIST_H_ 9 | #define MNIST_H_ 10 | 11 | #include 12 | #include"common/cuMatrixVector.h" 13 | #include"common/cuMatrix.h" 14 | #include"readData/readMnistData.h" 15 | #include"config/config.h" 16 | #include"columnNet.cuh" 17 | #include"common/utility.cuh" 18 | 19 | 20 | void runMnist(); 21 | 22 | 23 | 24 | #endif /* MNIST_H_ */ 25 | -------------------------------------------------------------------------------- /examples/dynamic_g_model/dynamic_g_entry.hpp: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: dynamic_g_entry.hPP 3 | > Author: TDX 4 | > Mail: sa614149@mail.ustc.edu.cn 5 | > Created Time: 2017年03月21日 星期二 20时10分05秒 6 | ************************************************************************/ 7 | 8 | #ifndef _DYNAMIC_G_ENTRY_HPP 9 | #define _DYNAMIC_G_ENTRY_HPP 10 | 11 | void dynamic_g_entry(); 12 | 13 | 14 | 15 | 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /examples/cifar10/cifar-10.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cifar-10.h 3 | * 4 | * Created on: Mar 16, 2016 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef CIFAR_10_H_ 9 | #define CIFAR_10_H_ 10 | 11 | #include"config/config.h" 12 | #include"readData/readCifar10Data.h" 13 | #include"common/cuMatrixVector.h" 14 | #include"common/cuMatrix.h" 15 | #include"columnNet.cuh" 16 | #include"common/utility.cuh" 17 | 18 | #include 19 | 20 | 21 | void runCifar10(); 22 | 23 | 24 | 25 | #endif /* CIFAR_10_H_ */ 26 | -------------------------------------------------------------------------------- /layers/ShareLayer.cu: -------------------------------------------------------------------------------- 1 | #include"ShareLayer.h" 2 | 3 | ShareLayer::ShareLayer(string name) 4 | { 5 | _name = name; 6 | _inputName = " "; 7 | srcData = NULL; 8 | dstData = NULL; 9 | diffData = NULL; 10 | number = 0; 11 | channels = 0; 12 | height = 0; 13 | width = 0; 14 | nextLayer.clear(); 15 | prevLayer.clear(); 16 | inputImageDim = 0; 17 | inputAmount = 0; 18 | outputSize = 0; 19 | } 20 | 21 | int ShareLayer::getOutputSize() 22 | { 23 | return outputSize; 24 | } 25 | -------------------------------------------------------------------------------- /data/cifar100/get_cifar100.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # This scripts downloads the CIFAR100 (binary version) data and unzips it. 3 | 4 | DIR="$( cd "$(dirname "$0")" ; pwd -P )" 5 | cd $DIR 6 | 7 | echo "Downloading..." 8 | 9 | wget --no-check-certificate http://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz 10 | 11 | echo "Unzipping..." 12 | 13 | tar -xf cifar-100-binary.tar.gz && rm -f cifar-100-binary.tar.gz 14 | 15 | # Creation is split out because leveldb sometimes causes segfault 16 | # and needs to be re-created. 17 | 18 | echo "Done." 
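(For reference: each record in the cifar-100-binary files fetched above is 3074 bytes — one coarse-label byte, one fine-label byte, then 3072 channel-major pixel bytes. readData/readCifar100.cpp reads both label bytes and keeps only the fine one. Expressed as a struct, assuming that layout:)

// On-disk CIFAR-100 record layout (3074 bytes), as parsed by readData/readCifar100.cpp:
struct Cifar100Record {
    unsigned char coarse;            // superclass label (read but unused by this repo's reader)
    unsigned char fine;              // class label (stored into the label matrix)
    unsigned char pixels[3][32][32]; // channel-major R, G, B planes
};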
19 | -------------------------------------------------------------------------------- /readData/readCifar10Data.h: -------------------------------------------------------------------------------- 1 | /* 2 | * readCifar10Data.h 3 | * 4 | * Created on: Mar 16, 2016 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef READCIFAR10DATA_H_ 9 | #define READCIFAR10DATA_H_ 10 | 11 | 12 | #include"../common/cuMatrixVector.h" 13 | #include"../common/cuMatrix.h" 14 | #include 15 | #include 16 | #include 17 | 18 | void read_Cifar10_Data(cuMatrixVector& trainX, 19 | cuMatrixVector& testX, 20 | cuMatrix*& trainY, 21 | cuMatrix*& testY); 22 | 23 | #endif /* READCIFAR10DATA_H_ */ 24 | -------------------------------------------------------------------------------- /data/cifar-10/get_cifar10.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # This scripts downloads the CIFAR10 (binary version) data and unzips it. 3 | 4 | DIR="$( cd "$(dirname "$0")" ; pwd -P )" 5 | cd $DIR 6 | 7 | echo "Downloading..." 8 | 9 | wget --no-check-certificate http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz 10 | 11 | echo "Unzipping..." 12 | 13 | tar -xf cifar-10-binary.tar.gz && rm -f cifar-10-binary.tar.gz 14 | mv cifar-10-batches-bin/* . && rm -rf cifar-10-batches-bin 15 | 16 | # Creation is split out because leveldb sometimes causes segfault 17 | # and needs to be re-created. 18 | 19 | echo "Done." 20 | -------------------------------------------------------------------------------- /readData/readMnistData.h: -------------------------------------------------------------------------------- 1 | /* 2 | * readMnistData.h 3 | * 4 | * Created on: Nov 19, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef READMNISTDATA_H_ 9 | #define READMNISTDATA_H_ 10 | 11 | #include"../common/cuMatrixVector.h" 12 | #include"../common/cuMatrix.h" 13 | #include 14 | 15 | void readMnistData(cuMatrixVector& normalizedData, 16 | cuMatrix*& dataY, 17 | string Xpath, 18 | string Ypath, 19 | int normalized_width, 20 | int imageSize); 21 | 22 | 23 | #endif /* READMNISTDATA_H_ */ 24 | -------------------------------------------------------------------------------- /readData/readCifar100.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: readCifar100.h 3 | > Author: TDX 4 | > Mail: sa614149@mail.ustc.edu.cn 5 | > Created Time: 2017年05月11日 星期四 15时16分06秒 6 | ************************************************************************/ 7 | 8 | #ifndef _READCIFAR100_H 9 | #define _READCIFAR100_H 10 | 11 | #include"common/cuMatrixVector.h" 12 | #include"common/cuMatrix.h" 13 | 14 | void readCifar100Data(cuMatrixVector&trainX, 15 | cuMatrixVector&testX, 16 | cuMatrix*&trainY, 17 | cuMatrix*&testY); 18 | #endif 19 | -------------------------------------------------------------------------------- /layers/VoteLayer.h: -------------------------------------------------------------------------------- 1 | #ifndef VOTE_LAYER_H 2 | #define VOTE_LAYER_H 3 | 4 | #include "LayersBase.h" 5 | #include "../common/cuMatrix.h" 6 | #include "../config/config.h" 7 | #include 8 | 9 | /* 10 | * Class Voting layer 11 | * */ 12 | class VoteLayer 13 | { 14 | public: 15 | static VoteLayer* instance(); 16 | VoteLayer(); 17 | ~VoteLayer(); 18 | void vote( int n_start_position, int n_batch_size, float* p_host_vote ); 19 | float result(); 20 | void init(int nNumOfTestData, int nClasses, cuMatrix* pLabels); 21 | void 
clear(); 22 | 23 | private: 24 | int m_nNumOfTestData; 25 | int m_nClasses; 26 | float* m_pHostVote;// m_nNumOfTestData * m_nClasses 27 | int* m_pLabels; //labels 28 | }; 29 | 30 | #endif 31 | 32 | -------------------------------------------------------------------------------- /layers/ShareLayer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ShareLayer.h 3 | * 4 | * Created on: Mar 7, 2016 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef SHARELAYER_H_ 9 | #define SHARELAYER_H_ 10 | 11 | #include 12 | #include 13 | #include"LayersBase.h" 14 | 15 | /* 16 | * Class SharedLayer 17 | * */ 18 | class ShareLayer : public LayersBase 19 | { 20 | public: 21 | ShareLayer(string name); 22 | ShareLayer(string name, LayersBase* layer); 23 | void ReShape(){} 24 | void forwardPropagation(string train_or_test){}; 25 | void backwardPropagation(float Momemtum){}; 26 | void readWeight(FILE* file){}; 27 | void saveWeight(FILE* file){}; 28 | int getOutputSize(); 29 | 30 | private: 31 | int outputSize; 32 | }; 33 | 34 | 35 | #endif /* SHARELAYER_H_ */ 36 | -------------------------------------------------------------------------------- /data/mnist/get_mnist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # This scripts downloads the mnist data and unzips it. 3 | 4 | DIR="$( cd "$(dirname "$0")" ; pwd -P )" 5 | cd $DIR 6 | 7 | echo "Downloading..." 8 | 9 | wget --no-check-certificate http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 10 | wget --no-check-certificate http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 11 | wget --no-check-certificate http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 12 | wget --no-check-certificate http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 13 | 14 | echo "Unzipping..." 15 | 16 | gunzip train-images-idx3-ubyte.gz 17 | gunzip train-labels-idx1-ubyte.gz 18 | gunzip t10k-images-idx3-ubyte.gz 19 | gunzip t10k-labels-idx1-ubyte.gz 20 | 21 | # Creation is split out because leveldb sometimes causes segfault 22 | # and needs to be re-created. 23 | 24 | echo "Done." 
25 | -------------------------------------------------------------------------------- /tests/test_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * test_layer.h 3 | * 4 | * Created on: Dec 26, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef TEST_LAYER_H_ 9 | #define TEST_LAYER_H_ 10 | 11 | #include 12 | #include"../layers/LayersBase.h" 13 | 14 | 15 | void CHECK_HOST_MATRIX_EQ(float* A, int sizeA, float* B, int sizeB); 16 | void CHECK_DEV_HOST_MATRIX_EQ(float* A, int sizeA, float* B, int sizeB); 17 | void printf_HostParameter(int number, int channels, int height, int width, float* A); 18 | void printf_DevParameter(int number, int channels, int height,int width, float*A); 19 | 20 | void copy_DeviceToHost(float*devData, float*&hostData, int number, int channels, int height, int width); 21 | void copy_HostToDevice(float*hostData, float*&devData, int number, int channels, int height, int width); 22 | void printfLayersParameter(LayersBase* layer); 23 | #endif /* TEST_LAYER_H_ */ 24 | -------------------------------------------------------------------------------- /columnNet.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * columnNet.cuh 3 | * 4 | * Created on: Nov 26, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef COLUMNNET_CUH_ 9 | #define COLUMNNET_CUH_ 10 | 11 | #include"common/cuMatrix.h" 12 | #include"common/cuMatrixVector.h" 13 | #include"config/config.h" 14 | 15 | void creatColumnNet(int sign); 16 | void cuTrainNetWork(cuMatrixVector &trainData, 17 | cuMatrix* &trainLabel, 18 | cuMatrixVector &testData, 19 | cuMatrix*&testLabel, 20 | int batchSize); 21 | 22 | void dynamic_g_trainNet(cuMatrixVector &trainData, 23 | cuMatrix* &trainLabel, 24 | cuMatrixVector &testData, 25 | cuMatrix*&testLabel, 26 | config* endConfig 27 | ); 28 | 29 | #endif /* COLUMNNET_CUH_ */ 30 | -------------------------------------------------------------------------------- /.gitigore: -------------------------------------------------------------------------------- 1 | # C++ .gitignore 2 | 3 | # Compiled Object files 4 | *.slo 5 | *.lo 6 | *.o 7 | *.obj 8 | 9 | # Precompiled Headers 10 | *.gch 11 | *.pch 12 | 13 | # Compiled Dynamic libraries 14 | *.so 15 | *.dylib 16 | *.dll 17 | 18 | # Fortran module files 19 | *.mod 20 | 21 | # Compiled Static libraries 22 | *.lai 23 | *.la 24 | *.a 25 | *.lib 26 | 27 | # Executables 28 | *.exe 29 | *.out 30 | *.app 31 | 32 | 33 | # C file gitignore 34 | 35 | # Object files 36 | *.o 37 | *.ko 38 | *.obj 39 | *.elf 40 | 41 | # Precompiled Headers 42 | *.gch 43 | *.pch 44 | 45 | # Libraries 46 | *.lib 47 | *.a 48 | *.la 49 | *.lo 50 | 51 | # Shared objects (inc. 
Windows DLLs) 52 | *.dll 53 | *.so 54 | *.so.* 55 | *.dylib 56 | 57 | # Executables 58 | *.exe 59 | *.out 60 | *.app 61 | *.i*86 62 | *.x86_64 63 | *.hex 64 | 65 | # Debug files 66 | *.dSYM/ 67 | 68 | 69 | #CUDA.gitignore 70 | 71 | *.i 72 | *.ii 73 | *.gpu 74 | *.ptx 75 | *.cubin 76 | *.fatbin 77 | 78 | 79 | # config file 80 | *.txt 81 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * main.cpp 3 | * Created on: Nov 19, 2015 4 | * Author: tdx 5 | */ 6 | #include 7 | #include 8 | #include"examples/mnist/mnist.h" 9 | #include"examples/cifar10/cifar-10.h" 10 | #include"examples/cifar100/cifar100.h" 11 | #include"examples/dynamic_g_model/dynamic_g_entry.hpp" 12 | #include 13 | 14 | int main(int argc, char** argv) 15 | { 16 | FLAGS_alsologtostderr = 1; 17 | google::InitGoogleLogging(argv[0]); 18 | 19 | LOG(INFO) << "Select the DataSet to Run:"; 20 | LOG(INFO) << "1.MNSIT 2.CIFAR-10 3.Dynamic Generation Model 4.CIFAR-100"; 21 | 22 | cudaSetDevice(0); 23 | int cmd; 24 | cin>> cmd; 25 | if(1 == cmd) 26 | runMnist(); 27 | else if(2 == cmd) 28 | runCifar10(); 29 | else if(3 == cmd) 30 | dynamic_g_entry(); 31 | else if(4 == cmd) 32 | runCifar100(); 33 | else 34 | LOG(FATAL) << "DataSet Select Error."; 35 | 36 | return 0; 37 | } 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /common/MemoryMonitor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * MemoryMonitor.h 3 | * 4 | * Created on: Nov 19, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef MEMORYMONITOR_H_ 9 | #define MEMORYMONITOR_H_ 10 | 11 | #include 12 | 13 | class MemoryMonitor 14 | { 15 | public: 16 | static MemoryMonitor *instanceObject(){ 17 | static MemoryMonitor *monitor=new MemoryMonitor(); 18 | return monitor; 19 | } 20 | 21 | void *cpuMallocMemory(int size); 22 | void gpuMallocMemory(void** devPtr ,int size); 23 | void freeCpuMemory(void* ptr); 24 | void freeGpuMemory(void*ptr); 25 | void gpuMemoryMemset(void* dev_data, int size); 26 | void cpuMemoryMemset(void* host_data, int size); 27 | void cpu2cpu(void* host_data2, void* host_data1, int size); 28 | void cpu2Gpu(void* dev_data, void* host_data, int size); 29 | void gpu2cpu(void* host_data, void* dev_data, int size); 30 | void gpu2gpu(void* dev_data2, void* dev_data1, int size); 31 | }; 32 | 33 | 34 | 35 | #endif /* MEMORYMONITOR_H_ */ 36 | -------------------------------------------------------------------------------- /layers/InceptionLayer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * InceptionLayer.h 3 | * 4 | * Created on: Mar 5, 2016 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef INCEPTIONLAYER_H_ 9 | #define INCEPTIONLAYER_H_ 10 | 11 | #include 12 | #include"LayersBase.h" 13 | #include"ConvLayer.h" 14 | #include"PoolLayer.h" 15 | #include"../composite/Inception.h" 16 | 17 | /* 18 | * Class Inception layer 19 | * */ 20 | class InceptionLayer : public LayersBase 21 | { 22 | public: 23 | InceptionLayer(string name, int sign); 24 | ~InceptionLayer(); 25 | int getOutputSize(); 26 | void ReShape(){} 27 | void forwardPropagation(string train_or_test); 28 | void backwardPropagation(float Momentum); 29 | void saveWeight(FILE* file){}; 30 | void readWeight(FILE* file){}; 31 | 32 | private: 33 | int one; 34 | int three; 35 | int five; 36 | int three_reduce; 37 | int five_reduce; 38 | int pool_proj; 39 | int 
outputSize; 40 | float lambda; 41 | float epsilon; 42 | Inception * inception; 43 | 44 | }; 45 | 46 | #endif /* INCEPTIONLAYER_H_ */ 47 | -------------------------------------------------------------------------------- /layers/ActivationLayer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ActivationLayer.h 3 | * 4 | * Created on: Dec 13, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef ACTIVATIONLAYER_H_ 9 | #define ACTIVATIONLAYER_H_ 10 | 11 | #include 12 | #include"LayersBase.h" 13 | #include"config/config.h" 14 | 15 | /* 16 | * Class activation layer 17 | * */ 18 | class ActivationLayer: public LayersBase 19 | { 20 | public: 21 | ActivationLayer(string name); 22 | ActivationLayer(const ActivationLayer* layer); 23 | ActivationLayer(const configBase* templateConfig); 24 | ~ActivationLayer(); 25 | void ReShape(); 26 | void forwardPropagation(string train_or_test); 27 | void backwardPropagation(float Momentum); 28 | void saveWeight(FILE*file){} 29 | void readWeight(FILE*file){} 30 | void createHandles(); 31 | void destroyHandles(); 32 | int getOutputSize(); 33 | 34 | private: 35 | int outputSize; 36 | int ActivationMode; 37 | cudnnActivationMode_t cudnnActivationMode; 38 | cudnnTensorDescriptor_t srcTensorDesc; 39 | cudnnTensorDescriptor_t dstTensorDesc; 40 | cudnnActivationDescriptor_t activDesc; 41 | }; 42 | 43 | #endif /* ACTIVATIONLAYER_H_ */ 44 | -------------------------------------------------------------------------------- /layers/LRNLayer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * LRNLayer.h 3 | * 4 | * Created on: Dec 31, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef LRNLAYER_H_ 9 | #define LRNLAYER_H_ 10 | 11 | #include 12 | #include 13 | #include"LayersBase.h" 14 | #include"../common/utility.cuh" 15 | #include"config/config.h" 16 | 17 | using namespace std; 18 | 19 | /* 20 | * Class LRN layer 21 | * */ 22 | class LRNLayer : public LayersBase 23 | { 24 | public: 25 | LRNLayer(string name); 26 | LRNLayer(const LRNLayer* layer); 27 | LRNLayer(const configBase* templateConfig); 28 | ~LRNLayer(); 29 | void ReShape(); 30 | void forwardPropagation(string train_or_test); 31 | void backwardPropagation(float Momentum); 32 | void saveWeight(FILE*file){} 33 | void readWeight(FILE*file){} 34 | void createHandles(); 35 | void destroyHandles(); 36 | int getOutputSize(); 37 | 38 | private: 39 | cudnnLRNDescriptor_t normDesc; 40 | cudnnTensorDescriptor_t srcTensorDesc; 41 | cudnnTensorDescriptor_t dstTensorDesc; 42 | int outputSize; 43 | int inputSize; 44 | unsigned lrnN ; 45 | double lrnAlpha; 46 | double lrnBeta; 47 | double lrnK; 48 | }; 49 | 50 | #endif /* LRNLAYER_H_ */ 51 | -------------------------------------------------------------------------------- /layers/DropOutLayer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * DropOutLayer.h 3 | * 4 | * Created on: Mar 15, 2016 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef DROPOUTLAYER_H_ 9 | #define DROPOUTLAYER_H_ 10 | 11 | #include"../config/config.h" 12 | #include"../tests/test_layer.h" 13 | #include"../common/utility.cuh" 14 | #include"LayersBase.h" 15 | #include 16 | 17 | /* 18 | * Class DropOut layer 19 | * */ 20 | class DropOutLayer : public LayersBase 21 | { 22 | public: 23 | DropOutLayer(string name); 24 | DropOutLayer(const DropOutLayer* layer); 25 | DropOutLayer(const configBase* templateConfig); 26 | ~DropOutLayer(); 27 | void CreateUniform(int size); 28 | void ReShape(); 29 | void 
Dropout_TrainSet(float* data, int size, float dropout_rate); 30 | void Dropout_TestSet(float* data, int size, float dropout_rate); 31 | void forwardPropagation(string train_or_test); 32 | void backwardPropagation(float Momemtum); 33 | void saveWeight(FILE* file){} 34 | void readWeight(FILE* file){} 35 | void createHandles(); 36 | void destroyHandles(); 37 | int getOutputSize(); 38 | 39 | private: 40 | int outputSize; 41 | float DropOut_rate; 42 | float* outputPtr; 43 | curandGenerator_t curandGenerator_DropOut; 44 | }; 45 | 46 | 47 | #endif /* DROPOUTLAYER_H_ */ 48 | -------------------------------------------------------------------------------- /common/cuMatrixVector.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cuMatrixVector.h 3 | * 4 | * Created on: Nov 19, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef CUMATRIXVECTOR_H_ 9 | #define CUMATRIXVECTOR_H_ 10 | 11 | #include"MemoryMonitor.h" 12 | #include"cuMatrix.h" 13 | #include 14 | #include 15 | using namespace std; 16 | 17 | template 18 | class cuMatrixVector 19 | { 20 | public: 21 | cuMatrixVector():m_hostPoint(0),m_devPoint(0){} 22 | 23 | ~cuMatrixVector() 24 | { 25 | MemoryMonitor::instanceObject()->freeCpuMemory(m_hostPoint); 26 | MemoryMonitor::instanceObject()->freeGpuMemory(m_devPoint); 27 | m_vec.clear(); 28 | } 29 | 30 | /*overload operator []*/ 31 | cuMatrix* operator[](size_t index) 32 | { 33 | if(index > m_vec.size()) 34 | { 35 | cout<<"cuMatrixVector:operator[] error "<* m) 43 | { 44 | m_vec.push_back(m); 45 | } 46 | 47 | //get size 48 | size_t size() 49 | { 50 | return m_vec.size(); 51 | } 52 | 53 | public: 54 | T** m_hostPoint; 55 | T** m_devPoint; 56 | vector* > m_vec; 57 | }; 58 | 59 | #endif /* CUMATRIXVECTOR_H_ */ 60 | -------------------------------------------------------------------------------- /layers/DataLayer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * DataLayer.h 3 | * 4 | * Created on: Nov 29, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef DATALAYER_H_ 9 | #define DATALAYER_H_ 10 | 11 | #include"LayersBase.h" 12 | #include"../common/cuMatrixVector.h" 13 | #include"../tests/test_layer.h" 14 | #include"../config/config.h" 15 | #include"../cuDNN_netWork.h" 16 | #include"../common/utility.cuh" 17 | #include 18 | #include 19 | 20 | /* 21 | * Class Data Layer 22 | * */ 23 | class DataLayer: public LayersBase 24 | { 25 | public: 26 | DataLayer(string name); 27 | DataLayer(const DataLayer* layer); 28 | DataLayer(const configBase* templateConfig){} 29 | ~DataLayer(); 30 | void getBatch_Images_Label(int index, cuMatrixVector &inputData, cuMatrix* &inputLabel); 31 | void RandomBatch_Images_Label(cuMatrixVector &inputData, cuMatrix* &inputLabel); 32 | void forwardPropagation(string train_or_test); 33 | void backwardPropagation(float Momentum); 34 | void ReShape(){} 35 | void saveWeight(FILE*file){} 36 | void readWeight(FILE*file){} 37 | int getOutputSize(); 38 | int getDataSize(); 39 | int* getDataLabel(); 40 | 41 | private: 42 | int dataSize; 43 | int batchSize; 44 | int *srcLabel; 45 | float* batchImage; 46 | }; 47 | 48 | #endif /* DATALAYER_H_ */ 49 | -------------------------------------------------------------------------------- /readData/readNetWork.cpp: -------------------------------------------------------------------------------- 1 | #include"readNetWork.h" 2 | #include 3 | #include 4 | 5 | /*read network parameter*/ 6 | void readNetWork() 7 | { 8 | char fileName[50]; 9 | int layerNum = 
Layers::instanceObject()->getLayersNum();
10 |     int imageSize = config::instanceObjtce()->get_imageSize();
11 |     int normalized_width = config::instanceObjtce()->get_normalizedWidth();
12 | 
13 |     sprintf(fileName, "models/net_normalized_%d_%d.txt", imageSize, normalized_width);
14 |     FILE *file = fopen(fileName, "r");
15 |     queue<configBase*> que;
16 |     set<configBase*> hash;
17 | 
18 |     if (file != NULL) {
19 |         configBase *config = (configBase*) config::instanceObjtce()->getFirstLayers();
20 |         que.push(config);
21 |         hash.insert(config);
22 |         while( !que.empty() ){
23 |             config = que.front();
24 |             que.pop();
25 |             LayersBase *layer = (LayersBase*) Layers::instanceObject()->getLayer(config->_name);
26 |             layer->readWeight(file);
27 |             for(int i = 0; i < config->_next.size(); i++){
28 |                 configBase* tmp = config->_next[i];
29 |                 if( hash.find(tmp) == hash.end()){ /* enqueue a successor only if not yet visited */
30 |                     hash.insert(tmp);
31 |                     que.push(tmp);
32 |                 }
33 |             }
34 |         }
35 |     } else {
36 |         cout << "readNetWork:: Open Failed" << endl;
37 |         exit(0);
38 |     }
39 |     fclose(file);
40 |     file = NULL;
41 | 
42 | }
43 | 
-------------------------------------------------------------------------------- /composite/Concat.h: --------------------------------------------------------------------------------
 1 | /*
 2 |  * Concat.h
 3 |  *
 4 |  * Created on: Mar 6, 2016
 5 |  *      Author: tdx
 6 |  */
 7 | 
 8 | #ifndef CONCAT_H_
 9 | #define CONCAT_H_
10 | 
11 | #include"../layers/LayersBase.h"
12 | #include"../common/MemoryMonitor.h"
13 | #include"../common/checkError.h"
14 | #include"../common/utility.cuh"
15 | #include"../common/cuBaseVector.h"
16 | #include"../cuDNN_netWork.h"
17 | #include"../tests/test_layer.h"
18 | #include<tuple>
19 | 
20 | class Concat
21 | {
22 | public:
23 |     typedef tuple<int, int, int, int> param_tuple;
24 |     Concat(Layers*& Inner_Layers, const param_tuple& args);
25 |     ~Concat();
26 |     void concatInit();
27 |     float* forwardSetup();
28 |     float* backwardSetup();
29 |     void split_DiffData(int index, float* diffData);
30 | 
31 | private:
32 |     int number;
33 |     int channels;
34 |     int height;
35 |     int width;
36 |     int size;
37 |     int one;
38 |     int three;
39 |     int five;
40 |     int pool_proj;
41 |     int prev_num;
42 |     int prev_channels;
43 |     int prev_height;
44 |     int prev_width;
45 |     int* separateDim;
46 |     int* host_offset;
47 |     int* dev_offset;
48 |     int* dev_channels;
49 |     int* host_channels;
50 |     float* separate_diffData;
51 |     float* dstData;
52 |     float* diffData;
53 |     Layers *InnerLayers;
54 |     cuBaseVector<float> separate_dstData;
55 |     cuBaseVector<float> prevDiff;
56 | 
57 | };
58 | 
59 | 
60 | 
61 | #endif /* CONCAT_H_ */
62 | 
--------------------------------------------------------------------------------
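(readNetWork() above and saveNetWork() below walk the layer-config graph the same way: breadth-first from getFirstLayers(), with a set guarding the queue so that a node reachable from several branches of a multi-column net is visited exactly once — a successor is enqueued only when hash.find(tmp) == hash.end(). A minimal, self-contained sketch of that idiom with a hypothetical Node type standing in for configBase, not part of this repo:)

#include<queue>
#include<set>
#include<vector>

struct Node { std::vector<Node*> next; };

void bfs(Node* first)
{
    std::queue<Node*> que;
    std::set<Node*> seen;
    que.push(first);
    seen.insert(first);
    while(!que.empty())
    {
        Node* cur = que.front(); que.pop();
        // visit(cur): e.g. saveWeight()/readWeight() in this repo
        for(Node* nxt : cur->next)
            if(seen.find(nxt) == seen.end())   // enqueue unseen successors only
            {
                seen.insert(nxt);
                que.push(nxt);
            }
    }
}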
/saveData/saveNetWork.cpp: --------------------------------------------------------------------------------
 1 | #include"saveNetWork.h"
 2 | #include<queue>
 3 | #include<set>
 4 | using namespace std;
 5 | 
 6 | /*save network parameter*/
 7 | void saveNetWork()
 8 | {
 9 |     char fileName[50];
10 |     int layerNum = Layers::instanceObject()->getLayersNum();
11 |     int imageSize = config::instanceObjtce()->get_imageSize();
12 |     int normalized_width = config::instanceObjtce()->get_normalizedWidth();
13 | 
14 |     sprintf(fileName,"models/net_normalized_%d_%d.txt", imageSize, normalized_width);
15 |     FILE *file = fopen(fileName, "w");
16 |     queue<configBase*> que;
17 |     set<configBase*> hash;
18 | 
19 |     if (file != NULL)
20 |     {
21 |         configBase *config = (configBase*) config::instanceObjtce()->getFirstLayers();
22 |         que.push( config );
23 |         hash.insert( config );
24 |         while( !que.empty() ){
25 |             config = que.front();
26 |             que.pop();
27 |             LayersBase *layer = (LayersBase*) Layers::instanceObject()->getLayer(config->_name);
28 |             layer->saveWeight(file);
29 |             for(int i = 0; i < config->_next.size(); i++){
30 |                 configBase* tmp = config->_next[i];
31 |                 if( hash.find( tmp ) == hash.end() ){ /* enqueue a successor only if not yet visited */
32 |                     hash.insert( tmp );
33 |                     que.push( tmp );
34 |                 }
35 |             }
36 |         }
37 |     }else{
38 |         cout<<"saveNetWork:: Open Failed"<<endl;
39 |         exit(0);
40 |     }
41 |     fclose(file);
42 |     file = NULL;
43 | }
-------------------------------------------------------------------------------- /composite/Inception.h: --------------------------------------------------------------------------------
 1 | /*
 2 |  * Inception.h
 3 |  *
 4 |  *      Author: tdx
 5 |  */
 6 | 
 7 | #ifndef INCEPTION_H_
 8 | #define INCEPTION_H_
 9 | 
10 | 
11 | #include<iostream>
12 | #include<tuple>
13 | #include<string>
14 | #include"Concat.h"
15 | #include"../layers/ConvLayer.h"
16 | #include"../layers/PoolLayer.h"
17 | #include"../layers/LayersBase.h"
18 | #include"../layers/ShareLayer.h"
19 | #include"../tests/test_layer.h"
20 | 
21 | 
22 | 
23 | using namespace std;
24 | 
25 | class Inception
26 | {
27 | public:
28 |     typedef tuple<int, int, int, int, int, int, int, int, float, float> param_tuple;
29 |     Inception(LayersBase* prevLayer, int sign, float* rate, const param_tuple& args);
30 |     ~Inception();
31 |     void forwardPropagation(string train_or_test);
32 |     void backwardPropagation(float*& nextLayerDiffData, float Momentum);
33 |     /*get result*/
34 |     float* getConcatData();
35 |     /*get delta*/
36 |     float* getInceptionDiffData();
37 | 
38 | private:
39 |     int one;
40 |     int three;
41 |     int five;
42 |     int three_reduce;
43 |     int five_reduce;
44 |     int pool_proj;
45 |     int inputAmount;
46 |     int inputImageDim;
47 |     float epsilon;
48 |     float lambda;
49 |     float* lrate;
50 |     float* dstData;
51 |     float* diffData;
52 |     Concat* concat;
53 |     ShareLayer* share_Layer;
54 |     Layers* InnerLayers;
55 |     ConvLayer* Conv_one;
56 |     ConvLayer* Conv_three_reduce;
57 |     ConvLayer* Conv_three;
58 |     ConvLayer* Conv_five;
59 |     ConvLayer* Conv_five_reduce;
60 |     ConvLayer* Conv_pool_proj;
61 |     PoolLayer* max_pool;
62 | 
63 | };
64 | 
65 | 
66 | 
67 | #endif /* INCEPTION_H_ */
68 | 
-------------------------------------------------------------------------------- /dataAugmentation/dataProcess.cu: --------------------------------------------------------------------------------
 1 | #include"dataProcess.h"
 2 | 
 3 | void dataProcessing(cuMatrixVector<float>& trainSetX, cuMatrixVector<float>& testSetX)
 4 | {
 5 |     int n_rows = trainSetX[0]->rows;
 6 |     int n_cols = trainSetX[0]->cols;
 7 |     int n_channels = trainSetX[0]->channels;
 8 |     int trainSize = (int)trainSetX.size();
 9 | 
10 |     cuMatrix<float>* avg = new cuMatrix<float>(n_rows, n_cols, n_channels);
11 | 
12 |     for(int id = 0; id < trainSize; id++)
13 |     {
14 |         int len = trainSetX[0]->getLength();
15 |         for(int j = 0; j < len; j++)
16 |         {
17 |             avg->getHostData()[j] += trainSetX[id]->getHostData()[j];
18 |         }
19 |     }
20 | 
21 |     for(int i = 0; i < avg->getLength(); i++)
22 |     {
23 |         avg->getHostData()[i] /= trainSize;
24 |     }
25 | 
26 |     for(int id = 0; id < trainSize; id++)
27 |     {
28 |         int len = trainSetX[0]->getLength();
29 |         for(int j = 0; j < len; j++)
30 |         {
31 |             trainSetX[id]->getHostData()[j] -= avg->getHostData()[j];
32 |         }
33 |     }
34 | 
35 |     MemoryMonitor::instanceObject()->cpuMemoryMemset(avg->getHostData(), avg->getLength() * sizeof(float));
36 |     int testSize = (int)testSetX.size();
37 | 
38 |     for (int id = 0; id < testSize; id++)
39 |     {
40 |         int len = testSetX[0]->getLength();
41 |         for (int j = 0; j < len; j++) {
42 |             avg->getHostData()[j] += testSetX[id]->getHostData()[j];
43 |         }
44 |     }
45 | 
46 |     for (int i = 0; i < avg->getLength(); i++) {
47 |         avg->getHostData()[i] /= testSize;
48 |     }
49 | 
50 |     for (int id = 0; id < testSize; id++)
51 |     {
52 |         int len = testSetX[0]->getLength();
53 |         for (int j = 0; j < len; j++) {
54 |             testSetX[id]->getHostData()[j] -= avg->getHostData()[j];
55 |         }
56 |     }
57 | 
58 |     delete avg;
59 | }
60 | 
-------------------------------------------------------------------------------- /layers/PoolLayer.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * PoolLayer.h 3 | * 4 | * Created on: Nov 28, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef POOLLAYER_H_ 9 | #define POOLLAYER_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include"LayersBase.h" 16 | #include"../common/utility.cuh" 17 | #include"../config/config.h" 18 | #include"../cuDNN_netWork.h" 19 | #include"../tests/test_layer.h" 20 | 21 | using namespace std; 22 | 23 | /* 24 | * Class pool layer 25 | * */ 26 | class PoolLayer : public LayersBase 27 | { 28 | public: 29 | typedef tuple param_tuple; 30 | PoolLayer(string name); 31 | PoolLayer(string name, const param_tuple& agrs); 32 | PoolLayer(const PoolLayer* layer); 33 | PoolLayer(const configBase* templateConfig); 34 | ~PoolLayer(); 35 | void ReShape(); 36 | void forwardPropagation(string train_or_test); 37 | void backwardPropagation(float Momentum); 38 | void saveWeight(FILE*file){} 39 | void readWeight(FILE*file){} 40 | void createHandles(); 41 | void destroyHandles(); 42 | int getOutputSize(); 43 | 44 | private: 45 | cudnnPoolingMode_t PoolingMode; 46 | ConfigPoolMethod* m_poolMethod; 47 | cudnnTensorDescriptor_t srcTensorDesc; 48 | cudnnTensorDescriptor_t dstTensorDesc; 49 | cudnnPoolingDescriptor_t poolingDesc; 50 | string pool_Type; 51 | int poolDim; 52 | int pad_h; 53 | int pad_w; 54 | int stride_h; 55 | int stride_w; 56 | int outputSize; 57 | int prev_num; 58 | int prev_channels; 59 | int prev_height; 60 | int prev_width; 61 | }; 62 | 63 | #endif /* POOLLAYER_H_ */ 64 | -------------------------------------------------------------------------------- /layers/HiddenLayer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * HiddenLayer.h 3 | * 4 | * Created on: Nov 28, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef HIDDENLAYER_H_ 9 | #define HIDDENLAYER_H_ 10 | 11 | #include"LayersBase.h" 12 | #include"../common/cuMatrix.h" 13 | #include"../common/cuMatrixVector.h" 14 | #include"../common/utility.cuh" 15 | #include"../config/config.h" 16 | #include"../cuDNN_netWork.h" 17 | #include"../tests/test_layer.h" 18 | #include 19 | #include 20 | #include "curand.h" 21 | 22 | /* 23 | * Class Hidden layer 24 | * */ 25 | class HiddenLayer: public LayersBase 26 | { 27 | public: 28 | HiddenLayer(string name, int sign); 29 | HiddenLayer(const HiddenLayer* layer); 30 | HiddenLayer(const configBase* templateConfig); 31 | ~HiddenLayer(); 32 | void initRandom(); 33 | void ReShape(){} 34 | void forwardPropagation(string train_or_test); 35 | void backwardPropagation(float Momentum); 36 | void saveWeight(FILE*file); 37 | void readWeight(FILE*file); 38 | void dropOut(float*data, int size, float dropout_rate); 39 | void createHandles(); 40 | void destroyHandles(); 41 | int getOutputSize(); 42 | void compute_cost(); 43 | 44 | private: 45 | 46 | curandGenerator_t curandGenerator_W; 47 | curandGenerator_t curandGenerator_B; 48 | float* dev_Weight, *host_Weight; 49 | float* dev_Bias, *host_Bias; 50 | float* dev_Wgrad,*dev_Bgrad; 51 | float*tmp_Wgrad, *tmp_Bgrad; 52 | float* dev_weightSquare; 53 | float* host_weightSquare; 54 | float epsilon; 55 | float* VectorOnes; 56 | int inputSize; 57 | int outputSize; 58 | int batchSize; 59 | int prev_num; 60 | int prev_channels; 61 | int prev_height; 62 | int prev_width; 63 | float lambda; 64 | }; 65 | 66 | 67 | 68 | #endif /* HIDDENLAYER_H_ */ 69 | -------------------------------------------------------------------------------- /layers/SoftMaxLayer.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * SoftMaxLayer.h 3 | * 4 | * Created on: Nov 28, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef SOFTMAXLAYER_H_ 9 | #define SOFTMAXLAYER_H_ 10 | 11 | #include 12 | #include"LayersBase.h" 13 | #include"../common/cuMatrix.h" 14 | #include"./DataLayer.h" 15 | #include"../config/config.h" 16 | #include"../cuDNN_netWork.h" 17 | #include"../tests/test_layer.h" 18 | #include"../saveData/saveNetWork.h" 19 | #include"../common/utility.cuh" 20 | 21 | /* 22 | *Class Softmax layer 23 | * */ 24 | class SoftMaxLayer : public LayersBase 25 | { 26 | public: 27 | SoftMaxLayer(string name); 28 | SoftMaxLayer(const SoftMaxLayer* layer); 29 | SoftMaxLayer(const configBase* templateConfig){} 30 | ~SoftMaxLayer(); 31 | void initRandom(); 32 | void ReShape(); 33 | void forwardPropagation(string train_or_test); 34 | void backwardPropagation(float Momentum); 35 | void saveWeight(FILE*file){} 36 | void readWeight(FILE*file){} 37 | void ClassificationResults(); 38 | void getBackPropDiffData(); 39 | void GetDataSize_BatchLabel(); 40 | void createHandles(); 41 | void destroyHandles(); 42 | int getOutputSize(); 43 | int getCorrectNum(); 44 | void compute_cost(); 45 | 46 | private: 47 | cudnnTensorDescriptor_t srcTensorDesc; 48 | cudnnTensorDescriptor_t dstTensorDesc; 49 | int inputSize; 50 | int outputSize; 51 | int nclasses; 52 | int batchSize; 53 | int dataSize; 54 | int* srcLabel; 55 | int cur_correctSize; 56 | int CorrectSize; 57 | int flag; 58 | int* devLabel; 59 | float lambda; 60 | float* srcDiff; 61 | float* host_result; 62 | float* m_devGroundTruth; 63 | float* m_hostGroundTruth; 64 | float* dev_logArray; 65 | }; 66 | #endif /* SOFTMAXLAYER_H_ */ 67 | -------------------------------------------------------------------------------- /common/MemoryMonitor.cpp: -------------------------------------------------------------------------------- 1 | #include"MemoryMonitor.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"checkError.h" 7 | 8 | 9 | /*allocate cpu memory*/ 10 | void * MemoryMonitor::cpuMallocMemory(int size){ 11 | void* p=NULL; 12 | p = malloc(size); 13 | return p; 14 | } 15 | 16 | void MemoryMonitor::cpu2cpu(void* host_data2, void* host_data1, int size) 17 | { 18 | memcpy(host_data2, host_data1, size); 19 | } 20 | 21 | void MemoryMonitor::cpu2Gpu(void* dev_data, void* host_data, int size) 22 | { 23 | checkCudaErrors(cudaMemcpy(dev_data, host_data, size, cudaMemcpyHostToDevice)); 24 | } 25 | 26 | void MemoryMonitor::gpu2cpu(void* host_data, void* dev_data, int size) 27 | { 28 | checkCudaErrors(cudaMemcpy(host_data, dev_data, size, cudaMemcpyDeviceToHost)); 29 | } 30 | 31 | void MemoryMonitor::gpu2gpu(void* dev_data2, void* dev_data1, int size) 32 | { 33 | checkCudaErrors(cudaMemcpy(dev_data2, dev_data1, size, cudaMemcpyDeviceToDevice)); 34 | } 35 | 36 | /*allocate GPU memory*/ 37 | void MemoryMonitor::gpuMallocMemory(void**devPtr, int size) 38 | { 39 | if(NULL != (*devPtr)) 40 | { 41 | checkCudaErrors(cudaFree(*devPtr)); 42 | } 43 | checkCudaErrors(cudaMalloc(devPtr, size)); 44 | } 45 | 46 | /*GPU memory set zero*/ 47 | void MemoryMonitor::gpuMemoryMemset(void *dev_data, int size) 48 | { 49 | checkCudaErrors(cudaMemset(dev_data, 0 , size)); 50 | } 51 | 52 | /*cpu memory set zero*/ 53 | void MemoryMonitor::cpuMemoryMemset(void * host_data,int size) 54 | { 55 | memset(host_data, 0, size); 56 | } 57 | 58 | /*free cpu memory*/ 59 | void MemoryMonitor::freeCpuMemory(void *ptr) 60 | { 61 | free(ptr); 62 
| }
63 | 
64 | /*free Device memory*/
65 | void MemoryMonitor::freeGpuMemory(void *ptr)
66 | {
67 |     checkCudaErrors(cudaFree(ptr));
68 | }
69 | 
-------------------------------------------------------------------------------- /examples/cifar10/cifar-10.cpp: --------------------------------------------------------------------------------
 1 | #include"cifar-10.h"
 2 | #include "layers/VoteLayer.h"
 3 | 
 4 | void runCifar10()
 5 | {
 6 |     cuMatrixVector<float> trainSetX;
 7 |     cuMatrixVector<float> testSetX;
 8 |     cuMatrix<int> *trainSetY, *testSetY;
 9 | 
10 |     int batchSize;
11 |     /*read the layers config*/
12 |     config::instanceObjtce()->initConfig("profile/Cifar10Config.txt");
13 |     batchSize = config::instanceObjtce()->get_batchSize();
14 | 
15 |     /*read the cifar10 data*/
16 |     read_Cifar10_Data(trainSetX, testSetX, trainSetY, testSetY);
17 | 
18 |     cout<<"*******************************************************"<<endl;
19 |     cout<<" train_data  : "<<trainSetX[0]->rows * trainSetX[0]->cols * trainSetX[0]->channels <<" features and "<< trainSetX.size() <<" samples"<<endl;
20 |     cout<<" train_label : "<<trainSetY->cols <<" features and "<< trainSetY->rows <<" samples"<<endl;
21 |     cout<<" test_data   : "<<testSetX[0]->rows * testSetX[0]->cols * testSetX[0]->channels <<" features and "<< testSetX.size() <<" samples"<<endl;
22 |     cout<<" test_label  : "<<testSetY->cols <<" features and "<< testSetY->rows <<" samples"<<endl;
23 |     cout<<"*******************************************************"<<endl;
24 | 
25 |     VoteLayer::instance()->init( testSetY->rows, 10, testSetY );
26 | 
27 |     int version = cudnnGetVersion();
28 |     cout<<"cudnnGetVersion(): "<<version<<"  CUDNN_VERSION from cudnn.h: "<<CUDNN_VERSION<<endl;
29 |     showDevices();
30 | 
31 |     cout<<endl<<"Select The Way To Initial Parameter: "<<endl;
32 |     cout<<"1.Random 2.Read from file"<<endl;
33 |     int cmd;
34 |     cin>> cmd;
35 |     if(cmd == 1 || cmd == 2)
36 |         creatColumnNet(cmd);
37 |     else
38 |     {
39 |         cout<<"Init way input Error"<<endl;
40 |         exit(0);
41 |     }
42 | 
43 |     /*train the network*/
44 |     cuTrainNetWork(trainSetX, trainSetY, testSetX, testSetY, batchSize);
45 | }
-------------------------------------------------------------------------------- /common/utility.cuh: --------------------------------------------------------------------------------
 1 | /*
 2 |  * utility.cuh
 3 |  *
 4 |  *      Author: tdx
 5 |  */
 6 | 
 7 | #ifndef UTILITY_CUH_
 8 | #define UTILITY_CUH_
 9 | 
10 | #include<iostream>
11 | #include<string>
12 | #include<cuda_runtime.h>
13 | #include<cudnn.h>
14 | #include"cuBaseVector.h"
15 | #include"checkError.h"
16 | 
17 | 
18 | #define ACTIVATION_SIGMOID 0
19 | #define ACTIVATION_RELU 1
20 | #define ACTIVATION_TANH 2
21 | #define ACTIVATION_CLIPPED_RELU 3
22 | #define ACTIVATION_LRELU 4
23 | 
24 | #define POOLING_MAX 0
25 | #define POOLING_AVERAGE_COUNT_INCLUDE_PADDING 1 // count for average includes padded values
26 | #define POOLING_AVERAGE_COUNT_EXCLUDE_PADDING 2 // count for average does not include padded values
27 | 
28 | /*the ways of initial weight*/
29 | #define RANDOM 1
30 | #define READ_FROM_FILE 2
31 | const double FLAGS_lr_gamma = 0.00001; //learning rate policy
32 | const double FLAGS_lr_power = 0.75;    //learning rate policy power
33 | 
34 | /*get array maxValue*/
35 | template<typename T>
36 | T getMax(T* value, int size)
37 | {
38 |     T max = value[0];
39 |     for(int i = 1; i < size; i++)
40 |     {
41 |         if(value[i] > max)
42 |             max = value[i];
43 |     }
44 | 
45 |     return max;
46 | }
47 | 
48 | 
49 | string int_to_string(int num);
50 | void reverseArray(float* dev_array, int number, int channels, int height, int width);
51 | void showDevices();
52 | __global__ void MultiChannelsMerge(float** inputs, float* outputs, int* channels, int* indexs, int row, int outChannels);
53 | __global__ void MultiArrayAdd(float** inputs, float* outputs, int number,int channels, int height, int width);
54 | __global__ void MultiChannelsSplit(float* inputs, float**outputs, int* channels, int* indexs, int row, int inChannels);
55 | __global__ void MultiChannelsSplit(float* inputs, float* outputs, int outChannels, int offset, int row, int inChannels);
56 | 
57 | __global__ void compute_array_square(float* array, float* outArray, int size);
58 | 
59 | #endif /* UTILITY_CUH_ */
60 | 
--------------------------------------------------------------------------------
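(The FLAGS_lr_gamma / FLAGS_lr_power constants above are fed to LayersBase::adjust_learnRate(index, lr_gamma, lr_power). Their names and values match Caffe's "inv" learning-rate policy; assuming that schedule — adjust_learnRate's definition is not shown in this dump — the effective rate decays as lr(iter) = base_lr * (1 + gamma * iter)^(-power). A hypothetical sketch:)

#include<cmath>

// Hypothetical "inv" schedule suggested by the constants above (an assumption,
// not the repo's verified implementation).
inline double inv_policy(double base_lr, long iter,
                         double gamma = 0.00001, double power = 0.75)
{
    return base_lr * std::pow(1.0 + gamma * iter, -power);
}
// e.g. base_lr = 0.05: iter 0 -> 0.05; iter 100000 -> 0.05 * 2^-0.75 ~= 0.0297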
7 | #include"common/utility.cuh" 8 | #include"readData/readCifar100.h" 9 | 10 | #include 11 | #include 12 | 13 | void runCifar100() 14 | { 15 | cuMatrixVector trainSetX; 16 | cuMatrixVector testSetX; 17 | cuMatrix *trainSetY, *testSetY; 18 | 19 | int batchSize; 20 | // Read the layers config 21 | config::instanceObjtce()->initConfig("profile/Cifar100Config.txt"); 22 | batchSize = config::instanceObjtce()->get_batchSize(); 23 | 24 | // Read the cifar10 data 25 | readCifar100Data(trainSetX, testSetX, trainSetY, testSetY); 26 | 27 | LOG(INFO) <<"*******************************************************"; 28 | LOG(INFO) <<" Train_set : " << trainSetX[0]->rows * trainSetX[0]->cols * trainSetX[0]->channels << " features and " << trainSetX.size() << " samples"; 29 | LOG(INFO) <<" Train_label : " << trainSetY->cols << " features and " << trainSetY->rows << " samples"; 30 | LOG(INFO) <<" Test_set : " << testSetX[0]->rows * testSetX[0]->cols * testSetX[0]->channels << " features and " << testSetX.size() << " samples"; 31 | LOG(INFO) <<" Test_label : " << testSetY->cols << " * " << testSetY->rows <<" features and "<< testSetY->rows <<" samples"; 32 | LOG(INFO) <<"*******************************************************"; 33 | 34 | VoteLayer::instance()->init( testSetY->rows, 100, testSetY ); 35 | 36 | int version = cudnnGetVersion(); 37 | LOG(INFO) << "cudnnGetVersion(): " << version << " CUDNN VERSION from cudnn.h: " << CUDNN_VERSION; 38 | // Show the device information 39 | showDevices(); 40 | 41 | cout << endl << endl; 42 | LOG(INFO) << "Select The Way To Initial Parameter: "; 43 | LOG(INFO) << "1.Random 2.Read from file"; 44 | int cmd; 45 | cin>> cmd; 46 | if(cmd == 1 || cmd == 2) creatColumnNet(cmd); 47 | else 48 | LOG(FATAL) << "Init Way Input Error"; 49 | 50 | // Training Network 51 | cuTrainNetWork(trainSetX, trainSetY, testSetX, testSetY, batchSize); 52 | } 53 | -------------------------------------------------------------------------------- /common/cuBaseVector.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cuBaseVector.h 3 | * 4 | * Created on: Mar 13, 2016 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef CUBASEVECTOR_H_ 9 | #define CUBASEVECTOR_H_ 10 | 11 | #include"MemoryMonitor.h" 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | template 18 | class cuBaseVector 19 | { 20 | public: 21 | /*constructed function*/ 22 | cuBaseVector():hostPoint(0),devPoint(0){} 23 | /*free memory*/ 24 | void vector_clear() 25 | { 26 | if (NULL != hostPoint) 27 | { 28 | MemoryMonitor::instanceObject()->freeCpuMemory(hostPoint); 29 | hostPoint = NULL; 30 | } 31 | if (NULL != devPoint) 32 | { 33 | MemoryMonitor::instanceObject()->freeGpuMemory(devPoint); 34 | devPoint = NULL; 35 | } 36 | vector a; 37 | m_vec.swap(a); 38 | } 39 | 40 | /*overload operator []*/ 41 | T* operator[](size_t index) 42 | { 43 | if (index > m_vec.size()) 44 | { 45 | cout << "cuBaseVector:operator[] error " << endl; 46 | exit(0); 47 | } 48 | return m_vec[index]; 49 | } 50 | 51 | //push back 52 | void push_back(T* m) { 53 | m_vec.push_back(m); 54 | } 55 | 56 | //get size 57 | size_t size() 58 | { 59 | return m_vec.size(); 60 | } 61 | 62 | void toGpu() 63 | { 64 | hostPoint = (T**) MemoryMonitor::instanceObject()->cpuMallocMemory(m_vec.size() * sizeof(T*)); 65 | 66 | if (!hostPoint) { 67 | printf("cuBaseVector: allocate m_hostPoint Failed\n"); 68 | exit(0); 69 | } 70 | 71 | devPoint = NULL; 72 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &devPoint, sizeof(T*) * 
/common/cuBaseVector.h: --------------------------------------------------------------------------------
 1 | /*
 2 |  * cuBaseVector.h
 3 |  *
 4 |  * Created on: Mar 13, 2016
 5 |  *      Author: tdx
 6 |  */
 7 | 
 8 | #ifndef CUBASEVECTOR_H_
 9 | #define CUBASEVECTOR_H_
10 | 
11 | #include"MemoryMonitor.h"
12 | #include<vector>
13 | #include<iostream>
14 | 
15 | using namespace std;
16 | 
17 | template<typename T>
18 | class cuBaseVector
19 | {
20 | public:
21 |     /*constructed function*/
22 |     cuBaseVector():hostPoint(0),devPoint(0){}
23 |     /*free memory*/
24 |     void vector_clear()
25 |     {
26 |         if (NULL != hostPoint)
27 |         {
28 |             MemoryMonitor::instanceObject()->freeCpuMemory(hostPoint);
29 |             hostPoint = NULL;
30 |         }
31 |         if (NULL != devPoint)
32 |         {
33 |             MemoryMonitor::instanceObject()->freeGpuMemory(devPoint);
34 |             devPoint = NULL;
35 |         }
36 |         vector<T*> a;
37 |         m_vec.swap(a);
38 |     }
39 | 
40 |     /*overload operator []*/
41 |     T* operator[](size_t index)
42 |     {
43 |         if (index >= m_vec.size())
44 |         {
45 |             cout << "cuBaseVector:operator[] error " << endl;
46 |             exit(0);
47 |         }
48 |         return m_vec[index];
49 |     }
50 | 
51 |     //push back
52 |     void push_back(T* m) {
53 |         m_vec.push_back(m);
54 |     }
55 | 
56 |     //get size
57 |     size_t size()
58 |     {
59 |         return m_vec.size();
60 |     }
61 | 
62 |     void toGpu()
63 |     {
64 |         hostPoint = (T**) MemoryMonitor::instanceObject()->cpuMallocMemory(m_vec.size() * sizeof(T*));
65 | 
66 |         if (!hostPoint) {
67 |             printf("cuBaseVector: allocate m_hostPoint Failed\n");
68 |             exit(0);
69 |         }
70 | 
71 |         devPoint = NULL;
72 |         MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &devPoint, sizeof(T*) * m_vec.size());
73 | 
74 |         for (int p = 0; p < m_vec.size(); p++) {
75 |             hostPoint[p] = m_vec[p];
76 |         }
77 |         checkCudaErrors(cudaMemcpy(devPoint, hostPoint, sizeof(T*) * m_vec.size(), cudaMemcpyHostToDevice));
78 |     }
79 | 
80 | public:
81 |     T** hostPoint;
82 |     T** devPoint;
83 | 
84 | private:
85 |     vector<T*> m_vec;
86 | };
87 | 
88 | 
89 | #endif /* CUBASEVECTOR_H_ */
90 | 
-------------------------------------------------------------------------------- /readData/readCifar10Data.cpp: --------------------------------------------------------------------------------
 1 | #include"readCifar10Data.h"
 2 | #include"common/utility.cuh"
 3 | #include<fstream>
 4 | 
 5 | void read_batch(string fileName,
 6 |                 cuMatrixVector<float>& image_data,
 7 |                 cuMatrix<int>*& image_label)
 8 | {
 9 |     ifstream file(fileName, ios::binary);
10 | 
11 |     if(file.is_open())
12 |     {
13 |         int number_of_images = 10000;
14 |         int n_rows = 32;
15 |         int n_cols = 32;
16 | 
17 |         for(int i = 0; i < number_of_images; i++)
18 |         {
19 |             unsigned char label;
20 |             file.read((char*)&label, sizeof(label));
21 |             cuMatrix<float>* channels = new cuMatrix<float>(n_rows, n_cols, 3);
22 | 
23 |             int index = image_data.size();
24 |             image_label->setValue(index, 0, 0, label);
25 | 
26 |             for(int ch = 0; ch < 3; ch++)
27 |             {
28 |                 for(int r = 0; r < n_rows; r++)
29 |                 {
30 |                     for(int c = 0; c < n_cols; c++)
31 |                     {
32 |                         unsigned char temp = 0;
33 |                         file.read((char*)&temp, sizeof(temp));
34 |                         channels->setValue(r, c, ch, 2.0f * (float)temp / 255.0f - 1.0f);
35 |                     }
36 |                 }
37 |             }
38 |             image_data.push_back(channels);
39 |         }
40 |     }else
41 |     {
42 |         LOG(FATAL) << "ReadData: Can not find the data: " << fileName;
43 |     }
44 | }
45 | 
46 | void read_Cifar10_Data(cuMatrixVector<float>& trainX,
47 |                        cuMatrixVector<float>& testX,
48 |                        cuMatrix<int>*& trainY,
49 |                        cuMatrix<int>*& testY)
50 | {
51 |     /*read the train data and label*/
52 |     string file_dir = "data/cifar-10/data_batch_";
53 |     string suffix = ".bin";
54 | 
55 |     trainY = new cuMatrix<int>(50000, 1, 1);
56 |     for(int i = 1; i <= 5; i++)
57 |     {
58 |         string fileName = file_dir + int_to_string(i) + suffix;
59 |         read_batch(fileName, trainX, trainY);
60 |     }
61 | 
62 |     /*read the test data and label*/
63 |     file_dir = "data/cifar-10/test_batch.bin";
64 |     testY = new cuMatrix<int>(10000, 1, 1);
65 |     read_batch(file_dir, testX, testY);
66 | }
67 | 
--------------------------------------------------------------------------------
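(Each CIFAR-10 .bin record parsed by read_batch() above is 3073 bytes: one label byte followed by 3072 channel-major pixel bytes (3 channels x 32 x 32), and every pixel is rescaled from [0,255] to [-1,1] via 2*p/255 - 1. A minimal standalone sketch of the same record walk, independent of this repo's types:)

#include<fstream>

// One CIFAR-10 record: label byte + R-plane + G-plane + B-plane.
struct Cifar10Record { unsigned char label; float pixels[3][32][32]; };

bool readRecord(std::ifstream& in, Cifar10Record& r)
{
    unsigned char buf[3073];
    if(!in.read((char*)buf, sizeof(buf))) return false;
    r.label = buf[0];
    for(int ch = 0; ch < 3; ch++)
        for(int i = 0; i < 1024; i++)   // 32*32 pixels per channel plane
            r.pixels[ch][i / 32][i % 32] = 2.0f * buf[1 + ch * 1024 + i] / 255.0f - 1.0f;
    return true;
}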
/examples/mnist/mnist.cpp: --------------------------------------------------------------------------------
 1 | #include"mnist.h"
 2 | #include "layers/VoteLayer.h"
 3 | 
 4 | void runMnist()
 5 | {
 6 |     cuMatrixVector<float> trainSetX;
 7 |     cuMatrixVector<float> testSetX;
 8 | 
 9 |     cuMatrix<int>* trainSetY, *testSetY;
10 | 
11 |     int batchSize;
12 |     int normalized_width;
13 |     int imageSize;
14 | 
15 |     /*read the layers configure*/
16 |     config::instanceObjtce()->initConfig("profile/MnistConfig.txt");
17 |     batchSize = config::instanceObjtce()->get_batchSize();
18 |     normalized_width = config::instanceObjtce()->get_normalizedWidth();
19 |     imageSize = config::instanceObjtce()->get_imageSize();
20 | 
21 |     /*read Mnist dataSet*/
22 |     readMnistData(trainSetX, trainSetY, "data/mnist/train-images-idx3-ubyte", "data/mnist/train-labels-idx1-ubyte", normalized_width, imageSize);
23 |     readMnistData(testSetX, testSetY, "data/mnist/t10k-images-idx3-ubyte", "data/mnist/t10k-labels-idx1-ubyte", normalized_width, imageSize);
24 |     cout<<"*******************************************************"<<endl;
25 |     cout<<" train_data  : "<<trainSetX[0]->rows * trainSetX[0]->cols <<" features and "<< trainSetX.size() <<" samples"<<endl;
26 |     cout<<" train_label : "<<trainSetY->cols <<" features and "<< trainSetY->rows <<" samples"<<endl;
27 |     cout<<" test_data   : "<<testSetX[0]->rows * testSetX[0]->cols <<" features and "<< testSetX.size() <<" samples"<<endl;
28 |     cout<<" test_label  : "<<testSetY->cols <<" features and "<< testSetY->rows <<" samples"<<endl;
29 |     cout<<"*******************************************************"<<endl;
30 | 
31 |     VoteLayer::instance()->init(testSetY->rows, 10, testSetY);
32 | 
33 |     int version = cudnnGetVersion();
34 |     cout<<"cudnnGetVersion(): "<<version<<"  CUDNN_VERSION from cudnn.h: "<<CUDNN_VERSION<<endl;
35 |     showDevices();
36 | 
37 |     cout<<endl<<"Select The Way To Initial Parameter: "<<endl;
38 |     cout<<"1.Random 2.Read from file"<<endl;
39 |     int cmd;
40 |     cin>> cmd;
41 |     if(cmd == 1 || cmd == 2)
42 |         creatColumnNet(cmd);
43 |     else
44 |     {
45 |         cout<<"Init way input Error"<<endl;
46 |         exit(0);
47 |     }
48 | 
49 | 
50 |     /*train the network*/
51 |     cuTrainNetWork(trainSetX, trainSetY, testSetX, testSetY, batchSize);
52 | }
53 | 
-------------------------------------------------------------------------------- /layers/LayersBase.h: --------------------------------------------------------------------------------
 1 | /*
 2 |  * LayersBase.h
 3 |  *
 4 |  * Created on: Nov 21, 2015
 5 |  *      Author: tdx
 6 |  */
 7 | 
 8 | #ifndef LAYERSBASE_H_
 9 | #define LAYERSBASE_H_
10 | 
11 | 
12 | #include<string>
13 | #include<vector>
14 | #include<map>
15 | #include"math.h"
16 | using namespace std;
17 | 
18 | /*
19 |  * Class LayersBase
20 |  * */
21 | class LayersBase
22 | {
23 | public:
24 |     LayersBase():m_nCurBranchIndex(0), m_fReduceRate(0), lrate(123456), m_fCost(0.0f){}
25 |     virtual void forwardPropagation(string train_or_test) = 0;
26 |     virtual void backwardPropagation(float Momentum) = 0;
27 |     virtual int getOutputSize() = 0;
28 |     virtual void saveWeight(FILE*file) = 0;
29 |     virtual void readWeight(FILE*file) = 0;
30 |     virtual void ReShape() = 0;
31 | 
32 |     void setCurBranchIndex(int nIndex = 0);
33 |     void adjust_learnRate(int index, double lr_gamma, double lr_power);
34 |     void insertPrevLayer(LayersBase* layer);
35 |     void insertNextlayer(LayersBase* layer);
36 |     void rateReduce();
37 |     void setRateReduce( float fReduce);
38 |     float getRateReduce();
39 | 
40 | public:
41 |     string _name;
42 |     string _inputName;
43 |     int number;
44 |     int channels;
45 |     int height;
46 |     int width;
47 |     float m_fCost;
48 |     int inputImageDim;
49 |     int inputAmount;
50 |     int m_nCurBranchIndex;
51 |     float lrate;
52 |     float m_fReduceRate;
53 |     float *diffData;
54 |     float *srcData, *dstData;
55 |     vector<LayersBase*> prevLayer;
56 |     vector<LayersBase*> nextLayer;
57 | };
58 | 
59 | /*
60 |  * Class Layers
61 |  * */
62 | class Layers
63 | {
64 | public:
65 | 
66 |     static Layers* instanceObject()
67 |     {
68 |         static Layers* layers = new Layers();
69 |         return layers;
70 |     }
71 | 
72 |     /*get layer by name*/
73 |     LayersBase * getLayer(string name);
74 |     /*linear store the layers by name*/
75 |     void storLayers(string name, LayersBase* layer);
76 |     void storLayers(string prev_name, string name, LayersBase* layer);
77 |     /*store the layers name*/
78 |     void storLayersName(string);
79 |     /*get layers name by index*/
80 |     string getLayersName(int index);
81 |     /*get layer num*/
82 |     int getLayersNum()
83 |     {
84 |         return _layersMaps.size();
85 |     }
86 |     bool hasLayer(string name);
87 | 
88 | private:
89 |     map<string, LayersBase*> _layersMaps;
90 |     vector<string> _layersName;
91 | };
92 | 
93 | 
94 | #endif /* LAYERSBASE_H_ */
95 | 
--------------------------------------------------------------------------------
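(InceptionLayer.cu below derives its output shape directly from the branch widths: channels = one + three + five + pool_proj, with height and width copied from the previous layer, so an inception block only widens the channel dimension. For example, with the GoogLeNet inception(3a) widths — one=64, three=128, five=32, pool_proj=32 — on a 28x28 input, the block emits 256 channels. A tiny sketch of that bookkeeping with hypothetical values, not read from any config here:)

// Output-shape arithmetic used by InceptionLayer (illustrative values).
int one = 64, three = 128, five = 32, pool_proj = 32;
int height = 28, width = 28;
int channels   = one + three + five + pool_proj;   // 256
int outputSize = channels * height * width;        // 256 * 28 * 28 = 200704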
/layers/InceptionLayer.cu: --------------------------------------------------------------------------------
 1 | #include"InceptionLayer.h"
 2 | #include"../config/config.h"
 3 | 
 4 | /*
 5 |  * Destructor
 6 |  * */
 7 | InceptionLayer::~InceptionLayer()
 8 | {
 9 |     delete inception;
10 | }
11 | 
12 | /*
13 |  * Get the outputSize
14 |  * */
15 | int InceptionLayer::getOutputSize()
16 | {
17 |     return outputSize;
18 | }
19 | 
20 | /*
21 |  * Inception layer constructor
22 |  * */
23 | InceptionLayer::InceptionLayer(string name, int sign)
24 | {
25 |     _name = name;
26 |     _inputName = " ";
27 |     number = 0;
28 |     channels = 0;
29 |     height = 0;
30 |     width = 0;
31 |     srcData = NULL;
32 |     dstData = NULL;
33 |     diffData = NULL;
34 |     prevLayer.clear();
35 |     nextLayer.clear();
36 | 
37 |     configInception* curConfig = (configInception*) config::instanceObjtce()->getLayersByName(_name);
38 |     string prevLayerName = curConfig->_input;
39 |     LayersBase* prev_Layer = (LayersBase*) Layers::instanceObject()->getLayer(prevLayerName);
40 | 
41 |     one = curConfig->_one;
42 |     three = curConfig->_three;
43 |     five = curConfig->_five;
44 |     three_reduce = curConfig->_three_reduce;
45 |     five_reduce = curConfig->_five_reduce;
46 |     pool_proj = curConfig->_pool_proj;
47 |     epsilon = curConfig->_init_w;
48 |     lrate = curConfig->_lrate;
49 |     lambda = curConfig->_weight_decay;
50 | 
51 |     inputAmount = prev_Layer->channels;
52 |     inputImageDim = prev_Layer->height;
53 |     number = prev_Layer->number;
54 |     channels = one + three + five + pool_proj;
55 |     height = prev_Layer->height;
56 |     width = prev_Layer->width;
57 |     outputSize = channels * height * width;
58 | 
59 |     /*create inception*/
60 |     inception = new Inception(prev_Layer, sign, &lrate,
61 |                 Inception::param_tuple(one, three, five, three_reduce, five_reduce,
62 |                                        pool_proj, inputAmount, inputImageDim, epsilon, lambda));
63 | }
64 | 
65 | /*
66 |  * Inception layer forward propagation
67 |  * */
68 | void InceptionLayer::forwardPropagation(string train_or_test)
69 | {
70 |     srcData = prevLayer[0]->dstData;
71 |     inception->forwardPropagation(train_or_test);
72 |     dstData = inception->getConcatData();
73 | }
74 | 
75 | /*
76 |  * Inception layer backward propagation
77 |  * */
78 | void InceptionLayer::backwardPropagation(float Momentum)
79 | {
80 |     int nIndex = m_nCurBranchIndex;
81 |     inception->backwardPropagation(nextLayer[nIndex]->diffData, Momentum);
82 |     diffData = inception->getInceptionDiffData();
83 | }
84 | 
-------------------------------------------------------------------------------- /readData/readCifar100.cpp: --------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 | 	> File Name: readCifar100.cpp
 3 | 	> Author: TDX
 4 | 	> Mail: sa614149@mail.ustc.edu.cn
 5 | 	> Created Time: Thursday, May 11, 2017, 15:15:54
 6 |  ************************************************************************/
 7 | #include"readCifar100.h"
 8 | #include<iostream>
 9 | #include<fstream>
10 | #include<string>
11 | 
12 | #include"common/cuMatrixVector.h"
13 | #include"common/cuMatrix.h"
14 | #include<glog/logging.h>
15 | #include<vector>
16 | 
17 | using namespace std;
18 | 
19 | void read_batch(string filename, cuMatrixVector<float>& imageData, cuMatrix<int>*& imageLabel, int number_of_image)
20 | {
21 |     ifstream file(filename, ios::binary);
22 | 
23 |     if(file.is_open())
24 |     {
25 |         int n_rows = 32;
26 |         int n_cols = 32;
27 |         for(int i = 0; i < number_of_image; i++)
28 |         {
29 |             unsigned char type1 = 0;
30 |             unsigned char type2 = 0;
31 |             file.read((char*)&type1, sizeof(type1)); // coarse (superclass) label, unused
32 |             file.read((char*)&type2, sizeof(type2)); // fine (class) label
33 | 
34 |             cuMatrix<float>* channels = new cuMatrix<float>(n_rows, n_cols, 3);
35 |             int index = imageData.size();
36 |             imageLabel->setValue(index, 0, 0, type2);
37 | 
38 |             for(int ch = 0; ch < 3; ch++)
39 |             {
40 |                 for(int r = 0; r < n_rows; r++)
41 |                 {
42 |                     for(int c = 0; c < n_cols; c++)
43 |                     {
44 |                         unsigned char temp = 0;
45 |                         file.read((char*)&temp, sizeof(temp));
46 |                         channels->setValue(r, c, ch, (float)temp / 255.0f * 2.0f - 1.0f);
47 |                     }
48 |                 }
49 |             }
50 | 
51 |             imageData.push_back(channels);
52 |         }
53 |     }else
54 |     {
55 |         LOG(FATAL) << "Read CIFAR100 Data: Can Not Find The Data:" << filename;
56 |     }
57 | }
58 | 
59 | void readCifar100Data(cuMatrixVector<float>& trainX, cuMatrixVector<float>& testX, cuMatrix<int>*& trainY, cuMatrix<int>*& testY)
60 | {
61 |     // Read the train data and label
62 |     string file_dir = "data/cifar100/cifar-100-binary/train.bin";
63 |     trainY = new cuMatrix<int>(50000, 1, 1);
64 | 
65 |     read_batch(file_dir, trainX, trainY, 50000);
66 | 
67 |     // Read the test data and label
68 |     file_dir = "data/cifar100/cifar-100-binary/test.bin";
69 |     testY = new 
cuMatrix(10000, 1, 1); 70 | read_batch(file_dir, testX, testY, 10000); 71 | 72 | } 73 | -------------------------------------------------------------------------------- /layers/ConvLayer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ConvLayer.h 3 | * 4 | * Created on: Nov 23, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef CONVLAYER_H_ 9 | #define CONVLAYER_H_ 10 | 11 | #include"./LayersBase.h" 12 | #include"../common/cuMatrixVector.h" 13 | #include"../common/cuMatrix.h" 14 | #include"../common/utility.cuh" 15 | #include "../cuDNN_netWork.h" 16 | #include"../config/config.h" 17 | #include"../common/MemoryMonitor.h" 18 | #include"../common/checkError.h" 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | using namespace std; 25 | 26 | /* 27 | * Class convolution Layer 28 | * */ 29 | class ConvLayer:public LayersBase 30 | { 31 | public: 32 | typedef tuple param_tuple; 33 | 34 | ConvLayer(string name, int sign); 35 | ConvLayer(string name, int sign, const param_tuple& args); 36 | ConvLayer(const ConvLayer* layer); 37 | ConvLayer(const configBase* templateConfig); 38 | ~ConvLayer(); 39 | void initRandom(); 40 | void ReShape(); 41 | void copyWeight(); 42 | void forwardPropagation(string train_or_test); 43 | void backwardPropagation(float Momentum); 44 | void saveWeight(FILE*file); 45 | void readWeight(FILE*file); 46 | void addBias(const cudnnTensorDescriptor_t& dstTensorDesc, int c, float *data ); 47 | void createHandles(); 48 | void destroyHandles(); 49 | int getOutputSize(); 50 | void compute_cost(); 51 | 52 | private: 53 | float *host_Weight, *dev_Weight; 54 | float *tmp_Wgrad, *tmp_Bgrad; 55 | float *host_Bias, *dev_Bias; 56 | float *dev_Wgrad, *dev_Bgrad; 57 | float* dev_weightSquare; 58 | float* host_weightSquare; 59 | float lambda; 60 | float epsilon; 61 | int kernelSize; 62 | int pad_h; 63 | int pad_w; 64 | int stride_h; 65 | int stride_w; 66 | int kernelAmount; 67 | int outputSize; 68 | int batchSize; 69 | int prev_num; 70 | int prev_channels; 71 | int prev_height; 72 | int prev_width; 73 | 74 | private: 75 | cudnnTensorDescriptor_t srcTensorDesc; 76 | cudnnTensorDescriptor_t dstTensorDesc; 77 | cudnnTensorDescriptor_t biasTensorDesc; 78 | cudnnFilterDescriptor_t filterDesc; 79 | cudnnConvolutionDescriptor_t convDesc; 80 | cudnnConvolutionFwdAlgo_t convFwdAlgo; 81 | cudnnConvolutionBwdFilterAlgo_t convBwdFilterAlgo; 82 | cudnnConvolutionBwdDataAlgo_t convBwdDataAlgo; 83 | curandGenerator_t curandGenerator_W; 84 | curandGenerator_t curandGenerator_B; 85 | }; 86 | 87 | 88 | 89 | #endif /* CONVLAYER_H_ */ 90 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.0) 2 | project (CUDA-MCDNN) 3 | 4 | # The version number. 
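# An illustrative sketch (assumed, not wired up anywhere in this project):
# the version numbers set below could be surfaced to the sources as a
# compile definition, e.g.
#   add_definitions(-DMCDNN_VERSION="${CUDA-MCDNN_VERSION_MAJOR}.${CUDA-MCDNN_VERSION_MINOR}")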
5 | set (CUDA-MCDNN_VERSION_MAJOR 2) 6 | set (CUDA-MCDNN_VERSION_MINOR 0) 7 | 8 | # CMake Scripts dir 9 | #set(CMAKE_SCRIPT_DIR ${CMAKE_SOURCE_DIR}/CMakeScripts) 10 | 11 | #CMake module path for custom module finding 12 | #set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SCRIPT_DIR}) 13 | 14 | find_package (CUDA REQUIRED) 15 | include_directories (${CUDA_INCLUDE_DIRS}) 16 | #include_directories (${CUDA_SDK_ROOT_DIR}) 17 | MESSAGE("${CUDA_INCLUDE_DIRS}/../samples/common/inc") 18 | include_directories ("${CUDA_INCLUDE_DIRS}/../samples/common/inc") 19 | #/usr/local/cuda/NVIDIA_CUDA-7.0_Samples/common/inc") 20 | set(CUDA_SEPARABLE_COMPILATION ON) 21 | 22 | # glog 23 | SET(GOOGLE_GLOG_LIBRARY "/usr/lib/x86_64-linux-gnu/libglog.so") 24 | 25 | # opencv 26 | find_package( OpenCV REQUIRED ) 27 | INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR} ) 28 | set(OpenCV_LIBRARIES ${OpenCV_LIBS}) 29 | 30 | SET(CUDA_CUDNN_LIBRARY "/usr/local/cuda/lib64/libcudnn.so") 31 | 32 | include_directories(${PROJECT_SOURCE_DIR}) 33 | 34 | #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 35 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 36 | #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g -rdynamic") 37 | 38 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} 39 | -gencode arch=compute_20,code=sm_20 40 | -gencode arch=compute_20,code=sm_21 41 | -gencode arch=compute_30,code=sm_30 42 | -gencode arch=compute_35,code=sm_35 43 | -gencode arch=compute_50,code=sm_50 44 | -gencode arch=compute_50,code=compute_50 45 | -rdc=true 46 | ) 47 | 48 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} 49 | ) 50 | 51 | MESSAGE(${CUDA_NVCC_FLAGS}) 52 | 53 | #add_library(${EXAMPLE_NAME}.o OBJECT ${source}) 54 | #set(EXAMPLE_OBJ_LIB $) 55 | 56 | 57 | file(GLOB CUDA_MCDNN_CU_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} 58 | "readData/*" 59 | "layers/*" 60 | "saveData/*" 61 | "common/*" 62 | "tests/*" 63 | "config/*" 64 | "composite/*" 65 | "dataAugmentation/*" 66 | "examples/dynamic_g_model/*" 67 | "examples/mnist/*" 68 | "examples/cifar10/*" 69 | "examples/cifar100/*" 70 | "main.cpp" 71 | "columnNet.*" 72 | "cuDNN_netWork.*") 73 | # EXCLUDE_FROM_ALL "build/*") 74 | 75 | cuda_add_executable(CUDA-MCDNN ${CUDA_MCDNN_CU_SOURCES}) 76 | #cuda_add_library(CUDA-MCDNN STATIC ${CUDA_MCDNN_CPP_SOURCES}) 77 | target_link_libraries(CUDA-MCDNN 78 | ${OpenCV_LIBRARIES} 79 | ${CUDA_CUBLAS_LIBRARIES} 80 | ${CUDA_curand_LIBRARY} 81 | ${CUDA_LIBRARIES} 82 | ${CUDA_CUDNN_LIBRARY} 83 | ${GOOGLE_GLOG_LIBRARY} 84 | ) 85 | CUDA_ADD_CUBLAS_TO_TARGET(CUDA-MCDNN) 86 | 87 | #add_subdirectory(net) 88 | -------------------------------------------------------------------------------- /profile/dynamic_g_profile/dynamic_begin/MnistConfig.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * MnistConfig File 3 | */ 4 | 5 | /******************************************************** 6 | * 7 | * General Parameters Config 8 | * 9 | *NON_LINEARITY = NL_SIGMOID, NL_TANH, NL_RELU, NL_CLIPPED_RELU 10 | ********************************************************/ 11 | 12 | BATCH_SIZE = 128; 13 | NORMALIZED_WIDTH = 0; 14 | IMAGES_SIZE = 28; 15 | CNANNELS = 1; 16 | EPOCHS = 4000; 17 | ITER_PER_EPO = 400; 18 | 19 | 20 | 21 | /******************************************************** 22 | * 23 | * Layers Config 24 | * 25 | ********************************************************/ 26 | [ 27 | LAYER = DATA; 28 | NAME = data; 29 | ] 30 | 31 | [ 32 | LAYER = CONV; 33 | NAME = conv1; 34 | INPUT =data; 35 | KERNEL_SIZE = 5; 36 | KERNEL_AMOUNT = 32; 37 | PAD_H = 0; 38 | PAD_W 
= 0; 39 | STRIDE_H = 1; 40 | STRIDE_W = 1; 41 | INIT_W = 0.01; 42 | LEARN_RATE = 0.1; 43 | WEIGHT_DECAY = 1e-8; 44 | ] 45 | 46 | [ 47 | LAYER = ACTIVATION; 48 | NAME = activation1; 49 | INPUT = conv1; 50 | NON_LINEARITY = NL_RELU; 51 | ] 52 | 53 | 54 | [ 55 | LAYER = POOLING; 56 | NAME = pooling1; 57 | INPUT = activation1; 58 | POOLING_TYPE = POOL_MAX; 59 | POOLDIM = 2; 60 | PAD_H = 0; 61 | PAD_W = 0; 62 | STRIDE_H = 2; 63 | STRIDE_W = 2; 64 | ] 65 | 66 | [ 67 | LAYER = CONV; 68 | NAME = conv2; 69 | INPUT = pooling1; 70 | KERNEL_SIZE = 5; 71 | KERNEL_AMOUNT = 64; 72 | PAD_H = 0; 73 | PAD_W = 0; 74 | STRIDE_H = 1; 75 | STRIDE_W = 1; 76 | INIT_W = 0.01; 77 | LEARN_RATE = 0.1; 78 | WEIGHT_DECAY = 1e-8; 79 | ] 80 | 81 | 82 | [ 83 | LAYER = ACTIVATION; 84 | NAME = activation2; 85 | INPUT = conv2; 86 | NON_LINEARITY = NL_RELU; 87 | ] 88 | 89 | [ 90 | LAYER = POOLING; 91 | NAME = pooling2; 92 | INPUT = activation2; 93 | POOLING_TYPE = POOL_MAX; 94 | POOLDIM = 2; 95 | PAD_H = 0; 96 | PAD_W = 0; 97 | STRIDE_H = 2; 98 | STRIDE_W = 2; 99 | ] 100 | 101 | [ 102 | LAYER = DROPOUT; 103 | NAME = dropout1; 104 | INPUT = pooling2; 105 | DROP_RATE = 0.4; 106 | ] 107 | 108 | [ 109 | LAYER = HIDDEN; 110 | NAME = hidden1; 111 | INPUT = dropout1; 112 | NUM_HIDDEN_NEURONS = 1024; 113 | INIT_W = 0.01; 114 | LEARN_RATE = 0.1; 115 | WEIGHT_DECAY = 1e-8; 116 | ] 117 | 118 | [ 119 | LAYER = ACTIVATION; 120 | NAME = activation4; 121 | INPUT = hidden1; 122 | NON_LINEARITY = NL_RELU; 123 | ] 124 | 125 | [ 126 | LAYER = LRN; 127 | NAME = lrn; 128 | INPUT = activation4; 129 | LRNN = 5; 130 | LRNALPHA = 0.0001; 131 | LRNBETA = 0.75; 132 | ] 133 | 134 | [ 135 | LAYER = HIDDEN; 136 | NAME = hidden2; 137 | INPUT = lrn; 138 | NUM_HIDDEN_NEURONS = 10; 139 | INIT_W = 0.01; 140 | LEARN_RATE = 0.1; 141 | WEIGHT_DECAY = 1e-8; 142 | ] 143 | 144 | 145 | [ 146 | LAYER = SOFTMAX; 147 | NAME = softmax; 148 | INPUT = hidden2; 149 | NUM_CLASSES = 10; 150 | WEIGHT_DECAY = 1e-6; 151 | ] 152 | -------------------------------------------------------------------------------- /profile/dynamic_g_profile/dynamic_end/MnistConfig.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * MnistConfig File 3 | */ 4 | 5 | /******************************************************** 6 | * 7 | * General Parameters Config 8 | * 9 | *NON_LINEARITY = NL_SIGMOID, NL_TANH, NL_RELU, NL_CLIPPED_RELU 10 | ********************************************************/ 11 | 12 | BATCH_SIZE = 128; 13 | NORMALIZED_WIDTH = 0; 14 | IMAGES_SIZE = 28; 15 | CNANNELS = 1; 16 | EPOCHS = 4000; 17 | ITER_PER_EPO = 400; 18 | 19 | 20 | 21 | /******************************************************** 22 | * 23 | * Layers Config 24 | * 25 | ********************************************************/ 26 | [ 27 | LAYER = DATA; 28 | NAME = data; 29 | ] 30 | 31 | [ 32 | LAYER = CONV; 33 | NAME = conv1; 34 | INPUT =data; 35 | KERNEL_SIZE = 5; 36 | KERNEL_AMOUNT = 32; 37 | PAD_H = 0; 38 | PAD_W = 0; 39 | STRIDE_H = 1; 40 | STRIDE_W = 1; 41 | INIT_W = 0.01; 42 | LEARN_RATE = 0.1; 43 | WEIGHT_DECAY = 1e-8; 44 | ] 45 | 46 | [ 47 | LAYER = ACTIVATION; 48 | NAME = activation1; 49 | INPUT = conv1; 50 | NON_LINEARITY = NL_RELU; 51 | ] 52 | 53 | 54 | [ 55 | LAYER = POOLING; 56 | NAME = pooling1; 57 | INPUT = activation1; 58 | POOLING_TYPE = POOL_MAX; 59 | POOLDIM = 2; 60 | PAD_H = 0; 61 | PAD_W = 0; 62 | STRIDE_H = 2; 63 | STRIDE_W = 2; 64 | ] 65 | 66 | [ 67 | LAYER = CONV; 68 | NAME = conv2; 69 | INPUT = pooling1; 70 | KERNEL_SIZE = 5; 71 | KERNEL_AMOUNT = 64; 72 
| PAD_H = 0; 73 | PAD_W = 0; 74 | STRIDE_H = 1; 75 | STRIDE_W = 1; 76 | INIT_W = 0.01; 77 | LEARN_RATE = 0.1; 78 | WEIGHT_DECAY = 1e-8; 79 | ] 80 | 81 | 82 | [ 83 | LAYER = ACTIVATION; 84 | NAME = activation2; 85 | INPUT = conv2; 86 | NON_LINEARITY = NL_RELU; 87 | ] 88 | [ 89 | LAYER = CONV; 90 | NAME = conv3; 91 | INPUT = activation2; 92 | KERNEL_SIZE = 5; 93 | KERNEL_AMOUNT = 64; 94 | PAD_H = 2; 95 | PAD_W = 2; 96 | STRIDE_H = 1; 97 | STRIDE_W = 1; 98 | INIT_W = 0.01; 99 | LEARN_RATE = 0.1; 100 | WEIGHT_DECAY = 1e-8; 101 | ] 102 | 103 | [ 104 | LAYER = ACTIVATION; 105 | NAME = activation3; 106 | INPUT = conv3; 107 | NON_LINEARITY = NL_RELU; 108 | ] 109 | 110 | [ 111 | LAYER = POOLING; 112 | NAME = pooling2; 113 | INPUT = activation3; 114 | POOLING_TYPE = POOL_MAX; 115 | POOLDIM = 2; 116 | PAD_H = 0; 117 | PAD_W = 0; 118 | STRIDE_H = 2; 119 | STRIDE_W = 2; 120 | ] 121 | 122 | [ 123 | LAYER = DROPOUT; 124 | NAME = dropout1; 125 | INPUT = pooling2; 126 | DROP_RATE = 0.4; 127 | ] 128 | 129 | [ 130 | LAYER = HIDDEN; 131 | NAME = hidden1; 132 | INPUT = dropout1; 133 | NUM_HIDDEN_NEURONS = 1024; 134 | INIT_W = 0.01; 135 | LEARN_RATE = 0.1; 136 | WEIGHT_DECAY = 1e-8; 137 | ] 138 | 139 | [ 140 | LAYER = ACTIVATION; 141 | NAME = activation4; 142 | INPUT = hidden1; 143 | NON_LINEARITY = NL_RELU; 144 | ] 145 | 146 | [ 147 | LAYER = LRN; 148 | NAME = lrn; 149 | INPUT = activation4; 150 | LRNN = 5; 151 | LRNALPHA = 0.0001; 152 | LRNBETA = 0.75; 153 | ] 154 | 155 | [ 156 | LAYER = HIDDEN; 157 | NAME = hidden2; 158 | INPUT = lrn; 159 | NUM_HIDDEN_NEURONS = 10; 160 | INIT_W = 0.01; 161 | LEARN_RATE = 0.1; 162 | WEIGHT_DECAY = 1e-8; 163 | ] 164 | 165 | 166 | [ 167 | LAYER = SOFTMAX; 168 | NAME = softmax; 169 | INPUT = hidden2; 170 | NUM_CLASSES = 10; 171 | WEIGHT_DECAY = 1e-6; 172 | ] 173 | -------------------------------------------------------------------------------- /profile/MnistConfig.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * MnistConfig File 3 | */ 4 | 5 | /******************************************************** 6 | * 7 | * General Parameters Config 8 | * 9 | *NON_LINEARITY = NL_SIGMOID, NL_TANH, NL_RELU, NL_LRELU 10 | *LR_POLICY = INV, STEP, FIXED 11 | ********************************************************/ 12 | 13 | BATCH_SIZE = 128; 14 | NORMALIZED_WIDTH = 0; 15 | IMAGES_SIZE = 28; 16 | CNANNELS = 1; 17 | EPOCHS = 4000; 18 | ITER_PER_EPO = 400; 19 | LR_POLICY = FIXED; 20 | 21 | 22 | 23 | /******************************************************** 24 | * 25 | * Layers Config 26 | * 27 | ********************************************************/ 28 | [ 29 | LAYER = DATA; 30 | NAME = data; 31 | ] 32 | 33 | [ 34 | LAYER = CONV; 35 | NAME = conv1; 36 | INPUT =data; 37 | KERNEL_SIZE = 5; 38 | KERNEL_AMOUNT = 64; 39 | PAD_H = 0; 40 | PAD_W = 0; 41 | STRIDE_H = 1; 42 | STRIDE_W = 1; 43 | INIT_W = 0.01; 44 | LEARN_RATE = 0.05; 45 | WEIGHT_DECAY = 1e-8; 46 | ] 47 | 48 | [ 49 | LAYER = ACTIVATION; 50 | NAME = activation1; 51 | INPUT = conv1; 52 | NON_LINEARITY = NL_RELU; 53 | ] 54 | 55 | 56 | [ 57 | LAYER = POOLING; 58 | NAME = pooling1; 59 | INPUT = activation1; 60 | POOLING_TYPE = POOL_MAX; 61 | POOLDIM = 2; 62 | PAD_H = 0; 63 | PAD_W = 0; 64 | STRIDE_H = 2; 65 | STRIDE_W = 2; 66 | ] 67 | 68 | [ 69 | LAYER = CONV; 70 | NAME = conv2; 71 | INPUT = pooling1; 72 | KERNEL_SIZE = 5; 73 | KERNEL_AMOUNT = 64; 74 | PAD_H = 0; 75 | PAD_W = 0; 76 | STRIDE_H = 1; 77 | STRIDE_W = 1; 78 | INIT_W = 0.01; 79 | LEARN_RATE = 0.05; 80 | WEIGHT_DECAY = 1e-8; 81 | ] 82 
| 83 | 84 | [ 85 | LAYER = ACTIVATION; 86 | NAME = activation2; 87 | INPUT = conv2; 88 | NON_LINEARITY = NL_RELU; 89 | ] 90 | [ 91 | LAYER = CONV; 92 | NAME = conv3; 93 | INPUT = activation2; 94 | KERNEL_SIZE = 5; 95 | KERNEL_AMOUNT = 64; 96 | PAD_H = 2; 97 | PAD_W = 2; 98 | STRIDE_H = 1; 99 | STRIDE_W = 1; 100 | INIT_W = 0.01; 101 | LEARN_RATE = 0.05; 102 | WEIGHT_DECAY = 1e-8; 103 | ] 104 | 105 | [ 106 | LAYER = ACTIVATION; 107 | NAME = activation3; 108 | INPUT = conv3; 109 | NON_LINEARITY = NL_RELU; 110 | ] 111 | 112 | [ 113 | LAYER = POOLING; 114 | NAME = pooling2; 115 | INPUT = activation3; 116 | POOLING_TYPE = POOL_MAX; 117 | POOLDIM = 2; 118 | PAD_H = 0; 119 | PAD_W = 0; 120 | STRIDE_H = 2; 121 | STRIDE_W = 2; 122 | ] 123 | 124 | [ 125 | LAYER = DROPOUT; 126 | NAME = dropout1; 127 | INPUT = pooling2; 128 | DROP_RATE = 0.4; 129 | ] 130 | 131 | [ 132 | LAYER = HIDDEN; 133 | NAME = hidden1; 134 | INPUT = dropout1; 135 | NUM_HIDDEN_NEURONS = 1024; 136 | INIT_W = 0.1; 137 | LEARN_RATE = 0.05; 138 | WEIGHT_DECAY = 1e-8; 139 | ] 140 | 141 | [ 142 | LAYER = ACTIVATION; 143 | NAME = activation4; 144 | INPUT = hidden1; 145 | NON_LINEARITY = NL_RELU; 146 | ] 147 | 148 | [ 149 | LAYER = LRN; 150 | NAME = lrn; 151 | INPUT = activation4; 152 | LRNN = 5; 153 | LRNALPHA = 0.0001; 154 | LRNBETA = 0.75; 155 | ] 156 | 157 | [ 158 | LAYER = HIDDEN; 159 | NAME = hidden2; 160 | INPUT = lrn; 161 | NUM_HIDDEN_NEURONS = 10; 162 | INIT_W = 0.1; 163 | LEARN_RATE = 0.05; 164 | WEIGHT_DECAY = 1e-8; 165 | ] 166 | 167 | 168 | [ 169 | LAYER = SOFTMAX; 170 | NAME = softmax; 171 | INPUT = hidden2; 172 | NUM_CLASSES = 10; 173 | WEIGHT_DECAY = 1e-6; 174 | ] 175 | -------------------------------------------------------------------------------- /layers/VoteLayer.cu: -------------------------------------------------------------------------------- 1 | #include "VoteLayer.h" 2 | 3 | /* 4 | * Voting layer constructor 5 | * */ 6 | VoteLayer::VoteLayer(){ 7 | m_nNumOfTestData = -1; 8 | m_nClasses = -1; 9 | m_pHostVote = NULL; 10 | } 11 | 12 | VoteLayer* VoteLayer::instance(){ 13 | static VoteLayer tmp; 14 | return &tmp; 15 | } 16 | 17 | void VoteLayer::vote( int nStartPosition, int nBatchSize, float* pDevVote ) 18 | { 19 | int nRemain = nBatchSize; 20 | if( nBatchSize + nStartPosition * nBatchSize > m_nNumOfTestData ){ 21 | nRemain = m_nNumOfTestData - nStartPosition * nBatchSize; 22 | } 23 | else if ( nStartPosition * nBatchSize == m_nNumOfTestData ){ 24 | return; 25 | } 26 | //printf("startPosition %d %d %d\n", m_nNumOfTestData, nStartPosition, nRemain); 27 | 28 | if( nRemain <= 0 ) 29 | { 30 | printf("VoteLayer:vote error, nRemain = 0\n"); 31 | exit(0); 32 | } 33 | 34 | cuMatrixtmp(pDevVote, nRemain, m_nClasses, 1, true); 35 | tmp.toCpu(); 36 | 37 | if (m_pHostVote == NULL) 38 | { 39 | printf("VoteLayer has not be initialized\n"); 40 | exit(0); 41 | } 42 | 43 | for(int i = 0; i < nRemain; i++){ 44 | int nCurPosition = (nStartPosition * nBatchSize + i) * m_nClasses; 45 | //printf("%d\n", nCurPosition); 46 | if( nCurPosition >= m_nNumOfTestData * m_nClasses ){ 47 | printf(" nCurPosition >= m_nNumOfTestData\n"); 48 | exit(0); 49 | } 50 | for(int j = 0; j < m_nClasses; j++){ 51 | m_pHostVote[nCurPosition + j] += tmp.getHostData()[i * m_nClasses + j]; 52 | } 53 | } 54 | } 55 | 56 | void VoteLayer::clear() 57 | { 58 | memset(m_pHostVote, 0, m_nClasses * m_nNumOfTestData * sizeof(float)); 59 | } 60 | 61 | float VoteLayer::result() 62 | { 63 | if ( m_pHostVote == NULL ) 64 | { 65 | printf("VoteLayer has not be initialized\n"); 66 
| exit(0); 67 | } 68 | 69 | float fResult = 0; 70 | //printf("%d %d\n",m_nNumOfTestData, m_nClasses); 71 | 72 | for(int i = 0; i < m_nNumOfTestData; i++){ 73 | float fMax = -1; 74 | int nIndex = -1; 75 | for(int j = 0; j < m_nClasses; j++){ 76 | float fValue = m_pHostVote[i * m_nClasses + j]; 77 | //printf("%f\n", fValue); 78 | if( fValue > fMax ){ 79 | fMax = fValue; 80 | nIndex = j; 81 | } 82 | } 83 | 84 | if( nIndex == m_pLabels[i] ) 85 | { 86 | fResult += 1.0; 87 | } 88 | } 89 | return fResult / m_nNumOfTestData; 90 | } 91 | 92 | VoteLayer::~VoteLayer() 93 | { 94 | MemoryMonitor::instanceObject()->freeCpuMemory( m_pHostVote ); 95 | } 96 | 97 | /* 98 | * Voting init 99 | * */ 100 | void VoteLayer::init(int nNumOfTestData, int nClasses, cuMatrix* pLabels) 101 | { 102 | if ( m_pHostVote == NULL ) 103 | { 104 | m_pLabels = pLabels->getHostData(); 105 | m_nClasses = nClasses; 106 | m_nNumOfTestData = nNumOfTestData; 107 | 108 | m_pHostVote = (float*) MemoryMonitor::instanceObject()->cpuMallocMemory(m_nNumOfTestData * m_nClasses * sizeof(float)); 109 | } 110 | else 111 | { 112 | printf("VoteLayer has already been initialized\n"); 113 | exit(0); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /common/checkError.h: -------------------------------------------------------------------------------- 1 | /* 2 | * checkError.h 3 | * 4 | * Created on: Dec 8, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef CHECKERROR_H_ 9 | #define CHECKERROR_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | #define FatalError(s){ \ 18 | std::stringstream _where, _message; \ 19 | _where << __FILE__<<':'<<__LINE__; \ 20 | _message << std::string(s) + "\n" <<__FILE__ <<':'<<__LINE__; \ 21 | std::cerr << _message.str() <<"\nAborting...\n"; \ 22 | cudaDeviceReset(); \ 23 | exit(EXIT_FAILURE); \ 24 | } 25 | 26 | //cuda error check 27 | #define checkCudaErrors(status){ \ 28 | std::stringstream _error; \ 29 | if(0 != status) \ 30 | { \ 31 | _error<<"CUDA failure: "< 56 | //void CHECK_EQ(T a, T b) 57 | //{ 58 | // std::string s ="NOT_EQ"; 59 | // if(a != b) 60 | // { 61 | // FatalError(s); 62 | // } 63 | //} 64 | 65 | #endif /* CHECKERROR_H_ */ 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | >CUDA-MCDNN 2 | >========== 3 | >Results 4 | >-------- 5 | >A CNN implementation accelerated by CUDA and the cuDNN v5 library 6 | >* The training time is greatly reduced compared to the first version 7 | >1. Tested on MNIST 8 | >2. Tested on CIFAR-10 9 | 10 | >Features 11 | >-------- 12 | >1. Uses the cuDNN library to build CNNs 13 | >2. Uses Dropout and Network-In-Network (NIN) to train the network 14 | >3. Uses the GoogLeNet Inception structure (implemented without the cuDNN v5 library) to train the network 15 | 16 | >Compile 17 | >------- 18 | >Depends on OpenCV and CUDA. 19 | >You can compile the code on Windows or Linux.
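>
>For a quick sanity check outside an IDE, a one-shot build command looks roughly like the following sketch (the paths and flags here are illustrative; match them to the include paths, library list and GPU capability documented below):
>
>    nvcc -std=c++11 -rdc=true -arch=sm_20 -I/usr/local/cuda/samples/common/inc -o CUDA-MCDNN *.cpp *.cu -lcudnn -lcublas -lcurand -lcudadevrt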
20 | ###SDK include path(-I) 21 | >* linux: /usr/local/cuda/samples/common/inc/ (for the include file "helper_cuda"); /usr/local/include/opencv/ (depends on your setup) 22 | >* windows: X:/Program Files (x86)/NVIDIA Corporation/CUDA Samples/v6.5/common/inc (for the include file "helper_cuda"); X:/Program Files/opencv/vs2010/install/include (depends on your setup) 23 | > 24 | ###Library search path(-L) 25 | >* linux: /usr/local/lib/ 26 | >* windows: X:/Program Files/opencv/vs2010/install/x86/vc10/lib (depends on your setup) 27 | > 28 | ###libraries(-l) 29 | >* ***cublas*** 30 | >* ***curand*** 31 | >* ***cudadevrt*** 32 | >* ***cudnn*** 33 | 34 | ###GPU compute 35 | >* capability 2.0 36 | 37 | ###CMake for Linux 38 | >1. mkdir build 39 | >2. cd build 40 | >3. cmake .. 41 | >4. make -j16 42 | >5. cd ../data/mnist/ 43 | >6. sh get_mnist.sh 44 | >7. cd ../cifar-10 45 | >8. sh get_cifar10.sh 46 | >9. cd ../../ 47 | >10. ./build/CUDA-MCDNN 48 | 49 | ###Windows 50 | >1. Install VS2010. 51 | >2. Download and install CUDA 5.0 or a higher version 52 | >3. When you create a new project in VS2010 you can pick the NVIDIA CUDA project template; use it to create a CUDA project. 53 | >4. View-> Property Pages-> Configuration Properties-> CUDA C/C++ -> Device-> Code Generation-> compute_20,sm_20 54 | >5. View-> Property Pages-> Configuration Properties-> CUDA C/C++ -> Common-> Generate Relocatable Device Code-> Yes(-rdc=true) 55 | >6. View-> Property Pages-> Configuration Properties-> Linker-> Input-> Additional Dependencies-> libraries(-l) 56 | >7. View-> Property Pages-> Configuration Properties-> VC++ Directories-> General-> Library search path(-L) 57 | >8. View-> Property Pages-> Configuration Properties-> VC++ Directories-> General-> Include Directories(-I) 58 | 59 | ###Linux 60 | >1. Install OpenCV and CUDA 61 | >2. Start ***nsight*** from the CUDA toolkit 62 | >3. Create an 'empty cuda' project and import the cloned code 63 | >4. Project-> Properties-> Build-> Settings-> CUDA-> Device linker mode: separate compilation 64 | >5. Project-> Properties-> Build-> Settings-> CUDA-> Generate PTX code 2.0 65 | >6. Project-> Properties-> Build-> Settings-> CUDA-> Generate GPU code 2.0 66 | >7. Project-> Properties-> Build-> Settings-> Tool Settings-> NVCC Compiler-> Includes: +/usr/local/cuda/samples/common/inc/; 67 | >8. Project-> Properties-> Build-> Settings-> Tool Settings-> NVCC Linkers-> Libraries: libraries(-l) 68 | >9. Project-> Properties-> Build-> Settings-> Tool Settings-> NVCC Linkers-> Libraries search path(-L): /usr/local/lib/ 69 | 70 | *** 71 | >Config 72 | >1. MNIST 73 | >2. CIFAR-10 74 | *** 75 | 76 | >Information 77 | >------------ 78 | >* Author: tdx 79 | >* Mail: sa614149@mail.ustc.edu.cn 80 | >* Suggestions are welcome!
81 | 82 | -------------------------------------------------------------------------------- /profile/dynamic_g_profile/dynamic_begin/Cifar10Config.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Cifar10Config File 3 | */ 4 | 5 | /******************************************************** 6 | * 7 | * General Parameters Config 8 | * 9 | *NON_LINEARITY = NL_SIGMOID, NL_TANH, NL_RELU 10 | *LR_POLICY = INV, STEP, FIXED 11 | ********************************************************/ 12 | 13 | BATCH_SIZE = 100; 14 | NORMALIZED_WIDTH = 0; 15 | IMAGES_SIZE = 32; 16 | CNANNELS = 3; 17 | EPOCHS = 4000; 18 | ITER_PER_EPO = 500; 19 | LR_POLICY = INV; 20 | 21 | 22 | 23 | /******************************************************** 24 | * 25 | * Layers Config 26 | * 27 | ********************************************************/ 28 | [ 29 | LAYER = DATA; 30 | NAME = data; 31 | ] 32 | 33 | [ 34 | LAYER = CONV; 35 | NAME = conv1; 36 | INPUT = data; 37 | KERNEL_SIZE = 3; 38 | KERNEL_AMOUNT = 192; 39 | PAD_H = 1; 40 | PAD_W = 1; 41 | STRIDE_H = 1; 42 | STRIDE_W = 1; 43 | INIT_W = 0.01; 44 | LEARN_RATE = 0.05; 45 | WEIGHT_DECAY = 0.0001; 46 | ] 47 | [ 48 | LAYER = CONV; 49 | NAME = conv2; 50 | INPUT = conv1; 51 | KERNEL_SIZE = 3; 52 | KERNEL_AMOUNT = 192; 53 | PAD_H = 1; 54 | PAD_W = 1; 55 | STRIDE_H = 1; 56 | STRIDE_W = 1; 57 | INIT_W = 0.01; 58 | LEARN_RATE = 0.05; 59 | WEIGHT_DECAY = 0.0001; 60 | ] 61 | 62 | [ 63 | LAYER = ACTIVATION; 64 | NAME = activation1; 65 | INPUT = conv2; 66 | NON_LINEARITY = NL_RELU; 67 | ] 68 | [ 69 | LAYER = POOLING; 70 | NAME = pooling1; 71 | INPUT = activation1; 72 | POOLING_TYPE = POOL_MAX; 73 | POOLDIM = 2; 74 | PAD_H = 0; 75 | PAD_W = 0; 76 | STRIDE_H = 2; 77 | STRIDE_W = 2; 78 | ] 79 | 80 | [ 81 | LAYER = CONV; 82 | NAME = conv4; 83 | INPUT = pooling1; 84 | KERNEL_SIZE = 3; 85 | KERNEL_AMOUNT = 192; 86 | PAD_H = 0; 87 | PAD_W = 0; 88 | STRIDE_H = 1; 89 | STRIDE_W = 1; 90 | INIT_W = 0.1; 91 | LEARN_RATE = 0.02; 92 | WEIGHT_DECAY = 0.0001; 93 | ] 94 | [ 95 | LAYER = ACTIVATION; 96 | NAME = activation3; 97 | INPUT = conv4; 98 | NON_LINEARITY = NL_RELU; 99 | ] 100 | 101 | [ 102 | LAYER = CONV; 103 | NAME = conv6; 104 | INPUT = activation3; 105 | KERNEL_SIZE = 3; 106 | KERNEL_AMOUNT = 192; 107 | PAD_H = 1; 108 | PAD_W = 1; 109 | STRIDE_H = 1; 110 | STRIDE_W = 1; 111 | INIT_W = 0.1; 112 | LEARN_RATE = 0.02; 113 | WEIGHT_DECAY = 0.0001; 114 | ] 115 | 116 | [ 117 | LAYER = ACTIVATION; 118 | NAME = activation5; 119 | INPUT = conv6; 120 | NON_LINEARITY = NL_RELU; 121 | ] 122 | 123 | [ 124 | LAYER = POOLING; 125 | NAME = pooling2; 126 | INPUT = activation5; 127 | POOLING_TYPE = POOL_MAX; 128 | POOLDIM = 2; 129 | PAD_H = 0; 130 | PAD_W = 0; 131 | STRIDE_H = 2; 132 | STRIDE_W = 2; 133 | ] 134 | 135 | [ 136 | LAYER = DROPOUT; 137 | NAME = dropout1; 138 | INPUT = pooling2; 139 | DROP_RATE = 0.5; 140 | ] 141 | 142 | [ 143 | LAYER = CONV; 144 | NAME = conv8; 145 | INPUT = dropout1; 146 | KERNEL_SIZE = 1; 147 | KERNEL_AMOUNT = 192; 148 | PAD_H = 0; 149 | PAD_W = 0; 150 | STRIDE_H = 1; 151 | STRIDE_W = 1; 152 | INIT_W = 0.1; 153 | LEARN_RATE = 0.02; 154 | WEIGHT_DECAY = 0.0001; 155 | ] 156 | 157 | [ 158 | LAYER = ACTIVATION; 159 | NAME = activation7; 160 | INPUT = conv8; 161 | NON_LINEARITY = NL_RELU; 162 | ] 163 | 164 | [ 165 | LAYER = CONV; 166 | NAME = conv10; 167 | INPUT = activation7; 168 | KERNEL_SIZE = 1; 169 | KERNEL_AMOUNT = 10; 170 | PAD_H = 0; 171 | PAD_W = 0; 172 | STRIDE_H = 1; 173 | STRIDE_W = 1; 174 | INIT_W = 0.1; 175 | LEARN_RATE = 
0.02; 176 | WEIGHT_DECAY = 0.0001; 177 | ] 178 | 179 | [ 180 | LAYER = ACTIVATION; 181 | NAME = activation9; 182 | INPUT = conv10; 183 | NON_LINEARITY = NL_RELU; 184 | ] 185 | 186 | [ 187 | LAYER = POOLING; 188 | NAME = pooling3; 189 | INPUT = activation9; 190 | POOLING_TYPE = POOL_AVE_EXCLUDE_PAD; 191 | POOLDIM = 7; 192 | PAD_H = 0; 193 | PAD_W = 0; 194 | STRIDE_H = 7; 195 | STRIDE_W = 7; 196 | ] 197 | [ 198 | LAYER = SOFTMAX; 199 | NAME = softmax; 200 | INPUT = pooling3; 201 | NUM_CLASSES = 10; 202 | WEIGHT_DECAY = 0.0001; 203 | ] 204 | -------------------------------------------------------------------------------- /cuDNN_netWork.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cuDNN_netWork.h 3 | * 4 | * Created on: Dec 8, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef CUDNN_NETWORK_H_ 9 | #define CUDNN_NETWORK_H_ 10 | 11 | #include"./layers/ConvLayer.h" 12 | #include"./layers/HiddenLayer.h" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include"./common/checkError.h" 18 | 19 | void setTensorDesc(cudnnTensorDescriptor_t& tensorDesc, 20 | cudnnTensorFormat_t& tensorFormat, 21 | cudnnDataType_t& dataType, 22 | int n, 23 | int c, 24 | int h, 25 | int w); 26 | 27 | void matrixMulti(cublasHandle_t cublasHandle, 28 | int m, 29 | int n, 30 | int batchSize, 31 | float alpha, 32 | const float*A, 33 | const float*x, 34 | float beta, 35 | float *y); 36 | 37 | template 38 | class cuDNN_netWork 39 | { 40 | public: 41 | cudnnDataType_t& GetDataType() 42 | { 43 | return dataType; 44 | } 45 | 46 | cudnnTensorFormat_t& GetTensorFormat() 47 | { 48 | return tensorFormat; 49 | } 50 | 51 | cudnnHandle_t& GetcudnnHandle() 52 | { 53 | return cudnnHandle; 54 | } 55 | 56 | cublasHandle_t& GetcublasHandle() 57 | { 58 | return cublasHandle; 59 | } 60 | 61 | int& getConvFwdAlgorithm() 62 | { 63 | return convFwdAlgorithm; 64 | } 65 | 66 | int& getConvolutionBwdFilterAlgorithm() 67 | { 68 | return convBwdFilterAlgorithm; 69 | } 70 | 71 | int& getConvolutionBwdDataAlgorithm() 72 | { 73 | return convBwdDataAlgorithm; 74 | } 75 | 76 | void setConvolutionFwdAlgorithm(const cudnnConvolutionFwdAlgo_t& algo) 77 | { 78 | convFwdAlgorithm = static_cast(algo); 79 | } 80 | 81 | void setConvolutionBwdFilterAlgorithm(const cudnnConvolutionBwdFilterAlgo_t& algo) 82 | { 83 | convBwdFilterAlgorithm = static_cast(algo); 84 | } 85 | 86 | void setConvolutionBwdDataAlgorithm(const cudnnConvolutionBwdDataAlgo_t& algo) 87 | { 88 | convBwdDataAlgorithm = static_cast(algo); 89 | } 90 | 91 | private: 92 | int convFwdAlgorithm; 93 | int convBwdFilterAlgorithm; 94 | int convBwdDataAlgorithm; 95 | /*inlcude 3 type:float32、double64、float16*/ 96 | cudnnDataType_t dataType; 97 | cudnnTensorFormat_t tensorFormat; 98 | cudnnHandle_t cudnnHandle; 99 | cublasHandle_t cublasHandle; 100 | 101 | void createHandles() 102 | { 103 | checkCUDNN(cudnnCreate(&cudnnHandle)); 104 | checkCublasErrors(cublasCreate(&cublasHandle)); 105 | } 106 | 107 | void destroyHandles() 108 | { 109 | checkCUDNN(cudnnDestroy(cudnnHandle)); 110 | checkCublasErrors(cublasDestroy(cublasHandle)); 111 | } 112 | 113 | public: 114 | static cuDNN_netWork* instanceObject() 115 | { 116 | static cuDNN_netWork* cudnn = new cuDNN_netWork(); 117 | return cudnn; 118 | } 119 | cuDNN_netWork() 120 | { 121 | convFwdAlgorithm = -1; 122 | convBwdFilterAlgorithm = -1; 123 | convBwdDataAlgorithm = -1; 124 | 125 | switch(sizeof(T)) 126 | { 127 | case 4: 128 | dataType = CUDNN_DATA_FLOAT;break; 129 | case 8: 130 | dataType = 
CUDNN_DATA_DOUBLE;break; 131 | case 2: 132 | dataType = CUDNN_DATA_HALF;break; 133 | default:FatalError("Unsupported data type");break; 134 | } 135 | /*format type*/ 136 | tensorFormat = CUDNN_TENSOR_NCHW; 137 | createHandles(); 138 | } 139 | ~cuDNN_netWork() 140 | { 141 | destroyHandles(); 142 | } 143 | }; 144 | 145 | #endif /* CUDNN_NETWORK_H_ */ 146 | -------------------------------------------------------------------------------- /tests/test_layer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include"test_layer.h" 4 | #include 5 | #include"../common/checkError.h" 6 | #include"../common/MemoryMonitor.h" 7 | 8 | using namespace std; 9 | 10 | /*judge array equal or not*/ 11 | 12 | void CHECK_HOST_MATRIX_EQ(float*A, int sizeA, float*B, int sizeB) 13 | { 14 | string s ="NOT_EQ"; 15 | if(sizeA != sizeB) FatalError(s); 16 | cout<cpuMallocMemory(sizeB); 28 | checkCudaErrors(cudaMemcpy(tmpB, B, sizeB, cudaMemcpyDeviceToHost)); 29 | CHECK_HOST_MATRIX_EQ(A, sizeA, tmpB, sizeB); 30 | } 31 | 32 | /*printf hostdata,channels,height,width*/ 33 | //template 34 | void printf_HostParameter(int number, int channels, int height,int width, float*A) 35 | { 36 | for(int n=0; ncpuMallocMemory(number*channels*height*width*sizeof(float)); 60 | checkCudaErrors(cudaMemcpy(tmpA, A,number*channels*height*width*sizeof(float), cudaMemcpyDeviceToHost)); 61 | 62 | for(int n=0; ncpuMallocMemory(number * channels * height * width * sizeof(float)); 85 | checkCudaErrors(cudaMemcpy(hostData, devData, number * channels * height * width * sizeof(float), cudaMemcpyDeviceToHost)); 86 | } 87 | 88 | void copy_HostToDevice(float*hostData, float*&devData, int number, int channels, int height, int width) 89 | { 90 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&devData, number * channels * height * width * sizeof(float)); 91 | checkCudaErrors(cudaMemcpy(devData, hostData, number * channels * height * width * sizeof(float), cudaMemcpyHostToDevice)); 92 | } 93 | 94 | void printfLayersParameter(LayersBase* layer) 95 | { 96 | cout<<"name: "<_name<_inputName<number<channels<height<width<inputAmount<inputImageDim<number, layer->channels, layer->height, layer->width, layer->dstData); 105 | 106 | for(;;){} 107 | } 108 | -------------------------------------------------------------------------------- /cuDNN_netWork.cpp: -------------------------------------------------------------------------------- 1 | #include"./cuDNN_netWork.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"./common/checkError.h" 7 | 8 | 9 | 10 | //#define ND_TENSOR_DESCRIPTOR 11 | 12 | /*cudnn set tensor dim*/ 13 | void setTensorDesc(cudnnTensorDescriptor_t& tensorDesc, 14 | cudnnTensorFormat_t& tensorFormat, 15 | cudnnDataType_t& dataType, 16 | int n, 17 | int c, 18 | int h, 19 | int w){ 20 | 21 | #if SIMPLE_TENSOR_DESCRIPTOR 22 | /*cudnn set 4d tensor*/ 23 | checkCUDNN(cudnnSetTensor4dDescriptor(tensorDesc, 24 | tensorFormat, 25 | dataType, 26 | n, 27 | c, 28 | h, 29 | w)); 30 | 31 | #elif defined(ND_TENSOR_DESCRIPTOR) 32 | 33 | const int nDim = 4; 34 | int dimA[nDim] = {n,c,h,w}; 35 | int strideA[nDim] = {c*h*w, h*w, w, 1}; 36 | checkCUDNN(cudnnSetTensorNdDescriptor(tensorDesc, 37 | dataType, 38 | 4, 39 | dimA, 40 | strideA)); 41 | 42 | #else 43 | checkCUDNN(cudnnSetTensor4dDescriptorEx(tensorDesc, 44 | dataType, 45 | n, 46 | c, 47 | h, 48 | w, 49 | c*h*w, 50 | h*w, 51 | w, 52 | 1)); 53 | 54 | #endif 55 | } 56 | 57 | /*matrixMulti*/ 58 | #define DISABLE_GEMV 59 | void 
matrixMulti(cublasHandle_t cublasHandle, 60 | int m, 61 | int n, 62 | int batchSize, 63 | float alpha, 64 | const float*A, 65 | const float*x, 66 | float beta, 67 | float *y) 68 | { 69 | #ifdef DISABLE_GEMV 70 | checkCublasErrors(cublasSgemm(cublasHandle, 71 | CUBLAS_OP_T, 72 | CUBLAS_OP_T, 73 | n, 74 | batchSize, 75 | m, 76 | &alpha, 77 | x, 78 | m, 79 | A, 80 | batchSize, 81 | &beta, 82 | y, 83 | n)); 84 | 85 | #else 86 | checkCublasErrors(cublasSgemv(cublasHandle, CUBLAS_OP_T, 87 | m, n, 88 | &alpha, 89 | A, m, 90 | x, 1, 91 | &beta, 92 | y, 1)); 93 | #endif 94 | } 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /profile/dynamic_g_profile/dynamic_begin/Cifar100Config.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Cifar10Config File 3 | */ 4 | 5 | /******************************************************** 6 | * 7 | * General Parameters Config 8 | * 9 | *NON_LINEARITY = NL_SIGMOID, NL_TANH, NL_RELU 10 | *LR_POLICY = INV, STEP, FIXED 11 | ********************************************************/ 12 | 13 | BATCH_SIZE = 100; 14 | NORMALIZED_WIDTH = 0; 15 | IMAGES_SIZE = 32; 16 | CNANNELS = 3; 17 | EPOCHS = 4000; 18 | ITER_PER_EPO = 500; 19 | LR_POLICY = INV; 20 | 21 | 22 | 23 | /******************************************************** 24 | * 25 | * Layers Config 26 | * 27 | ********************************************************/ 28 | [ 29 | LAYER = DATA; 30 | NAME = data; 31 | ] 32 | 33 | [ 34 | LAYER = CONV; 35 | NAME = conv1; 36 | INPUT = data; 37 | KERNEL_SIZE = 3; 38 | KERNEL_AMOUNT = 192; 39 | PAD_H = 1; 40 | PAD_W = 1; 41 | STRIDE_H = 1; 42 | STRIDE_W = 1; 43 | INIT_W = 0.01; 44 | LEARN_RATE = 0.05; 45 | WEIGHT_DECAY = 0.0001; 46 | ] 47 | [ 48 | LAYER = CONV; 49 | NAME = conv2; 50 | INPUT = conv1; 51 | KERNEL_SIZE = 3; 52 | KERNEL_AMOUNT = 192; 53 | PAD_H = 1; 54 | PAD_W = 1; 55 | STRIDE_H = 1; 56 | STRIDE_W = 1; 57 | INIT_W = 0.01; 58 | LEARN_RATE = 0.05; 59 | WEIGHT_DECAY = 0.0001; 60 | ] 61 | 62 | [ 63 | LAYER = ACTIVATION; 64 | NAME = activation1; 65 | INPUT = conv2; 66 | NON_LINEARITY = NL_RELU; 67 | ] 68 | [ 69 | LAYER = POOLING; 70 | NAME = pooling1; 71 | INPUT = activation1; 72 | POOLING_TYPE = POOL_MAX; 73 | POOLDIM = 2; 74 | PAD_H = 0; 75 | PAD_W = 0; 76 | STRIDE_H = 2; 77 | STRIDE_W = 2; 78 | ] 79 | 80 | 81 | [ 82 | LAYER = CONV; 83 | NAME = conv4; 84 | INPUT = pooling1; 85 | KERNEL_SIZE = 3; 86 | KERNEL_AMOUNT = 192; 87 | PAD_H = 0; 88 | PAD_W = 0; 89 | STRIDE_H = 1; 90 | STRIDE_W = 1; 91 | INIT_W = 0.1; 92 | LEARN_RATE = 0.02; 93 | WEIGHT_DECAY = 0.0001; 94 | ] 95 | [ 96 | LAYER = ACTIVATION; 97 | NAME = activation3; 98 | INPUT = conv4; 99 | NON_LINEARITY = NL_RELU; 100 | ] 101 | 102 | [ 103 | LAYER = CONV; 104 | NAME = conv6; 105 | INPUT = activation3; 106 | KERNEL_SIZE = 3; 107 | KERNEL_AMOUNT = 192; 108 | PAD_H = 1; 109 | PAD_W = 1; 110 | STRIDE_H = 1; 111 | STRIDE_W = 1; 112 | INIT_W = 0.1; 113 | LEARN_RATE = 0.02; 114 | WEIGHT_DECAY = 0.0001; 115 | ] 116 | 117 | [ 118 | LAYER = ACTIVATION; 119 | NAME = activation5; 120 | INPUT = conv6; 121 | NON_LINEARITY = NL_RELU; 122 | ] 123 | 124 | [ 125 | LAYER = DROPOUT; 126 | NAME = dropout1; 127 | INPUT = activation5; 128 | DROP_RATE = 0.5; 129 | ] 130 | 131 | 132 | [ 133 | LAYER = CONV; 134 | NAME = conv8; 135 | INPUT = dropout1; 136 | KERNEL_SIZE = 3; 137 | KERNEL_AMOUNT = 192; 138 | PAD_H = 0; 139 | PAD_W = 0; 140 | STRIDE_H = 1; 141 | STRIDE_W = 1; 142 | INIT_W = 0.1; 143 | LEARN_RATE = 0.02; 144 
| WEIGHT_DECAY = 0.0001; 145 | ] 146 | 147 | [ 148 | LAYER = ACTIVATION; 149 | NAME = activation7; 150 | INPUT = conv8; 151 | NON_LINEARITY = NL_RELU; 152 | ] 153 | 154 | [ 155 | LAYER = POOLING; 156 | NAME = pooling2; 157 | INPUT = activation7; 158 | POOLING_TYPE = POOL_MAX; 159 | POOLDIM = 2; 160 | PAD_H = 0; 161 | PAD_W = 0; 162 | STRIDE_H = 2; 163 | STRIDE_W = 2; 164 | ] 165 | 166 | [ 167 | LAYER = CONV; 168 | NAME = conv10; 169 | INPUT = pooling2; 170 | KERNEL_SIZE = 1; 171 | KERNEL_AMOUNT = 192; 172 | PAD_H = 0; 173 | PAD_W = 0; 174 | STRIDE_H = 1; 175 | STRIDE_W = 1; 176 | INIT_W = 0.1; 177 | LEARN_RATE = 0.02; 178 | WEIGHT_DECAY = 0.0001; 179 | ] 180 | 181 | [ 182 | LAYER = ACTIVATION; 183 | NAME = activation9; 184 | INPUT = conv10; 185 | NON_LINEARITY = NL_RELU; 186 | ] 187 | 188 | 189 | [ 190 | LAYER = CONV; 191 | NAME = conv12; 192 | INPUT = activation9; 193 | KERNEL_SIZE = 1; 194 | KERNEL_AMOUNT = 100; 195 | PAD_H = 0; 196 | PAD_W = 0; 197 | STRIDE_H = 1; 198 | STRIDE_W = 1; 199 | INIT_W = 0.1; 200 | LEARN_RATE = 0.02; 201 | WEIGHT_DECAY = 0.0001; 202 | ] 203 | 204 | [ 205 | LAYER = ACTIVATION; 206 | NAME = activation11; 207 | INPUT = conv12; 208 | NON_LINEARITY = NL_RELU; 209 | ] 210 | 211 | [ 212 | LAYER = POOLING; 213 | NAME = pooling3; 214 | INPUT = activation11; 215 | POOLING_TYPE = POOL_AVE_INCLUDE_PAD; 216 | POOLDIM = 6; 217 | PAD_H = 0; 218 | PAD_W = 0; 219 | STRIDE_H = 6; 220 | STRIDE_W = 6; 221 | ] 222 | [ 223 | LAYER = SOFTMAX; 224 | NAME = softmax; 225 | INPUT = pooling3; 226 | NUM_CLASSES = 100; 227 | WEIGHT_DECAY = 0.0001; 228 | ] 229 | -------------------------------------------------------------------------------- /common/cuMatrix.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cuMatrix.h 3 | * 4 | * Created on: Nov 19, 2015 5 | * Author: tdx 6 | */ 7 | 8 | #ifndef CUMATRIX_H_ 9 | #define CUMATRIX_H_ 10 | 11 | #include"MemoryMonitor.h" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include"checkError.h" 17 | 18 | /*row major*/ 19 | template 20 | class cuMatrix 21 | { 22 | public: 23 | cuMatrix(T * _data, int _r, int _c, int _ch, bool _isGpuData = false):rows(_r),cols(_c),channels(_ch),hostData(NULL),devData(NULL){ 24 | if(false == _isGpuData){ 25 | /*allocate host memory*/ 26 | mallocHostMemory(); 27 | /*deep copy*/ 28 | memcpy(hostData,_data,rows * cols *sizeof(*hostData) * channels); 29 | } 30 | else{ 31 | mallocDeviceMemory(); 32 | MemoryMonitor::instanceObject()->gpu2gpu(devData, _data, rows * cols * sizeof(*devData) * channels ); 33 | } 34 | } 35 | 36 | /*constructed function with rows and cols*/ 37 | cuMatrix(int _r, int _c, int _ch):rows(_r),cols(_c), channels(_ch),hostData(NULL), devData(NULL){} 38 | 39 | /*destructor*/ 40 | ~cuMatrix() 41 | { 42 | if(NULL != hostData) 43 | { 44 | MemoryMonitor::instanceObject()->freeCpuMemory(hostData); 45 | } 46 | if(NULL != devData) 47 | { 48 | MemoryMonitor::instanceObject()->freeGpuMemory(devData); 49 | } 50 | } 51 | 52 | /*set value*/ 53 | void setValue(int i, int j, int k, T v) 54 | { 55 | mallocHostMemory(); 56 | hostData[(i* cols + j) + rows * cols * k] = v; 57 | } 58 | 59 | /*get value*/ 60 | T getValue(int i, int j, int k) 61 | { 62 | return hostData[(i * cols+ j) + rows * cols * k]; 63 | } 64 | 65 | /*copy the host data to device*/ 66 | void toGpu() 67 | { 68 | mallocHostMemory(); 69 | mallocDeviceMemory(); 70 | checkCudaErrors(cudaMemcpy(devData, hostData, rows * cols * sizeof(*devData) * channels,cudaMemcpyHostToDevice)); 71 | } 72 | 73 | 
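/* A minimal host<->device round trip with this class (a usage sketch only,
 * not part of the header):
 *
 *   cuMatrix<float> m(2, 3, 1);   // 2 rows, 3 cols, 1 channel
 *   m.setValue(0, 1, 0, 0.5f);    // write an element on the host side
 *   m.toGpu();                    // mirror the host data to the device
 *   // ... launch kernels that read/write m.getDev() ...
 *   m.toCpu();                    // copy the device result back
 *   float v = m.getValue(0, 1, 0);
 */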
/*copy the data to host*/ 74 | void toCpu() 75 | { 76 | mallocDeviceMemory(); 77 | mallocHostMemory(); 78 | checkCudaErrors(cudaMemcpy(hostData, devData, sizeof(*hostData)*rows* cols * channels,cudaMemcpyDeviceToHost)); 79 | } 80 | 81 | /*get the number of value*/ 82 | int getLength() 83 | { 84 | return rows * cols * channels; 85 | } 86 | 87 | int getArea() 88 | { 89 | return rows * cols; 90 | } 91 | 92 | T* &getHostData() 93 | { 94 | return hostData; 95 | } 96 | 97 | /*get device data*/ 98 | T* &getDeviceData() 99 | { 100 | return devData; 101 | } 102 | 103 | T* &getHost() 104 | { 105 | mallocHostMemory(); 106 | return hostData; 107 | } 108 | 109 | T* &getDev() 110 | { 111 | mallocDeviceMemory(); 112 | return devData; 113 | } 114 | 115 | public: 116 | int rows; 117 | int cols; 118 | int channels; 119 | 120 | private: 121 | /*host data*/ 122 | T *hostData; 123 | /*device data*/ 124 | T *devData; 125 | 126 | private: 127 | /*allocate host memory*/ 128 | void mallocHostMemory() 129 | { 130 | if(NULL == hostData) 131 | { 132 | hostData=(T*)MemoryMonitor::instanceObject()->cpuMallocMemory(rows * cols * sizeof(*hostData) * channels); 133 | if(!hostData) 134 | { 135 | printf("cuMatrix:cuMatrix host Memory allocation Failed\n"); 136 | exit(0); 137 | } 138 | /*init allocation memory*/ 139 | memset(hostData,0,rows * cols * channels * sizeof(*hostData)); 140 | } 141 | } 142 | 143 | /*allocate device memory*/ 144 | void mallocDeviceMemory() 145 | { 146 | if(NULL == devData) 147 | { 148 | /*malloc device data*/ 149 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&devData, rows * cols * channels * sizeof(*devData)); 150 | checkCudaErrors(cudaMemset(devData, 0, rows * cols * channels * sizeof(*devData))); 151 | } 152 | } 153 | }; 154 | 155 | 156 | #endif /* CUMATRIX_H_ */ 157 | -------------------------------------------------------------------------------- /layers/LayersBase.cpp: -------------------------------------------------------------------------------- 1 | #include"LayersBase.h" 2 | #include"../config/config.h" 3 | 4 | /* 5 | * Set branch index 6 | * */ 7 | void LayersBase::setCurBranchIndex(int nIndex) 8 | { 9 | m_nCurBranchIndex = nIndex; 10 | } 11 | 12 | /* 13 | * Adjust learning rate 14 | * */ 15 | void LayersBase::adjust_learnRate(int index, double lr_gamma, double lr_power) 16 | { 17 | lrate = static_cast(lrate * pow((1.0 + lr_gamma * index), (-lr_power))); 18 | } 19 | 20 | /*Insert previous layer*/ 21 | void LayersBase::insertPrevLayer(LayersBase* layer) 22 | { 23 | prevLayer.push_back(layer); 24 | } 25 | 26 | /* 27 | * Insert next layer 28 | * */ 29 | void LayersBase::insertNextlayer(LayersBase* layer) 30 | { 31 | nextLayer.push_back(layer); 32 | } 33 | 34 | /* 35 | * Learning rate reduce 36 | * */ 37 | void LayersBase::rateReduce() 38 | { 39 | // if( lrate < 1 ) 40 | // { 41 | // if( m_fReduceRate <= 1.0f) 42 | // lrate /= 2.0f; 43 | // else if( m_fReduceRate <= 2.0f) 44 | // lrate /= 2.5f; 45 | // else if( m_fReduceRate <= 5.0f) 46 | // lrate /= 3.0f; 47 | // lrate /= m_fReduceRate; 48 | // } 49 | lrate /= m_fReduceRate; 50 | } 51 | 52 | void LayersBase::setRateReduce( float fReduce) 53 | { 54 | m_fReduceRate = fReduce; 55 | } 56 | 57 | float LayersBase::getRateReduce() 58 | { 59 | return m_fReduceRate; 60 | } 61 | 62 | /* 63 | * Get one layer from map 64 | * */ 65 | LayersBase* Layers::getLayer(string name) 66 | { 67 | if(_layersMaps.find(name) != _layersMaps.end()) 68 | { 69 | return _layersMaps[name]; 70 | }else 71 | { 72 | printf("layer: get layer %s is not 
exist\n",name.c_str()); 73 | exit(0); 74 | return NULL; 75 | } 76 | } 77 | 78 | /* 79 | *The layer is in Map or no 80 | * */ 81 | bool Layers::hasLayer(string name) 82 | { 83 | if(_layersMaps.find(name) != _layersMaps.end()) 84 | { 85 | return true; 86 | }else 87 | { 88 | return false; 89 | } 90 | } 91 | 92 | /* 93 | * Story the layername into vector 94 | * */ 95 | void Layers::storLayersName(string name) 96 | { 97 | _layersName.push_back(name); 98 | } 99 | 100 | /* 101 | * Story the layers into map 102 | * */ 103 | void Layers::storLayers(string prev_name, string name, LayersBase* layer) 104 | { 105 | if(_layersMaps.find(name) == _layersMaps.end()) 106 | { 107 | _layersMaps[name] = layer; 108 | storLayersName(name); 109 | 110 | /*create a linked list*/ 111 | if(string("NULL") == prev_name) 112 | { 113 | _layersMaps[name]->prevLayer.clear(); 114 | _layersMaps[name]->_inputName = " "; 115 | }else 116 | { 117 | _layersMaps[name]->_inputName = prev_name; 118 | //cout<<"prevName: "<insertNextlayer( _layersMaps[name] ); 120 | _layersMaps[name]->insertPrevLayer(_layersMaps[prev_name]); 121 | } 122 | 123 | }else 124 | { 125 | printf("layers: the layer %s have already in layersMap\n",name.c_str()); 126 | exit(0); 127 | } 128 | } 129 | 130 | /* 131 | * overload 132 | * Linear storage layer 133 | */ 134 | void Layers::storLayers(string name, LayersBase* layer) 135 | { 136 | if(_layersMaps.find(name) == _layersMaps.end()) 137 | { 138 | _layersMaps[name] = layer; 139 | storLayersName(name); 140 | 141 | /*create a linked list*/ 142 | if(_layersMaps.size() == 1) 143 | { 144 | _layersMaps[name]->prevLayer.clear(); 145 | _layersMaps[name]->_inputName = " "; 146 | 147 | }else 148 | { 149 | _layersMaps[name]->_inputName = _layersMaps[_layersName[_layersName.size() - 2]]->_name; 150 | _layersMaps[_layersName[_layersName.size() -2 ]]->insertNextlayer( _layersMaps[name] ); 151 | _layersMaps[name]->insertPrevLayer(_layersMaps[_layersName[_layersName.size() - 2]]); 152 | } 153 | }else 154 | { 155 | printf("layers: the layer %s have already in layersMap\n",name.c_str()); 156 | exit(0); 157 | 158 | } 159 | } 160 | 161 | /*Get the name of layer*/ 162 | string Layers::getLayersName(int index) 163 | { 164 | if(index >= _layersName.size()) 165 | { 166 | printf("layers: the index %d has already out of layersName size\n", index); 167 | exit(0); 168 | }else 169 | { 170 | return _layersName[index]; 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /profile/Cifar10Config.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Cifar10Config File 3 | */ 4 | 5 | /******************************************************** 6 | * 7 | * General Parameters Config 8 | * 9 | *NON_LINEARITY = NL_SIGMOID, NL_TANH, NL_RELU NL_LRELU 10 | *LR_POLICY = INV, STEP, FIXED 11 | ********************************************************/ 12 | 13 | BATCH_SIZE = 100; 14 | NORMALIZED_WIDTH = 0; 15 | IMAGES_SIZE = 32; 16 | CNANNELS = 3; 17 | EPOCHS = 4000; 18 | ITER_PER_EPO = 500; 19 | LR_POLICY = INV; 20 | 21 | 22 | 23 | /******************************************************** 24 | * 25 | * Layers Config 26 | * 27 | ********************************************************/ 28 | [ 29 | LAYER = DATA; 30 | NAME = data; 31 | ] 32 | 33 | [ 34 | LAYER = CONV; 35 | NAME = conv1; 36 | INPUT = data; 37 | KERNEL_SIZE = 3; 38 | KERNEL_AMOUNT = 192; 39 | PAD_H = 1; 40 | PAD_W = 1; 41 | STRIDE_H = 1; 42 | STRIDE_W = 1; 43 | INIT_W = 0.01; 44 | LEARN_RATE = 0.05; 45 | 
WEIGHT_DECAY = 0.0001; 46 | ] 47 | [ 48 | LAYER = CONV; 49 | NAME = conv2; 50 | INPUT = conv1; 51 | KERNEL_SIZE = 3; 52 | KERNEL_AMOUNT = 192; 53 | PAD_H = 1; 54 | PAD_W = 1; 55 | STRIDE_H = 1; 56 | STRIDE_W = 1; 57 | INIT_W = 0.01; 58 | LEARN_RATE = 0.05; 59 | WEIGHT_DECAY = 0.0001; 60 | ] 61 | 62 | [ 63 | LAYER = ACTIVATION; 64 | NAME = activation1; 65 | INPUT = conv2; 66 | NON_LINEARITY = NL_RELU; 67 | ] 68 | [ 69 | LAYER = POOLING; 70 | NAME = pooling1; 71 | INPUT = activation1; 72 | POOLING_TYPE = POOL_MAX; 73 | POOLDIM = 2; 74 | PAD_H = 0; 75 | PAD_W = 0; 76 | STRIDE_H = 2; 77 | STRIDE_W = 2; 78 | ] 79 | 80 | [ 81 | LAYER = CONV; 82 | NAME = conv3; 83 | INPUT = pooling1; 84 | KERNEL_SIZE = 3; 85 | KERNEL_AMOUNT = 192; 86 | PAD_H = 0; 87 | PAD_W = 0; 88 | STRIDE_H = 1; 89 | STRIDE_W = 1; 90 | INIT_W = 0.01; 91 | LEARN_RATE = 0.02; 92 | WEIGHT_DECAY = 0.0001; 93 | ] 94 | 95 | [ 96 | LAYER = ACTIVATION; 97 | NAME = activation2; 98 | INPUT = conv3; 99 | NON_LINEARITY = NL_RELU; 100 | ] 101 | 102 | [ 103 | LAYER = CONV; 104 | NAME = conv4; 105 | INPUT = activation2; 106 | KERNEL_SIZE = 3; 107 | KERNEL_AMOUNT = 192; 108 | PAD_H = 0; 109 | PAD_W = 0; 110 | STRIDE_H = 1; 111 | STRIDE_W = 1; 112 | INIT_W = 0.1; 113 | LEARN_RATE = 0.02; 114 | WEIGHT_DECAY = 0.0001; 115 | ] 116 | [ 117 | LAYER = ACTIVATION; 118 | NAME = activation3; 119 | INPUT = conv4; 120 | NON_LINEARITY = NL_RELU; 121 | ] 122 | 123 | [ 124 | LAYER = CONV; 125 | NAME = conv5; 126 | INPUT = activation3; 127 | KERNEL_SIZE = 3; 128 | KERNEL_AMOUNT = 192; 129 | PAD_H = 0; 130 | PAD_W = 0; 131 | STRIDE_H = 1; 132 | STRIDE_W = 1; 133 | INIT_W = 0.1; 134 | LEARN_RATE = 0.02; 135 | WEIGHT_DECAY = 0.0001; 136 | ] 137 | 138 | 139 | [ 140 | LAYER = ACTIVATION; 141 | NAME = activation4; 142 | INPUT = conv5; 143 | NON_LINEARITY = NL_RELU; 144 | ] 145 | 146 | [ 147 | LAYER = CONV; 148 | NAME = conv6; 149 | INPUT = activation4; 150 | KERNEL_SIZE = 3; 151 | KERNEL_AMOUNT = 192; 152 | PAD_H = 1; 153 | PAD_W = 1; 154 | STRIDE_H = 1; 155 | STRIDE_W = 1; 156 | INIT_W = 0.1; 157 | LEARN_RATE = 0.02; 158 | WEIGHT_DECAY = 0.0001; 159 | ] 160 | 161 | [ 162 | LAYER = ACTIVATION; 163 | NAME = activation5; 164 | INPUT = conv6; 165 | NON_LINEARITY = NL_RELU; 166 | ] 167 | 168 | [ 169 | LAYER = POOLING; 170 | NAME = pooling2; 171 | INPUT = activation5; 172 | POOLING_TYPE = POOL_MAX; 173 | POOLDIM = 2; 174 | PAD_H = 0; 175 | PAD_W = 0; 176 | STRIDE_H = 2; 177 | STRIDE_W = 2; 178 | ] 179 | 180 | [ 181 | LAYER = DROPOUT; 182 | NAME = dropout1; 183 | INPUT = pooling2; 184 | DROP_RATE = 0.5; 185 | ] 186 | 187 | [ 188 | LAYER = CONV; 189 | NAME = conv7; 190 | INPUT = dropout1; 191 | KERNEL_SIZE = 3; 192 | KERNEL_AMOUNT = 192; 193 | PAD_H = 1; 194 | PAD_W = 1; 195 | STRIDE_H = 1; 196 | STRIDE_W = 1; 197 | INIT_W = 0.1; 198 | LEARN_RATE = 0.02; 199 | WEIGHT_DECAY = 0.0001; 200 | ] 201 | 202 | [ 203 | LAYER = ACTIVATION; 204 | NAME = activation6; 205 | INPUT = conv7; 206 | NON_LINEARITY = NL_RELU; 207 | ] 208 | 209 | [ 210 | LAYER = CONV; 211 | NAME = conv8; 212 | INPUT = activation6; 213 | KERNEL_SIZE = 1; 214 | KERNEL_AMOUNT = 192; 215 | PAD_H = 0; 216 | PAD_W = 0; 217 | STRIDE_H = 1; 218 | STRIDE_W = 1; 219 | INIT_W = 0.1; 220 | LEARN_RATE = 0.02; 221 | WEIGHT_DECAY = 0.0001; 222 | ] 223 | 224 | [ 225 | LAYER = ACTIVATION; 226 | NAME = activation7; 227 | INPUT = conv8; 228 | NON_LINEARITY = NL_RELU; 229 | ] 230 | 231 | [ 232 | LAYER = CONV; 233 | NAME = conv9; 234 | INPUT = activation7; 235 | KERNEL_SIZE = 1; 236 | KERNEL_AMOUNT = 192; 237 | PAD_H = 0; 238 | 
PAD_W = 0; 239 | STRIDE_H = 1; 240 | STRIDE_W = 1; 241 | INIT_W = 0.1; 242 | LEARN_RATE = 0.02; 243 | WEIGHT_DECAY = 0.0001; 244 | ] 245 | 246 | [ 247 | LAYER = ACTIVATION; 248 | NAME = activation8; 249 | INPUT = conv9; 250 | NON_LINEARITY = NL_RELU; 251 | ] 252 | 253 | [ 254 | LAYER = CONV; 255 | NAME = conv10; 256 | INPUT = activation8; 257 | KERNEL_SIZE = 1; 258 | KERNEL_AMOUNT = 10; 259 | PAD_H = 0; 260 | PAD_W = 0; 261 | STRIDE_H = 1; 262 | STRIDE_W = 1; 263 | INIT_W = 0.1; 264 | LEARN_RATE = 0.02; 265 | WEIGHT_DECAY = 0.0001; 266 | ] 267 | 268 | [ 269 | LAYER = ACTIVATION; 270 | NAME = activation9; 271 | INPUT = conv10; 272 | NON_LINEARITY = NL_RELU; 273 | ] 274 | 275 | [ 276 | LAYER = POOLING; 277 | NAME = pooling3; 278 | INPUT = activation9; 279 | POOLING_TYPE = POOL_AVE_EXCLUDE_PAD; 280 | POOLDIM = 5; 281 | PAD_H = 0; 282 | PAD_W = 0; 283 | STRIDE_H = 5; 284 | STRIDE_W = 5; 285 | ] 286 | [ 287 | LAYER = SOFTMAX; 288 | NAME = softmax; 289 | INPUT = pooling3; 290 | NUM_CLASSES = 10; 291 | WEIGHT_DECAY = 0.0001; 292 | ] 293 | -------------------------------------------------------------------------------- /profile/dynamic_g_profile/dynamic_end/Cifar10Config.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Cifar10Config File 3 | */ 4 | 5 | /******************************************************** 6 | * 7 | * General Parameters Config 8 | * 9 | *NON_LINEARITY = NL_SIGMOID, NL_TANH, NL_RELU 10 | *LR_POLICY = INV, STEP, FIXED 11 | ********************************************************/ 12 | 13 | BATCH_SIZE = 100; 14 | NORMALIZED_WIDTH = 0; 15 | IMAGES_SIZE = 32; 16 | CNANNELS = 3; 17 | EPOCHS = 4000; 18 | ITER_PER_EPO = 500; 19 | LR_POLICY = INV; 20 | 21 | 22 | 23 | /******************************************************** 24 | * 25 | * Layers Config 26 | * 27 | ********************************************************/ 28 | [ 29 | LAYER = DATA; 30 | NAME = data; 31 | ] 32 | 33 | [ 34 | LAYER = CONV; 35 | NAME = conv1; 36 | INPUT = data; 37 | KERNEL_SIZE = 3; 38 | KERNEL_AMOUNT = 192; 39 | PAD_H = 1; 40 | PAD_W = 1; 41 | STRIDE_H = 1; 42 | STRIDE_W = 1; 43 | INIT_W = 0.01; 44 | LEARN_RATE = 0.05; 45 | WEIGHT_DECAY = 0.0001; 46 | ] 47 | [ 48 | LAYER = CONV; 49 | NAME = conv2; 50 | INPUT = conv1; 51 | KERNEL_SIZE = 3; 52 | KERNEL_AMOUNT = 192; 53 | PAD_H = 1; 54 | PAD_W = 1; 55 | STRIDE_H = 1; 56 | STRIDE_W = 1; 57 | INIT_W = 0.01; 58 | LEARN_RATE = 0.05; 59 | WEIGHT_DECAY = 0.0001; 60 | ] 61 | 62 | [ 63 | LAYER = ACTIVATION; 64 | NAME = activation1; 65 | INPUT = conv2; 66 | NON_LINEARITY = NL_RELU; 67 | ] 68 | [ 69 | LAYER = POOLING; 70 | NAME = pooling1; 71 | INPUT = activation1; 72 | POOLING_TYPE = POOL_MAX; 73 | POOLDIM = 2; 74 | PAD_H = 0; 75 | PAD_W = 0; 76 | STRIDE_H = 2; 77 | STRIDE_W = 2; 78 | ] 79 | 80 | [ 81 | LAYER = CONV; 82 | NAME = conv3; 83 | INPUT = pooling1; 84 | KERNEL_SIZE = 3; 85 | KERNEL_AMOUNT = 192; 86 | PAD_H = 0; 87 | PAD_W = 0; 88 | STRIDE_H = 1; 89 | STRIDE_W = 1; 90 | INIT_W = 0.01; 91 | LEARN_RATE = 0.02; 92 | WEIGHT_DECAY = 0.0001; 93 | ] 94 | 95 | [ 96 | LAYER = ACTIVATION; 97 | NAME = activation2; 98 | INPUT = conv3; 99 | NON_LINEARITY = NL_RELU; 100 | ] 101 | 102 | [ 103 | LAYER = CONV; 104 | NAME = conv4; 105 | INPUT = activation2; 106 | KERNEL_SIZE = 3; 107 | KERNEL_AMOUNT = 192; 108 | PAD_H = 0; 109 | PAD_W = 0; 110 | STRIDE_H = 1; 111 | STRIDE_W = 1; 112 | INIT_W = 0.1; 113 | LEARN_RATE = 0.02; 114 | WEIGHT_DECAY = 0.0001; 115 | ] 116 | [ 117 | LAYER = ACTIVATION; 118 | NAME = activation3; 119 | INPUT 
= conv4; 120 | NON_LINEARITY = NL_RELU; 121 | ] 122 | 123 | [ 124 | LAYER = CONV; 125 | NAME = conv5; 126 | INPUT = activation3; 127 | KERNEL_SIZE = 3; 128 | KERNEL_AMOUNT = 192; 129 | PAD_H = 0; 130 | PAD_W = 0; 131 | STRIDE_H = 1; 132 | STRIDE_W = 1; 133 | INIT_W = 0.1; 134 | LEARN_RATE = 0.02; 135 | WEIGHT_DECAY = 0.0001; 136 | ] 137 | 138 | 139 | [ 140 | LAYER = ACTIVATION; 141 | NAME = activation4; 142 | INPUT = conv5; 143 | NON_LINEARITY = NL_RELU; 144 | ] 145 | 146 | [ 147 | LAYER = CONV; 148 | NAME = conv6; 149 | INPUT = activation4; 150 | KERNEL_SIZE = 3; 151 | KERNEL_AMOUNT = 192; 152 | PAD_H = 1; 153 | PAD_W = 1; 154 | STRIDE_H = 1; 155 | STRIDE_W = 1; 156 | INIT_W = 0.1; 157 | LEARN_RATE = 0.02; 158 | WEIGHT_DECAY = 0.0001; 159 | ] 160 | 161 | [ 162 | LAYER = ACTIVATION; 163 | NAME = activation5; 164 | INPUT = conv6; 165 | NON_LINEARITY = NL_RELU; 166 | ] 167 | 168 | [ 169 | LAYER = POOLING; 170 | NAME = pooling2; 171 | INPUT = activation5; 172 | POOLING_TYPE = POOL_MAX; 173 | POOLDIM = 2; 174 | PAD_H = 0; 175 | PAD_W = 0; 176 | STRIDE_H = 2; 177 | STRIDE_W = 2; 178 | ] 179 | 180 | [ 181 | LAYER = DROPOUT; 182 | NAME = dropout1; 183 | INPUT = pooling2; 184 | DROP_RATE = 0.5; 185 | ] 186 | 187 | [ 188 | LAYER = CONV; 189 | NAME = conv7; 190 | INPUT = dropout1; 191 | KERNEL_SIZE = 3; 192 | KERNEL_AMOUNT = 192; 193 | PAD_H = 1; 194 | PAD_W = 1; 195 | STRIDE_H = 1; 196 | STRIDE_W = 1; 197 | INIT_W = 0.1; 198 | LEARN_RATE = 0.02; 199 | WEIGHT_DECAY = 0.0001; 200 | ] 201 | 202 | [ 203 | LAYER = ACTIVATION; 204 | NAME = activation6; 205 | INPUT = conv7; 206 | NON_LINEARITY = NL_RELU; 207 | ] 208 | 209 | [ 210 | LAYER = CONV; 211 | NAME = conv8; 212 | INPUT = activation6; 213 | KERNEL_SIZE = 1; 214 | KERNEL_AMOUNT = 192; 215 | PAD_H = 0; 216 | PAD_W = 0; 217 | STRIDE_H = 1; 218 | STRIDE_W = 1; 219 | INIT_W = 0.1; 220 | LEARN_RATE = 0.02; 221 | WEIGHT_DECAY = 0.0001; 222 | ] 223 | 224 | [ 225 | LAYER = ACTIVATION; 226 | NAME = activation7; 227 | INPUT = conv8; 228 | NON_LINEARITY = NL_RELU; 229 | ] 230 | 231 | [ 232 | LAYER = CONV; 233 | NAME = conv9; 234 | INPUT = activation7; 235 | KERNEL_SIZE = 1; 236 | KERNEL_AMOUNT = 192; 237 | PAD_H = 0; 238 | PAD_W = 0; 239 | STRIDE_H = 1; 240 | STRIDE_W = 1; 241 | INIT_W = 0.1; 242 | LEARN_RATE = 0.02; 243 | WEIGHT_DECAY = 0.0001; 244 | ] 245 | 246 | [ 247 | LAYER = ACTIVATION; 248 | NAME = activation8; 249 | INPUT = conv9; 250 | NON_LINEARITY = NL_RELU; 251 | ] 252 | 253 | [ 254 | LAYER = CONV; 255 | NAME = conv10; 256 | INPUT = activation8; 257 | KERNEL_SIZE = 1; 258 | KERNEL_AMOUNT = 10; 259 | PAD_H = 0; 260 | PAD_W = 0; 261 | STRIDE_H = 1; 262 | STRIDE_W = 1; 263 | INIT_W = 0.1; 264 | LEARN_RATE = 0.02; 265 | WEIGHT_DECAY = 0.0001; 266 | ] 267 | 268 | [ 269 | LAYER = ACTIVATION; 270 | NAME = activation9; 271 | INPUT = conv10; 272 | NON_LINEARITY = NL_RELU; 273 | ] 274 | 275 | [ 276 | LAYER = POOLING; 277 | NAME = pooling3; 278 | INPUT = activation9; 279 | POOLING_TYPE = POOL_AVE_EXCLUDE_PAD; 280 | POOLDIM = 5; 281 | PAD_H = 0; 282 | PAD_W = 0; 283 | STRIDE_H = 5; 284 | STRIDE_W = 5; 285 | ] 286 | [ 287 | LAYER = SOFTMAX; 288 | NAME = softmax; 289 | INPUT = pooling3; 290 | NUM_CLASSES = 10; 291 | WEIGHT_DECAY = 0.0001; 292 | ] 293 | -------------------------------------------------------------------------------- /layers/DataLayer.cu: -------------------------------------------------------------------------------- 1 | #include"DataLayer.h" 2 | #include 3 | /* 4 | * Datalayer destructor 5 | * */ 6 | DataLayer::~DataLayer() 7 | { 8 | 
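/* Descriptive note: the destructor releases everything this layer owns,
 * namely the device-side batch buffer (dstData) and the host-side staging
 * buffers for images (batchImage) and labels (srcLabel) that the
 * constructors below allocate. */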
MemoryMonitor::instanceObject()->freeGpuMemory(dstData); 9 | MemoryMonitor::instanceObject()->freeCpuMemory(srcLabel); 10 | MemoryMonitor::instanceObject()->freeCpuMemory(batchImage); 11 | } 12 | 13 | /* 14 | * Get outputSize 15 | * */ 16 | int DataLayer::getOutputSize() 17 | { 18 | return channels * height * width; 19 | } 20 | 21 | /* 22 | * Get dataSet size 23 | * */ 24 | int DataLayer::getDataSize() 25 | { 26 | return dataSize; 27 | } 28 | 29 | /* 30 | * Get the Data label 31 | * */ 32 | int* DataLayer::getDataLabel() 33 | { 34 | return srcLabel; 35 | } 36 | 37 | /* 38 | * Data layer constructor 39 | */ 40 | DataLayer::DataLayer(string name) 41 | { 42 | _name = name; 43 | _inputName = " "; 44 | srcData = NULL; 45 | dstData = NULL; 46 | srcLabel = NULL; 47 | batchImage = NULL; 48 | dataSize = 0; 49 | prevLayer.clear(); 50 | nextLayer.clear(); 51 | 52 | batchSize = config::instanceObjtce()->get_batchSize(); 53 | inputAmount = config::instanceObjtce()->getChannels(); 54 | inputImageDim = config::instanceObjtce()->get_imageSize(); 55 | 56 | number = config::instanceObjtce()->get_batchSize(); 57 | channels = config::instanceObjtce()->getChannels(); 58 | height = config::instanceObjtce()->get_imageSize(); 59 | width = config::instanceObjtce()->get_imageSize(); 60 | 61 | batchImage = (float*) MemoryMonitor::instanceObject()->cpuMallocMemory(number * channels * height * width * sizeof(float)); 62 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&dstData, number * channels * height * width * sizeof(float)); 63 | srcLabel = (int*)MemoryMonitor::instanceObject()->cpuMallocMemory(batchSize * 1 * 1 * 1 * sizeof(int)); 64 | LOG(INFO) << "(" << number <<"," << channels << "," << height << "," << width << ")";; 65 | } 66 | 67 | /* 68 | * Deep copy constructor 69 | */ 70 | DataLayer::DataLayer(const DataLayer* layer) 71 | { 72 | srcData = NULL; 73 | dstData = NULL; 74 | srcLabel = NULL; 75 | batchImage = NULL; 76 | dataSize = 0; 77 | prevLayer.clear(); 78 | nextLayer.clear(); 79 | 80 | static int idx = 0; 81 | _name = layer->_name + string("_") + int_to_string(idx);; 82 | idx ++; 83 | _inputName = layer->_inputName; 84 | batchSize = layer->batchSize; 85 | inputAmount = layer->inputAmount; 86 | inputImageDim = layer->inputImageDim; 87 | 88 | number = layer->number; 89 | channels = layer->channels; 90 | height = layer->height; 91 | width = layer->width; 92 | 93 | batchImage = (float*) MemoryMonitor::instanceObject()->cpuMallocMemory(number * channels * height * width * sizeof(float)); 94 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &dstData, number * channels * height * width * sizeof(float)); 95 | srcLabel = (int*) MemoryMonitor::instanceObject()->cpuMallocMemory(batchSize * 1 * 1 * 1 * sizeof(int)); 96 | 97 | MemoryMonitor::instanceObject()->cpu2cpu(batchImage, layer->batchImage, number * channels * height * width * sizeof(float)); 98 | MemoryMonitor::instanceObject()->cpu2cpu(srcLabel, layer->srcLabel, batchSize * 1 * 1 * 1 * sizeof(int)); 99 | MemoryMonitor::instanceObject()->gpu2gpu(dstData, layer->dstData, number * channels * height * width * sizeof(float)); 100 | 101 | srcData = layer->dstData; 102 | LOG(INFO) << "(" << number <<"," << channels << "," << height << "," << width << ")";; 103 | } 104 | 105 | /* 106 | * Data Layer Forward propagation 107 | * */ 108 | void DataLayer::forwardPropagation(string train_or_test) 109 | { 110 | //nothing 111 | } 112 | 113 | /* 114 | * Get batch size image 115 | * */ 116 | void DataLayer::getBatch_Images_Label(int index, 117 | 
cuMatrixVector<float>& inputData,
118 | cuMatrix<int>* &inputLabel)
119 | {
120 | dataSize = inputData.size();
121 | int start = index * batchSize;
122 |
123 | int k = 0;
124 | for (int i = start; i < (start + batchSize > inputData.size() ? inputData.size() : (start + batchSize)); i++) {
125 | for (int c = 0; c < channels; c++) {
126 | for (int h = 0; h < height; h++) {
127 | for (int w = 0; w < width; w++) {
128 | batchImage[h * width + w + height * width * c + height * width * channels * k] =
129 | inputData[i]->getValue(h, w, c);
130 | }
131 | }
132 | }
133 |
134 | srcLabel[k] = inputLabel->getValue(i, 0, 0);
135 | k++;
136 | }
137 |
138 | checkCudaErrors(cudaMemcpy(dstData, batchImage, number * channels * height * width * sizeof(float), cudaMemcpyHostToDevice));
139 | }
140 |
141 | /*
142 | * Fill a batch with images sampled at a random offset
143 | * */
144 | void DataLayer::RandomBatch_Images_Label(cuMatrixVector<float>& inputData, cuMatrix<int>* &inputLabel)
145 | {
146 | dataSize = inputData.size();
147 | int randomNum = ((long)rand() + (long)rand()) % (inputData.size() - batchSize);
148 |
149 | for(int i = 0; i < batchSize; i++)
150 | {
151 | for(int c = 0; c < channels; c++)
152 | {
153 | for(int h = 0; h < height; h++)
154 | {
155 | for(int w = 0; w < width; w++)
156 | {
157 | batchImage[h * width + w + height * width * c + channels * height * width * i] =
158 | inputData[i + randomNum]->getValue(h, w, c);
159 | }
160 | }
161 | }
162 | srcLabel[i] = inputLabel->getValue(i + randomNum, 0, 0);
163 | }
164 | checkCudaErrors(cudaMemcpy(dstData, batchImage, number * channels * height * width * sizeof(float), cudaMemcpyHostToDevice));
165 | }
166 |
167 | /*
168 | * Data Layer Backward propagation
169 | * */
170 | void DataLayer::backwardPropagation(float Momentum)
171 | {
172 | //nothing
173 | }
174 |
-------------------------------------------------------------------------------- /examples/dynamic_g_model/dynamic_g_entry.cpp: --------------------------------------------------------------------------------
1 | /*************************************************************************
2 | > File Name: dynamic_g_entry.cpp
3 | > Author: TDX
4 | > Mail: sa614149@mail.ustc.edu.cn
5 | > Created Time: 2017-03-21, Tuesday, 20:08:52
6 | ************************************************************************/
7 |
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include"examples/mnist/mnist.h"
13 | #include"examples/cifar10/cifar-10.h"
14 | #include"common/cuMatrixVector.h"
15 | #include"common/cuMatrix.h"
16 | #include"readData/readMnistData.h"
17 | #include"readData/readCifar10Data.h"
18 | #include"readData/readCifar100.h"
19 | #include"common/utility.cuh"
20 | #include"columnNet.cuh"
21 |
22 | using namespace std;
23 |
24 | void dynamic_g_entry()
25 | {
26 | LOG(INFO) << "Entered the dynamic model-generation mode. Please select a data set.";
27 | LOG(INFO) << "1. 
MNIST 2.CIFAR-10 3.CIFAR-100"; 28 | const string dirPath = "profile/dynamic_g_profile/"; 29 | const string sPhase_begin = "dynamic_begin/"; 30 | const string sPhase_end = "dynamic_end/"; 31 | 32 | cuMatrixVector trainSetX; 33 | cuMatrixVector testSetX; 34 | cuMatrix* trainSetY, *testSetY; 35 | 36 | int batchSize; 37 | int normalized_width; 38 | int imageSize; 39 | // Get end profile 40 | config* endConfig = new config(); 41 | 42 | int cmd; 43 | cin >> cmd; 44 | if(1 == cmd) 45 | { 46 | const string sMnist_begin_dir = dirPath + sPhase_begin + "MnistConfig.txt"; 47 | const string sMnist_end_dir = dirPath + sPhase_end + "MnistConfig.txt"; 48 | // read the begin profile 49 | config::instanceObjtce()->initConfig(sMnist_begin_dir); 50 | normalized_width = config::instanceObjtce()->get_normalizedWidth(); 51 | imageSize = config::instanceObjtce()->get_imageSize(); 52 | 53 | //read the end profile 54 | endConfig->initConfig(sMnist_end_dir); 55 | // Read Mnist dataSet 56 | readMnistData(trainSetX, trainSetY, "data/mnist/train-images-idx3-ubyte", "data/mnist/train-labels-idx1-ubyte", normalized_width, imageSize); 57 | readMnistData(testSetX, testSetY, "data/mnist/t10k-images-idx3-ubyte", "data/mnist/t10k-labels-idx1-ubyte", normalized_width, imageSize); 58 | LOG(INFO) << "*******************************************************"; 59 | LOG(INFO) << " Train_set : " << trainSetX[0]->rows << " x " << trainSetX[0]->cols << " features and " << trainSetX.size() << " samples"; 60 | LOG(INFO) << " Train_label : " << trainSetY->cols << " x " << trainSetY->cols << " features and " << trainSetY->rows << " samples"; 61 | LOG(INFO) << " Test_set : " << testSetX[0]->rows << " x " << testSetX[0]->cols << " features and " << testSetX.size() << " samples"; 62 | LOG(INFO) << " Test_label : " << testSetY->cols << " x " << testSetY->cols << " features and " << testSetY->rows << " samples"; 63 | LOG(INFO) << "*******************************************************"; 64 | 65 | } 66 | else if(2 == cmd) 67 | { 68 | const string sCifar10_begin_dir = dirPath + sPhase_begin + "Cifar10Config.txt"; 69 | const string sCifar10_end_dir = dirPath + sPhase_end + "Cifar10Config.txt"; 70 | //read the begin profile 71 | config::instanceObjtce()->initConfig(sCifar10_begin_dir); 72 | //read the end profile 73 | endConfig->initConfig(sCifar10_end_dir); 74 | //read Cifar-10 DataSet 75 | read_Cifar10_Data(trainSetX, testSetX, trainSetY, testSetY); 76 | 77 | LOG(INFO) << "*******************************************************";; 78 | LOG(INFO) << " Train_set : " << trainSetX[0]->rows << " x " << trainSetX[0]->cols << " features and " << trainSetX.size() << " samples"; 79 | LOG(INFO) << " Train_label : " << trainSetY->cols << " x " << trainSetY->cols << " features and " << trainSetY->rows << " samples"; 80 | LOG(INFO) << " Test_set : " << testSetX[0]->rows << " x " << testSetX[0]->cols << " features and " << testSetX.size() << " samples"; 81 | LOG(INFO) << " Test_label : " << testSetY->cols << " x " << testSetY->cols << " features and " << testSetY->rows << " samples"; 82 | LOG(INFO) << "*******************************************************"; 83 | } 84 | else if(3 == cmd) 85 | { 86 | const string sCifar10_begin_dir = dirPath + sPhase_begin + "Cifar100Config.txt"; 87 | const string sCifar10_end_dir = dirPath + sPhase_end + "Cifar100Config.txt"; 88 | //read the begin profile 89 | config::instanceObjtce()->initConfig(sCifar10_begin_dir); 90 | //read the end profile 91 | endConfig->initConfig(sCifar10_end_dir); 92 | //read Cifar-10 DataSet 93 | 
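// (branch 3: this loads the CIFAR-100 binary data set into the same containers)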
readCifar100Data(trainSetX, testSetX, trainSetY, testSetY); 94 | 95 | LOG(INFO) << "*******************************************************";; 96 | LOG(INFO) << " Train_set : " << trainSetX[0]->rows << " x " << trainSetX[0]->cols << " features and " << trainSetX.size() << " samples"; 97 | LOG(INFO) << " Train_label : " << trainSetY->cols << " x " << trainSetY->cols << " features and " << trainSetY->rows << " samples"; 98 | LOG(INFO) << " Test_set : " << testSetX[0]->rows << " x " << testSetX[0]->cols << " features and " << testSetX.size() << " samples"; 99 | LOG(INFO) << " Test_label : " << testSetY->cols << " x " << testSetY->cols << " features and " << testSetY->rows << " samples"; 100 | LOG(INFO) << "*******************************************************"; 101 | }else 102 | LOG(FATAL) << "DataSet Select Error."; 103 | 104 | int cuda_version = cudnnGetVersion(); 105 | LOG(INFO) << "cudnnGetVersion(): " << cuda_version << " CUDNN VERSION from cudnn.h: "<< CUDNN_VERSION; 106 | /*show the device information*/ 107 | showDevices(); 108 | // Create network 109 | creatColumnNet(1); 110 | //Training Network 111 | dynamic_g_trainNet(trainSetX, trainSetY, testSetX, testSetY, endConfig); 112 | 113 | } 114 | -------------------------------------------------------------------------------- /profile/Cifar100Config.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Cifar10Config File 3 | */ 4 | 5 | /******************************************************** 6 | * 7 | * General Parameters Config 8 | * 9 | *NON_LINEARITY = NL_SIGMOID, NL_TANH, NL_RELU 10 | *LR_POLICY = INV, STEP, FIXED 11 | ********************************************************/ 12 | 13 | BATCH_SIZE = 100; 14 | NORMALIZED_WIDTH = 0; 15 | IMAGES_SIZE = 32; 16 | CNANNELS = 3; 17 | EPOCHS = 4000; 18 | ITER_PER_EPO = 500; 19 | LR_POLICY = INV; 20 | 21 | 22 | 23 | /******************************************************** 24 | * 25 | * Layers Config 26 | * 27 | ********************************************************/ 28 | [ 29 | LAYER = DATA; 30 | NAME = data; 31 | ] 32 | 33 | [ 34 | LAYER = CONV; 35 | NAME = conv1; 36 | INPUT = data; 37 | KERNEL_SIZE = 3; 38 | KERNEL_AMOUNT = 192; 39 | PAD_H = 1; 40 | PAD_W = 1; 41 | STRIDE_H = 1; 42 | STRIDE_W = 1; 43 | INIT_W = 0.01; 44 | LEARN_RATE = 0.05; 45 | WEIGHT_DECAY = 0.0001; 46 | ] 47 | [ 48 | LAYER = CONV; 49 | NAME = conv2; 50 | INPUT = conv1; 51 | KERNEL_SIZE = 3; 52 | KERNEL_AMOUNT = 192; 53 | PAD_H = 1; 54 | PAD_W = 1; 55 | STRIDE_H = 1; 56 | STRIDE_W = 1; 57 | INIT_W = 0.01; 58 | LEARN_RATE = 0.05; 59 | WEIGHT_DECAY = 0.0001; 60 | ] 61 | 62 | [ 63 | LAYER = ACTIVATION; 64 | NAME = activation1; 65 | INPUT = conv2; 66 | NON_LINEARITY = NL_RELU; 67 | ] 68 | [ 69 | LAYER = POOLING; 70 | NAME = pooling1; 71 | INPUT = activation1; 72 | POOLING_TYPE = POOL_MAX; 73 | POOLDIM = 2; 74 | PAD_H = 0; 75 | PAD_W = 0; 76 | STRIDE_H = 2; 77 | STRIDE_W = 2; 78 | ] 79 | 80 | [ 81 | LAYER = CONV; 82 | NAME = conv3; 83 | INPUT = pooling1; 84 | KERNEL_SIZE = 3; 85 | KERNEL_AMOUNT = 192; 86 | PAD_H = 0; 87 | PAD_W = 0; 88 | STRIDE_H = 1; 89 | STRIDE_W = 1; 90 | INIT_W = 0.01; 91 | LEARN_RATE = 0.02; 92 | WEIGHT_DECAY = 0.0001; 93 | ] 94 | 95 | [ 96 | LAYER = ACTIVATION; 97 | NAME = activation2; 98 | INPUT = conv3; 99 | NON_LINEARITY = NL_RELU; 100 | ] 101 | 102 | [ 103 | LAYER = CONV; 104 | NAME = conv4; 105 | INPUT = activation2; 106 | KERNEL_SIZE = 3; 107 | KERNEL_AMOUNT = 192; 108 | PAD_H = 0; 109 | PAD_W = 0; 110 | STRIDE_H = 1; 111 | STRIDE_W = 1; 112 | INIT_W 
= 0.1; 113 | LEARN_RATE = 0.02; 114 | WEIGHT_DECAY = 0.0001; 115 | ] 116 | [ 117 | LAYER = ACTIVATION; 118 | NAME = activation3; 119 | INPUT = conv4; 120 | NON_LINEARITY = NL_RELU; 121 | ] 122 | 123 | [ 124 | LAYER = CONV; 125 | NAME = conv5; 126 | INPUT = activation3; 127 | KERNEL_SIZE = 3; 128 | KERNEL_AMOUNT = 192; 129 | PAD_H = 0; 130 | PAD_W = 0; 131 | STRIDE_H = 1; 132 | STRIDE_W = 1; 133 | INIT_W = 0.1; 134 | LEARN_RATE = 0.02; 135 | WEIGHT_DECAY = 0.0001; 136 | ] 137 | 138 | 139 | [ 140 | LAYER = ACTIVATION; 141 | NAME = activation4; 142 | INPUT = conv5; 143 | NON_LINEARITY = NL_RELU; 144 | ] 145 | 146 | [ 147 | LAYER = CONV; 148 | NAME = conv6; 149 | INPUT = activation4; 150 | KERNEL_SIZE = 3; 151 | KERNEL_AMOUNT = 192; 152 | PAD_H = 1; 153 | PAD_W = 1; 154 | STRIDE_H = 1; 155 | STRIDE_W = 1; 156 | INIT_W = 0.1; 157 | LEARN_RATE = 0.02; 158 | WEIGHT_DECAY = 0.0001; 159 | ] 160 | 161 | [ 162 | LAYER = ACTIVATION; 163 | NAME = activation5; 164 | INPUT = conv6; 165 | NON_LINEARITY = NL_RELU; 166 | ] 167 | 168 | [ 169 | LAYER = DROPOUT; 170 | NAME = dropout1; 171 | INPUT = activation5; 172 | DROP_RATE = 0.5; 173 | ] 174 | 175 | [ 176 | LAYER = CONV; 177 | NAME = conv7; 178 | INPUT = dropout1; 179 | KERNEL_SIZE = 3; 180 | KERNEL_AMOUNT = 192; 181 | PAD_H = 1; 182 | PAD_W = 1; 183 | STRIDE_H = 1; 184 | STRIDE_W = 1; 185 | INIT_W = 0.1; 186 | LEARN_RATE = 0.02; 187 | WEIGHT_DECAY = 0.0001; 188 | ] 189 | 190 | [ 191 | LAYER = ACTIVATION; 192 | NAME = activation6; 193 | INPUT = conv7; 194 | NON_LINEARITY = NL_RELU; 195 | ] 196 | 197 | [ 198 | LAYER = CONV; 199 | NAME = conv8; 200 | INPUT = activation6; 201 | KERNEL_SIZE = 3; 202 | KERNEL_AMOUNT = 192; 203 | PAD_H = 0; 204 | PAD_W = 0; 205 | STRIDE_H = 1; 206 | STRIDE_W = 1; 207 | INIT_W = 0.1; 208 | LEARN_RATE = 0.02; 209 | WEIGHT_DECAY = 0.0001; 210 | ] 211 | 212 | [ 213 | LAYER = ACTIVATION; 214 | NAME = activation7; 215 | INPUT = conv8; 216 | NON_LINEARITY = NL_RELU; 217 | ] 218 | 219 | [ 220 | LAYER = CONV; 221 | NAME = conv9; 222 | INPUT = activation7; 223 | KERNEL_SIZE = 3; 224 | KERNEL_AMOUNT = 192; 225 | PAD_H = 1; 226 | PAD_W = 1; 227 | STRIDE_H = 1; 228 | STRIDE_W = 1; 229 | INIT_W = 0.1; 230 | LEARN_RATE = 0.02; 231 | WEIGHT_DECAY = 0.0001; 232 | ] 233 | 234 | [ 235 | LAYER = ACTIVATION; 236 | NAME = activation8; 237 | INPUT = conv9; 238 | NON_LINEARITY = NL_RELU; 239 | ] 240 | 241 | [ 242 | LAYER = POOLING; 243 | NAME = pooling2; 244 | INPUT = activation8; 245 | POOLING_TYPE = POOL_MAX; 246 | POOLDIM = 2; 247 | PAD_H = 0; 248 | PAD_W = 0; 249 | STRIDE_H = 2; 250 | STRIDE_W = 2; 251 | ] 252 | 253 | [ 254 | LAYER = CONV; 255 | NAME = conv10; 256 | INPUT = pooling2; 257 | KERNEL_SIZE = 1; 258 | KERNEL_AMOUNT = 192; 259 | PAD_H = 0; 260 | PAD_W = 0; 261 | STRIDE_H = 1; 262 | STRIDE_W = 1; 263 | INIT_W = 0.1; 264 | LEARN_RATE = 0.02; 265 | WEIGHT_DECAY = 0.0001; 266 | ] 267 | 268 | [ 269 | LAYER = ACTIVATION; 270 | NAME = activation9; 271 | INPUT = conv10; 272 | NON_LINEARITY = NL_RELU; 273 | ] 274 | 275 | [ 276 | LAYER = CONV; 277 | NAME = conv11; 278 | INPUT = activation9; 279 | KERNEL_SIZE = 1; 280 | KERNEL_AMOUNT = 192; 281 | PAD_H = 0; 282 | PAD_W = 0; 283 | STRIDE_H = 1; 284 | STRIDE_W = 1; 285 | INIT_W = 0.1; 286 | LEARN_RATE = 0.02; 287 | WEIGHT_DECAY = 0.0001; 288 | ] 289 | 290 | [ 291 | LAYER = ACTIVATION; 292 | NAME = activation10; 293 | INPUT = conv11; 294 | NON_LINEARITY = NL_RELU; 295 | ] 296 | 297 | [ 298 | LAYER = CONV; 299 | NAME = conv12; 300 | INPUT = activation10; 301 | KERNEL_SIZE = 1; 302 | KERNEL_AMOUNT = 100; 
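/*
 * conv12 follows the Network-in-Network pattern: a 1x1 convolution with one
 * kernel per class (KERNEL_AMOUNT = 100 = NUM_CLASSES), so the 4x4 average
 * pooling below collapses each 4x4 feature map into a single per-class score
 * for the softmax layer.
 */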
303 | PAD_H = 0; 304 | PAD_W = 0; 305 | STRIDE_H = 1; 306 | STRIDE_W = 1; 307 | INIT_W = 0.1; 308 | LEARN_RATE = 0.02; 309 | WEIGHT_DECAY = 0.0001; 310 | ] 311 | 312 | [ 313 | LAYER = ACTIVATION; 314 | NAME = activation11; 315 | INPUT = conv12; 316 | NON_LINEARITY = NL_RELU; 317 | ] 318 | 319 | [ 320 | LAYER = POOLING; 321 | NAME = pooling3; 322 | INPUT = activation11; 323 | POOLING_TYPE = POOL_AVE_INCLUDE_PAD; 324 | POOLDIM = 4; 325 | PAD_H = 0; 326 | PAD_W = 0; 327 | STRIDE_H = 4; 328 | STRIDE_W = 4; 329 | ] 330 | [ 331 | LAYER = SOFTMAX; 332 | NAME = softmax; 333 | INPUT = pooling3; 334 | NUM_CLASSES = 100; 335 | WEIGHT_DECAY = 0.0001; 336 | ] 337 | -------------------------------------------------------------------------------- /common/utility.cu: -------------------------------------------------------------------------------- 1 | #include"utility.cuh" 2 | #include"common/MemoryMonitor.h" 3 | 4 | /*int to string*/ 5 | string int_to_string(int num) 6 | { 7 | stringstream ss; 8 | ss<< num; 9 | string s; 10 | s = ss.str(); 11 | return s; 12 | } 13 | 14 | /*showDevices information*/ 15 | void showDevices() 16 | { 17 | int totalDevices; 18 | cudaGetDeviceCount(&totalDevices); 19 | std::cout<<"There are "<cpuMallocMemory(size); 153 | MemoryMonitor::instanceObject()->gpu2cpu(cpuArray, dev_array, size); 154 | 155 | while(oBegin < oEnd) 156 | { 157 | int inBegin = oBegin * times; 158 | int inEnd = oEnd * times; 159 | 160 | for(int i = 0; i < times; i++) 161 | { 162 | fSwap = cpuArray[inBegin]; 163 | cpuArray[inBegin] = cpuArray[inEnd]; 164 | cpuArray[inEnd] = fSwap; 165 | 166 | inBegin++; 167 | inEnd ++; 168 | } 169 | oBegin ++; 170 | oEnd --; 171 | } 172 | 173 | MemoryMonitor::instanceObject()->cpu2Gpu(dev_array, cpuArray, size); 174 | } 175 | 176 | __global__ void compute_array_square(float* array, float* outArray, int size) 177 | { 178 | int thread_index = threadIdx.x + blockIdx.x * blockDim.x; 179 | int num_threads = blockDim.x * gridDim.x; 180 | 181 | for(int i = 0; i < size; i += num_threads) 182 | { 183 | int index = i + thread_index; 184 | if(index < size) 185 | { 186 | outArray[index] = array[index] * array[index]; 187 | } 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /profile/dynamic_g_profile/dynamic_end/Cifar100Config.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Cifar10Config File 3 | */ 4 | 5 | /******************************************************** 6 | * 7 | * General Parameters Config 8 | * 9 | *NON_LINEARITY = NL_SIGMOID, NL_TANH, NL_RELU 10 | *LR_POLICY = INV, STEP, FIXED 11 | ********************************************************/ 12 | 13 | BATCH_SIZE = 100; 14 | NORMALIZED_WIDTH = 0; 15 | IMAGES_SIZE = 32; 16 | CNANNELS = 3; 17 | EPOCHS = 4000; 18 | ITER_PER_EPO = 500; 19 | LR_POLICY = INV; 20 | 21 | 22 | 23 | /******************************************************** 24 | * 25 | * Layers Config 26 | * 27 | ********************************************************/ 28 | [ 29 | LAYER = DATA; 30 | NAME = data; 31 | ] 32 | 33 | [ 34 | LAYER = CONV; 35 | NAME = conv1; 36 | INPUT = data; 37 | KERNEL_SIZE = 3; 38 | KERNEL_AMOUNT = 192; 39 | PAD_H = 1; 40 | PAD_W = 1; 41 | STRIDE_H = 1; 42 | STRIDE_W = 1; 43 | INIT_W = 0.01; 44 | LEARN_RATE = 0.05; 45 | WEIGHT_DECAY = 0.0001; 46 | ] 47 | [ 48 | LAYER = CONV; 49 | NAME = conv2; 50 | INPUT = conv1; 51 | KERNEL_SIZE = 3; 52 | KERNEL_AMOUNT = 192; 53 | PAD_H = 1; 54 | PAD_W = 1; 55 | STRIDE_H = 1; 56 | STRIDE_W = 1; 57 | 
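/*
 * INIT_W sets the scale of the random weight initialisation; LEARN_RATE and
 * WEIGHT_DECAY are per-layer hyper-parameters (the first two convolutions use
 * a higher learning rate, 0.05, than the 0.02 used deeper in the network).
 */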
INIT_W = 0.01; 58 | LEARN_RATE = 0.05; 59 | WEIGHT_DECAY = 0.0001; 60 | ] 61 | 62 | [ 63 | LAYER = ACTIVATION; 64 | NAME = activation1; 65 | INPUT = conv2; 66 | NON_LINEARITY = NL_RELU; 67 | ] 68 | [ 69 | LAYER = POOLING; 70 | NAME = pooling1; 71 | INPUT = activation1; 72 | POOLING_TYPE = POOL_MAX; 73 | POOLDIM = 2; 74 | PAD_H = 0; 75 | PAD_W = 0; 76 | STRIDE_H = 2; 77 | STRIDE_W = 2; 78 | ] 79 | 80 | [ 81 | LAYER = CONV; 82 | NAME = conv3; 83 | INPUT = pooling1; 84 | KERNEL_SIZE = 3; 85 | KERNEL_AMOUNT = 192; 86 | PAD_H = 0; 87 | PAD_W = 0; 88 | STRIDE_H = 1; 89 | STRIDE_W = 1; 90 | INIT_W = 0.01; 91 | LEARN_RATE = 0.02; 92 | WEIGHT_DECAY = 0.0001; 93 | ] 94 | 95 | [ 96 | LAYER = ACTIVATION; 97 | NAME = activation2; 98 | INPUT = conv3; 99 | NON_LINEARITY = NL_RELU; 100 | ] 101 | 102 | [ 103 | LAYER = CONV; 104 | NAME = conv4; 105 | INPUT = activation2; 106 | KERNEL_SIZE = 3; 107 | KERNEL_AMOUNT = 192; 108 | PAD_H = 0; 109 | PAD_W = 0; 110 | STRIDE_H = 1; 111 | STRIDE_W = 1; 112 | INIT_W = 0.1; 113 | LEARN_RATE = 0.02; 114 | WEIGHT_DECAY = 0.0001; 115 | ] 116 | [ 117 | LAYER = ACTIVATION; 118 | NAME = activation3; 119 | INPUT = conv4; 120 | NON_LINEARITY = NL_RELU; 121 | ] 122 | 123 | [ 124 | LAYER = CONV; 125 | NAME = conv5; 126 | INPUT = activation3; 127 | KERNEL_SIZE = 3; 128 | KERNEL_AMOUNT = 192; 129 | PAD_H = 0; 130 | PAD_W = 0; 131 | STRIDE_H = 1; 132 | STRIDE_W = 1; 133 | INIT_W = 0.1; 134 | LEARN_RATE = 0.02; 135 | WEIGHT_DECAY = 0.0001; 136 | ] 137 | 138 | 139 | [ 140 | LAYER = ACTIVATION; 141 | NAME = activation4; 142 | INPUT = conv5; 143 | NON_LINEARITY = NL_RELU; 144 | ] 145 | 146 | [ 147 | LAYER = CONV; 148 | NAME = conv6; 149 | INPUT = activation4; 150 | KERNEL_SIZE = 3; 151 | KERNEL_AMOUNT = 192; 152 | PAD_H = 1; 153 | PAD_W = 1; 154 | STRIDE_H = 1; 155 | STRIDE_W = 1; 156 | INIT_W = 0.1; 157 | LEARN_RATE = 0.02; 158 | WEIGHT_DECAY = 0.0001; 159 | ] 160 | 161 | [ 162 | LAYER = ACTIVATION; 163 | NAME = activation5; 164 | INPUT = conv6; 165 | NON_LINEARITY = NL_RELU; 166 | ] 167 | 168 | [ 169 | LAYER = DROPOUT; 170 | NAME = dropout1; 171 | INPUT = activation5; 172 | DROP_RATE = 0.5; 173 | ] 174 | 175 | [ 176 | LAYER = CONV; 177 | NAME = conv7; 178 | INPUT = dropout1; 179 | KERNEL_SIZE = 3; 180 | KERNEL_AMOUNT = 192; 181 | PAD_H = 1; 182 | PAD_W = 1; 183 | STRIDE_H = 1; 184 | STRIDE_W = 1; 185 | INIT_W = 0.1; 186 | LEARN_RATE = 0.02; 187 | WEIGHT_DECAY = 0.0001; 188 | ] 189 | 190 | [ 191 | LAYER = ACTIVATION; 192 | NAME = activation6; 193 | INPUT = conv7; 194 | NON_LINEARITY = NL_RELU; 195 | ] 196 | 197 | [ 198 | LAYER = CONV; 199 | NAME = conv8; 200 | INPUT = activation6; 201 | KERNEL_SIZE = 3; 202 | KERNEL_AMOUNT = 192; 203 | PAD_H = 0; 204 | PAD_W = 0; 205 | STRIDE_H = 1; 206 | STRIDE_W = 1; 207 | INIT_W = 0.1; 208 | LEARN_RATE = 0.02; 209 | WEIGHT_DECAY = 0.0001; 210 | ] 211 | 212 | [ 213 | LAYER = ACTIVATION; 214 | NAME = activation7; 215 | INPUT = conv8; 216 | NON_LINEARITY = NL_RELU; 217 | ] 218 | 219 | [ 220 | LAYER = CONV; 221 | NAME = conv9; 222 | INPUT = activation7; 223 | KERNEL_SIZE = 3; 224 | KERNEL_AMOUNT = 192; 225 | PAD_H = 1; 226 | PAD_W = 1; 227 | STRIDE_H = 1; 228 | STRIDE_W = 1; 229 | INIT_W = 0.1; 230 | LEARN_RATE = 0.02; 231 | WEIGHT_DECAY = 0.0001; 232 | ] 233 | 234 | [ 235 | LAYER = ACTIVATION; 236 | NAME = activation8; 237 | INPUT = conv9; 238 | NON_LINEARITY = NL_RELU; 239 | ] 240 | 241 | [ 242 | LAYER = POOLING; 243 | NAME = pooling2; 244 | INPUT = activation8; 245 | POOLING_TYPE = POOL_MAX; 246 | POOLDIM = 2; 247 | PAD_H = 0; 248 | PAD_W = 0; 249 | 
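/* POOLDIM equal to the stride gives non-overlapping 2x2 max pooling, halving
 * each spatial dimension (at this depth the maps shrink from 8x8 to 4x4). */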
STRIDE_H = 2; 250 | STRIDE_W = 2; 251 | ] 252 | 253 | [ 254 | LAYER = CONV; 255 | NAME = conv10; 256 | INPUT = pooling2; 257 | KERNEL_SIZE = 1; 258 | KERNEL_AMOUNT = 192; 259 | PAD_H = 0; 260 | PAD_W = 0; 261 | STRIDE_H = 1; 262 | STRIDE_W = 1; 263 | INIT_W = 0.1; 264 | LEARN_RATE = 0.02; 265 | WEIGHT_DECAY = 0.0001; 266 | ] 267 | 268 | [ 269 | LAYER = ACTIVATION; 270 | NAME = activation9; 271 | INPUT = conv10; 272 | NON_LINEARITY = NL_RELU; 273 | ] 274 | 275 | [ 276 | LAYER = CONV; 277 | NAME = conv11; 278 | INPUT = activation9; 279 | KERNEL_SIZE = 1; 280 | KERNEL_AMOUNT = 192; 281 | PAD_H = 0; 282 | PAD_W = 0; 283 | STRIDE_H = 1; 284 | STRIDE_W = 1; 285 | INIT_W = 0.1; 286 | LEARN_RATE = 0.02; 287 | WEIGHT_DECAY = 0.0001; 288 | ] 289 | 290 | [ 291 | LAYER = ACTIVATION; 292 | NAME = activation10; 293 | INPUT = conv11; 294 | NON_LINEARITY = NL_RELU; 295 | ] 296 | 297 | [ 298 | LAYER = CONV; 299 | NAME = conv12; 300 | INPUT = activation10; 301 | KERNEL_SIZE = 1; 302 | KERNEL_AMOUNT = 100; 303 | PAD_H = 0; 304 | PAD_W = 0; 305 | STRIDE_H = 1; 306 | STRIDE_W = 1; 307 | INIT_W = 0.1; 308 | LEARN_RATE = 0.02; 309 | WEIGHT_DECAY = 0.0001; 310 | ] 311 | 312 | [ 313 | LAYER = ACTIVATION; 314 | NAME = activation11; 315 | INPUT = conv12; 316 | NON_LINEARITY = NL_RELU; 317 | ] 318 | 319 | [ 320 | LAYER = POOLING; 321 | NAME = pooling3; 322 | INPUT = activation11; 323 | POOLING_TYPE = POOL_AVE_INCLUDE_PAD; 324 | POOLDIM = 4; 325 | PAD_H = 0; 326 | PAD_W = 0; 327 | STRIDE_H = 4; 328 | STRIDE_W = 4; 329 | ] 330 | [ 331 | LAYER = SOFTMAX; 332 | NAME = softmax; 333 | INPUT = pooling3; 334 | NUM_CLASSES = 100; 335 | WEIGHT_DECAY = 0.0001; 336 | ] 337 | -------------------------------------------------------------------------------- /composite/Concat.cu: -------------------------------------------------------------------------------- 1 | #include"Concat.h" 2 | 3 | 4 | void Concat::concatInit() 5 | { 6 | host_offset = (int*)MemoryMonitor::instanceObject()->cpuMallocMemory(4 * sizeof(int)); 7 | separateDim = (int*)MemoryMonitor::instanceObject()->cpuMallocMemory(4 * sizeof(int)); 8 | host_channels = (int*)MemoryMonitor::instanceObject()->cpuMallocMemory(4 * sizeof(int)); 9 | 10 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &dstData, number * channels * height * width * sizeof(float)); 11 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &dev_offset, 4 * sizeof(int)); 12 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &dev_channels, 4 * sizeof(int)); 13 | } 14 | 15 | /* 16 | * Destructor 17 | * */ 18 | Concat::~Concat() 19 | { 20 | MemoryMonitor::instanceObject()->freeCpuMemory(host_offset); 21 | MemoryMonitor::instanceObject()->freeCpuMemory(host_channels); 22 | MemoryMonitor::instanceObject()->freeCpuMemory(separateDim); 23 | MemoryMonitor::instanceObject()->freeGpuMemory(dstData); 24 | MemoryMonitor::instanceObject()->freeGpuMemory(diffData); 25 | MemoryMonitor::instanceObject()->freeGpuMemory(dev_offset); 26 | MemoryMonitor::instanceObject()->freeGpuMemory(dev_channels); 27 | MemoryMonitor::instanceObject()->freeGpuMemory(separate_diffData); 28 | prevDiff.vector_clear(); 29 | separate_dstData.vector_clear(); 30 | } 31 | 32 | /*incetion concat*/ 33 | Concat::Concat(Layers*& Inner_Layers, const param_tuple& args) 34 | { 35 | std::tie(one, three, five, pool_proj) = args; 36 | size = 0; 37 | host_offset = NULL; 38 | dev_offset = NULL; 39 | separateDim = NULL; 40 | host_channels = NULL; 41 | dev_channels = NULL; 42 | separate_diffData = NULL; 43 | diffData = NULL; 44 | 
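// dstData will hold the channel-wise concatenation of the four inception
// branches; per image the layout is [ one | three | five | pool_proj ] blocks
// of height*width floats each, which is what host_offset below indexes into.
// For example, with hypothetical branch widths (one, three, five, pool_proj) =
// (64, 128, 32, 32), host_offset would be {0, 64, 192, 224} * height * width
// and channels = 256.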
dstData = NULL; 45 | InnerLayers = Inner_Layers; 46 | 47 | /*prevLayer dim*/ 48 | prev_num = InnerLayers[0].getLayer(InnerLayers[0].getLayersName(0))->prevLayer[0]->number; 49 | prev_channels = InnerLayers[0].getLayer(InnerLayers[0].getLayersName(0))->prevLayer[0]->channels; 50 | prev_height = InnerLayers[0].getLayer(InnerLayers[0].getLayersName(0))->prevLayer[0]->height; 51 | prev_width = InnerLayers[0].getLayer(InnerLayers[0].getLayersName(0))->prevLayer[0]->width; 52 | size = prev_num * prev_channels * prev_height * prev_width; 53 | 54 | number = prev_num; 55 | channels = one + three + five + pool_proj; 56 | height = prev_height; 57 | width = prev_width; 58 | 59 | this->concatInit(); 60 | 61 | host_offset[0] = 0; 62 | host_offset[1] = one * height * width; 63 | host_offset[2] = (one + three) * height * width; 64 | host_offset[3] = (one + three + five) * height * width; 65 | 66 | separateDim[0] = number * one * height * width; 67 | separateDim[1] = number * three * height * width; 68 | separateDim[2] = number * five * height * width; 69 | separateDim[3] = number * pool_proj * height * width; 70 | 71 | host_channels[0] = one; 72 | host_channels[1] = three; 73 | host_channels[2] = five; 74 | host_channels[3] = pool_proj; 75 | 76 | checkCudaErrors(cudaMemcpy(dev_offset, host_offset, 4 * sizeof(int), cudaMemcpyHostToDevice)); 77 | checkCudaErrors(cudaMemcpy(dev_channels, host_channels, 4 * sizeof(int), cudaMemcpyHostToDevice)); 78 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&separate_diffData, getMax(separateDim, 4) * sizeof(float)); 79 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&diffData, size * sizeof(float)); 80 | 81 | separate_dstData.push_back(InnerLayers[0].getLayer("one")->dstData); 82 | separate_dstData.push_back(InnerLayers[1].getLayer("three")->dstData); 83 | separate_dstData.push_back(InnerLayers[2].getLayer("five")->dstData); 84 | separate_dstData.push_back(InnerLayers[3].getLayer("pool_proj")->dstData); 85 | separate_dstData.toGpu(); 86 | 87 | prevDiff.push_back(InnerLayers[0].getLayer(InnerLayers[0].getLayersName(0))->diffData); 88 | prevDiff.push_back(InnerLayers[1].getLayer(InnerLayers[1].getLayersName(0))->diffData); 89 | prevDiff.push_back(InnerLayers[2].getLayer(InnerLayers[2].getLayersName(0))->diffData); 90 | prevDiff.push_back(InnerLayers[3].getLayer(InnerLayers[3].getLayersName(0))->diffData); 91 | prevDiff.toGpu(); 92 | } 93 | 94 | /*inception forwardPropagation*/ 95 | float* Concat::forwardSetup() 96 | { 97 | dim3 block(number, 4); 98 | dim3 thread(1024); 99 | MultiChannelsMerge<<>>(separate_dstData.devPoint, 100 | dstData, 101 | dev_channels, 102 | dev_offset, 103 | height, 104 | channels); 105 | cudaThreadSynchronize(); 106 | return dstData; 107 | } 108 | 109 | 110 | /*split the delta*/ 111 | void Concat::split_DiffData(int index, float* diffData) 112 | { 113 | int curChannel = host_channels[index]; 114 | int curOffset = host_offset[index]; 115 | 116 | dim3 block(number); 117 | dim3 thread(1024); 118 | MultiChannelsSplit<<>>(diffData, 119 | separate_diffData, 120 | curChannel, 121 | curOffset, 122 | height, 123 | channels); 124 | cudaThreadSynchronize(); 125 | InnerLayers[index].getLayer(InnerLayers[index].getLayersName(InnerLayers[index].getLayersNum()-1))->nextLayer[0]->diffData = separate_diffData; 126 | } 127 | 128 | 129 | /*inception backwardPropagation*/ 130 | float* Concat::backwardSetup() 131 | { 132 | /*necessary*/ 133 | MemoryMonitor::instanceObject()->gpuMemoryMemset(diffData, size * sizeof(float)); 134 | dim3 block(1); 135 
| dim3 thread(1024); 136 | MultiArrayAdd<<>>(prevDiff.devPoint, 137 | diffData, 138 | prev_num, 139 | prev_channels, 140 | prev_height, 141 | prev_width); 142 | cudaThreadSynchronize(); 143 | return diffData; 144 | } 145 | -------------------------------------------------------------------------------- /composite/Inception.cpp: -------------------------------------------------------------------------------- 1 | #include"Inception.h" 2 | 3 | 4 | /* 5 | * Inception constructor 6 | * */ 7 | Inception::Inception(LayersBase* prevLayer, 8 | int sign, 9 | float* rate, 10 | const param_tuple& args) 11 | { 12 | std::tie(one, three, five, three_reduce, five_reduce, pool_proj, 13 | inputAmount, inputImageDim, epsilon, lambda) = args; 14 | 15 | dstData = NULL; 16 | diffData = NULL; 17 | lrate = rate; 18 | InnerLayers = new Layers[4]; 19 | 20 | Conv_one = new ConvLayer("one", sign, 21 | ConvLayer::param_tuple(0, 0, 1, 1, 1, 22 | one, 23 | inputAmount, 24 | inputImageDim, 25 | epsilon, 26 | *lrate, 27 | lambda)); 28 | 29 | Conv_three_reduce = new ConvLayer("three_reduce", sign, 30 | ConvLayer::param_tuple(0, 0, 1, 1, 1, 31 | three_reduce, 32 | inputAmount, 33 | inputImageDim, 34 | epsilon, 35 | *lrate, 36 | lambda)); 37 | 38 | Conv_three = new ConvLayer("three", sign, 39 | ConvLayer::param_tuple(1, 1, 1, 1, 3, 40 | three, 41 | three_reduce, 42 | inputImageDim, 43 | epsilon, 44 | *lrate, 45 | lambda)); 46 | 47 | Conv_five_reduce = new ConvLayer("five_reduce", sign, 48 | ConvLayer::param_tuple(0, 0, 1, 1, 1, 49 | five_reduce, 50 | inputAmount, 51 | inputImageDim, 52 | epsilon, 53 | *lrate, 54 | lambda)); 55 | 56 | Conv_five = new ConvLayer("five", sign, 57 | ConvLayer::param_tuple(2, 2, 1, 1, 5, 58 | five, 59 | five_reduce, 60 | inputImageDim, 61 | epsilon, 62 | *lrate, 63 | lambda)); 64 | 65 | max_pool = new PoolLayer("max_pool", 66 | PoolLayer::param_tuple("POOL_MAX", 3, 1, 1, 1, 1, 67 | inputImageDim, 68 | inputAmount)); 69 | 70 | Conv_pool_proj = new ConvLayer("pool_proj",sign, 71 | ConvLayer::param_tuple(0, 0, 1, 1, 1, 72 | pool_proj, 73 | inputAmount, 74 | inputImageDim, 75 | epsilon, 76 | *lrate, 77 | lambda)); 78 | /*mainly use in backpropagation*/ 79 | share_Layer = new ShareLayer("share"); 80 | 81 | /*four branch*/ 82 | InnerLayers[0].storLayers("one", Conv_one); 83 | InnerLayers[1].storLayers("three_reduce", Conv_three_reduce); 84 | InnerLayers[1].storLayers("three", Conv_three); 85 | InnerLayers[2].storLayers("five_reduce", Conv_five_reduce); 86 | InnerLayers[2].storLayers("five", Conv_five); 87 | InnerLayers[3].storLayers("max_pool", max_pool); 88 | InnerLayers[3].storLayers("pool_proj", Conv_pool_proj); 89 | 90 | /*the last layer is shared layer*/ 91 | for(int i = 0; i < 4; i++) 92 | { 93 | InnerLayers[i].getLayer(InnerLayers[i].getLayersName(0))->insertPrevLayer( prevLayer); 94 | InnerLayers[i].getLayer(InnerLayers[i].getLayersName(InnerLayers[i].getLayersNum() - 1))->insertNextlayer(share_Layer); 95 | } 96 | 97 | concat = new Concat(InnerLayers, Concat::param_tuple(one, three, five, pool_proj)); 98 | } 99 | 100 | /*get result*/ 101 | float* Inception::getConcatData() 102 | { 103 | return dstData; 104 | } 105 | 106 | /*get delta*/ 107 | float* Inception::getInceptionDiffData() 108 | { 109 | return diffData; 110 | } 111 | 112 | Inception::~Inception() 113 | { 114 | delete share_Layer; 115 | delete concat; 116 | delete InnerLayers; 117 | delete Conv_one; 118 | delete Conv_three_reduce; 119 | delete Conv_three; 120 | delete Conv_five; 121 | delete Conv_five_reduce; 122 | delete Conv_pool_proj; 
123 | delete max_pool; 124 | } 125 | 126 | /* 127 | * Inception forwardPropagation 128 | * */ 129 | void Inception::forwardPropagation(string train_or_test) 130 | { 131 | LayersBase* layer; 132 | 133 | for(int i = 0; i < 4; i++) 134 | { 135 | for(int j = 0; j < InnerLayers[i].getLayersNum(); j++) 136 | { 137 | layer = InnerLayers[i].getLayer(InnerLayers[i].getLayersName(j)); 138 | layer->lrate = *lrate; 139 | layer->forwardPropagation(train_or_test); 140 | } 141 | } 142 | /*get the inception result data*/ 143 | dstData = concat->forwardSetup(); 144 | } 145 | 146 | 147 | /* 148 | * Inception backwardPropagation 149 | * */ 150 | void Inception::backwardPropagation(float*& nextLayerDiffData, float Momentum) 151 | { 152 | LayersBase* layer; 153 | for(int i = 0; i < 4; i++) 154 | { 155 | concat->split_DiffData(i, nextLayerDiffData); 156 | 157 | for(int j = InnerLayers[i].getLayersNum() - 1; j >= 0; j--) 158 | { 159 | layer = InnerLayers[i].getLayer(InnerLayers[i].getLayersName(j)); 160 | layer->backwardPropagation(Momentum); 161 | } 162 | } 163 | /*get inception diff*/ 164 | diffData = concat->backwardSetup(); 165 | } 166 | -------------------------------------------------------------------------------- /layers/DropOutLayer.cu: -------------------------------------------------------------------------------- 1 | #include"DropOutLayer.h" 2 | #include 3 | 4 | /* 5 | * DropOut layer constructor 6 | * */ 7 | DropOutLayer::DropOutLayer(string name) 8 | { 9 | _name = name; 10 | _inputName = " "; 11 | srcData = NULL; 12 | dstData = NULL; 13 | nextLayer.clear(); 14 | prevLayer.clear(); 15 | outputPtr = NULL; 16 | 17 | configDropOut* curConfig = (configDropOut*) config::instanceObjtce()->getLayersByName(_name); 18 | string prevLayerName = curConfig->_input; 19 | LayersBase* prev_Layer = (LayersBase*) Layers::instanceObject()->getLayer(prevLayerName); 20 | 21 | inputAmount = prev_Layer->channels; 22 | inputImageDim = prev_Layer->height; 23 | number = prev_Layer->number; 24 | channels = prev_Layer->channels; 25 | height = prev_Layer->height; 26 | width = prev_Layer->width; 27 | outputSize = channels * height * width; 28 | DropOut_rate = curConfig->dropOut_rate; 29 | 30 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &outputPtr, number * channels * height * width * sizeof(float)); 31 | this->createHandles(); 32 | LOG(INFO) << "(" << number << "," << channels << "," << height << "," << width << ")"; 33 | } 34 | 35 | /* 36 | * overload constructor 37 | */ 38 | DropOutLayer::DropOutLayer(const DropOutLayer* layer) 39 | { 40 | srcData = NULL; 41 | dstData = NULL; 42 | nextLayer.clear(); 43 | prevLayer.clear(); 44 | outputPtr = NULL; 45 | 46 | static int idx = 0; 47 | _name = layer->_name + string("_") + int_to_string(idx); 48 | idx ++; 49 | _inputName = layer->_inputName; 50 | 51 | inputAmount = layer->inputAmount; 52 | inputImageDim = layer->inputImageDim; 53 | number = layer->number; 54 | channels = layer->channels; 55 | height = layer->height; 56 | width = layer->width; 57 | outputSize = layer->outputSize; 58 | DropOut_rate = layer->DropOut_rate; 59 | 60 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &outputPtr, number * channels * height * width * sizeof(float)); 61 | 62 | this->createHandles(); 63 | LOG(INFO) << "(" << number << "," << channels << "," << height << "," << width << ")"; 64 | cout<<"Drop-copy"<_name; 79 | _inputName = templateConfig->_input; 80 | configDropOut* curConfig = (configDropOut*) templateConfig; 81 | LayersBase* prev_Layer = (LayersBase*) 
Layers::instanceObject()->getLayer(_inputName);
82 |
83 | inputAmount = prev_Layer->channels;
84 | inputImageDim = prev_Layer->height;
85 | number = prev_Layer->number;
86 | channels = prev_Layer->channels;
87 | height = prev_Layer->height;
88 | width = prev_Layer->width;
89 | outputSize = channels * height * width;
90 | DropOut_rate = curConfig->dropOut_rate;
91 |
92 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &outputPtr, number * channels * height * width * sizeof(float));
93 |
94 | this->createHandles();
95 | LOG(INFO) << "(" << number << "," << channels << "," << height << "," << width << ")";
96 | cout<<"Drop-copy"<<endl;
97 | }
98 |
99 | /*
100 | * Destructor
101 | * */
102 | DropOutLayer::~DropOutLayer()
103 | {
104 | MemoryMonitor::instanceObject()->freeGpuMemory(outputPtr);
105 | destroyHandles();
106 | }
107 |
108 | /*
109 | * Get the outputSize
110 | * */
111 | int DropOutLayer::getOutputSize()
112 | {
113 | return outputSize;
114 | }
115 |
116 | /*
117 | * Create the cuRAND generator handle
118 | * */
119 | void DropOutLayer::createHandles()
120 | {
121 | curandCreateGenerator(&curandGenerator_DropOut, CURAND_RNG_PSEUDO_MTGP32);
122 | }
123 |
124 | __global__ void dropout_train(float* data, float* outputPtr, int size, float probability)
125 | {
126 | int thread_index = threadIdx.x + blockIdx.x * blockDim.x;
127 | int num_threads = blockDim.x * gridDim.x;
128 | for(int i = 0; i < size; i += num_threads)
129 | {
130 | int index = i + thread_index;
131 | if(index < size)
132 | {
133 | if(outputPtr[index] < probability)
134 | data[index] = 0;
135 | }
136 | }
137 | }
138 |
139 | __global__ void dropout_test(float* data, int size, float probability)
140 | {
141 | int thread_index = threadIdx.x + blockIdx.x * blockDim.x;
142 | int num_threads = blockDim.x * gridDim.x;
143 | for(int i = 0; i < size; i += num_threads)
144 | {
145 | int index = i + thread_index;
146 | if(index < size)
147 | {
148 | data[index] = data[index] * (1.0f - probability); // scale by the keep probability (1 - drop rate)
149 | }
150 | }
151 | }
152 |
153 | void DropOutLayer::CreateUniform(int size)
154 | {
155 | curandSetPseudoRandomGeneratorSeed(curandGenerator_DropOut, time(NULL));
156 | curandGenerateUniform(curandGenerator_DropOut, outputPtr, size);
157 | }
158 |
159 | void DropOutLayer::Dropout_TrainSet(float* data, int size, float dropout_rate)
160 | {
161 | int threadsPerBlock = 256;
162 | int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
163 | dropout_train<<<blocksPerGrid, threadsPerBlock>>>(data, outputPtr, size, dropout_rate);
164 | cudaThreadSynchronize();
165 | }
166 |
167 | void DropOutLayer::Dropout_TestSet(float* data, int size, float dropout_rate)
168 | {
169 | int threadsPerBlock = 256;
170 | int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
171 | dropout_test<<<blocksPerGrid, threadsPerBlock>>>(data, size, dropout_rate);
172 | cudaThreadSynchronize();
173 | }
174 |
175 | // Reshape the dimensions
176 | void DropOutLayer::ReShape()
177 | {
178 |
179 | LayersBase* prev_Layer = (LayersBase*) Layers::instanceObject()->getLayer(_inputName);
180 |
181 | inputAmount = prev_Layer->channels;
182 | inputImageDim = prev_Layer->height;
183 | number = prev_Layer->number;
184 | channels = prev_Layer->channels;
185 | height = prev_Layer->height;
186 | width = prev_Layer->width;
187 | outputSize = channels * height * width;
188 | }
189 |
190 | void DropOutLayer::forwardPropagation(string train_or_test)
191 | {
192 | srcData = prevLayer[0]->dstData;
193 | dstData = srcData;
194 |
195 | //dynamically adjust the dimensions
196 | ReShape();
197 | /*apply the dropout mask during training; at test time scale by the keep probability*/
198 | if(train_or_test == "train")
199 | {
200 | CreateUniform(number * channels * height * width);
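// outputPtr now holds one uniform (0,1] sample per activation; Dropout_TrainSet
// zeroes every activation whose sample falls below DropOut_rate. The same mask is
// reused (without reseeding) in backwardPropagation, so the gradients of dropped
// units are zeroed consistently.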
201 | Dropout_TrainSet(srcData, number * channels * height * width, DropOut_rate);
202 | }
203 | else
204 | Dropout_TestSet(srcData, number * channels * height * width, DropOut_rate);
205 | }
206 |
207 |
208 | void DropOutLayer::backwardPropagation(float Momentum)
209 | {
210 | int nIndex = m_nCurBranchIndex;
211 | diffData = nextLayer[nIndex]->diffData;
212 | Dropout_TrainSet(diffData, number * channels * height * width, DropOut_rate); // reuse the forward mask on the gradients
213 | }
214 |
215 | void DropOutLayer::destroyHandles()
216 | {
217 | curandDestroyGenerator(curandGenerator_DropOut);
218 |
219 | }
220 |
-------------------------------------------------------------------------------- /composite/NodeFission.cpp: --------------------------------------------------------------------------------
1 | #include"NodeFission.h"
2 | #include"../config/config.h"
3 | #include"../layers/LayersBase.h"
4 |
5 | //node fission
6 | void NodeFission(LayersBase* splitNode, LayersBase* nextNode) {
7 | vector<LayersBase*>::iterator l_iter;
8 | vector<configBase*>::iterator c_iter;
9 |
10 | //modify config
11 | configBase* configFiss = FissionFactory::instanceObject()->createConfig(splitNode);
12 |
13 | configBase* curConfig = config::instanceObjtce()->getLayersByName(splitNode->_name);
14 | configBase* prevConfig = config::instanceObjtce()->getLayersByName(splitNode->_inputName);
15 | configBase* nextConfig = config::instanceObjtce()->getLayersByName(nextNode->_name);
16 | //necessary
17 | configFiss->_prev.clear();
18 | configFiss->_next.clear();
19 |
20 | prevConfig->_next.push_back(configFiss);
21 | configFiss->_prev.push_back(prevConfig);
22 | configFiss->_next.push_back(nextConfig);
23 | nextConfig->_prev.push_back(configFiss);
24 |
25 | config::instanceObjtce()->insertLayerByName(configFiss->_name, configFiss);
26 |
27 | for (c_iter = curConfig->_next.begin(); c_iter != curConfig->_next.end(); c_iter++) {
28 | if ((*c_iter)->_name == (nextConfig->_name)) {
29 | curConfig->_next.erase(c_iter);
30 | break;
31 | }
32 | }
33 |
34 | for (c_iter = nextConfig->_prev.begin(); c_iter != nextConfig->_prev.end(); c_iter++) {
35 | if ((*c_iter)->_name == (curConfig->_name)) {
36 | nextConfig->_prev.erase(c_iter);
37 | break;
38 | }
39 | }
40 |
41 | //the next config's input name must be updated as well: only the prev/next pointers were removed, and since there is no depth-first rebuild the graph would otherwise become inconsistent
42 | nextConfig->_input = configFiss->_name;
43 |
44 | //modify Layers
45 | LayersBase* layerFiss = FissionFactory::instanceObject()->createLayer(splitNode);
46 | layerFiss->nextLayer.clear();
47 | layerFiss->prevLayer.clear();
48 | //layerFiss
49 | LayersBase* prevLayer = Layers::instanceObject()->getLayer(splitNode->_inputName);
50 | //prev
51 | Layers::instanceObject()->storLayers(prevLayer->_name, layerFiss->_name, layerFiss);
52 | //next
53 | layerFiss->insertNextlayer(nextNode);
54 | //splitNode
55 | //delete next
56 | for (l_iter = splitNode->nextLayer.begin(); l_iter != splitNode->nextLayer.end(); l_iter++) {
57 | if ((*l_iter)->_name == nextNode->_name) {
58 | splitNode->nextLayer.erase(l_iter);
59 | break;
60 | }
61 | }
62 | //delete prev
63 | for (l_iter = nextNode->prevLayer.begin(); l_iter != nextNode->prevLayer.end(); l_iter++)
64 | {
65 | if ((*l_iter)->_name == (splitNode->_name)) {
66 | nextNode->prevLayer.erase(l_iter);
67 | break;
68 | }
69 | }
70 | //update the input name
71 | nextNode->_inputName = layerFiss->_name;
72 | //insert one node
73 | nextNode->insertPrevLayer(layerFiss);
74 |
75 | }
76 |
77 | //node fission for dynamic_generate model
78 | void nodeGenerate(configBase* templateConfig, configBase* newConfig_prev, configBase* newConfig_next)
79 | {
80 | vector<LayersBase*>::iterator l_iter;
81 | vector<configBase*>::iterator c_iter;
82 |
83 | //make sure neither pointer is null (two separate checks joined with ||, not &&: either missing neighbour makes insertion impossible)
84 | if(NULL == newConfig_prev || NULL == newConfig_next) return;
85 | //modify config
86 | configBase* newConfig = DynamicFactory::instanceObject()->createConfig(templateConfig);
87 | //necessary
88 | newConfig->_prev.clear();
89 | newConfig->_next.clear();
90 |
91 | //cout<<newConfig->_name<<" "<<newConfig->_type<<" "<<newConfig->_input<<" "<<newConfig->_prev.size()<<" "<<newConfig->_next.size()<<endl;
92 |
93 | newConfig_prev->_next.push_back(newConfig);
94 | newConfig->_prev.push_back(newConfig_prev);
95 | newConfig->_next.push_back(newConfig_next);
96 | newConfig_next->_prev.push_back(newConfig);
97 |
98 | //cout<<newConfig->_name<<" "<<newConfig->_type<<" "<<newConfig->_input<<" "<<newConfig->_prev[0]->_name<<" "<<newConfig->_next[0]->_name<<endl;
99 |
100 | //insert the new config node into the config table
101 | config::instanceObjtce()->insertLayerByName(newConfig->_name, newConfig);
102 | //cout<<"no hidden"<<endl;
103 | //the prev config deletes its pointer to the origin next config
104 | for (c_iter = newConfig_prev->_next.begin(); c_iter != newConfig_prev->_next.end(); c_iter++)
105 | {
106 | if ((*c_iter)->_name == (newConfig_next->_name)) {
107 | newConfig_prev->_next.erase(c_iter);
108 | break;
109 | }
110 | }
111 | //the next config deletes its pointer to the origin prev config
112 | for (c_iter = newConfig_next->_prev.begin(); c_iter != newConfig_next->_prev.end(); c_iter++) {
113 | if ((*c_iter)->_name == (newConfig_prev->_name)) {
114 | newConfig_next->_prev.erase(c_iter);
115 | break;
116 | }
117 | }
118 |
119 | //cout<<newConfig_next->_input<<endl;
120 | //must: the next config's input is now the new node
121 | newConfig_next->_input = newConfig->_name;
122 |
123 | //modify Layers by new config node
124 | LayersBase* newLayer = DynamicFactory::instanceObject()->createLayer(newConfig);
125 | newLayer->nextLayer.clear();
126 | newLayer->prevLayer.clear();
127 | //prev layer
128 | LayersBase* newLayer_prev = Layers::instanceObject()->getLayer(newConfig->_input);
129 | //next layer
130 | LayersBase* newLayer_next = Layers::instanceObject()->getLayer(newConfig_next->_name);
131 | //the new node inserts its next layer
132 | newLayer->insertNextlayer(newLayer_next);
133 | //insert into layer table
134 | Layers::instanceObject()->storLayers(newLayer_prev->_name, newLayer->_name, newLayer);
135 | //delete next
136 | for (l_iter = newLayer_prev->nextLayer.begin(); l_iter != newLayer_prev->nextLayer.end(); l_iter++)
137 | {
138 | if ((*l_iter)->_name == newLayer_next->_name) {
139 | newLayer_prev->nextLayer.erase(l_iter);
140 | break;
141 | }
142 | }
143 | //delete prev
144 | for (l_iter = newLayer_next->prevLayer.begin(); l_iter != newLayer_next->prevLayer.end(); l_iter++)
145 | {
146 | if ((*l_iter)->_name == (newLayer_prev->_name))
147 | {
148 | newLayer_next->prevLayer.erase(l_iter);
149 | break;
150 | }
151 | }
152 |
153 | //must: modify the next layer's input name
154 | newLayer_next->_inputName = newLayer->_name;
155 | //insert one node
156 | newLayer_next->insertPrevLayer(newLayer);
157 | //cout<<"create layer success"<<endl;
158 | }
159 |
160 | //softmax node fission
161 | void softmaxFission(LayersBase* splitNode)
162 | {
163 | //modify config
164 | configBase* configFiss = FissionFactory::instanceObject()->createConfig(splitNode);
165 | configBase* curConfig = config::instanceObjtce()->getLayersByName(splitNode->_name);
166 | configBase* prevConfig = config::instanceObjtce()->getLayersByName(splitNode->_inputName);
167 | //necessary
168 | configFiss->_prev.clear();
169 | configFiss->_next.clear();
170 |
171 | prevConfig->_next.push_back(configFiss);
172 | configFiss->_prev.push_back(prevConfig);
173 | config::instanceObjtce()->insertLayerByName(configFiss->_name, configFiss);
174 |
175 |
176 | //modify Layers
177 | LayersBase* layerFiss = FissionFactory::instanceObject()->createLayer(splitNode);
178 | layerFiss->nextLayer.clear();
179 | layerFiss->prevLayer.clear();
180 | //layerFiss
181 | LayersBase* prevLayer = Layers::instanceObject()->getLayer(splitNode->_inputName);
182 | //prev
183 | Layers::instanceObject()->storLayers(prevLayer->_name,
layerFiss->_name,layerFiss); 184 | } 185 | -------------------------------------------------------------------------------- /readData/readMnistData.cpp: -------------------------------------------------------------------------------- 1 | #include"../common/cuMatrix.h" 2 | #include"../common/cuMatrixVector.h" 3 | #include"opencv2/highgui/highgui_c.h" 4 | #include"opencv2/highgui.hpp" 5 | #include"opencv2/imgproc.hpp" 6 | #include"readMnistData.h" 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | using namespace cv; 13 | 14 | /*Reverse the int*/ 15 | int ReverseInt(int digit) 16 | { 17 | unsigned char ch1,ch2,ch3,ch4; 18 | ch1 = digit & 255; 19 | ch2 = (digit >> 8) & 255; 20 | ch3 = (digit >> 16) & 255; 21 | ch4 = (digit >> 24) & 255; 22 | return ((int)ch1 << 24) + ((int)ch2 << 16) + ((int)ch3 << 8) + ((int)ch4); 23 | } 24 | 25 | /*read the data from mnist*/ 26 | void read_Mnist(string xpath, vector &get_image_data) 27 | { 28 | ifstream file(xpath, ios::binary); 29 | if(file.is_open()) 30 | { 31 | int magic_number = 0; 32 | int number_of_images = 0; 33 | int n_rows = 0; 34 | int n_cols = 0; 35 | file.read((char*) &magic_number, sizeof(magic_number)); 36 | magic_number = ReverseInt(magic_number); 37 | 38 | file.read((char*) &number_of_images, sizeof(number_of_images)); 39 | number_of_images = ReverseInt(number_of_images); 40 | 41 | file.read((char*) &n_rows, sizeof(n_rows)); 42 | n_rows = ReverseInt(n_rows); 43 | 44 | file.read((char*) &n_cols, sizeof(n_cols)); 45 | n_cols = ReverseInt(n_cols); 46 | 47 | for(int i = 0; i < number_of_images; i++) 48 | { 49 | Mat tpmat=Mat::zeros(n_rows, n_cols,CV_8UC1); 50 | for(int row = 0; row < n_rows; row ++) 51 | { 52 | for(int col = 0; col < n_cols ; col ++) 53 | { 54 | unsigned char temp = 0; 55 | file.read((char*) &temp, sizeof(temp)); 56 | tpmat.at(row,col) = temp; 57 | } 58 | } 59 | get_image_data.push_back(tpmat); 60 | } 61 | 62 | }else{ 63 | 64 | printf("read_Mnist:DataSet open error\n"); 65 | exit(1); 66 | } 67 | } 68 | 69 | /*read the label from mnist*/ 70 | void read_Mnist_label(string ypath, cuMatrix* &image_label) 71 | { 72 | ifstream file(ypath, ios::binary); 73 | 74 | if(file.is_open()) 75 | { 76 | int magic_number = 0; 77 | int number_of_label = 0; 78 | 79 | file.read((char*) &magic_number, sizeof(magic_number)); 80 | magic_number = ReverseInt(magic_number); 81 | 82 | file.read((char*) &number_of_label, sizeof(number_of_label)); 83 | number_of_label = ReverseInt(number_of_label); 84 | 85 | /*alloc label memory*/ 86 | image_label = new cuMatrix(number_of_label, 1, 1); 87 | 88 | for(int i=0; isetValue(i, 0, 0, temp); 93 | } 94 | }else{ 95 | 96 | printf("read_Mnist_label:labelSet open error\n"); 97 | exit(1); 98 | } 99 | } 100 | 101 | /*pading digit*/ 102 | Mat pad_image(Mat& input_mat, int end_size) 103 | { 104 | Mat out_Mat; 105 | int pading = end_size - input_mat.cols; 106 | int left_top_pad = round(pading / 2); 107 | int right_down_pad = pading - left_top_pad; 108 | if(left_top_pad + right_down_pad > 0) 109 | { 110 | copyMakeBorder(input_mat, 111 | out_Mat, 112 | left_top_pad, 113 | right_down_pad, 114 | left_top_pad, 115 | right_down_pad, 116 | BORDER_REPLICATE); 117 | }else{ 118 | 119 | int start_index = - left_top_pad; 120 | int end_index = input_mat.cols + right_down_pad; 121 | out_Mat = input_mat(Range(start_index,end_index), Range(start_index,end_index)); 122 | } 123 | return out_Mat; 124 | 125 | } 126 | 127 | /*normalized digit*/ 128 | Mat normalized_digit(Mat &inputMat, int normalized_width, int end_size) 
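// Estimate the digit's width from the number of non-zero pixel columns, resize
// the image so that this width matches normalized_width, then pad (or crop) the
// result back to an end_size x end_size patch.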
129 | { 130 | /*Non zero col numbers*/ 131 | int nZeroCols = 0; 132 | 133 | for(unsigned int i = 0;i < inputMat.cols; i++) 134 | { 135 | unsigned int tempSum = 0; 136 | for(unsigned j = 0; j< inputMat.rows; j++) 137 | { 138 | tempSum += inputMat.at(j,i); 139 | } 140 | /*if the col is nonzero ,then +1*/ 141 | if(tempSum != 0) nZeroCols++; 142 | } 143 | 144 | Mat temp_outMat = inputMat; 145 | 146 | /*The difference between the current pixel of the normalized digital pixel*/ 147 | int width_diff = normalized_width - nZeroCols; 148 | 149 | if(width_diff) 150 | { 151 | int re_size = inputMat.cols + width_diff; 152 | resize(inputMat, temp_outMat, Size(re_size,re_size)); 153 | } 154 | return pad_image(temp_outMat,end_size); 155 | } 156 | 157 | /*normalized dataset*/ 158 | void get_normalizedData(vector &train_data, 159 | cuMatrix* &data_label, 160 | cuMatrixVector &normalizedData, 161 | int normalized_width, 162 | int end_size) 163 | { 164 | vector tempTrainData; 165 | if(normalized_width || end_size != 28) 166 | { 167 | if(normalized_width) 168 | cout<<"... normalizing digits to width "< "<getValue(i,0,0) != 1)) 178 | { 179 | tempTrainData.push_back(normalized_digit(train_data[i],normalized_width,end_size)); 180 | 181 | }else 182 | { 183 | tempTrainData.push_back(pad_image(train_data[i],end_size)); 184 | } 185 | } 186 | }else 187 | { 188 | cout<<"... skipping digit normalization and image padding"<* tmpmat = new cuMatrix(tempTrainData[0].rows, tempTrainData[0].cols, 1); 196 | for(int r = 0; r < tempTrainData[0].rows; r++) 197 | { 198 | for(int c = 0; c < tempTrainData[0].cols; c++) 199 | { 200 | float temp = (float)(tempTrainData[i].at< unsigned char>(r,c) * 2.0f / 255.0f - 1.0f); 201 | tmpmat->setValue(r, c, 0, temp); 202 | } 203 | } 204 | normalizedData.push_back(tmpmat); 205 | } 206 | } 207 | 208 | /*read the data and label*/ 209 | void readMnistData(cuMatrixVector& normalizedData, 210 | cuMatrix*& dataY, 211 | string Xpath, 212 | string Ypath, 213 | int normalized_width, 214 | int out_imageSize) 215 | { 216 | /*read mnist images into vector*/ 217 | vector trainData; 218 | read_Mnist(Xpath,trainData); 219 | 220 | /*read mnist label into cuMatrix*/ 221 | read_Mnist_label(Ypath,dataY); 222 | 223 | /*normalized data set*/ 224 | get_normalizedData(trainData, 225 | dataY, 226 | normalizedData, 227 | normalized_width, 228 | out_imageSize); 229 | } 230 | -------------------------------------------------------------------------------- /layers/LRNLayer.cu: -------------------------------------------------------------------------------- 1 | #include"LRNLayer.h" 2 | #include"../common/checkError.h" 3 | #include"../cuDNN_netWork.h" 4 | #include 5 | 6 | /* 7 | * Create CUDNN Handles 8 | * */ 9 | void LRNLayer::createHandles() 10 | { 11 | checkCUDNN(cudnnCreateTensorDescriptor(&srcTensorDesc)); 12 | checkCUDNN(cudnnCreateTensorDescriptor(&dstTensorDesc)); 13 | checkCUDNN(cudnnCreateLRNDescriptor(&normDesc)); 14 | } 15 | 16 | /* 17 | * Destroy CUDNN Handles 18 | * */ 19 | void LRNLayer::destroyHandles() 20 | { 21 | checkCUDNN(cudnnDestroyTensorDescriptor(srcTensorDesc)); 22 | checkCUDNN(cudnnDestroyTensorDescriptor(dstTensorDesc)) 23 | checkCUDNN(cudnnDestroyLRNDescriptor(normDesc)); 24 | } 25 | 26 | /* 27 | * Get outputSize 28 | * */ 29 | int LRNLayer::getOutputSize() 30 | { 31 | return outputSize; 32 | } 33 | 34 | /* 35 | * LRN layer constructor 36 | * */ 37 | LRNLayer::LRNLayer(string name) 38 | { 39 | _name = name; 40 | _inputName = " "; 41 | srcData = NULL; 42 | dstData = NULL; 43 | diffData = NULL; 44 
| prevLayer.clear(); 45 | nextLayer.clear(); 46 | srcTensorDesc = NULL; 47 | dstTensorDesc = NULL; 48 | 49 | configLRN* curConfig = (configLRN*)config::instanceObjtce()->getLayersByName(_name); 50 | string prevLayerName = curConfig->_input; 51 | LayersBase* prev_Layer = (LayersBase*)Layers::instanceObject()->getLayer(prevLayerName); 52 | 53 | lrnN = curConfig->_lrnN; 54 | lrnAlpha = curConfig->_lrnAlpha; 55 | lrnBeta = curConfig->_lrnBeta; 56 | lrnK = 1.0; 57 | 58 | inputAmount = prev_Layer->channels; 59 | inputImageDim = prev_Layer->height; 60 | number = prev_Layer->number; 61 | channels = prev_Layer->channels; 62 | height = prev_Layer->height; 63 | width = prev_Layer->width; 64 | inputSize = prev_Layer->getOutputSize(); 65 | outputSize =inputSize; 66 | 67 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&dstData, number * channels * height * width * sizeof(float)); 68 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&diffData, number * channels * height* width * sizeof(float)); 69 | 70 | this->createHandles(); 71 | LOG(INFO) << "(" << number << "," << channels << "," << height << "," << width << ")"; 72 | } 73 | 74 | /* 75 | * Deep copy constructor 76 | */ 77 | LRNLayer::LRNLayer(const LRNLayer* layer) 78 | { 79 | srcData = NULL; 80 | dstData = NULL; 81 | diffData = NULL; 82 | prevLayer.clear(); 83 | nextLayer.clear(); 84 | srcTensorDesc = NULL; 85 | dstTensorDesc = NULL; 86 | 87 | static int idx = 0; 88 | _name = layer->_name + string("_") + int_to_string(idx); 89 | idx ++; 90 | _inputName = layer->_inputName; 91 | 92 | lrnN = layer->lrnN; 93 | lrnAlpha = layer->lrnAlpha; 94 | lrnBeta = layer->lrnBeta; 95 | lrnK = layer->lrnK; 96 | 97 | inputAmount = layer->inputAmount; 98 | inputImageDim = layer->inputImageDim; 99 | number = layer->number; 100 | channels = layer->channels; 101 | height = layer->height; 102 | width = layer->width; 103 | inputSize = layer->inputSize; 104 | outputSize = layer->outputSize; 105 | 106 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &dstData, number * channels * height * width * sizeof(float)); 107 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &diffData, number * channels * height * width * sizeof(float)); 108 | 109 | this->createHandles(); 110 | LOG(INFO) << "(" << number << "," << channels << "," << height << "," << width << ")"; 111 | } 112 | 113 | /* 114 | * Deep copy constructor 115 | */ 116 | LRNLayer::LRNLayer(const configBase* templateConfig) 117 | { 118 | srcData = NULL; 119 | dstData = NULL; 120 | diffData = NULL; 121 | prevLayer.clear(); 122 | nextLayer.clear(); 123 | srcTensorDesc = NULL; 124 | dstTensorDesc = NULL; 125 | 126 | _name = templateConfig->_name; 127 | _inputName = templateConfig->_input; 128 | configLRN* curConfig = (configLRN*) templateConfig; 129 | LayersBase* prev_Layer = (LayersBase*) Layers::instanceObject()->getLayer(_inputName); 130 | lrnN = curConfig->_lrnN; 131 | lrnAlpha = curConfig->_lrnAlpha; 132 | lrnBeta = curConfig->_lrnBeta; 133 | lrnK = 1.0; 134 | 135 | inputAmount = prev_Layer->channels; 136 | inputImageDim = prev_Layer->height; 137 | number = prev_Layer->number; 138 | channels = prev_Layer->channels; 139 | height = prev_Layer->height; 140 | width = prev_Layer->width; 141 | inputSize = prev_Layer->getOutputSize(); 142 | outputSize = inputSize; 143 | 144 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &dstData, number * channels * height * width * sizeof(float)); 145 | MemoryMonitor::instanceObject()->gpuMallocMemory((void**) &diffData, number * channels * 
/*
 * Destructor
 * */
LRNLayer::~LRNLayer()
{
    MemoryMonitor::instanceObject()->freeGpuMemory(dstData);
    MemoryMonitor::instanceObject()->freeGpuMemory(diffData);
    destroyHandles();
}

// ReShape the dimensions
void LRNLayer::ReShape()
{
    LayersBase* prev_Layer = (LayersBase*) Layers::instanceObject()->getLayer(_inputName);
    inputAmount = prev_Layer->channels;
    inputImageDim = prev_Layer->height;
    number = prev_Layer->number;
    channels = prev_Layer->channels;
    height = prev_Layer->height;
    width = prev_Layer->width;
    inputSize = prev_Layer->getOutputSize();
    outputSize = inputSize;
}

/*
 * LRN Forward propagation
 * */
void LRNLayer::forwardPropagation(string train_or_test)
{
    srcData = prevLayer[0]->dstData;

    // dynamically adjust the dimensions
    ReShape();
    checkCUDNN(cudnnSetLRNDescriptor(normDesc,
                                     lrnN,
                                     lrnAlpha,
                                     lrnBeta,
                                     lrnK));

    checkCUDNN(cudnnSetTensor4dDescriptor(srcTensorDesc,
                                          cuDNN_netWork::instanceObject()->GetTensorFormat(),
                                          cuDNN_netWork::instanceObject()->GetDataType(),
                                          number,
                                          channels,
                                          height,
                                          width));

    checkCUDNN(cudnnSetTensor4dDescriptor(dstTensorDesc,
                                          cuDNN_netWork::instanceObject()->GetTensorFormat(),
                                          cuDNN_netWork::instanceObject()->GetDataType(),
                                          number,
                                          channels,
                                          height,
                                          width));

    float alpha = 1.0f;
    float beta = 0.0f;
    checkCUDNN(cudnnLRNCrossChannelForward(cuDNN_netWork::instanceObject()->GetcudnnHandle(),
                                           normDesc,
                                           CUDNN_LRN_CROSS_CHANNEL_DIM1,
                                           &alpha,
                                           srcTensorDesc,
                                           srcData,
                                           &beta,
                                           dstTensorDesc,
                                           dstData));
}

/*
 * LRN Backward propagation
 * */
void LRNLayer::backwardPropagation(float Momentum)
{
    float alpha = 1.0f;
    float beta = 0.0f;
    int nIndex = m_nCurBranchIndex;
    checkCUDNN(cudnnLRNCrossChannelBackward(cuDNN_netWork::instanceObject()->GetcudnnHandle(),
                                            normDesc,
                                            CUDNN_LRN_CROSS_CHANNEL_DIM1,
                                            &alpha,
                                            dstTensorDesc,
                                            dstData,
                                            dstTensorDesc,
                                            nextLayer[nIndex]->diffData,
                                            srcTensorDesc,
                                            srcData,
                                            &beta,
                                            srcTensorDesc,
                                            diffData));
}
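For intuition, a CPU reference sketch of the cross-channel normalization that forwardPropagation above delegates to cudnnLRNCrossChannelForward. It follows the AlexNet-style convention y_c = x_c / (k + alpha * sum of x_j^2 over the lrnN-channel window)^beta; whether alpha is additionally divided by the window size varies between implementations, so treat the exact scaling as illustrative and consult the cuDNN documentation.

    /*CPU reference sketch (illustrative convention, see note above)*/
    #include <algorithm>
    #include <cmath>
    #include <vector>

    std::vector<float> lrnAcrossChannels(const std::vector<float>& x, /*one pixel across C channels*/
                                         int n, float alpha, float beta, float k)
    {
        int C = (int)x.size();
        std::vector<float> y(C);
        for(int c = 0; c < C; c++)
        {
            /*window of n channels centered on c, clipped at the borders*/
            int lo = std::max(0, c - n / 2);
            int hi = std::min(C - 1, c + n / 2);
            float sum = 0.0f;
            for(int j = lo; j <= hi; j++) sum += x[j] * x[j];
            y[c] = x[c] / std::pow(k + alpha * sum, beta);
        }
        return y;
    }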
--------------------------------------------------------------------------------
/composite/NodeFission.h:
--------------------------------------------------------------------------------
/*
 * NodeFission.h
 *
 *  Created on: Mar 24, 2016
 *      Author: tdx
 */

#ifndef NODEFISSION_H_
#define NODEFISSION_H_

#include<iostream>
#include"../layers/LayersBase.h"
#include"../common/utility.cuh"
#include"../config/config.h"
#include"../layers/DataLayer.h"
#include"../layers/ConvLayer.h"
#include"../layers/ActivationLayer.h"
#include"../layers/DropOutLayer.h"
#include"../layers/HiddenLayer.h"
#include"../layers/LRNLayer.h"
#include"../layers/PoolLayer.h"
#include"../layers/SoftMaxLayer.h"

void NodeFission(LayersBase* splitNode, LayersBase* nextNode);
void softmaxFission(LayersBase* splitNode);
void nodeGenerate(configBase* templateConfig, configBase* newConfig_prev, configBase* newConfig_next);

//static factory
class FissionFactory
{
public:
    static FissionFactory* instanceObject(){
        static FissionFactory* factory = new FissionFactory();
        return factory;
    }

    static LayersBase* createLayer(LayersBase* node)
    {
        string nodeType = config::instanceObjtce()->getLayersByName(node->_name)->_type;

        if(string("DATA") == nodeType)
        {
            return new DataLayer((DataLayer*)node);
        }else if(string("CONV") == nodeType)
        {
            return new ConvLayer((ConvLayer*)node);
        }else if(string("POOLING") == nodeType)
        {
            return new PoolLayer((PoolLayer*)node);
        }else if(string("HIDDEN") == nodeType)
        {
            return new HiddenLayer((HiddenLayer*)node);
        }else if(string("SOFTMAX") == nodeType)
        {
            return new SoftMaxLayer((SoftMaxLayer*)node);
        }else if(string("ACTIVATION") == nodeType)
        {
            return new ActivationLayer((ActivationLayer*)node);
        }else if(string("LRN") == nodeType)
        {
            return new LRNLayer((LRNLayer*)node);
        }else if(string("DROPOUT") == nodeType)
        {
            return new DropOutLayer((DropOutLayer*)node);
        }else
        {
            cout << "NodeFission: Layer does not exist" << endl;
            exit(0);
        }
    }

    static configBase* createConfig(LayersBase* node)
    {
        configBase* curConfig = config::instanceObjtce()->getLayersByName(node->_name);
        string nodeType = curConfig->_type;

        if (string("DATA") == nodeType)
        {
            static int idx = 0;
            configData* tmp = (configData*) curConfig;
            configData* config = new configData( *tmp );
            config->_name = curConfig->_name + string("_") + int_to_string(idx);
            idx++;
            return config;

        } else if (string("CONV") == nodeType)
        {
            static int idx = 0;
            configConv* tmp = (configConv*)curConfig;
            configConv* config = new configConv(*tmp);
            config->_name = curConfig->_name + string("_") + int_to_string(idx);
            idx++;
            return config;

        } else if (string("POOLING") == nodeType)
        {
            static int idx = 0;
            configPooling* tmp = (configPooling*)curConfig;
            configPooling* config = new configPooling(*tmp);
            config->_name = curConfig->_name + string("_") + int_to_string(idx);
            idx++;
            return config;

        } else if (string("HIDDEN") == nodeType)
        {
            static int idx = 0;
            configHidden* tmp = (configHidden*)curConfig;
            configHidden* config = new configHidden(*tmp);
            config->_name = curConfig->_name + string("_") + int_to_string(idx);
            idx++;
            return config;

        } else if (string("SOFTMAX") == nodeType)
        {
            static int idx = 0;
            configSoftMax* tmp = (configSoftMax*)curConfig;
            configSoftMax* config = new configSoftMax(*tmp);
            config->_name = curConfig->_name + string("_") + int_to_string(idx);
            idx++;
            return config;

        } else if (string("ACTIVATION") == nodeType)
        {
            static int idx = 0;
            configActivation* tmp = (configActivation*)curConfig;
            configActivation* config = new configActivation(*tmp);
            config->_name = curConfig->_name + string("_") + int_to_string(idx);
            idx++;
            return config;

        } else if (string("LRN") == nodeType){
            static int idx = 0;
            configLRN* tmp = (configLRN*)curConfig;
            configLRN* config = new configLRN(*tmp);
            config->_name = curConfig->_name + string("_") + int_to_string(idx);
            idx++;
            return config;

        } else if (string("DROPOUT") == nodeType){
            static int idx = 0;
            configDropOut* tmp = (configDropOut*)curConfig;
            configDropOut* config = new configDropOut(*tmp);
            config->_name = curConfig->_name + string("_") + int_to_string(idx);
            idx++;
            return config;
        }else{
            cout << "NodeFission: Config does not exist" << endl;
            exit(0);
        }
    }

};

//static factory
class DynamicFactory
{
public:
    static DynamicFactory* instanceObject(){
        static DynamicFactory* factory = new DynamicFactory();
        return factory;
    }

    static LayersBase* createLayer(configBase* node)
    {
        string nodeType = node->_type;

        if(string("DATA") == nodeType)
        {
            return new DataLayer(node);
        }else if(string("CONV") == nodeType)
        {
            return new ConvLayer(node);
        }else if(string("POOLING") == nodeType)
        {
            return new PoolLayer(node);
        }else if(string("HIDDEN") == nodeType)
        {
            return new HiddenLayer(node);
        }else if(string("SOFTMAX") == nodeType)
        {
            return new SoftMaxLayer(node);
        }else if(string("ACTIVATION") == nodeType)
        {
            return new ActivationLayer(node);
        }else if(string("LRN") == nodeType)
        {
            return new LRNLayer(node);
        }else if(string("DROPOUT") == nodeType)
        {
            return new DropOutLayer(node);
        }else
        {
            LOG(FATAL) << "NodeGenerate: Layer type " << nodeType << " does not exist";
            return NULL;
        }
    }

    static configBase* createConfig(configBase* templateConfig)
    {
        string nodeType = templateConfig->_type;

        if (string("DATA") == nodeType)
        {
            configData* tmp = (configData*) templateConfig;
            configData* config = new configData( *tmp );
            return config;

        } else if (string("CONV") == nodeType)
        {
            configConv* tmp = (configConv*)templateConfig;
            configConv* config = new configConv(*tmp);
            return config;

        } else if (string("POOLING") == nodeType)
        {
            configPooling* tmp = (configPooling*)templateConfig;
            configPooling* config = new configPooling(*tmp);
            return config;

        } else if (string("HIDDEN") == nodeType)
        {
            configHidden* tmp = (configHidden*)templateConfig;
            configHidden* config = new configHidden(*tmp);
            return config;

        } else if (string("SOFTMAX") == nodeType)
        {
            configSoftMax* tmp = (configSoftMax*)templateConfig;
            configSoftMax* config = new configSoftMax(*tmp);
            return config;

        } else if (string("ACTIVATION") == nodeType)
        {
            configActivation* tmp = (configActivation*)templateConfig;
            configActivation* config = new configActivation(*tmp);
            return config;

        } else if (string("LRN") == nodeType)
        {
            configLRN* tmp = (configLRN*)templateConfig;
            configLRN* config = new configLRN(*tmp);
            return config;

        } else if (string("DROPOUT") == nodeType)
        {
            configDropOut* tmp = (configDropOut*)templateConfig;
            configDropOut* config = new configDropOut(*tmp);
            return config;
        }else
        {
            LOG(FATAL) << "NodeGenerate: Config type " << nodeType << " does not exist";
            return NULL;
        }
    }

};

#endif /* NODEFISSION_H_ */
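A minimal driver sketch for the two factories (hypothetical call site; the layer name "conv1" is an assumption). FissionFactory clones an existing layer together with its config during node fission, while DynamicFactory builds fresh layers directly from config templates:

    /*hypothetical driver; assumes the network contains a layer named "conv1"*/
    LayersBase* node = (LayersBase*) Layers::instanceObject()->getLayer("conv1");

    LayersBase* twin = FissionFactory::createLayer(node);    /*deep copy; the clone's name gets a "_<idx>" suffix*/
    configBase* conf = FissionFactory::createConfig(node);   /*duplicated config carrying the matching name*/

    configBase* tmpl  = config::instanceObjtce()->getLayersByName("conv1");
    configBase* conf2 = DynamicFactory::createConfig(tmpl);  /*plain duplicate of the template config*/
    LayersBase* fresh = DynamicFactory::createLayer(tmpl);   /*new layer built from the template config*/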
--------------------------------------------------------------------------------
/layers/ActivationLayer.cu:
--------------------------------------------------------------------------------
#include"ActivationLayer.h"
#include"../config/config.h"
#include"../cuDNN_netWork.h"
#include"../tests/test_layer.h"
#include"../common/utility.cuh"
#include<glog/logging.h>

/*
 * Create CUDNN handles
 */
void ActivationLayer::createHandles()
{
    checkCUDNN(cudnnCreateTensorDescriptor(&srcTensorDesc));
    checkCUDNN(cudnnCreateTensorDescriptor(&dstTensorDesc));
    checkCUDNN(cudnnCreateActivationDescriptor(&activDesc));
}

/*
 * Destroy CUDNN Handles
 * */
void ActivationLayer::destroyHandles()
{
    checkCUDNN(cudnnDestroyTensorDescriptor(srcTensorDesc));
    checkCUDNN(cudnnDestroyTensorDescriptor(dstTensorDesc));
    checkCUDNN(cudnnDestroyActivationDescriptor(activDesc));
}

/*
 * Get the output size
 * */
int ActivationLayer::getOutputSize()
{
    return outputSize;
}

/*
 * Activation layer constructor
 * */
ActivationLayer::ActivationLayer(string name)
{
    _name = name;
    _inputName = " ";
    srcData = NULL;
    dstData = NULL;
    diffData = NULL;
    prevLayer.clear();
    nextLayer.clear();
    activDesc = NULL;
    srcTensorDesc = NULL;
    dstTensorDesc = NULL;

    configActivation* curConfig = (configActivation*) config::instanceObjtce()->getLayersByName(_name);
    string preLayerName = curConfig->_input;
    LayersBase* prev_Layer = (LayersBase*) Layers::instanceObject()->getLayer(preLayerName);

    inputAmount = prev_Layer->channels;
    inputImageDim = prev_Layer->height;
    number = prev_Layer->number;
    channels = prev_Layer->channels;
    height = prev_Layer->height;
    width = prev_Layer->width;
    outputSize = channels * height * width;
    ActivationMode = curConfig->_non_linearity;
    MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&dstData, number * channels * height * width * sizeof(float));
    MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&diffData, number * channels * height * width * sizeof(float));

    this->createHandles();
    LOG(INFO) << "(" << number << "," << channels << "," << height << "," << width << ")";
}

/*
 * Deep copy constructor
 */
ActivationLayer::ActivationLayer(const ActivationLayer* layer)
{
    srcData = NULL;
    dstData = NULL;
    diffData = NULL;
    prevLayer.clear();
    nextLayer.clear();
    activDesc = NULL;
    srcTensorDesc = NULL;
    dstTensorDesc = NULL;

    static int idx = 0;
    _name = layer->_name + string("_") + int_to_string(idx);
    idx++;
    _inputName = layer->_inputName;
    inputAmount = layer->inputAmount;
    inputImageDim = layer->inputImageDim;
    number = layer->number;
    channels = layer->channels;
    height = layer->height;
    width = layer->width;
    outputSize = layer->outputSize;
    ActivationMode = layer->ActivationMode;

    MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&dstData, number * channels * height * width * sizeof(float));
    MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&diffData, number * channels * height * width * sizeof(float));

    this->createHandles();
    LOG(INFO) << "(" << number << "," << channels << "," << height << "," << width << ")";
    cout << "Activation-copy" << endl;
}

/*
 * Construct from a config template
 */
ActivationLayer::ActivationLayer(const configBase* templateConfig)
{
    srcData = NULL;
    dstData = NULL;
    diffData = NULL;
    prevLayer.clear();
    nextLayer.clear();
    activDesc = NULL;
    srcTensorDesc = NULL;
    dstTensorDesc = NULL;

    _name = templateConfig->_name;
    _inputName = templateConfig->_input;
    configActivation* curConfig = (configActivation*) templateConfig;
    LayersBase* prev_Layer = (LayersBase*)Layers::instanceObject()->getLayer(_inputName);
    inputAmount = prev_Layer->channels;
    inputImageDim = prev_Layer->height;
    number = prev_Layer->number;
    channels = prev_Layer->channels;
    height = prev_Layer->height;
    width = prev_Layer->width;
    outputSize = channels * height * width;
    ActivationMode = curConfig->_non_linearity;

    MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&dstData, number * channels * height * width * sizeof(float));
    MemoryMonitor::instanceObject()->gpuMallocMemory((void**)&diffData, number * channels * height * width * sizeof(float));

    this->createHandles();
    LOG(INFO) << "(" << number << "," << channels << "," << height << "," << width << ")";
    cout << "Activation-copy" << endl;
}

/*
 * Destructor
 * */
ActivationLayer::~ActivationLayer()
{
    MemoryMonitor::instanceObject()->freeGpuMemory(dstData);
    MemoryMonitor::instanceObject()->freeGpuMemory(diffData);
    destroyHandles();
}

/*
 * LRELU activation function forward compute
 */
__global__ void LreluForward(float* srcData, float* dstData, int data_size)
{
    int thread_index = threadIdx.x + blockIdx.x * blockDim.x;
    int num_threads = blockDim.x * gridDim.x;
    /*grid-stride loop over the whole buffer*/
    for(int i = 0; i < data_size; i += num_threads)
    {
        int index = i + thread_index;
        if(index < data_size)
        {
            dstData[index] = srcData[index] > 0 ? srcData[index] : srcData[index] * 0.01f;
        }
    }
}

// ReShape the dimensions
void ActivationLayer::ReShape()
{
    LayersBase* prev_Layer = (LayersBase*)Layers::instanceObject()->getLayer(_inputName);
    inputAmount = prev_Layer->channels;
    inputImageDim = prev_Layer->height;
    number = prev_Layer->number;
    channels = prev_Layer->channels;
    height = prev_Layer->height;
    width = prev_Layer->width;
    outputSize = channels * height * width;
}

/*
 * Activation forward propagation
 * */
void ActivationLayer::forwardPropagation(string train_or_test)
{
    srcData = prevLayer[0]->dstData;

    // dynamically adjust the dimensions
    ReShape();
    if(ActivationMode == ACTIVATION_LRELU)
    {
        int data_size = number * channels * height * width;
        int num_threads = 256;
        int num_block = (data_size + num_threads - 1) / num_threads;

        LreluForward<<<num_block, num_threads>>>(srcData, dstData, data_size);
        cudaDeviceSynchronize();
    }
    else
    {
        cudnnActivationMode = (cudnnActivationMode_t)ActivationMode;
        checkCUDNN(cudnnSetActivationDescriptor(activDesc,
                                                cudnnActivationMode,
                                                CUDNN_PROPAGATE_NAN,
                                                0.0));

        checkCUDNN(cudnnSetTensor4dDescriptor(srcTensorDesc,
                                              cuDNN_netWork::instanceObject()->GetTensorFormat(),
                                              cuDNN_netWork::instanceObject()->GetDataType(),
                                              number,
                                              channels,
                                              height,
                                              width));

        checkCUDNN(cudnnSetTensor4dDescriptor(dstTensorDesc,
                                              cuDNN_netWork::instanceObject()->GetTensorFormat(),
                                              cuDNN_netWork::instanceObject()->GetDataType(),
                                              number,
                                              channels,
                                              height,
                                              width));

        MemoryMonitor::instanceObject()->gpuMemoryMemset(dstData, number * channels * height * width * sizeof(float));
        float alpha = 1.0f;
        float beta = 0.0f;
        checkCUDNN(cudnnActivationForward(cuDNN_netWork::instanceObject()->GetcudnnHandle(),
                                          activDesc,
                                          &alpha,
                                          srcTensorDesc,
                                          srcData,
                                          &beta,
                                          dstTensorDesc,
                                          dstData));
    }
}

/*
 * LRELU backward compute
 */
__global__ void LreluBackward(float* srcDiff, float* dstDiff, float* srcData, int data_size)
{
    int thread_index = threadIdx.x + blockIdx.x * blockDim.x;
    int num_threads = blockDim.x * gridDim.x;

    /*grid-stride loop: the local gradient is 1 for positive inputs, 0.01 otherwise*/
    for(int i = 0; i < data_size; i += num_threads)
    {
        int index = i + thread_index;
        if(index < data_size)
        {
            dstDiff[index] = srcDiff[index] * ((srcData[index] > 0) + (srcData[index] <= 0) * 0.01f);
        }
    }
}

/*
 * Activation Backward Propagation
 * */
void ActivationLayer::backwardPropagation(float Momentum)
{
    if(ActivationMode == ACTIVATION_LRELU)
    {
        int nIndex = m_nCurBranchIndex;
        int data_size = number * channels * height * width;
        int num_threads = 256;
        int num_block = (data_size + num_threads - 1) / num_threads;

        LreluBackward<<<num_block, num_threads>>>(nextLayer[nIndex]->diffData, diffData, srcData, data_size);
        cudaDeviceSynchronize();
    }
    else
    {
        cudnnActivationMode = (cudnnActivationMode_t)ActivationMode;
        float alpha = 1.0f;
        float beta = 0.0f;
        int nIndex = m_nCurBranchIndex;
        checkCUDNN(cudnnActivationBackward(cuDNN_netWork::instanceObject()->GetcudnnHandle(),
                                           activDesc,
                                           &alpha,
                                           dstTensorDesc,
                                           dstData,
                                           dstTensorDesc,
                                           nextLayer[nIndex]->diffData,
                                           srcTensorDesc,
                                           srcData,
                                           &beta,
                                           srcTensorDesc,
                                           diffData));
    }
}

--------------------------------------------------------------------------------
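A standalone sanity check for the two LRELU kernels above (a hypothetical test harness, assuming it is compiled in the same translation unit as the kernels; it is not part of the repo). Feeding an upstream gradient of all ones makes LreluBackward print the local derivative directly:

    #include <cstdio>
    #include <cuda_runtime.h>

    int main()
    {
        const int n = 8;
        float h_src[n] = {-4.f, -2.f, -1.f, 0.f, 1.f, 2.f, 3.f, 4.f};
        float h_ones[n], h_dst[n], h_diff[n];
        for(int i = 0; i < n; i++) h_ones[i] = 1.0f;

        float *d_src, *d_dst, *d_ones, *d_diff;
        cudaMalloc(&d_src,  n * sizeof(float));
        cudaMalloc(&d_dst,  n * sizeof(float));
        cudaMalloc(&d_ones, n * sizeof(float));
        cudaMalloc(&d_diff, n * sizeof(float));
        cudaMemcpy(d_src,  h_src,  n * sizeof(float), cudaMemcpyHostToDevice);
        cudaMemcpy(d_ones, h_ones, n * sizeof(float), cudaMemcpyHostToDevice);

        int threads = 256;
        int blocks  = (n + threads - 1) / threads;

        LreluForward<<<blocks, threads>>>(d_src, d_dst, n);           /*y = x > 0 ? x : 0.01x*/
        LreluBackward<<<blocks, threads>>>(d_ones, d_diff, d_src, n); /*dy/dx = x > 0 ? 1 : 0.01*/
        cudaDeviceSynchronize();

        cudaMemcpy(h_dst,  d_dst,  n * sizeof(float), cudaMemcpyDeviceToHost);
        cudaMemcpy(h_diff, d_diff, n * sizeof(float), cudaMemcpyDeviceToHost);
        for(int i = 0; i < n; i++)
            printf("x=%5.2f  lrelu=%5.2f  dlrelu=%4.2f\n", h_src[i], h_dst[i], h_diff[i]);

        cudaFree(d_src); cudaFree(d_dst); cudaFree(d_ones); cudaFree(d_diff);
        return 0;
    }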