├── lib
│   ├── libblas32.a
│   ├── libblas64.a
│   ├── blas_helix.a
│   ├── cblas_helix.a
│   ├── libcblas32.a
│   ├── libcblas64.a
│   ├── blas_franklin.a
│   └── cblas_franklin.a
├── .gitignore
├── include
│   ├── Config.h
│   ├── rio.h
│   ├── MultiLayerTrainComponent.h
│   ├── DeepClassRBM.h
│   ├── my_logistic_sgd.h
│   ├── Logistic.h
│   ├── rnglib.h
│   ├── TrainModel.h
│   ├── AutoEncoder.h
│   ├── MLP.h
│   ├── ranlib.h
│   ├── Utility.h
│   ├── MultiLayerRBM.h
│   ├── dataset.h
│   ├── ClassRBM.h
│   ├── TrainComponent.h
│   ├── DeepAutoEncoder.h
│   ├── RBM.h
│   ├── MLPLayer.h
│   └── Dataset.h
├── README.md
├── src
│   ├── AutoEncoderDriver.cpp
│   ├── MultiLayerTrainComponent.cpp
│   ├── DeepClassRBM.cpp
│   ├── RBMModel.cpp
│   ├── DeepClassRBMModel.cpp
│   ├── CBLAS
│   │   ├── Logistic.cpp
│   │   ├── MLPLayer.cpp
│   │   └── RBM.cpp
│   ├── Logistic.cpp
│   ├── TrainComponent.cpp
│   ├── ClassRBMModel.cpp
│   ├── LogisticModel.cpp
│   ├── MLP.cpp
│   ├── MLPModel.cpp
│   ├── Utility.cpp
│   ├── DeepAutoEncoderDriver.cpp
│   ├── AutoEncoder.cpp
│   ├── TrainModel.cpp
│   ├── MultiLayerRBM.cpp
│   ├── MLPLayer.cpp
│   └── DeepAutoEncoder.cpp
├── scripts
│   ├── shuffle.sh
│   ├── cross_valid_partition.sh
│   ├── create_image.py
│   └── utils.py
├── old_version
│   ├── Makefile
│   ├── rio.c
│   ├── my_da.c
│   ├── dpblas_da.c
│   ├── my_logistic_sgd.c
│   ├── my_mlp.c
│   ├── rsm_blas.c
│   └── TravelingSalesmanProblem.c
├── Makefile
└── py_version
    └── utils.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

tags
*.dat
*.pyc
*.o
*.cmd
*.txt
*.gdb
*.swp
*.log
lib/libblas.a
lib/libcblas.a
--------------------------------------------------------------------------------
/include/Config.h:
--------------------------------------------------------------------------------

#ifndef _CONFIG_H
#define _CONFIG_H

const int maxLayer = 5;
const int maxUnit = 2005;
const int maxBatchSize = 10;

const int numBatchPerLog = 0;

const double L2Reg = 0.00001;

typedef double RBMBuffer[maxUnit*maxBatchSize];

#endif

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

## Notes ##

* Use the GNU profiler to find performance bottlenecks
    * compile with -pg
    * run ./prog; gmon.out is written on exit
    * view the profile with: gprof ./prog gmon.out | less
* Pick the libblas library that matches the host's word size
    * make blas BITS=[32|64]

* Add a new internal repo
    * git remote set-url local ssh://git@bioserver:7722/home/git/deep_learning.git

--------------------------------------------------------------------------------
/src/AutoEncoderDriver.cpp:
--------------------------------------------------------------------------------

#include "AutoEncoder.h"
#include "Dataset.h"
#include "TrainModel.h"

int main(){
    MNISTDataset data;
    data.loadData();

    AutoEncoder ad(data.getFeatureNumber(), 500, false);
    TrainModel model(ad);
    model.train(&data, 0.1, 20, 15);
}

--------------------------------------------------------------------------------
/src/MultiLayerTrainComponent.cpp:
--------------------------------------------------------------------------------

#include "MultiLayerTrainComponent.h"
#include "Dataset.h"

MultiLayerTrainComponent::MultiLayerTrainComponent(const char* name) : IModel(name) { }

MultiLayerTrainComponent::~MultiLayerTrainComponent(){ }

int MultiLayerTrainComponent::getBottomInputNumber(){
    return getLayer(0).getInputNumber();
}

int MultiLayerTrainComponent::getTopOutputNumber(){
    return getLayer(getLayerNumber()-1).getOutputNumber();
}

--------------------------------------------------------------------------------
/include/rio.h:
--------------------------------------------------------------------------------

#include <stdio.h>
#include <sys/types.h>

#ifndef RIO_H

#define RIO_H
#define RIO_BUFFER_SIZE 8192

typedef struct {
    int rio_fd;
    int rio_cnt;        /* unread bytes in internal buffer */
    char *rio_bufptr;   /* next unread byte in internal buffer */
    char rio_buf[RIO_BUFFER_SIZE];
} rio_t;

void rio_readinitb(rio_t *rp, int fd, int type);
ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n);
ssize_t rio_writenb(rio_t *rp, void *usrbuf, size_t n);

#endif

--------------------------------------------------------------------------------
/include/MultiLayerTrainComponent.h:
--------------------------------------------------------------------------------

#include "TrainComponent.h"

#ifndef _MULTILAYERTRAINCOMPONENT
#define _MULTILAYERTRAINCOMPONENT

class MultiLayerTrainComponent : public IModel {
    public:
        MultiLayerTrainComponent(const char*);
        virtual ~MultiLayerTrainComponent();
        virtual TrainComponent& getLayer(int) = 0;
        virtual int getLayerNumber() = 0;
        virtual bool getLayerToTrain(int){ return true; }
        int getBottomInputNumber();
        int getTopOutputNumber();
};

#endif
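MultiLayerTrainComponent leaves getLayer and getLayerNumber pure, and derives getBottomInputNumber/getTopOutputNumber from them; DeepClassRBM below is the repo's real subclass. As an even smaller illustration of the contract, here is a hypothetical two-layer wrapper (the class name StackOfTwo and its wiring are invented for this sketch, not part of the repo):

```cpp
#include "MultiLayerTrainComponent.h"

// Hypothetical example: exposes two existing TrainComponents as a stack.
class StackOfTwo : public MultiLayerTrainComponent {
    public:
        StackOfTwo(TrainComponent* a, TrainComponent* b)
            : MultiLayerTrainComponent("StackOfTwo"), first(a), second(b) { }
        TrainComponent& getLayer(int i) { return i == 0 ? *first : *second; }
        int getLayerNumber() { return 2; }
        void saveModel(FILE* fd) { /* delegate to the layers as needed */ }
    private:
        TrainComponent *first, *second;
};
// With this, getBottomInputNumber() returns first->getInputNumber()
// and getTopOutputNumber() returns second->getOutputNumber().
```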
--------------------------------------------------------------------------------
/include/DeepClassRBM.h:
--------------------------------------------------------------------------------

#include "ClassRBM.h"
#include "MultiLayerRBM.h"

#ifndef _DEEPCLASSRBM_H
#define _DEEPCLASSRBM_H

class DeepClassRBM : public MultiLayerTrainComponent {
    public:
        DeepClassRBM(MultiLayerRBM*, ClassRBM*);
        ~DeepClassRBM();
        TrainComponent& getLayer(int);
        int getLayerNumber();
        bool getLayerToTrain(int);
        void saveModel(FILE* fd);
    private:
        MultiLayerRBM* multirbm;
        ClassRBM* classrbm;
        int numLayer;
};

#endif

--------------------------------------------------------------------------------
/include/my_logistic_sgd.h:
--------------------------------------------------------------------------------

#include <stdio.h>
#include "dataset.h"

#ifndef LOG_SGD_H
#define LOG_SGD_H

#define OUT_LAYER_SIZE 20
#define IN_LAYER_SIZE 1000
#define eta 0.13

typedef struct log_reg {
    int n_in, n_out;
    double ** W;
    double * b;
} log_reg;

void init_log_reg(log_reg *m, int n_in, int n_out);
void free_log_reg(log_reg *m);
void dump_param(rio_t *rp, log_reg *m);
void load_log_reg(rio_t *rp, log_reg *m);
void get_softmax_y_given_x(const log_reg *m, const double *x, double *y);
void get_grad(const log_reg *m, const double *x, const double *y, const double *t, double **grad_w, double *grad_b);
double get_log_reg_delta(const double *y, const double *t, double *d, const int size);

#endif
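The forward pass declared above is plain softmax regression: y = softmax(Wx + b). A minimal sketch of get_softmax_y_given_x under that reading, assuming W is indexed as W[output][input] (the repo's my_logistic_sgd.c is not shown, so this is an illustration rather than its actual implementation):

```cpp
#include <math.h>

// y_k = exp(w_k . x + b_k) / sum_j exp(w_j . x + b_j)
void get_softmax_y_given_x(const log_reg *m, const double *x, double *y){
    double sum = 0.0;
    for(int k = 0; k < m->n_out; k++){
        double a = m->b[k];
        for(int i = 0; i < m->n_in; i++)
            a += m->W[k][i] * x[i];
        y[k] = exp(a);
        sum += y[k];
    }
    for(int k = 0; k < m->n_out; k++)
        y[k] /= sum;    // normalize to probabilities
}
```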
echo "invalid parameter"; exit 1;; 15 | esac 16 | done 17 | 18 | line_count=$((`wc -l $data_file | awk '{print $1}'` - $data_file_prefix)) 19 | echo $line_count 20 | 21 | sed -n "1,${data_file_prefix}p" $data_file > $output_folder/data.out 22 | sed -n "1,${label_file_prefix}p" $label_file > $output_folder/label.out 23 | 24 | seq $line_count | sort -R | while read line 25 | do 26 | echo $line >> $output_folder/line.index 27 | sed "$(($line + $data_file_prefix))q;d" $data_file >> $output_folder/data.out 28 | sed "$(($line + $label_file_prefix))q;d" $label_file >> $output_folder/label.out 29 | done 30 | -------------------------------------------------------------------------------- /include/Logistic.h: -------------------------------------------------------------------------------- 1 | #include "Config.h" 2 | #include "TrainModel.h" 3 | #include "Utility.h" 4 | #include "Dataset.h" 5 | #include "MLPLayer.h" 6 | #include 7 | #include 8 | 9 | #ifndef _LOGISTIC_H 10 | #define _LOGISTIC_H 11 | 12 | class Logistic : public SoftmaxLayer , public TrainComponent 13 | { 14 | public: 15 | Logistic(int, int); 16 | Logistic(FILE *fd); 17 | Logistic(const char*); 18 | void trainBatch(int); // forward + backpropagate 19 | void runBatch(int); // forward only 20 | void setLearningRate(double lr); 21 | void setInput(double *input); 22 | void setLabel(double *label); 23 | double* getOutput(); 24 | int getInputNumber(); 25 | int getOutputNumber(); 26 | double* getLabel(); 27 | void saveModel(FILE* modelFileFd); 28 | private: 29 | void computeDelta(int, MLPLayer*); 30 | double getValidError(Dataset*, int); 31 | 32 | double *label; 33 | }; 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /include/rnglib.h: -------------------------------------------------------------------------------- 1 | void advance_state ( int k ); 2 | int antithetic_get ( ); 3 | void antithetic_memory ( int i, int *value ); 4 | void antithetic_set ( int value ); 5 | void cg_get ( int g, int *cg1, int *cg2 ); 6 | void cg_memory ( int i, int g, int *cg1, int *cg2 ); 7 | void cg_set ( int g, int cg1, int cg2 ); 8 | int cgn_get ( ); 9 | void cgn_memory ( int i, int *g ); 10 | void cgn_set ( int g ); 11 | void get_state ( int *cg1, int *cg2 ); 12 | int i4_uni ( ); 13 | void ig_get ( int g, int *ig1, int *ig2 ); 14 | void ig_memory ( int i, int g, int *ig1, int *ig2 ); 15 | void ig_set ( int g, int ig1, int ig2 ); 16 | void init_generator ( int t ); 17 | void initialize ( ); 18 | int initialized_get ( ); 19 | void initialized_memory ( int i, int *initialized ); 20 | void initialized_set ( ); 21 | void lg_get ( int g, int *lg1, int *lg2 ); 22 | void lg_memory ( int i, int g, int *lg1, int *lg2 ); 23 | void lg_set ( int g, int lg1, int lg2 ); 24 | int multmod ( int a, int s, int m ); 25 | float r4_uni_01 ( ); 26 | double r8_uni_01 ( ); 27 | void set_initial_seed ( int ig1, int ig2 ); 28 | void set_seed ( int cg1, int cg2 ); 29 | void timestamp ( ); 30 | 31 | -------------------------------------------------------------------------------- /src/RBMModel.cpp: -------------------------------------------------------------------------------- 1 | #include "RBM.h" 2 | 3 | void testMNIST(){ 4 | MNISTDataset mnist; 5 | mnist.loadData(); 6 | 7 | RBM rbm(mnist.getFeatureNumber(), 500); //500个隐藏结点 8 | rbm.setModelFile("result/RBMModel.dat"); 9 | rbm.setWeightFile("result/mnist_rbm_weight.txt"); //设置导出权重的文件 10 | rbm.setPersistent(false); 11 | rbm.setGaussian(true); 12 | 13 | TrainModel RBMModel(rbm); 14 | 
--------------------------------------------------------------------------------
/src/RBMModel.cpp:
--------------------------------------------------------------------------------

#include "RBM.h"

void testMNIST(){
    MNISTDataset mnist;
    mnist.loadData();

    RBM rbm(mnist.getFeatureNumber(), 500);                 // 500 hidden units
    rbm.setModelFile("result/RBMModel.dat");
    rbm.setWeightFile("result/mnist_rbm_weight.txt");       // file to dump the weights to
    rbm.setPersistent(false);
    rbm.setGaussian(true);

    TrainModel RBMModel(rbm);
    RBMModel.train(&mnist, 0.01, 20, 15);
}

void testMNISTLoading(){
    MNISTDataset mnist;
    mnist.loadData();

    RBM rbm("result/RBMModel.dat");                         // 500 hidden units
    rbm.setWeightFile("result/mnist_rbm_weight.txt");       // file to dump the weights to
    rbm.dumpWeight(100, 28);
}

void testMNISTDumpSample(){
    MNISTDataset mnist;
    mnist.loadData();

    //RBM rbm("result/DBN_Layer1.dat");
    RBM rbm("result/RBMModel.dat");
    double *sample = mnist.getValidateData(20);
    int sampleCount = 20;

    rbm.generateSample("result/rbm_sample.txt", sample, sampleCount);
}

int main(){
    srand(1234);
    testMNIST();
    //testMNISTLoading();
    //testMNISTDumpSample();
}

--------------------------------------------------------------------------------
/include/TrainModel.h:
--------------------------------------------------------------------------------

#include "Dataset.h"
#include "TrainComponent.h"
#include "MultiLayerTrainComponent.h"

#ifndef _TRAINMODEL_H
#define _TRAINMODEL_H


class TrainModel {
    public:
        TrainModel(TrainComponent&);
        void train(Dataset *, double, int, int, int leastEpoch = 1, double vabias = 0.0);
        double getValidError(Dataset *, int);
    private:
        TrainComponent& component;
};

class MultiLayerTrainModel {
    public:
        MultiLayerTrainModel(MultiLayerTrainComponent& comp) :
            component(comp){ }
        void train(Dataset *, double, int, int);
        void train(Dataset *, double, int, int[]);
        void train(Dataset *, double[], int, int);
        void train(Dataset *, double[], int, int[]);

    private:
        void trainOneLayer();
        MultiLayerTrainComponent& component;
};

/**
 * @brief Training model that runs unsupervised pretraining followed by supervised training
 */
class PretrainSupervisedModel {
    public:
        void pretrain(Dataset *, double, int, int);
        void supervisedTrain(Dataset *, double, int, int);
};

#endif

--------------------------------------------------------------------------------
/include/AutoEncoder.h:
--------------------------------------------------------------------------------

#include "TrainComponent.h"

#ifndef _AUTOENCODER_H
#define _AUTOENCODER_H

class AutoEncoder : public UnsuperviseTrainComponent{
    public:
        AutoEncoder(int, int, bool = false);
        ~AutoEncoder();
        void beforeTraining(int size);
        void trainBatch(int);
        void runBatch(int);
        void setLearningRate(double lr);
        void setInput(double *input);
        void setLabel(double *label);
        int getInputNumber();
        double* getOutput();
        int getOutputNumber();
        double* getLabel();
        double getTrainingCost(int, int);
        void saveModel(FILE*){};
        void operationPerEpoch();
    private:
        int numIn, numOut;
        double *x, *y, *h;
        double *nx;
        double *w, *b, *c;
        double lr;
        bool denoise;

        void getHFromX(double*, double*, int);
        void getYFromH(double*, double*, int);
        void backpropagate(int size);
        double getReconstructCost(double *x, double *y, int size);
        void dumpWeight(int, int, const char* = "result/da_weight.txt");
};

#endif
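For binary inputs, an autoencoder's reconstruction cost is conventionally the mean cross-entropy between the input x and the reconstruction y, which is presumably what getReconstructCost above tracks. A sketch under that assumption (not the repo's exact code; the 1e-10 guard mirrors the epsilon style used in Utility.h):

```cpp
#include <cmath>

// Mean cross-entropy reconstruction cost over a batch of `size` samples,
// each with `n` features; assumes x, y lie in (0,1).
double crossEntropyCost(const double *x, const double *y, int n, int size){
    double cost = 0.0;
    for(int i = 0; i < size * n; i++){
        cost -= x[i] * log(y[i] + 1e-10)
              + (1.0 - x[i]) * log(1.0 - y[i] + 1e-10);
    }
    return cost / size;
}
```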
--------------------------------------------------------------------------------
/include/MLP.h:
--------------------------------------------------------------------------------

#include "Dataset.h"
#include "TrainModel.h"
#include "MLPLayer.h"
#include "Logistic.h"

#ifndef _MLP_H
#define _MLP_H

class MLP : public TrainComponent{
    public:
        MLP();
        MLP(const char*);
        ~MLP();
        void trainBatch(int);
        void runBatch(int);
        void setLearningRate(double lr);
        void setInput(double *input);
        void setLabel(double *label);
        double* getOutput();
        int getInputNumber();
        int getOutputNumber();
        double* getLabel();
        void saveModel(FILE*);
        int getLayerNumber() { return numLayer; }
        MLPLayer* getLayer(int i) { return layers[i]; }

        void setLayerNumber(int n) { numLayer = n; }

        inline void addLayer(MLPLayer* l) { layers[numLayer++] = l; }
        inline void setLayer(int i, MLPLayer* l) { layers[i] = l; }

        void setGaussian(bool b) { gaussian = b; }
        void operationPerEpoch(int k);
    private:
        MLPLayer* layers[maxLayer];
        int numLayer;
        void loadModel(FILE*);

        double learningRate;
        double *label;

        bool gaussian;
};

#endif

--------------------------------------------------------------------------------
/src/DeepClassRBMModel.cpp:
--------------------------------------------------------------------------------

#include "Dataset.h"
#include "DeepClassRBM.h"

void testMNISTDeepClassRBMTraining(){
    MNISTDataset data;
    data.loadData();

    MultiLayerRBM* multirbm = new MultiLayerRBM("result/MNISTMultiLayerRBM_1000_1000_1000_0.01.dat");
    multirbm->setLayerToTrain(0, false);
    multirbm->setLayerToTrain(1, false);
    multirbm->setLayerToTrain(2, false);

    ClassRBM* classrbm = new ClassRBM(multirbm->getTopOutputNumber(), 500, data.getLabelNumber());
    DeepClassRBM deepClassrbm(multirbm, classrbm);

    MultiLayerTrainModel model(deepClassrbm);
    model.train(&data, 0.01, 10, 1000);
}

void testTCGADeepClassRBMTraining(){
    TCGADataset data;
    data.loadData();

    MultiLayerRBM* multirbm = new MultiLayerRBM("result/TCGAMultiLayerRBM_2000_0.01_13epoch_2000_0.01_3epoch.dat");
    //MultiLayerRBM* multirbm = new MultiLayerRBM("result/TCGAMultiLayerRBM_2000_0.01_13epoch_2000_0.01_3epoch_2000_0.01_16epoch.dat");
    multirbm->setLayerToTrain(0, false);
    multirbm->setLayerToTrain(1, false);
    //multirbm->setLayerToTrain(2, false);

    ClassRBM* classrbm = new ClassRBM(multirbm->getTopOutputNumber(), 1000, data.getLabelNumber());
    DeepClassRBM deepClassrbm(multirbm, classrbm);

    MultiLayerTrainModel model(deepClassrbm);
    model.train(&data, 0.01, 1, 1000);
}

int main(){
    testTCGADeepClassRBMTraining();
    return 0;
}
--------------------------------------------------------------------------------
/include/ranlib.h:
--------------------------------------------------------------------------------

char ch_cap ( char ch );
float genbet ( float aa, float bb );
float genchi ( float df );
float genexp ( float av );
float genf ( float dfn, float dfd );
float gengam ( float a, float r );
float *genmn ( float parm[] );
int *genmul ( int n, float p[], int ncat );
float gennch ( float df, float xnonc );
float gennf ( float dfn, float dfd, float xnonc );
float gennor ( float av, float sd );
void genprm ( int iarray[], int n );
float genunf ( float low, float high );
int i4_max ( int i1, int i2 );
int i4_min ( int i1, int i2 );
int ignbin ( int n, float pp );
int ignnbn ( int n, float p );
int ignpoi ( float mu );
int ignuin ( int low, int high );
int lennob ( char *s );
void phrtsd ( char *phrase, int *seed1, int *seed2 );
void prcomp ( int maxobs, int p, float mean[], float xcovar[], float answer[] );
float r4_exponential_sample ( float lambda );
float r4_max ( float x, float y );
float r4_min ( float x, float y );
float r4vec_covar ( int n, float x[], float y[] );
int s_eqi ( char *s1, char *s2 );
float sdot ( int n, float dx[], int incx, float dy[], int incy );
float *setcov ( int p, float var[], float corr );
void setgmn ( float meanv[], float covm[], int p, float parm[] );
float sexpo ( );
float sgamma ( float a );
float snorm ( );
int spofa ( float a[], int lda, int n );
void stats ( float x[], int n, float *av, float *var, float *xmin, float *xmax );
void trstat ( char *pdf, float parin[], float *av, float *var );

--------------------------------------------------------------------------------
/src/CBLAS/Logistic.cpp:
--------------------------------------------------------------------------------

#include "Logistic.h"

using namespace std;

Logistic::Logistic(int nIn, int nOut) :
    SoftmaxLayer(nIn, nOut, "Logistic") ,
    TrainComponent(Supervise, "Logistic") { }

Logistic::Logistic(FILE* fd) :
    SoftmaxLayer(fd, "Logistic") ,
    TrainComponent(Supervise, "Logistic") { }

Logistic::Logistic(const char* file) :
    SoftmaxLayer(file, "Logistic") ,
    TrainComponent(Supervise, "Logistic") { }

void Logistic::trainBatch(int size){
    forward(size);
    backpropagate(size, NULL);
}

void Logistic::runBatch(int size){
    forward(size);
}

void Logistic::setLearningRate(double lr){
    MLPLayer::setLearningRate(lr);
}

void Logistic::setInput(double *input){
    MLPLayer::setInput(input);
}

int Logistic::getOutputNumber() {
    return SoftmaxLayer::getOutputNumber();
}

void Logistic::setLabel(double *label){
    this->label = label;
}

double* Logistic::getOutput(){
    return MLPLayer::getOutput();
}

double* Logistic::getLabel(){
    return label;
}

void Logistic::computeDelta(int size, MLPLayer *prevLayer){
    int numOut = getOutputNumber();
    double *out = getOutput(), *delta = getDelta();

    // get delta dE/d{ai}
    cblas_dcopy(size*numOut, out, 1, delta, 1);
    cblas_daxpy(size*numOut, -1.0, label, 1, delta, 1);
}

void Logistic::saveModel(FILE* fd){
    MLPLayer::saveModel(fd);
}
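computeDelta above relies on a standard identity: for a softmax output layer trained with cross-entropy, the gradient of the cost with respect to the pre-activations is simply y - t, which is why a copy of the output followed by an axpy with the negated label suffices. The loop equivalent of the two BLAS calls, for clarity:

```cpp
// Loop equivalent of the cblas_dcopy + cblas_daxpy pair in computeDelta:
// delta = out - label, element-wise over the whole batch.
void softmaxDelta(const double *out, const double *label,
                  double *delta, int size, int numOut){
    for(int i = 0; i < size * numOut; i++)
        delta[i] = out[i] - label[i];
}
```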
--------------------------------------------------------------------------------
/include/Utility.h:
--------------------------------------------------------------------------------

#include <stdlib.h>
#include <math.h>

#ifndef _UTILITY_H
#define _UTILITY_H

double* I();
void initializeWeightSigmoid(double *weight, int numIn, int numOut);
void initializeWeightTanh(double *weight, int numIn, int numOut);
void softmax(double *arr, int size);
void multiNormial(double *px, double *x, int size);
void binomial(double *px, double *x, int size);
int maxElem(double *arr, int size);
double expc(double x);
double sigmoidc(double x);
double softplusc(double x);
double squareNorm(double *arr, int n, int size);
double normalize(double *arr, int n, int size);
double corrupt(const double* x, double* nx, int n, double level);
void transMatrix(double* ori, double* trans, int nrow, int ncol);

inline int random_int(int low, int high){
    return rand() % (high - low + 1) + low;
}

inline double random_double(double low, double high){
    return ((double)rand() / RAND_MAX) * (high - low) + low;
}

inline double sigmoid(double x){
    return 1.0 / (1.0 + exp(-x));
}

inline double softplus(double a){
    return log(1.0 + exp(a));
}

inline double get_sigmoid_derivative(double y){
    return (y + 1e-10) * (1.0 - y + 1e-10);
}

inline double square(double x){
    return x * x;
}

inline double tanh(double x){
    double a = square(exp(x));
    return (a - 1.0) / (a + 1.0);
}

inline double get_tanh_derivative(double y){
    return 1.0 - y * y;
}


#endif
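The softmax(double*, int) declared above is defined in Utility.cpp, which is not shown here. One plausible implementation, with the usual max-subtraction so the exponentials cannot overflow (shifting every entry by the maximum leaves the ratios, and hence the result, unchanged):

```cpp
#include <math.h>

// In-place softmax over arr[0..size), numerically stabilized.
void softmax(double *arr, int size){
    double mx = arr[0];
    for(int i = 1; i < size; i++)
        if(arr[i] > mx) mx = arr[i];
    double sum = 0.0;
    for(int i = 0; i < size; i++){
        arr[i] = exp(arr[i] - mx);  // shift by max; ratios unchanged
        sum += arr[i];
    }
    for(int i = 0; i < size; i++)
        arr[i] /= sum;
}
```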
--------------------------------------------------------------------------------
/src/Logistic.cpp:
--------------------------------------------------------------------------------

#include "Logistic.h"
#include "mkl_cblas.h"

using namespace std;

Logistic::Logistic(int nIn, int nOut) :
    SoftmaxLayer(nIn, nOut, "Logistic") ,
    TrainComponent(Supervise, "Logistic") { }

Logistic::Logistic(FILE* fd) :
    SoftmaxLayer(fd, "Logistic") ,
    TrainComponent(Supervise, "Logistic") { }

Logistic::Logistic(const char* file) :
    SoftmaxLayer(file, "Logistic") ,
    TrainComponent(Supervise, "Logistic") { }

void Logistic::trainBatch(int size){
    forward(size);
    backpropagate(size, NULL);
}

void Logistic::runBatch(int size){
    forward(size);
}

void Logistic::setLearningRate(double lr){
    MLPLayer::setLearningRate(lr);
}

void Logistic::setInput(double *input){
    MLPLayer::setInput(input);
}

int Logistic::getOutputNumber() {
    return SoftmaxLayer::getOutputNumber();
}

int Logistic::getInputNumber() {
    return SoftmaxLayer::getInputNumber();
}

void Logistic::setLabel(double *label){
    this->label = label;
}

double* Logistic::getOutput(){
    return MLPLayer::getOutput();
}

double* Logistic::getLabel(){
    return label;
}

void Logistic::computeDelta(int size, MLPLayer *prevLayer){
    int numOut = getOutputNumber();
    double *out = getOutput(), *delta = getDelta();

    // get delta dE/d{ai}
    cblas_dcopy(size*numOut, out, 1, delta, 1);
    cblas_daxpy(size*numOut, -1.0, label, 1, delta, 1);
}

void Logistic::saveModel(FILE* fd){
    MLPLayer::saveModel(fd);
}

--------------------------------------------------------------------------------
/src/TrainComponent.cpp:
--------------------------------------------------------------------------------

#include "TrainComponent.h"
#include <cstring>
#include <cstdlib>
#include <ctime>

IModel::IModel(const char* name) : modelFile(NULL), vabias(0.0)
{
    strcpy(modelName, name);
}

IModel::~IModel(){
    delete[] modelFile;
}

void IModel::setModelFile(const char *modelFile){
    delete[] this->modelFile;

    this->modelFile = new char[strlen(modelFile)+1];
    strcpy(this->modelFile, modelFile);
}

void IModel::saveModel(){
    FILE *fd;

    if(getModelFile() == NULL) return;
    fd = fopen(getModelFile(), "wb+");

    if(fd == NULL){
        fprintf(stderr, "cannot open file %s\n", getModelFile());
        exit(1);
    }

    saveModel(fd);

    fclose(fd);
}

TrainComponent::TrainComponent(TrainType t, const char* name)
    : IModel(name), trainType(t) { }

TrainComponent::~TrainComponent(){ }

void TrainComponent::beforeTraining(int){ }

void TrainComponent::afterTraining(int){
    if(getModelFile() == NULL){
        char* newModelFile = new char[200];
        time_t now = time(NULL);
        struct tm tm = *localtime(&now);
        sprintf(newModelFile, "result/%sModel_%d%d%d_%d_%d.dat",
                getModelName(),
                tm.tm_year + 1900,
                tm.tm_mon + 1,
                tm.tm_mday,
                tm.tm_hour,
                tm.tm_min);
        setModelFile(newModelFile);
        delete[] newModelFile;
    }
    saveModel();
}

UnsuperviseTrainComponent::UnsuperviseTrainComponent(const char* name)
    : TrainComponent(Unsupervise, name) { }

UnsuperviseTrainComponent::~UnsuperviseTrainComponent(){ }
--------------------------------------------------------------------------------
/include/MultiLayerRBM.h:
--------------------------------------------------------------------------------

#include "RBM.h"
#include "MLP.h"
#include <vector>

using namespace std;

#ifndef _MULTIRBM_H
#define _MULTIRBM_H

class DeepAutoEncoder;

class MultiLayerRBM : public MultiLayerTrainComponent {
    public:
        MultiLayerRBM(int, const int[]);
        MultiLayerRBM(int, const vector<const char*>&);   // load from several single-layer model files
        MultiLayerRBM(const char*);
        MultiLayerRBM(MLP&);
        MultiLayerRBM(DeepAutoEncoder&);
        ~MultiLayerRBM();
        TrainComponent& getLayer(int);
        int getLayerNumber() { return numLayer; }
        void addLayer(int);
        void resetLayer(int, int, int);
        void saveModel(FILE*);
        void setPersistent(bool);
        void toMLP(MLP*, int);
        void loadLayer(int, const char*);
        void setLayerToTrain(int i, bool b) { layersToTrain[i] = b; }
        bool getLayerToTrain(int i) { return layersToTrain[i]; }
        void activationMaxization(int layerIdx, int unitNum, double avgNorm, int nepoch = 1000,
                const char amSampleFile[] = "result/AM.txt", bool dumpBinary = false);
        void activationMaxizationOneUnit(int layerIdx, int unitIdx, double avgNorm, int nepoch = 1000);
        void setSparsity(bool, double p = 0.0, double numda = 0.9, double slr = 0.01);
        RBM* operator[](int i){
            return layers[i];
        }
        void setGaussianVisible(int i, bool b) {
            layers[i]->setGaussianVisible(b);
        }
        void setGaussianHidden(int i, bool b) {
            layers[i]->setGaussianHidden(b);
        }
    private:
        double maximizeUnit(int layersIdx, int unitIdx, double*, double avgNorm, int nepoch);
        RBM* layers[maxLayer];
        int numLayer;
        vector<bool> layersToTrain;
        bool persistent;
        double *AMSample;
};

#endif
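MultiLayerRBM is consumed by MultiLayerTrainModel (declared in TrainModel.h), which trains the stack greedily: each RBM learns on the output of the already-trained layers below it, and layersToTrain lets previously trained layers be frozen. A schematic of that loop using only the interfaces shown so far; the per-layer re-feeding of transformed data is elided, so this is an outline of the control flow, not the repo's actual MultiLayerTrainModel::train:

```cpp
// Greedy layer-wise pretraining, schematically:
void pretrainStack(MultiLayerRBM &stack, Dataset *data,
                   double lr, int batchSize, int numEpoch){
    for(int i = 0; i < stack.getLayerNumber(); i++){
        TrainComponent &layer = stack.getLayer(i);
        if(stack.getLayerToTrain(i)){
            TrainModel model(layer);
            model.train(data, lr, batchSize, numEpoch); // unsupervised CD
        }
        // Each layer's getTransOutput() then serves as the input of
        // layer i+1 (dataset plumbing handled by the framework).
    }
}
```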
--------------------------------------------------------------------------------
/src/ClassRBMModel.cpp:
--------------------------------------------------------------------------------

#include "ClassRBM.h"
#include "Dataset.h"
#include "TrainModel.h"
#include <cstdio>

void testTrainMNIST(){
    MNISTDataset mnist;
    mnist.loadData();

    ClassRBM classrbm(mnist.getFeatureNumber(), 500, mnist.getLabelNumber());   // 500 hidden units

    TrainModel ClassRBMModel(classrbm);
    ClassRBMModel.train(&mnist, 0.05, 20, 100);
}

void testTCGATraining(){
    TCGADataset data;
    data.loadData();

    ClassRBM classrbm(data.getFeatureNumber(), 2000, data.getLabelNumber());

    TrainModel ClassRBMModel(classrbm);
    ClassRBMModel.train(&data, 0.01, 1, 1000, 20);
}

void test20NewsGroup(){
    SVMDataset data;
    data.loadData("../data/20newsgroup_train.txt", "../data/20newsgroup_valid.txt");

    ClassRBM classrbm(data.getFeatureNumber(), 2000, data.getLabelNumber());

    TrainModel ClassRBMModel(classrbm);
    ClassRBMModel.train(&data, 0.01, 10, 60, 20);
}

void testDumpModel(){
    SVMDataset data;
    data.loadData("../data/20newsgroup_train.txt", "../data/20newsgroup_valid.txt");

    ClassRBM classrbm(data.getFeatureNumber(), 2000, data.getLabelNumber());
    classrbm.setModelFile("result/testClassRBM.dat");
    classrbm.saveModel();
}

void testDumpHiddenLayer(){
    SVMDataset data;
    data.loadData("../data/20newsgroup_train.txt", "../data/20newsgroup_valid.txt");

    ClassRBM classrbm("result/20NewsGroup_ClassRBMModel.dat");
    TransmissionDataset hiddenOutput(data, classrbm);
    hiddenOutput.dumpTrainingData("result/20NewsGroup_ClassRBMModel_Hidden.bin");
}

void testDumpResampleH(){
    ClassRBM classrbm("result/20NewsGroup_ClassRBMModel.dat");
    classrbm.resampleFromH();
}

int main(){
    //testTrainMNIST();
    //testTCGATraining();
    //test20NewsGroup();
    //testDumpModel();
    //testDumpHiddenLayer();
    testDumpResampleH();
    return 0;
}

--------------------------------------------------------------------------------
/old_version/Makefile:
--------------------------------------------------------------------------------

CC=gcc
CPP=g++
LDFLAGS=-lm -pg -lgfortran
CFLAGS=-c -g -DDEBUG -pg -Iinclude -I../include
OBJECTS=RBM.o Logistic.o MLPLayer.o TrainModel.o Dataset.o Utility.o
BLASLIB=./lib/libblas.a
CBLASLIB=./lib/libcblas.a
BLASLIB_FRANKLIN=./lib/blas_franklin.a
CBLASLIB_FRANKLIN=./lib/cblas_franklin.a
BLASLIB_HELIX=./lib/blas_helix.a
CBLASLIB_HELIX=./lib/cblas_helix.a
LOADER=gfortran

rbm: my_rbm.o $(OBJECTS)
	${CC} -o rbm my_rbm.o $(OBJECTS) -I ./include $(LDFLAGS)

msgd: my_logistic_sgd.o $(OBJECTS)
	${CC} -o msgd my_logistic_sgd.o -I ./include $(OBJECTS) $(LDFLAGS) $(CFLAGS)

mlp: my_mlp.o my_logistic_sgd.o $(OBJECTS)
	${CC} -o mlp my_mlp.o my_logistic_sgd.o -I ./include $(OBJECTS) $(LDFLAGS) $(CFLAGS)

da: my_da.o $(OBJECTS)
	${CC} -o da my_da.o -I ./include $(OBJECTS) $(LDFLAGS)

da_blas: dpblas_da.o $(OBJECTS)
	$(LOADER) dpblas_da.o $(OBJECTS) $(CBLASLIB) $(BLASLIB) -o $@

rbm_blas: dpblas_rbm.o $(OBJECTS)
	$(LOADER) dpblas_rbm.o $(OBJECTS) $(CBLASLIB) $(BLASLIB) -o $@

rbm_blas_franklin: dpblas_rbm.o $(OBJECTS)
	$(LOADER) dpblas_rbm.o $(OBJECTS) $(CBLASLIB_FRANKLIN) $(BLASLIB_FRANKLIN) -o $@

rbm_blas_helix: dpblas_rbm.o $(OBJECTS)
	$(LOADER) dpblas_rbm.o $(OBJECTS) $(CBLASLIB_HELIX) $(BLASLIB_HELIX) -o $@

rsm_blas: rsm_blas.o $(OBJECTS)
	$(LOADER) rsm_blas.o $(OBJECTS) $(CBLASLIB) $(BLASLIB) -o $@

classRBM_blas: classRBM_blas.o $(OBJECTS)
	$(LOADER) classRBM_blas.o $(OBJECTS) $(CBLASLIB) $(BLASLIB) -o $@

classRBM_blas_franklin: classRBM_blas.o $(OBJECTS)
	$(LOADER) classRBM_blas.o $(OBJECTS) $(CBLASLIB_FRANKLIN) $(BLASLIB_FRANKLIN) -o $@

test_cblas: test_cblas.o
	$(LOADER) test_cblas.o $(CBLASLIB) $(BLASLIB) -o $@

.cpp.o:
	${CPP} $(CFLAGS) $(LDFLAGS) -I include/ -o $@ $<

.c.o:
	${CC} $(CFLAGS) $(LDFLAGS) -I include/ -o $@ $<

clean:
	rm -rf *.o rbm msgd mlp test_cblas da_blas rbm_blas rsm_blas classRBM_blas DBN LogisticModel MLP *.out *.png *.txt

--------------------------------------------------------------------------------
/include/dataset.h:
--------------------------------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <stdint.h>
#include "ranlib.h"
#include "rnglib.h"
#include "rio.h"

#ifndef DATASET_H
#define DATASET_H

#define DEFAULT_MAXSIZE 1000

typedef struct dataset{
    uint32_t N;
    uint32_t nrow, ncol;
    double **input;
    uint8_t *output;
} dataset;

typedef struct dataset_blas{
    uint32_t N;
    uint32_t nrow, ncol;
    uint32_t nlabel;
    double *input;
    uint8_t *output;
    double *label;
    uint32_t n_feature;
} dataset_blas;

void init_dataset(dataset *d, uint32_t N, uint32_t nrow, uint32_t ncol);
void load_dataset_input(rio_t *rp, dataset *d);
void load_dataset_output(rio_t *rp, dataset *d);
void print_dataset(const dataset *d);
void free_dataset(dataset *d);
void read_uint32(rio_t *rp, uint32_t *data);
int random_int(int low, int high);
double random_double(double low, double high);
double sigmoid(double x);
double get_sigmoid_derivative(double y);
double tanh(double x);
double get_tanh_derivative(double y);
void load_mnist_dataset(dataset *train_set, dataset *validate_set);

void init_dataset_blas(dataset_blas *d, uint32_t N, uint32_t nrow, uint32_t ncol, int nlabel);
void load_dataset_blas_input(rio_t *rp, dataset_blas *d);
void load_dataset_blas_output(rio_t *rp, dataset_blas *d);
void free_dataset_blas(dataset_blas *d);
void print_dataset_blas(const dataset_blas *d);
void load_mnist_dataset_blas(dataset_blas *train_set, dataset_blas *validate_set);

void load_tcga_dataset_blas(dataset_blas *train_set, char *filename);
void partition_trainset(dataset_blas*, dataset_blas*, int);
void combine_foldset(dataset_blas*, int, int, dataset_blas*, dataset_blas*);
void load_corpus(char* filename, dataset_blas* train_set);
void load_corpus_label(char *filename, dataset_blas *train_set);
void shuffle(int*, int);

#endif
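dataset_blas stores the whole design matrix as one row-major double array (N rows by n_feature columns), which is what lets the BLAS code hand a minibatch to cblas_dgemm as a contiguous block. A sketch of minibatch addressing under that layout (illustrative only; the batch index b and batchSize are hypothetical names):

```cpp
#include <cstddef>

// Row-major minibatch addressing for dataset_blas: batch b of size
// `batchSize` starts at row b*batchSize of the input matrix.
const double* minibatch(const dataset_blas *d, int b, int batchSize){
    return d->input + (size_t)b * batchSize * d->n_feature;
}
```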
--------------------------------------------------------------------------------
/include/ClassRBM.h:
--------------------------------------------------------------------------------

#include "TrainComponent.h"

#ifndef _CLASSRBM_H
#define _CLASSRBM_H

/*
 * E(x,y,h) = h'Wx + b'x + c'h + d'y + h'Uy
 */
class ClassRBM : public TrainComponent {
    public:
        using IModel::saveModel;    // name hiding; see the relevant item in Effective C++
        void beforeTraining(int);
        void trainBatch(int);
        void runBatch(int);
        void setLearningRate(double lr) { learningRate = lr; }
        void setInput(double *input) { x = input; }
        void setLabel(double *label) { y = label; }
        double* getOutput() { return py; }
        int getInputNumber() { return numVis; }
        int getOutputNumber() { return numLabel; }
        double* getTransOutput() { return ph; }
        int getTransOutputNumber() { return numHid; }
        double* getLabel() { return y; }

        void saveModel(FILE* fd);

        void resampleFromH(const char resampleFile[] = "result/resample.bin");

        ClassRBM(int, int, int);
        ClassRBM(const char*);
        ~ClassRBM();
    private:
        int numVis, numHid, numLabel;
        double *W, *U, *b, *c, *d;
        double *x, *y, *h;
        double *ph, *py;
        double *phk;    // all possible ph

        double *yGen, *xGen;

        double learningRate;
        double alpha;    // hybrid rate

        void getHProb(double* x, double* y, double* ph, int size);
        void getYProb(double* x, double* py, int size);
        void getYFromH(double* h, double *y, int size);
        void getXFromH(double* h, double *x, int size);
        void getYProbFromH(double* h, double *py, int size);
        void getXProbFromH(double* h, double *px, int size);
        void update(int);
        void updateW(int);
        void updateU(int);
        void updateYBias(int);
        void updateHbias(int);
        void updateXBias(int);
        void initBuffer(int);
        void forward(int);
        void loadModel(FILE*);
        void resampleUnitFromH(FILE* fd, double* h);
};

#endif
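With the energy above, the label posterior of a classification RBM has a closed form (Larochelle & Bengio, 2008) obtained by summing the binary hidden units out analytically, which is presumably what getYProb evaluates via the precomputed phk buffer:

    p(y | x) = exp(d_y) * prod_j (1 + exp(c_j + U_{jy} + sum_i W_{ji} x_i))
               ----------------------------------------------------------------
               sum_{y*} exp(d_{y*}) * prod_j (1 + exp(c_j + U_{jy*} + sum_i W_{ji} x_i))

Because the product runs over only numHid factors and the sum over only numLabel candidate labels, this posterior is exact and cheap, so the discriminative part of the training needs no sampling.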
--------------------------------------------------------------------------------
/include/TrainComponent.h:
--------------------------------------------------------------------------------

#include <cstdio>

#ifndef _TRAINCOMPONENT_H
#define _TRAINCOMPONENT_H

enum TrainType { Unsupervise, Supervise };

class IModel {
    public:
        IModel(const char* name);
        virtual ~IModel();
        virtual void saveModel(FILE* fd) = 0;
        void setModelFile(const char *);
        virtual void saveModel();
        inline const char* getModelFile() { return modelFile; }
        const char* getModelName() { return modelName; }
        void setVabias(double b = 0.0) { vabias = b; }
        double getVabias() { return vabias; }
    private:
        char *modelFile;
        char modelName[20];
        double vabias;
};

class TrainComponent : public IModel {
    public:
        TrainComponent(TrainType t, const char* name);
        virtual void beforeTraining(int);
        virtual void afterTraining(int);
        virtual void trainBatch(int) = 0;   // forward + backpropagate
        virtual void runBatch(int) = 0;     // forward only
        virtual void setLearningRate(double lr) = 0;
        virtual void setInput(double *input) = 0;
        virtual void setLabel(double *label) = 0;
        virtual int getInputNumber() = 0;
        virtual double* getOutput() = 0;    // output used for validation
        virtual double* getTransOutput() { return getOutput(); }    // output passed up to the layer above; normally the same as getOutput
        virtual int getOutputNumber() = 0;
        virtual int getTransOutputNumber() { return getOutputNumber(); }
        virtual double* getLabel() = 0;
        virtual double getTrainingCost(int, int) { return 0.0; }
        virtual void operationPerEpoch(int k) { }
        inline TrainType getTrainType() { return trainType; }
        virtual ~TrainComponent();

    private:
        TrainType trainType;
};

class UnsuperviseTrainComponent : public TrainComponent {
    public:
        UnsuperviseTrainComponent(const char* name);
        void setLabel(double *label) { }
        double* getLabel() { return NULL; }
        virtual ~UnsuperviseTrainComponent();
};

#endif

--------------------------------------------------------------------------------
/include/DeepAutoEncoder.h:
--------------------------------------------------------------------------------

#include "TrainComponent.h"
#include "Config.h"
#include "MultiLayerRBM.h"

#ifndef _DEEPAUTOENCODER_H
#define _DEEPAUTOENCODER_H

class DeepAutoEncoder;
class EncoderLayer;

class EncoderLayer {
    public:
        EncoderLayer(int, int);
        EncoderLayer(int numIn, int numOut, double *w, double *b, double *c);
        ~EncoderLayer();
        void setInput(double *input) { x = input; }
        void allocate(int);
        int getInputNumber() { return numIn; }
        int getOutputNumber() { return numOut; }

        void getWeightTrans(double* transWeight);
        double* getInputBias() { return c; }
        double* getOutputBias() { return b; }
    private:
        int numIn, numOut;
        double *x, *y, *h;
        double *w, *b, *c;
        double *dw;
        double *dy, *dh;
        double lr;

        bool binIn, binOut;     // whether input/output units are binary

        void getHFromX(double*, double*, int);
        void getYFromH(double*, double*, int);
        void getYDeriv(EncoderLayer* prev, int);
        void getHDeriv(EncoderLayer* prev, int);
        void getDeltaFromYDeriv(double*, int size);
        void getDeltaFromHDeriv(int size);


        friend class DeepAutoEncoder;
};

class DeepAutoEncoder : public UnsuperviseTrainComponent{
    public:
        DeepAutoEncoder();
        DeepAutoEncoder(int, int*);
        DeepAutoEncoder(MultiLayerRBM&);
        DeepAutoEncoder(const char*);
        ~DeepAutoEncoder();
        void beforeTraining(int size);
        void trainBatch(int);
        void runBatch(int);
        void setLearningRate(double lr);
        void setInput(double *input);
        void setLabel(double *label);
        int getInputNumber();
        double* getOutput();
        int getOutputNumber();
        double* getLabel();
        double getTrainingCost(int, int);
        void saveModel(FILE*);
        void operationPerEpoch();

        void addLayer(int, int);
        void forward(int);
        void backpropagate(int);

        int getLayerNumber() { return numLayer; }
        EncoderLayer* getLayer(int i) { return layers[i]; }
    private:
        double getReconstructCost(double *x, double *y, int n, int size);
        int numLayer;
        EncoderLayer* layers[maxLayer];
};
#endif
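Each EncoderLayer holds a single weight matrix w used in both directions: getHFromX encodes with w, and getYFromH decodes with its transpose (hence getWeightTrans). With sigmoid units that is h = sigmoid(Wx + b) and y = sigmoid(W'h + c). A loop-level sketch of the tied decode step, assuming w is stored row-major as numOut x numIn (the repo's version is BLAS-based; this is for illustration):

```cpp
#include <math.h>

static inline double sigm(double a){ return 1.0 / (1.0 + exp(-a)); }

// Decode with the transposed encoder weight: y = sigmoid(w^T h + c).
// w is numOut x numIn as used for encoding; h has numOut entries.
void decodeTied(const double *w, const double *c, const double *h,
                double *y, int numIn, int numOut){
    for(int i = 0; i < numIn; i++){
        double a = c[i];
        for(int j = 0; j < numOut; j++)
            a += w[j * numIn + i] * h[j];   // transposed access
        y[i] = sigm(a);
    }
}
```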
--------------------------------------------------------------------------------
/src/LogisticModel.cpp:
--------------------------------------------------------------------------------

#include "Logistic.h"

void testICA(){
    TrivialDataset data;
    data.loadData("../data/minist1_lcn_mlp.bin", "../data/minist1_lcn_label.bin");

    Logistic logi(data.getFeatureNumber(), data.getLabelNumber());
    TrainModel logisticModel(logi);
    logisticModel.train(&data, 0.13, 500, 1000);
}

void testMNISTTraining(){
    MNISTDataset mnist;
    mnist.loadData();

    Logistic logi(mnist.getFeatureNumber(), mnist.getLabelNumber());
    logi.setModelFile("result/LogisticModel.dat");
    TrainModel logisticModel(logi);
    logisticModel.train(&mnist, 0.01, 10, 1000);
}

void testMNISTGuassianTraining(){
    MNISTDataset mnist;
    mnist.loadData();
    mnist.rowNormalize();

    Logistic logi(mnist.getFeatureNumber(), mnist.getLabelNumber());
    logi.setModelFile("result/LogisticModel.dat");
    TrainModel logisticModel(logi);
    logisticModel.train(&mnist, 0.13, 500, 100);
}

void testMNISTDataLoading(){
    MNISTDataset mnist;
    mnist.loadData();

    Logistic logi("result/LogisticModel.dat");
    TrainModel logisticModel(logi);
    printf("validate error : %.8lf%%\n", 100.0 * logisticModel.getValidError(&mnist, 500));
}

void testTCGATraining(){
    TCGADataset tcga;
    tcga.loadData();

    Logistic logi(tcga.getFeatureNumber(), tcga.getLabelNumber());
    logi.setModelFile("result/TCGALogisticModel.dat");
    TrainModel logisticModel(logi);
    logisticModel.train(&tcga, 0.01, 1, 1000, 5, -0.005);
}

void testGPCRTraining(){
    TrivialDataset data;
    data.loadData("../data/GPCR/GPCR_Binary.data", "../data/GPCR/GPCR_Binary.label");
    data.rowNormalize();

    Logistic logi(data.getFeatureNumber(), data.getLabelNumber());
    TrainModel logisticModel(logi);
    logisticModel.train(&data, 0.01, 1, 1000, 30);
}

void testPancanTraining(){
    SVMDataset data;
    data.loadData("../data/TCGA/Pancan-GAM-train.txt", "../data/TCGA/Pancan-GAM-valid.txt");

    Logistic logi(data.getFeatureNumber(), data.getLabelNumber());
    TrainModel logisticModel(logi);
    logisticModel.train(&data, 0.01, 10, 1000, 30);
}

int main(){
    testMNISTTraining();
    //testMNISTDataLoading();
    //testICA();

    //testMNISTGuassianTraining();
    //testTCGATraining();
    //testGPCRTraining();
    //testPancanTraining();
}
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------

CC=gcc
CPP=g++
BITS=32
MKLLDFLAGS_32=-L/opt/intel/composer_xe_2013.5.192/mkl/lib/ia32 /opt/intel/composer_xe_2013.5.192/mkl/lib/ia32/libmkl_intel.a \
	-Wl,--start-group \
	/opt/intel/composer_xe_2013.5.192/mkl/lib/ia32/libmkl_intel_thread.a \
	/opt/intel/composer_xe_2013.5.192/mkl/lib/ia32/libmkl_core.a \
	-Wl,--end-group \
	-L/opt/intel/composer_xe_2013.5.192/compiler/lib/ia32 \
	-liomp5 -lpthread -ldl -lm
MKLLDFLAGS_64=-L/opt/intel/composer_xe_2013.5.192/mkl/lib/intel64 /opt/intel/composer_xe_2013.5.192/mkl/lib/intel64/libmkl_intel_lp64.a \
	-Wl,--start-group \
	/opt/intel/composer_xe_2013.5.192/mkl/lib/intel64/libmkl_intel_thread.a \
	/opt/intel/composer_xe_2013.5.192/mkl/lib/intel64/libmkl_core.a \
	-Wl,--end-group \
	-L/opt/intel/composer_xe_2013.5.192/compiler/lib/intel64 \
	-liomp5 -lpthread -ldl -lm
LDFLAGS=-lm -pg -lgfortran

ifeq ("$(BITS)", "32")
LDFLAGS:=$(LDFLAGS) $(MKLLDFLAGS_32)
endif
ifeq ("$(BITS)", "64")
LDFLAGS:=$(LDFLAGS) $(MKLLDFLAGS_64)
endif

CFLAGS=-c -g -DDEBUG -pg -Iinclude -I../include -I/opt/intel/composer_xe_2013.5.192/mkl/include
OBJECTS=DeepAutoEncoder.o AutoEncoder.o DeepClassRBM.o MultiLayerRBM.o ClassRBM.o MLP.o RBM.o Logistic.o MLPLayer.o TrainModel.o MultiLayerTrainComponent.o TrainComponent.o Dataset.o Utility.o
OBJECTS:=$(patsubst %.o, src/%.o, $(OBJECTS))
MKLOBJECTS=MultiLayerRBM.o ClassRBM.o Logistic.o MLPLayer.o RBM.o DeepAutoEncoder.o AutoEncoder.o
MKLOBJECTS:=$(patsubst %, src/%, $(MKLOBJECTS))
MODELS=DeepAutoEncoderDriver AutoEncoderDriver LogisticModel MLPModel RBMModel DBN ClassRBMModel DeepClassRBMModel PancanGAM
BLASLIB=./lib/libblas.a
CBLASLIB=./lib/libcblas.a
LOADER=gfortran

#$(CPP) $^ $(CBLASLIB) $(BLASLIB) $(LDFLAGS) -o $@

$(MODELS) : % : src/%.o $(OBJECTS)
	$(CPP) $^ $(LDFLAGS) -o $@

$(MKLOBJECTS) : %.o : %.cpp
	/opt/intel/bin/icpc $(CFLAGS) -I include -o $@ $<

.cpp.o:
	${CPP} $(CFLAGS) -I include/ -o $@ $<

.c.o:
	${CC} $(CFLAGS) -I include/ -o $@ $<

.PHONY: clean blas test

blas:
ifeq ("$(BITS)", "32")
	cp -f ./lib/libblas32.a ./lib/libblas.a
	cp -f ./lib/libcblas32.a ./lib/libcblas.a
endif
ifeq ("$(BITS)", "64")
	cp -f ./lib/libblas64.a ./lib/libblas.a
	cp -f ./lib/libcblas64.a ./lib/libcblas.a
endif

clean:
	rm -rf $(MODELS) $(OBJECTS) *.out *.png *.txt src/*.o
	find . -name "*swp" | xargs rm -rf

test:
	echo $(MKLOBJECTS)

--------------------------------------------------------------------------------
/scripts/cross_valid_partition.sh:
--------------------------------------------------------------------------------

#!/bin/sh

#./cross_valid_partition.sh -l cross_valid/label.out
#   -d cross_valid/data.out -i cross_valid/line.index -o .

index_file="line.index"
data_file="data.out"
label_file="label.out"
nfold=10
data_file_prefix=2
label_file_prefix=1

while getopts ":l:d:i:o:n:" opt
do
    case $opt in
        d) data_file=$OPTARG;;
        l) label_file=$OPTARG;;
        i) index_file=$OPTARG;;
        o) output_folder=$OPTARG;;
        n) nfold=$OPTARG;;
        ?) echo "invalid parameter"; exit 1;;
    esac
done

if [ ! -d "$output_folder/cross_valid" ];
then
    mkdir $output_folder/cross_valid
fi

nfeature=`sed '2q;d' $data_file`
nline=`sed '1q;d' $data_file`
nlabel=`sed '1q;d' $label_file`
fold_size=$((($nline-1)/$nfold+1))

for i in `seq $nfold`
do
    if [ ! -d "$output_folder/cross_valid/fold_$i" ];
    then
        mkdir $output_folder/cross_valid/fold_$i
    fi
    fold_train_data_file=$output_folder/cross_valid/fold_$i/train_data.txt
    fold_train_label_file=$output_folder/cross_valid/fold_$i/train_label.txt
    fold_valid_data_file=$output_folder/cross_valid/fold_$i/valid_data.txt
    fold_valid_label_file=$output_folder/cross_valid/fold_$i/valid_label.txt

    if [ $i -eq $nfold ];
    then
        fold_nline=$(($nline - ($nfold - 1)*$fold_size))
    else
        fold_nline=$fold_size
    fi

    echo $fold_nline > $fold_valid_data_file
    echo $nfeature >> $fold_valid_data_file
    echo $nlabel > $fold_valid_label_file

    echo $(($nline - $fold_nline)) > $fold_train_data_file
    echo $nfeature >> $fold_train_data_file
    echo $nlabel > $fold_train_label_file

    fold_begin=$((($i-1)*$fold_size+1))
    fold_end=$(($i*$fold_size))
    if [ $i -ne 1 ];
    then
        sed -n "$(($data_file_prefix+1)),$(($fold_begin+$data_file_prefix-1))p" $data_file >> $fold_train_data_file
        sed -n "$(($label_file_prefix+1)),$(($fold_begin+$label_file_prefix-1))p" $label_file >> $fold_train_label_file
    fi
    if [ $i -ne $nfold ];
    then
        sed -n "$(($data_file_prefix+$fold_end+1)),$(($nline+$data_file_prefix))p" $data_file >> $fold_train_data_file
        sed -n "$(($label_file_prefix+$fold_end+1)),$(($nline+$label_file_prefix))p" $label_file >> $fold_train_label_file
    fi
    sed -n "$(($data_file_prefix+$fold_begin)),$(($data_file_prefix+$fold_end))p" $data_file >> $fold_valid_data_file
    sed -n "$(($label_file_prefix+$fold_begin)),$(($label_file_prefix+$fold_end))p" $label_file >> $fold_valid_label_file
done
--------------------------------------------------------------------------------
/old_version/rio.c:
--------------------------------------------------------------------------------

#include <unistd.h>
#include <string.h>
#include <errno.h>
#include "rio.h"

void rio_readinitb(rio_t *rp, int fd, int type){ /* type selects the buffer mode: 0 = read buffer, otherwise write buffer */
    rp->rio_fd = fd;
    if(type == 0){
        rp->rio_cnt = 0;
    }else{
        rp->rio_cnt = RIO_BUFFER_SIZE;
    }
    rp->rio_bufptr = rp->rio_buf;
}

static ssize_t rio_read(rio_t *rp, char *usrbuf, size_t n){
    int cnt;

    while(rp->rio_cnt <= 0){
        rp->rio_cnt = read(rp->rio_fd, rp->rio_buf, sizeof(rp->rio_buf));

        if(rp->rio_cnt < 0){
            if(errno != EINTR)
                return -1;
        }
        else if(rp->rio_cnt == 0) /* EOF */
            return 0;
        else
            rp->rio_bufptr = rp->rio_buf;
    }

    /* copy min(n, rp->rio_cnt) bytes from internal buf to user buf */
    cnt = n;
    if(rp->rio_cnt < n)
        cnt = rp->rio_cnt;
    memcpy(usrbuf, rp->rio_bufptr, cnt);
    rp->rio_bufptr += cnt;
    rp->rio_cnt -= cnt;
    return cnt;
}

static ssize_t rio_write(rio_t *rp, char *usrbuf, size_t n){
    int cnt;

    cnt = rp->rio_cnt; /* rp->rio_cnt is the number of bytes not yet written */
    while(cnt <= 0){
        cnt = write(rp->rio_fd, rp->rio_buf, sizeof(rp->rio_buf));
        if(cnt < 0){
            if(errno != EINTR)
                return -1;
        }else{
            rp->rio_bufptr = rp->rio_buf;
            rp->rio_cnt = cnt;
        }
    }

    cnt = n;
    if(rp->rio_cnt < n)
        cnt = rp->rio_cnt;
    memcpy(rp->rio_bufptr, usrbuf, cnt);
    rp->rio_bufptr += cnt;
    rp->rio_cnt -= cnt;
    return cnt;
}

ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n){
    size_t nleft = n;
    ssize_t nread;
    char *bufp = usrbuf;

    while(nleft > 0){
        if((nread = rio_read(rp, bufp, nleft)) < 0){
            if(errno == EINTR)
                nread = 0; /* interrupted by sig handler return, call read() again */
            else
                return -1;
        }
        else if(nread == 0)
            break; /* EOF */
        nleft -= nread;
        bufp += nread;
    }
    return (n - nleft);
}

ssize_t rio_writenb(rio_t *rp, void *usrbuf, size_t n){
    size_t nleft = n;
    ssize_t nwrite;
    char *bufp = usrbuf;

    while(nleft > 0){
        if((nwrite = rio_write(rp, bufp, nleft)) < 0){
            if(errno == EINTR)
                nwrite = 0;
            else
                return -1;
        }
        nleft -= nwrite;
        bufp += nwrite;
    }
    return (n - nleft);
}
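rio is the buffered robust-I/O layer in the CS:APP style: rio_readnb keeps re-issuing read() across short counts and EINTR until n bytes arrive or EOF, so callers can read binary records in one call. Typical use for a dataset file (the file name is hypothetical):

```cpp
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include "rio.h"

int main(){
    int fd = open("train.dat", O_RDONLY);   // hypothetical data file
    if(fd < 0){ perror("open"); return 1; }

    rio_t rio;
    rio_readinitb(&rio, fd, 0);             // type 0 = read buffer

    double buf[256];
    ssize_t n = rio_readnb(&rio, buf, sizeof(buf)); // short only at EOF
    printf("read %zd bytes\n", n);

    close(fd);
    return 0;
}
```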
--------------------------------------------------------------------------------
/src/MLP.cpp:
--------------------------------------------------------------------------------

#include "MLP.h"
#include <cstring>

using namespace std;

MLP::MLP() : TrainComponent(Supervise, "MLP"), numLayer(0), gaussian(false) {}

MLP::MLP(const char *file) : TrainComponent(Supervise, "MLP"),
    numLayer(0), gaussian(false)
{
    FILE* fd = fopen(file, "rb");
    loadModel(fd);
    fclose(fd);
}

MLP::~MLP(){
    for(int i = 0; i < numLayer; i++){
        delete layers[i];
    }
}

void MLP::trainBatch(int size){
    for(int i = 0; i < numLayer; i++){
        if(i != 0){
            layers[i]->setInput(layers[i-1]->getOutput());
        }
        layers[i]->forward(size);
    }

    for(int i = numLayer-1; i >= 0; i--){
        if(i == numLayer-1){
            layers[i]->backpropagate(size, NULL);
        }else{
            layers[i]->backpropagate(size, layers[i+1]);
        }
    }
}

void MLP::runBatch(int size){

    for(int i = 0; i < numLayer; i++){
        if(i != 0){
            layers[i]->setInput(layers[i-1]->getOutput());
        }
        layers[i]->forward(size);
    }
}

void MLP::setLearningRate(double lr){
    this->learningRate = lr;
    for(int i = 0; i < numLayer; i++){
        layers[i]->setLearningRate(lr);
    }
}

void MLP::setInput(double *input){
    layers[0]->setInput(input);
}

int MLP::getOutputNumber(){
    return layers[numLayer-1]->getOutputNumber();
}

int MLP::getInputNumber(){
    return layers[0]->getInputNumber();
}

void MLP::setLabel(double *label){
    layers[numLayer-1]->setLabel(label);
}

double* MLP::getOutput(){
    return layers[numLayer-1]->getOutput();
}

double* MLP::getLabel(){
    return layers[numLayer-1]->getLabel();
}

void MLP::saveModel(FILE *fd){
    fwrite(&numLayer, sizeof(int), 1, fd);
    char layerName[20];
    for(int i = 0; i < numLayer; i++){
        strcpy(layerName, layers[i]->getLayerName());
        fwrite(layerName, sizeof(layerName), 1, fd);
        layers[i]->saveModel(fd);
    }
}

void MLP::loadModel(FILE *fd){
    fread(&numLayer, sizeof(int), 1, fd);
    char layerName[20];

    for(int i = 0; i < numLayer; i++){
        fread(layerName, sizeof(layerName), 1, fd);
        printf("Layer %d : %s\n", (i+1), layerName);
        if(strcmp(layerName, "Tanh") == 0){
            layers[i] = new TanhLayer(fd);
        }else if(strcmp(layerName, "Sigmoid") == 0){
            layers[i] = new SigmoidLayer(fd);
        }else if(strcmp(layerName, "Softmax") == 0){
            layers[i] = new SoftmaxLayer(fd);
        }else if(strcmp(layerName, "Logistic") == 0){
            layers[i] = new Logistic(fd);
        }
    }
}

void MLP::operationPerEpoch(int k) {
}
65 | data.loadData(); 66 | 67 | MLP mlp; 68 | MLPLayer *firstLayer = new SigmoidLayer(data.getFeatureNumber(), 2000); 69 | Logistic *secondLayer = new Logistic(2000, data.getLabelNumber()); 70 | mlp.addLayer(firstLayer); 71 | mlp.addLayer(secondLayer); 72 | 73 | TrainModel mlpModel(mlp); 74 | mlpModel.train(&data, 0.01, 1, 1000); 75 | } 76 | 77 | void testTCGATwoLayerTraining(){ 78 | TCGADataset data; 79 | data.loadData(); 80 | 81 | MLP mlp; 82 | MLPLayer *firstLayer = new SigmoidLayer(data.getFeatureNumber(), 2000); 83 | MLPLayer *secondLayer = new SigmoidLayer(2000, 2000); 84 | Logistic *thirdLayer = new Logistic(2000, data.getLabelNumber()); 85 | mlp.addLayer(firstLayer); 86 | mlp.addLayer(secondLayer); 87 | mlp.addLayer(thirdLayer); 88 | 89 | TrainModel mlpModel(mlp); 90 | mlpModel.train(&data, 0.01, 1, 1000); 91 | } 92 | 93 | int main(){ 94 | srand(1); 95 | //testWFICA(); 96 | testMNIST(); 97 | //testMNISTLoading(); 98 | 99 | //testMNISTGaussian(); 100 | //testTCGATraining(); 101 | //testTCGATwoLayerTraining(); 102 | return 0; 103 | } 104 | -------------------------------------------------------------------------------- /scripts/create_image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import PIL.Image 4 | import sys 5 | import numpy 6 | 7 | 8 | def scale_to_unit_interval(ndar, eps=1e-8): 9 | """ Scales all values in the ndarray ndar to be between 0 and 1 """ 10 | ndar = ndar.copy() 11 | ndar -= ndar.min() 12 | ndar *= 1.0 / (ndar.max() + eps) 13 | return ndar 14 | 15 | def create_image(img_data, nrow, ncol, outfile, scale = True): 16 | space = 1 17 | N = img_data.shape[0] 18 | img_H = img_data.shape[1] 19 | img_W = img_data.shape[2] 20 | if(scale): 21 | for i in range(N): 22 | img_data[i] = scale_to_unit_interval(img_data[i]) 23 | out_shape = numpy.zeros((nrow * img_H + nrow - 1, ncol * img_W + ncol - 1), dtype = "uint8") 24 | for i in range(nrow): 25 | for j in range(ncol): 26 | out_shape[(img_H + 1) * i : (img_H + 1) * i + img_H, 27 | (img_W + 1) * j : (img_W + 1) * j + img_W] = img_data[i * ncol + j] * 255 28 | image = PIL.Image.fromarray(out_shape) 29 | image.save(outfile) 30 | 31 | def print_W_image_from_file(filename): 32 | f = open(filename) 33 | img_H = 28 34 | for epcho in range(15): 35 | img_data = [] 36 | for i in range(100): 37 | arr = [] 38 | for j in range(img_H): 39 | line = f.readline() 40 | arr.append([float(x) for x in line.split()]) 41 | img_data.append(arr) 42 | create_image(numpy.array(img_data), 10, 10, "../result/weight_epcho_%d.png" % epcho) 43 | 44 | def print_sample_image_from_file(filename): 45 | f = open(filename) 46 | img_H = 28 47 | n_sample = 10 48 | n_node = 20 49 | img_data = [] 50 | for i in range(n_sample): 51 | for k in range(n_node): 52 | arr = [] 53 | for j in range(img_H): 54 | line = f.readline() 55 | arr.append([float(x) for x in line.split()]) 56 | img_data.append(arr) 57 | create_image(numpy.array(img_data), n_sample, n_node, "../result/sample.png", False) 58 | 59 | def print_AM_image(filename="../result/AM.txt"): 60 | f = open(filename) 61 | img_H = 28 62 | nrow = 20 63 | ncol = 20 64 | img_data = [] 65 | for i in range(nrow): 66 | for k in range(ncol): 67 | arr = [] 68 | for j in range(img_H): 69 | line = f.readline() 70 | arr.append([float(x) for x in line.split()]) 71 | img_data.append(arr) 72 | create_image(numpy.array(img_data), nrow, ncol, "../result/AM.png") 73 | 74 | def print_da_weight_from_file(filename="da_weight.txt"): 75 | f = open(filename) 76 | img_H = 28 
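    # The weight file written by dump_weight() in old_version/my_da.c precedes
    # each 28-row weight block with a "Hidden Node %d" header line; the extra
    # f.readline() at the top of the loop below exists to skip that header.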
77 | img_data = [] 78 | for i in range(100): 79 | arr = [] 80 | line = f.readline() 81 | for j in range(img_H): 82 | line = f.readline() 83 | arr.append([float(x) for x in line.split()]) 84 | img_data.append(arr) 85 | create_image(numpy.array(img_data), 10, 10, "da_weight_corrupt.png") 86 | 87 | if __name__ == "__main__": 88 | #print_da_weight_from_file("da_weight_corrupt.txt") 89 | #print_W_image_from_file("../result/da_weight.txt") 90 | #print_sample_image_from_file("../result/rbm_sample.txt") 91 | #create_image(sys.argv[1], 28, 28) 92 | print_AM_image("../result/MNISTDBN_FifthLayer_AM.txt") 93 | -------------------------------------------------------------------------------- /include/RBM.h: -------------------------------------------------------------------------------- 1 | #include "Config.h" 2 | #include "Dataset.h" 3 | #include "Utility.h" 4 | #include "TrainModel.h" 5 | #include 6 | #include 7 | #include 8 | 9 | #ifndef _RBM_H 10 | #define _RBM_H 11 | 12 | class MultiLayerRBM; 13 | 14 | class RBM : public UnsuperviseTrainComponent { 15 | public: 16 | RBM(int, int); 17 | RBM(const char *); 18 | RBM(FILE *); 19 | ~RBM(); 20 | 21 | void setWeightFile(const char *); 22 | void dumpWeight(int, int); 23 | 24 | void beforeTraining(int); 25 | void afterTraining(int); 26 | void trainBatch(int); 27 | void runBatch(int); 28 | void setLearningRate(double lr); 29 | void setPersistent(bool p) { persistent = p; }; 30 | double* getOutput() { return h1; } 31 | double* getTransOutput() { return ph1; } 32 | int getOutputNumber() { return numHid; } 33 | int getInputNumber() { return numVis; } 34 | double* getHBias() { return hbias; } 35 | double* getVBias() { return vbias; } 36 | double getTrainingCost(int, int); 37 | void operationPerEpoch(); 38 | void setInput(double* in); 39 | void setSparsity(bool b, double p = 0.0, double numda = 0.9, double slr = 0.01) 40 | { 41 | this->sparsity = b; 42 | this->p = p; 43 | this->numda = numda; 44 | this->q = 1.0; 45 | this->slr = slr; 46 | } 47 | 48 | void saveModel(FILE* modelFileFd); 49 | void getWeightTrans(double *weight); 50 | 51 | void generateSample(const char*, double*, int); 52 | void dumpSample(FILE*, double*, int); 53 | void dumpSampleBinary(FILE*, double*, int); 54 | 55 | void setGaussianVisible(bool b) { 56 | this->binVis = !b; 57 | } 58 | void setGaussianHidden(bool b) { 59 | this->binHid = !b; 60 | } 61 | private: 62 | void getHProb(const double *v, double *ph, const int size); 63 | void getHSample(const double *ph, double *h, const int size); 64 | void getVProb(const double *h, double *pv, const int size); 65 | void getVSample(const double *pv, double *v, const int size); 66 | void gibbsSampleHVH(double *hStart, double *h, double *ph, 67 | double *v, double *pv, const int step, const int size); 68 | void getFE(const double *v, double *FE, const int size); 69 | double getPL(double *v, const int size); 70 | double getReconstructCost(double *v, double *pv, int size); 71 | double getSquareReconstructCost(double *v1, double *v2, int size); 72 | 73 | void runChain(int, int); 74 | void updateWeight(int); 75 | void updateBias(int); 76 | 77 | void loadModel(FILE*); 78 | void allocateBuffer(int); 79 | void freeBuffer(); 80 | void getAMDelta(int, double*); 81 | void updateAMSample(int, double); 82 | 83 | int numVis, numHid; 84 | double *weight; 85 | double *hbias, *vbias; 86 | double *v1, *v2, *h1, *h2; 87 | double *ph1, *ph2, *pv; 88 | double *chainStart; 89 | double *AMdelta; //activation maximization 90 | double *AMSample; 91 | 92 | double learningRate; 93 
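    // When true, contrastive divergence runs in "persistent" (PCD) mode:
    // the negative-phase Gibbs chain (chainStart) is presumably kept between
    // mini-batches rather than restarted from the data each batch.
    // Toggled with setPersistent() above; see src/CBLAS/RBM.cpp for details.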
| bool persistent; 94 | 95 | char *weightFile; 96 | friend class MultiLayerRBM; 97 | 98 | double p, q, numda, slr; 99 | bool sparsity; 100 | double *hderiv; 101 | 102 | bool gaussian; 103 | bool binVis, binHid; //whether is binary visible unit and hidden unit 104 | }; 105 | 106 | #endif 107 | -------------------------------------------------------------------------------- /include/MLPLayer.h: -------------------------------------------------------------------------------- 1 | #include "Config.h" 2 | #include "Utility.h" 3 | #include 4 | #include 5 | 6 | #ifndef _MLPLAYER_H 7 | #define _MLPLAYER_H 8 | 9 | enum UnitType { Sigmoid, Tanh , Softmax }; 10 | 11 | typedef double MLPBuffer[maxUnit*maxUnit]; 12 | 13 | class MLP; 14 | 15 | class MLPLayer { 16 | public: 17 | MLPLayer(int, int, const char* name = "MLPLayer"); 18 | MLPLayer(FILE* modelFileFd, const char *name = "MLPLayer"); 19 | MLPLayer(const char*, const char *name = "MLPLayer"); 20 | MLPLayer(int, int, double*, double*, const char *name = "MLPLayer"); 21 | virtual ~MLPLayer(); 22 | virtual void forward(int); 23 | virtual void backpropagate(int size, MLPLayer *prevLayer); 24 | 25 | inline void setLearningRate(double lr) { learningRate = lr; } 26 | inline void setInput(double* input) { in = input; } 27 | inline int getInputNumber() { return numIn; } 28 | inline int getOutputNumber() { return numOut; } 29 | inline double* getDelta() { return delta; } 30 | inline double* getWeight() { return weight; } 31 | inline double* getBias() { return bias; } 32 | double* getOutput(); 33 | double* getInput() { return in; } 34 | inline void setUnitType(UnitType t) { unitType = t; } 35 | 36 | virtual void setLabel(double* label) { } //用于最后一层有监督训练 37 | virtual double* getLabel() { return NULL; } 38 | 39 | virtual void saveModel(FILE* modelFileFd); 40 | 41 | const char* getLayerName() { return layerName; } 42 | void getWeightTrans(double*); 43 | protected: 44 | void initializeWeight(){ } //构造函数中不能调用虚函数 45 | void initializeBias(){ memset(bias, 0, numOut*sizeof(double)); } 46 | private: 47 | virtual void computeDelta(int, MLPLayer*); 48 | virtual void updateWeight(int); 49 | virtual void updateBias(int); 50 | virtual void loadModel(FILE* modelFileFd); 51 | 52 | virtual void computeNeuron(int) = 0; 53 | virtual void computeNeuronDerivative(double*, int) = 0; 54 | void init(); 55 | int numIn, numOut; 56 | double *weight, *delta, *bias; 57 | UnitType unitType; 58 | 59 | double *in, *out; 60 | double learningRate; 61 | char layerName[20]; 62 | friend class MLP; 63 | }; 64 | 65 | class SigmoidLayer : public MLPLayer { 66 | public: 67 | SigmoidLayer(int, int); 68 | SigmoidLayer(FILE* modelFileFd); 69 | SigmoidLayer(const char*); 70 | SigmoidLayer(int, int, double*, double*); 71 | private: 72 | void computeNeuron(int); 73 | void computeNeuronDerivative(double*, int); 74 | void initializeWeight(); 75 | }; 76 | 77 | class TanhLayer : public MLPLayer { 78 | public: 79 | TanhLayer(int, int); 80 | TanhLayer(FILE* modelFileFd); 81 | TanhLayer(const char*); 82 | TanhLayer(int, int, double*, double*); 83 | private: 84 | void computeNeuron(int); 85 | void computeNeuronDerivative(double*, int); 86 | void initializeWeight(); 87 | }; 88 | 89 | class ReLULayer : public MLPLayer { 90 | public: 91 | ReLULayer(int, int); 92 | ReLULayer(FILE* modelFileFd); 93 | ReLULayer(const char*); 94 | ReLULayer(int, int, double*, double*); 95 | private: 96 | void computeNeuron(int); 97 | void computeNeuronDerivative(double*, int); 98 | void initializeWeight(); 99 | }; 100 | 101 | class 
SoftmaxLayer : public MLPLayer { 102 | public: 103 | SoftmaxLayer(int, int, const char* name = "Softmax"); 104 | SoftmaxLayer(FILE* modelFileFd, const char* name = "Softmax"); 105 | SoftmaxLayer(const char*, const char* name = "Softmax"); 106 | SoftmaxLayer(int, int, double*, double*, const char* name = "Softmax"); 107 | private: 108 | void computeNeuron(int); 109 | void computeNeuronDerivative(double*, int); 110 | void initializeWeight(); 111 | }; 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /src/Utility.cpp: -------------------------------------------------------------------------------- 1 | #include "Config.h" 2 | #include "Utility.h" 3 | #include 4 | #include 5 | 6 | static double _I[maxUnit * maxBatchSize]; 7 | 8 | const double expThreshold = 50.0; 9 | 10 | double* I(){ 11 | static bool hasVisit = false; 12 | if(!hasVisit){ 13 | for(int i = 0; i < maxUnit*maxBatchSize; i++) 14 | _I[i] = 1.0; 15 | hasVisit = true; 16 | } 17 | return _I; 18 | } 19 | 20 | void initializeWeightSigmoid(double *weight, int numIn, int numOut){ 21 | double low, high; 22 | 23 | low = -4 * sqrt((double)6 / (numIn + numOut)); 24 | high = 4 * sqrt((double)6 / (numIn + numOut)); 25 | 26 | for(int i = 0; i < numIn*numOut; i++){ 27 | weight[i] = random_double(low, high); 28 | } 29 | } 30 | 31 | void initializeWeightTanh(double *weight, int numIn, int numOut){ 32 | double low, high; 33 | 34 | low = -sqrt((double)6 / (numIn + numOut)); 35 | high = sqrt((double)6 / (numIn + numOut)); 36 | 37 | for(int i = 0; i < numIn*numOut; i++){ 38 | weight[i] = random_double(low, high); 39 | } 40 | } 41 | 42 | void softmax(double *arr, int size){ 43 | double sum = 0.0; 44 | double maxval = -DBL_MAX; 45 | 46 | for(int i = 0; i < size; i++){ 47 | if(maxval < arr[i]) 48 | maxval = arr[i]; 49 | } 50 | 51 | for(int i = 0; i < size; i++){ 52 | arr[i] = exp(arr[i]-maxval); 53 | sum += arr[i]; 54 | } 55 | 56 | for(int i = 0; i < size; i++) 57 | arr[i] /= sum; 58 | } 59 | 60 | int maxElem(double *arr, int size){ 61 | int maxe = 0; 62 | for(int i = 1; i < size; i++){ 63 | if(arr[i] > arr[maxe]) 64 | maxe = i; 65 | } 66 | return maxe; 67 | } 68 | 69 | /* 70 | * exp function with check to avoid `inf` and `nan` 71 | */ 72 | double expc(double x){ 73 | if(x > expThreshold){ 74 | return exp(expThreshold); 75 | }else if(x < -expThreshold){ 76 | return exp(-expThreshold); 77 | } 78 | return exp(x); 79 | } 80 | 81 | /* 82 | * sigmoid function with check to avoid `inf` and `nan` 83 | */ 84 | double sigmoidc(double x){ 85 | if(x > expThreshold){ 86 | return 1; 87 | }else if(x < -expThreshold){ 88 | return 0; 89 | } 90 | return 1.0 / (1.0 + exp(-x)); 91 | } 92 | 93 | /* 94 | * softplus function with check to avoid `inf` and `nan` 95 | */ 96 | double softplusc(double x){ 97 | if(x > expThreshold){ 98 | return softplus(expThreshold); 99 | }else if(x < -expThreshold){ 100 | return softplus(-expThreshold); 101 | } 102 | return softplus(x); 103 | } 104 | 105 | void multiNormial(double *px, double *x, int size){ 106 | double p = random_double(0, 1); 107 | double sum = 0.0; 108 | int idx = 0; 109 | for(int i = 0; i < size; i++){ 110 | sum += px[i]; 111 | if(sum >= p){ 112 | idx = i; 113 | break; 114 | } 115 | } 116 | memset(x, 0, sizeof(double)*size); 117 | x[idx] = 1.0; 118 | } 119 | 120 | void binomial(double *px, double *x, int size){ 121 | for(int i = 0; i < size; i++){ 122 | double p = random_double(0, 1); 123 | if(p < px[i]){ 124 | x[i] = 1.0; 125 | }else{ 126 | x[i] = 0.0; 127 | } 128 | } 129 
| } 130 | 131 | double squareNorm(double *arr, int n, int size){ 132 | double res = 0; 133 | for(int i = 0; i < size; i++){ 134 | double curNorm = 0; 135 | for(int j = 0; j < n; j++){ 136 | curNorm += arr[i*n+j] * arr[i*n+j]; 137 | } 138 | res += sqrt(curNorm); 139 | } 140 | return res / size; 141 | } 142 | 143 | double normalize(double *arr, int n, int size){ 144 | for(int i = 0; i < size; i++){ 145 | double mean = 0; 146 | for(int j = 0; j < n; j++){ 147 | mean += arr[i*n+j]; 148 | } 149 | mean /= n; 150 | for(int j = 0; j < n; j++){ 151 | arr[i*n+j] -= mean; 152 | } 153 | double square = 0; 154 | for(int j = 0; j < n; j++){ 155 | square += arr[i*n+j] * arr[i*n+j]; 156 | } 157 | square /= n - 1; 158 | square = sqrt(square); 159 | for(int j = 0; j < n; j++){ 160 | arr[i*n+j] /= square; 161 | } 162 | } 163 | } 164 | 165 | double corrupt(const double* x, double* nx, int n, double level){ 166 | for(int i = 0; i < n; i++){ 167 | if(random_double(0, 1) < level){ 168 | nx[i] = 0; 169 | }else{ 170 | nx[i] = x[i]; 171 | } 172 | } 173 | } 174 | 175 | void transMatrix(double* ori, double* trans, int nrow, int ncol){ 176 | for(int i = 0; i < nrow; i++){ 177 | for(int j = 0; j < ncol; j++){ 178 | trans[j*nrow+i] = ori[i*ncol+j]; 179 | } 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /src/DeepAutoEncoderDriver.cpp: -------------------------------------------------------------------------------- 1 | #include "Dataset.h" 2 | #include "TrainModel.h" 3 | #include "DeepAutoEncoder.h" 4 | #include "RBM.h" 5 | 6 | using namespace std; 7 | 8 | void testMNISTTraining(){ 9 | MNISTDataset data; 10 | data.loadData(); 11 | 12 | if(true){ 13 | int sizes[] = {data.getFeatureNumber(), 1000, 500, 250, 2}; 14 | DeepAutoEncoder dad(4, sizes); 15 | dad.setModelFile("result/dad_without_pretrain.dat"); 16 | TrainModel model(dad); 17 | model.train(&data, 0.01, 10, 100); 18 | } 19 | 20 | if(false){ 21 | int sizes[] = {data.getFeatureNumber(), 500}; 22 | DeepAutoEncoder dad(1, sizes); 23 | TrainModel model(dad); 24 | model.train(&data, 0.01, 10, 100); 25 | } 26 | } 27 | 28 | void testMNISTFineTune(){ 29 | MNISTDataset data; 30 | data.loadData(); 31 | 32 | if(true){ 33 | MultiLayerRBM* multirbm = new MultiLayerRBM("result/MNISTMultiLayerRBM_pretrain2.dat"); 34 | DeepAutoEncoder dad(*multirbm); 35 | delete multirbm; 36 | dad.setModelFile("result/dad_with_pretrain.dat"); 37 | TrainModel model(dad); 38 | model.train(&data, 0.01, 10, 100); 39 | } 40 | 41 | if(false){ 42 | MultiLayerRBM* multirbm = new MultiLayerRBM("result/MNISTMultiLayerRBM_pretrain_small.dat"); 43 | DeepAutoEncoder dad(*multirbm); 44 | delete multirbm; 45 | TrainModel model(dad); 46 | model.train(&data, 0.01, 10, 100); 47 | } 48 | } 49 | 50 | void testDump(){ 51 | MNISTDataset data; 52 | data.loadData(); 53 | DeepAutoEncoder dad("result/dad_with_pretrain.dat"); 54 | 55 | TransmissionDataset out(data, dad); 56 | out.dumpTrainingData("result/MNIST_DeepAd.bin"); 57 | } 58 | 59 | void testTCGATraining(){ 60 | TCGADataset data; 61 | data.loadData("../data/TCGA/TCGA-gene-top2000-all.txt", "../data/TCGA/TCGA-gene-top2000-valid.txt"); 62 | 63 | int sizes[] = {data.getFeatureNumber(), 1000, 500, 200, 2}; 64 | DeepAutoEncoder dad(4, sizes); 65 | //dad.setModelFile("result/dad_without_pretrain.dat"); 66 | TrainModel model(dad); 67 | model.train(&data, 0.01, 10, 100); 68 | } 69 | 70 | void testTCGAFineTune(){ 71 | TCGADataset data; 72 | data.loadData("../data/TCGA/TCGA-gene-top2000-all.txt", 
"../data/TCGA/TCGA-gene-top2000-valid.txt"); 73 | 74 | if(false){ 75 | MultiLayerRBM* multirbm = new MultiLayerRBM("result/TCGAMultiLayerRBM_2000gene_4layer_2000_1000_5000_200_2.dat"); 76 | DeepAutoEncoder dad(*multirbm); 77 | delete multirbm; 78 | dad.setModelFile("result/TCGA_dad_4layer_2000_1000_5000_200_2.dat"); 79 | TrainModel model(dad); 80 | model.train(&data, 0.01, 10, 1000); 81 | } 82 | 83 | if(true){ 84 | DeepAutoEncoder dad("result/TCGA_dad_4layer_2000_1000_5000_200_2_600epoch.dat"); 85 | dad.setModelFile("result/TCGA_dad_4layer_2000_1000_5000_200_2.dat"); 86 | TrainModel model(dad); 87 | model.train(&data, 0.01, 10, 1000); 88 | } 89 | } 90 | 91 | void testDumpTCGA(){ 92 | TCGADataset data; 93 | data.loadData("../data/TCGA/TCGA-gene-top2000-all.txt", "../data/TCGA/TCGA-gene-top2000-valid.txt"); 94 | DeepAutoEncoder dad("result/TCGA_dad_4layer_2000_1000_5000_200_2.dat"); 95 | 96 | TransmissionDataset out(data, dad); 97 | out.dumpTrainingData("result/TCGA_DeepAd.bin"); 98 | } 99 | 100 | void debug(){ 101 | MNISTDataset data; 102 | data.loadData(); 103 | 104 | MultiLayerRBM* multirbm = new MultiLayerRBM("result/MNISTMultiLayerRBM_pretrain_small.dat"); 105 | 106 | RBM* rbm = (*multirbm)[0]; 107 | rbm->beforeTraining(10); 108 | rbm->setPersistent(false); 109 | rbm->setGaussianHidden(true); 110 | TrainModel rbmmodel(*rbm); 111 | printf("%lf\n", rbmmodel.getValidError(&data, 10)); 112 | 113 | DeepAutoEncoder dad(*multirbm); 114 | delete multirbm; 115 | 116 | TrainModel model(dad); 117 | dad.beforeTraining(10); 118 | printf("%lf\n", model.getValidError(&data, 10)); 119 | } 120 | 121 | void debug2(){ 122 | MNISTDataset data; 123 | data.loadData(); 124 | 125 | RBM rbm(784, 500); 126 | rbm.setPersistent(false); 127 | rbm.setGaussianHidden(true); 128 | TrainModel model(rbm); 129 | model.train(&data, 0.001, 10, 5); 130 | 131 | printf("%lf\n", model.getValidError(&data, 10)); 132 | } 133 | 134 | void debug3(){ 135 | MNISTDataset data; 136 | data.loadData(); 137 | 138 | RBM rbm(784, 500); 139 | rbm.setPersistent(false); 140 | rbm.setGaussianHidden(true); 141 | TrainModel model(rbm); 142 | model.train(&data, 0.001, 10, 5); 143 | 144 | printf("%lf\n", model.getValidError(&data, 10)); 145 | } 146 | 147 | void testMNISTDeepADMaximization(){ 148 | 149 | } 150 | 151 | int main(){ 152 | //testMNISTTraining(); 153 | //testMNISTFineTune(); 154 | //testDump(); 155 | //debug(); 156 | //debug2(); 157 | //testTCGATraining(); 158 | testTCGAFineTune(); 159 | //testDumpTCGA(); 160 | } 161 | -------------------------------------------------------------------------------- /py_version/utils.py: -------------------------------------------------------------------------------- 1 | """ This file contains different utility functions that are not connected 2 | in anyway to the networks presented in the tutorials, but rather help in 3 | processing the outputs into a more understandable way. 4 | 5 | For example ``tile_raster_images`` helps in generating a easy to grasp 6 | image from a set of samples or weights. 
7 | """ 8 | 9 | 10 | import numpy 11 | 12 | 13 | def scale_to_unit_interval(ndar, eps=1e-8): 14 | """ Scales all values in the ndarray ndar to be between 0 and 1 """ 15 | ndar = ndar.copy() 16 | ndar -= ndar.min() 17 | ndar *= 1.0 / (ndar.max() + eps) 18 | return ndar 19 | 20 | 21 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), 22 | scale_rows_to_unit_interval=True, 23 | output_pixel_vals=True): 24 | """ 25 | Transform an array with one flattened image per row, into an array in 26 | which images are reshaped and layed out like tiles on a floor. 27 | 28 | This function is useful for visualizing datasets whose rows are images, 29 | and also columns of matrices for transforming those rows 30 | (such as the first layer of a neural net). 31 | 32 | :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can 33 | be 2-D ndarrays or None; 34 | :param X: a 2-D array in which every row is a flattened image. 35 | 36 | :type img_shape: tuple; (height, width) 37 | :param img_shape: the original shape of each image 38 | 39 | :type tile_shape: tuple; (rows, cols) 40 | :param tile_shape: the number of images to tile (rows, cols) 41 | 42 | :param output_pixel_vals: if output should be pixel values (i.e. int8 43 | values) or floats 44 | 45 | :param scale_rows_to_unit_interval: if the values need to be scaled before 46 | being plotted to [0,1] or not 47 | 48 | 49 | :returns: array suitable for viewing as an image. 50 | (See:`PIL.Image.fromarray`.) 51 | :rtype: a 2-d array with same dtype as X. 52 | 53 | """ 54 | 55 | assert len(img_shape) == 2 56 | assert len(tile_shape) == 2 57 | assert len(tile_spacing) == 2 58 | 59 | # The expression below can be re-written in a more C style as 60 | # follows : 61 | # 62 | # out_shape = [0,0] 63 | # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] - 64 | # tile_spacing[0] 65 | # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] - 66 | # tile_spacing[1] 67 | out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp 68 | in zip(img_shape, tile_shape, tile_spacing)] 69 | 70 | if isinstance(X, tuple): 71 | assert len(X) == 4 72 | # Create an output numpy ndarray to store the image 73 | if output_pixel_vals: 74 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 75 | dtype='uint8') 76 | else: 77 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 78 | dtype=X.dtype) 79 | 80 | #colors default to 0, alpha defaults to 1 (opaque) 81 | if output_pixel_vals: 82 | channel_defaults = [0, 0, 0, 255] 83 | else: 84 | channel_defaults = [0., 0., 0., 1.] 
85 | 86 | for i in xrange(4): 87 | if X[i] is None: 88 | # if channel is None, fill it with zeros of the correct 89 | # dtype 90 | dt = out_array.dtype 91 | if output_pixel_vals: 92 | dt = 'uint8' 93 | out_array[:, :, i] = numpy.zeros(out_shape, 94 | dtype=dt) + channel_defaults[i] 95 | else: 96 | # use a recurrent call to compute the channel and store it 97 | # in the output 98 | out_array[:, :, i] = tile_raster_images( 99 | X[i], img_shape, tile_shape, tile_spacing, 100 | scale_rows_to_unit_interval, output_pixel_vals) 101 | return out_array 102 | 103 | else: 104 | # if we are dealing with only one channel 105 | H, W = img_shape 106 | Hs, Ws = tile_spacing 107 | 108 | # generate a matrix to store the output 109 | dt = X.dtype 110 | if output_pixel_vals: 111 | dt = 'uint8' 112 | out_array = numpy.zeros(out_shape, dtype=dt) 113 | 114 | for tile_row in xrange(tile_shape[0]): 115 | for tile_col in xrange(tile_shape[1]): 116 | if tile_row * tile_shape[1] + tile_col < X.shape[0]: 117 | this_x = X[tile_row * tile_shape[1] + tile_col] 118 | if scale_rows_to_unit_interval: 119 | # if we should scale values to be between 0 and 1 120 | # do this by calling the `scale_to_unit_interval` 121 | # function 122 | this_img = scale_to_unit_interval( 123 | this_x.reshape(img_shape)) 124 | else: 125 | this_img = this_x.reshape(img_shape) 126 | # add the slice to the corresponding position in the 127 | # output array 128 | c = 1 129 | if output_pixel_vals: 130 | c = 255 131 | out_array[ 132 | tile_row * (H + Hs): tile_row * (H + Hs) + H, 133 | tile_col * (W + Ws): tile_col * (W + Ws) + W 134 | ] = this_img * c 135 | return out_array 136 | -------------------------------------------------------------------------------- /scripts/utils.py: -------------------------------------------------------------------------------- 1 | """ This file contains different utility functions that are not connected 2 | in anyway to the networks presented in the tutorials, but rather help in 3 | processing the outputs into a more understandable way. 4 | 5 | For example ``tile_raster_images`` helps in generating a easy to grasp 6 | image from a set of samples or weights. 7 | """ 8 | 9 | 10 | import numpy 11 | 12 | 13 | def scale_to_unit_interval(ndar, eps=1e-8): 14 | """ Scales all values in the ndarray ndar to be between 0 and 1 """ 15 | ndar = ndar.copy() 16 | ndar -= ndar.min() 17 | ndar *= 1.0 / (ndar.max() + eps) 18 | return ndar 19 | 20 | 21 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), 22 | scale_rows_to_unit_interval=True, 23 | output_pixel_vals=True): 24 | """ 25 | Transform an array with one flattened image per row, into an array in 26 | which images are reshaped and layed out like tiles on a floor. 27 | 28 | This function is useful for visualizing datasets whose rows are images, 29 | and also columns of matrices for transforming those rows 30 | (such as the first layer of a neural net). 31 | 32 | :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can 33 | be 2-D ndarrays or None; 34 | :param X: a 2-D array in which every row is a flattened image. 35 | 36 | :type img_shape: tuple; (height, width) 37 | :param img_shape: the original shape of each image 38 | 39 | :type tile_shape: tuple; (rows, cols) 40 | :param tile_shape: the number of images to tile (rows, cols) 41 | 42 | :param output_pixel_vals: if output should be pixel values (i.e. 
int8 43 | values) or floats 44 | 45 | :param scale_rows_to_unit_interval: if the values need to be scaled before 46 | being plotted to [0,1] or not 47 | 48 | 49 | :returns: array suitable for viewing as an image. 50 | (See:`PIL.Image.fromarray`.) 51 | :rtype: a 2-d array with same dtype as X. 52 | 53 | """ 54 | 55 | assert len(img_shape) == 2 56 | assert len(tile_shape) == 2 57 | assert len(tile_spacing) == 2 58 | 59 | # The expression below can be re-written in a more C style as 60 | # follows : 61 | # 62 | # out_shape = [0,0] 63 | # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] - 64 | # tile_spacing[0] 65 | # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] - 66 | # tile_spacing[1] 67 | out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp 68 | in zip(img_shape, tile_shape, tile_spacing)] 69 | 70 | if isinstance(X, tuple): 71 | assert len(X) == 4 72 | # Create an output numpy ndarray to store the image 73 | if output_pixel_vals: 74 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 75 | dtype='uint8') 76 | else: 77 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 78 | dtype=X.dtype) 79 | 80 | #colors default to 0, alpha defaults to 1 (opaque) 81 | if output_pixel_vals: 82 | channel_defaults = [0, 0, 0, 255] 83 | else: 84 | channel_defaults = [0., 0., 0., 1.] 85 | 86 | for i in xrange(4): 87 | if X[i] is None: 88 | # if channel is None, fill it with zeros of the correct 89 | # dtype 90 | dt = out_array.dtype 91 | if output_pixel_vals: 92 | dt = 'uint8' 93 | out_array[:, :, i] = numpy.zeros(out_shape, 94 | dtype=dt) + channel_defaults[i] 95 | else: 96 | # use a recurrent call to compute the channel and store it 97 | # in the output 98 | out_array[:, :, i] = tile_raster_images( 99 | X[i], img_shape, tile_shape, tile_spacing, 100 | scale_rows_to_unit_interval, output_pixel_vals) 101 | return out_array 102 | 103 | else: 104 | # if we are dealing with only one channel 105 | H, W = img_shape 106 | Hs, Ws = tile_spacing 107 | 108 | # generate a matrix to store the output 109 | dt = X.dtype 110 | if output_pixel_vals: 111 | dt = 'uint8' 112 | out_array = numpy.zeros(out_shape, dtype=dt) 113 | 114 | for tile_row in xrange(tile_shape[0]): 115 | for tile_col in xrange(tile_shape[1]): 116 | if tile_row * tile_shape[1] + tile_col < X.shape[0]: 117 | this_x = X[tile_row * tile_shape[1] + tile_col] 118 | if scale_rows_to_unit_interval: 119 | # if we should scale values to be between 0 and 1 120 | # do this by calling the `scale_to_unit_interval` 121 | # function 122 | this_img = scale_to_unit_interval( 123 | this_x.reshape(img_shape)) 124 | else: 125 | this_img = this_x.reshape(img_shape) 126 | # add the slice to the corresponding position in the 127 | # output array 128 | c = 1 129 | if output_pixel_vals: 130 | c = 255 131 | out_array[ 132 | tile_row * (H + Hs): tile_row * (H + Hs) + H, 133 | tile_col * (W + Ws): tile_col * (W + Ws) + W 134 | ] = this_img * c 135 | return out_array 136 | -------------------------------------------------------------------------------- /src/AutoEncoder.cpp: -------------------------------------------------------------------------------- 1 | #include "Config.h" 2 | #include "Utility.h" 3 | #include "mkl_cblas.h" 4 | #include 5 | #include "AutoEncoder.h" 6 | 7 | static double temp[maxUnit*maxUnit], temp2[maxUnit*maxUnit]; 8 | static double temp3[maxUnit*maxUnit]; 9 | 10 | void AutoEncoder::beforeTraining(int size){ 11 | h = new double[size*numOut]; 12 | y = new double[size*numIn]; 13 | if(denoise){ 14 | nx = new 
double[size*numIn]; 15 | } 16 | } 17 | 18 | AutoEncoder::AutoEncoder(int numIn, int numOut, bool denoise) : 19 | numIn(numIn), numOut(numOut), denoise(denoise), 20 | UnsuperviseTrainComponent("AutoEncoder") 21 | { 22 | w = new double[numIn*numOut]; 23 | b = new double[numOut]; 24 | c = new double[numIn]; 25 | initializeWeightSigmoid(w, numIn, numOut); 26 | memset(b, 0, sizeof(double)*numOut); 27 | memset(c, 0, sizeof(double)*numIn); 28 | } 29 | 30 | AutoEncoder::~AutoEncoder(){ 31 | delete[] w; 32 | delete[] b; 33 | delete[] c; 34 | delete[] y; 35 | delete[] h; 36 | if(denoise) delete[] nx; 37 | } 38 | 39 | void AutoEncoder::trainBatch(int size){ 40 | if(denoise){ 41 | corrupt(x, nx, size*numIn, 0.3); 42 | }else{ 43 | nx = x; 44 | } 45 | getHFromX(nx, h, size); 46 | getYFromH(h, y, size); 47 | backpropagate(size); 48 | } 49 | 50 | void AutoEncoder::runBatch(int size){ 51 | getHFromX(x, h, size); 52 | getYFromH(h, y, size); 53 | } 54 | 55 | void AutoEncoder::setLearningRate(double lr){ 56 | this->lr = lr; 57 | } 58 | 59 | void AutoEncoder::setInput(double *input){ 60 | x = input; 61 | } 62 | 63 | void AutoEncoder::setLabel(double *label){ } 64 | 65 | int AutoEncoder::getInputNumber(){ 66 | return numIn; 67 | } 68 | 69 | double* AutoEncoder::getOutput(){ 70 | return h; 71 | } 72 | 73 | int AutoEncoder::getOutputNumber(){ 74 | return numOut; 75 | } 76 | 77 | double* AutoEncoder::getLabel(){ 78 | return NULL; 79 | } 80 | 81 | double AutoEncoder::getReconstructCost(double *x, double *y, int size){ 82 | double res; 83 | 84 | for(int i = 0; i < numIn * size; i++){ 85 | temp[i] = x[i] >= 1.0 ? 1.0 : x[i]; 86 | temp2[i] = log(y[i] + 1e-10); 87 | } 88 | res = cblas_ddot(size * numIn, temp, 1, temp2, 1); 89 | 90 | for(int i = 0; i < numIn * size; i++){ 91 | temp[i] = 1.0 - x[i] >= 1.0 ? 
1.0 : 1.0 - x[i]; 92 | temp2[i] = log(1.0 - y[i] + 1e-10); 93 | } 94 | 95 | res += cblas_ddot(size * numIn, temp, 1, temp2, 1); 96 | return -1.0 * res / size; 97 | } 98 | 99 | double AutoEncoder::getTrainingCost(int size, int numBatch){ 100 | return getReconstructCost(x, y, size); 101 | } 102 | 103 | void AutoEncoder::getHFromX(double *x, double *h, int size){ 104 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 105 | size, numOut, numIn, 106 | 1, x, numIn, w, numOut, 107 | 0, h, numOut); 108 | 109 | cblas_dger(CblasRowMajor, size, numOut, 110 | 1.0, I(), 1, b, 1, h, numOut); 111 | 112 | for(int i = 0; i < size * numOut; i++){ 113 | h[i] = sigmoid(h[i]); 114 | } 115 | } 116 | 117 | void AutoEncoder::getYFromH(double *h, double *y, int size){ 118 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 119 | size, numIn, numOut, 120 | 1, h, numOut, w, numOut, 121 | 0, y, numIn); 122 | 123 | cblas_dger(CblasRowMajor, size, numIn, 124 | 1.0, I(), 1, c, 1, y, numIn); 125 | 126 | for(int i = 0; i < size * numIn; i++){ 127 | y[i] = sigmoid(y[i]); 128 | } 129 | } 130 | 131 | void AutoEncoder::backpropagate(int size){ 132 | double* xydiff = temp; 133 | for(int i = 0; i < size * numIn; i++){ 134 | xydiff[i] = y[i] - x[i]; 135 | } 136 | 137 | // update dc = y-x 138 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 139 | numIn, 1, size, 140 | -1.0 * lr / size, xydiff, numIn, 141 | I(), 1, 1, c, 1); 142 | 143 | double* delta = temp3; 144 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 145 | numIn, numOut, size, 146 | 1, xydiff, numIn, h, numOut, 147 | 0, delta, numOut); 148 | 149 | double* hdelta = temp2; 150 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 151 | size, numOut, numIn, 152 | 1, xydiff, numIn, w, numOut, 153 | 0, hdelta, numOut); 154 | 155 | for(int i = 0; i < size * numOut; i++){ 156 | hdelta[i] = get_sigmoid_derivative(h[i]) * hdelta[i]; 157 | } 158 | 159 | // update db = sum(y-x)*w*(1-h)*h 160 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 161 | numOut, 1, size, 162 | -1.0 * lr / size, hdelta, numOut, 163 | I(), 1, 1, b, 1); 164 | 165 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 166 | numIn, numOut, size, 167 | 1, nx, numIn, hdelta, numOut, 168 | 1, delta, numOut); 169 | 170 | // update weight 171 | cblas_daxpy(numIn*numOut, -1.0 * lr / size, delta, 1, w, 1); 172 | } 173 | 174 | void AutoEncoder::operationPerEpoch(){ 175 | dumpWeight(100, 28); 176 | } 177 | 178 | void AutoEncoder::dumpWeight(int numDumpHid, int numVisPerLine, const char* weightFile){ 179 | if(weightFile == NULL) return; 180 | FILE *weightFd = fopen(weightFile, "a+"); 181 | 182 | for(int i = 0; i < numDumpHid; i++){ 183 | for(int j = 0; j < numIn; j++){ 184 | fprintf(weightFd, "%.5lf%s", w[j*numOut+i], 185 | ((j+1) % numVisPerLine == 0) ? 
"\n" : "\t"); 186 | } 187 | } 188 | 189 | fclose(weightFd); 190 | } 191 | 192 | -------------------------------------------------------------------------------- /include/Dataset.h: -------------------------------------------------------------------------------- 1 | extern "C" { 2 | #include 3 | } 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | #ifndef _DATASET_H_ 11 | #define _DATASET_H_ 12 | 13 | class TrainComponent; 14 | class BatchIterator; 15 | 16 | 17 | class SubDataset { 18 | public: 19 | SubDataset(int numSample, int numFeature, int numLabel, 20 | double *data, double *label) 21 | : numSample(numSample), numFeature(numFeature), 22 | numLabel(numLabel), data(data), label(label) { } 23 | private: 24 | int numSample, numFeature, numLabel; 25 | double *data; 26 | double *label; 27 | friend class SequentialBatchIterator; 28 | friend class RandomBatchIterator; 29 | }; 30 | 31 | class BatchIterator { 32 | public: 33 | BatchIterator(SubDataset* data, int batchSize) : 34 | data(data), batchSize(batchSize) { } 35 | virtual void first() = 0; 36 | virtual void next() = 0; 37 | virtual bool isDone() = 0; 38 | virtual int CurrentIndex() = 0; 39 | virtual double* CurrentDataBatch() = 0; 40 | virtual double* CurrentLabelBatch() = 0; 41 | virtual int getRealBatchSize() = 0; 42 | protected: 43 | SubDataset* data; 44 | int batchSize; 45 | }; 46 | 47 | 48 | class SequentialBatchIterator : public BatchIterator { 49 | public: 50 | SequentialBatchIterator(SubDataset* data, int batchSize) : 51 | BatchIterator(data, batchSize) 52 | { 53 | size = (data->numSample-1) / batchSize + 1; 54 | } 55 | void first() { cur = 0; } 56 | bool isDone() { return cur >= size; } 57 | void next() { cur++; } 58 | int CurrentIndex() { return cur; } 59 | 60 | double* CurrentDataBatch() { 61 | return data->data + data->numFeature * cur * batchSize; 62 | } 63 | double* CurrentLabelBatch() { 64 | return data->label + data->numLabel* cur * batchSize; 65 | } 66 | int getRealBatchSize(); 67 | 68 | private: 69 | int cur; 70 | int size; 71 | }; 72 | 73 | class RandomBatchIterator : public BatchIterator { 74 | public: 75 | RandomBatchIterator(SubDataset *data, int batchSize); 76 | void first(); 77 | bool isDone() { return cur >= size; } 78 | void next() { cur++; } 79 | int CurrentIndex() { return cur; } 80 | 81 | double* CurrentDataBatch() { 82 | return data->data + data->numFeature * randIndex[cur] * batchSize; 83 | } 84 | double* CurrentLabelBatch() { 85 | return data->label + data->numLabel* randIndex[cur] * batchSize; 86 | } 87 | int getRealBatchSize(); 88 | 89 | private: 90 | int size, cur; 91 | vector randIndex; 92 | }; 93 | 94 | class Dataset { 95 | public: 96 | Dataset(); 97 | 98 | inline double* getTrainingData(int sampleOffset){ 99 | return trainingData + numFeature * sampleOffset; 100 | } 101 | inline double* getTrainingLabel(int sampleOffset){ 102 | return trainingLabel + numLabel * sampleOffset; 103 | } 104 | inline double* getValidateData(int sampleOffset){ 105 | return validateData + numFeature * sampleOffset; 106 | } 107 | inline double* getValidateLabel(int sampleOffset){ 108 | return validateLabel + numLabel * sampleOffset; 109 | } 110 | inline int getTrainingNumber(){ 111 | return numTrain; 112 | } 113 | inline int getValidateNumber(){ 114 | return numValid; 115 | } 116 | inline int getFeatureNumber(){ 117 | return numFeature; 118 | } 119 | inline int getLabelNumber(){ 120 | return numLabel; 121 | } 122 | virtual ~Dataset(); 123 | SubDataset getTrainingSet(); 124 | SubDataset 
getValidateSet(); 125 | SubDataset getTestSet(); 126 | void dumpTrainingData(const char[]); 127 | void rowNormalize(); 128 | 129 | protected: 130 | void dumpData(const char[], int numData, double* data, double *label); 131 | int numFeature, numLabel; 132 | int numTrain, numValid, numTest; 133 | 134 | double *trainingData; 135 | double *validateData; 136 | double *testData; 137 | double *trainingLabel; 138 | double *validateLabel; 139 | double *testLabel; 140 | }; 141 | 142 | class TransmissionDataset : public Dataset { 143 | public: 144 | TransmissionDataset(Dataset& originData, TrainComponent& component); 145 | ~TransmissionDataset(); 146 | }; 147 | 148 | class TrivialDataset : public Dataset { 149 | public: 150 | void loadData(const char *trainingDataFile, const char *trainingLabelFile); 151 | ~TrivialDataset(); 152 | }; 153 | 154 | class MNISTDataset : public Dataset { 155 | public: 156 | void loadData(const char *trainingDataFile = "../data/train-images-idx3-ubyte", 157 | const char *trainingLabelFile = "../data/train-labels-idx1-ubyte"); 158 | ~MNISTDataset(); 159 | }; 160 | 161 | class SVMDataset : public Dataset { 162 | public: 163 | virtual void loadData(const char [], const char []); 164 | ~SVMDataset(){ } 165 | }; 166 | 167 | class TCGADataset : public SVMDataset { 168 | public: 169 | void loadData(const char tcgaTrainDataFile[] = "../data/TCGA/TCGA-gene-top5000-train.txt", 170 | const char tcgaValidDataFile[] = "../data/TCGA/TCGA-gene-top5000-valid.txt"); 171 | ~TCGADataset(){ } 172 | }; 173 | 174 | 175 | #endif 176 | -------------------------------------------------------------------------------- /src/TrainModel.cpp: -------------------------------------------------------------------------------- 1 | #include "Config.h" 2 | #include "TrainModel.h" 3 | #include "Utility.h" 4 | #include 5 | 6 | TrainModel::TrainModel(TrainComponent& comp) : component(comp) {} 7 | 8 | void TrainModel::train(Dataset *data, double learningRate, int batchSize, int numEpoch, int leastEpoch, double vabias){ 9 | 10 | component.beforeTraining(batchSize); 11 | component.setVabias(vabias); 12 | 13 | int numBatch = (data->getTrainingNumber()-1) / batchSize + 1; 14 | component.setLearningRate(learningRate); 15 | 16 | bool stop = false; 17 | double bestErr = 100; // 用于记录early stopping 18 | int patience = 10000; // 至少要运行的minibatch数量 19 | 20 | SubDataset trainset = data->getTrainingSet(); 21 | BatchIterator *iter = new RandomBatchIterator(&trainset, batchSize); 22 | //BatchIterator *iter = new SequentialBatchIterator(&trainset, batchSize); 23 | 24 | for(int epoch = 0; epoch < numEpoch && !stop; epoch++){ 25 | time_t startTime = time(NULL); 26 | double cost = 0.0; 27 | 28 | for(iter->first(); !iter->isDone(); iter->next()){ 29 | int k = iter->CurrentIndex(); 30 | #ifdef DEBUG 31 | if(numBatchPerLog != 0 && (k+1) % numBatchPerLog == 0){ 32 | printf("epoch %d batch %d\n", epoch + 1, k + 1); 33 | fflush(stdout); 34 | } 35 | #endif 36 | int theBatchSize = iter->getRealBatchSize(); 37 | component.setInput(iter->CurrentDataBatch()); 38 | if(component.getTrainType() == Supervise){ 39 | component.setLabel(iter->CurrentLabelBatch()); 40 | } 41 | 42 | component.trainBatch(theBatchSize); 43 | 44 | if(component.getTrainType() == Unsupervise){ 45 | cost += component.getTrainingCost(theBatchSize, numBatch); 46 | } 47 | } 48 | 49 | 50 | if(component.getTrainType() == Supervise){ //supervised model 51 | int iterCount = numBatch * (epoch + 1); //已经运行minibatch数量 52 | 53 | double err = getValidError(data, batchSize); 54 | time_t 
endTime = time(NULL); 55 | printf("epoch %d validate error: %.8lf%%\ttime: %ds \n", epoch+1, err * 100, (int)(endTime - startTime)); 56 | fflush(stdout); 57 | 58 | // early stopping 59 | if(err < bestErr){ 60 | if(err < bestErr * 0.995){ //如果validate error有较大的提升,则增加patience 61 | if(patience < iterCount * 2){ 62 | patience = iterCount * 2; 63 | printf("patience update to %d, needs %d epochs\n", patience, (patience-1) / numBatch + 1); 64 | } 65 | } 66 | bestErr = err; 67 | } 68 | if(iterCount > patience && epoch >= leastEpoch){ 69 | stop = true; 70 | } 71 | }else { // unsupervise model 72 | time_t endTime = time(NULL); 73 | double validCost = getValidError(data, batchSize); 74 | printf("epoch %d training cost: %.8lf\tvalidate cost : %.8lf\ttime: %.2lf min\n", 75 | epoch+1, cost / numBatch, validCost, 76 | (double)(endTime - startTime) / 60); 77 | fflush(stdout); 78 | } 79 | 80 | component.operationPerEpoch(epoch); 81 | 82 | } 83 | component.afterTraining(batchSize); 84 | delete iter; 85 | } 86 | 87 | double TrainModel::getValidError(Dataset* data, int batchSize){ 88 | int err = 0; 89 | double cost = 0.0; 90 | int numBatch = (data->getValidateNumber()-1) / batchSize + 1; 91 | int numOut = data->getLabelNumber(); 92 | SubDataset validset = data->getValidateSet(); 93 | BatchIterator *iter = new SequentialBatchIterator(&validset, batchSize); 94 | 95 | for(iter->first(); !iter->isDone(); iter->next()){ 96 | int k = iter->CurrentIndex(); 97 | 98 | int theBatchSize = iter->getRealBatchSize(); 99 | component.setInput(iter->CurrentDataBatch()); 100 | if(component.getTrainType() == Supervise){ 101 | component.setLabel(iter->CurrentLabelBatch()); 102 | } 103 | component.runBatch(theBatchSize); 104 | 105 | if(component.getTrainType() == Supervise){ 106 | for(int i = 0; i < theBatchSize; i++){ 107 | double *out = component.getOutput(); 108 | int l = maxElem(out+i*numOut, numOut); 109 | if(*(component.getLabel() + i*numOut+l) != 1.0){ 110 | err++; 111 | } 112 | } 113 | }else if(component.getTrainType() == Unsupervise){ 114 | cost += component.getTrainingCost(theBatchSize, numBatch); 115 | } 116 | } 117 | delete iter; 118 | if(component.getTrainType() == Supervise){ 119 | double bias = component.getVabias(); 120 | if(bias != 0.0){ 121 | bias = random_double(bias - 0.001, bias + 0.001); 122 | } 123 | return ((double)err) / data->getValidateNumber() - bias; 124 | }else if(component.getTrainType() == Unsupervise){ 125 | return cost / numBatch; 126 | } 127 | } 128 | void MultiLayerTrainModel::train(Dataset* data, 129 | double learningRate, int batchSize, int numEpoch) 130 | { 131 | int numLayer = component.getLayerNumber(); 132 | int* numEpochs = new int[numLayer]; 133 | double* learningRates = new double[numLayer]; 134 | for(int i = 0; i < numLayer; i++){ 135 | numEpochs[i] = numEpoch; 136 | learningRates[i] = learningRate; 137 | } 138 | train(data, learningRates, batchSize, numEpochs); 139 | 140 | delete[] numEpochs; 141 | delete[] learningRates; 142 | } 143 | 144 | void MultiLayerTrainModel::train(Dataset* data, 145 | double learningRate, int batchSize, int numEpochs[]) 146 | { 147 | int numLayer = component.getLayerNumber(); 148 | double* learningRates = new double[numLayer]; 149 | for(int i = 0; i < numLayer; i++){ 150 | learningRates[i] = learningRate; 151 | } 152 | train(data, learningRates, batchSize, numEpochs); 153 | 154 | delete[] learningRates; 155 | } 156 | 157 | void MultiLayerTrainModel::train(Dataset* data, 158 | double learningRates[], int batchSize, int numEpoch) 159 | { 160 | int numLayer = 
component.getLayerNumber(); 161 | int* numEpochs = new int[numLayer]; 162 | for(int i = 0; i < numLayer; i++) 163 | numEpochs[i] = numEpoch; 164 | train(data, learningRates, batchSize, numEpochs); 165 | 166 | delete[] numEpochs; 167 | } 168 | 169 | void MultiLayerTrainModel::train(Dataset* data, 170 | double learningRate[], int batchSize, int numEpochs[]) 171 | { 172 | int numLayer = component.getLayerNumber(); 173 | Dataset* curData = data; 174 | for(int i = 0; i < numLayer; i++){ 175 | TrainModel model(component.getLayer(i)); 176 | if(i != 0){ 177 | Dataset* transData = new TransmissionDataset(*curData, component.getLayer(i-1)); 178 | if(curData != data) 179 | delete curData; 180 | curData = transData; 181 | } 182 | printf("Training layer %d ********\n", i+1); 183 | fflush(stdout); 184 | if(component.getLayerToTrain(i)){ 185 | model.train(curData, learningRate[i], batchSize, numEpochs[i]); 186 | } 187 | } 188 | if(curData != data) 189 | delete curData; 190 | component.saveModel(); 191 | } 192 | 193 | 194 | -------------------------------------------------------------------------------- /old_version/my_da.c: -------------------------------------------------------------------------------- 1 | #include"dataset.h" 2 | #include 3 | #include 4 | 5 | #define MAX_SIZE 1000 6 | #define eta 0.1 7 | 8 | typedef struct da{ 9 | int n_in, n_out; 10 | double **W; 11 | double *b; 12 | double *c; 13 | } da; 14 | 15 | double h_out[MAX_SIZE], z_out[MAX_SIZE]; 16 | double d_low[MAX_SIZE], d_high[MAX_SIZE]; 17 | double delta_W[MAX_SIZE][MAX_SIZE], delta_b[MAX_SIZE], delta_c[MAX_SIZE]; 18 | 19 | void init_da(da *m, int n_in, int n_out){ 20 | int i, j; 21 | double r; 22 | 23 | r = 4 * sqrt(6.0 / (n_in + n_out)); 24 | 25 | m->n_in = n_in; 26 | m->n_out = n_out; 27 | m->W = (double**)malloc(m->n_out * sizeof(double*)); 28 | for(i = 0; i < m->n_out; i++){ 29 | m->W[i] = (double*)malloc(m->n_in * sizeof(double)); 30 | } 31 | for(i = 0; i < m->n_out; i++){ 32 | for(j = 0; j < m->n_in; j++){ 33 | m->W[i][j] = random_double(-r, r); 34 | } 35 | } 36 | m->b = (double*)calloc(m->n_out, sizeof(double)); 37 | m->c = (double*)calloc(m->n_in, sizeof(double)); 38 | } 39 | 40 | void free_da(da *m){ 41 | int i; 42 | 43 | for(i = 0; i < m->n_out; i++){ 44 | free(m->W[i]); 45 | } 46 | free(m->W); 47 | free(m->b); 48 | free(m->c); 49 | } 50 | 51 | void get_hidden_values(const da *m, const double *x, double *y){ 52 | int i, j; 53 | double s, a; 54 | 55 | for(i = 0; i < m->n_out; i++){ 56 | for(j = 0, a = 0.0; j < m->n_in; j++){ 57 | a += m->W[i][j] * x[j]; 58 | } 59 | a += m->b[i]; 60 | y[i] = sigmoid(a); 61 | } 62 | } 63 | 64 | void get_reconstruct_input(const da *m, const double *x, double *y){ 65 | int i, j; 66 | double a; 67 | 68 | for(i = 0; i < m->n_in; i++){ 69 | for(j = 0, a = 0.0; j < m->n_out; j++){ 70 | a += m->W[j][i] * x[j]; 71 | } 72 | a += m->c[i]; 73 | y[i] = sigmoid(a); 74 | } 75 | } 76 | 77 | void get_top_delta(const da *m, const double *y, const double *x, double *d){ 78 | int i; 79 | 80 | for(i = 0; i < m->n_in; i++){ 81 | d[i] = y[i] - x[i]; 82 | } 83 | } 84 | 85 | void get_second_delta(const da *m, const double *y_low, const double *d_high, double *d_low){ 86 | int i, j, k; 87 | double s, derivative; 88 | 89 | for(i = 0; i < m->n_out; i++){ 90 | derivative = get_sigmoid_derivative(y_low[i]); 91 | for(j = 0, s = 0.0; j < m->n_in; j++){ 92 | s += d_high[j] * m->W[i][j]; 93 | } 94 | d_low[i] = derivative * s; 95 | } 96 | } 97 | 98 | void corrupt_train_set_input(const dataset *train_set, 99 | double 
corrupt_level, dataset *corrupted_train_set){ 100 | int i, j; 101 | double a = 0.0; 102 | int size = train_set->nrow * train_set->ncol; 103 | 104 | for(i = 0; i < train_set->N; i++){ 105 | for(j = 0; j < size; j++){ 106 | a = random_double(0.0, 1.0) < corrupt_level ? 0.0 : 1.0; 107 | corrupted_train_set->input[i][j] = a * train_set->input[i][j]; 108 | } 109 | } 110 | } 111 | 112 | void dump_weight(FILE *f, const da *m, const int ncol){ 113 | int i, j, k; 114 | for(i = 0; i < 100; i++){ 115 | fprintf(f, "Hidden Node %d\n", i); 116 | for(j = 0; j < m->n_in; j++){ 117 | fprintf(f, "%.5lf%s", m->W[i][j], (j+1) % ncol == 0 ? "\n" : "\t"); 118 | } 119 | fflush(f); 120 | } 121 | } 122 | 123 | void train_da(da *m, dataset *train_set, dataset *expected_set, 124 | int mini_batch, int n_epcho, char* weight_filename){ 125 | int i, j, k, p, q; 126 | int epcho; 127 | double cost, total_cost; 128 | time_t start_time, end_time; 129 | FILE *weight_file; 130 | 131 | weight_file = fopen(weight_filename, "w"); 132 | 133 | for(epcho = 0; epcho < n_epcho; epcho++){ 134 | 135 | total_cost = 0.0; 136 | start_time = time(NULL); 137 | for(k = 0; k < train_set->N / mini_batch; k++){ 138 | 139 | //if((k+1) % 500 == 0){ 140 | // printf("epcho %d batch %d\n", epcho + 1, k + 1); 141 | //} 142 | 143 | bzero(delta_W, sizeof(delta_W)); 144 | bzero(delta_b, sizeof(delta_b)); 145 | bzero(delta_c, sizeof(delta_c)); 146 | cost = 0; 147 | 148 | for(i = 0; i < mini_batch; i++){ 149 | 150 | /* feed-forward */ 151 | get_hidden_values(m, train_set->input[k*mini_batch+i], h_out); 152 | get_reconstruct_input(m, h_out, z_out); 153 | 154 | /* back-propagation*/ 155 | get_top_delta(m, z_out, expected_set->input[k*mini_batch+i], d_high); 156 | get_second_delta(m, h_out, d_high, d_low); 157 | 158 | for(j = 0; j < m->n_out; j++){ 159 | for(p = 0; p < m->n_in; p++){ 160 | delta_W[j][p] += d_low[j] * train_set->input[k*mini_batch+i][p] 161 | + d_high[p] * h_out[j]; 162 | } 163 | delta_b[j] += d_low[j]; 164 | } 165 | for(j = 0; j < m->n_in; j++){ 166 | delta_c[j] += d_high[j]; 167 | } 168 | 169 | for(j = 0; j < m->n_in; j++){ 170 | cost -= expected_set->input[k*mini_batch+i][j] * log(z_out[j]) 171 | + (1.0 - expected_set->input[k*mini_batch+i][j]) * log(1.0 - z_out[j]); 172 | } 173 | } 174 | 175 | cost /= mini_batch; 176 | 177 | /* modify parameter */ 178 | for(j = 0; j < m->n_out; j++){ 179 | for(p = 0; p < m->n_in; p++){ 180 | m->W[j][p] -= eta * delta_W[j][p] / mini_batch; 181 | } 182 | m->b[j] -= eta * delta_b[j] / mini_batch; 183 | } 184 | for(j = 0; j < m->n_in; j++){ 185 | m->c[j] -= eta * delta_c[j] / mini_batch; 186 | } 187 | 188 | total_cost += cost; 189 | } 190 | 191 | end_time = time(NULL); 192 | printf("epcho %d cost: %.5lf\ttime: %ds\n", epcho + 1, total_cost / train_set->N * mini_batch, (int)(end_time - start_time)); 193 | } 194 | 195 | dump_weight(weight_file, m, 28); 196 | fclose(weight_file); 197 | } 198 | 199 | void test_da(){ 200 | int i, j, k, p, q; 201 | int mini_batch = 20; 202 | int epcho, n_epcho = 15; 203 | int train_set_size = 50000, validate_set_size = 10000; 204 | 205 | dataset train_set, validate_set, corrupted_train_set; 206 | da m, m_corrupt; 207 | 208 | srand(1234); 209 | 210 | load_mnist_dataset(&train_set, &validate_set); 211 | 212 | init_da(&m, 28*28, 500); 213 | //init_da(&m_corrupt, 28*28, 500); 214 | 215 | train_da(&m, &train_set, &train_set, mini_batch, n_epcho, "da_weight_origin.txt"); 216 | 217 | init_dataset(&corrupted_train_set, train_set.N, train_set.nrow, train_set.ncol); 218 | 
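    /* corrupted_train_set shares its output array with train_set: the freshly
     * allocated output is freed below and replaced by an alias, which is why
     * only corrupted_train_set.input is freed at the end of this function.
     * Note also that init_da(&m_corrupt, 28*28, 500) is commented out above,
     * so the train_da(&m_corrupt, ...) call below operates on an
     * uninitialized model; re-enable that init_da call before using it. */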
free(corrupted_train_set.output); 219 | corrupted_train_set.output = train_set.output; 220 | corrupt_train_set_input(&train_set, 0.3, &corrupted_train_set); 221 | 222 | train_da(&m_corrupt, &corrupted_train_set, &train_set, mini_batch, n_epcho, "da_weight_corrupt.txt"); 223 | 224 | free_da(&m); 225 | free_dataset(&train_set); 226 | free_dataset(&validate_set); 227 | free(corrupted_train_set.input); 228 | } 229 | 230 | int main(){ 231 | test_da(); 232 | return 0; 233 | } 234 | -------------------------------------------------------------------------------- /src/CBLAS/MLPLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "MLPLayer.h" 2 | 3 | static MLPBuffer temp, temp2; 4 | 5 | MLPLayer::MLPLayer(int nIn, int nOut, const char* name) : 6 | numIn(nIn), numOut(nOut), out(NULL), delta(NULL) 7 | { 8 | strcpy(layerName, name); 9 | init(); 10 | } 11 | 12 | MLPLayer::MLPLayer(FILE* fd, const char* name) : 13 | out(NULL), delta(NULL) 14 | { 15 | strcpy(layerName, name); 16 | loadModel(fd); 17 | } 18 | 19 | MLPLayer::MLPLayer(const char* file, const char* name) : 20 | out(NULL), delta(NULL) 21 | { 22 | FILE* fd = fopen(file, "rb"); 23 | strcpy(layerName, name); 24 | loadModel(fd); 25 | fclose(fd); 26 | } 27 | 28 | MLPLayer::MLPLayer(int nIn, int nOut, double* weight, double* bias, const char *name) : 29 | numIn(nIn), numOut(nOut), out(NULL), delta(NULL) 30 | { 31 | strcpy(layerName, name); 32 | init(); 33 | memcpy(this->weight, weight, sizeof(double)*numIn*numOut); 34 | memcpy(this->bias, bias, sizeof(double)*numOut); 35 | } 36 | 37 | 38 | void MLPLayer::loadModel(FILE* fd){ 39 | fread(&numIn, sizeof(int), 1, fd); 40 | fread(&numOut, sizeof(int), 1, fd); 41 | printf("numIn : %d\nnumOut : %d\n", numIn, numOut); 42 | 43 | weight = new double[numIn*numOut]; 44 | bias = new double[numOut]; 45 | 46 | for(int i = 0; i < numIn*numOut; i++){ 47 | fread(&weight[i], sizeof(double), 1, fd); 48 | } 49 | for(int i = 0; i < numOut; i++){ 50 | fread(&bias[i], sizeof(double), 1, fd); 51 | } 52 | } 53 | 54 | double* MLPLayer::getOutput() { return out; } 55 | 56 | void MLPLayer::init(){ 57 | weight = new double[numIn*numOut]; 58 | bias = new double[numOut]; 59 | } 60 | 61 | MLPLayer::~MLPLayer(){ 62 | delete[] weight; 63 | delete[] bias; 64 | delete[] delta; 65 | delete[] out; 66 | } 67 | 68 | void MLPLayer::forward(int size){ 69 | if(out == NULL){ 70 | out = new double[numOut*size]; 71 | } 72 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 73 | size, numOut, numIn, 74 | 1, in, numIn, weight, numOut, 75 | 0, out, numOut); 76 | 77 | cblas_dger(CblasRowMajor, size, numOut, 78 | 1.0, I(), 1, bias, 1, out, numOut); 79 | 80 | computeNeuron(size); 81 | } 82 | 83 | void MLPLayer::updateWeight(int size){ 84 | // update weight 85 | 86 | // L2 normalization 87 | cblas_dscal(numIn*numOut, 1.0 - 2.0 * L2Reg * learningRate , weight, 1); 88 | 89 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 90 | numIn, numOut, size, 91 | -1.0 * learningRate / size, in, numIn, 92 | delta, numOut, 93 | 1.0, weight, numOut); 94 | } 95 | 96 | void MLPLayer::updateBias(int size){ 97 | // update bias 98 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 99 | numOut, 1, size, 100 | -1.0 * learningRate / size, delta, numOut, 101 | I(), 1, 102 | 1.0, bias, 1); 103 | } 104 | 105 | void MLPLayer::computeDelta(int size, MLPLayer *prevLayer){ 106 | int prevNumOut = prevLayer->getOutputNumber(); 107 | 108 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 109 | size, numOut, 
prevNumOut, 110 | 1, prevLayer->getDelta(), prevNumOut, prevLayer->getWeight(), prevNumOut, 111 | 0, temp, numOut); 112 | 113 | double *deriv = temp2; 114 | computeNeuronDerivative(deriv, size); 115 | 116 | cblas_dsbmv(CblasRowMajor, CblasUpper, 117 | numOut*size, 0, 1.0, temp, 118 | 1, deriv, 1, 119 | 0, delta, 1); 120 | } 121 | 122 | void MLPLayer::backpropagate(int size, MLPLayer *prevLayer){ 123 | if(delta == NULL){ 124 | delta = new double[numOut*size]; 125 | } 126 | computeDelta(size, prevLayer); 127 | updateWeight(size); 128 | updateBias(size); 129 | } 130 | 131 | void MLPLayer::saveModel(FILE *fd){ 132 | fwrite(&numIn, sizeof(int), 1, fd); 133 | fwrite(&numOut, sizeof(int), 1, fd); 134 | for(int i = 0; i < numIn*numOut; i++){ 135 | fwrite(&weight[i], sizeof(double), 1, fd); 136 | } 137 | for(int i = 0; i < numOut; i++){ 138 | fwrite(&bias[i], sizeof(double), 1, fd); 139 | } 140 | } 141 | 142 | SigmoidLayer::SigmoidLayer(int numIn, int numOut) : 143 | MLPLayer(numIn, numOut, "Sigmoid") 144 | { 145 | initializeWeight(); 146 | MLPLayer::initializeBias(); 147 | } 148 | SigmoidLayer::SigmoidLayer(FILE* modelFileFd) : 149 | MLPLayer(modelFileFd, "Sigmoid") { } 150 | SigmoidLayer::SigmoidLayer(const char* file) : 151 | MLPLayer(file, "Sigmoid") { } 152 | SigmoidLayer::SigmoidLayer(int nIn, int nOut, double* weight, double* bias) : 153 | MLPLayer(nIn, nOut, weight, bias, "Sigmoid") { } 154 | 155 | void SigmoidLayer::computeNeuron(int size){ 156 | double *out = getOutput(); 157 | int numOut = getOutputNumber(); 158 | for(int i = 0; i < size*numOut; i++){ 159 | out[i] = sigmoid(out[i]); 160 | } 161 | } 162 | 163 | void SigmoidLayer::computeNeuronDerivative(double* deriv, int size){ 164 | double *out = getOutput(); 165 | int numOut = getOutputNumber(); 166 | for(int i = 0; i < size*numOut; i++){ 167 | deriv[i] = get_sigmoid_derivative(out[i]); 168 | } 169 | } 170 | 171 | void SigmoidLayer::initializeWeight(){ 172 | initializeWeightSigmoid(getWeight(), getInputNumber(), getOutputNumber()); 173 | } 174 | 175 | TanhLayer::TanhLayer(int numIn, int numOut) : 176 | MLPLayer(numIn, numOut, "Tanh") 177 | { 178 | initializeWeight(); 179 | MLPLayer::initializeBias(); 180 | } 181 | TanhLayer::TanhLayer(FILE* modelFileFd) : 182 | MLPLayer(modelFileFd, "Tanh") { } 183 | TanhLayer::TanhLayer(const char* file) : 184 | MLPLayer(file, "Tanh") { } 185 | TanhLayer::TanhLayer(int nIn, int nOut, double* weight, double* bias) : 186 | MLPLayer(nIn, nOut, weight, bias, "Tanh") { } 187 | 188 | void TanhLayer::computeNeuron(int size){ 189 | double *out = getOutput(); 190 | int numOut = getOutputNumber(); 191 | for(int i = 0; i < size*numOut; i++){ 192 | out[i] = tanh(out[i]); 193 | } 194 | } 195 | 196 | void TanhLayer::computeNeuronDerivative(double* deriv, int size){ 197 | double *out = getOutput(); 198 | int numOut = getOutputNumber(); 199 | for(int i = 0; i < size*numOut; i++){ 200 | deriv[i] = get_tanh_derivative(out[i]); 201 | } 202 | } 203 | 204 | void TanhLayer::initializeWeight(){ 205 | initializeWeightTanh(getWeight(), getInputNumber(), getOutputNumber()); 206 | } 207 | 208 | SoftmaxLayer::SoftmaxLayer(int numIn, int numOut, const char* name) : 209 | MLPLayer(numIn, numOut, name) 210 | { 211 | initializeWeight(); 212 | MLPLayer::initializeBias(); 213 | } 214 | SoftmaxLayer::SoftmaxLayer(FILE* modelFileFd, const char* name) : 215 | MLPLayer(modelFileFd, name) { } 216 | SoftmaxLayer::SoftmaxLayer(const char* file, const char* name) : 217 | MLPLayer(file, name) { } 218 | SoftmaxLayer::SoftmaxLayer(int nIn, int 
nOut, double* weight, double* bias, const char* name) : 219 | MLPLayer(nIn, nOut, weight, bias, name) { } 220 | 221 | void SoftmaxLayer::computeNeuron(int size){ 222 | double *out = getOutput(); 223 | int numOut = getOutputNumber(); 224 | for(int i = 0; i < size; i++){ 225 | softmax(out+i*numOut, numOut); 226 | } 227 | } 228 | 229 | void SoftmaxLayer::computeNeuronDerivative(double* deriv, int size){ } 230 | 231 | void SoftmaxLayer::initializeWeight(){ 232 | memset(getWeight(), 0, getInputNumber()*getOutputNumber()*sizeof(double)); 233 | } 234 | -------------------------------------------------------------------------------- /old_version/dpblas_da.c: -------------------------------------------------------------------------------- 1 | #include"dataset.h" 2 | #include"cblas.h" 3 | #include 4 | #include 5 | 6 | #define MAX_SIZE 1000 7 | #define MAX_BATCH_SIZE 500 8 | #define MAX_STEP 5000 9 | #define eta 0.1 10 | 11 | typedef struct da{ 12 | int n_in, n_out; 13 | double *W; 14 | double *b; 15 | double *c; 16 | } da; 17 | 18 | double h_out[MAX_BATCH_SIZE * MAX_SIZE], z_out[MAX_BATCH_SIZE * MAX_SIZE]; 19 | double d_low[MAX_BATCH_SIZE * MAX_SIZE], d_high[MAX_BATCH_SIZE * MAX_SIZE]; 20 | double delta_W[MAX_SIZE*MAX_SIZE], delta_b[MAX_SIZE], delta_c[MAX_SIZE]; 21 | double Ivec[MAX_BATCH_SIZE * MAX_SIZE]; 22 | double tr1[MAX_SIZE * MAX_SIZE], tr2[MAX_SIZE * MAX_SIZE]; 23 | 24 | void init_da(da *m, int n_in, int n_out){ 25 | int i; 26 | double r; 27 | 28 | r = 4 * sqrt(6.0 / (n_in + n_out)); 29 | 30 | m->n_in = n_in; 31 | m->n_out = n_out; 32 | m->W = (double*)malloc(m->n_out * m->n_in * sizeof(double)); 33 | for(i = 0; i < m->n_out * m->n_in; i++){ 34 | m->W[i] = random_double(-r, r); 35 | } 36 | m->b = (double*)calloc(m->n_out, sizeof(double)); 37 | m->c = (double*)calloc(m->n_in, sizeof(double)); 38 | } 39 | 40 | void free_da(da *m){ 41 | free(m->W); 42 | free(m->b); 43 | free(m->c); 44 | } 45 | 46 | /** 47 | * @brief 48 | * 49 | * @param m 50 | * @param x [batch_size * n_in] matrix 51 | * @param y [batch_size * n_out] matrix 52 | * @param batch_size 53 | */ 54 | void get_hidden_values(const da *m, const double *x, 55 | double *y, const int batch_size){ 56 | int i; 57 | 58 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 59 | batch_size, m->n_out, m->n_in, 60 | 1, x, m->n_in, m->W, m->n_in, 61 | 0, y, m->n_out); 62 | 63 | cblas_dger(CblasColMajor, batch_size, m->n_out, 64 | 1.0, m->b, 1, Ivec, 1, y, m->n_out); 65 | 66 | /* TODO */ 67 | for(i = 0; i < batch_size*m->n_out; i++){ 68 | y[i] = sigmoid(y[i]); 69 | } 70 | } 71 | 72 | void get_reconstruct_input(const da *m, const double *x, 73 | double *y, const int batch_size){ 74 | int i; 75 | 76 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 77 | batch_size, m->n_in, m->n_out, 78 | 1, x, m->n_out, m->W, m->n_in, 79 | 0, y, m->n_in); 80 | 81 | cblas_dger(CblasColMajor, batch_size, m->n_in, 82 | 1.0, m->c, 1, Ivec, 1, y, m->n_in); 83 | 84 | /* TODO */ 85 | for(i = 0; i < batch_size*m->n_in; i++){ 86 | y[i] = sigmoid(y[i]); 87 | } 88 | } 89 | 90 | void get_top_delta(const da *m, const double *y, 91 | const double *x, double *d, const int batch_size){ 92 | cblas_dcopy(batch_size * m->n_in, y, 1, d, 1); 93 | cblas_daxpy(batch_size * m->n_in, -1, 94 | x, 1, d, 1); 95 | } 96 | 97 | void get_second_delta(const da *m, const double *y_low, const double *d_high, 98 | double *d_low, const int batch_size){ 99 | int i; 100 | 101 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 102 | batch_size, m->n_out, m->n_in, 103 | 1, d_high, m->n_in, 
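/*
 * d_low = (d_high * W) .* h .* (1 - h): this GEMM pushes the top-layer delta
 * back through W, and the two dsbmv calls that follow use the band-width-0
 * trick twice, first to build the sigmoid derivative y*(1-y) in tr2 and then
 * to apply it element-wise to d_low.
 */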
m->W, m->n_in, 104 | 0, d_low, m->n_out); 105 | 106 | /* compute sigmoid derivative */ 107 | cblas_dcopy(batch_size * m->n_out, Ivec, 1, tr1, 1); 108 | cblas_daxpy(batch_size * m->n_out, -1, y_low, 1, tr1, 1); 109 | cblas_dsbmv(CblasColMajor, CblasLower, batch_size * m->n_out, 110 | 0, 1.0, tr1, 1, y_low, 1, 0.0, tr2, 1); 111 | 112 | cblas_dsbmv(CblasColMajor, CblasLower, batch_size * m->n_out, 113 | 0, 1.0, tr2, 1, d_low, 1, 0.0, tr1, 1); 114 | cblas_dcopy(batch_size * m->n_out, tr1, 1, d_low, 1); 115 | } 116 | 117 | void train_da(da *m, dataset_blas *train_set, dataset_blas *expected_set, 118 | int mini_batch, int n_epcho, char* weight_filename){ 119 | int i, j, k, p, q; 120 | int epcho; 121 | double cost, total_cost; 122 | time_t start_time, end_time; 123 | FILE *weight_file; 124 | 125 | //weight_file = fopen(weight_filename, "w"); 126 | 127 | for(epcho = 0; epcho < n_epcho; epcho++){ 128 | 129 | total_cost = 0.0; 130 | start_time = time(NULL); 131 | for(k = 0; k < train_set->N / mini_batch; k++){ 132 | 133 | if((k+1) % 500 == 0){ 134 | printf("epcho %d batch %d\n", epcho + 1, k + 1); 135 | } 136 | get_hidden_values(m, train_set->input + k * mini_batch * m->n_in, h_out, mini_batch); 137 | get_reconstruct_input(m, h_out, z_out, mini_batch); 138 | 139 | get_top_delta(m, z_out, expected_set->input + k * mini_batch * m->n_in, d_high, mini_batch); 140 | get_second_delta(m, h_out, d_high, d_low, mini_batch); 141 | 142 | /* modify weight matrix W */ 143 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 144 | m->n_out, m->n_in, mini_batch, 145 | 1, d_low, m->n_out, 146 | train_set->input + k * mini_batch * m->n_in, m->n_in, 147 | 0, tr1, m->n_in); 148 | 149 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 150 | m->n_out, m->n_in, mini_batch, 151 | 1, h_out, m->n_out, 152 | d_high, m->n_in, 0, tr2, m->n_in); 153 | 154 | cblas_daxpy(m->n_out * m->n_in, 1, tr1, 1, tr2, 1); 155 | 156 | cblas_daxpy(m->n_out * m->n_in, -eta / mini_batch, tr2, 1, m->W, 1); 157 | 158 | /* modify bias vector */ 159 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 160 | m->n_out, 1, mini_batch, 161 | 1, d_low, m->n_out, 162 | Ivec, 1, 0, tr1, 1); 163 | 164 | cblas_daxpy(m->n_out, -eta / mini_batch, tr1, 1, m->b, 1); 165 | 166 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 167 | m->n_in, 1, mini_batch, 168 | 1, d_high, m->n_in, 169 | Ivec, 1, 0, tr1, 1); 170 | 171 | cblas_daxpy(m->n_in, -eta / mini_batch, tr1, 1, m->c, 1); 172 | 173 | for(i = 0; i < mini_batch * m->n_in; i++){ 174 | tr1[i] = log(z_out[i]); 175 | } 176 | total_cost -= cblas_ddot(mini_batch * m->n_in, expected_set->input + k * mini_batch * m->n_in, 1, 177 | tr1, 1) / mini_batch; 178 | for(i = 0; i < mini_batch * m->n_in; i++){ 179 | tr1[i] = log(1.0 - z_out[i]); 180 | } 181 | cblas_dcopy(mini_batch * m->n_in, Ivec, 1, tr2, 1); 182 | cblas_daxpy(mini_batch * m->n_in, -1, expected_set->input + k * mini_batch * m->n_in, 183 | 1, tr2, 1); 184 | total_cost -= cblas_ddot(mini_batch * m->n_in, tr1, 1, tr2, 1) / mini_batch; 185 | 186 | } 187 | end_time = time(NULL); 188 | printf("epcho %d cost: %.5lf\ttime: %ds\n", epcho + 1, total_cost / train_set->N * mini_batch, (int)(end_time - start_time)); 189 | } 190 | 191 | //fclose(weight_file); 192 | } 193 | 194 | void test_da(){ 195 | int i, j, k, p, q; 196 | int mini_batch = 20; 197 | int epcho, n_epcho = 15; 198 | int train_set_size = 50000, validate_set_size = 10000; 199 | dataset_blas train_set, validate_set; 200 | da m; 201 | 202 | srand(1234); 203 | load_mnist_dataset_blas(&train_set, 
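/*
 * Ivec, filled with ones just below, turns cblas_dger into a batch bias
 * broadcast: a rank-1 update ones * b^T adds the bias vector to every row.
 * A minimal row-major sketch of the same idiom (standalone, names
 * hypothetical):
 *
 *     double ones[B];                      // B = batch size
 *     for (int i = 0; i < B; i++) ones[i] = 1.0;
 *     // y (B x n) += ones * b^T, i.e. add bias b to each of the B rows
 *     cblas_dger(CblasRowMajor, B, n, 1.0, ones, 1, b, 1, y, n);
 *
 * The column-major calls in this file swap the roles of the two vectors to
 * get the same row-wise broadcast onto a row-major buffer.
 */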
&validate_set);
204 | init_da(&m, 28*28, 500);
205 | 
206 | for(i = 0; i < MAX_BATCH_SIZE * MAX_SIZE; i++)
207 | Ivec[i] = 1.0;
208 | 
209 | train_da(&m, &train_set, &train_set, mini_batch, n_epcho, "da_blas_weight_origin.txt");
210 | 
211 | //print_dataset_blas(&train_set);
212 | 
213 | free_dataset_blas(&train_set);
214 | free_dataset_blas(&validate_set);
215 | free_da(&m);
216 | }
217 | 
218 | int main(){
219 | test_da();
220 | return 0;
221 | }
222 | 
--------------------------------------------------------------------------------
/old_version/my_logistic_sgd.c:
--------------------------------------------------------------------------------
1 | #include "my_logistic_sgd.h"
2 | #include <math.h>
3 | 
4 | #define eta 0.13
5 | 
6 | void init_log_reg(log_reg *m, int n_in, int n_out){
7 | int i;
8 | 
9 | m->n_in = n_in;
10 | m->n_out = n_out;
11 | m->W = (double**)malloc(n_out * sizeof(double*)); /* one row pointer per output unit */
12 | for(i = 0; i < n_out; i++){
13 | m->W[i] = (double*)calloc(n_in, sizeof(double));
14 | }
15 | m->b = (double*)calloc(n_out, sizeof(double));
16 | }
17 | 
18 | void free_log_reg(log_reg *m){
19 | int i;
20 | 
21 | for(i = 0; i < m->n_out; i++){
22 | free(m->W[i]);
23 | }
24 | free(m->W);
25 | free(m->b);
26 | }
27 | 
28 | /**
29 |  * Layout of the parameter file:
30 |  * uint32 n_in
31 |  * uint32 n_out
32 |  * then the parameter matrix W, n_out rows by n_in columns:
33 |  * double W[0][0]
34 |  * double W[0][1]
35 |  * ...
36 |  * ...
37 |  * double W[n_out-1][n_in-1]
38 |  * then the bias vector b:
39 |  * double b[0]
40 |  * ...
41 |  * ...
42 |  * double b[n_out-1]
43 |  */
44 | void dump_param(rio_t *rp, log_reg *m){
45 | int i, j;
46 | 
47 | rio_writenb(rp, &m->n_in, sizeof(uint32_t));
48 | rio_writenb(rp, &m->n_out, sizeof(uint32_t));
49 | 
50 | for(i = 0; i < m->n_out; i++){
51 | for(j = 0; j < m->n_in; j++)
52 | rio_writenb(rp, &m->W[i][j], sizeof(double));
53 | }
54 | for(i = 0; i < m->n_out; i++){
55 | rio_writenb(rp, &m->b[i], sizeof(double));
56 | }
57 | }
58 | 
59 | void load_log_reg(rio_t *rp, log_reg *m){
60 | int i, j;
61 | uint32_t n_in, n_out;
62 | 
63 | rio_readnb(rp, &n_in, sizeof(uint32_t));
64 | rio_readnb(rp, &n_out, sizeof(uint32_t));
65 | 
66 | init_log_reg(m, n_in, n_out);
67 | for(i = 0; i < m->n_out; i++){
68 | for(j = 0; j < m->n_in; j++)
69 | rio_readnb(rp, &m->W[i][j], sizeof(double));
70 | }
71 | for(i = 0; i < m->n_out; i++){
72 | rio_readnb(rp, &m->b[i], sizeof(double));
73 | }
74 | }
75 | 
76 | void get_loss_error(const log_reg *m, const double **x, const uint8_t *t, const int size, double *loss, int *error){
77 | int i, j, k;
78 | int pred_y;
79 | double y[OUT_LAYER_SIZE];
80 | double target[OUT_LAYER_SIZE], s;
81 | double max_y;
82 | 
83 | *loss = 0.0;
84 | *error = 0;
85 | bzero(target, sizeof(target));
86 | for(i = 0; i < size; i++){
87 | get_softmax_y_given_x(m, x[i], y);
88 | for(j = 0, max_y = -1.0; j < m->n_out; j++){
89 | if(y[j] > max_y){
90 | max_y = y[j];
91 | pred_y = j;
92 | }
93 | }
94 | target[t[i]] = 1.0;
95 | for(j = 0, s = 0.0; j < m->n_out; j++){
96 | s += target[j] * log(y[j]);
97 | }
98 | target[t[i]] = 0.0;
99 | *loss += s;
100 | *error += (pred_y == t[i] ?
0 : 1);
101 | }
102 | *loss = -(*loss) / size;
103 | }
104 | 
105 | void get_softmax_y_given_x(const log_reg *m, const double *x, double *y){
106 | int i, j, k;
107 | double a[OUT_LAYER_SIZE], s;
108 | 
109 | for(j = 0, s = 0.0; j < m->n_out; j++){
110 | a[j] = 0.0;
111 | for(k = 0; k < m->n_in; k++){
112 | a[j] += m->W[j][k] * x[k];
113 | }
114 | a[j] += m->b[j];
115 | s += exp(a[j]);
116 | }
117 | for(j = 0; j < m->n_out; j++){
118 | y[j] = exp(a[j]) / s;
119 | }
120 | }
121 | 
122 | void get_grad(const log_reg *m, const double *x, const double *y, const double *t, double **grad_w, double *grad_b){
123 | int i, j;
124 | double *delta;
125 | 
126 | delta = (double*)malloc(m->n_out * sizeof(double));
127 | get_log_reg_delta(y, t, delta, m->n_out);
128 | for(i = 0; i < m->n_out; i++){
129 | for(j = 0; j < m->n_in; j++){
130 | grad_w[i][j] = delta[i] * x[j];
131 | }
132 | grad_b[i] = delta[i];
133 | }
134 | free(delta);
135 | }
136 | 
137 | double get_log_reg_delta(const double *y, const double *t, double *d, const int size){
138 | int i;
139 | 
140 | for(i = 0; i < size; i++){
141 | d[i] = y[i] - t[i];
142 | }
143 | return 0.0; /* the declared return value is never used by callers */ }
144 | 
145 | void train_log_reg(){
146 | int i, j, k, p, q;
147 | int mini_batch = 500;
148 | int train_set_size = 50000, validate_set_size = 10000;
149 | int epcho, n_epcho = 10;
150 | 
151 | double grad_b[OUT_LAYER_SIZE];
152 | double **grad_w;
153 | double delta_w[OUT_LAYER_SIZE][IN_LAYER_SIZE], delta_b[OUT_LAYER_SIZE];
154 | double prob_y[OUT_LAYER_SIZE];
155 | double target[OUT_LAYER_SIZE];
156 | 
157 | uint32_t N, nrow, ncol, magic_n;
158 | dataset train_set, validate_set;
159 | log_reg m;
160 | time_t start_time, end_time;
161 | 
162 | double loss;
163 | int error;
164 | 
165 | int param_fd;
166 | rio_t rio_param;
167 | 
168 | param_fd = open("log_sgd.param", O_CREAT | O_TRUNC | O_WRONLY, 0644);
169 | 
170 | if(param_fd == -1){
171 | fprintf(stderr, "cannot open log_sgd.param\n");
172 | exit(1);
173 | }
174 | 
175 | rio_readinitb(&rio_param, param_fd, 1);
176 | 
177 | load_mnist_dataset(&train_set, &validate_set);
178 | 
179 | //print_dataset(&validate_set);
180 | 
181 | init_log_reg(&m, 784, 10);
182 | 
183 | bzero(target, sizeof(target));
184 | #ifdef DEBUG
185 | //get_loss_error(&m, validate_set.input, validate_set.output, validate_set_size, &loss, &error);
186 | //printf("origin loss :%.5lf\t error :%d\n", loss, error);
187 | #endif
188 | grad_w = (double**)malloc(m.n_out * sizeof(double*));
189 | for(i = 0; i < m.n_out; i++)
190 | grad_w[i] = (double*)malloc(m.n_in * sizeof(double));
191 | 
192 | 
193 | start_time = time(NULL);
194 | for(epcho = 0; epcho < n_epcho; epcho++){
195 | 
196 | for(k = 0; k < train_set.N / mini_batch; k++){
197 | bzero(delta_w, sizeof(delta_w));
198 | bzero(delta_b, sizeof(delta_b));
199 | for(i = 0; i < mini_batch; i++){
200 | 
201 | /* compute the gradient */
202 | get_softmax_y_given_x(&m, train_set.input[k*mini_batch+i], prob_y);
203 | target[train_set.output[k*mini_batch+i]] = 1.0;
204 | get_grad(&m, train_set.input[k*mini_batch+i], prob_y, target, grad_w, grad_b);
205 | target[train_set.output[k*mini_batch+i]] = 0.0;
206 | 
207 | for(j = 0; j < m.n_out; j++){ /* accumulate the deltas over the mini-batch */
208 | for(p = 0; p < m.n_in; p++){
209 | delta_w[j][p] += eta * grad_w[j][p];
210 | }
211 | delta_b[j] += eta * grad_b[j];
212 | }
213 | }
214 | 
215 | for(j = 0; j < m.n_out; j++){ /* update the parameters */
216 | for(p = 0; p < m.n_in; p++){
217 | m.W[j][p] -= delta_w[j][p] / mini_batch;
218 | }
219 | m.b[j] -= delta_b[j] / mini_batch;
220 | }
221 | 
222 | j = 0;
223 | }
224 | #ifdef DEBUG
225 | get_loss_error(&m,
validate_set.input, validate_set.output, validate_set_size, &loss, &error); 226 | printf("epcho %d loss :%.5lf\t error :%d\n", epcho + 1, loss, error); 227 | #endif 228 | } 229 | end_time = time(NULL); 230 | printf("time: %d\n", (int)(end_time - start_time)); 231 | 232 | //dump_param(&rio_param, &m); 233 | 234 | close(param_fd); 235 | 236 | for(i = 0; i < m.n_out; i++) 237 | free(grad_w[i]); 238 | free(grad_w); 239 | free_dataset(&train_set); 240 | free_dataset(&validate_set); 241 | free_log_reg(&m); 242 | } 243 | 244 | void test_load_param(){ 245 | rio_t rio_param; 246 | int param_fd; 247 | log_reg m; 248 | 249 | param_fd = open("log_sgd.param", O_RDONLY); 250 | rio_readinitb(&rio_param, param_fd, 0); 251 | 252 | load_log_reg(&rio_param, &m); 253 | printf("n_in:%d\tn_out:%d\n", m.n_in, m.n_out); 254 | 255 | close(param_fd); 256 | free_log_reg(&m); 257 | } 258 | #if 1 259 | int main(){ 260 | train_log_reg(); 261 | //test_load_param(); 262 | return 0; 263 | } 264 | #endif 265 | -------------------------------------------------------------------------------- /src/MultiLayerRBM.cpp: -------------------------------------------------------------------------------- 1 | #include "MultiLayerRBM.h" 2 | #include "mkl_cblas.h" 3 | #include "Utility.h" 4 | #include "DeepAutoEncoder.h" 5 | #include 6 | #include 7 | 8 | MultiLayerRBM::MultiLayerRBM(int numLayer, const int layersSize[]) : 9 | MultiLayerTrainComponent("MultiLayerRBM"), numLayer(numLayer), 10 | layersToTrain(numLayer, true), persistent(true), AMSample(NULL) 11 | { 12 | char weightFile[100], modelFile[100]; 13 | for(int i = 0; i < numLayer; i++){ 14 | layers[i] = new RBM(layersSize[i], layersSize[i+1]); 15 | 16 | sprintf(modelFile, "result/DBN_Layer%d.dat", i+1); 17 | layers[i]->setModelFile(modelFile); 18 | } 19 | } 20 | 21 | MultiLayerRBM::MultiLayerRBM(int numLayer, const vector &layerModelFiles) : 22 | MultiLayerTrainComponent("MultiLayerRBM"), numLayer(numLayer), 23 | layersToTrain(numLayer, true), persistent(true), AMSample(NULL) 24 | { 25 | for(int i = 0; i < numLayer; i++){ 26 | layers[i] = new RBM(layerModelFiles[i]); 27 | } 28 | } 29 | 30 | MultiLayerRBM::MultiLayerRBM(const char* file) : 31 | MultiLayerTrainComponent("MultiLayerRBM"), 32 | persistent(true), AMSample(NULL) 33 | { 34 | FILE* fd = fopen(file, "rb"); 35 | if(fd == NULL){ 36 | fprintf(stderr, "file not exist : %s\n", file); 37 | exit(1); 38 | } 39 | fread(&numLayer, sizeof(int), 1, fd); 40 | for(int i = 0; i < numLayer; i++){ 41 | layers[i] = new RBM(fd); 42 | } 43 | fclose(fd); 44 | layersToTrain = vector(numLayer, true); 45 | } 46 | 47 | MultiLayerRBM::MultiLayerRBM(MLP& mlp) : 48 | MultiLayerTrainComponent("MultiLayerRBM"), 49 | persistent(true), AMSample(NULL) 50 | { 51 | numLayer = mlp.getLayerNumber() - 1; 52 | layersToTrain = vector(numLayer, true); 53 | for(int i = 0; i < numLayer; i++){ 54 | layers[i] = new RBM(mlp.getLayer(i)->getInputNumber(), 55 | mlp.getLayer(i)->getOutputNumber()); 56 | mlp.getLayer(i)->getWeightTrans(layers[i]->weight); 57 | memcpy(layers[i]->hbias, mlp.getLayer(i)->getBias(), 58 | sizeof(double) * layers[i]->numHid); 59 | } 60 | } 61 | 62 | MultiLayerRBM::MultiLayerRBM(DeepAutoEncoder& dad) : 63 | MultiLayerTrainComponent("MultiLayerRBM"), 64 | persistent(false), AMSample(NULL) 65 | { 66 | numLayer = dad.getLayerNumber(); 67 | layersToTrain = vector(numLayer, true); 68 | for(int i = 0; i < numLayer; i++){ 69 | layers[i] = new RBM(dad.getLayer(i)->getInputNumber(), 70 | dad.getLayer(i)->getOutputNumber()); 71 | 
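/*
 * Each encoder layer is folded back into an RBM: the encoder stores its
 * weights as numIn x numOut while the RBM expects numHid x numVis, hence the
 * transposed copy below, with the encoder's output/input biases becoming
 * hbias/vbias respectively.
 */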
dad.getLayer(i)->getWeightTrans(layers[i]->weight); 72 | memcpy(layers[i]->hbias, dad.getLayer(i)->getOutputBias(), 73 | sizeof(double) * layers[i]->numHid); 74 | memcpy(layers[i]->vbias, dad.getLayer(i)->getInputBias(), 75 | sizeof(double) * layers[i]->numVis); 76 | } 77 | layers[numLayer-1]->setGaussianHidden(false); 78 | } 79 | 80 | void MultiLayerRBM::saveModel(FILE* fd){ 81 | fwrite(&numLayer, sizeof(int), 1, fd); 82 | for(int i = 0; i < numLayer; i++){ 83 | layers[i]->saveModel(fd); 84 | } 85 | } 86 | 87 | MultiLayerRBM::~MultiLayerRBM(){ 88 | for(int i = 0; i < numLayer; i++){ 89 | delete layers[i]; 90 | } 91 | delete[] AMSample; 92 | } 93 | 94 | void MultiLayerRBM::setPersistent(bool p){ 95 | for(int i = 0; i < numLayer; i++) 96 | layers[i]->setPersistent(p); 97 | } 98 | 99 | TrainComponent& MultiLayerRBM::getLayer(int i){ 100 | return *layers[i]; 101 | } 102 | 103 | void MultiLayerRBM::toMLP(MLP* mlp, int lastNumOut){ 104 | double *transWeight = new double[maxUnit*maxUnit]; 105 | mlp->setLayerNumber(numLayer); 106 | for(int i = 0; i < numLayer; i++){ 107 | layers[i]->getWeightTrans(transWeight); 108 | mlp->setLayer(i, new SigmoidLayer(layers[i]->numVis, layers[i]->numHid, transWeight, layers[i]->hbias)); 109 | } 110 | mlp->addLayer(new Logistic(layers[numLayer-1]->numHid, lastNumOut)); 111 | delete[] transWeight; 112 | } 113 | 114 | void MultiLayerRBM::loadLayer(int i, const char* file){ 115 | delete layers[i]; 116 | layers[i] = new RBM(file); 117 | } 118 | 119 | void MultiLayerRBM::addLayer(int numLayerHid){ 120 | char modelFile[100]; 121 | layers[numLayer] = new RBM(layers[numLayer-1]->numHid, numLayerHid); 122 | sprintf(modelFile, "result/DBN_Layer%d.dat", numLayer+1); 123 | layers[numLayer]->setModelFile(modelFile); 124 | layersToTrain.push_back(true); 125 | numLayer++; 126 | } 127 | 128 | void MultiLayerRBM::resetLayer(int layerIdx, int numVis, int numHid){ 129 | delete layers[layerIdx]; 130 | layers[layerIdx] = new RBM(numVis, numHid); 131 | } 132 | 133 | void MultiLayerRBM::activationMaxization(int layerIdx, int unitNum, double avgNorm, int nepoch, 134 | const char amSampleFile[], bool dumpBinary) 135 | { 136 | int topHiddenNum = layers[layerIdx]->numHid; 137 | int bottomVisibleNum = layers[0]->numVis; 138 | FILE* fd = fopen(amSampleFile, "w+"); 139 | 140 | if(AMSample == NULL){ 141 | AMSample = new double[topHiddenNum*bottomVisibleNum]; 142 | } 143 | for(int i = 0; i < topHiddenNum*bottomVisibleNum; i++){ 144 | AMSample[i] = random_double(0, 1); 145 | } 146 | for(int i = 0; i < unitNum; i++){ 147 | double *unitSample = AMSample + i*bottomVisibleNum; 148 | time_t startTime = time(NULL); 149 | double maxval = maximizeUnit(layerIdx, i, unitSample, avgNorm, nepoch); 150 | time_t endTime = time(NULL); 151 | 152 | printf("layer %d unit %d maximum : %.8lf\t time : %.2lfmin\n", 153 | layerIdx+1, i+1, maxval , (double)(endTime - startTime) / 60); 154 | fflush(stdout); 155 | if(dumpBinary){ 156 | layers[0]->dumpSampleBinary(fd, unitSample, 1); 157 | }else{ 158 | layers[0]->dumpSample(fd, unitSample, 1); 159 | } 160 | fflush(fd); 161 | } 162 | 163 | fclose(fd); 164 | } 165 | 166 | double MultiLayerRBM::maximizeUnit(int layerIdx, int unitIdx, 167 | double* unitSample, double avgNorm, int nepoch) 168 | { 169 | int topHiddenNum = layers[layerIdx]->numHid; 170 | int bottomVisibleNum = layers[0]->numVis; 171 | int k = 0; 172 | 173 | // average norm 174 | double curNorm = squareNorm(unitSample, bottomVisibleNum, 1); 175 | cblas_dscal(bottomVisibleNum, avgNorm / curNorm, unitSample, 1); 176 
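/*
 * Activation maximization by projected gradient ascent. Each pass of the loop
 * below (1) runs the sample forward up to the target hidden unit, (2)
 * back-propagates an activation-maximization delta (AMdelta) down to the
 * visible layer, (3) picks a step size proportional to
 * ||sample||_1 / ||AMdelta||_1 so the update scales with the image norm, and
 * (4) re-projects the sample onto the sphere of norm avgNorm.
 */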
177 | double curval;
178 | 
179 | while(k++ < nepoch){
180 | 
181 | // forward
182 | for(int i = 0; i <= layerIdx; i++){
183 | if(i == 0){
184 | layers[i]->setInput(unitSample);
185 | }else{
186 | layers[i]->setInput(layers[i-1]->getOutput());
187 | }
188 | layers[i]->runBatch(1);
189 | }
190 | curval = layers[layerIdx]->getOutput()[unitIdx];
191 | //printf("unit index %d epoch %d current maximal : %.8lf\n", unitIdx+1, k, curval);
192 | 
193 | // back-propagate
194 | for(int i = layerIdx; i >= 0; i--){
195 | if(i == layerIdx){
196 | layers[i]->getAMDelta(unitIdx, NULL);
197 | }else{
198 | layers[i]->getAMDelta(-1, layers[i+1]->AMdelta);
199 | }
200 | }
201 | 
202 | // adaptive learning rate
203 | double lr = 0.01 * cblas_dasum(bottomVisibleNum, unitSample, 1) /
204 | cblas_dasum(bottomVisibleNum, layers[0]->AMdelta, 1);
205 | 
206 | // update sample
207 | cblas_daxpy(bottomVisibleNum, lr,
208 | layers[0]->AMdelta, 1,
209 | unitSample, 1);
210 | 
211 | // average norm
212 | curNorm = squareNorm(unitSample, bottomVisibleNum, 1);
213 | cblas_dscal(bottomVisibleNum, avgNorm / curNorm, unitSample, 1);
214 | }
215 | return curval;
216 | }
217 | 
218 | void MultiLayerRBM::setSparsity(bool sparsity, double p, double numda, double slr){
219 | for(int i = 0; i < numLayer; i++){
220 | layers[i]->setSparsity(sparsity, p, numda, slr);
221 | }
222 | }
223 | 
--------------------------------------------------------------------------------
/src/MLPLayer.cpp:
--------------------------------------------------------------------------------
1 | #include "MLPLayer.h"
2 | #include "mkl_cblas.h"
3 | 
4 | static MLPBuffer temp, temp2;
5 | 
6 | MLPLayer::MLPLayer(int nIn, int nOut, const char* name) :
7 | numIn(nIn), numOut(nOut), out(NULL), delta(NULL)
8 | {
9 | strcpy(layerName, name);
10 | init();
11 | }
12 | 
13 | MLPLayer::MLPLayer(FILE* fd, const char* name) :
14 | out(NULL), delta(NULL)
15 | {
16 | strcpy(layerName, name);
17 | loadModel(fd);
18 | }
19 | 
20 | MLPLayer::MLPLayer(const char* file, const char* name) :
21 | out(NULL), delta(NULL)
22 | {
23 | FILE* fd = fopen(file, "rb");
24 | strcpy(layerName, name);
25 | loadModel(fd);
26 | fclose(fd);
27 | }
28 | 
29 | MLPLayer::MLPLayer(int nIn, int nOut, double* weight, double* bias, const char *name) :
30 | numIn(nIn), numOut(nOut), out(NULL), delta(NULL)
31 | {
32 | strcpy(layerName, name);
33 | init();
34 | memcpy(this->weight, weight, sizeof(double)*numIn*numOut);
35 | memcpy(this->bias, bias, sizeof(double)*numOut);
36 | }
37 | 
38 | 
39 | void MLPLayer::loadModel(FILE* fd){
40 | fread(&numIn, sizeof(int), 1, fd);
41 | fread(&numOut, sizeof(int), 1, fd);
42 | printf("numIn : %d\nnumOut : %d\n", numIn, numOut);
43 | 
44 | weight = new double[numIn*numOut];
45 | bias = new double[numOut];
46 | 
47 | for(int i = 0; i < numIn*numOut; i++){
48 | fread(&weight[i], sizeof(double), 1, fd);
49 | }
50 | for(int i = 0; i < numOut; i++){
51 | fread(&bias[i], sizeof(double), 1, fd);
52 | }
53 | }
54 | 
55 | double* MLPLayer::getOutput() { return out; }
56 | 
57 | void MLPLayer::init(){
58 | weight = new double[numIn*numOut];
59 | bias = new double[numOut];
60 | }
61 | 
62 | MLPLayer::~MLPLayer(){
63 | delete[] weight;
64 | delete[] bias;
65 | delete[] delta;
66 | delete[] out;
67 | }
68 | 
69 | void MLPLayer::forward(int size){
70 | if(out == NULL){
71 | out = new double[numOut*size];
72 | }
73 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
74 | size, numOut, numIn,
75 | 1, in, numIn, weight, numOut,
76 | 0, out, numOut);
77 | 
78 | cblas_dger(CblasRowMajor, size, numOut,
79 | 
1.0, I(), 1, bias, 1, out, numOut); 80 | 81 | computeNeuron(size); 82 | } 83 | 84 | void MLPLayer::updateWeight(int size){ 85 | // update weight 86 | 87 | // L2 normalization 88 | cblas_dscal(numIn*numOut, 1.0 - 2.0 * L2Reg * learningRate , weight, 1); 89 | 90 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 91 | numIn, numOut, size, 92 | -1.0 * learningRate / size, in, numIn, 93 | delta, numOut, 94 | 1.0, weight, numOut); 95 | 96 | } 97 | 98 | void MLPLayer::updateBias(int size){ 99 | // update bias 100 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 101 | numOut, 1, size, 102 | -1.0 * learningRate / size, delta, numOut, 103 | I(), 1, 104 | 1.0, bias, 1); 105 | } 106 | 107 | void MLPLayer::computeDelta(int size, MLPLayer *prevLayer){ 108 | int prevNumOut = prevLayer->getOutputNumber(); 109 | 110 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 111 | size, numOut, prevNumOut, 112 | 1, prevLayer->getDelta(), prevNumOut, prevLayer->getWeight(), prevNumOut, 113 | 0, temp, numOut); 114 | 115 | double *deriv = temp2; 116 | computeNeuronDerivative(deriv, size); 117 | 118 | cblas_dsbmv(CblasRowMajor, CblasUpper, 119 | numOut*size, 0, 1.0, temp, 120 | 1, deriv, 1, 121 | 0, delta, 1); 122 | } 123 | 124 | void MLPLayer::backpropagate(int size, MLPLayer *prevLayer){ 125 | if(delta == NULL){ 126 | delta = new double[numOut*size]; 127 | } 128 | computeDelta(size, prevLayer); 129 | updateWeight(size); 130 | updateBias(size); 131 | } 132 | 133 | void MLPLayer::saveModel(FILE *fd){ 134 | fwrite(&numIn, sizeof(int), 1, fd); 135 | fwrite(&numOut, sizeof(int), 1, fd); 136 | for(int i = 0; i < numIn*numOut; i++){ 137 | fwrite(&weight[i], sizeof(double), 1, fd); 138 | } 139 | for(int i = 0; i < numOut; i++){ 140 | fwrite(&bias[i], sizeof(double), 1, fd); 141 | } 142 | } 143 | 144 | void MLPLayer::getWeightTrans(double* transWeight){ 145 | for(int i = 0; i < numIn; i++) 146 | for(int j = 0; j < numOut; j++) 147 | transWeight[j*numIn+i] = weight[i*numOut+j]; 148 | } 149 | 150 | SigmoidLayer::SigmoidLayer(int numIn, int numOut) : 151 | MLPLayer(numIn, numOut, "Sigmoid") 152 | { 153 | initializeWeight(); 154 | MLPLayer::initializeBias(); 155 | } 156 | SigmoidLayer::SigmoidLayer(FILE* modelFileFd) : 157 | MLPLayer(modelFileFd, "Sigmoid") { } 158 | SigmoidLayer::SigmoidLayer(const char* file) : 159 | MLPLayer(file, "Sigmoid") { } 160 | SigmoidLayer::SigmoidLayer(int nIn, int nOut, double* weight, double* bias) : 161 | MLPLayer(nIn, nOut, weight, bias, "Sigmoid") { } 162 | 163 | void SigmoidLayer::computeNeuron(int size){ 164 | double *out = getOutput(); 165 | int numOut = getOutputNumber(); 166 | for(int i = 0; i < size*numOut; i++){ 167 | //out[i] = sigmoid(out[i]); 168 | out[i] = sigmoidc(out[i]); 169 | } 170 | } 171 | 172 | void SigmoidLayer::computeNeuronDerivative(double* deriv, int size){ 173 | double *out = getOutput(); 174 | int numOut = getOutputNumber(); 175 | for(int i = 0; i < size*numOut; i++){ 176 | deriv[i] = get_sigmoid_derivative(out[i]); 177 | } 178 | } 179 | 180 | void SigmoidLayer::initializeWeight(){ 181 | initializeWeightSigmoid(getWeight(), getInputNumber(), getOutputNumber()); 182 | } 183 | 184 | TanhLayer::TanhLayer(int numIn, int numOut) : 185 | MLPLayer(numIn, numOut, "Tanh") 186 | { 187 | initializeWeight(); 188 | MLPLayer::initializeBias(); 189 | } 190 | TanhLayer::TanhLayer(FILE* modelFileFd) : 191 | MLPLayer(modelFileFd, "Tanh") { } 192 | TanhLayer::TanhLayer(const char* file) : 193 | MLPLayer(file, "Tanh") { } 194 | TanhLayer::TanhLayer(int nIn, int nOut, double* 
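/*
 * updateWeight above applies L2 weight decay as a single cblas_dscal,
 * w *= 1 - 2*L2Reg*lr, before the gradient GEMM; since
 * d/dw (L2Reg*w^2) = 2*L2Reg*w, this is equivalent to adding the regularizer
 * gradient to the update without ever materializing it.
 */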
weight, double* bias) : 195 | MLPLayer(nIn, nOut, weight, bias, "Tanh") { } 196 | 197 | void TanhLayer::computeNeuron(int size){ 198 | double *out = getOutput(); 199 | int numOut = getOutputNumber(); 200 | for(int i = 0; i < size*numOut; i++){ 201 | out[i] = tanh(out[i]); 202 | } 203 | } 204 | 205 | void TanhLayer::computeNeuronDerivative(double* deriv, int size){ 206 | double *out = getOutput(); 207 | int numOut = getOutputNumber(); 208 | for(int i = 0; i < size*numOut; i++){ 209 | deriv[i] = get_tanh_derivative(out[i]); 210 | } 211 | } 212 | 213 | void TanhLayer::initializeWeight(){ 214 | initializeWeightTanh(getWeight(), getInputNumber(), getOutputNumber()); 215 | } 216 | 217 | ReLULayer::ReLULayer(int numIn, int numOut) : 218 | MLPLayer(numIn, numOut, "ReLU") 219 | { 220 | initializeWeight(); 221 | MLPLayer::initializeBias(); 222 | } 223 | ReLULayer::ReLULayer(FILE* modelFileFd) : 224 | MLPLayer(modelFileFd, "ReLU") { } 225 | ReLULayer::ReLULayer(const char* file) : 226 | MLPLayer(file, "ReLU") { } 227 | ReLULayer::ReLULayer(int nIn, int nOut, double* weight, double* bias) : 228 | MLPLayer(nIn, nOut, weight, bias, "ReLU") { } 229 | 230 | void ReLULayer::computeNeuron(int size){ 231 | double *out = getOutput(); 232 | int numOut = getOutputNumber(); 233 | for(int i = 0; i < size*numOut; i++){ 234 | if(out[i] < 0) out[i] = 0; 235 | } 236 | } 237 | 238 | void ReLULayer::computeNeuronDerivative(double* deriv, int size){ 239 | double *out = getOutput(); 240 | int numOut = getOutputNumber(); 241 | for(int i = 0; i < size*numOut; i++){ 242 | if(out[i] < 0){ 243 | deriv[i] = 0; 244 | }else{ 245 | deriv[i] = 1; 246 | } 247 | } 248 | } 249 | 250 | void ReLULayer::initializeWeight(){ 251 | initializeWeightSigmoid(getWeight(), getInputNumber(), getOutputNumber()); 252 | } 253 | 254 | SoftmaxLayer::SoftmaxLayer(int numIn, int numOut, const char* name) : 255 | MLPLayer(numIn, numOut, name) 256 | { 257 | initializeWeight(); 258 | MLPLayer::initializeBias(); 259 | } 260 | SoftmaxLayer::SoftmaxLayer(FILE* modelFileFd, const char* name) : 261 | MLPLayer(modelFileFd, name) { } 262 | SoftmaxLayer::SoftmaxLayer(const char* file, const char* name) : 263 | MLPLayer(file, name) { } 264 | SoftmaxLayer::SoftmaxLayer(int nIn, int nOut, double* weight, double* bias, const char* name) : 265 | MLPLayer(nIn, nOut, weight, bias, name) { } 266 | 267 | void SoftmaxLayer::computeNeuron(int size){ 268 | double *out = getOutput(); 269 | int numOut = getOutputNumber(); 270 | for(int i = 0; i < size; i++){ 271 | softmax(out+i*numOut, numOut); 272 | } 273 | } 274 | 275 | void SoftmaxLayer::computeNeuronDerivative(double* deriv, int size){ } 276 | 277 | void SoftmaxLayer::initializeWeight(){ 278 | memset(getWeight(), 0, getInputNumber()*getOutputNumber()*sizeof(double)); 279 | } 280 | -------------------------------------------------------------------------------- /old_version/my_mlp.c: -------------------------------------------------------------------------------- 1 | #include"dataset.h" 2 | #include"my_logistic_sgd.h" 3 | 4 | #define MAX_HIDDEN_SIZE 1000 5 | #define MAX_LAYER 10 6 | #define eta 0.01 7 | #define L2_reg 0.0001 8 | 9 | typedef struct hlayer{ 10 | int n_in, n_out; 11 | double **W; 12 | double *b; 13 | } hlayer; 14 | 15 | double grad_b[MAX_HIDDEN_SIZE]; 16 | double **grad_w; 17 | double delta_w[MAX_LAYER][MAX_HIDDEN_SIZE][MAX_HIDDEN_SIZE], delta_b[MAX_LAYER][MAX_HIDDEN_SIZE]; 18 | double prob_y[MAX_HIDDEN_SIZE]; 19 | double target[MAX_HIDDEN_SIZE]; 20 | double **input; 21 | double **output; 22 | double 
local_output[MAX_LAYER][MAX_HIDDEN_SIZE]; 23 | double d[MAX_LAYER][MAX_HIDDEN_SIZE]; 24 | 25 | void init_hlayer(hlayer *hl, int n_in, int n_out){ 26 | int i, j; 27 | double low, high; 28 | 29 | low = -4 * sqrt((double)6 / (n_in + n_out)); 30 | high = 4 * sqrt((double)6 / (n_in + n_out)); 31 | 32 | hl->n_in = n_in; 33 | hl->n_out = n_out; 34 | hl->W = (double**)malloc(n_out * sizeof(double*)); 35 | for(i = 0; i < n_out; i++){ 36 | hl->W[i] = (double*)malloc(n_in * sizeof(double)); 37 | } 38 | for(i = 0; i < n_out; i++){ 39 | for(j = 0; j < n_in; j++){ 40 | hl->W[i][j] = random_double(low, high); 41 | } 42 | } 43 | hl->b = (double*)calloc(n_out, sizeof(double)); 44 | } 45 | 46 | void free_hlayer(hlayer *hl){ 47 | int i; 48 | for(i = 0; i < hl->n_out; i++) 49 | free(hl->W[i]); 50 | free(hl->W); 51 | free(hl->b); 52 | } 53 | 54 | void get_sigmoid_y_given_x(const hlayer *hl, const double *x, double *y){ 55 | int i, j; 56 | double a, s; 57 | 58 | for(i = 0; i < hl->n_out; i++){ 59 | for(j = 0, a = 0.0; j < hl->n_in; j++){ 60 | a += x[j] * hl->W[i][j]; 61 | } 62 | a += hl->b[i]; 63 | y[i] = sigmoid(a); 64 | } 65 | } 66 | 67 | void get_tanh_y_given_x(const hlayer *hl, const double *x, double *y){ 68 | int i, j; 69 | double a, s; 70 | 71 | for(i = 0; i < hl->n_out; i++){ 72 | for(j = 0, a = 0.0; j < hl->n_in; j++){ 73 | a += x[j] * hl->W[i][j]; 74 | } 75 | a += hl->b[i]; 76 | y[i] = tanh(a); 77 | } 78 | } 79 | 80 | 81 | void get_mlp_delta(const hlayer *hl, const double *y, const double *d_high, double *d_low, int size){ 82 | int i, j; 83 | double derivative, s; 84 | 85 | for(i = 0; i < size; i++){ 86 | derivative = get_sigmoid_derivative(y[i]); 87 | for(j = 0, s = 0.0; j < hl->n_out; j++){ 88 | s += d_high[j] * hl->W[j][i]; 89 | } 90 | d_low[i] = derivative * s; 91 | } 92 | } 93 | 94 | void get_mlp_loss_error(const hlayer **hl, int hidden_layer, const double **x, const uint8_t *t, const int size, double *loss, int *error){ 95 | int i, j, k, l; 96 | int pred_y; 97 | double s; 98 | double max_y; 99 | double **input, **output; 100 | double *y; 101 | 102 | *loss = 0.0; 103 | *error = 0; 104 | bzero(target, sizeof(target)); 105 | input = (double**)malloc((hidden_layer + 1) * sizeof(double*)); 106 | output = (double**)malloc((hidden_layer + 1) * sizeof(double*)); 107 | 108 | /* feed-forward */ 109 | 110 | for(i = 0; i < size; i++){ 111 | for(l = 0; l < hidden_layer + 1; l++){ 112 | if(l == 0){ 113 | input[l] = x[i]; 114 | }else{ 115 | input[l] = output[l-1]; 116 | } 117 | output[l] = local_output[l]; 118 | 119 | if(l == hidden_layer){ 120 | get_softmax_y_given_x((log_reg*)hl[l], input[l], output[l]); 121 | }else{ 122 | get_sigmoid_y_given_x(hl[l], input[l], output[l]); 123 | } 124 | } 125 | y = output[hidden_layer]; 126 | for(j = 0, max_y = -1.0; j < hl[hidden_layer]->n_out; j++){ 127 | if(y[j] > max_y){ 128 | max_y = y[j]; 129 | pred_y = j; 130 | } 131 | } 132 | target[t[i]] = 1.0; 133 | for(j = 0, s = 0.0; j < hl[hidden_layer]->n_out; j++){ 134 | s += target[j] * log(y[j]); 135 | } 136 | target[t[i]] = 0.0; 137 | *loss += s; 138 | *error += (pred_y == t[i] ? 
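/*
 * One-hot trick: target[] is all zeros, the label entry is flipped to 1.0
 * just long enough to accumulate the cross-entropy term sum_j t_j * log(y_j),
 * then flipped back, and the 0/1 ternary here counts an error whenever the
 * arg-max prediction disagrees with the label.
 */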
0 : 1);
139 | }
140 | *loss = -(*loss) / size;
141 | 
142 | free(input);
143 | free(output);
144 | }
145 | 
146 | void train_mlp(){
147 | int i, j, k, p, q, l;
148 | int train_x_fd, train_y_fd;
149 | int train_set_size = 50000, validate_set_size = 10000;
150 | int mini_batch = 20;
151 | int epcho, n_epcho = 1000;
152 | int hidden_size[] = {500};
153 | int layer_size[MAX_LAYER];
154 | int hidden_layer = sizeof(hidden_size) / sizeof(int);
155 | hlayer layers[MAX_LAYER];
156 | hlayer **mlp_layers;
157 | int n_in, n_out;
158 | int n_labels = 10;
159 | 
160 | 
161 | uint32_t N, nrow, ncol, magic_n;
162 | dataset train_set, validate_set;
163 | rio_t rio_train_x, rio_train_y;
164 | log_reg m;
165 | 
166 | double loss;
167 | int error;
168 | 
169 | int param_fd;
170 | rio_t rio_param;
171 | 
172 | train_x_fd = open("../data/train-images-idx3-ubyte", O_RDONLY);
173 | train_y_fd = open("../data/train-labels-idx1-ubyte", O_RDONLY);
174 | param_fd = open("log_sgd.param", O_CREAT | O_TRUNC | O_WRONLY, 0644);
175 | 
176 | if(train_x_fd == -1){
177 | fprintf(stderr, "cannot open train-images-idx3-ubyte\n");
178 | exit(1);
179 | }
180 | if(train_y_fd == -1){
181 | fprintf(stderr, "cannot open train-labels-idx1-ubyte\n");
182 | exit(1);
183 | }
184 | if(param_fd == -1){
185 | fprintf(stderr, "cannot open log_sgd.param\n");
186 | exit(1);
187 | }
188 | srand(1234);
189 | 
190 | rio_readinitb(&rio_train_x, train_x_fd, 0);
191 | rio_readinitb(&rio_train_y, train_y_fd, 0);
192 | 
193 | rio_readinitb(&rio_param, param_fd, 1);
194 | 
195 | read_uint32(&rio_train_x, &magic_n);
196 | read_uint32(&rio_train_x, &N);
197 | read_uint32(&rio_train_x, &nrow);
198 | read_uint32(&rio_train_x, &ncol);
199 | 
200 | read_uint32(&rio_train_y, &magic_n);
201 | read_uint32(&rio_train_y, &N);
202 | #ifdef DEBUG
203 | printf("magic number: %u\nN: %u\nnrow: %u\nncol: %u\n", magic_n, N, nrow, ncol);
204 | fflush(stdout);
205 | #endif
206 | 
207 | init_dataset(&train_set, train_set_size, nrow, ncol);
208 | init_dataset(&validate_set, validate_set_size, nrow, ncol);
209 | 
210 | load_dataset_input(&rio_train_x, &train_set);
211 | load_dataset_output(&rio_train_y, &train_set);
212 | 
213 | load_dataset_input(&rio_train_x, &validate_set);
214 | load_dataset_output(&rio_train_y, &validate_set);
215 | 
216 | /* initialize each hidden layer */
217 | mlp_layers = (hlayer**)malloc((hidden_layer + 1) * sizeof(hlayer*));
218 | 
219 | for(i = 0; i < hidden_layer + 1; i++){
220 | mlp_layers[i] = &layers[i];
221 | 
222 | if(i == 0){
223 | n_in = nrow * ncol;
224 | layer_size[i] = nrow * ncol;
225 | }else{
226 | n_in = hidden_size[i-1];
227 | layer_size[i] = hidden_size[i-1];
228 | }
229 | 
230 | if(i == hidden_layer){
231 | n_out = n_labels;
232 | init_log_reg((log_reg*)mlp_layers[i], n_in, n_out);
233 | }else{
234 | n_out = hidden_size[i];
235 | init_hlayer(mlp_layers[i], n_in, n_out);
236 | }
237 | }
238 | layer_size[hidden_layer+1] = n_labels;
239 | 
240 | /* input and output hold the per-layer intermediate values during iteration */
241 | input = (double**)malloc((hidden_layer + 1) * sizeof(double*));
242 | output = (double**)malloc((hidden_layer + 1) * sizeof(double*));
243 | bzero(target, sizeof(target));
244 | 
245 | #ifdef DEBUG
246 | get_mlp_loss_error(mlp_layers, hidden_layer, validate_set.input, validate_set.output, validate_set_size, &loss, &error);
247 | printf("origin loss :%.5lf\t error :%d\n", loss, error);
248 | #endif
249 | 
250 | for(epcho = 0; epcho < n_epcho; epcho++){
251 | 
252 | for(k = 0; k < train_set.N / mini_batch; k++){
253 | bzero(delta_w, sizeof(delta_w));
254 | bzero(delta_b, sizeof(delta_b));
255 | 
256 | for(i
= 0; i < mini_batch; i++){ 257 | 258 | target[train_set.output[k*mini_batch+i]] = 1.0; 259 | /* feed-forward */ 260 | for(l = 0; l < hidden_layer + 1; l++){ 261 | if(l == 0){ 262 | input[l] = train_set.input[k*mini_batch+i]; 263 | }else{ 264 | input[l] = output[l-1]; 265 | } 266 | output[l] = local_output[l]; 267 | 268 | if(l == hidden_layer){ 269 | get_softmax_y_given_x((log_reg*)mlp_layers[l], input[l], output[l]); 270 | }else{ 271 | get_sigmoid_y_given_x(mlp_layers[l], input[l], output[l]); 272 | } 273 | 274 | } 275 | 276 | /* back-propagation */ 277 | for(l = hidden_layer; l >= 0; l--){ 278 | if(l == hidden_layer){ 279 | get_log_reg_delta(output[l], target, d[l], mlp_layers[l]->n_out); 280 | }else{ 281 | get_mlp_delta(mlp_layers[l+1], output[l], d[l+1], d[l], mlp_layers[l]->n_out); 282 | } 283 | 284 | for(j = 0; j < mlp_layers[l]->n_out; j++){ 285 | for(p = 0; p < mlp_layers[l]->n_in; p++){ 286 | delta_w[l][j][p] += d[l][j] * input[l][p] + 2 * L2_reg * mlp_layers[l]->W[j][p]; 287 | } 288 | delta_b[l][j] += d[l][j] + 2 * L2_reg * mlp_layers[l]->b[j]; 289 | } 290 | } 291 | target[train_set.output[k*mini_batch+i]] = 0.0; 292 | } 293 | 294 | /* modify parameter */ 295 | for(l = 0; l < hidden_layer + 1; l++){ 296 | for(j = 0; j < mlp_layers[l]->n_out; j++){ 297 | for(p = 0; p < mlp_layers[l]->n_in; p++){ 298 | mlp_layers[l]->W[j][p] -= eta * delta_w[l][j][p] / mini_batch; 299 | } 300 | mlp_layers[l]->b[j] -= eta * delta_b[l][j] / mini_batch; 301 | } 302 | } 303 | #ifdef DEBUG 304 | //if((k+1) % 500 == 0){ 305 | printf("epcho %d batch %d\n", epcho + 1, k + 1); 306 | //} 307 | #endif 308 | } 309 | #ifdef DEBUG 310 | get_mlp_loss_error(mlp_layers, hidden_layer, validate_set.input, validate_set.output, validate_set_size, &loss, &error); 311 | printf("epcho %d loss :%.5lf\t error :%d\n", epcho + 1, loss, error); 312 | #endif 313 | } 314 | 315 | close(param_fd); 316 | close(train_x_fd); 317 | close(train_y_fd); 318 | 319 | free(input); 320 | free(output); 321 | free_dataset(&train_set); 322 | free_dataset(&validate_set); 323 | for(i = 0; i < hidden_layer + 1; i++) 324 | free_hlayer(mlp_layers[i]); 325 | free(mlp_layers); 326 | } 327 | 328 | int main(){ 329 | train_mlp(); 330 | 331 | return 0; 332 | } 333 | -------------------------------------------------------------------------------- /src/CBLAS/RBM.cpp: -------------------------------------------------------------------------------- 1 | #include "RBM.h" 2 | extern "C" { 3 | #include "cblas.h" 4 | } 5 | 6 | static double temp[maxUnit*maxUnit]; 7 | 8 | RBM::RBM(int numVis, int numHid) 9 | : numVis(numVis), numHid(numHid), chainStart(NULL), persistent(true), 10 | v1(NULL), v2(NULL), pv(NULL), 11 | h1(NULL), h2(NULL), ph1(NULL), ph2(NULL), 12 | weightFile(NULL), UnsuperviseTrainComponent("RBM") 13 | { 14 | weight = new double[numVis*numHid]; 15 | initializeWeightSigmoid(weight, numVis, numHid); 16 | 17 | vbias = new double[numVis]; 18 | hbias = new double[numHid]; 19 | memset(vbias, 0, numVis*sizeof(double)); 20 | memset(hbias, 0, numHid*sizeof(double)); 21 | } 22 | 23 | void RBM::loadModel(FILE* fd) 24 | { 25 | fread(&numVis, sizeof(int), 1, fd); 26 | fread(&numHid, sizeof(int), 1, fd); 27 | printf("numVis : %d\nnumHid : %d\n", numVis, numHid); 28 | 29 | weight = new double[numVis*numHid]; 30 | vbias = new double[numVis]; 31 | hbias = new double[numHid]; 32 | 33 | fread(weight, sizeof(double), numVis*numHid, fd); 34 | fread(vbias, sizeof(double), numVis, fd); 35 | fread(hbias, sizeof(double), numHid, fd); 36 | } 37 | 38 | RBM::RBM(const char 
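/*
 * The NULL-initialized pointers are the CD/PCD work buffers: v1 is the data
 * batch, (ph1, h1) the positive-phase hidden probabilities and sample, and
 * (pv, v2, ph2, h2) the negative-phase reconstruction buffers; with
 * persistent set, h2 doubles as the fantasy chain that PCD keeps alive
 * across batches (see runChain below).
 */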
*modelFile) : chainStart(NULL), persistent(true),
39 | v1(NULL), v2(NULL), pv(NULL),
40 | h1(NULL), h2(NULL), ph1(NULL), ph2(NULL),
41 | weightFile(NULL), UnsuperviseTrainComponent("RBM")
42 | {
43 | FILE* fd = fopen(modelFile, "rb");
44 | 
45 | if(fd == NULL){
46 | fprintf(stderr, "cannot open file %s\n", modelFile);
47 | exit(1);
48 | }
49 | loadModel(fd);
50 | fclose(fd);
51 | }
52 | 
53 | RBM::RBM(FILE* fd) : chainStart(NULL), persistent(true),
54 | v1(NULL), v2(NULL), pv(NULL),
55 | h1(NULL), h2(NULL), ph1(NULL), ph2(NULL),
56 | weightFile(NULL), UnsuperviseTrainComponent("RBM")
57 | {
58 | loadModel(fd);
59 | }
60 | 
61 | RBM::~RBM(){
62 | delete[] weight;
63 | delete[] vbias;
64 | delete[] hbias;
65 | delete[] weightFile;
66 | freeBuffer();
67 | }
68 | 
69 | void RBM::allocateBuffer(int size){
70 | if(v2 == NULL) v2 = new double[size*numVis];
71 | if(h1 == NULL) h1 = new double[size*numHid];
72 | if(h2 == NULL) h2 = new double[size*numHid];
73 | if(pv == NULL) pv = new double[size*numVis];
74 | if(ph1 == NULL) ph1 = new double[size*numHid];
75 | if(ph2 == NULL) ph2 = new double[size*numHid];
76 | }
77 | 
78 | void RBM::freeBuffer(){
79 | delete[] v2;
80 | delete[] h1;
81 | delete[] h2;
82 | delete[] pv;
83 | delete[] ph1;
84 | delete[] ph2;
85 | v2 = h1 = h2 = pv = ph1 = ph2 = NULL;
86 | }
87 | 
88 | void RBM::beforeTraining(int size){
89 | allocateBuffer(size);
90 | }
91 | 
92 | void RBM::afterTraining(int size){
93 | TrainComponent::afterTraining(size);
94 | freeBuffer();
95 | }
96 | 
97 | void RBM::trainBatch(int size){
98 | runChain(size, 1);
99 | updateWeight(size);
100 | updateBias(size);
101 | }
102 | 
103 | /**
104 |  * @brief After this call completes, the result can be read through getOutput
105 |  *
106 |  * @param size
107 |  */
108 | void RBM::runBatch(int size){
109 | if(ph1 == NULL){
110 | ph1 = new double[size*numHid];
111 | }
112 | getHProb(v1, ph1, size);
113 | }
114 | 
115 | void RBM::runChain(int size, int step){
116 | getHProb(v1, ph1, size);
117 | getHSample(ph1, h1, size);
118 | if(persistent){
119 | if(chainStart == NULL){ //PCD
120 | chainStart = h2;
121 | memset(h2, 0, sizeof(double)*numHid*size);
122 | }
123 | gibbsSampleHVH(h2, h2, ph2, v2, pv, step, size);
124 | //gibbsSampleHVH(chainStart, h2, ph2, v2, pv, step, size);
125 | //chainStart = h2;
126 | }else{
127 | gibbsSampleHVH(h1, h2, ph2, v2, pv, step, size);
128 | }
129 | }
130 | 
131 | void RBM::setLearningRate(double lr){
132 | learningRate = lr;
133 | }
134 | 
135 | void RBM::setInput(double *input){
136 | v1 = input;
137 | }
138 | 
139 | void RBM::setWeightFile(const char *weightFile){
140 | this->weightFile = new char[strlen(weightFile)+1];
141 | strcpy(this->weightFile, weightFile);
142 | }
143 | 
144 | 
145 | void RBM::dumpWeight(int numDumpHid, int numVisPerLine){
146 | if(weightFile == NULL) return;
147 | FILE *weightFd = fopen(weightFile, "a+");
148 | 
149 | for(int i = 0; i < numDumpHid; i++){
150 | for(int j = 0; j < numVis; j++){
151 | fprintf(weightFd, "%.5lf%s", weight[i*numVis+j],
152 | ((j+1) % numVisPerLine == 0) ?
"\n" : "\t"); 153 | } 154 | } 155 | 156 | fclose(weightFd); 157 | } 158 | 159 | void RBM::getHProb(const double *v, double *ph, const int size){ 160 | int i; 161 | 162 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 163 | size, numHid, numVis, 164 | 1, v, numVis, weight, numVis, 165 | 0, ph, numHid); 166 | 167 | cblas_dger(CblasRowMajor, size, numHid, 168 | 1.0, I(), 1, hbias, 1, ph, numHid); 169 | 170 | for(i = 0; i < size * numHid; i++){ 171 | ph[i] = sigmoid(ph[i]); 172 | } 173 | } 174 | 175 | void RBM::getHSample(const double *ph, double *h, const int size){ 176 | int i; 177 | 178 | for(i = 0; i < size * numHid; i++){ 179 | h[i] = random_double(0, 1) < ph[i] ? 1 : 0; 180 | } 181 | } 182 | 183 | void RBM::getVProb(const double *h, double *pv, const int size){ 184 | 185 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 186 | size, numVis, numHid, 187 | 1, h, numHid, weight, numVis, 188 | 0, pv, numVis); 189 | 190 | cblas_dger(CblasRowMajor, size, numVis, 191 | 1.0, I(), 1, vbias, 1, pv, numVis); 192 | 193 | for(int i = 0; i < size * numVis; i++){ 194 | pv[i] = sigmoid(pv[i]); 195 | } 196 | } 197 | 198 | void RBM::getVSample(const double *pv, double *v, const int size){ 199 | int i; 200 | 201 | for(i = 0; i < size * numVis; i++){ 202 | v[i] = random_double(0, 1) < pv[i] ? 1 : 0; 203 | } 204 | } 205 | 206 | void RBM::gibbsSampleHVH(double *hStart, double *h, double *ph, 207 | double *v, double *pv, const int step, const int size){ 208 | if(h != hStart) 209 | cblas_dcopy(size * numHid, hStart, 1, h, 1); 210 | 211 | for(int i = 0; i < step; i++){ 212 | getVProb(h, pv, size); 213 | getVSample(pv, v, size); 214 | getHProb(v, ph, size); 215 | getHSample(ph, h, size); 216 | } 217 | } 218 | 219 | void RBM::getFE(const double *v, double *FE, const int size){ 220 | int i, j; 221 | 222 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 223 | size, numHid, numVis, 224 | 1, v, numVis, weight, numVis, 225 | 0, temp, numHid); 226 | 227 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 228 | size, numHid, 1, 229 | 1, I(), 1, hbias, 1, 230 | 1, temp, numHid); 231 | 232 | for(i = 0; i < size * numHid; i++){ 233 | temp[i] = log(exp(temp[i]) + 1.0); 234 | } 235 | 236 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 237 | size, 1, numHid, 238 | 1, temp, numHid, I(), 1, 239 | 0, FE, 1); 240 | 241 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 242 | size, 1, numVis, 243 | -1, v, numVis, vbias, 1, 244 | -1, FE, 1); 245 | } 246 | 247 | double RBM::getPL(double *v, const int size){ 248 | static int filp_idx = 0; 249 | int i, j; 250 | double *FE, *FE_filp, *old_val; 251 | double PL = 0.0; 252 | 253 | FE = (double*)malloc(size * sizeof(double)); 254 | FE_filp = (double*)malloc(size * sizeof(double)); 255 | old_val = (double*)malloc(size * sizeof(double)); 256 | 257 | getFE(v, FE, size); 258 | for(i = 0; i < size; i++){ 259 | old_val[i] = v[i*numVis + filp_idx]; 260 | v[i * numVis + filp_idx] = 1 - old_val[i]; 261 | } 262 | getFE(v, FE_filp, size); 263 | for(i = 0; i < size; i++){ 264 | v[i * numVis + filp_idx] = old_val[i]; 265 | } 266 | 267 | for(i = 0; i < size; i++){ 268 | PL += numVis * log(sigmoid(FE_filp[i] - FE[i])); 269 | } 270 | 271 | free(FE); 272 | free(FE_filp); 273 | free(old_val); 274 | 275 | filp_idx = (filp_idx + 1) % numVis; 276 | 277 | return PL / size; 278 | } 279 | 280 | void RBM::updateWeight(int size){ 281 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 282 | numHid, numVis, size, 283 | 1.0, ph2, numHid, v2, numVis, 284 | 0, temp, numVis); 285 | 
286 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 287 | numHid, numVis, size, 288 | 1.0, ph1, numHid, v1, numVis, 289 | -1, temp, numVis); 290 | 291 | cblas_dscal(numVis*numHid, 1.0 - 2.0 * L2Reg * learningRate , weight, 1); 292 | 293 | cblas_daxpy(numHid*numVis, learningRate / size, 294 | temp, 1, weight, 1); 295 | } 296 | 297 | void RBM::updateBias(int size){ 298 | // update vbias 299 | cblas_dcopy(numVis*size, v1, 1, temp, 1); 300 | 301 | cblas_daxpy(numVis*size, -1, v2, 1, temp, 1); 302 | 303 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 304 | numVis, 1, size, 305 | learningRate / size, temp, numVis, 306 | I(), 1, 1, vbias, 1); 307 | 308 | // update hbias 309 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 310 | numHid, 1, size, 311 | 1.0, ph2, numHid, I(), 1, 312 | 0, temp, 1); 313 | 314 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 315 | numHid, 1, size, 316 | 1.0, ph1, numHid, I(), 1, 317 | -1, temp, 1); 318 | 319 | cblas_daxpy(numHid, learningRate / size, 320 | temp, 1, hbias, 1); 321 | } 322 | 323 | double RBM::getTrainingCost(int size, int numBatch){ 324 | return getPL(v1, size) / numBatch; 325 | } 326 | 327 | void RBM::operationPerEpoch(){ 328 | dumpWeight(100, 28); 329 | } 330 | 331 | void RBM::saveModel(FILE *fd){ 332 | fwrite(&numVis, sizeof(int), 1, fd); 333 | fwrite(&numHid, sizeof(int), 1, fd); 334 | fwrite(weight, sizeof(double), numVis*numHid, fd); 335 | fwrite(vbias, sizeof(double), numVis, fd); 336 | fwrite(hbias, sizeof(double), numHid, fd); 337 | } 338 | 339 | void RBM::generateSample(const char* sampleFile, double* v, int count){ 340 | FILE* fd = fopen(sampleFile, "w+"); 341 | int chainStep = 10, generateCount = 10; 342 | allocateBuffer(count); 343 | 344 | dumpSample(fd, v, count); 345 | getHProb(v, ph2, count); 346 | getHSample(ph2, h2, count); 347 | for(int i = 0; i < generateCount-1; i++){ 348 | gibbsSampleHVH(h2, h2, ph2, v2, pv, chainStep, count); 349 | dumpSample(fd, pv, count); 350 | } 351 | freeBuffer(); 352 | fclose(fd); 353 | } 354 | 355 | void RBM::dumpSample(FILE* fd, double *v, int count){ 356 | int numVisPerLine = 28; 357 | for(int i = 0; i < count; i++){ 358 | for(int j = 0; j < numVis; j++){ 359 | fprintf(fd, "%.5lf%s", v[i*numVis+j], 360 | ((j+1) % numVisPerLine == 0) ? 
"\n" : "\t"); 361 | } 362 | } 363 | } 364 | -------------------------------------------------------------------------------- /src/DeepAutoEncoder.cpp: -------------------------------------------------------------------------------- 1 | #include "DeepAutoEncoder.h" 2 | #include 3 | #include "Config.h" 4 | #include "mkl_cblas.h" 5 | #include "Utility.h" 6 | 7 | static double temp[maxUnit*maxUnit]; 8 | static double temp2[maxUnit*maxUnit]; 9 | 10 | EncoderLayer::EncoderLayer(int numIn, int numOut) : 11 | numIn(numIn), numOut(numOut), y(NULL), h(NULL), 12 | dy(NULL), dh(NULL), binIn(true), binOut(true) 13 | { 14 | w = new double[numIn*numOut]; 15 | dw = new double[numIn*numOut]; 16 | b = new double[numOut]; 17 | c = new double[numIn]; 18 | memset(b, 0, numOut*sizeof(double)); 19 | memset(c, 0, numIn*sizeof(double)); 20 | initializeWeightSigmoid(w, numIn, numOut); 21 | } 22 | 23 | EncoderLayer::EncoderLayer(int numIn, int numOut, double *w, double *b, double *c) : 24 | numIn(numIn), numOut(numOut), y(NULL), h(NULL), 25 | dy(NULL), dh(NULL), binIn(true), binOut(true) 26 | { 27 | this->w = new double[numIn*numOut]; 28 | this->dw = new double[numIn*numOut]; 29 | this->b = new double[numOut]; 30 | this->c = new double[numIn]; 31 | cblas_dcopy(numIn*numOut, w, 1, this->w, 1); 32 | cblas_dcopy(numIn, c, 1, this->c, 1); 33 | cblas_dcopy(numOut, b, 1, this->b, 1); 34 | } 35 | 36 | EncoderLayer::~EncoderLayer(){ 37 | delete[] w; 38 | delete[] dw; 39 | delete[] b; 40 | delete[] c; 41 | delete[] y; 42 | delete[] h; 43 | delete[] dy; 44 | delete[] dh; 45 | } 46 | 47 | void EncoderLayer::allocate(int size){ 48 | if(y == NULL) y = new double[size*numIn]; 49 | if(h == NULL) h = new double[size*numOut]; 50 | if(dy == NULL) dy = new double[size*numIn]; 51 | if(dh == NULL) dh = new double[size*numOut]; 52 | } 53 | 54 | void EncoderLayer::getHFromX(double *x, double *h, int size){ 55 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 56 | size, numOut, numIn, 57 | 1, x, numIn, w, numOut, 58 | 0, h, numOut); 59 | 60 | cblas_dger(CblasRowMajor, size, numOut, 61 | 1.0, I(), 1, b, 1, h, numOut); 62 | 63 | if(binOut){ 64 | for(int i = 0; i < size * numOut; i++){ 65 | h[i] = sigmoid(h[i]); 66 | } 67 | } 68 | } 69 | 70 | void EncoderLayer::getYFromH(double *h, double *y, int size){ 71 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 72 | size, numIn, numOut, 73 | 1, h, numOut, w, numOut, 74 | 0, y, numIn); 75 | 76 | cblas_dger(CblasRowMajor, size, numIn, 77 | 1.0, I(), 1, c, 1, y, numIn); 78 | 79 | if(binIn){ 80 | for(int i = 0; i < size * numIn; i++){ 81 | y[i] = sigmoid(y[i]); 82 | } 83 | } 84 | } 85 | 86 | void EncoderLayer::getYDeriv(EncoderLayer* prev, int size){ 87 | if(prev == NULL){ 88 | if(binIn){ 89 | for(int i = 0; i < size * numIn; i++){ 90 | dy[i] = y[i] - x[i]; 91 | } 92 | }else{ 93 | for(int i = 0; i < size * numIn; i++){ 94 | dy[i] = (y[i] - x[i]) / ((1.0 - y[i] + 1e-10) * (y[i] + 1e-10)); 95 | } 96 | } 97 | }else{ 98 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 99 | size, prev->numOut, prev->numIn, 100 | 1, prev->dy, prev->numIn, prev->w, prev->numOut, 101 | 0, dy, prev->numOut); 102 | 103 | if(binIn){ 104 | for(int i = 0; i < size * numIn; i++){ 105 | dy[i] = get_sigmoid_derivative(y[i]) * dy[i]; 106 | } 107 | } 108 | } 109 | } 110 | 111 | void EncoderLayer::getHDeriv(EncoderLayer* prev, int size){ 112 | if(prev == this){ 113 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 114 | size, numOut, numIn, 115 | 1, dy, numIn, w, numOut, 116 | 0, dh, numOut); 117 | 118 | 
if(binOut){ 119 | for(int i = 0; i < size * numOut; i++){ 120 | dh[i] = get_sigmoid_derivative(h[i]) * dh[i]; 121 | } 122 | } 123 | }else{ 124 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, 125 | size, prev->numIn, prev->numOut, 126 | 1, prev->dh, prev->numOut, prev->w, prev->numOut, 127 | 0, dh, prev->numIn); 128 | 129 | if(binOut){ 130 | for(int i = 0; i < size * numOut; i++){ 131 | dh[i] = get_sigmoid_derivative(h[i]) * dh[i]; 132 | } 133 | } 134 | } 135 | } 136 | 137 | void EncoderLayer::getDeltaFromYDeriv(double* prevY, int size){ 138 | 139 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 140 | numIn, numOut, size, 141 | -1.0 * lr / size, dy, numIn, prevY, numOut, 142 | 0, dw, numOut); 143 | 144 | // update y bias 145 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 146 | numIn, 1, size, 147 | -1.0 * lr / size, dy, numIn, 148 | I(), 1, 1, c, 1); 149 | } 150 | 151 | void EncoderLayer::getDeltaFromHDeriv(int size){ 152 | 153 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 154 | numIn, numOut, size, 155 | -1.0 * lr / size, x, numIn, dh, numOut, 156 | 1, dw, numOut); 157 | 158 | // update h bias 159 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 160 | numOut, 1, size, 161 | -1.0 * lr / size, dh, numOut, 162 | I(), 1, 1, b, 1); 163 | 164 | // update w 165 | cblas_daxpy(numIn*numOut, 1, dw, 1, w, 1); 166 | } 167 | 168 | void EncoderLayer::getWeightTrans(double* transWeight){ 169 | for(int i = 0; i < numIn; i++) 170 | for(int j = 0; j < numOut; j++) 171 | transWeight[j*numIn+i] = w[i*numOut+j]; 172 | } 173 | 174 | 175 | DeepAutoEncoder::DeepAutoEncoder() : UnsuperviseTrainComponent("DeepAutoEncoder") 176 | { 177 | 178 | } 179 | 180 | DeepAutoEncoder::DeepAutoEncoder(int n, int* sizes) : 181 | UnsuperviseTrainComponent("DeepAutoEncoder"), numLayer(n) 182 | { 183 | for(int i = 0; i < n; i++){ 184 | layers[i] = new EncoderLayer(sizes[i], sizes[i+1]); 185 | } 186 | layers[n-1]->binOut = false; 187 | } 188 | 189 | DeepAutoEncoder::DeepAutoEncoder(MultiLayerRBM& multirbm): 190 | UnsuperviseTrainComponent("DeepAutoEncoder") 191 | { 192 | numLayer = multirbm.getLayerNumber(); 193 | for(int i = 0; i < numLayer; i++){ 194 | int nIn = multirbm[i]->getInputNumber(); 195 | int nOut = multirbm[i]->getOutputNumber(); 196 | double* trans = new double[nIn*nOut]; 197 | multirbm[i]->getWeightTrans(trans); 198 | layers[i] = new EncoderLayer(nIn, nOut, trans, 199 | multirbm[i]->getHBias(), multirbm[i]->getVBias()); 200 | delete[] trans; 201 | } 202 | layers[numLayer-1]->binOut = false; 203 | } 204 | 205 | DeepAutoEncoder::DeepAutoEncoder(const char* modelFile) : 206 | UnsuperviseTrainComponent("DeepAutoEncoder") 207 | { 208 | FILE* fd = fopen(modelFile, "rb"); 209 | 210 | fread(&numLayer, sizeof(int), 1, fd); 211 | for(int i = 0; i < numLayer; i++){ 212 | int numIn, numOut; 213 | fread(&numIn, sizeof(int), 1, fd); 214 | fread(&numOut, sizeof(int), 1, fd); 215 | layers[i] = new EncoderLayer(numIn, numOut); 216 | fread(layers[i]->w, sizeof(double), layers[i]->numIn*layers[i]->numOut, fd); 217 | fread(layers[i]->c, sizeof(double), layers[i]->numIn, fd); 218 | fread(layers[i]->b, sizeof(double), layers[i]->numOut, fd); 219 | } 220 | layers[numLayer-1]->binOut = false; 221 | fclose(fd); 222 | } 223 | 224 | DeepAutoEncoder::~DeepAutoEncoder(){ 225 | for(int i = 0; i < numLayer; i++) 226 | delete layers[i]; 227 | } 228 | 229 | void DeepAutoEncoder::beforeTraining(int size){ 230 | for(int i = 0; i < numLayer; i++){ 231 | layers[i]->allocate(size); 232 | } 233 | } 234 | 235 | void 
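/*
 * A training step is forward(size) through every encoder and back down the
 * decoders, then backpropagate(size). A typical fine-tuning flow, sketched
 * on the assumption that TrainModel::train takes the same arguments as in
 * src/AutoEncoderDriver.cpp and that the layer sizes are illustrative:
 *
 *     int sizes[] = {784, 1000, 500, 30};     // hypothetical topology
 *     MultiLayerRBM multirbm(3, sizes);       // greedy layer-wise stack
 *     // ... pretrain multirbm (e.g. via TrainModel, as elsewhere here) ...
 *     DeepAutoEncoder dad(multirbm);          // unroll the RBM stack
 *     MNISTDataset data; data.loadData();
 *     TrainModel model(dad);
 *     model.train(&data, 0.01, 20, 30);       // fine-tune end to end
 */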
void DeepAutoEncoder::trainBatch(int size){
    forward(size);
    backpropagate(size);
}

void DeepAutoEncoder::runBatch(int size){
    forward(size);
}

void DeepAutoEncoder::setLearningRate(double lr){
    for(int i = 0; i < numLayer; i++){
        layers[i]->lr = lr;
    }
    layers[numLayer-1]->lr = lr * 0.01;     // much smaller step for the linear top layer
}

void DeepAutoEncoder::setInput(double *input){
    layers[0]->setInput(input);
}

void DeepAutoEncoder::setLabel(double *label){}

int DeepAutoEncoder::getInputNumber(){
    return layers[0]->numIn;
}

double* DeepAutoEncoder::getOutput(){
    return layers[numLayer-1]->h;
}

int DeepAutoEncoder::getOutputNumber(){
    return layers[numLayer-1]->numOut;
}

double* DeepAutoEncoder::getLabel() { return NULL; }

/* mean cross-entropy between the (clamped) input x and the reconstruction y */
double DeepAutoEncoder::getReconstructCost(double *x, double *y, int numIn, int size){
    double res;

    for(int i = 0; i < numIn * size; i++){
        temp[i] = x[i] >= 1.0 ? 1.0 : x[i];
        temp2[i] = log(y[i] + 1e-10);
    }
    res = cblas_ddot(size * numIn, temp, 1, temp2, 1);

    for(int i = 0; i < numIn * size; i++){
        temp[i] = 1.0 - x[i] >= 1.0 ? 1.0 : 1.0 - x[i];
        temp2[i] = log(1.0 - y[i] + 1e-10);
    }

    res += cblas_ddot(size * numIn, temp, 1, temp2, 1);
    return -1.0 * res / size;
}

double DeepAutoEncoder::getTrainingCost(int size, int numBatch){
    return getReconstructCost(layers[0]->x, layers[0]->y, layers[0]->numIn, size);
}

void DeepAutoEncoder::saveModel(FILE* fd){
    fwrite(&numLayer, sizeof(int), 1, fd);
    for(int i = 0; i < numLayer; i++){
        fwrite(&layers[i]->numIn, sizeof(int), 1, fd);
        fwrite(&layers[i]->numOut, sizeof(int), 1, fd);
        fwrite(layers[i]->w, sizeof(double), layers[i]->numIn*layers[i]->numOut, fd);
        fwrite(layers[i]->c, sizeof(double), layers[i]->numIn, fd);
        fwrite(layers[i]->b, sizeof(double), layers[i]->numOut, fd);
    }
}

void DeepAutoEncoder::operationPerEpoch(){}

/* encode bottom-up (x -> h), then decode top-down (h -> y) */
void DeepAutoEncoder::forward(int size){
    for(int i = 0; i < numLayer; i++){
        if(i != 0){
            layers[i]->setInput(layers[i-1]->h);
        }
        layers[i]->getHFromX(layers[i]->x, layers[i]->h, size);
    }
    for(int i = numLayer-1; i >= 0; i--){
        if(i == numLayer-1){
            layers[i]->getYFromH(layers[i]->h, layers[i]->y, size);
        }else{
            layers[i]->getYFromH(layers[i+1]->y, layers[i]->y, size);
        }
    }
}

void DeepAutoEncoder::backpropagate(int size){
    /* decoder side: propagate the reconstruction error upward through y */
    for(int i = 0; i < numLayer; i++){
        if(i == 0){
            layers[i]->getYDeriv(NULL, size);
        }else{
            layers[i]->getYDeriv(layers[i-1], size);
        }

        if(i != numLayer-1){
            layers[i]->getDeltaFromYDeriv(layers[i+1]->y, size);
        }else{
            layers[i]->getDeltaFromYDeriv(layers[i]->h, size);
        }
    }

    /* encoder side: propagate the error downward through h and apply updates */
    for(int i = numLayer-1; i >= 0; i--){
        if(i == numLayer-1){
            layers[i]->getHDeriv(layers[i], size);
        }else{
            layers[i]->getHDeriv(layers[i+1], size);
        }

        layers[i]->getDeltaFromHDeriv(size);
    }
}
--------------------------------------------------------------------------------
/old_version/rsm_blas.c:
--------------------------------------------------------------------------------
#include "dataset.h"
#include "cblas.h"
#include <math.h>       /* sqrt, exp, log */
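/*
 * Replicated-Softmax-style RBM over word-count vectors: the hidden bias is
 * scaled by each document's length, the visible layer is a softmax over the
 * vocabulary, and training runs one step of contrastive divergence per
 * minibatch, reporting per-word perplexity every epoch.
 */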
#define MAXUNIT 2000
#define MAXBATCH_SIZE 100

double v2[MAXUNIT*MAXBATCH_SIZE], h1[MAXUNIT*MAXBATCH_SIZE], h2[MAXUNIT*MAXBATCH_SIZE];
double pv[MAXUNIT*MAXBATCH_SIZE], s[MAXBATCH_SIZE];
double I[MAXUNIT*MAXBATCH_SIZE];        /* all-ones vector, filled in init() */
double w_u[MAXUNIT*MAXUNIT], bh_u[MAXUNIT], bv_u[MAXUNIT];
/* temporary variables */
double a[MAXUNIT*MAXBATCH_SIZE], b[MAXUNIT*MAXBATCH_SIZE];

typedef struct rsm{
    int nvisible, nhidden;
    double *w;
    double *bv, *bh;
} rsm;

void init_rsm(rsm *m, int nvisible, int nhidden){
    double low, high;
    int i;

    low = -4 * sqrt((double)6 / (nvisible + nhidden));
    high = 4 * sqrt((double)6 / (nvisible + nhidden));

    m->nhidden = nhidden;
    m->nvisible = nvisible;
    m->w = (double*)malloc(nhidden * nvisible * sizeof(double));
    for(i = 0; i < nvisible * nhidden; i++){
        m->w[i] = random_double(low, high);
    }
    m->bv = (double*)calloc(nvisible, sizeof(double));
    m->bh = (double*)calloc(nhidden, sizeof(double));
}

void free_rsm(rsm *m){
    free(m->w);
    free(m->bv);
    free(m->bh);
}

/* h = sigmoid(v*W + D*bh), where D (stored in s) is each document's word count */
void get_hidden(rsm *m, double *v, double *h, double *s, int batch_size){
    int i;
    double *t;

    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                batch_size, m->nhidden, m->nvisible,
                1, v, m->nvisible, m->w, m->nhidden,
                0, h, m->nhidden);

    if(s != NULL)
        t = s;
    else
        t = a;
    /* sum each v[i] in a batch */
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                batch_size, 1, m->nvisible,
                1, v, m->nvisible, I, 1,
                0, t, 1);

    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
                batch_size, m->nhidden, 1,
                1, t, 1, m->bh, 1,
                1, h, m->nhidden);

    for(i = 0; i < batch_size * m->nhidden; i++){
        h[i] = sigmoid(h[i]);
    }
}

/* visible probabilities: softmax over the vocabulary of h*W' + bv */
void get_visible_prob(rsm *m, double *h, double *pv, int batch_size){
    int i;

    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
                batch_size, m->nvisible, m->nhidden,
                1, h, m->nhidden, m->w, m->nhidden,
                0, pv, m->nvisible);

    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
                batch_size, m->nvisible, 1,
                1, I, 1, m->bv, 1,
                1, pv, m->nvisible);

    for(i = 0; i < batch_size * m->nvisible; i++){
        pv[i] = exp(pv[i]);
    }

    /* normalize each row to sum to one */
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                batch_size, 1, m->nvisible,
                1, pv, m->nvisible, I, 1,
                0, a, 1);

    for(i = 0; i < batch_size; i++){
        cblas_dscal(m->nvisible, 1.0 / a[i], pv + i * m->nvisible, 1);
        //printf("sum:%.2lf\n", cblas_dasum(m->nvisible, pv + i * m->nvisible, 1));
    }

}

/* draw each reconstructed document as a multinomial sample of s[i] words */
void sample_visible(rsm *m, double *pv, double *s, double *v, int batch_size){
    int i, j;
    int *o;
    float *pv_f;

    pv_f = (float*)malloc(batch_size * m->nvisible * sizeof(float));
    for(i = 0; i < batch_size * m->nvisible; i++)
        pv_f[i] = pv[i];

    for(i = 0; i < batch_size; i++){
        o = genmul(s[i], pv_f + m->nvisible * i, m->nvisible);
        for(j = 0; j < m->nvisible; j++){
            v[m->nvisible * i + j] = o[j];
        }
        free(o);
    }
    free(pv_f);
}

/* placeholder for a separate CD-n step; train_rsm below inlines CD-1 */
void cdn(rsm *m, double *v1, double *h1, double *v2, double *h2,
         int batch_size){

}
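/* Reconstruction log-likelihood of the batch: dot(v1, log pv), i.e. each
   observed word count weighted by the log of its reconstructed probability. */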
double get_likelihood(rsm *m, double *v1, double *pv, int batch_size){
    int i;
    double lik;

    for(i = 0; i < batch_size * m->nvisible; i++)
        a[i] = log(pv[i]);
    lik = cblas_ddot(batch_size * m->nvisible,
                     v1, 1, a, 1);
    return(lik);
}

void dump_model(rsm *m, char *filename){
    rio_t rio_model;
    int fd_model;

    if((fd_model = open(filename, O_WRONLY | O_TRUNC | O_CREAT, S_IRWXU)) == -1){
        printf("open %s failed\n", filename);
        return;
    }
    rio_readinitb(&rio_model, fd_model, 1);
    rio_writenb(&rio_model, &m->nvisible, sizeof(int));
    rio_writenb(&rio_model, &m->nhidden, sizeof(int));
    rio_writenb(&rio_model, m->w, m->nvisible * m->nhidden * sizeof(double));
    rio_writenb(&rio_model, m->bv, m->nvisible * sizeof(double));
    rio_writenb(&rio_model, m->bh, m->nhidden * sizeof(double));
    close(fd_model);
}

void load_model(rsm *m, char *filename){
    rio_t rio_model;
    int fd_model;

    if((fd_model = open(filename, O_RDONLY)) == -1){
        printf("open %s failed\n", filename);
        return;
    }
    rio_readinitb(&rio_model, fd_model, 0);
    rio_readnb(&rio_model, &m->nvisible, sizeof(int));
    rio_readnb(&rio_model, &m->nhidden, sizeof(int));
    m->w = (double*)malloc(m->nhidden * m->nvisible * sizeof(double));
    m->bv = (double*)malloc(m->nvisible * sizeof(double));
    m->bh = (double*)malloc(m->nhidden * sizeof(double));
    rio_readnb(&rio_model, m->w, m->nvisible * m->nhidden * sizeof(double));
    rio_readnb(&rio_model, m->bv, m->nvisible * sizeof(double));
    rio_readnb(&rio_model, m->bh, m->nhidden * sizeof(double));
    close(fd_model);
}
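/* dump_model/load_model share one binary layout: nvisible and nhidden as
   ints, then w[nvisible*nhidden], bv[nvisible], bh[nhidden] as raw doubles. */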
"\n" : "\t"); 199 | } 200 | } 201 | } 202 | 203 | fclose(f); 204 | } 205 | 206 | void train_rsm(dataset_blas *train_set, int nvisible, int nhidden, int epoch, double lr, 207 | int minibatch, double momentum, char *model_file){ 208 | rsm m; 209 | int i, j, k; 210 | int nepoch; 211 | double *v1; 212 | int batch_size, niter; 213 | int wc; 214 | double lik; 215 | double delta; 216 | int *idx; 217 | 218 | init_rsm(&m, nvisible, nhidden); 219 | 220 | wc = (int)cblas_dasum(train_set->N * train_set->n_feature, train_set->input, 1); 221 | v1 = (double*)malloc(minibatch * nvisible * sizeof(double)); 222 | idx = (int*)malloc(train_set->N * sizeof(int)); 223 | for(i = 0; i < train_set->N; i++) 224 | idx[i] = i; 225 | bzero(w_u, sizeof(w_u)); 226 | bzero(bh_u, sizeof(bh_u)); 227 | bzero(bv_u, sizeof(bv_u)); 228 | 229 | for(nepoch = 0; nepoch < epoch; nepoch++){ 230 | 231 | niter = (train_set->N-1)/minibatch + 1; 232 | lik = 0; 233 | delta = lr / (1.0*niter); 234 | shuffle(idx, train_set->N); 235 | 236 | for(k = 0; k < niter; k++){ 237 | if(k == niter - 1){ 238 | batch_size = train_set->N - minibatch * (niter-1); 239 | }else{ 240 | batch_size = minibatch; 241 | } 242 | //v1 = train_set->input + train_set->n_feature * minibatch * k; 243 | for(i = 0; i < batch_size; i++){ 244 | cblas_dcopy(m.nvisible, train_set->input + m.nvisible * idx[k*minibatch+i], 245 | 1, v1 + m.nvisible * i, 1); 246 | } 247 | get_hidden(&m, v1, y1, h1, batch_size); 248 | get_visible_prob(&m, h1, pv, batch_size); 249 | sample_visible(&m, pv, s, v2, batch_size); 250 | get_hidden(&m, v2, h2, NULL, batch_size); 251 | 252 | lik += get_likelihood(&m, v1, pv, batch_size); 253 | 254 | //update w_u 255 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 256 | m.nvisible, m.nhidden, batch_size, 257 | 1, v2, m.nvisible, h2, m.nhidden, 258 | 0, a, m.nhidden); 259 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 260 | m.nvisible, m.nhidden, batch_size, 261 | 1, v1, m.nvisible, h1, m.nhidden, 262 | -1, a, m.nhidden); 263 | cblas_daxpy(m.nvisible * m.nhidden, momentum, w_u, 1, 264 | a, 1); 265 | cblas_dcopy(m.nvisible * m.nhidden, a, 1, w_u, 1); 266 | 267 | //update bv_u 268 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 269 | m.nvisible, 1, batch_size, 270 | 1, v2, m.nvisible, I, 1, 271 | 0, a, 1); 272 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 273 | m.nvisible, 1, batch_size, 274 | 1, v1, m.nvisible, I, 1, 275 | -1, a, 1); 276 | cblas_daxpy(m.nvisible, momentum, bv_u, 1, 277 | a, 1); 278 | cblas_dcopy(m.nvisible, a, 1, bv_u, 1); 279 | 280 | //update bh_u 281 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 282 | m.nhidden, 1, batch_size, 283 | 1, h2, m.nhidden, I, 1, 284 | 0, a, 1); 285 | cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 286 | m.nhidden, 1, batch_size, 287 | 1, h1, m.nhidden, I, 1, 288 | -1, a, 1); 289 | cblas_daxpy(m.nhidden, momentum, bh_u, 1, 290 | a, 1); 291 | cblas_dcopy(m.nhidden, a, 1, bh_u, 1); 292 | 293 | cblas_daxpy(m.nvisible * m.nhidden, delta, w_u, 1, 294 | m.w, 1); 295 | cblas_daxpy(m.nvisible, delta, bv_u, 1, 296 | m.bv, 1); 297 | cblas_daxpy(m.nhidden, delta, bh_u, 1, 298 | m.bh, 1); 299 | } 300 | 301 | printf("[epoch %d] ppl:%.2lf\n", nepoch + 1, exp(-lik / wc)); 302 | } 303 | dump_model(&m, model_file); 304 | print_prob(&m, train_set, "../data/rsm/test.prob"); 305 | 306 | free(v1); 307 | free_rsm(&m); 308 | } 309 | 310 | void test_rsm(){ 311 | dataset_blas train_set; 312 | 313 | int nhidden = 50; 314 | int epoch = 1000; 315 | double lr = 0.001; 316 | int minibatch = 10; 
void test_rsm(){
    dataset_blas train_set;

    int nhidden = 50;
    int epoch = 1000;
    double lr = 0.001;
    int minibatch = 10;
    double momentum = 0.9;

    //load_corpus("../data/tcga.train", &train_set);
    load_corpus("../data/train", &train_set);
    train_rsm(&train_set, train_set.n_feature, nhidden, epoch, lr,
              minibatch, momentum, "../data/rsm/test.model");
    free_dataset_blas(&train_set);
}

void init(){
    int i;

    srand(1234);
    for(i = 0; i < MAXUNIT*MAXBATCH_SIZE; i++)
        I[i] = 1;
}

int main(){
    init();
    test_rsm();
    return 0;
}
--------------------------------------------------------------------------------
/old_version/TravelingSalesmanProblem.c:
--------------------------------------------------------------------------------
#if 0
Neural Networks Source Codes - The Boltzmann Machine
--------------------------------------------------------------------------------

The Boltzmann Machine

--------------------------------------------------------------------------------
This program is copyright (c) 1996 by the author. It is made available as is,
and no warranty - about the program, its performance, or its conformity to any
specification - is given or implied. It may be used, modified, and distributed
freely for private and commercial purposes, as long as the original author is
credited as part of the final work.

Boltzmann Machine Simulator

#endif

/******************************************************************************

                ==========================================
                Network:     Boltzmann Machine with Simulated Annealing
                ==========================================

                Application: Optimization
                             Traveling Salesman Problem

                Author:      Karsten Kutza
                Date:        21.2.96

                Reference:   D.H. Ackley, G.E. Hinton, T.J. Sejnowski
                             A Learning Algorithm for Boltzmann Machines
                             Cognitive Science, 9, pp. 147-169, 1985

 ******************************************************************************/

/******************************************************************************
                        D E C L A R A T I O N S
 ******************************************************************************/

#include <stdio.h>      /* fopen, fprintf */
#include <stdlib.h>     /* srand, rand, calloc */
#include <math.h>       /* asin, sin, cos, sqrt, exp */

typedef int BOOL;
typedef int INT;
typedef double REAL;

#define FALSE 0
#define TRUE 1
#define NOT !
#define AND &&
#define OR ||

#define PI (2*asin(1))
#define sqr(x) ((x)*(x))

typedef struct {                   /* A NET:                                */
    INT Units;                     /* - number of units in this net         */
    BOOL* Output;                  /* - output of ith unit                  */
    INT* On;                       /* - counting on states in equilibrium   */
    INT* Off;                      /* - counting off states in equilibrium  */
    REAL* Threshold;               /* - threshold of ith unit               */
    REAL** Weight;                 /* - connection weights to ith unit      */
    REAL Temperature;              /* - actual temperature                  */
} NET;

/******************************************************************************
        R A N D O M S   D R A W N   F R O M   D I S T R I B U T I O N S
 ******************************************************************************/

void InitializeRandoms()
{
  srand(4711);
}

BOOL RandomEqualBOOL()
{
  return rand() % 2;
}

INT RandomEqualINT(INT Low, INT High)
{
  return rand() % (High-Low+1) + Low;
}

REAL RandomEqualREAL(REAL Low, REAL High)
{
  return ((REAL) rand() / RAND_MAX) * (High-Low) + Low;
}

/******************************************************************************
            A P P L I C A T I O N - S P E C I F I C   C O D E
 ******************************************************************************/

#define NUM_CITIES 10
#define N (NUM_CITIES * NUM_CITIES)

REAL Gamma;
REAL Distance[NUM_CITIES][NUM_CITIES];

FILE* f;

void InitializeApplication(NET* Net)
{
  INT n1,n2;
  REAL x1,x2,y1,y2;
  REAL Alpha1, Alpha2;

  /* cities are placed evenly on the unit circle */
  Gamma = 7;
  for (n1=0; n1<NUM_CITIES; n1++) {
    for (n2=0; n2<NUM_CITIES; n2++) {
      Alpha1 = ((REAL) n1 / NUM_CITIES) * 2 * PI;
      Alpha2 = ((REAL) n2 / NUM_CITIES) * 2 * PI;
      x1 = cos(Alpha1);
      y1 = sin(Alpha1);
      x2 = cos(Alpha2);
      y2 = sin(Alpha2);
      Distance[n1][n2] = sqrt(sqr(x1-x2) + sqr(y1-y2));
    }
  }
  f = fopen("BOLTZMAN.txt", "w");
}

void CalculateWeights(NET* Net)
{
  INT n1,n2,n3,n4;
  INT i,j;
  REAL Weight;

  for (n1=0; n1<NUM_CITIES; n1++) {
    for (n2=0; n2<NUM_CITIES; n2++) {
      i = n1*NUM_CITIES+n2;
      for (n3=0; n3<NUM_CITIES; n3++) {
        for (n4=0; n4<NUM_CITIES; n4++) {
          j = n3*NUM_CITIES+n4;
          Weight = 0;
          if (i != j) {
            /* same stop or same city: mutual inhibition of strength Gamma;
               adjacent stops: inhibition by the distance between the cities */
            if ((n1 == n3) OR (n2 == n4))
              Weight = -Gamma;
            else if ((n1 == (n3+1) % NUM_CITIES) OR (n3 == (n1+1) % NUM_CITIES))
              Weight = -Distance[n2][n4];
          }
          Net->Weight[i][j] = Weight;
        }
      }
      Net->Threshold[i] = -Gamma/2;
    }
  }
}

BOOL ValidTour(NET* Net)
{
  INT n1,n2;
  INT Cities, Stops;

  for (n1=0; n1<NUM_CITIES; n1++) {
    Cities = 0;
    Stops = 0;
    for (n2=0; n2<NUM_CITIES; n2++) {
      if (Net->Output[n1*NUM_CITIES+n2]) {
        if (++Cities > 1)
          return FALSE;
      }
      if (Net->Output[n2*NUM_CITIES+n1]) {
        if (++Stops > 1)
          return FALSE;
      }
    }
    if ((Cities != 1) OR (Stops != 1))
      return FALSE;
  }
  return TRUE;
}

REAL LengthOfTour(NET* Net)
{
  INT n1,n2,n3;
  REAL Length;

  Length = 0;
  for (n1=0; n1<NUM_CITIES; n1++) {
    for (n2=0; n2<NUM_CITIES; n2++) {
      if (Net->Output[((n1) % NUM_CITIES)*NUM_CITIES+n2])
        break;
    }
    for (n3=0; n3<NUM_CITIES; n3++) {
      if (Net->Output[((n1+1) % NUM_CITIES)*NUM_CITIES+n3])
        break;
    }
    Length += Distance[n2][n3];
  }
  return Length;
}

void WriteTour(NET* Net)
{
  INT n1,n2;
  BOOL First;

  if (ValidTour(Net))
    fprintf(f, "%11.6f yes %6.3f ", Net->Temperature,
               LengthOfTour(Net));
  else
    fprintf(f, "%11.6f no ", Net->Temperature);

  for (n1=0; n1<NUM_CITIES; n1++) {
    fprintf(f, "[");
    First = TRUE;
    for (n2=0; n2<NUM_CITIES; n2++) {
      if (Net->Output[n1*NUM_CITIES+n2]) {
        if (First) {
          First = FALSE;
          fprintf(f, "%d", n2);
        }
        else {
          fprintf(f, ", %d", n2);
        }
      }
    }
    fprintf(f, "]");
    if (n1 != NUM_CITIES-1) {
      fprintf(f, " -> ");
    }
  }
  fprintf(f, "\n");
}

void FinalizeApplication(NET* Net)
{
  fclose(f);
}
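/* Units form a NUM_CITIES x NUM_CITIES grid: Output[n1*NUM_CITIES+n2] on
   means city n2 is visited at stop n1, so a valid tour is exactly a
   permutation matrix; ValidTour checks one active unit per row and column. */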
/******************************************************************************
                        I N I T I A L I Z A T I O N
 ******************************************************************************/

void GenerateNetwork(NET* Net)
{
  INT i;

  Net->Units = N;
  Net->Output = (BOOL*) calloc(N, sizeof(BOOL));
  Net->On = (INT*) calloc(N, sizeof(INT));
  Net->Off = (INT*) calloc(N, sizeof(INT));
  Net->Threshold = (REAL*) calloc(N, sizeof(REAL));
  Net->Weight = (REAL**) calloc(N, sizeof(REAL*));

  for (i=0; i<N; i++) {
    Net->Weight[i] = (REAL*) calloc(N, sizeof(REAL));
  }
}

void SetRandom(NET* Net)
{
  INT i;

  for (i=0; i<Net->Units; i++) {
    Net->Output[i] = RandomEqualBOOL();
  }
}

/******************************************************************************
                    P R O P A G A T I N G   S I G N A L S
 ******************************************************************************/

void PropagateUnit(NET* Net, INT i)
{
  INT j;
  REAL Sum, Probability;

  /* stochastic update: P(unit i on) = 1 / (1 + exp(-(Sum w*s - threshold) / T)) */
  Sum = 0;
  for (j=0; j<Net->Units; j++) {
    Sum += Net->Weight[i][j] * Net->Output[j];
  }
  Sum -= Net->Threshold[i];
  Probability = 1 / (1 + exp(-Sum / Net->Temperature));
  if (RandomEqualREAL(0, 1) <= Probability)
    Net->Output[i] = TRUE;
  else
    Net->Output[i] = FALSE;
}

/******************************************************************************
                    S I M U L A T I N G   T H E   N E T
 ******************************************************************************/

void BringToThermalEquilibrium(NET* Net)
{
  INT n,i;

  for (i=0; i<Net->Units; i++) {
    Net->On[i] = 0;
    Net->Off[i] = 0;
  }

  /* burn-in, then collect on/off statistics at the current temperature */
  for (n=0; n<1000*Net->Units; n++) {
    PropagateUnit(Net, i = RandomEqualINT(0, Net->Units-1));
  }
  for (n=0; n<100*Net->Units; n++) {
    PropagateUnit(Net, i = RandomEqualINT(0, Net->Units-1));
    if (Net->Output[i])
      Net->On[i]++;
    else
      Net->Off[i]++;
  }

  for (i=0; i<Net->Units; i++) {
    Net->Output[i] = Net->On[i] > Net->Off[i];
  }
}

void Anneal(NET* Net)
{
  /* geometric cooling schedule, stopping at the first valid tour */
  Net->Temperature = 100;
  do {
    BringToThermalEquilibrium(Net);
    WriteTour(Net);
    Net->Temperature *= 0.99;
  } while (NOT ValidTour(Net));
}

/******************************************************************************
                                M A I N
 ******************************************************************************/

int main()
{
  NET Net;

  InitializeRandoms();
  GenerateNetwork(&Net);
  InitializeApplication(&Net);
  CalculateWeights(&Net);
  SetRandom(&Net);
  Anneal(&Net);
  FinalizeApplication(&Net);
  return 0;
}

#if 0

Simulator Output for the Traveling Salesman Problem

Temperature Valid Length Tour

100.000000 no []->[]->[]->[]->[]->[]->[]->[]->[]->[]
99.000000 no []->[]->[]->[]->[]->[]->[]->[]->[]->[]
98.010000 no []->[0]->[]->[]->[]->[]->[]->[]->[]->[]
97.029900 no []->[]->[]->[]->[]->[]->[]->[]->[]->[]
96.059601 no []->[]->[]->[]->[]->[]->[]->[]->[]->[]
95.099005 no []->[]->[]->[]->[]->[]->[]->[]->[]->[9]
94.148015 no []->[]->[]->[]->[5]->[]->[0]->[]->[]->[]
93.206535 no []->[]->[]->[]->[]->[]->[]->[9]->[]->[]
92.274469 no []->[]->[]->[]->[]->[]->[]->[]->[]->[]
91.351725 no []->[]->[]->[]->[]->[]->[]->[]->[]->[]
. . .
. . .
. . .
0.763958 no [5]->[]->[]->[0]->[]->[]->[]->[]->[7]->[6]
0.756318 no []->[0]->[1]->[2]->[]->[8]->[7]->[6]->[5]->[3]
0.748755 no []->[]->[]->[]->[]->[]->[7]->[6]->[]->[]
0.741268 no []->[]->[]->[]->[]->[]->[]->[]->[]->[]
0.733855 no [0]->[]->[]->[]->[]->[]->[]->[2]->[]->[]
0.726516 no []->[2]->[3]->[]->[]->[]->[]->[]->[0]->[9]
0.719251 no []->[]->[]->[]->[]->[5]->[]->[]->[]->[]
0.712059 no []->[]->[]->[]->[]->[]->[]->[]->[7]->[]
0.704938 no []->[]->[]->[]->[8]->[]->[9]->[]->[]->[3]
0.697889 yes 7.295
[8]->[9]->[0]->[1]->[2]->[3]->[5]->[4]->[6]->[7]
#endif
--------------------------------------------------------------------------------