├── .gitignore
├── Makefile
├── README.md
├── data
    └── 12mer-kd
    │   ├── 10fold_idx
    │       ├── test_idx-1.txt
    │       ├── test_idx-10.txt
    │       ├── test_idx-2.txt
    │       ├── test_idx-3.txt
    │       ├── test_idx-4.txt
    │       ├── test_idx-5.txt
    │       ├── test_idx-6.txt
    │       ├── test_idx-7.txt
    │       ├── test_idx-8.txt
    │       ├── test_idx-9.txt
    │       ├── train_idx-1.txt
    │       ├── train_idx-10.txt
    │       ├── train_idx-2.txt
    │       ├── train_idx-3.txt
    │       ├── train_idx-4.txt
    │       ├── train_idx-5.txt
    │       ├── train_idx-6.txt
    │       ├── train_idx-7.txt
    │       ├── train_idx-8.txt
    │       └── train_idx-9.txt
    │   ├── 12mer-kd.txt
    │   ├── raw-12mer-kd.csv
    │   └── split_train_test.m
├── include
    ├── config.h
    ├── fmt.h
    ├── matrix
    │   ├── cuda_binary_kernel.cuh
    │   ├── cuda_helper.h
    │   ├── cuda_rand_kernel.cuh
    │   ├── cuda_unary_kernel.cuh
    │   ├── dense_matrix.h
    │   ├── fastWalshTransform_kernel_double.cuh
    │   ├── fastWalshTransform_kernel_float.cuh
    │   ├── gpuhandle.h
    │   ├── imatrix.h
    │   ├── mat_typedef.h
    │   ├── matrix_utils.h
    │   ├── mkl_helper.h
    │   ├── sp_data.h
    │   ├── sparse_matrix.h
    │   └── vector.h
    ├── net
    │   ├── abs_criterion_layer.h
    │   ├── avg_rank_criterion_layer.h
    │   ├── c_add_layer.h
    │   ├── c_mul_layer.h
    │   ├── classnll_criterion_layer.h
    │   ├── col_slice_layer.h
    │   ├── concat_layer.h
    │   ├── const_scalar_param.h
    │   ├── err_cnt_criterion_layer.h
    │   ├── exp_layer.h
    │   ├── gaussian_ll_layer.h
    │   ├── general_loss_criterion_layer.h
    │   ├── global_sum_layer.h
    │   ├── graph_pool_param.h
    │   ├── graph_struct.h
    │   ├── i_act_layer.h
    │   ├── i_criterion_layer.h
    │   ├── i_layer.h
    │   ├── i_param.h
    │   ├── inner_product_layer.h
    │   ├── input_layer.h
    │   ├── learner.h
    │   ├── linear_param.h
    │   ├── log_layer.h
    │   ├── loss_func.h
    │   ├── max_entropy_criterion_layer.h
    │   ├── mixture_nll_criterion_layer.h
    │   ├── model.h
    │   ├── mse_criterion_layer.h
    │   ├── msg_pass_param.h
    │   ├── multinomial_sample_layer.h
    │   ├── mvn_diag_nll_criterion_layer.h
    │   ├── nngraph.h
    │   ├── param_layer.h
    │   ├── relu_layer.h
    │   ├── repeat_layer.h
    │   ├── sigmoid_layer.h
    │   ├── softmax_layer.h
    │   ├── tanh_layer.h
    │   └── transpose_layer.h
    ├── nn_common.h
    └── utils.h
├── local_run.sh
├── make_common
└── src
    ├── kernel_loopy_bp.cpp
    ├── kernel_mean_field.cpp
    ├── matrix
        ├── cpu_dense_matrix.cpp
        ├── cpu_sparse_mat.cpp
        ├── cpu_vector.cpp
        ├── gpu_dense_matrix.cu
        ├── gpu_sparse_mat.cu
        ├── gpu_vector.cu
        └── gpuhandle.cu
    └── net
        ├── act_layer.cpp
        ├── act_layer.cu
        ├── concat_layer.cpp
        ├── fmt.cpp
        ├── gaussian_ll_layer.cpp
        ├── global_sum_layer.cpp
        ├── graph_pool_param.cpp
        ├── graph_pool_param.cu
        ├── learner.cpp
        ├── loss_func.cpp
        ├── loss_func.cu
        ├── max_entropy_criterion_layer.cpp
        ├── mixture_nll_criterion_layer.cpp
        ├── msg_pass_param.cpp
        ├── mvn_diag_nll_criterion_layer.cpp
        └── nngraph.cpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | build/
 2 | results/
 3 | 
 4 | # Compiled Object files
 5 | *.slo
 6 | *.lo
 7 | *.o
 8 | *.obj
 9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 | 
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 | 
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 | 
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | include make_common
 2 | 
 3 | build_root = build
 4 | 
 5 | include_dirs = $(CUDA_HOME)/include $(MKL_ROOT)/include include/matrix include/net ./include
 6 | CXXFLAGS += $(addprefix -I,$(include_dirs))
 7 | NVCCFLAGS += $(addprefix -I,$(include_dirs))
 8 | NVCCFLAGS += -std=c++11 --use_fast_math
 9 | 
10 | cu_files = $(shell $(FIND) src/ -name "*.cu" -printf "%P\n")
11 | cpp_files = $(shell $(FIND) src/ -name "*.cpp" -printf "%P\n")
12 | cu_obj_files = $(subst .cu,.o,$(cu_files))
13 | cxx_obj_files = $(subst .cpp,.o,$(cpp_files))
14 | obj_build_root = $(build_root)/objs
15 | objs = $(addprefix $(obj_build_root)/cuda/,$(cu_obj_files)) $(addprefix $(obj_build_root)/cxx/,$(cxx_obj_files))
16 | DEPS = ${objs:.o=.d}
17 | 
18 | lib_dir = $(build_root)/lib
19 | net_lib = $(lib_dir)/libnet.a
20 | 
21 | all: $(net_lib) build/kernel_mean_field build/kernel_loopy_bp
22 | 
23 | $(net_lib): $(objs)
24 | 	$(dir_guard)
25 | 	ar rcs $@ $(objs)
26 | 
27 | $(obj_build_root)/cuda/%.o: src/%.cu
28 | 	$(dir_guard)
29 | 	$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} -odir $(@D)
30 | 	$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@
31 | 		
32 | $(obj_build_root)/cxx/%.o: src/%.cpp
33 | 	$(dir_guard)
34 | 	$(CXX) $(CXXFLAGS) -MMD -c -o $@ $(filter %.cpp, $^)
35 | 	
36 | build/%: src/%.cpp $(net_lib) ./include/*
37 | 	$(dir_guard)
38 | 	$(CXX) $(CXXFLAGS) -o $@ $(filter %.cpp, %.a $^) -L$(lib_dir) -lnet $(LDFLAGS)
39 | 
40 | clean:
41 | 	rm -rf build
42 | 
43 | -include $(DEPS)
44 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # sequence2vec
 2 | 
 3 | #### Prerequisites
 4 | 
 5 | Tested under Ubuntu 14.04
 6 | 
 7 | 
 8 | ##### Download and install cuda from https://developer.nvidia.com/cuda-toolkit
 9 | 
10 |     wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_8.0.44-1_amd64.deb
11 |     sudo dpkg -i cuda-repo-ubuntu1404_8.0.44-1_amd64.deb
12 |     sudo apt-get update
13 |     sudo apt-get install cuda
14 |     
15 |   in .bashrc, add the following path (suppose you installed to the default path)
16 |   
17 |     export CUDA_HOME=/usr/local/cuda
18 |     export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
19 |     export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
20 |     
21 | ##### Download and install intel mkl
22 | 
23 |   in .bashrc, add the following path
24 |   
25 |     source {path_to_your_intel_root/name_of_parallel_tool_box}/bin/psxevars.sh
26 |     export MKL_ROOT={path_to_your_intel_root}/mkl
27 |     
28 | ##### Install cppformat (now called fmtlib)
29 | 
30 |     check https://github.com/fmtlib/fmt for help
31 |     
32 | #### Build
33 |     make
34 |     
35 | ##### Run Kd prediction
36 |     modify the configs in local_run.sh
37 |     ./local_run.sh
38 | 


--------------------------------------------------------------------------------
/data/12mer-kd/split_train_test.m:
--------------------------------------------------------------------------------
 1 | clear;
 2 | clc;
 3 | 
 4 | T = readtable('raw-12mer-kd.csv');
 5 | 
 6 | total = size(T, 1);
 7 |  
 8 | fold_size = floor(total / 10);
 9 | p = randperm(total);
10 | for fold = 1 : 10
11 |     test_range = (fold - 1) * fold_size + 1 : fold * fold_size;
12 |     train_range = [1 : (fold - 1) * fold_size, fold * fold_size + 1 : total];
13 |      
14 |     fid = fopen(sprintf('10fold_idx/test_idx-%d.txt', fold), 'w');
15 |     for i = 1 : length(test_range)
16 |         fprintf(fid, '%d\n', p(test_range(i)) - 1);
17 |     end
18 |     fclose(fid);
19 | 
20 |     fid = fopen(sprintf('10fold_idx/train_idx-%d.txt', fold), 'w');
21 |     for i = 1 : length(train_range)
22 |         fprintf(fid, '%d\n', p(train_range(i)) - 1);
23 |     end
24 |     fclose(fid);
25 | end
26 | 
27 | fid = fopen('12mer-kd.txt', 'w');
28 | fprintf(fid, '%d\n', total);
29 | for i = 1 : total
30 |     fprintf(fid, '%.10f %s\n', T.kd(i), T.str{i});
31 | end
32 | fclose(fid);


--------------------------------------------------------------------------------
/include/config.h:
--------------------------------------------------------------------------------
  1 | #ifndef CONFIG_H
  2 | #define CONFIG_H
  3 | 
  4 | #include <iostream>
  5 | #include <unistd.h>
  6 | #include <cstring>
  7 | 
  8 | typedef float Dtype;
  9 | 
 10 | struct cfg
 11 | {
 12 |     static bool evaluate, rev_order;
 13 |     static int dev_id, iter; 
 14 |     static int max_lv, conv_size, fp_len, kmer;
 15 |     static unsigned n_hidden;
 16 |     static Dtype scale;
 17 |     static unsigned batch_size; 
 18 |     static unsigned max_epoch; 
 19 |     static bool max_pool, global_pool;
 20 |     static int num_nodes;
 21 |     static unsigned test_interval; 
 22 |     static unsigned report_interval; 
 23 |     static unsigned save_interval; 
 24 |     static int window_size;
 25 |     static int node_dim;
 26 |     static bool pad;
 27 |     static Dtype lr;
 28 |     static Dtype l2_penalty; 
 29 |     static Dtype momentum; 
 30 |     static const char *result_file, *train_idx_file, *test_idx_file, *string_file, *save_dir; 
 31 |     
 32 |     static void LoadParams(const int argc, const char** argv)
 33 |     {
 34 |         for (int i = 1; i < argc; i += 2)
 35 |         {   
 36 |             if (strcmp(argv[i], "-kmer") == 0)
 37 |                 kmer = atoi(argv[i + 1]);
 38 |             if (strcmp(argv[i], "-scale") == 0)
 39 |                 scale = atof(argv[i + 1]);
 40 |             if (strcmp(argv[i], "-global_pool") == 0)
 41 |                 global_pool = (bool)atoi(argv[i + 1]);
 42 |             if (strcmp(argv[i], "-rev_order") == 0)
 43 |                 rev_order = (bool)atoi(argv[i + 1]);
 44 |             if (strcmp(argv[i], "-eval") == 0)
 45 |                 evaluate = (bool)atoi(argv[i + 1]);
 46 |             if (strcmp(argv[i], "-max_pool") == 0)
 47 |                 max_pool = (bool)atoi(argv[i + 1]);         
 48 |             if (strcmp(argv[i], "-pad") == 0)
 49 |                 pad = (bool)atoi(argv[i + 1]);          
 50 |             if (strcmp(argv[i], "-w") == 0)
 51 |                 window_size = atoi(argv[i + 1]);
 52 | 		    if (strcmp(argv[i], "-lr") == 0)
 53 | 		        lr = atof(argv[i + 1]);
 54 |             if (strcmp(argv[i], "-cur_iter") == 0)
 55 |                 iter = atoi(argv[i + 1]);
 56 | 		    if (strcmp(argv[i], "-hidden") == 0)
 57 | 			    n_hidden = atoi(argv[i + 1]);
 58 | 			if (strcmp(argv[i], "-lv") == 0)
 59 | 				max_lv = atoi(argv[i + 1]);
 60 |         	if (strcmp(argv[i], "-conv") == 0)
 61 | 				conv_size = atoi(argv[i + 1]);
 62 |         	if (strcmp(argv[i], "-fp") == 0)
 63 | 				fp_len = atoi(argv[i + 1]);
 64 | 		    if (strcmp(argv[i], "-b") == 0)
 65 |     			batch_size = atoi(argv[i + 1]);
 66 |     		if (strcmp(argv[i], "-maxe") == 0)
 67 | 	       		max_epoch = atoi(argv[i + 1]);
 68 | 		    if (strcmp(argv[i], "-int_test") == 0)
 69 |     			test_interval = atoi(argv[i + 1]);
 70 |     	   	if (strcmp(argv[i], "-int_report") == 0)
 71 |     			report_interval = atoi(argv[i + 1]);
 72 |     		if (strcmp(argv[i], "-int_save") == 0)
 73 |     			save_interval = atoi(argv[i + 1]);
 74 |     		if (strcmp(argv[i], "-l2") == 0)
 75 |     			l2_penalty = atof(argv[i + 1]);
 76 |     		if (strcmp(argv[i], "-m") == 0)
 77 |     			momentum = atof(argv[i + 1]);	
 78 |      		if (strcmp(argv[i], "-result") == 0)
 79 |     			result_file = argv[i + 1];
 80 |     		if (strcmp(argv[i], "-svdir") == 0)
 81 |     			save_dir = argv[i + 1];
 82 |     		if (strcmp(argv[i], "-string") == 0)
 83 | 				string_file = argv[i + 1];
 84 | 			if (strcmp(argv[i], "-train_idx") == 0)
 85 | 				train_idx_file = argv[i + 1];
 86 | 			if (strcmp(argv[i], "-test_idx") == 0)
 87 | 				test_idx_file = argv[i + 1];
 88 |             if (strcmp(argv[i], "-device") == 0)
 89 |     			dev_id = atoi(argv[i + 1]);
 90 |         }
 91 | 
 92 |         if (pad)
 93 |         {
 94 |             node_dim = 1;
 95 |             for (int i = 0; i < window_size; ++i)
 96 |                 node_dim *= 5;
 97 |         }
 98 |         else
 99 |             node_dim = 1 << (2 * window_size);     
100 | 
101 |         std::cerr << "max_pool = " << max_pool << std::endl;
102 |         std::cerr << "node_dim = " << node_dim << std::endl;
103 |         std::cerr << "pad = " << pad << std::endl;
104 |         std::cerr << "window_size = " << window_size << std::endl;
105 |         std::cerr << "n_hidden = " << n_hidden << std::endl;
106 |         std::cerr << "global_pool = " << global_pool << std::endl;
107 | 		std::cerr << "max level = " << max_lv << std::endl;
108 |     	std::cerr << "conv size = " << conv_size << std::endl;
109 |     	std::cerr << "fp len = " << fp_len << std::endl;
110 |         std::cerr << "batch_size = " << batch_size << std::endl;
111 |         std::cerr << "max_epoch = " << max_epoch << std::endl;
112 |     	std::cerr << "test_interval = " << test_interval << std::endl;
113 |     	std::cerr << "report_interval = " << report_interval << std::endl;
114 |     	std::cerr << "save_interval = " << save_interval << std::endl;
115 |     	std::cerr << "lr = " << lr << std::endl;
116 |     	std::cerr << "l2_penalty = " << l2_penalty << std::endl;
117 |     	std::cerr << "momentum = " << momentum << std::endl;
118 |     	std::cerr << "init iter = " << iter << std::endl;	
119 |         std::cerr << "device id = " << dev_id << std::endl;    
120 | 	std::cerr << "scale = " << scale << std::endl;
121 |     }
122 | };
123 | 
124 | bool cfg::global_pool = false;
125 | bool cfg::max_pool = false;
126 | bool cfg::rev_order = false;
127 | bool cfg::pad = false;
128 | bool cfg::evaluate = false;
129 | int cfg::dev_id = 0;
130 | int cfg::node_dim = 0;
131 | int cfg::iter = 0;
132 | int cfg::max_lv = 4;
133 | int cfg::kmer = 3;
134 | int cfg::conv_size = 20;
135 | int cfg::fp_len = 512;
136 | int cfg::num_nodes = 0;
137 | unsigned cfg::n_hidden = 100;
138 | unsigned cfg::batch_size = 50;
139 | unsigned cfg::max_epoch = 200;
140 | unsigned cfg::test_interval = 10000;
141 | unsigned cfg::report_interval = 100;
142 | unsigned cfg::save_interval = 50000;
143 | int cfg::window_size = 1;
144 | Dtype cfg::lr = 0.0005;
145 | Dtype cfg::l2_penalty = 0;
146 | Dtype cfg::momentum = 0;
147 | Dtype cfg::scale = 1;
148 | const char* cfg::train_idx_file = nullptr;
149 | const char* cfg::test_idx_file = nullptr;
150 | const char* cfg::string_file = nullptr;
151 | const char* cfg::result_file = nullptr;
152 | const char* cfg::save_dir = "./saved";
153 | 
154 | #endif
155 | 


--------------------------------------------------------------------------------
/include/matrix/cuda_binary_kernel.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef CUDA_BINARY_KERNEL_CUH
 2 | #define CUDA_BINARY_KERNEL_CUH
 3 | 
 4 | #include <cuda_runtime.h>
 5 | #include "gpuhandle.h"
 6 | 
 7 | //=================================== mul ======================================
 8 | 
 9 | template<typename Dtype>
10 | class BinaryMul
11 | {
12 | public:
13 | 	BinaryMul() {}
14 |     
15 | 	__device__ inline void operator()(Dtype& dst, const Dtype& lhs)
16 | 	{
17 | 		dst *= lhs;
18 | 	}
19 |     
20 |     __device__ inline void operator()(Dtype& dst, const Dtype& lhs, const Dtype& rhs)
21 | 	{
22 | 		dst = lhs * rhs;
23 | 	}    
24 | };
25 | 
26 | //=================================== mul ======================================
27 | 
28 | template<typename Dtype>
29 | class BinaryDiv
30 | {
31 | public:
32 | 	BinaryDiv() {}
33 |     
34 | 	__device__ inline void operator()(Dtype& dst, const Dtype& lhs)
35 | 	{
36 | 		dst /= lhs;
37 | 	}
38 |     
39 |     __device__ inline void operator()(Dtype& dst, const Dtype& lhs, const Dtype& rhs)
40 | 	{
41 | 		dst = lhs / rhs;
42 | 	}    
43 | };
44 | 
45 | 
46 | //=================================== call interface ======================================
47 | 
48 | template<typename Dtype, class BinaryEngine>
49 | __global__ void BinaryKernel(Dtype *dst, const Dtype *lhs, int numElements, BinaryEngine binary)
50 | {
51 |     int i = blockDim.x * blockIdx.x + threadIdx.x;
52 |     
53 |     if(i < numElements)
54 |     {
55 |         binary(dst[i], lhs[i]);
56 |     }
57 | }
58 | 
59 | template<typename Dtype, class BinaryEngine>
60 | void BinaryOp(Dtype *dst, const Dtype *lhs, int numElements, BinaryEngine binary, const unsigned& sid)
61 | {
62 |     int thread_num = min(c_uCudaThreadNum, numElements);    
63 |     int blocksPerGrid = (numElements + thread_num - 1) / thread_num;
64 |     BinaryKernel<<<blocksPerGrid, thread_num, 0, GPUHandle::streams[sid]>>> (dst, lhs, numElements, binary);
65 | }
66 | 
67 | template<typename Dtype, class BinaryEngine>
68 | __global__ void BinaryKernel(Dtype *dst, const Dtype* lhs, const Dtype *rhs, int numElements, BinaryEngine binary)
69 | {
70 |     int i = blockDim.x * blockIdx.x + threadIdx.x;
71 |     
72 |     if(i < numElements)
73 |     {
74 |         binary(dst[i], lhs[i], rhs[i]);
75 |     }
76 | }
77 | 
78 | template<typename Dtype, class BinaryEngine>
79 | void BinaryOp(Dtype *dst, const Dtype* lhs, const Dtype *rhs, int numElements, BinaryEngine binary, const unsigned& sid)
80 | {
81 |     int thread_num = min(c_uCudaThreadNum, numElements);    
82 |     int blocksPerGrid = (numElements + thread_num - 1) / thread_num;
83 |     BinaryKernel<<<blocksPerGrid, thread_num, 0, GPUHandle::streams[sid]>>> (dst, lhs, rhs, numElements, binary);
84 | }
85 | 
86 | #endif


--------------------------------------------------------------------------------
/include/matrix/cuda_helper.h:
--------------------------------------------------------------------------------
  1 | #ifndef CUDA_HELPER_H
  2 | #define CUDA_HELPER_H
  3 | 
  4 | #include <curand.h>
  5 | #include <cuda_runtime.h>
  6 | #include <cusparse_v2.h>
  7 | #include <cublas_v2.h>
  8 | 
  9 | __device__ inline int get_sp_row_idx(int i, int* row_ptr, int n_rows)
 10 | {
 11 |     int l = 0, r = n_rows - 1, row;
 12 |     while (l <= r)
 13 |     {
 14 |         row = (l + r) / 2;
 15 |         if (row_ptr[row] <= i)
 16 |         {
 17 |             if (row_ptr[row + 1] > i)
 18 |                 break;
 19 |             else 
 20 |                 l = row + 1;
 21 |         } else r = row - 1;
 22 |     }
 23 |     return row;
 24 | }
 25 | 
 26 | __device__ inline float cuda_pow(const float& x, const float& y)
 27 | {
 28 |         return powf(x, y);
 29 | }
 30 | 
 31 | __device__ inline double cuda_pow(const double& x, const double& y)
 32 | {
 33 |         return pow(x, y);
 34 | }
 35 | 
 36 | __device__ inline float cuda_exp(const float& src)
 37 | {
 38 |         return expf(src);
 39 | }
 40 |     
 41 | __device__ inline double cuda_exp(const double& src)
 42 | {
 43 |         return exp(src);
 44 | }
 45 | 
 46 | __device__ inline float cuda_log(const float& src)
 47 | {
 48 |         return logf(src);
 49 | }
 50 |     
 51 | __device__ inline double cuda_log(const double& src)
 52 | {
 53 |         return log(src);
 54 | }
 55 | 
 56 | inline float CudaHelper_Dot(cublasHandle_t& handle, int n, const float *x, const float* y)
 57 | {
 58 |         float result;
 59 |         cublasSdot(handle, n, x, 1, y, 1, &result);
 60 |         return result;
 61 | }
 62 | 
 63 | inline double CudaHelper_Dot(cublasHandle_t& handle, int n, const double *x, const double* y)
 64 | {
 65 |         double result;
 66 |         cublasDdot(handle, n, x, 1, y, 1, &result);
 67 |         return result;
 68 | }
 69 | 
 70 | inline float CudaHelper_Norm2(cublasHandle_t& handle, int n, const float *x)
 71 | {
 72 | 		float result;
 73 | 		cublasSnrm2(handle, n, x, 1, &result);
 74 | 		return result;
 75 | }
 76 | 
 77 | inline double CudaHelper_Norm2(cublasHandle_t& handle, int n, const double *x)
 78 | {
 79 | 		double result;
 80 | 		cublasDnrm2(handle, n, x, 1, &result);
 81 | 		return result;
 82 | }
 83 | 
 84 | inline void CudaHelper_Amax(cublasHandle_t& handle, int n, const float *x, int* result)
 85 | {
 86 |         cublasIsamax(handle, n, x, 1, result);
 87 | }
 88 | 
 89 | inline void CudaHelper_Amax(cublasHandle_t& handle, int n, const double *x, int* result)
 90 | {
 91 |         cublasIdamax(handle, n, x, 1, result);
 92 | }
 93 | 
 94 | inline float CudaHelper_Asum(cublasHandle_t& handle, int n, const float *x)
 95 | {
 96 | 		float result;
 97 | 		cublasSasum(handle, n, x, 1, &result);
 98 | 		return result;
 99 | }
100 | 
101 | inline double CudaHelper_Asum(cublasHandle_t& handle, int n, const double *x)
102 | {
103 | 		double result;
104 | 		cublasDasum(handle, n, x, 1, &result);
105 | 		return result;
106 | }
107 | 
108 | inline void CudaHelper_Ger(cublasHandle_t& handle, int m, int n, const float* alpha, const float* x, const float* y, float* A)
109 | {
110 | 		cublasSger(handle, m, n, alpha, x, 1, y, 1, A, m);
111 | }
112 | 
113 | inline void CudaHelper_Ger(cublasHandle_t& handle, int m, int n, const double* alpha, const double* x, const double* y, double* A)
114 | {
115 | 		cublasDger(handle, m, n, alpha, x, 1, y, 1, A, m);
116 | }
117 | 
118 | inline void CudaHelper_GeMV(cublasHandle_t& handle, cublasOperation_t trans, 
119 |                             int m, int n, 
120 |                             const float* alpha, const float* A, int lda, 
121 |                             const float *x, int incx, 
122 |                             const float *beta, float* y, int incy)
123 | {
124 |         cublasSgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
125 | }
126 | 
127 | inline void CudaHelper_GeMV(cublasHandle_t& handle, cublasOperation_t trans, 
128 |                             int m, int n, 
129 |                             const double* alpha, const double* A, int lda, 
130 |                             const double *x, int incx, 
131 |                             const double *beta, double* y, int incy)
132 | {
133 |         cublasDgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
134 | }
135 | 
136 | inline void CudaHelper_GeaM(cublasHandle_t& handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n,
137 | 							const float* alpha, const float* A, int lda, const float* beta, const float* B, int ldb,  
138 | 							float* C, int ldc)
139 | {
140 | 		cublasSgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
141 | }
142 | 
143 | inline void CudaHelper_GeaM(cublasHandle_t& handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n,
144 | 							const double* alpha, const double* A, int lda, const double* beta, const double* B, int ldb, 
145 | 							double* C, int ldc)
146 | {
147 | 		cublasDgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
148 | }
149 | 
150 | inline void CudaHelper_Axpy(cublasHandle_t& handle, int n, const float *alpha, const float *x, float *y)
151 | {
152 | 		cublasSaxpy(handle, n, alpha, x, 1, y, 1);
153 | }
154 | 
155 | inline void CudaHelper_Axpy(cublasHandle_t& handle, int n, const double *alpha, const double *x, double *y)
156 | {
157 | 		cublasDaxpy(handle, n, alpha, x, 1, y, 1);
158 | }
159 | 
160 | inline void CudaHelper_SetRandNormal(curandGenerator_t& generator, float* outputPtr, size_t n, float mean, float stddev)
161 | {
162 | 		curandGenerateNormal(generator, outputPtr, n, mean, stddev);
163 | }
164 | 
165 | inline void CudaHelper_SetRandNormal(curandGenerator_t& generator, double* outputPtr, size_t n, double mean, double stddev)
166 | {
167 | 		curandGenerateNormalDouble(generator, outputPtr, n, mean, stddev);
168 | }
169 | 
170 | #endif


--------------------------------------------------------------------------------
/include/matrix/cuda_rand_kernel.cuh:
--------------------------------------------------------------------------------
  1 | #ifndef CUDA_RAND_KERNEL_CUH
  2 | #define CUDA_RAND_KERNEL_CUH
  3 | 
  4 | #include <cuda_runtime.h>
  5 | #include <curand_kernel.h>
  6 | #include "gpuhandle.h"
  7 | 
  8 | template<typename Dtype>
  9 | class NormalRandomizer
 10 | {
 11 | public:
 12 | 	NormalRandomizer(Dtype _mean, Dtype _std) : mean(_mean), std(_std) {}
 13 | 	__device__ inline Dtype operator()(curandState_t* state)
 14 | 	{
 15 |         return curand_normal(state) * std + mean;
 16 | 	}
 17 |     
 18 | private:    
 19 |     Dtype mean;
 20 |     Dtype std;
 21 | };
 22 | 
 23 | template<typename Dtype>
 24 | class BinomialRandomizer
 25 | {
 26 | public:
 27 | 	BinomialRandomizer() {}
 28 | 	__device__ inline Dtype operator()(curandState_t* state)
 29 | 	{
 30 | 		return curand_uniform(state) > 0.5 ? 1.0 : -1.0;
 31 | 	}
 32 | };
 33 | 
 34 | template<typename Dtype>
 35 | class UniformRandomizer
 36 | {
 37 | public:
 38 | 	UniformRandomizer(Dtype _lb, Dtype _ub) : lb(_lb), ub(_ub) {}
 39 | 	__device__ inline Dtype operator()(curandState_t* state)
 40 | 	{
 41 | 		return curand_uniform(state) * (ub - lb) + lb;
 42 | 	}
 43 |     
 44 | private:    
 45 |     Dtype lb;
 46 |     Dtype ub; 
 47 | };
 48 | 
 49 | 
 50 | template<typename Dtype>
 51 | class ChisquareRandomizer
 52 | {
 53 | public:
 54 | 	ChisquareRandomizer(Dtype _degree) : alpha(_degree / 2) {}
 55 | 	
 56 | 	__device__ inline Dtype operator()(curandState_t* state)
 57 | 	{
 58 | 		Dtype x, v, u;
 59 |       	Dtype d = alpha - 1.0 / 3.0;
 60 |       	Dtype c = (1.0 / 3.0) / sqrt (d);
 61 | 
 62 |       	while (1){
 63 |        		do {
 64 |             	x = curand_normal(state);
 65 |               	v = 1.0 + c * x;
 66 |           	} while (v <= 0);
 67 | 
 68 |           	v = v * v * v;
 69 |           	u = curand_uniform(state);
 70 | 
 71 |           	if (u < 1 - 0.0331 * x * x * x * x) 
 72 |             	break;
 73 | 
 74 |           	if (log (u) < 0.5 * x * x + d * (1 - v + log (v)))
 75 |             	break;
 76 |       	}
 77 |       	// scale by 2.0 to get chisquare
 78 |       	return 2.0 * (d * v);		
 79 | 	}
 80 | 	
 81 | private:
 82 | 	const Dtype alpha;
 83 | };
 84 | 
 85 | template<typename Dtype, class RandEngine>
 86 | __global__ void RandKernel(Dtype *targets, int numElements, curandState_t* state, RandEngine rnd)
 87 | {
 88 |     const int tidx = NUM_RND_THREADS_PER_BLOCK * blockIdx.x + threadIdx.x;
 89 |     curandState_t localState = state[tidx];
 90 |     for (int i = tidx; i < numElements; i += NUM_RND_STREAMS) 
 91 |     {
 92 |         targets[i] = rnd(&localState);
 93 |     }
 94 |     state[tidx] = localState;
 95 | }
 96 | 
 97 | template<typename Dtype, class RandEngine>
 98 | void SetRand(Dtype *dst, int numElements, RandEngine rnd, const unsigned& sid)
 99 | {
100 |     RandKernel<<<NUM_RND_BLOCKS,NUM_RND_THREADS_PER_BLOCK, 0, GPUHandle::streams[sid]>>>(dst, numElements, GPUHandle::devRandStates, rnd);
101 | }
102 | 
103 | #endif


--------------------------------------------------------------------------------
/include/matrix/cuda_unary_kernel.cuh:
--------------------------------------------------------------------------------
  1 | #ifndef CUDA_UNARY_KERNEL_CUH
  2 | #define CUDA_UNARY_KERNEL_CUH
  3 | 
  4 | #include <cuda_runtime.h>
  5 | #include "gpuhandle.h"
  6 | #include "cuda_helper.h"
  7 | 
  8 | //=================================== power ======================================
  9 | 
 10 | template<typename Dtype>
 11 | class UnaryPow
 12 | {
 13 | public:
 14 | 	UnaryPow(Dtype _scalar) : scalar(_scalar) {}
 15 |     
 16 | 	__device__ inline void operator()(Dtype& dst)
 17 | 	{
 18 | 		dst = cuda_pow(dst, scalar);
 19 | 	}
 20 |     
 21 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
 22 | 	{
 23 | 		dst = cuda_pow(src, scalar);
 24 | 	}
 25 |     
 26 | private:
 27 |     Dtype scalar;    
 28 | };
 29 | 
 30 | //=================================== scale ======================================
 31 | 
 32 | template<typename Dtype>
 33 | class UnaryScale
 34 | {
 35 | public:
 36 | 	UnaryScale(Dtype _scalar) : scalar(_scalar) {}
 37 |     
 38 | 	__device__ inline void operator()(Dtype& dst)
 39 | 	{
 40 | 		dst *= scalar;
 41 | 	}
 42 |     
 43 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
 44 | 	{
 45 | 		dst = src * scalar;
 46 | 	}
 47 |     
 48 | private:
 49 |     Dtype scalar;    
 50 | };
 51 | 
 52 | //=================================== sqrt ======================================
 53 | 
 54 | template<typename Dtype>
 55 | class UnarySqrt{};
 56 | 
 57 | template<>
 58 | class UnarySqrt<float>
 59 | {
 60 | public:
 61 |     UnarySqrt() {}
 62 |     __device__ inline void operator()(float& dst)
 63 | 	{
 64 | 		dst = sqrtf(dst);
 65 | 	}
 66 |     
 67 |     __device__ inline void operator()(float& dst, const float& src)
 68 | 	{
 69 | 		dst = sqrtf(src);
 70 | 	}
 71 | };
 72 | 
 73 | template<>
 74 | class UnarySqrt<double>
 75 | {
 76 | public:
 77 |     UnarySqrt() {}
 78 |     __device__ inline void operator()(double& dst)
 79 | 	{
 80 | 		dst = sqrt(dst);
 81 | 	}
 82 |     
 83 |     __device__ inline void operator()(double& dst, const double& src)
 84 | 	{
 85 | 		dst = sqrt(src);
 86 | 	}
 87 | };
 88 | 
 89 | //=================================== set ======================================
 90 | 
 91 | template<typename Dtype>
 92 | class UnarySet
 93 | {
 94 | public:
 95 |     UnarySet(Dtype _scalar) : scalar(_scalar) {}
 96 |     
 97 |     __device__ inline void operator()(Dtype& dst)
 98 | 	{
 99 | 		dst = scalar;
100 | 	}
101 |     
102 | private:
103 |     Dtype scalar;    
104 | };
105 | 
106 | //=================================== add ======================================
107 | 
108 | template<typename Dtype>
109 | class UnaryAdd
110 | {
111 | public:
112 |     UnaryAdd(Dtype _scalar) : scalar(_scalar) {}
113 |     
114 |     __device__ inline void operator()(Dtype& dst)
115 | 	{
116 | 		dst += scalar;
117 | 	}
118 |     
119 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
120 | 	{
121 | 		dst = src + scalar;
122 | 	}
123 |     
124 | private:
125 |     Dtype scalar;    
126 | };
127 | 
128 | //=================================== inv ======================================
129 | 
130 | template<typename Dtype>
131 | class UnaryInv
132 | {
133 | public:
134 |     UnaryInv() {}
135 |     
136 |     __device__ inline void operator()(Dtype& dst)
137 | 	{
138 | 		dst = 1.0 / dst;
139 | 	}
140 |     
141 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
142 | 	{
143 | 		dst = 1.0 / src;
144 | 	}
145 | };
146 | 
147 | //=================================== inv_sqrt ======================================
148 | 
149 | template<typename Dtype>
150 | class UnaryInvSqrt
151 | {
152 | public:
153 |     UnaryInvSqrt() {}
154 |     
155 |     __device__ inline void operator()(Dtype& dst)
156 | 	{
157 | 		dst = my_inv_sqrt(dst);
158 | 	}
159 |     
160 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
161 | 	{
162 | 		dst = my_inv_sqrt(src);
163 | 	}
164 |     
165 | private:
166 |     __device__ inline float my_inv_sqrt(const float& src)
167 |     {
168 |         return rsqrtf(src);
169 |     }
170 |     
171 |     __device__ inline double my_inv_sqrt(const double& src)
172 |     {
173 |         return rsqrt(src);
174 |     }
175 | };
176 | 
177 | //=================================== sin ======================================
178 | 
179 | template<typename Dtype>
180 | class UnarySin
181 | {
182 | public:
183 |     UnarySin() {}
184 |     
185 |     __device__ inline void operator()(Dtype& dst)
186 | 	{
187 | 		dst = my_sin(dst);
188 | 	}
189 |     
190 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
191 | 	{
192 | 		dst = my_sin(src);
193 | 	}
194 |     
195 | private:
196 |     __device__ inline float my_sin(const float& src)
197 |     {
198 |         return sinf(src);
199 |     }
200 |     
201 |     __device__ inline double my_sin(const double& src)
202 |     {
203 |         return sin(src);
204 |     }
205 | };
206 | 
207 | //=================================== exp ======================================
208 | 
209 | template<typename Dtype>
210 | class UnaryExp
211 | {
212 | public:
213 |     UnaryExp() {}
214 |     
215 |     __device__ inline void operator()(Dtype& dst)
216 | 	{
217 | 		dst = cuda_exp(dst);
218 | 	}
219 |     
220 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
221 | 	{
222 | 		dst = cuda_exp(src);
223 | 	}
224 | };
225 | 
226 | //=================================== log ======================================
227 | template<typename Dtype>
228 | class UnaryLog
229 | {
230 | public:
231 |     UnaryLog() {}
232 |     
233 |     __device__ inline void operator()(Dtype& dst)
234 | 	{
235 | 		dst = cuda_log(dst);
236 | 	}
237 |     
238 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
239 | 	{
240 | 		dst = cuda_log(src);
241 | 	}
242 | };
243 | 
244 | //=================================== sigmoid ======================================
245 | 
246 | template<typename Dtype>
247 | class UnarySigmoid
248 | {
249 | public:
250 |     UnarySigmoid() {}
251 |     
252 |     __device__ inline void operator()(Dtype& dst)
253 | 	{
254 | 		dst = 1.0 / (1.0 + cuda_exp(-dst));
255 | 	}
256 |     
257 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
258 | 	{
259 | 		dst = 1.0 / (1.0 + cuda_exp(-src));
260 | 	}
261 | };
262 | 
263 | //=================================== cos ======================================
264 | 
265 | template<typename Dtype>
266 | class UnaryCos
267 | {
268 | public:
269 |     UnaryCos() {}
270 |     
271 |     __device__ inline void operator()(Dtype& dst)
272 | 	{
273 | 		dst = my_cos(dst);
274 | 	}
275 |     
276 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
277 | 	{
278 | 		dst = my_cos(src);
279 | 	}
280 |     
281 | private:
282 |     __device__ inline float my_cos(const float& src)
283 |     {
284 |         return cosf(src);
285 |     }
286 |     
287 |     __device__ inline double my_cos(const double& src)
288 |     {
289 |         return cos(src);
290 |     }
291 | };
292 | 
293 | //=================================== square ======================================
294 | 
295 | template<typename Dtype>
296 | class UnarySquare
297 | {
298 | public:
299 | 	UnarySquare() {}
300 |     
301 | 	__device__ inline void operator()(Dtype& dst)
302 | 	{
303 | 		dst = dst * dst;
304 | 	}
305 |     
306 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
307 | 	{
308 | 		dst = src * src;
309 | 	}    
310 | };
311 | 
312 | //=================================== relu ======================================
313 | 
314 | template<typename Dtype>
315 | class UnaryReLU
316 | {
317 | public:
318 | 	UnaryReLU() {}
319 |     
320 | 	__device__ inline void operator()(Dtype& dst)
321 | 	{
322 | 		dst = dst > 0 ? dst : 0;
323 | 	}
324 |     
325 |     __device__ inline void operator()(Dtype& dst, const Dtype& src)
326 | 	{
327 | 		dst = src > 0 ? src : 0;
328 | 	}
329 | };
330 | 
331 | 
332 | //=================================== call interface ======================================
333 | 
334 | template<typename Dtype, class UnaryEngine>
335 | __global__ void UnaryKernel(Dtype *dst, int numElements, UnaryEngine unary)
336 | {
337 |     int i = blockDim.x * blockIdx.x + threadIdx.x;
338 |     
339 |     if(i < numElements)
340 |     {
341 |         unary(dst[i]);
342 |     }
343 | }
344 | 
345 | template<typename Dtype, class UnaryEngine>
346 | void UnaryOp(Dtype *dst, int numElements, UnaryEngine unary, const unsigned& sid)
347 | {
348 |     int thread_num = min(c_uCudaThreadNum, numElements);    
349 |     int blocksPerGrid = (numElements + thread_num - 1) / thread_num;
350 |     UnaryKernel<<<blocksPerGrid, thread_num, 0, GPUHandle::streams[sid]>>> (dst, numElements, unary);
351 | }
352 | 
353 | template<typename Dtype, class UnaryEngine>
354 | __global__ void UnaryKernel(Dtype *dst, Dtype* src, int numElements, UnaryEngine unary)
355 | {
356 |     int i = blockDim.x * blockIdx.x + threadIdx.x;
357 |     
358 |     if(i < numElements)
359 |     {
360 |         unary(dst[i], src[i]);
361 |     }
362 | }
363 | 
364 | template<typename Dtype, class UnaryEngine>
365 | void UnaryOp(Dtype *dst, Dtype* src, int numElements, UnaryEngine unary, const unsigned& sid)
366 | {
367 |     int thread_num = min(c_uCudaThreadNum, numElements);    
368 |     int blocksPerGrid = (numElements + thread_num - 1) / thread_num;
369 |     UnaryKernel<<<blocksPerGrid, thread_num, 0, GPUHandle::streams[sid]>>> (dst, src, numElements, unary);
370 | }
371 | 
372 | #endif


--------------------------------------------------------------------------------
/include/matrix/fastWalshTransform_kernel_double.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 |  * with this source code for terms and conditions that govern your use of
  6 |  * this software. Any use, reproduction, disclosure, or distribution of
  7 |  * this software and related documentation outside the terms of the EULA
  8 |  * is strictly prohibited.
  9 |  *
 10 |  */
 11 | 
 12 | 
 13 | 
 14 | #ifndef FWT_KERNEL_CUH_DOUBLE
 15 | #define FWT_KERNEL_CUH_DOUBLE
 16 | #ifndef fwt_kernel_cuh_double
 17 | #define fwt_kernel_cuh_double
 18 | 
 19 | 
 20 | 
 21 | ///////////////////////////////////////////////////////////////////////////////
 22 | // Elementary(for vectors less than elementary size) in-shared memory
 23 | // combined radix-2 + radix-4 Fast Walsh Transform
 24 | ///////////////////////////////////////////////////////////////////////////////
 25 | #define ELEMENTARY_LOG2SIZE 11
 26 | 
 27 | __global__ void fwtBatch1Kernel(double *d_Output, double *d_Input, int log2N)
 28 | {
 29 |     const int    N = 1 << log2N;
 30 |     const int base = blockIdx.x << log2N;
 31 | 
 32 |     //(2 ** 11) * 4 bytes == 8KB -- maximum d_data[] size for G80
 33 |     extern __shared__ double d_data[];
 34 |     double *d_Src = d_Input  + base;
 35 |     double *d_Dst = d_Output + base;
 36 | 
 37 |     for (int pos = threadIdx.x; pos < N; pos += blockDim.x)
 38 |     {
 39 |         d_data[pos] = d_Src[pos];
 40 |     }
 41 | 
 42 |     //Main radix-4 stages
 43 |     const int pos = threadIdx.x;
 44 | 
 45 |     for (int stride = N >> 2; stride > 0; stride >>= 2)
 46 |     {
 47 |         int lo = pos & (stride - 1);
 48 |         int i0 = ((pos - lo) << 2) + lo;
 49 |         int i1 = i0 + stride;
 50 |         int i2 = i1 + stride;
 51 |         int i3 = i2 + stride;
 52 | 
 53 |         __syncthreads();
 54 |         double D0 = d_data[i0];
 55 |         double D1 = d_data[i1];
 56 |         double D2 = d_data[i2];
 57 |         double D3 = d_data[i3];
 58 | 
 59 |         double T;
 60 |         T = D0;
 61 |         D0         = D0 + D2;
 62 |         D2         = T - D2;
 63 |         T = D1;
 64 |         D1         = D1 + D3;
 65 |         D3         = T - D3;
 66 |         T = D0;
 67 |         d_data[i0] = D0 + D1;
 68 |         d_data[i1] = T - D1;
 69 |         T = D2;
 70 |         d_data[i2] = D2 + D3;
 71 |         d_data[i3] = T - D3;
 72 |     }
 73 | 
 74 |     //Do single radix-2 stage for odd power of two
 75 |     if (log2N & 1)
 76 |     {
 77 |         __syncthreads();
 78 | 
 79 |         for (int pos = threadIdx.x; pos < N / 2; pos += blockDim.x)
 80 |         {
 81 |             int i0 = pos << 1;
 82 |             int i1 = i0 + 1;
 83 | 
 84 |             double D0 = d_data[i0];
 85 |             double D1 = d_data[i1];
 86 |             d_data[i0] = D0 + D1;
 87 |             d_data[i1] = D0 - D1;
 88 |         }
 89 |     }
 90 | 
 91 |     __syncthreads();
 92 | 
 93 |     for (int pos = threadIdx.x; pos < N; pos += blockDim.x)
 94 |     {
 95 |         d_Dst[pos] = d_data[pos];
 96 |     }
 97 | }
 98 | 
 99 | ////////////////////////////////////////////////////////////////////////////////
100 | // Single in-global memory radix-4 Fast Walsh Transform pass
101 | // (for strides exceeding elementary vector size)
102 | ////////////////////////////////////////////////////////////////////////////////
103 | __global__ void fwtBatch2Kernel(
104 |     double *d_Output,
105 |     double *d_Input,
106 |     int stride
107 | )
108 | {
109 |     const int pos = blockIdx.x * blockDim.x + threadIdx.x;
110 |     const int   N = blockDim.x *  gridDim.x * 4;
111 | 
112 |     double *d_Src = d_Input  + blockIdx.y * N;
113 |     double *d_Dst = d_Output + blockIdx.y * N;
114 | 
115 |     int lo = pos & (stride - 1);
116 |     int i0 = ((pos - lo) << 2) + lo;
117 |     int i1 = i0 + stride;
118 |     int i2 = i1 + stride;
119 |     int i3 = i2 + stride;
120 | 
121 |     double D0 = d_Src[i0];
122 |     double D1 = d_Src[i1];
123 |     double D2 = d_Src[i2];
124 |     double D3 = d_Src[i3];
125 | 
126 |     double T;
127 |     T = D0;
128 |     D0        = D0 + D2;
129 |     D2        = T - D2;
130 |     T = D1;
131 |     D1        = D1 + D3;
132 |     D3        = T - D3;
133 |     T = D0;
134 |     d_Dst[i0] = D0 + D1;
135 |     d_Dst[i1] = T - D1;
136 |     T = D2;
137 |     d_Dst[i2] = D2 + D3;
138 |     d_Dst[i3] = T - D3;
139 | }
140 | 
141 | ////////////////////////////////////////////////////////////////////////////////
142 | // Put everything together: batched Fast Walsh Transform CPU front-end
143 | ////////////////////////////////////////////////////////////////////////////////
144 | void fwtBatchGPU(double *d_Data, int M, int log2N)
145 | {
146 |     const int THREAD_N = 1024;
147 | 
148 |     int N = 1 << log2N;
149 |     dim3 grid((1 << log2N) / (4 * THREAD_N), M, 1);
150 | 
151 |     for (; log2N > ELEMENTARY_LOG2SIZE; log2N -= 2, N >>= 2, M <<= 2)
152 |     {
153 |         fwtBatch2Kernel<<<grid, THREAD_N>>>(d_Data, d_Data, N / 4);
154 |     }
155 | 
156 |     fwtBatch1Kernel<<<M, N / 4, N *sizeof(double)>>>(
157 |         d_Data,
158 |         d_Data,
159 |         log2N
160 |     );
161 | }
162 | 
163 | 
164 | ////////////////////////////////////////////////////////////////////////////////
165 | // Modulate two arrays
166 | ////////////////////////////////////////////////////////////////////////////////
167 | __global__ void modulateKernel(double *d_A, double *d_B, int N)
168 | {
169 |     int        tid = blockIdx.x * blockDim.x + threadIdx.x;
170 |     int numThreads = blockDim.x * gridDim.x;
171 |     double     rcpN = 1.0f / (double)N;
172 | 
173 |     for (int pos = tid; pos < N; pos += numThreads)
174 |     {
175 |         d_A[pos] *= d_B[pos] * rcpN;
176 |     }
177 | }
178 | 
179 | //Interface to modulateKernel()
180 | void modulateGPU(double *d_A, double *d_B, int N)
181 | {
182 |     modulateKernel<<<128, 256>>>(d_A, d_B, N);
183 | }
184 | 
185 | 
186 | 
187 | #endif
188 | #endif
189 | 


--------------------------------------------------------------------------------
/include/matrix/fastWalshTransform_kernel_float.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 |  * with this source code for terms and conditions that govern your use of
  6 |  * this software. Any use, reproduction, disclosure, or distribution of
  7 |  * this software and related documentation outside the terms of the EULA
  8 |  * is strictly prohibited.
  9 |  *
 10 |  */
 11 | 
 12 | 
 13 | 
 14 | #ifndef FWT_KERNEL_CUH_FLOAT
 15 | #define FWT_KERNEL_CUH_FLOAT
 16 | #ifndef fwt_kernel_cuh_float
 17 | #define fwt_kernel_cuh_float
 18 | 
 19 | 
 20 | 
 21 | ///////////////////////////////////////////////////////////////////////////////
 22 | // Elementary(for vectors less than elementary size) in-shared memory
 23 | // combined radix-2 + radix-4 Fast Walsh Transform
 24 | ///////////////////////////////////////////////////////////////////////////////
 25 | #define ELEMENTARY_LOG2SIZE 11
 26 | 
 27 | __global__ void fwtBatch1Kernel(float *d_Output, float *d_Input, int log2N)
 28 | {
 29 |     const int    N = 1 << log2N;
 30 |     const int base = blockIdx.x << log2N;
 31 | 
 32 |     //(2 ** 11) * 4 bytes == 8KB -- maximum s_data[] size for G80
 33 |     extern __shared__ float s_data[];
 34 |     float *d_Src = d_Input  + base;
 35 |     float *d_Dst = d_Output + base;
 36 | 
 37 |     for (int pos = threadIdx.x; pos < N; pos += blockDim.x)
 38 |     {
 39 |         s_data[pos] = d_Src[pos];
 40 |     }
 41 | 
 42 |     //Main radix-4 stages
 43 |     const int pos = threadIdx.x;
 44 | 
 45 |     for (int stride = N >> 2; stride > 0; stride >>= 2)
 46 |     {
 47 |         int lo = pos & (stride - 1);
 48 |         int i0 = ((pos - lo) << 2) + lo;
 49 |         int i1 = i0 + stride;
 50 |         int i2 = i1 + stride;
 51 |         int i3 = i2 + stride;
 52 | 
 53 |         __syncthreads();
 54 |         float D0 = s_data[i0];
 55 |         float D1 = s_data[i1];
 56 |         float D2 = s_data[i2];
 57 |         float D3 = s_data[i3];
 58 | 
 59 |         float T;
 60 |         T = D0;
 61 |         D0         = D0 + D2;
 62 |         D2         = T - D2;
 63 |         T = D1;
 64 |         D1         = D1 + D3;
 65 |         D3         = T - D3;
 66 |         T = D0;
 67 |         s_data[i0] = D0 + D1;
 68 |         s_data[i1] = T - D1;
 69 |         T = D2;
 70 |         s_data[i2] = D2 + D3;
 71 |         s_data[i3] = T - D3;
 72 |     }
 73 | 
 74 |     //Do single radix-2 stage for odd power of two
 75 |     if (log2N & 1)
 76 |     {
 77 |         __syncthreads();
 78 | 
 79 |         for (int pos = threadIdx.x; pos < N / 2; pos += blockDim.x)
 80 |         {
 81 |             int i0 = pos << 1;
 82 |             int i1 = i0 + 1;
 83 | 
 84 |             float D0 = s_data[i0];
 85 |             float D1 = s_data[i1];
 86 |             s_data[i0] = D0 + D1;
 87 |             s_data[i1] = D0 - D1;
 88 |         }
 89 |     }
 90 | 
 91 |     __syncthreads();
 92 | 
 93 |     for (int pos = threadIdx.x; pos < N; pos += blockDim.x)
 94 |     {
 95 |         d_Dst[pos] = s_data[pos];
 96 |     }
 97 | }
 98 | 
 99 | ////////////////////////////////////////////////////////////////////////////////
100 | // Single in-global memory radix-4 Fast Walsh Transform pass
101 | // (for strides exceeding elementary vector size)
102 | ////////////////////////////////////////////////////////////////////////////////
103 | __global__ void fwtBatch2Kernel(
104 |     float *d_Output,
105 |     float *d_Input,
106 |     int stride
107 | )
108 | {
109 |     const int pos = blockIdx.x * blockDim.x + threadIdx.x;
110 |     const int   N = blockDim.x *  gridDim.x * 4;
111 | 
112 |     float *d_Src = d_Input  + blockIdx.y * N;
113 |     float *d_Dst = d_Output + blockIdx.y * N;
114 | 
115 |     int lo = pos & (stride - 1);
116 |     int i0 = ((pos - lo) << 2) + lo;
117 |     int i1 = i0 + stride;
118 |     int i2 = i1 + stride;
119 |     int i3 = i2 + stride;
120 | 
121 |     float D0 = d_Src[i0];
122 |     float D1 = d_Src[i1];
123 |     float D2 = d_Src[i2];
124 |     float D3 = d_Src[i3];
125 | 
126 |     float T;
127 |     T = D0;
128 |     D0        = D0 + D2;
129 |     D2        = T - D2;
130 |     T = D1;
131 |     D1        = D1 + D3;
132 |     D3        = T - D3;
133 |     T = D0;
134 |     d_Dst[i0] = D0 + D1;
135 |     d_Dst[i1] = T - D1;
136 |     T = D2;
137 |     d_Dst[i2] = D2 + D3;
138 |     d_Dst[i3] = T - D3;
139 | }
140 | 
141 | ////////////////////////////////////////////////////////////////////////////////
142 | // Put everything together: batched Fast Walsh Transform CPU front-end
143 | ////////////////////////////////////////////////////////////////////////////////
144 | void fwtBatchGPU(float *d_Data, int M, int log2N)
145 | {
146 |     const int THREAD_N = 1024;
147 | 
148 |     int N = 1 << log2N;
149 |     dim3 grid((1 << log2N) / (4 * THREAD_N), M, 1);
150 | 
151 |     for (; log2N > ELEMENTARY_LOG2SIZE; log2N -= 2, N >>= 2, M <<= 2)
152 |     {
153 |         fwtBatch2Kernel<<<grid, THREAD_N>>>(d_Data, d_Data, N / 4);
154 |     }
155 | 
156 |     fwtBatch1Kernel<<<M, N / 4, N *sizeof(float)>>>(
157 |         d_Data,
158 |         d_Data,
159 |         log2N
160 |     );
161 | }
162 | 
163 | 
164 | ////////////////////////////////////////////////////////////////////////////////
165 | // Modulate two arrays
166 | ////////////////////////////////////////////////////////////////////////////////
167 | __global__ void modulateKernel(float *d_A, float *d_B, int N)
168 | {
169 |     int        tid = blockIdx.x * blockDim.x + threadIdx.x;
170 |     int numThreads = blockDim.x * gridDim.x;
171 |     float     rcpN = 1.0f / (float)N;
172 | 
173 |     for (int pos = tid; pos < N; pos += numThreads)
174 |     {
175 |         d_A[pos] *= d_B[pos] * rcpN;
176 |     }
177 | }
178 | 
179 | //Interface to modulateKernel()
180 | void modulateGPU(float *d_A, float *d_B, int N)
181 | {
182 |     modulateKernel<<<128, 256>>>(d_A, d_B, N);
183 | }
184 | 
185 | 
186 | 
187 | #endif
188 | #endif
189 | 


--------------------------------------------------------------------------------
/include/matrix/gpuhandle.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPUHANDLE_H
 2 | #define GPUHANDLE_H
 3 | 
 4 | #include "mat_typedef.h"
 5 | #include <curand.h>
 6 | #include <cuda_runtime.h>
 7 | #include <cusparse_v2.h>
 8 | #include <cublas_v2.h>
 9 | #include <ctime>
10 | #include <curand_kernel.h>
11 | 
12 | #define NUM_RND_BLOCKS                      96
13 | #define NUM_RND_THREADS_PER_BLOCK           128
14 | #define NUM_RND_STREAMS                     (NUM_RND_BLOCKS * NUM_RND_THREADS_PER_BLOCK)
15 | 
16 | struct GPUHandle
17 | {
18 | 	static cudaStream_t* streams;
19 | 	static cublasHandle_t cublashandle;
20 | 	static cusparseHandle_t cusparsehandle;
21 | 	static curandGenerator_t curandgenerator;
22 | 	static unsigned int streamcnt;
23 | 	
24 | 	static void Init(int dev_id, unsigned int _streamcnt = 1U);
25 | 	static void Destroy();
26 | 	
27 | 	static curandState_t* devRandStates;
28 | };
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/include/matrix/imatrix.h:
--------------------------------------------------------------------------------
 1 | #ifndef IMATRIX_H
 2 | #define IMATRIX_H
 3 | 
 4 | #include "matrix_utils.h"
 5 | #include "gpuhandle.h"
 6 | #include <stdexcept>
 7 | #define GPU_T(x) (x == Trans::N ? cublasOperation_t::CUBLAS_OP_N : cublasOperation_t::CUBLAS_OP_T)
 8 | #define CUSP_T(x) (x == Trans::N ? CUSPARSE_OPERATION_NON_TRANSPOSE : CUSPARSE_OPERATION_TRANSPOSE)
 9 | #define CPU_CharT(x) (x == Trans::N ? 'N' : 'T')
10 | #define CPU_T(x) (x == Trans::N ? CblasNoTrans : CblasTrans)
11 | 
12 | template<MatMode mode, typename Dtype>
13 | class SparseMat;
14 | template<MatMode mode, typename Dtype>
15 | class DenseMat;
16 | 
17 | template<MatMode mode, typename Dtype>
18 | class IMatrix 
19 | {
20 | public:
21 | 		virtual MatType GetMatType() = 0;
22 | 		virtual ~IMatrix() {}
23 | 		
24 | 		virtual void Serialize(FILE* fid) 
25 | 		{
26 | 			assert(fwrite(&rows, sizeof(size_t), 1, fid) == 1);
27 | 			assert(fwrite(&cols, sizeof(size_t), 1, fid) == 1);
28 | 			assert(fwrite(&count, sizeof(size_t), 1, fid) == 1);
29 | 		}
30 | 		
31 | 		virtual void Deserialize(FILE* fid)
32 | 		{
33 | 			assert(fread(&rows, sizeof(size_t), 1, fid) == 1);
34 | 			assert(fread(&cols, sizeof(size_t), 1, fid) == 1);
35 | 			assert(fread(&count, sizeof(size_t), 1, fid) == 1);
36 | 		}
37 |         
38 |         virtual void Print2Screen() = 0;
39 | 		
40 | 		virtual DenseMat<mode, Dtype>& DenseDerived() 
41 | 		{
42 | 			throw "Can not derive Dense Matrix from CSR Matrix";
43 | 		}
44 | 		
45 | 		virtual const DenseMat<mode, Dtype>& DenseDerived() const 
46 | 		{
47 | 			throw "Can not derive Dense Matrix from CSR Matrix";
48 | 		}
49 | 		
50 | 		virtual SparseMat<mode, Dtype>& SparseDerived()
51 | 		{
52 | 			throw "Can not derive CSR Matrix from Dense Matrix";
53 | 		}
54 | 		
55 | 		virtual const SparseMat<mode, Dtype>& SparseDerived() const 
56 | 		{
57 | 			throw "Can not derive CSR Matrix from Dense Matrix";
58 | 		}
59 | 		
60 | 		size_t rows, cols, count;		
61 | };
62 | 
63 | #endif


--------------------------------------------------------------------------------
/include/matrix/mat_typedef.h:
--------------------------------------------------------------------------------
 1 | #ifndef MAT_TYPEDEF_H
 2 | #define MAT_TYPEDEF_H
 3 | 
 4 | #include <cstddef>
 5 | 
 6 | enum MatMode
 7 | {
 8 | 	CPU = 0,
 9 | 	GPU = 1
10 | };
11 | 
12 | enum class Trans
13 | {
14 | 	N = 0,
15 | 	T = 1
16 | };
17 | 
18 | enum Phase
19 | {
20 |     TRAIN = 0,
21 |     TEST = 1  
22 | };
23 | 
24 | enum MatType
25 | {
26 | 	DENSE,
27 | 	SPARSE
28 | };
29 | 
30 | #define c_uCudaThreadNum 1024
31 | 
32 | const double eps = 1e-8;
33 | 
34 | #endif


--------------------------------------------------------------------------------
/include/matrix/matrix_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef MATRIX_UTILS_H
 2 | #define MATRIX_UTILS_H
 3 | 
 4 | #include "mat_typedef.h"
 5 | #include <cuda_runtime.h>
 6 | #include <mkl.h>
 7 | #include <cublas_v2.h>
 8 | #include <memory>
 9 | #include <cassert>
10 | #include <iostream>
11 | 
12 | template<MatMode mode>
13 | struct MatUtils
14 | {
15 | 	template<typename T>
16 | 	static void DelArr(T*& p);
17 | 
18 | 	template<typename T>
19 | 	static void MallocArr(T*& p, size_t nBytes);
20 | 
21 | 	template<typename T>
22 | 	static void ArrSetZeros(T*& p, size_t nBytes);
23 | };
24 | 
25 | template<>
26 | template<typename T>
27 | void MatUtils<CPU>::ArrSetZeros(T*& p, size_t nBytes)
28 | {
29 | 	if (p)
30 | 		memset(p, 0, nBytes);
31 | }
32 | 
33 | template<>
34 | template<typename T>
35 | void MatUtils<CPU>::DelArr(T*& p)
36 | {
37 | 	if (p)
38 | 	{
39 | 		delete[] p; 
40 | 		p = nullptr;
41 | 	}
42 | }
43 | 
44 | template<>
45 | template<typename T>
46 | void MatUtils<CPU>::MallocArr(T*& p, size_t nBytes)
47 | {
48 | 	if (nBytes)
49 | 		p = (T*) malloc(nBytes);
50 | 	else p = nullptr;
51 | }
52 | 
53 | template<>
54 | template<typename T>
55 | void MatUtils<GPU>::DelArr(T*& p)
56 | {
57 | 	if (p)
58 | 	{		
59 | 		cudaFree(p);
60 | 		p = nullptr;
61 | 	}
62 | }
63 | 
64 | template<>
65 | template<typename T>
66 | void MatUtils<GPU>::MallocArr(T*& p, size_t nBytes)
67 | {
68 | 	if (nBytes)
69 | 	{
70 | 		cudaError_t t = cudaMalloc(&p, nBytes);
71 | 		assert(t != cudaErrorMemoryAllocation);
72 | 	}
73 | 	else p = nullptr;
74 | }
75 | 
76 | 
77 | inline void GetDims(const size_t& lhs_rows, const size_t& lhs_cols, Trans ltrans, 
78 | 					const size_t& rhs_rows, const size_t& rhs_cols, Trans rtrans, 
79 | 					size_t &m, size_t &n, size_t &k)
80 | {
81 | 	m = ltrans == Trans::N ? lhs_rows : lhs_cols;
82 | 	n = rtrans == Trans::N ? rhs_cols : rhs_rows;
83 | 	k = ltrans == Trans::N ? lhs_cols : lhs_rows;
84 | 	assert((rtrans == Trans::N && rhs_rows == k) || (rtrans == Trans::T && rhs_cols == k));
85 | }
86 | 
87 | #endif


--------------------------------------------------------------------------------
/include/matrix/mkl_helper.h:
--------------------------------------------------------------------------------
  1 | #ifndef MKL_HELPER_H
  2 | #define MKL_HELPER_H
  3 | 
  4 | #include <mkl.h>
  5 | 
  6 | inline float MKLHelper_Dot(const MKL_INT n, const float* x, const float* y)
  7 | {
  8 |     return cblas_sdot(n, x, 1, y, 1);
  9 | }
 10 | 
 11 | inline double MKLHelper_Dot(const MKL_INT n, const double* x, const double* y)
 12 | {
 13 |     return cblas_ddot(n, x, 1, y, 1);
 14 | }
 15 | 
 16 | inline CBLAS_INDEX MKLHelper_Amax(const MKL_INT n, const float *x)
 17 | {
 18 |     return cblas_isamax(n, x, 1);
 19 | }
 20 | 
 21 | inline CBLAS_INDEX MKLHelper_Amax(const MKL_INT n, const double *x)
 22 | {
 23 |     return cblas_idamax(n, x, 1);
 24 | }
 25 | 
 26 | inline float MKLHelper_Asum(const MKL_INT n, const float *x)
 27 | {
 28 | 		return cblas_sasum(n, x, 1);
 29 | }
 30 | 
 31 | inline double MKLHelper_Asum(const MKL_INT n, const double *x)
 32 | {
 33 | 		return cblas_dasum(n, x, 1);
 34 | }
 35 | 
 36 | inline float MKLHelper_Norm2(const MKL_INT n, const float *x)
 37 | {
 38 | 		return cblas_snrm2(n, x, 1);
 39 | }
 40 | 
 41 | inline double MKLHelper_Norm2(const MKL_INT n, const double *x)
 42 | {
 43 | 		return cblas_dnrm2(n, x, 1);
 44 | }
 45 | 
 46 | inline void MKLHelper_Ger(const CBLAS_LAYOUT Layout, const MKL_INT m, const MKL_INT n, 
 47 | 						const float alpha, const float *x, const float *y, float *a)
 48 | {
 49 | 	const MKL_INT lda = Layout == CblasRowMajor ? n : m;
 50 | 	cblas_sger(Layout, m, n, alpha, x, 1, y, 1, a, lda);
 51 | }
 52 | 
 53 | inline void MKLHelper_Ger(const CBLAS_LAYOUT Layout, const MKL_INT m, const MKL_INT n, 
 54 | 						const double alpha, const double *x, const double *y, double *a)
 55 | {
 56 | 	const MKL_INT lda = Layout == CblasRowMajor ? n : m;
 57 | 	cblas_dger(Layout, m, n, alpha, x, 1, y, 1, a, lda);
 58 | }
 59 | 
 60 | inline void MKLHelper_Axpy(const MKL_INT n, const float a, const float *x, float *y)
 61 | {
 62 | 	cblas_saxpy(n, a, x, 1, y, 1);
 63 | }
 64 | 
 65 | inline void MKLHelper_Axpy(const MKL_INT n, const double a, const double *x, double *y)
 66 | {
 67 | 	cblas_daxpy(n, a, x, 1, y, 1);
 68 | }
 69 | 
 70 | inline void MKLHelper_Axpby(const MKL_INT n, const float a, const float *x, const float b, float *y)
 71 | {
 72 | 	cblas_saxpby(n, a, x, 1, b, y, 1);
 73 | }
 74 | 
 75 | inline void MKLHelper_Axpby(const MKL_INT n, const double a, const double *x, const double b, double *y)
 76 | {
 77 | 	cblas_daxpby(n, a, x, 1, b, y, 1);
 78 | }
 79 | 
 80 | inline void MKLHelper_Omatadd(char ordering, char transa, char transb, size_t m, size_t n, 
 81 | 							const float alpha, const float * A, size_t lda, 
 82 | 							const float beta, const float * B, size_t ldb, 
 83 | 							float * C, size_t ldc)
 84 | {
 85 | 	mkl_somatadd(ordering, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
 86 | }
 87 | 
 88 | inline void MKLHelper_Omatadd(char ordering, char transa, char transb, size_t m, size_t n, 
 89 | 							const double alpha, const double * A, size_t lda, 
 90 | 							const double beta, const double * B, size_t ldb, 
 91 | 							double * C, size_t ldc)
 92 | {
 93 | 	mkl_domatadd(ordering, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
 94 | }
 95 | 
 96 | inline void MKLHelper_GeMV(const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE trans, 
 97 |                            const MKL_INT m, const MKL_INT n,
 98 |                            const float alpha, const float *a, const MKL_INT lda, const float *x, const MKL_INT incx,
 99 |                            const float beta, float *y, const MKL_INT incy)
100 | {
101 |     cblas_sgemv(Layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy);   
102 | }
103 | 
104 | inline void MKLHelper_GeMV(const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE trans, 
105 |                            const MKL_INT m, const MKL_INT n,
106 |                            const double alpha, const double *a, const MKL_INT lda, const double *x, const MKL_INT incx,
107 |                            const double beta, double *y, const MKL_INT incy)
108 | {
109 |     cblas_dgemv(Layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy);   
110 | } 
111 | 
112 | inline void MKLHelper_GeMM(const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE transa, const CBLAS_TRANSPOSE transb, 
113 | 							const MKL_INT m, const MKL_INT n, const MKL_INT k, 
114 | 							const float alpha, const float *a, const MKL_INT lda, const float *b, const MKL_INT ldb, 
115 | 							const float beta, float *c, const MKL_INT ldc)
116 | {
117 | 	cblas_sgemm(Layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
118 | }
119 | 
120 | inline void MKLHelper_GeMM(const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE transa, const CBLAS_TRANSPOSE transb, 
121 | 							const MKL_INT m, const MKL_INT n, const MKL_INT k, 
122 | 							const double alpha, const double *a, const MKL_INT lda, const double *b, const MKL_INT ldb, 
123 | 							const double beta, double *c, const MKL_INT ldc)
124 | {
125 | 	cblas_dgemm(Layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
126 | }
127 | 
128 | inline void MKLHelper_CSRMM(char trans, MKL_INT m, MKL_INT n, MKL_INT k, float alpha, 
129 | 							char *matdescra, float *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, 
130 | 						    float *b, MKL_INT ldb, float beta, float *c, MKL_INT ldc)
131 | {
132 | 	mkl_scsrmm(&trans, &m, &n, &k, &alpha,
133 | 			matdescra, val, indx, pntrb, pntre, 
134 | 			b, &ldb, &beta, c, &ldc);
135 | }
136 | 
137 | inline void MKLHelper_CSRMM(char trans, MKL_INT m, MKL_INT n, MKL_INT k, double alpha, 
138 | 							char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, 
139 | 							double *b, MKL_INT ldb, double beta, double *c, MKL_INT ldc)
140 | {
141 | 	mkl_dcsrmm(&trans, &m, &n, &k, &alpha,
142 | 			matdescra, val, indx, pntrb, pntre, 
143 | 			b, &ldb, &beta, c, &ldc);
144 | }
145 | 
146 | inline void MKLHelper_Sin(const MKL_INT n, float* a, float* y)
147 | {
148 |     vsSin(n, a, y);
149 | }
150 | 
151 | inline void MKLHelper_Sin(const MKL_INT n, double* a, double* y)
152 | {
153 |     vdSin(n, a, y);
154 | }
155 | 
156 | inline void MKLHelper_Cos(const MKL_INT n, float* a, float* y)
157 | {
158 |     vsCos(n, a, y);
159 | }
160 | 
161 | inline void MKLHelper_Cos(const MKL_INT n, double* a, double* y)
162 | {
163 |     vdCos(n, a, y);
164 | }
165 | 
166 | inline void MKLHelper_Exp(const MKL_INT n, float* a, float* y)
167 | {
168 | 	vsExp(n, a, y);
169 | }
170 | 
171 | inline void MKLHelper_Exp(const MKL_INT n, double* a, double* y)
172 | {
173 | 	vdExp(n, a, y);
174 | }
175 | 
176 | inline void MKLHelper_Log(const MKL_INT n, float* a, float* y)
177 | {
178 | 	vsLn(n, a, y);
179 | }
180 | 
181 | inline void MKLHelper_Log(const MKL_INT n, double* a, double* y)
182 | {
183 | 	vdLn(n, a, y);
184 | }
185 | 
186 | inline void MKLHelper_Mul(const MKL_INT n, float* a, float* b, float* y)
187 | {
188 | 	vsMul(n, a, b, y);
189 | }
190 | 
191 | inline void MKLHelper_Mul(const MKL_INT n, double* a, double* b, double* y)
192 | {
193 | 	vdMul(n, a, b, y);
194 | }
195 | 
196 | inline void MKLHelper_Div(const MKL_INT n, float* a, float* b, float* y)
197 | {
198 | 	vsDiv(n, a, b, y);
199 | }
200 | 
201 | inline void MKLHelper_Div(const MKL_INT n, double* a, double* b, double* y)
202 | {
203 | 	vdDiv(n, a, b, y);
204 | }
205 | 
206 | inline void MKLHelper_Sqrt(const MKL_INT n, float* a, float* y)
207 | {
208 | 	vsSqrt(n, a, y);
209 | }
210 | 
211 | inline void MKLHelper_Sqrt(const MKL_INT n, double* a, double* y)
212 | {
213 | 	vdSqrt(n, a, y);
214 | }
215 | 
216 | inline void MKLHelper_InvSqrt(const MKL_INT n, float* a, float* y)
217 | {
218 | 	vsInvSqrt(n, a, y);
219 | }
220 | 
221 | inline void MKLHelper_InvSqrt(const MKL_INT n, double* a, double* y)
222 | {
223 | 	vdInvSqrt(n, a, y);
224 | }
225 | 
226 | inline void MKLHelper_Inv(const MKL_INT n, float* a, float* y)
227 | {
228 | 	vsInv(n, a, y);
229 | }
230 | 
231 | inline void MKLHelper_Inv(const MKL_INT n, double* a, double* y)
232 | {
233 | 	vdInv(n, a, y);
234 | }
235 | 
236 | inline void MKLHelper_Square(const MKL_INT n, float* a, float* y)
237 | {
238 |     vsSqr(n, a, y);
239 | }
240 | 
241 | inline void MKLHelper_Square(const MKL_INT n, double* a, double* y)
242 | {
243 |     vdSqr(n, a, y);
244 | }
245 | 
246 | inline void MKLHelper_PowerX(const MKL_INT n, float* a, float b, float* y)
247 | {
248 |     vsPowx(n, a, b, y);
249 | }
250 | 
251 | inline void MKLHelper_PowerX(const MKL_INT n, double* a, double b, double* y)
252 | {
253 |     vdPowx(n, a, b, y);
254 | }
255 | 
256 | #endif


--------------------------------------------------------------------------------
/include/matrix/sp_data.h:
--------------------------------------------------------------------------------
  1 | #ifndef SP_DATA_H
  2 | #define SP_DATA_H
  3 | 
  4 | #include "matrix_utils.h"
  5 | 
  6 | template<MatMode mode, typename Dtype>
  7 | class SpData
  8 | {
  9 | public:
 10 | 	inline SpData()
 11 | 	{
 12 | 		nnz = len_ptr = nzCap = ptrCap = 0;
 13 | 		val = nullptr;
 14 | 		col_idx = ptr = nullptr;
 15 | 	}
 16 | 	
 17 | 	inline SpData(int newNzCap, int newPtrCap)
 18 | 	{
 19 | 		nnz = len_ptr = 0;
 20 | 		nzCap = newNzCap; 
 21 | 		ptrCap = newPtrCap;
 22 | 		MatUtils<mode>::MallocArr(val, sizeof(Dtype) * nzCap);
 23 |         MatUtils<mode>::MallocArr(col_idx, sizeof(int) * nzCap);
 24 | 		MatUtils<mode>::MallocArr(ptr, sizeof(int) * ptrCap);
 25 | 	}
 26 | 	
 27 | 	void Serialize(FILE* fid)
 28 | 	{
 29 | 		assert(fwrite(&nnz, sizeof(int), 1, fid) == 1);
 30 | 		assert(fwrite(&len_ptr, sizeof(int), 1, fid) == 1);
 31 | 		assert(fwrite(&nzCap, sizeof(int), 1, fid) == 1);
 32 | 		assert(fwrite(&ptrCap, sizeof(int), 1, fid) == 1);
 33 | 	
 34 |         int *p_col_idx = col_idx, *p_ptr = ptr;
 35 |         Dtype* p_val = val;
 36 |         if (mode == GPU)
 37 |         {
 38 |             p_val = new Dtype[nzCap];
 39 |             p_col_idx = new int[nzCap];
 40 |             p_ptr = new int[ptrCap];
 41 |             cudaMemcpy(p_val, val, sizeof(Dtype) * nzCap, cudaMemcpyDeviceToHost);
 42 |             cudaMemcpy(p_col_idx, col_idx, sizeof(int) * nzCap, cudaMemcpyDeviceToHost);
 43 |             cudaMemcpy(p_ptr, ptr, sizeof(int) * ptrCap, cudaMemcpyDeviceToHost);
 44 |         }
 45 | 		assert(fwrite(p_val, sizeof(Dtype), nzCap, fid) == nzCap);
 46 | 		assert(fwrite(p_col_idx, sizeof(int), nzCap, fid) == nzCap);
 47 | 		assert(fwrite(p_ptr, sizeof(int), ptrCap, fid) == ptrCap);
 48 |         if (mode == GPU)
 49 |         {
 50 |             delete[] p_val;
 51 |             delete[] p_col_idx;
 52 |             delete[] p_ptr;
 53 |         }
 54 | 	}
 55 | 	
 56 | 	void Deserialize(FILE* fid)
 57 | 	{
 58 | 		assert(fread(&nnz, sizeof(int), 1, fid) == 1);
 59 | 		assert(fread(&len_ptr, sizeof(int), 1, fid) == 1);
 60 | 		assert(fread(&nzCap, sizeof(int), 1, fid) == 1);
 61 | 		assert(fread(&ptrCap, sizeof(int), 1, fid) == 1);
 62 | 		
 63 | 		MatUtils<mode>::DelArr(val);
 64 |         MatUtils<mode>::DelArr(col_idx);
 65 | 		MatUtils<mode>::DelArr(ptr);
 66 | 		MatUtils<mode>::MallocArr(val, sizeof(Dtype) * nzCap);
 67 |         MatUtils<mode>::MallocArr(col_idx, sizeof(int) * nzCap);
 68 | 		MatUtils<mode>::MallocArr(ptr, sizeof(int) * ptrCap);
 69 | 	
 70 |         int *p_col_idx = col_idx, *p_ptr = ptr;
 71 |         Dtype* p_val = val;
 72 |         if (mode == GPU)
 73 |         {
 74 |             p_val = new Dtype[nzCap];
 75 |             p_col_idx = new int[nzCap];
 76 |             p_ptr = new int[ptrCap];                       
 77 |         }        
 78 | 		assert(fread(p_val, sizeof(Dtype), nzCap, fid) == nzCap);
 79 | 		assert(fread(p_col_idx, sizeof(int), nzCap, fid) == nzCap);
 80 | 		assert(fread(p_ptr, sizeof(int), ptrCap, fid) == ptrCap);
 81 |         if (mode == GPU)
 82 |         {
 83 |             cudaMemcpy(val, p_val, sizeof(Dtype) * nzCap, cudaMemcpyHostToDevice);
 84 |             cudaMemcpy(col_idx, p_col_idx, sizeof(int) * nzCap, cudaMemcpyHostToDevice);
 85 |             cudaMemcpy(ptr, p_ptr, sizeof(int) * ptrCap, cudaMemcpyHostToDevice);
 86 |             delete[] p_val;
 87 |             delete[] p_col_idx;
 88 |             delete[] p_ptr;               
 89 |         }
 90 | 	}
 91 | 
 92 | 	~SpData()
 93 | 	{
 94 | 		MatUtils<mode>::DelArr(val);
 95 |         MatUtils<mode>::DelArr(col_idx);
 96 | 		MatUtils<mode>::DelArr(ptr);
 97 | 	}
 98 | 
 99 | 	Dtype* val;
100 | 	int* col_idx;
101 | 	int* ptr;
102 | 	
103 | 	int nnz;
104 | 	int len_ptr;
105 | 	int nzCap;
106 | 	int ptrCap;
107 | };
108 | 
109 | #endif


--------------------------------------------------------------------------------
/include/matrix/sparse_matrix.h:
--------------------------------------------------------------------------------
  1 | #ifndef SPARSE_MATRIX_H
  2 | #define SPARSE_MATRIX_H
  3 | 
  4 | #include "imatrix.h"
  5 | #include "sp_data.h"
  6 | #include <memory>
  7 | 
  8 | template<MatMode mode, typename Dtype>
  9 | class SparseMat : public IMatrix<mode, Dtype>
 10 | {
 11 | public:
 12 | 	
 13 | };
 14 | 
 15 | 
 16 | template<typename Dtype>
 17 | class SparseMat<CPU, Dtype> : public IMatrix<CPU, Dtype>
 18 | {
 19 | public:
 20 | 		SparseMat();
 21 | 		~SparseMat();
 22 |         template<MatMode otherMode>
 23 | 		SparseMat(SparseMat<otherMode, Dtype>& src)
 24 |         {
 25 |             CopyFrom(src);
 26 |         }
 27 | 		SparseMat(size_t _rows, size_t cols);
 28 | 		inline virtual MatType GetMatType() override
 29 | 		{
 30 | 			return SPARSE;
 31 | 		}
 32 | 		inline virtual SparseMat<CPU, Dtype>& SparseDerived() override
 33 | 		{
 34 | 			return *this;
 35 | 		}
 36 | 		
 37 | 		inline virtual const SparseMat<CPU, Dtype>& SparseDerived() const override
 38 | 		{
 39 | 			return *this;
 40 | 		}
 41 |         
 42 | 		Dtype Asum();
 43 |         
 44 |         virtual void Print2Screen() override;
 45 |         
 46 | 		virtual void Serialize(FILE* fid) override;
 47 | 		virtual void Deserialize(FILE* fid) override;
 48 | 		
 49 | 		void Resize(size_t newRos, size_t newCols);		
 50 | 		void ResizeSp(int newNNZ, int newNPtr); 
 51 | 	
 52 | 		void CopyFrom(SparseMat<CPU, Dtype>& src);
 53 | 		void CopyFrom(SparseMat<GPU, Dtype>& src);
 54 |         
 55 | 		std::shared_ptr< SpData<CPU, Dtype> > data;
 56 | };
 57 | 
 58 | template<typename Dtype>
 59 | class SparseMat<GPU, Dtype> : public IMatrix<GPU, Dtype>
 60 | {
 61 | public:
 62 | 		SparseMat();
 63 | 		~SparseMat();
 64 | 		template<MatMode otherMode>
 65 | 		SparseMat(SparseMat<otherMode, Dtype>& src)
 66 |         {
 67 |             CopyFrom(src);
 68 |         }
 69 | 		SparseMat(size_t _rows, size_t _cols, unsigned _streamid = 0U);
 70 | 		inline virtual MatType GetMatType() override
 71 | 		{
 72 | 			return SPARSE;
 73 | 		}	
 74 | 		inline virtual SparseMat<GPU, Dtype>& SparseDerived() override
 75 | 		{
 76 | 			return *this;
 77 | 		}
 78 | 		
 79 | 		inline virtual const SparseMat<GPU, Dtype>& SparseDerived() const override
 80 | 		{
 81 | 			return *this;
 82 | 		}
 83 |         
 84 |         virtual void Serialize(FILE* fid) override;
 85 | 		virtual void Deserialize(FILE* fid) override;
 86 | 		virtual void Print2Screen() override;
 87 |         
 88 | 		void Resize(size_t newRos, size_t newCols);		
 89 | 		void ResizeSp(int newNNZ, int newNPtr);
 90 | 	
 91 | 		Dtype Asum();
 92 |                 		
 93 | 		void CopyFrom(SparseMat<CPU, Dtype>& src);
 94 | 		void CopyFrom(SparseMat<GPU, Dtype>& src);
 95 | 				
 96 | 		std::shared_ptr< SpData<GPU, Dtype> > data;
 97 | 		unsigned int streamid;
 98 | 		cusparseMatDescr_t descr;		
 99 | };
100 | #endif


--------------------------------------------------------------------------------
/include/matrix/vector.h:
--------------------------------------------------------------------------------
 1 | #ifndef DENSE_VECTOR_H
 2 | #define DENSE_VECTOR_H
 3 | 
 4 | #include "imatrix.h"
 5 | #include <cassert>
 6 | 
 7 | template<MatMode mode, typename Dtype>
 8 | class Vector
 9 | {
10 | public:
11 | };
12 | 
13 | template<typename Dtype>
14 | class Vector<CPU, Dtype>
15 | {
16 | public:
17 | 		~Vector();
18 | 		Vector();		
19 | 		Vector(size_t _count);
20 | 		
21 | 		virtual void Serialize(FILE* fid) 
22 | 		{
23 | 			assert(fwrite(&count, sizeof(size_t), 1, fid) == 1);
24 | 			assert(fwrite(&mem_size, sizeof(size_t), 1, fid) == 1);
25 | 			assert(fwrite(data, sizeof(Dtype), mem_size, fid) == mem_size);
26 | 		}
27 | 		
28 | 		virtual void Deserialize(FILE* fid)
29 | 		{
30 | 			assert(fread(&count, sizeof(size_t), 1, fid) == 1);
31 | 			assert(fread(&mem_size, sizeof(size_t), 1, fid) == 1);
32 | 			MatUtils<CPU>::DelArr(data);
33 | 			MatUtils<CPU>::MallocArr(data, sizeof(Dtype) * mem_size);
34 | 			assert(fread(data, sizeof(Dtype), mem_size, fid) == mem_size);
35 | 		}
36 | 		
37 | 		void Resize(size_t _count);
38 | 		void Fill(Dtype scalar);
39 | 		
40 | 		Dtype* data;
41 | 		size_t count, mem_size;	
42 | };
43 | 
44 | template<typename Dtype>
45 | class Vector<GPU, Dtype>
46 | {
47 | public:
48 | 		~Vector();
49 | 		Vector();
50 | 		Vector(size_t _count, unsigned int _streamid = 0U);
51 |         
52 | 		virtual void Serialize(FILE* fid) 
53 | 		{
54 | 			assert(fwrite(&count, sizeof(size_t), 1, fid) == 1);
55 | 			assert(fwrite(&mem_size, sizeof(size_t), 1, fid) == 1);
56 |             Dtype* buf = new Dtype[mem_size];
57 |             cudaMemcpy(buf, data, sizeof(Dtype) * mem_size, cudaMemcpyDeviceToHost);
58 | 			assert(fwrite(buf, sizeof(Dtype), mem_size, fid) == mem_size);
59 |             delete[] buf;
60 | 		}
61 | 		
62 | 		virtual void Deserialize(FILE* fid)
63 | 		{
64 | 			assert(fread(&count, sizeof(size_t), 1, fid) == 1);
65 | 			assert(fread(&mem_size, sizeof(size_t), 1, fid) == 1);
66 | 			MatUtils<CPU>::DelArr(data);
67 | 			MatUtils<CPU>::MallocArr(data, sizeof(Dtype) * mem_size);
68 |             Dtype* buf = new Dtype[mem_size];	        
69 | 			assert(fread(buf, sizeof(Dtype), mem_size, fid) == mem_size);
70 |             cudaMemcpy(data, buf, sizeof(Dtype) * mem_size, cudaMemcpyHostToDevice);
71 |             delete[] buf;
72 | 		}
73 |         
74 | 		void Resize(size_t _count);
75 | 		void Fill(Dtype scalar);
76 | 		
77 | 		void CopyFrom(Vector<CPU, Dtype>& src);
78 | 		
79 | 		Dtype* data;
80 | 		size_t count, mem_size;	
81 | 		unsigned streamid;		
82 | };
83 | 
84 | #endif


--------------------------------------------------------------------------------
/include/net/abs_criterion_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef ABS_CRITERION_LAYER_H
 2 | #define ABS_CRITERION_LAYER_H
 3 | 
 4 | #include "i_criterion_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class ABSCriterionLayer : public ICriterionLayer<mode, Dtype>
 8 | {
 9 | public:
10 | 			ABSCriterionLayer(std::string _name, PropErr _properr = PropErr::T)
11 |                 : ABSCriterionLayer(_name, 1.0, _properr) {}
12 |                 
13 |             ABSCriterionLayer(std::string _name, Dtype _lambda, PropErr _properr = PropErr::T)
14 |                 : ICriterionLayer<mode, Dtype>(_name, _lambda, _properr) 
15 |             {
16 |                 this->grad = new DenseMat<mode, Dtype>();
17 |             }
18 |             
19 |             virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
20 |             {
21 |                 assert(operands.size() == 2);
22 |                 
23 |                 auto& node_diff = this->grad->DenseDerived();			
24 |                 node_diff.GeaM(1.0, Trans::N, operands[0]->state->DenseDerived(), -1.0, Trans::N, operands[1]->state->DenseDerived());
25 |                 this->loss = node_diff.Asum();
26 |             }
27 |                                                 			
28 | 			virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
29 |             {
30 |                 throw std::runtime_error("not impltemented");
31 |             }
32 | };
33 | 
34 | #endif


--------------------------------------------------------------------------------
/include/net/avg_rank_criterion_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef AVG_RANK_CRITERION_LAYER_H
 2 | #define AVG_RANK_CRITERION_LAYER_H
 3 | 
 4 | #include "i_criterion_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class AvgRankCriterionLayer : public ICriterionLayer<mode, Dtype>
 8 | {
 9 | public:
10 | 			AvgRankCriterionLayer(std::string _name, RankOrder _order)
11 |                 : ICriterionLayer<mode, Dtype>(_name, PropErr::N), order(_order) {}
12 |                         
13 |             static std::string str_type()
14 |             {
15 |                 return "AverageRank"; 
16 |             }
17 |             
18 |             virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
19 |             {
20 |                 auto& pred = operands[0]->state->DenseDerived();
21 |                 auto& labels = operands[1]->state->SparseDerived();
22 |                 
23 |                 this->loss = LossFunc<mode, Dtype>::GetAverageRank(pred, labels, order);
24 |             }
25 |             
26 |             virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
27 |             {
28 |                 throw std::runtime_error("no grad in this layer");
29 |             }      
30 |             
31 |             RankOrder order;            
32 | };
33 | 
34 | #endif


--------------------------------------------------------------------------------
/include/net/c_add_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef C_ADD_LAYER_H
 2 | #define C_ADD_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class CAddLayer : public ILayer<mode, Dtype>
 8 | {
 9 | public:
10 |     CAddLayer(std::string _name, PropErr _properr = PropErr::T)
11 |         : ILayer<mode, Dtype>(_name, _properr) 
12 |     {
13 |         this->state = new DenseMat<mode, Dtype>();
14 | 		this->grad = new DenseMat<mode, Dtype>();
15 |     }
16 |     
17 |     static std::string str_type()
18 |     {
19 |         return "CAdd"; 
20 |     }
21 |     
22 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
23 |     {
24 |         assert(operands.size());
25 |         auto& cur_output = this->state->DenseDerived();
26 |         
27 |         for (size_t i = 0; i < operands.size(); ++i)
28 |         {
29 |             auto& prev_state = operands[i]->state->DenseDerived();
30 |             
31 |             if (i == 0)
32 |                 cur_output.CopyFrom(prev_state);
33 |             else 
34 |                 cur_output.Axpy(1.0, prev_state);    
35 |         }            
36 |     }    
37 |     
38 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
39 |     {
40 |         assert(cur_idx >= 0 && cur_idx < operands.size());
41 |         auto& cur_grad = this->grad->DenseDerived();                
42 | 		auto& prev_grad = operands[cur_idx]->grad->DenseDerived();
43 | 		        
44 |         if (beta == 0)
45 |             prev_grad.CopyFrom(cur_grad);
46 |         else
47 |             prev_grad.Axpby(1.0, cur_grad, beta);
48 |     }
49 | };
50 | 
51 | #endif


--------------------------------------------------------------------------------
/include/net/c_mul_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef C_MUL_LAYER_H
 2 | #define C_MUL_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class CMulLayer : public ILayer<mode, Dtype>
 8 | {
 9 | public:
10 |     CMulLayer(std::string _name, PropErr _properr = PropErr::T)
11 |         : ILayer<mode, Dtype>(_name, _properr) 
12 |     {
13 |         this->state = new DenseMat<mode, Dtype>();
14 | 		this->grad = new DenseMat<mode, Dtype>();
15 |     }
16 |     
17 |     static std::string str_type()
18 |     {
19 |         return "CMul"; 
20 |     }
21 |     
22 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
23 |     {
24 |         assert(operands.size() > 1);
25 |         
26 |         auto& cur_output = this->state->DenseDerived();
27 |         cur_output.EleWiseMul(operands[0]->state->DenseDerived(), operands[1]->state->DenseDerived());
28 |         
29 |         for (size_t i = 2; i < operands.size(); ++i)
30 |             cur_output.EleWiseMul(operands[i]->state->DenseDerived());        
31 |     }
32 |     
33 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
34 |     {
35 |         assert(operands.size() > 1);
36 |         
37 |         auto& cur_grad = this->grad->DenseDerived();
38 |         
39 |         auto& prev_grad = beta == 0 ? operands[cur_idx]->grad->DenseDerived() : buf;
40 |         
41 |         prev_grad.CopyFrom(cur_grad);
42 |         
43 |         for (size_t i = 0; i < operands.size(); ++i)
44 |             if (i != cur_idx)
45 |                 prev_grad.EleWiseMul(operands[i]->state->DenseDerived());
46 |                 
47 |         if (beta != 0)
48 |             operands[cur_idx]->grad->DenseDerived().Axpby(1.0, prev_grad, beta);
49 |     }    
50 |     
51 |     DenseMat<mode, Dtype> buf;
52 | };
53 | 
54 | #endif


--------------------------------------------------------------------------------
/include/net/classnll_criterion_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef CLASSNLL_CRITERION_LAYER_H
 2 | #define CLASSNLL_CRITERION_LAYER_H
 3 | 
 4 | #include "i_criterion_layer.h"
 5 | #include "dense_matrix.h"
 6 | #include "sparse_matrix.h"
 7 | #include "loss_func.h"
 8 | 
 9 | template<MatMode mode, typename Dtype>
10 | class ClassNLLCriterionLayer : public ICriterionLayer<mode, Dtype>
11 | {
12 | public:
13 | 			ClassNLLCriterionLayer(std::string _name, bool _need_softmax, PropErr _properr = PropErr::T)
14 |                 : ClassNLLCriterionLayer<mode, Dtype>(_name, _need_softmax, 1.0, _properr) {}
15 |                 
16 | 			ClassNLLCriterionLayer(std::string _name, bool _need_softmax, Dtype _lambda, PropErr _properr = PropErr::T)
17 | 				 : ICriterionLayer<mode, Dtype>(_name, _lambda, _properr), need_softmax(_need_softmax)
18 |             {
19 |                 this->grad = new DenseMat<mode, Dtype>();
20 |             }
21 |             
22 |             static std::string str_type()
23 |             {
24 |                 return "ClassNLL"; 
25 |             }
26 |             
27 |             virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
28 |             {
29 |                 auto& top = this->grad->DenseDerived();
30 |                 top.CopyFrom(operands[0]->state->DenseDerived());
31 |                 if (need_softmax)
32 |                     top.Softmax();
33 |                 auto& labels = operands[1]->state->SparseDerived();
34 |                 
35 |                 this->loss = LossFunc<mode, Dtype>::GetLogLoss(top, labels);
36 |                 
37 |                 if (need_softmax)
38 |                 {
39 |                     top.Axpy(-1.0, labels); // calc grad
40 |                     top.Scale(1.0 / top.rows); // normalize by batch size
41 |                 } else 
42 |                 {   
43 |                     top.Inv();
44 |                     top.EleWiseMul(labels);
45 |                     top.Scale(-1.0 / top.rows); // normalize by batch size
46 |                 }
47 |             }
48 |             
49 |             virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
50 |             {
51 |                 assert(operands.size() == 2 && cur_idx == 0);
52 |                 
53 |                 auto& prev_grad = operands[0]->grad->DenseDerived();
54 |                 auto& cur_grad = this->grad->DenseDerived();
55 |                 if (beta == 0)
56 |                     prev_grad.CopyFrom(cur_grad);
57 |                 else
58 |                     prev_grad.Axpby(1.0, cur_grad, beta);                
59 |             }              
60 |                         
61 | protected:
62 |             const bool need_softmax;
63 | };
64 | 
65 | #endif


--------------------------------------------------------------------------------
/include/net/col_slice_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef COL_SLICE_LAYER_H
 2 | #define COL_SLICE_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class ColSliceLayer : public ILayer<mode, Dtype>
 8 | {
 9 | public:
10 |     ColSliceLayer(std::string _name, PropErr _properr = PropErr::T)
11 |         : ILayer<mode, Dtype>(_name, _properr) 
12 |     {
13 |         this->state = new DenseMat<mode, Dtype>();
14 | 		this->grad = new DenseMat<mode, Dtype>();
15 |     }
16 |     
17 |     static std::string str_type()
18 |     {
19 |         return "ColSlice"; 
20 |     }
21 |     
22 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
23 |     {        
24 |         assert(operands.size() >= 2);
25 |         auto& cur_output = this->state->DenseDerived();
26 |         
27 |         if ((int)operands.size() == 2)
28 |         {
29 |             int col_idx = GetColIdx(operands[1]);
30 |             
31 |             auto& prev_output = operands[0]->state->DenseDerived();
32 |             cur_output.GetColsFrom(prev_output, col_idx, 1);
33 |         } else
34 |         {
35 |             throw std::runtime_error("only support single column selection");
36 |         }       
37 |     }    
38 |     
39 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
40 |     {
41 |         assert(cur_idx == 0);
42 |         auto& cur_grad = this->grad->DenseDerived();                        
43 | 		auto& prev_grad = operands[cur_idx]->grad->DenseDerived();
44 |         
45 |         if ((int)operands.size() == 2)
46 |         {
47 |             int col_idx = GetColIdx(operands[1]);
48 |             prev_grad.SubmatAdd(0, col_idx, cur_grad, beta);
49 |         } else
50 |         {
51 |             throw std::runtime_error("only support single column selection");
52 |         }
53 |     }
54 |     
55 | protected:
56 | 
57 |     inline int GetColIdx(ILayer<mode, Dtype>* op)
58 |     {
59 |         auto& col_selected = op->state->SparseDerived();
60 |         assert(col_selected.data->nnz == 1);
61 |         return col_selected.data->col_idx[0];
62 |     }
63 | };
64 | 
65 | #endif


--------------------------------------------------------------------------------
/include/net/concat_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef CONCAT_LAYER_H
 2 | #define CONCAT_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class ConcatLayer : public ILayer<mode, Dtype>
 8 | {
 9 | public:
10 |     ConcatLayer(std::string _name, PropErr _properr = PropErr::T);        
11 |     
12 |     static std::string str_type()
13 |     {
14 |         return "Concat"; 
15 |     }
16 |     
17 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override;
18 |     
19 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override;
20 |     
21 | protected:
22 | 
23 |     DenseMat<mode, Dtype> buf;    
24 | };
25 | 
26 | 
27 | #endif


--------------------------------------------------------------------------------
/include/net/const_scalar_param.h:
--------------------------------------------------------------------------------
 1 | #ifndef CONST_SCALAR_PARAM
 2 | #define CONST_SCALAR_PARAM
 3 | 
 4 | #include "i_param.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class ConstScalarParam : public IConstParam<mode, Dtype>
 8 | {
 9 | public:
10 |     ConstScalarParam(std::string _name, Dtype _a, Dtype _b)
11 |         : IConstParam<mode, Dtype>(_name), a(_a), b(_b) {}
12 |     
13 |     virtual void InitConst(void* side_info) override {}
14 |        
15 |     virtual void ResetOutput(const IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* output) override
16 |     {
17 |         output->Resize(input->rows, input->cols);        
18 |     }             
19 |     
20 |     virtual void UpdateOutput(IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* output, Dtype beta, Phase phase) override
21 |     {
22 |         output->Axpby(a, input->DenseDerived(), beta);
23 |         output->Add(b);    
24 |     }
25 |     
26 |     virtual void UpdateGradInput(DenseMat<mode, Dtype>* gradInput, DenseMat<mode, Dtype>* gradOutput, Dtype beta) override
27 |     {
28 |         gradInput->Axpby(a, *gradOutput, beta);    
29 |     }
30 |     
31 |     const Dtype a, b;
32 | };
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/include/net/err_cnt_criterion_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef ERR_CNT_CRITERION_LAYER_H
 2 | #define ERR_CNT_CRITERION_LAYER_H
 3 | 
 4 | #include "i_criterion_layer.h"
 5 | #include "dense_matrix.h"
 6 | #include "sparse_matrix.h"
 7 | #include "loss_func.h"
 8 | 
 9 | template<MatMode mode, typename Dtype>
10 | class ErrCntCriterionLayer : public ICriterionLayer<mode, Dtype>
11 | {
12 | public:
13 | 			ErrCntCriterionLayer(std::string _name)
14 |                 : ICriterionLayer<mode, Dtype>(_name, PropErr::N)
15 |                 {
16 |                     
17 |                 }           
18 |                      
19 |             static std::string str_type()
20 |             {
21 |                 return "ErrCnt"; 
22 |             }
23 |             
24 | 			virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
25 |             {
26 |                 auto& pred = operands[0]->state->DenseDerived();
27 |                 auto& labels = operands[1]->state->SparseDerived();          
28 |                 this->loss = LossFunc<mode, Dtype>::GetErrCnt(pred, labels);
29 |             }
30 |             
31 |             virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
32 |             {
33 |                 throw std::runtime_error("no grad in this layer");
34 |             }                
35 |                         
36 | protected:
37 | };
38 | 
39 | #endif


--------------------------------------------------------------------------------
/include/net/exp_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef EXP_LAYER_H
 2 | #define EXP_LAYER_H
 3 | 
 4 | #include "i_act_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class ExpLayer : public IActLayer<mode, Dtype> 
 8 | {
 9 | public:
10 |     
11 |     ExpLayer(std::string _name, WriteType _wt = WriteType::OUTPLACE, PropErr _properr = PropErr::T)
12 |             : IActLayer<mode, Dtype>(_name, _wt, _properr) {}
13 | 
14 |     static std::string str_type()
15 |     {
16 |         return "Exp"; 
17 |     }
18 | 
19 |     virtual void Act(DenseMat<mode, Dtype>& prev_out, DenseMat<mode, Dtype>& cur_out) override
20 |     {
21 |         cur_out.Exp(prev_out);
22 |     }
23 |     
24 |     virtual void Derivative(DenseMat<mode, Dtype>& dst, DenseMat<mode, Dtype>& prev_output, 
25 |                             DenseMat<mode, Dtype>& cur_output, DenseMat<mode, Dtype>& cur_grad, Dtype beta) override
26 |     {
27 |         assert(beta == 0);
28 |         
29 |         dst.CopyFrom(cur_grad);
30 |         dst.EleWiseMul(cur_output);
31 |     }                                 
32 | };
33 | 
34 | #endif


--------------------------------------------------------------------------------
/include/net/gaussian_ll_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef GAUSSIAN_LL_LAYER_H
 2 | #define GAUSSIAN_LL_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class GaussianLLLayer : public ILayer<mode, Dtype> 
 8 | {
 9 | public:
10 |     GaussianLLLayer(std::string _name, PropErr _properr = PropErr::T);
11 |     
12 |     static std::string str_type()
13 |     {
14 |         return "GaussianLL"; 
15 |     }
16 |     
17 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override;
18 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override;
19 |     
20 | protected:
21 | 
22 |     DenseMat<mode, Dtype> buffer, diff;    
23 | };
24 | 
25 | #endif


--------------------------------------------------------------------------------
/include/net/general_loss_criterion_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef GENERAL_LOSS_CRITERION_LAYER_H
 2 | #define GENERAL_LOSS_CRITERION_LAYER_H
 3 | 
 4 | #include "i_criterion_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class GeneralLossCriterionLayer : public ICriterionLayer<mode, Dtype>
 8 | {
 9 | public:
10 |             GeneralLossCriterionLayer(std::string _name, PropErr _properr = PropErr::T)
11 |                 : GeneralLossCriterionLayer(_name, 1.0, _properr) {}
12 |                 
13 | 			GeneralLossCriterionLayer(std::string _name, Dtype _lambda, PropErr _properr = PropErr::T)
14 |                 : ICriterionLayer<mode, Dtype>(_name, _lambda, _properr)
15 |             {
16 |                 
17 |             }
18 |             
19 |             static std::string str_type()
20 |             {
21 |                 return "GeneralLossCriterion"; 
22 |             }
23 |             
24 | 			virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
25 |             {		
26 |                 assert(operands.size() == 1);
27 |                 this->loss = operands[0]->state->DenseDerived().Sum() * this->lambda;
28 |             }
29 |             
30 | 			virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
31 |             {
32 |                 assert(operands.size() == 1 && cur_idx == 0);
33 |                 auto& prev_grad = operands[0]->grad->DenseDerived();
34 |                 
35 |                 if (beta == 0)
36 |                     prev_grad.Fill(this->lambda / operands[0]->state->rows);
37 |                 else{
38 |                     prev_grad.Scale(beta);
39 |                     prev_grad.Add(this->lambda / operands[0]->state->rows);
40 |                 }
41 |             }    
42 | };
43 | 
44 | #endif


--------------------------------------------------------------------------------
/include/net/global_sum_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef GLOBAL_SUM_LAYER_H
 2 | #define GLOBAL_SUM_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class GlobalSumLayer : public ILayer<mode, Dtype>
 8 | {
 9 | public:
10 |     GlobalSumLayer(std::string _name, PropErr _properr = PropErr::T);
11 | 
12 |     static std::string str_type()
13 |     {
14 |         return "GlobalPool"; 
15 |     }
16 |     
17 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override;    
18 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override;    
19 |     
20 | protected:
21 | 
22 |     DenseMat<mode, Dtype> buf;    
23 | };
24 | 
25 | #endif


--------------------------------------------------------------------------------
/include/net/graph_pool_param.h:
--------------------------------------------------------------------------------
 1 | #ifndef GRAPH_POOL_PARAM_H
 2 | #define GRAPH_POOL_PARAM_H
 3 | 
 4 | #include "msg_pass_param.h"
 5 | #include "graph_struct.h"
 6 | #include "sparse_matrix.h"
 7 | 
 8 | template<MatMode mode, typename Dtype>
 9 | class NodeAvgPoolParam : public IMessagePassParam<mode, Dtype>
10 | {
11 | public:
12 | 		NodeAvgPoolParam(std::string _name)
13 |             : IMessagePassParam<mode, Dtype>(_name) {} 
14 |             
15 | protected:
16 |         virtual void InitCPUWeight(GraphStruct* graph) override;
17 | };
18 | 
19 | template<MatMode mode, typename Dtype>
20 | class NodeMaxPoolParam; 
21 | 
22 | template<typename Dtype>
23 | class NodeMaxPoolParam<CPU, Dtype> : public IConstParam<CPU, Dtype>
24 | {
25 | public:
26 |         NodeMaxPoolParam(std::string _name)
27 |             : IConstParam<CPU, Dtype>(_name) { max_index.clear(); }
28 | 
29 |         virtual void InitConst(void* side_info) override
30 |         {
31 |             graph = static_cast<GraphStruct*>(side_info); 
32 |         }
33 |         		
34 |         virtual void ResetOutput(const IMatrix<CPU, Dtype>* input, DenseMat<CPU, Dtype>* output) override
35 |         {
36 |             output->Zeros(graph->num_nodes, input->cols);
37 |         }
38 |              				 		
39 | 		virtual void UpdateOutput(IMatrix<CPU, Dtype>* input, DenseMat<CPU, Dtype>* output, Dtype beta, Phase phase) override;
40 |         
41 |         virtual void UpdateGradInput(DenseMat<CPU, Dtype>* gradInput, DenseMat<CPU, Dtype>* gradOutput, Dtype beta) override;                     
42 | protected:
43 |         GraphStruct* graph;
44 |         std::vector<int> max_index;                
45 | };
46 | 
47 | template<typename Dtype>
48 | class NodeMaxPoolParam<GPU, Dtype> : public IConstParam<GPU, Dtype>
49 | {
50 | public:
51 |     
52 | };
53 | 
54 | #endif


--------------------------------------------------------------------------------
/include/net/graph_struct.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef GRAPH_STRUCT_H
  3 | #define GRAPH_STRUCT_H
  4 | 
  5 | #include <vector>
  6 | #include <map>
  7 | #include <iostream>
  8 | #include <cassert>
  9 | 
 10 | template<typename T>
 11 | class LinkedTable
 12 | {
 13 | public:
 14 | 		LinkedTable()
 15 | 		{
 16 | 			n = ncap = 0;
 17 | 			head.clear();
 18 | 		}
 19 | 		
 20 | 		inline void AddEntry(int head_id, T content)
 21 | 		{			
 22 | 			if (head_id >= n)
 23 | 			{				
 24 | 				if (head_id + 1 > ncap)
 25 | 				{
 26 | 					ncap = std::max(ncap * 2, head_id + 1);
 27 | 					head.resize(ncap);	
 28 | 					for (int i = n; i < head_id + 1; ++i)
 29 | 						head[i].clear();
 30 | 				}
 31 | 				n = head_id + 1;
 32 | 			}
 33 | 			
 34 | 			head[head_id].push_back(content);
 35 | 		}
 36 | 		
 37 | 		inline void Resize(int new_n)
 38 | 		{
 39 | 			if (new_n > ncap)
 40 | 			{
 41 | 				ncap = std::max(ncap * 2, new_n);
 42 | 				head.resize(ncap);
 43 | 			}
 44 | 			n = new_n;
 45 | 			for (size_t i = 0; i < head.size(); ++i)
 46 | 				head[i].clear();
 47 | 		}
 48 | 		
 49 | 		int n;
 50 | 		std::vector< std::vector<T> > head;
 51 | private:
 52 | 		int ncap;		
 53 | };
 54 | 
 55 | class GraphStruct
 56 | {
 57 | public:
 58 | 	GraphStruct()
 59 | 	{
 60 | 		out_edges = new LinkedTable< std::pair<int, int> >();
 61 |         in_edges = new LinkedTable< std::pair<int, int> >();
 62 | 		subgraph = new LinkedTable< int >();
 63 |         edge_list.clear();
 64 | 	}
 65 |     
 66 | 	~GraphStruct()
 67 | 	{
 68 | 		delete out_edges;
 69 |         delete in_edges;
 70 | 		delete subgraph;
 71 | 	}
 72 | 	
 73 | 	inline void AddEdge(int idx, int x, int y)
 74 | 	{
 75 |         out_edges->AddEntry(x, std::pair<int, int>(idx, y));
 76 |         in_edges->AddEntry(y, std::pair<int, int>(idx, x));         
 77 | 		num_edges++;
 78 |         edge_list.push_back(std::make_pair(x, y));
 79 |         assert(num_edges == edge_list.size());
 80 |         assert(num_edges - 1 == (unsigned)idx);
 81 | 	}
 82 | 	
 83 | 	inline void AddNode(int subg_id, int n_idx)
 84 | 	{
 85 | 		subgraph->AddEntry(subg_id, n_idx);
 86 | 	}
 87 | 	
 88 | 	inline void Resize(unsigned _num_subgraph, unsigned _num_nodes = 0)
 89 | 	{
 90 | 		num_nodes = _num_nodes;
 91 | 		num_edges = 0;
 92 |         edge_list.clear();
 93 | 		num_subgraph = _num_subgraph;
 94 | 		
 95 | 		in_edges->Resize(num_nodes);
 96 |         out_edges->Resize(num_nodes);
 97 | 		subgraph->Resize(num_subgraph);
 98 | 	}
 99 | 	
100 | 	LinkedTable< std::pair<int, int> > *out_edges, *in_edges;
101 | 	LinkedTable< int >* subgraph;
102 | 	std::vector< std::pair<int, int> > edge_list;
103 |     
104 | 	unsigned num_nodes, num_edges, num_subgraph;	
105 | };
106 | 
107 | #endif


--------------------------------------------------------------------------------
/include/net/i_act_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef I_ACT_LAYER_H
 2 | #define I_ACT_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | enum class WriteType
 7 | {
 8 | 	INPLACE = 0,
 9 | 	OUTPLACE = 1	
10 | };
11 | 
12 | template<MatMode mode, typename Dtype>
13 | class IActLayer : public ILayer<mode, Dtype>
14 | {
15 | public:
16 | 	
17 | 	IActLayer(std::string _name, WriteType _wt, PropErr _properr = PropErr::T) 
18 |         : ILayer<mode, Dtype>(_name, _properr), wt(_wt)
19 |     {
20 |         this->state = new DenseMat<mode, Dtype>();
21 |         this->grad = new DenseMat<mode, Dtype>();
22 |     }
23 |     
24 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
25 |     {
26 |         assert(operands.size() == 1);
27 |         auto* prev_state = operands[0]->state;
28 |         
29 |         if (wt == WriteType::INPLACE)
30 |             this->state = prev_state;
31 |         else 
32 |             this->state->DenseDerived().Resize(prev_state->rows, prev_state->cols);
33 |         
34 |         Act(prev_state->DenseDerived(), this->state->DenseDerived());             
35 |     }
36 |     
37 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
38 |     {
39 |         assert(operands.size() == 1 && cur_idx == 0);
40 |         
41 |         auto& prev_grad = operands[0]->grad->DenseDerived();			
42 | 		auto& cur_grad = this->grad->DenseDerived();            
43 |         auto& prev_output = operands[0]->state->DenseDerived();
44 | 		auto& cur_output = this->state->DenseDerived();
45 |                                 
46 |         Derivative(prev_grad, prev_output, cur_output, cur_grad, beta);        
47 |     }
48 |     
49 |     virtual void Act(DenseMat<mode, Dtype>& prev_out, DenseMat<mode, Dtype>& cur_out) = 0;
50 |     virtual void Derivative(DenseMat<mode, Dtype>& dst, DenseMat<mode, Dtype>& prev_output, 
51 |                             DenseMat<mode, Dtype>& cur_output, DenseMat<mode, Dtype>& cur_grad, Dtype beta) = 0;
52 |                                     
53 | 	WriteType wt;
54 | };
55 | 
56 | #endif


--------------------------------------------------------------------------------
/include/net/i_criterion_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef I_CRITERION_LAYER_H
 2 | #define I_CRITERION_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class ICriterionLayer : public ILayer<mode, Dtype>
 8 | {
 9 | public:
10 | 		ICriterionLayer(std::string _name, PropErr _properr = PropErr::T) 
11 |             : ICriterionLayer<mode, Dtype>(_name, 1.0, _properr) {}
12 |             
13 | 		ICriterionLayer(std::string _name, Dtype _lambda, PropErr _properr = PropErr::T) 
14 |             : ILayer<mode, Dtype>(_name, _properr), lambda(_lambda), loss(0.0) {}
15 |                
16 | 		Dtype GetLoss()
17 |         {
18 |             return loss;    
19 |         }			
20 |         
21 |         virtual bool IsSupervised() override
22 |         {
23 |             return true;
24 |         }
25 |         
26 |         Dtype lambda;
27 |         
28 | protected:        
29 |         Dtype loss;
30 | };
31 | 
32 | #endif


--------------------------------------------------------------------------------
/include/net/i_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef ILAYER_H
 2 | #define ILAYER_H
 3 | 
 4 | #include "imatrix.h"
 5 | #include <vector>
 6 | 
 7 | enum class PropErr
 8 | {
 9 | 	N = 0,
10 | 	T = 1	
11 | };
12 | 
13 | template<MatMode mode, typename Dtype>
14 | class NNGraph;
15 | 
16 | template<MatMode mode, typename Dtype>
17 | class ILayer
18 | {
19 | public:
20 |     ILayer(std::string _name, PropErr _properr = PropErr::T) 
21 |             : name(_name), properr(_properr)
22 |             {
23 |                 this->state = this->grad = nullptr;
24 |             }
25 |             
26 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) = 0;    
27 |     
28 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) = 0;
29 |     
30 |     virtual bool HasParam()
31 |     {
32 |         return false;
33 |     }
34 |     
35 |     virtual bool IsSupervised()
36 |     {
37 |         return false;
38 |     }                
39 |                     
40 |     std::string name;
41 |     PropErr properr;
42 |     IMatrix<mode, Dtype>* state, *grad;
43 | };
44 | 
45 | template<MatMode mode, typename Dtype>
46 | class IParametric
47 | {
48 | public:
49 |         virtual void AccDeriv(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx) = 0;
50 | };
51 | 
52 | #endif


--------------------------------------------------------------------------------
/include/net/i_param.h:
--------------------------------------------------------------------------------
  1 | #ifndef IPARAM_H
  2 | #define IPARAM_H
  3 | 
  4 | #include "dense_matrix.h"
  5 | #include <string>
  6 | #include <map>
  7 | 
  8 | enum class BiasOption
  9 | {
 10 | 	NONE,
 11 | 	BIAS
 12 | };
 13 | 
 14 | template<MatMode mode, typename Dtype>
 15 | struct PP
 16 | {
 17 |         PP(){}
 18 |         
 19 |         PP(size_t rows, size_t cols)
 20 |         {
 21 |             value.Resize(rows, cols);
 22 |             grad.Resize(rows, cols);
 23 |         }
 24 |         
 25 |         DenseMat<mode, Dtype> value, grad;
 26 | };
 27 | 
 28 | template<MatMode mode, typename Dtype>
 29 | class IParam
 30 | {
 31 | public:
 32 |         IParam() {}
 33 | 		IParam(std::string _name)
 34 |             : name(_name)
 35 | 		{
 36 | 		}
 37 | 		
 38 | 		virtual void Serialize(FILE* fid) = 0;
 39 | 		
 40 | 		virtual void Deserialize(FILE* fid) = 0; 
 41 |         
 42 |         virtual bool IsDiff() = 0;
 43 |         
 44 |         virtual void UpdateOutput(IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* output, Dtype beta, Phase phase) = 0;		
 45 | 		virtual void UpdateGradInput(DenseMat<mode, Dtype>* gradInput, DenseMat<mode, Dtype>* gradOutput, Dtype beta) = 0;
 46 |                 
 47 |         virtual void ResetOutput(const IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* output) = 0;
 48 |                         
 49 |         std::string name;        
 50 | };
 51 | 
 52 | template<MatMode mode, typename Dtype>
 53 | class IDiffParam : public IParam<mode, Dtype>
 54 | {
 55 | public:  
 56 |         IDiffParam() {}
 57 |         IDiffParam(std::string _name)
 58 |             : IParam<mode, Dtype>(_name)
 59 |             {
 60 |                 p.clear();
 61 |             }
 62 |         virtual bool IsDiff() override
 63 |         {
 64 |             return true;
 65 |         }
 66 |         
 67 |         virtual void Serialize(FILE* fid) override
 68 |         {
 69 |             for (auto it = p.begin(); it != p.end(); ++it)
 70 |             {
 71 |                 it->second->value.Serialize(fid);
 72 |                 it->second->grad.Serialize(fid);
 73 |             }            
 74 |         }
 75 | 		
 76 | 		virtual void Deserialize(FILE* fid) override
 77 |         {
 78 |             for (auto it = p.begin(); it != p.end(); ++it)
 79 |             {
 80 |                 it->second->value.Deserialize(fid);
 81 |                 it->second->grad.Deserialize(fid);
 82 |             }
 83 |         }
 84 |         
 85 | 		virtual void AccDeriv(IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* gradOutput) = 0;
 86 |                 
 87 |         std::map<std::string, PP<mode, Dtype>*> p;
 88 | };
 89 | 
 90 | template<MatMode mode, typename Dtype>
 91 | class IConstParam : public IParam<mode, Dtype>
 92 | {
 93 | public:
 94 |         IConstParam() {}
 95 |         IConstParam(std::string _name)
 96 |             : IParam<mode, Dtype>(_name)
 97 |             {
 98 |                 
 99 |             }    
100 |             
101 |         virtual void Serialize(FILE* fid) override {}
102 | 		
103 | 		virtual void Deserialize(FILE* fid) override {}
104 |         
105 |         virtual void InitConst(void* side_info) = 0;            
106 |         virtual bool IsDiff() override
107 |         {
108 |             return false;
109 |         }            
110 | };
111 | 
112 | #endif


--------------------------------------------------------------------------------
/include/net/inner_product_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef INNER_PRODUCT_LAYER_H
 2 | #define INNER_PRODUCT_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class InnerProductLayer : public ILayer<mode, Dtype>
 8 | {
 9 | public:
10 |     InnerProductLayer(std::string _name, PropErr _properr = PropErr::T)
11 |         : ILayer<mode, Dtype>(_name, _properr) 
12 |     {
13 |         this->state = new DenseMat<mode, Dtype>();
14 | 		this->grad = new DenseMat<mode, Dtype>();
15 |     }
16 |     
17 |     static std::string str_type()
18 |     {
19 |         return "InnerProduct"; 
20 |     }
21 |     
22 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
23 |     {
24 |         assert(operands.size() == 2);
25 |         
26 |         auto& cur_output = this->state->DenseDerived();
27 |         buf.EleWiseMul(operands[0]->state->DenseDerived(), operands[1]->state->DenseDerived());
28 |         ones.Resize(buf.cols, 1); 
29 |         ones.Fill(1.0);
30 |         cur_output.GeMM(buf, ones, Trans::N, Trans::N, 1.0, 0.0);
31 |     }
32 |     
33 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
34 |     {
35 |         assert(operands.size() == 2);
36 |         
37 |         auto& cur_grad = this->grad->DenseDerived();
38 |         auto& prev_grad = operands[cur_idx]->grad->DenseDerived();
39 |         auto& another_operand = operands[1 - cur_idx]->state->DenseDerived();
40 | 
41 |         buf.MulColVec(another_operand, cur_grad);
42 |         if (beta == 0)
43 |         	prev_grad.CopyFrom(buf);
44 |         else
45 |         	prev_grad.Axpby(1.0, buf, beta);
46 |     }    
47 |     
48 |     DenseMat<mode, Dtype> buf, ones;
49 | };
50 | 
51 | 
52 | #endif


--------------------------------------------------------------------------------
/include/net/input_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef INPUT_LAYER_H
 2 | #define INPUT_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class InputLayer : public ILayer<mode, Dtype>
 8 | {
 9 | public:
10 | 	InputLayer(std::string _name)
11 |             : ILayer<mode, Dtype>(_name, PropErr::N) {}
12 | 
13 |     static std::string str_type()
14 |     {
15 |         return "Input"; 
16 |     }
17 | 
18 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override {}
19 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override {}
20 | };
21 | 
22 | #endif


--------------------------------------------------------------------------------
/include/net/learner.h:
--------------------------------------------------------------------------------
 1 | #ifndef LEARNER_H
 2 | #define LEARNER_H
 3 | 
 4 | #include "model.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class ILearner
 8 | {
 9 | public:
10 |         explicit ILearner(Model<mode, Dtype>* m, Dtype _init_lr, Dtype _l2_penalty = 0)
11 |             : model(m), init_lr(_init_lr), l2_penalty(_l2_penalty), cur_lr(_init_lr), clip_threshold(5), clipping_enabled(true), cur_iter(0) {}
12 |                 
13 |         virtual void Update() = 0;                     
14 |         
15 |         Dtype ClipGradients();
16 | 
17 |         Model<mode, Dtype>* model; 
18 |         Dtype init_lr, l2_penalty, cur_lr;        
19 |         Dtype clip_threshold;
20 |         bool clipping_enabled;
21 |         int cur_iter;
22 | };
23 | 
24 | template<MatMode mode, typename Dtype>
25 | class SGDLearner : public ILearner<mode, Dtype>
26 | {
27 | public:
28 |         explicit SGDLearner(Model<mode, Dtype>* m, Dtype _init_lr, Dtype _l2_penalty = 0)
29 |                 : ILearner<mode, Dtype>(m, _init_lr, _l2_penalty) {}        
30 |         
31 |         virtual void Update() override;                          
32 | };
33 | 
34 | template<MatMode mode, typename Dtype>
35 | class MomentumSGDLearner : public ILearner<mode, Dtype>
36 | {
37 | public:
38 |         explicit MomentumSGDLearner(Model<mode, Dtype>* m, 
39 |                                     Dtype _init_lr, 
40 |                                     Dtype _momentum = 0.9, 
41 |                                     Dtype _l2_penalty = 0)
42 |                 : ILearner<mode, Dtype>(m, _init_lr, _l2_penalty), momentum(_momentum) 
43 |                 {
44 |                     acc_grad_dict.clear();
45 |                 }
46 |         
47 |         virtual void Update() override;                          
48 |         Dtype momentum;
49 |         std::map<std::string, std::shared_ptr< DenseMat<mode, Dtype> > > acc_grad_dict;
50 | };
51 | 
52 | template<MatMode mode, typename Dtype>
53 | class ExplicitBatchLearner : public ILearner<mode, Dtype>
54 | {
55 | public:
56 |         explicit ExplicitBatchLearner(Model<mode, Dtype>* m, Dtype _init_lr, Dtype _l2_penalty = 0)
57 |                 : ILearner<mode, Dtype>(m, _init_lr, _l2_penalty) 
58 |                 {
59 |                     acc_grad_dict.clear();
60 |                 }
61 |         
62 |         virtual void Update() override;      
63 |         void AccumulateGrad();                    
64 |         
65 |         std::map<std::string, std::shared_ptr< DenseMat<mode, Dtype> > > acc_grad_dict;
66 | };
67 | 
68 | template<MatMode mode, typename Dtype>
69 | class AdamLearner : public ILearner<mode, Dtype>
70 | {
71 | public:
72 |         explicit AdamLearner(Model<mode, Dtype>* m, 
73 |                             Dtype _init_lr,
74 |                             Dtype _l2_penalty = 0, 
75 |                             Dtype _beta_1 = 0.9, 
76 |                             Dtype _beta_2 = 0.999, 
77 |                             Dtype _eps = 1e-8)
78 |                 : ILearner<mode, Dtype>(m, _init_lr, _l2_penalty), beta_1(_beta_1), beta_2(_beta_2), eps(_eps)
79 |                 {
80 |                     first_moments.clear();
81 |                     second_moments.clear();
82 |                 }
83 | 
84 |         virtual void Update() override;
85 | 
86 |         std::map<std::string, std::shared_ptr< DenseMat<mode, Dtype> > > first_moments, second_moments;
87 |         Dtype beta_1, beta_2, eps;
88 |         DenseMat<mode, Dtype> m_hat, v_hat;
89 | };
90 | 
91 | #endif


--------------------------------------------------------------------------------
/include/net/linear_param.h:
--------------------------------------------------------------------------------
 1 | #ifndef LINEAR_PARAM_H
 2 | #define LINEAR_PARAM_H
 3 | 
 4 | #include "i_param.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class DenseMat;
 8 | 
 9 | template<MatMode mode, typename Dtype>
10 | class LinearParam : public IDiffParam<mode, Dtype>
11 | {
12 | public:
13 | 		LinearParam(FILE* fid)
14 |         {
15 |             this->Deserialize(fid);
16 |         }
17 |         
18 |         LinearParam(std::string _name, size_t _input_size, size_t _output_size, BiasOption _bo = BiasOption::BIAS)
19 |             : LinearParam<mode, Dtype>(_name, _input_size, _output_size, 0, 1.0 / sqrt(output_size), _bo) {}
20 |         
21 | 		LinearParam(std::string _name, size_t _input_size, size_t _output_size, Dtype mean, Dtype std, BiasOption _bo = BiasOption::BIAS)
22 |             : IDiffParam<mode, Dtype>(_name), bo(_bo)
23 |         {            
24 |             input_size = _input_size;
25 | 	        output_size = _output_size;
26 |             
27 |             this->p["weight"] = new PP<mode, Dtype>();            
28 |             if (bo == BiasOption::BIAS)            
29 |                 this->p["bias"] = new PP<mode, Dtype>();       
30 |             
31 | 	        Reset(mean, std);
32 |         }
33 |         
34 |         virtual void ResetOutput(const IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* output) override
35 |         {
36 |             output->Zeros(input->rows, this->p["weight"]->value.cols);
37 |         }
38 |         
39 |         virtual void UpdateOutput(IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* output, Dtype beta, Phase phase) override
40 |         {
41 |             auto& weight = this->p["weight"]->value;
42 |                             
43 |             if (input->GetMatType() == DENSE)
44 |                 output->GeMM(input->DenseDerived(), weight, Trans::N, Trans::N, 1.0, beta);
45 |             else
46 |                 output->SparseMM(input->SparseDerived(), weight, Trans::N, Trans::N, 1.0, beta);
47 |             
48 |             if (bo == BiasOption::BIAS)
49 |             {
50 |                 auto& bias = this->p["bias"]->value;
51 |                 output->AddRowVec(bias, 1.0);
52 |             }            
53 |         }
54 |         
55 |         virtual void UpdateGradInput(DenseMat<mode, Dtype>* gradInput, DenseMat<mode, Dtype>* gradOutput, Dtype beta) override
56 |         {
57 |             gradInput->GeMM(*gradOutput, this->p["weight"]->value, Trans::N, Trans::T, 1.0, beta);
58 |         }
59 |                         
60 | 		virtual void AccDeriv(IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* gradOutput) override
61 |         {
62 |             if (input->GetMatType() == DENSE)
63 |                 this->p["weight"]->grad.GeMM(input->DenseDerived(), *gradOutput, Trans::T, Trans::N, 1.0, 1.0);
64 |             else
65 |                 this->p["weight"]->grad.SparseMM(input->SparseDerived(), *gradOutput, Trans::T, Trans::N, 1.0, 1.0);
66 |             
67 |             if (bo == BiasOption::BIAS)
68 |             {
69 |                 bias_multiplier.Resize(1, input->rows);
70 |                 bias_multiplier.Fill(1.0);
71 |                 this->p["bias"]->grad.GeMM(bias_multiplier, *gradOutput, Trans::N, Trans::N, 1.0, 1.0);
72 |             }           
73 |         }
74 |                   		
75 | 		virtual void Reset(Dtype mean, Dtype std)
76 |         {
77 |             this->p["weight"]->value.SetRandN(mean, std, input_size, output_size);
78 | 	        this->p["weight"]->grad.Zeros(input_size, output_size);
79 | 	        if (bo == BiasOption::BIAS)
80 |             {
81 |                 this->p["bias"]->value.Zeros(1, output_size);
82 |                 this->p["bias"]->grad.Zeros(1, output_size);
83 |             }
84 |         }
85 | 		
86 | protected:
87 |         BiasOption bo; 
88 | 		size_t input_size, output_size;
89 | 		DenseMat<mode, Dtype> bias_multiplier;
90 | };
91 | 
92 | #endif
93 | 


--------------------------------------------------------------------------------
/include/net/log_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOG_LAYER_H
 2 | #define LOG_LAYER_H
 3 | 
 4 | #include "i_act_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class LogLayer : public IActLayer<mode, Dtype>
 8 | {
 9 | public:
10 | 
11 |         LogLayer(std::string _name, PropErr _properr = PropErr::T)
12 |             : IActLayer<mode, Dtype>(_name, WriteType::OUTPLACE, _properr) {}
13 |             
14 |         static std::string str_type()
15 |         {
16 |             return "Log"; 
17 |         }
18 | 
19 |         virtual void Act(DenseMat<mode, Dtype>& prev_out, DenseMat<mode, Dtype>& cur_out) override
20 |         {
21 |             cur_out.Log(prev_out);
22 |         }
23 |     
24 |         virtual void Derivative(DenseMat<mode, Dtype>& dst, DenseMat<mode, Dtype>& prev_output, 
25 |                                 DenseMat<mode, Dtype>& cur_output, DenseMat<mode, Dtype>& cur_grad, Dtype beta) override
26 |         {
27 |             buf.CopyFrom(prev_output);
28 |             buf.Inv();
29 |             buf.EleWiseMul(cur_grad);
30 |             
31 |             dst.Axpby(1.0, buf, beta);
32 |         }                                               
33 |         
34 | protected:
35 | 
36 |         DenseMat<mode, Dtype> buf;        
37 | };
38 | 
39 | #endif


--------------------------------------------------------------------------------
/include/net/loss_func.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOSS_FUNC_H
 2 | #define LOSS_FUNC_H
 3 | 
 4 | #include "dense_matrix.h"
 5 | #include "sparse_matrix.h"
 6 | 
 7 | enum class RankOrder
 8 | {
 9 |     ASCE,
10 | 	DESC
11 | };
12 | 
13 | template<MatMode mode, typename Dtype>
14 | class LossFunc;
15 | 
16 | template<typename Dtype>
17 | class LossFunc<CPU, Dtype>
18 | {
19 | public:    
20 |     static Dtype GetLogLoss(DenseMat<CPU, Dtype>& pred, SparseMat<CPU, Dtype>& label);
21 |     static Dtype GetErrCnt(DenseMat<CPU, Dtype>& pred, SparseMat<CPU, Dtype>& label);
22 |     static Dtype GetAverageRank(DenseMat<CPU, Dtype>& pred, SparseMat<CPU, Dtype>& label, RankOrder order);
23 | };
24 | 
25 | template<typename Dtype>
26 | class LossFunc<GPU, Dtype>
27 | {
28 | public:
29 |     static Dtype GetLogLoss(DenseMat<GPU, Dtype>& pred, SparseMat<GPU, Dtype>& label);
30 |     static Dtype GetErrCnt(DenseMat<GPU, Dtype>& pred, SparseMat<GPU, Dtype>& label);
31 |     static Dtype GetAverageRank(DenseMat<GPU, Dtype>& pred, SparseMat<GPU, Dtype>& label, RankOrder order);
32 | 
33 | private:    
34 |     static DenseMat<GPU, Dtype> buf;    
35 | };
36 | 
37 | template<typename Dtype>
38 | DenseMat<GPU, Dtype> LossFunc<GPU, Dtype>::buf;
39 | 
40 | 
41 | 
42 | 
43 | #endif


--------------------------------------------------------------------------------
/include/net/max_entropy_criterion_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef MAX_ENTROPY_CRITERION_LAYER_H
 2 | #define MAX_ENTROPY_CRITERION_LAYER_H
 3 | 
 4 | #include "i_criterion_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class MaxEntropyCriterionLayer : public ICriterionLayer<mode, Dtype>
 8 | {
 9 | public:
10 |         
11 | 			MaxEntropyCriterionLayer(std::string _name, PropErr _properr = PropErr::T)
12 |                 : MaxEntropyCriterionLayer<mode, Dtype>(_name, 1.0, _properr) {}
13 |                 
14 | 			MaxEntropyCriterionLayer(std::string _name, Dtype _lambda, PropErr _properr = PropErr::T);
15 |                         
16 |             static std::string str_type()
17 |             {
18 |                 return "EntropyLoss"; 
19 |             }
20 |             
21 |             virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override;
22 |             
23 |             virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override;
24 | };
25 | 
26 | #endif


--------------------------------------------------------------------------------
/include/net/mixture_nll_criterion_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUM_LOSS_CRITERION_LAYER_H
 2 | #define SUM_LOSS_CRITERION_LAYER_H
 3 | 
 4 | #include "i_criterion_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class MixtureNLLCriterionLayer : public ICriterionLayer<mode, Dtype>
 8 | {
 9 | public:
10 | 
11 |     MixtureNLLCriterionLayer(std::string _name, PropErr _properr = PropErr::T)
12 |         : MixtureNLLCriterionLayer(_name, 1.0, _properr) {}
13 |         
14 |     MixtureNLLCriterionLayer(std::string _name, Dtype _lambda, PropErr _properr = PropErr::T);
15 |     
16 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override;
17 |     
18 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override;                      
19 | };
20 | 
21 | #endif


--------------------------------------------------------------------------------
/include/net/model.h:
--------------------------------------------------------------------------------
  1 | #ifndef MODEL_H
  2 | #define MODEL_H
  3 | 
  4 | #include "i_param.h"
  5 | #include <map>
  6 | #include <vector>
  7 | #include "fmt.h"
  8 | 
  9 | template<MatMode mode, typename Dtype>
 10 | class Model
 11 | {
 12 | public:
 13 | 
 14 |         Model()
 15 |         {
 16 |             flatten = false;
 17 |             diff_params.clear();
 18 |             const_params.clear();
 19 |             param_list.clear();
 20 |         }      
 21 |         
 22 |         inline void AddParam(IDiffParam<mode, Dtype>* param)
 23 |         {
 24 |             flatten = false;
 25 |             assert(diff_params.count(param->name) == 0);     
 26 |             diff_params[param->name] = param;
 27 |             all_params[param->name] = param;       
 28 |         }
 29 |         
 30 |         inline void AddParam(IConstParam<mode, Dtype>* param)
 31 |         {
 32 |             assert(const_params.count(param->name) == 0);
 33 |             const_params[param->name] = param;
 34 |             all_params[param->name] = param;
 35 |         }
 36 |         
 37 |         inline void SetupConstParams(std::map<std::string, void*> arg_dict)
 38 |         {
 39 |             for (auto& p : arg_dict)
 40 |             {
 41 |                 assert(const_params.count(p.first));
 42 |                 const_params[p.first]->InitConst(p.second);
 43 |             }
 44 |         }
 45 |         
 46 |         std::map< std::string, PP<mode, Dtype>* >& GetDiffParams()
 47 |         {
 48 |             if (!flatten)
 49 |                 DiffParams2List();
 50 |             return param_list; 
 51 |         }
 52 |         
 53 |         void DiffParams2List()
 54 |         {
 55 |             param_list.clear();
 56 |             for (auto& param_pair : diff_params)
 57 |             {
 58 |                 for (auto& weight_pair : param_pair.second->p)
 59 |                 {
 60 |                     param_list[param_pair.first + "-" + weight_pair.first] = weight_pair.second;
 61 |                 }
 62 |             }
 63 |         }
 64 |         void Load(std::string filename)
 65 |         {
 66 |             FILE* fid = fopen(filename.c_str(), "rb");
 67 |             
 68 |             for (auto it = diff_params.begin(); it != diff_params.end(); ++it)
 69 |                 it->second->Deserialize(fid); 
 70 |                 
 71 |             fclose(fid);
 72 |         }
 73 |        
 74 |         void Save(std::string filename)
 75 |         {
 76 |             FILE* fid = fopen(filename.c_str(), "wb");
 77 |             
 78 |             for (auto it = diff_params.begin(); it != diff_params.end(); ++it)
 79 |                 it->second->Serialize(fid); 
 80 |                 
 81 |             fclose(fid);            
 82 |         }
 83 |         
 84 |         std::map< std::string, IDiffParam<mode, Dtype>* > diff_params;
 85 |         std::map< std::string, IConstParam<mode, Dtype>* > const_params;
 86 |         std::map< std::string, IParam<mode, Dtype>*> all_params;
 87 |         bool flatten;                  
 88 |         std::map< std::string, PP<mode, Dtype>* > param_list;        
 89 | };
 90 | 
 91 | template<template <MatMode, typename> class ParamType, MatMode mode, typename Dtype, typename... Args>    
 92 | IDiffParam<mode, Dtype>* add_diff(Model<mode, Dtype>& model, std::string param_name, Args&&... args)
 93 | {
 94 |         auto* param = new ParamType<mode, Dtype>(param_name, std::forward<Args>(args)...);
 95 |         model.AddParam(param);                                            
 96 |         return param;
 97 | }
 98 | 
 99 | template<template <MatMode, typename> class ParamType, MatMode mode, typename Dtype, typename... Args>    
100 | IConstParam<mode, Dtype>* add_const(Model<mode, Dtype>& model, std::string param_name, Args&&... args)
101 | {
102 |         auto* param = new ParamType<mode, Dtype>(param_name, std::forward<Args>(args)...);
103 |         model.AddParam(param);                                            
104 |         return param;
105 | }
106 | 
107 | #endif
108 | 


--------------------------------------------------------------------------------
/include/net/mse_criterion_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef MSE_CRITERION_LAYER_H
 2 | #define MSE_CRITERION_LAYER_H
 3 | 
 4 | #include "i_criterion_layer.h"
 5 | #include "dense_matrix.h"
 6 | 
 7 | template<MatMode mode, typename Dtype>
 8 | class MSECriterionLayer : public ICriterionLayer<mode, Dtype>
 9 | {
10 | public:
11 | 			MSECriterionLayer(std::string _name, PropErr _properr = PropErr::T)
12 |                 : MSECriterionLayer(_name, 1.0, _properr) {}
13 |                 
14 | 			MSECriterionLayer(std::string _name, Dtype _lambda, PropErr _properr = PropErr::T)
15 |                 : ICriterionLayer<mode, Dtype>(_name, _lambda, _properr)
16 |             {
17 |                 this->grad = new DenseMat<mode, Dtype>();
18 |             }
19 |             
20 | 			virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
21 |             {		
22 |                 auto& node_diff = this->grad->DenseDerived();						
23 |                 node_diff.GeaM(1.0, Trans::N, operands[0]->state->DenseDerived(), -1.0, Trans::N, operands[1]->state->DenseDerived());
24 |                 Dtype norm2 = node_diff.Norm2();
25 |                 this->loss = norm2 * norm2;
26 |                 
27 |                 if (this->properr == PropErr::T)
28 |                     node_diff.Scale(2.0 * this->lambda / operands[1]->state->rows);
29 |             }
30 |             
31 | 			virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
32 |             {
33 |                 assert(operands.size() == 2 && cur_idx == 0);
34 |                 
35 |                 auto& cur_grad = this->grad->DenseDerived();
36 | 		        auto& prev_grad = operands[0]->grad->DenseDerived();
37 | 		
38 |                 if (beta == 0)
39 |                     prev_grad.CopyFrom(cur_grad);
40 |                 else
41 |                     prev_grad.Axpby(1.0, cur_grad, beta);	
42 |             }
43 | };
44 | 
45 | #endif


--------------------------------------------------------------------------------
/include/net/msg_pass_param.h:
--------------------------------------------------------------------------------
  1 | #ifndef MSG_PASS_PARAM_H
  2 | #define MSG_PASS_PARAM_H
  3 | 
  4 | #include "i_param.h"
  5 | #include "graph_struct.h"
  6 | #include "sparse_matrix.h"
  7 | 
  8 | template<MatMode mode, typename Dtype>
  9 | class IMessagePassParam : public IConstParam<mode, Dtype>
 10 | {
 11 | public:
 12 | 		IMessagePassParam(std::string _name);
 13 | 		
 14 |         virtual void InitConst(void* side_info) override
 15 |         {
 16 |             this->InitCPUWeight(static_cast<GraphStruct*>(side_info));
 17 |             if (mode == GPU)
 18 |                 this->weight.CopyFrom(*(this->cpu_weight));
 19 |         }
 20 |         		
 21 |         virtual void ResetOutput(const IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* output) override
 22 |         {
 23 |             output->Resize(this->weight.rows, input->cols);
 24 |         }
 25 |              				 		
 26 | 		virtual void UpdateOutput(IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* output, Dtype beta, Phase phase) override
 27 |         {
 28 |             auto& prev_states = input->DenseDerived();
 29 |             output->SparseMM(this->weight, prev_states, Trans::N, Trans::N, 1.0, beta);                
 30 |         }
 31 |         
 32 |         virtual void UpdateGradInput(DenseMat<mode, Dtype>* gradInput, DenseMat<mode, Dtype>* gradOutput, Dtype beta) override
 33 |         {
 34 |             auto& prev_grad = gradInput->DenseDerived();
 35 |             prev_grad.SparseMM(this->weight, *gradOutput, Trans::T, Trans::N, 1.0, beta);
 36 |         }
 37 |         
 38 | 		SparseMat<mode, Dtype> weight;
 39 | 		
 40 | protected:
 41 |         virtual void InitCPUWeight(GraphStruct* graph) = 0;
 42 |         SparseMat<CPU, Dtype>* cpu_weight;
 43 | };
 44 | 
 45 | template<MatMode mode, typename Dtype>
 46 | class Node2NodeMsgParam : public IMessagePassParam<mode, Dtype>
 47 | {
 48 | public:
 49 | 		Node2NodeMsgParam(std::string _name)
 50 |             : IMessagePassParam<mode, Dtype>(_name) {} 
 51 |             
 52 | protected:
 53 |         virtual void InitCPUWeight(GraphStruct* graph) override;
 54 | };
 55 | 
 56 | template<MatMode mode, typename Dtype>
 57 | class Edge2NodeMsgParam : public IMessagePassParam<mode, Dtype>
 58 | {
 59 | public:
 60 | 		Edge2NodeMsgParam(std::string _name)
 61 |             : IMessagePassParam<mode, Dtype>(_name) {} 
 62 |             
 63 | protected:
 64 |         virtual void InitCPUWeight(GraphStruct* graph) override;
 65 | };
 66 | 
 67 | template<MatMode mode, typename Dtype>
 68 | class Node2EdgeMsgParam : public IMessagePassParam<mode, Dtype>
 69 | {
 70 | public:
 71 | 		Node2EdgeMsgParam(std::string _name)
 72 |             : IMessagePassParam<mode, Dtype>(_name) {} 
 73 |             
 74 | protected:
 75 |         virtual void InitCPUWeight(GraphStruct* graph) override;
 76 | };
 77 | 
 78 | template<MatMode mode, typename Dtype>
 79 | class Edge2EdgeMsgParam : public IMessagePassParam<mode, Dtype>
 80 | {
 81 | public:
 82 | 		Edge2EdgeMsgParam(std::string _name)
 83 |             : IMessagePassParam<mode, Dtype>(_name) {} 
 84 |             
 85 | protected:
 86 |         virtual void InitCPUWeight(GraphStruct* graph) override;
 87 | };
 88 | 
 89 | template<MatMode mode, typename Dtype>
 90 | class SubgraphMsgParam : public IMessagePassParam<mode, Dtype>
 91 | {
 92 | public:
 93 | 		SubgraphMsgParam(std::string _name) 
 94 |             : IMessagePassParam<mode, Dtype>(_name) {}
 95 |         
 96 | protected:
 97 |         virtual void InitCPUWeight(GraphStruct* graph) override;
 98 | };
 99 | 
100 | template<MatMode mode, typename Dtype>
101 | class SubgraphConcatParam : public IConstParam<mode, Dtype>
102 | {
103 | public:
104 | 		SubgraphConcatParam(std::string _name) 
105 |             : IConstParam<mode, Dtype>(_name), graph(nullptr) {}
106 | 
107 |         virtual void ResetOutput(const IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* output) override
108 |         {
109 |             assert(graph->num_nodes % graph->num_subgraph == 0);
110 |             int node_per_graph = graph->num_nodes / graph->num_subgraph; 
111 |             // assume the labels are consecutive
112 |             for (int i = 0; i < node_per_graph; ++i)
113 |                 for (int j = 0; j < graph->num_subgraph; ++j)
114 |                     assert(graph->subgraph->head[j][i] == j * node_per_graph + i); 
115 |             
116 |             output->Zeros(graph->num_subgraph, node_per_graph * input->cols);
117 |         }
118 |              				 		
119 | 		virtual void UpdateOutput(IMatrix<mode, Dtype>* input, DenseMat<mode, Dtype>* output, Dtype beta, Phase phase) override
120 |         {
121 |             assert(beta == 0);
122 |             int row = output->rows, col = output->cols;            
123 |             output->CopyFrom(input);
124 |             // resize will not change the content in output
125 |             output->Resize(row, col);             
126 |         }
127 |         
128 |         virtual void UpdateGradInput(DenseMat<mode, Dtype>* gradInput, DenseMat<mode, Dtype>* gradOutput, Dtype beta) override
129 |         {
130 |             assert(gradInput->rows * gradInput->cols == gradOutput->rows * gradOutput->cols);
131 |             gradOutput->Resize(gradInput->rows, gradInput->cols);
132 |             
133 |             if (beta == 0)
134 |                 gradInput->CopyFrom(*gradOutput);
135 |             else
136 |                 gradInput->Axpby(1.0, *gradOutput, beta);
137 |         }
138 |         
139 |         virtual void InitConst(void* side_info) override
140 |         {
141 |             this->graph = static_cast<GraphStruct*>(side_info);
142 |         }
143 | 
144 | protected:
145 |         GraphStruct* graph;         
146 | };
147 | 
148 | 
149 | #endif


--------------------------------------------------------------------------------
/include/net/multinomial_sample_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef MULTINOMIAL_SAMPLE_LAYER_H
 2 | #define MULTINOMIAL_SAMPLE_LAYER_H
 3 | 
 4 | #include "i_act_layer.h"
 5 | #include<random>
 6 | #include<chrono>
 7 | 
 8 | template<MatMode mode, typename Dtype>
 9 | class MultinomialSampleLayer;
10 | 
11 | enum class SampleType
12 | {
13 | 	MAX = 0,
14 | 	STOCHASTIC = 1	
15 | };
16 | 
17 | template<typename Dtype>
18 | class MultinomialSampleLayer<CPU, Dtype> : public IActLayer<CPU, Dtype> 
19 | {
20 | public:
21 |     
22 |     MultinomialSampleLayer(std::string _name, SampleType _st, PropErr _properr = PropErr::T)
23 |             : IActLayer<CPU, Dtype>(_name, WriteType::OUTPLACE, _properr), st(_st) {}
24 | 
25 |     static std::string str_type()
26 |     {
27 |         return "MultinomialSample"; 
28 |     }            
29 | 
30 |     virtual void Act(DenseMat<CPU, Dtype>& prev_out, DenseMat<CPU, Dtype>& cur_out) override;
31 |     
32 |     virtual void Derivative(DenseMat<CPU, Dtype>& dst, DenseMat<CPU, Dtype>& prev_output, 
33 |                             DenseMat<CPU, Dtype>& cur_output, DenseMat<CPU, Dtype>& cur_grad, Dtype beta) override;     
34 | 
35 |     SampleType st;
36 | 
37 | };
38 | 
39 | 
40 | #endif


--------------------------------------------------------------------------------
/include/net/mvn_diag_nll_criterion_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef MVN_DIAG_NLL_CRITERION_LAYER_H
 2 | #define MVN_DIAG_NLL_CRITERION_LAYER_H
 3 | 
 4 | #include "i_criterion_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class MVNDianNLLCriterionLayer : public ICriterionLayer<mode, Dtype>
 8 | {
 9 | public:
10 | 
11 |     MVNDianNLLCriterionLayer(std::string _name, PropErr _properr = PropErr::T)
12 |         : MVNDianNLLCriterionLayer(_name, 1.0, _properr) {}
13 |         
14 |     MVNDianNLLCriterionLayer(std::string _name, Dtype _lambda, PropErr _properr = PropErr::T);
15 | 
16 |     static std::string str_type()
17 |     {
18 |         return "MVNDianNLLCriterion"; 
19 |     }          
20 |     
21 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override;
22 |     
23 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override;                 
24 | };
25 | 
26 | #endif


--------------------------------------------------------------------------------
/include/net/nngraph.h:
--------------------------------------------------------------------------------
  1 | #ifndef NNGRAPH_H
  2 | #define NNGRAPH_H
  3 | 
  4 | #include "i_layer.h"
  5 | #include "i_param.h"
  6 | #include "fmt.h"
  7 | #include <vector>
  8 | 
  9 | template<MatMode mode, typename Dtype>
 10 | class NNGraph
 11 | {
 12 | public:
 13 |     NNGraph()
 14 |     {
 15 |         layer_dict.clear();
 16 |         static_layer_dict.clear();
 17 |         ordered_layers.clear();
 18 |         name_idx_map.clear();
 19 |         hash.clear();
 20 |     }
 21 |     
 22 |     void Clear()
 23 |     {
 24 |         layer_dict.clear();
 25 |         static_layer_dict.clear();
 26 |         ordered_layers.clear();
 27 |         name_idx_map.clear();
 28 |         hash.clear();
 29 |     }
 30 |     
 31 |     void FeedForward(std::map<std::string, IMatrix<mode, Dtype>* > input, Phase phase);   
 32 |        
 33 |     std::map<std::string, Dtype> GetLoss();
 34 |             
 35 |     void BackPropagation();
 36 |     
 37 |     inline void InsertLayer(ILayer<mode, Dtype>* layer)
 38 |     {
 39 |         InsertLayer(layer, {});
 40 |     }
 41 |     
 42 |     inline void PrintComputationalGraph()
 43 |     {
 44 |         for (size_t i = 0; i < ordered_layers.size(); ++i)
 45 |         {
 46 |             auto* cur_layer = layer_dict[ordered_layers[i].first];
 47 |             auto& operands = ordered_layers[i].second;
 48 |             
 49 |             std::cerr << "( ";
 50 |             for (auto* layer : operands)
 51 |                 std::cerr << layer->name << " ";
 52 |             std::cerr << ") -> " << cur_layer->name << std::endl;
 53 |         }
 54 |     }
 55 |     
 56 |     inline bool HasLayer(std::string name)
 57 |     {
 58 |         return static_layer_dict.count(name) || layer_dict.count(name);
 59 |     }
 60 |     
 61 |     inline void InsertStaticLayer(ILayer<mode, Dtype>* layer)
 62 |     {
 63 |         assert(static_layer_dict.count(layer->name) == 0);
 64 |         assert(layer_dict.count(layer->name) == 0);
 65 |         static_layer_dict[layer->name] = layer;
 66 |         assert(layer->state);
 67 |         assert(layer->state->count);
 68 |     }
 69 |     
 70 |     inline void InsertLayer(ILayer<mode, Dtype>* layer, std::vector< ILayer<mode, Dtype>* > operands)
 71 |     {
 72 |         assert(layer_dict.count(layer->name) == 0);
 73 |         layer_dict[layer->name] = layer;
 74 |         name_idx_map[layer->name] = ordered_layers.size();
 75 |         for (auto* op : operands)
 76 |         {
 77 |             if (!HasLayer(op->name))
 78 |                 InsertStaticLayer(op);
 79 |         }
 80 |         ordered_layers.push_back(std::make_pair(layer->name, operands));        
 81 |     }                          
 82 |     
 83 |     template<MatMode anotherMode>
 84 |     void GetState(std::string layer_name, DenseMat<anotherMode, Dtype>& dst)
 85 |     {
 86 |         assert(layer_dict.count(layer_name));
 87 |         auto& output = layer_dict[layer_name]->state->DenseDerived();
 88 |         dst.CopyFrom(output);
 89 |     }
 90 |     
 91 |     std::map< std::string, unsigned > name_idx_map;
 92 |     std::map< std::string, ILayer<mode, Dtype>* > layer_dict, static_layer_dict;
 93 |     std::vector< std::pair<std::string, std::vector< ILayer<mode, Dtype>* > > > ordered_layers;
 94 |     std::vector< bool > hash;    
 95 | };
 96 | 
 97 | template<template <MatMode, typename> class LayerType, MatMode mode, typename Dtype, typename... Args>
 98 | inline ILayer<mode, Dtype>* cl(NNGraph<mode, Dtype>& gnn,
 99 |                                std::vector< ILayer<mode, Dtype>* > operands, Args&&... args)
100 | {
101 |         return cl<LayerType>(fmt::sprintf("%s-layer-%d", LayerType<mode, Dtype>::str_type(), gnn.layer_dict.size()),
102 |                              gnn, 
103 |                              operands, 
104 |                              std::forward<Args>(args)...);
105 | }
106 |     
107 | template<template <MatMode, typename> class LayerType, MatMode mode, typename Dtype, typename... Args>
108 | inline ILayer<mode, Dtype>* cl(const std::string layer_name, 
109 |                                NNGraph<mode, Dtype>& gnn,                                
110 |                                std::vector< ILayer<mode, Dtype>* > operands, 
111 |                                Args&&... args)
112 | {        
113 |         auto* layer = new LayerType<mode, Dtype>(layer_name, std::forward<Args>(args)...);
114 |         gnn.InsertLayer(layer, operands);
115 |         return layer;
116 | }
117 | 
118 | template<template <MatMode, typename> class LayerType, MatMode mode, typename Dtype, typename... Args>
119 | inline ILayer<mode, Dtype>* cl(NNGraph<mode, Dtype>& gnn, 
120 |                                std::vector< ILayer<mode, Dtype>* > operands,
121 |                                std::vector< IParam<mode, Dtype>* > params, 
122 |                                Args&&... args)
123 | {        
124 |         return cl<LayerType>(fmt::sprintf("%s-layer-%d", LayerType<mode, Dtype>::str_type(), gnn.layer_dict.size()),
125 |                              gnn, 
126 |                              operands, 
127 |                              params,                               
128 |                              std::forward<Args>(args)...);
129 | }
130 | 
131 | // workaround for deducting list
132 | template<template <MatMode, typename> class LayerType, MatMode mode, typename Dtype, typename... Args>
133 | inline ILayer<mode, Dtype>* cl(const std::string layer_name,
134 |                                NNGraph<mode, Dtype>& gnn,
135 |                                std::vector< ILayer<mode, Dtype>* > operands,
136 |                                std::vector< IParam<mode, Dtype>* > params, 
137 |                                Args&&... args)
138 | {
139 |         auto* layer = new LayerType<mode, Dtype>(layer_name, params, std::forward<Args>(args)...);
140 |         gnn.InsertLayer(layer, operands);
141 |         return layer;
142 | }
143 | 
144 | #endif
145 | 


--------------------------------------------------------------------------------
/include/net/param_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef PARAM_LAYER_H
 2 | #define PARAM_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | #include "i_param.h"
 6 | 
 7 | template<MatMode mode, typename Dtype>
 8 | class ParamLayer : public ILayer<mode, Dtype>, public IParametric<mode, Dtype>
 9 | {
10 | public:
11 |     ParamLayer(std::string _name, std::vector< IParam<mode, Dtype>* > _params, PropErr _properr = PropErr::T)
12 |         : ILayer<mode, Dtype>(_name, _properr), params(_params) 
13 |     {
14 |         this->state = new DenseMat<mode, Dtype>();
15 | 		this->grad = new DenseMat<mode, Dtype>();
16 |     }
17 | 
18 |     static std::string str_type()
19 |     {
20 |         return "Param"; 
21 |     }
22 |     
23 |     virtual bool HasParam() override
24 |     {
25 |         return true;
26 |     }
27 |     
28 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
29 |     {
30 |         assert(operands.size() == params.size());
31 |         auto& cur_output = this->state->DenseDerived();
32 |         for (size_t i = 0; i < operands.size(); ++i)
33 |         {
34 |             if (i == 0)
35 |                 params[i]->ResetOutput(operands[i]->state, &cur_output); 
36 |             params[i]->UpdateOutput(operands[i]->state, &cur_output, i == 0 ? 0.0 : 1.0, phase);
37 |         }
38 |     }
39 |     
40 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
41 |     {
42 |         auto& cur_grad = this->grad->DenseDerived();
43 |         auto& prev_grad = operands[cur_idx]->grad->DenseDerived(); 
44 | 		
45 | 		params[cur_idx]->UpdateGradInput(&prev_grad, &cur_grad, beta);
46 |     }    
47 |     
48 |     virtual void AccDeriv(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx) override
49 |     {
50 |         if (params[cur_idx]->IsDiff())
51 |         {
52 |             dynamic_cast<IDiffParam<mode, Dtype>*>(params[cur_idx])->AccDeriv(operands[cur_idx]->state,
53 |                                                                               &this->grad->DenseDerived());
54 |         }        
55 |     }
56 |     
57 |     std::vector< IParam<mode, Dtype>* > params;
58 | };
59 | 
60 | #endif


--------------------------------------------------------------------------------
/include/net/relu_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef RELU_LAYER_H
 2 | #define RELU_LAYER_H
 3 | 
 4 | #include "i_act_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class ReLULayer;
 8 | 
 9 | template<typename Dtype>
10 | class ReLULayer<CPU, Dtype> : public IActLayer<CPU, Dtype> 
11 | {
12 | public:
13 |     
14 |     ReLULayer(std::string _name, WriteType _wt = WriteType::OUTPLACE, PropErr _properr = PropErr::T)
15 |             : IActLayer<CPU, Dtype>(_name, _wt, _properr) {}
16 | 
17 |     static std::string str_type()
18 |     {
19 |         return "ReLU"; 
20 |     }            
21 | 
22 |     virtual void Act(DenseMat<CPU, Dtype>& prev_out, DenseMat<CPU, Dtype>& cur_out) override;
23 |     
24 |     virtual void Derivative(DenseMat<CPU, Dtype>& dst, DenseMat<CPU, Dtype>& prev_output, 
25 |                             DenseMat<CPU, Dtype>& cur_output, DenseMat<CPU, Dtype>& cur_grad, Dtype beta) override;     
26 | };
27 | 
28 | template<typename Dtype>
29 | class ReLULayer<GPU, Dtype> : public IActLayer<GPU, Dtype> 
30 | {
31 | public:
32 |     
33 |     ReLULayer(std::string _name, WriteType _wt = WriteType::OUTPLACE, PropErr _properr = PropErr::T)
34 |             : IActLayer<GPU, Dtype>(_name, _wt, _properr) {}
35 | 
36 |     static std::string str_type()
37 |     {
38 |         return "ReLU"; 
39 |     }            
40 | 
41 |     virtual void Act(DenseMat<GPU, Dtype>& prev_out, DenseMat<GPU, Dtype>& cur_out) override; 
42 |     
43 |     virtual void Derivative(DenseMat<GPU, Dtype>& dst, DenseMat<GPU, Dtype>& prev_output, 
44 |                             DenseMat<GPU, Dtype>& cur_output, DenseMat<GPU, Dtype>& cur_grad, Dtype beta) override;    
45 | };
46 | 
47 | #endif


--------------------------------------------------------------------------------
/include/net/repeat_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef REPEAT_LAYER_H
 2 | #define REPEAT_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class RepeatLayer : public ILayer<mode, Dtype>
 8 | {
 9 | public:
10 |     RepeatLayer(std::string _name, size_t _rep_cnt, PropErr _properr = PropErr::T)
11 |         : ILayer<mode, Dtype>(_name, _properr), rep_cnt(_rep_cnt) 
12 |     {
13 |         this->state = new DenseMat<mode, Dtype>();
14 | 		this->grad = new DenseMat<mode, Dtype>();
15 |     }
16 |     
17 |     static std::string str_type()
18 |     {
19 |         return "Repeat"; 
20 |     }
21 |     
22 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
23 |     {
24 |         assert(operands.size() == 1);
25 |         auto& cur_output = this->state->DenseDerived();
26 |         auto& operand = operands[0]->state->DenseDerived();
27 | 
28 |         cur_output.Repmat(operand, this->rep_cnt, 1);
29 |     }    
30 |     
31 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
32 |     {
33 |         auto& cur_grad = this->grad->DenseDerived();
34 |         auto& prev_grad = operands[cur_idx]->grad->DenseDerived();
35 | 
36 |         buf.RowSum(cur_grad);
37 |         prev_grad.Axpby(1.0, buf, beta);
38 |     }
39 | 
40 |     DenseMat<mode, Dtype> buf;
41 |     size_t rep_cnt;
42 | };
43 | 
44 | #endif


--------------------------------------------------------------------------------
/include/net/sigmoid_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SIGMOID_LAYER_H
 2 | #define SIGMOID_LAYER_H
 3 | 
 4 | #include "i_act_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class SigmoidLayer; 
 8 | 
 9 | template<typename Dtype>
10 | class SigmoidLayer<CPU, Dtype> : public IActLayer<CPU, Dtype> 
11 | {
12 | public:
13 |     SigmoidLayer(std::string _name, WriteType _wt = WriteType::OUTPLACE, PropErr _properr = PropErr::T)
14 |             : IActLayer<CPU, Dtype>(_name, _wt, _properr) {}            
15 | 
16 |     static std::string str_type()
17 |     {
18 |         return "Sigmoid"; 
19 |     }        
20 | 
21 |     virtual void Act(DenseMat<CPU, Dtype>& prev_out, DenseMat<CPU, Dtype>& cur_out) override;
22 |     
23 |     virtual void Derivative(DenseMat<CPU, Dtype>& dst, DenseMat<CPU, Dtype>& prev_output, 
24 |                             DenseMat<CPU, Dtype>& cur_output, DenseMat<CPU, Dtype>& cur_grad, Dtype beta) override; 
25 | };
26 | 
27 | template<typename Dtype>
28 | class SigmoidLayer<GPU, Dtype> : public IActLayer<GPU, Dtype> 
29 | {
30 | public:
31 |     SigmoidLayer(std::string _name, WriteType _wt = WriteType::OUTPLACE, PropErr _properr = PropErr::T)
32 |             : IActLayer<GPU, Dtype>(_name, _wt, _properr) {}    
33 | 
34 |     static std::string str_type()
35 |     {
36 |         return "Sigmoid"; 
37 |     }
38 |     
39 |     virtual void Act(DenseMat<GPU, Dtype>& prev_out, DenseMat<GPU, Dtype>& cur_out) override; 
40 |     
41 |     virtual void Derivative(DenseMat<GPU, Dtype>& dst, DenseMat<GPU, Dtype>& prev_output, 
42 |                             DenseMat<GPU, Dtype>& cur_output, DenseMat<GPU, Dtype>& cur_grad, Dtype beta) override; 
43 | };
44 | 
45 | 
46 | #endif


--------------------------------------------------------------------------------
/include/net/softmax_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SOFTMAX_LAYER_H
 2 | #define SOFTMAX_LAYER_H
 3 | 
 4 | #include "i_act_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class SoftmaxLayer; 
 8 | 
 9 | template<typename Dtype>
10 | class SoftmaxLayer<CPU, Dtype> : public IActLayer<CPU, Dtype>
11 | {
12 | public:
13 |     SoftmaxLayer(std::string _name, WriteType _wt = WriteType::OUTPLACE, PropErr _properr = PropErr::T)
14 |             : IActLayer<CPU, Dtype>(_name, _wt, _properr) {}
15 | 
16 |     static std::string str_type()
17 |     {
18 |         return "Softmax"; 
19 |     }
20 |     
21 |     virtual void Act(DenseMat<CPU, Dtype>& prev_out, DenseMat<CPU, Dtype>& cur_out) override;
22 |     
23 |     virtual void Derivative(DenseMat<CPU, Dtype>& dst, DenseMat<CPU, Dtype>& prev_output, 
24 |                             DenseMat<CPU, Dtype>& cur_output, DenseMat<CPU, Dtype>& cur_grad, Dtype beta) override;
25 | 
26 |     DenseMat<CPU, Dtype> buf;                            
27 | }; 
28 | 
29 | template<typename Dtype>
30 | class SoftmaxLayer<GPU, Dtype> : public IActLayer<GPU, Dtype>
31 | {
32 | public:
33 |     SoftmaxLayer(std::string _name, WriteType _wt = WriteType::OUTPLACE, PropErr _properr = PropErr::T)
34 |             : IActLayer<GPU, Dtype>(_name, _wt, _properr) {}
35 | 
36 |     static std::string str_type()
37 |     {
38 |         return "Softmax"; 
39 |     }
40 |     
41 |     virtual void Act(DenseMat<GPU, Dtype>& prev_out, DenseMat<GPU, Dtype>& cur_out) override;
42 |     
43 |     virtual void Derivative(DenseMat<GPU, Dtype>& dst, DenseMat<GPU, Dtype>& prev_output, 
44 |                             DenseMat<GPU, Dtype>& cur_output, DenseMat<GPU, Dtype>& cur_grad, Dtype beta) override;
45 |     DenseMat<GPU, Dtype> buf;                           
46 | };
47 | 
48 | #endif


--------------------------------------------------------------------------------
/include/net/tanh_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef TANH_LAYER_H
 2 | #define TANH_LAYER_H
 3 | 
 4 | #include "i_act_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class TanhLayer; 
 8 | 
 9 | template<typename Dtype>
10 | class TanhLayer<CPU, Dtype> : public IActLayer<CPU, Dtype> 
11 | {
12 | public:
13 |     TanhLayer(std::string _name, WriteType _wt = WriteType::OUTPLACE, PropErr _properr = PropErr::T)
14 |             : IActLayer<CPU, Dtype>(_name, _wt, _properr) {}            
15 | 
16 |     static std::string str_type()
17 |     {
18 |         return "Tanh"; 
19 |     }
20 | 
21 |     virtual void Act(DenseMat<CPU, Dtype>& prev_out, DenseMat<CPU, Dtype>& cur_out) override;
22 |     
23 |     virtual void Derivative(DenseMat<CPU, Dtype>& dst, DenseMat<CPU, Dtype>& prev_output, 
24 |                             DenseMat<CPU, Dtype>& cur_output, DenseMat<CPU, Dtype>& cur_grad, Dtype beta) override; 
25 | };
26 | 
27 | #endif


--------------------------------------------------------------------------------
/include/net/transpose_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef TRANSPOSE_LAYER_H
 2 | #define TRANSPOSE_LAYER_H
 3 | 
 4 | #include "i_layer.h"
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | class TransposeLayer : public ILayer<mode, Dtype>
 8 | {
 9 | public:
10 |     TransposeLayer(std::string _name, PropErr _properr = PropErr::T)
11 |         : ILayer<mode, Dtype>(_name, _properr) 
12 |     {
13 |         this->state = new DenseMat<mode, Dtype>();
14 | 		this->grad = new DenseMat<mode, Dtype>();
15 |     }
16 |     
17 |     static std::string str_type()
18 |     {
19 |         return "Transpose"; 
20 |     }
21 |     
22 |     virtual void UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase) override
23 |     {
24 |         assert(operands.size() == 1);
25 |         auto& cur_output = this->state->DenseDerived();
26 |         auto& operand = operands[0]->state->DenseDerived();
27 | 
28 |         assert(operand.rows == 1 || operand.cols == 1);
29 | 
30 |         cur_output.CopyFrom(operand);
31 |         cur_output.rows = operand.cols;
32 |         cur_output.cols = operand.rows;
33 |     }    
34 |     
35 |     virtual void BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta) override
36 |     {
37 |         auto& cur_grad = this->grad->DenseDerived();
38 |         auto& prev_grad = operands[cur_idx]->grad->DenseDerived();
39 | 
40 |         std::swap(cur_grad.rows, cur_grad.cols);
41 |         prev_grad.Axpby(1.0, cur_grad, beta);
42 |         std::swap(cur_grad.rows, cur_grad.cols);
43 |     }
44 | };
45 | 
46 | #endif


--------------------------------------------------------------------------------
/include/utils.h:
--------------------------------------------------------------------------------
  1 | #ifndef UTILS_H
  2 | #define UTILS_H
  3 | 
  4 | #include <iostream>
  5 | #include <fstream>
  6 | #include <algorithm>
  7 | #include <cmath>
  8 | #include <string>
  9 | #include "graph_struct.h"
 10 | 
 11 | struct Graph
 12 | {
 13 | 	Graph() {}
 14 | 
 15 | 	Graph(int _num_nodes) : num_nodes(_num_nodes)
 16 | 	{
 17 | 		adj.Resize(num_nodes); 
 18 | 		node_label.clear();
 19 | 	}
 20 | 
 21 | 	int num_nodes; 
 22 | 	LinkedTable<int> adj;
 23 | 	std::vector<int> node_label; 
 24 | };
 25 | 
 26 | inline void LoadIndexes(const char* filename, std::vector<int>& idx_list, size_t num_samples)
 27 | {
 28 | 	if (filename == nullptr)
 29 | 	{
 30 | 		std::cerr << num_samples << std::endl;
 31 | 		std::cerr << "loading all" << std::endl;
 32 | 		idx_list.resize(num_samples);
 33 | 		for (size_t i = 0; i < idx_list.size(); ++i)
 34 | 			idx_list[i] = i;
 35 | 		std::random_shuffle(idx_list.begin(), idx_list.end());
 36 | 		return;
 37 | 	}
 38 |     std::cerr << "loading indexes " << filename << std::endl;
 39 | 	idx_list.clear();
 40 | 	FILE* fid = fopen(filename, "r");
 41 | 	int idx;
 42 | 	while (fscanf(fid, "%d", &idx) != EOF)
 43 | 	{
 44 | 		idx_list.push_back(idx);
 45 | 	}
 46 | 	fclose(fid);
 47 | }
 48 | 
 49 | inline int Str2Id(const std::string& st)
 50 | {
 51 | 	int id = 0;
 52 | 	for (size_t i = 0; i < st.size(); ++i)
 53 | 	{
 54 | 		int t = 0;
 55 | 		switch (st[i])
 56 | 		{
 57 | 			case 'A':
 58 | 				t = 0;
 59 | 				break;
 60 | 			case 'T':
 61 | 				t = 1;
 62 | 				break;
 63 | 			case 'C':
 64 | 				t = 2;
 65 | 				break;
 66 | 			case 'G':
 67 | 				t = 3; 
 68 | 				break;
 69 | 			default:
 70 | 				t = 4;
 71 | 				break;
 72 | 		}
 73 | 		id = id * (4 + cfg::pad) + t;
 74 | 	}
 75 | 	return id;
 76 | }
 77 | 
 78 | inline Graph* BuildGraph(std::string st)
 79 | {
 80 | 	if (cfg::pad)
 81 | 	{
 82 | 		cfg::num_nodes = st.size();
 83 | 	}
 84 | 	else
 85 | 		cfg::num_nodes = st.size() - cfg::window_size + 1;
 86 | 
 87 | 	Graph* g = new Graph(cfg::num_nodes);
 88 | 
 89 | 	for (int j = 0; j < (int)cfg::num_nodes; ++j)
 90 | 	{
 91 | 		std::string buf = "";
 92 | 		if (cfg::pad)
 93 | 		{				
 94 | 			for (int t = j - cfg::window_size / 2; t < j - cfg::window_size / 2 + cfg::window_size; ++t)
 95 | 			{
 96 | 				if (t < 0)
 97 | 					buf = buf + " ";
 98 | 				else if (t >= (int)st.size())
 99 | 					buf = buf + " ";
100 | 				else
101 | 					buf = buf + st[t];
102 | 			}
103 | 		} else 
104 | 			buf = st.substr(j, cfg::window_size);
105 | 
106 | 		g->node_label.push_back(Str2Id(buf));
107 | 				
108 | 		if (j)
109 | 			g->adj.AddEntry(j, j - 1);
110 | 		if (j < cfg::num_nodes - 1)
111 | 			g->adj.AddEntry(j, j + 1);	
112 | 	}
113 | 	return g;
114 | }
115 | 
116 | inline void LoadRawData(std::vector< Graph >& graph_data, std::vector<Dtype>& labels)
117 | {
118 | 	graph_data.clear();
119 | 	labels.clear();
120 | 	std::ifstream ff(cfg::string_file);
121 | 
122 | 	int num_graph;
123 | 	ff >> num_graph;
124 | 
125 | 	std::string st;
126 | 	Dtype l;
127 | 	for (int i = 0; i < num_graph; ++i)
128 | 	{
129 | 		ff >> l >> st;
130 | 		labels.push_back(l * cfg::scale);
131 | 		
132 | 		Graph* g = BuildGraph(st);		
133 | 		graph_data.push_back(*g);
134 | 	}
135 | }
136 | 
137 | 
138 | #endif
139 | 


--------------------------------------------------------------------------------
/local_run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | DATA=12mer-kd
 4 | 
 5 | DATA_ROOT=data/$DATA
 6 | RESULT_ROOT=results/$DATA
 7 | 
 8 | tool=kernel_loopy_bp
 9 | 
10 | LV=2
11 | scale=0.001
12 | w=2
13 | pad=0
14 | max_pool=1
15 | global_pool=1
16 | CONV_SIZE=256
17 | FP_LEN=256
18 | n_hidden=96
19 | bsize=32
20 | learning_rate=0.001
21 | max_iter=400000
22 | cur_iter=0
23 | dev_id=0
24 | fold=1
25 | save_dir=$RESULT_ROOT/$tool-lv-$LV-conv-$CONV_SIZE-fp-$FP_LEN-bsize-$bsize-fold-$fold
26 | 
27 | if [ ! -e $save_dir ];
28 | then
29 |     mkdir -p $save_dir
30 | fi
31 | 
32 | build/$tool \
33 | 	       -result best_pred-fold-$fold.txt \
34 | 	       -global_pool $global_pool \
35 | 	       -scale $scale \
36 |                -max_pool $max_pool \
37 |                -pad $pad \
38 |                -w $w \
39 | 	       -string $DATA_ROOT/${DATA}.txt \
40 |                -train_idx $DATA_ROOT/10fold_idx/train_idx-${fold}.txt \
41 |                -test_idx $DATA_ROOT/10fold_idx/test_idx-${fold}.txt \
42 |                -lr $learning_rate \
43 |                -device $dev_id \
44 |                -maxe $max_iter \
45 |                -svdir $save_dir \
46 |                -hidden $n_hidden \
47 |                -int_test 1000 \
48 |                -int_report 100 \
49 | 	       -int_save 1000 \
50 |                -l2 0.00 \
51 |                -m 0.9 \
52 |                -lv $LV \
53 |                -conv $CONV_SIZE \
54 |                -fp $FP_LEN \
55 |                -b $bsize \
56 |                -cur_iter $cur_iter \
57 |                2>&1 | tee $save_dir/log-$fold.txt 
58 | 


--------------------------------------------------------------------------------
/make_common:
--------------------------------------------------------------------------------
 1 | dir_guard = @mkdir -p $(@D)
 2 | 
 3 | CUDA_HOME := /usr/local/cuda
 4 | MKL_ROOT := /opt/intel/mkl
 5 | 
 6 | FIND := find
 7 | CXX := g++
 8 | NVCC := $(CUDA_HOME)/bin/nvcc
 9 | NVCCFLAGS += --default-stream per-thread
10 | CXXFLAGS += -Wall -O3 -std=c++11 -g -rdynamic
11 | LDFLAGS += -lm -L$(CUDA_HOME)/lib64 -lcudart -lcublas -lcurand -lcusparse -lmkl_rt
12 | CUDA_ARCH :=    -gencode arch=compute_30,code=sm_30 \
13 | 		-gencode arch=compute_35,code=sm_35 \
14 | 		-gencode arch=compute_50,code=sm_50 \
15 | 		-gencode arch=compute_50,code=compute_50
16 | 


--------------------------------------------------------------------------------
/src/kernel_loopy_bp.cpp:
--------------------------------------------------------------------------------
  1 | #include "nn_common.h"
  2 | 
  3 | void InitModel()
  4 | {
  5 | 	init_const_dict["n2e"] = &graph;
  6 | 	init_const_dict["e2e"] = &graph;
  7 | 	init_const_dict["e2n"] = &graph;	
  8 | 
  9 | 	const Dtype init_scale = 0.05;
 10 | 	
 11 | 	auto* n2esum_param = add_const<Node2EdgeMsgParam>(model, "n2e");	
 12 | 	auto* e2esum_param = add_const<Edge2EdgeMsgParam>(model, "e2e");	
 13 | 	
 14 | 	auto* w_n2l = add_diff<LinearParam>(model, "input-node-to-latent", cfg::node_dim, cfg::conv_size, 0, init_scale, BiasOption::NONE);
 15 |     auto* p_edge_conv = add_diff<LinearParam>(model, "linear-edge-conv", cfg::conv_size, cfg::conv_size, 0, init_scale, BiasOption::NONE);
 16 | 
 17 | 	auto* e2nsum_param = add_const<Edge2NodeMsgParam>(model, "e2n");
 18 | 	auto* out_params = add_diff<LinearParam>(model, "outparam", cfg::conv_size, cfg::fp_len, 0, init_scale);
 19 | 		
 20 | 	auto* h2_weight = add_diff<LinearParam>(model, "h2_weight", cfg::n_hidden, 1, 0, init_scale);
 21 | 
 22 | 	IParam<mode, Dtype> *node_pool_param = nullptr, *subg_param = nullptr, *h1_weight = nullptr;
 23 | 	if (cfg::max_pool)
 24 | 	{	
 25 | 		init_const_dict["max_pool"] = &graph;	
 26 | 		node_pool_param = add_const<NodeMaxPoolParam>(model, "max_pool");
 27 | 	}
 28 | 	if (cfg::global_pool)
 29 | 	{
 30 | 		init_const_dict["subgraph_pool"] = &graph;
 31 | 		subg_param = add_const<SubgraphMsgParam>(model, "subgraph_pool");
 32 | 		h1_weight = add_diff<LinearParam>(model, "h1_weight", cfg::fp_len, cfg::n_hidden, 0, init_scale);
 33 | 	} else {
 34 | 		init_const_dict["subgraph_concat"] = &graph;	
 35 | 		subg_param = add_const<SubgraphConcatParam>(model, "subgraph_concat");
 36 | 		h1_weight = add_diff<LinearParam>(model, "h1_weight", cfg::fp_len * cfg::num_nodes, cfg::n_hidden, 0, init_scale);
 37 | 	}
 38 | 
 39 |    	ILayer<mode, Dtype>* node_input = cl<InputLayer>("data", gnn, {});
 40 | 	auto* label_layer = cl<InputLayer>("label", gnn, {});
 41 |     auto* input_node_linear = cl<ParamLayer>(gnn, {node_input}, {w_n2l}); 
 42 |     auto* input_message = cl<ParamLayer>(gnn, {input_node_linear}, {n2esum_param});
 43 |     auto* input_potential_layer = cl<ReLULayer>(gnn, {input_message});
 44 | 
 45 |     int lv = 0;
 46 |     ILayer<mode, Dtype>* cur_message_layer = input_potential_layer;
 47 |     while (lv < cfg::max_lv)
 48 |     {
 49 |     	lv++;
 50 |     	auto* e2epool = cl<ParamLayer>(gnn, {cur_message_layer}, {e2esum_param}); 
 51 | 
 52 |     	auto* edge_linear = cl<ParamLayer>(gnn, {e2epool}, {p_edge_conv});
 53 | 
 54 |     	auto* merged_linear = cl<CAddLayer>(gnn, {edge_linear, input_message}); 
 55 | 
 56 |     	cur_message_layer = cl<ReLULayer>(gnn, {merged_linear}); 
 57 |     }
 58 | 
 59 |     auto* e2npool = cl<ParamLayer>(gnn, {cur_message_layer}, {e2nsum_param}); 
 60 |     auto* hidden_msg = cl<ReLULayer>(gnn, {e2npool});
 61 |     auto* out_linear = cl<ParamLayer>(gnn, {hidden_msg}, {out_params});
 62 | 	auto* reluact_fp = cl<ReLULayer>(gnn, {out_linear});	
 63 | 
 64 | 	ILayer<mode, Dtype>* y_potential = nullptr;
 65 | 	if (cfg::max_pool)
 66 | 	{
 67 | 		auto* out_pool = cl<ParamLayer>(gnn, {reluact_fp}, {node_pool_param});
 68 | 	    y_potential = cl<ParamLayer>(gnn, {out_pool}, {subg_param});	
 69 | 	} else 
 70 | 	{
 71 | 		y_potential = cl<ParamLayer>(gnn, {reluact_fp}, {subg_param});
 72 | 	}
 73 | 
 74 | 	auto* hidden = cl<ParamLayer>(gnn, {y_potential}, {h1_weight});
 75 | 
 76 | 	auto* reluact_out_nn = cl<ReLULayer>(gnn, {hidden}); 
 77 | 	
 78 | 	auto* output = cl<ParamLayer>("output", gnn, {reluact_out_nn}, {h2_weight});
 79 | 	
 80 | 	cl<MSECriterionLayer>("mse", gnn, {output, label_layer});	
 81 | 	cl<ABSCriterionLayer>("mae", gnn, {output, label_layer}, PropErr::N);
 82 | }
 83 | 
 84 | int main(int argc, const char** argv)
 85 | {
 86 | 	cfg::LoadParams(argc, argv);			
 87 | 
 88 | 	GPUHandle::Init(cfg::dev_id);	
 89 | 
 90 | 	LoadRawData(graph_data, labels);
 91 | 	LoadIndexes(cfg::train_idx_file, train_idx, labels.size());
 92 | 	LoadIndexes(cfg::test_idx_file, test_idx, labels.size());
 93 | 
 94 | 	InitModel();
 95 | 
 96 |     MainLoop(); 
 97 | 	
 98 | 	GPUHandle::Destroy();
 99 | 	return 0;
100 | }
101 | 


--------------------------------------------------------------------------------
/src/kernel_mean_field.cpp:
--------------------------------------------------------------------------------
 1 | #include "nn_common.h"
 2 | #include <set>
 3 | 
 4 | void InitModel()
 5 | {
 6 |     init_const_dict["n2n"] = &graph;
 7 | 	const Dtype init_scale = 0.05;
 8 | 	
 9 | 	auto* n2nsum_param = add_const<Node2NodeMsgParam>(model, "n2n");	
10 | 	
11 |     auto* w_n2l = add_diff<LinearParam>(model, "input-node-to-latent", cfg::node_dim, cfg::conv_size, 0, init_scale);
12 |     auto* p_node_conv = add_diff<LinearParam>(model, "linear-node-conv", cfg::conv_size, cfg::conv_size, 0, init_scale);
13 | 	auto* out_params = add_diff<LinearParam>(model, "outparam", cfg::conv_size, cfg::fp_len, 0, init_scale);
14 | 	auto* h2_weight = add_diff<LinearParam>(model, "h2_weight", cfg::n_hidden, 1, 0, init_scale);
15 | 
16 | 	IParam<mode, Dtype>* subg_param = nullptr, *node_pool_param = nullptr, *h1_weight = nullptr;
17 | 	if (cfg::max_pool)
18 | 	{	
19 | 		init_const_dict["max_pool"] = &graph;	
20 | 
21 | 		node_pool_param = add_const<NodeMaxPoolParam>(model, "max_pool");
22 | 	}
23 | 
24 | 	if (cfg::global_pool)
25 | 	{
26 | 		init_const_dict["subgraph_pool"] = &graph;
27 | 		subg_param = add_const<SubgraphMsgParam>(model, "subgraph_pool");
28 | 		h1_weight = add_diff<LinearParam>(model, "h1_weight", cfg::fp_len, cfg::n_hidden, 0, init_scale);
29 | 	} else {
30 | 		init_const_dict["subgraph_concat"] = &graph;	
31 | 		subg_param = add_const<SubgraphConcatParam>(model, "subgraph_concat");
32 | 		h1_weight = add_diff<LinearParam>(model, "h1_weight", cfg::fp_len * cfg::num_nodes, cfg::n_hidden, 0, init_scale);
33 | 	}
34 | 
35 | 	ILayer<mode, Dtype>* input_layer = cl<InputLayer>("data", gnn, {});
36 | 	auto* label_layer = cl<InputLayer>("label", gnn, {});
37 |     auto* input_message = cl<ParamLayer>(gnn, {input_layer}, {w_n2l}); 
38 | 	auto* input_potential_layer = cl<ReLULayer>(gnn, {input_message}); 
39 | 
40 | 	int lv = 0;
41 | 	ILayer<mode, Dtype>* cur_message_layer = input_potential_layer;
42 | 	while (lv < cfg::max_lv)
43 | 	{	
44 | 		lv++; 
45 | 		auto* n2npool = cl<ParamLayer>(gnn, {cur_message_layer}, {n2nsum_param});
46 | 
47 | 		auto* node_linear = cl<ParamLayer>(gnn, {n2npool}, {p_node_conv});
48 | 
49 | 		auto* merged_linear = cl<CAddLayer>(gnn, {node_linear, input_message});  
50 | 
51 | 		cur_message_layer = cl<ReLULayer>(gnn, {merged_linear}); 
52 | 	}			
53 | 	
54 | 	auto* out_linear = cl<ParamLayer>(gnn, {cur_message_layer}, {out_params});		
55 | 	auto* reluact_fp = cl<ReLULayer>(gnn, {out_linear});
56 | 
57 | 	ILayer<mode, Dtype>* y_potential = nullptr;
58 | 	if (cfg::max_pool)
59 | 	{
60 | 		auto* out_pool = cl<ParamLayer>(gnn, {reluact_fp}, {node_pool_param});
61 | 	    y_potential = cl<ParamLayer>(gnn, {out_pool}, {subg_param});	
62 | 	} else 
63 | 	{
64 | 		y_potential = cl<ParamLayer>(gnn, {reluact_fp}, {subg_param});
65 | 	}
66 | 
67 | 	auto* hidden = cl<ParamLayer>(gnn, {y_potential}, {h1_weight});
68 | 	
69 | 	auto* reluact_out_nn = cl<ReLULayer>(gnn, {hidden}); 
70 | 	
71 | 	auto* output = cl<ParamLayer>("output", gnn, {reluact_out_nn}, {h2_weight});
72 | 	
73 | 	cl<MSECriterionLayer>("mse", gnn, {output, label_layer});	
74 | 	cl<ABSCriterionLayer>("mae", gnn, {output, label_layer}, PropErr::N);
75 | }
76 | 
77 | int main(int argc, const char** argv)
78 | {
79 | 	cfg::LoadParams(argc, argv);			
80 | 
81 | 	GPUHandle::Init(cfg::dev_id);	
82 | 
83 | 	LoadRawData(graph_data, labels);
84 | 	LoadIndexes(cfg::train_idx_file, train_idx, labels.size());
85 | 	LoadIndexes(cfg::test_idx_file, test_idx, labels.size());
86 | 
87 | 	InitModel();
88 | 
89 |     MainLoop(); 
90 | 
91 | 	GPUHandle::Destroy();
92 | 	return 0;
93 | }
94 | 


--------------------------------------------------------------------------------
/src/matrix/cpu_sparse_mat.cpp:
--------------------------------------------------------------------------------
  1 | #include "sparse_matrix.h"
  2 | #include <algorithm>
  3 | #include <cstring>
  4 | #include "mkl_helper.h"
  5 | 
  6 | template<typename Dtype>
  7 | SparseMat<CPU, Dtype>::SparseMat()
  8 | {
  9 | 		this->count = this->rows = this->cols = 0;
 10 | 		data = std::make_shared< SpData<CPU, Dtype> >();
 11 | }
 12 | 
 13 | template<typename Dtype>
 14 | SparseMat<CPU, Dtype>::SparseMat(size_t _rows, size_t _cols)
 15 | {
 16 | 		this->rows = _rows; this->cols = _cols;
 17 | 		this->count = this->rows * this->cols;
 18 | 		data = std::make_shared< SpData<CPU, Dtype> >();
 19 | }
 20 | 
 21 | template<typename Dtype>
 22 | SparseMat<CPU, Dtype>::~SparseMat()
 23 | {	
 24 | 			
 25 | }
 26 | 
 27 | template<typename Dtype>
 28 | void SparseMat<CPU, Dtype>::Serialize(FILE* fid)
 29 | {
 30 | 		IMatrix<CPU, Dtype>::Serialize(fid);
 31 | 		data->Serialize(fid);
 32 | }
 33 | 
 34 | template<typename Dtype>
 35 | void SparseMat<CPU, Dtype>::Deserialize(FILE* fid)
 36 | {
 37 | 		IMatrix<CPU, Dtype>::Deserialize(fid);
 38 | 		data = std::make_shared< SpData<CPU, Dtype> >();
 39 | 		data->Deserialize(fid);
 40 | }
 41 | 		
 42 | template<typename Dtype>
 43 | void SparseMat<CPU, Dtype>::Resize(size_t newRos, size_t newCols)
 44 | {
 45 | 		this->count = newRos * newCols;
 46 | 		this->rows = newRos;
 47 | 		this->cols = newCols;
 48 | }
 49 | 
 50 | template<typename Dtype>
 51 | void SparseMat<CPU, Dtype>::Print2Screen() // debug only
 52 | {
 53 |         std::cerr << "========mat content========" << std::endl;
 54 |         for (size_t i = 0; i < this->rows; ++i)
 55 |         {
 56 |             for (int j = data->ptr[i]; j < data->ptr[i + 1]; ++j)
 57 |                 std::cerr << "(" << i << "," << data->col_idx[j] << ") : " << data->val[j] << std::endl;
 58 |         }
 59 |         std::cerr << "========    end    ========" << std::endl;    
 60 | }
 61 | 
 62 | template<typename Dtype>
 63 | void SparseMat<CPU, Dtype>::ResizeSp(int newNNZ, int newNPtr)
 64 | {
 65 | 		if (newNNZ > data->nzCap || newNPtr > data->ptrCap)
 66 | 		{
 67 | 			if (newNNZ > data->nzCap)
 68 | 				data->nzCap = std::max(newNNZ, data->nzCap * 2);
 69 | 			if (newNPtr > data->ptrCap)
 70 | 				data->ptrCap = std::max(newNPtr, data->ptrCap * 2);
 71 | 				data = std::make_shared< SpData<CPU, Dtype> >(data->nzCap, data->ptrCap);
 72 | 		}
 73 | 		data->nnz = newNNZ;
 74 | 		data->len_ptr = newNPtr;
 75 | }
 76 | 
 77 | template<typename Dtype>
 78 | Dtype SparseMat<CPU, Dtype>::Asum()
 79 | {
 80 |     return MKLHelper_Asum(data->nnz, data->val);
 81 | }
 82 | 
 83 | template<typename Dtype>
 84 | void SparseMat<CPU, Dtype>::CopyFrom(SparseMat<CPU, Dtype>& src)
 85 | {				
 86 | 		this->rows = src.rows;
 87 | 		this->cols = src.cols;
 88 | 		this->count = src.count;
 89 | 		ResizeSp(src.data->nnz, src.data->len_ptr);
 90 | 		memcpy(data->val, src.data->val, sizeof(Dtype) * src.data->nnz);
 91 | 		memcpy(data->col_idx, src.data->col_idx, sizeof(int) * src.data->nnz);
 92 | 		memcpy(data->ptr, src.data->ptr, sizeof(int) * src.data->len_ptr);						
 93 | }
 94 | 
 95 | template<typename Dtype>
 96 | void SparseMat<CPU, Dtype>::CopyFrom(SparseMat<GPU, Dtype>& src)
 97 | {				
 98 | 		this->rows = src.rows;
 99 | 		this->cols = src.cols;
100 | 		this->count = src.count;
101 | 		ResizeSp(src.data->nnz, src.data->len_ptr);
102 | 		cudaMemcpyAsync(data->val, src.data->val, sizeof(Dtype) * src.data->nnz, cudaMemcpyDeviceToHost, GPUHandle::streams[src.streamid]);	
103 |         cudaMemcpyAsync(data->col_idx, src.data->col_idx, sizeof(int) * src.data->nnz, cudaMemcpyDeviceToHost, GPUHandle::streams[src.streamid]);
104 | 		cudaMemcpyAsync(data->ptr, src.data->ptr, sizeof(int) * src.data->len_ptr, cudaMemcpyDeviceToHost, GPUHandle::streams[src.streamid]);
105 | }
106 | 
107 | template class SparseMat<CPU, double>;
108 | template class SparseMat<CPU, float>;


--------------------------------------------------------------------------------
/src/matrix/cpu_vector.cpp:
--------------------------------------------------------------------------------
 1 | #include "vector.h"
 2 | 
 3 | template<typename Dtype>
 4 | Vector<CPU, Dtype>::~Vector()
 5 | {
 6 | 	MatUtils<CPU>::DelArr(data);
 7 | }
 8 | 
 9 | template<typename Dtype>
10 | Vector<CPU, Dtype>::Vector()
11 | {
12 | 	count = mem_size = 0U;
13 | 	data = nullptr;
14 | }
15 | 
16 | template<typename Dtype>
17 | Vector<CPU, Dtype>::Vector(size_t _count)
18 | {	
19 | 	count = _count;
20 | 	mem_size = count;
21 | 	MatUtils<CPU>::MallocArr(data, sizeof(Dtype) * mem_size);
22 | }
23 | 
24 | template<typename Dtype>
25 | void Vector<CPU, Dtype>::Resize(size_t _count)
26 | {	
27 | 	count = _count;
28 | 	if (_count > mem_size)
29 | 	{
30 | 		mem_size = _count;
31 | 		MatUtils<CPU>::DelArr(data);
32 | 		MatUtils<CPU>::MallocArr(data, sizeof(Dtype) * mem_size);		
33 | 	}
34 | }
35 | 
36 | template<typename Dtype>
37 | void Vector<CPU, Dtype>::Fill(Dtype scalar)
38 | {	
39 | 	for (size_t i = 0; i < count; ++i)
40 | 		data[i] = scalar;
41 | }
42 | 
43 | 
44 | template class Vector<CPU, double>;
45 | template class Vector<CPU, float>;


--------------------------------------------------------------------------------
/src/matrix/gpu_sparse_mat.cu:
--------------------------------------------------------------------------------
  1 | #include "sparse_matrix.h"
  2 | #include "cuda_helper.h"
  3 | #include <algorithm>
  4 | 
  5 | template<typename Dtype>
  6 | SparseMat<GPU, Dtype>::SparseMat()
  7 | {
  8 | 		this->count = this->rows = this->cols = 0;
  9 | 		streamid = 0;
 10 | 		data = std::make_shared< SpData<GPU, Dtype> >();
 11 | 		cusparseCreateMatDescr(&descr);
 12 | 		cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
 13 | 		cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
 14 | }
 15 | 
 16 | template<typename Dtype>
 17 | SparseMat<GPU, Dtype>::SparseMat(size_t _rows, size_t _cols, unsigned _streamid)
 18 | {
 19 | 		this->rows = _rows; this->cols = _cols;
 20 | 		this->count = _rows * _cols;
 21 | 		streamid = _streamid;
 22 | 		data = std::make_shared< SpData<GPU, Dtype> >();
 23 | 		cusparseCreateMatDescr(&descr);
 24 | 		cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
 25 | 		cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
 26 | }
 27 | 
 28 | template<typename Dtype>
 29 | SparseMat<GPU, Dtype>::~SparseMat()
 30 | {
 31 | 		cusparseDestroyMatDescr(descr);
 32 | }
 33 | 
 34 | template<typename Dtype>
 35 | void SparseMat<GPU, Dtype>::Resize(size_t newRos, size_t newCols)
 36 | {
 37 | 		this->count = newRos * newCols;
 38 | 		this->rows = newRos;
 39 | 		this->cols = newCols;
 40 | }
 41 | 
 42 | template<typename Dtype>
 43 | void SparseMat<GPU, Dtype>::Print2Screen() // debug only
 44 | {
 45 |         throw "not implemented";
 46 | }
 47 | 
 48 | template<typename Dtype>
 49 | void SparseMat<GPU, Dtype>::ResizeSp(int newNNZ, int newNPtr)
 50 | {
 51 | 		if (newNNZ > data->nzCap || newNPtr > data->ptrCap)
 52 | 		{
 53 | 			if (newNNZ > data->nzCap)
 54 | 				data->nzCap = std::max(newNNZ, data->nzCap * 2);
 55 | 			if (newNPtr > data->ptrCap)
 56 | 				data->ptrCap = std::max(newNPtr, data->ptrCap * 2);
 57 | 				data = std::make_shared< SpData<GPU, Dtype> >(data->nzCap, data->ptrCap);
 58 | 		}
 59 | 		data->nnz = newNNZ;
 60 | 		data->len_ptr = newNPtr;
 61 | }
 62 | 	
 63 | template<typename Dtype>
 64 | Dtype SparseMat<GPU, Dtype>::Asum()
 65 | {
 66 | 		return CudaHelper_Asum(GPUHandle::cublashandle, data->nnz, data->val);
 67 | }
 68 | 	
 69 | template<typename Dtype>
 70 | void SparseMat<GPU, Dtype>::CopyFrom(SparseMat<CPU, Dtype>& src)
 71 | {				
 72 | 		this->rows = src.rows;
 73 | 		this->cols = src.cols;
 74 | 		this->count = src.count;
 75 | 		ResizeSp(src.data->nnz, src.data->len_ptr);
 76 | 		cudaMemcpyAsync(data->val, src.data->val, sizeof(Dtype) * src.data->nnz, cudaMemcpyHostToDevice, GPUHandle::streams[streamid]);	
 77 |         cudaMemcpyAsync(data->col_idx, src.data->col_idx, sizeof(int) * src.data->nnz, cudaMemcpyHostToDevice, GPUHandle::streams[streamid]);
 78 | 		cudaMemcpyAsync(data->ptr, src.data->ptr, sizeof(int) * src.data->len_ptr, cudaMemcpyHostToDevice, GPUHandle::streams[streamid]);
 79 | }
 80 | 
 81 | template<typename Dtype>
 82 | void SparseMat<GPU, Dtype>::CopyFrom(SparseMat<GPU, Dtype>& src)
 83 | {				
 84 | 		this->rows = src.rows;
 85 | 		this->cols = src.cols;
 86 | 		this->count = src.count;
 87 | 		ResizeSp(src.data->nnz, src.data->len_ptr);
 88 | 		cudaMemcpyAsync(data->val, src.data->val, sizeof(Dtype) * src.data->nnz, cudaMemcpyDeviceToDevice, GPUHandle::streams[streamid]);	
 89 |         cudaMemcpyAsync(data->col_idx, src.data->col_idx, sizeof(int) * src.data->nnz, cudaMemcpyDeviceToDevice, GPUHandle::streams[streamid]);
 90 | 		cudaMemcpyAsync(data->ptr, src.data->ptr, sizeof(int) * src.data->len_ptr, cudaMemcpyDeviceToDevice, GPUHandle::streams[streamid]);
 91 | }
 92 | 
 93 | template<typename Dtype>
 94 | void SparseMat<GPU, Dtype>::Serialize(FILE* fid)
 95 | {
 96 | 		IMatrix<GPU, Dtype>::Serialize(fid);
 97 | 		data->Serialize(fid);
 98 | }
 99 | 
100 | template<typename Dtype>
101 | void SparseMat<GPU, Dtype>::Deserialize(FILE* fid)
102 | {
103 | 		IMatrix<GPU, Dtype>::Deserialize(fid);
104 | 		data = std::make_shared< SpData<GPU, Dtype> >();
105 | 		data->Deserialize(fid);
106 | }
107 | 
108 | template class SparseMat<GPU, double>;
109 | template class SparseMat<GPU, float>;


--------------------------------------------------------------------------------
/src/matrix/gpu_vector.cu:
--------------------------------------------------------------------------------
 1 | #include "vector.h"
 2 | #include "cuda_unary_kernel.cuh"
 3 | #include "cuda_helper.h"
 4 | #include <cuda_runtime.h>
 5 | #include <iostream>
 6 | 
 7 | template<typename Dtype>
 8 | Vector<GPU, Dtype>::~Vector()
 9 | {
10 | 	MatUtils<GPU>::DelArr(data);
11 | }
12 | 
13 | template<typename Dtype>
14 | Vector<GPU, Dtype>::Vector()
15 | {
16 | 	count = mem_size = 0U;
17 | 	streamid = 0U;
18 | 	data = nullptr;
19 | }
20 | 
21 | template<typename Dtype>
22 | Vector<GPU, Dtype>::Vector(size_t _count, unsigned int _streamid)
23 | {	
24 | 	count = _count;
25 | 	mem_size = count + 1;
26 | 	MatUtils<GPU>::MallocArr(data, sizeof(Dtype) * mem_size);			
27 | 	streamid = _streamid;
28 | }
29 | 
30 | template<typename Dtype>
31 | void Vector<GPU, Dtype>::Resize(size_t _count)
32 | {	
33 | 	count = _count;
34 | 	if (count > mem_size)
35 | 	{
36 | 		mem_size = count + 1;
37 | 		MatUtils<GPU>::DelArr(data);
38 | 		MatUtils<GPU>::MallocArr(data, sizeof(Dtype) * mem_size);
39 | 	}
40 | }
41 | 
42 | template<typename Dtype>
43 | void Vector<GPU, Dtype>::Fill(Dtype scalar)
44 | {	
45 | 	if (fabs(scalar) < eps)
46 | 		cudaMemset(data, 0, count * sizeof(Dtype));
47 | 	else
48 |         UnaryOp(data, count, UnarySet<Dtype>(scalar), streamid);
49 | }
50 | 
51 | template<typename Dtype>
52 | void Vector<GPU, Dtype>::CopyFrom(Vector<CPU, Dtype>& src)
53 | {
54 | 	Resize(src.count);
55 | 	cudaMemcpyAsync(data, src.data, sizeof(Dtype) * count, cudaMemcpyHostToDevice, GPUHandle::streams[streamid]);
56 | }
57 | 
58 | template class Vector<GPU, double>;
59 | template class Vector<GPU, float>;


--------------------------------------------------------------------------------
/src/matrix/gpuhandle.cu:
--------------------------------------------------------------------------------
 1 | #include "gpuhandle.h"
 2 | 
 3 | __global__ void SetupRandKernel(curandState_t *state, unsigned long long seed) 
 4 | {
 5 |     const unsigned int tidx = NUM_RND_THREADS_PER_BLOCK * blockIdx.x + threadIdx.x;
 6 |     /* Each thread gets same seed, a different sequence number,
 7 |      no offset */
 8 |     curand_init(seed, tidx, 0, &state[tidx]);
 9 | }
10 | 
11 | void GPUHandle::Init(int dev_id, unsigned int _streamcnt)
12 | {
13 | 	streamcnt = _streamcnt;
14 | 	cudaDeviceReset();
15 | 	cudaSetDevice(dev_id);
16 | 	streams = new cudaStream_t[streamcnt];
17 | 	for(unsigned int id = 0; id < streamcnt; ++id)
18 | 	{
19 | 		cudaStreamCreate(&streams[id]);
20 | 	}
21 | 	cublasCreate(&cublashandle);
22 | 	cusparseCreate(&cusparsehandle);
23 | 	curandCreateGenerator(&curandgenerator, CURAND_RNG_PSEUDO_DEFAULT);
24 | 	
25 | 	curandSetPseudoRandomGeneratorSeed(curandgenerator, time(NULL));
26 | 	
27 |     cudaMalloc((void **)&devRandStates, NUM_RND_STREAMS * sizeof(curandState_t));
28 | 	SetupRandKernel<<<NUM_RND_BLOCKS, NUM_RND_THREADS_PER_BLOCK>>>(devRandStates, 1 + time(NULL)*2);
29 | }
30 | 
31 | void GPUHandle::Destroy()
32 | {
33 | 	for(unsigned int id = 0; id < streamcnt; ++id)
34 | 	{
35 | 		cudaStreamDestroy(streams[id]);
36 | 	}
37 | 	cublasDestroy_v2(cublashandle);
38 | 	cusparseDestroy(cusparsehandle);
39 | 	curandDestroyGenerator(curandgenerator);
40 |     cudaFree(devRandStates);
41 | 	streamcnt = 0U;
42 | }
43 | 
44 | curandState_t* GPUHandle::devRandStates = NULL;
45 | cudaStream_t* GPUHandle::streams = NULL;
46 | cublasHandle_t GPUHandle::cublashandle;
47 | cusparseHandle_t GPUHandle::cusparsehandle;
48 | curandGenerator_t GPUHandle::curandgenerator;
49 | unsigned int GPUHandle::streamcnt = 1U;
50 | 


--------------------------------------------------------------------------------
/src/net/act_layer.cpp:
--------------------------------------------------------------------------------
  1 | #include "relu_layer.h"
  2 | #include "sigmoid_layer.h"
  3 | #include "tanh_layer.h"
  4 | #include "softmax_layer.h"
  5 | #include "multinomial_sample_layer.h"
  6 | #include "mkl_helper.h"
  7 | #include "dense_matrix.h"
  8 | 
  9 | // =========================================== relu layer ================================================
 10 | template<typename Dtype>
 11 | void ReLULayer<CPU, Dtype>::Act(DenseMat<CPU, Dtype>& prev_out, DenseMat<CPU, Dtype>& cur_out)
 12 | {
 13 |         for (size_t i = 0; i < cur_out.count; ++i)
 14 |             cur_out.data[i] = prev_out.data[i] > 0 ? prev_out.data[i] : 0; 
 15 | }
 16 | 
 17 | template<typename Dtype>
 18 | void ReLULayer<CPU, Dtype>::Derivative(DenseMat<CPU, Dtype>& dst, DenseMat<CPU, Dtype>& prev_output, 
 19 |                             DenseMat<CPU, Dtype>& cur_output, DenseMat<CPU, Dtype>& cur_grad, Dtype beta)
 20 | {
 21 |         dst.Scale(beta);
 22 |         for (int i = 0; i < dst.count; ++i)
 23 |             if (cur_output.data[i] > 0.0)
 24 |                 dst.data[i] += cur_grad.data[i];
 25 | }
 26 | 
 27 | template class ReLULayer<CPU, float>;
 28 | template class ReLULayer<CPU, double>;
 29 | 
 30 | // =========================================== sigmoid layer ================================================
 31 | 
 32 | template<typename Dtype>
 33 | void SigmoidLayer<CPU, Dtype>::Act(DenseMat<CPU, Dtype>& prev_out, DenseMat<CPU, Dtype>& cur_out)
 34 | {
 35 |     for (size_t i = 0; i < cur_out.count; ++i)
 36 |         cur_out.data[i] = 1.0 / (1.0 + exp(-prev_out.data[i])); 
 37 | }
 38 | 
 39 | template<typename Dtype>
 40 | void SigmoidLayer<CPU, Dtype>::Derivative(DenseMat<CPU, Dtype>& dst, DenseMat<CPU, Dtype>& prev_output, 
 41 |                             DenseMat<CPU, Dtype>& cur_output, DenseMat<CPU, Dtype>& cur_grad, Dtype beta)
 42 | {
 43 |     dst.Scale(beta);
 44 |     for (size_t i = 0; i < cur_output.count; ++i)
 45 | 		dst.data[i] += cur_grad.data[i] * cur_output.data[i] * (1 - cur_output.data[i]);        	
 46 | }
 47 | 
 48 | template class SigmoidLayer<CPU, float>;
 49 | template class SigmoidLayer<CPU, double>;
 50 | 
 51 | // =========================================== tanh layer ================================================
 52 | 
 53 | template<typename Dtype>
 54 | void TanhLayer<CPU, Dtype>::Act(DenseMat<CPU, Dtype>& prev_out, DenseMat<CPU, Dtype>& cur_out)
 55 | {
 56 |     Dtype x, y;
 57 |     for (size_t i = 0; i < cur_out.count; ++i)
 58 |     {
 59 |         x = exp(prev_out.data[i]); 
 60 |         y = exp(-prev_out.data[i]);
 61 |         cur_out.data[i] = (x - y) / (x + y);
 62 |     }
 63 | }
 64 | 
 65 | template<typename Dtype>
 66 | void TanhLayer<CPU, Dtype>::Derivative(DenseMat<CPU, Dtype>& dst, DenseMat<CPU, Dtype>& prev_output, 
 67 |                             DenseMat<CPU, Dtype>& cur_output, DenseMat<CPU, Dtype>& cur_grad, Dtype beta)
 68 | {
 69 |     dst.Scale(beta);
 70 |     for (size_t i = 0; i < cur_output.count; ++i)
 71 | 		dst.data[i] += cur_grad.data[i] * (1 - cur_output.data[i] * cur_output.data[i]);        	
 72 | }
 73 | 
 74 | template class TanhLayer<CPU, float>;
 75 | template class TanhLayer<CPU, double>;
 76 | 
 77 | // =========================================== softmax layer ================================================
 78 | 
 79 | template<typename Dtype>
 80 | void SoftmaxLayer<CPU, Dtype>::Act(DenseMat<CPU, Dtype>& prev_out, DenseMat<CPU, Dtype>& cur_out)
 81 | {
 82 |         if (&cur_out != &prev_out)
 83 |             cur_out.CopyFrom(prev_out);
 84 |         cur_out.Softmax();
 85 | }
 86 | 
 87 | template<typename Dtype>
 88 | void SoftmaxLayer<CPU, Dtype>::Derivative(DenseMat<CPU, Dtype>& dst, DenseMat<CPU, Dtype>& prev_output, 
 89 |                                DenseMat<CPU, Dtype>& cur_output, DenseMat<CPU, Dtype>& cur_grad, Dtype beta)
 90 | {
 91 |     buf.CopyFrom(cur_grad);
 92 |     
 93 |     Dtype z;    
 94 |     size_t offset = 0;
 95 |     for (size_t i = 0; i < buf.rows; ++i)
 96 |     {
 97 |         z = MKLHelper_Dot(buf.cols, cur_grad.data + offset, cur_output.data + offset);
 98 |         
 99 |         for (size_t j = 0; j < buf.cols; ++j)
100 |             buf.data[offset + j] -= z;
101 |         
102 |         offset += buf.cols; 
103 |     }
104 |     
105 |     buf.EleWiseMul(cur_output);
106 |     
107 |     dst.Axpby(1.0, buf, beta);
108 | }
109 | 
110 | template class SoftmaxLayer<CPU, float>;
111 | template class SoftmaxLayer<CPU, double>;
112 | 
113 | // =========================================== multinomial_sample layer ================================================
114 | 
115 | template<typename Dtype>
116 | void MultinomialSampleLayer<CPU, Dtype>::Act(DenseMat<CPU, Dtype>& prev_out, DenseMat<CPU, Dtype>& cur_out)
117 | {
118 |     auto seed = std::chrono::system_clock::now().time_since_epoch().count();
119 | 	std::default_random_engine generator(seed);
120 |     std::uniform_real_distribution<Dtype> distribution(0, 1);
121 | 
122 |     cur_out.Zeros(prev_out.rows, prev_out.cols);
123 |     Dtype* prob = prev_out.data;        
124 |     for (size_t i = 0; i < prev_out.rows; ++i)
125 |     {
126 |         size_t max_id = 0;
127 |         Dtype cum_sum = 0, threshold = distribution(generator);
128 |         for (size_t j = 0; j < prev_out.cols; ++j)
129 |         {
130 |             if (st == SampleType::MAX && prob[j] > prob[max_id])
131 |                 max_id = j;
132 |             if (st == SampleType::STOCHASTIC)
133 |             {
134 |                 cum_sum += prob[j];
135 |                 if (cum_sum >= threshold)
136 |                 {
137 |                     max_id = j;
138 |                     break;
139 |                 }
140 |             }
141 |         }
142 |         cur_out.data[cur_out.cols * i + max_id] = 1.0;
143 |         prob += prev_out.cols;
144 |     }
145 | }
146 | 
147 | template<typename Dtype>
148 | void MultinomialSampleLayer<CPU, Dtype>::Derivative(DenseMat<CPU, Dtype>& dst, DenseMat<CPU, Dtype>& prev_output, 
149 |                                DenseMat<CPU, Dtype>& cur_output, DenseMat<CPU, Dtype>& cur_grad, Dtype beta)
150 | {
151 |     dst.Axpby(1.0, cur_grad, beta);
152 | }
153 | 
154 | template class MultinomialSampleLayer<CPU, float>;
155 | template class MultinomialSampleLayer<CPU, double>;


--------------------------------------------------------------------------------
/src/net/act_layer.cu:
--------------------------------------------------------------------------------
  1 | #include "relu_layer.h"
  2 | #include "sigmoid_layer.h"
  3 | #include "softmax_layer.h"
  4 | #include "dense_matrix.h"
  5 | #include "cuda_helper.h"
  6 | #include "cuda_unary_kernel.cuh"
  7 | #include "sparse_matrix.h"
  8 | #include <cuda_runtime.h>
  9 | 
 10 | #define min(x, y) (x < y ? x : y)
 11 | 
 12 | // =========================================== relu layer ================================================
 13 | template<typename Dtype>
 14 | void ReLULayer<GPU, Dtype>::Act(DenseMat<GPU, Dtype>& prev_out, DenseMat<GPU, Dtype>& cur_out)
 15 | {
 16 |     UnaryOp(cur_out.data, prev_out.data, prev_out.count, UnaryReLU<Dtype>(), cur_out.streamid);
 17 | }
 18 | 
 19 | template<typename Dtype>
 20 | __global__ void ReLUDerivKernel(Dtype *dst, Dtype *out, Dtype* cur_grad, int numElements)
 21 | {
 22 |     int i = blockDim.x * blockIdx.x + threadIdx.x;
 23 | 
 24 |     if (i < numElements && out[i] > 0)
 25 |     {
 26 |         dst[i] += cur_grad[i];
 27 |     }
 28 | }
 29 | 
 30 | template<typename Dtype>
 31 | void ReLULayer<GPU, Dtype>::Derivative(DenseMat<GPU, Dtype>& dst, DenseMat<GPU, Dtype>& prev_output, 
 32 |                             DenseMat<GPU, Dtype>& cur_output, DenseMat<GPU, Dtype>& cur_grad, Dtype beta)
 33 | {
 34 |     dst.Scale(beta);
 35 |     
 36 |     int thread_num = min(c_uCudaThreadNum, dst.count);    
 37 |     int blocksPerGrid = (dst.count + thread_num - 1) / thread_num;
 38 |     ReLUDerivKernel <<< blocksPerGrid, thread_num, 0, GPUHandle::streams[dst.streamid] >>>(dst.data, cur_output.data, cur_grad.data, dst.count);
 39 | }
 40 | 
 41 | template class ReLULayer<GPU, float>;
 42 | template class ReLULayer<GPU, double>;
 43 | 
 44 | // =========================================== sigmoid layer ================================================
 45 | 
 46 | template<typename Dtype>
 47 | void SigmoidLayer<GPU, Dtype>::Act(DenseMat<GPU, Dtype>& prev_out, DenseMat<GPU, Dtype>& cur_out)
 48 | {
 49 |     UnaryOp(cur_out.data, prev_out.data, prev_out.count, UnarySigmoid<Dtype>(), cur_out.streamid);
 50 | }
 51 | 
 52 | template<typename Dtype>
 53 | __global__ void SigmoidDerivKernel(Dtype *dst, Dtype* cur_grad, Dtype* cur_output, int numElements)
 54 | {
 55 |     int i = blockDim.x * blockIdx.x + threadIdx.x;
 56 | 
 57 |     if (i < numElements)
 58 |     {
 59 |         dst[i] += cur_grad[i] * cur_output[i] * (1 - cur_output[i]);
 60 |     }
 61 | }
 62 | 
 63 | template<typename Dtype>
 64 | void SigmoidLayer<GPU, Dtype>::Derivative(DenseMat<GPU, Dtype>& dst, DenseMat<GPU, Dtype>& prev_output, 
 65 |                             DenseMat<GPU, Dtype>& cur_output, DenseMat<GPU, Dtype>& cur_grad, Dtype beta)
 66 | {
 67 |     dst.Scale(beta);
 68 |     
 69 |     int thread_num = min(c_uCudaThreadNum, dst.count);    
 70 |     int blocksPerGrid = (dst.count + thread_num - 1) / thread_num;
 71 |     SigmoidDerivKernel <<< blocksPerGrid, thread_num, 0, GPUHandle::streams[dst.streamid] >>>(dst.data, cur_grad.data, cur_output.data, dst.count);
 72 | }
 73 | 
 74 | template class SigmoidLayer<GPU, float>;
 75 | template class SigmoidLayer<GPU, double>;
 76 | 
 77 | // =========================================== softmax layer ================================================
 78 | 
 79 | template<typename Dtype>
 80 | void SoftmaxLayer<GPU, Dtype>::Act(DenseMat<GPU, Dtype>& prev_out, DenseMat<GPU, Dtype>& cur_out)
 81 | {
 82 |         if (&cur_out != &prev_out)
 83 |             cur_out.CopyFrom(prev_out);
 84 |         cur_out.Softmax();
 85 | }
 86 | 
 87 | // Copied from https://github.com/torch/cunn/blob/master/SoftMax.cu
 88 | template<typename Dtype>
 89 | __global__ void cunn_SoftMax_updateGradInput_kernel(Dtype *gradInput, Dtype *output, Dtype *gradOutput,
 90 |                                                     int nframe, int dim)
 91 | {
 92 |   __shared__ Dtype buffer[SOFTMAX_THREADS];
 93 |   Dtype *gradInput_k = gradInput + blockIdx.x*dim + blockIdx.y;
 94 |   Dtype *output_k = output + blockIdx.x*dim + blockIdx.y;
 95 |   Dtype *gradOutput_k = gradOutput + blockIdx.x*dim + blockIdx.y;
 96 | 
 97 |   int i_start = threadIdx.x;
 98 |   int i_end = dim;
 99 |   int i_step = blockDim.x;
100 | 
101 |   // sum?
102 |   buffer[threadIdx.x] = 0;
103 |   for (int i=i_start; i<i_end; i+=i_step)
104 |     buffer[threadIdx.x] += gradOutput_k[i] * output_k[i];
105 | 
106 |   __syncthreads();
107 | 
108 |   // reduce
109 |   if (threadIdx.x == 0)
110 |   {
111 |     Dtype sum_k = 0;
112 |     for (int i=0; i<blockDim.x; i++)
113 |       sum_k += buffer[i];
114 |     buffer[0] = sum_k;
115 |   }
116 | 
117 |   __syncthreads();
118 | 
119 |   Dtype sum_k = buffer[0];
120 |   for (int i=i_start; i<i_end; i+=i_step)
121 |     gradInput_k[i] = output_k[i] * (gradOutput_k[i] - sum_k);
122 | }
123 | 
124 | template<typename Dtype>
125 | void SoftmaxLayer<GPU, Dtype>::Derivative(DenseMat<GPU, Dtype>& dst, DenseMat<GPU, Dtype>& prev_output, 
126 |                                DenseMat<GPU, Dtype>& cur_output, DenseMat<GPU, Dtype>& cur_grad, Dtype beta)
127 | {
128 |     buf.Zeros(dst.rows, dst.cols);
129 |     dim3 blocks(cur_output.rows, 1);
130 |     dim3 threads(SOFTMAX_THREADS);
131 |     cunn_SoftMax_updateGradInput_kernel<<<blocks, 
132 |                                           threads, 
133 |                                           0, 
134 |                                           GPUHandle::streams[buf.streamid]>>>
135 |                                        (buf.data, cur_output.data, cur_grad.data, cur_output.rows, cur_output.cols);
136 |     dst.Axpby(1.0, buf, beta);                                            
137 | }
138 | 
139 | template class SoftmaxLayer<GPU, float>;
140 | template class SoftmaxLayer<GPU, double>;


--------------------------------------------------------------------------------
/src/net/concat_layer.cpp:
--------------------------------------------------------------------------------
 1 | #include "concat_layer.h"
 2 | #include "dense_matrix.h"
 3 | 
 4 | template<MatMode mode, typename Dtype>
 5 | ConcatLayer<mode, Dtype>::ConcatLayer(std::string _name, PropErr _properr)
 6 |                             : ILayer<mode, Dtype>(_name, _properr)
 7 | {
 8 |     this->state = new DenseMat<mode, Dtype>();
 9 |     this->grad = new DenseMat<mode, Dtype>();
10 | }
11 | 
12 | template<MatMode mode, typename Dtype>
13 | void ConcatLayer<mode, Dtype>::UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase)
14 | {
15 |     std::vector< DenseMat<mode, Dtype>* > prev_states;
16 |     prev_states.resize(operands.size());
17 |     
18 |     for (size_t i = 0; i < operands.size(); ++i)
19 |         prev_states[i] = &(operands[i]->state->DenseDerived());
20 |         
21 |     auto& state = this->state->DenseDerived();
22 |     state.ConcatCols(prev_states);        
23 | }
24 | 
25 | template<MatMode mode, typename Dtype>
26 | void ConcatLayer<mode, Dtype>::BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta)
27 | {
28 |     assert(cur_idx < operands.size());
29 |     
30 |     auto& prev_grad = operands[cur_idx]->grad->DenseDerived();
31 |     auto& grad = this->grad->DenseDerived();
32 |     
33 |     size_t col_start = 0;
34 |     for (size_t i = 0; i < cur_idx; ++i)
35 |         col_start += operands[i]->state->cols;            
36 |         
37 |     buf.GetColsFrom(grad, col_start, operands[cur_idx]->state->cols);
38 |         
39 |     if (beta == 0)
40 |         prev_grad.CopyFrom(buf);
41 |     else
42 |         prev_grad.Axpby(1.0, buf, beta);
43 | }
44 | 
45 | template class ConcatLayer<CPU, double>;
46 | template class ConcatLayer<CPU, float>;
47 | template class ConcatLayer<GPU, double>;
48 | template class ConcatLayer<GPU, float>;
49 | 


--------------------------------------------------------------------------------
/src/net/gaussian_ll_layer.cpp:
--------------------------------------------------------------------------------
 1 | #include "gaussian_ll_layer.h"
 2 | #include "dense_matrix.h"
 3 | 
 4 | #define pi 3.14159265358979
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | GaussianLLLayer<mode, Dtype>::GaussianLLLayer(std::string _name, PropErr _properr)
 8 |         : ILayer<mode, Dtype>(_name, _properr) 
 9 | {
10 |     this->grad = new DenseMat<mode, Dtype>();
11 |     this->state = new DenseMat<mode, Dtype>();    
12 | }        
13 |         
14 | template<MatMode mode, typename Dtype>
15 | void GaussianLLLayer<mode, Dtype>::UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase)
16 | {
17 |     assert(operands.size() == 3);
18 |     auto& mu = operands[0]->state->DenseDerived();
19 |     auto& sigma = operands[1]->state->DenseDerived();
20 |     auto& x = operands[2]->state->DenseDerived();
21 |     
22 |     // (x - mu)^2
23 |     auto& state = this->state->DenseDerived(); 
24 |     state.GeaM(1.0, Trans::N, mu, -1.0, Trans::N, x);
25 |     state.Square();
26 |     
27 |     // (x - mu)^2 / 2 / sigma / sigma
28 |     state.Scale(0.5);
29 |     state.EleWiseDiv(sigma);
30 |     state.EleWiseDiv(sigma);
31 |     
32 |     // (x - mu)^2 / 2 / sigma / sigma + 0.5 * log(2 pi)    
33 |     state.Add(0.5 * log(2 * pi));
34 |     
35 |     // log(sigma) + (x - mu)^2 / 2 / sigma / sigma + 0.5 * log(2 pi)
36 |     auto& grad = this->grad->DenseDerived();
37 |     grad.Log(sigma);
38 |     state.Axpy(1.0, grad);
39 |     
40 |     // log likelihood
41 |     state.Scale(-1.0);
42 |     
43 |     if (this->properr == PropErr::T)
44 |     {        
45 |         diff.GeaM(1.0, Trans::N, mu, -1.0, Trans::N, x);
46 |     }        
47 | }
48 | 
49 | template<MatMode mode, typename Dtype>
50 | void GaussianLLLayer<mode, Dtype>::BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta)
51 | {
52 |     assert(operands.size() == 3 && cur_idx <= 1);
53 |         
54 |     auto& cur_grad = this->grad->DenseDerived();                
55 |     auto& prev_grad = operands[cur_idx]->grad->DenseDerived();
56 |     auto& sigma = operands[1]->state->DenseDerived();
57 |     
58 |     if (cur_idx == 0)
59 |     {
60 |         buffer.CopyFrom(diff);
61 |         buffer.EleWiseDiv(sigma);
62 |         buffer.EleWiseDiv(sigma);
63 |         buffer.Scale(-1.0);                                 
64 |     } else 
65 |     {
66 |         buffer.CopyFrom(diff);
67 |         buffer.EleWiseDiv(sigma); 
68 |         buffer.Square();        
69 |         buffer.Add(-1.0);
70 |         
71 |         buffer.EleWiseDiv(sigma);        
72 |     }
73 |     
74 |     buffer.EleWiseMul(cur_grad);  
75 |     if (beta == 0)
76 |             prev_grad.CopyFrom(buffer);
77 |         else
78 |             prev_grad.Axpby(1.0, buffer, beta);
79 | }
80 | 
81 | template class GaussianLLLayer<CPU, float>;
82 | template class GaussianLLLayer<CPU, double>;
83 | template class GaussianLLLayer<GPU, float>;
84 | template class GaussianLLLayer<GPU, double>;
85 | 


--------------------------------------------------------------------------------
/src/net/global_sum_layer.cpp:
--------------------------------------------------------------------------------
 1 | #include "global_sum_layer.h"
 2 | #include "dense_matrix.h"
 3 | 
 4 | template<MatMode mode, typename Dtype>
 5 | GlobalSumLayer<mode, Dtype>::GlobalSumLayer(std::string _name, PropErr _properr)
 6 |                 : ILayer<mode, Dtype>(_name, _properr)
 7 | {
 8 |         this->state = new DenseMat<mode, Dtype>();
 9 | 		this->grad = new DenseMat<mode, Dtype>();    
10 | }   
11 | 
12 | template<MatMode mode, typename Dtype>
13 | void GlobalSumLayer<mode, Dtype>::UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase)
14 | {
15 |         assert(operands.size() == 1);
16 |         
17 |         auto& cur_output = this->state->DenseDerived();
18 |         auto& prev_state = operands[0]->state->DenseDerived();
19 |         
20 |         this->buf.Resize(prev_state.cols, 1);
21 |         this->buf.Fill(1.0);
22 |         
23 |         cur_output.GeMM(prev_state, buf, Trans::N, Trans::N, 1.0, 0);
24 | }
25 | 
26 | template<MatMode mode, typename Dtype>
27 | void GlobalSumLayer<mode, Dtype>::BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta)
28 | {
29 |         assert(operands.size() == 1 && cur_idx == 0);
30 |         
31 |         auto& cur_grad = this->grad->DenseDerived();
32 |         auto& prev_grad = operands[0]->grad->DenseDerived();
33 |         
34 |         prev_grad.GeMM(cur_grad, this->buf, Trans::N, Trans::T, 1.0, beta);        
35 | }
36 | 
37 | template class GlobalSumLayer<CPU, double>;
38 | template class GlobalSumLayer<CPU, float>;
39 | template class GlobalSumLayer<GPU, double>;
40 | template class GlobalSumLayer<GPU, float>;
41 | 


--------------------------------------------------------------------------------
/src/net/graph_pool_param.cpp:
--------------------------------------------------------------------------------
 1 | #include "graph_pool_param.h"
 2 | #include "dense_matrix.h"
 3 | #include "sparse_matrix.h"
 4 | 
 5 | // =============================== NodeAvgPoolParam =========================================
 6 | 
 7 | template<MatMode mode, typename Dtype>
 8 | void NodeAvgPoolParam<mode, Dtype>::InitCPUWeight(GraphStruct* graph)
 9 | {
10 |         this->cpu_weight->Resize(graph->num_nodes, graph->num_nodes);
11 | 		this->cpu_weight->ResizeSp(graph->num_edges + graph->num_nodes, graph->num_nodes + 1);
12 | 		
13 | 		int nnz = 0;
14 | 		auto& data = this->cpu_weight->data;
15 | 		for (int i = 0; i < graph->num_nodes; ++i)
16 | 		{
17 | 			data->ptr[i] = nnz;
18 |                         
19 | 			auto& list = graph->in_edges->head[i];
20 |             data->val[nnz] = 1.0 / (list.size() + 1);
21 |             data->col_idx[nnz] = i;
22 |             nnz++;
23 | 			for (size_t j = 0; j < list.size(); ++j)
24 | 			{
25 | 				data->val[nnz] = 1.0 / (list.size() + 1);
26 | 				data->col_idx[nnz] = list[j].second;
27 | 				nnz++;
28 | 			}
29 | 		}
30 | 		assert(nnz == this->cpu_weight->data->nnz);
31 | 		data->ptr[graph->num_nodes] = nnz;
32 | }
33 | 
34 | template class NodeAvgPoolParam<CPU, double>;
35 | template class NodeAvgPoolParam<CPU, float>;
36 | template class NodeAvgPoolParam<GPU, double>;
37 | template class NodeAvgPoolParam<GPU, float>;
38 | 
39 | // =============================== NodeMaxPoolParam =========================================
40 | 
41 | template<typename Dtype>
42 | void NodeMaxPoolParam<CPU, Dtype>::UpdateOutput(IMatrix<CPU, Dtype>* input, DenseMat<CPU, Dtype>* output, Dtype beta, Phase phase)
43 | {
44 |         if (max_index.size() < output->count)
45 |             max_index.resize(output->count);
46 |         Dtype* input_feature = input->DenseDerived().data;
47 |         size_t cur_offset, neighbor_offset;
48 |         for (size_t i = 0; i < graph->num_nodes; ++i)
49 |         {
50 |             auto& list = graph->in_edges->head[i];
51 |             for (size_t j = 0; j < output->cols; ++j)
52 |             {
53 |                 cur_offset = i * input->cols + j;
54 |                 Dtype cur_best = input_feature[cur_offset];
55 |                 max_index[cur_offset] = cur_offset;
56 |                 for (size_t k = 0; k < list.size(); ++k)
57 |                 {
58 |                     neighbor_offset = list[k].second * input->cols + j;
59 |                     if (input_feature[neighbor_offset] > cur_best)
60 |                     {
61 |                         cur_best = input_feature[neighbor_offset];
62 |                         max_index[cur_offset] = neighbor_offset;
63 |                     }                    
64 |                 }
65 |                 output->data[cur_offset] = output->data[cur_offset] * beta + cur_best;
66 |             }            
67 |         }
68 | }
69 | 
70 | template<typename Dtype>
71 | void NodeMaxPoolParam<CPU, Dtype>::UpdateGradInput(DenseMat<CPU, Dtype>* gradInput, DenseMat<CPU, Dtype>* gradOutput, Dtype beta)
72 | {
73 |         auto& prev_grad = gradInput->DenseDerived();
74 |         
75 |         prev_grad.Scale(beta);
76 |         size_t offset;
77 |         for (size_t i = 0; i < prev_grad.rows; ++i)
78 |             for (size_t j = 0; j < prev_grad.cols; ++j)
79 |             {
80 |                 offset = i * prev_grad.cols + j;
81 |                 prev_grad.data[max_index[offset]] += gradOutput->data[offset];
82 |             }
83 | }
84 | 
85 | template class NodeMaxPoolParam<CPU, double>;
86 | template class NodeMaxPoolParam<CPU, float>;


--------------------------------------------------------------------------------
/src/net/graph_pool_param.cu:
--------------------------------------------------------------------------------
1 | #include "graph_pool_param.h"
2 | #include "dense_matrix.h"
3 | #include "sparse_matrix.h"
4 | 
5 | // =============================== NodeAvgPoolParam =========================================
6 | 
7 | 
8 | 
9 | // =============================== NodeMaxPoolParam =========================================


--------------------------------------------------------------------------------
/src/net/learner.cpp:
--------------------------------------------------------------------------------
  1 | #include "learner.h"
  2 | #include <cmath>
  3 | 
  4 | template<MatMode mode, typename Dtype>
  5 | Dtype ILearner<mode, Dtype>::ClipGradients()
  6 | {
  7 |     if (this->clipping_enabled)
  8 |     {
  9 |         auto& param_list = this->model->GetDiffParams();
 10 |                 
 11 |         Dtype norm = 0.0;
 12 |         for (auto& param_pair : param_list)
 13 |         {
 14 |             auto* param = param_pair.second;
 15 |             Dtype norm2 = param->grad.Norm2();
 16 |             norm += norm2 * norm2;
 17 |         }
 18 |         norm = sqrt(norm);
 19 |         if (norm > this->clip_threshold)
 20 |             return this->clip_threshold / norm;
 21 |     }
 22 |     return 1.0;
 23 | }
 24 | 
 25 | template class ILearner<CPU, float>;
 26 | template class ILearner<GPU, float>;
 27 | template class ILearner<CPU, double>;
 28 | template class ILearner<GPU, double>;
 29 | 
 30 | template<MatMode mode, typename Dtype>
 31 | void SGDLearner<mode, Dtype>::Update()
 32 | {
 33 |     auto& param_list = this->model->GetDiffParams();
 34 |             
 35 |     for (auto& param_pair : param_list)
 36 |     {
 37 |         auto* param = param_pair.second;
 38 |         param->value.Axpby(-this->cur_lr, param->grad, 1 - this->cur_lr * this->l2_penalty);
 39 |         param->grad.Zeros();
 40 |     }
 41 | }
 42 | 
 43 | template class SGDLearner<CPU, float>;
 44 | template class SGDLearner<GPU, float>;
 45 | template class SGDLearner<CPU, double>;
 46 | template class SGDLearner<GPU, double>;
 47 | 
 48 | template<MatMode mode, typename Dtype>
 49 | void MomentumSGDLearner<mode, Dtype>::Update()
 50 | {
 51 |     auto& param_list = this->model->GetDiffParams();
 52 |             
 53 |     for (auto& param_pair : param_list)
 54 |     {        
 55 |         auto& name = param_pair.first;
 56 |         auto* param = param_pair.second;        
 57 |         if (momentum > 0)
 58 |         {
 59 |             if (acc_grad_dict.count(name) == 0)
 60 |             {
 61 |                 acc_grad_dict[name] = std::make_shared< DenseMat<mode, Dtype> >(param->grad.rows, param->grad.cols);
 62 |                 acc_grad_dict[name]->Zeros();
 63 |             }
 64 |             param->grad.Axpy(this->l2_penalty, param->value);
 65 |             acc_grad_dict[name]->Axpby(this->cur_lr, param->grad, momentum);
 66 |             param->value.Axpy(-1.0, *acc_grad_dict[name]);
 67 |         } else // do normal sgd
 68 |             param->value.Axpby(-this->cur_lr, param->grad, 1 - this->cur_lr * this->l2_penalty);
 69 |         param->grad.Zeros();
 70 |     }    
 71 | }
 72 | 
 73 | template class MomentumSGDLearner<CPU, float>;
 74 | template class MomentumSGDLearner<GPU, float>;
 75 | template class MomentumSGDLearner<CPU, double>;
 76 | template class MomentumSGDLearner<GPU, double>;
 77 | 
 78 | template<MatMode mode, typename Dtype>
 79 | void ExplicitBatchLearner<mode, Dtype>::Update()
 80 | {
 81 |     auto& param_list = this->model->GetDiffParams();
 82 |             
 83 |     for (auto& param_pair : param_list)
 84 |     {
 85 |         auto name = param_pair.first;
 86 |         auto* param = param_pair.second;
 87 |         
 88 |         assert(acc_grad_dict.count(name));
 89 |         
 90 |         param->value.Axpby(-this->cur_lr, *(acc_grad_dict[name]), 1 - this->cur_lr * this->l2_penalty);
 91 |         param->grad.Zeros();
 92 |         acc_grad_dict[name]->Zeros();
 93 |     }    
 94 | }
 95 | 
 96 | template<MatMode mode, typename Dtype>
 97 | void ExplicitBatchLearner<mode, Dtype>::AccumulateGrad()
 98 | {
 99 |     auto& param_list = this->model->GetDiffParams();
100 |             
101 |     for (auto& param_pair : param_list)
102 |     {
103 |         auto name = param_pair.first;
104 |         auto* param = param_pair.second;
105 |         if (acc_grad_dict.count(name) == 0)
106 |         {
107 |             acc_grad_dict[name] = std::make_shared< DenseMat<mode, Dtype> >(param->grad.rows, param->grad.cols);
108 |             acc_grad_dict[name]->Zeros();
109 |         }
110 |         
111 |         acc_grad_dict[name]->Axpy(1.0, param->grad);              
112 |         param->grad.Zeros();
113 |     }
114 | }
115 | 
116 | template class ExplicitBatchLearner<CPU, float>;
117 | template class ExplicitBatchLearner<GPU, float>;
118 | template class ExplicitBatchLearner<CPU, double>;
119 | template class ExplicitBatchLearner<GPU, double>;
120 | 
121 | template<MatMode mode, typename Dtype>
122 | void AdamLearner<mode, Dtype>::Update()
123 | {
124 |     auto& param_list = this->model->GetDiffParams();
125 |             
126 |     Dtype gscale = this->ClipGradients();
127 |     this->cur_iter++;
128 | 
129 |     for (auto& param_pair : param_list)
130 |     {
131 |         auto name = param_pair.first;
132 |         auto* param = param_pair.second;
133 |         if (first_moments.count(name) == 0 && second_moments.count(name) == 0)
134 |         {
135 |             first_moments[name] = std::make_shared< DenseMat<mode, Dtype> >(param->grad.rows, param->grad.cols);
136 |             first_moments[name]->Zeros();
137 |             second_moments[name] = std::make_shared< DenseMat<mode, Dtype> >(param->grad.rows, param->grad.cols);
138 |             second_moments[name]->Zeros();
139 |         }           
140 |         assert(first_moments.count(name) && second_moments.count(name));
141 |         auto& m_t = *(first_moments[name]); 
142 |         auto& v_t = *(second_moments[name]);
143 |         // clipping and weight decay
144 |         param->grad.Axpby(this->l2_penalty, param->value, gscale);
145 |         // m_t = beta_1 * m_{t-1} + (1 - beta_1) * gt
146 |         m_t.Axpby(1 - beta_1, param->grad, beta_1);
147 |         // v_t = beta_2 * v_{t-1} + (1 - beta_2) * gt^2
148 |         param->grad.Square();
149 |         v_t.Axpby(1 - beta_2, param->grad, beta_2);
150 | 
151 |         // 1 / (1 - beta^t)
152 |         Dtype s1 = 1.0 / (1 - pow(beta_1, this->cur_iter));
153 |         Dtype s2 = 1.0 / (1 - pow(beta_2, this->cur_iter)); 
154 | 
155 |         // v_hat = 1 ./ (sqrt(v_t / (1 - beta_2^t)) + eps)
156 |         v_hat.CopyFrom(v_t);
157 |         v_hat.Scale(s2);
158 |         v_hat.Sqrt();
159 |         v_hat.Add(eps);
160 |         v_hat.Inv();
161 | 
162 |         v_hat.EleWiseMul(m_t);
163 |         param->value.Axpby(-this->cur_lr * s1, v_hat, 1.0);
164 | 
165 |         param->grad.Zeros();
166 |     }
167 | }
168 | 
169 | template class AdamLearner<CPU, float>;
170 | template class AdamLearner<GPU, float>;
171 | template class AdamLearner<CPU, double>;
172 | template class AdamLearner<GPU, double>;
173 | 
174 | 


--------------------------------------------------------------------------------
/src/net/loss_func.cpp:
--------------------------------------------------------------------------------
 1 | #include "loss_func.h"
 2 | #include <cmath>
 3 | 
 4 | template<typename Dtype>
 5 | Dtype LossFunc<CPU, Dtype>::GetLogLoss(DenseMat<CPU, Dtype>& pred, SparseMat<CPU, Dtype>& label)
 6 | {
 7 |         Dtype loss = 0.0;
 8 |         for (size_t i = 0; i < label.rows; ++i)
 9 |         {
10 |             for (int k = label.data->ptr[i]; k < label.data->ptr[i + 1]; ++k)
11 |                 loss -= log(pred.data[label.cols * i + label.data->col_idx[k]]) * label.data->val[k];
12 |         }
13 |         return loss;
14 | } 
15 | 
16 | template<typename Dtype>
17 | Dtype LossFunc<CPU, Dtype>::GetErrCnt(DenseMat<CPU, Dtype>& pred, SparseMat<CPU, Dtype>& label)
18 | {
19 |         Dtype loss = 0.0;
20 |         for (size_t i = 0; i < pred.rows; ++i)
21 |         {
22 |             if (pred.GetRowMaxIdx(i) != (unsigned)label.data->col_idx[i])
23 |                 loss++;
24 |         }
25 |         return loss;
26 | }
27 | 
28 | template<typename Dtype>
29 | Dtype LossFunc<CPU, Dtype>::GetAverageRank(DenseMat<CPU, Dtype>& pred, SparseMat<CPU, Dtype>& label, RankOrder order)
30 | {
31 |         Dtype loss = 0.0;
32 |         size_t offset = 0;
33 |         for (size_t i = 0; i < pred.rows; ++i)
34 |         {           
35 |             unsigned cur_label = label.data->col_idx[i];
36 |             Dtype cur_val = pred.data[offset + cur_label];
37 |             for (size_t j = 0; j < pred.cols; ++j)
38 |                 if (j != cur_label)
39 |                 {
40 |                     if (order == RankOrder::DESC && pred.data[offset + j] > cur_val)
41 |                         loss++;
42 |                     if (order == RankOrder::ASCE && pred.data[offset + j] < cur_val)
43 |                         loss++;
44 |                 }                    
45 |             offset += pred.cols;
46 |         }
47 |         loss += pred.rows;
48 |         return loss;
49 | }
50 | 
51 | template class LossFunc<CPU, float>;
52 | template class LossFunc<CPU, double>;


--------------------------------------------------------------------------------
/src/net/loss_func.cu:
--------------------------------------------------------------------------------
 1 | #include "loss_func.h"
 2 | #include <cmath>
 3 | #include "cuda_helper.h"
 4 | #define min(x, y) (x < y ? x : y)
 5 | #define BLOCK_THREADS 128
 6 | 
 7 | template<typename Dtype>
 8 | __global__ void LogLossKernel(Dtype* dst, Dtype* pred, 
 9 |                               int* row_ptr, int* col_idx, Dtype* val, 
10 |                               int nnz, int n_rows, int n_cols)
11 | {
12 |     int i = blockDim.x * blockIdx.x + threadIdx.x;
13 | 
14 |     if (i < nnz)
15 |     {
16 |         int row = get_sp_row_idx(i, row_ptr, n_rows);
17 |         dst[i] = cuda_log(pred[row * n_cols + col_idx[i]]) * val[i];
18 |     }
19 | }
20 | 
21 | template<typename Dtype>
22 | Dtype LossFunc<GPU, Dtype>::GetLogLoss(DenseMat<GPU, Dtype>& pred, SparseMat<GPU, Dtype>& label)
23 | {
24 |         buf.Resize(label.data->nnz, 1);                 
25 |         int thread_num = min(c_uCudaThreadNum, label.data->nnz);
26 |         int blocksPerGrid = (label.data->nnz + thread_num - 1) / thread_num;
27 |         LogLossKernel <<< blocksPerGrid, thread_num >>> (buf.data, pred.data, 
28 |                                                          label.data->ptr, label.data->col_idx, label.data->val,
29 |                                                          label.data->nnz, pred.rows, pred.cols); 
30 |         Dtype loss = buf.Asum();
31 |         return loss; 
32 | }
33 | 
34 | template<typename Dtype>
35 | __global__ void ErrCntKernel(Dtype *orig_ptr, Dtype* err_cnt, int* truth, int dim)
36 | {
37 |      __shared__ int buffer[BLOCK_THREADS + 1];
38 |     Dtype* dst = orig_ptr + blockIdx.x * dim + blockIdx.y;
39 |     
40 |     int i_start = threadIdx.x;
41 |     int i_end = dim;
42 |     int i_step = blockDim.x;
43 |     Dtype z;
44 |     buffer[threadIdx.x] = i_start;
45 |     Dtype cur_max = dst[i_start];
46 |     for (int i = i_start; i < i_end; i += i_step)
47 |     {
48 |         z = dst[i];
49 |         if (cur_max < z)
50 |         {
51 |             cur_max = z;
52 |             buffer[threadIdx.x] = i;
53 |         }
54 |     }
55 |     
56 |     __syncthreads();
57 | 
58 |     // reduce
59 |     if (threadIdx.x == 0)
60 |     {
61 |         int pred = buffer[0];
62 |         int other;
63 |         cur_max = dst[pred];
64 |         for (int i = 1; i < blockDim.x; i++)
65 |         {
66 |             other = buffer[i];
67 |             if(cur_max < dst[other])
68 |             {
69 |                 cur_max = dst[other];
70 |                 pred = other;
71 |             }
72 |         }
73 |         if (pred != truth[blockIdx.x])
74 |             err_cnt[blockIdx.x] = 1.0;
75 |         else
76 |             err_cnt[blockIdx.x] = 0.0;
77 |     }
78 | }
79 | 
80 | template<typename Dtype>
81 | Dtype LossFunc<GPU, Dtype>::GetErrCnt(DenseMat<GPU, Dtype>& pred, SparseMat<GPU, Dtype>& label)
82 | {
83 |         buf.Resize(label.data->nnz, 1);        
84 |         dim3 blocks(pred.rows, 1);
85 |         dim3 threads(min(BLOCK_THREADS, pred.cols));
86 |         
87 |         ErrCntKernel <<< blocks, threads, 0, GPUHandle::streams[pred.streamid] >>> (pred.data, buf.data, label.data->col_idx, pred.cols);  
88 |         Dtype loss = buf.Asum();
89 |         return loss; 
90 | }
91 | 
92 | template<typename Dtype>
93 | Dtype LossFunc<GPU, Dtype>::GetAverageRank(DenseMat<GPU, Dtype>& pred, SparseMat<GPU, Dtype>& label, RankOrder order)
94 | {
95 |         throw std::runtime_error("not implemented");
96 | }
97 | 
98 | template class LossFunc<GPU, float>;
99 | template class LossFunc<GPU, double>;


--------------------------------------------------------------------------------
/src/net/max_entropy_criterion_layer.cpp:
--------------------------------------------------------------------------------
 1 | #include "max_entropy_criterion_layer.h"
 2 | #include "dense_matrix.h"
 3 | 
 4 | template<MatMode mode, typename Dtype>
 5 | MaxEntropyCriterionLayer<mode, Dtype>::MaxEntropyCriterionLayer(std::string _name, 
 6 |                                                                   Dtype _lambda, 
 7 |                                                                   PropErr _properr)
 8 | 				 : ICriterionLayer<mode, Dtype>(_name, _lambda, _properr)
 9 | {
10 |     this->state = new DenseMat<mode, Dtype>();
11 |     this->grad = new DenseMat<mode, Dtype>();
12 | }
13 | 
14 | template<MatMode mode, typename Dtype>
15 | void MaxEntropyCriterionLayer<mode, Dtype>::UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase)
16 | {
17 |     assert(operands.size() == 1);       
18 |     
19 |     auto& p = operands[0]->state->DenseDerived();
20 |     
21 |     auto& grad = this->grad->DenseDerived();
22 |     grad.Log(p);
23 |     
24 |     auto& state = this->state->DenseDerived();
25 |     state.EleWiseMul(p, grad);    
26 |     
27 |     // negative entropy, we assume to maximize the entropy here
28 |     this->loss = state.Sum();                                  
29 | }
30 | 
31 | template<MatMode mode, typename Dtype>
32 | void MaxEntropyCriterionLayer<mode, Dtype>::BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta)
33 | {
34 |     assert(operands.size() == 1 && cur_idx == 0);
35 |     
36 |     auto& grad = this->grad->DenseDerived();
37 |     auto& prev_grad = operands[cur_idx]->grad->DenseDerived();
38 |     
39 |     if (beta == 0)
40 |     {
41 |         prev_grad.CopyFrom(grad);
42 |         prev_grad.Scale(this->lambda / grad.rows);
43 |     }
44 |     else
45 |         prev_grad.Axpby(this->lambda / grad.rows, grad, beta);
46 |         
47 |     prev_grad.Add(this->lambda / grad.rows);                          
48 | }
49 | 
50 | template class MaxEntropyCriterionLayer<CPU, float>;
51 | template class MaxEntropyCriterionLayer<CPU, double>;
52 | template class MaxEntropyCriterionLayer<GPU, float>;
53 | template class MaxEntropyCriterionLayer<GPU, double>;
54 | 


--------------------------------------------------------------------------------
/src/net/mixture_nll_criterion_layer.cpp:
--------------------------------------------------------------------------------
 1 | #include "mixture_nll_criterion_layer.h"
 2 | #include "dense_matrix.h"
 3 | 
 4 | template<MatMode mode, typename Dtype>
 5 | MixtureNLLCriterionLayer<mode, Dtype>::MixtureNLLCriterionLayer(std::string _name, Dtype _lambda, PropErr _properr)
 6 |         : ICriterionLayer<mode, Dtype>(_name, _lambda, _properr)
 7 | {    
 8 |     this->state = new DenseMat<mode, Dtype>();    
 9 | }        
10 |         
11 | template<MatMode mode, typename Dtype>
12 | void MixtureNLLCriterionLayer<mode, Dtype>::UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase)
13 | {
14 |     assert(operands.size() == 2);
15 |     
16 |     auto& q_k = operands[0]->state->DenseDerived();
17 |     auto& log_p_xk = operands[1]->state->DenseDerived();
18 |     auto& state = this->state->DenseDerived();
19 |     
20 |     state.EleWiseMul(q_k, log_p_xk);
21 |     
22 |     this->loss = -1.0 * state.Sum();                 
23 | }
24 | 
25 | template<MatMode mode, typename Dtype>
26 | void MixtureNLLCriterionLayer<mode, Dtype>::BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta)
27 | {
28 |     assert(operands.size() == 2);
29 |     assert(cur_idx == 1);
30 |         
31 |     auto& q_k = operands[0]->state->DenseDerived();
32 |     auto& prev_grad = operands[cur_idx]->grad->DenseDerived();
33 |     
34 |     auto& buf = this->state->DenseDerived();    
35 |     buf.CopyFrom(q_k);
36 |         
37 |     buf.Scale(-this->lambda / buf.rows);
38 |     
39 |     if (beta == 0)
40 |         prev_grad.CopyFrom(buf);
41 |     else
42 |         prev_grad.Axpby(1.0, buf, beta);                    
43 | }
44 | 
45 | template class MixtureNLLCriterionLayer<CPU, float>;
46 | template class MixtureNLLCriterionLayer<CPU, double>;
47 | template class MixtureNLLCriterionLayer<GPU, float>;
48 | template class MixtureNLLCriterionLayer<GPU, double>;
49 | 


--------------------------------------------------------------------------------
/src/net/msg_pass_param.cpp:
--------------------------------------------------------------------------------
  1 | #include "msg_pass_param.h"
  2 | #include "sparse_matrix.h"
  3 | #include "dense_matrix.h"
  4 | 
  5 | // =============================== IMessagePassParam =========================================
  6 | 
  7 | template<>
  8 | IMessagePassParam<CPU, float>::IMessagePassParam(std::string _name) : 
  9 | 								  IConstParam<CPU, float>(_name)
 10 | {
 11 |         cpu_weight = &weight;
 12 | }
 13 | 
 14 | template<>
 15 | IMessagePassParam<CPU, double>::IMessagePassParam(std::string _name) :  
 16 | 								  IConstParam<CPU, double>(_name)
 17 | {
 18 |         cpu_weight = &weight;
 19 | }
 20 | 
 21 | template<>
 22 | IMessagePassParam<GPU, float>::IMessagePassParam(std::string _name) : 
 23 | 								  IConstParam<GPU, float>(_name)
 24 | {
 25 |         cpu_weight = new SparseMat<CPU, float>();
 26 | }
 27 | 
 28 | template<>
 29 | IMessagePassParam<GPU, double>::IMessagePassParam(std::string _name) : 
 30 | 								  IConstParam<GPU, double>(_name)
 31 | {
 32 |         cpu_weight = new SparseMat<CPU, double>();
 33 | }
 34 | 
 35 | template class IMessagePassParam<CPU, double>;
 36 | template class IMessagePassParam<CPU, float>;
 37 | template class IMessagePassParam<GPU, double>;
 38 | template class IMessagePassParam<GPU, float>;
 39 | 
 40 | // =============================== Node2NodeMsgParam =========================================
 41 | 
 42 | template<MatMode mode, typename Dtype>
 43 | void Node2NodeMsgParam<mode, Dtype>::InitCPUWeight(GraphStruct* graph)
 44 | {
 45 |         this->cpu_weight->Resize(graph->num_nodes, graph->num_nodes);		
 46 | 		this->cpu_weight->ResizeSp(graph->num_edges, graph->num_nodes + 1);
 47 | 		
 48 | 		int nnz = 0;
 49 | 		auto& data = this->cpu_weight->data;
 50 | 		for (int i = 0; i < graph->num_nodes; ++i)
 51 | 		{
 52 | 			data->ptr[i] = nnz;
 53 | 			auto& list = graph->in_edges->head[i];
 54 | 			for (size_t j = 0; j < list.size(); ++j)
 55 | 			{
 56 | 				data->val[nnz] = 1.0;
 57 | 				data->col_idx[nnz] = list[j].second;
 58 | 				nnz++;
 59 | 			}
 60 | 		}
 61 | 		assert(nnz == graph->num_edges);
 62 | 		data->ptr[graph->num_nodes] = nnz;
 63 | }
 64 | 
 65 | template class Node2NodeMsgParam<CPU, double>;
 66 | template class Node2NodeMsgParam<CPU, float>;
 67 | template class Node2NodeMsgParam<GPU, double>;
 68 | template class Node2NodeMsgParam<GPU, float>;
 69 | 
 70 | // =============================== Edge2NodeMsgParam =========================================
 71 | 
 72 | template<MatMode mode, typename Dtype>
 73 | void Edge2NodeMsgParam<mode, Dtype>::InitCPUWeight(GraphStruct* graph)
 74 | {
 75 | 		this->cpu_weight->Resize(graph->num_nodes, graph->num_edges);
 76 | 		this->cpu_weight->ResizeSp(graph->num_edges, graph->num_nodes + 1);
 77 | 		
 78 | 		int nnz = 0;
 79 | 		auto& data = this->cpu_weight->data;		
 80 | 		for (int i = 0; i < graph->num_nodes; ++i)
 81 | 		{
 82 | 			data->ptr[i] = nnz;
 83 | 			auto& list = graph->in_edges->head[i];
 84 | 			for (size_t j = 0; j < list.size(); ++j)
 85 | 			{
 86 | 				data->val[nnz] = 1.0;
 87 | 				data->col_idx[nnz] = list[j].first;
 88 | 				nnz++;
 89 | 			}
 90 | 		}
 91 | 		assert(nnz == graph->num_edges);
 92 | 		data->ptr[graph->num_nodes] = nnz;       
 93 | }
 94 | 
 95 | template class Edge2NodeMsgParam<CPU, double>;
 96 | template class Edge2NodeMsgParam<CPU, float>;
 97 | template class Edge2NodeMsgParam<GPU, double>;
 98 | template class Edge2NodeMsgParam<GPU, float>;
 99 | 
100 | // =============================== Node2EdgeMsgParam =========================================
101 | 
102 | template<MatMode mode, typename Dtype>
103 | void Node2EdgeMsgParam<mode, Dtype>::InitCPUWeight(GraphStruct* graph)
104 | {
105 |         int nnz = 0;
106 |         this->cpu_weight->Resize(graph->num_edges, graph->num_nodes);
107 |         this->cpu_weight->ResizeSp(graph->num_edges, graph->num_edges + 1);
108 |             
109 | 		auto& data = this->cpu_weight->data;
110 |         for (int i = 0; i < graph->num_edges; ++i)
111 |         {
112 |             data->ptr[i] = nnz;
113 |             data->val[nnz] = 1.0;
114 |             data->col_idx[nnz] = graph->edge_list[i].first;
115 |             nnz++;
116 |         }
117 |         data->ptr[graph->num_edges] = nnz;
118 |         assert(nnz == data->nnz);             
119 | }
120 | 
121 | template class Node2EdgeMsgParam<CPU, double>;
122 | template class Node2EdgeMsgParam<CPU, float>;
123 | template class Node2EdgeMsgParam<GPU, double>;
124 | template class Node2EdgeMsgParam<GPU, float>;
125 | 
126 | // =============================== Edge2EdgeMsgParam =========================================
127 | 
128 | template<MatMode mode, typename Dtype>
129 | void Edge2EdgeMsgParam<mode, Dtype>::InitCPUWeight(GraphStruct* graph)
130 | {
131 |         int nnz = 0;
132 |         this->cpu_weight->Resize(graph->num_edges, graph->num_edges);
133 |         size_t cnt = 0;
134 |         for (int i = 0; i < graph->num_nodes; ++i)
135 |         {
136 |             auto in_cnt = graph->in_edges->head[i].size();
137 |             cnt += in_cnt * (in_cnt - 1); 
138 |         }
139 |         this->cpu_weight->ResizeSp(cnt, graph->num_edges + 1);            
140 |             
141 |         auto& data = this->cpu_weight->data;
142 |         for (int i = 0; i < graph->num_edges; ++i)
143 |         {
144 |             data->ptr[i] = nnz;
145 |             int node_from = graph->edge_list[i].first, node_to = graph->edge_list[i].second; 
146 |             auto& list = graph->in_edges->head[node_from]; 
147 |             for (size_t j = 0; j < list.size(); ++j)
148 |             {
149 |                 if (list[j].second == node_to)
150 |                     continue; // the same edge in another direction
151 |                 data->val[nnz] = 1.0;
152 |                 data->col_idx[nnz] = list[j].first; // the edge index
153 |                 nnz++;
154 |             }
155 |         }
156 |         data->ptr[graph->num_edges] = nnz;
157 |         assert(nnz == data->nnz);
158 |         assert(data->nnz == cnt);
159 | }
160 | 
161 | template class Edge2EdgeMsgParam<CPU, double>;
162 | template class Edge2EdgeMsgParam<CPU, float>;
163 | template class Edge2EdgeMsgParam<GPU, double>;
164 | template class Edge2EdgeMsgParam<GPU, float>;
165 | // =============================== SubgraphMsgParam =========================================
166 | 
167 | template<MatMode mode, typename Dtype>
168 | void SubgraphMsgParam<mode, Dtype>::InitCPUWeight(GraphStruct* graph)
169 | {		
170 | 		this->cpu_weight->Resize(graph->num_subgraph, graph->num_nodes);		
171 | 		this->cpu_weight->ResizeSp(graph->num_nodes, graph->num_subgraph + 1);
172 | 		
173 | 		int nnz = 0;
174 | 		auto& data = this->cpu_weight->data;
175 | 		for (int i = 0; i < graph->num_subgraph; ++i)
176 | 		{
177 | 			data->ptr[i] = nnz;
178 | 			auto& list = graph->subgraph->head[i];
179 | 			for (size_t j = 0; j < list.size(); ++j)
180 | 			{
181 | 				data->val[nnz] = 1.0;
182 | 				data->col_idx[nnz] = list[j];
183 | 				nnz++;
184 | 			}
185 | 		}
186 | 		assert(nnz == graph->num_nodes);
187 | 		data->ptr[graph->num_subgraph] = nnz;
188 | }
189 | 
190 | 
191 | template class SubgraphMsgParam<CPU, double>;
192 | template class SubgraphMsgParam<CPU, float>;
193 | template class SubgraphMsgParam<GPU, double>;
194 | template class SubgraphMsgParam<GPU, float>;
195 | 


--------------------------------------------------------------------------------
/src/net/mvn_diag_nll_criterion_layer.cpp:
--------------------------------------------------------------------------------
 1 | #include "mvn_diag_nll_criterion_layer.h"
 2 | #include "dense_matrix.h"
 3 | 
 4 | #define pi 3.14159265358979
 5 | 
 6 | template<MatMode mode, typename Dtype>
 7 | MVNDianNLLCriterionLayer<mode, Dtype>::MVNDianNLLCriterionLayer(std::string _name, Dtype _lambda, PropErr _properr)
 8 |         : ICriterionLayer<mode, Dtype>(_name, _lambda, _properr)
 9 | {
10 |     this->grad = new DenseMat<mode, Dtype>();
11 |     this->state = new DenseMat<mode, Dtype>();    
12 | }        
13 |         
14 | template<MatMode mode, typename Dtype>
15 | void MVNDianNLLCriterionLayer<mode, Dtype>::UpdateOutput(std::vector< ILayer<mode, Dtype>* >& operands, Phase phase)
16 | {
17 |     assert(operands.size() == 3);
18 |     auto& mu = operands[0]->state->DenseDerived();
19 |     auto& sigma = operands[1]->state->DenseDerived();
20 |     auto& x = operands[2]->state->DenseDerived();
21 |     
22 |     // (x - mu)^2
23 |     auto& buf = this->state->DenseDerived(); 
24 |     buf.GeaM(1.0, Trans::N, mu, -1.0, Trans::N, x);
25 |     buf.Square();
26 |     
27 |     // (x - mu)^2 / 2 / sigma / sigma
28 |     buf.Scale(0.5);
29 |     buf.EleWiseDiv(sigma);
30 |     buf.EleWiseDiv(sigma);
31 |     
32 |     // (x - mu)^2 / 2 / sigma / sigma + 0.5 * log(2 pi)    
33 |     buf.Add(0.5 * log(2 * pi));
34 |     
35 |     // log(sigma) + (x - mu)^2 / 2 / sigma / sigma + 0.5 * log(2 pi)
36 |     auto& grad = this->grad->DenseDerived();
37 |     grad.Log(sigma);
38 |     buf.Axpy(1.0, grad);
39 |         
40 |     this->loss = buf.Sum();
41 |     
42 |     if (this->properr == PropErr::T)
43 |         grad.GeaM(1.0, Trans::N, mu, -1.0, Trans::N, x);
44 | }
45 | 
46 | template<MatMode mode, typename Dtype>
47 | void MVNDianNLLCriterionLayer<mode, Dtype>::BackPropErr(std::vector< ILayer<mode, Dtype>* >& operands, unsigned cur_idx, Dtype beta)
48 | {
49 |     assert(operands.size() == 3 && cur_idx <= 1);
50 |             
51 |     auto& diff = this->grad->DenseDerived();
52 |     auto& buf = this->state->DenseDerived();
53 |     auto& prev_grad = operands[cur_idx]->grad->DenseDerived();
54 |     auto& sigma = operands[1]->state->DenseDerived();    
55 |     
56 |     // d/d_mu
57 |     if (cur_idx == 0)
58 |     {
59 |         buf.CopyFrom(diff);
60 |         buf.EleWiseDiv(sigma);
61 |         buf.EleWiseDiv(sigma);
62 |         buf.Scale(this->lambda / buf.rows);
63 |         
64 |         if (beta == 0)
65 |             prev_grad.CopyFrom(buf);
66 |         else
67 |             prev_grad.Axpby(1.0, buf, beta);       
68 |     } else // d/d_sigma
69 |     {
70 |         // 1/sigma
71 |         buf.CopyFrom(sigma);
72 |         buf.Inv();
73 |         
74 |         if (beta == 0)
75 |         {
76 |             prev_grad.CopyFrom(buf);
77 |             prev_grad.Scale(this->lambda / buf.rows);
78 |         }
79 |         else
80 |             prev_grad.Axpby(1.0, buf, beta);
81 |         
82 |         // (x - mu) ^2 / sigma^3    
83 |         buf.Power(3);
84 |         buf.EleWiseMul(diff);
85 |         buf.EleWiseMul(diff);
86 |         
87 |         prev_grad.Axpy(-this->lambda / buf.rows, buf);
88 |     }
89 | }
90 | 
91 | template class MVNDianNLLCriterionLayer<CPU, float>;
92 | template class MVNDianNLLCriterionLayer<CPU, double>;
93 | template class MVNDianNLLCriterionLayer<GPU, float>;
94 | template class MVNDianNLLCriterionLayer<GPU, double>;
95 | 


--------------------------------------------------------------------------------
/src/net/nngraph.cpp:
--------------------------------------------------------------------------------
  1 | #include "nngraph.h"
  2 | #include <cstring>
  3 | #include "param_layer.h"
  4 | #include "i_criterion_layer.h"
  5 | 
  6 | template<MatMode mode, typename Dtype>
  7 | void NNGraph<mode, Dtype>::FeedForward(std::map<std::string, IMatrix<mode, Dtype>* > input, Phase phase)
  8 | {
  9 |     hash.resize(ordered_layers.size());
 10 |     for (size_t i = 0; i < hash.size(); ++i)
 11 |         hash[i] = false;
 12 |         
 13 |     // feed data
 14 |     for (auto it = input.begin(); it != input.end(); ++it)
 15 |     {
 16 |         layer_dict[it->first]->state = it->second;
 17 |         assert(name_idx_map.count(it->first));
 18 |         hash[name_idx_map[it->first]] = true;
 19 |     }
 20 |     
 21 |     // feed-forward
 22 |     for (size_t i = 0; i < ordered_layers.size(); ++i)
 23 |     {     
 24 |         assert(layer_dict.count(ordered_layers[i].first));
 25 |         auto* cur_layer = layer_dict[ordered_layers[i].first];
 26 |         auto& operands = ordered_layers[i].second;
 27 |         assert(name_idx_map.count(cur_layer->name));
 28 |         if (operands.size() == 0 && ! hash[name_idx_map[cur_layer->name]])
 29 |             continue;
 30 |         bool ready = true;
 31 |         for (auto* layer : operands)
 32 |         {
 33 |             if (static_layer_dict.count(layer->name))
 34 |                 continue;
 35 |             assert(name_idx_map.count(layer->name));
 36 |             auto idx = name_idx_map[layer->name];
 37 |             ready &= hash[idx];
 38 |         }
 39 |         hash[name_idx_map[cur_layer->name]] = ready;
 40 |         if (ready)            
 41 |             cur_layer->UpdateOutput(operands, phase);
 42 |         else if (phase != TEST)
 43 |             throw std::runtime_error("wrong computation flow");         
 44 |     }    
 45 | }
 46 | 
 47 | template<MatMode mode, typename Dtype>
 48 | std::map<std::string, Dtype> NNGraph<mode, Dtype>::GetLoss()
 49 | {
 50 |     std::map<std::string, Dtype> loss;
 51 |             
 52 |     for (auto it = ordered_layers.rbegin(); it != ordered_layers.rend(); ++it)
 53 |     {
 54 |         auto* cur_layer = layer_dict[it->first];
 55 |         
 56 |         if (!cur_layer->IsSupervised())
 57 |             continue;
 58 |         
 59 |         auto* criterion_layer = dynamic_cast<ICriterionLayer<mode, Dtype>*>(cur_layer);
 60 |         assert(criterion_layer);
 61 |         loss[it->first] = criterion_layer->GetLoss(); 
 62 |     }
 63 |     return loss;
 64 | }
 65 | 
 66 | template<MatMode mode, typename Dtype>
 67 | void NNGraph<mode, Dtype>::BackPropagation()
 68 | {    	
 69 |     hash.resize(ordered_layers.size());
 70 |     for (size_t i = 0; i < hash.size(); ++i)
 71 |         hash[i] = false;
 72 |                         
 73 |     for (auto it = ordered_layers.rbegin(); it != ordered_layers.rend(); ++it)
 74 |     {
 75 |         auto* cur_layer = layer_dict[it->first];
 76 |         auto& operands = it->second;
 77 |         
 78 |         if (cur_layer->properr != PropErr::T)
 79 | 			continue;
 80 |         if (!hash[ name_idx_map[cur_layer->name] ])
 81 |         {
 82 |             if (cur_layer->IsSupervised())
 83 |                 hash[ name_idx_map[cur_layer->name] ] = true;
 84 |             else continue;
 85 |         }
 86 |         
 87 |         for (size_t i = 0; i < operands.size(); ++i)
 88 |         {            
 89 |             auto* prev_layer = operands[i];
 90 |             assert(name_idx_map.count(prev_layer->name));
 91 |             auto prev_id = name_idx_map[prev_layer->name];
 92 |             if (prev_layer->properr == PropErr::T)
 93 |             {
 94 |                 Dtype beta = 1.0;
 95 |                 // if we haven't backprop the error to this layer
 96 |                 if (! hash[ prev_id ])
 97 |                 {
 98 |                     beta = 0.0;
 99 |                     hash[prev_id] = true;
100 |                     prev_layer->grad->DenseDerived().Zeros(prev_layer->state->rows, prev_layer->state->cols);
101 |                 }
102 |                 cur_layer->BackPropErr(operands, i, beta);
103 |             }
104 |             if (cur_layer->HasParam())
105 |                 dynamic_cast<IParametric<mode, Dtype>*>(cur_layer)->AccDeriv(operands, i);
106 |         }                          
107 |     }        
108 | }
109 | 
110 | template class NNGraph<CPU, float>;
111 | template class NNGraph<CPU, double>;
112 | template class NNGraph<GPU, float>;
113 | template class NNGraph<GPU, double>;


--------------------------------------------------------------------------------