├── .gitignore ├── .idea └── .gitignore ├── CMakeLists.txt ├── Makefile ├── README.md ├── example.cpp ├── include ├── Conv.h ├── MatAutoDiff.h ├── MatOp.h └── Model.h ├── main.cpp ├── src ├── Conv.cpp ├── MatAutoDiff.cpp └── Model.cpp └── train.txt /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .DS_Store 3 | .vscode/configurationCache.log 4 | .vscode/dryrun.log 5 | .vscode/settings.json 6 | .vscode/targets.log 7 | .vscode/c_cpp_properties.json 8 | .vscode/launch.json 9 | .vscode/tasks.json 10 | .vscode/c_cpp_properties.json 11 | .vscode/c_cpp_properties.json 12 | model.dSYM/Contents/Resources/DWARF/model 13 | model.dSYM/Contents/Info.plist 14 | model 15 | .vscode/c_cpp_properties.json 16 | .idea/vcs.xml 17 | .idea/misc.xml 18 | .idea/modules.xml 19 | .idea/Toy_ML_Framework.iml 20 | cmake-build-debug/.cmake/api/v1/query/cache-v2 21 | cmake-build-debug/.cmake/api/v1/query/cmakeFiles-v1 22 | cmake-build-debug/.cmake/api/v1/query/codemodel-v2 23 | cmake-build-debug/.cmake/api/v1/query/toolchains-v1 24 | cmake-build-debug/.cmake/api/v1/reply/cache-v2-9602ad7e8f29f37f6c1d.json 25 | cmake-build-debug/.cmake/api/v1/reply/cmakeFiles-v1-2f8fb98736035f5c97d5.json 26 | cmake-build-debug/.cmake/api/v1/reply/codemodel-v2-a474f0d6d98ffe951faf.json 27 | cmake-build-debug/.cmake/api/v1/reply/directory-.-Debug-f5ebdc15457944623624.json 28 | cmake-build-debug/.cmake/api/v1/reply/index-2022-06-18T08-46-35-0049.json 29 | cmake-build-debug/.cmake/api/v1/reply/target-Toy_ML_Framework-Debug-f7b458d6c604adf29dcd.json 30 | cmake-build-debug/.cmake/api/v1/reply/toolchains-v1-b5fd9a4d818f14e48453.json 31 | cmake-build-debug/.ninja_deps 32 | cmake-build-debug/.ninja_log 33 | cmake-build-debug/build.ninja 34 | cmake-build-debug/cmake_install.cmake 35 | cmake-build-debug/CMakeCache.txt 36 | cmake-build-debug/CMakeFiles/3.22.3/CMakeCCompiler.cmake 37 | cmake-build-debug/CMakeFiles/3.22.3/CMakeCXXCompiler.cmake 38 | cmake-build-debug/CMakeFiles/3.22.3/CMakeDetermineCompilerABI_C.bin 39 | cmake-build-debug/CMakeFiles/3.22.3/CMakeDetermineCompilerABI_CXX.bin 40 | cmake-build-debug/CMakeFiles/3.22.3/CMakeSystem.cmake 41 | cmake-build-debug/CMakeFiles/3.22.3/CompilerIdC/CMakeCCompilerId.c 42 | cmake-build-debug/CMakeFiles/3.22.3/CompilerIdC/CMakeCCompilerId.o 43 | cmake-build-debug/CMakeFiles/3.22.3/CompilerIdCXX/CMakeCXXCompilerId.cpp 44 | cmake-build-debug/CMakeFiles/3.22.3/CompilerIdCXX/CMakeCXXCompilerId.o 45 | cmake-build-debug/CMakeFiles/clion-environment.txt 46 | cmake-build-debug/CMakeFiles/clion-log.txt 47 | cmake-build-debug/CMakeFiles/cmake.check_cache 48 | cmake-build-debug/CMakeFiles/CMakeError.log 49 | cmake-build-debug/CMakeFiles/CMakeOutput.log 50 | cmake-build-debug/CMakeFiles/rules.ninja 51 | cmake-build-debug/CMakeFiles/TargetDirectories.txt 52 | cmake-build-debug/CMakeFiles/Toy_ML_Framework.dir/main.cpp.o 53 | cmake-build-debug/Testing/Temporary/LastTest.log 54 | cmake-build-debug/Toy_ML_Framework 55 | *.json 56 | *.o 57 | cmake-build-debug/CMakeFiles/FindOpenMP/OpenMPTryFlag.cpp 58 | cmake-build-debug/CMakeFiles/FindOpenMP/OpenMPTryFlag.c 59 | cmake-build-debug/CMakeFiles/FindOpenMP/OpenMPCheckVersion.cpp 60 | cmake-build-debug/CMakeFiles/FindOpenMP/OpenMPCheckVersion.c 61 | cmake-build-debug/CMakeFiles/FindOpenMP/ompver_CXX.bin 62 | cmake-build-debug/CMakeFiles/FindOpenMP/ompver_C.bin 63 | .idea/inspectionProfiles/Project_Default.xml 64 | -------------------------------------------------------------------------------- /.idea/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.22) 2 | project(Toy_ML_Framework) 3 | 4 | set(BREW_DIR /opt/homebrew/Cellar) 5 | 6 | #set(BLAS_INC_DIR ${BREW_DIR}/openblas/0.3.17/include) 7 | #set(BLAS_LINK_DIR ${BREW_DIR}/openblas/0.3.17/lib) 8 | set(EIGEN_INC_DIR ${BREW_DIR}/eigen/3.3.9/include/eigen3) 9 | set(IDIR include) 10 | 11 | set(CMAKE_CXX_STANDARD 11) 12 | 13 | 14 | 15 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -g -fopenmp") 16 | 17 | # Run main.cpp 18 | add_executable(Toy_ML_Framework main.cpp include/MatOp.h src/MatAutoDiff.cpp include/MatAutoDiff.h include/MatAutoDiff.h src/Model.cpp include/Model.h src/Conv.cpp include/Conv.h) 19 | 20 | # Run img2col 21 | #add_executable(Toy_ML_Framework include/MatOp.h src/MatAutoDiff.cpp include/MatAutoDiff.h include/MatAutoDiff.h src/Model.cpp include/Model.h src/Conv.cpp include/Conv.h) 22 | 23 | # Add cblas 24 | #include_directories (${BLAS_INC_DIR}) 25 | #link_directories(${BLAS_LINK_DIR}) 26 | #target_link_libraries(Toy_ML_Framework -lblas) 27 | 28 | include_directories (${EIGEN_INC_DIR}) 29 | 30 | find_package(OpenMP) 31 | if(OpenMP_CXX_FOUND) 32 | target_link_libraries(Toy_ML_Framework PUBLIC OpenMP::OpenMP_CXX) 33 | endif() 34 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | IDIR=include 2 | CBLASDIR=/opt/homebrew/Cellar/openblas/0.3.17/include 3 | CBLASLIB=/opt/homebrew/Cellar/openblas/0.3.17/lib 4 | CXX=g++-11 5 | CXXFLAGS=-I$(IDIR) -I$(CBLASDIR) -L$(CBLASLIB) -std=c++11 -fopenmp -O3 6 | 7 | ODIR=src 8 | LDIR =../lib 9 | 10 | LIBS=-lm -lblas 11 | 12 | _DEPS = deep_core.h vector_ops.h 13 | DEPS = $(patsubst %,$(IDIR)/%,$(_DEPS)) 14 | 15 | _OBJ = deep_core.o vector_ops.o 16 | OBJ = $(patsubst %,$(ODIR)/%,$(_OBJ)) 17 | 18 | %.o: %.cpp $(DEPS) 19 | $(CXX) -c -o $@ $< ${CXXFLAGS} 20 | 21 | %.o: %.cxx $(DEPS) 22 | $(CXX) -c -o $@ $< ${CXXFLAGS} 23 | 24 | nnetwork_mpi: $(OBJ) nnetwork_mpi.o 25 | $(CXX) -o $@ $^ $(LIBS) 26 | 27 | nnetwork: $(OBJ) nnetwork.o 28 | $(CXX) -o $@ $^ $(LIBS) 29 | 30 | model: $(OBJ) model.o 31 | $(CXX) -o $@ $^ $(LIBS) 32 | 33 | run_pthreads: 34 | ./nnetwork 35 | 36 | run_parallel: 37 | mpirun -np 4 ./nnetwork_mpi 38 | 39 | all: clean nnetwork_mpi nnetwork 40 | .PHONY: clean 41 | 42 | default: clean nnetwork 43 | 44 | .DEFAULT_GOAL := default 45 | 46 | clean: 47 | rm -f $(ODIR)/*.o *.o nnetwork_mpi nnetwork model 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Toy ML Framework 2 | 3 | This framework grew out of CSE599W Assignment 1, where the goal is to build a machine learning system around automatic differentiation. The framework will contain the following modules: automatic differentiation, a user interface, CUDA ops, and more. 4 | 5 | ### Automatic Differentiation 6 | 7 | Automatic differentiation (AD) is the core module of the whole system.
With AD there is no need to handle differentiation manually; the system does it for us. All we need to do is implement each Op's compute and gradient functions. 8 | 9 | *Computation graph* 10 | 11 | The basis of automatic differentiation is the computation graph. Depending on how the graph is traversed, AD can be divided into Forward Mode and Reverse Mode. 12 | 13 | Forward Mode traverses the entire computation graph once for every independent input variable in order to compute its derivative, which generates a lot of unnecessary node visits. 14 | 15 | Reverse Mode traverses the graph in reverse topological order, starting from the output node. During this single reverse pass, we record the gradient contributed to each predecessor node; when we reach a node, we sum up all the gradient components arriving from the nodes behind it to get its total gradient, use that gradient to build the gradient components of its own predecessors, and finally obtain the gradient of the output y with respect to every input variable. 16 | 17 | This framework uses Reverse Mode and generates a static computation graph. 18 | 19 | *Graph Node* 20 | 21 | A graph node is defined as follows. 22 | 23 | ```C++ 24 | class Node { 25 | public: 26 | Node(){ 27 | this->const_attr = 0.0; 28 | } 29 | vector<Node> input; 30 | Op *op; 31 | float const_attr; 32 | string name; 33 | int hash_code; 34 | bool isPlaceHolder; 35 | 36 | virtual Node operator+(Node &nodeB); 37 | 38 | virtual Node operator*(Node &nodeB); 39 | 40 | ~Node() = default; 41 | }; 42 | ``` 43 | 44 | Each node has its own operator, which indicates what type of computation is performed at that node. The operator is defined as follows. 45 | 46 | ```C++ 47 | class Op { 48 | public: 49 | Op() = default; 50 | 51 | virtual Node getNewNode(); 52 | // T could be a vector or a matrix 53 | virtual vector<T> compute(Node &node, vector<T> &input_vals); 54 | 55 | virtual vector<Node> gradient(Node &node, Node &output_gradient); 56 | 57 | ~Op() = default; 58 | }; 59 | ``` 60 | 61 | Each operator needs to implement three functions: getNewNode creates a node bound to the current operator, while compute and gradient express the actual math. 62 | 63 | *Constructing back-propagation graph of gradients* 64 | 65 | The running example is y = x1 * x2 + x3. 66 | 67 | The gradient back-propagation graph is constructed by the gradients function, whose implementation can be found in src/MatAutoDiff.cpp. The gradients function has two inputs: the output node, which is our y, and the list of input nodes, which is our [x1, x2, x3]. Since we need to back-propagate, we traverse the nodes in reverse topological order, ensuring that when we visit a node we have already visited all the nodes after it (i.e. we have collected all of its gradient components). 68 | 69 | The first thing we know is that the derivative of the output node y with respect to itself (the expression x1*x2+x3) is 1. We store this value in the grad_map entry corresponding to y. 70 | 71 | Then we topologically sort the computation graph and reverse the order to get the node visit order, after which we can construct the back-propagation graph, as in the sketch below. 72 |
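To make this concrete, here is a minimal sketch of how the y = x1 * x2 + x3 graph could be built and differentiated with the interfaces described above (Variable, the overloaded operators, and the gradients function). The container template arguments are spelled out here even though they appear stripped in the listings above, so treat the exact types as an assumption.

```C++
// Sketch only: build the graph for y = x1 * x2 + x3 and request dy/dx1, dy/dx2, dy/dx3.
Node x1 = Variable("x1");
Node x2 = Variable("x2");
Node x3 = Variable("x3");
Node x1_mul_x2 = x1 * x2;   // node whose op is the matrix-multiply operator
Node y = x1_mul_x2 + x3;    // node whose op is the matrix-add operator

vector<Node> input_nodes;
input_nodes.push_back(x1);
input_nodes.push_back(x2);
input_nodes.push_back(x3);

// gradients() walks the graph in reverse topological order and returns
// one gradient expression node per input: [dy/dx1, dy/dx2, dy/dx3].
vector<Node> grads = gradients(y, input_nodes);
```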
73 | For each node, we follow the process below. 74 | 75 | 1. Look up the table to collect the gradient components passed back by all of the current node's output nodes and sum them up (each gradient component is itself a Node; as mentioned above, the concrete gradient values are still computed later by the compute function, so the "sum" here just builds the expression node for the gradient sum). 76 | 77 | 2. Save the gradient sum of the current node. 78 | 79 | 3. Use the gradient function of the current node's operator to build the gradient-component expression nodes for all of its input nodes. 80 | 81 | 4. Append the gradient-component node of each input node to node_to_output_grads_list, where it waits to be summed. 82 | 83 | After getting the gradient expression nodes for every node, we return them in the order of the requested input nodes and hand them to the executor for evaluation. 84 | 85 | *The Executor* 86 | 87 | The constructor input to Executor is a list [y, x1_grad, x2_grad, x3_grad], where y is the expression we need to evaluate, followed by the gradient node of each variable with respect to y. 88 | 89 | Executor has a run function that executes the entire computation graph together with the gradient back-propagation graph. 90 | 91 | We already have all the nodes when we construct the executor instance; we only need to sort them topologically to get the node traversal list. Since the gradient back-propagation graph has already been constructed, we are effectively merging the two graphs into one for the topological sort. 92 | 93 | During forward propagation we compute the value of each node; we then enter the back-propagation part of the graph and keep using the compute function under the corresponding rules, which now evaluates the gradient expression nodes. Finally, we return the values of the nodes [y, x1_grad, x2_grad, x3_grad] to the main function, as sketched below.
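Putting the pieces together, a driver in the style of example.cpp looks roughly like the sketch below. The executor is handed the list [y, x1_grad, x2_grad, x3_grad]; run() reads and fills a feed dictionary keyed by each node's hash_code, so the output value and the gradients can be looked up in it afterwards. The matrix shapes and container template arguments here are illustrative assumptions.

```C++
// Sketch only: evaluate y and the three gradients for one set of input values.
vector<Node> exe_list;
exe_list.push_back(y);
exe_list.insert(exe_list.end(), grads.begin(), grads.end());
Executor executor = Executor(exe_list);

map<int, MatrixXd> feed_dic;                  // node hash_code -> node value
feed_dic[x1.hash_code] = MatrixXd::Random(4, 4);
feed_dic[x2.hash_code] = MatrixXd::Random(4, 4);
feed_dic[x3.hash_code] = MatrixXd::Random(4, 4);

// One topological sweep runs the forward pass and the gradient graph;
// res holds [y_value, dy/dx1, dy/dx2, dy/dx3] in the order of exe_list.
vector<MatrixXd> res = executor.run(feed_dic);
```

In a training loop (see example.cpp), the same feed_dic is then used for the weight update: each weight entry is decremented by the learning rate times the value stored under its gradient node's hash_code.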
94 | 95 | ### Todo List 96 | 97 | * Tensorflow/Keras like User Interface 98 | * CUDA Operators 99 | * Convolution Operators 100 | * Computation graph serialization and deserialization 101 | -------------------------------------------------------------------------------- /example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "include/Model.h" 4 | #include 5 | #include 6 | 7 | #define BATCH_SIZE 10 8 | #define ITER 100 9 | 10 | using namespace Eigen; 11 | using namespace std; 12 | 13 | extern ReluOp relu_op; 14 | extern SoftmaxOp softmax_op; 15 | 16 | string training_data = "train.txt"; 17 | 18 | void Interface_example(){ 19 | Model model = Model(32, 100); 20 | 21 | model.Input(784, "input layer"); 22 | model.Dense(128, "Hidden layer 1", "relu"); 23 | model.Dense(64, "Hidden layer 2", "relu"); 24 | model.Output(10, "Output layer"); 25 | 26 | model.compile(); 27 | model.loadData(training_data); 28 | model.run(); 29 | } 30 | 31 | void NN_test(){ 32 | map node_to_gradient; 33 | cout << "3 layers NN test:" << endl; 34 | 35 | Node input = Variable("input"); 36 | Node W1 = Variable("W1"); 37 | Node W2 = Variable("W2"); 38 | Node W3 = Variable("W3"); 39 | Node y_true = Variable("y_true"); 40 | Node input_dot_W1 = input * W1; 41 | Node W1_relu = relu_op.getNewNode(input_dot_W1); 42 | Node W1_dot_W2 = W1_relu * W2; 43 | Node W2_relu = relu_op.getNewNode(W1_dot_W2); 44 | Node W2_dot_W3 = W2_relu * W3; 45 | Node y_predict = softmax_op.getNewNode(W2_dot_W3, y_true); 46 | 47 | vector input_nodes; 48 | input_nodes.push_back(input); 49 | input_nodes.push_back(W1); 50 | input_nodes.push_back(W2); 51 | input_nodes.push_back(W3); 52 | input_nodes.push_back(y_true); 53 | cout << "------------------------------------" << endl; 54 | cout << "Building graph..." << endl; 55 | vector grads = gradients(y_predict, input_nodes); 56 | cout << "Building graph done..." << endl; 57 | 58 | 59 | node_to_gradient[W1.hash_code] = grads[1].hash_code; 60 | node_to_gradient[W2.hash_code] = grads[2].hash_code; 61 | node_to_gradient[W3.hash_code] = grads[3].hash_code; 62 | 63 | vector exe_list; 64 | exe_list.push_back(y_predict); 65 | exe_list.insert(exe_list.end(), grads.begin(), grads.end()); 66 | Executor executor = Executor(exe_list); 67 | 68 | MatrixXd input_val(BATCH_SIZE, 784); 69 | MatrixXd W1_val = MatrixXd::Random(784,128); 70 | MatrixXd W2_val = MatrixXd::Random(128,64); 71 | MatrixXd W3_val = MatrixXd::Random(64,10); 72 | MatrixXd y_true_val(BATCH_SIZE, 10); 73 | 74 | float lr = .1/BATCH_SIZE; 75 | 76 | vector X_train; 77 | vector y_train; 78 | cout << "------------------------------------" << endl; 79 | cout << "Start loading data..." << endl; 80 | load_data(X_train, y_train, training_data, BATCH_SIZE); 81 | cout << "Finish loading data..." 
<< endl; 82 | 83 | map feed_dic; 84 | feed_dic[W1.hash_code] = W1_val; 85 | feed_dic[W2.hash_code] = W2_val; 86 | feed_dic[W3.hash_code] = W3_val; 87 | 88 | int iter_; 89 | cout << "------------------------------------" << endl; 90 | cout << "START TRAINING" << endl; 91 | cout << "------------------------------------" << endl; 92 | for(iter_ = 0; iter_ < ITER; iter_++){ 93 | // Read training data 94 | read_batch_data(input_val, y_true_val, X_train, y_train, ITER); 95 | feed_dic[input.hash_code] = input_val; 96 | feed_dic[y_true.hash_code] = y_true_val; 97 | 98 | // Train 99 | vector res = executor.run(feed_dic); 100 | 101 | // Weight update 102 | feed_dic[W1.hash_code] = feed_dic[W1.hash_code] - lr * feed_dic[node_to_gradient[W1.hash_code]]; 103 | feed_dic[W2.hash_code] = feed_dic[W2.hash_code] - lr * feed_dic[node_to_gradient[W2.hash_code]]; 104 | feed_dic[W3.hash_code] = feed_dic[W3.hash_code] - lr * feed_dic[node_to_gradient[W3.hash_code]]; 105 | 106 | // Loss 107 | if(iter_ % 10 == 0){ 108 | MatrixXd loss_m = feed_dic[y_predict.hash_code] - feed_dic[y_true.hash_code]; 109 | cout << "Iteration: " << iter_ << ", Loss: " << loss_m.array().square().sum() / (BATCH_SIZE * 10) << endl; 110 | cout << "------------------------------------" << endl; 111 | } 112 | } 113 | } -------------------------------------------------------------------------------- /include/Conv.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #ifndef TOY_ML_FRAMEWORK_CONV_H 9 | #define TOY_ML_FRAMEWORK_CONV_H 10 | 11 | #endif //TOY_ML_FRAMEWORK_CONV_H 12 | 13 | using namespace std; 14 | using namespace Eigen; 15 | 16 | MatrixXd img2col2D(const MatrixXd &input, const vector &kernels, 17 | const int &kernel_H, const int &kernel_W, 18 | const int &paddle, const int &stride); 19 | 20 | MatrixXd MaxPooling2D(const MatrixXd &input, 21 | const int filter_H, const int filter_W, const int stride, 22 | const int img_H, const int img_W, 23 | vector>> &pooling_loc); 24 | 25 | void MaxPoolingPrime(const int img_H, const int img_W, const MatrixXd &input, const vector>> &pooling_loc); 26 | 27 | 28 | -------------------------------------------------------------------------------- /include/MatAutoDiff.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "MatOp.h" 6 | #include 7 | #include 8 | 9 | class Node { 10 | public: 11 | Node() { 12 | this->matmul_attr_trans_A = false; 13 | this->matmul_attr_trans_B = false; 14 | } 15 | 16 | vector input; 17 | Op *op; 18 | string name; 19 | int hash_code; 20 | MatrixXd res; 21 | bool isPlaceHolder; 22 | bool matmul_attr_trans_A; 23 | bool matmul_attr_trans_B; 24 | 25 | // Conv and Pooling kernel parameters 26 | int filter_H; 27 | int filter_W; 28 | int stride; 29 | int img_H; 30 | int img_W; 31 | vector>> pooling_loc; 32 | 33 | 34 | 35 | virtual Node operator*(Node &nodeB); 36 | 37 | virtual Node operator+(Node &nodeB); 38 | 39 | ~Node() = default; 40 | }; 41 | 42 | void topo_sort_dfs(Node &node, std::unordered_set &visited, vector &topo_order); 43 | void find_topo_sort(vector &node_list, vector &topo_order); 44 | Node sum_node_list(vector &node_list); 45 | 46 | class Executor { 47 | public: 48 | vector node_list; 49 | Executor() = default; 50 | 51 | Executor(vector &eval_node_list); 52 | 53 | ~Executor() = default; 54 | 55 | vector run(map &feed_dic); 56 | 57 | }; 58 | 59 | class 
Placeholders : public Op { 60 | public: 61 | Node getNewNode(); 62 | 63 | vector compute(Node &nodeA, vector &input_vals) override; 64 | 65 | vector gradient(Node &node, Node &output_gradient) override; 66 | 67 | Placeholders() = default; 68 | 69 | ~Placeholders() = default; 70 | }; 71 | 72 | class ReluPrimeOp : public Op { 73 | public: 74 | ReluPrimeOp() = default; 75 | 76 | Node getNewNode(Node &nodeA, Node &nodeB); 77 | 78 | vector compute(Node &nodeA, vector &input_vals) override; 79 | 80 | vector gradient(Node &node, Node &output_gradient) override; 81 | 82 | ~ReluPrimeOp() = default; 83 | }; 84 | 85 | class ReluOp : public Op { 86 | public: 87 | ReluOp() = default; 88 | 89 | Node getNewNode(Node &nodeA); 90 | 91 | vector compute(Node &nodeA, vector &input_vals) override; 92 | 93 | vector gradient(Node &node, Node &output_gradient) override; 94 | 95 | ~ReluOp() = default; 96 | 97 | }; 98 | 99 | class MaxPoolingPrimeOp : public Op { 100 | public: 101 | MaxPoolingPrimeOp() = default; 102 | 103 | Node getNewNode(Node &nodeA, const int img_H, const int img_W); 104 | 105 | vector compute(Node &nodeA, vector &input_vals) override; 106 | 107 | vector gradient(Node &node, Node &output_gradient) override; 108 | 109 | ~MaxPoolingPrimeOp() = default; 110 | }; 111 | 112 | class MaxPoolingOp : public Op { 113 | public: 114 | MaxPoolingOp() = default; 115 | 116 | Node getNewNode(Node &nodeA, const int filter_H, const int filter_W, const int stride); 117 | 118 | vector compute(Node &nodeA, vector &input_vals) override; 119 | 120 | vector gradient(Node &node, Node &output_gradient) override; 121 | 122 | ~MaxPoolingOp() = default; 123 | }; 124 | 125 | 126 | 127 | class SoftmaxGradient : public Op { 128 | public: 129 | SoftmaxGradient() = default; 130 | 131 | Node getNewNode(Node &nodeA, Node &nodeB); 132 | 133 | vector compute(Node &nodeA, vector &input_vals) override; 134 | 135 | vector gradient(Node &node, Node &output_gradient) override; 136 | }; 137 | 138 | class SoftmaxOp : public Op { 139 | public: 140 | SoftmaxOp() = default; 141 | 142 | Node getNewNode(Node &nodeA, Node &nodeB); 143 | 144 | vector compute(Node &nodeA, vector &input_vals) override; 145 | 146 | vector gradient(Node &node, Node &output_gradient) override; 147 | 148 | ~SoftmaxOp() = default; 149 | }; 150 | 151 | class OnesLikeOp : public Op { 152 | public: 153 | OnesLikeOp() = default; 154 | 155 | Node getNewNode(Node &nodeA); 156 | 157 | vector compute(Node &nodeA, vector &input_vals) override; 158 | 159 | vector gradient(Node &node, Node &output_gradient) override; 160 | 161 | ~OnesLikeOp() = default; 162 | }; 163 | 164 | class ZerosLikeOp : public Op { 165 | public: 166 | ZerosLikeOp() = default; 167 | 168 | Node getNewNode(Node &nodeA); 169 | 170 | vector compute(Node &nodeA, vector &input_vals) override; 171 | 172 | vector gradient(Node &node, Node &output_gradient) override; 173 | 174 | ~ZerosLikeOp() = default; 175 | }; 176 | 177 | class MatMulOp : public Op { 178 | public: 179 | MatMulOp() = default; 180 | 181 | Node getNewNode(Node &nodeA, Node &nodeB, bool trans_A, bool trans_B); 182 | 183 | vector compute(Node &nodeA, vector &input_vals) override; 184 | 185 | vector gradient(Node &node, Node &output_gradient) override; 186 | 187 | ~MatMulOp() = default; 188 | }; 189 | 190 | class MatAddOp : public Op { 191 | public: 192 | MatAddOp() = default; 193 | 194 | Node getNewNode(Node &nodeA, Node &nodeB, bool trans_A, bool trans_B); 195 | 196 | vector compute(Node &nodeA, vector &input_vals) override; 197 | 198 | vector 
gradient(Node &node, Node &output_gradient) override; 199 | 200 | ~MatAddOp() = default; 201 | }; 202 | 203 | 204 | 205 | Node Variable(string var_name); 206 | vector gradients(Node &output_node, vector &node_list); 207 | void load_data(vector &X_train, vector &y_train, string src); 208 | void read_batch_data(MatrixXd &input_val, MatrixXd &y_true_val, vector &X_train, vector &y_train, int batch_size); -------------------------------------------------------------------------------- /include/MatOp.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | using namespace Eigen; 9 | 10 | class Node; 11 | class Op { 12 | public: 13 | Op() = default; 14 | 15 | virtual Node getNewNode(); 16 | 17 | virtual vector compute(Node &node, vector &input_vals); 18 | 19 | virtual vector gradient(Node &node, Node &output_gradient); 20 | 21 | ~Op() = default; 22 | }; 23 | 24 | -------------------------------------------------------------------------------- /include/Model.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../include/MatAutoDiff.h" 4 | #include 5 | #include 6 | 7 | using namespace Eigen; 8 | using namespace std; 9 | 10 | class Layer { 11 | public: 12 | // public attribute 13 | int shape; 14 | Node input_node; 15 | Node layer; 16 | Node output; 17 | Node grad; 18 | int grad_node_hash; 19 | MatrixXd matrix_weight; 20 | 21 | // input layer 22 | Node input; 23 | 24 | //output layer 25 | Node y_true; 26 | MatrixXd y_true_val; 27 | 28 | Layer(int layer_shape, const string& layer_name, Layer &previous_layer, const string& activation); // Dense layer 29 | 30 | Layer(int input_shape, string input_name, int batch_size); // Input layer 31 | 32 | Layer(int layer_shape, const string& layer_name, Layer &previous_layer, int batch_size); // Output layer 33 | 34 | ~Layer() = default; 35 | }; 36 | 37 | class Model { 38 | public: 39 | vector sequential; 40 | vector grads; 41 | Executor executor; 42 | vector exe_list; 43 | float lr = .1/batch_size; 44 | map feed_dic; 45 | vector X_train; 46 | vector y_train; 47 | int batch_size = 10; 48 | int training_iter = 100; 49 | 50 | Model() = default; 51 | 52 | Model(int batch_size_, int training_iter_); 53 | 54 | void Dense(int layer_shape, const string& layer_name, const string& activation); 55 | 56 | void Input(int input_shape, string input_name); 57 | 58 | void Output(int layer_shape, const string& layer_name); 59 | 60 | void compile(); 61 | 62 | void loadData(vector &X_train_, vector y_train_); 63 | 64 | void run(); 65 | 66 | ~Model() = default; 67 | 68 | }; -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "include/Model.h" 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | using namespace std; 10 | using namespace Eigen; 11 | 12 | int main() { 13 | Eigen::initParallel(); 14 | string training_data = "../train.txt"; 15 | vector X; 16 | vector y; 17 | 18 | cout << "------------------------------------" << endl; 19 | cout << "Loading data..." << endl; 20 | load_data(X, y, training_data); 21 | cout << "Finish loading data..." 
<< endl; 22 | 23 | Model model = Model(32, 100); 24 | 25 | model.Input(784, "input layer"); 26 | model.Dense(512, "Hidden layer 1", "relu"); 27 | model.Dense(512, "Hidden layer 2", "relu"); 28 | model.Dense(512, "Hidden layer 3", "relu"); 29 | model.Dense(512, "Hidden layer 4", "relu"); 30 | model.Dense(10, "Hidden layer 5", "none"); 31 | model.Output(10, "Output layer"); 32 | 33 | model.compile(); 34 | model.loadData(X, y); 35 | model.run(); 36 | } 37 | 38 | -------------------------------------------------------------------------------- /src/Conv.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/Conv.h" 2 | using namespace std; 3 | using namespace Eigen; 4 | 5 | /* 6 | * input follows an NCHW layout: the outermost vector stores each image and each Matrix stores one image 7 | * kernel also follows NCHW: the outermost vector stores the convolution kernels and each Matrix stores the corresponding kernel 8 | * For simplicity, only single-channel convolution is implemented, i.e. C = 1 9 | */ 10 | MatrixXd img2col2D(const MatrixXd &input, const vector<MatrixXd> &kernels, const int &kernel_H, const int &kernel_W, const int &paddle, const int &stride){ 11 | vector<MatrixXd> feature_maps; 12 | long img_H = input.rows(); 13 | long img_W = input.cols(); 14 | /* 15 | * Pad the input img 16 | */ 17 | // Create an all-zero padded matrix 18 | MatrixXd paddle_img = MatrixXd::Zero(img_H + 2 * paddle, img_W + 2 * paddle); 19 | // Place the image at the center of the padded matrix 20 | paddle_img.block(paddle, paddle, img_H, img_W) = input; 21 | 22 | img_H = paddle_img.rows(); 23 | img_W = paddle_img.cols(); 24 | 25 | // Build the input matrix 26 | int input_matrix_W = kernel_H * kernel_W; 27 | int input_matrix_H = int(((img_H - kernel_H + 1) / stride) * ((img_W - kernel_W + 1) / stride)); 28 | MatrixXd input_matrix(input_matrix_H, input_matrix_W); 29 | int row_idx = 0; 30 | int col_idx = 0; 31 | for(int i = 0; i < img_H - kernel_H + 1; i += stride){ 32 | for(int j = 0; j < img_W - kernel_W + 1; j += stride){ 33 | col_idx = 0; 34 | for(int m = i; m < i + kernel_H; ++m){ 35 | for(int n = j; n < j + kernel_W; ++n){ 36 | input_matrix(row_idx, col_idx) = paddle_img(m, n); 37 | col_idx++; 38 | } 39 | } 40 | row_idx++; 41 | } 42 | } 43 | 44 | // Build the convolution kernel matrix 45 | int kernel_nums = kernels.size(); 46 | MatrixXd kernel_matrix(kernel_W * kernel_H, kernel_nums); 47 | for(int i = 0; i < kernel_nums; ++i){ 48 | for(int m = 0; m < kernel_H; ++m){ 49 | for(int n = 0; n < kernel_W; ++n){ 50 | kernel_matrix(m * kernel_H + n, i) = kernels[i](m, n); 51 | } 52 | } 53 | } 54 | 55 | // Recover the feature map 56 | MatrixXd feature_matrix = input_matrix * kernel_matrix; 57 | // int feature_H = int(paddle_img.rows()) - kernel_H + 1; 58 | // int feature_W = int(paddle_img.cols()) - kernel_W + 1; 59 | // for(int i = 0; i < kernel_nums; ++i){ 60 | // MatrixXd feature = feature_matrix.col(i); 61 | // feature.resize(feature_H, feature_W); 62 | // feature_maps.emplace_back(feature); 63 | // } 64 | return std::move(feature_matrix); 65 | } 66 | /* 67 | * Input: the feature matrix produced by the img2col convolution. Each column is one n*n feature map, i.e. the feature map produced by one kernel, so the matrix has as many columns as there are kernels. 68 | * Output: likewise one column per kernel; each column is the max-pooled feature map, of size pooling_H * pooling_W 69 | */ 70 | MatrixXd MaxPooling2D(const MatrixXd &input, 71 | const int filter_H, const int filter_W, const int stride, 72 | const int img_H, const int img_W, 73 | vector<vector<pair<int, int>>> &pooling_loc){ 74 | cout << "Feature matrix:" << endl; 75 | cout << input << endl << endl; 76 | const int pooling_H = (img_H - filter_H)/stride + 1; 77 | const int pooling_W = (img_W - filter_W)/stride + 1; 78 | MatrixXd pooling_res(pooling_H * pooling_W, input.cols()); 79 | 80 | for(int k = 0; k < input.cols(); ++k){ 81 | MatrixXd feature = input.col(k); 82 | feature.resize(img_H, img_W); 83 |
feature.transposeInPlace(); // Eigen uses column-major storage by default, so resize() fills column by column, while our data is laid out row by row; the matrix therefore has to be transposed before use 84 | for(int i = 0; i < img_H; i += stride){ 85 | for(int j = 0; j < img_W; j += stride){ 86 | int pooling_row; 87 | int pooling_col; 88 | float max_val = feature.block(i, j, filter_H, filter_W).maxCoeff(&pooling_row, &pooling_col); 89 | pooling_res(i/stride * filter_W + j/stride, k) = max_val; 90 | pooling_loc[i/stride * filter_H + j/stride][k] = pair<int, int>((i + pooling_row) * img_H + (j + pooling_col), k); 91 | } 92 | } 93 | } 94 | cout << "Pooling results:" << endl; 95 | cout << pooling_res << endl << endl; 96 | cout << "Pooling location:" << endl; 97 | for(int k = 0; k < pooling_loc.size(); ++k){ 98 | for(int i = 0; i < pooling_loc[0].size(); ++i){ 99 | cout << "(" << pooling_loc[k][i].first << "," << pooling_loc[k][i].second << "), "; 100 | } 101 | cout << endl; 102 | } 103 | 104 | return std::move(pooling_res); 105 | } 106 | 107 | void MaxPoolingPrime(const int img_H, const int img_W, const MatrixXd &input, const vector<vector<pair<int, int>>> &pooling_loc){ 108 | MatrixXd mat = MatrixXd::Zero(img_H * img_W, input.cols()); 109 | cout << "------------------------" << endl; 110 | cout << "input diff:" << endl; 111 | cout << input << endl; 112 | int pooling_H = int(input.rows()); 113 | int pooling_W = int(input.cols()); 114 | for(int k = 0; k < input.cols(); ++k) 115 | for(int i = 0; i < pooling_H; ++i) 116 | for(int j = 0; j < pooling_W; ++j){ 117 | mat(pooling_loc[i][j].first, pooling_loc[i][j].second) = input(i, j); 118 | } 119 | cout << "MaxPooling back diff:" << endl; 120 | cout << mat << endl; 121 | } 122 | 123 | //int main(){ 124 | // MatrixXd input_img(4, 4); 125 | // input_img << 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16; 126 | // 127 | // cout << "Original image:" << endl; 128 | // cout << input_img << endl; 129 | // vector<MatrixXd> kernels; 130 | // int kernel_num = 4; 131 | // for(int i = 0; i < kernel_num; ++i){ 132 | // MatrixXd kernel; 133 | // kernel = MatrixXd::Random(3, 3); 134 | // kernels.emplace_back(kernel); 135 | // } 136 | // MatrixXd feature_matrix = img2col2D(input_img, kernels, 3, 3, 1, 1); 137 | // vector<vector<pair<int, int>>> pooling_loc; 138 | // vector<pair<int, int>> temp(feature_matrix.cols()); 139 | // pooling_loc.resize(2 * 2, temp); 140 | // cout << "Pooling image:" << endl; 141 | // MatrixXd pooling_img = MaxPooling2D(feature_matrix, 142 | // 2, 2, 2, 143 | // 4, 4, 144 | // pooling_loc); 145 | // cout << pooling_img << endl; 146 | // MatrixXd pooling_diff(2 * 2, 4); 147 | // pooling_diff << 0.1, 0.5, 0.04, -0.5, 148 | // 0.2, 0.6, 0.03, -0.2, 149 | // 0.3, 0.7, 0.02, -0.4, 150 | // 0.4, 0.8, 0.01, -0.7; 151 | // MaxPoolingPrime(4, 4, pooling_diff, pooling_loc); 152 | //} -------------------------------------------------------------------------------- /src/MatAutoDiff.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../include/MatAutoDiff.h" 6 | #include "../include/Conv.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define random(x) rand()%(x) 14 | 15 | using namespace Eigen; 16 | using namespace std; 17 | 18 | Node Op::getNewNode() { 19 | Node newNode = Node(); 20 | newNode.op = this; 21 | return newNode; 22 | } 23 | 24 | vector Op::compute(Node &node, vector &input_vals) { 25 | return vector(); 26 | } 27 | 28 | vector Op::gradient(Node &node, Node &output_gradient) { 29 | return vector(); 30 | } 31 | 32 | Node Placeholders::getNewNode() { 33 | Node newNode = Node(); 34 |
newNode.isPlaceHolder = true; 35 | newNode.op = this; 36 | return newNode; 37 | } 38 | 39 | vector Placeholders::compute(Node &node, vector &input_vals) { 40 | // input_vals[0]代表第一个数,input_vals[1]代表第二个数。 41 | // 这两个数可以是浮点数,也可以是矩阵。在这里是浮点数,并且存放在长度为1的vector中。 42 | vector res; 43 | return res; 44 | } 45 | 46 | vector Placeholders::gradient(Node &node, Node &output_gradient) { 47 | vector res; 48 | return res; 49 | } 50 | 51 | Placeholders placeholder_op = Placeholders(); 52 | 53 | Node OnesLikeOp::getNewNode(Node &nodeA) { 54 | Node newNode = Node(); 55 | newNode.op = this; 56 | newNode.input.push_back(nodeA); 57 | newNode.isPlaceHolder = false; 58 | newNode.name = "Oneslike(" + nodeA.name + ")"; 59 | int hash = 0; 60 | int i = 0; 61 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 62 | hash += newNode.name[i]; 63 | newNode.hash_code = hash; 64 | return newNode; 65 | } 66 | 67 | vector OnesLikeOp::compute(Node &nodeA, vector &input_vals) { 68 | MatrixXd ones = MatrixXd::Ones(input_vals[0].rows(), input_vals[0].cols()); 69 | vector res; 70 | res.push_back(ones); 71 | return res; 72 | } 73 | 74 | vector OnesLikeOp::gradient(Node &node, Node &output_gradient) { 75 | vector res; 76 | Node newNode = Node(); 77 | newNode.input.push_back(node.input[0]); 78 | newNode.name = "Zeroslike(" + node.input[0].name + ")"; 79 | int hash = 0; 80 | int i = 0; 81 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 82 | hash += newNode.name[i]; 83 | newNode.hash_code = hash; 84 | res.push_back(newNode); 85 | return res; 86 | } 87 | 88 | OnesLikeOp ones_like_op = OnesLikeOp(); 89 | 90 | Node ZerosLikeOp::getNewNode(Node &nodeA) { 91 | Node newNode = Node(); 92 | newNode.op = this; 93 | newNode.input.push_back(nodeA); 94 | newNode.name = "Zeroslike(" + nodeA.name + ")"; 95 | int hash = 0; 96 | int i = 0; 97 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 98 | hash += newNode.name[i]; 99 | newNode.hash_code = hash; 100 | newNode.isPlaceHolder = false; 101 | return newNode; 102 | } 103 | 104 | vector ZerosLikeOp::compute(Node &nodeA, vector &input_vals) { 105 | MatrixXd zeros = MatrixXd::Zero(input_vals[0].rows(), input_vals[0].cols()); 106 | vector res; 107 | res.push_back(zeros); 108 | return res; 109 | } 110 | 111 | vector ZerosLikeOp::gradient(Node &node, Node &output_gradient) { 112 | vector res; 113 | Node newNode = Node(); 114 | newNode.input.push_back(node.input[0]); 115 | newNode.name = "Zeroslike(" + node.input[0].name + ")"; 116 | int hash = 0; 117 | int i = 0; 118 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 119 | hash += newNode.name[i]; 120 | newNode.hash_code = hash; 121 | res.push_back(newNode); 122 | return res; 123 | } 124 | 125 | ZerosLikeOp zeros_like_op = ZerosLikeOp(); 126 | 127 | Node MatAddOp::getNewNode(Node &nodeA, Node &nodeB, bool trans_A, bool trans_B) { 128 | Node newNode = Node(); 129 | newNode.op = this; 130 | newNode.matmul_attr_trans_A = trans_A; 131 | newNode.matmul_attr_trans_B = trans_B; 132 | newNode.input.push_back(nodeA); 133 | newNode.input.push_back(nodeB); 134 | newNode.isPlaceHolder = false; 135 | newNode.name = "MatAdd(" + nodeA.name + ", " + nodeB.name + ") "; 136 | return newNode; 137 | } 138 | 139 | vector MatAddOp::compute(Node &nodeA, vector &input_vals) { 140 | if (nodeA.matmul_attr_trans_A) { 141 | input_vals[0].transposeInPlace(); 142 | } 143 | if (nodeA.matmul_attr_trans_B) { 144 | input_vals[1].transposeInPlace(); 145 | } 146 | MatrixXd res_mat = 
input_vals[0] + input_vals[1]; 147 | vector res; 148 | res.push_back(res_mat); 149 | return res; 150 | } 151 | 152 | vector MatAddOp::gradient(Node &node, Node &output_gradient) { 153 | vector res; 154 | res.push_back(output_gradient); 155 | res.push_back(output_gradient); 156 | return res; 157 | } 158 | 159 | MatAddOp matadd_op = MatAddOp(); 160 | 161 | Node MatMulOp::getNewNode(Node &nodeA, Node &nodeB, bool trans_A, bool trans_B) { 162 | Node newNode = Node(); 163 | newNode.op = this; 164 | newNode.matmul_attr_trans_A = trans_A; 165 | newNode.matmul_attr_trans_B = trans_B; 166 | newNode.input.push_back(nodeA); 167 | newNode.input.push_back(nodeB); 168 | newNode.isPlaceHolder = false; 169 | newNode.name = "MatMul(" + nodeA.name + ", " + nodeB.name + ")"; 170 | int hash = 0; 171 | int i = 0; 172 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 173 | hash += newNode.name[i]; 174 | newNode.hash_code = hash; 175 | return newNode; 176 | } 177 | 178 | vector MatMulOp::compute(Node &nodeA, vector &input_vals) { 179 | MatrixXd res_mat; 180 | omp_set_num_threads(4); 181 | if (nodeA.matmul_attr_trans_A && nodeA.matmul_attr_trans_B) { 182 | res_mat = input_vals[0].transpose() * input_vals[1].transpose(); 183 | } else if (!nodeA.matmul_attr_trans_A && nodeA.matmul_attr_trans_B) { 184 | res_mat = input_vals[0] * input_vals[1].transpose(); 185 | } else if (nodeA.matmul_attr_trans_A && !nodeA.matmul_attr_trans_B) { 186 | res_mat = input_vals[0].transpose() * input_vals[1]; 187 | } else { 188 | res_mat = input_vals[0] * input_vals[1]; 189 | } 190 | #pragma omp parallel for 191 | for(int i = 0; i < res_mat.cols(); ++i){ 192 | res_mat.col(i).normalize(); 193 | } 194 | vector res; 195 | res.push_back(res_mat); 196 | return res; 197 | } 198 | 199 | vector MatMulOp::gradient(Node &node, Node &output_gradient) { 200 | vector res; 201 | Node newNodeA = getNewNode(output_gradient, node.input[1], false, true); 202 | Node newNodeB = getNewNode(node.input[0], output_gradient, true, false); 203 | res.push_back(newNodeA); 204 | res.push_back(newNodeB); 205 | return res; 206 | } 207 | 208 | MatMulOp matmul_op = MatMulOp(); 209 | 210 | Node ReluPrimeOp::getNewNode(Node &nodeA, Node &nodeB) { 211 | Node newNode = Node(); 212 | newNode.op = this; 213 | newNode.input.push_back(nodeA); 214 | newNode.input.push_back(nodeB); 215 | newNode.name = "ReluPrime(" + nodeA.name + ")"; 216 | int hash = 0; 217 | int i = 0; 218 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 219 | hash += newNode.name[i]; 220 | newNode.hash_code = hash; 221 | newNode.isPlaceHolder = false; 222 | return newNode; 223 | } 224 | 225 | vector ReluPrimeOp::compute(Node &nodeA, vector &input_vals) { 226 | MatrixXd res_mat(input_vals[0].rows(), input_vals[0].cols()); 227 | for (int i = 0; i < input_vals[0].rows(); ++i) 228 | for (int j = 0; j < input_vals[0].cols(); ++j) { 229 | if (input_vals[0](i, j) <= 0) { 230 | res_mat(i, j) = 0.0; 231 | } else res_mat(i, j) = 1.0; 232 | } 233 | vector res; 234 | res.push_back(res_mat); 235 | return res; 236 | } 237 | 238 | vector ReluPrimeOp::gradient(Node &node, Node &output_gradient) { 239 | return {}; // No need to implement. 
240 | } 241 | 242 | ReluPrimeOp relu_prime_op = ReluPrimeOp(); 243 | 244 | Node ReluOp::getNewNode(Node &nodeA) { 245 | Node newNode = Node(); 246 | newNode.op = this; 247 | newNode.input.push_back(nodeA); 248 | newNode.name = "Relu(" + nodeA.name + ")"; 249 | int hash = 0; 250 | int i = 0; 251 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 252 | hash += newNode.name[i]; 253 | newNode.hash_code = hash; 254 | newNode.isPlaceHolder = false; 255 | return newNode; 256 | } 257 | 258 | vector ReluOp::compute(Node &nodeA, vector &input_vals) { 259 | MatrixXd res_mat(input_vals[0].rows(), input_vals[0].cols()); 260 | #pragma omp parallel for 261 | for (int i = 0; i < input_vals[0].rows(); ++i) 262 | for (int j = 0; j < input_vals[0].cols(); ++j) { 263 | if (input_vals[0](i, j) <= 0) { 264 | res_mat(i, j) = 0.0; 265 | } else res_mat(i, j) = input_vals[0](i, j); 266 | } 267 | vector res; 268 | res.push_back(res_mat); 269 | return res; 270 | } 271 | 272 | vector ReluOp::gradient(Node &node, Node &output_gradient) { 273 | Node newNode = relu_prime_op.getNewNode(node.input[0], output_gradient); 274 | vector res; 275 | res.push_back(newNode); 276 | return res; 277 | } 278 | 279 | ReluOp relu_op = ReluOp(); 280 | 281 | Node MaxPoolingPrimeOp::getNewNode(Node &nodeA, const int img_H, const int img_W) { 282 | Node newNode = Node(); 283 | newNode.op = this; 284 | newNode.input.push_back(nodeA); 285 | newNode.name = "MaxPoolingPrime(" + nodeA.name + ")"; 286 | newNode.filter_W = nodeA.filter_W; 287 | newNode.filter_H = nodeA.filter_H; 288 | newNode.stride = nodeA.stride; 289 | // 获得原有特征图的大小 290 | newNode.img_H = img_H; 291 | newNode.img_W = img_W; 292 | int hash = 0; 293 | int i = 0; 294 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 295 | hash += newNode.name[i]; 296 | newNode.hash_code = hash; 297 | newNode.isPlaceHolder = false; 298 | return newNode; 299 | } 300 | 301 | vector MaxPoolingPrimeOp::compute(Node &nodeA, vector &input_vals){ 302 | MatrixXd mat = MatrixXd::Zero(nodeA.img_H * nodeA.img_W, input_vals[0].cols()); 303 | vector res; 304 | int pooling_H = int(input_vals[0].rows()); 305 | int pooling_W = int(input_vals[0].cols()); 306 | for(int k = 0; k < pooling_W; ++k) 307 | for(int i = 0; i < pooling_H; ++i) 308 | for(int j = 0; j < pooling_W; ++j){ 309 | mat(nodeA.pooling_loc[i][j].first, nodeA.pooling_loc[i][j].second) = input_vals[0](i, j); 310 | } 311 | res.emplace_back(mat); 312 | return std::move(res); 313 | } 314 | 315 | vector MaxPoolingPrimeOp::gradient(Node &node, Node &output_gradient){ 316 | return {}; // No need to implement. 
317 | } 318 | 319 | MaxPoolingPrimeOp maxpooling_prime_op = MaxPoolingPrimeOp(); 320 | 321 | Node MaxPoolingOp::getNewNode(Node &nodeA, const int filter_H, const int filter_W, const int stride) { 322 | Node newNode = Node(); 323 | newNode.op = this; 324 | newNode.input.push_back(nodeA); 325 | newNode.name = "MaxPooling(" + nodeA.name + ")"; 326 | newNode.filter_W = filter_W; 327 | newNode.filter_H = filter_H; 328 | newNode.stride = stride; 329 | // 保存原有特征图的大小 330 | newNode.img_H = nodeA.img_H; 331 | newNode.img_W = nodeA.img_W; 332 | int hash = 0; 333 | int i = 0; 334 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 335 | hash += newNode.name[i]; 336 | newNode.hash_code = hash; 337 | newNode.isPlaceHolder = false; 338 | return newNode; 339 | } 340 | 341 | vector MaxPoolingOp::compute(Node &nodeA, vector &input_vals) { 342 | vector res; 343 | vector> temp(input_vals[0].cols()); 344 | nodeA.pooling_loc.resize(nodeA.filter_H * nodeA.filter_W, temp); 345 | res.emplace_back(MaxPooling2D(input_vals[0], nodeA.filter_H, 346 | nodeA.filter_W, nodeA.stride, 347 | nodeA.img_H, nodeA.img_W, 348 | nodeA.pooling_loc)); 349 | return std::move(res); 350 | } 351 | 352 | vector MaxPoolingOp::gradient(Node &node, Node &output_gradient) { 353 | Node newNode = maxpooling_prime_op.getNewNode(node.input[0], node.img_H, node.img_W); 354 | vector res; 355 | res.push_back(newNode); 356 | return res; 357 | } 358 | 359 | Node SoftmaxGradient::getNewNode(Node &nodeA, Node &nodeB) { 360 | Node newNode = Node(); 361 | newNode.op = this; 362 | newNode.input.push_back(nodeA); 363 | newNode.input.push_back(nodeB); 364 | newNode.isPlaceHolder = false; 365 | newNode.name = "SoftmaxGradient(y_predict:" + nodeA.name + ", y_true:" + nodeB.name + ")"; 366 | int hash = 0; 367 | int i = 0; 368 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 369 | hash += newNode.name[i]; 370 | newNode.hash_code = hash; 371 | newNode.isPlaceHolder = false; 372 | return newNode; 373 | } 374 | 375 | vector SoftmaxGradient::compute(Node &nodeA, vector &input_vals) { 376 | MatrixXd y_hat = input_vals[0]; // Predict 377 | MatrixXd y = input_vals[1]; // Ground Truth 378 | vector res_mat; 379 | res_mat.push_back(y_hat - y); 380 | return res_mat; 381 | } 382 | 383 | vector SoftmaxGradient::gradient(Node &node, Node &output_gradient) { 384 | return vector(); 385 | } 386 | 387 | SoftmaxGradient softmax_gradient_op = SoftmaxGradient(); 388 | 389 | Node SoftmaxOp::getNewNode(Node &nodeA, Node &nodeB) { 390 | Node newNode = Node(); 391 | newNode.op = this; 392 | newNode.input.push_back(nodeA); 393 | newNode.input.push_back(nodeB); 394 | newNode.isPlaceHolder = false; 395 | newNode.name = "Softmax(" + nodeA.name + ")"; 396 | int hash = 0; 397 | int i = 0; 398 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 399 | hash += newNode.name[i]; 400 | newNode.hash_code = hash; 401 | newNode.isPlaceHolder = false; 402 | return newNode; 403 | } 404 | 405 | vector SoftmaxOp::compute(Node &nodeA, vector &input_vals) { 406 | MatrixXd m = input_vals[0].array().exp(); 407 | MatrixXd sum = m.colwise().sum(); 408 | MatrixXd res_mat(m.rows(), m.cols()); 409 | for (int i = 0; i < m.rows(); i++) 410 | for (int j = 0; j < m.cols(); j++) 411 | res_mat(i, j) = m(i, j) / sum(0, j); 412 | vector res; 413 | res.push_back(res_mat); 414 | nodeA.res = res_mat; 415 | // cout << "Softmax res:" << endl; 416 | // cout << res_mat << endl; 417 | return res; 418 | } 419 | 420 | vector SoftmaxOp::gradient(Node &node, 
Node &output_gradient) { 421 | Node newNodeA = softmax_gradient_op.getNewNode(node, node.input[1]); 422 | Node newNodeB = zeros_like_op.getNewNode(node.input[1]); 423 | vector res_vec; 424 | res_vec.push_back(newNodeA); 425 | res_vec.push_back(newNodeB); 426 | return res_vec; 427 | } 428 | 429 | SoftmaxOp softmax_op = SoftmaxOp(); 430 | 431 | Node Variable(string var_name) { 432 | Node placeholder_node = placeholder_op.getNewNode(); 433 | placeholder_node.name = var_name; 434 | int hash = 0; 435 | int i = 0; 436 | for (hash = var_name.length(), i = 0; i < var_name.length(); i++) 437 | hash += var_name[i]; 438 | placeholder_node.hash_code = hash; 439 | return placeholder_node; 440 | } 441 | 442 | 443 | Node Node::operator*(Node &nodeB) { 444 | Node nodeA = *this; 445 | Node newNode = matmul_op.getNewNode(nodeA, nodeB, false, false); 446 | return newNode; 447 | } 448 | 449 | Node Node::operator+(Node &nodeB) { 450 | Node nodeA = *this; 451 | Node newNode = matadd_op.getNewNode(nodeA, nodeB, false, false); 452 | return newNode; 453 | } 454 | 455 | void topo_sort_dfs(Node &node, std::unordered_set &visited, vector &topo_order) { 456 | if (visited.count(node.hash_code)) { 457 | return; 458 | } 459 | visited.emplace(node.hash_code); 460 | for (Node &in_node: node.input) { 461 | topo_sort_dfs(in_node, visited, topo_order); 462 | } 463 | topo_order.push_back(node); 464 | } 465 | 466 | void find_topo_sort(vector &node_list, vector &topo_order) { 467 | std::unordered_set visited; 468 | for (Node &node: node_list) { 469 | topo_sort_dfs(node, visited, topo_order); 470 | } 471 | } 472 | 473 | Node sum_node_list(vector &node_list) { 474 | Node res = node_list[0]; 475 | int i = 1; 476 | int size = node_list.size(); 477 | for (i = 1; i < size; i++) { 478 | res = res + node_list[i]; 479 | } 480 | return res; 481 | } 482 | 483 | 484 | vector gradients(Node &output_node, vector &node_list) { 485 | map> node_to_output_grads_list; 486 | vector ones; 487 | Node out_ones = ones_like_op.getNewNode(output_node); 488 | ones.push_back(out_ones); 489 | node_to_output_grads_list[output_node.hash_code] = ones; 490 | 491 | map node_to_output_grad; 492 | 493 | vector output_node_list; 494 | output_node_list.push_back(output_node); 495 | vector reverse_topo_sort; 496 | find_topo_sort(output_node_list, reverse_topo_sort); 497 | reverse(reverse_topo_sort.begin(), reverse_topo_sort.end()); 498 | 499 | for (Node &node: reverse_topo_sort) { 500 | Node grad = sum_node_list(node_to_output_grads_list[node.hash_code]); 501 | node_to_output_grad[node.hash_code] = grad; 502 | vector input_grads = node.op->gradient(node, grad); // 计算当前节点前驱的梯度 503 | for (int i = 0; i < node.input.size(); i++) { 504 | node_to_output_grads_list[node.input[i].hash_code] = node_to_output_grads_list[node.input[i].hash_code]; 505 | node_to_output_grads_list[node.input[i].hash_code].push_back(input_grads[i]); 506 | } 507 | } 508 | vector grad_node_list; 509 | for (Node &node: node_list) { 510 | grad_node_list.push_back(node_to_output_grad[node.hash_code]); 511 | } 512 | return grad_node_list; 513 | } 514 | 515 | vector split(const string &s, char delim) { 516 | stringstream ss(s); 517 | string item; 518 | vector tokens; 519 | while (getline(ss, item, delim)) { 520 | tokens.push_back(item); 521 | } 522 | return tokens; 523 | } 524 | 525 | vector operator/(const vector &m2, const float m1) { 526 | 527 | /* Returns the product of a float and a vectors (elementwise multiplication). 
528 | Inputs: 529 | m1: float 530 | m2: vector 531 | Output: vector, m1 * m2, product of two vectors m1 and m2 532 | */ 533 | 534 | const unsigned long VECTOR_SIZE = m2.size(); 535 | vector product(VECTOR_SIZE); 536 | 537 | for (unsigned i = 0; i != VECTOR_SIZE; ++i) { 538 | product[i] = m2[i] / m1; 539 | }; 540 | 541 | return product; 542 | } 543 | 544 | void load_data(vector &X_train, vector &y_train, string src) { 545 | string line; 546 | vector line_v; 547 | ifstream myfile(src); 548 | if (myfile.is_open()) { 549 | while (getline(myfile, line)) { 550 | line_v = split(line, '\t'); 551 | int digit = strtof((line_v[0]).c_str(), 0); 552 | for (unsigned i = 0; i < 10; ++i) { 553 | if (i == digit) { 554 | y_train.emplace_back(1.); 555 | } else y_train.emplace_back(0.); 556 | } 557 | 558 | int size = static_cast(line_v.size()); 559 | for (unsigned i = 1; i < size; ++i) { 560 | X_train.emplace_back(strtof((line_v[i]).c_str(), 0)); 561 | } 562 | } 563 | X_train = X_train / 255.0; 564 | myfile.close(); 565 | } 566 | } 567 | 568 | void read_batch_data(MatrixXd &input_val, MatrixXd &y_true_val, vector &X_train, vector &y_train, 569 | int batch_size) { 570 | 571 | int rand_indx = random(42000 - batch_size); 572 | for (unsigned i = rand_indx * 784; i < (rand_indx + batch_size) * 784; i += 784) { 573 | for (unsigned j = 0; j < 784; ++j) { 574 | input_val(i / 784 - rand_indx, j) = X_train[i + j]; 575 | } 576 | } 577 | for (unsigned i = rand_indx * 10; i < (rand_indx + batch_size) * 10; i += 10) { 578 | for (unsigned j = 0; j < 10; ++j) { 579 | y_true_val(i / 10 - rand_indx, j) = y_train[i + j]; 580 | } 581 | } 582 | } 583 | 584 | 585 | Executor::Executor(vector &eval_node_list) { 586 | this->node_list = eval_node_list; 587 | } 588 | 589 | vector Executor::run(map &feed_dic) { 590 | vector topo_order; 591 | find_topo_sort(this->node_list, topo_order); 592 | vector input_vals; // 输入 593 | vector output_vals; // 输出 594 | for (Node node: topo_order) { 595 | if (node.isPlaceHolder) { 596 | continue; 597 | } 598 | input_vals.clear(); 599 | for (Node in_node: node.input) { 600 | input_vals.push_back(feed_dic[in_node.hash_code]); 601 | } 602 | auto res = node.op->compute(node, input_vals); 603 | feed_dic[node.hash_code] = res[0]; 604 | } 605 | for (Node node: this->node_list) { 606 | output_vals.push_back(feed_dic[node.hash_code]); 607 | } 608 | return output_vals; 609 | } 610 | 611 | -------------------------------------------------------------------------------- /src/Model.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../include/Model.h" 3 | #include 4 | #include 5 | 6 | using namespace Eigen; 7 | using namespace std; 8 | 9 | extern ReluOp relu_op; 10 | extern SoftmaxOp softmax_op; 11 | 12 | 13 | Layer::Layer(int layer_shape, const string& layer_name, Layer &previous_layer, const string& activation){ 14 | // Dense layer 15 | this->shape = layer_shape; 16 | this->input_node = Variable(std::move(layer_name + " weights")); 17 | this->layer = previous_layer.output * input_node; 18 | if(activation == "relu"){ 19 | this->output = relu_op.getNewNode(this->layer); 20 | }else{ 21 | this->output = this->layer; 22 | } 23 | matrix_weight = MatrixXd::Random(previous_layer.shape,layer_shape); 24 | } 25 | 26 | Layer::Layer(int input_shape, string input_name, int batch_size){ 27 | // Input layer 28 | this->shape = input_shape; 29 | input = Variable(std::move(input_name)); 30 | this->input_node = this->input; 31 | this->layer = this->input; 32 | this->output 
= this->input; 33 | matrix_weight = MatrixXd::Random(batch_size, input_shape); 34 | } 35 | 36 | Layer::Layer(int layer_shape, const string& layer_name, Layer &previous_layer, int batch_size){ 37 | // Output layer 38 | this->shape = layer_shape; 39 | this->y_true = Variable("Ground Truth"); 40 | this->input_node = Variable(std::move(layer_name + " weights")); 41 | this->layer = previous_layer.output * input_node; 42 | this->output = softmax_op.getNewNode(this->layer, y_true); 43 | matrix_weight = MatrixXd::Random(previous_layer.shape,layer_shape); 44 | y_true_val = MatrixXd::Random(batch_size, layer_shape); 45 | } 46 | 47 | 48 | Model::Model(int batch_size_, int training_iter_){ 49 | this->lr = .1/batch_size_; 50 | this->batch_size = batch_size_; 51 | this->training_iter = training_iter_; 52 | } 53 | 54 | void Model::Dense(int layer_shape, const string& layer_name, const string& activation){ 55 | if(sequential.empty()){ 56 | cout << "Need input layer!" << endl; 57 | return; 58 | } 59 | Layer new_Layer = Layer(layer_shape, layer_name, sequential.back(), activation); 60 | sequential.emplace_back(new_Layer); 61 | } 62 | 63 | void Model::Input(int input_shape, string input_name){ 64 | if(!sequential.empty()){ 65 | cout << "Model not empty!" << endl; 66 | sequential.clear(); 67 | cout << "Model has been cleared!" << endl; 68 | } 69 | sequential.emplace_back(Layer(input_shape, std::move(input_name), batch_size)); 70 | } 71 | 72 | void Model::Output(int layer_shape, const string& layer_name){ 73 | Layer new_Layer = Layer(layer_shape, layer_name, sequential.back(), batch_size); 74 | sequential.emplace_back(new_Layer); 75 | } 76 | 77 | void Model::compile(){ 78 | cout << "------------------------------------" << endl; 79 | cout << "Building graph..." << endl; 80 | vector input_nodes; 81 | for(Layer &layer : sequential){ 82 | // 将模型中每一层的节点送入 83 | input_nodes.emplace_back(layer.input_node); 84 | feed_dic[layer.input_node.hash_code] = layer.matrix_weight; 85 | } 86 | // 将输出层的真实值节点送入 87 | input_nodes.emplace_back(sequential.back().y_true); 88 | grads = gradients(sequential.back().output, input_nodes); 89 | for(int i = 0; i < grads.size(); i++){ 90 | sequential[i].grad_node_hash = grads[i].hash_code; 91 | } 92 | cout << "Build graph done..." << endl; 93 | cout << "------------------------------------" << endl; 94 | cout << "Building Executor..." << endl; 95 | exe_list.push_back(sequential.back().output); 96 | exe_list.insert(exe_list.end(), grads.begin(), grads.end()); 97 | executor = Executor(exe_list); 98 | cout << "Build Executor done..." 
<< endl; 99 | } 100 | 101 | void Model::loadData(vector &X_train_, vector y_train_){ 102 | this->X_train = X_train_; 103 | this->y_train = y_train_; 104 | } 105 | 106 | void Model::run(){ 107 | int iter_; 108 | cout << "------------------------------------" << endl; 109 | cout << "START TRAINING" << endl; 110 | cout << "------------------------------------" << endl; 111 | for(iter_ = 0; iter_ < training_iter; iter_++) { 112 | // Read training data 113 | read_batch_data(sequential.front().matrix_weight, sequential.back().y_true_val, X_train, y_train, batch_size); 114 | feed_dic[sequential.front().input_node.hash_code] = sequential.front().matrix_weight; 115 | feed_dic[sequential.back().y_true.hash_code] = sequential.back().y_true_val; 116 | 117 | // Train 118 | vector res = executor.run(feed_dic); 119 | 120 | // Weight update 121 | for (int i = 1; i < sequential.size() - 1; i++) { 122 | feed_dic[sequential[i].input_node.hash_code] -= lr * feed_dic[sequential[i].grad_node_hash]; 123 | } 124 | 125 | // Loss 126 | if (iter_ % 1 == 0) { 127 | MatrixXd loss_m = feed_dic[sequential.back().output.hash_code] - feed_dic[sequential.back().y_true.hash_code]; 128 | cout << "Iteration: " << iter_ << ", Loss: " << loss_m.array().square().sum() / (batch_size * 10) << "\r" << flush; 129 | } 130 | } 131 | } 132 | 133 | --------------------------------------------------------------------------------