├── .gitignore ├── .idea └── .gitignore ├── CMakeLists.txt ├── Makefile ├── README.md ├── example.cpp ├── include ├── Conv.h ├── MatAutoDiff.h ├── MatOp.h └── Model.h ├── main.cpp ├── src ├── Conv.cpp ├── MatAutoDiff.cpp └── Model.cpp └── train.txt /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .DS_Store 3 | .vscode/configurationCache.log 4 | .vscode/dryrun.log 5 | .vscode/settings.json 6 | .vscode/targets.log 7 | .vscode/c_cpp_properties.json 8 | .vscode/launch.json 9 | .vscode/tasks.json 10 | .vscode/c_cpp_properties.json 11 | .vscode/c_cpp_properties.json 12 | model.dSYM/Contents/Resources/DWARF/model 13 | model.dSYM/Contents/Info.plist 14 | model 15 | .vscode/c_cpp_properties.json 16 | .idea/vcs.xml 17 | .idea/misc.xml 18 | .idea/modules.xml 19 | .idea/Toy_ML_Framework.iml 20 | cmake-build-debug/.cmake/api/v1/query/cache-v2 21 | cmake-build-debug/.cmake/api/v1/query/cmakeFiles-v1 22 | cmake-build-debug/.cmake/api/v1/query/codemodel-v2 23 | cmake-build-debug/.cmake/api/v1/query/toolchains-v1 24 | cmake-build-debug/.cmake/api/v1/reply/cache-v2-9602ad7e8f29f37f6c1d.json 25 | cmake-build-debug/.cmake/api/v1/reply/cmakeFiles-v1-2f8fb98736035f5c97d5.json 26 | cmake-build-debug/.cmake/api/v1/reply/codemodel-v2-a474f0d6d98ffe951faf.json 27 | cmake-build-debug/.cmake/api/v1/reply/directory-.-Debug-f5ebdc15457944623624.json 28 | cmake-build-debug/.cmake/api/v1/reply/index-2022-06-18T08-46-35-0049.json 29 | cmake-build-debug/.cmake/api/v1/reply/target-Toy_ML_Framework-Debug-f7b458d6c604adf29dcd.json 30 | cmake-build-debug/.cmake/api/v1/reply/toolchains-v1-b5fd9a4d818f14e48453.json 31 | cmake-build-debug/.ninja_deps 32 | cmake-build-debug/.ninja_log 33 | cmake-build-debug/build.ninja 34 | cmake-build-debug/cmake_install.cmake 35 | cmake-build-debug/CMakeCache.txt 36 | cmake-build-debug/CMakeFiles/3.22.3/CMakeCCompiler.cmake 37 | cmake-build-debug/CMakeFiles/3.22.3/CMakeCXXCompiler.cmake 38 | cmake-build-debug/CMakeFiles/3.22.3/CMakeDetermineCompilerABI_C.bin 39 | cmake-build-debug/CMakeFiles/3.22.3/CMakeDetermineCompilerABI_CXX.bin 40 | cmake-build-debug/CMakeFiles/3.22.3/CMakeSystem.cmake 41 | cmake-build-debug/CMakeFiles/3.22.3/CompilerIdC/CMakeCCompilerId.c 42 | cmake-build-debug/CMakeFiles/3.22.3/CompilerIdC/CMakeCCompilerId.o 43 | cmake-build-debug/CMakeFiles/3.22.3/CompilerIdCXX/CMakeCXXCompilerId.cpp 44 | cmake-build-debug/CMakeFiles/3.22.3/CompilerIdCXX/CMakeCXXCompilerId.o 45 | cmake-build-debug/CMakeFiles/clion-environment.txt 46 | cmake-build-debug/CMakeFiles/clion-log.txt 47 | cmake-build-debug/CMakeFiles/cmake.check_cache 48 | cmake-build-debug/CMakeFiles/CMakeError.log 49 | cmake-build-debug/CMakeFiles/CMakeOutput.log 50 | cmake-build-debug/CMakeFiles/rules.ninja 51 | cmake-build-debug/CMakeFiles/TargetDirectories.txt 52 | cmake-build-debug/CMakeFiles/Toy_ML_Framework.dir/main.cpp.o 53 | cmake-build-debug/Testing/Temporary/LastTest.log 54 | cmake-build-debug/Toy_ML_Framework 55 | *.json 56 | *.o 57 | cmake-build-debug/CMakeFiles/FindOpenMP/OpenMPTryFlag.cpp 58 | cmake-build-debug/CMakeFiles/FindOpenMP/OpenMPTryFlag.c 59 | cmake-build-debug/CMakeFiles/FindOpenMP/OpenMPCheckVersion.cpp 60 | cmake-build-debug/CMakeFiles/FindOpenMP/OpenMPCheckVersion.c 61 | cmake-build-debug/CMakeFiles/FindOpenMP/ompver_CXX.bin 62 | cmake-build-debug/CMakeFiles/FindOpenMP/ompver_C.bin 63 | .idea/inspectionProfiles/Project_Default.xml 64 | -------------------------------------------------------------------------------- /.idea/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.22) 2 | project(Toy_ML_Framework) 3 | 4 | set(BREW_DIR /opt/homebrew/Cellar) 5 | 6 | #set(BLAS_INC_DIR ${BREW_DIR}/openblas/0.3.17/include) 7 | #set(BLAS_LINK_DIR ${BREW_DIR}/openblas/0.3.17/lib) 8 | set(EIGEN_INC_DIR ${BREW_DIR}/eigen/3.3.9/include/eigen3) 9 | set(IDIR include) 10 | 11 | set(CMAKE_CXX_STANDARD 11) 12 | 13 | 14 | 15 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -g -fopenmp") 16 | 17 | # Run main.cpp 18 | add_executable(Toy_ML_Framework main.cpp include/MatOp.h src/MatAutoDiff.cpp include/MatAutoDiff.h include/MatAutoDiff.h src/Model.cpp include/Model.h src/Conv.cpp include/Conv.h) 19 | 20 | # Run img2col 21 | #add_executable(Toy_ML_Framework include/MatOp.h src/MatAutoDiff.cpp include/MatAutoDiff.h include/MatAutoDiff.h src/Model.cpp include/Model.h src/Conv.cpp include/Conv.h) 22 | 23 | # Add cblas 24 | #include_directories (${BLAS_INC_DIR}) 25 | #link_directories(${BLAS_LINK_DIR}) 26 | #target_link_libraries(Toy_ML_Framework -lblas) 27 | 28 | include_directories (${EIGEN_INC_DIR}) 29 | 30 | find_package(OpenMP) 31 | if(OpenMP_CXX_FOUND) 32 | target_link_libraries(Toy_ML_Framework PUBLIC OpenMP::OpenMP_CXX) 33 | endif() 34 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | IDIR=include 2 | CBLASDIR=/opt/homebrew/Cellar/openblas/0.3.17/include 3 | CBLASLIB=/opt/homebrew/Cellar/openblas/0.3.17/lib 4 | CXX=g++-11 5 | CXXFLAGS=-I$(IDIR) -I$(CBLASDIR) -L$(CBLASLIB) -std=c++11 -fopenmp -O3 6 | 7 | ODIR=src 8 | LDIR =../lib 9 | 10 | LIBS=-lm -lblas 11 | 12 | _DEPS = deep_core.h vector_ops.h 13 | DEPS = $(patsubst %,$(IDIR)/%,$(_DEPS)) 14 | 15 | _OBJ = deep_core.o vector_ops.o 16 | OBJ = $(patsubst %,$(ODIR)/%,$(_OBJ)) 17 | 18 | %.o: %.cpp $(DEPS) 19 | $(CXX) -c -o $@ $< ${CXXFLAGS} 20 | 21 | %.o: %.cxx $(DEPS) 22 | $(CXX) -c -o $@ $< ${CXXFLAGS} 23 | 24 | nnetwork_mpi: $(OBJ) nnetwork_mpi.o 25 | $(CXX) -o $@ $^ $(LIBS) 26 | 27 | nnetwork: $(OBJ) nnetwork.o 28 | $(CXX) -o $@ $^ $(LIBS) 29 | 30 | model: $(OBJ) model.o 31 | $(CXX) -o $@ $^ $(LIBS) 32 | 33 | run_pthreads: 34 | ./nnetwork 35 | 36 | run_parallel: 37 | mpirun -np 4 ./nnetwork_mpi 38 | 39 | all: clean nnetwork_mpi nnetwork 40 | .PHONY: clean 41 | 42 | default: clean nnetwork 43 | 44 | .DEFAULT_GOAL := default 45 | 46 | clean: 47 | rm -f $(ODIR)/*.o *.o nnetwork_mpi nnetwork model 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Toy ML Framework 2 | 3 | This framework grew out of CSE599W Assignment 1, where the goal is to build a machine learning system around automatic differentiation. The framework will contain the following modules: automatic differentiation, a user interface, CUDA ops, and more. 4 | 5 | ### Automatic Differentiation 6 | 7 | Automatic differentiation (AD) is the core module of the whole system.
With AD there is no need to handle differentiation manually; the system does it for us. All we need to do is implement each Op's compute and gradient functions. 8 | 9 | *Computation graph* 10 | 11 | The basis of automatic differentiation is the computation graph. Depending on how the graph is traversed, AD can be divided into Forward Mode and Reverse Mode. 12 | 13 | Forward Mode traverses the entire computation graph once for every independent input variable in order to compute its derivative, which generates a lot of unnecessary node visits. 14 | 15 | Reverse Mode traverses the graph in reverse topological order, starting from the output node. During this single reverse pass, we record the gradient contributed to each predecessor node; when we reach a node, we sum up all the gradient components arriving from the nodes behind it to get its total gradient, use that gradient to build the gradient components of its own predecessors, and finally obtain the gradient of the output y with respect to every input variable. 16 | 17 | This framework uses Reverse Mode and generates a static computation graph. 18 | 19 | *Graph Node* 20 | 21 | A graph node is defined as follows. 22 | 23 | ```C++ 24 | class Node { 25 | public: 26 | Node(){ 27 | this->const_attr = 0.0; 28 | } 29 | vector<Node> input; 30 | Op *op; 31 | float const_attr; 32 | string name; 33 | int hash_code; 34 | bool isPlaceHolder; 35 | 36 | virtual Node operator+(Node &nodeB); 37 | 38 | virtual Node operator*(Node &nodeB); 39 | 40 | ~Node() = default; 41 | }; 42 | ``` 43 | 44 | Each node has its own operator, which indicates what type of computation is performed at that node. The operator is defined as follows. 45 | 46 | ```C++ 47 | class Op { 48 | public: 49 | Op() = default; 50 | 51 | virtual Node getNewNode(); 52 | // T could be a vector or a matrix 53 | virtual vector<T> compute(Node &node, vector<T> &input_vals); 54 | 55 | virtual vector<Node> gradient(Node &node, Node &output_gradient); 56 | 57 | ~Op() = default; 58 | }; 59 | ``` 60 | 61 | Each operator needs to implement three functions: getNewNode creates a node bound to the current operator, while compute and gradient express the actual math. 62 | 63 | *Constructing back-propagation graph of gradients* 64 | 65 | The running example is y = x1 * x2 + x3. 66 | 67 | The gradient back-propagation graph is constructed by the gradients function, whose implementation can be found in src/MatAutoDiff.cpp. The gradients function has two inputs: the output node, which is our y, and the list of input nodes, which is our [x1, x2, x3]. Since we need to back-propagate, we traverse the nodes in reverse topological order, ensuring that when we visit a node we have already visited all the nodes after it (i.e. we have collected all of its gradient components). 68 | 69 | The first thing we know is that the derivative of the output node y with respect to itself (the expression x1*x2+x3) is 1. We store this value in the grad_map entry corresponding to y. 70 | 71 | Then we topologically sort the computation graph and reverse the order to get the node visit order, after which we can construct the back-propagation graph, as in the sketch below. 72 |
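To make this concrete, here is a minimal sketch of how the y = x1 * x2 + x3 graph could be built and differentiated with the interfaces described above (Variable, the overloaded operators, and the gradients function). The container template arguments are spelled out here even though they appear stripped in the listings above, so treat the exact types as an assumption.

```C++
// Sketch only: build the graph for y = x1 * x2 + x3 and request dy/dx1, dy/dx2, dy/dx3.
Node x1 = Variable("x1");
Node x2 = Variable("x2");
Node x3 = Variable("x3");
Node x1_mul_x2 = x1 * x2;   // node whose op is the matrix-multiply operator
Node y = x1_mul_x2 + x3;    // node whose op is the matrix-add operator

vector<Node> input_nodes;
input_nodes.push_back(x1);
input_nodes.push_back(x2);
input_nodes.push_back(x3);

// gradients() walks the graph in reverse topological order and returns
// one gradient expression node per input: [dy/dx1, dy/dx2, dy/dx3].
vector<Node> grads = gradients(y, input_nodes);
```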
73 | For each node, we follow the process below. 74 | 75 | 1. Look up the table to collect the gradient components passed back by all of the current node's output nodes and sum them up (each gradient component is itself a Node; as mentioned above, the concrete gradient values are still computed later by the compute function, so the "sum" here just builds the expression node for the gradient sum). 76 | 77 | 2. Save the gradient sum of the current node. 78 | 79 | 3. Use the gradient function of the current node's operator to build the gradient-component expression nodes for all of its input nodes. 80 | 81 | 4. Append the gradient-component node of each input node to node_to_output_grads_list, where it waits to be summed. 82 | 83 | After getting the gradient expression nodes for every node, we return them in the order of the requested input nodes and hand them to the executor for evaluation. 84 | 85 | *The Executor* 86 | 87 | The constructor input to Executor is a list [y, x1_grad, x2_grad, x3_grad], where y is the expression we need to evaluate, followed by the gradient node of each variable with respect to y. 88 | 89 | Executor has a run function that executes the entire computation graph together with the gradient back-propagation graph. 90 | 91 | We already have all the nodes when we construct the executor instance; we only need to sort them topologically to get the node traversal list. Since the gradient back-propagation graph has already been constructed, we are effectively merging the two graphs into one for the topological sort. 92 | 93 | During forward propagation we compute the value of each node; we then enter the back-propagation part of the graph and keep using the compute function under the corresponding rules, which now evaluates the gradient expression nodes. Finally, we return the values of the nodes [y, x1_grad, x2_grad, x3_grad] to the main function, as sketched below.
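Putting the pieces together, a driver in the style of example.cpp looks roughly like the sketch below. The executor is handed the list [y, x1_grad, x2_grad, x3_grad]; run() reads and fills a feed dictionary keyed by each node's hash_code, so the output value and the gradients can be looked up in it afterwards. The matrix shapes and container template arguments here are illustrative assumptions.

```C++
// Sketch only: evaluate y and the three gradients for one set of input values.
vector<Node> exe_list;
exe_list.push_back(y);
exe_list.insert(exe_list.end(), grads.begin(), grads.end());
Executor executor = Executor(exe_list);

map<int, MatrixXd> feed_dic;                  // node hash_code -> node value
feed_dic[x1.hash_code] = MatrixXd::Random(4, 4);
feed_dic[x2.hash_code] = MatrixXd::Random(4, 4);
feed_dic[x3.hash_code] = MatrixXd::Random(4, 4);

// One topological sweep runs the forward pass and the gradient graph;
// res holds [y_value, dy/dx1, dy/dx2, dy/dx3] in the order of exe_list.
vector<MatrixXd> res = executor.run(feed_dic);
```

In a training loop (see example.cpp), the same feed_dic is then used for the weight update: each weight entry is decremented by the learning rate times the value stored under its gradient node's hash_code.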
94 | 95 | ### Todo List 96 | 97 | * Tensorflow/Keras like User Interface 98 | * CUDA Operators 99 | * Convolution Operators 100 | * Computation graph serialization and deserialization 101 | -------------------------------------------------------------------------------- /example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "include/Model.h" 4 | #include 5 | #include 6 | 7 | #define BATCH_SIZE 10 8 | #define ITER 100 9 | 10 | using namespace Eigen; 11 | using namespace std; 12 | 13 | extern ReluOp relu_op; 14 | extern SoftmaxOp softmax_op; 15 | 16 | string training_data = "train.txt"; 17 | 18 | void Interface_example(){ 19 | Model model = Model(32, 100); 20 | 21 | model.Input(784, "input layer"); 22 | model.Dense(128, "Hidden layer 1", "relu"); 23 | model.Dense(64, "Hidden layer 2", "relu"); 24 | model.Output(10, "Output layer"); 25 | 26 | model.compile(); 27 | model.loadData(training_data); 28 | model.run(); 29 | } 30 | 31 | void NN_test(){ 32 | map node_to_gradient; 33 | cout << "3 layers NN test:" << endl; 34 | 35 | Node input = Variable("input"); 36 | Node W1 = Variable("W1"); 37 | Node W2 = Variable("W2"); 38 | Node W3 = Variable("W3"); 39 | Node y_true = Variable("y_true"); 40 | Node input_dot_W1 = input * W1; 41 | Node W1_relu = relu_op.getNewNode(input_dot_W1); 42 | Node W1_dot_W2 = W1_relu * W2; 43 | Node W2_relu = relu_op.getNewNode(W1_dot_W2); 44 | Node W2_dot_W3 = W2_relu * W3; 45 | Node y_predict = softmax_op.getNewNode(W2_dot_W3, y_true); 46 | 47 | vector input_nodes; 48 | input_nodes.push_back(input); 49 | input_nodes.push_back(W1); 50 | input_nodes.push_back(W2); 51 | input_nodes.push_back(W3); 52 | input_nodes.push_back(y_true); 53 | cout << "------------------------------------" << endl; 54 | cout << "Building graph..." << endl; 55 | vector grads = gradients(y_predict, input_nodes); 56 | cout << "Building graph done..." << endl; 57 | 58 | 59 | node_to_gradient[W1.hash_code] = grads[1].hash_code; 60 | node_to_gradient[W2.hash_code] = grads[2].hash_code; 61 | node_to_gradient[W3.hash_code] = grads[3].hash_code; 62 | 63 | vector exe_list; 64 | exe_list.push_back(y_predict); 65 | exe_list.insert(exe_list.end(), grads.begin(), grads.end()); 66 | Executor executor = Executor(exe_list); 67 | 68 | MatrixXd input_val(BATCH_SIZE, 784); 69 | MatrixXd W1_val = MatrixXd::Random(784,128); 70 | MatrixXd W2_val = MatrixXd::Random(128,64); 71 | MatrixXd W3_val = MatrixXd::Random(64,10); 72 | MatrixXd y_true_val(BATCH_SIZE, 10); 73 | 74 | float lr = .1/BATCH_SIZE; 75 | 76 | vector X_train; 77 | vector y_train; 78 | cout << "------------------------------------" << endl; 79 | cout << "Start loading data..." << endl; 80 | load_data(X_train, y_train, training_data, BATCH_SIZE); 81 | cout << "Finish loading data..." 
<< endl; 82 | 83 | map feed_dic; 84 | feed_dic[W1.hash_code] = W1_val; 85 | feed_dic[W2.hash_code] = W2_val; 86 | feed_dic[W3.hash_code] = W3_val; 87 | 88 | int iter_; 89 | cout << "------------------------------------" << endl; 90 | cout << "START TRAINING" << endl; 91 | cout << "------------------------------------" << endl; 92 | for(iter_ = 0; iter_ < ITER; iter_++){ 93 | // Read training data 94 | read_batch_data(input_val, y_true_val, X_train, y_train, ITER); 95 | feed_dic[input.hash_code] = input_val; 96 | feed_dic[y_true.hash_code] = y_true_val; 97 | 98 | // Train 99 | vector res = executor.run(feed_dic); 100 | 101 | // Weight update 102 | feed_dic[W1.hash_code] = feed_dic[W1.hash_code] - lr * feed_dic[node_to_gradient[W1.hash_code]]; 103 | feed_dic[W2.hash_code] = feed_dic[W2.hash_code] - lr * feed_dic[node_to_gradient[W2.hash_code]]; 104 | feed_dic[W3.hash_code] = feed_dic[W3.hash_code] - lr * feed_dic[node_to_gradient[W3.hash_code]]; 105 | 106 | // Loss 107 | if(iter_ % 10 == 0){ 108 | MatrixXd loss_m = feed_dic[y_predict.hash_code] - feed_dic[y_true.hash_code]; 109 | cout << "Iteration: " << iter_ << ", Loss: " << loss_m.array().square().sum() / (BATCH_SIZE * 10) << endl; 110 | cout << "------------------------------------" << endl; 111 | } 112 | } 113 | } -------------------------------------------------------------------------------- /include/Conv.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #ifndef TOY_ML_FRAMEWORK_CONV_H 9 | #define TOY_ML_FRAMEWORK_CONV_H 10 | 11 | #endif //TOY_ML_FRAMEWORK_CONV_H 12 | 13 | using namespace std; 14 | using namespace Eigen; 15 | 16 | MatrixXd img2col2D(const MatrixXd &input, const vector &kernels, 17 | const int &kernel_H, const int &kernel_W, 18 | const int &paddle, const int &stride); 19 | 20 | MatrixXd MaxPooling2D(const MatrixXd &input, 21 | const int filter_H, const int filter_W, const int stride, 22 | const int img_H, const int img_W, 23 | vector>> &pooling_loc); 24 | 25 | void MaxPoolingPrime(const int img_H, const int img_W, const MatrixXd &input, const vector>> &pooling_loc); 26 | 27 | 28 | -------------------------------------------------------------------------------- /include/MatAutoDiff.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "MatOp.h" 6 | #include 7 | #include 8 | 9 | class Node { 10 | public: 11 | Node() { 12 | this->matmul_attr_trans_A = false; 13 | this->matmul_attr_trans_B = false; 14 | } 15 | 16 | vector input; 17 | Op *op; 18 | string name; 19 | int hash_code; 20 | MatrixXd res; 21 | bool isPlaceHolder; 22 | bool matmul_attr_trans_A; 23 | bool matmul_attr_trans_B; 24 | 25 | // Conv and Pooling kernel parameters 26 | int filter_H; 27 | int filter_W; 28 | int stride; 29 | int img_H; 30 | int img_W; 31 | vector>> pooling_loc; 32 | 33 | 34 | 35 | virtual Node operator*(Node &nodeB); 36 | 37 | virtual Node operator+(Node &nodeB); 38 | 39 | ~Node() = default; 40 | }; 41 | 42 | void topo_sort_dfs(Node &node, std::unordered_set &visited, vector &topo_order); 43 | void find_topo_sort(vector &node_list, vector &topo_order); 44 | Node sum_node_list(vector &node_list); 45 | 46 | class Executor { 47 | public: 48 | vector node_list; 49 | Executor() = default; 50 | 51 | Executor(vector &eval_node_list); 52 | 53 | ~Executor() = default; 54 | 55 | vector run(map &feed_dic); 56 | 57 | }; 58 | 59 | class 
Placeholders : public Op { 60 | public: 61 | Node getNewNode(); 62 | 63 | vector compute(Node &nodeA, vector &input_vals) override; 64 | 65 | vector gradient(Node &node, Node &output_gradient) override; 66 | 67 | Placeholders() = default; 68 | 69 | ~Placeholders() = default; 70 | }; 71 | 72 | class ReluPrimeOp : public Op { 73 | public: 74 | ReluPrimeOp() = default; 75 | 76 | Node getNewNode(Node &nodeA, Node &nodeB); 77 | 78 | vector compute(Node &nodeA, vector &input_vals) override; 79 | 80 | vector gradient(Node &node, Node &output_gradient) override; 81 | 82 | ~ReluPrimeOp() = default; 83 | }; 84 | 85 | class ReluOp : public Op { 86 | public: 87 | ReluOp() = default; 88 | 89 | Node getNewNode(Node &nodeA); 90 | 91 | vector compute(Node &nodeA, vector &input_vals) override; 92 | 93 | vector gradient(Node &node, Node &output_gradient) override; 94 | 95 | ~ReluOp() = default; 96 | 97 | }; 98 | 99 | class MaxPoolingPrimeOp : public Op { 100 | public: 101 | MaxPoolingPrimeOp() = default; 102 | 103 | Node getNewNode(Node &nodeA, const int img_H, const int img_W); 104 | 105 | vector compute(Node &nodeA, vector &input_vals) override; 106 | 107 | vector gradient(Node &node, Node &output_gradient) override; 108 | 109 | ~MaxPoolingPrimeOp() = default; 110 | }; 111 | 112 | class MaxPoolingOp : public Op { 113 | public: 114 | MaxPoolingOp() = default; 115 | 116 | Node getNewNode(Node &nodeA, const int filter_H, const int filter_W, const int stride); 117 | 118 | vector compute(Node &nodeA, vector &input_vals) override; 119 | 120 | vector gradient(Node &node, Node &output_gradient) override; 121 | 122 | ~MaxPoolingOp() = default; 123 | }; 124 | 125 | 126 | 127 | class SoftmaxGradient : public Op { 128 | public: 129 | SoftmaxGradient() = default; 130 | 131 | Node getNewNode(Node &nodeA, Node &nodeB); 132 | 133 | vector compute(Node &nodeA, vector &input_vals) override; 134 | 135 | vector gradient(Node &node, Node &output_gradient) override; 136 | }; 137 | 138 | class SoftmaxOp : public Op { 139 | public: 140 | SoftmaxOp() = default; 141 | 142 | Node getNewNode(Node &nodeA, Node &nodeB); 143 | 144 | vector compute(Node &nodeA, vector &input_vals) override; 145 | 146 | vector gradient(Node &node, Node &output_gradient) override; 147 | 148 | ~SoftmaxOp() = default; 149 | }; 150 | 151 | class OnesLikeOp : public Op { 152 | public: 153 | OnesLikeOp() = default; 154 | 155 | Node getNewNode(Node &nodeA); 156 | 157 | vector compute(Node &nodeA, vector &input_vals) override; 158 | 159 | vector gradient(Node &node, Node &output_gradient) override; 160 | 161 | ~OnesLikeOp() = default; 162 | }; 163 | 164 | class ZerosLikeOp : public Op { 165 | public: 166 | ZerosLikeOp() = default; 167 | 168 | Node getNewNode(Node &nodeA); 169 | 170 | vector compute(Node &nodeA, vector &input_vals) override; 171 | 172 | vector gradient(Node &node, Node &output_gradient) override; 173 | 174 | ~ZerosLikeOp() = default; 175 | }; 176 | 177 | class MatMulOp : public Op { 178 | public: 179 | MatMulOp() = default; 180 | 181 | Node getNewNode(Node &nodeA, Node &nodeB, bool trans_A, bool trans_B); 182 | 183 | vector compute(Node &nodeA, vector &input_vals) override; 184 | 185 | vector gradient(Node &node, Node &output_gradient) override; 186 | 187 | ~MatMulOp() = default; 188 | }; 189 | 190 | class MatAddOp : public Op { 191 | public: 192 | MatAddOp() = default; 193 | 194 | Node getNewNode(Node &nodeA, Node &nodeB, bool trans_A, bool trans_B); 195 | 196 | vector compute(Node &nodeA, vector &input_vals) override; 197 | 198 | vector 
gradient(Node &node, Node &output_gradient) override; 199 | 200 | ~MatAddOp() = default; 201 | }; 202 | 203 | 204 | 205 | Node Variable(string var_name); 206 | vector gradients(Node &output_node, vector &node_list); 207 | void load_data(vector &X_train, vector &y_train, string src); 208 | void read_batch_data(MatrixXd &input_val, MatrixXd &y_true_val, vector &X_train, vector &y_train, int batch_size); -------------------------------------------------------------------------------- /include/MatOp.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | using namespace Eigen; 9 | 10 | class Node; 11 | class Op { 12 | public: 13 | Op() = default; 14 | 15 | virtual Node getNewNode(); 16 | 17 | virtual vector compute(Node &node, vector &input_vals); 18 | 19 | virtual vector gradient(Node &node, Node &output_gradient); 20 | 21 | ~Op() = default; 22 | }; 23 | 24 | -------------------------------------------------------------------------------- /include/Model.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../include/MatAutoDiff.h" 4 | #include 5 | #include 6 | 7 | using namespace Eigen; 8 | using namespace std; 9 | 10 | class Layer { 11 | public: 12 | // public attribute 13 | int shape; 14 | Node input_node; 15 | Node layer; 16 | Node output; 17 | Node grad; 18 | int grad_node_hash; 19 | MatrixXd matrix_weight; 20 | 21 | // input layer 22 | Node input; 23 | 24 | //output layer 25 | Node y_true; 26 | MatrixXd y_true_val; 27 | 28 | Layer(int layer_shape, const string& layer_name, Layer &previous_layer, const string& activation); // Dense layer 29 | 30 | Layer(int input_shape, string input_name, int batch_size); // Input layer 31 | 32 | Layer(int layer_shape, const string& layer_name, Layer &previous_layer, int batch_size); // Output layer 33 | 34 | ~Layer() = default; 35 | }; 36 | 37 | class Model { 38 | public: 39 | vector sequential; 40 | vector grads; 41 | Executor executor; 42 | vector exe_list; 43 | float lr = .1/batch_size; 44 | map feed_dic; 45 | vector X_train; 46 | vector y_train; 47 | int batch_size = 10; 48 | int training_iter = 100; 49 | 50 | Model() = default; 51 | 52 | Model(int batch_size_, int training_iter_); 53 | 54 | void Dense(int layer_shape, const string& layer_name, const string& activation); 55 | 56 | void Input(int input_shape, string input_name); 57 | 58 | void Output(int layer_shape, const string& layer_name); 59 | 60 | void compile(); 61 | 62 | void loadData(vector &X_train_, vector y_train_); 63 | 64 | void run(); 65 | 66 | ~Model() = default; 67 | 68 | }; -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "include/Model.h" 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | using namespace std; 10 | using namespace Eigen; 11 | 12 | int main() { 13 | Eigen::initParallel(); 14 | string training_data = "../train.txt"; 15 | vector X; 16 | vector y; 17 | 18 | cout << "------------------------------------" << endl; 19 | cout << "Loading data..." << endl; 20 | load_data(X, y, training_data); 21 | cout << "Finish loading data..." 
<< endl; 22 | 23 | Model model = Model(32, 100); 24 | 25 | model.Input(784, "input layer"); 26 | model.Dense(512, "Hidden layer 1", "relu"); 27 | model.Dense(512, "Hidden layer 2", "relu"); 28 | model.Dense(512, "Hidden layer 3", "relu"); 29 | model.Dense(512, "Hidden layer 4", "relu"); 30 | model.Dense(10, "Hidden layer 5", "none"); 31 | model.Output(10, "Output layer"); 32 | 33 | model.compile(); 34 | model.loadData(X, y); 35 | model.run(); 36 | } 37 | 38 | -------------------------------------------------------------------------------- /src/Conv.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/Conv.h" 2 | using namespace std; 3 | using namespace Eigen; 4 | 5 | /* 6 | * input follows an NCHW layout: the outermost vector stores each image and each Matrix stores one image 7 | * kernel also follows NCHW: the outermost vector stores the convolution kernels and each Matrix stores the corresponding kernel 8 | * For simplicity, only single-channel convolution is implemented, i.e. C = 1 9 | */ 10 | MatrixXd img2col2D(const MatrixXd &input, const vector<MatrixXd> &kernels, const int &kernel_H, const int &kernel_W, const int &paddle, const int &stride){ 11 | vector<MatrixXd> feature_maps; 12 | long img_H = input.rows(); 13 | long img_W = input.cols(); 14 | /* 15 | * Pad the input img 16 | */ 17 | // Create an all-zero padded matrix 18 | MatrixXd paddle_img = MatrixXd::Zero(img_H + 2 * paddle, img_W + 2 * paddle); 19 | // Place the image at the center of the padded matrix 20 | paddle_img.block(paddle, paddle, img_H, img_W) = input; 21 | 22 | img_H = paddle_img.rows(); 23 | img_W = paddle_img.cols(); 24 | 25 | // Build the input matrix 26 | int input_matrix_W = kernel_H * kernel_W; 27 | int input_matrix_H = int(((img_H - kernel_H + 1) / stride) * ((img_W - kernel_W + 1) / stride)); 28 | MatrixXd input_matrix(input_matrix_H, input_matrix_W); 29 | int row_idx = 0; 30 | int col_idx = 0; 31 | for(int i = 0; i < img_H - kernel_H + 1; i += stride){ 32 | for(int j = 0; j < img_W - kernel_W + 1; j += stride){ 33 | col_idx = 0; 34 | for(int m = i; m < i + kernel_H; ++m){ 35 | for(int n = j; n < j + kernel_W; ++n){ 36 | input_matrix(row_idx, col_idx) = paddle_img(m, n); 37 | col_idx++; 38 | } 39 | } 40 | row_idx++; 41 | } 42 | } 43 | 44 | // Build the convolution kernel matrix 45 | int kernel_nums = kernels.size(); 46 | MatrixXd kernel_matrix(kernel_W * kernel_H, kernel_nums); 47 | for(int i = 0; i < kernel_nums; ++i){ 48 | for(int m = 0; m < kernel_H; ++m){ 49 | for(int n = 0; n < kernel_W; ++n){ 50 | kernel_matrix(m * kernel_H + n, i) = kernels[i](m, n); 51 | } 52 | } 53 | } 54 | 55 | // Recover the feature map 56 | MatrixXd feature_matrix = input_matrix * kernel_matrix; 57 | // int feature_H = int(paddle_img.rows()) - kernel_H + 1; 58 | // int feature_W = int(paddle_img.cols()) - kernel_W + 1; 59 | // for(int i = 0; i < kernel_nums; ++i){ 60 | // MatrixXd feature = feature_matrix.col(i); 61 | // feature.resize(feature_H, feature_W); 62 | // feature_maps.emplace_back(feature); 63 | // } 64 | return std::move(feature_matrix); 65 | } 66 | /* 67 | * Input: the feature matrix produced by the img2col convolution. Each column is one n*n feature map, i.e. the feature map produced by one kernel, so the matrix has as many columns as there are kernels. 68 | * Output: likewise one column per kernel; each column is the max-pooled feature map, of size pooling_H * pooling_W 69 | */ 70 | MatrixXd MaxPooling2D(const MatrixXd &input, 71 | const int filter_H, const int filter_W, const int stride, 72 | const int img_H, const int img_W, 73 | vector<vector<pair<int, int>>> &pooling_loc){ 74 | cout << "Feature matrix:" << endl; 75 | cout << input << endl << endl; 76 | const int pooling_H = (img_H - filter_H)/stride + 1; 77 | const int pooling_W = (img_W - filter_W)/stride + 1; 78 | MatrixXd pooling_res(pooling_H * pooling_W, input.cols()); 79 | 80 | for(int k = 0; k < input.cols(); ++k){ 81 | MatrixXd feature = input.col(k); 82 | feature.resize(img_H, img_W); 83 |
feature.transposeInPlace(); // Eigen uses column-major storage by default, so resize() fills column by column, while our data is laid out row by row; the matrix therefore has to be transposed before use 84 | for(int i = 0; i < img_H; i += stride){ 85 | for(int j = 0; j < img_W; j += stride){ 86 | int pooling_row; 87 | int pooling_col; 88 | float max_val = feature.block(i, j, filter_H, filter_W).maxCoeff(&pooling_row, &pooling_col); 89 | pooling_res(i/stride * filter_W + j/stride, k) = max_val; 90 | pooling_loc[i/stride * filter_H + j/stride][k] = pair<int, int>((i + pooling_row) * img_H + (j + pooling_col), k); 91 | } 92 | } 93 | } 94 | cout << "Pooling results:" << endl; 95 | cout << pooling_res << endl << endl; 96 | cout << "Pooling location:" << endl; 97 | for(int k = 0; k < pooling_loc.size(); ++k){ 98 | for(int i = 0; i < pooling_loc[0].size(); ++i){ 99 | cout << "(" << pooling_loc[k][i].first << "," << pooling_loc[k][i].second << "), "; 100 | } 101 | cout << endl; 102 | } 103 | 104 | return std::move(pooling_res); 105 | } 106 | 107 | void MaxPoolingPrime(const int img_H, const int img_W, const MatrixXd &input, const vector<vector<pair<int, int>>> &pooling_loc){ 108 | MatrixXd mat = MatrixXd::Zero(img_H * img_W, input.cols()); 109 | cout << "------------------------" << endl; 110 | cout << "input diff:" << endl; 111 | cout << input << endl; 112 | int pooling_H = int(input.rows()); 113 | int pooling_W = int(input.cols()); 114 | for(int k = 0; k < input.cols(); ++k) 115 | for(int i = 0; i < pooling_H; ++i) 116 | for(int j = 0; j < pooling_W; ++j){ 117 | mat(pooling_loc[i][j].first, pooling_loc[i][j].second) = input(i, j); 118 | } 119 | cout << "MaxPooling back diff:" << endl; 120 | cout << mat << endl; 121 | } 122 | 123 | //int main(){ 124 | // MatrixXd input_img(4, 4); 125 | // input_img << 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16; 126 | // 127 | // cout << "Original image:" << endl; 128 | // cout << input_img << endl; 129 | // vector<MatrixXd> kernels; 130 | // int kernel_num = 4; 131 | // for(int i = 0; i < kernel_num; ++i){ 132 | // MatrixXd kernel; 133 | // kernel = MatrixXd::Random(3, 3); 134 | // kernels.emplace_back(kernel); 135 | // } 136 | // MatrixXd feature_matrix = img2col2D(input_img, kernels, 3, 3, 1, 1); 137 | // vector<vector<pair<int, int>>> pooling_loc; 138 | // vector<pair<int, int>> temp(feature_matrix.cols()); 139 | // pooling_loc.resize(2 * 2, temp); 140 | // cout << "Pooling image:" << endl; 141 | // MatrixXd pooling_img = MaxPooling2D(feature_matrix, 142 | // 2, 2, 2, 143 | // 4, 4, 144 | // pooling_loc); 145 | // cout << pooling_img << endl; 146 | // MatrixXd pooling_diff(2 * 2, 4); 147 | // pooling_diff << 0.1, 0.5, 0.04, -0.5, 148 | // 0.2, 0.6, 0.03, -0.2, 149 | // 0.3, 0.7, 0.02, -0.4, 150 | // 0.4, 0.8, 0.01, -0.7; 151 | // MaxPoolingPrime(4, 4, pooling_diff, pooling_loc); 152 | //} -------------------------------------------------------------------------------- /src/MatAutoDiff.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../include/MatAutoDiff.h" 6 | #include "../include/Conv.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define random(x) rand()%(x) 14 | 15 | using namespace Eigen; 16 | using namespace std; 17 | 18 | Node Op::getNewNode() { 19 | Node newNode = Node(); 20 | newNode.op = this; 21 | return newNode; 22 | } 23 | 24 | vector Op::compute(Node &node, vector &input_vals) { 25 | return vector(); 26 | } 27 | 28 | vector Op::gradient(Node &node, Node &output_gradient) { 29 | return vector(); 30 | } 31 | 32 | Node Placeholders::getNewNode() { 33 | Node newNode = Node(); 34 |
newNode.isPlaceHolder = true; 35 | newNode.op = this; 36 | return newNode; 37 | } 38 | 39 | vector Placeholders::compute(Node &node, vector &input_vals) { 40 | // input_vals[0]代表第一个数,input_vals[1]代表第二个数。 41 | // 这两个数可以是浮点数,也可以是矩阵。在这里是浮点数,并且存放在长度为1的vector中。 42 | vector res; 43 | return res; 44 | } 45 | 46 | vector Placeholders::gradient(Node &node, Node &output_gradient) { 47 | vector res; 48 | return res; 49 | } 50 | 51 | Placeholders placeholder_op = Placeholders(); 52 | 53 | Node OnesLikeOp::getNewNode(Node &nodeA) { 54 | Node newNode = Node(); 55 | newNode.op = this; 56 | newNode.input.push_back(nodeA); 57 | newNode.isPlaceHolder = false; 58 | newNode.name = "Oneslike(" + nodeA.name + ")"; 59 | int hash = 0; 60 | int i = 0; 61 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 62 | hash += newNode.name[i]; 63 | newNode.hash_code = hash; 64 | return newNode; 65 | } 66 | 67 | vector OnesLikeOp::compute(Node &nodeA, vector &input_vals) { 68 | MatrixXd ones = MatrixXd::Ones(input_vals[0].rows(), input_vals[0].cols()); 69 | vector res; 70 | res.push_back(ones); 71 | return res; 72 | } 73 | 74 | vector OnesLikeOp::gradient(Node &node, Node &output_gradient) { 75 | vector res; 76 | Node newNode = Node(); 77 | newNode.input.push_back(node.input[0]); 78 | newNode.name = "Zeroslike(" + node.input[0].name + ")"; 79 | int hash = 0; 80 | int i = 0; 81 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 82 | hash += newNode.name[i]; 83 | newNode.hash_code = hash; 84 | res.push_back(newNode); 85 | return res; 86 | } 87 | 88 | OnesLikeOp ones_like_op = OnesLikeOp(); 89 | 90 | Node ZerosLikeOp::getNewNode(Node &nodeA) { 91 | Node newNode = Node(); 92 | newNode.op = this; 93 | newNode.input.push_back(nodeA); 94 | newNode.name = "Zeroslike(" + nodeA.name + ")"; 95 | int hash = 0; 96 | int i = 0; 97 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 98 | hash += newNode.name[i]; 99 | newNode.hash_code = hash; 100 | newNode.isPlaceHolder = false; 101 | return newNode; 102 | } 103 | 104 | vector ZerosLikeOp::compute(Node &nodeA, vector &input_vals) { 105 | MatrixXd zeros = MatrixXd::Zero(input_vals[0].rows(), input_vals[0].cols()); 106 | vector res; 107 | res.push_back(zeros); 108 | return res; 109 | } 110 | 111 | vector ZerosLikeOp::gradient(Node &node, Node &output_gradient) { 112 | vector res; 113 | Node newNode = Node(); 114 | newNode.input.push_back(node.input[0]); 115 | newNode.name = "Zeroslike(" + node.input[0].name + ")"; 116 | int hash = 0; 117 | int i = 0; 118 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 119 | hash += newNode.name[i]; 120 | newNode.hash_code = hash; 121 | res.push_back(newNode); 122 | return res; 123 | } 124 | 125 | ZerosLikeOp zeros_like_op = ZerosLikeOp(); 126 | 127 | Node MatAddOp::getNewNode(Node &nodeA, Node &nodeB, bool trans_A, bool trans_B) { 128 | Node newNode = Node(); 129 | newNode.op = this; 130 | newNode.matmul_attr_trans_A = trans_A; 131 | newNode.matmul_attr_trans_B = trans_B; 132 | newNode.input.push_back(nodeA); 133 | newNode.input.push_back(nodeB); 134 | newNode.isPlaceHolder = false; 135 | newNode.name = "MatAdd(" + nodeA.name + ", " + nodeB.name + ") "; 136 | return newNode; 137 | } 138 | 139 | vector MatAddOp::compute(Node &nodeA, vector &input_vals) { 140 | if (nodeA.matmul_attr_trans_A) { 141 | input_vals[0].transposeInPlace(); 142 | } 143 | if (nodeA.matmul_attr_trans_B) { 144 | input_vals[1].transposeInPlace(); 145 | } 146 | MatrixXd res_mat = 
input_vals[0] + input_vals[1]; 147 | vector res; 148 | res.push_back(res_mat); 149 | return res; 150 | } 151 | 152 | vector MatAddOp::gradient(Node &node, Node &output_gradient) { 153 | vector res; 154 | res.push_back(output_gradient); 155 | res.push_back(output_gradient); 156 | return res; 157 | } 158 | 159 | MatAddOp matadd_op = MatAddOp(); 160 | 161 | Node MatMulOp::getNewNode(Node &nodeA, Node &nodeB, bool trans_A, bool trans_B) { 162 | Node newNode = Node(); 163 | newNode.op = this; 164 | newNode.matmul_attr_trans_A = trans_A; 165 | newNode.matmul_attr_trans_B = trans_B; 166 | newNode.input.push_back(nodeA); 167 | newNode.input.push_back(nodeB); 168 | newNode.isPlaceHolder = false; 169 | newNode.name = "MatMul(" + nodeA.name + ", " + nodeB.name + ")"; 170 | int hash = 0; 171 | int i = 0; 172 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 173 | hash += newNode.name[i]; 174 | newNode.hash_code = hash; 175 | return newNode; 176 | } 177 | 178 | vector MatMulOp::compute(Node &nodeA, vector &input_vals) { 179 | MatrixXd res_mat; 180 | omp_set_num_threads(4); 181 | if (nodeA.matmul_attr_trans_A && nodeA.matmul_attr_trans_B) { 182 | res_mat = input_vals[0].transpose() * input_vals[1].transpose(); 183 | } else if (!nodeA.matmul_attr_trans_A && nodeA.matmul_attr_trans_B) { 184 | res_mat = input_vals[0] * input_vals[1].transpose(); 185 | } else if (nodeA.matmul_attr_trans_A && !nodeA.matmul_attr_trans_B) { 186 | res_mat = input_vals[0].transpose() * input_vals[1]; 187 | } else { 188 | res_mat = input_vals[0] * input_vals[1]; 189 | } 190 | #pragma omp parallel for 191 | for(int i = 0; i < res_mat.cols(); ++i){ 192 | res_mat.col(i).normalize(); 193 | } 194 | vector res; 195 | res.push_back(res_mat); 196 | return res; 197 | } 198 | 199 | vector MatMulOp::gradient(Node &node, Node &output_gradient) { 200 | vector res; 201 | Node newNodeA = getNewNode(output_gradient, node.input[1], false, true); 202 | Node newNodeB = getNewNode(node.input[0], output_gradient, true, false); 203 | res.push_back(newNodeA); 204 | res.push_back(newNodeB); 205 | return res; 206 | } 207 | 208 | MatMulOp matmul_op = MatMulOp(); 209 | 210 | Node ReluPrimeOp::getNewNode(Node &nodeA, Node &nodeB) { 211 | Node newNode = Node(); 212 | newNode.op = this; 213 | newNode.input.push_back(nodeA); 214 | newNode.input.push_back(nodeB); 215 | newNode.name = "ReluPrime(" + nodeA.name + ")"; 216 | int hash = 0; 217 | int i = 0; 218 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 219 | hash += newNode.name[i]; 220 | newNode.hash_code = hash; 221 | newNode.isPlaceHolder = false; 222 | return newNode; 223 | } 224 | 225 | vector ReluPrimeOp::compute(Node &nodeA, vector &input_vals) { 226 | MatrixXd res_mat(input_vals[0].rows(), input_vals[0].cols()); 227 | for (int i = 0; i < input_vals[0].rows(); ++i) 228 | for (int j = 0; j < input_vals[0].cols(); ++j) { 229 | if (input_vals[0](i, j) <= 0) { 230 | res_mat(i, j) = 0.0; 231 | } else res_mat(i, j) = 1.0; 232 | } 233 | vector res; 234 | res.push_back(res_mat); 235 | return res; 236 | } 237 | 238 | vector ReluPrimeOp::gradient(Node &node, Node &output_gradient) { 239 | return {}; // No need to implement. 
240 | } 241 | 242 | ReluPrimeOp relu_prime_op = ReluPrimeOp(); 243 | 244 | Node ReluOp::getNewNode(Node &nodeA) { 245 | Node newNode = Node(); 246 | newNode.op = this; 247 | newNode.input.push_back(nodeA); 248 | newNode.name = "Relu(" + nodeA.name + ")"; 249 | int hash = 0; 250 | int i = 0; 251 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 252 | hash += newNode.name[i]; 253 | newNode.hash_code = hash; 254 | newNode.isPlaceHolder = false; 255 | return newNode; 256 | } 257 | 258 | vector ReluOp::compute(Node &nodeA, vector &input_vals) { 259 | MatrixXd res_mat(input_vals[0].rows(), input_vals[0].cols()); 260 | #pragma omp parallel for 261 | for (int i = 0; i < input_vals[0].rows(); ++i) 262 | for (int j = 0; j < input_vals[0].cols(); ++j) { 263 | if (input_vals[0](i, j) <= 0) { 264 | res_mat(i, j) = 0.0; 265 | } else res_mat(i, j) = input_vals[0](i, j); 266 | } 267 | vector res; 268 | res.push_back(res_mat); 269 | return res; 270 | } 271 | 272 | vector ReluOp::gradient(Node &node, Node &output_gradient) { 273 | Node newNode = relu_prime_op.getNewNode(node.input[0], output_gradient); 274 | vector res; 275 | res.push_back(newNode); 276 | return res; 277 | } 278 | 279 | ReluOp relu_op = ReluOp(); 280 | 281 | Node MaxPoolingPrimeOp::getNewNode(Node &nodeA, const int img_H, const int img_W) { 282 | Node newNode = Node(); 283 | newNode.op = this; 284 | newNode.input.push_back(nodeA); 285 | newNode.name = "MaxPoolingPrime(" + nodeA.name + ")"; 286 | newNode.filter_W = nodeA.filter_W; 287 | newNode.filter_H = nodeA.filter_H; 288 | newNode.stride = nodeA.stride; 289 | // 获得原有特征图的大小 290 | newNode.img_H = img_H; 291 | newNode.img_W = img_W; 292 | int hash = 0; 293 | int i = 0; 294 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 295 | hash += newNode.name[i]; 296 | newNode.hash_code = hash; 297 | newNode.isPlaceHolder = false; 298 | return newNode; 299 | } 300 | 301 | vector MaxPoolingPrimeOp::compute(Node &nodeA, vector &input_vals){ 302 | MatrixXd mat = MatrixXd::Zero(nodeA.img_H * nodeA.img_W, input_vals[0].cols()); 303 | vector res; 304 | int pooling_H = int(input_vals[0].rows()); 305 | int pooling_W = int(input_vals[0].cols()); 306 | for(int k = 0; k < pooling_W; ++k) 307 | for(int i = 0; i < pooling_H; ++i) 308 | for(int j = 0; j < pooling_W; ++j){ 309 | mat(nodeA.pooling_loc[i][j].first, nodeA.pooling_loc[i][j].second) = input_vals[0](i, j); 310 | } 311 | res.emplace_back(mat); 312 | return std::move(res); 313 | } 314 | 315 | vector MaxPoolingPrimeOp::gradient(Node &node, Node &output_gradient){ 316 | return {}; // No need to implement. 
317 | } 318 | 319 | MaxPoolingPrimeOp maxpooling_prime_op = MaxPoolingPrimeOp(); 320 | 321 | Node MaxPoolingOp::getNewNode(Node &nodeA, const int filter_H, const int filter_W, const int stride) { 322 | Node newNode = Node(); 323 | newNode.op = this; 324 | newNode.input.push_back(nodeA); 325 | newNode.name = "MaxPooling(" + nodeA.name + ")"; 326 | newNode.filter_W = filter_W; 327 | newNode.filter_H = filter_H; 328 | newNode.stride = stride; 329 | // 保存原有特征图的大小 330 | newNode.img_H = nodeA.img_H; 331 | newNode.img_W = nodeA.img_W; 332 | int hash = 0; 333 | int i = 0; 334 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 335 | hash += newNode.name[i]; 336 | newNode.hash_code = hash; 337 | newNode.isPlaceHolder = false; 338 | return newNode; 339 | } 340 | 341 | vector MaxPoolingOp::compute(Node &nodeA, vector &input_vals) { 342 | vector res; 343 | vector> temp(input_vals[0].cols()); 344 | nodeA.pooling_loc.resize(nodeA.filter_H * nodeA.filter_W, temp); 345 | res.emplace_back(MaxPooling2D(input_vals[0], nodeA.filter_H, 346 | nodeA.filter_W, nodeA.stride, 347 | nodeA.img_H, nodeA.img_W, 348 | nodeA.pooling_loc)); 349 | return std::move(res); 350 | } 351 | 352 | vector MaxPoolingOp::gradient(Node &node, Node &output_gradient) { 353 | Node newNode = maxpooling_prime_op.getNewNode(node.input[0], node.img_H, node.img_W); 354 | vector res; 355 | res.push_back(newNode); 356 | return res; 357 | } 358 | 359 | Node SoftmaxGradient::getNewNode(Node &nodeA, Node &nodeB) { 360 | Node newNode = Node(); 361 | newNode.op = this; 362 | newNode.input.push_back(nodeA); 363 | newNode.input.push_back(nodeB); 364 | newNode.isPlaceHolder = false; 365 | newNode.name = "SoftmaxGradient(y_predict:" + nodeA.name + ", y_true:" + nodeB.name + ")"; 366 | int hash = 0; 367 | int i = 0; 368 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 369 | hash += newNode.name[i]; 370 | newNode.hash_code = hash; 371 | newNode.isPlaceHolder = false; 372 | return newNode; 373 | } 374 | 375 | vector SoftmaxGradient::compute(Node &nodeA, vector &input_vals) { 376 | MatrixXd y_hat = input_vals[0]; // Predict 377 | MatrixXd y = input_vals[1]; // Ground Truth 378 | vector res_mat; 379 | res_mat.push_back(y_hat - y); 380 | return res_mat; 381 | } 382 | 383 | vector SoftmaxGradient::gradient(Node &node, Node &output_gradient) { 384 | return vector(); 385 | } 386 | 387 | SoftmaxGradient softmax_gradient_op = SoftmaxGradient(); 388 | 389 | Node SoftmaxOp::getNewNode(Node &nodeA, Node &nodeB) { 390 | Node newNode = Node(); 391 | newNode.op = this; 392 | newNode.input.push_back(nodeA); 393 | newNode.input.push_back(nodeB); 394 | newNode.isPlaceHolder = false; 395 | newNode.name = "Softmax(" + nodeA.name + ")"; 396 | int hash = 0; 397 | int i = 0; 398 | for (hash = newNode.name.length(), i = 0; i < newNode.name.length(); i++) 399 | hash += newNode.name[i]; 400 | newNode.hash_code = hash; 401 | newNode.isPlaceHolder = false; 402 | return newNode; 403 | } 404 | 405 | vector SoftmaxOp::compute(Node &nodeA, vector &input_vals) { 406 | MatrixXd m = input_vals[0].array().exp(); 407 | MatrixXd sum = m.colwise().sum(); 408 | MatrixXd res_mat(m.rows(), m.cols()); 409 | for (int i = 0; i < m.rows(); i++) 410 | for (int j = 0; j < m.cols(); j++) 411 | res_mat(i, j) = m(i, j) / sum(0, j); 412 | vector res; 413 | res.push_back(res_mat); 414 | nodeA.res = res_mat; 415 | // cout << "Softmax res:" << endl; 416 | // cout << res_mat << endl; 417 | return res; 418 | } 419 | 420 | vector SoftmaxOp::gradient(Node &node, 
Node &output_gradient) { 421 | Node newNodeA = softmax_gradient_op.getNewNode(node, node.input[1]); 422 | Node newNodeB = zeros_like_op.getNewNode(node.input[1]); 423 | vector res_vec; 424 | res_vec.push_back(newNodeA); 425 | res_vec.push_back(newNodeB); 426 | return res_vec; 427 | } 428 | 429 | SoftmaxOp softmax_op = SoftmaxOp(); 430 | 431 | Node Variable(string var_name) { 432 | Node placeholder_node = placeholder_op.getNewNode(); 433 | placeholder_node.name = var_name; 434 | int hash = 0; 435 | int i = 0; 436 | for (hash = var_name.length(), i = 0; i < var_name.length(); i++) 437 | hash += var_name[i]; 438 | placeholder_node.hash_code = hash; 439 | return placeholder_node; 440 | } 441 | 442 | 443 | Node Node::operator*(Node &nodeB) { 444 | Node nodeA = *this; 445 | Node newNode = matmul_op.getNewNode(nodeA, nodeB, false, false); 446 | return newNode; 447 | } 448 | 449 | Node Node::operator+(Node &nodeB) { 450 | Node nodeA = *this; 451 | Node newNode = matadd_op.getNewNode(nodeA, nodeB, false, false); 452 | return newNode; 453 | } 454 | 455 | void topo_sort_dfs(Node &node, std::unordered_set &visited, vector &topo_order) { 456 | if (visited.count(node.hash_code)) { 457 | return; 458 | } 459 | visited.emplace(node.hash_code); 460 | for (Node &in_node: node.input) { 461 | topo_sort_dfs(in_node, visited, topo_order); 462 | } 463 | topo_order.push_back(node); 464 | } 465 | 466 | void find_topo_sort(vector &node_list, vector &topo_order) { 467 | std::unordered_set visited; 468 | for (Node &node: node_list) { 469 | topo_sort_dfs(node, visited, topo_order); 470 | } 471 | } 472 | 473 | Node sum_node_list(vector &node_list) { 474 | Node res = node_list[0]; 475 | int i = 1; 476 | int size = node_list.size(); 477 | for (i = 1; i < size; i++) { 478 | res = res + node_list[i]; 479 | } 480 | return res; 481 | } 482 | 483 | 484 | vector gradients(Node &output_node, vector &node_list) { 485 | map> node_to_output_grads_list; 486 | vector ones; 487 | Node out_ones = ones_like_op.getNewNode(output_node); 488 | ones.push_back(out_ones); 489 | node_to_output_grads_list[output_node.hash_code] = ones; 490 | 491 | map node_to_output_grad; 492 | 493 | vector output_node_list; 494 | output_node_list.push_back(output_node); 495 | vector reverse_topo_sort; 496 | find_topo_sort(output_node_list, reverse_topo_sort); 497 | reverse(reverse_topo_sort.begin(), reverse_topo_sort.end()); 498 | 499 | for (Node &node: reverse_topo_sort) { 500 | Node grad = sum_node_list(node_to_output_grads_list[node.hash_code]); 501 | node_to_output_grad[node.hash_code] = grad; 502 | vector input_grads = node.op->gradient(node, grad); // 计算当前节点前驱的梯度 503 | for (int i = 0; i < node.input.size(); i++) { 504 | node_to_output_grads_list[node.input[i].hash_code] = node_to_output_grads_list[node.input[i].hash_code]; 505 | node_to_output_grads_list[node.input[i].hash_code].push_back(input_grads[i]); 506 | } 507 | } 508 | vector grad_node_list; 509 | for (Node &node: node_list) { 510 | grad_node_list.push_back(node_to_output_grad[node.hash_code]); 511 | } 512 | return grad_node_list; 513 | } 514 | 515 | vector split(const string &s, char delim) { 516 | stringstream ss(s); 517 | string item; 518 | vector tokens; 519 | while (getline(ss, item, delim)) { 520 | tokens.push_back(item); 521 | } 522 | return tokens; 523 | } 524 | 525 | vector operator/(const vector &m2, const float m1) { 526 | 527 | /* Returns the product of a float and a vectors (elementwise multiplication). 
528 | Inputs: 529 | m1: float 530 | m2: vector 531 | Output: vector, m1 * m2, product of two vectors m1 and m2 532 | */ 533 | 534 | const unsigned long VECTOR_SIZE = m2.size(); 535 | vector product(VECTOR_SIZE); 536 | 537 | for (unsigned i = 0; i != VECTOR_SIZE; ++i) { 538 | product[i] = m2[i] / m1; 539 | }; 540 | 541 | return product; 542 | } 543 | 544 | void load_data(vector &X_train, vector &y_train, string src) { 545 | string line; 546 | vector line_v; 547 | ifstream myfile(src); 548 | if (myfile.is_open()) { 549 | while (getline(myfile, line)) { 550 | line_v = split(line, '\t'); 551 | int digit = strtof((line_v[0]).c_str(), 0); 552 | for (unsigned i = 0; i < 10; ++i) { 553 | if (i == digit) { 554 | y_train.emplace_back(1.); 555 | } else y_train.emplace_back(0.); 556 | } 557 | 558 | int size = static_cast(line_v.size()); 559 | for (unsigned i = 1; i < size; ++i) { 560 | X_train.emplace_back(strtof((line_v[i]).c_str(), 0)); 561 | } 562 | } 563 | X_train = X_train / 255.0; 564 | myfile.close(); 565 | } 566 | } 567 | 568 | void read_batch_data(MatrixXd &input_val, MatrixXd &y_true_val, vector &X_train, vector &y_train, 569 | int batch_size) { 570 | 571 | int rand_indx = random(42000 - batch_size); 572 | for (unsigned i = rand_indx * 784; i < (rand_indx + batch_size) * 784; i += 784) { 573 | for (unsigned j = 0; j < 784; ++j) { 574 | input_val(i / 784 - rand_indx, j) = X_train[i + j]; 575 | } 576 | } 577 | for (unsigned i = rand_indx * 10; i < (rand_indx + batch_size) * 10; i += 10) { 578 | for (unsigned j = 0; j < 10; ++j) { 579 | y_true_val(i / 10 - rand_indx, j) = y_train[i + j]; 580 | } 581 | } 582 | } 583 | 584 | 585 | Executor::Executor(vector &eval_node_list) { 586 | this->node_list = eval_node_list; 587 | } 588 | 589 | vector Executor::run(map &feed_dic) { 590 | vector topo_order; 591 | find_topo_sort(this->node_list, topo_order); 592 | vector input_vals; // 输入 593 | vector output_vals; // 输出 594 | for (Node node: topo_order) { 595 | if (node.isPlaceHolder) { 596 | continue; 597 | } 598 | input_vals.clear(); 599 | for (Node in_node: node.input) { 600 | input_vals.push_back(feed_dic[in_node.hash_code]); 601 | } 602 | auto res = node.op->compute(node, input_vals); 603 | feed_dic[node.hash_code] = res[0]; 604 | } 605 | for (Node node: this->node_list) { 606 | output_vals.push_back(feed_dic[node.hash_code]); 607 | } 608 | return output_vals; 609 | } 610 | 611 | -------------------------------------------------------------------------------- /src/Model.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../include/Model.h" 3 | #include 4 | #include 5 | 6 | using namespace Eigen; 7 | using namespace std; 8 | 9 | extern ReluOp relu_op; 10 | extern SoftmaxOp softmax_op; 11 | 12 | 13 | Layer::Layer(int layer_shape, const string& layer_name, Layer &previous_layer, const string& activation){ 14 | // Dense layer 15 | this->shape = layer_shape; 16 | this->input_node = Variable(std::move(layer_name + " weights")); 17 | this->layer = previous_layer.output * input_node; 18 | if(activation == "relu"){ 19 | this->output = relu_op.getNewNode(this->layer); 20 | }else{ 21 | this->output = this->layer; 22 | } 23 | matrix_weight = MatrixXd::Random(previous_layer.shape,layer_shape); 24 | } 25 | 26 | Layer::Layer(int input_shape, string input_name, int batch_size){ 27 | // Input layer 28 | this->shape = input_shape; 29 | input = Variable(std::move(input_name)); 30 | this->input_node = this->input; 31 | this->layer = this->input; 32 | this->output 
= this->input; 33 | matrix_weight = MatrixXd::Random(batch_size, input_shape); 34 | } 35 | 36 | Layer::Layer(int layer_shape, const string& layer_name, Layer &previous_layer, int batch_size){ 37 | // Output layer 38 | this->shape = layer_shape; 39 | this->y_true = Variable("Ground Truth"); 40 | this->input_node = Variable(std::move(layer_name + " weights")); 41 | this->layer = previous_layer.output * input_node; 42 | this->output = softmax_op.getNewNode(this->layer, y_true); 43 | matrix_weight = MatrixXd::Random(previous_layer.shape,layer_shape); 44 | y_true_val = MatrixXd::Random(batch_size, layer_shape); 45 | } 46 | 47 | 48 | Model::Model(int batch_size_, int training_iter_){ 49 | this->lr = .1/batch_size_; 50 | this->batch_size = batch_size_; 51 | this->training_iter = training_iter_; 52 | } 53 | 54 | void Model::Dense(int layer_shape, const string& layer_name, const string& activation){ 55 | if(sequential.empty()){ 56 | cout << "Need input layer!" << endl; 57 | return; 58 | } 59 | Layer new_Layer = Layer(layer_shape, layer_name, sequential.back(), activation); 60 | sequential.emplace_back(new_Layer); 61 | } 62 | 63 | void Model::Input(int input_shape, string input_name){ 64 | if(!sequential.empty()){ 65 | cout << "Model not empty!" << endl; 66 | sequential.clear(); 67 | cout << "Model has been cleared!" << endl; 68 | } 69 | sequential.emplace_back(Layer(input_shape, std::move(input_name), batch_size)); 70 | } 71 | 72 | void Model::Output(int layer_shape, const string& layer_name){ 73 | Layer new_Layer = Layer(layer_shape, layer_name, sequential.back(), batch_size); 74 | sequential.emplace_back(new_Layer); 75 | } 76 | 77 | void Model::compile(){ 78 | cout << "------------------------------------" << endl; 79 | cout << "Building graph..." << endl; 80 | vector input_nodes; 81 | for(Layer &layer : sequential){ 82 | // 将模型中每一层的节点送入 83 | input_nodes.emplace_back(layer.input_node); 84 | feed_dic[layer.input_node.hash_code] = layer.matrix_weight; 85 | } 86 | // 将输出层的真实值节点送入 87 | input_nodes.emplace_back(sequential.back().y_true); 88 | grads = gradients(sequential.back().output, input_nodes); 89 | for(int i = 0; i < grads.size(); i++){ 90 | sequential[i].grad_node_hash = grads[i].hash_code; 91 | } 92 | cout << "Build graph done..." << endl; 93 | cout << "------------------------------------" << endl; 94 | cout << "Building Executor..." << endl; 95 | exe_list.push_back(sequential.back().output); 96 | exe_list.insert(exe_list.end(), grads.begin(), grads.end()); 97 | executor = Executor(exe_list); 98 | cout << "Build Executor done..." 
<< endl; 99 | } 100 | 101 | void Model::loadData(vector &X_train_, vector y_train_){ 102 | this->X_train = X_train_; 103 | this->y_train = y_train_; 104 | } 105 | 106 | void Model::run(){ 107 | int iter_; 108 | cout << "------------------------------------" << endl; 109 | cout << "START TRAINING" << endl; 110 | cout << "------------------------------------" << endl; 111 | for(iter_ = 0; iter_ < training_iter; iter_++) { 112 | // Read training data 113 | read_batch_data(sequential.front().matrix_weight, sequential.back().y_true_val, X_train, y_train, batch_size); 114 | feed_dic[sequential.front().input_node.hash_code] = sequential.front().matrix_weight; 115 | feed_dic[sequential.back().y_true.hash_code] = sequential.back().y_true_val; 116 | 117 | // Train 118 | vector res = executor.run(feed_dic); 119 | 120 | // Weight update 121 | for (int i = 1; i < sequential.size() - 1; i++) { 122 | feed_dic[sequential[i].input_node.hash_code] -= lr * feed_dic[sequential[i].grad_node_hash]; 123 | } 124 | 125 | // Loss 126 | if (iter_ % 1 == 0) { 127 | MatrixXd loss_m = feed_dic[sequential.back().output.hash_code] - feed_dic[sequential.back().y_true.hash_code]; 128 | cout << "Iteration: " << iter_ << ", Loss: " << loss_m.array().square().sum() / (batch_size * 10) << "\r" << flush; 129 | } 130 | } 131 | } 132 | 133 | --------------------------------------------------------------------------------