├── README.md ├── include ├── BranchNode.h ├── ComputeGraph.h ├── Graph.h ├── LoopNode.h ├── Node.h ├── OperatorNode.h ├── Optimizer.h ├── Tensor.h ├── VirtualGraph.h ├── VirtualNode.h ├── cuda │ └── cuda_lib.h ├── multi_thread │ ├── matrix_task.h │ ├── ring_buffer.h │ └── thread_pool.h ├── op_node │ ├── AbsSum.h │ ├── Add.h │ ├── Bias.h │ ├── Dropout.h │ ├── Input.h │ ├── Minus.h │ ├── Mult.h │ ├── Parameter.h │ ├── Sigmoid.h │ └── SquareSum.h └── optimizer │ └── Adadelta.h ├── makefile ├── makefile.gpu ├── obj └── empty ├── src ├── BranchNode.cpp ├── ComputeGraph.cpp ├── Graph.cpp ├── LoopNode.cpp ├── Node.cpp ├── OperatorNode.cpp ├── Optimizer.cpp ├── Tensor.cpp ├── VirtualGraph.cpp ├── VirtualNode.cpp ├── cuda │ ├── Tensor.cpp │ └── cuda_lib.cu ├── multi_thread │ ├── matrix_task.cpp │ ├── ring_buffer.cpp │ └── thread_pool.cpp ├── op_node │ ├── AbsSum.cpp │ ├── Add.cpp │ ├── Bias.cpp │ ├── Dropout.cpp │ ├── Input.cpp │ ├── Minus.cpp │ ├── Mult.cpp │ ├── Parameter.cpp │ ├── Sigmoid.cpp │ └── SquareSum.cpp └── optimizer │ └── Adadelta.cpp └── unit_test ├── graph_test.cpp ├── operatorNode_test.cpp ├── rnn_test.cpp ├── tensor_test.cpp └── xor_test.cpp /README.md: -------------------------------------------------------------------------------- 1 | # automatic-differentiation-framework 2 | An automatic differentiation framework with control-flow support 3 | ## Project structure 4 | * src/ contains the main source code, including the core algorithms such as the compute graph and the virtual graph 5 | * src/op_node contains the implementations of the individual operator nodes 6 | * unit_test/ contains the unit tests for each module, most notably a simple neural network trained to compute the XOR operation and a recurrent neural network (RNN) trained to add 8-bit binary numbers; the RNN implementation relies on the framework's control-flow mechanism 7 | 8 | ## Overview 9 | Automatic differentiation is an important mechanism for simplifying the implementation of neural network models. Implementing a neural network with this framework involves the following main steps: 10 | 11 | 1. The user organizes a number of basic operator nodes into a compute graph; 12 | 2. The framework topologically sorts the compute graph and calls each operator node's compute function op() in turn, which implements forward propagation; 13 | 3. The framework then sorts the compute graph in reverse topological order and calls each operator node's gradient function grad_op() in turn, which implements backpropagation. 14 | 15 | All data handled by operator nodes are tensors (Tensor). 16 | 17 | ## Control flow 18 | To support control flow, the framework introduces the concept of a virtual graph. Control flow is expressed by two kinds of virtual-graph nodes: loops (Loop) and branches (Branch). 19 | 20 | The user builds a blueprint of the neural network as a virtual graph; only at run time can certain nodes decide whether a given branch is taken. Running the virtual graph constructs the actual compute graph, 21 | and backpropagation is then performed on that compute graph to train the model. 22 | 23 | Introducing loops would logically create cycles in the virtual graph. To prevent such cycles from making topological sorting of the virtual graph impossible, the framework treats a Loop node as a subgraph that contains exactly one loop. 24 | This effectively partitions the original graph into multiple subgraphs along its loops; subgraphs can be nested. Viewing each subgraph as a generalized node, the overall virtual graph contains no cycles, so forward propagation remains possible. 25 | 26 | ## Other features 27 | A plain SGD optimizer and an Adadelta optimizer are currently supported, and a dropout operator node is implemented. 28 | 29 | ## CUDA support 30 | CUDA support was added on January 31, 2018, so the framework can use GPU acceleration on machines with CUDA installed. However, because the bundled examples are small and the GPU code is not yet well optimized, performance on small amounts of data is unimpressive and slower than the CPU path. 31 | 32 | In this project CUDA is mainly used to speed up matrix operations; the kernels in src/cuda/cuda_lib.cu are optimized with shared memory and intra-block thread synchronization. 33 | 34 | ## Blog 35 | The blog series below describes the overall design of the framework in five parts. 36 | 37 | https://www.jianshu.com/p/4c2032c685dc 38 | -------------------------------------------------------------------------------- /include/BranchNode.h: -------------------------------------------------------------------------------- 1 | #ifndef BRANCHNODE_H_ 2 | #define BRANCHNODE_H_ 3 | #include "Node.h" 4 | #include "Graph.h" 5 | class BranchNode: public Node { 6 | public: 7 | // When a BranchNode is part of the start of a loop, the LoopNode that contains it sometimes needs to supply the name of the compute node used for initialization, thereby allowing loops to be cascaded 8 | std::string m_dep_op_node_name; 9 | Node* (*choose_node) (int idx, Graph* compute_graph, BranchNode* branch_node); 10 | BranchNode (std::string type, std::string id, Node* (*func) (int, Graph*, BranchNode*)); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/ComputeGraph.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPUTEGRAPH_H_ 2 | #define COMPUTEGRAPH_H_ 3 | #include "Graph.h" 4 | #include "Node.h" 5 | #include "Optimizer.h" 6 | #include <vector> 7 | class ComputeGraph: public Graph { 8 | protected: 9 | int m_need_release_tensor_flag; 10 | void
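// A minimal usage sketch based on the headers in this repository (names such
// as vg, cg, x, w and h are placeholders; setting up the Input data callback,
// the Parameter tensors and a loss node is omitted): the user wires
// VirtualNodes into a VirtualGraph, materializes a ComputeGraph from it, and
// then alternates forward and backward passes on that ComputeGraph.
//
//   VirtualGraph vg;
//   VirtualNode* x = new VirtualNode ("Input", "x");      // also needs m_input_data / input_op
//   VirtualNode* w = new VirtualNode ("Parameter", "w");  // also needs m_data
//   VirtualNode* h = new VirtualNode ("Mult", "h");
//   vg.add_node ("", x);
//   vg.add_node ("", w);
//   vg.add_node (x -> get_name (), h);
//   vg.add_node (w -> get_name (), h);
//
//   ComputeGraph cg;
//   cg.m_optimizer = new Optimizer (0.1);  // plain SGD; an Adadelta instance also works
//   vg.build_compute_graph (&cg);          // instantiate the operator nodes
//   std::vector<Node*> results;
//   cg.forward_propagation (results);      // topological order, op() on every node
//   cg.back_propagation ();                // reverse order, grad_op() plus parameter update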
release_tensor (); 11 | public: 12 | ComputeGraph (); 13 | Optimizer* m_optimizer; 14 | void forward_propagation (std::vector &result_list); 15 | void back_propagation (); 16 | ~ComputeGraph ();// 并不释放其中包含的计算节点的内存空间,用于支持动态计算图 17 | }; 18 | #endif 19 | -------------------------------------------------------------------------------- /include/Graph.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAPH_H_ 2 | #define GRAPH_H_ 3 | #include "Node.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | class Graph { 9 | protected: 10 | std::unordered_map m_node_map;// 计算图中节点字典 11 | int m_need_build_reverse_graph_flag; 12 | std::unordered_map > m_reverse_table;// 计算图的转置图 13 | void build_reverse_graph ();// 构建转置图 14 | public: 15 | Graph (); 16 | std::unordered_map > m_adj_table;// 计算图邻接表 17 | void add_node (std::string parent_name, Node* node);// 向计算图中添加节点 18 | Node* get_node (std::string name); 19 | void build_subgraph (std::vector &endnode_list);// 根据终止节点列表构造出子图 20 | void topological_sort (std::unordered_map > &adj_table, std::vector &result);// 拓扑排序 21 | void get_endnode (std::vector &endNode_list);// 获取转置图中没有前驱的节点 22 | virtual ~Graph ();// 析构函数 23 | }; 24 | #endif 25 | -------------------------------------------------------------------------------- /include/LoopNode.h: -------------------------------------------------------------------------------- 1 | #ifndef LOOPNODE_H_ 2 | #define LOOPNODE_H_ 3 | #include "Node.h" 4 | #include "VirtualGraph.h" 5 | class LoopNode: public Node { 6 | public: 7 | VirtualGraph* m_sub_vgraph; 8 | Node* m_end_compute_node;// 该循环节点最终的输出节点 9 | // 初始化循环,为循环子图中的一些节点补上节点依赖,依赖的节点来自于LoopNode所依赖的节点 10 | // 补上依赖的主要方式是把依赖的计算节点的名字加入到当前LoopNode中的子虚拟图的起始BranchNode中 11 | void (*init) (LoopNode* loop_node); 12 | int (*condition) (Graph* compute_graph, int idx);// 条件成立则返回1,否则返回0 13 | LoopNode (std::string type, std::string id, void (*func1) (LoopNode*), int (*func2) (Graph*, int)); 14 | virtual void inner_loop (Graph* compute_graph);// 内循环 15 | virtual ~LoopNode (); 16 | }; 17 | #endif 18 | -------------------------------------------------------------------------------- /include/Node.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_H_ 2 | #define NODE_H_ 3 | #include 4 | #include 5 | class Node { 6 | public: 7 | std::vector m_name; 8 | std::vector m_parents;// 依赖节点列表 9 | int m_invisible;// 0可见,1不可见 10 | 11 | virtual std::string get_name (); 12 | Node (std::string type, std::string id); 13 | virtual ~Node (); 14 | }; 15 | #endif 16 | -------------------------------------------------------------------------------- /include/OperatorNode.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATORNODE_H_ 2 | #define OPERATORNODE_H_ 3 | #include "Node.h" 4 | #include "Tensor.h" 5 | #include 6 | class OperatorNode: public Node { 7 | protected: 8 | void chain_rule (Tensor* grad, int parent_idx); 9 | public: 10 | Tensor* m_output; 11 | Tensor* m_sum_grad; 12 | OperatorNode (std::string type, std::string id, std::string idx); 13 | virtual void op (); 14 | virtual void grad_op (); 15 | virtual ~OperatorNode (); 16 | virtual void release_tensor (); 17 | }; 18 | #endif 19 | -------------------------------------------------------------------------------- /include/Optimizer.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTIMIZER_H_ 2 | #define OPTIMIZER_H_ 3 | #include 4 | #include "Node.h" 5 | class 
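// Base optimizer: plain stochastic gradient descent. optimize() first calls
// grad_op() on every node in reverse topological order to accumulate each
// node's m_sum_grad, and then, for every Parameter node p with a non-null
// gradient, applies
//
//   p.m_output->m_tensor[j] -= m_a * p.m_sum_grad->m_tensor[j]
//
// where m_a is the learning rate passed to the constructor. Adadelta
// (include/optimizer/Adadelta.h) overrides optimize() with an adaptive rule.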
Optimizer { 6 | protected: 7 | float m_a; 8 | public: 9 | Optimizer (float a); 10 | virtual void optimize (std::vector &topo_results); 11 | virtual ~Optimizer (); 12 | }; 13 | #endif 14 | -------------------------------------------------------------------------------- /include/Tensor.h: -------------------------------------------------------------------------------- 1 | #ifndef TENSOR_H_ 2 | #define TENSOR_H_ 3 | #include 4 | class Tensor { 5 | public: 6 | std::vector m_shape; 7 | int m_size; 8 | float* m_tensor; 9 | Tensor (std::vector shape, int need_init = 1); 10 | Tensor (std::vector shape, float data[]); 11 | float get_value (std::vector idxs);// 根据各个维度的下标获取值 12 | void set_value (std::vector idxs, float value);// 设置某坐标下的值 13 | Tensor* matrix_mult (Tensor* tensor);// 二维矩阵乘法 14 | Tensor* scalar_mult (float scalar);// 标量乘法 15 | void scalar_acc_mult (float scalar);// 标量累乘 16 | Tensor* element_mult (Tensor* tensor);// 逐元素相乘 17 | float element_square_sum ();// 元素平方和 18 | float element_abs_sum ();// 元素绝对值和 19 | void element_square ();// 逐元素平方 20 | void add (Tensor* tensor, Tensor* result);// 累加 21 | Tensor* add (Tensor* tensor);// 普通加法 22 | void init (); 23 | void display (); 24 | 25 | ~Tensor (); 26 | }; 27 | #endif 28 | -------------------------------------------------------------------------------- /include/VirtualGraph.h: -------------------------------------------------------------------------------- 1 | #ifndef VIRTUALGRAPH_H_ 2 | #define VIRTUALGRAPH_H_ 3 | #include "Graph.h" 4 | #include "ComputeGraph.h" 5 | class VirtualGraph: public Graph { 6 | public: 7 | Node* build_compute_graph (Graph* compute_graph, int idx = 0); 8 | ~VirtualGraph (); 9 | }; 10 | #endif 11 | -------------------------------------------------------------------------------- /include/VirtualNode.h: -------------------------------------------------------------------------------- 1 | #ifndef VIRTUALNODE_H_ 2 | #define VIRTUALNODE_H_ 3 | #include "op_node/Input.h" 4 | #include "Node.h" 5 | #include "Tensor.h" 6 | #include "OperatorNode.h" 7 | #include "Graph.h" 8 | #include 9 | #include 10 | class VirtualNode: public Node { 11 | public: 12 | Tensor* m_data;// 记录一些必要的数据,比如虚拟节点是dropout节点时所需要的filter 13 | std::vector m_input_data;// 原始输入数据缓存 14 | std::unordered_map m_op_node_map;// 缓存这个虚拟节点生成计算节点 15 | void (*input_op) (Input* input);// Input计算节点的数据输入函数 16 | int m_share_parameter; 17 | float m_keep_rate; 18 | VirtualNode (std::string type, std::string id, int share_parameter = 0, float keep_rate = 0.5); 19 | void get_parents_op_nodes (int idx, Graph* compute_graph, std::vector &node_list); 20 | 21 | // 根据虚拟节点的名字和内循环下标idx,确定生成的计算节点。如果在m_op_node_map中已经存在计算节点了则不重复生成计算节点,用于支持动态计算图 22 | Node* get_op_node (int idx); 23 | ~VirtualNode ();// 会释放该虚拟节点生成的计算节点的内存空间 24 | }; 25 | #endif 26 | -------------------------------------------------------------------------------- /include/cuda/cuda_lib.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDALIB_H_ 2 | #define CUDALIB_H_ 3 | #define BLOCK_SIZE 32 4 | #define GRID_SIZE 32 5 | void cuda_matrix_mult (float* A, float* B, float* C, int a_row, int a_col, int b_row, int b_col); 6 | void cuda_tensor_add (float* A, float* B, float* C, int size); 7 | void cuda_scalar_tensor_mult (float* A, float* result, float s, int size); 8 | void cuda_element_square (float* A, int size); 9 | float cuda_element_square_sum (float* A, int size); 10 | float cuda_element_abs_sum (float* A, int size); 11 | void cuda_element_mult (float* A, float* B, float* C, int 
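// Host-side wrappers around the kernels defined in src/cuda/cuda_lib.cu: each
// wrapper allocates device buffers, copies its operands to the GPU, launches
// the corresponding kernel with a BLOCK_SIZE/GRID_SIZE configuration, and
// copies the result back to host memory. src/cuda/Tensor.cpp implements the
// Tensor methods on top of these wrappers, and makefile.gpu links that file in
// place of the CPU implementation in src/Tensor.cpp.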
size); 12 | #endif 13 | -------------------------------------------------------------------------------- /include/multi_thread/matrix_task.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H_ 2 | #define MATRIX_H_ 3 | #include "thread_pool.h" 4 | #include "../Tensor.h" 5 | class matrix_mult_task: public task { 6 | private: 7 | Tensor* m_A; 8 | Tensor* m_B; 9 | Tensor* m_C; 10 | int m_a_idx;// A矩阵的行号 11 | int m_b_idx;// b矩阵的列号 12 | public: 13 | matrix_mult_task (Tensor* A, Tensor* B, Tensor* C, int a_idx, int b_idx); 14 | void run (); 15 | }; 16 | class matrix_add_task: public task { 17 | private: 18 | Tensor* m_A; 19 | Tensor* m_B; 20 | Tensor* m_C; 21 | int m_thread_id;// 当前任务所在的线程id 22 | int m_thread_num;// 一共有多少线程 23 | public: 24 | matrix_add_task (Tensor* A, Tensor* B, Tensor* C, int thread_id, int thread_num); 25 | void run (); 26 | }; 27 | class matrix_scalar_mult_task: public task { 28 | private: 29 | Tensor* m_A; 30 | float m_scalar; 31 | Tensor* m_C; 32 | int m_thread_id; 33 | int m_thread_num; 34 | public: 35 | matrix_scalar_mult_task (Tensor* A, float scalar, Tensor* C, int thread_id, int thread_num); 36 | void run (); 37 | }; 38 | #endif 39 | -------------------------------------------------------------------------------- /include/multi_thread/ring_buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef RINGBUFFER_H_ 2 | #define RINGBUFFER_H_ 3 | #include 4 | class ring_buffer { 5 | private: 6 | int m_dequeue_ptr; 7 | int m_enqueue_ptr; 8 | int m_size; 9 | void** m_buffer; 10 | pthread_mutex_t m_dequeue_lock;// 多消费者锁 11 | pthread_mutex_t m_enqueue_lock;// 多生产者锁 12 | public: 13 | ring_buffer (int size); 14 | int is_full (); 15 | int is_empty (); 16 | int get_element (void** data); 17 | int add_element (void* data); 18 | ~ring_buffer (); 19 | }; 20 | #endif 21 | -------------------------------------------------------------------------------- /include/multi_thread/thread_pool.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_POOL_H_ 2 | #define THREAD_POOL_H_ 3 | #include "ring_buffer.h" 4 | #include 5 | #include 6 | class task { 7 | public: 8 | virtual void run (); 9 | }; 10 | 11 | void *get_task (void *arg); 12 | class thread_pool { 13 | protected: 14 | thread_pool (); 15 | private: 16 | static thread_pool* instance; 17 | ring_buffer** m_task_buffers;// 每个worker独享一个ring_buffer 18 | pthread_t* m_workers; 19 | int m_idx; 20 | public: 21 | int m_worker_num; 22 | static thread_pool* get_instance (); 23 | void add_job (task* t_task); 24 | void add_job_list (std::vector job_list); 25 | ~thread_pool (); 26 | }; 27 | #endif 28 | -------------------------------------------------------------------------------- /include/op_node/AbsSum.h: -------------------------------------------------------------------------------- 1 | #ifndef ABSSUM_H_ 2 | #define ABSSUM_H_ 3 | #include "../OperatorNode.h" 4 | class AbsSum: public OperatorNode { 5 | public: 6 | AbsSum (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~AbsSum (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/Add.h: -------------------------------------------------------------------------------- 1 | #ifndef ADD_H_ 2 | #define ADD_H_ 3 | #include "../OperatorNode.h" 4 | class Add: public OperatorNode { 5 | public: 6 | Add (std::string type, 
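// Addition node (presumably the element-wise sum of its parents' outputs).
// Like every concrete OperatorNode it implements the op()/grad_op() pair:
// op() fills m_output from the parents' m_output tensors, and grad_op()
// derives the gradient with respect to each parent and hands it to
// chain_rule(), which accumulates it into that parent's m_sum_grad.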
std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~Add (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/Bias.h: -------------------------------------------------------------------------------- 1 | #ifndef BIAS_H_ 2 | #define BIAS_H_ 3 | #include "../OperatorNode.h" 4 | class Bias: public OperatorNode { 5 | public: 6 | Bias (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~Bias (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/Dropout.h: -------------------------------------------------------------------------------- 1 | #ifndef DROPOUT_H_ 2 | #define DROPOUT_H_ 3 | #include "../OperatorNode.h" 4 | class Dropout: public OperatorNode { 5 | public: 6 | float m_keep_rate; 7 | Tensor* m_filter; 8 | Dropout (std::string type, std::string id, std::string idx, Tensor* filter, float keep_rate); 9 | void op (); 10 | void grad_op (); 11 | ~Dropout (); 12 | void release_tensor (); 13 | }; 14 | #endif 15 | -------------------------------------------------------------------------------- /include/op_node/Input.h: -------------------------------------------------------------------------------- 1 | #ifndef INPUT_H_ 2 | #define INPUT_H_ 3 | #include "../OperatorNode.h" 4 | #include 5 | class Input: public OperatorNode { 6 | public: 7 | int m_data_ptr; 8 | std::vector m_data; 9 | Input (std::string type, std::string id, std::string idx, std::vector input_data, void (*func) (Input*) = 0); 10 | void (*op) (Input* input);// 参数是该函数所在对象本身 11 | void release_tensor (); 12 | ~Input (); 13 | }; 14 | #endif 15 | -------------------------------------------------------------------------------- /include/op_node/Minus.h: -------------------------------------------------------------------------------- 1 | #ifndef MINUS_H_ 2 | #define MINUS_H_ 3 | #include "../OperatorNode.h" 4 | class Minus: public OperatorNode { 5 | public: 6 | Minus (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~Minus (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/Mult.h: -------------------------------------------------------------------------------- 1 | #ifndef MULT_H_ 2 | #define MULT_H_ 3 | #include "../OperatorNode.h" 4 | class Mult: public OperatorNode { 5 | public: 6 | Mult (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~Mult (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/Parameter.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETER_H_ 2 | #define PARAMETER_H_ 3 | #include "../OperatorNode.h" 4 | class Parameter: public OperatorNode { 5 | public: 6 | int m_share_data; 7 | Parameter (std::string type, std::string id, std::string idx, Tensor* data, int share_data = 0); 8 | ~Parameter (); 9 | void release_tensor (); 10 | }; 11 | #endif 12 | -------------------------------------------------------------------------------- /include/op_node/Sigmoid.h: -------------------------------------------------------------------------------- 1 | #ifndef SIGMOID_H_ 2 | #define SIGMOID_H_ 3 | #include "../OperatorNode.h" 4 | 
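// Sigmoid activation node. Forward: sigma(x) = 1 / (1 + exp(-x)) applied
// element-wise to the parent's output. Because the derivative is
// sigma'(x) = sigma(x) * (1 - sigma(x)), grad_op() can presumably be computed
// from the cached m_output alone, without revisiting the input tensor.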
class Sigmoid: public OperatorNode { 5 | public: 6 | Sigmoid (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~Sigmoid (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/SquareSum.h: -------------------------------------------------------------------------------- 1 | #ifndef SQUARESUM_H_ 2 | #define SQUARESUM_H_ 3 | #include "../OperatorNode.h" 4 | class SquareSum: public OperatorNode { 5 | public: 6 | SquareSum (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~SquareSum (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/optimizer/Adadelta.h: -------------------------------------------------------------------------------- 1 | #ifndef ADADELTA_H_ 2 | #define ADADELTA_H_ 3 | #include "../Optimizer.h" 4 | #include "../Tensor.h" 5 | #include 6 | class Adadelta: public Optimizer { 7 | private: 8 | std::unordered_map m_tensor_store; 9 | float m_epsl; 10 | float m_lambda; 11 | public: 12 | Adadelta (float a); 13 | void optimize (std::vector &topo_results); 14 | ~Adadelta (); 15 | }; 16 | #endif 17 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | all: 2 | #编译变量 3 | virtual_node_group=obj/VirtualNode.o obj/BranchNode.o obj/LoopNode.o 4 | operator_node_group=obj/OperatorNode.o obj/Dropout.o obj/Input.o obj/Add.o obj/AbsSum.o obj/Bias.o obj/Mult.o obj/Minus.o obj/SquareSum.o obj/Sigmoid.o obj/Parameter.o 5 | multi_thread_group=obj/ring_buffer.o obj/matrix_task.o obj/thread_pool.o 6 | graph_group=obj/Graph.o obj/ComputeGraph.o obj/VirtualGraph.o 7 | optimizer_group=obj/Optimizer.o obj/Adadelta.o 8 | 9 | rnn_test: $(multi_thread_group) $(operator_node_group) $(graph_group) $(virtual_node_group) $(optimizer_group) obj/rnn_test.o obj/Node.o obj/Tensor.o 10 | g++ -std=c++11 -pthread $(multi_thread_group) $(operator_node_group) $(graph_group) $(virtual_node_group) $(optimizer_group) obj/rnn_test.o obj/Node.o obj/Tensor.o -o rnn_test 11 | 12 | xor_test: $(multi_thread_group) $(graph_group) $(optimizer_group) $(operator_node_group) obj/xor_test.o obj/Node.o obj/VirtualNode.o obj/Tensor.o 13 | g++ -std=c++11 -pthread $(multi_thread_group) $(graph_group) $(optimizer_group) $(operator_node_group) obj/xor_test.o obj/Node.o obj/VirtualNode.o obj/Tensor.o -o xor_test 14 | 15 | graph_test: obj/graph_test.o obj/Graph.o obj/Node.o 16 | g++ -std=c++11 obj/graph_test.o obj/Graph.o obj/Node.o -o graph_test 17 | 18 | tensor_test: obj/tensor_test.o obj/Tensor.o obj/ring_buffer.o obj/matrix_task.o obj/thread_pool.o 19 | g++ -std=c++11 -pthread obj/tensor_test.o obj/Tensor.o obj/ring_buffer.o obj/matrix_task.o obj/thread_pool.o -o tensor_test 20 | 21 | operatorNode_test: obj/operatorNode_test.o obj/ComputeGraph.o obj/Optimizer.o obj/Graph.o obj/Dropout.o obj/Sigmoid.o obj/AbsSum.o obj/SquareSum.o obj/Add.o obj/Bias.o obj/Mult.o obj/Minus.o obj/Parameter.o obj/OperatorNode.o obj/Node.o obj/Tensor.o 22 | g++ -std=c++11 obj/operatorNode_test.o obj/ComputeGraph.o obj/Optimizer.o obj/Graph.o obj/Dropout.o obj/Sigmoid.o obj/AbsSum.o obj/SquareSum.o obj/Add.o obj/Bias.o obj/Mult.o obj/Minus.o obj/Parameter.o obj/OperatorNode.o obj/Node.o obj/Tensor.o -o operatorNode_test 23 | 24 | obj/rnn_test.o: 
unit_test/rnn_test.cpp 25 | g++ -std=c++11 -c unit_test/rnn_test.cpp -o obj/rnn_test.o 26 | obj/xor_test.o: unit_test/xor_test.cpp 27 | g++ -std=c++11 -c unit_test/xor_test.cpp -o obj/xor_test.o 28 | obj/operatorNode_test.o: unit_test/operatorNode_test.cpp 29 | g++ -std=c++11 -c unit_test/operatorNode_test.cpp -o obj/operatorNode_test.o 30 | obj/graph_test.o: unit_test/graph_test.cpp 31 | g++ -std=c++11 -c unit_test/graph_test.cpp -o obj/graph_test.o 32 | obj/tensor_test.o: unit_test/tensor_test.cpp 33 | g++ -std=c++11 -c unit_test/tensor_test.cpp -o obj/tensor_test.o 34 | 35 | obj/SquareSum.o: src/op_node/SquareSum.cpp 36 | g++ -std=c++11 -c src/op_node/SquareSum.cpp -o obj/SquareSum.o 37 | obj/Sigmoid.o: src/op_node/Sigmoid.cpp 38 | g++ -std=c++11 -c src/op_node/Sigmoid.cpp -o obj/Sigmoid.o 39 | obj/Mult.o: src/op_node/Mult.cpp 40 | g++ -std=c++11 -c src/op_node/Mult.cpp -o obj/Mult.o 41 | obj/Minus.o: src/op_node/Minus.cpp 42 | g++ -std=c++11 -c src/op_node/Minus.cpp -o obj/Minus.o 43 | obj/Add.o: src/op_node/Add.cpp 44 | g++ -std=c++11 -c src/op_node/Add.cpp -o obj/Add.o 45 | obj/Bias.o: src/op_node/Bias.cpp 46 | g++ -std=c++11 -c src/op_node/Bias.cpp -o obj/Bias.o 47 | obj/Input.o: src/op_node/Input.cpp 48 | g++ -std=c++11 -c src/op_node/Input.cpp -o obj/Input.o 49 | obj/Parameter.o: src/op_node/Parameter.cpp 50 | g++ -std=c++11 -c src/op_node/Parameter.cpp -o obj/Parameter.o 51 | obj/AbsSum.o: src/op_node/AbsSum.cpp 52 | g++ -std=c++11 -c src/op_node/AbsSum.cpp -o obj/AbsSum.o 53 | obj/Dropout.o: src/op_node/Dropout.cpp 54 | g++ -std=c++11 -c src/op_node/Dropout.cpp -o obj/Dropout.o 55 | 56 | obj/ComputeGraph.o: src/ComputeGraph.cpp 57 | g++ -std=c++11 -c src/ComputeGraph.cpp -o obj/ComputeGraph.o 58 | obj/VirtualGraph.o: src/VirtualGraph.cpp 59 | g++ -std=c++11 -c src/VirtualGraph.cpp -o obj/VirtualGraph.o 60 | obj/Graph.o: src/Graph.cpp 61 | g++ -std=c++11 -c src/Graph.cpp -o obj/Graph.o 62 | 63 | obj/OperatorNode.o: src/OperatorNode.cpp 64 | g++ -std=c++11 -c src/OperatorNode.cpp -o obj/OperatorNode.o 65 | obj/VirtualNode.o: src/VirtualNode.cpp 66 | g++ -std=c++11 -c src/VirtualNode.cpp -o obj/VirtualNode.o 67 | obj/LoopNode.o: src/LoopNode.cpp 68 | g++ -std=c++11 -c src/LoopNode.cpp -o obj/LoopNode.o 69 | obj/BranchNode.o: src/BranchNode.cpp 70 | g++ -std=c++11 -c src/BranchNode.cpp -o obj/BranchNode.o 71 | obj/Node.o: src/Node.cpp 72 | g++ -std=c++11 -c src/Node.cpp -o obj/Node.o 73 | 74 | obj/Optimizer.o: src/Optimizer.cpp 75 | g++ -std=c++11 -c src/Optimizer.cpp -o obj/Optimizer.o 76 | obj/Adadelta.o: src/optimizer/Adadelta.cpp 77 | g++ -std=c++11 -c src/optimizer/Adadelta.cpp -o obj/Adadelta.o 78 | 79 | obj/Tensor.o: src/Tensor.cpp 80 | g++ -std=c++11 -c src/Tensor.cpp -o obj/Tensor.o 81 | 82 | obj/ring_buffer.o: src/multi_thread/ring_buffer.cpp 83 | g++ -std=c++11 -c src/multi_thread/ring_buffer.cpp -o obj/ring_buffer.o 84 | obj/thread_pool.o: src/multi_thread/thread_pool.cpp 85 | g++ -std=c++11 -c src/multi_thread/thread_pool.cpp -o obj/thread_pool.o 86 | obj/matrix_task.o: src/multi_thread/matrix_task.cpp 87 | g++ -std=c++11 -c src/multi_thread/matrix_task.cpp -o obj/matrix_task.o 88 | 89 | clean: 90 | rm obj/*o rnn_test xor_test tensor_test operatorNode_test graph_test 91 | -------------------------------------------------------------------------------- /makefile.gpu: -------------------------------------------------------------------------------- 1 | all: 2 | 3 | virtual_node_group=obj/VirtualNode.o obj/BranchNode.o obj/LoopNode.o 4 | 
operator_node_group=obj/OperatorNode.o obj/Dropout.o obj/Input.o obj/Add.o obj/AbsSum.o obj/Bias.o obj/Mult.o obj/Minus.o obj/SquareSum.o obj/Sigmoid.o obj/Parameter.o 5 | graph_group=obj/Graph.o obj/ComputeGraph.o obj/VirtualGraph.o 6 | optimizer_group=obj/Optimizer.o obj/Adadelta.o 7 | 8 | rnn_test: $(operator_node_group) $(graph_group) $(virtual_node_group) $(optimizer_group) obj/rnn_test.o obj/Node.o obj/Tensor.o obj/cuda_lib.o 9 | g++ -std=c++11 $(operator_node_group) $(graph_group) $(virtual_node_group) $(optimizer_group) obj/rnn_test.o obj/Node.o obj/Tensor.o obj/cuda_lib.o -L/usr/local/cuda/lib64 -lcudart -o rnn_test 10 | 11 | xor_test: $(operator_node_group) $(graph_group) $(optimizer_group) obj/xor_test.o obj/VirtualNode.o obj/Node.o obj/Tensor.o obj/cuda_lib.o 12 | g++ -std=c++11 $(operator_node_group) $(graph_group) $(optimizer_group) obj/xor_test.o obj/VirtualNode.o obj/Node.o obj/Tensor.o obj/cuda_lib.o -L/usr/local/cuda/lib64 -lcudart -o xor_test 13 | 14 | graph_test: obj/graph_test.o obj/Graph.o obj/Node.o 15 | g++ -std=c++11 obj/graph_test.o obj/Graph.o obj/Node.o -o graph_test 16 | 17 | tensor_test: obj/tensor_test.o obj/Tensor.o obj/cuda_lib.o 18 | g++ -std=c++11 obj/tensor_test.o obj/Tensor.o obj/cuda_lib.o -L/usr/local/cuda/lib64 -lcudart -o tensor_test 19 | 20 | operatorNode_test: obj/operatorNode_test.o obj/ComputeGraph.o obj/Optimizer.o obj/Graph.o obj/Dropout.o obj/Sigmoid.o obj/AbsSum.o obj/SquareSum.o obj/Add.o obj/Bias.o obj/Mult.o obj/Minus.o obj/Parameter.o obj/OperatorNode.o obj/Node.o obj/Tensor.o obj/cuda_lib.o 21 | g++ -std=c++11 obj/operatorNode_test.o obj/ComputeGraph.o obj/Optimizer.o obj/Graph.o obj/Dropout.o obj/Sigmoid.o obj/AbsSum.o obj/SquareSum.o obj/Add.o obj/Bias.o obj/Mult.o obj/Minus.o obj/Parameter.o obj/OperatorNode.o obj/Node.o obj/Tensor.o obj/cuda_lib.o -L/usr/local/cuda/lib64 -lcudart -o operatorNode_test 22 | 23 | obj/rnn_test.o: unit_test/rnn_test.cpp 24 | g++ -std=c++11 -c unit_test/rnn_test.cpp -o obj/rnn_test.o 25 | obj/xor_test.o: unit_test/xor_test.cpp 26 | g++ -std=c++11 -c unit_test/xor_test.cpp -o obj/xor_test.o 27 | obj/operatorNode_test.o: unit_test/operatorNode_test.cpp 28 | g++ -std=c++11 -c unit_test/operatorNode_test.cpp -o obj/operatorNode_test.o 29 | obj/graph_test.o: unit_test/graph_test.cpp 30 | g++ -std=c++11 -c unit_test/graph_test.cpp -o obj/graph_test.o 31 | obj/tensor_test.o: unit_test/tensor_test.cpp 32 | g++ -std=c++11 -c unit_test/tensor_test.cpp -o obj/tensor_test.o 33 | 34 | obj/SquareSum.o: src/op_node/SquareSum.cpp 35 | g++ -std=c++11 -c src/op_node/SquareSum.cpp -o obj/SquareSum.o 36 | obj/Sigmoid.o: src/op_node/Sigmoid.cpp 37 | g++ -std=c++11 -c src/op_node/Sigmoid.cpp -o obj/Sigmoid.o 38 | obj/Mult.o: src/op_node/Mult.cpp 39 | g++ -std=c++11 -c src/op_node/Mult.cpp -o obj/Mult.o 40 | obj/Minus.o: src/op_node/Minus.cpp 41 | g++ -std=c++11 -c src/op_node/Minus.cpp -o obj/Minus.o 42 | obj/Add.o: src/op_node/Add.cpp 43 | g++ -std=c++11 -c src/op_node/Add.cpp -o obj/Add.o 44 | obj/Bias.o: src/op_node/Bias.cpp 45 | g++ -std=c++11 -c src/op_node/Bias.cpp -o obj/Bias.o 46 | obj/Input.o: src/op_node/Input.cpp 47 | g++ -std=c++11 -c src/op_node/Input.cpp -o obj/Input.o 48 | obj/Parameter.o: src/op_node/Parameter.cpp 49 | g++ -std=c++11 -c src/op_node/Parameter.cpp -o obj/Parameter.o 50 | obj/AbsSum.o: src/op_node/AbsSum.cpp 51 | g++ -std=c++11 -c src/op_node/AbsSum.cpp -o obj/AbsSum.o 52 | obj/Dropout.o: src/op_node/Dropout.cpp 53 | g++ -std=c++11 -c src/op_node/Dropout.cpp -o obj/Dropout.o 54 | 55 | 
obj/ComputeGraph.o: src/ComputeGraph.cpp 56 | g++ -std=c++11 -c src/ComputeGraph.cpp -o obj/ComputeGraph.o 57 | obj/VirtualGraph.o: src/VirtualGraph.cpp 58 | g++ -std=c++11 -c src/VirtualGraph.cpp -o obj/VirtualGraph.o 59 | obj/Graph.o: src/Graph.cpp 60 | g++ -std=c++11 -c src/Graph.cpp -o obj/Graph.o 61 | 62 | obj/OperatorNode.o: src/OperatorNode.cpp 63 | g++ -std=c++11 -c src/OperatorNode.cpp -o obj/OperatorNode.o 64 | obj/VirtualNode.o: src/VirtualNode.cpp 65 | g++ -std=c++11 -c src/VirtualNode.cpp -o obj/VirtualNode.o 66 | obj/LoopNode.o: src/LoopNode.cpp 67 | g++ -std=c++11 -c src/LoopNode.cpp -o obj/LoopNode.o 68 | obj/BranchNode.o: src/BranchNode.cpp 69 | g++ -std=c++11 -c src/BranchNode.cpp -o obj/BranchNode.o 70 | obj/Node.o: src/Node.cpp 71 | g++ -std=c++11 -c src/Node.cpp -o obj/Node.o 72 | 73 | obj/Optimizer.o: src/Optimizer.cpp 74 | g++ -std=c++11 -c src/Optimizer.cpp -o obj/Optimizer.o 75 | obj/Adadelta.o: src/optimizer/Adadelta.cpp 76 | g++ -std=c++11 -c src/optimizer/Adadelta.cpp -o obj/Adadelta.o 77 | 78 | # gpu based 79 | obj/Tensor.o: src/cuda/Tensor.cpp obj/cuda_lib.o 80 | g++ -std=c++11 -c src/cuda/Tensor.cpp -o obj/Tensor.o 81 | obj/cuda_lib.o: src/cuda/cuda_lib.cu 82 | nvcc -c -I/include/cuda -I/usr/local/cuda/include src/cuda/cuda_lib.cu -o obj/cuda_lib.o 83 | 84 | clean: 85 | rm obj/*o 86 | -------------------------------------------------------------------------------- /obj/empty: -------------------------------------------------------------------------------- 1 | object dir 2 | -------------------------------------------------------------------------------- /src/BranchNode.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/BranchNode.h" 2 | #include "../include/op_node/Parameter.h" 3 | #include "../include/Tensor.h" 4 | #include 5 | #include 6 | using namespace std; 7 | BranchNode::BranchNode (string type, string id, Node* (*func) (int, Graph*, BranchNode*)): Node (type, id) { 8 | m_dep_op_node_name = ""; 9 | choose_node = func; 10 | } 11 | -------------------------------------------------------------------------------- /src/ComputeGraph.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/ComputeGraph.h" 2 | #include "../include/OperatorNode.h" 3 | #include "../include/op_node/Input.h" 4 | #include 5 | using namespace std; 6 | ComputeGraph::ComputeGraph () { 7 | m_need_release_tensor_flag = 0; 8 | } 9 | void ComputeGraph::forward_propagation (vector &result_list) { 10 | if (m_need_release_tensor_flag == 1) {// 前向传播前释放上一次的运算结果 11 | release_tensor (); 12 | } 13 | vector topo_result; 14 | topological_sort (m_adj_table, topo_result); 15 | for (int i = 0; i < topo_result.size (); ++i) { 16 | if (topo_result[i] -> m_name[0] == "Input") { 17 | ((Input*) topo_result[i]) -> op ((Input*) topo_result[i]); 18 | } else { 19 | ((OperatorNode*) topo_result[i]) -> op (); 20 | } 21 | } 22 | get_endnode (result_list); 23 | m_need_release_tensor_flag = 1; 24 | } 25 | void ComputeGraph::back_propagation () { 26 | if (m_need_build_reverse_graph_flag == 1) { 27 | build_reverse_graph (); 28 | } 29 | vector topo_result; 30 | topological_sort (m_reverse_table, topo_result); 31 | if (m_optimizer == 0) { 32 | cout << "optimizer has not been set" << endl; 33 | } else { 34 | m_optimizer -> optimize (topo_result); 35 | } 36 | m_need_release_tensor_flag = 1; 37 | } 38 | void ComputeGraph::release_tensor () { 39 | unordered_map::iterator node_map_it = m_node_map.begin (); 40 | 
while (node_map_it != m_node_map.end ()) { 41 | ((OperatorNode*) (node_map_it -> second)) -> release_tensor (); 42 | ++node_map_it; 43 | } 44 | m_need_release_tensor_flag = 0; 45 | } 46 | ComputeGraph::~ComputeGraph () { 47 | cout << "compute graph free" << endl; 48 | delete m_optimizer; 49 | m_optimizer = 0; 50 | m_node_map.clear (); 51 | m_adj_table.clear (); 52 | m_reverse_table.clear (); 53 | } 54 | -------------------------------------------------------------------------------- /src/Graph.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../include/Graph.h" 3 | using namespace std; 4 | Graph::Graph () { 5 | m_need_build_reverse_graph_flag = 1; 6 | } 7 | void Graph::add_node (string parent_name, Node* node) { 8 | m_node_map[node -> get_name ()] = node;// 将节点加入字典 9 | if (parent_name != "") { 10 | if (m_node_map.end () != m_node_map.find (parent_name)) { 11 | Node* parent_node = m_node_map[parent_name]; 12 | node -> m_parents.push_back (parent_node); 13 | m_adj_table[parent_name].push_back (node);// 节点加入邻接表 14 | } else { 15 | cout << "parent node is not in graph" << endl; 16 | } 17 | } 18 | } 19 | Node* Graph::get_node (string name) { 20 | if (m_node_map.end () == m_node_map.find (name)) { 21 | return 0; 22 | } else { 23 | return m_node_map[name]; 24 | } 25 | } 26 | void Graph::build_subgraph (vector &endnode_list) { 27 | unordered_map::iterator node_map_it = m_node_map.begin (); 28 | while (node_map_it != m_node_map.end ()) {// 所有节点设置为不可见 29 | node_map_it -> second -> m_invisible = 1; 30 | ++node_map_it; 31 | } 32 | // 构造子图 33 | queue q; 34 | unordered_set visit; 35 | for (int i = 0; i < endnode_list.size (); ++i) { 36 | q.push (endnode_list[i]); 37 | visit.insert (endnode_list[i]); 38 | } 39 | while (!q.empty ()) { 40 | Node* node = q.front (); 41 | q.pop (); 42 | node -> m_invisible = 0; 43 | for (int i = 0; i < node -> m_parents.size (); ++i) { 44 | if (visit.find (node -> m_parents[i]) == visit.end ()) { 45 | visit.insert (node -> m_parents[i]); 46 | q.push (node -> m_parents[i]); 47 | } 48 | } 49 | } 50 | } 51 | void Graph::topological_sort (std::unordered_map > &adj_table, std::vector &result) { 52 | unordered_map indegree; 53 | unordered_map::iterator node_map_it = m_node_map.begin (); 54 | while (node_map_it != m_node_map.end ()) { 55 | indegree[node_map_it -> first] = 0; 56 | ++node_map_it; 57 | } 58 | unordered_map >::iterator adj_table_it = adj_table.begin (); 59 | while (adj_table_it != adj_table.end ()) { 60 | for (int i = 0; i < adj_table_it -> second.size (); ++i) { 61 | ++indegree[(adj_table_it -> second)[i] -> get_name ()]; 62 | } 63 | ++adj_table_it; 64 | } 65 | queue q; 66 | unordered_map::iterator indegree_it = indegree.begin (); 67 | while (indegree_it != indegree.end ()) { 68 | if (indegree_it -> second == 0) { 69 | q.push (m_node_map[indegree_it -> first]); 70 | } 71 | ++indegree_it; 72 | } 73 | while (!q.empty ()) { 74 | Node* node = q.front (); 75 | q.pop (); 76 | if (node -> m_invisible == 0) {// 可见节点加入result 77 | result.push_back (node); 78 | } 79 | vector adj_nodes = adj_table[node -> get_name ()]; 80 | for (int i = 0; i < adj_nodes.size (); ++i) { 81 | --indegree[(adj_nodes[i]) -> get_name ()]; 82 | if (indegree[(adj_nodes[i]) -> get_name ()] == 0) { 83 | q.push (adj_nodes[i]); 84 | } 85 | } 86 | } 87 | } 88 | void Graph::build_reverse_graph () { 89 | unordered_map >::iterator adj_table_it = m_adj_table.begin (); 90 | while (adj_table_it != m_adj_table.end ()) { 91 | Node* parent = 
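// build_reverse_graph(): for every edge parent -> child recorded in
// m_adj_table, add the opposite edge child -> parent to m_reverse_table.
// ComputeGraph::back_propagation() topologically sorts this transposed graph,
// so gradients are propagated from the loss node back towards the inputs.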
m_node_map[adj_table_it -> first]; 92 | vector adj_nodes = adj_table_it -> second; 93 | for (int i = 0; i < adj_nodes.size (); ++i) { 94 | string name = (adj_nodes[i]) -> get_name (); 95 | m_reverse_table[name].push_back (parent); 96 | } 97 | ++adj_table_it; 98 | } 99 | m_need_build_reverse_graph_flag = 0; 100 | } 101 | void Graph::get_endnode (vector &endnode_list) { 102 | if (m_need_build_reverse_graph_flag == 1) {// 没有构建转置图 103 | build_reverse_graph (); 104 | } 105 | unordered_map >::iterator reverse_table_it = m_reverse_table.begin (); 106 | unordered_map::iterator node_map_it = m_node_map.begin (); 107 | unordered_map indegree; 108 | while (node_map_it != m_node_map.end ()) { 109 | indegree[node_map_it -> first] = 0; 110 | ++node_map_it; 111 | } 112 | while (reverse_table_it != m_reverse_table.end ()) { 113 | vector adj_nodes = reverse_table_it -> second; 114 | for (int i = 0; i < adj_nodes.size (); ++i) { 115 | ++indegree[(adj_nodes[i]) -> get_name ()]; 116 | } 117 | ++reverse_table_it; 118 | } 119 | unordered_map::iterator indegree_it = indegree.begin (); 120 | while (indegree_it != indegree.end ()) { 121 | if (indegree_it -> second == 0) { 122 | endnode_list.push_back (m_node_map[indegree_it -> first]); 123 | } 124 | ++indegree_it; 125 | } 126 | } 127 | Graph::~Graph () { 128 | cout << "free node_map" << endl; 129 | unordered_map::iterator node_map_it = m_node_map.begin (); 130 | while (node_map_it != m_node_map.end ()) { 131 | delete node_map_it -> second; 132 | node_map_it -> second = 0; 133 | ++node_map_it; 134 | } 135 | m_node_map.clear (); 136 | m_adj_table.clear (); 137 | m_reverse_table.clear (); 138 | } 139 | -------------------------------------------------------------------------------- /src/LoopNode.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/LoopNode.h" 2 | #include 3 | using namespace std; 4 | LoopNode::LoopNode (string type, string id, void (*func1) (LoopNode*), int (*func2) (Graph*, int)): Node (type, id) { 5 | m_sub_vgraph = new VirtualGraph (); 6 | m_end_compute_node = 0; 7 | init = func1; 8 | condition = func2; 9 | } 10 | void LoopNode::inner_loop (Graph* compute_graph) { 11 | int idx = 0; 12 | init (this);// 初始化循环 13 | while (condition (compute_graph, idx) == 0) { 14 | m_end_compute_node = m_sub_vgraph -> build_compute_graph (compute_graph, idx); 15 | ++idx; 16 | } 17 | } 18 | LoopNode::~LoopNode () { 19 | cout << "free LoopNode: " << get_name () << endl; 20 | delete m_sub_vgraph; 21 | m_sub_vgraph = 0; 22 | m_end_compute_node = 0; 23 | } 24 | -------------------------------------------------------------------------------- /src/Node.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/Node.h" 2 | using namespace std; 3 | Node::Node (string type, string id) { 4 | m_name.push_back (type); 5 | m_name.push_back (id); 6 | m_invisible = 0; 7 | } 8 | std::string Node::get_name () { 9 | string name = ""; 10 | for (int i = 0; i < m_name.size (); ++i) { 11 | name += m_name[i] + ":"; 12 | } 13 | return name; 14 | } 15 | Node::~Node () { 16 | } 17 | -------------------------------------------------------------------------------- /src/OperatorNode.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/OperatorNode.h" 2 | #include 3 | using namespace std; 4 | OperatorNode::OperatorNode (string type, string id, string idx): Node (type, id) { 5 | m_name.push_back (idx); 6 | m_sum_grad = 0; 7 | 
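// The chain_rule() helper below performs the accumulation step of reverse-mode
// differentiation: `grad` is this node's local gradient with respect to parent
// number parent_idx. If this node already carries an accumulated gradient
// m_sum_grad, the contribution m_sum_grad * grad is stored into (or added to)
// the parent's m_sum_grad; otherwise the rows of grad are summed into a single
// row and accumulated there directly. Contributions from all downstream paths
// therefore add up in each parent.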
m_output = 0; 8 | } 9 | void OperatorNode::chain_rule (Tensor* grad, int parent_idx) { 10 | OperatorNode* parent_op_node = (OperatorNode*) m_parents[parent_idx]; 11 | if (parent_op_node -> m_sum_grad == 0) { 12 | if (m_sum_grad == 0) { 13 | // 把grad压缩为一行 14 | vector shape (2); shape[0] = 1; shape[1] = grad -> m_shape[1]; 15 | parent_op_node -> m_sum_grad = new Tensor (shape); 16 | for (int i = 0; i < grad -> m_size; ++i) { 17 | int idx = i % parent_op_node -> m_sum_grad -> m_size; 18 | parent_op_node -> m_sum_grad -> m_tensor[idx] += grad -> m_tensor[i]; 19 | } 20 | } else { 21 | parent_op_node -> m_sum_grad = m_sum_grad -> matrix_mult (grad); 22 | } 23 | } else { 24 | if (m_sum_grad == 0) { 25 | // 把grad压缩为一行 26 | for (int i = 0; i < grad -> m_size; ++i) { 27 | int idx = i % parent_op_node -> m_sum_grad -> m_size; 28 | parent_op_node -> m_sum_grad -> m_tensor[idx] += grad -> m_tensor[i]; 29 | } 30 | } else { 31 | parent_op_node -> m_sum_grad -> add (m_sum_grad -> matrix_mult (grad), parent_op_node -> m_sum_grad); 32 | } 33 | } 34 | } 35 | void OperatorNode::op () { 36 | } 37 | void OperatorNode::grad_op () { 38 | } 39 | void OperatorNode::release_tensor () { 40 | } 41 | OperatorNode::~OperatorNode () { 42 | // cout << "free operatorNode:" << get_name () << endl; 43 | } 44 | -------------------------------------------------------------------------------- /src/Optimizer.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/Optimizer.h" 2 | #include "../include/OperatorNode.h" 3 | #include "../include/Tensor.h" 4 | #include 5 | using namespace std; 6 | Optimizer::Optimizer (float a) { 7 | m_a = a; 8 | } 9 | void Optimizer::optimize (vector &topo_results) { 10 | for (int i = 0; i < topo_results.size (); ++i) {// 计算梯度 11 | ((OperatorNode*) topo_results[i]) -> grad_op (); 12 | } 13 | for (int i = 0; i < topo_results.size (); ++i) {// 更新 14 | OperatorNode* op_node = (OperatorNode*) topo_results[i]; 15 | if (op_node -> m_sum_grad != 0 && op_node -> m_name[0] == "Parameter") { 16 | for (int j = 0; j < op_node -> m_output -> m_size; ++j) { 17 | op_node -> m_output -> m_tensor[j] -= m_a * op_node -> m_sum_grad -> m_tensor[j]; 18 | } 19 | } 20 | } 21 | } 22 | Optimizer::~Optimizer () { 23 | cout << "free optimizer" << endl; 24 | } 25 | -------------------------------------------------------------------------------- /src/Tensor.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../include/Tensor.h" 6 | #include "../include/multi_thread/matrix_task.h" 7 | #include "../include/multi_thread/thread_pool.h" 8 | using namespace std; 9 | 10 | Tensor::Tensor (vector shape, int need_init) { 11 | m_size = 1; 12 | for (int i = 0; i < shape.size (); ++i) { 13 | m_shape.push_back (shape[i]); 14 | m_size *= shape[i]; 15 | } 16 | m_tensor = new float[m_size]; 17 | if (need_init == 1) { 18 | for (int i = 0; i < m_size; ++i) { 19 | m_tensor[i] = 0.0; 20 | } 21 | } 22 | } 23 | 24 | Tensor::Tensor (vector shape, float data[]) { 25 | m_size = 1; 26 | for (int i = 0; i < shape.size (); ++i) { 27 | m_shape.push_back (shape[i]); 28 | m_size *= shape[i]; 29 | } 30 | m_tensor = new float[m_size]; 31 | for (int i = 0; i < m_size; ++i) { 32 | m_tensor[i] = data[i]; 33 | } 34 | } 35 | 36 | float Tensor::get_value (vector idxs) { 37 | int idx = 0; 38 | int t = 1; 39 | for (int i = idxs.size () - 1; i >= 0; --i) { 40 | idx += idxs[i] * t; 41 | t *= m_shape[i]; 42 | } 43 | 
return m_tensor[idx]; 44 | } 45 | 46 | void Tensor::set_value (vector idxs, float value) { 47 | int idx = 0; 48 | int t = 1; 49 | for (int i = idxs.size () - 1; i >= 0; --i) { 50 | idx += idxs[i] * t; 51 | t *= m_shape[i]; 52 | } 53 | m_tensor[idx] = value; 54 | } 55 | 56 | Tensor* Tensor::matrix_mult (Tensor* tensor) { 57 | Tensor* result = 0; 58 | if (m_shape[1] == tensor -> m_shape[0]) { 59 | vector result_shape (2); 60 | result_shape[0] = m_shape[0]; 61 | result_shape[1] = tensor -> m_shape[1]; 62 | result = new Tensor (result_shape, 0); 63 | int idx0 = 0, idx1 = 0, idx2 = 0; 64 | vector task_list; 65 | for (int i = 0; i < m_shape[0]; ++i) { 66 | for (int j = 0; j < tensor -> m_shape[1]; ++j) { 67 | /*float r = 0; 68 | float compensation = 0.0; 69 | for (int k = 0; k < m_shape[1]; ++k) { 70 | idx0 = i * m_shape[1] + k; 71 | idx1 = k * tensor -> m_shape[1] + j; 72 | // Kahan's Summation Formula 73 | // r += m_tensor[idx0] * tensor -> m_tensor[idx1]; 74 | float y = m_tensor[idx0] * tensor -> m_tensor[idx1] - compensation;// 补偿 75 | float t = r + y;// 发生舍入 76 | compensation = (t - r) - y;// 记录下舍入误差 77 | r = t; 78 | } 79 | idx2 = i * tensor -> m_shape[1] + j; 80 | result -> m_tensor[idx2] = r;*/ 81 | task_list.push_back (new matrix_mult_task (this, tensor, result, i, j)); 82 | } 83 | } 84 | (thread_pool::get_instance ()) -> add_job_list (task_list); 85 | } 86 | return result; 87 | } 88 | 89 | Tensor* Tensor::scalar_mult (float scalar) { 90 | Tensor* result = new Tensor (m_shape, 0); 91 | /*for (int i = 0; i < m_size; ++i) { 92 | result -> m_tensor[i] = m_tensor[i] * scalar; 93 | }*/ 94 | vector task_list; 95 | int thread_num = (thread_pool::get_instance ()) -> m_worker_num; 96 | for (int i = 0; i < thread_num; ++i) { 97 | task_list.push_back (new matrix_scalar_mult_task (this, scalar, result, i, thread_num)); 98 | } 99 | (thread_pool::get_instance ()) -> add_job_list (task_list); 100 | return result; 101 | } 102 | 103 | void Tensor::scalar_acc_mult (float scalar) { 104 | /*for (int i = 0; i < m_size; ++i) { 105 | m_tensor[i] = m_tensor[i] * scalar; 106 | }*/ 107 | vector task_list; 108 | int thread_num = (thread_pool::get_instance ()) -> m_worker_num; 109 | for (int i = 0; i < thread_num; ++i) { 110 | task_list.push_back (new matrix_scalar_mult_task (this, scalar, this, i, thread_num)); 111 | } 112 | (thread_pool::get_instance ()) -> add_job_list (task_list); 113 | } 114 | 115 | void Tensor::element_square () { 116 | for (int i = 0; i < m_size; ++i) { 117 | m_tensor[i] = m_tensor[i] * m_tensor[i]; 118 | } 119 | } 120 | 121 | float Tensor::element_abs_sum () { 122 | float result = 0; 123 | float compensation = 0.0; 124 | for (int i = 0; i < m_size; ++i) { 125 | // result += fabs (m_tensor[i]); 126 | // Kahan's Summation Formula 127 | float y = fabs (m_tensor[i]) - compensation;// 补偿 128 | float t = result + y;// 发生舍入 129 | compensation = (t - result) - y;// 记录本次的舍入误差 130 | result = t; 131 | } 132 | return result; 133 | } 134 | 135 | float Tensor::element_square_sum () { 136 | float result = 0; 137 | float compensation = 0.0; 138 | for (int i = 0; i < m_size; ++i) { 139 | // result += m_tensor[i] * m_tensor[i]; 140 | // Kahan's Summation Formula 141 | float y = m_tensor[i] * m_tensor[i] - compensation; 142 | float t = result + y; 143 | compensation = (t - result) - y; 144 | result = t; 145 | } 146 | return result; 147 | } 148 | 149 | Tensor* Tensor::element_mult (Tensor* tensor) { 150 | Tensor* result = 0; 151 | int same_shape = 1; 152 | if (m_shape.size () == tensor -> m_shape.size ()) { 
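// Element-wise multiplication requires both tensors to have exactly the same
// shape; no broadcasting is implemented, and a null pointer is returned when
// the shapes differ (the same convention used by add(Tensor*)).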
153 | for (int i = 0; i < m_shape.size (); ++i) { 154 | if (m_shape[i] != tensor -> m_shape[i]) { 155 | same_shape = 0; 156 | break; 157 | } 158 | } 159 | } else { 160 | same_shape = 0; 161 | } 162 | if (same_shape == 1) { 163 | result = new Tensor (tensor -> m_shape, 0); 164 | for (int i = 0; i < m_size; ++i) { 165 | result -> m_tensor[i] = m_tensor[i] * tensor -> m_tensor[i]; 166 | } 167 | } 168 | return result; 169 | } 170 | 171 | void Tensor::add (Tensor* tensor, Tensor* result) { 172 | /*for (int i = 0; i < m_size; ++i) { 173 | result -> m_tensor[i] = m_tensor[i] + tensor -> m_tensor[i]; 174 | }*/ 175 | vector task_list; 176 | int thread_num = (thread_pool::get_instance ()) -> m_worker_num; 177 | for (int i = 0; i < thread_num; ++i) { 178 | task_list.push_back (new matrix_add_task (this, tensor, result, i, thread_num)); 179 | } 180 | (thread_pool::get_instance ()) -> add_job_list (task_list); 181 | } 182 | 183 | Tensor* Tensor::add (Tensor* tensor) { 184 | Tensor* result = 0; 185 | int same_shape = 1; 186 | if (m_shape.size () == tensor -> m_shape.size ()) { 187 | for (int i = 0; i < m_shape.size (); ++i) { 188 | if (m_shape[i] != tensor -> m_shape[i]) { 189 | same_shape = 0; 190 | break; 191 | } 192 | } 193 | } else { 194 | same_shape = 0; 195 | } 196 | 197 | if (same_shape == 1) { 198 | result = new Tensor (tensor -> m_shape, 0); 199 | /*for (int i = 0; i < m_size; ++i) { 200 | result -> m_tensor[i] = m_tensor[i] + tensor -> m_tensor[i]; 201 | }*/ 202 | vector task_list; 203 | int thread_num = (thread_pool::get_instance ()) -> m_worker_num; 204 | for (int i = 0; i < thread_num; ++i) { 205 | task_list.push_back (new matrix_add_task (this, tensor, result, i, thread_num)); 206 | } 207 | (thread_pool::get_instance ()) -> add_job_list (task_list); 208 | } 209 | return result; 210 | } 211 | 212 | void Tensor::init () { 213 | // srand (time (0)); 214 | for (int i = 0; i < m_size; ++i) { 215 | m_tensor[i] = (rand () % 1000) / 1000.0 - 0.5; 216 | } 217 | } 218 | 219 | void Tensor::display () { 220 | vector idxs0 (2); 221 | for (int i = 0; i < m_shape[0]; ++i) { 222 | for (int j = 0; j < m_shape[1]; ++j) { 223 | idxs0[0] = i; idxs0[1] = j; 224 | cout << get_value (idxs0) << " "; 225 | } 226 | cout << endl; 227 | } 228 | } 229 | 230 | Tensor::~Tensor () { 231 | delete[] m_tensor; 232 | } 233 | -------------------------------------------------------------------------------- /src/VirtualGraph.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/VirtualGraph.h" 2 | #include "../include/VirtualNode.h" 3 | #include "../include/LoopNode.h" 4 | #include "../include/OperatorNode.h" 5 | #include "../include/op_node/Input.h" 6 | #include 7 | #include 8 | using namespace std; 9 | Node* VirtualGraph::build_compute_graph (Graph* compute_graph, int idx) {// 输入计算图的引用 10 | vector topo_result; 11 | topological_sort (m_adj_table, topo_result); 12 | Node* end_node = 0; 13 | for (int i = 0; i < topo_result.size (); ++i) {// 构建 14 | // cout << topo_result[i] -> get_name () << endl; 15 | if (topo_result[i] -> m_name[0] == "Loop") {// 如果是循环节点 16 | LoopNode* loop_node = (LoopNode*) topo_result[i]; 17 | loop_node -> inner_loop (compute_graph);// 执行循环 18 | end_node = loop_node -> m_end_compute_node; 19 | } else if (topo_result[i] -> m_name[0] == "Branch") {// 如果是分支结点 20 | // do nothing 21 | } else {// 普通虚拟节点 22 | VirtualNode* v_node = (VirtualNode*) topo_result[i]; 23 | if (v_node -> m_parents.size () == 0) {// 该虚拟节点没有依赖的虚拟节点 24 | Node* op_node = v_node -> 
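// get_op_node(idx) is a memoizing factory: the VirtualNode creates the
// concrete OperatorNode for loop iteration idx on first request and caches it
// in m_op_node_map under the key "type:id:idx:", so rebuilding the compute
// graph on later training steps reuses the same node (and its parameters)
// instead of allocating a new one.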
get_op_node (idx); 25 | compute_graph -> add_node ("", op_node);// 向计算图中添加节点 26 | if (op_node -> m_name[0] == "Input") { 27 | ((Input*) op_node) -> op ((Input*) op_node);// 执行该计算节点 28 | } else { 29 | ((OperatorNode*) op_node) -> op ();// 执行该计算节点 30 | } 31 | end_node = op_node; 32 | } else { 33 | vector parents_op_node; 34 | v_node -> get_parents_op_nodes (idx, compute_graph, parents_op_node); 35 | int need_op_node = 1; 36 | for (int i = 0; i < parents_op_node.size (); ++i) { 37 | if (parents_op_node[i] == 0) {// 存在没有生成的依赖的计算节点 38 | need_op_node = 0; 39 | } 40 | } 41 | if (need_op_node == 1) {// 当前计算节点可以生成 42 | Node* op_node = v_node -> get_op_node (idx); 43 | for (int i = 0; i < parents_op_node.size (); ++i) { 44 | compute_graph -> add_node (parents_op_node[i] -> get_name (), op_node); 45 | } 46 | ((OperatorNode*) op_node) -> op ();// 执行该计算节点 47 | end_node = op_node; 48 | } 49 | } 50 | } 51 | } 52 | return end_node; 53 | } 54 | VirtualGraph::~VirtualGraph () { 55 | cout << "virtual graph free" << endl; 56 | // 释放虚拟节点 57 | cout << "virtual node_map free" << endl; 58 | unordered_map::iterator node_map_it = m_node_map.begin (); 59 | while (node_map_it != m_node_map.end ()) { 60 | delete node_map_it -> second; 61 | node_map_it -> second = 0; 62 | ++node_map_it; 63 | } 64 | m_node_map.clear (); 65 | m_adj_table.clear (); 66 | m_reverse_table.clear (); 67 | } 68 | -------------------------------------------------------------------------------- /src/VirtualNode.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/VirtualNode.h" 2 | #include "../include/BranchNode.h" 3 | #include "../include/LoopNode.h" 4 | #include "../include/op_node/Add.h" 5 | #include "../include/op_node/Input.h" 6 | #include "../include/op_node/Mult.h" 7 | #include "../include/op_node/Bias.h" 8 | #include "../include/op_node/Minus.h" 9 | #include "../include/op_node/SquareSum.h" 10 | #include "../include/op_node/AbsSum.h" 11 | #include "../include/op_node/Sigmoid.h" 12 | #include "../include/op_node/Parameter.h" 13 | #include "../include/op_node/Dropout.h" 14 | #include 15 | #include 16 | using namespace std; 17 | VirtualNode::VirtualNode (string type, string id, int share_parameter, float keep_rate): Node (type, id) { 18 | m_share_parameter = share_parameter; 19 | m_keep_rate = keep_rate; 20 | input_op = 0; 21 | } 22 | void VirtualNode::get_parents_op_nodes (int idx, Graph* compute_graph, vector &node_list) { 23 | ostringstream oss; 24 | oss << idx << ":"; 25 | for (int i = 0; i < m_parents.size (); ++i) { 26 | if (m_parents[i] -> m_name[0] == "Branch") { 27 | node_list.push_back (((BranchNode*) m_parents[i]) -> choose_node (idx, compute_graph, (BranchNode*) m_parents[i])); 28 | } else if (m_parents[i] -> m_name[0] == "Loop") { 29 | node_list.push_back (((LoopNode*) m_parents[i]) -> m_end_compute_node); 30 | } else { 31 | string op_node_name = m_parents[i] -> get_name () + oss.str (); 32 | node_list.push_back (compute_graph -> get_node (op_node_name)); 33 | } 34 | } 35 | } 36 | 37 | Node* VirtualNode::get_op_node (int idx) {// 一个OperatorNode工厂 38 | ostringstream oss; 39 | oss << idx; 40 | Node* op_node = 0; 41 | string op_node_name = m_name[0] + ":" + m_name[1] + ":" + oss.str () + ":"; 42 | if (m_op_node_map.find (op_node_name) == m_op_node_map.end ()) {// 之前没生成过该计算节点 43 | if (m_name[0] == "Add") { 44 | op_node = new Add (m_name[0], m_name[1], oss.str ()); 45 | } else if (m_name[0] == "Input") { 46 | if (m_input_data.size () == 0) { 47 | cout << "input data is not 
initialize" << endl; 48 | } else { 49 | op_node = new Input (m_name[0], m_name[1], oss.str (), m_input_data, input_op); 50 | } 51 | } else if (m_name[0] == "Parameter") { 52 | if (m_data == 0) { 53 | cout << "parameter node is not initialize" << endl; 54 | } else { 55 | op_node = new Parameter (m_name[0], m_name[1], oss.str (), m_data, m_share_parameter); 56 | } 57 | } else if (m_name[0] == "SquareSum") { 58 | op_node = new SquareSum (m_name[0], m_name[1], oss.str ()); 59 | } else if (m_name[0] == "AbsSum") { 60 | op_node = new AbsSum (m_name[0], m_name[1], oss.str ()); 61 | } else if (m_name[0] == "Mult") { 62 | op_node = new Mult (m_name[0], m_name[1], oss.str ()); 63 | } else if (m_name[0] == "Minus") { 64 | op_node = new Minus (m_name[0], m_name[1], oss.str ()); 65 | } else if (m_name[0] == "Sigmoid") { 66 | op_node = new Sigmoid (m_name[0], m_name[1], oss.str ()); 67 | } else if (m_name[0] == "Bias") { 68 | op_node = new Bias (m_name[0], m_name[1], oss.str ()); 69 | } else if (m_name[0] == "Dropout") { 70 | if (m_data == 0) { 71 | cout << "dropout filter shape is not set" << endl; 72 | } else { 73 | op_node = new Dropout (m_name[0], m_name[1], oss.str (), m_data, m_keep_rate); 74 | } 75 | } else { 76 | cout << "op node name error" << endl; 77 | } 78 | m_op_node_map[op_node_name] = op_node; 79 | } else {// 直接找到虚拟节点生成过的该计算节点 80 | op_node = m_op_node_map[op_node_name]; 81 | } 82 | return op_node; 83 | } 84 | VirtualNode::~VirtualNode () { 85 | // cout << "free virtualNode: " << get_name () << endl; 86 | if (m_data != 0) { 87 | delete m_data; 88 | } 89 | for (int i = 0; i < m_input_data.size (); ++i) { 90 | delete m_input_data[i]; 91 | } 92 | vector ().swap (m_input_data); 93 | // 释放每个虚拟节点生成的计算节点 94 | unordered_map::iterator op_node_map_it = m_op_node_map.begin (); 95 | while (op_node_map_it != m_op_node_map.end ()) { 96 | delete op_node_map_it -> second; 97 | ++op_node_map_it; 98 | } 99 | m_op_node_map.clear (); 100 | } 101 | -------------------------------------------------------------------------------- /src/cuda/Tensor.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../include/Tensor.h" 5 | #include "../../include/cuda/cuda_lib.h" 6 | using namespace std; 7 | 8 | Tensor::Tensor (vector shape, int need_init) { 9 | m_size = 1; 10 | for (int i = 0; i < shape.size (); ++i) { 11 | m_shape.push_back (shape[i]); 12 | m_size *= shape[i]; 13 | } 14 | m_tensor = new float[m_size]; 15 | if (need_init == 1) { 16 | for (int i = 0; i < m_size; ++i) { 17 | m_tensor[i] = 0.0; 18 | } 19 | } 20 | } 21 | 22 | Tensor::Tensor (vector shape, float data[]) { 23 | m_size = 1; 24 | for (int i = 0; i < shape.size (); ++i) { 25 | m_shape.push_back (shape[i]); 26 | m_size *= shape[i]; 27 | } 28 | m_tensor = new float[m_size]; 29 | for (int i = 0; i < m_size; ++i) { 30 | m_tensor[i] = data[i]; 31 | } 32 | } 33 | 34 | float Tensor::get_value (vector idxs) { 35 | int idx = 0; 36 | int t = 1; 37 | for (int i = idxs.size () - 1; i >= 0; --i) { 38 | idx += idxs[i] * t; 39 | t *= m_shape[i]; 40 | } 41 | return m_tensor[idx]; 42 | } 43 | 44 | void Tensor::set_value (vector idxs, float value) { 45 | int idx = 0; 46 | int t = 1; 47 | for (int i = idxs.size () - 1; i >= 0; --i) { 48 | idx += idxs[i] * t; 49 | t *= m_shape[i]; 50 | } 51 | m_tensor[idx] = value; 52 | } 53 | 54 | Tensor* Tensor::matrix_mult (Tensor* tensor) { 55 | Tensor* result = 0; 56 | if (m_shape[1] == tensor -> m_shape[0]) { 57 | vector result_shape (2); 58 | 
result_shape[0] = m_shape[0]; 59 | result_shape[1] = tensor -> m_shape[1]; 60 | result = new Tensor (result_shape, 0); 61 | // 调用cuda 62 | cuda_matrix_mult (m_tensor, tensor -> m_tensor, result -> m_tensor, m_shape[0], m_shape[1], tensor -> m_shape[0], tensor -> m_shape[1]); 63 | } 64 | return result; 65 | } 66 | 67 | Tensor* Tensor::scalar_mult (float scalar) { 68 | Tensor* result = new Tensor (m_shape, 0); 69 | // 调用cuda 70 | cuda_scalar_tensor_mult (m_tensor, result -> m_tensor, scalar, m_size); 71 | return result; 72 | } 73 | 74 | void Tensor::scalar_acc_mult (float scalar) { 75 | cuda_scalar_tensor_mult (m_tensor, m_tensor, scalar, m_size); 76 | } 77 | 78 | float Tensor::element_abs_sum () { 79 | float result = 0; 80 | // 调用cuda 81 | result = cuda_element_abs_sum (m_tensor, m_size); 82 | return result; 83 | } 84 | 85 | float Tensor::element_square_sum () { 86 | float result = 0; 87 | // 调用cuda 88 | result = cuda_element_square_sum (m_tensor, m_size); 89 | return result; 90 | } 91 | 92 | void Tensor::element_square () { 93 | // 调用cuda 94 | cuda_element_square (m_tensor, m_size); 95 | } 96 | 97 | Tensor* Tensor::element_mult (Tensor* tensor) { 98 | Tensor* result = 0; 99 | int same_shape = 1; 100 | if (m_shape.size () == tensor -> m_shape.size ()) { 101 | for (int i = 0; i < m_shape.size (); ++i) { 102 | if (m_shape[i] != tensor -> m_shape[i]) { 103 | same_shape = 0; 104 | break; 105 | } 106 | } 107 | } else { 108 | same_shape = 0; 109 | } 110 | if (same_shape == 1) { 111 | result = new Tensor (tensor -> m_shape, 0); 112 | // 调用cuda 113 | cuda_element_mult (m_tensor, tensor -> m_tensor, result -> m_tensor, m_size); 114 | } 115 | return result; 116 | } 117 | 118 | void Tensor::add (Tensor* tensor, Tensor* result) { 119 | // 调用cuda 120 | cuda_tensor_add (m_tensor, tensor -> m_tensor, result -> m_tensor, m_size); 121 | } 122 | 123 | Tensor* Tensor::add (Tensor* tensor) { 124 | Tensor* result = 0; 125 | int same_shape = 1; 126 | if (m_shape.size () == tensor -> m_shape.size ()) { 127 | for (int i = 0; i < m_shape.size (); ++i) { 128 | if (m_shape[i] != tensor -> m_shape[i]) { 129 | same_shape = 0; 130 | break; 131 | } 132 | } 133 | } else { 134 | same_shape = 0; 135 | } 136 | 137 | if (same_shape == 1) { 138 | result = new Tensor (tensor -> m_shape, 0); 139 | // 调用cuda 140 | cuda_tensor_add (m_tensor, tensor -> m_tensor, result -> m_tensor, m_size); 141 | } 142 | return result; 143 | } 144 | 145 | void Tensor::init () { 146 | // srand (time (0)); 147 | for (int i = 0; i < m_size; ++i) { 148 | m_tensor[i] = (rand () % 1000) / 1000.0 - 0.5; 149 | } 150 | } 151 | 152 | void Tensor::display () { 153 | vector idxs0 (2); 154 | for (int i = 0; i < m_shape[0]; ++i) { 155 | for (int j = 0; j < m_shape[1]; ++j) { 156 | idxs0[0] = i; idxs0[1] = j; 157 | cout << get_value (idxs0) << " "; 158 | } 159 | cout << endl; 160 | } 161 | } 162 | 163 | Tensor::~Tensor () { 164 | delete m_tensor; 165 | } 166 | 167 | -------------------------------------------------------------------------------- /src/cuda/cuda_lib.cu: -------------------------------------------------------------------------------- 1 | #include "../../include/cuda/cuda_lib.h" 2 | #include 3 | #include 4 | #include 5 | 6 | __global__ void matrix_mult (float* A, float* B, float* C, int a_row, int a_col, int b_row, int b_col) { 7 | // share memory 缓存A和B中对应的一对子矩阵,大小为BLOCK_SIZE * BLOCK_SIZE 8 | __shared__ float A_sub[BLOCK_SIZE * BLOCK_SIZE]; 9 | __shared__ float B_sub[BLOCK_SIZE * BLOCK_SIZE]; 10 | // 获取当前线程所在的block和thread的id 11 | int block_id_row = 
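// Tiled matrix multiply: each thread block computes one BLOCK_SIZE x BLOCK_SIZE
// tile of C. The shared dimension is walked in BLOCK_SIZE-wide slices; in each
// slice every thread loads one element of the A tile and one of the B tile
// into shared memory (padding with 0 outside the matrix), __syncthreads()
// makes the tiles visible to the whole block, each thread accumulates its
// partial dot product with Kahan compensation to limit float rounding error,
// and a second __syncthreads() keeps the tiles from being overwritten before
// every thread has finished reading them.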
blockIdx.x; 12 | int block_id_col = blockIdx.y; 13 | int thread_id_row = threadIdx.x; 14 | int thread_id_col = threadIdx.y; 15 | // 计算当前线程对应A矩阵的行号和B矩阵的列号,也就是C矩阵的行号和列号 16 | int c_row_id = block_id_row * BLOCK_SIZE + thread_id_row; 17 | int c_col_id = block_id_col * BLOCK_SIZE + thread_id_col; 18 | 19 | int sbmtx_begin = 0; 20 | float c = 0.0; 21 | float compensation = 0.0; 22 | for (sbmtx_begin = 0; sbmtx_begin < a_col; sbmtx_begin += BLOCK_SIZE) {// 遍历每一对A,B矩阵c_row_id,c_col_id所在行列的子区间 23 | // 当前线程加载A,B矩阵中对应子矩阵的指定元素,保证当前block中的线程同时加载完一对A,B子矩阵 24 | A_sub[thread_id_row * BLOCK_SIZE + thread_id_col] = (c_row_id < a_row && sbmtx_begin + thread_id_col < a_col) ? A[c_row_id * a_col + sbmtx_begin + thread_id_col] : 0; 25 | B_sub[thread_id_row * BLOCK_SIZE + thread_id_col] = (c_col_id < b_col && sbmtx_begin + thread_id_row < b_row) ? B[(sbmtx_begin + thread_id_row) * b_col + c_col_id] : 0; 26 | // 等待同一个block中的线程加载完毕 27 | __syncthreads (); 28 | // 计算A矩阵c_row_id行和B矩阵c_col_id列一个区间的内积,并将每个区间结果累计 29 | #pragma unroll 30 | for (int i = 0; i < BLOCK_SIZE; ++i) { 31 | // c += A_sub[thread_id_row * BLOCK_SIZE + i] * B_sub[i * BLOCK_SIZE + thread_id_col]; 32 | // Kahan's Summation Formula 33 | float y = A_sub[thread_id_row * BLOCK_SIZE + i] * B_sub[i * BLOCK_SIZE + thread_id_col] - compensation; 34 | float t = c + y;// 发生舍入 35 | compensation = (t - c) - y;// 记录下舍入误差 36 | c = t; 37 | } 38 | __syncthreads (); 39 | } 40 | if (c_row_id < a_row && c_col_id < b_col) { 41 | C[c_row_id * b_col + c_col_id] = c; 42 | } 43 | } 44 | void cuda_matrix_mult (float* A, float* B, float* C, int a_row, int a_col, int b_row, int b_col) {// A*B=C 45 | int size_a = a_row * a_col; 46 | int size_b = b_row * b_col; 47 | int size_c = a_row * b_col; 48 | // 在显存上分配空间 49 | float* dev_A, *dev_B, *dev_C; 50 | cudaMalloc ((void**) &dev_A, sizeof (float) * size_a); 51 | cudaMalloc ((void**) &dev_B, sizeof (float) * size_b); 52 | cudaMalloc ((void**) &dev_C, sizeof (float) * size_c); 53 | // copy数据到显存 54 | cudaMemcpy (dev_A, A, sizeof (float) * size_a, cudaMemcpyHostToDevice); 55 | cudaMemcpy (dev_B, B, sizeof (float) * size_b, cudaMemcpyHostToDevice); 56 | // 把结果C矩阵分割成grid_row * grid_col个BLOCK_SIZE * BLOCK_SIZE尺寸的block,可以认为C矩阵对应一个Grid 57 | int grid_row = a_row / BLOCK_SIZE + (a_row % BLOCK_SIZE == 0 ? 0 : 1); 58 | int grid_col = b_col / BLOCK_SIZE + (b_col % BLOCK_SIZE == 0 ? 
0 : 1); 59 | dim3 grid (grid_row, grid_col); 60 | dim3 block (BLOCK_SIZE, BLOCK_SIZE); 61 | // 运行kernal函数 62 | matrix_mult <<>> (dev_A, dev_B, dev_C, a_row, a_col, b_row, b_col); 63 | // 把显存数据copy回内存 64 | cudaMemcpy (C, dev_C, sizeof (float) * size_c, cudaMemcpyDeviceToHost); 65 | // 释放显存 66 | cudaFree (dev_A); 67 | cudaFree (dev_B); 68 | cudaFree (dev_C); 69 | } 70 | 71 | __global__ void tensor_add (float* A, float* B, float* C, int size) { 72 | int thread_id = threadIdx.x; 73 | int block_id = blockIdx.x; 74 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 75 | int read_offset = GRID_SIZE * BLOCK_SIZE; 76 | for (int i = begin_idx; i < size; i += read_offset) {// 这种方式尽可能保证显存数据的连续读取 77 | C[i] = A[i] + B[i]; 78 | } 79 | } 80 | void cuda_tensor_add (float* A, float* B, float* C, int size) { 81 | float* dev_A, *dev_B, *dev_C; 82 | cudaMalloc ((void**) &dev_A, sizeof (float) * size); 83 | cudaMalloc ((void**) &dev_B, sizeof (float) * size); 84 | cudaMalloc ((void**) &dev_C, sizeof (float) * size); 85 | 86 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 87 | cudaMemcpy (dev_B, B, sizeof (float) * size, cudaMemcpyHostToDevice); 88 | tensor_add <<>> (dev_A, dev_B, dev_C, size); 89 | cudaMemcpy (C, dev_C, sizeof (float) * size, cudaMemcpyDeviceToHost); 90 | // 释放显存 91 | cudaFree (dev_A); 92 | cudaFree (dev_B); 93 | cudaFree (dev_C); 94 | } 95 | 96 | __global__ void scalar_tensor_mult (float* A, float* result, float s, int size) { 97 | int thread_id = threadIdx.x; 98 | int block_id = blockIdx.x; 99 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 100 | int read_offset = GRID_SIZE * BLOCK_SIZE; 101 | for (int i = begin_idx; i < size; i += read_offset) {// 这种方式尽可能保证显存数据的连续读取 102 | result[i] = A[i] * s; 103 | } 104 | } 105 | void cuda_scalar_tensor_mult (float* A, float* result, float s, int size) { 106 | float* dev_A, *dev_result; 107 | cudaMalloc ((void**) &dev_A, sizeof (float) * size); 108 | cudaMalloc ((void**) &dev_result, sizeof (float) * size); 109 | 110 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 111 | scalar_tensor_mult <<>> (dev_A, dev_result, s, size); 112 | cudaMemcpy (result, dev_result, sizeof (float) * size, cudaMemcpyDeviceToHost); 113 | // 释放显存 114 | cudaFree (dev_A); 115 | cudaFree (dev_result); 116 | } 117 | 118 | __global__ void element_abs_sum (float* A, int size, float* results) { 119 | __shared__ float sub_results[BLOCK_SIZE]; 120 | int thread_id = threadIdx.x; 121 | int block_id = blockIdx.x; 122 | 123 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 124 | int read_offset = GRID_SIZE * BLOCK_SIZE; 125 | if (begin_idx >= size) { 126 | sub_results[thread_id] = 0; 127 | } else { 128 | float r = 0; 129 | for (int i = begin_idx; i < size; i += read_offset) { 130 | r += fabs(A[i]); 131 | } 132 | sub_results[thread_id] = r; 133 | } 134 | // 将同一个block中得到的结果汇总到global存储中的results中 135 | __syncthreads (); 136 | int merge_offset = 1; 137 | int mask = 2; 138 | while (merge_offset <= BLOCK_SIZE) { 139 | if (thread_id % mask == 0 && thread_id + merge_offset < BLOCK_SIZE) { 140 | sub_results[thread_id] += sub_results[thread_id + merge_offset]; 141 | } 142 | merge_offset = merge_offset * 2; 143 | mask = mask * 2; 144 | __syncthreads (); 145 | } 146 | if (thread_id == 0) { 147 | results[block_id] = sub_results[0]; 148 | } 149 | } 150 | float cuda_element_abs_sum (float* A, int size) { 151 | float* results = (float*) malloc (sizeof (float) * GRID_SIZE); 152 | float* dev_A; 153 | float* dev_results; 154 | cudaMalloc ((void**) &dev_A, 
sizeof (float) * size); 155 | cudaMalloc ((void**) &dev_results, sizeof (float) * GRID_SIZE); 156 | 157 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 158 | 159 | // 运行kernal函数 160 | element_abs_sum <<>> (dev_A, size, dev_results); 161 | 162 | cudaMemcpy (results, dev_results, sizeof (float) * GRID_SIZE, cudaMemcpyDeviceToHost); 163 | cudaFree (dev_results); 164 | cudaFree (dev_A); 165 | float abs_sum = 0; 166 | // 在cpu端将显卡传回的数据汇总 167 | for (int i = 0; i < GRID_SIZE; ++i) { 168 | abs_sum += results[i]; 169 | } 170 | free (results); 171 | return abs_sum; 172 | } 173 | 174 | __global__ void element_square_sum (float* A, int size, float* results) { 175 | __shared__ float sub_results[BLOCK_SIZE]; 176 | int thread_id = threadIdx.x; 177 | int block_id = blockIdx.x; 178 | 179 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 180 | int read_offset = GRID_SIZE * BLOCK_SIZE; 181 | if (begin_idx >= size) { 182 | sub_results[thread_id] = 0; 183 | } else { 184 | float r = 0; 185 | for (int i = begin_idx; i < size; i += read_offset) { 186 | r += A[i] * A[i]; 187 | } 188 | sub_results[thread_id] = r; 189 | } 190 | // 将同一个block中得到的结果汇总到global存储中的results中 191 | __syncthreads (); 192 | int merge_offset = 1; 193 | int mask = 2; 194 | while (merge_offset <= BLOCK_SIZE) { 195 | if (thread_id % mask == 0 && thread_id + merge_offset < BLOCK_SIZE) { 196 | sub_results[thread_id] += sub_results[thread_id + merge_offset]; 197 | } 198 | merge_offset = merge_offset * 2; 199 | mask = mask * 2; 200 | __syncthreads (); 201 | } 202 | if (thread_id == 0) { 203 | results[block_id] = sub_results[0]; 204 | } 205 | } 206 | float cuda_element_square_sum (float* A, int size) { 207 | float* results = (float*) malloc (sizeof (float) * GRID_SIZE); 208 | float* dev_A; 209 | float* dev_results; 210 | cudaMalloc ((void**) &dev_A, sizeof (float) * size); 211 | cudaMalloc ((void**) &dev_results, sizeof (float) * GRID_SIZE); 212 | 213 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 214 | 215 | // 运行kernal函数 216 | element_square_sum <<>> (dev_A, size, dev_results); 217 | 218 | cudaMemcpy (results, dev_results, sizeof (float) * GRID_SIZE, cudaMemcpyDeviceToHost); 219 | cudaFree (dev_results); 220 | cudaFree (dev_A); 221 | float square_sum = 0; 222 | // 在cpu端将显卡传回的数据汇总 223 | for (int i = 0; i < GRID_SIZE; ++i) { 224 | square_sum += results[i]; 225 | } 226 | free (results); 227 | return square_sum; 228 | } 229 | 230 | __global__ void element_square (float* A, int size) { 231 | int thread_id = threadIdx.x; 232 | int block_id = blockIdx.x; 233 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 234 | int read_offset = GRID_SIZE * BLOCK_SIZE; 235 | for (int i = begin_idx; i < size; i += read_offset) { 236 | A[i] = A[i] * A[i]; 237 | } 238 | } 239 | void cuda_element_square (float* A, int size) { 240 | float* dev_A; 241 | cudaMalloc ((void**) &dev_A, sizeof (float) * size); 242 | 243 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 244 | element_square <<>> (dev_A, size); 245 | cudaMemcpy (A, dev_A, sizeof (float) * size, cudaMemcpyDeviceToHost); 246 | // 释放显存 247 | cudaFree (dev_A); 248 | } 249 | 250 | __global__ void element_mult (float* A, float* B, float* C, int size) { 251 | int thread_id = threadIdx.x; 252 | int block_id = blockIdx.x; 253 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 254 | int read_offset = GRID_SIZE * BLOCK_SIZE; 255 | for (int i = begin_idx; i < size; i += read_offset) { 256 | C[i] = A[i] * B[i]; 257 | } 258 | } 259 | void 
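/* host-side wrapper, same pattern as the other cuda_* helpers: allocate device buffers, copy A and B to the GPU, launch the kernel (apparently one grid of GRID_SIZE blocks with BLOCK_SIZE threads each, judging by the stride the kernels use), copy C back and free the device memory */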
cuda_element_mult (float* A, float* B, float* C, int size) { 260 | float* dev_A, *dev_B, *dev_C; 261 | cudaMalloc ((void**) &dev_A, sizeof (float) * size); 262 | cudaMalloc ((void**) &dev_B, sizeof (float) * size); 263 | cudaMalloc ((void**) &dev_C, sizeof (float) * size); 264 | 265 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 266 | cudaMemcpy (dev_B, B, sizeof (float) * size, cudaMemcpyHostToDevice); 267 | element_mult <<>> (dev_A, dev_B, dev_C, size); 268 | cudaMemcpy (C, dev_C, sizeof (float) * size, cudaMemcpyDeviceToHost); 269 | // 释放显存 270 | cudaFree (dev_A); 271 | cudaFree (dev_B); 272 | cudaFree (dev_C); 273 | } 274 | -------------------------------------------------------------------------------- /src/multi_thread/matrix_task.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/multi_thread/matrix_task.h" 2 | using namespace std; 3 | /*矩阵乘法子任务*/ 4 | matrix_mult_task::matrix_mult_task (Tensor* A, Tensor* B, Tensor* C, int a_idx, int b_idx) { 5 | m_A = A; 6 | m_B = B; 7 | m_C = C; 8 | m_a_idx = a_idx; 9 | m_b_idx = b_idx; 10 | } 11 | 12 | void matrix_mult_task::run () { 13 | int a_col = m_A -> m_shape[1]; 14 | float c = 0; 15 | float compensation = 0.0; 16 | int idx0 = 0, idx1 = 0, idx2 = 0; 17 | idx0 = m_a_idx * m_A -> m_shape[1]; 18 | idx1 = m_b_idx; 19 | int offset = m_B -> m_shape[1]; 20 | for (int i = 0; i < a_col; ++i) { 21 | float y = m_A -> m_tensor[idx0] * m_B -> m_tensor[idx1] - compensation;// 补偿 22 | float t = c + y;// 发生舍入 23 | compensation = (t - c) - y;// 记录下舍入误差 24 | c = t; 25 | idx0 += 1; 26 | idx1 += offset; 27 | } 28 | idx2 = m_a_idx * m_C -> m_shape[1] + m_b_idx; 29 | m_C -> m_tensor[idx2] = c; 30 | } 31 | 32 | /*矩阵加法子任务*/ 33 | matrix_add_task::matrix_add_task (Tensor* A, Tensor* B, Tensor* C, int thread_id, int thread_num) { 34 | m_A = A; 35 | m_B = B; 36 | m_C = C; 37 | m_thread_id = thread_id; 38 | m_thread_num = thread_num; 39 | } 40 | 41 | void matrix_add_task::run () { 42 | for (int idx = m_thread_id; idx < m_A -> m_size; idx += m_thread_num) { 43 | m_C -> m_tensor[idx] = m_A -> m_tensor[idx] + m_B -> m_tensor[idx]; 44 | } 45 | } 46 | 47 | /*矩阵标量乘法子任务*/ 48 | matrix_scalar_mult_task::matrix_scalar_mult_task (Tensor* A, float scalar, Tensor* C, int thread_id, int thread_num) { 49 | m_A = A; 50 | m_scalar = scalar; 51 | m_C = C; 52 | m_thread_id = thread_id; 53 | m_thread_num = thread_num; 54 | } 55 | 56 | void matrix_scalar_mult_task::run () { 57 | for (int idx = m_thread_id; idx < m_A -> m_size; idx += m_thread_num) { 58 | m_C -> m_tensor[idx] = m_A -> m_tensor[idx] * m_scalar; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/multi_thread/ring_buffer.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/multi_thread/ring_buffer.h" 2 | #include 3 | ring_buffer::ring_buffer (int size) { 4 | m_size = 1; 5 | while (m_size < size) { 6 | m_size = m_size << 1; 7 | } 8 | m_buffer = new void*[m_size]; 9 | pthread_mutex_init (&m_dequeue_lock, NULL); 10 | pthread_mutex_init (&m_enqueue_lock, NULL); 11 | m_dequeue_ptr = 0; 12 | m_enqueue_ptr = 0; 13 | } 14 | 15 | int ring_buffer::is_empty () {// 获取队列是否为空的快照 16 | if (m_dequeue_ptr == m_enqueue_ptr) { 17 | return 1; 18 | } 19 | return 0; 20 | } 21 | 22 | int ring_buffer::is_full () {// 获取队列是否为满的快照 23 | if (m_dequeue_ptr == ((m_enqueue_ptr + 1) & (m_size - 1))) { 24 | return 1; 25 | } 26 | return 0; 27 | } 28 | 29 | int 
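/* get_element / add_element spin on a trylock and then either fail fast (empty / full queue) or move one void* and advance the pointer; since m_size is rounded up to a power of two, the wrap-around uses the bit mask (m_size - 1) instead of a modulo */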
ring_buffer::get_element (void** data) { 30 | // 非阻塞获取出队锁 31 | while (pthread_mutex_trylock (&m_dequeue_lock) != 0); 32 | if (is_empty ()) { 33 | pthread_mutex_unlock (&m_dequeue_lock); 34 | return 0;// 队列为空 35 | } else { 36 | (*data) = m_buffer[m_dequeue_ptr]; 37 | m_dequeue_ptr = (m_dequeue_ptr + 1) & (m_size - 1); 38 | pthread_mutex_unlock (&m_dequeue_lock); 39 | return 1;// 获取成功 40 | } 41 | } 42 | 43 | int ring_buffer::add_element (void* data) { 44 | // 非阻塞获取入队锁 45 | while (pthread_mutex_trylock (&m_enqueue_lock) != 0); 46 | if (is_full ()) { 47 | pthread_mutex_unlock (&m_enqueue_lock); 48 | return 0;// 队列已满 49 | } else { 50 | m_buffer[m_enqueue_ptr] = data; 51 | m_enqueue_ptr = (m_enqueue_ptr + 1) & (m_size - 1); 52 | pthread_mutex_unlock (&m_enqueue_lock); 53 | return 1;// 添加成功 54 | } 55 | } 56 | 57 | ring_buffer::~ring_buffer () { 58 | delete[] m_buffer; 59 | } 60 | -------------------------------------------------------------------------------- /src/multi_thread/thread_pool.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/multi_thread/thread_pool.h" 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | atomic_int m_finish_counter; 7 | /*thread func*/ 8 | void *get_task (void *arg) { 9 | ring_buffer* rb = (ring_buffer*) arg; 10 | while (1) { 11 | task* t_task = 0; 12 | int flag = rb -> get_element ((void**) (&t_task)); 13 | if (flag != 0) {// 获取成功 14 | t_task -> run ();// 运行task 15 | ++m_finish_counter; 16 | } 17 | } 18 | } 19 | 20 | thread_pool::thread_pool () { 21 | m_worker_num = 4; 22 | m_task_buffers = new ring_buffer*[m_worker_num]; 23 | for (int i = 0; i < m_worker_num; ++i) { 24 | m_task_buffers[i] = new ring_buffer (1000); 25 | } 26 | m_workers = new pthread_t[m_worker_num]; 27 | for (int i = 0; i < m_worker_num; ++i) { 28 | pthread_create (&m_workers[i], NULL, get_task, m_task_buffers[i]); 29 | } 30 | } 31 | 32 | thread_pool* thread_pool::instance = new thread_pool (); 33 | thread_pool* thread_pool::get_instance () { 34 | return instance; 35 | } 36 | 37 | void thread_pool::add_job (task* t_task) { 38 | int idx = m_idx % m_worker_num; 39 | ring_buffer* m_task_buffer = m_task_buffers[idx]; 40 | while (m_task_buffer -> add_element ((void*) t_task) == 0); 41 | m_idx = m_idx + 1; 42 | } 43 | 44 | void thread_pool::add_job_list (vector job_list) { 45 | m_finish_counter = 0; 46 | for (int i = 0; i < job_list.size (); ++i) { 47 | add_job (job_list[i]); 48 | } 49 | while (m_finish_counter < job_list.size ()); 50 | } 51 | 52 | thread_pool::~thread_pool () { 53 | for (int i = 0; i < m_worker_num; ++i) { 54 | delete m_task_buffers[i]; 55 | } 56 | delete[] m_task_buffers; 57 | } 58 | 59 | void task::run () { 60 | } 61 | -------------------------------------------------------------------------------- /src/op_node/AbsSum.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/AbsSum.h" 2 | #include 3 | using namespace std; 4 | AbsSum::AbsSum (string type, string id, string idx): OperatorNode (type, id, idx) { 5 | } 6 | void AbsSum::op () { 7 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 8 | vector shape (2); shape[0] = 1; shape[1] = 1; 9 | m_output = new Tensor (shape); 10 | m_output -> m_tensor[0] = parent_output -> element_abs_sum (); 11 | } 12 | void AbsSum::grad_op () { 13 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 14 | vector shape (2); shape[0] = 1; shape[1] = parent_output -> m_size; 15 | 16 | 
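// local gradient of sum(|x_i|): a 1 x size row of signs (+1 / -1), zeroed in the dead zone |x_i| < 0.1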
Tensor grad = Tensor (shape); 17 | vector idxs (2); 18 | for (int i = 0; i < parent_output -> m_size; ++i) { 19 | idxs[0] = 0; idxs[1] = i; 20 | if (fabs (parent_output -> m_tensor[i] - 0.0) < 0.1) { 21 | grad.set_value (idxs, 0); 22 | } else if (parent_output -> m_tensor[i] > 0.0) { 23 | grad.set_value (idxs, 1); 24 | } else { 25 | grad.set_value (idxs, -1); 26 | } 27 | } 28 | chain_rule (&grad, 0); 29 | } 30 | void AbsSum::release_tensor () { 31 | if (m_sum_grad != 0) { 32 | delete m_sum_grad; 33 | m_sum_grad = 0; 34 | } 35 | delete m_output; 36 | m_output = 0; 37 | } 38 | AbsSum::~AbsSum () { 39 | release_tensor (); 40 | } 41 | -------------------------------------------------------------------------------- /src/op_node/Add.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Add.h" 2 | #include 3 | using namespace std; 4 | Add::Add (string type, string id, string idx): OperatorNode (type, id, idx) { 5 | } 6 | void Add::op () { 7 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 8 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 9 | m_output = parent0_output -> add (parent1_output); 10 | if (m_output == 0) { 11 | cout << "shape is not match:" << get_name () << endl; 12 | } 13 | } 14 | void Add::grad_op () { 15 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 16 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 17 | vector shape0 (2); 18 | shape0[0] = m_output -> m_size; 19 | shape0[1] = parent0_output -> m_size; 20 | vector shape1 (2); 21 | shape1[0] = m_output -> m_size; 22 | shape1[1] = parent1_output -> m_size; 23 | 24 | Tensor grad0 = Tensor (shape0); 25 | Tensor grad1 = Tensor (shape1); 26 | 27 | vector idxs (2); 28 | // grad0 29 | for (int i = 0; i < m_output -> m_size; ++i) { 30 | for (int j = 0; j < parent0_output -> m_size; ++j) { 31 | if (i == j) { 32 | idxs[0] = i; idxs[1] = j; 33 | grad0.set_value (idxs, 1); 34 | } 35 | } 36 | } 37 | // grad1 38 | for (int i = 0; i < m_output -> m_size; ++i) { 39 | for (int j = 0; j < parent1_output -> m_size; ++j) { 40 | if (i == j) { 41 | idxs[0] = i; idxs[1] = j; 42 | grad1.set_value (idxs, 1); 43 | } 44 | } 45 | } 46 | // chain rule 47 | chain_rule (&grad0, 0); 48 | chain_rule (&grad1, 1); 49 | } 50 | void Add::release_tensor () { 51 | if (m_sum_grad != 0) { 52 | delete m_sum_grad; 53 | m_sum_grad = 0; 54 | } 55 | delete m_output; 56 | m_output = 0; 57 | } 58 | Add::~Add () { 59 | release_tensor (); 60 | } 61 | -------------------------------------------------------------------------------- /src/op_node/Bias.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Bias.h" 2 | #include 3 | using namespace std; 4 | Bias::Bias (string type, string id, string idx): OperatorNode (type, id, idx) { 5 | } 6 | void Bias::op () { 7 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 8 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output;// 偏置向量 9 | m_output = 0; 10 | if (parent1_output -> m_shape[0] == 1 && parent0_output -> m_shape[1] == parent1_output -> m_shape[1]) { 11 | m_output = new Tensor (parent0_output -> m_shape, 0); 12 | for (int i = 0; i < parent0_output -> m_size; ++i) { 13 | int j = i % parent1_output -> m_size; 14 | m_output -> m_tensor[i] = parent0_output -> m_tensor[i] + parent1_output -> m_tensor[j]; 15 | } 16 | } 17 | if (m_output == 0) { 18 | cout << "shape is not match:" << 
get_name () << endl; 19 | } 20 | } 21 | void Bias::grad_op () { 22 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 23 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 24 | vector shape0 (2); 25 | shape0[0] = m_output -> m_size; 26 | shape0[1] = parent0_output -> m_size; 27 | vector shape1 (2); 28 | shape1[0] = m_output -> m_size; 29 | shape1[1] = parent1_output -> m_size; 30 | 31 | Tensor grad0 = Tensor (shape0); 32 | Tensor grad1 = Tensor (shape1); 33 | 34 | vector idxs (2); 35 | // grad0 36 | for (int i = 0; i < m_output -> m_size; ++i) { 37 | for (int j = 0; j < parent0_output -> m_size; ++j) { 38 | if (i == j) { 39 | idxs[0] = i; idxs[1] = j; 40 | grad0.set_value (idxs, 1); 41 | } 42 | } 43 | } 44 | // grad1 45 | for (int i = 0; i < m_output -> m_size; ++i) { 46 | for (int j = 0; j < parent1_output -> m_size; ++j) { 47 | if (i % parent1_output -> m_size == j) { 48 | idxs[0] = i; idxs[1] = j; 49 | grad1.set_value (idxs, 1); 50 | } 51 | } 52 | } 53 | 54 | // chain rule 55 | chain_rule (&grad0, 0); 56 | chain_rule (&grad1, 1); 57 | } 58 | void Bias::release_tensor () { 59 | if (m_sum_grad != 0) { 60 | delete m_sum_grad; 61 | m_sum_grad = 0; 62 | } 63 | delete m_output; 64 | m_output = 0; 65 | } 66 | Bias::~Bias () { 67 | release_tensor (); 68 | } 69 | -------------------------------------------------------------------------------- /src/op_node/Dropout.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Dropout.h" 2 | #include 3 | #include 4 | #include 5 | using namespace std; 6 | Dropout::Dropout (string type, string id, string idx, Tensor* filter, float keep_rate): OperatorNode (type, id, idx) { 7 | m_filter = new Tensor (filter -> m_shape, 0); 8 | m_keep_rate = keep_rate; 9 | } 10 | void Dropout::op () { 11 | float n = 1.0 / m_keep_rate; 12 | srand (time (0)); 13 | for (int i = 0; i < m_filter -> m_size; ++i) { 14 | float r = (rand () % 1000) / 1000.0; 15 | if (r > m_keep_rate) { 16 | m_filter -> m_tensor[i] = 0.0; 17 | } else { 18 | m_filter -> m_tensor[i] = n; 19 | } 20 | } 21 | 22 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 23 | m_output = parent0_output -> element_mult (m_filter); 24 | if (m_output == 0) { 25 | cout << "filter size error" << endl; 26 | } 27 | } 28 | void Dropout::grad_op () { 29 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 30 | vector shape (2); shape[0] = m_output -> m_size; shape[1] = parent_output -> m_size; 31 | 32 | Tensor grad = Tensor (shape); 33 | vector idxs (2); 34 | for (int i = 0; i < m_output -> m_size; ++i) { 35 | for (int j = 0; j < parent_output -> m_size; ++j) { 36 | if (i == j) { 37 | idxs[0] = i; idxs[1] = j; 38 | grad.set_value (idxs, m_filter -> m_tensor[i]); 39 | } 40 | } 41 | } 42 | // chain rule 43 | chain_rule (&grad, 0); 44 | } 45 | void Dropout::release_tensor () { 46 | if (m_sum_grad != 0) { 47 | delete m_sum_grad; 48 | m_sum_grad = 0; 49 | } 50 | delete m_output; 51 | m_output = 0; 52 | } 53 | Dropout::~Dropout () { 54 | release_tensor (); 55 | delete m_filter; 56 | m_filter = 0; 57 | } 58 | -------------------------------------------------------------------------------- /src/op_node/Input.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Input.h" 2 | using namespace std; 3 | void input_default_op (Input* input) { 4 | input -> m_output = input -> m_data[input -> m_data_ptr]; 5 | input -> m_data_ptr = 
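/* advance the read pointer with wrap-around so the sample list is cycled repeatedly */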
(input -> m_data_ptr + 1) % input -> m_data.size (); 6 | } 7 | Input::Input (string type, string id, string idx, vector input_data, void (*func) (Input*)): OperatorNode (type, id, idx) { 8 | m_data_ptr = 0; 9 | m_data = input_data; 10 | if (func == 0) { 11 | op = &input_default_op; 12 | } else { 13 | op = func; 14 | } 15 | } 16 | 17 | void Input::release_tensor () { 18 | if (m_sum_grad != 0) { 19 | delete m_sum_grad; 20 | m_sum_grad = 0; 21 | } 22 | } 23 | Input::~Input () { 24 | release_tensor (); 25 | } 26 | -------------------------------------------------------------------------------- /src/op_node/Minus.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Minus.h" 2 | using namespace std; 3 | Minus::Minus (string type, string id, string idx): OperatorNode (type, id, idx) { 4 | } 5 | void Minus::op () { 6 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 7 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 8 | m_output = parent1_output -> scalar_mult (-1.0); 9 | parent0_output -> add (m_output, m_output); 10 | } 11 | void Minus::grad_op () { 12 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 13 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 14 | vector shape0 (2); 15 | shape0[0] = m_output -> m_size; 16 | shape0[1] = parent0_output -> m_size; 17 | vector shape1 (2); 18 | shape1[0] = m_output -> m_size; 19 | shape1[1] = parent1_output -> m_size; 20 | 21 | Tensor grad0 = Tensor (shape0); 22 | Tensor grad1 = Tensor (shape1); 23 | 24 | vector idxs (2); 25 | // grad0 26 | for (int i = 0; i < m_output -> m_size; ++i) { 27 | for (int j = 0; j < parent0_output -> m_size; ++j) { 28 | if (i == j) { 29 | idxs[0] = i; idxs[1] = j; 30 | grad0.set_value (idxs, 1); 31 | } 32 | } 33 | } 34 | // grad1 35 | for (int i = 0; i < m_output -> m_size; ++i) { 36 | for (int j = 0; j < parent1_output -> m_size; ++j) { 37 | if (i == j) { 38 | idxs[0] = i; idxs[1] = j; 39 | grad1.set_value (idxs, -1); 40 | } 41 | } 42 | } 43 | 44 | // chain rule 45 | chain_rule (&grad0, 0); 46 | chain_rule (&grad1, 1); 47 | } 48 | void Minus::release_tensor () { 49 | if (m_sum_grad != 0) { 50 | delete m_sum_grad; 51 | m_sum_grad = 0; 52 | } 53 | delete m_output; 54 | m_output = 0; 55 | } 56 | Minus::~Minus () { 57 | release_tensor (); 58 | } 59 | -------------------------------------------------------------------------------- /src/op_node/Mult.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Mult.h" 2 | #include 3 | using namespace std; 4 | Mult::Mult (string type, string id, string idx): OperatorNode (type, id, idx) { 5 | } 6 | void Mult::op () { 7 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 8 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 9 | m_output = parent0_output -> matrix_mult (parent1_output); 10 | if (m_output == 0) { 11 | cout << "shape is not match:" << get_name () << endl; 12 | } 13 | } 14 | void Mult::grad_op () { 15 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 16 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 17 | vector shape0 (2); 18 | shape0[0] = m_output -> m_size; 19 | shape0[1] = parent0_output -> m_size; 20 | vector shape1 (2); 21 | shape1[0] = m_output -> m_size; 22 | shape1[1] = parent1_output -> m_size; 23 | 24 | Tensor grad0 = Tensor (shape0); 25 | Tensor grad1 = Tensor (shape1); 
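// Jacobians of C = A*B in flattened (row-major) form: dC[i][j]/dA[l][m] = B[m][j] when i == l, else 0; dC[i][j]/dB[l][m] = A[i][l] when j == m, else 0. The nested loops below fill exactly these entries.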
26 | 27 | vector idxs0 (2); 28 | vector idxs1 (2); 29 | // grad0 30 | for (int i = 0; i < m_output -> m_shape[0]; ++i) { 31 | for (int j = 0; j < m_output -> m_shape[1]; ++j) { 32 | for (int l = 0; l < parent0_output -> m_shape[0]; ++l) { 33 | for (int m = 0; m < parent0_output -> m_shape[1]; ++m) { 34 | idxs0[0] = i * m_output -> m_shape[1] + j; 35 | idxs0[1] = l * parent0_output -> m_shape[1] + m; 36 | if (i == l) { 37 | idxs1[0] = m; 38 | idxs1[1] = j; 39 | grad0.set_value (idxs0, parent1_output -> get_value (idxs1)); 40 | } else { 41 | grad0.set_value (idxs0, 0); 42 | } 43 | } 44 | } 45 | } 46 | } 47 | // grad1 48 | for (int i = 0; i < m_output -> m_shape[0]; ++i) { 49 | for (int j = 0; j < m_output -> m_shape[1]; ++j) { 50 | for (int l = 0; l < parent1_output -> m_shape[0]; ++l) { 51 | for (int m = 0; m < parent1_output -> m_shape[1]; ++m) { 52 | idxs0[0] = i * m_output -> m_shape[1] + j; 53 | idxs0[1] = l * parent1_output -> m_shape[1] + m; 54 | if (j == m) { 55 | idxs1[0] = i; 56 | idxs1[1] = l; 57 | grad1.set_value (idxs0, parent0_output -> get_value (idxs1)); 58 | } else { 59 | grad1.set_value (idxs0, 0); 60 | } 61 | } 62 | } 63 | } 64 | } 65 | // chain rule 66 | chain_rule (&grad0, 0); 67 | chain_rule (&grad1, 1); 68 | } 69 | void Mult::release_tensor () { 70 | if (m_sum_grad != 0) { 71 | delete m_sum_grad; 72 | m_sum_grad = 0; 73 | } 74 | delete m_output; 75 | m_output = 0; 76 | } 77 | Mult::~Mult () { 78 | release_tensor (); 79 | } 80 | -------------------------------------------------------------------------------- /src/op_node/Parameter.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Parameter.h" 2 | using namespace std; 3 | Parameter::Parameter (string type, string id, string idx, Tensor* data, int share_data): OperatorNode (type, id, idx) { 4 | m_share_data = share_data; 5 | if (share_data == 0) { 6 | m_output = new Tensor (data -> m_shape, data -> m_tensor); 7 | } else { 8 | m_output = data; 9 | } 10 | } 11 | void Parameter::release_tensor () { 12 | if (m_sum_grad != 0) { 13 | delete m_sum_grad; 14 | m_sum_grad = 0; 15 | } 16 | } 17 | Parameter::~Parameter () { 18 | release_tensor (); 19 | if (m_share_data == 0) { 20 | delete m_output; 21 | } 22 | m_output = 0; 23 | } 24 | -------------------------------------------------------------------------------- /src/op_node/Sigmoid.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Sigmoid.h" 2 | #include 3 | using namespace std; 4 | Sigmoid::Sigmoid (string type, string id, string idx): OperatorNode (type, id, idx) { 5 | } 6 | void Sigmoid::op () { 7 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 8 | m_output = new Tensor (parent_output -> m_shape); 9 | for (int i = 0; i < parent_output -> m_size; ++i) { 10 | m_output -> m_tensor[i] = 1.0 / (1 + pow (2.718, 0 - parent_output -> m_tensor[i])); 11 | } 12 | } 13 | void Sigmoid::grad_op () { 14 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 15 | vector shape (2); shape[0] = m_output -> m_size; shape[1] = parent_output -> m_size; 16 | Tensor grad = Tensor (shape); 17 | 18 | vector idxs (2); 19 | // grad 20 | for (int i = 0; i < m_output -> m_size; ++i) { 21 | for (int j = 0; j < parent_output -> m_size; ++j) { 22 | idxs[0] = i; 23 | idxs[1] = j; 24 | if (i == j) { 25 | grad.set_value (idxs, m_output -> m_tensor[i] * (1 - m_output -> m_tensor[i])); 26 | } 27 | } 28 | } 29 | chain_rule (&grad, 0); 30 
| } 31 | void Sigmoid::release_tensor () { 32 | if (m_sum_grad != 0) { 33 | delete m_sum_grad; 34 | m_sum_grad = 0; 35 | } 36 | delete m_output; 37 | m_output = 0; 38 | } 39 | Sigmoid::~Sigmoid () { 40 | release_tensor (); 41 | } 42 | -------------------------------------------------------------------------------- /src/op_node/SquareSum.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/SquareSum.h" 2 | using namespace std; 3 | SquareSum::SquareSum (string type, string id, string idx): OperatorNode (type, id, idx) { 4 | } 5 | void SquareSum::op () { 6 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 7 | vector shape (2); shape[0] = 1; shape[1] = 1; 8 | m_output = new Tensor (shape); 9 | m_output -> m_tensor[0] = parent_output -> element_square_sum (); 10 | } 11 | void SquareSum::grad_op () { 12 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 13 | vector shape (2); shape[0] = 1; shape[1] = parent_output -> m_size; 14 | 15 | Tensor grad = Tensor (shape); 16 | vector idxs (2); 17 | for (int i = 0; i < parent_output -> m_size; ++i) { 18 | idxs[0] = 0; idxs[1] = i; 19 | grad.set_value (idxs, parent_output -> m_tensor[i] * 2); 20 | } 21 | chain_rule (&grad, 0); 22 | } 23 | void SquareSum::release_tensor () { 24 | if (m_sum_grad != 0) { 25 | delete m_sum_grad; 26 | m_sum_grad = 0; 27 | } 28 | delete m_output; 29 | m_output = 0; 30 | } 31 | SquareSum::~SquareSum () { 32 | release_tensor (); 33 | } 34 | -------------------------------------------------------------------------------- /src/optimizer/Adadelta.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/optimizer/Adadelta.h" 2 | #include "../../include/OperatorNode.h" 3 | #include 4 | #include 5 | using namespace std; 6 | Adadelta::Adadelta (float a): Optimizer (a) { 7 | m_epsl = 0.5; 8 | m_lambda = 0.3; 9 | } 10 | void Adadelta::optimize (vector &topo_results) { 11 | for (int i = 0; i < topo_results.size (); ++i) {// 计算梯度 12 | ((OperatorNode*) topo_results[i]) -> grad_op (); 13 | } 14 | for (int i = 0; i < topo_results.size (); ++i) {// 更新 15 | OperatorNode* op_node = (OperatorNode*) topo_results[i]; 16 | if (op_node -> m_sum_grad != 0 && op_node -> m_name[0] == "Parameter") { 17 | string name = op_node -> get_name (); 18 | if (m_tensor_store.find (name) == m_tensor_store.end ()) {// 不存在累计梯度 19 | for (int j = 0; j < op_node -> m_output -> m_size; ++j) { 20 | op_node -> m_output -> m_tensor[j] -= m_a * op_node -> m_sum_grad -> m_tensor[j]; 21 | } 22 | Tensor* tensor = new Tensor (op_node -> m_sum_grad -> m_shape, op_node -> m_sum_grad -> m_tensor); 23 | tensor -> element_square (); 24 | m_tensor_store[name] = tensor; 25 | } else { 26 | Tensor* acc_grad = m_tensor_store[name]; 27 | for (int j = 0; j < acc_grad -> m_size; ++j) { 28 | op_node -> m_output -> m_tensor[j] -= m_a * op_node -> m_sum_grad -> m_tensor[j] 29 | / (sqrt (acc_grad -> m_tensor[j]) + m_epsl); 30 | } 31 | // 更新累积梯度 32 | op_node -> m_sum_grad -> element_square (); 33 | op_node -> m_sum_grad -> scalar_acc_mult (m_lambda); 34 | acc_grad -> scalar_acc_mult (1 - m_lambda); 35 | acc_grad -> add (op_node -> m_sum_grad, acc_grad); 36 | } 37 | } 38 | } 39 | } 40 | Adadelta::~Adadelta () { 41 | unordered_map ::iterator tensor_store_it = m_tensor_store.begin (); 42 | while (tensor_store_it != m_tensor_store.end ()) { 43 | delete tensor_store_it -> second; 44 | ++tensor_store_it; 45 | } 46 | m_tensor_store.clear (); 
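// m_tensor_store keeps one accumulated squared-gradient tensor per Parameter node (keyed by node name); it is released together with the optimizer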
47 | } 48 | -------------------------------------------------------------------------------- /unit_test/graph_test.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/Graph.h" 2 | #include "../include/Node.h" 3 | #include 4 | #include 5 | using namespace std; 6 | int main () { 7 | Node* a = new Node ("a", "0"); 8 | Node* b = new Node ("b", "0"); 9 | Node* c = new Node ("c", "0"); 10 | Node* d = new Node ("d", "0"); 11 | Node* e = new Node ("e", "0"); 12 | Node* f = new Node ("f", "0"); 13 | 14 | // 构建图 15 | Graph graph; 16 | graph.add_node ("", a); 17 | graph.add_node ("", b); 18 | graph.add_node (a -> get_name (), c); 19 | graph.add_node (b -> get_name (), c); 20 | graph.add_node ("", d); 21 | graph.add_node (c -> get_name (), e); 22 | graph.add_node (d -> get_name (), e); 23 | graph.add_node (c -> get_name (), f); 24 | // 构建转置图 25 | graph.build_reverse_graph (); 26 | // 构建子图, 基于图改写 27 | vector endnode_list; 28 | endnode_list.push_back (e); 29 | graph.build_subgraph (endnode_list); 30 | // 拓扑排序 31 | vector toposort_result; 32 | graph.topological_sort (graph.m_adj_table, toposort_result);// 原图拓扑排序 33 | for (int i = 0; i < toposort_result.size (); ++i) { 34 | cout << toposort_result[i] -> get_name () << " "; 35 | } 36 | cout << endl; 37 | vector reverse_toposort_result; 38 | graph.topological_sort (graph.m_reverse_table, reverse_toposort_result);// 转置图拓扑排序 39 | for (int i = 0; i < reverse_toposort_result.size (); ++i) { 40 | cout << reverse_toposort_result[i] -> get_name () << " "; 41 | } 42 | cout << endl; 43 | 44 | } 45 | -------------------------------------------------------------------------------- /unit_test/operatorNode_test.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/op_node/Add.h" 2 | #include "../include/op_node/Bias.h" 3 | #include "../include/op_node/SquareSum.h" 4 | #include "../include/op_node/Minus.h" 5 | #include "../include/op_node/AbsSum.h" 6 | #include "../include/op_node/Mult.h" 7 | #include "../include/op_node/Sigmoid.h" 8 | #include "../include/op_node/Parameter.h" 9 | #include "../include/op_node/Dropout.h" 10 | #include "../include/ComputeGraph.h" 11 | #include "../include/Tensor.h" 12 | #include "../include/OperatorNode.h" 13 | #include "../include/Optimizer.h" 14 | #include 15 | #include 16 | #include 17 | using namespace std; 18 | 19 | int main () { 20 | vector shape1; shape1.push_back (2); shape1.push_back (2); 21 | vector shape2; shape2.push_back (2); shape2.push_back (2); 22 | Tensor* t1 = new Tensor (shape1); 23 | Tensor* t2 = new Tensor (shape2); 24 | t1 -> init (); 25 | t2 -> init (); 26 | t1 -> display (); cout << endl; 27 | t2 -> display (); 28 | 29 | // 生成计算节点 30 | OperatorNode* p1 = new Parameter ("Parameter", "1", "0", t1); 31 | OperatorNode* p2 = new Parameter ("Parameter", "2", "0", t2); 32 | OperatorNode* add = new Add ("Add", "1", "0"); 33 | OperatorNode* minus = new Minus ("Minus", "1", "0"); 34 | OperatorNode* mult = new Mult ("Mult", "1", "0"); 35 | OperatorNode* sigmoid = new Sigmoid ("Sigmoid", "1", "0"); 36 | OperatorNode* square_sum = new SquareSum ("SquareSum", "1", "0"); 37 | OperatorNode* abs_sum = new AbsSum ("AbsSum", "1", "0"); 38 | OperatorNode* b = new Bias ("Bias", "1", "0"); 39 | OperatorNode* dropout = new Dropout ("Dropout", "1", "0", t2, 0.5); 40 | 41 | // 构建计算图 42 | ComputeGraph cg; 43 | cg.add_node ("", p1); 44 | cg.add_node ("", p2); 45 | cg.add_node (p1 -> get_name (), add); 46 | cg.add_node (p2 -> 
get_name (), add);// 测试加法 47 | //cg.add_node (p1 -> get_name (), minus); 48 | //cg.add_node (p2 -> get_name (), minus);// 测试减法 49 | //cg.add_node (p1 -> get_name (), mult); 50 | //cg.add_node (p2 -> get_name (), mult);// 测试乘法 51 | //cg.add_node (p1 -> get_name (), sigmoid);// 测试sigmoid 52 | //cg.add_node (p1 -> get_name (), square_sum);// 测试SquareSum 53 | //cg.add_node (p1 -> get_name (), abs_sum);// 测试AbsSum 54 | //cg.add_node (p1 -> get_name (), b); 55 | //cg.add_node (p2 -> get_name (), b);// 测试偏置 56 | //cg.add_node (p1 -> get_name (), dropout);// 测试dropout 57 | // 构建转置图 58 | cg.build_reverse_graph (); 59 | // 初始化优化器,普通sgd 60 | Optimizer* optimizer = new Optimizer (0.1); 61 | cg.m_optimizer = optimizer; 62 | 63 | struct timeval start,end; 64 | gettimeofday(&start, 0); 65 | for (int i = 0; i < 1; ++i) { 66 | vector result; 67 | cg.forward_propagation (result); 68 | cout << "fp result:................." << endl; 69 | ((OperatorNode*) result[0]) -> m_output -> display (); cout << endl;// 前向结果 70 | //((OperatorNode*) result[1]) -> m_output -> display (); 71 | cg.back_propagation (); 72 | cout << "bp result:................." << endl; 73 | p1 -> m_sum_grad -> display (); cout << endl; 74 | p2 -> m_sum_grad -> display ();// 反向结果 75 | cout << "new parameter:............." << endl; 76 | p1 -> m_output -> display (); cout << endl; 77 | p2 -> m_output -> display (); 78 | cg.release_tensor (); 79 | } 80 | gettimeofday(&end, 0); 81 | long timeuse =1000000 * ( end.tv_sec - start.tv_sec ) + end.tv_usec - start.tv_usec; 82 | printf("time=%f\n",timeuse /1000000.0); 83 | } 84 | -------------------------------------------------------------------------------- /unit_test/rnn_test.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/Tensor.h" 2 | #include "../include/op_node/Input.h" 3 | #include "../include/VirtualNode.h" 4 | #include "../include/OperatorNode.h" 5 | #include "../include/VirtualGraph.h" 6 | #include "../include/ComputeGraph.h" 7 | #include "../include/Optimizer.h" 8 | #include "../include/optimizer/Adadelta.h" 9 | #include "../include/BranchNode.h" 10 | #include "../include/LoopNode.h" 11 | #include 12 | #include 13 | #include 14 | #include 15 | using namespace std; 16 | Tensor* int_to_tensor (int a) {// 把int转化为8位01串,左边是低位,右边是高位 17 | float* data = new float[8]; 18 | vector shape (2); shape[0] = 1; shape[1] = 8; 19 | int mask = 1; 20 | for (int i = 0; i < 8; ++i) { 21 | if ((mask & a) == 0) { 22 | data[i] = 0; 23 | } else { 24 | data[i] = 1; 25 | } 26 | mask = mask << 1; 27 | } 28 | return new Tensor (shape, data); 29 | } 30 | int tensor_to_int (Tensor* tensor) { 31 | int result = 0; 32 | for (int i = 7; i >= 0; --i) { 33 | int a = tensor -> m_tensor[i] < 0.5 ? 
0 : 1; 34 | result = result * 2 + a; 35 | } 36 | return result; 37 | } 38 | void prepare_data (int num, vector &add_nums, vector &sums) { 39 | for (int i = 0; i < num; ++i) { 40 | int a = rand () % 128; 41 | int b = rand () % 128; 42 | int c = a + b; 43 | Tensor* t_a = int_to_tensor (a); 44 | Tensor* t_b = int_to_tensor (b); 45 | Tensor* t_c = int_to_tensor (c); 46 | add_nums.push_back (t_a); 47 | add_nums.push_back (t_b); 48 | sums.push_back (t_c); 49 | } 50 | } 51 | // rnn中的分支选择函数 52 | Node* choose_node (int idx, Graph* compute_graph, BranchNode* branch_node) { 53 | ostringstream oss; 54 | if (idx == 0) { 55 | int batch_size = 1; 56 | int hidden_size = 8; 57 | vector shape (2); 58 | shape[0] = batch_size; shape[1] = hidden_size; 59 | Tensor* init_tensor = new Tensor (shape); 60 | vector data; data.push_back (init_tensor); 61 | Node* init_input = new Input ("Input", "init", "0", data); 62 | compute_graph -> add_node ("", init_input); 63 | ((Input*) init_input) -> op ((Input*) init_input); 64 | return init_input; 65 | } else { 66 | oss << (idx - 1) << ":"; 67 | string name = "Mult:h:" + oss.str (); 68 | return compute_graph -> get_node (name); 69 | } 70 | } 71 | // rnn中的循环init,condition函数 72 | void init (LoopNode* loop_node) { 73 | } 74 | int condition (Graph* compute_graph, int idx) { 75 | if (idx < 8) { 76 | return 0; 77 | } else { 78 | return 1; 79 | } 80 | } 81 | // rnn的输入函数,简单的数据输入预处理 82 | void rnn_input_x (Input* input) { 83 | int batch_size = 1; 84 | vector shape (2); shape[0] = batch_size; shape[1] = 2; 85 | float* data = new float[2 * batch_size]; 86 | for (int i = 0; i < batch_size; ++i) { 87 | Tensor* a = input -> m_data[input -> m_data_ptr]; 88 | Tensor* b = input -> m_data[input -> m_data_ptr + 1]; 89 | int idx = atoi (input -> m_name[2].c_str ()); 90 | data[i * 2] = a -> m_tensor[idx]; 91 | data[i * 2 + 1] = b -> m_tensor[idx]; 92 | input -> m_data_ptr = (input -> m_data_ptr + 2) % input -> m_data.size (); 93 | } 94 | input -> m_output = new Tensor (shape, data); 95 | } 96 | void rnn_input_y (Input* input) { 97 | int batch_size = 1; 98 | float* data = new float[1 * batch_size]; 99 | vector shape (2); shape[0] = batch_size; shape[1] = 1; 100 | for (int i = 0; i < batch_size; ++i) { 101 | Tensor* a = input -> m_data[input -> m_data_ptr]; 102 | int idx = atoi (input -> m_name[2].c_str ()); 103 | data[i] = a -> m_tensor[idx]; 104 | input -> m_data_ptr = (input -> m_data_ptr + 1) % input -> m_data.size (); 105 | } 106 | input -> m_output = new Tensor (shape, data); 107 | } 108 | 109 | int main () { 110 | // 准备数据集 111 | vector add_nums; 112 | vector sums; 113 | prepare_data (10000, add_nums, sums); 114 | 115 | int hidden_size = 8; 116 | 117 | vector shape_w1 (2); shape_w1[0] = 2; shape_w1[1] = hidden_size; 118 | Tensor* t_w1 = new Tensor (shape_w1); 119 | t_w1 -> init (); 120 | 121 | vector shape_w2 (2); shape_w2[0] = hidden_size; shape_w2[1] = 1; 122 | Tensor* t_w2 = new Tensor (shape_w2); 123 | t_w2 -> init (); 124 | 125 | vector shape_wh (2); shape_wh[0] = hidden_size; shape_wh[1] = hidden_size; 126 | Tensor* t_wh = new Tensor (shape_wh); 127 | t_wh -> init (); 128 | 129 | vector shape_b1 (2); shape_b1[0] = 1; shape_b1[1] = hidden_size; 130 | Tensor* t_b1 = new Tensor (shape_b1); 131 | t_b1 -> init (); 132 | 133 | vector shape_b2 (2); shape_b2[0] = 1; shape_b2[1] = 1; 134 | Tensor* t_b2 = new Tensor (shape_b2); 135 | t_b2 -> init (); 136 | 137 | // 准备虚拟节点 138 | VirtualNode* input_x = new VirtualNode ("Input", "1"); 139 | input_x -> m_input_data = add_nums; 140 | input_x -> 
input_op = &rnn_input_x; 141 | 142 | VirtualNode* wh = new VirtualNode ("Parameter", "wh", 1); 143 | wh -> m_data = t_wh; 144 | 145 | VirtualNode* w1 = new VirtualNode ("Parameter", "w1", 1); 146 | w1 -> m_data = t_w1; 147 | 148 | VirtualNode* multh = new VirtualNode ("Mult", "h"); 149 | VirtualNode* mult1 = new VirtualNode ("Mult", "1"); 150 | BranchNode* branch = new BranchNode ("Branch", "1", &choose_node); 151 | VirtualNode* add1 = new VirtualNode ("Add", "1"); 152 | 153 | VirtualNode* b1 = new VirtualNode ("Parameter", "b1", 1); 154 | b1 -> m_data = t_b1; 155 | 156 | VirtualNode* bias1 = new VirtualNode ("Bias", "1"); 157 | VirtualNode* sigmoid1 = new VirtualNode ("Sigmoid", "1"); 158 | 159 | VirtualNode* w2 = new VirtualNode ("Parameter", "w2", 1); 160 | w2 -> m_data = t_w2; 161 | 162 | VirtualNode* mult2 = new VirtualNode ("Mult", "2"); 163 | 164 | VirtualNode* b2 = new VirtualNode ("Parameter", "b2", 1); 165 | b2 -> m_data = t_b2; 166 | 167 | VirtualNode* bias2 = new VirtualNode ("Bias", "2"); 168 | VirtualNode* sigmoid2 = new VirtualNode ("Sigmoid", "2"); 169 | 170 | VirtualNode* input_y = new VirtualNode ("Input", "2"); 171 | input_y -> m_input_data = sums; 172 | input_y -> input_op = &rnn_input_y; 173 | 174 | VirtualNode* minus = new VirtualNode ("Minus", "1"); 175 | VirtualNode* abs = new VirtualNode ("AbsSum", "1"); 176 | 177 | LoopNode* loop = new LoopNode ("Loop", "1", &init, &condition); 178 | 179 | // 构建虚拟图 180 | loop -> m_sub_vgraph -> add_node ("", input_x); 181 | loop -> m_sub_vgraph -> add_node ("", w1); 182 | loop -> m_sub_vgraph -> add_node (input_x -> get_name (), mult1); 183 | loop -> m_sub_vgraph -> add_node (w1 -> get_name (), mult1); 184 | loop -> m_sub_vgraph -> add_node ("", branch); 185 | loop -> m_sub_vgraph -> add_node (branch -> get_name (), add1); 186 | loop -> m_sub_vgraph -> add_node (mult1 -> get_name (), add1); 187 | loop -> m_sub_vgraph -> add_node ("", b1); 188 | loop -> m_sub_vgraph -> add_node (add1 -> get_name (), bias1); 189 | loop -> m_sub_vgraph -> add_node (b1 -> get_name (), bias1); 190 | loop -> m_sub_vgraph -> add_node (bias1 -> get_name (), sigmoid1); 191 | 192 | loop -> m_sub_vgraph -> add_node ("", wh);// 为下一次循环做准备 193 | loop -> m_sub_vgraph -> add_node (sigmoid1 -> get_name (), multh); 194 | loop -> m_sub_vgraph -> add_node (wh -> get_name (), multh); 195 | 196 | loop -> m_sub_vgraph -> add_node ("", w2); 197 | loop -> m_sub_vgraph -> add_node (sigmoid1 -> get_name (), mult2); 198 | loop -> m_sub_vgraph -> add_node (w2 -> get_name (), mult2); 199 | loop -> m_sub_vgraph -> add_node ("", b2); 200 | loop -> m_sub_vgraph -> add_node (mult2 -> get_name (), bias2); 201 | loop -> m_sub_vgraph -> add_node (b2 -> get_name (), bias2); 202 | loop -> m_sub_vgraph -> add_node (bias2 -> get_name (), sigmoid2); 203 | loop -> m_sub_vgraph -> add_node ("", input_y); 204 | loop -> m_sub_vgraph -> add_node (sigmoid2 -> get_name (), minus); 205 | loop -> m_sub_vgraph -> add_node (input_y -> get_name (), minus); 206 | loop -> m_sub_vgraph -> add_node (minus -> get_name (), abs); 207 | 208 | VirtualGraph* vg = new VirtualGraph (); 209 | vg -> add_node ("", loop); 210 | // 构建计算图 211 | ComputeGraph* train_cg = new ComputeGraph (); 212 | vg -> build_compute_graph (train_cg); 213 | Optimizer* optimizer = new Adadelta (0.2); 214 | train_cg -> m_optimizer = optimizer; 215 | // 对计算图进行修剪 216 | vector endnode_list; 217 | unordered_map::iterator op_node_map_it = abs -> m_op_node_map.begin (); 218 | while (op_node_map_it != abs -> m_op_node_map.end ()) { 219 | 
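// every compute node generated by the AbsSum virtual node (one per unrolled timestep) becomes an end node, so build_subgraph prunes everything that does not feed the loss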
endnode_list.push_back (op_node_map_it -> second); 220 | ++op_node_map_it; 221 | } 222 | train_cg -> build_subgraph (endnode_list); 223 | // 训练 224 | for (int i = 0; i < 20000; ++i) { 225 | vector error; 226 | if (i % 1000 == 0) { 227 | int ptr = ((Input*) (train_cg -> get_node ("Input:1:0:"))) -> m_data_ptr; 228 | cout << tensor_to_int (((Input*) (train_cg -> get_node ("Input:1:0:"))) -> m_data[ptr]) << "+" 229 | << tensor_to_int (((Input*) (train_cg -> get_node ("Input:1:0:"))) -> m_data[ptr + 1]); 230 | } 231 | train_cg -> forward_propagation (error); 232 | train_cg -> back_propagation (); 233 | if (i % 1000 == 0) { 234 | float r[8] = {0}; 235 | vector r_shape (2); r_shape[0] = 1; r_shape[1] = 8; 236 | for (int i = 0; i < 8; ++i) { 237 | ostringstream oss; 238 | oss << i << ":"; 239 | string op_node_name = sigmoid2 -> get_name () + oss.str (); 240 | r[i] = ((OperatorNode*) (sigmoid2 -> m_op_node_map[op_node_name])) -> m_output -> m_tensor[0]; 241 | } 242 | Tensor r_tensor = Tensor (r_shape, r); 243 | cout << " guess = :" << tensor_to_int (&r_tensor) << endl; 244 | } 245 | } 246 | 247 | delete train_cg; 248 | delete vg; 249 | } 250 | -------------------------------------------------------------------------------- /unit_test/tensor_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../include/Tensor.h" 4 | using namespace std; 5 | int main () { 6 | vector shape (2); 7 | shape[0] = 20; shape[1] = 10; 8 | 9 | Tensor* tensor1 = new Tensor (shape); 10 | tensor1 -> init (); 11 | tensor1 -> display (); cout << endl; 12 | 13 | Tensor* tensor2 = new Tensor (shape); 14 | tensor2 -> init (); 15 | tensor2 -> display (); cout << endl; 16 | 17 | Tensor* random_tensor = new Tensor (shape); 18 | random_tensor -> init (); 19 | cout << "init test" << endl; 20 | //random_tensor -> display ();// 初始化验证 21 | 22 | Tensor* mult_result = tensor1 -> matrix_mult (tensor2);// 验证乘法 23 | cout << "matrix mult test" << endl; 24 | //mult_result -> display (); 25 | 26 | Tensor* add_result = tensor1 -> add (tensor2);// 加法验证 27 | cout << "add test 1" << endl; 28 | //add_result -> display (); 29 | 30 | tensor1 -> add (tensor2, add_result);// 加法验证2 31 | cout << "add test 2" << endl; 32 | //add_result -> display (); 33 | 34 | Tensor* ele_mult_result = tensor1 -> element_mult (tensor2); 35 | cout << "element mult test" << endl; 36 | //ele_mult_result -> display (); 37 | 38 | Tensor* scalar_mult_result = tensor1 -> scalar_mult (3.0);// 标量乘法 39 | cout << "scalar mult test" << endl; 40 | scalar_mult_result -> display (); 41 | 42 | tensor1 -> scalar_acc_mult (2.0);// 标量累乘 43 | cout << "scalar acc mult test" << endl; 44 | tensor1 -> display (); 45 | /* 46 | tensor1 -> element_square (); 47 | cout << "element square test" << endl; 48 | tensor1 -> display (); 49 | 50 | vector shape1 (3); 51 | shape1[0] = 2; shape1[1] = 3; shape1[2] = 2;// {{{1,2},{3,4},{5,6}},{{7,8},{9,10},{11,12}}} 52 | float data1[] = {1,2,3,4,5,6,7,8,9,10,11,12}; 53 | Tensor* three_dim_tensor = new Tensor (shape1, data1); 54 | vector idxs (3); 55 | idxs[0] = 1; idxs[1] = 1; idxs[2] = 0; 56 | cout << "index value test" << endl; 57 | cout << three_dim_tensor -> get_value (idxs) << endl; 58 | */ 59 | } 60 | -------------------------------------------------------------------------------- /unit_test/xor_test.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/op_node/Input.h" 2 | #include "../include/op_node/SquareSum.h" 3 | 
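// xor_test: builds a 2-2-1 sigmoid network through the virtual graph, trains it on the four XOR patterns with the Adadelta optimizer, and prints inputs and predictions during the last iterations of training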
#include "../include/op_node/Minus.h" 4 | #include "../include/op_node/Mult.h" 5 | #include "../include/op_node/Bias.h" 6 | #include "../include/op_node/Parameter.h" 7 | #include "../include/VirtualGraph.h" 8 | #include "../include/ComputeGraph.h" 9 | #include "../include/VirtualNode.h" 10 | #include "../include/Optimizer.h" 11 | #include "../include/optimizer/Adadelta.h" 12 | #include 13 | using namespace std; 14 | int main () { 15 | // 准备数据集 16 | vector data_x_list; 17 | vector shape_x; shape_x.push_back (1); shape_x.push_back (2); 18 | float data_x1[] = {0,0}; 19 | float data_x2[] = {0,1}; 20 | float data_x3[] = {1,0}; 21 | float data_x4[] = {1,1}; 22 | data_x_list.push_back (new Tensor (shape_x, data_x1)); 23 | data_x_list.push_back (new Tensor (shape_x, data_x2)); 24 | data_x_list.push_back (new Tensor (shape_x, data_x3)); 25 | data_x_list.push_back (new Tensor (shape_x, data_x4)); 26 | vector data_y_list; 27 | vector shape_y; shape_y.push_back (1); shape_y.push_back (1); 28 | float data_y1[] = {0}; 29 | float data_y2[] = {1}; 30 | float data_y3[] = {1}; 31 | float data_y4[] = {0}; 32 | data_y_list.push_back (new Tensor (shape_y, data_y1)); 33 | data_y_list.push_back (new Tensor (shape_y, data_y2)); 34 | data_y_list.push_back (new Tensor (shape_y, data_y3)); 35 | data_y_list.push_back (new Tensor (shape_y, data_y4)); 36 | 37 | vector shape_w1; shape_w1.push_back (2); shape_w1.push_back (2); 38 | Tensor* w1 = new Tensor (shape_w1); 39 | w1 -> init (); 40 | 41 | vector shape_w2; shape_w2.push_back (2); shape_w2.push_back (1); 42 | Tensor* w2 = new Tensor (shape_w2); 43 | w2 -> init (); 44 | 45 | vector shape_b1; shape_b1.push_back (1); shape_b1.push_back (2); 46 | Tensor* b1 = new Tensor (shape_b1); 47 | b1 -> init (); 48 | 49 | vector shape_b2; shape_b2.push_back (1); shape_b2.push_back (1); 50 | Tensor* b2 = new Tensor (shape_b2); 51 | b2 -> init (); 52 | 53 | // 准备虚拟节点 54 | VirtualNode* input_x = new VirtualNode ("Input", "1"); 55 | input_x -> m_input_data = data_x_list; 56 | 57 | VirtualNode* input_y = new VirtualNode ("Input", "2"); 58 | input_y -> m_input_data = data_y_list; 59 | 60 | VirtualNode* w_1 = new VirtualNode ("Parameter", "1"); 61 | w_1 -> m_data = w1; 62 | 63 | VirtualNode* w_2 = new VirtualNode ("Parameter", "2"); 64 | w_2 -> m_data = w2; 65 | 66 | VirtualNode* b_1 = new VirtualNode ("Parameter", "3"); 67 | b_1 -> m_data = b1; 68 | 69 | VirtualNode* b_2 = new VirtualNode ("Parameter", "4"); 70 | b_2 -> m_data = b2; 71 | 72 | VirtualNode* mult1 = new VirtualNode ("Mult", "1"); 73 | VirtualNode* mult2 = new VirtualNode ("Mult", "2"); 74 | VirtualNode* sig1 = new VirtualNode ("Sigmoid", "1"); 75 | VirtualNode* sig2 = new VirtualNode ("Sigmoid", "2"); 76 | VirtualNode* minus = new VirtualNode ("Minus", "1"); 77 | VirtualNode* ss = new VirtualNode ("SquareSum", "1"); 78 | VirtualNode* bias1 = new VirtualNode ("Bias", "1"); 79 | VirtualNode* bias2 = new VirtualNode ("Bias", "2"); 80 | 81 | // 构建虚拟图 82 | VirtualGraph* vg = new VirtualGraph (); 83 | vg -> add_node ("", input_x); 84 | vg -> add_node ("", w_1); 85 | vg -> add_node (input_x -> get_name (), mult1); 86 | vg -> add_node (w_1 -> get_name (), mult1); 87 | vg -> add_node ("", b_1); 88 | vg -> add_node (mult1 -> get_name (), bias1); 89 | vg -> add_node (b_1 -> get_name (), bias1); 90 | vg -> add_node (bias1 -> get_name (), sig1); 91 | vg -> add_node ("", w_2); 92 | vg -> add_node (sig1 -> get_name (), mult2); 93 | vg -> add_node (w_2 -> get_name (), mult2); 94 | vg -> add_node ("", b_2); 95 | vg -> add_node (mult2 -> 
get_name (), bias2); 96 | vg -> add_node (b_2 -> get_name (), bias2); 97 | vg -> add_node (bias2 -> get_name (), sig2); 98 | vg -> add_node ("", input_y); 99 | vg -> add_node (sig2 -> get_name (), minus); 100 | vg -> add_node (input_y -> get_name (), minus); 101 | vg -> add_node (minus -> get_name (), ss); 102 | 103 | // 生成计算图 104 | ComputeGraph* train_cg = new ComputeGraph (); 105 | vg -> build_compute_graph (train_cg); 106 | // 初始化优化器,Adadelta 107 | Optimizer* optimizer = new Adadelta (1.0); 108 | train_cg -> m_optimizer = optimizer; 109 | // 训练 110 | for (int i = 0; i < 1000; ++i) { 111 | if (i < 900 == 0) { 112 | cout << "input: "; 113 | int ptr = ((Input*) (train_cg -> get_node ("Input:1:0:"))) -> m_data_ptr; 114 | ((Input*) (train_cg -> get_node ("Input:1:0:"))) -> m_data[ptr] -> display (); 115 | } 116 | vector error; 117 | train_cg -> forward_propagation (error); 118 | train_cg -> back_propagation (); 119 | if (i < 900 == 0) { 120 | cout << "xor: "; 121 | ((OperatorNode*) (sig2 -> m_op_node_map["Sigmoid:2:0:"])) -> m_output -> display (); cout << endl; 122 | } 123 | } 124 | 125 | delete train_cg; 126 | delete vg; 127 | } 128 | --------------------------------------------------------------------------------