├── README.md ├── include ├── BranchNode.h ├── ComputeGraph.h ├── Graph.h ├── LoopNode.h ├── Node.h ├── OperatorNode.h ├── Optimizer.h ├── Tensor.h ├── VirtualGraph.h ├── VirtualNode.h ├── cuda │ └── cuda_lib.h ├── multi_thread │ ├── matrix_task.h │ ├── ring_buffer.h │ └── thread_pool.h ├── op_node │ ├── AbsSum.h │ ├── Add.h │ ├── Bias.h │ ├── Dropout.h │ ├── Input.h │ ├── Minus.h │ ├── Mult.h │ ├── Parameter.h │ ├── Sigmoid.h │ └── SquareSum.h └── optimizer │ └── Adadelta.h ├── makefile ├── makefile.gpu ├── obj └── empty ├── src ├── BranchNode.cpp ├── ComputeGraph.cpp ├── Graph.cpp ├── LoopNode.cpp ├── Node.cpp ├── OperatorNode.cpp ├── Optimizer.cpp ├── Tensor.cpp ├── VirtualGraph.cpp ├── VirtualNode.cpp ├── cuda │ ├── Tensor.cpp │ └── cuda_lib.cu ├── multi_thread │ ├── matrix_task.cpp │ ├── ring_buffer.cpp │ └── thread_pool.cpp ├── op_node │ ├── AbsSum.cpp │ ├── Add.cpp │ ├── Bias.cpp │ ├── Dropout.cpp │ ├── Input.cpp │ ├── Minus.cpp │ ├── Mult.cpp │ ├── Parameter.cpp │ ├── Sigmoid.cpp │ └── SquareSum.cpp └── optimizer │ └── Adadelta.cpp └── unit_test ├── graph_test.cpp ├── operatorNode_test.cpp ├── rnn_test.cpp ├── tensor_test.cpp └── xor_test.cpp /README.md: -------------------------------------------------------------------------------- 1 | # automatic-differentiation-framework 2 | An automatic differentiation framework with control-flow support 3 | ## Project structure 4 | * src/ contains the main source code, including the core algorithms such as the compute graph and the virtual graph 5 | * src/op_node contains the implementations of the individual operator nodes 6 | * unit_test/ contains the unit tests for each module, most notably a simple neural network trained to compute the XOR operation and a recurrent neural network (RNN) trained to add 8-bit binary numbers; the RNN implementation relies on the framework's control-flow mechanism 7 | 8 | ## Overview 9 | Automatic differentiation is an important mechanism for simplifying the implementation of neural network models. Implementing a neural network with this framework involves the following main steps: 10 | 11 | 1. The user organizes a number of basic operator nodes into a compute graph; 12 | 2. The framework topologically sorts the compute graph and calls each operator node's compute function op() in turn, which implements forward propagation; 13 | 3. The framework then sorts the compute graph in reverse topological order and calls each operator node's gradient function grad_op() in turn, which implements backpropagation. 14 | 15 | All data handled by operator nodes are tensors (Tensor). 16 | 17 | ## Control flow 18 | To support control flow, the framework introduces the concept of a virtual graph. Control flow is expressed by two kinds of virtual-graph nodes: loops (Loop) and branches (Branch). 19 | 20 | The user builds a blueprint of the neural network as a virtual graph; only at run time can certain nodes decide whether a given branch is taken. Running the virtual graph constructs the actual compute graph, 21 | and backpropagation is then performed on that compute graph to train the model. 22 | 23 | Introducing loops would logically create cycles in the virtual graph. To prevent such cycles from making topological sorting of the virtual graph impossible, the framework treats a Loop node as a subgraph that contains exactly one loop. 24 | This effectively partitions the original graph into multiple subgraphs along its loops; subgraphs can be nested. Viewing each subgraph as a generalized node, the overall virtual graph contains no cycles, so forward propagation remains possible. 25 | 26 | ## Other features 27 | A plain SGD optimizer and an Adadelta optimizer are currently supported, and a dropout operator node is implemented. 28 | 29 | ## CUDA support 30 | CUDA support was added on January 31, 2018, so the framework can use GPU acceleration on machines with CUDA installed. However, because the bundled examples are small and the GPU code is not yet well optimized, performance on small amounts of data is unimpressive and slower than the CPU path. 31 | 32 | In this project CUDA is mainly used to speed up matrix operations; the kernels in src/cuda/cuda_lib.cu are optimized with shared memory and intra-block thread synchronization. 33 | 34 | ## Blog 35 | The blog series below describes the overall design of the framework in five parts. 36 | 37 | https://www.jianshu.com/p/4c2032c685dc 38 | -------------------------------------------------------------------------------- /include/BranchNode.h: -------------------------------------------------------------------------------- 1 | #ifndef BRANCHNODE_H_ 2 | #define BRANCHNODE_H_ 3 | #include "Node.h" 4 | #include "Graph.h" 5 | class BranchNode: public Node { 6 | public: 7 | // When a BranchNode is part of the start of a loop, the LoopNode that contains it sometimes needs to supply the name of the compute node used for initialization, thereby allowing loops to be cascaded 8 | std::string m_dep_op_node_name; 9 | Node* (*choose_node) (int idx, Graph* compute_graph, BranchNode* branch_node); 10 | BranchNode (std::string type, std::string id, Node* (*func) (int, Graph*, BranchNode*)); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/ComputeGraph.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPUTEGRAPH_H_ 2 | #define COMPUTEGRAPH_H_ 3 | #include "Graph.h" 4 | #include "Node.h" 5 | #include "Optimizer.h" 6 | #include <vector> 7 | class ComputeGraph: public Graph { 8 | protected: 9 | int m_need_release_tensor_flag; 10 | void
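// A minimal usage sketch based on the headers in this repository (names such
// as vg, cg, x, w and h are placeholders; setting up the Input data callback,
// the Parameter tensors and a loss node is omitted): the user wires
// VirtualNodes into a VirtualGraph, materializes a ComputeGraph from it, and
// then alternates forward and backward passes on that ComputeGraph.
//
//   VirtualGraph vg;
//   VirtualNode* x = new VirtualNode ("Input", "x");      // also needs m_input_data / input_op
//   VirtualNode* w = new VirtualNode ("Parameter", "w");  // also needs m_data
//   VirtualNode* h = new VirtualNode ("Mult", "h");
//   vg.add_node ("", x);
//   vg.add_node ("", w);
//   vg.add_node (x -> get_name (), h);
//   vg.add_node (w -> get_name (), h);
//
//   ComputeGraph cg;
//   cg.m_optimizer = new Optimizer (0.1);  // plain SGD; an Adadelta instance also works
//   vg.build_compute_graph (&cg);          // instantiate the operator nodes
//   std::vector<Node*> results;
//   cg.forward_propagation (results);      // topological order, op() on every node
//   cg.back_propagation ();                // reverse order, grad_op() plus parameter update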
release_tensor (); 11 | public: 12 | ComputeGraph (); 13 | Optimizer* m_optimizer; 14 | void forward_propagation (std::vector &result_list); 15 | void back_propagation (); 16 | ~ComputeGraph ();// 并不释放其中包含的计算节点的内存空间,用于支持动态计算图 17 | }; 18 | #endif 19 | -------------------------------------------------------------------------------- /include/Graph.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAPH_H_ 2 | #define GRAPH_H_ 3 | #include "Node.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | class Graph { 9 | protected: 10 | std::unordered_map m_node_map;// 计算图中节点字典 11 | int m_need_build_reverse_graph_flag; 12 | std::unordered_map > m_reverse_table;// 计算图的转置图 13 | void build_reverse_graph ();// 构建转置图 14 | public: 15 | Graph (); 16 | std::unordered_map > m_adj_table;// 计算图邻接表 17 | void add_node (std::string parent_name, Node* node);// 向计算图中添加节点 18 | Node* get_node (std::string name); 19 | void build_subgraph (std::vector &endnode_list);// 根据终止节点列表构造出子图 20 | void topological_sort (std::unordered_map > &adj_table, std::vector &result);// 拓扑排序 21 | void get_endnode (std::vector &endNode_list);// 获取转置图中没有前驱的节点 22 | virtual ~Graph ();// 析构函数 23 | }; 24 | #endif 25 | -------------------------------------------------------------------------------- /include/LoopNode.h: -------------------------------------------------------------------------------- 1 | #ifndef LOOPNODE_H_ 2 | #define LOOPNODE_H_ 3 | #include "Node.h" 4 | #include "VirtualGraph.h" 5 | class LoopNode: public Node { 6 | public: 7 | VirtualGraph* m_sub_vgraph; 8 | Node* m_end_compute_node;// 该循环节点最终的输出节点 9 | // 初始化循环,为循环子图中的一些节点补上节点依赖,依赖的节点来自于LoopNode所依赖的节点 10 | // 补上依赖的主要方式是把依赖的计算节点的名字加入到当前LoopNode中的子虚拟图的起始BranchNode中 11 | void (*init) (LoopNode* loop_node); 12 | int (*condition) (Graph* compute_graph, int idx);// 条件成立则返回1,否则返回0 13 | LoopNode (std::string type, std::string id, void (*func1) (LoopNode*), int (*func2) (Graph*, int)); 14 | virtual void inner_loop (Graph* compute_graph);// 内循环 15 | virtual ~LoopNode (); 16 | }; 17 | #endif 18 | -------------------------------------------------------------------------------- /include/Node.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_H_ 2 | #define NODE_H_ 3 | #include 4 | #include 5 | class Node { 6 | public: 7 | std::vector m_name; 8 | std::vector m_parents;// 依赖节点列表 9 | int m_invisible;// 0可见,1不可见 10 | 11 | virtual std::string get_name (); 12 | Node (std::string type, std::string id); 13 | virtual ~Node (); 14 | }; 15 | #endif 16 | -------------------------------------------------------------------------------- /include/OperatorNode.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERATORNODE_H_ 2 | #define OPERATORNODE_H_ 3 | #include "Node.h" 4 | #include "Tensor.h" 5 | #include 6 | class OperatorNode: public Node { 7 | protected: 8 | void chain_rule (Tensor* grad, int parent_idx); 9 | public: 10 | Tensor* m_output; 11 | Tensor* m_sum_grad; 12 | OperatorNode (std::string type, std::string id, std::string idx); 13 | virtual void op (); 14 | virtual void grad_op (); 15 | virtual ~OperatorNode (); 16 | virtual void release_tensor (); 17 | }; 18 | #endif 19 | -------------------------------------------------------------------------------- /include/Optimizer.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTIMIZER_H_ 2 | #define OPTIMIZER_H_ 3 | #include 4 | #include "Node.h" 5 | class 
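// Base optimizer: plain stochastic gradient descent. optimize() first calls
// grad_op() on every node in reverse topological order to accumulate each
// node's m_sum_grad, and then, for every Parameter node p with a non-null
// gradient, applies
//
//   p.m_output->m_tensor[j] -= m_a * p.m_sum_grad->m_tensor[j]
//
// where m_a is the learning rate passed to the constructor. Adadelta
// (include/optimizer/Adadelta.h) overrides optimize() with an adaptive rule.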
Optimizer { 6 | protected: 7 | float m_a; 8 | public: 9 | Optimizer (float a); 10 | virtual void optimize (std::vector &topo_results); 11 | virtual ~Optimizer (); 12 | }; 13 | #endif 14 | -------------------------------------------------------------------------------- /include/Tensor.h: -------------------------------------------------------------------------------- 1 | #ifndef TENSOR_H_ 2 | #define TENSOR_H_ 3 | #include 4 | class Tensor { 5 | public: 6 | std::vector m_shape; 7 | int m_size; 8 | float* m_tensor; 9 | Tensor (std::vector shape, int need_init = 1); 10 | Tensor (std::vector shape, float data[]); 11 | float get_value (std::vector idxs);// 根据各个维度的下标获取值 12 | void set_value (std::vector idxs, float value);// 设置某坐标下的值 13 | Tensor* matrix_mult (Tensor* tensor);// 二维矩阵乘法 14 | Tensor* scalar_mult (float scalar);// 标量乘法 15 | void scalar_acc_mult (float scalar);// 标量累乘 16 | Tensor* element_mult (Tensor* tensor);// 逐元素相乘 17 | float element_square_sum ();// 元素平方和 18 | float element_abs_sum ();// 元素绝对值和 19 | void element_square ();// 逐元素平方 20 | void add (Tensor* tensor, Tensor* result);// 累加 21 | Tensor* add (Tensor* tensor);// 普通加法 22 | void init (); 23 | void display (); 24 | 25 | ~Tensor (); 26 | }; 27 | #endif 28 | -------------------------------------------------------------------------------- /include/VirtualGraph.h: -------------------------------------------------------------------------------- 1 | #ifndef VIRTUALGRAPH_H_ 2 | #define VIRTUALGRAPH_H_ 3 | #include "Graph.h" 4 | #include "ComputeGraph.h" 5 | class VirtualGraph: public Graph { 6 | public: 7 | Node* build_compute_graph (Graph* compute_graph, int idx = 0); 8 | ~VirtualGraph (); 9 | }; 10 | #endif 11 | -------------------------------------------------------------------------------- /include/VirtualNode.h: -------------------------------------------------------------------------------- 1 | #ifndef VIRTUALNODE_H_ 2 | #define VIRTUALNODE_H_ 3 | #include "op_node/Input.h" 4 | #include "Node.h" 5 | #include "Tensor.h" 6 | #include "OperatorNode.h" 7 | #include "Graph.h" 8 | #include 9 | #include 10 | class VirtualNode: public Node { 11 | public: 12 | Tensor* m_data;// 记录一些必要的数据,比如虚拟节点是dropout节点时所需要的filter 13 | std::vector m_input_data;// 原始输入数据缓存 14 | std::unordered_map m_op_node_map;// 缓存这个虚拟节点生成计算节点 15 | void (*input_op) (Input* input);// Input计算节点的数据输入函数 16 | int m_share_parameter; 17 | float m_keep_rate; 18 | VirtualNode (std::string type, std::string id, int share_parameter = 0, float keep_rate = 0.5); 19 | void get_parents_op_nodes (int idx, Graph* compute_graph, std::vector &node_list); 20 | 21 | // 根据虚拟节点的名字和内循环下标idx,确定生成的计算节点。如果在m_op_node_map中已经存在计算节点了则不重复生成计算节点,用于支持动态计算图 22 | Node* get_op_node (int idx); 23 | ~VirtualNode ();// 会释放该虚拟节点生成的计算节点的内存空间 24 | }; 25 | #endif 26 | -------------------------------------------------------------------------------- /include/cuda/cuda_lib.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDALIB_H_ 2 | #define CUDALIB_H_ 3 | #define BLOCK_SIZE 32 4 | #define GRID_SIZE 32 5 | void cuda_matrix_mult (float* A, float* B, float* C, int a_row, int a_col, int b_row, int b_col); 6 | void cuda_tensor_add (float* A, float* B, float* C, int size); 7 | void cuda_scalar_tensor_mult (float* A, float* result, float s, int size); 8 | void cuda_element_square (float* A, int size); 9 | float cuda_element_square_sum (float* A, int size); 10 | float cuda_element_abs_sum (float* A, int size); 11 | void cuda_element_mult (float* A, float* B, float* C, int 
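// Host-side wrappers around the kernels defined in src/cuda/cuda_lib.cu: each
// wrapper allocates device buffers, copies its operands to the GPU, launches
// the corresponding kernel with a BLOCK_SIZE/GRID_SIZE configuration, and
// copies the result back to host memory. src/cuda/Tensor.cpp implements the
// Tensor methods on top of these wrappers, and makefile.gpu links that file in
// place of the CPU implementation in src/Tensor.cpp.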
size); 12 | #endif 13 | -------------------------------------------------------------------------------- /include/multi_thread/matrix_task.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H_ 2 | #define MATRIX_H_ 3 | #include "thread_pool.h" 4 | #include "../Tensor.h" 5 | class matrix_mult_task: public task { 6 | private: 7 | Tensor* m_A; 8 | Tensor* m_B; 9 | Tensor* m_C; 10 | int m_a_idx;// A矩阵的行号 11 | int m_b_idx;// b矩阵的列号 12 | public: 13 | matrix_mult_task (Tensor* A, Tensor* B, Tensor* C, int a_idx, int b_idx); 14 | void run (); 15 | }; 16 | class matrix_add_task: public task { 17 | private: 18 | Tensor* m_A; 19 | Tensor* m_B; 20 | Tensor* m_C; 21 | int m_thread_id;// 当前任务所在的线程id 22 | int m_thread_num;// 一共有多少线程 23 | public: 24 | matrix_add_task (Tensor* A, Tensor* B, Tensor* C, int thread_id, int thread_num); 25 | void run (); 26 | }; 27 | class matrix_scalar_mult_task: public task { 28 | private: 29 | Tensor* m_A; 30 | float m_scalar; 31 | Tensor* m_C; 32 | int m_thread_id; 33 | int m_thread_num; 34 | public: 35 | matrix_scalar_mult_task (Tensor* A, float scalar, Tensor* C, int thread_id, int thread_num); 36 | void run (); 37 | }; 38 | #endif 39 | -------------------------------------------------------------------------------- /include/multi_thread/ring_buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef RINGBUFFER_H_ 2 | #define RINGBUFFER_H_ 3 | #include 4 | class ring_buffer { 5 | private: 6 | int m_dequeue_ptr; 7 | int m_enqueue_ptr; 8 | int m_size; 9 | void** m_buffer; 10 | pthread_mutex_t m_dequeue_lock;// 多消费者锁 11 | pthread_mutex_t m_enqueue_lock;// 多生产者锁 12 | public: 13 | ring_buffer (int size); 14 | int is_full (); 15 | int is_empty (); 16 | int get_element (void** data); 17 | int add_element (void* data); 18 | ~ring_buffer (); 19 | }; 20 | #endif 21 | -------------------------------------------------------------------------------- /include/multi_thread/thread_pool.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_POOL_H_ 2 | #define THREAD_POOL_H_ 3 | #include "ring_buffer.h" 4 | #include 5 | #include 6 | class task { 7 | public: 8 | virtual void run (); 9 | }; 10 | 11 | void *get_task (void *arg); 12 | class thread_pool { 13 | protected: 14 | thread_pool (); 15 | private: 16 | static thread_pool* instance; 17 | ring_buffer** m_task_buffers;// 每个worker独享一个ring_buffer 18 | pthread_t* m_workers; 19 | int m_idx; 20 | public: 21 | int m_worker_num; 22 | static thread_pool* get_instance (); 23 | void add_job (task* t_task); 24 | void add_job_list (std::vector job_list); 25 | ~thread_pool (); 26 | }; 27 | #endif 28 | -------------------------------------------------------------------------------- /include/op_node/AbsSum.h: -------------------------------------------------------------------------------- 1 | #ifndef ABSSUM_H_ 2 | #define ABSSUM_H_ 3 | #include "../OperatorNode.h" 4 | class AbsSum: public OperatorNode { 5 | public: 6 | AbsSum (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~AbsSum (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/Add.h: -------------------------------------------------------------------------------- 1 | #ifndef ADD_H_ 2 | #define ADD_H_ 3 | #include "../OperatorNode.h" 4 | class Add: public OperatorNode { 5 | public: 6 | Add (std::string type, 
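// Addition node (presumably the element-wise sum of its parents' outputs).
// Like every concrete OperatorNode it implements the op()/grad_op() pair:
// op() fills m_output from the parents' m_output tensors, and grad_op()
// derives the gradient with respect to each parent and hands it to
// chain_rule(), which accumulates it into that parent's m_sum_grad.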
std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~Add (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/Bias.h: -------------------------------------------------------------------------------- 1 | #ifndef BIAS_H_ 2 | #define BIAS_H_ 3 | #include "../OperatorNode.h" 4 | class Bias: public OperatorNode { 5 | public: 6 | Bias (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~Bias (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/Dropout.h: -------------------------------------------------------------------------------- 1 | #ifndef DROPOUT_H_ 2 | #define DROPOUT_H_ 3 | #include "../OperatorNode.h" 4 | class Dropout: public OperatorNode { 5 | public: 6 | float m_keep_rate; 7 | Tensor* m_filter; 8 | Dropout (std::string type, std::string id, std::string idx, Tensor* filter, float keep_rate); 9 | void op (); 10 | void grad_op (); 11 | ~Dropout (); 12 | void release_tensor (); 13 | }; 14 | #endif 15 | -------------------------------------------------------------------------------- /include/op_node/Input.h: -------------------------------------------------------------------------------- 1 | #ifndef INPUT_H_ 2 | #define INPUT_H_ 3 | #include "../OperatorNode.h" 4 | #include 5 | class Input: public OperatorNode { 6 | public: 7 | int m_data_ptr; 8 | std::vector m_data; 9 | Input (std::string type, std::string id, std::string idx, std::vector input_data, void (*func) (Input*) = 0); 10 | void (*op) (Input* input);// 参数是该函数所在对象本身 11 | void release_tensor (); 12 | ~Input (); 13 | }; 14 | #endif 15 | -------------------------------------------------------------------------------- /include/op_node/Minus.h: -------------------------------------------------------------------------------- 1 | #ifndef MINUS_H_ 2 | #define MINUS_H_ 3 | #include "../OperatorNode.h" 4 | class Minus: public OperatorNode { 5 | public: 6 | Minus (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~Minus (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/Mult.h: -------------------------------------------------------------------------------- 1 | #ifndef MULT_H_ 2 | #define MULT_H_ 3 | #include "../OperatorNode.h" 4 | class Mult: public OperatorNode { 5 | public: 6 | Mult (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~Mult (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/Parameter.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETER_H_ 2 | #define PARAMETER_H_ 3 | #include "../OperatorNode.h" 4 | class Parameter: public OperatorNode { 5 | public: 6 | int m_share_data; 7 | Parameter (std::string type, std::string id, std::string idx, Tensor* data, int share_data = 0); 8 | ~Parameter (); 9 | void release_tensor (); 10 | }; 11 | #endif 12 | -------------------------------------------------------------------------------- /include/op_node/Sigmoid.h: -------------------------------------------------------------------------------- 1 | #ifndef SIGMOID_H_ 2 | #define SIGMOID_H_ 3 | #include "../OperatorNode.h" 4 | 
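// Sigmoid activation node. Forward: sigma(x) = 1 / (1 + exp(-x)) applied
// element-wise to the parent's output. Because the derivative is
// sigma'(x) = sigma(x) * (1 - sigma(x)), grad_op() can presumably be computed
// from the cached m_output alone, without revisiting the input tensor.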
class Sigmoid: public OperatorNode { 5 | public: 6 | Sigmoid (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~Sigmoid (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/op_node/SquareSum.h: -------------------------------------------------------------------------------- 1 | #ifndef SQUARESUM_H_ 2 | #define SQUARESUM_H_ 3 | #include "../OperatorNode.h" 4 | class SquareSum: public OperatorNode { 5 | public: 6 | SquareSum (std::string type, std::string id, std::string idx); 7 | void op (); 8 | void grad_op (); 9 | ~SquareSum (); 10 | void release_tensor (); 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /include/optimizer/Adadelta.h: -------------------------------------------------------------------------------- 1 | #ifndef ADADELTA_H_ 2 | #define ADADELTA_H_ 3 | #include "../Optimizer.h" 4 | #include "../Tensor.h" 5 | #include 6 | class Adadelta: public Optimizer { 7 | private: 8 | std::unordered_map m_tensor_store; 9 | float m_epsl; 10 | float m_lambda; 11 | public: 12 | Adadelta (float a); 13 | void optimize (std::vector &topo_results); 14 | ~Adadelta (); 15 | }; 16 | #endif 17 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | all: 2 | #编译变量 3 | virtual_node_group=obj/VirtualNode.o obj/BranchNode.o obj/LoopNode.o 4 | operator_node_group=obj/OperatorNode.o obj/Dropout.o obj/Input.o obj/Add.o obj/AbsSum.o obj/Bias.o obj/Mult.o obj/Minus.o obj/SquareSum.o obj/Sigmoid.o obj/Parameter.o 5 | multi_thread_group=obj/ring_buffer.o obj/matrix_task.o obj/thread_pool.o 6 | graph_group=obj/Graph.o obj/ComputeGraph.o obj/VirtualGraph.o 7 | optimizer_group=obj/Optimizer.o obj/Adadelta.o 8 | 9 | rnn_test: $(multi_thread_group) $(operator_node_group) $(graph_group) $(virtual_node_group) $(optimizer_group) obj/rnn_test.o obj/Node.o obj/Tensor.o 10 | g++ -std=c++11 -pthread $(multi_thread_group) $(operator_node_group) $(graph_group) $(virtual_node_group) $(optimizer_group) obj/rnn_test.o obj/Node.o obj/Tensor.o -o rnn_test 11 | 12 | xor_test: $(multi_thread_group) $(graph_group) $(optimizer_group) $(operator_node_group) obj/xor_test.o obj/Node.o obj/VirtualNode.o obj/Tensor.o 13 | g++ -std=c++11 -pthread $(multi_thread_group) $(graph_group) $(optimizer_group) $(operator_node_group) obj/xor_test.o obj/Node.o obj/VirtualNode.o obj/Tensor.o -o xor_test 14 | 15 | graph_test: obj/graph_test.o obj/Graph.o obj/Node.o 16 | g++ -std=c++11 obj/graph_test.o obj/Graph.o obj/Node.o -o graph_test 17 | 18 | tensor_test: obj/tensor_test.o obj/Tensor.o obj/ring_buffer.o obj/matrix_task.o obj/thread_pool.o 19 | g++ -std=c++11 -pthread obj/tensor_test.o obj/Tensor.o obj/ring_buffer.o obj/matrix_task.o obj/thread_pool.o -o tensor_test 20 | 21 | operatorNode_test: obj/operatorNode_test.o obj/ComputeGraph.o obj/Optimizer.o obj/Graph.o obj/Dropout.o obj/Sigmoid.o obj/AbsSum.o obj/SquareSum.o obj/Add.o obj/Bias.o obj/Mult.o obj/Minus.o obj/Parameter.o obj/OperatorNode.o obj/Node.o obj/Tensor.o 22 | g++ -std=c++11 obj/operatorNode_test.o obj/ComputeGraph.o obj/Optimizer.o obj/Graph.o obj/Dropout.o obj/Sigmoid.o obj/AbsSum.o obj/SquareSum.o obj/Add.o obj/Bias.o obj/Mult.o obj/Minus.o obj/Parameter.o obj/OperatorNode.o obj/Node.o obj/Tensor.o -o operatorNode_test 23 | 24 | obj/rnn_test.o: 
unit_test/rnn_test.cpp 25 | g++ -std=c++11 -c unit_test/rnn_test.cpp -o obj/rnn_test.o 26 | obj/xor_test.o: unit_test/xor_test.cpp 27 | g++ -std=c++11 -c unit_test/xor_test.cpp -o obj/xor_test.o 28 | obj/operatorNode_test.o: unit_test/operatorNode_test.cpp 29 | g++ -std=c++11 -c unit_test/operatorNode_test.cpp -o obj/operatorNode_test.o 30 | obj/graph_test.o: unit_test/graph_test.cpp 31 | g++ -std=c++11 -c unit_test/graph_test.cpp -o obj/graph_test.o 32 | obj/tensor_test.o: unit_test/tensor_test.cpp 33 | g++ -std=c++11 -c unit_test/tensor_test.cpp -o obj/tensor_test.o 34 | 35 | obj/SquareSum.o: src/op_node/SquareSum.cpp 36 | g++ -std=c++11 -c src/op_node/SquareSum.cpp -o obj/SquareSum.o 37 | obj/Sigmoid.o: src/op_node/Sigmoid.cpp 38 | g++ -std=c++11 -c src/op_node/Sigmoid.cpp -o obj/Sigmoid.o 39 | obj/Mult.o: src/op_node/Mult.cpp 40 | g++ -std=c++11 -c src/op_node/Mult.cpp -o obj/Mult.o 41 | obj/Minus.o: src/op_node/Minus.cpp 42 | g++ -std=c++11 -c src/op_node/Minus.cpp -o obj/Minus.o 43 | obj/Add.o: src/op_node/Add.cpp 44 | g++ -std=c++11 -c src/op_node/Add.cpp -o obj/Add.o 45 | obj/Bias.o: src/op_node/Bias.cpp 46 | g++ -std=c++11 -c src/op_node/Bias.cpp -o obj/Bias.o 47 | obj/Input.o: src/op_node/Input.cpp 48 | g++ -std=c++11 -c src/op_node/Input.cpp -o obj/Input.o 49 | obj/Parameter.o: src/op_node/Parameter.cpp 50 | g++ -std=c++11 -c src/op_node/Parameter.cpp -o obj/Parameter.o 51 | obj/AbsSum.o: src/op_node/AbsSum.cpp 52 | g++ -std=c++11 -c src/op_node/AbsSum.cpp -o obj/AbsSum.o 53 | obj/Dropout.o: src/op_node/Dropout.cpp 54 | g++ -std=c++11 -c src/op_node/Dropout.cpp -o obj/Dropout.o 55 | 56 | obj/ComputeGraph.o: src/ComputeGraph.cpp 57 | g++ -std=c++11 -c src/ComputeGraph.cpp -o obj/ComputeGraph.o 58 | obj/VirtualGraph.o: src/VirtualGraph.cpp 59 | g++ -std=c++11 -c src/VirtualGraph.cpp -o obj/VirtualGraph.o 60 | obj/Graph.o: src/Graph.cpp 61 | g++ -std=c++11 -c src/Graph.cpp -o obj/Graph.o 62 | 63 | obj/OperatorNode.o: src/OperatorNode.cpp 64 | g++ -std=c++11 -c src/OperatorNode.cpp -o obj/OperatorNode.o 65 | obj/VirtualNode.o: src/VirtualNode.cpp 66 | g++ -std=c++11 -c src/VirtualNode.cpp -o obj/VirtualNode.o 67 | obj/LoopNode.o: src/LoopNode.cpp 68 | g++ -std=c++11 -c src/LoopNode.cpp -o obj/LoopNode.o 69 | obj/BranchNode.o: src/BranchNode.cpp 70 | g++ -std=c++11 -c src/BranchNode.cpp -o obj/BranchNode.o 71 | obj/Node.o: src/Node.cpp 72 | g++ -std=c++11 -c src/Node.cpp -o obj/Node.o 73 | 74 | obj/Optimizer.o: src/Optimizer.cpp 75 | g++ -std=c++11 -c src/Optimizer.cpp -o obj/Optimizer.o 76 | obj/Adadelta.o: src/optimizer/Adadelta.cpp 77 | g++ -std=c++11 -c src/optimizer/Adadelta.cpp -o obj/Adadelta.o 78 | 79 | obj/Tensor.o: src/Tensor.cpp 80 | g++ -std=c++11 -c src/Tensor.cpp -o obj/Tensor.o 81 | 82 | obj/ring_buffer.o: src/multi_thread/ring_buffer.cpp 83 | g++ -std=c++11 -c src/multi_thread/ring_buffer.cpp -o obj/ring_buffer.o 84 | obj/thread_pool.o: src/multi_thread/thread_pool.cpp 85 | g++ -std=c++11 -c src/multi_thread/thread_pool.cpp -o obj/thread_pool.o 86 | obj/matrix_task.o: src/multi_thread/matrix_task.cpp 87 | g++ -std=c++11 -c src/multi_thread/matrix_task.cpp -o obj/matrix_task.o 88 | 89 | clean: 90 | rm obj/*o rnn_test xor_test tensor_test operatorNode_test graph_test 91 | -------------------------------------------------------------------------------- /makefile.gpu: -------------------------------------------------------------------------------- 1 | all: 2 | 3 | virtual_node_group=obj/VirtualNode.o obj/BranchNode.o obj/LoopNode.o 4 | 
operator_node_group=obj/OperatorNode.o obj/Dropout.o obj/Input.o obj/Add.o obj/AbsSum.o obj/Bias.o obj/Mult.o obj/Minus.o obj/SquareSum.o obj/Sigmoid.o obj/Parameter.o 5 | graph_group=obj/Graph.o obj/ComputeGraph.o obj/VirtualGraph.o 6 | optimizer_group=obj/Optimizer.o obj/Adadelta.o 7 | 8 | rnn_test: $(operator_node_group) $(graph_group) $(virtual_node_group) $(optimizer_group) obj/rnn_test.o obj/Node.o obj/Tensor.o obj/cuda_lib.o 9 | g++ -std=c++11 $(operator_node_group) $(graph_group) $(virtual_node_group) $(optimizer_group) obj/rnn_test.o obj/Node.o obj/Tensor.o obj/cuda_lib.o -L/usr/local/cuda/lib64 -lcudart -o rnn_test 10 | 11 | xor_test: $(operator_node_group) $(graph_group) $(optimizer_group) obj/xor_test.o obj/VirtualNode.o obj/Node.o obj/Tensor.o obj/cuda_lib.o 12 | g++ -std=c++11 $(operator_node_group) $(graph_group) $(optimizer_group) obj/xor_test.o obj/VirtualNode.o obj/Node.o obj/Tensor.o obj/cuda_lib.o -L/usr/local/cuda/lib64 -lcudart -o xor_test 13 | 14 | graph_test: obj/graph_test.o obj/Graph.o obj/Node.o 15 | g++ -std=c++11 obj/graph_test.o obj/Graph.o obj/Node.o -o graph_test 16 | 17 | tensor_test: obj/tensor_test.o obj/Tensor.o obj/cuda_lib.o 18 | g++ -std=c++11 obj/tensor_test.o obj/Tensor.o obj/cuda_lib.o -L/usr/local/cuda/lib64 -lcudart -o tensor_test 19 | 20 | operatorNode_test: obj/operatorNode_test.o obj/ComputeGraph.o obj/Optimizer.o obj/Graph.o obj/Dropout.o obj/Sigmoid.o obj/AbsSum.o obj/SquareSum.o obj/Add.o obj/Bias.o obj/Mult.o obj/Minus.o obj/Parameter.o obj/OperatorNode.o obj/Node.o obj/Tensor.o obj/cuda_lib.o 21 | g++ -std=c++11 obj/operatorNode_test.o obj/ComputeGraph.o obj/Optimizer.o obj/Graph.o obj/Dropout.o obj/Sigmoid.o obj/AbsSum.o obj/SquareSum.o obj/Add.o obj/Bias.o obj/Mult.o obj/Minus.o obj/Parameter.o obj/OperatorNode.o obj/Node.o obj/Tensor.o obj/cuda_lib.o -L/usr/local/cuda/lib64 -lcudart -o operatorNode_test 22 | 23 | obj/rnn_test.o: unit_test/rnn_test.cpp 24 | g++ -std=c++11 -c unit_test/rnn_test.cpp -o obj/rnn_test.o 25 | obj/xor_test.o: unit_test/xor_test.cpp 26 | g++ -std=c++11 -c unit_test/xor_test.cpp -o obj/xor_test.o 27 | obj/operatorNode_test.o: unit_test/operatorNode_test.cpp 28 | g++ -std=c++11 -c unit_test/operatorNode_test.cpp -o obj/operatorNode_test.o 29 | obj/graph_test.o: unit_test/graph_test.cpp 30 | g++ -std=c++11 -c unit_test/graph_test.cpp -o obj/graph_test.o 31 | obj/tensor_test.o: unit_test/tensor_test.cpp 32 | g++ -std=c++11 -c unit_test/tensor_test.cpp -o obj/tensor_test.o 33 | 34 | obj/SquareSum.o: src/op_node/SquareSum.cpp 35 | g++ -std=c++11 -c src/op_node/SquareSum.cpp -o obj/SquareSum.o 36 | obj/Sigmoid.o: src/op_node/Sigmoid.cpp 37 | g++ -std=c++11 -c src/op_node/Sigmoid.cpp -o obj/Sigmoid.o 38 | obj/Mult.o: src/op_node/Mult.cpp 39 | g++ -std=c++11 -c src/op_node/Mult.cpp -o obj/Mult.o 40 | obj/Minus.o: src/op_node/Minus.cpp 41 | g++ -std=c++11 -c src/op_node/Minus.cpp -o obj/Minus.o 42 | obj/Add.o: src/op_node/Add.cpp 43 | g++ -std=c++11 -c src/op_node/Add.cpp -o obj/Add.o 44 | obj/Bias.o: src/op_node/Bias.cpp 45 | g++ -std=c++11 -c src/op_node/Bias.cpp -o obj/Bias.o 46 | obj/Input.o: src/op_node/Input.cpp 47 | g++ -std=c++11 -c src/op_node/Input.cpp -o obj/Input.o 48 | obj/Parameter.o: src/op_node/Parameter.cpp 49 | g++ -std=c++11 -c src/op_node/Parameter.cpp -o obj/Parameter.o 50 | obj/AbsSum.o: src/op_node/AbsSum.cpp 51 | g++ -std=c++11 -c src/op_node/AbsSum.cpp -o obj/AbsSum.o 52 | obj/Dropout.o: src/op_node/Dropout.cpp 53 | g++ -std=c++11 -c src/op_node/Dropout.cpp -o obj/Dropout.o 54 | 55 | 
obj/ComputeGraph.o: src/ComputeGraph.cpp 56 | g++ -std=c++11 -c src/ComputeGraph.cpp -o obj/ComputeGraph.o 57 | obj/VirtualGraph.o: src/VirtualGraph.cpp 58 | g++ -std=c++11 -c src/VirtualGraph.cpp -o obj/VirtualGraph.o 59 | obj/Graph.o: src/Graph.cpp 60 | g++ -std=c++11 -c src/Graph.cpp -o obj/Graph.o 61 | 62 | obj/OperatorNode.o: src/OperatorNode.cpp 63 | g++ -std=c++11 -c src/OperatorNode.cpp -o obj/OperatorNode.o 64 | obj/VirtualNode.o: src/VirtualNode.cpp 65 | g++ -std=c++11 -c src/VirtualNode.cpp -o obj/VirtualNode.o 66 | obj/LoopNode.o: src/LoopNode.cpp 67 | g++ -std=c++11 -c src/LoopNode.cpp -o obj/LoopNode.o 68 | obj/BranchNode.o: src/BranchNode.cpp 69 | g++ -std=c++11 -c src/BranchNode.cpp -o obj/BranchNode.o 70 | obj/Node.o: src/Node.cpp 71 | g++ -std=c++11 -c src/Node.cpp -o obj/Node.o 72 | 73 | obj/Optimizer.o: src/Optimizer.cpp 74 | g++ -std=c++11 -c src/Optimizer.cpp -o obj/Optimizer.o 75 | obj/Adadelta.o: src/optimizer/Adadelta.cpp 76 | g++ -std=c++11 -c src/optimizer/Adadelta.cpp -o obj/Adadelta.o 77 | 78 | # gpu based 79 | obj/Tensor.o: src/cuda/Tensor.cpp obj/cuda_lib.o 80 | g++ -std=c++11 -c src/cuda/Tensor.cpp -o obj/Tensor.o 81 | obj/cuda_lib.o: src/cuda/cuda_lib.cu 82 | nvcc -c -I/include/cuda -I/usr/local/cuda/include src/cuda/cuda_lib.cu -o obj/cuda_lib.o 83 | 84 | clean: 85 | rm obj/*o 86 | -------------------------------------------------------------------------------- /obj/empty: -------------------------------------------------------------------------------- 1 | object dir 2 | -------------------------------------------------------------------------------- /src/BranchNode.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/BranchNode.h" 2 | #include "../include/op_node/Parameter.h" 3 | #include "../include/Tensor.h" 4 | #include 5 | #include 6 | using namespace std; 7 | BranchNode::BranchNode (string type, string id, Node* (*func) (int, Graph*, BranchNode*)): Node (type, id) { 8 | m_dep_op_node_name = ""; 9 | choose_node = func; 10 | } 11 | -------------------------------------------------------------------------------- /src/ComputeGraph.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/ComputeGraph.h" 2 | #include "../include/OperatorNode.h" 3 | #include "../include/op_node/Input.h" 4 | #include 5 | using namespace std; 6 | ComputeGraph::ComputeGraph () { 7 | m_need_release_tensor_flag = 0; 8 | } 9 | void ComputeGraph::forward_propagation (vector &result_list) { 10 | if (m_need_release_tensor_flag == 1) {// 前向传播前释放上一次的运算结果 11 | release_tensor (); 12 | } 13 | vector topo_result; 14 | topological_sort (m_adj_table, topo_result); 15 | for (int i = 0; i < topo_result.size (); ++i) { 16 | if (topo_result[i] -> m_name[0] == "Input") { 17 | ((Input*) topo_result[i]) -> op ((Input*) topo_result[i]); 18 | } else { 19 | ((OperatorNode*) topo_result[i]) -> op (); 20 | } 21 | } 22 | get_endnode (result_list); 23 | m_need_release_tensor_flag = 1; 24 | } 25 | void ComputeGraph::back_propagation () { 26 | if (m_need_build_reverse_graph_flag == 1) { 27 | build_reverse_graph (); 28 | } 29 | vector topo_result; 30 | topological_sort (m_reverse_table, topo_result); 31 | if (m_optimizer == 0) { 32 | cout << "optimizer has not been set" << endl; 33 | } else { 34 | m_optimizer -> optimize (topo_result); 35 | } 36 | m_need_release_tensor_flag = 1; 37 | } 38 | void ComputeGraph::release_tensor () { 39 | unordered_map::iterator node_map_it = m_node_map.begin (); 40 | 
while (node_map_it != m_node_map.end ()) { 41 | ((OperatorNode*) (node_map_it -> second)) -> release_tensor (); 42 | ++node_map_it; 43 | } 44 | m_need_release_tensor_flag = 0; 45 | } 46 | ComputeGraph::~ComputeGraph () { 47 | cout << "compute graph free" << endl; 48 | delete m_optimizer; 49 | m_optimizer = 0; 50 | m_node_map.clear (); 51 | m_adj_table.clear (); 52 | m_reverse_table.clear (); 53 | } 54 | -------------------------------------------------------------------------------- /src/Graph.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../include/Graph.h" 3 | using namespace std; 4 | Graph::Graph () { 5 | m_need_build_reverse_graph_flag = 1; 6 | } 7 | void Graph::add_node (string parent_name, Node* node) { 8 | m_node_map[node -> get_name ()] = node;// 将节点加入字典 9 | if (parent_name != "") { 10 | if (m_node_map.end () != m_node_map.find (parent_name)) { 11 | Node* parent_node = m_node_map[parent_name]; 12 | node -> m_parents.push_back (parent_node); 13 | m_adj_table[parent_name].push_back (node);// 节点加入邻接表 14 | } else { 15 | cout << "parent node is not in graph" << endl; 16 | } 17 | } 18 | } 19 | Node* Graph::get_node (string name) { 20 | if (m_node_map.end () == m_node_map.find (name)) { 21 | return 0; 22 | } else { 23 | return m_node_map[name]; 24 | } 25 | } 26 | void Graph::build_subgraph (vector &endnode_list) { 27 | unordered_map::iterator node_map_it = m_node_map.begin (); 28 | while (node_map_it != m_node_map.end ()) {// 所有节点设置为不可见 29 | node_map_it -> second -> m_invisible = 1; 30 | ++node_map_it; 31 | } 32 | // 构造子图 33 | queue q; 34 | unordered_set visit; 35 | for (int i = 0; i < endnode_list.size (); ++i) { 36 | q.push (endnode_list[i]); 37 | visit.insert (endnode_list[i]); 38 | } 39 | while (!q.empty ()) { 40 | Node* node = q.front (); 41 | q.pop (); 42 | node -> m_invisible = 0; 43 | for (int i = 0; i < node -> m_parents.size (); ++i) { 44 | if (visit.find (node -> m_parents[i]) == visit.end ()) { 45 | visit.insert (node -> m_parents[i]); 46 | q.push (node -> m_parents[i]); 47 | } 48 | } 49 | } 50 | } 51 | void Graph::topological_sort (std::unordered_map > &adj_table, std::vector &result) { 52 | unordered_map indegree; 53 | unordered_map::iterator node_map_it = m_node_map.begin (); 54 | while (node_map_it != m_node_map.end ()) { 55 | indegree[node_map_it -> first] = 0; 56 | ++node_map_it; 57 | } 58 | unordered_map >::iterator adj_table_it = adj_table.begin (); 59 | while (adj_table_it != adj_table.end ()) { 60 | for (int i = 0; i < adj_table_it -> second.size (); ++i) { 61 | ++indegree[(adj_table_it -> second)[i] -> get_name ()]; 62 | } 63 | ++adj_table_it; 64 | } 65 | queue q; 66 | unordered_map::iterator indegree_it = indegree.begin (); 67 | while (indegree_it != indegree.end ()) { 68 | if (indegree_it -> second == 0) { 69 | q.push (m_node_map[indegree_it -> first]); 70 | } 71 | ++indegree_it; 72 | } 73 | while (!q.empty ()) { 74 | Node* node = q.front (); 75 | q.pop (); 76 | if (node -> m_invisible == 0) {// 可见节点加入result 77 | result.push_back (node); 78 | } 79 | vector adj_nodes = adj_table[node -> get_name ()]; 80 | for (int i = 0; i < adj_nodes.size (); ++i) { 81 | --indegree[(adj_nodes[i]) -> get_name ()]; 82 | if (indegree[(adj_nodes[i]) -> get_name ()] == 0) { 83 | q.push (adj_nodes[i]); 84 | } 85 | } 86 | } 87 | } 88 | void Graph::build_reverse_graph () { 89 | unordered_map >::iterator adj_table_it = m_adj_table.begin (); 90 | while (adj_table_it != m_adj_table.end ()) { 91 | Node* parent = 
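// build_reverse_graph(): for every edge parent -> child recorded in
// m_adj_table, add the opposite edge child -> parent to m_reverse_table.
// ComputeGraph::back_propagation() topologically sorts this transposed graph,
// so gradients are propagated from the loss node back towards the inputs.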
m_node_map[adj_table_it -> first]; 92 | vector adj_nodes = adj_table_it -> second; 93 | for (int i = 0; i < adj_nodes.size (); ++i) { 94 | string name = (adj_nodes[i]) -> get_name (); 95 | m_reverse_table[name].push_back (parent); 96 | } 97 | ++adj_table_it; 98 | } 99 | m_need_build_reverse_graph_flag = 0; 100 | } 101 | void Graph::get_endnode (vector &endnode_list) { 102 | if (m_need_build_reverse_graph_flag == 1) {// 没有构建转置图 103 | build_reverse_graph (); 104 | } 105 | unordered_map >::iterator reverse_table_it = m_reverse_table.begin (); 106 | unordered_map::iterator node_map_it = m_node_map.begin (); 107 | unordered_map indegree; 108 | while (node_map_it != m_node_map.end ()) { 109 | indegree[node_map_it -> first] = 0; 110 | ++node_map_it; 111 | } 112 | while (reverse_table_it != m_reverse_table.end ()) { 113 | vector adj_nodes = reverse_table_it -> second; 114 | for (int i = 0; i < adj_nodes.size (); ++i) { 115 | ++indegree[(adj_nodes[i]) -> get_name ()]; 116 | } 117 | ++reverse_table_it; 118 | } 119 | unordered_map::iterator indegree_it = indegree.begin (); 120 | while (indegree_it != indegree.end ()) { 121 | if (indegree_it -> second == 0) { 122 | endnode_list.push_back (m_node_map[indegree_it -> first]); 123 | } 124 | ++indegree_it; 125 | } 126 | } 127 | Graph::~Graph () { 128 | cout << "free node_map" << endl; 129 | unordered_map::iterator node_map_it = m_node_map.begin (); 130 | while (node_map_it != m_node_map.end ()) { 131 | delete node_map_it -> second; 132 | node_map_it -> second = 0; 133 | ++node_map_it; 134 | } 135 | m_node_map.clear (); 136 | m_adj_table.clear (); 137 | m_reverse_table.clear (); 138 | } 139 | -------------------------------------------------------------------------------- /src/LoopNode.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/LoopNode.h" 2 | #include 3 | using namespace std; 4 | LoopNode::LoopNode (string type, string id, void (*func1) (LoopNode*), int (*func2) (Graph*, int)): Node (type, id) { 5 | m_sub_vgraph = new VirtualGraph (); 6 | m_end_compute_node = 0; 7 | init = func1; 8 | condition = func2; 9 | } 10 | void LoopNode::inner_loop (Graph* compute_graph) { 11 | int idx = 0; 12 | init (this);// 初始化循环 13 | while (condition (compute_graph, idx) == 0) { 14 | m_end_compute_node = m_sub_vgraph -> build_compute_graph (compute_graph, idx); 15 | ++idx; 16 | } 17 | } 18 | LoopNode::~LoopNode () { 19 | cout << "free LoopNode: " << get_name () << endl; 20 | delete m_sub_vgraph; 21 | m_sub_vgraph = 0; 22 | m_end_compute_node = 0; 23 | } 24 | -------------------------------------------------------------------------------- /src/Node.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/Node.h" 2 | using namespace std; 3 | Node::Node (string type, string id) { 4 | m_name.push_back (type); 5 | m_name.push_back (id); 6 | m_invisible = 0; 7 | } 8 | std::string Node::get_name () { 9 | string name = ""; 10 | for (int i = 0; i < m_name.size (); ++i) { 11 | name += m_name[i] + ":"; 12 | } 13 | return name; 14 | } 15 | Node::~Node () { 16 | } 17 | -------------------------------------------------------------------------------- /src/OperatorNode.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/OperatorNode.h" 2 | #include 3 | using namespace std; 4 | OperatorNode::OperatorNode (string type, string id, string idx): Node (type, id) { 5 | m_name.push_back (idx); 6 | m_sum_grad = 0; 7 | 
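// The chain_rule() helper below performs the accumulation step of reverse-mode
// differentiation: `grad` is this node's local gradient with respect to parent
// number parent_idx. If this node already carries an accumulated gradient
// m_sum_grad, the contribution m_sum_grad * grad is stored into (or added to)
// the parent's m_sum_grad; otherwise the rows of grad are summed into a single
// row and accumulated there directly. Contributions from all downstream paths
// therefore add up in each parent.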
m_output = 0; 8 | } 9 | void OperatorNode::chain_rule (Tensor* grad, int parent_idx) { 10 | OperatorNode* parent_op_node = (OperatorNode*) m_parents[parent_idx]; 11 | if (parent_op_node -> m_sum_grad == 0) { 12 | if (m_sum_grad == 0) { 13 | // 把grad压缩为一行 14 | vector shape (2); shape[0] = 1; shape[1] = grad -> m_shape[1]; 15 | parent_op_node -> m_sum_grad = new Tensor (shape); 16 | for (int i = 0; i < grad -> m_size; ++i) { 17 | int idx = i % parent_op_node -> m_sum_grad -> m_size; 18 | parent_op_node -> m_sum_grad -> m_tensor[idx] += grad -> m_tensor[i]; 19 | } 20 | } else { 21 | parent_op_node -> m_sum_grad = m_sum_grad -> matrix_mult (grad); 22 | } 23 | } else { 24 | if (m_sum_grad == 0) { 25 | // 把grad压缩为一行 26 | for (int i = 0; i < grad -> m_size; ++i) { 27 | int idx = i % parent_op_node -> m_sum_grad -> m_size; 28 | parent_op_node -> m_sum_grad -> m_tensor[idx] += grad -> m_tensor[i]; 29 | } 30 | } else { 31 | parent_op_node -> m_sum_grad -> add (m_sum_grad -> matrix_mult (grad), parent_op_node -> m_sum_grad); 32 | } 33 | } 34 | } 35 | void OperatorNode::op () { 36 | } 37 | void OperatorNode::grad_op () { 38 | } 39 | void OperatorNode::release_tensor () { 40 | } 41 | OperatorNode::~OperatorNode () { 42 | // cout << "free operatorNode:" << get_name () << endl; 43 | } 44 | -------------------------------------------------------------------------------- /src/Optimizer.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/Optimizer.h" 2 | #include "../include/OperatorNode.h" 3 | #include "../include/Tensor.h" 4 | #include 5 | using namespace std; 6 | Optimizer::Optimizer (float a) { 7 | m_a = a; 8 | } 9 | void Optimizer::optimize (vector &topo_results) { 10 | for (int i = 0; i < topo_results.size (); ++i) {// 计算梯度 11 | ((OperatorNode*) topo_results[i]) -> grad_op (); 12 | } 13 | for (int i = 0; i < topo_results.size (); ++i) {// 更新 14 | OperatorNode* op_node = (OperatorNode*) topo_results[i]; 15 | if (op_node -> m_sum_grad != 0 && op_node -> m_name[0] == "Parameter") { 16 | for (int j = 0; j < op_node -> m_output -> m_size; ++j) { 17 | op_node -> m_output -> m_tensor[j] -= m_a * op_node -> m_sum_grad -> m_tensor[j]; 18 | } 19 | } 20 | } 21 | } 22 | Optimizer::~Optimizer () { 23 | cout << "free optimizer" << endl; 24 | } 25 | -------------------------------------------------------------------------------- /src/Tensor.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../include/Tensor.h" 6 | #include "../include/multi_thread/matrix_task.h" 7 | #include "../include/multi_thread/thread_pool.h" 8 | using namespace std; 9 | 10 | Tensor::Tensor (vector shape, int need_init) { 11 | m_size = 1; 12 | for (int i = 0; i < shape.size (); ++i) { 13 | m_shape.push_back (shape[i]); 14 | m_size *= shape[i]; 15 | } 16 | m_tensor = new float[m_size]; 17 | if (need_init == 1) { 18 | for (int i = 0; i < m_size; ++i) { 19 | m_tensor[i] = 0.0; 20 | } 21 | } 22 | } 23 | 24 | Tensor::Tensor (vector shape, float data[]) { 25 | m_size = 1; 26 | for (int i = 0; i < shape.size (); ++i) { 27 | m_shape.push_back (shape[i]); 28 | m_size *= shape[i]; 29 | } 30 | m_tensor = new float[m_size]; 31 | for (int i = 0; i < m_size; ++i) { 32 | m_tensor[i] = data[i]; 33 | } 34 | } 35 | 36 | float Tensor::get_value (vector idxs) { 37 | int idx = 0; 38 | int t = 1; 39 | for (int i = idxs.size () - 1; i >= 0; --i) { 40 | idx += idxs[i] * t; 41 | t *= m_shape[i]; 42 | } 43 | 
return m_tensor[idx]; 44 | } 45 | 46 | void Tensor::set_value (vector idxs, float value) { 47 | int idx = 0; 48 | int t = 1; 49 | for (int i = idxs.size () - 1; i >= 0; --i) { 50 | idx += idxs[i] * t; 51 | t *= m_shape[i]; 52 | } 53 | m_tensor[idx] = value; 54 | } 55 | 56 | Tensor* Tensor::matrix_mult (Tensor* tensor) { 57 | Tensor* result = 0; 58 | if (m_shape[1] == tensor -> m_shape[0]) { 59 | vector result_shape (2); 60 | result_shape[0] = m_shape[0]; 61 | result_shape[1] = tensor -> m_shape[1]; 62 | result = new Tensor (result_shape, 0); 63 | int idx0 = 0, idx1 = 0, idx2 = 0; 64 | vector task_list; 65 | for (int i = 0; i < m_shape[0]; ++i) { 66 | for (int j = 0; j < tensor -> m_shape[1]; ++j) { 67 | /*float r = 0; 68 | float compensation = 0.0; 69 | for (int k = 0; k < m_shape[1]; ++k) { 70 | idx0 = i * m_shape[1] + k; 71 | idx1 = k * tensor -> m_shape[1] + j; 72 | // Kahan's Summation Formula 73 | // r += m_tensor[idx0] * tensor -> m_tensor[idx1]; 74 | float y = m_tensor[idx0] * tensor -> m_tensor[idx1] - compensation;// 补偿 75 | float t = r + y;// 发生舍入 76 | compensation = (t - r) - y;// 记录下舍入误差 77 | r = t; 78 | } 79 | idx2 = i * tensor -> m_shape[1] + j; 80 | result -> m_tensor[idx2] = r;*/ 81 | task_list.push_back (new matrix_mult_task (this, tensor, result, i, j)); 82 | } 83 | } 84 | (thread_pool::get_instance ()) -> add_job_list (task_list); 85 | } 86 | return result; 87 | } 88 | 89 | Tensor* Tensor::scalar_mult (float scalar) { 90 | Tensor* result = new Tensor (m_shape, 0); 91 | /*for (int i = 0; i < m_size; ++i) { 92 | result -> m_tensor[i] = m_tensor[i] * scalar; 93 | }*/ 94 | vector task_list; 95 | int thread_num = (thread_pool::get_instance ()) -> m_worker_num; 96 | for (int i = 0; i < thread_num; ++i) { 97 | task_list.push_back (new matrix_scalar_mult_task (this, scalar, result, i, thread_num)); 98 | } 99 | (thread_pool::get_instance ()) -> add_job_list (task_list); 100 | return result; 101 | } 102 | 103 | void Tensor::scalar_acc_mult (float scalar) { 104 | /*for (int i = 0; i < m_size; ++i) { 105 | m_tensor[i] = m_tensor[i] * scalar; 106 | }*/ 107 | vector task_list; 108 | int thread_num = (thread_pool::get_instance ()) -> m_worker_num; 109 | for (int i = 0; i < thread_num; ++i) { 110 | task_list.push_back (new matrix_scalar_mult_task (this, scalar, this, i, thread_num)); 111 | } 112 | (thread_pool::get_instance ()) -> add_job_list (task_list); 113 | } 114 | 115 | void Tensor::element_square () { 116 | for (int i = 0; i < m_size; ++i) { 117 | m_tensor[i] = m_tensor[i] * m_tensor[i]; 118 | } 119 | } 120 | 121 | float Tensor::element_abs_sum () { 122 | float result = 0; 123 | float compensation = 0.0; 124 | for (int i = 0; i < m_size; ++i) { 125 | // result += fabs (m_tensor[i]); 126 | // Kahan's Summation Formula 127 | float y = fabs (m_tensor[i]) - compensation;// 补偿 128 | float t = result + y;// 发生舍入 129 | compensation = (t - result) - y;// 记录本次的舍入误差 130 | result = t; 131 | } 132 | return result; 133 | } 134 | 135 | float Tensor::element_square_sum () { 136 | float result = 0; 137 | float compensation = 0.0; 138 | for (int i = 0; i < m_size; ++i) { 139 | // result += m_tensor[i] * m_tensor[i]; 140 | // Kahan's Summation Formula 141 | float y = m_tensor[i] * m_tensor[i] - compensation; 142 | float t = result + y; 143 | compensation = (t - result) - y; 144 | result = t; 145 | } 146 | return result; 147 | } 148 | 149 | Tensor* Tensor::element_mult (Tensor* tensor) { 150 | Tensor* result = 0; 151 | int same_shape = 1; 152 | if (m_shape.size () == tensor -> m_shape.size ()) { 
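// Element-wise multiplication requires both tensors to have exactly the same
// shape; no broadcasting is implemented, and a null pointer is returned when
// the shapes differ (the same convention used by add(Tensor*)).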
153 | for (int i = 0; i < m_shape.size (); ++i) { 154 | if (m_shape[i] != tensor -> m_shape[i]) { 155 | same_shape = 0; 156 | break; 157 | } 158 | } 159 | } else { 160 | same_shape = 0; 161 | } 162 | if (same_shape == 1) { 163 | result = new Tensor (tensor -> m_shape, 0); 164 | for (int i = 0; i < m_size; ++i) { 165 | result -> m_tensor[i] = m_tensor[i] * tensor -> m_tensor[i]; 166 | } 167 | } 168 | return result; 169 | } 170 | 171 | void Tensor::add (Tensor* tensor, Tensor* result) { 172 | /*for (int i = 0; i < m_size; ++i) { 173 | result -> m_tensor[i] = m_tensor[i] + tensor -> m_tensor[i]; 174 | }*/ 175 | vector task_list; 176 | int thread_num = (thread_pool::get_instance ()) -> m_worker_num; 177 | for (int i = 0; i < thread_num; ++i) { 178 | task_list.push_back (new matrix_add_task (this, tensor, result, i, thread_num)); 179 | } 180 | (thread_pool::get_instance ()) -> add_job_list (task_list); 181 | } 182 | 183 | Tensor* Tensor::add (Tensor* tensor) { 184 | Tensor* result = 0; 185 | int same_shape = 1; 186 | if (m_shape.size () == tensor -> m_shape.size ()) { 187 | for (int i = 0; i < m_shape.size (); ++i) { 188 | if (m_shape[i] != tensor -> m_shape[i]) { 189 | same_shape = 0; 190 | break; 191 | } 192 | } 193 | } else { 194 | same_shape = 0; 195 | } 196 | 197 | if (same_shape == 1) { 198 | result = new Tensor (tensor -> m_shape, 0); 199 | /*for (int i = 0; i < m_size; ++i) { 200 | result -> m_tensor[i] = m_tensor[i] + tensor -> m_tensor[i]; 201 | }*/ 202 | vector task_list; 203 | int thread_num = (thread_pool::get_instance ()) -> m_worker_num; 204 | for (int i = 0; i < thread_num; ++i) { 205 | task_list.push_back (new matrix_add_task (this, tensor, result, i, thread_num)); 206 | } 207 | (thread_pool::get_instance ()) -> add_job_list (task_list); 208 | } 209 | return result; 210 | } 211 | 212 | void Tensor::init () { 213 | // srand (time (0)); 214 | for (int i = 0; i < m_size; ++i) { 215 | m_tensor[i] = (rand () % 1000) / 1000.0 - 0.5; 216 | } 217 | } 218 | 219 | void Tensor::display () { 220 | vector idxs0 (2); 221 | for (int i = 0; i < m_shape[0]; ++i) { 222 | for (int j = 0; j < m_shape[1]; ++j) { 223 | idxs0[0] = i; idxs0[1] = j; 224 | cout << get_value (idxs0) << " "; 225 | } 226 | cout << endl; 227 | } 228 | } 229 | 230 | Tensor::~Tensor () { 231 | delete[] m_tensor; 232 | } 233 | -------------------------------------------------------------------------------- /src/VirtualGraph.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/VirtualGraph.h" 2 | #include "../include/VirtualNode.h" 3 | #include "../include/LoopNode.h" 4 | #include "../include/OperatorNode.h" 5 | #include "../include/op_node/Input.h" 6 | #include 7 | #include 8 | using namespace std; 9 | Node* VirtualGraph::build_compute_graph (Graph* compute_graph, int idx) {// 输入计算图的引用 10 | vector topo_result; 11 | topological_sort (m_adj_table, topo_result); 12 | Node* end_node = 0; 13 | for (int i = 0; i < topo_result.size (); ++i) {// 构建 14 | // cout << topo_result[i] -> get_name () << endl; 15 | if (topo_result[i] -> m_name[0] == "Loop") {// 如果是循环节点 16 | LoopNode* loop_node = (LoopNode*) topo_result[i]; 17 | loop_node -> inner_loop (compute_graph);// 执行循环 18 | end_node = loop_node -> m_end_compute_node; 19 | } else if (topo_result[i] -> m_name[0] == "Branch") {// 如果是分支结点 20 | // do nothing 21 | } else {// 普通虚拟节点 22 | VirtualNode* v_node = (VirtualNode*) topo_result[i]; 23 | if (v_node -> m_parents.size () == 0) {// 该虚拟节点没有依赖的虚拟节点 24 | Node* op_node = v_node -> 
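// get_op_node(idx) is a memoizing factory: the VirtualNode creates the
// concrete OperatorNode for loop iteration idx on first request and caches it
// in m_op_node_map under the key "type:id:idx:", so rebuilding the compute
// graph on later training steps reuses the same node (and its parameters)
// instead of allocating a new one.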
get_op_node (idx); 25 | compute_graph -> add_node ("", op_node);// 向计算图中添加节点 26 | if (op_node -> m_name[0] == "Input") { 27 | ((Input*) op_node) -> op ((Input*) op_node);// 执行该计算节点 28 | } else { 29 | ((OperatorNode*) op_node) -> op ();// 执行该计算节点 30 | } 31 | end_node = op_node; 32 | } else { 33 | vector parents_op_node; 34 | v_node -> get_parents_op_nodes (idx, compute_graph, parents_op_node); 35 | int need_op_node = 1; 36 | for (int i = 0; i < parents_op_node.size (); ++i) { 37 | if (parents_op_node[i] == 0) {// 存在没有生成的依赖的计算节点 38 | need_op_node = 0; 39 | } 40 | } 41 | if (need_op_node == 1) {// 当前计算节点可以生成 42 | Node* op_node = v_node -> get_op_node (idx); 43 | for (int i = 0; i < parents_op_node.size (); ++i) { 44 | compute_graph -> add_node (parents_op_node[i] -> get_name (), op_node); 45 | } 46 | ((OperatorNode*) op_node) -> op ();// 执行该计算节点 47 | end_node = op_node; 48 | } 49 | } 50 | } 51 | } 52 | return end_node; 53 | } 54 | VirtualGraph::~VirtualGraph () { 55 | cout << "virtual graph free" << endl; 56 | // 释放虚拟节点 57 | cout << "virtual node_map free" << endl; 58 | unordered_map::iterator node_map_it = m_node_map.begin (); 59 | while (node_map_it != m_node_map.end ()) { 60 | delete node_map_it -> second; 61 | node_map_it -> second = 0; 62 | ++node_map_it; 63 | } 64 | m_node_map.clear (); 65 | m_adj_table.clear (); 66 | m_reverse_table.clear (); 67 | } 68 | -------------------------------------------------------------------------------- /src/VirtualNode.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/VirtualNode.h" 2 | #include "../include/BranchNode.h" 3 | #include "../include/LoopNode.h" 4 | #include "../include/op_node/Add.h" 5 | #include "../include/op_node/Input.h" 6 | #include "../include/op_node/Mult.h" 7 | #include "../include/op_node/Bias.h" 8 | #include "../include/op_node/Minus.h" 9 | #include "../include/op_node/SquareSum.h" 10 | #include "../include/op_node/AbsSum.h" 11 | #include "../include/op_node/Sigmoid.h" 12 | #include "../include/op_node/Parameter.h" 13 | #include "../include/op_node/Dropout.h" 14 | #include 15 | #include 16 | using namespace std; 17 | VirtualNode::VirtualNode (string type, string id, int share_parameter, float keep_rate): Node (type, id) { 18 | m_share_parameter = share_parameter; 19 | m_keep_rate = keep_rate; 20 | input_op = 0; 21 | } 22 | void VirtualNode::get_parents_op_nodes (int idx, Graph* compute_graph, vector &node_list) { 23 | ostringstream oss; 24 | oss << idx << ":"; 25 | for (int i = 0; i < m_parents.size (); ++i) { 26 | if (m_parents[i] -> m_name[0] == "Branch") { 27 | node_list.push_back (((BranchNode*) m_parents[i]) -> choose_node (idx, compute_graph, (BranchNode*) m_parents[i])); 28 | } else if (m_parents[i] -> m_name[0] == "Loop") { 29 | node_list.push_back (((LoopNode*) m_parents[i]) -> m_end_compute_node); 30 | } else { 31 | string op_node_name = m_parents[i] -> get_name () + oss.str (); 32 | node_list.push_back (compute_graph -> get_node (op_node_name)); 33 | } 34 | } 35 | } 36 | 37 | Node* VirtualNode::get_op_node (int idx) {// 一个OperatorNode工厂 38 | ostringstream oss; 39 | oss << idx; 40 | Node* op_node = 0; 41 | string op_node_name = m_name[0] + ":" + m_name[1] + ":" + oss.str () + ":"; 42 | if (m_op_node_map.find (op_node_name) == m_op_node_map.end ()) {// 之前没生成过该计算节点 43 | if (m_name[0] == "Add") { 44 | op_node = new Add (m_name[0], m_name[1], oss.str ()); 45 | } else if (m_name[0] == "Input") { 46 | if (m_input_data.size () == 0) { 47 | cout << "input data is not 
initialize" << endl; 48 | } else { 49 | op_node = new Input (m_name[0], m_name[1], oss.str (), m_input_data, input_op); 50 | } 51 | } else if (m_name[0] == "Parameter") { 52 | if (m_data == 0) { 53 | cout << "parameter node is not initialize" << endl; 54 | } else { 55 | op_node = new Parameter (m_name[0], m_name[1], oss.str (), m_data, m_share_parameter); 56 | } 57 | } else if (m_name[0] == "SquareSum") { 58 | op_node = new SquareSum (m_name[0], m_name[1], oss.str ()); 59 | } else if (m_name[0] == "AbsSum") { 60 | op_node = new AbsSum (m_name[0], m_name[1], oss.str ()); 61 | } else if (m_name[0] == "Mult") { 62 | op_node = new Mult (m_name[0], m_name[1], oss.str ()); 63 | } else if (m_name[0] == "Minus") { 64 | op_node = new Minus (m_name[0], m_name[1], oss.str ()); 65 | } else if (m_name[0] == "Sigmoid") { 66 | op_node = new Sigmoid (m_name[0], m_name[1], oss.str ()); 67 | } else if (m_name[0] == "Bias") { 68 | op_node = new Bias (m_name[0], m_name[1], oss.str ()); 69 | } else if (m_name[0] == "Dropout") { 70 | if (m_data == 0) { 71 | cout << "dropout filter shape is not set" << endl; 72 | } else { 73 | op_node = new Dropout (m_name[0], m_name[1], oss.str (), m_data, m_keep_rate); 74 | } 75 | } else { 76 | cout << "op node name error" << endl; 77 | } 78 | m_op_node_map[op_node_name] = op_node; 79 | } else {// 直接找到虚拟节点生成过的该计算节点 80 | op_node = m_op_node_map[op_node_name]; 81 | } 82 | return op_node; 83 | } 84 | VirtualNode::~VirtualNode () { 85 | // cout << "free virtualNode: " << get_name () << endl; 86 | if (m_data != 0) { 87 | delete m_data; 88 | } 89 | for (int i = 0; i < m_input_data.size (); ++i) { 90 | delete m_input_data[i]; 91 | } 92 | vector ().swap (m_input_data); 93 | // 释放每个虚拟节点生成的计算节点 94 | unordered_map::iterator op_node_map_it = m_op_node_map.begin (); 95 | while (op_node_map_it != m_op_node_map.end ()) { 96 | delete op_node_map_it -> second; 97 | ++op_node_map_it; 98 | } 99 | m_op_node_map.clear (); 100 | } 101 | -------------------------------------------------------------------------------- /src/cuda/Tensor.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../include/Tensor.h" 5 | #include "../../include/cuda/cuda_lib.h" 6 | using namespace std; 7 | 8 | Tensor::Tensor (vector shape, int need_init) { 9 | m_size = 1; 10 | for (int i = 0; i < shape.size (); ++i) { 11 | m_shape.push_back (shape[i]); 12 | m_size *= shape[i]; 13 | } 14 | m_tensor = new float[m_size]; 15 | if (need_init == 1) { 16 | for (int i = 0; i < m_size; ++i) { 17 | m_tensor[i] = 0.0; 18 | } 19 | } 20 | } 21 | 22 | Tensor::Tensor (vector shape, float data[]) { 23 | m_size = 1; 24 | for (int i = 0; i < shape.size (); ++i) { 25 | m_shape.push_back (shape[i]); 26 | m_size *= shape[i]; 27 | } 28 | m_tensor = new float[m_size]; 29 | for (int i = 0; i < m_size; ++i) { 30 | m_tensor[i] = data[i]; 31 | } 32 | } 33 | 34 | float Tensor::get_value (vector idxs) { 35 | int idx = 0; 36 | int t = 1; 37 | for (int i = idxs.size () - 1; i >= 0; --i) { 38 | idx += idxs[i] * t; 39 | t *= m_shape[i]; 40 | } 41 | return m_tensor[idx]; 42 | } 43 | 44 | void Tensor::set_value (vector idxs, float value) { 45 | int idx = 0; 46 | int t = 1; 47 | for (int i = idxs.size () - 1; i >= 0; --i) { 48 | idx += idxs[i] * t; 49 | t *= m_shape[i]; 50 | } 51 | m_tensor[idx] = value; 52 | } 53 | 54 | Tensor* Tensor::matrix_mult (Tensor* tensor) { 55 | Tensor* result = 0; 56 | if (m_shape[1] == tensor -> m_shape[0]) { 57 | vector result_shape (2); 58 | 
result_shape[0] = m_shape[0]; 59 | result_shape[1] = tensor -> m_shape[1]; 60 | result = new Tensor (result_shape, 0); 61 | // 调用cuda 62 | cuda_matrix_mult (m_tensor, tensor -> m_tensor, result -> m_tensor, m_shape[0], m_shape[1], tensor -> m_shape[0], tensor -> m_shape[1]); 63 | } 64 | return result; 65 | } 66 | 67 | Tensor* Tensor::scalar_mult (float scalar) { 68 | Tensor* result = new Tensor (m_shape, 0); 69 | // 调用cuda 70 | cuda_scalar_tensor_mult (m_tensor, result -> m_tensor, scalar, m_size); 71 | return result; 72 | } 73 | 74 | void Tensor::scalar_acc_mult (float scalar) { 75 | cuda_scalar_tensor_mult (m_tensor, m_tensor, scalar, m_size); 76 | } 77 | 78 | float Tensor::element_abs_sum () { 79 | float result = 0; 80 | // 调用cuda 81 | result = cuda_element_abs_sum (m_tensor, m_size); 82 | return result; 83 | } 84 | 85 | float Tensor::element_square_sum () { 86 | float result = 0; 87 | // 调用cuda 88 | result = cuda_element_square_sum (m_tensor, m_size); 89 | return result; 90 | } 91 | 92 | void Tensor::element_square () { 93 | // 调用cuda 94 | cuda_element_square (m_tensor, m_size); 95 | } 96 | 97 | Tensor* Tensor::element_mult (Tensor* tensor) { 98 | Tensor* result = 0; 99 | int same_shape = 1; 100 | if (m_shape.size () == tensor -> m_shape.size ()) { 101 | for (int i = 0; i < m_shape.size (); ++i) { 102 | if (m_shape[i] != tensor -> m_shape[i]) { 103 | same_shape = 0; 104 | break; 105 | } 106 | } 107 | } else { 108 | same_shape = 0; 109 | } 110 | if (same_shape == 1) { 111 | result = new Tensor (tensor -> m_shape, 0); 112 | // 调用cuda 113 | cuda_element_mult (m_tensor, tensor -> m_tensor, result -> m_tensor, m_size); 114 | } 115 | return result; 116 | } 117 | 118 | void Tensor::add (Tensor* tensor, Tensor* result) { 119 | // 调用cuda 120 | cuda_tensor_add (m_tensor, tensor -> m_tensor, result -> m_tensor, m_size); 121 | } 122 | 123 | Tensor* Tensor::add (Tensor* tensor) { 124 | Tensor* result = 0; 125 | int same_shape = 1; 126 | if (m_shape.size () == tensor -> m_shape.size ()) { 127 | for (int i = 0; i < m_shape.size (); ++i) { 128 | if (m_shape[i] != tensor -> m_shape[i]) { 129 | same_shape = 0; 130 | break; 131 | } 132 | } 133 | } else { 134 | same_shape = 0; 135 | } 136 | 137 | if (same_shape == 1) { 138 | result = new Tensor (tensor -> m_shape, 0); 139 | // 调用cuda 140 | cuda_tensor_add (m_tensor, tensor -> m_tensor, result -> m_tensor, m_size); 141 | } 142 | return result; 143 | } 144 | 145 | void Tensor::init () { 146 | // srand (time (0)); 147 | for (int i = 0; i < m_size; ++i) { 148 | m_tensor[i] = (rand () % 1000) / 1000.0 - 0.5; 149 | } 150 | } 151 | 152 | void Tensor::display () { 153 | vector idxs0 (2); 154 | for (int i = 0; i < m_shape[0]; ++i) { 155 | for (int j = 0; j < m_shape[1]; ++j) { 156 | idxs0[0] = i; idxs0[1] = j; 157 | cout << get_value (idxs0) << " "; 158 | } 159 | cout << endl; 160 | } 161 | } 162 | 163 | Tensor::~Tensor () { 164 | delete m_tensor; 165 | } 166 | 167 | -------------------------------------------------------------------------------- /src/cuda/cuda_lib.cu: -------------------------------------------------------------------------------- 1 | #include "../../include/cuda/cuda_lib.h" 2 | #include 3 | #include 4 | #include 5 | 6 | __global__ void matrix_mult (float* A, float* B, float* C, int a_row, int a_col, int b_row, int b_col) { 7 | // share memory 缓存A和B中对应的一对子矩阵,大小为BLOCK_SIZE * BLOCK_SIZE 8 | __shared__ float A_sub[BLOCK_SIZE * BLOCK_SIZE]; 9 | __shared__ float B_sub[BLOCK_SIZE * BLOCK_SIZE]; 10 | // 获取当前线程所在的block和thread的id 11 | int block_id_row = 
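// Tiled matrix multiply: each thread block computes one BLOCK_SIZE x BLOCK_SIZE
// tile of C. The shared dimension is walked in BLOCK_SIZE-wide slices; in each
// slice every thread loads one element of the A tile and one of the B tile
// into shared memory (padding with 0 outside the matrix), __syncthreads()
// makes the tiles visible to the whole block, each thread accumulates its
// partial dot product with Kahan compensation to limit float rounding error,
// and a second __syncthreads() keeps the tiles from being overwritten before
// every thread has finished reading them.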
blockIdx.x; 12 | int block_id_col = blockIdx.y; 13 | int thread_id_row = threadIdx.x; 14 | int thread_id_col = threadIdx.y; 15 | // 计算当前线程对应A矩阵的行号和B矩阵的列号,也就是C矩阵的行号和列号 16 | int c_row_id = block_id_row * BLOCK_SIZE + thread_id_row; 17 | int c_col_id = block_id_col * BLOCK_SIZE + thread_id_col; 18 | 19 | int sbmtx_begin = 0; 20 | float c = 0.0; 21 | float compensation = 0.0; 22 | for (sbmtx_begin = 0; sbmtx_begin < a_col; sbmtx_begin += BLOCK_SIZE) {// 遍历每一对A,B矩阵c_row_id,c_col_id所在行列的子区间 23 | // 当前线程加载A,B矩阵中对应子矩阵的指定元素,保证当前block中的线程同时加载完一对A,B子矩阵 24 | A_sub[thread_id_row * BLOCK_SIZE + thread_id_col] = (c_row_id < a_row && sbmtx_begin + thread_id_col < a_col) ? A[c_row_id * a_col + sbmtx_begin + thread_id_col] : 0; 25 | B_sub[thread_id_row * BLOCK_SIZE + thread_id_col] = (c_col_id < b_col && sbmtx_begin + thread_id_row < b_row) ? B[(sbmtx_begin + thread_id_row) * b_col + c_col_id] : 0; 26 | // 等待同一个block中的线程加载完毕 27 | __syncthreads (); 28 | // 计算A矩阵c_row_id行和B矩阵c_col_id列一个区间的内积,并将每个区间结果累计 29 | #pragma unroll 30 | for (int i = 0; i < BLOCK_SIZE; ++i) { 31 | // c += A_sub[thread_id_row * BLOCK_SIZE + i] * B_sub[i * BLOCK_SIZE + thread_id_col]; 32 | // Kahan's Summation Formula 33 | float y = A_sub[thread_id_row * BLOCK_SIZE + i] * B_sub[i * BLOCK_SIZE + thread_id_col] - compensation; 34 | float t = c + y;// 发生舍入 35 | compensation = (t - c) - y;// 记录下舍入误差 36 | c = t; 37 | } 38 | __syncthreads (); 39 | } 40 | if (c_row_id < a_row && c_col_id < b_col) { 41 | C[c_row_id * b_col + c_col_id] = c; 42 | } 43 | } 44 | void cuda_matrix_mult (float* A, float* B, float* C, int a_row, int a_col, int b_row, int b_col) {// A*B=C 45 | int size_a = a_row * a_col; 46 | int size_b = b_row * b_col; 47 | int size_c = a_row * b_col; 48 | // 在显存上分配空间 49 | float* dev_A, *dev_B, *dev_C; 50 | cudaMalloc ((void**) &dev_A, sizeof (float) * size_a); 51 | cudaMalloc ((void**) &dev_B, sizeof (float) * size_b); 52 | cudaMalloc ((void**) &dev_C, sizeof (float) * size_c); 53 | // copy数据到显存 54 | cudaMemcpy (dev_A, A, sizeof (float) * size_a, cudaMemcpyHostToDevice); 55 | cudaMemcpy (dev_B, B, sizeof (float) * size_b, cudaMemcpyHostToDevice); 56 | // 把结果C矩阵分割成grid_row * grid_col个BLOCK_SIZE * BLOCK_SIZE尺寸的block,可以认为C矩阵对应一个Grid 57 | int grid_row = a_row / BLOCK_SIZE + (a_row % BLOCK_SIZE == 0 ? 0 : 1); 58 | int grid_col = b_col / BLOCK_SIZE + (b_col % BLOCK_SIZE == 0 ? 
0 : 1); 59 | dim3 grid (grid_row, grid_col); 60 | dim3 block (BLOCK_SIZE, BLOCK_SIZE); 61 | // 运行kernal函数 62 | matrix_mult <<>> (dev_A, dev_B, dev_C, a_row, a_col, b_row, b_col); 63 | // 把显存数据copy回内存 64 | cudaMemcpy (C, dev_C, sizeof (float) * size_c, cudaMemcpyDeviceToHost); 65 | // 释放显存 66 | cudaFree (dev_A); 67 | cudaFree (dev_B); 68 | cudaFree (dev_C); 69 | } 70 | 71 | __global__ void tensor_add (float* A, float* B, float* C, int size) { 72 | int thread_id = threadIdx.x; 73 | int block_id = blockIdx.x; 74 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 75 | int read_offset = GRID_SIZE * BLOCK_SIZE; 76 | for (int i = begin_idx; i < size; i += read_offset) {// 这种方式尽可能保证显存数据的连续读取 77 | C[i] = A[i] + B[i]; 78 | } 79 | } 80 | void cuda_tensor_add (float* A, float* B, float* C, int size) { 81 | float* dev_A, *dev_B, *dev_C; 82 | cudaMalloc ((void**) &dev_A, sizeof (float) * size); 83 | cudaMalloc ((void**) &dev_B, sizeof (float) * size); 84 | cudaMalloc ((void**) &dev_C, sizeof (float) * size); 85 | 86 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 87 | cudaMemcpy (dev_B, B, sizeof (float) * size, cudaMemcpyHostToDevice); 88 | tensor_add <<>> (dev_A, dev_B, dev_C, size); 89 | cudaMemcpy (C, dev_C, sizeof (float) * size, cudaMemcpyDeviceToHost); 90 | // 释放显存 91 | cudaFree (dev_A); 92 | cudaFree (dev_B); 93 | cudaFree (dev_C); 94 | } 95 | 96 | __global__ void scalar_tensor_mult (float* A, float* result, float s, int size) { 97 | int thread_id = threadIdx.x; 98 | int block_id = blockIdx.x; 99 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 100 | int read_offset = GRID_SIZE * BLOCK_SIZE; 101 | for (int i = begin_idx; i < size; i += read_offset) {// 这种方式尽可能保证显存数据的连续读取 102 | result[i] = A[i] * s; 103 | } 104 | } 105 | void cuda_scalar_tensor_mult (float* A, float* result, float s, int size) { 106 | float* dev_A, *dev_result; 107 | cudaMalloc ((void**) &dev_A, sizeof (float) * size); 108 | cudaMalloc ((void**) &dev_result, sizeof (float) * size); 109 | 110 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 111 | scalar_tensor_mult <<>> (dev_A, dev_result, s, size); 112 | cudaMemcpy (result, dev_result, sizeof (float) * size, cudaMemcpyDeviceToHost); 113 | // 释放显存 114 | cudaFree (dev_A); 115 | cudaFree (dev_result); 116 | } 117 | 118 | __global__ void element_abs_sum (float* A, int size, float* results) { 119 | __shared__ float sub_results[BLOCK_SIZE]; 120 | int thread_id = threadIdx.x; 121 | int block_id = blockIdx.x; 122 | 123 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 124 | int read_offset = GRID_SIZE * BLOCK_SIZE; 125 | if (begin_idx >= size) { 126 | sub_results[thread_id] = 0; 127 | } else { 128 | float r = 0; 129 | for (int i = begin_idx; i < size; i += read_offset) { 130 | r += fabs(A[i]); 131 | } 132 | sub_results[thread_id] = r; 133 | } 134 | // 将同一个block中得到的结果汇总到global存储中的results中 135 | __syncthreads (); 136 | int merge_offset = 1; 137 | int mask = 2; 138 | while (merge_offset <= BLOCK_SIZE) { 139 | if (thread_id % mask == 0 && thread_id + merge_offset < BLOCK_SIZE) { 140 | sub_results[thread_id] += sub_results[thread_id + merge_offset]; 141 | } 142 | merge_offset = merge_offset * 2; 143 | mask = mask * 2; 144 | __syncthreads (); 145 | } 146 | if (thread_id == 0) { 147 | results[block_id] = sub_results[0]; 148 | } 149 | } 150 | float cuda_element_abs_sum (float* A, int size) { 151 | float* results = (float*) malloc (sizeof (float) * GRID_SIZE); 152 | float* dev_A; 153 | float* dev_results; 154 | cudaMalloc ((void**) &dev_A, 
sizeof (float) * size); 155 | cudaMalloc ((void**) &dev_results, sizeof (float) * GRID_SIZE); 156 | 157 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 158 | 159 | // 运行kernal函数 160 | element_abs_sum <<>> (dev_A, size, dev_results); 161 | 162 | cudaMemcpy (results, dev_results, sizeof (float) * GRID_SIZE, cudaMemcpyDeviceToHost); 163 | cudaFree (dev_results); 164 | cudaFree (dev_A); 165 | float abs_sum = 0; 166 | // 在cpu端将显卡传回的数据汇总 167 | for (int i = 0; i < GRID_SIZE; ++i) { 168 | abs_sum += results[i]; 169 | } 170 | free (results); 171 | return abs_sum; 172 | } 173 | 174 | __global__ void element_square_sum (float* A, int size, float* results) { 175 | __shared__ float sub_results[BLOCK_SIZE]; 176 | int thread_id = threadIdx.x; 177 | int block_id = blockIdx.x; 178 | 179 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 180 | int read_offset = GRID_SIZE * BLOCK_SIZE; 181 | if (begin_idx >= size) { 182 | sub_results[thread_id] = 0; 183 | } else { 184 | float r = 0; 185 | for (int i = begin_idx; i < size; i += read_offset) { 186 | r += A[i] * A[i]; 187 | } 188 | sub_results[thread_id] = r; 189 | } 190 | // 将同一个block中得到的结果汇总到global存储中的results中 191 | __syncthreads (); 192 | int merge_offset = 1; 193 | int mask = 2; 194 | while (merge_offset <= BLOCK_SIZE) { 195 | if (thread_id % mask == 0 && thread_id + merge_offset < BLOCK_SIZE) { 196 | sub_results[thread_id] += sub_results[thread_id + merge_offset]; 197 | } 198 | merge_offset = merge_offset * 2; 199 | mask = mask * 2; 200 | __syncthreads (); 201 | } 202 | if (thread_id == 0) { 203 | results[block_id] = sub_results[0]; 204 | } 205 | } 206 | float cuda_element_square_sum (float* A, int size) { 207 | float* results = (float*) malloc (sizeof (float) * GRID_SIZE); 208 | float* dev_A; 209 | float* dev_results; 210 | cudaMalloc ((void**) &dev_A, sizeof (float) * size); 211 | cudaMalloc ((void**) &dev_results, sizeof (float) * GRID_SIZE); 212 | 213 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 214 | 215 | // 运行kernal函数 216 | element_square_sum <<>> (dev_A, size, dev_results); 217 | 218 | cudaMemcpy (results, dev_results, sizeof (float) * GRID_SIZE, cudaMemcpyDeviceToHost); 219 | cudaFree (dev_results); 220 | cudaFree (dev_A); 221 | float square_sum = 0; 222 | // 在cpu端将显卡传回的数据汇总 223 | for (int i = 0; i < GRID_SIZE; ++i) { 224 | square_sum += results[i]; 225 | } 226 | free (results); 227 | return square_sum; 228 | } 229 | 230 | __global__ void element_square (float* A, int size) { 231 | int thread_id = threadIdx.x; 232 | int block_id = blockIdx.x; 233 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 234 | int read_offset = GRID_SIZE * BLOCK_SIZE; 235 | for (int i = begin_idx; i < size; i += read_offset) { 236 | A[i] = A[i] * A[i]; 237 | } 238 | } 239 | void cuda_element_square (float* A, int size) { 240 | float* dev_A; 241 | cudaMalloc ((void**) &dev_A, sizeof (float) * size); 242 | 243 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 244 | element_square <<>> (dev_A, size); 245 | cudaMemcpy (A, dev_A, sizeof (float) * size, cudaMemcpyDeviceToHost); 246 | // 释放显存 247 | cudaFree (dev_A); 248 | } 249 | 250 | __global__ void element_mult (float* A, float* B, float* C, int size) { 251 | int thread_id = threadIdx.x; 252 | int block_id = blockIdx.x; 253 | int begin_idx = block_id * BLOCK_SIZE + thread_id; 254 | int read_offset = GRID_SIZE * BLOCK_SIZE; 255 | for (int i = begin_idx; i < size; i += read_offset) { 256 | C[i] = A[i] * B[i]; 257 | } 258 | } 259 | void 
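/* host-side wrapper, same pattern as the other cuda_* helpers: allocate device buffers, copy A and B to the GPU, launch the kernel (apparently one grid of GRID_SIZE blocks with BLOCK_SIZE threads each, judging by the stride the kernels use), copy C back and free the device memory */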
cuda_element_mult (float* A, float* B, float* C, int size) { 260 | float* dev_A, *dev_B, *dev_C; 261 | cudaMalloc ((void**) &dev_A, sizeof (float) * size); 262 | cudaMalloc ((void**) &dev_B, sizeof (float) * size); 263 | cudaMalloc ((void**) &dev_C, sizeof (float) * size); 264 | 265 | cudaMemcpy (dev_A, A, sizeof (float) * size, cudaMemcpyHostToDevice); 266 | cudaMemcpy (dev_B, B, sizeof (float) * size, cudaMemcpyHostToDevice); 267 | element_mult <<>> (dev_A, dev_B, dev_C, size); 268 | cudaMemcpy (C, dev_C, sizeof (float) * size, cudaMemcpyDeviceToHost); 269 | // 释放显存 270 | cudaFree (dev_A); 271 | cudaFree (dev_B); 272 | cudaFree (dev_C); 273 | } 274 | -------------------------------------------------------------------------------- /src/multi_thread/matrix_task.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/multi_thread/matrix_task.h" 2 | using namespace std; 3 | /*矩阵乘法子任务*/ 4 | matrix_mult_task::matrix_mult_task (Tensor* A, Tensor* B, Tensor* C, int a_idx, int b_idx) { 5 | m_A = A; 6 | m_B = B; 7 | m_C = C; 8 | m_a_idx = a_idx; 9 | m_b_idx = b_idx; 10 | } 11 | 12 | void matrix_mult_task::run () { 13 | int a_col = m_A -> m_shape[1]; 14 | float c = 0; 15 | float compensation = 0.0; 16 | int idx0 = 0, idx1 = 0, idx2 = 0; 17 | idx0 = m_a_idx * m_A -> m_shape[1]; 18 | idx1 = m_b_idx; 19 | int offset = m_B -> m_shape[1]; 20 | for (int i = 0; i < a_col; ++i) { 21 | float y = m_A -> m_tensor[idx0] * m_B -> m_tensor[idx1] - compensation;// 补偿 22 | float t = c + y;// 发生舍入 23 | compensation = (t - c) - y;// 记录下舍入误差 24 | c = t; 25 | idx0 += 1; 26 | idx1 += offset; 27 | } 28 | idx2 = m_a_idx * m_C -> m_shape[1] + m_b_idx; 29 | m_C -> m_tensor[idx2] = c; 30 | } 31 | 32 | /*矩阵加法子任务*/ 33 | matrix_add_task::matrix_add_task (Tensor* A, Tensor* B, Tensor* C, int thread_id, int thread_num) { 34 | m_A = A; 35 | m_B = B; 36 | m_C = C; 37 | m_thread_id = thread_id; 38 | m_thread_num = thread_num; 39 | } 40 | 41 | void matrix_add_task::run () { 42 | for (int idx = m_thread_id; idx < m_A -> m_size; idx += m_thread_num) { 43 | m_C -> m_tensor[idx] = m_A -> m_tensor[idx] + m_B -> m_tensor[idx]; 44 | } 45 | } 46 | 47 | /*矩阵标量乘法子任务*/ 48 | matrix_scalar_mult_task::matrix_scalar_mult_task (Tensor* A, float scalar, Tensor* C, int thread_id, int thread_num) { 49 | m_A = A; 50 | m_scalar = scalar; 51 | m_C = C; 52 | m_thread_id = thread_id; 53 | m_thread_num = thread_num; 54 | } 55 | 56 | void matrix_scalar_mult_task::run () { 57 | for (int idx = m_thread_id; idx < m_A -> m_size; idx += m_thread_num) { 58 | m_C -> m_tensor[idx] = m_A -> m_tensor[idx] * m_scalar; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/multi_thread/ring_buffer.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/multi_thread/ring_buffer.h" 2 | #include 3 | ring_buffer::ring_buffer (int size) { 4 | m_size = 1; 5 | while (m_size < size) { 6 | m_size = m_size << 1; 7 | } 8 | m_buffer = new void*[m_size]; 9 | pthread_mutex_init (&m_dequeue_lock, NULL); 10 | pthread_mutex_init (&m_enqueue_lock, NULL); 11 | m_dequeue_ptr = 0; 12 | m_enqueue_ptr = 0; 13 | } 14 | 15 | int ring_buffer::is_empty () {// 获取队列是否为空的快照 16 | if (m_dequeue_ptr == m_enqueue_ptr) { 17 | return 1; 18 | } 19 | return 0; 20 | } 21 | 22 | int ring_buffer::is_full () {// 获取队列是否为满的快照 23 | if (m_dequeue_ptr == ((m_enqueue_ptr + 1) & (m_size - 1))) { 24 | return 1; 25 | } 26 | return 0; 27 | } 28 | 29 | int 
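/* get_element / add_element spin on a trylock and then either fail fast (empty / full queue) or move one void* and advance the pointer; since m_size is rounded up to a power of two, the wrap-around uses the bit mask (m_size - 1) instead of a modulo */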
ring_buffer::get_element (void** data) { 30 | // 非阻塞获取出队锁 31 | while (pthread_mutex_trylock (&m_dequeue_lock) != 0); 32 | if (is_empty ()) { 33 | pthread_mutex_unlock (&m_dequeue_lock); 34 | return 0;// 队列为空 35 | } else { 36 | (*data) = m_buffer[m_dequeue_ptr]; 37 | m_dequeue_ptr = (m_dequeue_ptr + 1) & (m_size - 1); 38 | pthread_mutex_unlock (&m_dequeue_lock); 39 | return 1;// 获取成功 40 | } 41 | } 42 | 43 | int ring_buffer::add_element (void* data) { 44 | // 非阻塞获取入队锁 45 | while (pthread_mutex_trylock (&m_enqueue_lock) != 0); 46 | if (is_full ()) { 47 | pthread_mutex_unlock (&m_enqueue_lock); 48 | return 0;// 队列已满 49 | } else { 50 | m_buffer[m_enqueue_ptr] = data; 51 | m_enqueue_ptr = (m_enqueue_ptr + 1) & (m_size - 1); 52 | pthread_mutex_unlock (&m_enqueue_lock); 53 | return 1;// 添加成功 54 | } 55 | } 56 | 57 | ring_buffer::~ring_buffer () { 58 | delete[] m_buffer; 59 | } 60 | -------------------------------------------------------------------------------- /src/multi_thread/thread_pool.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/multi_thread/thread_pool.h" 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | atomic_int m_finish_counter; 7 | /*thread func*/ 8 | void *get_task (void *arg) { 9 | ring_buffer* rb = (ring_buffer*) arg; 10 | while (1) { 11 | task* t_task = 0; 12 | int flag = rb -> get_element ((void**) (&t_task)); 13 | if (flag != 0) {// 获取成功 14 | t_task -> run ();// 运行task 15 | ++m_finish_counter; 16 | } 17 | } 18 | } 19 | 20 | thread_pool::thread_pool () { 21 | m_worker_num = 4; 22 | m_task_buffers = new ring_buffer*[m_worker_num]; 23 | for (int i = 0; i < m_worker_num; ++i) { 24 | m_task_buffers[i] = new ring_buffer (1000); 25 | } 26 | m_workers = new pthread_t[m_worker_num]; 27 | for (int i = 0; i < m_worker_num; ++i) { 28 | pthread_create (&m_workers[i], NULL, get_task, m_task_buffers[i]); 29 | } 30 | } 31 | 32 | thread_pool* thread_pool::instance = new thread_pool (); 33 | thread_pool* thread_pool::get_instance () { 34 | return instance; 35 | } 36 | 37 | void thread_pool::add_job (task* t_task) { 38 | int idx = m_idx % m_worker_num; 39 | ring_buffer* m_task_buffer = m_task_buffers[idx]; 40 | while (m_task_buffer -> add_element ((void*) t_task) == 0); 41 | m_idx = m_idx + 1; 42 | } 43 | 44 | void thread_pool::add_job_list (vector job_list) { 45 | m_finish_counter = 0; 46 | for (int i = 0; i < job_list.size (); ++i) { 47 | add_job (job_list[i]); 48 | } 49 | while (m_finish_counter < job_list.size ()); 50 | } 51 | 52 | thread_pool::~thread_pool () { 53 | for (int i = 0; i < m_worker_num; ++i) { 54 | delete m_task_buffers[i]; 55 | } 56 | delete[] m_task_buffers; 57 | } 58 | 59 | void task::run () { 60 | } 61 | -------------------------------------------------------------------------------- /src/op_node/AbsSum.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/AbsSum.h" 2 | #include 3 | using namespace std; 4 | AbsSum::AbsSum (string type, string id, string idx): OperatorNode (type, id, idx) { 5 | } 6 | void AbsSum::op () { 7 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 8 | vector shape (2); shape[0] = 1; shape[1] = 1; 9 | m_output = new Tensor (shape); 10 | m_output -> m_tensor[0] = parent_output -> element_abs_sum (); 11 | } 12 | void AbsSum::grad_op () { 13 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 14 | vector shape (2); shape[0] = 1; shape[1] = parent_output -> m_size; 15 | 16 | 
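// local gradient of sum(|x_i|): a 1 x size row of signs (+1 / -1), zeroed in the dead zone |x_i| < 0.1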
Tensor grad = Tensor (shape); 17 | vector idxs (2); 18 | for (int i = 0; i < parent_output -> m_size; ++i) { 19 | idxs[0] = 0; idxs[1] = i; 20 | if (fabs (parent_output -> m_tensor[i] - 0.0) < 0.1) { 21 | grad.set_value (idxs, 0); 22 | } else if (parent_output -> m_tensor[i] > 0.0) { 23 | grad.set_value (idxs, 1); 24 | } else { 25 | grad.set_value (idxs, -1); 26 | } 27 | } 28 | chain_rule (&grad, 0); 29 | } 30 | void AbsSum::release_tensor () { 31 | if (m_sum_grad != 0) { 32 | delete m_sum_grad; 33 | m_sum_grad = 0; 34 | } 35 | delete m_output; 36 | m_output = 0; 37 | } 38 | AbsSum::~AbsSum () { 39 | release_tensor (); 40 | } 41 | -------------------------------------------------------------------------------- /src/op_node/Add.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Add.h" 2 | #include 3 | using namespace std; 4 | Add::Add (string type, string id, string idx): OperatorNode (type, id, idx) { 5 | } 6 | void Add::op () { 7 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 8 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 9 | m_output = parent0_output -> add (parent1_output); 10 | if (m_output == 0) { 11 | cout << "shape is not match:" << get_name () << endl; 12 | } 13 | } 14 | void Add::grad_op () { 15 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 16 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 17 | vector shape0 (2); 18 | shape0[0] = m_output -> m_size; 19 | shape0[1] = parent0_output -> m_size; 20 | vector shape1 (2); 21 | shape1[0] = m_output -> m_size; 22 | shape1[1] = parent1_output -> m_size; 23 | 24 | Tensor grad0 = Tensor (shape0); 25 | Tensor grad1 = Tensor (shape1); 26 | 27 | vector idxs (2); 28 | // grad0 29 | for (int i = 0; i < m_output -> m_size; ++i) { 30 | for (int j = 0; j < parent0_output -> m_size; ++j) { 31 | if (i == j) { 32 | idxs[0] = i; idxs[1] = j; 33 | grad0.set_value (idxs, 1); 34 | } 35 | } 36 | } 37 | // grad1 38 | for (int i = 0; i < m_output -> m_size; ++i) { 39 | for (int j = 0; j < parent1_output -> m_size; ++j) { 40 | if (i == j) { 41 | idxs[0] = i; idxs[1] = j; 42 | grad1.set_value (idxs, 1); 43 | } 44 | } 45 | } 46 | // chain rule 47 | chain_rule (&grad0, 0); 48 | chain_rule (&grad1, 1); 49 | } 50 | void Add::release_tensor () { 51 | if (m_sum_grad != 0) { 52 | delete m_sum_grad; 53 | m_sum_grad = 0; 54 | } 55 | delete m_output; 56 | m_output = 0; 57 | } 58 | Add::~Add () { 59 | release_tensor (); 60 | } 61 | -------------------------------------------------------------------------------- /src/op_node/Bias.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Bias.h" 2 | #include 3 | using namespace std; 4 | Bias::Bias (string type, string id, string idx): OperatorNode (type, id, idx) { 5 | } 6 | void Bias::op () { 7 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 8 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output;// 偏置向量 9 | m_output = 0; 10 | if (parent1_output -> m_shape[0] == 1 && parent0_output -> m_shape[1] == parent1_output -> m_shape[1]) { 11 | m_output = new Tensor (parent0_output -> m_shape, 0); 12 | for (int i = 0; i < parent0_output -> m_size; ++i) { 13 | int j = i % parent1_output -> m_size; 14 | m_output -> m_tensor[i] = parent0_output -> m_tensor[i] + parent1_output -> m_tensor[j]; 15 | } 16 | } 17 | if (m_output == 0) { 18 | cout << "shape is not match:" << 
get_name () << endl; 19 | } 20 | } 21 | void Bias::grad_op () { 22 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 23 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 24 | vector shape0 (2); 25 | shape0[0] = m_output -> m_size; 26 | shape0[1] = parent0_output -> m_size; 27 | vector shape1 (2); 28 | shape1[0] = m_output -> m_size; 29 | shape1[1] = parent1_output -> m_size; 30 | 31 | Tensor grad0 = Tensor (shape0); 32 | Tensor grad1 = Tensor (shape1); 33 | 34 | vector idxs (2); 35 | // grad0 36 | for (int i = 0; i < m_output -> m_size; ++i) { 37 | for (int j = 0; j < parent0_output -> m_size; ++j) { 38 | if (i == j) { 39 | idxs[0] = i; idxs[1] = j; 40 | grad0.set_value (idxs, 1); 41 | } 42 | } 43 | } 44 | // grad1 45 | for (int i = 0; i < m_output -> m_size; ++i) { 46 | for (int j = 0; j < parent1_output -> m_size; ++j) { 47 | if (i % parent1_output -> m_size == j) { 48 | idxs[0] = i; idxs[1] = j; 49 | grad1.set_value (idxs, 1); 50 | } 51 | } 52 | } 53 | 54 | // chain rule 55 | chain_rule (&grad0, 0); 56 | chain_rule (&grad1, 1); 57 | } 58 | void Bias::release_tensor () { 59 | if (m_sum_grad != 0) { 60 | delete m_sum_grad; 61 | m_sum_grad = 0; 62 | } 63 | delete m_output; 64 | m_output = 0; 65 | } 66 | Bias::~Bias () { 67 | release_tensor (); 68 | } 69 | -------------------------------------------------------------------------------- /src/op_node/Dropout.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Dropout.h" 2 | #include 3 | #include 4 | #include 5 | using namespace std; 6 | Dropout::Dropout (string type, string id, string idx, Tensor* filter, float keep_rate): OperatorNode (type, id, idx) { 7 | m_filter = new Tensor (filter -> m_shape, 0); 8 | m_keep_rate = keep_rate; 9 | } 10 | void Dropout::op () { 11 | float n = 1.0 / m_keep_rate; 12 | srand (time (0)); 13 | for (int i = 0; i < m_filter -> m_size; ++i) { 14 | float r = (rand () % 1000) / 1000.0; 15 | if (r > m_keep_rate) { 16 | m_filter -> m_tensor[i] = 0.0; 17 | } else { 18 | m_filter -> m_tensor[i] = n; 19 | } 20 | } 21 | 22 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 23 | m_output = parent0_output -> element_mult (m_filter); 24 | if (m_output == 0) { 25 | cout << "filter size error" << endl; 26 | } 27 | } 28 | void Dropout::grad_op () { 29 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 30 | vector shape (2); shape[0] = m_output -> m_size; shape[1] = parent_output -> m_size; 31 | 32 | Tensor grad = Tensor (shape); 33 | vector idxs (2); 34 | for (int i = 0; i < m_output -> m_size; ++i) { 35 | for (int j = 0; j < parent_output -> m_size; ++j) { 36 | if (i == j) { 37 | idxs[0] = i; idxs[1] = j; 38 | grad.set_value (idxs, m_filter -> m_tensor[i]); 39 | } 40 | } 41 | } 42 | // chain rule 43 | chain_rule (&grad, 0); 44 | } 45 | void Dropout::release_tensor () { 46 | if (m_sum_grad != 0) { 47 | delete m_sum_grad; 48 | m_sum_grad = 0; 49 | } 50 | delete m_output; 51 | m_output = 0; 52 | } 53 | Dropout::~Dropout () { 54 | release_tensor (); 55 | delete m_filter; 56 | m_filter = 0; 57 | } 58 | -------------------------------------------------------------------------------- /src/op_node/Input.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Input.h" 2 | using namespace std; 3 | void input_default_op (Input* input) { 4 | input -> m_output = input -> m_data[input -> m_data_ptr]; 5 | input -> m_data_ptr = 
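/* advance the read pointer with wrap-around so the sample list is cycled repeatedly */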
(input -> m_data_ptr + 1) % input -> m_data.size (); 6 | } 7 | Input::Input (string type, string id, string idx, vector input_data, void (*func) (Input*)): OperatorNode (type, id, idx) { 8 | m_data_ptr = 0; 9 | m_data = input_data; 10 | if (func == 0) { 11 | op = &input_default_op; 12 | } else { 13 | op = func; 14 | } 15 | } 16 | 17 | void Input::release_tensor () { 18 | if (m_sum_grad != 0) { 19 | delete m_sum_grad; 20 | m_sum_grad = 0; 21 | } 22 | } 23 | Input::~Input () { 24 | release_tensor (); 25 | } 26 | -------------------------------------------------------------------------------- /src/op_node/Minus.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Minus.h" 2 | using namespace std; 3 | Minus::Minus (string type, string id, string idx): OperatorNode (type, id, idx) { 4 | } 5 | void Minus::op () { 6 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 7 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 8 | m_output = parent1_output -> scalar_mult (-1.0); 9 | parent0_output -> add (m_output, m_output); 10 | } 11 | void Minus::grad_op () { 12 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 13 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 14 | vector shape0 (2); 15 | shape0[0] = m_output -> m_size; 16 | shape0[1] = parent0_output -> m_size; 17 | vector shape1 (2); 18 | shape1[0] = m_output -> m_size; 19 | shape1[1] = parent1_output -> m_size; 20 | 21 | Tensor grad0 = Tensor (shape0); 22 | Tensor grad1 = Tensor (shape1); 23 | 24 | vector idxs (2); 25 | // grad0 26 | for (int i = 0; i < m_output -> m_size; ++i) { 27 | for (int j = 0; j < parent0_output -> m_size; ++j) { 28 | if (i == j) { 29 | idxs[0] = i; idxs[1] = j; 30 | grad0.set_value (idxs, 1); 31 | } 32 | } 33 | } 34 | // grad1 35 | for (int i = 0; i < m_output -> m_size; ++i) { 36 | for (int j = 0; j < parent1_output -> m_size; ++j) { 37 | if (i == j) { 38 | idxs[0] = i; idxs[1] = j; 39 | grad1.set_value (idxs, -1); 40 | } 41 | } 42 | } 43 | 44 | // chain rule 45 | chain_rule (&grad0, 0); 46 | chain_rule (&grad1, 1); 47 | } 48 | void Minus::release_tensor () { 49 | if (m_sum_grad != 0) { 50 | delete m_sum_grad; 51 | m_sum_grad = 0; 52 | } 53 | delete m_output; 54 | m_output = 0; 55 | } 56 | Minus::~Minus () { 57 | release_tensor (); 58 | } 59 | -------------------------------------------------------------------------------- /src/op_node/Mult.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Mult.h" 2 | #include 3 | using namespace std; 4 | Mult::Mult (string type, string id, string idx): OperatorNode (type, id, idx) { 5 | } 6 | void Mult::op () { 7 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 8 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 9 | m_output = parent0_output -> matrix_mult (parent1_output); 10 | if (m_output == 0) { 11 | cout << "shape is not match:" << get_name () << endl; 12 | } 13 | } 14 | void Mult::grad_op () { 15 | Tensor* parent0_output = ((OperatorNode*) m_parents[0]) -> m_output; 16 | Tensor* parent1_output = ((OperatorNode*) m_parents[1]) -> m_output; 17 | vector shape0 (2); 18 | shape0[0] = m_output -> m_size; 19 | shape0[1] = parent0_output -> m_size; 20 | vector shape1 (2); 21 | shape1[0] = m_output -> m_size; 22 | shape1[1] = parent1_output -> m_size; 23 | 24 | Tensor grad0 = Tensor (shape0); 25 | Tensor grad1 = Tensor (shape1); 
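// Jacobians of C = A*B in flattened (row-major) form: dC[i][j]/dA[l][m] = B[m][j] when i == l, else 0; dC[i][j]/dB[l][m] = A[i][l] when j == m, else 0. The nested loops below fill exactly these entries.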
26 | 27 | vector idxs0 (2); 28 | vector idxs1 (2); 29 | // grad0 30 | for (int i = 0; i < m_output -> m_shape[0]; ++i) { 31 | for (int j = 0; j < m_output -> m_shape[1]; ++j) { 32 | for (int l = 0; l < parent0_output -> m_shape[0]; ++l) { 33 | for (int m = 0; m < parent0_output -> m_shape[1]; ++m) { 34 | idxs0[0] = i * m_output -> m_shape[1] + j; 35 | idxs0[1] = l * parent0_output -> m_shape[1] + m; 36 | if (i == l) { 37 | idxs1[0] = m; 38 | idxs1[1] = j; 39 | grad0.set_value (idxs0, parent1_output -> get_value (idxs1)); 40 | } else { 41 | grad0.set_value (idxs0, 0); 42 | } 43 | } 44 | } 45 | } 46 | } 47 | // grad1 48 | for (int i = 0; i < m_output -> m_shape[0]; ++i) { 49 | for (int j = 0; j < m_output -> m_shape[1]; ++j) { 50 | for (int l = 0; l < parent1_output -> m_shape[0]; ++l) { 51 | for (int m = 0; m < parent1_output -> m_shape[1]; ++m) { 52 | idxs0[0] = i * m_output -> m_shape[1] + j; 53 | idxs0[1] = l * parent1_output -> m_shape[1] + m; 54 | if (j == m) { 55 | idxs1[0] = i; 56 | idxs1[1] = l; 57 | grad1.set_value (idxs0, parent0_output -> get_value (idxs1)); 58 | } else { 59 | grad1.set_value (idxs0, 0); 60 | } 61 | } 62 | } 63 | } 64 | } 65 | // chain rule 66 | chain_rule (&grad0, 0); 67 | chain_rule (&grad1, 1); 68 | } 69 | void Mult::release_tensor () { 70 | if (m_sum_grad != 0) { 71 | delete m_sum_grad; 72 | m_sum_grad = 0; 73 | } 74 | delete m_output; 75 | m_output = 0; 76 | } 77 | Mult::~Mult () { 78 | release_tensor (); 79 | } 80 | -------------------------------------------------------------------------------- /src/op_node/Parameter.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Parameter.h" 2 | using namespace std; 3 | Parameter::Parameter (string type, string id, string idx, Tensor* data, int share_data): OperatorNode (type, id, idx) { 4 | m_share_data = share_data; 5 | if (share_data == 0) { 6 | m_output = new Tensor (data -> m_shape, data -> m_tensor); 7 | } else { 8 | m_output = data; 9 | } 10 | } 11 | void Parameter::release_tensor () { 12 | if (m_sum_grad != 0) { 13 | delete m_sum_grad; 14 | m_sum_grad = 0; 15 | } 16 | } 17 | Parameter::~Parameter () { 18 | release_tensor (); 19 | if (m_share_data == 0) { 20 | delete m_output; 21 | } 22 | m_output = 0; 23 | } 24 | -------------------------------------------------------------------------------- /src/op_node/Sigmoid.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/Sigmoid.h" 2 | #include 3 | using namespace std; 4 | Sigmoid::Sigmoid (string type, string id, string idx): OperatorNode (type, id, idx) { 5 | } 6 | void Sigmoid::op () { 7 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 8 | m_output = new Tensor (parent_output -> m_shape); 9 | for (int i = 0; i < parent_output -> m_size; ++i) { 10 | m_output -> m_tensor[i] = 1.0 / (1 + pow (2.718, 0 - parent_output -> m_tensor[i])); 11 | } 12 | } 13 | void Sigmoid::grad_op () { 14 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 15 | vector shape (2); shape[0] = m_output -> m_size; shape[1] = parent_output -> m_size; 16 | Tensor grad = Tensor (shape); 17 | 18 | vector idxs (2); 19 | // grad 20 | for (int i = 0; i < m_output -> m_size; ++i) { 21 | for (int j = 0; j < parent_output -> m_size; ++j) { 22 | idxs[0] = i; 23 | idxs[1] = j; 24 | if (i == j) { 25 | grad.set_value (idxs, m_output -> m_tensor[i] * (1 - m_output -> m_tensor[i])); 26 | } 27 | } 28 | } 29 | chain_rule (&grad, 0); 30 
| } 31 | void Sigmoid::release_tensor () { 32 | if (m_sum_grad != 0) { 33 | delete m_sum_grad; 34 | m_sum_grad = 0; 35 | } 36 | delete m_output; 37 | m_output = 0; 38 | } 39 | Sigmoid::~Sigmoid () { 40 | release_tensor (); 41 | } 42 | -------------------------------------------------------------------------------- /src/op_node/SquareSum.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/op_node/SquareSum.h" 2 | using namespace std; 3 | SquareSum::SquareSum (string type, string id, string idx): OperatorNode (type, id, idx) { 4 | } 5 | void SquareSum::op () { 6 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 7 | vector shape (2); shape[0] = 1; shape[1] = 1; 8 | m_output = new Tensor (shape); 9 | m_output -> m_tensor[0] = parent_output -> element_square_sum (); 10 | } 11 | void SquareSum::grad_op () { 12 | Tensor* parent_output = ((OperatorNode*) m_parents[0]) -> m_output; 13 | vector shape (2); shape[0] = 1; shape[1] = parent_output -> m_size; 14 | 15 | Tensor grad = Tensor (shape); 16 | vector idxs (2); 17 | for (int i = 0; i < parent_output -> m_size; ++i) { 18 | idxs[0] = 0; idxs[1] = i; 19 | grad.set_value (idxs, parent_output -> m_tensor[i] * 2); 20 | } 21 | chain_rule (&grad, 0); 22 | } 23 | void SquareSum::release_tensor () { 24 | if (m_sum_grad != 0) { 25 | delete m_sum_grad; 26 | m_sum_grad = 0; 27 | } 28 | delete m_output; 29 | m_output = 0; 30 | } 31 | SquareSum::~SquareSum () { 32 | release_tensor (); 33 | } 34 | -------------------------------------------------------------------------------- /src/optimizer/Adadelta.cpp: -------------------------------------------------------------------------------- 1 | #include "../../include/optimizer/Adadelta.h" 2 | #include "../../include/OperatorNode.h" 3 | #include 4 | #include 5 | using namespace std; 6 | Adadelta::Adadelta (float a): Optimizer (a) { 7 | m_epsl = 0.5; 8 | m_lambda = 0.3; 9 | } 10 | void Adadelta::optimize (vector &topo_results) { 11 | for (int i = 0; i < topo_results.size (); ++i) {// 计算梯度 12 | ((OperatorNode*) topo_results[i]) -> grad_op (); 13 | } 14 | for (int i = 0; i < topo_results.size (); ++i) {// 更新 15 | OperatorNode* op_node = (OperatorNode*) topo_results[i]; 16 | if (op_node -> m_sum_grad != 0 && op_node -> m_name[0] == "Parameter") { 17 | string name = op_node -> get_name (); 18 | if (m_tensor_store.find (name) == m_tensor_store.end ()) {// 不存在累计梯度 19 | for (int j = 0; j < op_node -> m_output -> m_size; ++j) { 20 | op_node -> m_output -> m_tensor[j] -= m_a * op_node -> m_sum_grad -> m_tensor[j]; 21 | } 22 | Tensor* tensor = new Tensor (op_node -> m_sum_grad -> m_shape, op_node -> m_sum_grad -> m_tensor); 23 | tensor -> element_square (); 24 | m_tensor_store[name] = tensor; 25 | } else { 26 | Tensor* acc_grad = m_tensor_store[name]; 27 | for (int j = 0; j < acc_grad -> m_size; ++j) { 28 | op_node -> m_output -> m_tensor[j] -= m_a * op_node -> m_sum_grad -> m_tensor[j] 29 | / (sqrt (acc_grad -> m_tensor[j]) + m_epsl); 30 | } 31 | // 更新累积梯度 32 | op_node -> m_sum_grad -> element_square (); 33 | op_node -> m_sum_grad -> scalar_acc_mult (m_lambda); 34 | acc_grad -> scalar_acc_mult (1 - m_lambda); 35 | acc_grad -> add (op_node -> m_sum_grad, acc_grad); 36 | } 37 | } 38 | } 39 | } 40 | Adadelta::~Adadelta () { 41 | unordered_map ::iterator tensor_store_it = m_tensor_store.begin (); 42 | while (tensor_store_it != m_tensor_store.end ()) { 43 | delete tensor_store_it -> second; 44 | ++tensor_store_it; 45 | } 46 | m_tensor_store.clear (); 
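// m_tensor_store keeps one accumulated squared-gradient tensor per Parameter node (keyed by node name); it is released together with the optimizer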
47 | } 48 | -------------------------------------------------------------------------------- /unit_test/graph_test.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/Graph.h" 2 | #include "../include/Node.h" 3 | #include 4 | #include 5 | using namespace std; 6 | int main () { 7 | Node* a = new Node ("a", "0"); 8 | Node* b = new Node ("b", "0"); 9 | Node* c = new Node ("c", "0"); 10 | Node* d = new Node ("d", "0"); 11 | Node* e = new Node ("e", "0"); 12 | Node* f = new Node ("f", "0"); 13 | 14 | // 构建图 15 | Graph graph; 16 | graph.add_node ("", a); 17 | graph.add_node ("", b); 18 | graph.add_node (a -> get_name (), c); 19 | graph.add_node (b -> get_name (), c); 20 | graph.add_node ("", d); 21 | graph.add_node (c -> get_name (), e); 22 | graph.add_node (d -> get_name (), e); 23 | graph.add_node (c -> get_name (), f); 24 | // 构建转置图 25 | graph.build_reverse_graph (); 26 | // 构建子图, 基于图改写 27 | vector endnode_list; 28 | endnode_list.push_back (e); 29 | graph.build_subgraph (endnode_list); 30 | // 拓扑排序 31 | vector toposort_result; 32 | graph.topological_sort (graph.m_adj_table, toposort_result);// 原图拓扑排序 33 | for (int i = 0; i < toposort_result.size (); ++i) { 34 | cout << toposort_result[i] -> get_name () << " "; 35 | } 36 | cout << endl; 37 | vector reverse_toposort_result; 38 | graph.topological_sort (graph.m_reverse_table, reverse_toposort_result);// 转置图拓扑排序 39 | for (int i = 0; i < reverse_toposort_result.size (); ++i) { 40 | cout << reverse_toposort_result[i] -> get_name () << " "; 41 | } 42 | cout << endl; 43 | 44 | } 45 | -------------------------------------------------------------------------------- /unit_test/operatorNode_test.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/op_node/Add.h" 2 | #include "../include/op_node/Bias.h" 3 | #include "../include/op_node/SquareSum.h" 4 | #include "../include/op_node/Minus.h" 5 | #include "../include/op_node/AbsSum.h" 6 | #include "../include/op_node/Mult.h" 7 | #include "../include/op_node/Sigmoid.h" 8 | #include "../include/op_node/Parameter.h" 9 | #include "../include/op_node/Dropout.h" 10 | #include "../include/ComputeGraph.h" 11 | #include "../include/Tensor.h" 12 | #include "../include/OperatorNode.h" 13 | #include "../include/Optimizer.h" 14 | #include 15 | #include 16 | #include 17 | using namespace std; 18 | 19 | int main () { 20 | vector shape1; shape1.push_back (2); shape1.push_back (2); 21 | vector shape2; shape2.push_back (2); shape2.push_back (2); 22 | Tensor* t1 = new Tensor (shape1); 23 | Tensor* t2 = new Tensor (shape2); 24 | t1 -> init (); 25 | t2 -> init (); 26 | t1 -> display (); cout << endl; 27 | t2 -> display (); 28 | 29 | // 生成计算节点 30 | OperatorNode* p1 = new Parameter ("Parameter", "1", "0", t1); 31 | OperatorNode* p2 = new Parameter ("Parameter", "2", "0", t2); 32 | OperatorNode* add = new Add ("Add", "1", "0"); 33 | OperatorNode* minus = new Minus ("Minus", "1", "0"); 34 | OperatorNode* mult = new Mult ("Mult", "1", "0"); 35 | OperatorNode* sigmoid = new Sigmoid ("Sigmoid", "1", "0"); 36 | OperatorNode* square_sum = new SquareSum ("SquareSum", "1", "0"); 37 | OperatorNode* abs_sum = new AbsSum ("AbsSum", "1", "0"); 38 | OperatorNode* b = new Bias ("Bias", "1", "0"); 39 | OperatorNode* dropout = new Dropout ("Dropout", "1", "0", t2, 0.5); 40 | 41 | // 构建计算图 42 | ComputeGraph cg; 43 | cg.add_node ("", p1); 44 | cg.add_node ("", p2); 45 | cg.add_node (p1 -> get_name (), add); 46 | cg.add_node (p2 -> 
get_name (), add);// 测试加法 47 | //cg.add_node (p1 -> get_name (), minus); 48 | //cg.add_node (p2 -> get_name (), minus);// 测试减法 49 | //cg.add_node (p1 -> get_name (), mult); 50 | //cg.add_node (p2 -> get_name (), mult);// 测试乘法 51 | //cg.add_node (p1 -> get_name (), sigmoid);// 测试sigmoid 52 | //cg.add_node (p1 -> get_name (), square_sum);// 测试SquareSum 53 | //cg.add_node (p1 -> get_name (), abs_sum);// 测试AbsSum 54 | //cg.add_node (p1 -> get_name (), b); 55 | //cg.add_node (p2 -> get_name (), b);// 测试偏置 56 | //cg.add_node (p1 -> get_name (), dropout);// 测试dropout 57 | // 构建转置图 58 | cg.build_reverse_graph (); 59 | // 初始化优化器,普通sgd 60 | Optimizer* optimizer = new Optimizer (0.1); 61 | cg.m_optimizer = optimizer; 62 | 63 | struct timeval start,end; 64 | gettimeofday(&start, 0); 65 | for (int i = 0; i < 1; ++i) { 66 | vector result; 67 | cg.forward_propagation (result); 68 | cout << "fp result:................." << endl; 69 | ((OperatorNode*) result[0]) -> m_output -> display (); cout << endl;// 前向结果 70 | //((OperatorNode*) result[1]) -> m_output -> display (); 71 | cg.back_propagation (); 72 | cout << "bp result:................." << endl; 73 | p1 -> m_sum_grad -> display (); cout << endl; 74 | p2 -> m_sum_grad -> display ();// 反向结果 75 | cout << "new parameter:............." << endl; 76 | p1 -> m_output -> display (); cout << endl; 77 | p2 -> m_output -> display (); 78 | cg.release_tensor (); 79 | } 80 | gettimeofday(&end, 0); 81 | long timeuse =1000000 * ( end.tv_sec - start.tv_sec ) + end.tv_usec - start.tv_usec; 82 | printf("time=%f\n",timeuse /1000000.0); 83 | } 84 | -------------------------------------------------------------------------------- /unit_test/rnn_test.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/Tensor.h" 2 | #include "../include/op_node/Input.h" 3 | #include "../include/VirtualNode.h" 4 | #include "../include/OperatorNode.h" 5 | #include "../include/VirtualGraph.h" 6 | #include "../include/ComputeGraph.h" 7 | #include "../include/Optimizer.h" 8 | #include "../include/optimizer/Adadelta.h" 9 | #include "../include/BranchNode.h" 10 | #include "../include/LoopNode.h" 11 | #include 12 | #include 13 | #include 14 | #include 15 | using namespace std; 16 | Tensor* int_to_tensor (int a) {// 把int转化为8位01串,左边是低位,右边是高位 17 | float* data = new float[8]; 18 | vector shape (2); shape[0] = 1; shape[1] = 8; 19 | int mask = 1; 20 | for (int i = 0; i < 8; ++i) { 21 | if ((mask & a) == 0) { 22 | data[i] = 0; 23 | } else { 24 | data[i] = 1; 25 | } 26 | mask = mask << 1; 27 | } 28 | return new Tensor (shape, data); 29 | } 30 | int tensor_to_int (Tensor* tensor) { 31 | int result = 0; 32 | for (int i = 7; i >= 0; --i) { 33 | int a = tensor -> m_tensor[i] < 0.5 ? 
0 : 1; 34 | result = result * 2 + a; 35 | } 36 | return result; 37 | } 38 | void prepare_data (int num, vector &add_nums, vector &sums) { 39 | for (int i = 0; i < num; ++i) { 40 | int a = rand () % 128; 41 | int b = rand () % 128; 42 | int c = a + b; 43 | Tensor* t_a = int_to_tensor (a); 44 | Tensor* t_b = int_to_tensor (b); 45 | Tensor* t_c = int_to_tensor (c); 46 | add_nums.push_back (t_a); 47 | add_nums.push_back (t_b); 48 | sums.push_back (t_c); 49 | } 50 | } 51 | // rnn中的分支选择函数 52 | Node* choose_node (int idx, Graph* compute_graph, BranchNode* branch_node) { 53 | ostringstream oss; 54 | if (idx == 0) { 55 | int batch_size = 1; 56 | int hidden_size = 8; 57 | vector shape (2); 58 | shape[0] = batch_size; shape[1] = hidden_size; 59 | Tensor* init_tensor = new Tensor (shape); 60 | vector data; data.push_back (init_tensor); 61 | Node* init_input = new Input ("Input", "init", "0", data); 62 | compute_graph -> add_node ("", init_input); 63 | ((Input*) init_input) -> op ((Input*) init_input); 64 | return init_input; 65 | } else { 66 | oss << (idx - 1) << ":"; 67 | string name = "Mult:h:" + oss.str (); 68 | return compute_graph -> get_node (name); 69 | } 70 | } 71 | // rnn中的循环init,condition函数 72 | void init (LoopNode* loop_node) { 73 | } 74 | int condition (Graph* compute_graph, int idx) { 75 | if (idx < 8) { 76 | return 0; 77 | } else { 78 | return 1; 79 | } 80 | } 81 | // rnn的输入函数,简单的数据输入预处理 82 | void rnn_input_x (Input* input) { 83 | int batch_size = 1; 84 | vector shape (2); shape[0] = batch_size; shape[1] = 2; 85 | float* data = new float[2 * batch_size]; 86 | for (int i = 0; i < batch_size; ++i) { 87 | Tensor* a = input -> m_data[input -> m_data_ptr]; 88 | Tensor* b = input -> m_data[input -> m_data_ptr + 1]; 89 | int idx = atoi (input -> m_name[2].c_str ()); 90 | data[i * 2] = a -> m_tensor[idx]; 91 | data[i * 2 + 1] = b -> m_tensor[idx]; 92 | input -> m_data_ptr = (input -> m_data_ptr + 2) % input -> m_data.size (); 93 | } 94 | input -> m_output = new Tensor (shape, data); 95 | } 96 | void rnn_input_y (Input* input) { 97 | int batch_size = 1; 98 | float* data = new float[1 * batch_size]; 99 | vector shape (2); shape[0] = batch_size; shape[1] = 1; 100 | for (int i = 0; i < batch_size; ++i) { 101 | Tensor* a = input -> m_data[input -> m_data_ptr]; 102 | int idx = atoi (input -> m_name[2].c_str ()); 103 | data[i] = a -> m_tensor[idx]; 104 | input -> m_data_ptr = (input -> m_data_ptr + 1) % input -> m_data.size (); 105 | } 106 | input -> m_output = new Tensor (shape, data); 107 | } 108 | 109 | int main () { 110 | // 准备数据集 111 | vector add_nums; 112 | vector sums; 113 | prepare_data (10000, add_nums, sums); 114 | 115 | int hidden_size = 8; 116 | 117 | vector shape_w1 (2); shape_w1[0] = 2; shape_w1[1] = hidden_size; 118 | Tensor* t_w1 = new Tensor (shape_w1); 119 | t_w1 -> init (); 120 | 121 | vector shape_w2 (2); shape_w2[0] = hidden_size; shape_w2[1] = 1; 122 | Tensor* t_w2 = new Tensor (shape_w2); 123 | t_w2 -> init (); 124 | 125 | vector shape_wh (2); shape_wh[0] = hidden_size; shape_wh[1] = hidden_size; 126 | Tensor* t_wh = new Tensor (shape_wh); 127 | t_wh -> init (); 128 | 129 | vector shape_b1 (2); shape_b1[0] = 1; shape_b1[1] = hidden_size; 130 | Tensor* t_b1 = new Tensor (shape_b1); 131 | t_b1 -> init (); 132 | 133 | vector shape_b2 (2); shape_b2[0] = 1; shape_b2[1] = 1; 134 | Tensor* t_b2 = new Tensor (shape_b2); 135 | t_b2 -> init (); 136 | 137 | // 准备虚拟节点 138 | VirtualNode* input_x = new VirtualNode ("Input", "1"); 139 | input_x -> m_input_data = add_nums; 140 | input_x -> 
input_op = &rnn_input_x; 141 | 142 | VirtualNode* wh = new VirtualNode ("Parameter", "wh", 1); 143 | wh -> m_data = t_wh; 144 | 145 | VirtualNode* w1 = new VirtualNode ("Parameter", "w1", 1); 146 | w1 -> m_data = t_w1; 147 | 148 | VirtualNode* multh = new VirtualNode ("Mult", "h"); 149 | VirtualNode* mult1 = new VirtualNode ("Mult", "1"); 150 | BranchNode* branch = new BranchNode ("Branch", "1", &choose_node); 151 | VirtualNode* add1 = new VirtualNode ("Add", "1"); 152 | 153 | VirtualNode* b1 = new VirtualNode ("Parameter", "b1", 1); 154 | b1 -> m_data = t_b1; 155 | 156 | VirtualNode* bias1 = new VirtualNode ("Bias", "1"); 157 | VirtualNode* sigmoid1 = new VirtualNode ("Sigmoid", "1"); 158 | 159 | VirtualNode* w2 = new VirtualNode ("Parameter", "w2", 1); 160 | w2 -> m_data = t_w2; 161 | 162 | VirtualNode* mult2 = new VirtualNode ("Mult", "2"); 163 | 164 | VirtualNode* b2 = new VirtualNode ("Parameter", "b2", 1); 165 | b2 -> m_data = t_b2; 166 | 167 | VirtualNode* bias2 = new VirtualNode ("Bias", "2"); 168 | VirtualNode* sigmoid2 = new VirtualNode ("Sigmoid", "2"); 169 | 170 | VirtualNode* input_y = new VirtualNode ("Input", "2"); 171 | input_y -> m_input_data = sums; 172 | input_y -> input_op = &rnn_input_y; 173 | 174 | VirtualNode* minus = new VirtualNode ("Minus", "1"); 175 | VirtualNode* abs = new VirtualNode ("AbsSum", "1"); 176 | 177 | LoopNode* loop = new LoopNode ("Loop", "1", &init, &condition); 178 | 179 | // 构建虚拟图 180 | loop -> m_sub_vgraph -> add_node ("", input_x); 181 | loop -> m_sub_vgraph -> add_node ("", w1); 182 | loop -> m_sub_vgraph -> add_node (input_x -> get_name (), mult1); 183 | loop -> m_sub_vgraph -> add_node (w1 -> get_name (), mult1); 184 | loop -> m_sub_vgraph -> add_node ("", branch); 185 | loop -> m_sub_vgraph -> add_node (branch -> get_name (), add1); 186 | loop -> m_sub_vgraph -> add_node (mult1 -> get_name (), add1); 187 | loop -> m_sub_vgraph -> add_node ("", b1); 188 | loop -> m_sub_vgraph -> add_node (add1 -> get_name (), bias1); 189 | loop -> m_sub_vgraph -> add_node (b1 -> get_name (), bias1); 190 | loop -> m_sub_vgraph -> add_node (bias1 -> get_name (), sigmoid1); 191 | 192 | loop -> m_sub_vgraph -> add_node ("", wh);// 为下一次循环做准备 193 | loop -> m_sub_vgraph -> add_node (sigmoid1 -> get_name (), multh); 194 | loop -> m_sub_vgraph -> add_node (wh -> get_name (), multh); 195 | 196 | loop -> m_sub_vgraph -> add_node ("", w2); 197 | loop -> m_sub_vgraph -> add_node (sigmoid1 -> get_name (), mult2); 198 | loop -> m_sub_vgraph -> add_node (w2 -> get_name (), mult2); 199 | loop -> m_sub_vgraph -> add_node ("", b2); 200 | loop -> m_sub_vgraph -> add_node (mult2 -> get_name (), bias2); 201 | loop -> m_sub_vgraph -> add_node (b2 -> get_name (), bias2); 202 | loop -> m_sub_vgraph -> add_node (bias2 -> get_name (), sigmoid2); 203 | loop -> m_sub_vgraph -> add_node ("", input_y); 204 | loop -> m_sub_vgraph -> add_node (sigmoid2 -> get_name (), minus); 205 | loop -> m_sub_vgraph -> add_node (input_y -> get_name (), minus); 206 | loop -> m_sub_vgraph -> add_node (minus -> get_name (), abs); 207 | 208 | VirtualGraph* vg = new VirtualGraph (); 209 | vg -> add_node ("", loop); 210 | // 构建计算图 211 | ComputeGraph* train_cg = new ComputeGraph (); 212 | vg -> build_compute_graph (train_cg); 213 | Optimizer* optimizer = new Adadelta (0.2); 214 | train_cg -> m_optimizer = optimizer; 215 | // 对计算图进行修剪 216 | vector endnode_list; 217 | unordered_map::iterator op_node_map_it = abs -> m_op_node_map.begin (); 218 | while (op_node_map_it != abs -> m_op_node_map.end ()) { 219 | 
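// every compute node generated by the AbsSum virtual node (one per unrolled timestep) becomes an end node, so build_subgraph prunes everything that does not feed the loss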
endnode_list.push_back (op_node_map_it -> second); 220 | ++op_node_map_it; 221 | } 222 | train_cg -> build_subgraph (endnode_list); 223 | // 训练 224 | for (int i = 0; i < 20000; ++i) { 225 | vector error; 226 | if (i % 1000 == 0) { 227 | int ptr = ((Input*) (train_cg -> get_node ("Input:1:0:"))) -> m_data_ptr; 228 | cout << tensor_to_int (((Input*) (train_cg -> get_node ("Input:1:0:"))) -> m_data[ptr]) << "+" 229 | << tensor_to_int (((Input*) (train_cg -> get_node ("Input:1:0:"))) -> m_data[ptr + 1]); 230 | } 231 | train_cg -> forward_propagation (error); 232 | train_cg -> back_propagation (); 233 | if (i % 1000 == 0) { 234 | float r[8] = {0}; 235 | vector r_shape (2); r_shape[0] = 1; r_shape[1] = 8; 236 | for (int i = 0; i < 8; ++i) { 237 | ostringstream oss; 238 | oss << i << ":"; 239 | string op_node_name = sigmoid2 -> get_name () + oss.str (); 240 | r[i] = ((OperatorNode*) (sigmoid2 -> m_op_node_map[op_node_name])) -> m_output -> m_tensor[0]; 241 | } 242 | Tensor r_tensor = Tensor (r_shape, r); 243 | cout << " guess = :" << tensor_to_int (&r_tensor) << endl; 244 | } 245 | } 246 | 247 | delete train_cg; 248 | delete vg; 249 | } 250 | -------------------------------------------------------------------------------- /unit_test/tensor_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../include/Tensor.h" 4 | using namespace std; 5 | int main () { 6 | vector shape (2); 7 | shape[0] = 20; shape[1] = 10; 8 | 9 | Tensor* tensor1 = new Tensor (shape); 10 | tensor1 -> init (); 11 | tensor1 -> display (); cout << endl; 12 | 13 | Tensor* tensor2 = new Tensor (shape); 14 | tensor2 -> init (); 15 | tensor2 -> display (); cout << endl; 16 | 17 | Tensor* random_tensor = new Tensor (shape); 18 | random_tensor -> init (); 19 | cout << "init test" << endl; 20 | //random_tensor -> display ();// 初始化验证 21 | 22 | Tensor* mult_result = tensor1 -> matrix_mult (tensor2);// 验证乘法 23 | cout << "matrix mult test" << endl; 24 | //mult_result -> display (); 25 | 26 | Tensor* add_result = tensor1 -> add (tensor2);// 加法验证 27 | cout << "add test 1" << endl; 28 | //add_result -> display (); 29 | 30 | tensor1 -> add (tensor2, add_result);// 加法验证2 31 | cout << "add test 2" << endl; 32 | //add_result -> display (); 33 | 34 | Tensor* ele_mult_result = tensor1 -> element_mult (tensor2); 35 | cout << "element mult test" << endl; 36 | //ele_mult_result -> display (); 37 | 38 | Tensor* scalar_mult_result = tensor1 -> scalar_mult (3.0);// 标量乘法 39 | cout << "scalar mult test" << endl; 40 | scalar_mult_result -> display (); 41 | 42 | tensor1 -> scalar_acc_mult (2.0);// 标量累乘 43 | cout << "scalar acc mult test" << endl; 44 | tensor1 -> display (); 45 | /* 46 | tensor1 -> element_square (); 47 | cout << "element square test" << endl; 48 | tensor1 -> display (); 49 | 50 | vector shape1 (3); 51 | shape1[0] = 2; shape1[1] = 3; shape1[2] = 2;// {{{1,2},{3,4},{5,6}},{{7,8},{9,10},{11,12}}} 52 | float data1[] = {1,2,3,4,5,6,7,8,9,10,11,12}; 53 | Tensor* three_dim_tensor = new Tensor (shape1, data1); 54 | vector idxs (3); 55 | idxs[0] = 1; idxs[1] = 1; idxs[2] = 0; 56 | cout << "index value test" << endl; 57 | cout << three_dim_tensor -> get_value (idxs) << endl; 58 | */ 59 | } 60 | -------------------------------------------------------------------------------- /unit_test/xor_test.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/op_node/Input.h" 2 | #include "../include/op_node/SquareSum.h" 3 | 
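// xor_test: builds a 2-2-1 sigmoid network through the virtual graph, trains it on the four XOR patterns with the Adadelta optimizer, and prints inputs and predictions during the last iterations of training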
#include "../include/op_node/Minus.h" 4 | #include "../include/op_node/Mult.h" 5 | #include "../include/op_node/Bias.h" 6 | #include "../include/op_node/Parameter.h" 7 | #include "../include/VirtualGraph.h" 8 | #include "../include/ComputeGraph.h" 9 | #include "../include/VirtualNode.h" 10 | #include "../include/Optimizer.h" 11 | #include "../include/optimizer/Adadelta.h" 12 | #include 13 | using namespace std; 14 | int main () { 15 | // 准备数据集 16 | vector data_x_list; 17 | vector shape_x; shape_x.push_back (1); shape_x.push_back (2); 18 | float data_x1[] = {0,0}; 19 | float data_x2[] = {0,1}; 20 | float data_x3[] = {1,0}; 21 | float data_x4[] = {1,1}; 22 | data_x_list.push_back (new Tensor (shape_x, data_x1)); 23 | data_x_list.push_back (new Tensor (shape_x, data_x2)); 24 | data_x_list.push_back (new Tensor (shape_x, data_x3)); 25 | data_x_list.push_back (new Tensor (shape_x, data_x4)); 26 | vector data_y_list; 27 | vector shape_y; shape_y.push_back (1); shape_y.push_back (1); 28 | float data_y1[] = {0}; 29 | float data_y2[] = {1}; 30 | float data_y3[] = {1}; 31 | float data_y4[] = {0}; 32 | data_y_list.push_back (new Tensor (shape_y, data_y1)); 33 | data_y_list.push_back (new Tensor (shape_y, data_y2)); 34 | data_y_list.push_back (new Tensor (shape_y, data_y3)); 35 | data_y_list.push_back (new Tensor (shape_y, data_y4)); 36 | 37 | vector shape_w1; shape_w1.push_back (2); shape_w1.push_back (2); 38 | Tensor* w1 = new Tensor (shape_w1); 39 | w1 -> init (); 40 | 41 | vector shape_w2; shape_w2.push_back (2); shape_w2.push_back (1); 42 | Tensor* w2 = new Tensor (shape_w2); 43 | w2 -> init (); 44 | 45 | vector shape_b1; shape_b1.push_back (1); shape_b1.push_back (2); 46 | Tensor* b1 = new Tensor (shape_b1); 47 | b1 -> init (); 48 | 49 | vector shape_b2; shape_b2.push_back (1); shape_b2.push_back (1); 50 | Tensor* b2 = new Tensor (shape_b2); 51 | b2 -> init (); 52 | 53 | // 准备虚拟节点 54 | VirtualNode* input_x = new VirtualNode ("Input", "1"); 55 | input_x -> m_input_data = data_x_list; 56 | 57 | VirtualNode* input_y = new VirtualNode ("Input", "2"); 58 | input_y -> m_input_data = data_y_list; 59 | 60 | VirtualNode* w_1 = new VirtualNode ("Parameter", "1"); 61 | w_1 -> m_data = w1; 62 | 63 | VirtualNode* w_2 = new VirtualNode ("Parameter", "2"); 64 | w_2 -> m_data = w2; 65 | 66 | VirtualNode* b_1 = new VirtualNode ("Parameter", "3"); 67 | b_1 -> m_data = b1; 68 | 69 | VirtualNode* b_2 = new VirtualNode ("Parameter", "4"); 70 | b_2 -> m_data = b2; 71 | 72 | VirtualNode* mult1 = new VirtualNode ("Mult", "1"); 73 | VirtualNode* mult2 = new VirtualNode ("Mult", "2"); 74 | VirtualNode* sig1 = new VirtualNode ("Sigmoid", "1"); 75 | VirtualNode* sig2 = new VirtualNode ("Sigmoid", "2"); 76 | VirtualNode* minus = new VirtualNode ("Minus", "1"); 77 | VirtualNode* ss = new VirtualNode ("SquareSum", "1"); 78 | VirtualNode* bias1 = new VirtualNode ("Bias", "1"); 79 | VirtualNode* bias2 = new VirtualNode ("Bias", "2"); 80 | 81 | // 构建虚拟图 82 | VirtualGraph* vg = new VirtualGraph (); 83 | vg -> add_node ("", input_x); 84 | vg -> add_node ("", w_1); 85 | vg -> add_node (input_x -> get_name (), mult1); 86 | vg -> add_node (w_1 -> get_name (), mult1); 87 | vg -> add_node ("", b_1); 88 | vg -> add_node (mult1 -> get_name (), bias1); 89 | vg -> add_node (b_1 -> get_name (), bias1); 90 | vg -> add_node (bias1 -> get_name (), sig1); 91 | vg -> add_node ("", w_2); 92 | vg -> add_node (sig1 -> get_name (), mult2); 93 | vg -> add_node (w_2 -> get_name (), mult2); 94 | vg -> add_node ("", b_2); 95 | vg -> add_node (mult2 -> 
get_name (), bias2); 96 | vg -> add_node (b_2 -> get_name (), bias2); 97 | vg -> add_node (bias2 -> get_name (), sig2); 98 | vg -> add_node ("", input_y); 99 | vg -> add_node (sig2 -> get_name (), minus); 100 | vg -> add_node (input_y -> get_name (), minus); 101 | vg -> add_node (minus -> get_name (), ss); 102 | 103 | // 生成计算图 104 | ComputeGraph* train_cg = new ComputeGraph (); 105 | vg -> build_compute_graph (train_cg); 106 | // 初始化优化器,Adadelta 107 | Optimizer* optimizer = new Adadelta (1.0); 108 | train_cg -> m_optimizer = optimizer; 109 | // 训练 110 | for (int i = 0; i < 1000; ++i) { 111 | if (i < 900 == 0) { 112 | cout << "input: "; 113 | int ptr = ((Input*) (train_cg -> get_node ("Input:1:0:"))) -> m_data_ptr; 114 | ((Input*) (train_cg -> get_node ("Input:1:0:"))) -> m_data[ptr] -> display (); 115 | } 116 | vector error; 117 | train_cg -> forward_propagation (error); 118 | train_cg -> back_propagation (); 119 | if (i < 900 == 0) { 120 | cout << "xor: "; 121 | ((OperatorNode*) (sig2 -> m_op_node_map["Sigmoid:2:0:"])) -> m_output -> display (); cout << endl; 122 | } 123 | } 124 | 125 | delete train_cg; 126 | delete vg; 127 | } 128 | --------------------------------------------------------------------------------