├── .gitattributes ├── .github └── workflows │ ├── macos.yaml │ ├── static.yml │ └── ubuntu.yml ├── .gitignore ├── README.md ├── README_EN.md ├── autodiff ├── dor.h ├── graph.h ├── mor.h ├── node.h ├── por.h └── vectmath.h ├── conv_backward_test.cpp ├── conv_test.cpp ├── cuda_mat ├── add.cu ├── conv_cu_grad.cu ├── mat_grad.cu └── matrix_cudadef.cu ├── edge_layer_unit_test.cpp ├── grad_edge └── matrix_grad.h ├── index.html ├── install_diff ├── Makefile ├── bin │ ├── ann │ ├── gradient │ ├── gradient_descent │ ├── simple │ └── speed ├── examples │ ├── Makefile │ └── src │ │ ├── ann.cpp │ │ ├── gradient.cpp │ │ ├── gradient_descent.cpp │ │ ├── simple.cpp │ │ └── speed.cpp └── root │ ├── Makefile │ ├── include │ ├── dor 2.h │ ├── dor.h │ ├── graph 2.h │ ├── graph.h │ ├── mor 2.h │ ├── mor.h │ ├── node 2.h │ ├── node.h │ ├── por 2.h │ ├── por.h │ ├── vectmath 2.h │ └── vectmath.h │ └── src │ ├── graph.cpp │ └── node.cpp ├── logistic ├── .logistic_def.h.swp ├── logistic_def.cpp └── logistic_def.h ├── main.cpp ├── matrix ├── .matrix_pro.h.swp ├── matrix_def.h └── matrix_pro.h ├── picture ├── .DS_Store ├── 00.svg ├── 01.svg ├── 02.svg ├── Edge计算引擎.pdf ├── Edge计算引擎.xmind ├── WX20191119-105411@2x.png ├── WX20191119-125244@2x.png ├── apply_axis_0.png ├── apply_axis_1.png ├── autograd.jpg ├── cpu.svg ├── image-20200128154352842.png ├── image-20200418210521131.png ├── jabber.svg ├── logo.png ├── logo2.png ├── nerual_test1.png ├── path.png ├── processwire (1).svg ├── 啊.svg ├── 彩虹.svg ├── 方向.svg ├── 星月.svg ├── 火箭.svg └── 生产流程.pdf ├── root ├── Makefile ├── include │ ├── dor.h │ ├── edgelayer.h │ ├── graph.h │ ├── mor.h │ ├── node.h │ ├── por.h │ └── vectmath.h ├── obj │ ├── graph.o │ └── node.o └── src │ ├── graph.cpp │ └── node.cpp ├── unit_test.cpp ├── update_conv.md ├── vector_add └── welcome ├── 1.cpp ├── 2.txt ├── 3.txt ├── 4.txt ├── big.txt ├── ma ├── main └── score_wel.cpp /.gitattributes: -------------------------------------------------------------------------------- 1 | *.html linguist-language=c++ -------------------------------------------------------------------------------- /.github/workflows/macos.yaml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: Macos-test 4 | 5 | # Controls when the action will run. 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the main branch 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ main ] 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | 16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 17 | jobs: 18 | # This workflow contains a single job called "build" 19 | build: 20 | # The type of runner that the job will run on 21 | runs-on: macos-latest 22 | if: "contains(github.event.head_commit.message, '[build]')" 23 | 24 | # Steps represent a sequence of tasks that will be executed as part of the job 25 | steps: 26 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 27 | - uses: actions/checkout@v2 28 | 29 | # Runs a single command using the runners shell 30 | - name: Run a one-line script 31 | run: echo Hello, Macos! 
32 | - name: Run gcc and test the version 33 | run: gcc -v 34 | - name: lsa 35 | run: ls -a 36 | - name: install_diff dir 37 | run: ls -a 38 | working-directory: ./install_diff 39 | - name: make and install_1 40 | run: make 41 | working-directory: ./install_diff 42 | - name: make and install_2 43 | run: make install 44 | working-directory: ./install_diff 45 | - name: success info 46 | run: echo success install autodiff! 47 | - name: build 48 | run: g++ main.cpp -o ma -lautodiff 49 | - name: build_conv_test 50 | run: g++ conv_test.cpp -o conv_test -lautodiff 51 | - name: run 52 | run: ./ma 53 | - name: run conv test 54 | run: pwd && ./conv_test 55 | - name: run neural network 56 | run: g++ main.cpp -o main -lautodiff && ./main 57 | 58 | -------------------------------------------------------------------------------- /.github/workflows/static.yml: -------------------------------------------------------------------------------- 1 | name: Deploy static content to Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v3 15 | 16 | - name: Build static content 17 | run: | 18 | # 你的构建命令,例如生成静态文件 19 | echo "Building static content..." 20 | 21 | - name: Upload artifact 22 | uses: actions/upload-pages-artifact@v2 # 使用 v2 版本 23 | with: 24 | name: static-content 25 | path: '.' # 上传构建后的静态文件目录 26 | 27 | - name: Deploy to GitHub Pages 28 | uses: actions/deploy-pages@v1 # 使用 v1 版本进行部署 -------------------------------------------------------------------------------- /.github/workflows/ubuntu.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: Ubuntu-test 4 | 5 | # Controls when the action will run. 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the main branch 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ main ] 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | 16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 17 | jobs: 18 | # This workflow contains a single job called "build" 19 | build: 20 | # The type of runner that the job will run on 21 | runs-on: ubuntu-latest 22 | if: "contains(github.event.head_commit.message, '[build]')" 23 | 24 | 25 | # Steps represent a sequence of tasks that will be executed as part of the job 26 | steps: 27 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 28 | - uses: actions/checkout@v2 29 | 30 | # Runs a single command using the runners shell 31 | - name: Run a one-line script 32 | run: echo Hello, Macos! 33 | - name: Run gcc and test the version 34 | run: gcc -v 35 | - name: install_diff dir 36 | run: ls -a 37 | working-directory: ./install_diff 38 | - name: make and install_1 39 | run: make 40 | working-directory: ./install_diff 41 | - name: make and install_2 42 | run: make install 43 | working-directory: ./install_diff 44 | - name: success info 45 | run: echo success install autodiff! 
46 | - name: build 47 | run: g++ main.cpp -o ma -lautodiff 48 | - name: build_conv_test 49 | run: g++ conv_test.cpp -o conv_test -lautodiff 50 | - name: run 51 | run: ./ma 52 | - name: run conv test 53 | run: ./conv_test 54 | - name: run neural network 55 | run: g++ main.cpp -o main -lautodiff && ./main 56 | 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | ma 3 | *.a 4 | conv_test 5 | .DS_Store 6 | test.cpp 7 | .vscode 8 | opencv-4.6.0/* 9 | CMakeFiles/ 10 | CMakeDownloadLog.txt 11 | opencv-4.6.0.zip 12 | main 13 | .gitignore 14 | .gitignore 15 | install_diff/bin/gradient_descent 16 | install_diff/bin/simple 17 | .gitignore 18 | install_diff/bin/ann 19 | install_diff/ 20 | matrix_add 21 | .gitignore 22 | index.html 23 | tools/stream_0_first_frame_20241031_165519_879.jpg 24 | tools/stream_0_first_frame_20241031_165555_446.jpg 25 | tools/stream_0_first_frame_20241031_165645_741.jpg 26 | tools/your_library.so 27 | tools/test_capture 28 | tools/test_capture_frame 29 | tools/read_rtsp.py 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
image-20200418210521131
2 | 3 | 4 | # Edge-Computing-Engine 5 | 6 | An open-source scientific computing engine designed for matrix computation and machine learning tasks, with CUDA acceleration support. 7 | 8 | [![Edge support](https://img.shields.io/badge/SUPPORT-Macos-brightgreen)](https://support.apple.com/downloads/macos) 9 | [![Edge support](https://img.shields.io/badge/SUPPORT-Ubuntu-brightgreen)](https://ubuntu.com/download/server) 10 | [![Edge passing](https://img.shields.io/badge/Edge--CI-passing-blue)](https://github.com/AllenZYJ/Edge-Computing-Engine/actions) 11 | [![GitHub license](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://raw.githubusercontent.com/tesseract-ocr/tesseract/master/LICENSE) 12 | 13 | # Overview 14 | 15 | Edge-Computing-Engine provides a flexible computational graph implementation that supports: 16 | 17 | 1. Forward propagation for neural network inference 18 | 2. Backward propagation for training via automatic differentiation 19 | 3. CUDA-accelerated operations for improved performance on NVIDIA GPUs 20 | 21 | The computational graph is built using nodes representing operations such as matrix multiplication, element-wise operations, and activation functions. 22 | 23 | # 1. GPU Features 24 | 25 | ### 1.1 Operation Types 26 | 27 | The framework supports the following operations: 28 | 29 | - **OP_INPUT**: Input nodes for data and parameters 30 | - **OP_ADD**: Element-wise addition 31 | - **OP_SUB**: Element-wise subtraction 32 | - **OP_MUL**: Element-wise multiplication 33 | - **OP_DIV**: Element-wise division 34 | - **OP_MATMUL**: Matrix multiplication 35 | - **OP_RELU**: ReLU activation function 36 | - **OP_SIGMOID**: Sigmoid activation function 37 | - **OP_TANH**: Tanh activation function (declared but not implemented) 38 | - **OP_SOFTMAX**: Softmax function (declared but not implemented) 39 | 40 | ### 1.2 Core Classes 41 | 42 | 1. **Matrix_CU**: A custom matrix class with CUDA support 43 | 2. **ComputeNode**: Represents a node in the computational graph 44 | 3. 
**ComputeGraph**: Manages the graph, including node creation, forward and backward propagation 45 | 46 | ### 1.3 CUDA Kernels 47 | 48 | The code includes several CUDA kernel functions: 49 | 50 | - **reluForwardKernel/reluBackwardKernel**: Forward and backward propagation for ReLU 51 | - **sigmoidForwardKernel/sigmoidBackwardKernel**: Forward and backward propagation for Sigmoid 52 | - **matmulForwardKernel/matmulBackwardAKernel/matmulBackwardBKernel**: Forward and backward propagation for matrix multiplication 53 | - **mulBackwardKernel**: Backward propagation for element-wise multiplication 54 | - **divBackwardKernel**: Backward propagation for element-wise division 55 | - **negGradKernel**: Helper kernel for computing negative gradients 56 | 57 | 58 | ### 1.4 CUDA Example 59 | 60 | ```cpp 61 | // Create a computational graph 62 | ComputeGraph graph; 63 | 64 | // Create input and weight nodes 65 | ComputeNode* input = graph.addInput(input_rows, input_cols, batch_size); 66 | ComputeNode* weight = graph.addInput(input_cols, output_cols); 67 | 68 | // Initialize with random values 69 | input->value.randomInit(); 70 | weight->value.randomInit(); 71 | 72 | // Build forward computation graph 73 | ComputeNode* fc1 = graph.addMatMul(input, weight); // Fully connected layer 74 | ComputeNode* output_node = graph.addReLU(fc1); // Apply ReLU activation 75 | 76 | // Perform forward propagation 77 | graph.forward(); 78 | 79 | // Compute gradients via backward propagation 80 | graph.backward(output_node); 81 | ``` 82 | ### 1.5 GPU_Usage 83 | 84 | ```shell 85 | root@5353:/backup# nvcc -arch=sm_80 -I/usr/local/cuda/include -L/usr/local/cuda/lib64 cuda_mat/mat_grad.cu -o main && ./main 86 | root@5353:/backup# ./main 87 | ====== Configuration ====== 88 | Input matrix: 10000 x 1000 89 | Weight matrix: 1000 x 500 90 | Batch size: 32 91 | Activation: ReLU 92 | 93 | ====== CUDA Results ====== 94 | Input first 5x5: 95 | 0.840188 0.394383 0.783099 0.79844 0.911647 96 | 0.45724 0.640368 0.717092 0.460067 0.54114 97 | 0.0860517 0.180886 0.426423 0.0470658 0.306902 98 | 0.0587587 0.0138811 0.622212 0.0391351 0.221551 99 | 0.456151 0.24875 0.0699601 0.742097 0.216935 100 | Weight first 5x5: 101 | 0.817311 0.0247108 0.0146763 0.939293 0.502699 102 | 0.604392 0.921644 0.675689 0.948712 0.58803 103 | 0.824073 0.747934 0.0448163 0.757354 0.858343 104 | 0.308744 0.659798 0.941692 0.255731 0.539655 105 | 0.89383 0.609149 0.799556 0.726306 0.640965 106 | Output first 5x5: 107 | 256.076 253.23 258.393 259.965 255.971 108 | 250.843 246.827 252.131 253.493 244.151 109 | 247.064 244.543 251.723 247.424 250.466 110 | 249.944 250.359 256.148 252.21 249.615 111 | 246.675 238.295 252.572 242.94 243.512 112 | Input gradient first 5x5: 113 | 244.628 251.341 255.388 250.4 249.891 114 | 244.628 251.341 255.388 250.4 249.891 115 | 244.628 251.341 255.388 250.4 249.891 116 | 244.628 251.341 255.388 250.4 249.891 117 | 244.628 251.341 255.388 250.4 249.891 118 | Weight gradient first 5x5: 119 | 159791 159791 159791 159791 159791 120 | 160010 160010 160010 160010 160010 121 | 160266 160266 160266 160266 160266 122 | 159899 159899 159899 159899 159899 123 | 159964 159964 159964 159964 159964 124 | 125 | ====== Performance ====== 126 | CUDA computation time: 2201.45 ms 127 | ``` 128 | 129 | ### 1.6 Requirements 130 | 131 | - CUDA-capable NVIDIA GPU (for GPU features) 132 | - CUDA Toolkit 133 | - C++11 or above 134 | - nvcc: NVIDIA (R) Cuda compiler driver 135 | Copyright (c) 2005-2024 NVIDIA Corporation 136 | Built on 
Thu_Mar_28_02:18:24_PDT_2024 137 | Cuda compilation tools, release 12.4, V12.4.131 138 | Build cuda_12.4.r12.4/compiler.34097967_0 139 | 140 | 141 | # 2. CPU Features 142 | 143 | - `read_csv(string &file_path)`: Reads a formatted file (csv) and returns a matrix with automatically calculated dimensions 144 | - Formatted file writing interface (similar to `pandas.to_csv`) 145 | - Broadcasting mechanism for matrices with padding interface 146 | - Fully connected layer forward and backward propagation interfaces with automatic differentiation support 147 | - Matrix differentiation and automatic differentiation interfaces 148 | - Various matrix operations: 149 | - `create(row,cols)`: Creates a matrix with specified dimensions 150 | - `add`, `subtract`, `mul`: Basic matrix operations 151 | - `get_T`: Matrix transposition 152 | - `flatten`: Returns a flattened array 153 | - `matrix_rs`: Matrix structure compression 154 | - `matrix_sum`, `matrix_mean`: Statistical operations 155 | - `iloc`: Matrix slicing 156 | - And many more 157 | - Activation functions (Relu) 158 | - Loss functions (MSE) 159 | - Neural network components (convolutional layers, pooling layers) 160 | ### 2.1 CPU_autodiff_Installation 161 | 162 | ```shell 163 | git clone git@github.com:AllenZYJ/Edge-Computing-Engine.git 164 | cd Edge-Computing-Engine/install_diff && make && make install 165 | ``` 166 | 167 | Note: If you're not a root user, you may need to add sudo to the make install command. 168 | 169 | ### 2.2 CPU_Usage 170 | 171 | ```shell 172 | root@5353:/backup# .g++ main.cpp -o main -lautodiff 173 | root@5353:/backup# ./main 174 | 0.000000+-0.000378*1.000000 175 | -0.000378+0.000000*1.000000 176 | .... 177 | -0.000378+0.000000*1.000000 178 | 0.000000+-0.000378*1.000000 179 | -0.000378+0.000000*1.000000 180 | -0.000378+0.000000*1.000000 181 | weight_1_grad: 182 | [ 183 | -0.000378 -0.000378 -0.000378 184 | -0.000378 -0.000378 -0.000378 185 | -0.000378 -0.000378 -0.000378 186 | ] 187 | neraul end; 188 | ``` 189 | 190 | # 3. Contributing 191 | 192 | Contributions are welcome. To contribute: 193 | 194 | 1. Fork the repository 195 | 2. Create a new branch: `git checkout -b my-new-feature` 196 | 3. Make your changes and commit them: `git commit -am 'Add some feature'` 197 | 4. Push your changes: `git push origin my-new-feature` 198 | 5. Submit a pull request 199 | 200 | Please ensure your code adheres to the existing style and passes tests before submitting. 201 | 202 | ## Stargazers over time 203 | 204 | [![Stargazers over time](https://starchart.cc/AllenZYJ/Edge-Computing-Engine.svg)](https://starchart.cc/AllenZYJ/Edge-Computing-Engine) 205 | 206 | ## Contact 207 | 208 | - Email: zk@likedge.top or edge@ibooker.org.cn 209 | - Website: [Likedge](http://likedge.top/) 210 | 211 | ## License 212 | 213 | Edge-Computing-Engine is released under the Apache 2.0 License. See the [`LICENSE`](https://www.apache.org/licenses/LICENSE-2.0) file for details. -------------------------------------------------------------------------------- /README_EN.md: -------------------------------------------------------------------------------- 1 | 2 |
image-20200418210521131
3 | 4 | Edge-Computing-Engine 5 |
An open-source scientific computing engine 6 | 7 | 8 | Email: zk@likedge.top 9 | 10 | [![Edge support](https://img.shields.io/badge/SUPPORT-Macos-brightgreen)](https://support.apple.com/downloads/macos) 11 | [![Edge support](https://img.shields.io/badge/SUPPORT-Ubuntu-brightgreen)](https://ubuntu.com/download/server) 12 | [![Edge passing](https://img.shields.io/badge/Edge--CI-passing-blue)](https://github.com/AllenZYJ/Edge-Computing-Engine/actions) 13 | [![GitHub license](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://raw.githubusercontent.com/tesseract-ocr/tesseract/master/LICENSE) 14 | 15 | 16 | 17 |
18 | 19 | 20 | ------ 21 | Edge-Computing-Engine is an open-source scientific computing engine designed for matrix computation and machine learning tasks. It provides a wide range of matrix operations and neural network building blocks. 22 | 23 | Email: zk@likedge.top 24 | 25 | 26 | 27 | ## Features 28 | 29 | - `read_csv(string &file_path)`: Reads a formatted file (csv) and returns a matrix with automatically calculated dimensions. 30 | - Formatted file writing interface. Similar to `pandas.to_csv`. 31 | - Broadcasting mechanism for matrices with padding interface. 32 | - Fully connected layer forward and backward propagation interfaces with automatic differentiation support. 33 | - Matrix differentiation and automatic differentiation interfaces. 34 | - `save_txt(Matrix mid1,string path = "./",string delimiter = ",",string header="./")`: Reads the file header and writes formatted files. It supports writing matrix-type data, custom headers, writing file paths, and custom delimiters (default is ", "). 35 | - `create(row,cols)`: Creates a matrix with specified dimensions and initializes all elements to 0. 36 | - `move_ele(int &ele1, int &ele2)`: Changes the value of an element at a specific position. 37 | - `add(Matrix mid1, Matrix mid2, int flag = 1)`: Matrix addition operation with optional bitwise operation acceleration. 38 | - `subtract(Matrix mid1, Matrix mid2)`: Matrix subtraction operation. 39 | - `mul(Matrix mid1, Matrix mid2)`: Matrix multiplication operation. 40 | - `times_mat(int times,Matrix mid1)`: Scalar matrix multiplication. 41 | - `get_T(Matrix mid1)`: Matrix transposition operation. 42 | - `mul(matrix1,matrix2)`: Matrix product (complete mathematical definition). 43 | - `flatten(Matrix mid1)`: Returns a flattened array. 44 | - `matrix_rs(Matrix mid1,int rs_row,int rs_col)`: Matrix structure compression. 45 | - `matrix_sum(Matrix mid1)`: Matrix summation. 46 | - `matrix_mean(Matrix mid1)`: Matrix mean. 47 | - `apply(Matrix mid1,Matrix mid2,int axis = 0)`: Matrix concatenation. 48 | - `iloc(Matrix mid1,int start_x=0,int end_x=0,int start_y=0,int end_y=0)`: Matrix slicing. 49 | - `mul_simple(Matrix mid1,Matrix mid2)`: Element-wise matrix multiplication for machine learning applications. 50 | - `Relu`: Activation function matrix interface. 51 | - `MSE`: Mean squared error matrix interface. 52 | - Random weight matrix creation interface. 53 | - Convolutional neural network definition (including but not limited to convolution kernel, pooling layer definition, and custom loss interface). 54 | 55 | ## Requirements 56 | 57 | - C++11 or above. 58 | 59 | ## Installation 60 | 61 | - Clone the repository: `git clone git@github.com:AllenZYJ/Edge-Computing-Engine.git` 62 | - Build the project: `cd Edge-Computing-Engine/install_diff && make && make install` 63 | 64 | ## Usage 65 | 66 | 67 | ```shell 68 | g++ main.cpp -o main -lautodiff 69 | ``` 70 | 71 | ```shell 72 | ./main 73 | ``` 74 | 75 | ## Contributing 76 | 77 | Contributions to Edge-Computing-Engine are welcome. To contribute, please follow these steps: 78 | 79 | - Fork the repository. 80 | - Create a new branch for your feature or bug fix: `git checkout -b my-new-feature` 81 | - Make your changes and commit them: `git commit -am 'Add some feature'` 82 | - Push your changes to the branch: `git push origin my-new-feature` 83 | - Submit a pull request. 84 | 85 | Please ensure your code adheres to the existing style and passes the existing tests before submitting a pull request. 
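## Minimal autodiff example

For reference, the `main.cpp` compiled in the Usage section can be just a few lines built on the scalar `Node` API declared in `autodiff/node.h` (operator overloads plus `Node::gradient`). The sketch below is illustrative only and is not part of the shipped sources: the include path is an assumption (after `make install` the header may instead be visible as `<node.h>`), and it assumes `f.gradient(x)` returns the derivative of `f` with respect to `x`.

```cpp
#include <iostream>
#include "autodiff/node.h"  // assumed include path; adjust to your install layout

int main() {
    Node x = 2.0;                 // leaf node
    Node y = 3.0;                 // leaf node
    Node f = x * y + sin(x);      // the expression is recorded as the operators run

    std::cout << "f     = " << f << std::endl;
    // Assumption: gradient(x) is d(f)/d(x) evaluated at the current values.
    std::cout << "df/dx = " << f.gradient(x) << std::endl;  // y + cos(x) ~= 2.5839
    std::cout << "df/dy = " << f.gradient(y) << std::endl;  // x = 2
    return 0;
}
```

Build and run it exactly as in the Usage section: `g++ main.cpp -o main -lautodiff && ./main`.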
86 | 87 | ## License 88 | 89 | email:zk@likedge.top | edge@ibooker.org.cn 90 | 91 | The author's personal website is [Likedge](http://likedge.top/), and the author's email is zk@likedge.top. 92 | 93 | Edge-Computing-Engine is released under the Apache2.0. See the `LICENSE` file for details. 94 | 95 | 96 |
97 | 98 | -------------------------------------------------------------------------------- /autodiff/dor.h: -------------------------------------------------------------------------------- 1 | #ifndef DYADIC_OPERATION_RESULT 2 | #define DYADIC_OPERATION_RESULT 3 | 4 | struct DyadicOperationResult { 5 | double value; 6 | double left_grad; 7 | double right_grad; 8 | 9 | DyadicOperationResult(double value, double left_grad, double right_grad){ 10 | this->value = value; 11 | this->left_grad = left_grad; 12 | this->right_grad = right_grad; 13 | } 14 | }; 15 | 16 | #endif /* end of include guard: DYADIC_OPERATION_RESULT */ 17 | -------------------------------------------------------------------------------- /autodiff/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAPH_H 2 | #define GRAPH_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class Graph { 9 | private: 10 | std::map > > nodes; 11 | static Graph* instance; 12 | Graph(); 13 | 14 | public: 15 | static long int uid_counter; 16 | static long int uid(); 17 | static Graph* getInstance(); 18 | 19 | void connect(const long int& uid, const std::pair& edge); 20 | std::vector > get(const long int& uid) const; 21 | bool has(const long int& uid) const; 22 | 23 | void new_recording(); 24 | }; 25 | 26 | #endif /* end of include guard: GRAPH_H */ 27 | -------------------------------------------------------------------------------- /autodiff/mor.h: -------------------------------------------------------------------------------- 1 | #ifndef MONADIC_OPERATION_RESULT 2 | #define MONADIC_OPERATION_RESULT 3 | 4 | struct MonadicOperationResult { 5 | double value; 6 | double grad; 7 | 8 | MonadicOperationResult(double value, double grad){ 9 | this->value = value; 10 | this->grad = grad; 11 | } 12 | }; 13 | 14 | #endif /* end of include guard: MONADIC_OPERATION_RESULT */ 15 | -------------------------------------------------------------------------------- /autodiff/node.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_H 2 | #define NODE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "graph.h" 8 | #include "mor.h" 9 | #include "dor.h" 10 | #include "por.h" 11 | 12 | class Node { 13 | private: 14 | double value; 15 | long int uid; 16 | 17 | double gradient_recursive(Graph* graph, const long int& current_uid, const long int& stop_uid) const; 18 | 19 | public: 20 | Node(const double& value=0); 21 | Node(const Node& node); 22 | 23 | static Node monadic_operation(const Node& n, MonadicOperationResult (*)(const double&)); 24 | static Node dyadic_operation(const Node& l, const Node& r, DyadicOperationResult (*)(const double&, const double&)); 25 | static Node polyadic_operation(const std::vector& nodes, PolyadicOperationResult (*)(const std::vector&)); 26 | 27 | double gradient(const Node& node) const; 28 | std::vector gradient(const std::vector& nodes) const; 29 | std::vector > gradient(const std::vector >& nodes) const; 30 | 31 | friend Node operator+(const Node& l, const Node& r); 32 | friend Node operator-(const Node& l, const Node& r); 33 | friend Node operator*(const Node& l, const Node& r); 34 | friend Node operator/(const Node& l, const Node& r); 35 | 36 | Node& operator+=(const Node& r); 37 | Node& operator-=(const Node& r); 38 | Node& operator*=(const Node& r); 39 | Node& operator/=(const Node& r); 40 | 41 | friend bool operator==(const Node& l, const Node& r); 42 | friend bool operator<(const Node& l, const Node& r); 43 | friend bool 
operator>(const Node& l, const Node& r); 44 | friend bool operator<=(const Node& l, const Node& r); 45 | friend bool operator>=(const Node& l, const Node& r); 46 | 47 | friend Node sin(const Node& x); 48 | friend Node cos(const Node& x); 49 | friend Node tan(const Node& x); 50 | friend Node sinh(const Node& x); 51 | friend Node cosh(const Node& x); 52 | friend Node tanh(const Node& x); 53 | friend Node asin(const Node& x); 54 | friend Node acos(const Node& x); 55 | friend Node atan(const Node& x); 56 | 57 | friend Node log(const Node& x, const Node& base); 58 | friend Node log10(const Node& x); 59 | friend Node ln(const Node& x); 60 | 61 | friend Node pow(const Node& x, const Node& p); 62 | friend Node exp(const Node& x); 63 | friend Node sqrt(const Node& x); 64 | 65 | friend Node abs(const Node& x); 66 | friend Node min(const Node& l, const Node& r); 67 | friend Node max(const Node& l, const Node& r); 68 | 69 | friend std::ostream& operator<<(std::ostream& os, const Node& node); 70 | }; 71 | 72 | #endif /* end of include guard: NODE_H */ 73 | -------------------------------------------------------------------------------- /autodiff/por.h: -------------------------------------------------------------------------------- 1 | #ifndef POLYADIC_OPERATION_RESULT 2 | #define POLYADIC_OPERATION_RESULT 3 | 4 | #include 5 | 6 | struct PolyadicOperationResult { 7 | double value; 8 | std::vector gradients; 9 | 10 | PolyadicOperationResult(double value, const std::vector& gradients){ 11 | this->value = value; 12 | this->gradients = gradients; 13 | } 14 | }; 15 | 16 | #endif /* end of include guard: POLYADIC_OPERATION_RESULT */ 17 | -------------------------------------------------------------------------------- /autodiff/vectmath.h: -------------------------------------------------------------------------------- 1 | #ifndef VECTMATH 2 | #define VECTMATH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // dot product 11 | template 12 | std::vector > dot(const std::vector >& a, const std::vector >& b){ 13 | assert(a[0].size()==b.size()); 14 | 15 | T w=0; 16 | std::vector > result(a.size(), std::vector(b[0].size())); 17 | for (int i=0 ; i 32 | std::vector& operator-=(std::vector& u, const std::vector& v){ 33 | assert(u.size()==v.size()); 34 | for(size_t i=0 ; i 41 | std::vector operator+(const std::vector& u, const std::vector& v){ 42 | assert(u.size()==v.size()); 43 | std::vector w(u.size()); 44 | for(size_t i=0 ; i 51 | std::vector operator*(const S& s, const std::vector& u){ 52 | std::vector result(u.size()); 53 | for(size_t i=0 ; i 60 | std::vector& operator>>(U (*fun)(U&), std::vector& u){ 61 | std::transform(u.begin(), u.end(), u.begin(), fun); 62 | return u; 63 | } 64 | 65 | template 66 | std::vector& operator>>(S (*fun)(S&), std::vector& u){ 67 | for(auto& v : u){ 68 | fun >> v; 69 | } 70 | return u; 71 | } 72 | 73 | template 74 | std::vector& operator>>(U (*fun)(), std::vector& u){ 75 | for(auto& e : u){ 76 | e = fun(); 77 | } 78 | return u; 79 | } 80 | 81 | template 82 | std::vector& operator>>(S (*fun)(), std::vector& u){ 83 | for(auto& v : u){ 84 | fun >> v; 85 | } 86 | return u; 87 | } 88 | 89 | template 90 | std::ostream& operator<<(std::ostream& os, const std::vector& u){ 91 | os << "["; 92 | for(size_t i=0 ; i 2 | #include 3 | #include "./matrix/matrix_def.h" 4 | #include "./matrix/matrix_pro.h" 5 | #include "./root/include/edgelayer.h" 6 | 7 | // 辅助函数:检查两个浮点数是否近似相等 8 | bool is_close(float a, float b, float rtol = 1e-5, float atol = 1e-8) { 9 | return fabs(a - 
b) <= (atol + rtol * fabs(b)); 10 | } 11 | 12 | int main() { 13 | // 创建测试输入 (batch=1, channels=3, height=4, width=4) 14 | Matrix4d input = CreateMatrix4d(1, 3, 4, 4); 15 | 16 | // 用固定的测试值填充输入 17 | float test_input[3][4][4] = { 18 | {{1,2,3,4}, {2,3,4,5}, {3,4,5,6}, {4,5,6,7}}, // channel 1 19 | {{2,4,6,8}, {4,6,8,10}, {6,8,10,12}, {8,10,12,14}}, // channel 2 20 | {{3,6,9,12}, {6,9,12,15}, {9,12,15,18}, {12,15,18,21}} // channel 3 21 | }; 22 | 23 | for(int c = 0; c < 3; c++) { 24 | for(int i = 0; i < 4; i++) { 25 | for(int j = 0; j < 4; j++) { 26 | input.matrix4d[0].matrix3d[c].matrix[i][j] = test_input[c][i][j]; 27 | } 28 | } 29 | } 30 | 31 | // 创建卷积层并设置固定的卷积核 32 | edge_layer* conv = new conv2d(input, 3, 2, 1, 3, 0, 1); 33 | conv2d* conv_ptr = dynamic_cast(conv); 34 | 35 | // 设置固定的卷积核值 36 | float test_kernel[3][2][3][3] = { 37 | {{{1,0,1}, {0,1,0}, {1,0,1}}, // in_channel 0, out_channel 0 38 | {{0,1,0}, {1,0,1}, {0,1,0}}}, // in_channel 0, out_channel 1 39 | {{{1,1,0}, {1,0,1}, {0,1,1}}, // in_channel 1, out_channel 0 40 | {{0,1,1}, {1,0,1}, {1,1,0}}}, // in_channel 1, out_channel 1 41 | {{{0,0,1}, {0,1,0}, {1,0,0}}, // in_channel 2, out_channel 0 42 | {{1,0,0}, {0,1,0}, {0,0,1}}} // in_channel 2, out_channel 1 43 | }; 44 | 45 | for(int in_c = 0; in_c < 3; in_c++) { 46 | for(int out_c = 0; out_c < 2; out_c++) { 47 | conv_ptr->set_kernel(&test_kernel[in_c][out_c][0][0], in_c, out_c); 48 | } 49 | } 50 | 51 | // 前向传播 52 | Matrix4d output = conv->forward(input); 53 | std::cout << "Forward output shape:" << std::endl; 54 | print_shape(output); 55 | std::cout << "Forward output values:" << std::endl; 56 | cout_mat4d(output); 57 | 58 | // 验证前向传播的输出尺寸 59 | assert(output.batch == 1); 60 | assert(output.dep == 2); 61 | // 计算期望的输出尺寸 62 | int expected_size = ((4 + 2*1 - 3) / 1) + 1; // ((4 + 2 - 3) / 1) + 1 = 4 63 | assert(output.wid == expected_size); 64 | assert(output.high == expected_size); 65 | 66 | // 创建固定的梯度输出 67 | int output_size = ((4 + 2*1 - 3) / 1) + 1; // 计算正确的输出尺寸 68 | Matrix4d grad_output = CreateMatrix4d(1, 2, output_size, output_size); 69 | float test_grad[2][4][4] = { 70 | {{1,1,1,1}, {1,1,1,1}, {1,1,1,1}, {1,1,1,1}}, // out_channel 0 71 | {{0.5,0.5,0.5,0.5}, {0.5,0.5,0.5,0.5}, {0.5,0.5,0.5,0.5}, {0.5,0.5,0.5,0.5}} // out_channel 1 72 | }; 73 | 74 | for(int c = 0; c < 2; c++) { 75 | for(int i = 0; i < 4; i++) { 76 | for(int j = 0; j < 4; j++) { 77 | grad_output.matrix4d[0].matrix3d[c].matrix[i][j] = test_grad[c][i][j]; 78 | } 79 | } 80 | } 81 | 82 | // 反向传播 83 | Matrix4d grad_input = conv->backward(grad_output); 84 | std::cout << "Backward gradients shape:" << std::endl; 85 | print_shape(grad_input); 86 | std::cout << "Backward gradients values:" << std::endl; 87 | cout_mat4d(grad_input); 88 | 89 | // 验证反向传播的梯度尺寸 90 | assert(grad_input.batch == 1); 91 | assert(grad_input.dep == 3); 92 | assert(grad_input.wid == 4); 93 | assert(grad_input.high == 4); 94 | 95 | // 验证一些特定位置的梯度值 96 | // 手动计算的预期梯度值 97 | float expected_grads[3][4][4] = { 98 | // channel 0 99 | { 100 | {2.0f, 1.5f, 1.5f, 1.0f}, 101 | {1.5f, 2.0f, 2.0f, 1.5f}, 102 | {1.5f, 2.0f, 2.0f, 1.5f}, 103 | {1.0f, 1.5f, 1.5f, 1.0f} 104 | }, 105 | // channel 1 106 | { 107 | {2.5f, 2.0f, 2.0f, 1.5f}, 108 | {2.0f, 3.0f, 3.0f, 2.0f}, 109 | {2.0f, 3.0f, 3.0f, 2.0f}, 110 | {1.5f, 2.0f, 2.0f, 1.5f} 111 | }, 112 | // channel 2 113 | { 114 | {1.5f, 1.0f, 1.0f, 0.5f}, 115 | {1.0f, 1.5f, 1.5f, 1.0f}, 116 | {1.0f, 1.5f, 1.5f, 1.0f}, 117 | {0.5f, 1.0f, 1.0f, 0.5f} 118 | } 119 | }; 120 | 121 | for(int c = 0; c < 3; c++) { 122 | for(int i = 0; i < 4; 
i++) { 123 | for(int j = 0; j < 4; j++) { 124 | float actual = grad_input.matrix4d[0].matrix3d[c].matrix[i][j]; 125 | float expected = expected_grads[c][i][j]; 126 | if (!is_close(actual, expected)) { 127 | std::cout << "Gradient mismatch at position [" << c << "][" << i << "][" << j << "]" << std::endl; 128 | std::cout << "Expected: " << expected << ", Got: " << actual << std::endl; 129 | } 130 | } 131 | } 132 | } 133 | 134 | std::cout << "All tests passed!" << std::endl; 135 | 136 | delete conv; 137 | return 0; 138 | } -------------------------------------------------------------------------------- /conv_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ███████╗██████╗ ██████╗ ███████╗ ███████╗███╗ ██╗ ██████╗ ██╗███╗ ██╗███████╗ 3 | ██╔════╝██╔══██╗██╔════╝ ██╔════╝ ██╔════╝████╗ ██║██╔════╝ ██║████╗ ██║██╔════╝ 4 | █████╗ ██║ ██║██║ ███╗█████╗ █████╗ ██╔██╗ ██║██║ ███╗██║██╔██╗ ██║█████╗ 5 | ██╔══╝ ██║ ██║██║ ██║██╔══╝ ██╔══╝ ██║╚██╗██║██║ ██║██║██║╚██╗██║██╔══╝ 6 | ███████╗██████╔╝╚██████╔╝███████╗ ███████╗██║ ╚████║╚██████╔╝██║██║ ╚████║███████╗ 7 | ╚══════╝╚═════╝ ╚═════╝ ╚══════╝ ╚══════╝╚═╝ ╚═══╝ ╚═════╝ ╚═╝╚═╝ ╚═══╝╚══════╝ 8 | Author:Edge 9 | Web:likedge.top 10 | Date:20200925 11 | */ 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "./matrix/matrix_def.h" 19 | #include "./matrix/matrix_pro.h" 20 | #include "./logistic/logistic_def.h" 21 | #include "./autodiff/node.h" 22 | #include "./grad_edge/matrix_grad.h" 23 | #include "./root/include/edgelayer.h" 24 | #include "./welcome/score_wel.cpp" 25 | // #include 26 | 27 | using namespace std; 28 | 29 | clock_t start_t, end_t; 30 | double duration; 31 | // #include"opencv4/opencv2/imgcodecs.hpp" 32 | // #include 33 | // using namespace cv; 34 | 35 | int main() 36 | { 37 | welcome(); 38 | cout << setprecision(13); 39 | start_t = clock(); 40 | Matrix4d mat_call = CreateMatrix4d(2, 3, 640, 640); 41 | print_shape(mat_call); 42 | 43 | edge_layer *conv2d_1 = new conv2d(mat_call, 3, 12, 1, 2, 0, 0); 44 | Matrix4d output_conv = conv2d_1->forward(mat_call); 45 | print_shape(output_conv); 46 | int parameter_1 = conv2d_1->parameter_counter(); 47 | 48 | edge_layer *conv2d_2 = new conv2d(output_conv, 12, 24, 7, 7, 0, 0); 49 | Matrix4d output_conv2 = conv2d_2->forward(output_conv); 50 | print_shape(output_conv2); 51 | std::cout << "EDGE Matrix Shape: (" << output_conv2.batch << "," << output_conv2.dep << "," 52 | << output_conv2.wid << "," << output_conv2.high << ")" << std::endl; 53 | parameter_1 += conv2d_2->parameter_counter(); 54 | 55 | edge_layer *conv2d_3 = new conv2d(output_conv2, 24, 1, 3, 3, 0, 0); 56 | Matrix4d output_conv3 = conv2d_3->forward(output_conv2); 57 | print_shape(output_conv3); 58 | parameter_1 += conv2d_3->parameter_counter(); 59 | end_t = clock(); 60 | cout<<"total parameters :"< 2 | #include 3 | #include 4 | #include 5 | 6 | // CUDA 核函数:向量加法 7 | __global__ void vectorAdd(const float *A, const float *B, float *C, int numElements) 8 | { 9 | int i = blockDim.x * blockIdx.x + threadIdx.x; 10 | if (i < numElements) 11 | { 12 | C[i] = A[i] + B[i]; 13 | } 14 | } 15 | 16 | int main() 17 | { 18 | // 设置向量大小 (1百万个元素) 19 | int numElements = 1 << 20; 20 | size_t size = numElements * sizeof(float); 21 | std::cout << "向量加法测试,每个向量包含 " << numElements << " 个元素\n"; 22 | 23 | // 分配主机内存 24 | float *h_A = new float[numElements]; 25 | float *h_B = new float[numElements]; 26 | float *h_C = new float[numElements]; 27 | float *d_A = nullptr; 28 | 
float *d_B = nullptr; 29 | float *d_C = nullptr; 30 | 31 | // 初始化输入向量 32 | for (int i = 0; i < numElements; ++i) 33 | { 34 | h_A[i] = rand() / (float)RAND_MAX; 35 | h_B[i] = rand() / (float)RAND_MAX; 36 | } 37 | 38 | // 分配设备内存 39 | cudaMalloc((void **)&d_A, size); 40 | cudaMalloc((void **)&d_B, size); 41 | cudaMalloc((void **)&d_C, size); 42 | 43 | // 拷贝数据到设备 44 | cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice); 45 | cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice); 46 | 47 | // 启动 CUDA 核函数 48 | int threadsPerBlock = 256; 49 | int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; 50 | std::cout << "CUDA 核函数配置: " << blocksPerGrid << " 个块, 每个块 " << threadsPerBlock << " 个线程\n"; 51 | 52 | // 测试 GPU 性能 53 | auto start_gpu = std::chrono::high_resolution_clock::now(); 54 | vectorAdd<<>>(d_A, d_B, d_C, numElements); 55 | cudaDeviceSynchronize(); // 等待 GPU 完成 56 | auto end_gpu = std::chrono::high_resolution_clock::now(); 57 | std::chrono::duration gpu_time = end_gpu - start_gpu; 58 | std::cout << "GPU 执行时间: " << gpu_time.count() * 1000 << " 毫秒\n"; 59 | 60 | // 拷贝结果回主机 61 | cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost); 62 | 63 | // 测试 CPU 性能 64 | auto start_cpu = std::chrono::high_resolution_clock::now(); 65 | for (int i = 0; i < numElements; ++i) 66 | { 67 | h_C[i] = h_A[i] + h_B[i]; 68 | } 69 | auto end_cpu = std::chrono::high_resolution_clock::now(); 70 | std::chrono::duration cpu_time = end_cpu - start_cpu; 71 | std::cout << "CPU 执行时间: " << cpu_time.count() * 1000 << " 毫秒\n"; 72 | 73 | // 验证结果 74 | for (int i = 0; i < numElements; i++) 75 | { 76 | if (fabs(h_A[i] + h_B[i] - h_C[i]) > 1e-5) 77 | { 78 | std::cerr << "结果验证失败!\n"; 79 | break; 80 | } 81 | } 82 | 83 | // 释放内存 84 | delete[] h_A; 85 | delete[] h_B; 86 | delete[] h_C; 87 | cudaFree(d_A); 88 | cudaFree(d_B); 89 | cudaFree(d_C); 90 | 91 | std::cout << "测试完成\n"; 92 | 93 | return 0; 94 | } -------------------------------------------------------------------------------- /cuda_mat/conv_cu_grad.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "mat_grad.cu" 12 | 13 | // 已有的CPU卷积实现 14 | void conv2dCPU(const float* input, const float* kernel, float* output, 15 | int batch_size, int in_channels, int out_channels, 16 | int in_height, int in_width, int kernel_height, int kernel_width, 17 | int out_height, int out_width, int stride_height, int stride_width, 18 | int pad_height, int pad_width) { 19 | 20 | for (int n = 0; n < batch_size; n++) { 21 | for (int c = 0; c < out_channels; c++) { 22 | for (int h = 0; h < out_height; h++) { 23 | for (int w = 0; w < out_width; w++) { 24 | float sum = 0.0f; 25 | 26 | for (int ic = 0; ic < in_channels; ic++) { 27 | for (int kh = 0; kh < kernel_height; kh++) { 28 | for (int kw = 0; kw < kernel_width; kw++) { 29 | int ih = h * stride_height + kh - pad_height; 30 | int iw = w * stride_width + kw - pad_width; 31 | 32 | if (ih >= 0 && ih < in_height && iw >= 0 && iw < in_width) { 33 | int input_idx = ((n * in_channels + ic) * in_height + ih) * in_width + iw; 34 | int kernel_idx = ((c * in_channels + ic) * kernel_height + kh) * kernel_width + kw; 35 | 36 | sum += input[input_idx] * kernel[kernel_idx]; 37 | } 38 | } 39 | } 40 | } 41 | 42 | int output_idx = ((n * out_channels + c) * out_height + h) * out_width + w; 43 | output[output_idx] = sum; 44 | } 45 | } 46 | } 47 | } 48 | } 49 | 50 | // 新增的速度比较函数 51 | void 
compareConvolutionSpeed() { 52 | std::cout << "\n====== CPU vs GPU Convolution Speed Comparison ======\n"; 53 | 54 | // 定义不同的测试配置 55 | struct TestConfig { 56 | int batch_size; 57 | int in_channels; 58 | int in_height; 59 | int in_width; 60 | int out_channels; 61 | int kernel_size; 62 | int stride; 63 | int padding; 64 | }; 65 | 66 | std::vector configs = { 67 | {1, 3, 32, 32, 16, 3, 1, 1}, // 小尺寸 68 | {4, 3, 64, 64, 32, 3, 1, 1}, // 中等尺寸 69 | {16, 3, 128, 128, 64, 3, 1, 1}, // 大尺寸 70 | {32, 3, 224, 224, 64, 3, 1, 1}, // 类似VGG的输入 71 | {32, 64, 56, 56, 128, 3, 1, 1}, // 类似中层特征图 72 | {16, 128, 28, 28, 256, 3, 1, 1}, // 类似深层特征图 73 | }; 74 | 75 | // 表头 76 | std::cout << std::left << std::setw(20) << "配置" 77 | << std::setw(15) << "输入尺寸" 78 | << std::setw(15) << "内核尺寸" 79 | << std::setw(15) << "CPU时间(ms)" 80 | << std::setw(15) << "GPU时间(ms)" 81 | << std::setw(15) << "加速比(x)" 82 | << std::endl; 83 | std::cout << std::string(95, '-') << std::endl; 84 | 85 | // 对每个配置进行测试 86 | for (size_t i = 0; i < configs.size(); i++) { 87 | const auto& cfg = configs[i]; 88 | 89 | // 计算输出尺寸 90 | int out_height = ((cfg.in_height + 2 * cfg.padding - cfg.kernel_size) / cfg.stride) + 1; 91 | int out_width = ((cfg.in_width + 2 * cfg.padding - cfg.kernel_size) / cfg.stride) + 1; 92 | 93 | // 创建计算图和节点(GPU) 94 | ComputeGraph graph; 95 | ComputeNode* input = graph.addInput(cfg.batch_size * cfg.in_channels, cfg.in_height * cfg.in_width); 96 | input->batchSize = cfg.batch_size; 97 | ComputeNode* kernel = graph.addInput(cfg.out_channels, cfg.in_channels * cfg.kernel_size * cfg.kernel_size); 98 | 99 | // 随机初始化值 100 | for (int i = 0; i < input->value.row * input->value.col; i++) { 101 | input->value.data[i] = static_cast(rand()) / RAND_MAX; 102 | } 103 | 104 | for (int i = 0; i < kernel->value.row * kernel->value.col; i++) { 105 | kernel->value.data[i] = static_cast(rand()) / RAND_MAX * 0.1f; 106 | } 107 | 108 | // 创建卷积节点 109 | ComputeNode* conv = graph.addConv2D(input, kernel, cfg.in_height, cfg.in_width, cfg.in_channels, 110 | cfg.stride, cfg.stride, cfg.padding, cfg.padding); 111 | 112 | // 为CPU实现准备数据 113 | std::vector input_cpu(cfg.batch_size * cfg.in_channels * cfg.in_height * cfg.in_width); 114 | std::vector kernel_cpu(cfg.out_channels * cfg.in_channels * cfg.kernel_size * cfg.kernel_size); 115 | std::vector output_cpu(cfg.batch_size * cfg.out_channels * out_height * out_width, 0.0f); 116 | 117 | // 重塑输入数据为CPU格式 118 | for (int n = 0; n < cfg.batch_size; n++) { 119 | for (int c = 0; c < cfg.in_channels; c++) { 120 | for (int h = 0; h < cfg.in_height; h++) { 121 | for (int w = 0; w < cfg.in_width; w++) { 122 | int flat_idx = (n * cfg.in_channels + c) * (cfg.in_height * cfg.in_width) + (h * cfg.in_width + w); 123 | int tensor_idx = ((n * cfg.in_channels + c) * cfg.in_height + h) * cfg.in_width + w; 124 | input_cpu[tensor_idx] = input->value.data[flat_idx]; 125 | } 126 | } 127 | } 128 | } 129 | 130 | // 重塑内核数据为CPU格式 131 | for (int oc = 0; oc < cfg.out_channels; oc++) { 132 | for (int ic = 0; ic < cfg.in_channels; ic++) { 133 | for (int kh = 0; kh < cfg.kernel_size; kh++) { 134 | for (int kw = 0; kw < cfg.kernel_size; kw++) { 135 | int flat_idx = oc * (cfg.in_channels * cfg.kernel_size * cfg.kernel_size) + 136 | (ic * cfg.kernel_size * cfg.kernel_size + kh * cfg.kernel_size + kw); 137 | int tensor_idx = ((oc * cfg.in_channels + ic) * cfg.kernel_size + kh) * cfg.kernel_size + kw; 138 | kernel_cpu[tensor_idx] = kernel->value.data[flat_idx]; 139 | } 140 | } 141 | } 142 | } 143 | 144 | // 计时器 145 | std::chrono::duration 
gpu_duration; 146 | std::chrono::duration cpu_duration; 147 | 148 | // GPU卷积计时(多次运行取平均值) 149 | const int num_runs = 10; 150 | auto gpu_start = std::chrono::high_resolution_clock::now(); 151 | for (int run = 0; run < num_runs; run++) { 152 | graph.forward(); 153 | // 为避免结果被缓存,修改输入小量值 154 | input->value.data[0] += 1e-5f; 155 | } 156 | auto gpu_end = std::chrono::high_resolution_clock::now(); 157 | gpu_duration = (gpu_end - gpu_start) / num_runs; 158 | 159 | // CPU卷积计时(对于大尺寸配置减少运行次数) 160 | int cpu_runs = (i < 3) ? num_runs : 3; // 大尺寸配置少运行几次 161 | auto cpu_start = std::chrono::high_resolution_clock::now(); 162 | for (int run = 0; run < cpu_runs; run++) { 163 | conv2dCPU(input_cpu.data(), kernel_cpu.data(), output_cpu.data(), 164 | cfg.batch_size, cfg.in_channels, cfg.out_channels, 165 | cfg.in_height, cfg.in_width, cfg.kernel_size, cfg.kernel_size, 166 | out_height, out_width, cfg.stride, cfg.stride, 167 | cfg.padding, cfg.padding); 168 | } 169 | auto cpu_end = std::chrono::high_resolution_clock::now(); 170 | cpu_duration = (cpu_end - cpu_start) / cpu_runs; 171 | 172 | // 计算加速比 173 | double speedup = cpu_duration.count() / gpu_duration.count(); 174 | 175 | // 打印结果 176 | std::cout << std::left << std::setw(20) << "配置 " + std::to_string(i+1) 177 | << std::setw(15) << cfg.batch_size * cfg.in_channels * cfg.in_height * cfg.in_width 178 | << std::setw(15) << cfg.out_channels * cfg.in_channels * cfg.kernel_size * cfg.kernel_size 179 | << std::setw(15) << std::fixed << std::setprecision(2) << cpu_duration.count() 180 | << std::setw(15) << std::fixed << std::setprecision(2) << gpu_duration.count() 181 | << std::setw(15) << std::fixed << std::setprecision(2) << speedup 182 | << std::endl; 183 | } 184 | 185 | std::cout << "\nCPU和GPU卷积速度比较完成\n"; 186 | } 187 | 188 | // 更新main函数以包含速度比较 189 | int main() { 190 | // 设置随机数种子 191 | srand(42); 192 | 193 | // 运行速度比较测试 194 | // 配置 输入尺寸 内核尺寸 CPU时间(ms) GPU时间(ms) 加速比(x) 195 | // ----------------------------------------------------------------------------------------------- 196 | // 配置 1 3072 432 2.01 5.70 0.35 197 | // 配置 2 49152 864 65.87 6.36 10.35 198 | // 配置 3 786432 1728 2126.64 212.35 10.01 199 | // 配置 4 4816896 1728 12969.35 1319.69 9.83 200 | // 配置 5 6422528 73728 33261.28 387.02 85.94 201 | // 配置 6 1605632 294912 16521.94 140.25 117.80 202 | compareConvolutionSpeed(); 203 | 204 | return 0; 205 | } -------------------------------------------------------------------------------- /cuda_mat/matrix_cudadef.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define CHECK_CUDA_ERROR(val) check((val), #val, __FILE__, __LINE__) 10 | template 11 | void check(T err, const char* const func, const char* const file, const int line) { 12 | if (err != cudaSuccess) { 13 | std::cerr << "CUDA Runtime Error at: " << file << ":" << line << std::endl; 14 | std::cerr << cudaGetErrorString(err) << " " << func << std::endl; 15 | exit(1); 16 | } 17 | } 18 | 19 | struct Matrix_CU { 20 | int row; 21 | int col; 22 | float* data; 23 | 24 | Matrix_CU(int r, int c) : row(r), col(c) { 25 | data = new float[r * c](); 26 | } 27 | // 删除拷贝构造函数和拷贝赋值运算符 28 | Matrix_CU(const Matrix_CU& other) : row(other.row), col(other.col) { 29 | data = new float[row * col]; 30 | std::memcpy(data, other.data, row * col * sizeof(float)); 31 | } 32 | 33 | // 拷贝赋值运算符 34 | Matrix_CU& operator=(const Matrix_CU& other) { 35 | if (this != &other) { 36 | delete[] data; 37 | row = 
other.row; 38 | col = other.col; 39 | data = new float[row * col]; 40 | std::memcpy(data, other.data, row * col * sizeof(float)); 41 | } 42 | return *this; 43 | } 44 | ~Matrix_CU() { 45 | delete[] data; 46 | } 47 | 48 | void randomInit() { 49 | for (int i = 0; i < row * col; ++i) { 50 | data[i] = static_cast(rand()) / RAND_MAX; 51 | } 52 | } 53 | 54 | void printFirstElement() const { 55 | std::cout << "First element: " << data[0] << std::endl; 56 | } 57 | void printSubMatrix(int numRows, int numCols) const { 58 | for (int i = 0; i < numRows; ++i) { 59 | for (int j = 0; j < numCols; ++j) { 60 | std::cout << data[i * col + j] << " "; 61 | } 62 | std::cout << std::endl; 63 | } 64 | } 65 | 66 | }; 67 | float getMaxValue(const Matrix_CU& matrix) { 68 | float maxVal = matrix.data[0]; 69 | for (int i = 1; i < matrix.row * matrix.col; ++i) { 70 | if (matrix.data[i] > maxVal) { 71 | maxVal = matrix.data[i]; 72 | } 73 | } 74 | return maxVal; 75 | } 76 | float getMinValue(const Matrix_CU& matrix) { 77 | float minVal = matrix.data[0]; 78 | for (int i = 1; i < matrix.row * matrix.col; ++i) { 79 | if (matrix.data[i] < minVal) { 80 | minVal = matrix.data[i]; 81 | } 82 | } 83 | return minVal; 84 | } 85 | float getAverageValue(const Matrix_CU& matrix) { 86 | float sum = 0.0f; 87 | for (int i = 0; i < matrix.row * matrix.col; ++i) { 88 | sum += matrix.data[i]; 89 | } 90 | return sum / (matrix.row * matrix.col); 91 | } 92 | float getSum(const Matrix_CU& matrix) { 93 | float sum = 0.0f; 94 | for (int i = 0; i < matrix.row * matrix.col; ++i) { 95 | sum += matrix.data[i]; 96 | } 97 | return sum; 98 | } 99 | 100 | float getStandardDeviation(const Matrix_CU& matrix) { 101 | float mean = getAverageValue(matrix); 102 | float sumSquaredDifferences = 0.0f; 103 | 104 | for (int i = 0; i < matrix.row * matrix.col; ++i) { 105 | float diff = matrix.data[i] - mean; 106 | sumSquaredDifferences += diff * diff; 107 | } 108 | 109 | return sqrt(sumSquaredDifferences / (matrix.row * matrix.col)); 110 | } 111 | 112 | __global__ void matrixAddKernel(const float* A, const float* B, float* C, int rows, int cols) { 113 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 114 | int idy = blockIdx.y * blockDim.y + threadIdx.y; 115 | 116 | if (idx < rows && idy < cols) { 117 | int linear_idx = idx * cols + idy; 118 | C[linear_idx] = A[linear_idx] + B[linear_idx]; 119 | } 120 | } 121 | 122 | 123 | // 减法 124 | __global__ void matrixSubKernel(const float* A, const float* B, float* C, int rows, int cols) { 125 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 126 | int idy = blockIdx.y * blockDim.y + threadIdx.y; 127 | 128 | if (idx < rows && idy < cols) { 129 | int linear_idx = idx * cols + idy; 130 | C[linear_idx] = A[linear_idx] - B[linear_idx]; 131 | } 132 | } 133 | 134 | // 乘法 135 | __global__ void matrixMulKernel(const float* A, const float* B, float* C, int rows, int cols) { 136 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 137 | int idy = blockIdx.y * blockDim.y + threadIdx.y; 138 | 139 | if (idx < rows && idy < cols) { 140 | int linear_idx = idx * cols + idy; 141 | C[linear_idx] = A[linear_idx] * B[linear_idx]; 142 | } 143 | } 144 | 145 | // 除法(注意避免除零错误) 146 | __global__ void matrixDivKernel(const float* A, const float* B, float* C, int rows, int cols) { 147 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 148 | int idy = blockIdx.y * blockDim.y + threadIdx.y; 149 | 150 | if (idx < rows && idy < cols) { 151 | int linear_idx = idx * cols + idy; 152 | C[linear_idx] = (B[linear_idx] != 0.0f) ? 
(A[linear_idx] / B[linear_idx]) : 0.0f; 153 | } 154 | } 155 | // 修改为直接操作输出矩阵,避免拷贝 156 | void matrixAddCUDA(const Matrix_CU& A, const Matrix_CU& B, Matrix_CU& C) { 157 | assert(A.row == B.row && A.col == B.col); 158 | assert(A.row == C.row && A.col == C.col); 159 | 160 | const int rows = A.row; 161 | const int cols = A.col; 162 | size_t size = rows * cols * sizeof(float); 163 | 164 | float *d_A, *d_B, *d_C; 165 | 166 | // 1. 分配设备内存 167 | CHECK_CUDA_ERROR(cudaMalloc(&d_A, size)); 168 | CHECK_CUDA_ERROR(cudaMalloc(&d_B, size)); 169 | CHECK_CUDA_ERROR(cudaMalloc(&d_C, size)); 170 | 171 | // 2. 拷贝数据到设备 172 | CHECK_CUDA_ERROR(cudaMemcpy(d_A, A.data, size, cudaMemcpyHostToDevice)); 173 | CHECK_CUDA_ERROR(cudaMemcpy(d_B, B.data, size, cudaMemcpyHostToDevice)); 174 | 175 | // 3. 配置内核参数 176 | dim3 blockSize(16, 16); // 256 threads per block 177 | dim3 gridSize((cols + blockSize.x - 1) / blockSize.x, 178 | (rows + blockSize.y - 1) / blockSize.y); 179 | auto start = std::chrono::high_resolution_clock::now(); 180 | // 4. 启动核函数 181 | matrixAddKernel<<>>(d_A, d_B, d_C, rows, cols); 182 | auto end = std::chrono::high_resolution_clock::now(); 183 | std::chrono::duration elapsed = end - start; 184 | std::cout << "GPU执行时间: " << elapsed.count() * 1000 << " ms\n"; 185 | CHECK_CUDA_ERROR(cudaGetLastError()); // 检查内核启动错误 186 | 187 | // 5. 拷贝结果回主机 188 | CHECK_CUDA_ERROR(cudaMemcpy(C.data, d_C, size, cudaMemcpyDeviceToHost)); 189 | 190 | // 6. 释放设备内存 191 | CHECK_CUDA_ERROR(cudaFree(d_A)); 192 | CHECK_CUDA_ERROR(cudaFree(d_B)); 193 | CHECK_CUDA_ERROR(cudaFree(d_C)); 194 | } 195 | void matrixSubCUDA(const Matrix_CU& A, const Matrix_CU& B, Matrix_CU& C) { 196 | assert(A.row == B.row && A.col == B.col); 197 | assert(A.row == C.row && A.col == C.col); 198 | 199 | const int rows = A.row; 200 | const int cols = A.col; 201 | size_t size = rows * cols * sizeof(float); 202 | 203 | float *d_A, *d_B, *d_C; 204 | 205 | CHECK_CUDA_ERROR(cudaMalloc(&d_A, size)); 206 | CHECK_CUDA_ERROR(cudaMalloc(&d_B, size)); 207 | CHECK_CUDA_ERROR(cudaMalloc(&d_C, size)); 208 | 209 | CHECK_CUDA_ERROR(cudaMemcpy(d_A, A.data, size, cudaMemcpyHostToDevice)); 210 | CHECK_CUDA_ERROR(cudaMemcpy(d_B, B.data, size, cudaMemcpyHostToDevice)); 211 | 212 | dim3 blockSize(16, 16); 213 | dim3 gridSize((cols + blockSize.x - 1) / blockSize.x, 214 | (rows + blockSize.y - 1) / blockSize.y); 215 | auto start = std::chrono::high_resolution_clock::now(); 216 | 217 | matrixSubKernel<<>>(d_A, d_B, d_C, rows, cols); 218 | 219 | auto end = std::chrono::high_resolution_clock::now(); 220 | std::chrono::duration elapsed = end - start; 221 | std::cout << "GPU执行时间 (减法): " << elapsed.count() * 1000 << " ms\n"; 222 | 223 | CHECK_CUDA_ERROR(cudaGetLastError()); 224 | CHECK_CUDA_ERROR(cudaMemcpy(C.data, d_C, size, cudaMemcpyDeviceToHost)); 225 | 226 | CHECK_CUDA_ERROR(cudaFree(d_A)); 227 | CHECK_CUDA_ERROR(cudaFree(d_B)); 228 | CHECK_CUDA_ERROR(cudaFree(d_C)); 229 | } 230 | 231 | void matrixMulCUDA(const Matrix_CU& A, const Matrix_CU& B, Matrix_CU& C) { 232 | assert(A.row == B.row && A.col == B.col); 233 | assert(A.row == C.row && A.col == C.col); 234 | 235 | const int rows = A.row; 236 | const int cols = A.col; 237 | size_t size = rows * cols * sizeof(float); 238 | 239 | float *d_A, *d_B, *d_C; 240 | 241 | CHECK_CUDA_ERROR(cudaMalloc(&d_A, size)); 242 | CHECK_CUDA_ERROR(cudaMalloc(&d_B, size)); 243 | CHECK_CUDA_ERROR(cudaMalloc(&d_C, size)); 244 | 245 | CHECK_CUDA_ERROR(cudaMemcpy(d_A, A.data, size, cudaMemcpyHostToDevice)); 246 | CHECK_CUDA_ERROR(cudaMemcpy(d_B, B.data, size, 
cudaMemcpyHostToDevice)); 247 | 248 | dim3 blockSize(16, 16); 249 | dim3 gridSize((cols + blockSize.x - 1) / blockSize.x, 250 | (rows + blockSize.y - 1) / blockSize.y); 251 | auto start = std::chrono::high_resolution_clock::now(); 252 | 253 | matrixMulKernel<<>>(d_A, d_B, d_C, rows, cols); 254 | 255 | auto end = std::chrono::high_resolution_clock::now(); 256 | std::chrono::duration elapsed = end - start; 257 | std::cout << "GPU执行时间 (乘法): " << elapsed.count() * 1000 << " ms\n"; 258 | 259 | CHECK_CUDA_ERROR(cudaGetLastError()); 260 | CHECK_CUDA_ERROR(cudaMemcpy(C.data, d_C, size, cudaMemcpyDeviceToHost)); 261 | 262 | CHECK_CUDA_ERROR(cudaFree(d_A)); 263 | CHECK_CUDA_ERROR(cudaFree(d_B)); 264 | CHECK_CUDA_ERROR(cudaFree(d_C)); 265 | } 266 | 267 | void matrixDivCUDA(const Matrix_CU& A, const Matrix_CU& B, Matrix_CU& C) { 268 | assert(A.row == B.row && A.col == B.col); 269 | assert(A.row == C.row && A.col == C.col); 270 | 271 | const int rows = A.row; 272 | const int cols = A.col; 273 | size_t size = rows * cols * sizeof(float); 274 | 275 | float *d_A, *d_B, *d_C; 276 | 277 | CHECK_CUDA_ERROR(cudaMalloc(&d_A, size)); 278 | CHECK_CUDA_ERROR(cudaMalloc(&d_B, size)); 279 | CHECK_CUDA_ERROR(cudaMalloc(&d_C, size)); 280 | 281 | CHECK_CUDA_ERROR(cudaMemcpy(d_A, A.data, size, cudaMemcpyHostToDevice)); 282 | CHECK_CUDA_ERROR(cudaMemcpy(d_B, B.data, size, cudaMemcpyHostToDevice)); 283 | 284 | dim3 blockSize(16, 16); 285 | dim3 gridSize((cols + blockSize.x - 1) / blockSize.x, 286 | (rows + blockSize.y - 1) / blockSize.y); 287 | auto start = std::chrono::high_resolution_clock::now(); 288 | 289 | matrixDivKernel<<>>(d_A, d_B, d_C, rows, cols); 290 | 291 | auto end = std::chrono::high_resolution_clock::now(); 292 | std::chrono::duration elapsed = end - start; 293 | std::cout << "GPU执行时间 (除法): " << elapsed.count() * 1000 << " ms\n"; 294 | 295 | CHECK_CUDA_ERROR(cudaGetLastError()); 296 | CHECK_CUDA_ERROR(cudaMemcpy(C.data, d_C, size, cudaMemcpyDeviceToHost)); 297 | 298 | CHECK_CUDA_ERROR(cudaFree(d_A)); 299 | CHECK_CUDA_ERROR(cudaFree(d_B)); 300 | CHECK_CUDA_ERROR(cudaFree(d_C)); 301 | } 302 | Matrix_CU matrixAddCPU(const Matrix_CU& A, const Matrix_CU& B) { 303 | assert(A.row == B.row && A.col == B.col); 304 | 305 | Matrix_CU C(A.row, A.col); 306 | 307 | auto start = std::chrono::high_resolution_clock::now(); 308 | 309 | for (int i = 0; i < A.row * A.col; ++i) { 310 | C.data[i] = A.data[i] + B.data[i]; 311 | } 312 | 313 | auto end = std::chrono::high_resolution_clock::now(); 314 | std::chrono::duration elapsed = end - start; 315 | std::cout << "CPU执行时间: " << elapsed.count() * 1000 << " ms\n"; 316 | 317 | return C; 318 | } 319 | Matrix_CU matrixSubCPU(const Matrix_CU& A, const Matrix_CU& B) { 320 | assert(A.row == B.row && A.col == B.col); 321 | 322 | Matrix_CU C(A.row, A.col); 323 | 324 | auto start = std::chrono::high_resolution_clock::now(); 325 | 326 | for (int i = 0; i < A.row * A.col; ++i) { 327 | C.data[i] = A.data[i] - B.data[i]; 328 | } 329 | 330 | auto end = std::chrono::high_resolution_clock::now(); 331 | std::chrono::duration elapsed = end - start; 332 | std::cout << "CPU执行时间 (减法): " << elapsed.count() * 1000 << " ms\n"; 333 | 334 | return C; 335 | } 336 | 337 | Matrix_CU matrixMulCPU(const Matrix_CU& A, const Matrix_CU& B) { 338 | assert(A.row == B.row && A.col == B.col); 339 | 340 | Matrix_CU C(A.row, A.col); 341 | 342 | auto start = std::chrono::high_resolution_clock::now(); 343 | 344 | for (int i = 0; i < A.row * A.col; ++i) { 345 | C.data[i] = A.data[i] * B.data[i]; 346 | } 347 | 348 | auto 
end = std::chrono::high_resolution_clock::now(); 349 | std::chrono::duration elapsed = end - start; 350 | std::cout << "CPU执行时间 (乘法): " << elapsed.count() * 1000 << " ms\n"; 351 | 352 | return C; 353 | } 354 | 355 | Matrix_CU matrixDivCPU(const Matrix_CU& A, const Matrix_CU& B) { 356 | assert(A.row == B.row && A.col == B.col); 357 | 358 | Matrix_CU C(A.row, A.col); 359 | 360 | auto start = std::chrono::high_resolution_clock::now(); 361 | 362 | for (int i = 0; i < A.row * A.col; ++i) { 363 | C.data[i] = (B.data[i] != 0.0f) ? (A.data[i] / B.data[i]) : 0.0f; 364 | } 365 | 366 | auto end = std::chrono::high_resolution_clock::now(); 367 | std::chrono::duration elapsed = end - start; 368 | std::cout << "CPU执行时间 (除法): " << elapsed.count() * 1000 << " ms\n"; 369 | 370 | return C; 371 | } 372 | void testMatrix_CUAddition() { 373 | const int rows = 10240; 374 | const int cols = 10240; 375 | std::cout << "测试矩阵大小: " << rows << "x" << cols << "\n"; 376 | 377 | Matrix_CU A(rows, cols); 378 | Matrix_CU B(rows, cols); 379 | Matrix_CU C_gpu(rows, cols); // GPU结果矩阵 380 | 381 | A.randomInit(); 382 | B.randomInit(); 383 | 384 | std::cout << "矩阵A: "; 385 | A.printFirstElement(); 386 | std::cout << "矩阵B: "; 387 | B.printFirstElement(); 388 | 389 | std::cout << "\nCPU版本:\n"; 390 | Matrix_CU C_cpu = matrixAddCPU(A, B); 391 | std::cout << "CPU结果: "; 392 | C_cpu.printFirstElement(); 393 | 394 | std::cout << "\nGPU版本:\n"; 395 | matrixAddCUDA(A, B, C_gpu); 396 | std::cout << "GPU结果: "; 397 | C_gpu.printFirstElement(); 398 | std::cout<< "标准差:" << getStandardDeviation(C_gpu); 399 | std::cout << "最大值: " << getMaxValue(C_gpu) << std::endl; 400 | std::cout << "最小值: " << getMinValue(C_gpu) << std::endl; 401 | std::cout << "平均值: " << getAverageValue(C_gpu) << std::endl; 402 | std::cout << "和: " << getSum(C_gpu) << std::endl; 403 | std::cout << "标准差: " << getStandardDeviation(C_gpu) << std::endl; 404 | // 验证结果 405 | bool correct = true; 406 | for (int i = 0; i < 10 && correct; ++i) { 407 | if (fabs(C_cpu.data[i] - C_gpu.data[i]) > 1e-5) { 408 | std::cout << "结果不匹配在索引 " << i 409 | << ": CPU=" << C_cpu.data[i] 410 | << ", GPU=" << C_gpu.data[i] << "\n"; 411 | correct = false; 412 | } 413 | } 414 | 415 | if (correct) { 416 | std::cout << "前10个元素验证成功!\n"; 417 | } 418 | 419 | } 420 | 421 | // int main() { 422 | // // 初始化CUDA设备 423 | // cudaDeviceProp prop; 424 | // CHECK_CUDA_ERROR(cudaGetDeviceProperties(&prop, 0)); 425 | // std::cout << "使用GPU: " << prop.name << "\n"; 426 | 427 | // // testMatrix_CUAddition(); 428 | // Matrix_CU A(10240, 10240); // 假设有 1024x1024 矩阵 429 | // Matrix_CU B(10240, 10240); 430 | // Matrix_CU C(10240, 10240); 431 | 432 | // matrixSubCUDA(A, B, C); 433 | // matrixMulCUDA(A, B, C); 434 | // matrixDivCUDA(A, B, C); 435 | 436 | // Matrix_CU C_cpu1 = matrixSubCPU(A, B); 437 | // std::cout << "CPU结果: "; 438 | // C_cpu1.printFirstElement(); 439 | // std::cout << "GPU结果: "; 440 | // C.printFirstElement(); 441 | // Matrix_CU C_cpu2 = matrixMulCPU(A, B); 442 | // std::cout << "CPU结果: "; 443 | // C_cpu2.printFirstElement(); 444 | // std::cout << "GPU结果: "; 445 | // C.printFirstElement(); 446 | // Matrix_CU C_cpu3 = matrixDivCPU(A, B); 447 | // std::cout << "CPU结果: "; 448 | // C_cpu3.printFirstElement(); 449 | // std::cout << "GPU结果: "; 450 | // C.printFirstElement(); 451 | // return 0; 452 | 453 | 454 | // } -------------------------------------------------------------------------------- /edge_layer_unit_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | 
███████╗██████╗ ██████╗ ███████╗ ███████╗███╗ ██╗ ██████╗ ██╗███╗ ██╗███████╗ 3 | ██╔════╝██╔══██╗██╔════╝ ██╔════╝ ██╔════╝████╗ ██║██╔════╝ ██║████╗ ██║██╔════╝ 4 | █████╗ ██║ ██║██║ ███╗█████╗ █████╗ ██╔██╗ ██║██║ ███╗██║██╔██╗ ██║█████╗ 5 | ██╔══╝ ██║ ██║██║ ██║██╔══╝ ██╔══╝ ██║╚██╗██║██║ ██║██║██║╚██╗██║██╔══╝ 6 | ███████╗██████╔╝╚██████╔╝███████╗ ███████╗██║ ╚████║╚██████╔╝██║██║ ╚████║███████╗ 7 | ╚══════╝╚═════╝ ╚═════╝ ╚══════╝ ╚══════╝╚═╝ ╚═══╝ ╚═════╝ ╚═╝╚═╝ ╚═══╝╚══════╝ 8 | Author:Edge 9 | Web:likedge.top 10 | Date:20200925 11 | */ 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "./matrix/matrix_def.h" 19 | #include "./matrix/matrix_pro.h" 20 | #include "./logistic/logistic_def.h" 21 | #include "./autodiff/node.h" 22 | #include "./grad_edge/matrix_grad.h" 23 | #include "./root/include/edgelayer.h" 24 | #include "./welcome/score_wel.cpp" 25 | // #include 26 | 27 | using namespace std; 28 | 29 | clock_t start_t, end_t; 30 | double duration; 31 | // #include"opencv4/opencv2/imgcodecs.hpp" 32 | // #include 33 | // using namespace cv; 34 | 35 | int main() 36 | { 37 | welcome(); 38 | cout << setprecision(13); 39 | start_t = clock(); 40 | Matrix4d mat_call = CreateMatrix4d(2, 3, 640, 640); 41 | print_shape(mat_call); 42 | edge_layer *conv2d_1 = new conv2d(mat_call, 3, 12, 1, 9, 0, 5); 43 | Matrix4d output_conv = conv2d_1->forward(mat_call); 44 | print_shape(output_conv); 45 | int parameter_1 = conv2d_1->parameter_counter(); 46 | 47 | edge_layer *conv2d_2 = new conv2d(output_conv, 12, 24, 7, 7, 0, 0); 48 | Matrix4d output_conv2 = conv2d_2->forward(output_conv); 49 | print_shape(output_conv2); 50 | std::cout << "EDGE Matrix Shape: (" << output_conv2.batch << "," << output_conv2.dep << "," 51 | << output_conv2.wid << "," << output_conv2.high << ")" << std::endl; 52 | parameter_1 += conv2d_2->parameter_counter(); 53 | 54 | edge_layer *conv2d_3 = new conv2d(output_conv2, 24, 1, 3, 3, 0, 0); 55 | Matrix4d output_conv3 = conv2d_3->forward(output_conv2); 56 | print_shape(output_conv3); 57 | parameter_1 += conv2d_3->parameter_counter(); 58 | end_t = clock(); 59 | cout << "total parameters :" << parameter_1 << endl; 60 | double total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC; 61 | printf("CPUduration:%f\n", total_t); 62 | } -------------------------------------------------------------------------------- /grad_edge/matrix_grad.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include"./autodiff/node.h" 8 | #include"../matrix/matrix_def.h" 9 | #include"../matrix/matrix_pro.h" 10 | using namespace std; 11 | float sigmoid(float x){ 12 | return 1/(1+exp(x)); 13 | } 14 | Node sigmoid(Node z){ 15 | Node sigmoid = 1/(1+(1/exp(z))); 16 | return sigmoid; 17 | } 18 | struct edge_network 19 | { 20 | edge_network(int input, int num_neuron){ 21 | } 22 | Matrix forward(Matrix data,Matrix weights,Matrix bais) 23 | { 24 | // cout<<"data: "; 25 | // cout_mat(data); 26 | // cout<<"weights: "< 2 | #include 3 | 4 | #include "../../root/include/vectmath.h" 5 | #include "../../root/include/node.h" 6 | 7 | typedef std::vector Vector; 8 | typedef std::vector Matrix; 9 | 10 | Node random_number(){ 11 | return rand()/(double)RAND_MAX; 12 | } 13 | 14 | Node tan_h(Node& x){ 15 | return (1-exp(-2*x))/(1+exp(-2*x)); 16 | } 17 | 18 | Node mean_square_error(Vector& y_true, Vector& y_pred){ 19 | Node loss; 20 | for(size_t i=0 ; iactivation = activation; 35 | 
this->input_shape = input; 36 | this->output_shape = output; 37 | weights.resize(input, Vector(output)); 38 | bias.resize(1, Vector(output)); 39 | random_number >> weights; 40 | random_number >> bias; 41 | } 42 | 43 | Matrix forward(Matrix& previous){ 44 | Matrix output = dot(previous, weights) + bias; 45 | output = activation >> output; 46 | return output; 47 | } 48 | 49 | void backward(Node& loss, const float& learning_rate){ 50 | weights -= learning_rate*loss.gradient(weights); 51 | bias -= learning_rate*loss.gradient(bias); 52 | } 53 | }; 54 | 55 | struct Network { 56 | std::vector layers; 57 | int input_shape; 58 | Graph* graph; 59 | 60 | Network(){ 61 | graph = Graph::getInstance(); 62 | } 63 | 64 | void input_layer(int input_shape){ 65 | this->input_shape = input_shape; 66 | } 67 | 68 | void add(int output_shape, Node (*activation)(Node&)){ 69 | int input = layers.empty()?input_shape:layers.back().output_shape; 70 | layers.push_back(Layer(input, output_shape, activation)); 71 | } 72 | 73 | Matrix run(Matrix& input){ 74 | Matrix output(input.size()); 75 | for(size_t j=0 ; jnew_recording(); 105 | } 106 | } 107 | std::cout << std::endl; 108 | } 109 | }; 110 | 111 | int main(int argc, char const *argv[]) { 112 | srand(time(NULL)); 113 | 114 | Matrix input = {{0,0},{0,1},{1,0},{1,1}}; 115 | Matrix output = {{0},{1},{1},{0}}; 116 | 117 | Network network; 118 | network.input_layer(2); 119 | network.add(3, tan_h); 120 | network.add(1, tan_h); 121 | network.fit(input, output, mean_square_error, 500, 0.1); 122 | 123 | Matrix pred = network.run(input); 124 | std::cout << pred << std::endl; 125 | return 0; 126 | } 127 | -------------------------------------------------------------------------------- /install_diff/examples/src/gradient.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../../root/include/vectmath.h" 4 | #include "../../root/include/node.h" 5 | 6 | Node function(std::vector x){ 7 | return pow(x[0]-x[1], 2) + x[0]*x[1]*x[2]; // (x-y)^2 + x*y*z 8 | } 9 | 10 | int main(int argc, char const *argv[]) { 11 | std::vector x = {5,6,7}; 12 | Node f = function(x); 13 | std::cout << "grad(f) = " << f.gradient(x) << std::endl; 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /install_diff/examples/src/gradient_descent.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../../root/include/vectmath.h" 4 | #include "../../root/include/node.h" 5 | 6 | Node function(std::vector& x){ 7 | return pow(x[0], 2) + pow(x[1], 2); // x^2 + y^2 8 | } 9 | 10 | int main(int argc, char const *argv[]) { 11 | Graph* graph = Graph::getInstance(); 12 | 13 | std::vector x = {50, 50}; 14 | Node f; 15 | 16 | int epochs = 30; 17 | float learning_rate = 0.1; 18 | for(size_t i=0 ; inew_recording(); 22 | } 23 | 24 | std::cout << "f = " << f << std::endl; 25 | std::cout << "x = " << x << std::endl; 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /install_diff/examples/src/simple.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../../root/include/node.h" 3 | 4 | int main(int argc, char const *argv[]) { 5 | Node x=2, y=3; 6 | Node f = x*y + sin(x); 7 | 8 | std::cout << "f(x,y) = x*y + sin(x)" << std::endl; 9 | std::cout << "f(" << x << "," << y << ") = " << f << std::endl; 10 | std::cout << "∂f/∂x = " << 
f.gradient(x) << std::endl; 11 | std::cout << "∂f/∂y = " << f.gradient(y) << std::endl; 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /install_diff/examples/src/speed.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../../root/include/vectmath.h" 8 | #include "../../root/include/node.h" 9 | 10 | template 11 | std::vector > get_random_matrix(const int& height, const int& width, T t){ 12 | std::vector > mat(height, std::vector(width)); 13 | for(auto& v : mat){ 14 | for(auto& e : v){ 15 | e = rand()/(double)RAND_MAX; 16 | } 17 | } 18 | return mat; 19 | } 20 | 21 | int main(int argc, char const *argv[]) { 22 | srand(time(0)); 23 | 24 | int size = 30; 25 | std::vector > a = get_random_matrix(size, size, double()); 26 | std::vector > b = get_random_matrix(size, size, double()); 27 | std::vector > c = get_random_matrix(size, size, Node()); 28 | std::vector > d = get_random_matrix(size, size, Node()); 29 | 30 | std::cout << std::fixed; 31 | std::cout << std::setprecision(10); 32 | 33 | std::cout << "Running with double...\t"; 34 | std::cout.flush(); 35 | auto start = std::chrono::high_resolution_clock::now(); 36 | std::vector > ab = dot(a, b); 37 | auto finish = std::chrono::high_resolution_clock::now(); 38 | std::chrono::duration elapsed = finish - start; 39 | std::cout << "Elapsed time: " << elapsed.count() << " s" << std::endl; 40 | 41 | std::cout << "Running with Node...\t"; 42 | std::cout.flush(); 43 | start = std::chrono::high_resolution_clock::now(); 44 | std::vector > cd = dot(c, d); 45 | finish = std::chrono::high_resolution_clock::now(); 46 | elapsed = finish - start; 47 | std::cout << "Elapsed time: " << elapsed.count() << " s" << std::endl; 48 | 49 | std::cout << "Yet to be improved..." << std::endl; 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /install_diff/root/Makefile: -------------------------------------------------------------------------------- 1 | CXX = g++ 2 | ODIR = obj 3 | CXXFLAGS = -std=c++11 -O3 4 | OBJS = $(ODIR)/graph.o $(ODIR)/node.o 5 | 6 | all : $(ODIR) $(OBJS) 7 | 8 | $(ODIR)/graph.o : src/graph.cpp include/graph.h 9 | $(CXX) -c $< -o $@ $(CXXFLAGS) 10 | 11 | $(ODIR)/node.o : src/node.cpp include/node.h 12 | $(CXX) -c $< -o $@ $(CXXFLAGS) 13 | 14 | $(ODIR) : 15 | if [ ! 
-d $(ODIR) ]; then mkdir $(ODIR); fi 16 | 17 | clean : 18 | if [ -d $(ODIR) ]; then rm $(ODIR) -r; fi 19 | 20 | .PHONY : all 21 | .PHONY : clean 22 | 23 | -------------------------------------------------------------------------------- /install_diff/root/include/dor 2.h: -------------------------------------------------------------------------------- 1 | #ifndef DYADIC_OPERATION_RESULT 2 | #define DYADIC_OPERATION_RESULT 3 | 4 | struct DyadicOperationResult { 5 | double value; 6 | double left_grad; 7 | double right_grad; 8 | 9 | DyadicOperationResult(double value, double left_grad, double right_grad){ 10 | this->value = value; 11 | this->left_grad = left_grad; 12 | this->right_grad = right_grad; 13 | } 14 | }; 15 | 16 | #endif /* end of include guard: DYADIC_OPERATION_RESULT */ 17 | -------------------------------------------------------------------------------- /install_diff/root/include/dor.h: -------------------------------------------------------------------------------- 1 | #ifndef DYADIC_OPERATION_RESULT 2 | #define DYADIC_OPERATION_RESULT 3 | 4 | struct DyadicOperationResult { 5 | double value; 6 | double left_grad; 7 | double right_grad; 8 | 9 | DyadicOperationResult(double value, double left_grad, double right_grad){ 10 | this->value = value; 11 | this->left_grad = left_grad; 12 | this->right_grad = right_grad; 13 | } 14 | }; 15 | 16 | #endif /* end of include guard: DYADIC_OPERATION_RESULT */ 17 | -------------------------------------------------------------------------------- /install_diff/root/include/graph 2.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAPH_H 2 | #define GRAPH_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class Graph { 9 | private: 10 | std::map > > nodes; 11 | static Graph* instance; 12 | Graph(); 13 | 14 | public: 15 | static long int uid_counter; 16 | static long int uid(); 17 | static Graph* getInstance(); 18 | 19 | void connect(const long int& uid, const std::pair& edge); 20 | std::vector > get(const long int& uid) const; 21 | bool has(const long int& uid) const; 22 | 23 | void new_recording(); 24 | }; 25 | 26 | #endif /* end of include guard: GRAPH_H */ 27 | -------------------------------------------------------------------------------- /install_diff/root/include/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAPH_H 2 | #define GRAPH_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class Graph { 9 | private: 10 | std::map > > nodes; 11 | static Graph* instance; 12 | Graph(); 13 | 14 | public: 15 | static long int uid_counter; 16 | static long int uid(); 17 | static Graph* getInstance(); 18 | 19 | void connect(const long int& uid, const std::pair& edge); 20 | std::vector > get(const long int& uid) const; 21 | bool has(const long int& uid) const; 22 | 23 | void new_recording(); 24 | }; 25 | 26 | #endif /* end of include guard: GRAPH_H */ 27 | -------------------------------------------------------------------------------- /install_diff/root/include/mor 2.h: -------------------------------------------------------------------------------- 1 | #ifndef MONADIC_OPERATION_RESULT 2 | #define MONADIC_OPERATION_RESULT 3 | 4 | struct MonadicOperationResult { 5 | double value; 6 | double grad; 7 | 8 | MonadicOperationResult(double value, double grad){ 9 | this->value = value; 10 | this->grad = grad; 11 | } 12 | }; 13 | 14 | #endif /* end of include guard: MONADIC_OPERATION_RESULT */ 15 | 
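The `MonadicOperationResult` struct above is the record a unary operator returns to `Node::monadic_operation` (declared in `node.h`): the operator's value together with its local derivative, which the `Graph` tape later chains during `Node::gradient`. A minimal sketch of how an extra unary operator would plug into that contract is shown below; `softplus` is a hypothetical example used only for illustration (it is not part of this repository), and the include path is assumed.

```c++
#include <cmath>
#include "node.h"   // assumed path; provides Node, MonadicOperationResult and the Graph tape

// Hypothetical extra operator, sketched only to show the MonadicOperationResult contract:
// the callback returns {value, d(value)/d(input)}, and Node::monadic_operation records
// the (local gradient, input uid) edge on the tape.
Node softplus(const Node& x){
    return Node::monadic_operation(x, [](const double& n){
        // value = ln(1 + e^n), local gradient = 1 / (1 + e^-n)
        return MonadicOperationResult(::log(1.0 + ::exp(n)), 1.0 / (1.0 + ::exp(-n)));
    });
}
```

Defined this way, `softplus(x).gradient(x)` runs through the same reverse pass as the built-in `sin` and `exp` operators.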
-------------------------------------------------------------------------------- /install_diff/root/include/mor.h: -------------------------------------------------------------------------------- 1 | #ifndef MONADIC_OPERATION_RESULT 2 | #define MONADIC_OPERATION_RESULT 3 | 4 | struct MonadicOperationResult { 5 | double value; 6 | double grad; 7 | 8 | MonadicOperationResult(double value, double grad){ 9 | this->value = value; 10 | this->grad = grad; 11 | } 12 | }; 13 | 14 | #endif /* end of include guard: MONADIC_OPERATION_RESULT */ 15 | -------------------------------------------------------------------------------- /install_diff/root/include/node 2.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_H 2 | #define NODE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "graph.h" 8 | #include "mor.h" 9 | #include "dor.h" 10 | #include "por.h" 11 | 12 | class Node { 13 | private: 14 | double value; 15 | long int uid; 16 | 17 | double gradient_recursive(Graph* graph, const long int& current_uid, const long int& stop_uid) const; 18 | 19 | public: 20 | Node(const double& value=0); 21 | Node(const Node& node); 22 | 23 | static Node monadic_operation(const Node& n, MonadicOperationResult (*)(const double&)); 24 | static Node dyadic_operation(const Node& l, const Node& r, DyadicOperationResult (*)(const double&, const double&)); 25 | static Node polyadic_operation(const std::vector& nodes, PolyadicOperationResult (*)(const std::vector&)); 26 | 27 | double gradient(const Node& node) const; 28 | std::vector gradient(const std::vector& nodes) const; 29 | std::vector > gradient(const std::vector >& nodes) const; 30 | 31 | friend Node operator+(const Node& l, const Node& r); 32 | friend Node operator-(const Node& l, const Node& r); 33 | friend Node operator*(const Node& l, const Node& r); 34 | friend Node operator/(const Node& l, const Node& r); 35 | 36 | Node& operator+=(const Node& r); 37 | Node& operator-=(const Node& r); 38 | Node& operator*=(const Node& r); 39 | Node& operator/=(const Node& r); 40 | 41 | friend bool operator==(const Node& l, const Node& r); 42 | friend bool operator<(const Node& l, const Node& r); 43 | friend bool operator>(const Node& l, const Node& r); 44 | friend bool operator<=(const Node& l, const Node& r); 45 | friend bool operator>=(const Node& l, const Node& r); 46 | 47 | friend Node sin(const Node& x); 48 | friend Node cos(const Node& x); 49 | friend Node tan(const Node& x); 50 | friend Node sinh(const Node& x); 51 | friend Node cosh(const Node& x); 52 | friend Node tanh(const Node& x); 53 | friend Node asin(const Node& x); 54 | friend Node acos(const Node& x); 55 | friend Node atan(const Node& x); 56 | 57 | friend Node log(const Node& x, const Node& base); 58 | friend Node log10(const Node& x); 59 | friend Node ln(const Node& x); 60 | 61 | friend Node pow(const Node& x, const Node& p); 62 | friend Node exp(const Node& x); 63 | friend Node sqrt(const Node& x); 64 | 65 | friend Node abs(const Node& x); 66 | friend Node min(const Node& l, const Node& r); 67 | friend Node max(const Node& l, const Node& r); 68 | 69 | friend std::ostream& operator<<(std::ostream& os, const Node& node); 70 | }; 71 | 72 | #endif /* end of include guard: NODE_H */ 73 | -------------------------------------------------------------------------------- /install_diff/root/include/node.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_H 2 | #define NODE_H 3 | 4 | #include 5 | #include 6 | 7 | #include 
"graph.h" 8 | #include "mor.h" 9 | #include "dor.h" 10 | #include "por.h" 11 | 12 | class Node { 13 | private: 14 | double value; 15 | long int uid; 16 | 17 | double gradient_recursive(Graph* graph, const long int& current_uid, const long int& stop_uid) const; 18 | 19 | public: 20 | Node(const double& value=0); 21 | Node(const Node& node); 22 | 23 | static Node monadic_operation(const Node& n, MonadicOperationResult (*)(const double&)); 24 | static Node dyadic_operation(const Node& l, const Node& r, DyadicOperationResult (*)(const double&, const double&)); 25 | static Node polyadic_operation(const std::vector& nodes, PolyadicOperationResult (*)(const std::vector&)); 26 | 27 | double gradient(const Node& node) const; 28 | std::vector gradient(const std::vector& nodes) const; 29 | std::vector > gradient(const std::vector >& nodes) const; 30 | 31 | friend Node operator+(const Node& l, const Node& r); 32 | friend Node operator-(const Node& l, const Node& r); 33 | friend Node operator*(const Node& l, const Node& r); 34 | friend Node operator/(const Node& l, const Node& r); 35 | 36 | Node& operator+=(const Node& r); 37 | Node& operator-=(const Node& r); 38 | Node& operator*=(const Node& r); 39 | Node& operator/=(const Node& r); 40 | 41 | friend bool operator==(const Node& l, const Node& r); 42 | friend bool operator<(const Node& l, const Node& r); 43 | friend bool operator>(const Node& l, const Node& r); 44 | friend bool operator<=(const Node& l, const Node& r); 45 | friend bool operator>=(const Node& l, const Node& r); 46 | 47 | friend Node sin(const Node& x); 48 | friend Node cos(const Node& x); 49 | friend Node tan(const Node& x); 50 | friend Node sinh(const Node& x); 51 | friend Node cosh(const Node& x); 52 | friend Node tanh(const Node& x); 53 | friend Node asin(const Node& x); 54 | friend Node acos(const Node& x); 55 | friend Node atan(const Node& x); 56 | 57 | friend Node log(const Node& x, const Node& base); 58 | friend Node log10(const Node& x); 59 | friend Node ln(const Node& x); 60 | 61 | friend Node pow(const Node& x, const Node& p); 62 | friend Node exp(const Node& x); 63 | friend Node sqrt(const Node& x); 64 | 65 | friend Node abs(const Node& x); 66 | friend Node min(const Node& l, const Node& r); 67 | friend Node max(const Node& l, const Node& r); 68 | 69 | friend std::ostream& operator<<(std::ostream& os, const Node& node); 70 | }; 71 | 72 | #endif /* end of include guard: NODE_H */ 73 | -------------------------------------------------------------------------------- /install_diff/root/include/por 2.h: -------------------------------------------------------------------------------- 1 | #ifndef POLYADIC_OPERATION_RESULT 2 | #define POLYADIC_OPERATION_RESULT 3 | 4 | #include 5 | 6 | struct PolyadicOperationResult { 7 | double value; 8 | std::vector gradients; 9 | 10 | PolyadicOperationResult(double value, const std::vector& gradients){ 11 | this->value = value; 12 | this->gradients = gradients; 13 | } 14 | }; 15 | 16 | #endif /* end of include guard: POLYADIC_OPERATION_RESULT */ 17 | -------------------------------------------------------------------------------- /install_diff/root/include/por.h: -------------------------------------------------------------------------------- 1 | #ifndef POLYADIC_OPERATION_RESULT 2 | #define POLYADIC_OPERATION_RESULT 3 | 4 | #include 5 | 6 | struct PolyadicOperationResult { 7 | double value; 8 | std::vector gradients; 9 | 10 | PolyadicOperationResult(double value, const std::vector& gradients){ 11 | this->value = value; 12 | this->gradients = 
gradients; 13 | } 14 | }; 15 | 16 | #endif /* end of include guard: POLYADIC_OPERATION_RESULT */ 17 | -------------------------------------------------------------------------------- /install_diff/root/include/vectmath 2.h: -------------------------------------------------------------------------------- 1 | #ifndef VECTMATH 2 | #define VECTMATH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // dot product 11 | template 12 | std::vector > dot(const std::vector >& a, const std::vector >& b){ 13 | assert(a[0].size()==b.size()); 14 | 15 | T w=0; 16 | std::vector > result(a.size(), std::vector(b[0].size())); 17 | for (int i=0 ; i 32 | std::vector& operator-=(std::vector& u, const std::vector& v){ 33 | assert(u.size()==v.size()); 34 | for(size_t i=0 ; i 41 | std::vector operator+(const std::vector& u, const std::vector& v){ 42 | assert(u.size()==v.size()); 43 | std::vector w(u.size()); 44 | for(size_t i=0 ; i 51 | std::vector operator*(const S& s, const std::vector& u){ 52 | std::vector result(u.size()); 53 | for(size_t i=0 ; i 60 | std::vector& operator>>(U (*fun)(U&), std::vector& u){ 61 | std::transform(u.begin(), u.end(), u.begin(), fun); 62 | return u; 63 | } 64 | 65 | template 66 | std::vector& operator>>(S (*fun)(S&), std::vector& u){ 67 | for(auto& v : u){ 68 | fun >> v; 69 | } 70 | return u; 71 | } 72 | 73 | template 74 | std::vector& operator>>(U (*fun)(), std::vector& u){ 75 | for(auto& e : u){ 76 | e = fun(); 77 | } 78 | return u; 79 | } 80 | 81 | template 82 | std::vector& operator>>(S (*fun)(), std::vector& u){ 83 | for(auto& v : u){ 84 | fun >> v; 85 | } 86 | return u; 87 | } 88 | 89 | template 90 | std::ostream& operator<<(std::ostream& os, const std::vector& u){ 91 | os << "["; 92 | for(size_t i=0 ; i 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // dot product 11 | template 12 | std::vector > dot(const std::vector >& a, const std::vector >& b){ 13 | assert(a[0].size()==b.size()); 14 | 15 | T w=0; 16 | std::vector > result(a.size(), std::vector(b[0].size())); 17 | for (int i=0 ; i 32 | std::vector& operator-=(std::vector& u, const std::vector& v){ 33 | assert(u.size()==v.size()); 34 | for(size_t i=0 ; i 41 | std::vector operator+(const std::vector& u, const std::vector& v){ 42 | assert(u.size()==v.size()); 43 | std::vector w(u.size()); 44 | for(size_t i=0 ; i 51 | std::vector operator*(const S& s, const std::vector& u){ 52 | std::vector result(u.size()); 53 | for(size_t i=0 ; i 60 | std::vector& operator>>(U (*fun)(U&), std::vector& u){ 61 | std::transform(u.begin(), u.end(), u.begin(), fun); 62 | return u; 63 | } 64 | 65 | template 66 | std::vector& operator>>(S (*fun)(S&), std::vector& u){ 67 | for(auto& v : u){ 68 | fun >> v; 69 | } 70 | return u; 71 | } 72 | 73 | template 74 | std::vector& operator>>(U (*fun)(), std::vector& u){ 75 | for(auto& e : u){ 76 | e = fun(); 77 | } 78 | return u; 79 | } 80 | 81 | template 82 | std::vector& operator>>(S (*fun)(), std::vector& u){ 83 | for(auto& v : u){ 84 | fun >> v; 85 | } 86 | return u; 87 | } 88 | 89 | template 90 | std::ostream& operator<<(std::ostream& os, const std::vector& u){ 91 | os << "["; 92 | for(size_t i=0 ; i 3 | 4 | Graph* Graph::instance = 0; 5 | long int Graph::uid_counter = 1; 6 | 7 | long int Graph::uid(){ 8 | return uid_counter++; 9 | } 10 | 11 | Graph* Graph::getInstance(){ 12 | if(instance==NULL){ 13 | instance = new Graph(); 14 | } 15 | return instance; 16 | } 17 | 18 | Graph::Graph(){} 19 | 20 | void Graph::connect(const long int& uid, const std::pair& edge){ 21 | 
nodes[uid].push_back(edge); 22 | } 23 | 24 | std::vector > Graph::get(const long int& uid) const{ 25 | return nodes.at(uid); 26 | } 27 | 28 | bool Graph::has(const long int& uid) const{ 29 | return nodes.find(uid)!=nodes.end(); 30 | } 31 | 32 | void Graph::new_recording(){ 33 | nodes.clear(); 34 | } 35 | -------------------------------------------------------------------------------- /install_diff/root/src/node.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/node.h" 2 | 3 | Node::Node(const double& value) { 4 | this->value = value; 5 | this->uid = Graph::uid(); 6 | } 7 | 8 | Node::Node(const Node& node){ 9 | this->value = node.value; 10 | this->uid = node.uid; 11 | } 12 | 13 | double Node::gradient_recursive(Graph* graph, const long int& current_uid, const long int& stop_uid) const{ 14 | if(current_uid==stop_uid){ 15 | return 1.0; 16 | } 17 | 18 | double sum=0.0; 19 | if(graph->has(current_uid)){ 20 | for(auto& pair : graph->get(current_uid)){ 21 | sum += pair.first*gradient_recursive(graph, pair.second, stop_uid); 22 | } 23 | } 24 | return sum; 25 | } 26 | 27 | double Node::gradient(const Node& node) const{ 28 | Graph* graph = Graph::getInstance(); 29 | return gradient_recursive(graph, this->uid, node.uid); 30 | } 31 | 32 | std::vector Node::gradient(const std::vector& nodes) const{ 33 | Graph* graph = Graph::getInstance(); 34 | std::vector grad(nodes.size()); 35 | for(size_t i=0 ; iuid, nodes[i].uid); 37 | } 38 | return grad; 39 | } 40 | 41 | std::vector > Node::gradient(const std::vector >& nodes) const{ 42 | Graph* graph = Graph::getInstance(); 43 | std::vector > grad(nodes.size()); 44 | for(size_t i=0 ; iuid, nodes[i][j].uid); 48 | } 49 | } 50 | return grad; 51 | } 52 | 53 | Node Node::monadic_operation(const Node& n, MonadicOperationResult (*fun)(const double&)){ 54 | MonadicOperationResult res = fun(n.value); 55 | Node result(res.value); 56 | Graph* graph = Graph::getInstance(); 57 | graph->connect(result.uid, std::make_pair(res.grad, n.uid)); 58 | return result; 59 | } 60 | 61 | Node Node::dyadic_operation(const Node& left, const Node& right, DyadicOperationResult (*fun)(const double&, const double&)){ 62 | DyadicOperationResult res = fun(left.value, right.value); 63 | Node result(res.value); 64 | Graph* graph = Graph::getInstance(); 65 | graph->connect(result.uid, std::make_pair(res.left_grad, left.uid)); 66 | graph->connect(result.uid, std::make_pair(res.right_grad, right.uid)); 67 | return result; 68 | } 69 | 70 | Node Node::polyadic_operation(const std::vector& nodes, PolyadicOperationResult (*fun)(const std::vector&)){ 71 | std::vector values(nodes.size()); 72 | for(size_t i=0 ; iconnect(result.uid, std::make_pair(res.gradients[i], nodes[i].uid)); 80 | } 81 | return result; 82 | } 83 | 84 | Node operator+(const Node& left, const Node& right){ 85 | return Node::dyadic_operation(left, right, [](const double& l, const double& r){ 86 | return DyadicOperationResult(l+r, 1.0, 1.0); 87 | }); 88 | } 89 | 90 | Node operator-(const Node& left, const Node& right){ 91 | return Node::dyadic_operation(left, right, [](const double& l, const double& r){ 92 | return DyadicOperationResult(l-r, 1.0, -1.0); 93 | }); 94 | } 95 | 96 | Node operator*(const Node& left, const Node& right){ 97 | return Node::dyadic_operation(left, right, [](const double& l, const double& r){ 98 | return DyadicOperationResult(l*r, r, l); 99 | }); 100 | } 101 | 102 | Node operator/(const Node& left, const Node& right){ 103 | return 
Node::dyadic_operation(left, right, [](const double& l, const double& r){ 104 | return DyadicOperationResult(l/r, 1.0/r, -1.0*l/(r*r)); 105 | }); 106 | } 107 | 108 | Node& Node::operator+=(const Node& r){ 109 | *this = *this + r; 110 | return *this; 111 | } 112 | 113 | Node& Node::operator-=(const Node& r){ 114 | *this = *this - r; 115 | return *this; 116 | } 117 | 118 | Node& Node::operator*=(const Node& r){ 119 | *this = *this * r; 120 | return *this; 121 | } 122 | 123 | Node& Node::operator/=(const Node& r){ 124 | *this = *this / r; 125 | return *this; 126 | } 127 | 128 | bool operator==(const Node& left, const Node& right){ 129 | return left.value==right.value; 130 | } 131 | 132 | bool operator<(const Node& left, const Node& right){ 133 | return left.value(const Node& left, const Node& right){ 137 | return left.value>right.value; 138 | } 139 | 140 | bool operator<=(const Node& left, const Node& right){ 141 | return left.value<=right.value; 142 | } 143 | 144 | bool operator>=(const Node& left, const Node& right){ 145 | return left.value>=right.value; 146 | } 147 | 148 | Node sin(const Node& x){ 149 | return Node::monadic_operation(x, [](const double& n){ 150 | return MonadicOperationResult(::sin(n), ::cos(n)); 151 | }); 152 | } 153 | 154 | Node cos(const Node& x){ 155 | return Node::monadic_operation(x, [](const double& n){ 156 | return MonadicOperationResult(::cos(n), -1.0*::sin(n)); 157 | }); 158 | } 159 | 160 | Node tan(const Node& x){ 161 | return Node::monadic_operation(x, [](const double& n){ 162 | return MonadicOperationResult(::tan(n), 1.0/::pow(::cos(n), 2)); 163 | }); 164 | } 165 | 166 | Node sinh(const Node& x){ 167 | return Node::monadic_operation(x, [](const double& n){ 168 | return MonadicOperationResult(::sinh(n), ::cosh(n)); 169 | }); 170 | } 171 | 172 | Node cosh(const Node& x){ 173 | return Node::monadic_operation(x, [](const double& n){ 174 | return MonadicOperationResult(::cosh(n), ::sinh(n)); 175 | }); 176 | } 177 | 178 | Node asin(const Node& x){ 179 | return Node::monadic_operation(x, [](const double& n){ 180 | return MonadicOperationResult(::asin(n), 1.0/(::sqrt(1-n*n))); 181 | }); 182 | } 183 | 184 | Node acos(const Node& x){ 185 | return Node::monadic_operation(x, [](const double& n){ 186 | return MonadicOperationResult(::acos(n), -1.0/(::sqrt(1-n*n))); 187 | }); 188 | } 189 | 190 | Node atan(const Node& x){ 191 | return Node::monadic_operation(x, [](const double& n){ 192 | return MonadicOperationResult(::atan(n), 1.0/(1+n*n)); 193 | }); 194 | } 195 | 196 | Node tanh(const Node& x){ 197 | return Node::monadic_operation(x, [](const double& n){ 198 | return MonadicOperationResult(::tanh(n), 1.0-::pow(::tanh(n), 2)); 199 | }); 200 | } 201 | 202 | Node log(const Node& x, const Node& base){ 203 | return Node::dyadic_operation(x, base, [](const double& a, const double& b){ 204 | return DyadicOperationResult(::log(a)/::log(b), 1.0/(a*::log(b)), -1.0*::log(a)/(b*::log(b))); 205 | }); 206 | } 207 | 208 | Node log10(const Node& x){ 209 | return Node::monadic_operation(x, [](const double& n){ 210 | return MonadicOperationResult(::log(n)/::log(10), 1.0/(n*::log(10))); 211 | }); 212 | } 213 | 214 | Node ln(const Node& x){ 215 | return Node::monadic_operation(x, [](const double& n){ 216 | return MonadicOperationResult(::log(n), 1.0/::log(n)); 217 | }); 218 | } 219 | 220 | Node pow(const Node& x, const Node& base){ 221 | return Node::dyadic_operation(x, base, [](const double& a, const double& b){ 222 | if(a<=0){ 223 | return DyadicOperationResult(::pow(a,b), b*::pow(a,b-1), 
0); 224 | } 225 | return DyadicOperationResult(::pow(a,b), b*::pow(a,b-1), ::log(a)*::pow(a,b)); 226 | }); 227 | } 228 | 229 | Node exp(const Node& x){ 230 | return Node::monadic_operation(x, [](const double& n){ 231 | return MonadicOperationResult(::exp(n), ::exp(n)); 232 | }); 233 | } 234 | 235 | Node sqrt(const Node& x){ 236 | return Node::monadic_operation(x, [](const double& n){ 237 | return MonadicOperationResult(::sqrt(n), 1.0/(2*::sqrt(n))); 238 | }); 239 | } 240 | 241 | Node abs(const Node& x){ 242 | return Node::monadic_operation(x, [](const double& n){ 243 | int sign = n==0 ? 0 : n/::abs(n); 244 | return MonadicOperationResult(::abs(n), sign); 245 | }); 246 | } 247 | 248 | Node min(const Node& left, const Node& right){ 249 | return Node::dyadic_operation(left, right, [](const double& a, const double& b){ 250 | if(ab){ 254 | return DyadicOperationResult(b, 0, 1); 255 | } 256 | return DyadicOperationResult(a, 0, 0); 257 | }); 258 | } 259 | 260 | Node max(const Node& left, const Node& right){ 261 | return Node::dyadic_operation(left, right, [](const double& a, const double& b){ 262 | if(a>b){ 263 | return DyadicOperationResult(a, 1, 0); 264 | } 265 | if(a 2 | #include"../matrix/matrix_def.cpp" 3 | #include"../welcome/score_wel.cpp" 4 | #include"../matrix/matrix_pro.cpp" 5 | #include 6 | /* 7 | ███████╗██████╗ ██████╗ ███████╗ ███████╗███╗ ██╗ ██████╗ ██╗███╗ ██╗███████╗ 8 | ██╔════╝██╔══██╗██╔════╝ ██╔════╝ ██╔════╝████╗ ██║██╔════╝ ██║████╗ ██║██╔════╝ 9 | █████╗ ██║ ██║██║ ███╗█████╗ █████╗ ██╔██╗ ██║██║ ███╗██║██╔██╗ ██║█████╗ 10 | ██╔══╝ ██║ ██║██║ ██║██╔══╝ ██╔══╝ ██║╚██╗██║██║ ██║██║██║╚██╗██║██╔══╝ 11 | ███████╗██████╔╝╚██████╔╝███████╗ ███████╗██║ ╚████║╚██████╔╝██║██║ ╚████║███████╗ 12 | ╚══════╝╚═════╝ ╚═════╝ ╚══════╝ ╚══════╝╚═╝ ╚═══╝ ╚═════╝ ╚═╝╚═╝ ╚═══╝╚══════╝ 13 | Author:Edge 14 | Web:likedge.top 15 | Date:20200925 16 | */ 17 | double sigmoid_def() 18 | { 19 | cout< 14 | #ifdef __APPLE__ 15 | #include 16 | #else 17 | #include 18 | #endif 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include "./autodiff/node.h" 27 | #include"./matrix/matrix_def.h" 28 | #include"./matrix/matrix_pro.h" 29 | #include"./welcome/score_wel.cpp" 30 | #include"./logistic/logistic_def.h" 31 | #include"./grad_edge/matrix_grad.h" 32 | using namespace std; 33 | clock_t start, stop; 34 | double duration; 35 | Node z=1; 36 | Node t1 = 1,a13 = 1; 37 | /*自定义的损失函数*/ 38 | Node loss_act(Node t1,Node a13) 39 | { 40 | Node loss = 0.5*(pow((t1-a13),2)); 41 | return loss; 42 | } 43 | Node sigmoid_act(Node z) 44 | { 45 | Node sigmoid_act = 1/(1+(1/exp(z))); 46 | return sigmoid_act; 47 | } 48 | //loss 49 | Node (*loss)(Node,Node) = loss_act; 50 | Node (*act)(Node) = sigmoid_act; 51 | int main() 52 | { 53 | Matrix data_1 = ones(1,3); 54 | Matrix data_2 = ones(3,1); 55 | cout_mat(mul(data_1,data_2)); 56 | welcome(); 57 | //begin 58 | cout<<"auto build on gcc"< -------------------------------------------------------------------------------- /picture/01.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /picture/02.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /picture/Edge计算引擎.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/Edge计算引擎.pdf -------------------------------------------------------------------------------- /picture/Edge计算引擎.xmind: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/Edge计算引擎.xmind -------------------------------------------------------------------------------- /picture/WX20191119-105411@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/WX20191119-105411@2x.png -------------------------------------------------------------------------------- /picture/WX20191119-125244@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/WX20191119-125244@2x.png -------------------------------------------------------------------------------- /picture/apply_axis_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/apply_axis_0.png -------------------------------------------------------------------------------- /picture/apply_axis_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/apply_axis_1.png -------------------------------------------------------------------------------- /picture/autograd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/autograd.jpg -------------------------------------------------------------------------------- /picture/cpu.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /picture/image-20200128154352842.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/image-20200128154352842.png -------------------------------------------------------------------------------- /picture/image-20200418210521131.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/image-20200418210521131.png -------------------------------------------------------------------------------- /picture/jabber.svg: -------------------------------------------------------------------------------- 1 | Jabber icon -------------------------------------------------------------------------------- /picture/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/logo.png -------------------------------------------------------------------------------- /picture/logo2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/logo2.png -------------------------------------------------------------------------------- /picture/nerual_test1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/nerual_test1.png -------------------------------------------------------------------------------- /picture/path.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/path.png -------------------------------------------------------------------------------- /picture/processwire (1).svg: -------------------------------------------------------------------------------- 1 | ProcessWire icon -------------------------------------------------------------------------------- /picture/啊.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /picture/彩虹.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /picture/方向.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /picture/星月.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /picture/火箭.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /picture/生产流程.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/picture/生产流程.pdf -------------------------------------------------------------------------------- /root/Makefile: -------------------------------------------------------------------------------- 1 | CXX = g++ 2 | ODIR = obj 3 | CXXFLAGS = -std=c++11 -O3 4 | OBJS = $(ODIR)/graph.o $(ODIR)/node.o 5 | 6 | all : $(ODIR) $(OBJS) 7 | 8 | $(ODIR)/graph.o : src/graph.cpp include/graph.h 9 | $(CXX) -c $< -o $@ $(CXXFLAGS) 10 | 11 | $(ODIR)/node.o : src/node.cpp include/node.h 12 | $(CXX) -c $< -o $@ $(CXXFLAGS) 13 | 14 | $(ODIR) : 15 | if [ ! 
-d $(ODIR) ]; then mkdir $(ODIR); fi 16 | 17 | clean : 18 | if [ -d $(ODIR) ]; then rm $(ODIR) -r; fi 19 | 20 | .PHONY : all 21 | .PHONY : clean 22 | 23 | -------------------------------------------------------------------------------- /root/include/dor.h: -------------------------------------------------------------------------------- 1 | #ifndef DYADIC_OPERATION_RESULT 2 | #define DYADIC_OPERATION_RESULT 3 | 4 | struct DyadicOperationResult { 5 | double value; 6 | double left_grad; 7 | double right_grad; 8 | 9 | DyadicOperationResult(double value, double left_grad, double right_grad){ 10 | this->value = value; 11 | this->left_grad = left_grad; 12 | this->right_grad = right_grad; 13 | } 14 | }; 15 | 16 | #endif /* end of include guard: DYADIC_OPERATION_RESULT */ 17 | -------------------------------------------------------------------------------- /root/include/edgelayer.h: -------------------------------------------------------------------------------- 1 | class edge_layer 2 | { 3 | public: 4 | virtual ~edge_layer() {} 5 | virtual Matrix4d forward(Matrix4d mid1) = 0; 6 | virtual int parameter_counter() = 0; 7 | }; 8 | 9 | class conv2d : public edge_layer 10 | { 11 | public: 12 | conv2d(Matrix4d mid_4, int in_channel, int out_channle, int _stride, int ksize, int _mode, int _padding); 13 | int arg1; 14 | Matrix4d mid4; 15 | int input_dim; 16 | int output_channels; 17 | int stride; 18 | int kernel_size; 19 | int mode; 20 | int padding; 21 | 22 | Matrix4d forward(Matrix4d mid4) 23 | { 24 | std::cout << "in_channel = " << input_dim << std::endl; 25 | std::cout << "out_channle = " << output_channels << std::endl; 26 | std::cout << "_stride = " << stride << std::endl; 27 | std::cout << "ksize = " << kernel_size << std::endl; 28 | std::cout << "_mode = " << mode << std::endl; 29 | std::cout << "_padding = " << padding << std::endl; 30 | Matrix3d *output3d_arr = (Matrix3d *)malloc(mid4.batch * sizeof(Matrix3d)); 31 | for (int batch_idx = 0; batch_idx < mid4.batch; batch_idx++) 32 | { 33 | Matrix3d mid3 = mid4.matrix4d[batch_idx]; 34 | Matrix3d output3d = conv_test_with_output(mid3, input_dim, output_channels, stride, kernel_size, mode, false); 35 | output3d_arr[batch_idx] = output3d; 36 | } 37 | 38 | Matrix4d output4d = CreateMatrix4d(mid4.batch, output_channels, output3d_arr[0].wid, output3d_arr[0].high); 39 | for (int batch_idx = 0; batch_idx < mid4.batch; batch_idx++) 40 | { 41 | output4d.matrix4d[batch_idx] = output3d_arr[batch_idx]; 42 | } 43 | return output4d; 44 | } 45 | int parameter_counter() 46 | { 47 | int num_params = input_dim * output_channels * kernel_size * kernel_size; 48 | 49 | // if (bias) 50 | // { 51 | // num_params += out_channels; 52 | // } 53 | 54 | return num_params; 55 | } 56 | }; 57 | 58 | conv2d::conv2d(Matrix4d mid_1, int in_channel, int out_channle, int _stride, int ksize, int _mode, int _padding) 59 | { 60 | mid4 = mid_1; 61 | input_dim = in_channel; 62 | output_channels = out_channle; 63 | stride = _stride; 64 | kernel_size = ksize; 65 | mode = _mode; 66 | padding = _padding; 67 | } 68 | class bn : public edge_layer 69 | { 70 | public: 71 | double beta = 0.1; 72 | double gamma = 0.1; 73 | bn(double beta, double gamma); 74 | Matrix3d forward(Matrix3d mid1) 75 | { 76 | Matrix3d output_bn = CreateMatrix3d(mid1.dep, mid1.wid, mid1.high); 77 | for (int i = 0; i < mid1.dep; i++) 78 | { 79 | double mean_bn = matrix_mean(mid1.matrix3d[i]); 80 | double var_bn = matrix_var(mid1.matrix3d[i]); 81 | // cout << "sqrt var:" << sqrt(var_bn) << endl; 82 | output_bn.matrix3d[i] = 
subtract_ele(mid1.matrix3d[i], mean_bn); 83 | // cout << "beta:" << beta << endl; 84 | output_bn.matrix3d[i] = matrix_division(output_bn.matrix3d[i], sqrt(var_bn)); 85 | cout_mat(output_bn.matrix3d[i]); 86 | output_bn.matrix3d[i] = times_mat(beta, output_bn.matrix3d[i]); 87 | output_bn.matrix3d[i] = add_ele(output_bn.matrix3d[i], gamma); 88 | cout_mat(output_bn.matrix3d[i]); 89 | } 90 | return output_bn; 91 | } 92 | }; 93 | bn::bn(double beta_bn, double gamma_bn) 94 | { 95 | beta = beta_bn; 96 | gamma = gamma_bn; 97 | } 98 | class fc : public edge_layer 99 | { 100 | public: 101 | // int layer_call() 102 | // { 103 | // std::cout << "call back from fc" << std::endl; 104 | // return 100; 105 | // } 106 | }; 107 | // enum LAYER_TYPE 108 | // { 109 | // conv_2d, 110 | // batchnormal, 111 | // fullconnect 112 | // }; 113 | class focus : public edge_layer 114 | { 115 | public: 116 | int in_channel = 32; 117 | int out_channel = 64; 118 | focus(int in_channel, int out_channel); 119 | Matrix3d forward(Matrix3d mid1) 120 | { 121 | Matrix4d template_mid1 = CreateMatrix4d(in_channel, mid1.dep, mid1.wid, mid1.high); 122 | edge_layer *conv2d_1 = new conv2d(template_mid1, 3, 12, 1, 9, 0, 5); 123 | Matrix3d output_focus; 124 | return output_focus; 125 | } 126 | }; 127 | focus::focus(int in_channel, int out_channel) 128 | { 129 | in_channel = in_channel; 130 | out_channel = out_channel; 131 | } -------------------------------------------------------------------------------- /root/include/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAPH_H 2 | #define GRAPH_H 3 | 4 | #include 5 | #include 6 | #include 7 | class Graph { 8 | private: 9 | std::map > > nodes; 10 | static Graph* instance; 11 | Graph(); 12 | 13 | public: 14 | static long int uid_counter; 15 | static long int uid(); 16 | static Graph* getInstance(); 17 | 18 | void connect(const long int& uid, const std::pair& edge); 19 | std::vector > get(const long int& uid) const; 20 | bool has(const long int& uid) const; 21 | 22 | void new_recording(); 23 | }; 24 | 25 | #endif /* end of include guard: GRAPH_H */ 26 | -------------------------------------------------------------------------------- /root/include/mor.h: -------------------------------------------------------------------------------- 1 | #ifndef MONADIC_OPERATION_RESULT 2 | #define MONADIC_OPERATION_RESULT 3 | 4 | struct MonadicOperationResult { 5 | double value; 6 | double grad; 7 | 8 | MonadicOperationResult(double value, double grad){ 9 | this->value = value; 10 | this->grad = grad; 11 | } 12 | }; 13 | 14 | #endif /* end of include guard: MONADIC_OPERATION_RESULT */ 15 | -------------------------------------------------------------------------------- /root/include/node.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_H 2 | #define NODE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "graph.h" 8 | #include "mor.h" 9 | #include "dor.h" 10 | #include "por.h" 11 | 12 | class Node { 13 | private: 14 | double value; 15 | long int uid; 16 | 17 | double gradient_recursive(Graph* graph, const long int& current_uid, const long int& stop_uid) const; 18 | 19 | public: 20 | Node(const double& value=0); 21 | Node(const Node& node); 22 | 23 | static Node monadic_operation(const Node& n, MonadicOperationResult (*)(const double&)); 24 | static Node dyadic_operation(const Node& l, const Node& r, DyadicOperationResult (*)(const double&, const double&)); 25 | static Node polyadic_operation(const 
std::vector& nodes, PolyadicOperationResult (*)(const std::vector&)); 26 | 27 | double gradient(const Node& node) const; 28 | std::vector gradient(const std::vector& nodes) const; 29 | std::vector > gradient(const std::vector >& nodes) const; 30 | 31 | friend Node operator+(const Node& l, const Node& r); 32 | friend Node operator-(const Node& l, const Node& r); 33 | friend Node operator*(const Node& l, const Node& r); 34 | friend Node operator/(const Node& l, const Node& r); 35 | 36 | Node& operator+=(const Node& r); 37 | Node& operator-=(const Node& r); 38 | Node& operator*=(const Node& r); 39 | Node& operator/=(const Node& r); 40 | 41 | friend bool operator==(const Node& l, const Node& r); 42 | friend bool operator<(const Node& l, const Node& r); 43 | friend bool operator>(const Node& l, const Node& r); 44 | friend bool operator<=(const Node& l, const Node& r); 45 | friend bool operator>=(const Node& l, const Node& r); 46 | 47 | friend Node sin(const Node& x); 48 | friend Node cos(const Node& x); 49 | friend Node tan(const Node& x); 50 | friend Node sinh(const Node& x); 51 | friend Node cosh(const Node& x); 52 | friend Node tanh(const Node& x); 53 | friend Node asin(const Node& x); 54 | friend Node acos(const Node& x); 55 | friend Node atan(const Node& x); 56 | 57 | friend Node log(const Node& x, const Node& base); 58 | friend Node log10(const Node& x); 59 | friend Node ln(const Node& x); 60 | 61 | friend Node pow(const Node& x, const Node& p); 62 | friend Node exp(const Node& x); 63 | friend Node sqrt(const Node& x); 64 | 65 | friend Node abs(const Node& x); 66 | friend Node min(const Node& l, const Node& r); 67 | friend Node max(const Node& l, const Node& r); 68 | 69 | friend std::ostream& operator<<(std::ostream& os, const Node& node); 70 | }; 71 | 72 | #endif /* end of include guard: NODE_H */ 73 | -------------------------------------------------------------------------------- /root/include/por.h: -------------------------------------------------------------------------------- 1 | #ifndef POLYADIC_OPERATION_RESULT 2 | #define POLYADIC_OPERATION_RESULT 3 | 4 | #include 5 | 6 | struct PolyadicOperationResult { 7 | double value; 8 | std::vector gradients; 9 | 10 | PolyadicOperationResult(double value, const std::vector& gradients){ 11 | this->value = value; 12 | this->gradients = gradients; 13 | } 14 | }; 15 | 16 | #endif /* end of include guard: POLYADIC_OPERATION_RESULT */ 17 | -------------------------------------------------------------------------------- /root/include/vectmath.h: -------------------------------------------------------------------------------- 1 | #ifndef VECTMATH 2 | #define VECTMATH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // dot product 11 | template 12 | std::vector > dot(const std::vector >& a, const std::vector >& b){ 13 | assert(a[0].size()==b.size()); 14 | 15 | T w=0; 16 | std::vector > result(a.size(), std::vector(b[0].size())); 17 | for (int i=0 ; i 32 | std::vector& operator-=(std::vector& u, const std::vector& v){ 33 | assert(u.size()==v.size()); 34 | for(size_t i=0 ; i 41 | std::vector operator+(const std::vector& u, const std::vector& v){ 42 | assert(u.size()==v.size()); 43 | std::vector w(u.size()); 44 | for(size_t i=0 ; i 51 | std::vector operator*(const S& s, const std::vector& u){ 52 | std::vector result(u.size()); 53 | for(size_t i=0 ; i 60 | std::vector& operator>>(U (*fun)(U&), std::vector& u){ 61 | std::transform(u.begin(), u.end(), u.begin(), fun); 62 | return u; 63 | } 64 | 65 | template 66 | std::vector& 
operator>>(S (*fun)(S&), std::vector& u){ 67 | for(auto& v : u){ 68 | fun >> v; 69 | } 70 | return u; 71 | } 72 | 73 | template 74 | std::vector& operator>>(U (*fun)(), std::vector& u){ 75 | for(auto& e : u){ 76 | e = fun(); 77 | } 78 | return u; 79 | } 80 | 81 | template 82 | std::vector& operator>>(S (*fun)(), std::vector& u){ 83 | for(auto& v : u){ 84 | fun >> v; 85 | } 86 | return u; 87 | } 88 | 89 | template 90 | std::ostream& operator<<(std::ostream& os, const std::vector& u){ 91 | os << "["; 92 | for(size_t i=0 ; i& edge){ 20 | nodes[uid].push_back(edge); 21 | } 22 | 23 | std::vector > Graph::get(const long int& uid) const{ 24 | return nodes.at(uid); 25 | } 26 | 27 | bool Graph::has(const long int& uid) const{ 28 | return nodes.find(uid)!=nodes.end(); 29 | } 30 | 31 | void Graph::new_recording(){ 32 | nodes.clear(); 33 | } 34 | -------------------------------------------------------------------------------- /root/src/node.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/node.h" 2 | 3 | Node::Node(const double& value) { 4 | this->value = value; 5 | this->uid = Graph::uid(); 6 | } 7 | 8 | Node::Node(const Node& node){ 9 | this->value = node.value; 10 | this->uid = node.uid; 11 | } 12 | 13 | double Node::gradient_recursive(Graph* graph, const long int& current_uid, const long int& stop_uid) const{ 14 | if(current_uid==stop_uid){ 15 | return 1.0; 16 | } 17 | 18 | double sum=0.0; 19 | if(graph->has(current_uid)){ 20 | for(auto& pair : graph->get(current_uid)){ 21 | sum += pair.first*gradient_recursive(graph, pair.second, stop_uid); 22 | } 23 | } 24 | return sum; 25 | } 26 | 27 | double Node::gradient(const Node& node) const{ 28 | Graph* graph = Graph::getInstance(); 29 | return gradient_recursive(graph, this->uid, node.uid); 30 | } 31 | 32 | std::vector Node::gradient(const std::vector& nodes) const{ 33 | Graph* graph = Graph::getInstance(); 34 | std::vector grad(nodes.size()); 35 | for(size_t i=0 ; iuid, nodes[i].uid); 37 | } 38 | return grad; 39 | } 40 | 41 | std::vector > Node::gradient(const std::vector >& nodes) const{ 42 | Graph* graph = Graph::getInstance(); 43 | std::vector > grad(nodes.size()); 44 | for(size_t i=0 ; iuid, nodes[i][j].uid); 48 | } 49 | } 50 | return grad; 51 | } 52 | 53 | Node Node::monadic_operation(const Node& n, MonadicOperationResult (*fun)(const double&)){ 54 | MonadicOperationResult res = fun(n.value); 55 | Node result(res.value); 56 | Graph* graph = Graph::getInstance(); 57 | graph->connect(result.uid, std::make_pair(res.grad, n.uid)); 58 | return result; 59 | } 60 | 61 | Node Node::dyadic_operation(const Node& left, const Node& right, DyadicOperationResult (*fun)(const double&, const double&)){ 62 | DyadicOperationResult res = fun(left.value, right.value); 63 | Node result(res.value); 64 | Graph* graph = Graph::getInstance(); 65 | graph->connect(result.uid, std::make_pair(res.left_grad, left.uid)); 66 | graph->connect(result.uid, std::make_pair(res.right_grad, right.uid)); 67 | return result; 68 | } 69 | 70 | Node Node::polyadic_operation(const std::vector& nodes, PolyadicOperationResult (*fun)(const std::vector&)){ 71 | std::vector values(nodes.size()); 72 | for(size_t i=0 ; iconnect(result.uid, std::make_pair(res.gradients[i], nodes[i].uid)); 80 | } 81 | return result; 82 | } 83 | 84 | Node operator+(const Node& left, const Node& right){ 85 | return Node::dyadic_operation(left, right, [](const double& l, const double& r){ 86 | return DyadicOperationResult(l+r, 1.0, 1.0); 87 | }); 88 | } 89 | 
90 | Node operator-(const Node& left, const Node& right){ 91 | return Node::dyadic_operation(left, right, [](const double& l, const double& r){ 92 | return DyadicOperationResult(l-r, 1.0, -1.0); 93 | }); 94 | } 95 | 96 | Node operator*(const Node& left, const Node& right){ 97 | return Node::dyadic_operation(left, right, [](const double& l, const double& r){ 98 | return DyadicOperationResult(l*r, r, l); 99 | }); 100 | } 101 | 102 | Node operator/(const Node& left, const Node& right){ 103 | return Node::dyadic_operation(left, right, [](const double& l, const double& r){ 104 | return DyadicOperationResult(l/r, 1.0/r, -1.0*l/(r*r)); 105 | }); 106 | } 107 | 108 | Node& Node::operator+=(const Node& r){ 109 | *this = *this + r; 110 | return *this; 111 | } 112 | 113 | Node& Node::operator-=(const Node& r){ 114 | *this = *this - r; 115 | return *this; 116 | } 117 | 118 | Node& Node::operator*=(const Node& r){ 119 | *this = *this * r; 120 | return *this; 121 | } 122 | 123 | Node& Node::operator/=(const Node& r){ 124 | *this = *this / r; 125 | return *this; 126 | } 127 | 128 | bool operator==(const Node& left, const Node& right){ 129 | return left.value==right.value; 130 | } 131 | 132 | bool operator<(const Node& left, const Node& right){ 133 | return left.value(const Node& left, const Node& right){ 137 | return left.value>right.value; 138 | } 139 | 140 | bool operator<=(const Node& left, const Node& right){ 141 | return left.value<=right.value; 142 | } 143 | 144 | bool operator>=(const Node& left, const Node& right){ 145 | return left.value>=right.value; 146 | } 147 | 148 | Node sin(const Node& x){ 149 | return Node::monadic_operation(x, [](const double& n){ 150 | return MonadicOperationResult(::sin(n), ::cos(n)); 151 | }); 152 | } 153 | 154 | Node cos(const Node& x){ 155 | return Node::monadic_operation(x, [](const double& n){ 156 | return MonadicOperationResult(::cos(n), -1.0*::sin(n)); 157 | }); 158 | } 159 | 160 | Node tan(const Node& x){ 161 | return Node::monadic_operation(x, [](const double& n){ 162 | return MonadicOperationResult(::tan(n), 1.0/::pow(::cos(n), 2)); 163 | }); 164 | } 165 | 166 | Node sinh(const Node& x){ 167 | return Node::monadic_operation(x, [](const double& n){ 168 | return MonadicOperationResult(::sinh(n), ::cosh(n)); 169 | }); 170 | } 171 | 172 | Node cosh(const Node& x){ 173 | return Node::monadic_operation(x, [](const double& n){ 174 | return MonadicOperationResult(::cosh(n), ::sinh(n)); 175 | }); 176 | } 177 | 178 | Node asin(const Node& x){ 179 | return Node::monadic_operation(x, [](const double& n){ 180 | return MonadicOperationResult(::asin(n), 1.0/(::sqrt(1-n*n))); 181 | }); 182 | } 183 | 184 | Node acos(const Node& x){ 185 | return Node::monadic_operation(x, [](const double& n){ 186 | return MonadicOperationResult(::acos(n), -1.0/(::sqrt(1-n*n))); 187 | }); 188 | } 189 | 190 | Node atan(const Node& x){ 191 | return Node::monadic_operation(x, [](const double& n){ 192 | return MonadicOperationResult(::atan(n), 1.0/(1+n*n)); 193 | }); 194 | } 195 | 196 | Node tanh(const Node& x){ 197 | return Node::monadic_operation(x, [](const double& n){ 198 | return MonadicOperationResult(::tanh(n), 1.0-::pow(::tanh(n), 2)); 199 | }); 200 | } 201 | 202 | Node log(const Node& x, const Node& base){ 203 | return Node::dyadic_operation(x, base, [](const double& a, const double& b){ 204 | return DyadicOperationResult(::log(a)/::log(b), 1.0/(a*::log(b)), -1.0*::log(a)/(b*::log(b))); 205 | }); 206 | } 207 | 208 | Node log10(const Node& x){ 209 | return Node::monadic_operation(x, [](const 
207 | 
208 | Node log10(const Node& x){
209 |     return Node::monadic_operation(x, [](const double& n){
210 |         return MonadicOperationResult(::log(n)/::log(10), 1.0/(n*::log(10)));
211 |     });
212 | }
213 | 
214 | Node ln(const Node& x){
215 |     return Node::monadic_operation(x, [](const double& n){
216 |         return MonadicOperationResult(::log(n), 1.0/n);
217 |     });
218 | }
219 | 
220 | Node pow(const Node& x, const Node& base){
221 |     return Node::dyadic_operation(x, base, [](const double& a, const double& b){
222 |         if(a<=0){
223 |             return DyadicOperationResult(::pow(a,b), b*::pow(a,b-1), 0);
224 |         }
225 |         return DyadicOperationResult(::pow(a,b), b*::pow(a,b-1), ::log(a)*::pow(a,b));
226 |     });
227 | }
228 | 
229 | Node exp(const Node& x){
230 |     return Node::monadic_operation(x, [](const double& n){
231 |         return MonadicOperationResult(::exp(n), ::exp(n));
232 |     });
233 | }
234 | 
235 | Node sqrt(const Node& x){
236 |     return Node::monadic_operation(x, [](const double& n){
237 |         return MonadicOperationResult(::sqrt(n), 1.0/(2*::sqrt(n)));
238 |     });
239 | }
240 | 
241 | Node abs(const Node& x){
242 |     return Node::monadic_operation(x, [](const double& n){
243 |         int sign = n==0 ? 0 : n/::fabs(n);
244 |         return MonadicOperationResult(::fabs(n), sign);
245 |     });
246 | }
247 | 
248 | Node min(const Node& left, const Node& right){
249 |     return Node::dyadic_operation(left, right, [](const double& a, const double& b){
250 |         if(a<b){
251 |             return DyadicOperationResult(a, 1, 0);
252 |         }
253 |         if(a>b){
254 |             return DyadicOperationResult(b, 0, 1);
255 |         }
256 |         return DyadicOperationResult(a, 0, 0);
257 |     });
258 | }
259 | 
260 | Node max(const Node& left, const Node& right){
261 |     return Node::dyadic_operation(left, right, [](const double& a, const double& b){
262 |         if(a>b){
263 |             return DyadicOperationResult(a, 1, 0);
264 |         }
265 |         if(a<b){
266 |             return DyadicOperationResult(b, 0, 1);
267 |         }
268 |         return DyadicOperationResult(a, 0, 0);
269 |     });
270 | }
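Every function in this file follows the same two patterns: Node::monadic_operation takes a callable returning MonadicOperationResult(value, derivative), and Node::dyadic_operation takes one returning DyadicOperationResult(value, ∂/∂left, ∂/∂right). A new operation can be added by supplying another such callable. The sketch below is illustrative only: hypot_node is a hypothetical name that does not exist in the repository, and it assumes the Node, DyadicOperationResult, and Node::dyadic_operation declarations used by the operators above.

```c++
// Illustrative sketch, not part of the repository.
// Assumes Node, DyadicOperationResult and Node::dyadic_operation as declared in autodiff/node.h.
#include <cmath>

Node hypot_node(const Node& left, const Node& right){
    return Node::dyadic_operation(left, right, [](const double& l, const double& r){
        double v = ::sqrt(l*l + r*r);               // value of the operation
        return DyadicOperationResult(v, l/v, r/v);  // (value, d/d_left, d/d_right)
    });
}
```

The derivative pair follows directly from v = sqrt(l² + r²); like sqrt above, it is undefined when both operands are zero.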
--------------------------------------------------------------------------------
2 | #include 
3 | #include 
4 | #include 
5 | #include 
6 | #include 
7 | #include "./matrix/matrix_def.h"
8 | #include "./matrix/matrix_pro.h"
9 | #include "./logistic/logistic_def.h"
10 | #include "./autodiff/node.h"
11 | #include "./grad_edge/matrix_grad.h"
12 | #include "./root/include/edgelayer.h"
13 | #include "./welcome/score_wel.cpp"
14 | #include 
15 | #include 
16 | #include 
17 | using namespace std;
18 | 
19 | // TODO: function definitions above
20 | 
21 | void test_batch_conv_test(int batch_size,
22 |                           int depth,
23 |                           int height,
24 |                           int width,
25 |                           int output_channels,
26 |                           int stride,
27 |                           int kernel_size,
28 |                           int mode)
29 | {
30 |     // Print the parameters
31 |     cout << "Parameters:" << endl;
32 |     cout << "batch_size = " << batch_size << endl;
33 |     cout << "depth = " << depth << endl;
34 |     cout << "height = " << height << endl;
35 |     cout << "width = " << width << endl;
36 |     cout << "output_channels = " << output_channels << endl;
37 |     cout << "stride = " << stride << endl;
38 |     cout << "kernel_size = " << kernel_size << endl;
39 |     cout << "mode = " << mode << endl;
40 | 
41 |     Matrix4d input4d = CreateMatrix4d(batch_size, depth, height, width);
42 |     Matrix4d output4d = batch_conv_test(input4d, depth, output_channels, stride, kernel_size, mode, true);
43 |     // Print the convolution output-dimension calculation
44 |     int padding_wid = stride - (input4d.wid - kernel_size) % stride;
45 |     if (padding_wid == stride) {
46 |         padding_wid = 0;
47 |     }
48 |     int padding_high = stride - (input4d.high - kernel_size) % stride;
49 |     if (padding_high == stride) {
50 |         padding_high = 0;
51 |     }
52 |     int output_width = (width - kernel_size + 2 * padding_wid) / stride + 1;
53 |     int output_height = (height - kernel_size + 2 * padding_high) / stride + 1;
54 |     cout << "Output dimension calculation:" << endl;
55 |     cout << "output_width = (" << width << " - " << kernel_size << " + 2 * " << padding_wid << ") / " << stride << " + 1 = " << output_width << endl;
56 |     cout << "output_height = (" << height << " - " << kernel_size << " + 2 * " << padding_high << ") / " << stride << " + 1 = " << output_height << endl;
57 |     getshape4d(output4d);
58 |     assert(output4d.dep == output_channels);
59 |     assert(output4d.wid == output_width);
60 |     assert(output4d.high == output_height);
61 | }
62 | 
63 | int main() {
64 |     ofstream outfile("result.txt"); // open the output file stream
65 |     outfile << "bs" << "\t" << "ic" << "\t" << "height" << "\t" << "width" << "\t"
66 |             << "oc" << "\t" << "stride" << "\t" << "ksize" << "\t" << "mode" << endl;
67 | 
68 |     test_batch_conv_test(2, 24, 3, 3, 1, 3, 3, 0);
69 |     // for (int width = 32; width <= 640; ++width) {
70 |     //     for (int height = 32; height <= 640; ++height) {
71 |     //         for (int output_channels = 1; output_channels <= 256; ++output_channels) {
72 |     //             for (int kernel_size = 1; kernel_size <= 7; ++kernel_size) {
73 |     //                 for (int stride = 1; stride <= kernel_size; ++stride) {
74 |     //                     for (int batch_size = 1; batch_size <= 4; ++batch_size) {
75 |     //                         cout<<"---------------------------"<<endl;
--------------------------------------------------------------------------------
72 | (conv);
73 | conv_ptr->set_kernel(&test_kernel[in_c][out_c][0][0], in_c, out_c);
74 | ```
75 | 2. Verify the gradients:
76 | 
77 | ```c++
78 | if (!is_close(actual, expected)) {
79 |     std::cout << "Gradient mismatch at position [" << c << "][" << i << "][" << j << "]" << std::endl;
80 | }
81 | ```
82 | ## 4. Notes
83 | 
84 | 1. Memory management:
85 |    - Make sure all allocated memory is released correctly
86 |    - Call free_mat on a Matrix object once you are done with it
87 | 
88 | 2. Precision control:
89 |    - Use the is_close function to compare floating-point numbers
90 |    - The rtol and atol parameters can be adjusted to tighten or relax the precision requirement
91 | 
92 | 
--------------------------------------------------------------------------------
/vector_add:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenZYJ/Edge-Computing-Engine/896c7ebe695e4a94cc5c120956d3256a821f516a/vector_add
--------------------------------------------------------------------------------
/welcome/1.cpp:
--------------------------------------------------------------------------------
1 | #include 
2 | #include 
3 | #include 
4 | #include 
5 | using namespace std;
6 | int main()
7 | {
8 |     fstream input_stream;
9 |     input_stream.open(".txt",ios::in);
10 | 
11 |     char input[1000000];
12 |     int cnt=0;
13 |     if(input_stream.is_open()){
14 |         //cout<<"yes\n";
15 |         while(!input_stream.eof()){
16 |             input_stream.getline(input,999);
17 |             cout<<input<<endl;
--------------------------------------------------------------------------------
13 | #include 
14 | #include 
15 | #include 
16 | #include 
17 | #include 
18 | #include 
19 | #define random(x) (rand()%x)
20 | using namespace std;
21 | int welcome(){
22 |     fstream input_stream;
23 |     const char* candidates[] = {"./welcome/2.txt","./welcome/3.txt","./welcome/big.txt"};
24 |     srand((unsigned int)time(NULL));
25 |     input_stream.open(candidates[rand()%3],ios::in);
26 |     char input[1000000];
27 |     int cnt=0;
28 |     if(input_stream.is_open()){
29 |         while(!input_stream.eof()){
30 |             input_stream.getline(input,999);
31 |             cout<