├── CMakeLists.txt
├── LICENSE
├── README.md
├── documents
├── Layer类和Blob的设计.md
├── Mat类的设计.md
├── Net类的设计.md
├── optional类与benckmark.md
├── 内存分配.md
└── 测试框架.md
├── example
├── CMakeLists.txt
├── res18.cpp
└── yolov5s.cpp
├── images
├── bus.jpg
├── dog.jpg
├── logo.png
└── 内存.png
├── src
├── CMakeLists.txt
├── allocator.h
├── benchmark.cpp
├── benchmark.h
├── blob.cpp
├── blob.h
├── ir.cpp
├── ir.h
├── layer.cpp
├── layer.h
├── layers
│ ├── cuda
│ │ ├── cuda_gemm.cu
│ │ ├── cuda_gemm.h
│ │ ├── cuda_linear.cu
│ │ ├── cuda_linear.h
│ │ ├── cuda_relu.cu
│ │ ├── cuda_relu.h
│ │ ├── cuda_silu.cu
│ │ └── cuda_silu.h
│ └── cxx
│ │ ├── adaptiveavgpool.cpp
│ │ ├── adaptiveavgpool.h
│ │ ├── cat.cpp
│ │ ├── cat.h
│ │ ├── contiguous.cpp
│ │ ├── contiguous.h
│ │ ├── convolution.cpp
│ │ ├── convolution.h
│ │ ├── expression.cpp
│ │ ├── expression.h
│ │ ├── flatten.cpp
│ │ ├── flatten.h
│ │ ├── input.cpp
│ │ ├── input.h
│ │ ├── linear.cpp
│ │ ├── linear.h
│ │ ├── maxpool.cpp
│ │ ├── maxpool.h
│ │ ├── output.cpp
│ │ ├── output.h
│ │ ├── permute.cpp
│ │ ├── permute.h
│ │ ├── relu.cpp
│ │ ├── relu.h
│ │ ├── silu.cpp
│ │ ├── silu.h
│ │ ├── upsample.cpp
│ │ ├── upsample.h
│ │ ├── view.cpp
│ │ └── view.h
├── mat.cpp
├── mat.h
├── net.cpp
├── net.h
├── nncuda.cu
├── nncuda.h
├── optional.cpp
├── optional.h
├── register_layers.h
├── storezip.cpp
└── storezip.h
└── test
├── CMakeLists.txt
├── test.cpp
├── test_adaptiveavgpool.h
├── test_cat.h
├── test_conv.h
├── test_cuda_silu.h
├── test_expression.h
├── test_flatten.h
├── test_fun.h
├── test_layer.h
├── test_linear.h
├── test_mat.h
├── test_maxpool2d.h
├── test_permute.h
├── test_relu.h
├── test_silu.h
├── test_ulti.h
├── test_upsample.h
└── test_view.h
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 | project(EasyNN)
3 |
4 |
5 | #set(CMAKE_BUILD_TYPE Debug)
6 | set(CMAKE_CXX_STANDARD 11) # 将 C++ 标准设置为 C++ 11
7 | set(CMAKE_CXX_STANDARD_REQUIRED ON) # C++ 11 是强制要求,不会衰退至低版本
8 | set(CMAKE_CXX_EXTENSIONS OFF) # 禁止使用编译器特有扩展
9 |
10 |
11 |
12 | option(EASTNN_USE_CUDA "enable cuda " OFF)
13 |
14 | if(EASTNN_USE_CUDA)
15 | add_definitions(-DEASTNN_USE_CUDA=1)
16 | enable_language(CUDA)
17 | endif()
18 |
19 | file(GLOB SRC src/*.cpp)
20 | file(GLOB CUDA_SRC src/*.cu src/layers/cuda/*.cu)
21 | file(GLOB LAYERS_SRC src/layers/cxx/*.cpp)
22 |
23 | set(INCLUDE ${CMAKE_SOURCE_DIR}/src)
24 | set(TEST ${CMAKE_SOURCE_DIR}/test)
25 | set(EXAMPLE ${CMAKE_SOURCE_DIR}/example)
26 |
27 | add_subdirectory(src)
28 | add_subdirectory(test)
29 | add_subdirectory(example)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | EasyNN是一个面向教学而研发的推理框架,旨在帮助大家在最短时间内写出一个支持ResNet和YOLOv5等模型的深度学习推理框架。**简单**是EasyNN最大的特点,只需要掌握C++基本语法和神经网络基础知识,你就可以在15天内,写出一个属于自己的网络推理框架。
4 |
5 | # 特性
6 |
7 | - **无第三方库依赖**:EasyNN内含简单的测试框架,并实现了一个简单的Mat数据类。
8 | - **使用PNNX作为模型中间结构**:EasyNN采用[PNNX](https://github.com/Tencent/ncnn/tree/master/tools/pnnx)作为模型的中间结构,大大提高了开发效率。
9 | - **OpenMP多线程加速**:EasyNN利用OpenMP技术实现了多线程加速,以提高推理速度。
10 | - **CUDA加速**:EasyNN部分算子支持CUDA加速,卷积算子支持im2col+CUDA GEMM加速
11 | - **简洁易读的代码**:EasyNN的代码仅采用部分C++11特性编写,代码简洁易读。
12 | - **完善的开发文档和教程**:在开发文档中详细介绍了每个类和函数的作用,并在B站配备视频讲解,带各位敲每一行代码。
13 |
14 | # 你可以学到的内容
15 |
16 | - **C++语法和相关概念**:可以熟悉C++的基本语法和常用特性,如类的定义、成员函数的实现、继承、虚函数的使用等。
17 | - **设计模式和编程范式**:EasyNN的开发过程中涉及到一些常见的设计模式,例如工厂模式和引用计数法。
18 | - **框架开发全流程**:通过学习EasyNN的开发,可以了解一个推理框架的完整开发流程。从框架设计、代码实现到单元测试和调试。
19 | - **常见算子的实现方法**:常见的卷积算子(Conv2d)、池化算子(MaxPool2d)等的实现。
20 | - **CUDA高性能编程**:CUDA编程模型、GEMM、向量化读取、共享内存.
21 |
22 | # 支持的PNNX算子
23 | ```c++
24 | register_layer(Input); //no need
25 | register_layer(Output); //no need
26 | register_layer(Contiguous); //no need
27 |
28 | register_layer(Relu); //cuda accelerate
29 | register_layer(Silu); //cuda accelerate
30 |
31 | register_layer(Convolution); //im2col+sgemm cuda accelerate
32 | register_layer(AdaptivePool);
33 | register_layer(MaxPool);
34 | register_layer(Upsample);
35 |
36 | register_layer(Linear); //cuda accelerate
37 | register_layer(Expression);
38 | register_layer(Flatten);
39 | register_layer(View);
40 | register_layer(Cat);
41 | register_layer(Permute);
42 | ```
43 |
44 |
45 | # 编译与运行
46 |
47 | 第一步:下载并编译代码
48 |
49 | ```shell
50 | git clone https://github.com/HuPengsheet/EasyNN.git
51 | cd EasyNN
52 | mkdir build && cd build
53 | cmake ..
54 | make -j4
55 | ```
56 |
57 | 第二步:下载对应的模型权重
58 |
59 | ```shell
60 | 方法一:使用wget从github上下载
61 | cd ../example
62 | wget https://github.com/HuPengsheet/EasyNN/releases/download/EasyNN1.0-model-file/model.tar.xz
63 | tar -xf model.tar.xz
64 |
65 | 方法二:通过百度云下载,把下载好的文件解压到项目目录下的example文件夹下
66 | 链接: https://pan.baidu.com/s/1RgbSGVNSfYZZtos6Y4Bedw 提取码: h9u6
67 | ```
68 |
69 | 第三步:运行res18和yolov5推理代码,可以看到对应的推理结果
70 |
71 | ```shell
72 | #进入到build目录下
73 |
74 | #运行res18的代码
75 | ./example/res18
76 |
77 | #运行yolov5s的代码
78 | ./example/yolov5s
79 | ```
80 |
81 | 第四步(可选):运行单元测试
82 |
83 | ```shell
84 | ./test/run_test
85 | ```
86 |
87 | # 开发文档
88 | - [测试框架](https://github.com/HuPengsheet/EasyNN/blob/main/documents/%E6%B5%8B%E8%AF%95%E6%A1%86%E6%9E%B6.md)
89 | - [内存分配](https://github.com/HuPengsheet/EasyNN/blob/main/documents/%E5%86%85%E5%AD%98%E5%88%86%E9%85%8D.md)
90 | - [Mat类设计](https://github.com/HuPengsheet/EasyNN/blob/main/documents/Mat%E7%B1%BB%E7%9A%84%E8%AE%BE%E8%AE%A1.md)
91 | - [Layer类和Blob的设计](https://github.com/HuPengsheet/EasyNN/blob/main/documents/Layer%E7%B1%BB%E5%92%8CBlob%E7%9A%84%E8%AE%BE%E8%AE%A1.md)
92 | - [optional类与benckmark](https://github.com/HuPengsheet/EasyNN/blob/main/documents/optional%E7%B1%BB%E4%B8%8Ebenckmark.md)
93 | - [Net类设计](https://github.com/HuPengsheet/EasyNN/blob/main/documents/Net%E7%B1%BB%E7%9A%84%E8%AE%BE%E8%AE%A1.md)
94 | - 未完待续
95 |
96 | # 致谢
97 |
98 | 本项目中很大一部分代码参考了优秀的推理框架[ncnn](https://github.com/Tencent/ncnn)
99 |
100 | 其中example下的部分代码借鉴了[KuiperInfer](https://github.com/zjhellofss/KuiperInfer)
101 |
--------------------------------------------------------------------------------
/documents/Layer类和Blob的设计.md:
--------------------------------------------------------------------------------
1 | # Layer类和Blob的设计
2 |
3 | # Layer类
4 |
5 | ```c++
6 | class Layer{
7 |
8 | public:
9 | Layer();
10 | virtual ~Layer();
11 | virtual int loadParam(std::map<std::string,pnnx::Parameter>& params);
12 | virtual int loadBin(std::map<std::string,pnnx::Attribute>& attrs);
13 | virtual int forward(const Mat& input,Mat& output,const Optional& op);
14 | virtual int forward(const std::vector<Mat>& input,std::vector<Mat>& output,const Optional& op);
15 |
16 | public:
17 |
18 | bool one_blob_only;
19 |
20 | public:
21 |
22 | std::string type;
23 | std::string name;
24 |
25 | std::vector<int> bottoms;
26 | std::vector<int> tops;
27 |
28 | };
29 | ```
30 |
31 | Layer类表示的是一个算子,Layer是一个虚类,当我们要具体实现某一个算子的时候,我们只要继承自Layer,然后重写相关的函数即可。
32 |
33 | ### 属性值
34 |
35 | ```c++
36 | public:
37 |
38 | bool one_blob_only; //这个算子是否单输入输出
39 |
40 | public:
41 |
42 | std::string type; //算子的类型
43 | std::string name; //算子的名字
44 |
45 | std::vector<int> bottoms; //算子的输入blob序号vector
46 | std::vector<int> tops; //算子的输出blob序号vector
47 | ```
48 |
49 | ### 方法
50 |
51 | ```c++
52 | Layer();
53 | virtual ~Layer();
54 | virtual int loadParam(std::map<std::string,pnnx::Parameter>& params); //加载模型的参数
55 | virtual int loadBin(std::map<std::string,pnnx::Attribute>& attrs); //加载模型的权重
56 | virtual int forward(const Mat& input,Mat& output,const Optional& op); //模型推理,对应one_blob_only=true
57 | virtual int forward(const std::vector<Mat>& input,std::vector<Mat>& output,const Optional& op); //模型推理,对应one_blob_only=false
58 | ```
59 |
60 | ## Blob类
61 |
62 | ```c++
63 | class Blob
64 | {
65 | public:
66 | // empty
67 | Blob();
68 |
69 | public:
70 |
71 | int producer;
72 | int consumer;
73 | Mat shape;
74 | };
75 | ```
76 |
77 | Blob类,记录的是算子的输入输出,producer表示生产该blob的算子的序号,consumer表示使用该blob的算子的序号,shape表示尺寸。
78 |
79 |
--------------------------------------------------------------------------------
/documents/Net类的设计.md:
--------------------------------------------------------------------------------
1 | # Net类的设计
2 |
3 | ```c++
4 | class Net
5 | {
6 | public:
7 | Net();
8 | ~Net();
9 | void printLayer() const;
10 | int loadModel(const char * param_path,const char * bin_path);
11 | int extractBlob(const size_t num,Mat& output);
12 | int forwarLayer(int layer_index);
13 | int input(int index,const Mat& input);
14 | int clear();
15 |
16 | std::vector<Blob> blobs;
17 | std::vector<Mat> blob_mats;
18 | std::vector<Layer*> layers;
19 |
20 | size_t layer_num;
21 | size_t blob_num;
22 | Optional op;
23 | private:
24 | pnnx::Graph* graph;
25 | };
26 | ```
27 |
28 | Net类是对pnnx类的解析和重封装,EasyNN采用的是pnnx作为模型的中间结构,有关pnnx的内容大家请参考[pnnx](https://github.com/Tencent/ncnn/tree/master/tools/pnnx)官网的介绍。
29 |
30 | ## 属性值
31 |
32 | ```c++
33 | std::vector<Blob> blobs;
34 | std::vector<Mat> blob_mats;
35 | std::vector<Layer*> layers;
36 |
37 | size_t layer_num;
38 | size_t blob_num;
39 | Optional op;
40 | ```
41 |
42 | `blobs`记录的是模型文件里所有的blob,`blob_mats`存储的是blob对应的Mat数据,`layers`是模型所有算子的集合,`layer_num`和`blob_num`就是对应的算子个数和blob个数,`op`用来控制算子推理时的一些选项。
43 |
44 | ## 方法
45 |
46 | ### loadModel
47 |
48 | ```c++
49 | int Net::loadModel(const char * param_path,const char * bin_path)
50 | {
51 | int re =-1;
52 | re = graph->load(param_path,bin_path); //使用PNNX中的方法,先将模型的参数和权重进行加载
53 | if(re==0)
54 | {
55 | layer_num = graph->ops.size();
56 | blob_num = graph->operands.size();
57 | blobs.resize(blob_num);
58 | blob_mats.resize(blob_num);
59 | layers.resize(layer_num);
60 |
61 | //遍历算子集合
62 | for(int i=0;i<layer_num;i++)
63 | {
64 | pnnx::Operator* op = graph->ops[i];
65 | std::string layer_type = extractLayer(op->type); //提取算子的名字
66 | layer_factory factory = 0;
67 | for(auto l:layes_factory)
68 | {
69 | if(layer_type==l.first) factory=l.second; //根据算子的名字,查找出对应的算子工厂
70 | }
71 | if(!factory)
72 | {
73 | printf("%s is not supportl\n",layer_type.c_str()); //如果没有这个算子,则报错退出
74 | re=-1;
75 | break;
76 | }
77 | Layer* layer = factory(); //使用算子工厂,实例化算子
78 |
79 | layer->name = op->name; //初始化算子名字
80 | layer->type = layer_type; //初始化算子类型
81 |
82 | //构建计算关系,每个layer的输入输出blob是哪个,每个blob是哪个layer产生,是哪个layer使用
83 | for(auto input:op->inputs)
84 | {
85 | int blob_index = std::stoi(input->name);
86 | layer->bottoms.push_back(blob_index);
87 | Blob& blob = blobs[blob_index];
88 | blob.consumer = i;
89 | }
90 | for(auto output:op->outputs)
91 | {
92 | int blob_index = std::stoi(output->name);
93 | layer->tops.push_back(blob_index);
94 | Blob& blob = blobs[blob_index];
95 | blob.producer = i;
96 | }
97 |
98 | layer->loadParam(op->params); //加载算子的参数
99 | layer->loadBin(op->attrs); //加载算子的权重
100 | layers[i]= layer;
101 | }
102 | delete graph; //加载完成后,释放PNNX中的图
103 | }
104 | else
105 | {
106 | printf("load %s %s fail\n",param_path,bin_path);
107 | return re;
108 | }
109 | return re;
110 | }
111 | ```
112 |
113 | 总体思路就是,先把模型的参数和权重加载到pnnx::graph里面去,然后从pnnx::graph提取出我们自己想要的信息,去初始化EasyNN里面我们用得到的信息。
114 |
115 | ### printLayer
116 |
117 | ```c++
118 | void Net::printLayer() const
119 | {
120 | for(auto layer:graph->ops)
121 | {
122 | printf("%s \n",layer->name.c_str());
123 | }
124 | }
125 | ```
126 |
127 | 遍历所有的算子,并打印算子的名字。
128 |
129 | ### input
130 |
131 | ```
132 | int Net::input(int index,const Mat& input)
133 | {
134 | blob_mats[index]=input;
135 | return 0;
136 | }
137 | ```
138 |
139 | 把数据放到指定位置的blob_mats中,一般情况下就是用来放整个模型的输入的
140 |
141 | ### extractBlob
142 |
143 | ```c++
144 | int Net::extractBlob(const size_t num,Mat& output)
145 | {
146 | Blob& blob = blobs[num];
147 | if(num>blob_num-1 || num<0)
148 | {
149 | printf("the %ld blob is not exist ,please check out\n",num);
150 | return -1;
151 | }
152 |
153 | if(blob_mats[num].isEmpty())
154 | forwarLayer(blob.producer);
155 |
156 | output = blob_mats[num];
157 | return 0;
158 | }
159 | ```
160 |
161 | 提取出模型某个blob的数据,一般也就是我们要的整个模型的输出。如果它为空,则表示产生它的算子没有被执行,因此需要调用forwarLayer执行这个算子,才能获得我们想要的数据。
162 |
163 | ### forwarLayer
164 |
165 | ```c++
166 |
167 | int Net::forwarLayer(int layer_index)
168 | {
169 |
170 | if(layer_index>layer_num-1 || layer_index<0)
171 | {
172 | printf("do not have this layer ,layer num is %d",layer_index);
173 | return -1;
174 | }
175 |
176 | Layer* layer = layers[layer_index];
177 | for(auto input:layer->bottoms)
178 | {
179 | if(blob_mats[input].isEmpty())
180 | forwarLayer(blobs[input].producer); //递归调用,直到找到某个layer的输入blob已经存在,说明此时可以forwarLayer
181 | }
182 | if(layer->one_blob_only)
183 | {
184 | int bottom_blob_index = layer->bottoms[0];
185 | int top_blob_index = layer->tops[0];
186 | int re = layer->forward(blob_mats[bottom_blob_index],blob_mats[top_blob_index],op);
187 | if(re!=0)
188 | {
189 | printf("%s forward fail",layer->name.c_str());
190 | return -1;
191 | }
192 | }
193 | else
194 | {
195 | std::vector<Mat> input_mats(layer->bottoms.size());
196 | std::vector<Mat> output_mats(layer->tops.size());
197 |
198 | for(int i=0;i<layer->bottoms.size();i++)
199 | {
200 | input_mats[i] = blob_mats[layer->bottoms[i]];
201 | }
202 |
203 | int re = layer->forward(input_mats,output_mats,op);
204 |
205 | if(re!=0)
206 | {
207 | printf("%s forward fail",layer->name.c_str());
208 | return -1;
209 | }
210 |
211 | for(int i=0;i<layer->tops.size();i++)
212 | {
213 | blob_mats[layer->tops[i]]=output_mats[i];
214 | }
215 |
216 | }
217 | return 0;
218 | }
219 | ```
220 |
221 | `forwarLayer`是一个递归函数,我们要执行一个算子,那这个算子的输入blob_mat必须要存在,如果不存在则需要递归去执行产生这个blob的算子。当某一个算子的输入blob已经存在时,也就是我们手工用input放置在blob_mats中的数据,此时达到递归终止条件,开始一个一个执行算子。根据算子的one_blob_only参数,分别调用不同的`layer->forward`函数。
222 |
223 |
--------------------------------------------------------------------------------
/documents/optional类与benckmark.md:
--------------------------------------------------------------------------------
1 | ## Optional类
2 |
3 | ```c++
4 | class Optional
5 | {
6 | public:
7 | Optional();
8 | int num_thread;
9 | };
10 | ```
11 |
12 | 只有一个参数,`num_thread`表示openmp的多线程个数,默认值为8。
13 |
14 | ## benckmark
15 |
16 | ```c++
17 | double get_current_time()
18 | {
19 | auto now = std::chrono::high_resolution_clock::now();
20 | auto usec = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch());
21 | return usec.count() / 1000.0;
22 | }
23 | ```
24 |
25 | 返回当前的时间,用于计时。
--------------------------------------------------------------------------------
/documents/内存分配.md:
--------------------------------------------------------------------------------
1 | 相关代码在src/allocator.h里面
2 |
3 | ```c++
4 | #ifndef EASYNN_ALLOCATOR_H
5 | #define EASYNN_ALLOCATOR_H
6 |
7 | #include <stdlib.h>
8 |
9 | #define EASYNN_MALLOC_ALIGN 64
10 | #define EASYNN_MALLOC_OVERREAD 64
11 |
12 | static size_t alignSize(size_t sz, int n)
13 | {
14 | return (sz + n - 1) & -n;
15 | }
16 |
17 | static void* fastMalloc(size_t size)
18 | {
19 | void * ptr = 0;
20 | if (posix_memalign(&ptr, EASYNN_MALLOC_ALIGN, size + EASYNN_MALLOC_OVERREAD))
21 | ptr = 0;
22 | return ptr;
23 | }
24 |
25 | static void fastFree(void* ptr)
26 | {
27 | if (ptr)
28 | {
29 | free(ptr);
30 | }
31 | }
32 |
33 | #endif
34 | ```
35 |
36 | `fastMalloc`实际上是封装`posix_memalign`,返回对齐64字节内存的地址,同时会额外多分配`EASYNN_MALLOC_OVERREAD`,也就是64个字节。`alignSize`的功能是将给定的`size_t`类型的`sz`值对齐到最近的`n`的倍数。
--------------------------------------------------------------------------------
/documents/测试框架.md:
--------------------------------------------------------------------------------
1 | 为了不引入额外的第三方库,EasyNN内部实现了一个简单的单元测试框架,用来实现单元测试的功能。
2 |
3 | ## 单元测试介绍
4 |
5 | 单元测试是一种软件测试方法,主要关注软件中的最小可测试单元,通常是单个函数、方法或类。单元测试的作用主要包括:
6 |
7 | 1. 提升代码质量:通过在编写代码的过程中发现并修复错误,有助于提高代码的质量和减少bug的数量。
8 | 2. 提升反馈速度:可以快速地提供反馈,让开发人员知道他们的代码是否按照预期工作,从而减少重复工作,提高开发效率。
9 | 3. 保护代码:当对代码进行修改或添加新功能时,单元测试可以帮助确保这些更改没有破坏现有的功能。
10 | 4. 简化代码维护:作为代码的一种文档,单元测试可以帮助开发人员理解代码的功能和工作方式,从而使代码的维护变得更容易。
11 | 5. 改进代码设计:因为单元测试强制开发人员编写可测试的代码,这通常也意味着代码的设计更好。
12 |
13 | ## 单元测试的使用
14 |
15 | ```c++
16 | TEST(Mat, refcount)
17 | {
18 | {
19 | easynn::Mat m1(10,10);
20 | EXPECT_EQ(*m1.refcount, 1);
21 | easynn::Mat m2(m1);
22 | easynn::Mat m3=m1;
23 | EXPECT_EQ(*m2.refcount, 3);
24 | EXPECT_EQ(*m3.refcount, 3);
25 | }
26 |
27 | easynn::Mat m1(10,10);
28 | {
29 | EXPECT_EQ(*m1.refcount, 1);
30 | easynn::Mat m2(m1);
31 | easynn::Mat m3=m1;
32 | EXPECT_EQ(*m2.refcount, 3);
33 | EXPECT_EQ(*m3.refcount, 3);
34 | }
35 | EXPECT_EQ(*m1.refcount, 1);
36 |
37 | {
38 |
39 | easynn::Mat m2;
40 | easynn::Mat m3=m2;
41 | EXPECT_EQ((long)m2.refcount, 0);
42 | EXPECT_EQ((long)m3.refcount, 0);
43 | }
44 |
45 | }
46 | ```
47 |
48 | 如上所示,就是一个单元测试的示例,`TEST(Mat, refcount)`,表示这是Mat类的单元测试,且测试的是引用计数的方法。
49 |
50 | `EXPECT_EQ()`是一个宏,表示期望两个数值相等,如果不相等的话,则refcount这个单元测试会报错。
51 |
52 | ## 代码解释
53 |
54 | 相关的代码在test/test_ulti.h下
55 |
56 | ```c++
57 | #define QTEST_EXPECT(x, y, cond) \
58 | if (!((x)cond(y))) \
59 | { \
60 | printf("%s:%u: Failure\n", __FILE__, __LINE__); \
61 | if (strcmp(#cond, "==") == 0) \
62 | { \
63 | printf("Expected equality of these values:\n"); \
64 | printf(" %s\n", #x); \
65 | qtest_evaluate_if_required(#x, x); \
66 | printf(" %s\n", #y); \
67 | qtest_evaluate_if_required(#y, y); \
68 | } \
69 | else \
70 | { \
71 | printf("Expected: (%s) %s (%s), actual: %s vs %s\n", #x, #cond, #y, std::to_string(x).c_str(), std::to_string(y).c_str()); \
72 | } \
73 | *qtest_current_fail_cnt = *qtest_current_fail_cnt + 1; \
74 | }
75 |
76 | #define EXPECT_EQ(x, y) QTEST_EXPECT(x, y, ==)
77 | #define EXPECT_NE(x, y) QTEST_EXPECT(x, y, !=)
78 | #define EXPECT_LT(x, y) QTEST_EXPECT(x, y, <)
79 | #define EXPECT_LE(x, y) QTEST_EXPECT(x, y, <=)
80 | #define EXPECT_GT(x, y) QTEST_EXPECT(x, y, >)
81 | #define EXPECT_GE(x, y) QTEST_EXPECT(x, y, >=)
82 |
83 | #define EXPECT_TRUE(x) \
84 | if (!((x))) \
85 | { \
86 | printf("%s:%u: Failure\n", __FILE__, __LINE__); \
87 | printf("Value of: %s\n", #x); \
88 | printf(" Actual: false\n"); \
89 | printf("Expected: true\n"); \
90 | *qtest_current_fail_cnt = *qtest_current_fail_cnt + 1; \
91 | }
92 |
93 | #define EXPECT_FALSE(x) \
94 | if (((x))) \
95 | { \
96 | printf("%s:%u: Failure\n", __FILE__, __LINE__); \
97 | printf("Value of: %s\n", #x); \
98 | printf(" Actual: true\n"); \
99 | printf("Expected: false\n"); \
100 | *qtest_current_fail_cnt = *qtest_current_fail_cnt + 1; \
101 | }
102 |
103 | template<typename T>
104 | void qtest_evaluate_if_required(const char* str, T value)
105 | {
106 | if (strcmp(str, std::to_string(value).c_str()) != 0)
107 | {
108 | std::cout << " Which is: " << value << std::endl;
109 | }
110 | }
111 |
112 | #define ASSERT_EQ(x, y) \
113 | if ((x)!=(y)) \
114 | { \
115 | printf("%s:%u: Failure\n", __FILE__, __LINE__); \
116 | printf("Expected equality of these values:\n"); \
117 | printf(" %s\n", #x); \
118 | qtest_evaluate_if_required(#x, x); \
119 | printf(" %s\n", #y); \
120 | qtest_evaluate_if_required(#y, y); \
121 | *qtest_current_fail_cnt = *qtest_current_fail_cnt + 1; \
122 | return; \
123 | }
124 | ```
125 |
126 | 上面的代码是用来实现判断值相不相等的宏,例如EXPECT_EQ表示希望两个值相等,EXPECT_NE表示希望两个值不相等。如果输入的值不满足所期望的判断条件,那么就会输出相应的错误信息,同时记录下这次错误。
127 |
128 | ```c++
129 | class TestEntity
130 | {
131 | private:
132 | TestEntity() { };
133 | ~TestEntity() { };
134 |
135 | public:
136 | std::string make_proper_str(size_t num, const std::string str, bool uppercase = false)
137 | {
138 | std::string res;
139 | if (num > 1)
140 | {
141 | if (uppercase)
142 | res = std::to_string(num) + " " + str + "S";
143 | else
144 | res = std::to_string(num) + " " + str + "s";
145 | }
146 | else
147 | {
148 | res = std::to_string(num) + " " + str;
149 | }
150 | return res;
151 | }
152 |
153 | public:
154 | TestEntity(const TestEntity& other) = delete;
155 | TestEntity operator=(const TestEntity& other) = delete;
156 |
157 | static TestEntity& get_instance()
158 | {
159 | static TestEntity entity;
160 | return entity;
161 | }
162 |
163 | int add(std::string test_set_name, std::string test_name, std::function<void(int*)> f, const char* fname)
164 | {
165 | TestItem item(f, fname);
166 | test_sets[test_set_name].test_items.emplace_back(item);
167 | return 0;
168 | }
169 |
170 | int set_filter(std::string _filter)
171 | {
172 | filter = _filter;
173 | return 0;
174 | }
175 |
176 | int run_all_test_functions()
177 | {
178 | std::map<std::string, TestSet>::iterator it = test_sets.begin();
179 | for (; it != test_sets.end(); it++)
180 | {
181 | std::string test_set_name = it->first;
182 | TestSet& test_set = it->second;
183 | std::vector<TestItem>& test_items = test_set.test_items;
184 |
185 | int cnt = 0;
186 | for (int i = 0; i < test_items.size(); i++)
187 | {
188 | const std::string fname = test_items[i].fname;
189 | if (filter.length() == 0 || (filter.length() > 0 && strmatch(fname, filter)))
190 | {
191 | cnt++;
192 | }
193 | }
194 |
195 | if (cnt == 0) continue;
196 |
197 | matched_test_set_count++;
198 |
199 | const std::string test_item_str = make_proper_str(cnt, "test");
200 | printf("%s[----------]%s %s from %s\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END, test_item_str.c_str(), it->first.c_str());
201 | for (int i = 0; i < test_items.size(); i++)
202 | {
203 | auto f = test_items[i].f;
204 | std::string fname = test_items[i].fname;
205 | if (filter.length() > 0 && !strmatch(fname, filter))
206 | {
207 | continue;
208 | }
209 |
210 | matched_test_case_count++;
211 |
212 | int qtest_current_fail_cnt = 0;
213 | printf("%s[ RUN ]%s %s\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END, fname.c_str());
214 | f(&qtest_current_fail_cnt);
215 | if (qtest_current_fail_cnt == 0)
216 | {
217 | printf("%s[ OK ]%s %s (0 ms)\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END, fname.c_str());
218 | }
219 | else
220 | {
221 | printf("%s[ FAILED ]%s %s (0 ms)\n", QTEST_ESCAPE_COLOR_RED, QTEST_ESCAPE_COLOR_END, fname.c_str());
222 | qtest_fail_cnt++;
223 | }
224 | test_items[i].success = (qtest_current_fail_cnt == 0);
225 | }
226 | printf("%s[----------]%s %s from %s (0 ms total)\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END, test_item_str.c_str(), it->first.c_str());
227 | printf("\n");
228 | }
229 |
230 | printf("%s[----------]%s Global test environment tear-down\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END);
231 | std::string tests_str = make_proper_str(matched_test_case_count, "test");
232 | std::string suite_str = make_proper_str(matched_test_set_count, "test suite");
233 | printf("%s[==========]%s %s from %s ran. (0 ms total)\n",
234 | QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END,
235 | tests_str.c_str(),
236 | suite_str.c_str()
237 | );
238 |
239 | int passed_test_count = matched_test_case_count - qtest_fail_cnt;
240 | std::string how_many_test_str = make_proper_str(passed_test_count, "test");
241 | printf("%s[ PASSED ]%s %s.\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END, how_many_test_str.c_str());
242 |
243 | if (qtest_fail_cnt)
244 | {
245 | std::string failed_test_str = make_proper_str(qtest_fail_cnt, "test");
246 | printf("%s[ FAILED ]%s %s, listed below:\n", QTEST_ESCAPE_COLOR_RED, QTEST_ESCAPE_COLOR_END, failed_test_str.c_str());
247 |
248 |             std::map<std::string, TestSet>::iterator it = test_sets.begin();
249 | for (; it != test_sets.end(); it++)
250 | {
251 | std::string test_set_name = it->first;
252 | TestSet test_set = it->second;
253 |                 std::vector<TestItem> test_items = test_set.test_items;
254 | for (int i = 0; i < test_items.size(); i++)
255 | {
256 | if (!test_items[i].success)
257 | {
258 | printf("%s[ FAILED ]%s %s\n", QTEST_ESCAPE_COLOR_RED, QTEST_ESCAPE_COLOR_END, test_items[i].fname.c_str());
259 | }
260 | }
261 | }
262 | }
263 |
264 | if (qtest_fail_cnt > 0)
265 | {
266 | std::string failed_test_str = make_proper_str(qtest_fail_cnt, "FAILED TEST", true);
267 | printf("\n %s\n", failed_test_str.c_str());
268 | }
269 |
270 | return 0;
271 | }
272 |
273 | private:
274 | // https://leetcode.cn/problems/wildcard-matching/solutions/315802/tong-pei-fu-pi-pei-by-leetcode-solution/
275 | /// @param s string
276 | /// @param p pattern
277 | bool strmatch(std::string s, std::string p)
278 | {
279 | auto allStars = [](const std::string& str, int left, int right) {
280 | for (int i = left; i < right; ++i) {
281 | if (str[i] != '*') {
282 | return false;
283 | }
284 | }
285 | return true;
286 | };
287 | auto charMatch = [](char u, char v)
288 | {
289 | return u == v || v == '?';
290 | };
291 |
292 | while (s.size() && p.size() && p.back() != '*')
293 | {
294 | if (charMatch(s.back(), p.back())) {
295 | s.pop_back();
296 | p.pop_back();
297 | }
298 | else {
299 | return false;
300 | }
301 | }
302 | if (p.empty()) {
303 | return s.empty();
304 | }
305 |
306 | int sIndex = 0;
307 | int pIndex = 0;
308 | int sRecord = -1;
309 | int pRecord = -1;
310 | while (sIndex < s.size() && pIndex < p.size()) {
311 | if (p[pIndex] == '*') {
312 | ++pIndex;
313 | sRecord = sIndex;
314 | pRecord = pIndex;
315 | }
316 | else if (charMatch(s[sIndex], p[pIndex])) {
317 | ++sIndex;
318 | ++pIndex;
319 | }
320 | else if (sRecord != -1 && sRecord + 1 < s.size()) {
321 | ++sRecord;
322 | sIndex = sRecord;
323 | pIndex = pRecord;
324 | }
325 | else {
326 | return false;
327 | }
328 | }
329 | return allStars(p, pIndex, p.size());
330 | }
331 |
332 |
333 | public:
334 | struct TestItem
335 | {
336 |         std::function<void(int*)> f;
337 | std::string fname;
338 | bool success;
339 |
340 |         TestItem(std::function<void(int*)> _f, std::string _fname):
341 | f(_f), fname(_fname), success(true)
342 | {}
343 | };
344 |
345 | struct TestSet
346 | {
347 |         std::vector<TestItem> test_items;
348 | };
349 |
350 |     std::map<std::string, TestSet> test_sets;
351 |
352 | public:
353 | int matched_test_case_count = 0;
354 | int matched_test_set_count = 0;
355 |
356 | private:
357 | int qtest_fail_cnt = 0; // number of failures in one test set
358 | std::string filter;
359 | };
360 | ```
361 |
362 | 接下来就是TestEntity这个类的实现。总体上看,TestEntity是作为单例模式来使用,当我们在写单元测试时,如下
363 |
364 | ```c++
365 | TEST(Mat, refcount)
366 | {
367 | }
368 |
369 | #define TEST(set, name) \
370 | void qtest_##set##_##name(int* qtest_current_fail_cnt); \
371 | int qtest_mark_##set##_##name = TestEntity::get_instance().add(#set, #name, qtest_##set##_##name, #set "." #name); \
372 | void qtest_##set##_##name(int* qtest_current_fail_cnt) \
373 |
374 | ```
375 |
376 | TEST是一个宏,宏展开后变成三行代码:第一行是把名字拼接后声明一个函数,第二行是调用`TestEntity::get_instance().add()`把该函数注册到单例中,第三行再次写出函数签名,后面紧跟的花括号体就成为该函数的定义。
377 |
378 | ```c++
379 | static TestEntity& get_instance()
380 | {
381 | static TestEntity entity;
382 | return entity;
383 | }
384 |
385 | int add(std::string test_set_name, std::string test_name, std::function<void(int*)> f, const char* fname)
386 | {
387 | TestItem item(f, fname);
388 | test_sets[test_set_name].test_items.emplace_back(item);
389 | return 0;
390 | }
391 | ```
392 |
393 | `get_instance`返回类的实例,也就是单例模式,只有一个对象。add函数就是将我们的单元测试的函数指针保存起来。只要把这些单元测试的指针保存下来了,我们遍历并执行,做一些对应的处理就可以了,相对应的函数是`run_all_test_functions()`。
--------------------------------------------------------------------------------
/example/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
# Locate OpenCV quietly (examples still configure if it is absent); only the
# components the example programs actually use are requested.
2 | find_package(OpenCV QUIET COMPONENTS core highgui imgproc)
3 |
# Make the built easynn library directory visible to the link step.
4 | link_directories(${CMAKE_SOURCE_DIR}/build/src/cxx)
5 |
# Build one executable per .cpp file found in the example directory,
# named after the source file (res18.cpp -> res18, yolov5s.cpp -> yolov5s).
6 | file(GLOB cpp_files ${EXAMPLE}/*.cpp)
7 | foreach(cpp_file ${cpp_files})
8 | get_filename_component(FILE_NAME ${cpp_file} NAME)
9 | string(REPLACE ".cpp" "" FILE_NAME "${FILE_NAME}")
10 | add_executable(${FILE_NAME} ${cpp_file})
11 | target_include_directories(${FILE_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS} ${INCLUDE})
12 | target_link_libraries(${FILE_NAME} PRIVATE ${OpenCV_LIBS} easynn)
13 | endforeach()
--------------------------------------------------------------------------------
/example/res18.cpp:
--------------------------------------------------------------------------------
1 | #include"net.h"
2 | #include"benchmark.h"
3 | #include
4 | #include
5 | #include "opencv2/imgproc/imgproc.hpp"
6 |
7 |
8 |
9 |
10 | static void normize(const easynn::Mat& m,std::vector mean,std::vector var)
11 | {
12 | for (int q=0; qmax)
37 | {
38 | index=x;
39 | max = m[x];
40 | }
41 | }
42 | printf("max:%f\n",max);
43 | return index;
44 | }
45 |
46 | static void pretreatment(cv::Mat& input_image,easynn::Mat& output_image,int h,int w)
47 | {
48 | cv::Mat resize_image;
49 | cv::resize(input_image, resize_image, cv::Size(224, 224));
50 |
51 | cv::Mat rgb_image;
52 | cv::cvtColor(resize_image, rgb_image, cv::COLOR_BGR2RGB);
53 | rgb_image.convertTo(rgb_image, CV_32FC3);
54 | std::vector split_images;
55 | cv::split(rgb_image, split_images);
56 |
57 |
58 | output_image.create(w,h,3);
59 |
60 | int index = 0;
61 | for (const auto& split_image : split_images)
62 | {
63 | memcpy((void*)output_image.channel(index), split_image.data, sizeof(float) * split_image.total());
64 | index += 1;
65 | }
66 | }
67 |
68 | static void res18(const easynn::Mat& in,easynn::Mat& result)
69 | {
70 | easynn::Net net;
71 | net.loadModel(\
72 | "../example/res18.pnnx.param",\
73 | "../example/res18.pnnx.bin");
74 | net.input(0,in);
75 | net.extractBlob(49,result);
76 | }
77 |
78 | int main()
79 | {
80 | std::string image_path = "../images/dog.jpg";
81 | cv::Mat image = cv::imread(image_path, 1);
82 | if (image.empty())
83 | {
84 | fprintf(stderr, "cv::imread %s failed\n", image_path.c_str());
85 | return -1;
86 | }
87 |
88 | //cv::Mat to EasyNN Mat
89 | easynn::Mat in;
90 | pretreatment(image,in,224,224);
91 |
92 | //normize
93 | std::vector mean = {0.485f,0.456f,0.406f};
94 | std::vector var = { 0.229f,0.224f,0.225f};
95 | normize(in,mean,var);
96 |
97 | // forward net
98 | easynn::Mat result;
99 | double start = easynn::get_current_time();
100 | res18(in,result);
101 | double end = easynn::get_current_time();
102 |
103 | printf("total time is %f ms\n",end-start);
104 |
105 |
106 | //find Max score class
107 | int cls = findMax(result);
108 | printf("cls = %d\n",cls);
109 |
110 | return 0;
111 | }
--------------------------------------------------------------------------------
/example/yolov5s.cpp:
--------------------------------------------------------------------------------
1 | #include"net.h"
2 | #include"benchmark.h"
3 | #include
4 | #include
5 | #include "opencv2/imgproc/imgproc.hpp"
6 |
7 | struct Object
8 | {
9 | cv::Rect_ rect;
10 | int label;
11 | float prob;
12 | };
13 |
14 | void normize(const easynn::Mat& m)
15 | {
16 | for (int q=0; q split_images;
43 | cv::split(rgb_image, split_images);
44 |
45 |
46 | output_image.create(w,h,3);
47 |
48 | int index = 0;
49 | for (const auto& split_image : split_images)
50 | {
51 | memcpy((void*)output_image.channel(index), split_image.data, sizeof(float) * split_image.total());
52 | index += 1;
53 | }
54 | }
55 |
56 |
57 |
58 | static inline float intersection_area(const Object& a, const Object& b)
59 | {
60 | cv::Rect_ inter = a.rect & b.rect;
61 | return inter.area();
62 | }
63 |
64 | static void qsort_descent_inplace(std::vector