├── CMakeLists.txt
├── LICENSE
├── README.md
├── documents
├── Layer类和Blob的设计.md
├── Mat类的设计.md
├── Net类的设计.md
├── optional类与benckmark.md
├── 内存分配.md
└── 测试框架.md
├── example
├── CMakeLists.txt
├── res18.cpp
└── yolov5s.cpp
├── images
├── bus.jpg
├── dog.jpg
├── logo.png
└── 内存.png
├── src
├── CMakeLists.txt
├── allocator.h
├── benchmark.cpp
├── benchmark.h
├── blob.cpp
├── blob.h
├── ir.cpp
├── ir.h
├── layer.cpp
├── layer.h
├── layers
│ ├── cuda
│ │ ├── cuda_gemm.cu
│ │ ├── cuda_gemm.h
│ │ ├── cuda_linear.cu
│ │ ├── cuda_linear.h
│ │ ├── cuda_relu.cu
│ │ ├── cuda_relu.h
│ │ ├── cuda_silu.cu
│ │ └── cuda_silu.h
│ └── cxx
│ │ ├── adaptiveavgpool.cpp
│ │ ├── adaptiveavgpool.h
│ │ ├── cat.cpp
│ │ ├── cat.h
│ │ ├── contiguous.cpp
│ │ ├── contiguous.h
│ │ ├── convolution.cpp
│ │ ├── convolution.h
│ │ ├── expression.cpp
│ │ ├── expression.h
│ │ ├── flatten.cpp
│ │ ├── flatten.h
│ │ ├── input.cpp
│ │ ├── input.h
│ │ ├── linear.cpp
│ │ ├── linear.h
│ │ ├── maxpool.cpp
│ │ ├── maxpool.h
│ │ ├── output.cpp
│ │ ├── output.h
│ │ ├── permute.cpp
│ │ ├── permute.h
│ │ ├── relu.cpp
│ │ ├── relu.h
│ │ ├── silu.cpp
│ │ ├── silu.h
│ │ ├── upsample.cpp
│ │ ├── upsample.h
│ │ ├── view.cpp
│ │ └── view.h
├── mat.cpp
├── mat.h
├── net.cpp
├── net.h
├── nncuda.cu
├── nncuda.h
├── optional.cpp
├── optional.h
├── register_layers.h
├── storezip.cpp
└── storezip.h
└── test
├── CMakeLists.txt
├── test.cpp
├── test_adaptiveavgpool.h
├── test_cat.h
├── test_conv.h
├── test_cuda_silu.h
├── test_expression.h
├── test_flatten.h
├── test_fun.h
├── test_layer.h
├── test_linear.h
├── test_mat.h
├── test_maxpool2d.h
├── test_permute.h
├── test_relu.h
├── test_silu.h
├── test_ulti.h
├── test_upsample.h
└── test_view.h
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 | project(EasyNN)
3 |
4 |
5 | #set(CMAKE_BUILD_TYPE Debug)
6 | set(CMAKE_CXX_STANDARD 11) # 将 C++ 标准设置为 C++ 11
7 | set(CMAKE_CXX_STANDARD_REQUIRED ON) # C++ 11 是强制要求,不会衰退至低版本
8 | set(CMAKE_CXX_EXTENSIONS OFF) # 禁止使用编译器特有扩展
9 |
10 |
11 |
12 | option(EASTNN_USE_CUDA "enable cuda " OFF)
13 |
14 | if(EASTNN_USE_CUDA)
15 | add_definitions(-DEASTNN_USE_CUDA=1)
16 | enable_language(CUDA)
17 | endif()
18 |
19 | file(GLOB SRC src/*.cpp)
20 | file(GLOB CUDA_SRC src/*.cu src/layers/cuda/*.cu)
21 | file(GLOB LAYERS_SRC src/layers/cxx/*.cpp)
22 |
23 | set(INCLUDE ${CMAKE_SOURCE_DIR}/src)
24 | set(TEST ${CMAKE_SOURCE_DIR}/test)
25 | set(EXAMPLE ${CMAKE_SOURCE_DIR}/example)
26 |
27 | add_subdirectory(src)
28 | add_subdirectory(test)
29 | add_subdirectory(example)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | EasyNN是一个面向教学而研发的推理框架,旨在帮助大家在最短时间内写出一个支持ResNet和YOLOv5等模型的深度学习推理框架。**简单**是EasyNN最大的特点,只需要掌握C++基本语法和神经网络基础知识,你就可以在15天内,写出一个属于自己的网络推理框架。
4 |
5 | # 特性
6 |
7 | - **无第三方库依赖**:EasyNN内含简单的测试框架,并实现了一个简单的Mat数据类。
8 | - **使用PNNX作为模型中间结构**:EasyNN采用[PNNX](https://github.com/Tencent/ncnn/tree/master/tools/pnnx)作为模型的中间结构,大大提高了开发效率。
9 | - **OpenMP多线程加速**:EasyNN利用OpenMP技术实现了多线程加速,以提高推理速度。
10 | - **CUDA加速**:EasyNN部分算子支持CUDA加速,卷积算子支持im2col+CUDA GEMM加速
11 | - **简洁易读的代码**:EasyNN的代码仅采用部分C++11特性编写,代码简洁易读。
12 | - **完善的开发文档和教程**:在开发文档中详细介绍了每个类和函数的作用,并在B站配备视频讲解,带各位敲每一行代码。
13 |
14 | # 你可以学到的内容
15 |
16 | - **C++语法和相关概念**:可以熟悉C++的基本语法和常用特性,如类的定义、成员函数的实现、继承、虚函数的使用等。
17 | - **设计模式和编程范式**:EasyNN的开发过程中涉及到一些常见的设计模式,例如工厂模式和引用计数法。
18 | - **框架开发全流程**:通过学习EasyNN的开发,可以了解一个推理框架的完整开发流程。从框架设计、代码实现到单元测试和调试。
19 | - **常见算子的实现方法**:常见的卷积算子(Conv2d)、池化算子(MaxPool2d)等的实现。
20 | - **CUDA高性能编程**:CUDA编程模型、GEMM、向量化读取、共享内存.
21 |
22 | # 支持的PNNX算子
23 | ```c++
24 | register_layer(Input); //no need
25 | register_layer(Output); //no need
26 | register_layer(Contiguous); //no need
27 |
28 | register_layer(Relu); //cuda accelerate
29 | register_layer(Silu); //cuda accelerate
30 |
31 | register_layer(Convolution); //im2col+sgemm cuda accelerate
32 | register_layer(AdaptivePool);
33 | register_layer(MaxPool);
34 | register_layer(Upsample);
35 |
36 | register_layer(Linear); //cuda accelerate
37 | register_layer(Expression);
38 | register_layer(Flatten);
39 | register_layer(View);
40 | register_layer(Cat);
41 | register_layer(Permute);
42 | ```
43 |
44 |
45 | # 编译与运行
46 |
47 | 第一步:下载并编译代码
48 |
49 | ```shell
50 | git clone https://github.com/HuPengsheet/EasyNN.git
51 | cd EasyNN
52 | mkdir build && cd build
53 | cmake ..
54 | make -j4
55 | ```
56 |
57 | 第二步:下载对应的模型权重
58 |
59 | ```shell
60 | 方法一:使用wget从github上下载
61 | cd ../example
62 | wget https://github.com/HuPengsheet/EasyNN/releases/download/EasyNN1.0-model-file/model.tar.xz
63 | tar -xf model.tar.xz
64 |
65 | 方法二:通过百度云下载,把下载好的文件解压到项目目录下的example文件夹下
66 | 链接: https://pan.baidu.com/s/1RgbSGVNSfYZZtos6Y4Bedw 提取码: h9u6
67 | ```
68 |
69 | 第三步:运行res18和yolov5推理代码,可以看到对应的推理结果
70 |
71 | ```shell
72 | #进入到build目录下
73 |
74 | #运行res18的代码
75 | ./example/res18
76 |
77 | #运行yolov5s的代码
78 | ./example/yolov5s
79 | ```
80 |
81 | 第四步(可选):运行单元测试
82 |
83 | ```shell
84 | ./test/run_test
85 | ```
86 |
87 | # 开发文档
88 | - [测试框架](https://github.com/HuPengsheet/EasyNN/blob/main/documents/%E6%B5%8B%E8%AF%95%E6%A1%86%E6%9E%B6.md)
89 | - [内存分配](https://github.com/HuPengsheet/EasyNN/blob/main/documents/%E5%86%85%E5%AD%98%E5%88%86%E9%85%8D.md)
90 | - [Mat类设计](https://github.com/HuPengsheet/EasyNN/blob/main/documents/Mat%E7%B1%BB%E7%9A%84%E8%AE%BE%E8%AE%A1.md)
91 | - [Layer类和Blob的设计](https://github.com/HuPengsheet/EasyNN/blob/main/documents/Layer%E7%B1%BB%E5%92%8CBlob%E7%9A%84%E8%AE%BE%E8%AE%A1.md)
92 | - [optional类与benckmark](https://github.com/HuPengsheet/EasyNN/blob/main/documents/optional%E7%B1%BB%E4%B8%8Ebenckmark.md)
93 | - [Net类设计](https://github.com/HuPengsheet/EasyNN/blob/main/documents/Net%E7%B1%BB%E7%9A%84%E8%AE%BE%E8%AE%A1.md)
94 | - 未完待续
95 |
96 | # 致谢
97 |
98 | 本项目中很大一部分代码参考了优秀的推理框架[ncnn](https://github.com/Tencent/ncnn)
99 |
100 | 其中example下的部分代码借鉴了[KuiperInfer](https://github.com/zjhellofss/KuiperInfer)
101 |
--------------------------------------------------------------------------------
/documents/Layer类和Blob的设计.md:
--------------------------------------------------------------------------------
1 | # Layer类和Blob的设计
2 |
3 | # Layer类
4 |
5 | ```c++
6 | class Layer{
7 |
8 | public:
9 | Layer();
10 | virtual ~Layer();
11 | virtual int loadParam(std::map<std::string,pnnx::Parameter>& params);
12 | virtual int loadBin(std::map<std::string,pnnx::Attribute>& attrs);
13 | virtual int forward(const Mat& input,Mat& output,const Optional& op);
14 | virtual int forward(const std::vector<Mat>& input,std::vector<Mat>& output,const Optional& op);
15 |
16 | public:
17 |
18 | bool one_blob_only;
19 |
20 | public:
21 |
22 | std::string type;
23 | std::string name;
24 |
25 | std::vector<int> bottoms;
26 | std::vector<int> tops;
27 |
28 | };
29 | ```
30 |
31 | Layer类表示的是一个算子,Layer是一个虚类,当我们要具体实现某一个算子的时候,我们只要继承自Layer,然后重写相关的函数即可。
32 |
33 | ### 属性值
34 |
35 | ```c++
36 | public:
37 |
38 | bool one_blob_only; //这个算子是否单输入输出
39 |
40 | public:
41 |
42 | std::string type; //算子的类型
43 | std::string name; //算子的名字
44 |
45 | std::vector<int> bottoms; //算子的输入blob序号vector
46 | std::vector<int> tops; //算子的输出blob序号vector
47 | ```
48 |
49 | ### 方法
50 |
51 | ```c++
52 | Layer();
53 | virtual ~Layer();
54 | virtual int loadParam(std::map<std::string,pnnx::Parameter>& params); //加载模型的参数
55 | virtual int loadBin(std::map<std::string,pnnx::Attribute>& attrs); //加载模型的权重
56 | virtual int forward(const Mat& input,Mat& output,const Optional& op); //模型推理,对应one_blob_only=true
57 | virtual int forward(const std::vector<Mat>& input,std::vector<Mat>& output,const Optional& op); //模型推理,对应one_blob_only=false
58 | ```
59 |
60 | ## Blob类
61 |
62 | ```c++
63 | class Blob
64 | {
65 | public:
66 | // empty
67 | Blob();
68 |
69 | public:
70 |
71 | int producer;
72 | int consumer;
73 | Mat shape;
74 | };
75 | ```
76 |
77 | Blob类,记录的是算子的输入输出,producer表示生产该blob的算子的序号,consumer表示使用该blob的算子的序号,shape表示尺寸。
78 |
79 |
--------------------------------------------------------------------------------
/documents/Net类的设计.md:
--------------------------------------------------------------------------------
1 | # Net类的设计
2 |
3 | ```c++
4 | class Net
5 | {
6 | public:
7 | Net();
8 | ~Net();
9 | void printLayer() const;
10 | int loadModel(const char * param_path,const char * bin_path);
11 | int extractBlob(const size_t num,Mat& output);
12 | int forwarLayer(int layer_index);
13 | int input(int index,const Mat& input);
14 | int clear();
15 |
16 | std::vector<Blob> blobs;
17 | std::vector<Mat> blob_mats;
18 | std::vector<Layer*> layers;
19 |
20 | size_t layer_num;
21 | size_t blob_num;
22 | Optional op;
23 | private:
24 | pnnx::Graph* graph;
25 | };
26 | ```
27 |
28 | Net类是对pnnx类的解析和重封装,EasyNN采用的是pnnx作为模型的中间结构,有关pnnx的内容大家请参考[pnnx](https://github.com/Tencent/ncnn/tree/master/tools/pnnx)官网的介绍。
29 |
30 | ## 属性值
31 |
32 | ```c++
33 | std::vector<Blob> blobs;
34 | std::vector<Mat> blob_mats;
35 | std::vector<Layer*> layers;
36 |
37 | size_t layer_num;
38 | size_t blob_num;
39 | Optional op;
40 | ```
41 |
42 | `blobs`记录的是模型文件里所有的blob,`blob_mats`存储的是blob对应的Mat数据,`layers`是模型所有算子的集合,`layer_num`和`blob_num`就是对应的算子个数和blob个数,`op`用来控制算子推理时的一些选项。
43 |
44 | ## 方法
45 |
46 | ### loadModel
47 |
48 | ```c++
49 | int Net::loadModel(const char * param_path,const char * bin_path)
50 | {
51 | int re =-1;
52 | re = graph->load(param_path,bin_path); //使用PNNX中的方法,先将模型的参数和权重进行加载
53 | if(re==0)
54 | {
55 | layer_num = graph->ops.size();
56 | blob_num = graph->operands.size();
57 | blobs.resize(blob_num);
58 | blob_mats.resize(blob_num);
59 | layers.resize(layer_num);
60 |
61 | //遍历算子集合
62 | for(int i=0;i<layer_num;i++)
63 | {
64 | pnnx::Operator* op = graph->ops[i];
65 | std::string layer_type = extractLayer(op->type); //提取算子的名字
66 | layer_factory factory = 0;
67 | for(auto l:layes_factory)
68 | {
69 | if(layer_type==l.first) factory=l.second; //根据算子的名字,查找出对应的算子工厂
70 | }
71 | if(!factory)
72 | {
73 | printf("%s is not supportl\n",layer_type.c_str()); //如果没有这个算子,则报错退出
74 | re=-1;
75 | break;
76 | }
77 | Layer* layer = factory(); //使用算子工厂,实例化算子
78 |
79 | layer->name = op->name; //初始化算子名字
80 | layer->type = layer_type; //初始化算子类型
81 |
82 | //构建计算关系,每个layer的输入输出blob是哪个,每个blob是哪个layer产生,是哪个layer使用
83 | for(auto input:op->inputs)
84 | {
85 | int blob_index = std::stoi(input->name);
86 | layer->bottoms.push_back(blob_index);
87 | Blob& blob = blobs[blob_index];
88 | blob.consumer = i;
89 | }
90 | for(auto output:op->outputs)
91 | {
92 | int blob_index = std::stoi(output->name);
93 | layer->tops.push_back(blob_index);
94 | Blob& blob = blobs[blob_index];
95 | blob.producer = i;
96 | }
97 |
98 | layer->loadParam(op->params); //加载算子的参数
99 | layer->loadBin(op->attrs); //加载算子的权重
100 | layers[i]= layer;
101 | }
102 | delete graph; //加载完成后,释放PNNX中的图
103 | }
104 | else
105 | {
106 | printf("load %s %s fail\n",param_path,bin_path);
107 | return re;
108 | }
109 | return re;
110 | }
111 | ```
112 |
113 | 总体思路就是,先把模型的参数和权重加载到pnnx::graph里面去,然后从pnnx::graph提取出我们自己想要的信息,去初始化EasyNN里面我们用得到的信息。
114 |
115 | ### printLayer
116 |
117 | ```c++
118 | void Net::printLayer() const
119 | {
120 | for(auto layer:graph->ops)
121 | {
122 | printf("%s \n",layer->name.c_str());
123 | }
124 | }
125 | ```
126 |
127 | 遍历所有的算子,并打印算子的名字。
128 |
129 | ### input
130 |
131 | ```
132 | int Net::input(int index,const Mat& input)
133 | {
134 | blob_mats[index]=input;
135 | return 0;
136 | }
137 | ```
138 |
139 | 把数据放到指定位置的blob_mats中,一般情况下就是用来放整个模型的输入的
140 |
141 | ### extractBlob
142 |
143 | ```c++
144 | int Net::extractBlob(const size_t num,Mat& output)
145 | {
146 | Blob& blob = blobs[num];
147 | if(num>blob_num-1 || num<0)
148 | {
149 | printf("the %ld blob is not exist ,please check out\n",num);
150 | return -1;
151 | }
152 |
153 | if(blob_mats[num].isEmpty())
154 | forwarLayer(blob.producer);
155 |
156 | output = blob_mats[num];
157 | return 0;
158 | }
159 | ```
160 |
161 | 提取出模型某个blob的数据,一般也就是我们要的整个模型的输出。如果它为空,则表示产生它的算子没有被执行,因此需要调用forwarLayer执行这个算子,才能获得我们想要的数据。
162 |
163 | ### forwarLayer
164 |
165 | ```c++
166 |
167 | int Net::forwarLayer(int layer_index)
168 | {
169 |
170 | if(layer_index>layer_num-1 || layer_index<0)
171 | {
172 | printf("do not have this layer ,layer num is %d",layer_index);
173 | return -1;
174 | }
175 |
176 | Layer* layer = layers[layer_index];
177 | for(auto input:layer->bottoms)
178 | {
179 | if(blob_mats[input].isEmpty())
180 | forwarLayer(blobs[input].producer); //递归调用,直到找到某个layer的输入blob已经存在,说明此时可以forwarLayer
181 | }
182 | if(layer->one_blob_only)
183 | {
184 | int bottom_blob_index = layer->bottoms[0];
185 | int top_blob_index = layer->tops[0];
186 | int re = layer->forward(blob_mats[bottom_blob_index],blob_mats[top_blob_index],op);
187 | if(re!=0)
188 | {
189 | printf("%s forward fail",layer->name.c_str());
190 | return -1;
191 | }
192 | }
193 | else
194 | {
195 | std::vector<Mat> input_mats(layer->bottoms.size());
196 | std::vector<Mat> output_mats(layer->tops.size());
197 |
198 | for(int i=0;i<layer->bottoms.size();i++)
199 | {
200 | input_mats[i] = blob_mats[layer->bottoms[i]];
201 | }
202 |
203 | int re = layer->forward(input_mats,output_mats,op);
204 |
205 | if(re!=0)
206 | {
207 | printf("%s forward fail",layer->name.c_str());
208 | return -1;
209 | }
210 |
211 | for(int i=0;i<layer->tops.size();i++)
212 | {
213 | blob_mats[layer->tops[i]]=output_mats[i];
214 | }
215 |
216 | }
217 | return 0;
218 | }
219 | ```
220 |
221 | `forwarLayer`是一个递归函数,我们要执行一个算子,那这个算子的输入blob_mat必须要存在,如果不存在则需要递归去执行产生这个blob的算子。当某一个算子的输入blob已经存在时,也就是我们手工用input放置在blob_mats中的数据,此时达到递归终止条件,开始一个一个执行算子。根据算子的one_blob_only参数,分别调用不同的`layer->forward`函数。
222 |
223 |
--------------------------------------------------------------------------------
/documents/optional类与benckmark.md:
--------------------------------------------------------------------------------
1 | ## Optional类
2 |
3 | ```c++
4 | class Optional
5 | {
6 | public:
7 | Optional();
8 | int num_thread;
9 | };
10 | ```
11 |
12 | 只有一个参数,`num_thread`表示openmp的多线程个数,默认值为8。
13 |
14 | ## benckmark
15 |
16 | ```c++
17 | double get_current_time()
18 | {
19 | auto now = std::chrono::high_resolution_clock::now();
20 | auto usec = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch());
21 | return usec.count() / 1000.0;
22 | }
23 | ```
24 |
25 | 返回当前的时间,用于计时。
--------------------------------------------------------------------------------
/documents/内存分配.md:
--------------------------------------------------------------------------------
1 | 相关代码在src/allocator.h里面
2 |
3 | ```c++
4 | #ifndef EASYNN_ALLOCATOR_H
5 | #define EASYNN_ALLOCATOR_H
6 |
7 | #include <stdlib.h>
8 |
9 | #define EASYNN_MALLOC_ALIGN 64
10 | #define EASYNN_MALLOC_OVERREAD 64
11 |
12 | static size_t alignSize(size_t sz, int n)
13 | {
14 | return (sz + n - 1) & -n;
15 | }
16 |
17 | static void* fastMalloc(size_t size)
18 | {
19 | void * ptr = 0;
20 | if (posix_memalign(&ptr, EASYNN_MALLOC_ALIGN, size + EASYNN_MALLOC_OVERREAD))
21 | ptr = 0;
22 | return ptr;
23 | }
24 |
25 | static void fastFree(void* ptr)
26 | {
27 | if (ptr)
28 | {
29 | free(ptr);
30 | }
31 | }
32 |
33 | #endif
34 | ```
35 |
36 | `fastMalloc`实际上是封装`posix_memalign`,返回对齐64字节内存的地址,同时会额外多分配`EASYNN_MALLOC_OVERREAD`,也就是64个字节。`alignSize`的功能是将给定的`size_t`类型的`sz`值对齐到最近的`n`的倍数。
--------------------------------------------------------------------------------
/documents/测试框架.md:
--------------------------------------------------------------------------------
1 | 为了不引入额外的第三方库,EasyNN内部实现了一个简单的单元测试框架,用来实现单元测试的功能。
2 |
3 | ## 单元测试介绍
4 |
5 | 单元测试是一种软件测试方法,主要关注软件中的最小可测试单元,通常是单个函数、方法或类。单元测试的作用主要包括:
6 |
7 | 1. 提升代码质量:通过在编写代码的过程中发现并修复错误,有助于提高代码的质量和减少bug的数量。
8 | 2. 提升反馈速度:可以快速地提供反馈,让开发人员知道他们的代码是否按照预期工作,从而减少重复工作,提高开发效率。
9 | 3. 保护代码:当对代码进行修改或添加新功能时,单元测试可以帮助确保这些更改没有破坏现有的功能。
10 | 4. 简化代码维护:作为代码的一种文档,单元测试可以帮助开发人员理解代码的功能和工作方式,从而使代码的维护变得更容易。
11 | 5. 改进代码设计:因为单元测试强制开发人员编写可测试的代码,这通常也意味着代码的设计更好。
12 |
13 | ## 单元测试的使用
14 |
15 | ```c++
16 | TEST(Mat, refcount)
17 | {
18 | {
19 | easynn::Mat m1(10,10);
20 | EXPECT_EQ(*m1.refcount, 1);
21 | easynn::Mat m2(m1);
22 | easynn::Mat m3=m1;
23 | EXPECT_EQ(*m2.refcount, 3);
24 | EXPECT_EQ(*m3.refcount, 3);
25 | }
26 |
27 | easynn::Mat m1(10,10);
28 | {
29 | EXPECT_EQ(*m1.refcount, 1);
30 | easynn::Mat m2(m1);
31 | easynn::Mat m3=m1;
32 | EXPECT_EQ(*m2.refcount, 3);
33 | EXPECT_EQ(*m3.refcount, 3);
34 | }
35 | EXPECT_EQ(*m1.refcount, 1);
36 |
37 | {
38 |
39 | easynn::Mat m2;
40 | easynn::Mat m3=m2;
41 | EXPECT_EQ((long)m2.refcount, 0);
42 | EXPECT_EQ((long)m3.refcount, 0);
43 | }
44 |
45 | }
46 | ```
47 |
48 | 如上所示,就是一个单元测试的示例,`TEST(Mat, refcount)`,表示这是Mat类的单元测试,且测试的是引用计数的方法。
49 |
50 | `EXPECT_EQ()`是一个宏,表示期望两个数值相等,如果不相等的话,则refcount这个单元测试会报错。
51 |
52 | ## 代码解释
53 |
54 | 相关的代码在test/test_ulti.h下
55 |
56 | ```c++
57 | #define QTEST_EXPECT(x, y, cond) \
58 | if (!((x)cond(y))) \
59 | { \
60 | printf("%s:%u: Failure\n", __FILE__, __LINE__); \
61 | if (strcmp(#cond, "==") == 0) \
62 | { \
63 | printf("Expected equality of these values:\n"); \
64 | printf(" %s\n", #x); \
65 | qtest_evaluate_if_required(#x, x); \
66 | printf(" %s\n", #y); \
67 | qtest_evaluate_if_required(#y, y); \
68 | } \
69 | else \
70 | { \
71 | printf("Expected: (%s) %s (%s), actual: %s vs %s\n", #x, #cond, #y, std::to_string(x).c_str(), std::to_string(y).c_str()); \
72 | } \
73 | *qtest_current_fail_cnt = *qtest_current_fail_cnt + 1; \
74 | }
75 |
76 | #define EXPECT_EQ(x, y) QTEST_EXPECT(x, y, ==)
77 | #define EXPECT_NE(x, y) QTEST_EXPECT(x, y, !=)
78 | #define EXPECT_LT(x, y) QTEST_EXPECT(x, y, <)
79 | #define EXPECT_LE(x, y) QTEST_EXPECT(x, y, <=)
80 | #define EXPECT_GT(x, y) QTEST_EXPECT(x, y, >)
81 | #define EXPECT_GE(x, y) QTEST_EXPECT(x, y, >=)
82 |
83 | #define EXPECT_TRUE(x) \
84 | if (!((x))) \
85 | { \
86 | printf("%s:%u: Failure\n", __FILE__, __LINE__); \
87 | printf("Value of: %s\n", #x); \
88 | printf(" Actual: false\n"); \
89 | printf("Expected: true\n"); \
90 | *qtest_current_fail_cnt = *qtest_current_fail_cnt + 1; \
91 | }
92 |
93 | #define EXPECT_FALSE(x) \
94 | if (((x))) \
95 | { \
96 | printf("%s:%u: Failure\n", __FILE__, __LINE__); \
97 | printf("Value of: %s\n", #x); \
98 | printf(" Actual: true\n"); \
99 | printf("Expected: false\n"); \
100 | *qtest_current_fail_cnt = *qtest_current_fail_cnt + 1; \
101 | }
102 |
103 | template<typename T>
104 | void qtest_evaluate_if_required(const char* str, T value)
105 | {
106 | if (strcmp(str, std::to_string(value).c_str()) != 0)
107 | {
108 | std::cout << " Which is: " << value << std::endl;
109 | }
110 | }
111 |
112 | #define ASSERT_EQ(x, y) \
113 | if ((x)!=(y)) \
114 | { \
115 | printf("%s:%u: Failure\n", __FILE__, __LINE__); \
116 | printf("Expected equality of these values:\n"); \
117 | printf(" %s\n", #x); \
118 | qtest_evaluate_if_required(#x, x); \
119 | printf(" %s\n", #y); \
120 | qtest_evaluate_if_required(#y, y); \
121 | *qtest_current_fail_cnt = *qtest_current_fail_cnt + 1; \
122 | return; \
123 | }
124 | ```
125 |
126 | 上面的代码是用来实现判断值相不相等的宏,例如EXPECT_EQ表示希望两个值相等,EXPECT_NE表示希望两个值不相等。如果输入的值不满足所期望的判断条件,那么就会输出相应的错误信息,同时记录下这次错误。
127 |
128 | ```c++
129 | class TestEntity
130 | {
131 | private:
132 | TestEntity() { };
133 | ~TestEntity() { };
134 |
135 | public:
136 | std::string make_proper_str(size_t num, const std::string str, bool uppercase = false)
137 | {
138 | std::string res;
139 | if (num > 1)
140 | {
141 | if (uppercase)
142 | res = std::to_string(num) + " " + str + "S";
143 | else
144 | res = std::to_string(num) + " " + str + "s";
145 | }
146 | else
147 | {
148 | res = std::to_string(num) + " " + str;
149 | }
150 | return res;
151 | }
152 |
153 | public:
154 | TestEntity(const TestEntity& other) = delete;
155 | TestEntity operator=(const TestEntity& other) = delete;
156 |
157 | static TestEntity& get_instance()
158 | {
159 | static TestEntity entity;
160 | return entity;
161 | }
162 |
163 | int add(std::string test_set_name, std::string test_name, std::function<void(int*)> f, const char* fname)
164 | {
165 | TestItem item(f, fname);
166 | test_sets[test_set_name].test_items.emplace_back(item);
167 | return 0;
168 | }
169 |
170 | int set_filter(std::string _filter)
171 | {
172 | filter = _filter;
173 | return 0;
174 | }
175 |
176 | int run_all_test_functions()
177 | {
178 | std::map<std::string, TestSet>::iterator it = test_sets.begin();
179 | for (; it != test_sets.end(); it++)
180 | {
181 | std::string test_set_name = it->first;
182 | TestSet& test_set = it->second;
183 | std::vector<TestItem>& test_items = test_set.test_items;
184 |
185 | int cnt = 0;
186 | for (int i = 0; i < test_items.size(); i++)
187 | {
188 | const std::string fname = test_items[i].fname;
189 | if (filter.length() == 0 || (filter.length() > 0 && strmatch(fname, filter)))
190 | {
191 | cnt++;
192 | }
193 | }
194 |
195 | if (cnt == 0) continue;
196 |
197 | matched_test_set_count++;
198 |
199 | const std::string test_item_str = make_proper_str(cnt, "test");
200 | printf("%s[----------]%s %s from %s\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END, test_item_str.c_str(), it->first.c_str());
201 | for (int i = 0; i < test_items.size(); i++)
202 | {
203 | auto f = test_items[i].f;
204 | std::string fname = test_items[i].fname;
205 | if (filter.length() > 0 && !strmatch(fname, filter))
206 | {
207 | continue;
208 | }
209 |
210 | matched_test_case_count++;
211 |
212 | int qtest_current_fail_cnt = 0;
213 | printf("%s[ RUN ]%s %s\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END, fname.c_str());
214 | f(&qtest_current_fail_cnt);
215 | if (qtest_current_fail_cnt == 0)
216 | {
217 | printf("%s[ OK ]%s %s (0 ms)\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END, fname.c_str());
218 | }
219 | else
220 | {
221 | printf("%s[ FAILED ]%s %s (0 ms)\n", QTEST_ESCAPE_COLOR_RED, QTEST_ESCAPE_COLOR_END, fname.c_str());
222 | qtest_fail_cnt++;
223 | }
224 | test_items[i].success = (qtest_current_fail_cnt == 0);
225 | }
226 | printf("%s[----------]%s %s from %s (0 ms total)\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END, test_item_str.c_str(), it->first.c_str());
227 | printf("\n");
228 | }
229 |
230 | printf("%s[----------]%s Global test environment tear-down\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END);
231 | std::string tests_str = make_proper_str(matched_test_case_count, "test");
232 | std::string suite_str = make_proper_str(matched_test_set_count, "test suite");
233 | printf("%s[==========]%s %s from %s ran. (0 ms total)\n",
234 | QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END,
235 | tests_str.c_str(),
236 | suite_str.c_str()
237 | );
238 |
239 | int passed_test_count = matched_test_case_count - qtest_fail_cnt;
240 | std::string how_many_test_str = make_proper_str(passed_test_count, "test");
241 | printf("%s[ PASSED ]%s %s.\n", QTEST_ESCAPE_COLOR_GREEN, QTEST_ESCAPE_COLOR_END, how_many_test_str.c_str());
242 |
243 | if (qtest_fail_cnt)
244 | {
245 | std::string failed_test_str = make_proper_str(qtest_fail_cnt, "test");
246 | printf("%s[ FAILED ]%s %s, listed below:\n", QTEST_ESCAPE_COLOR_RED, QTEST_ESCAPE_COLOR_END, failed_test_str.c_str());
247 |
248 |             std::map<std::string, TestSet>::iterator it = test_sets.begin();
249 | for (; it != test_sets.end(); it++)
250 | {
251 | std::string test_set_name = it->first;
252 | TestSet test_set = it->second;
253 |                 std::vector<TestItem> test_items = test_set.test_items;
254 | for (int i = 0; i < test_items.size(); i++)
255 | {
256 | if (!test_items[i].success)
257 | {
258 | printf("%s[ FAILED ]%s %s\n", QTEST_ESCAPE_COLOR_RED, QTEST_ESCAPE_COLOR_END, test_items[i].fname.c_str());
259 | }
260 | }
261 | }
262 | }
263 |
264 | if (qtest_fail_cnt > 0)
265 | {
266 | std::string failed_test_str = make_proper_str(qtest_fail_cnt, "FAILED TEST", true);
267 | printf("\n %s\n", failed_test_str.c_str());
268 | }
269 |
270 | return 0;
271 | }
272 |
273 | private:
274 | // https://leetcode.cn/problems/wildcard-matching/solutions/315802/tong-pei-fu-pi-pei-by-leetcode-solution/
275 | /// @param s string
276 | /// @param p pattern
277 | bool strmatch(std::string s, std::string p)
278 | {
279 | auto allStars = [](const std::string& str, int left, int right) {
280 | for (int i = left; i < right; ++i) {
281 | if (str[i] != '*') {
282 | return false;
283 | }
284 | }
285 | return true;
286 | };
287 | auto charMatch = [](char u, char v)
288 | {
289 | return u == v || v == '?';
290 | };
291 |
292 | while (s.size() && p.size() && p.back() != '*')
293 | {
294 | if (charMatch(s.back(), p.back())) {
295 | s.pop_back();
296 | p.pop_back();
297 | }
298 | else {
299 | return false;
300 | }
301 | }
302 | if (p.empty()) {
303 | return s.empty();
304 | }
305 |
306 | int sIndex = 0;
307 | int pIndex = 0;
308 | int sRecord = -1;
309 | int pRecord = -1;
310 | while (sIndex < s.size() && pIndex < p.size()) {
311 | if (p[pIndex] == '*') {
312 | ++pIndex;
313 | sRecord = sIndex;
314 | pRecord = pIndex;
315 | }
316 | else if (charMatch(s[sIndex], p[pIndex])) {
317 | ++sIndex;
318 | ++pIndex;
319 | }
320 | else if (sRecord != -1 && sRecord + 1 < s.size()) {
321 | ++sRecord;
322 | sIndex = sRecord;
323 | pIndex = pRecord;
324 | }
325 | else {
326 | return false;
327 | }
328 | }
329 | return allStars(p, pIndex, p.size());
330 | }
331 |
332 |
333 | public:
334 | struct TestItem
335 | {
336 |         std::function<void(int*)> f;
337 | std::string fname;
338 | bool success;
339 |
340 |         TestItem(std::function<void(int*)> _f, std::string _fname):
341 | f(_f), fname(_fname), success(true)
342 | {}
343 | };
344 |
345 | struct TestSet
346 | {
347 |         std::vector<TestItem> test_items;
348 | };
349 |
350 |     std::map<std::string, TestSet> test_sets;
351 |
352 | public:
353 | int matched_test_case_count = 0;
354 | int matched_test_set_count = 0;
355 |
356 | private:
357 | int qtest_fail_cnt = 0; // number of failures in one test set
358 | std::string filter;
359 | };
360 | ```
361 |
362 | 接下来就是TestEntity这个类的实现。总体上看,TestEntity是作为单例模式来使用,当我们在写单元测试时,如下
363 |
364 | ```c++
365 | TEST(Mat, refcount)
366 | {
367 | }
368 |
369 | #define TEST(set, name) \
370 | void qtest_##set##_##name(int* qtest_current_fail_cnt); \
371 | int qtest_mark_##set##_##name = TestEntity::get_instance().add(#set, #name, qtest_##set##_##name, #set "." #name); \
372 | void qtest_##set##_##name(int* qtest_current_fail_cnt) \
373 |
374 | ```
375 |
376 | TEST是一个宏,宏展开后变成三行代码:第一行是把名字拼接后声明一个函数,第二行是调用`TestEntity::get_instance().add()`把该函数注册到单例中,第三行再次写出函数签名,后面紧跟的花括号体就成为该函数的定义。
377 |
378 | ```c++
379 | static TestEntity& get_instance()
380 | {
381 | static TestEntity entity;
382 | return entity;
383 | }
384 |
385 | int add(std::string test_set_name, std::string test_name, std::function<void(int*)> f, const char* fname)
386 | {
387 | TestItem item(f, fname);
388 | test_sets[test_set_name].test_items.emplace_back(item);
389 | return 0;
390 | }
391 | ```
392 |
393 | `get_instance`返回类的实例,也就是单例模式,只有一个对象。add函数就是将我们的单元测试的函数指针保存起来。只要把这些单元测试的指针保存下来了,我们遍历并执行,做一些对应的处理就可以了,相对应的函数是`run_all_test_functions()`。
--------------------------------------------------------------------------------
/example/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
# Locate OpenCV quietly (examples still configure if it is absent); only the
# components the example programs actually use are requested.
2 | find_package(OpenCV QUIET COMPONENTS core highgui imgproc)
3 |
# Make the built easynn library directory visible to the link step.
4 | link_directories(${CMAKE_SOURCE_DIR}/build/src/cxx)
5 |
# Build one executable per .cpp file found in the example directory,
# named after the source file (res18.cpp -> res18, yolov5s.cpp -> yolov5s).
6 | file(GLOB cpp_files ${EXAMPLE}/*.cpp)
7 | foreach(cpp_file ${cpp_files})
8 | get_filename_component(FILE_NAME ${cpp_file} NAME)
9 | string(REPLACE ".cpp" "" FILE_NAME "${FILE_NAME}")
10 | add_executable(${FILE_NAME} ${cpp_file})
11 | target_include_directories(${FILE_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS} ${INCLUDE})
12 | target_link_libraries(${FILE_NAME} PRIVATE ${OpenCV_LIBS} easynn)
13 | endforeach()
--------------------------------------------------------------------------------
/example/res18.cpp:
--------------------------------------------------------------------------------
1 | #include"net.h"
2 | #include"benchmark.h"
3 | #include
4 | #include
5 | #include "opencv2/imgproc/imgproc.hpp"
6 |
7 |
8 |
9 |
10 | static void normize(const easynn::Mat& m,std::vector mean,std::vector var)
11 | {
12 | for (int q=0; qmax)
37 | {
38 | index=x;
39 | max = m[x];
40 | }
41 | }
42 | printf("max:%f\n",max);
43 | return index;
44 | }
45 |
46 | static void pretreatment(cv::Mat& input_image,easynn::Mat& output_image,int h,int w)
47 | {
48 | cv::Mat resize_image;
49 | cv::resize(input_image, resize_image, cv::Size(224, 224));
50 |
51 | cv::Mat rgb_image;
52 | cv::cvtColor(resize_image, rgb_image, cv::COLOR_BGR2RGB);
53 | rgb_image.convertTo(rgb_image, CV_32FC3);
54 | std::vector split_images;
55 | cv::split(rgb_image, split_images);
56 |
57 |
58 | output_image.create(w,h,3);
59 |
60 | int index = 0;
61 | for (const auto& split_image : split_images)
62 | {
63 | memcpy((void*)output_image.channel(index), split_image.data, sizeof(float) * split_image.total());
64 | index += 1;
65 | }
66 | }
67 |
68 | static void res18(const easynn::Mat& in,easynn::Mat& result)
69 | {
70 | easynn::Net net;
71 | net.loadModel(\
72 | "../example/res18.pnnx.param",\
73 | "../example/res18.pnnx.bin");
74 | net.input(0,in);
75 | net.extractBlob(49,result);
76 | }
77 |
78 | int main()
79 | {
80 | std::string image_path = "../images/dog.jpg";
81 | cv::Mat image = cv::imread(image_path, 1);
82 | if (image.empty())
83 | {
84 | fprintf(stderr, "cv::imread %s failed\n", image_path.c_str());
85 | return -1;
86 | }
87 |
88 | //cv::Mat to EasyNN Mat
89 | easynn::Mat in;
90 | pretreatment(image,in,224,224);
91 |
92 | //normize
93 | std::vector mean = {0.485f,0.456f,0.406f};
94 | std::vector var = { 0.229f,0.224f,0.225f};
95 | normize(in,mean,var);
96 |
97 | // forward net
98 | easynn::Mat result;
99 | double start = easynn::get_current_time();
100 | res18(in,result);
101 | double end = easynn::get_current_time();
102 |
103 | printf("total time is %f ms\n",end-start);
104 |
105 |
106 | //find Max score class
107 | int cls = findMax(result);
108 | printf("cls = %d\n",cls);
109 |
110 | return 0;
111 | }
--------------------------------------------------------------------------------
/example/yolov5s.cpp:
--------------------------------------------------------------------------------
1 | #include"net.h"
2 | #include"benchmark.h"
3 | #include
4 | #include
5 | #include "opencv2/imgproc/imgproc.hpp"
6 |
7 | struct Object
8 | {
9 | cv::Rect_ rect;
10 | int label;
11 | float prob;
12 | };
13 |
14 | void normize(const easynn::Mat& m)
15 | {
16 | for (int q=0; q split_images;
43 | cv::split(rgb_image, split_images);
44 |
45 |
46 | output_image.create(w,h,3);
47 |
48 | int index = 0;
49 | for (const auto& split_image : split_images)
50 | {
51 | memcpy((void*)output_image.channel(index), split_image.data, sizeof(float) * split_image.total());
52 | index += 1;
53 | }
54 | }
55 |
56 |
57 |
58 | static inline float intersection_area(const Object& a, const Object& b)
59 | {
60 | cv::Rect_ inter = a.rect & b.rect;
61 | return inter.area();
62 | }
63 |
64 | static void qsort_descent_inplace(std::vector