├── CMakeLists.txt
├── README.md
├── build
    ├── Makefile
    └── yolov5.test
├── common.py
├── include
    └── ncnn
    │   ├── allocator.h
    │   ├── benchmark.h
    │   ├── blob.h
    │   ├── c_api.h
    │   ├── command.h
    │   ├── cpu.h
    │   ├── datareader.h
    │   ├── gpu.h
    │   ├── layer.h
    │   ├── layer_shader_type.h
    │   ├── layer_shader_type_enum.h
    │   ├── layer_type.h
    │   ├── layer_type_enum.h
    │   ├── mat.h
    │   ├── modelbin.h
    │   ├── net.h
    │   ├── option.h
    │   ├── paramdict.h
    │   ├── pipeline.h
    │   ├── pipelinecache.h
    │   ├── platform.h
    │   ├── simpleocv.h
    │   ├── simpleomp.h
    │   └── simplestl.h
├── lib
    ├── cmake
    │   └── ncnn
    │   │   ├── ncnn-release.cmake
    │   │   ├── ncnn.cmake
    │   │   └── ncnnConfig.cmake
    └── libncnn.a
├── test_jpg
    ├── 10.jpg
    ├── 105.jpg
    ├── 106.jpg
    └── 107.jpg
├── yolov5.cpp
└── yw_5s
    ├── voc.names
    ├── yw_5s.bin
    └── yw_5s.param


/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Builds the YOLOv5s ncnn inference demo executable `yolov5.test`.
 2 | # OpenMP and OpenCV must be installed on the host; ncnn is consumed from the
 3 | # prebuilt static library (lib/libncnn.a) and headers (include/ncnn) in this repo.
 4 | #
 5 | # 3.10 is the CMake version that generated the checked-in build/Makefile and is
 6 | # new enough for the OpenMP::OpenMP_CXX imported target (3.9+). The previous
 7 | # 2.8.3 floor is rejected outright by CMake >= 4.0.
 8 | cmake_minimum_required(VERSION 3.10)
 9 | project(yolov5)
10 | 
11 | # Request strict C++11 (-std=c++11, not -std=gnu++11) without editing CMAKE_CXX_FLAGS.
12 | set(CMAKE_CXX_STANDARD 11)
13 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
14 | set(CMAKE_CXX_EXTENSIONS OFF)
15 | 
16 | # REQUIRED aborts configuration when OpenMP is missing, so no OPENMP_FOUND check is needed.
17 | find_package(OpenMP REQUIRED)
18 | message("OPENMP FOUND")
19 | 
20 | find_package(OpenCV REQUIRED)
21 | message(STATUS "Opencv library status: ")
22 | message(STATUS "> version: ${OpenCV_VERSION} ")
23 | message(STATUS "> libraries: ${OpenCV_LIBS}")
24 | message(STATUS "> include: ${OpenCV_INCLUDE_DIRS} ")
25 | 
26 | # Wrap the prebuilt ncnn archive in an imported target so that its include
27 | # directory travels with it and OpenMP is placed after libncnn.a on the link
28 | # line (static archives must precede the libraries that resolve their symbols).
29 | add_library(ncnn_prebuilt STATIC IMPORTED)
30 | set_target_properties(ncnn_prebuilt PROPERTIES
31 |     IMPORTED_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}/lib/libncnn.a"
32 |     INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/ncnn"
33 |     INTERFACE_LINK_LIBRARIES OpenMP::OpenMP_CXX
34 | )
35 | 
36 | add_executable(yolov5.test yolov5.cpp)
37 | target_include_directories(yolov5.test PRIVATE ${OpenCV_INCLUDE_DIRS})
38 | target_link_libraries(yolov5.test PRIVATE ${OpenCV_LIBS} ncnn_prebuilt OpenMP::OpenMP_CXX)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Yolo-V5s转换ncnn并推理测试Pipeline
 2 | 
 3 | 
 4 | ## 步骤
 5 | 
 6 | ### 1、yolov5代码修改：
 7 | 
 8 |     common.py中的class Focus(nn.Module)中，切片操作改为下采样<torch.nn.functional.interpolate>
 9 |     
10 |     要修改的common.py就在本文件夹下，可以替换到yolov5/models下面
11 |     
12 |     修改后需要加载原来的权重重新训练一遍，否则推理结果不可用（切片换成下采样后精度会略有下降，在我自己数据集的测试集上 mAP 下降了约 0.03–0.04）。
13 |     
14 |     
15 | ### 2、yolov5模型转换成onnx：
16 | 
17 |    参考[官方链接](https://github.com/ultralytics/yolov5/issues/251)
18 | ```   
19 |     python models/export.py --weights yolov5s.pt --img 640 --batch 1  # export at 640x640 with batch size 1
20 | ```   
21 |     (注意代码中设置 model.model[-1].export = True)
22 |     
23 |     
24 | ### 3、onnx模型简化：
25 | ```   
26 |     pip3 install onnx-simplifier
27 |     python3 -m onnxsim yolov5s.onnx yolov5s.onnx
28 | ```   
29 |     
30 |     
31 | ### 4、编译安装ncnn：
32 | 
33 |    参考[官方链接](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux)
34 |     
35 |     注意其中的依赖项，尤其是 protobuf：亲测 2.5.0 版本无法使用，需要从源码编译升级到 3.0 以上（下载源码时选择 cpp 版本即可）。
36 | ```   
37 |     make && make install
38 | ```   
39 |     
40 |     
41 | ### 5、在ncnn中转换onnx为bin/param：
42 |     
43 |     ncnn安装后，在 ncnn/build/tools/onnx 下面 找到 onnx2ncnn
44 |     
45 |     执行下面命令即可：
46 | ```
47 |     ./onnx2ncnn yolov5s.onnx yolov5s.param yolov5s.bin
48 | ```
49 | 
50 | 
51 | ### 6、C++推理代码:
52 | 
53 |    推理代码参考[本github yolov5.cpp](https://github.com/a954217436/yolov5s_ncnn_inference/blob/main/yolov5.cpp)
54 |     
55 |     输出层的名字可以用 netron 查看；我的模型中是 output、857、877，不同模型的输出层名字可能不一样，需要在 yolov5.cpp 中相应修改。
56 |     
57 |     注意CMakeLists.txt中，需要openMP，openCV，libncnn.a等
58 |     
59 | ```
60 |     mkdir build && cd build
61 |     cmake ..
62 |     make
63 |     ./yolov5.test ../test_jpg
64 | ```
65 | 
66 | 
67 | ### TODO:
68 |     
69 | * vulkan 推理测试
70 | * 5m/5l/5x 转换
71 | * 区分类别进行nms
72 | * 测试输入分辨率w!=h时的效果
73 | 
74 | 


--------------------------------------------------------------------------------
/build/Makefile:
--------------------------------------------------------------------------------
  1 | # CMAKE generated file: DO NOT EDIT!
  2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.10
  3 | 
  4 | # Default target executed when no arguments are given to make.
  5 | default_target: all
  6 | 
  7 | .PHONY : default_target
  8 | 
  9 | # Allow only one "make -f Makefile2" at a time, but pass parallelism.
 10 | .NOTPARALLEL:
 11 | 
 12 | 
 13 | #=============================================================================
 14 | # Special targets provided by cmake.
 15 | 
 16 | # Disable implicit rules so canonical targets will work.
 17 | .SUFFIXES:
 18 | 
 19 | 
 20 | # Remove some rules from gmake that .SUFFIXES does not remove.
 21 | SUFFIXES =
 22 | 
 23 | .SUFFIXES: .hpux_make_needs_suffix_list
 24 | 
 25 | 
 26 | # Suppress display of executed commands.
 27 | $(VERBOSE).SILENT:
 28 | 
 29 | 
 30 | # A target that is always out of date.
 31 | cmake_force:
 32 | 
 33 | .PHONY : cmake_force
 34 | 
 35 | #=============================================================================
 36 | # Set environment variables for the build.
 37 | 
 38 | # The shell in which to execute make rules.
 39 | SHELL = /bin/sh
 40 | 
 41 | # The CMake executable.
 42 | CMAKE_COMMAND = /usr/bin/cmake
 43 | 
 44 | # The command to remove a file.
 45 | RM = /usr/bin/cmake -E remove -f
 46 | 
 47 | # Escaping for special characters.
 48 | EQUALS = =
 49 | 
 50 | # The top-level source directory on which CMake was run.
 51 | CMAKE_SOURCE_DIR = /zhanghao/code/yolov5_to_ncnn/ncnn
 52 | 
 53 | # The top-level build directory on which CMake was run.
 54 | CMAKE_BINARY_DIR = /zhanghao/code/yolov5_to_ncnn/ncnn/build
 55 | 
 56 | #=============================================================================
 57 | # Targets provided globally by CMake.
 58 | 
 59 | # Special rule for the target rebuild_cache
 60 | rebuild_cache:
 61 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
 62 | 	/usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
 63 | .PHONY : rebuild_cache
 64 | 
 65 | # Special rule for the target rebuild_cache
 66 | rebuild_cache/fast: rebuild_cache
 67 | 
 68 | .PHONY : rebuild_cache/fast
 69 | 
 70 | # Special rule for the target edit_cache
 71 | edit_cache:
 72 | 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..."
 73 | 	/usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available.
 74 | .PHONY : edit_cache
 75 | 
 76 | # Special rule for the target edit_cache
 77 | edit_cache/fast: edit_cache
 78 | 
 79 | .PHONY : edit_cache/fast
 80 | 
 81 | # The main all target
 82 | all: cmake_check_build_system
 83 | 	$(CMAKE_COMMAND) -E cmake_progress_start /zhanghao/code/yolov5_to_ncnn/ncnn/build/CMakeFiles /zhanghao/code/yolov5_to_ncnn/ncnn/build/CMakeFiles/progress.marks
 84 | 	$(MAKE) -f CMakeFiles/Makefile2 all
 85 | 	$(CMAKE_COMMAND) -E cmake_progress_start /zhanghao/code/yolov5_to_ncnn/ncnn/build/CMakeFiles 0
 86 | .PHONY : all
 87 | 
 88 | # The main clean target
 89 | clean:
 90 | 	$(MAKE) -f CMakeFiles/Makefile2 clean
 91 | .PHONY : clean
 92 | 
 93 | # The main clean target
 94 | clean/fast: clean
 95 | 
 96 | .PHONY : clean/fast
 97 | 
 98 | # Prepare targets for installation.
 99 | preinstall: all
100 | 	$(MAKE) -f CMakeFiles/Makefile2 preinstall
101 | .PHONY : preinstall
102 | 
103 | # Prepare targets for installation.
104 | preinstall/fast:
105 | 	$(MAKE) -f CMakeFiles/Makefile2 preinstall
106 | .PHONY : preinstall/fast
107 | 
108 | # clear depends
109 | depend:
110 | 	$(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
111 | .PHONY : depend
112 | 
113 | #=============================================================================
114 | # Target rules for targets named yolov5.test
115 | 
116 | # Build rule for target.
117 | yolov5.test: cmake_check_build_system
118 | 	$(MAKE) -f CMakeFiles/Makefile2 yolov5.test
119 | .PHONY : yolov5.test
120 | 
121 | # fast build rule for target.
122 | yolov5.test/fast:
123 | 	$(MAKE) -f CMakeFiles/yolov5.test.dir/build.make CMakeFiles/yolov5.test.dir/build
124 | .PHONY : yolov5.test/fast
125 | 
126 | yolov5.o: yolov5.cpp.o
127 | 
128 | .PHONY : yolov5.o
129 | 
130 | # target to build an object file
131 | yolov5.cpp.o:
132 | 	$(MAKE) -f CMakeFiles/yolov5.test.dir/build.make CMakeFiles/yolov5.test.dir/yolov5.cpp.o
133 | .PHONY : yolov5.cpp.o
134 | 
135 | yolov5.i: yolov5.cpp.i
136 | 
137 | .PHONY : yolov5.i
138 | 
139 | # target to preprocess a source file
140 | yolov5.cpp.i:
141 | 	$(MAKE) -f CMakeFiles/yolov5.test.dir/build.make CMakeFiles/yolov5.test.dir/yolov5.cpp.i
142 | .PHONY : yolov5.cpp.i
143 | 
144 | yolov5.s: yolov5.cpp.s
145 | 
146 | .PHONY : yolov5.s
147 | 
148 | # target to generate assembly for a file
149 | yolov5.cpp.s:
150 | 	$(MAKE) -f CMakeFiles/yolov5.test.dir/build.make CMakeFiles/yolov5.test.dir/yolov5.cpp.s
151 | .PHONY : yolov5.cpp.s
152 | 
153 | # Help Target
154 | help:
155 | 	@echo "The following are some of the valid targets for this Makefile:"
156 | 	@echo "... all (the default if no target is provided)"
157 | 	@echo "... clean"
158 | 	@echo "... depend"
159 | 	@echo "... rebuild_cache"
160 | 	@echo "... yolov5.test"
161 | 	@echo "... edit_cache"
162 | 	@echo "... yolov5.o"
163 | 	@echo "... yolov5.i"
164 | 	@echo "... yolov5.s"
165 | .PHONY : help
166 | 
167 | 
168 | 
169 | #=============================================================================
170 | # Special targets to cleanup operation of make.
171 | 
172 | # Special rule to run CMake to check the build system integrity.
173 | # No rule that depends on this can have commands that come from listfiles
174 | # because they might be regenerated.
175 | cmake_check_build_system:
176 | 	$(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
177 | .PHONY : cmake_check_build_system
178 | 
179 | 


--------------------------------------------------------------------------------
/build/yolov5.test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/build/yolov5.test


--------------------------------------------------------------------------------
/common.py:
--------------------------------------------------------------------------------
  1 | # This file contains modules common to various models
  2 | 
  3 | import math
  4 | import numpy as np
  5 | import torch
  6 | import torch.nn as nn
  7 | from PIL import Image, ImageDraw
  8 | 
  9 | from utils.datasets import letterbox
 10 | from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
 11 | from utils.plots import color_list
 12 | 
 13 | 
 14 | def autopad(k, p=None):  # kernel, padding
 15 |     # Pad to 'same'
 16 |     if p is None:
 17 |         p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
 18 |     return p
 19 | 
 20 | 
 21 | def DWConv(c1, c2, k=1, s=1, act=True):
 22 |     # Depthwise convolution
 23 |     return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
 24 | 
 25 | 
 26 | class Conv(nn.Module):
 27 |     # Standard convolution
 28 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
 29 |         super(Conv, self).__init__()
 30 |         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
 31 |         self.bn = nn.BatchNorm2d(c2)
 32 |         self.act = nn.Hardswish() if act else nn.Identity()
 33 | 
 34 |     def forward(self, x):
 35 |         return self.act(self.bn(self.conv(x)))
 36 | 
 37 |     def fuseforward(self, x):
 38 |         return self.act(self.conv(x))
 39 | 
 40 | 
 41 | class Bottleneck(nn.Module):
 42 |     # Standard bottleneck
 43 |     def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
 44 |         super(Bottleneck, self).__init__()
 45 |         c_ = int(c2 * e)  # hidden channels
 46 |         self.cv1 = Conv(c1, c_, 1, 1)
 47 |         self.cv2 = Conv(c_, c2, 3, 1, g=g)
 48 |         self.add = shortcut and c1 == c2
 49 | 
 50 |     def forward(self, x):
 51 |         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
 52 | 
 53 | 
 54 | class BottleneckCSP(nn.Module):
 55 |     # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
 56 |     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
 57 |         super(BottleneckCSP, self).__init__()
 58 |         c_ = int(c2 * e)  # hidden channels
 59 |         self.cv1 = Conv(c1, c_, 1, 1)
 60 |         self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
 61 |         self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
 62 |         self.cv4 = Conv(2 * c_, c2, 1, 1)
 63 |         self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
 64 |         self.act = nn.LeakyReLU(0.1, inplace=True)
 65 |         self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
 66 | 
 67 |     def forward(self, x):
 68 |         y1 = self.cv3(self.m(self.cv1(x)))
 69 |         y2 = self.cv2(x)
 70 |         return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
 71 | 
 72 | 
 73 | class SPP(nn.Module):
 74 |     # Spatial pyramid pooling layer used in YOLOv3-SPP
 75 |     def __init__(self, c1, c2, k=(5, 9, 13)):
 76 |         super(SPP, self).__init__()
 77 |         c_ = c1 // 2  # hidden channels
 78 |         self.cv1 = Conv(c1, c_, 1, 1)
 79 |         self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
 80 |         self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
 81 | 
 82 |     def forward(self, x):
 83 |         x = self.cv1(x)
 84 |         return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
 85 | 
 86 | '''
 87 | # Original slice-based Focus, disabled by 张昊 (kept for reference only)
 88 | class Focus(nn.Module):
 89 |     # Focus wh information into c-space
 90 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
 91 |         super(Focus, self).__init__()
 92 |         self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
 93 | 
 94 |     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
 95 |         y = self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
 96 |         print(" ======== >>>>>>>> Focus  : ", y.shape)
 97 |         return y
 98 | '''
 99 | 
100 | class Focus(nn.Module):
101 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
102 |         super(Focus, self).__init__()
103 |         self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
104 | 
105 |     def forward(self, x):
106 |         y  =   self.conv(torch.cat([torch.nn.functional.interpolate(x, scale_factor=0.5, recompute_scale_factor=True),
107 |                                     torch.nn.functional.interpolate(x, scale_factor=0.5, recompute_scale_factor=True),
108 |                                     torch.nn.functional.interpolate(x, scale_factor=0.5, recompute_scale_factor=True),
109 |                                     torch.nn.functional.interpolate(x, scale_factor=0.5, recompute_scale_factor=True)], 1))
110 |         #print(" ======== >>>>>>>> Focus  : ", y.shape)
111 |         return y
112 | 
113 | '''
114 | UserWarning: 
115 | The default behavior for interpolate/upsample with float scale_factor changed in 1.6.0 to align with other frameworks/libraries, 
116 | 
117 | and uses scale_factor directly, instead of relying on the computed output size. 
118 | 
119 | If you wish to keep the old behavior, please set recompute_scale_factor=True. 
120 | 
121 | See the documentation of nn.Upsample for details. 
122 |   
123 | warnings.warn("The default behavior for interpolate/upsample with float scale_factor changed "
124 | '''
125 | 
126 | class Concat(nn.Module):
127 |     # Concatenate a list of tensors along dimension
128 |     def __init__(self, dimension=1):
129 |         super(Concat, self).__init__()
130 |         self.d = dimension
131 | 
132 |     def forward(self, x):
133 |         return torch.cat(x, self.d)
134 | 
135 | 
136 | class NMS(nn.Module):
137 |     # Non-Maximum Suppression (NMS) module
138 |     conf = 0.25  # confidence threshold
139 |     iou = 0.45  # IoU threshold
140 |     classes = None  # (optional list) filter by class
141 | 
142 |     def __init__(self):
143 |         super(NMS, self).__init__()
144 | 
145 |     def forward(self, x):
146 |         return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
147 | 
148 | 
149 | class autoShape(nn.Module):
150 |     # Input-robust model wrapper accepting cv2/np/PIL/torch inputs; non-tensor inputs get preprocessing, inference and NMS
151 |     img_size = 640  # inference size (pixels); NOTE(review): not read by forward(), which uses its `size` argument
152 |     conf = 0.25  # NMS confidence threshold
153 |     iou = 0.45  # NMS IoU threshold
154 |     classes = None  # (optional list) filter by class
155 | 
156 |     def __init__(self, model):
157 |         super(autoShape, self).__init__()
158 |         self.model = model.eval()
159 | 
160 |     def forward(self, imgs, size=640, augment=False, profile=False):
161 |         # Supports inference from various sources. For height=720, width=1280, RGB images example inputs are:
162 |         #   opencv:     imgs = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
163 |         #   PIL:        imgs = Image.open('image.jpg')  # HWC x(720,1280,3)
164 |         #   numpy:      imgs = np.zeros((720,1280,3))  # HWC
165 |         #   torch:      imgs = torch.zeros(16,3,720,1280)  # BCHW
166 |         #   multiple:   imgs = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
167 | 
168 |         p = next(self.model.parameters())  # for device and type
169 |         if isinstance(imgs, torch.Tensor):  # torch
170 |             return self.model(imgs.to(p.device).type_as(p), augment, profile)  # tensor input: forwarded directly, no letterbox or NMS
171 | 
172 |         # Pre-process
173 |         if not isinstance(imgs, list):
174 |             imgs = [imgs]
175 |         shape0, shape1 = [], []  # image and inference shapes
176 |         batch = range(len(imgs))  # batch size
177 |         for i in batch:
178 |             imgs[i] = np.array(imgs[i])  # to numpy (replaces the entry in the caller's list)
179 |             if imgs[i].shape[0] < 5:  # image in CHW
180 |                 imgs[i] = imgs[i].transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
181 |             imgs[i] = imgs[i][:, :, :3] if imgs[i].ndim == 3 else np.tile(imgs[i][:, :, None], 3)  # enforce 3ch input
182 |             s = imgs[i].shape[:2]  # HWC
183 |             shape0.append(s)  # image shape
184 |             g = (size / max(s))  # gain that scales the longer side to `size`
185 |             shape1.append([y * g for y in s])
186 |         shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape; NOTE(review): self.stride is never assigned in this class - presumably attached by the caller, confirm
187 |         x = [letterbox(imgs[i], new_shape=shape1, auto=False)[0] for i in batch]  # pad
188 |         x = np.stack(x, 0) if batch[-1] else x[0][None]  # stack (batch[-1] is 0, i.e. falsy, for a single image)
189 |         x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
190 |         x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
191 | 
192 |         # Inference
193 |         with torch.no_grad():
194 |             y = self.model(x, augment, profile)[0]  # forward
195 |         y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
196 | 
197 |         # Post-process
198 |         for i in batch:
199 |             if y[i] is not None:
200 |                 y[i][:, :4] = scale_coords(shape1, y[i][:, :4], shape0[i])  # map boxes from inference shape back to the original image shape
201 | 
202 |         return Detections(imgs, y, self.names)  # NOTE(review): self.names is never assigned in this class - presumably attached by the caller, confirm
203 | 
204 | 
205 | class Detections:
206 |     # detections class for YOLOv5 inference results
207 |     def __init__(self, imgs, pred, names=None):
208 |         super(Detections, self).__init__()
209 |         self.imgs = imgs  # list of images as numpy arrays
210 |         self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
211 |         self.names = names  # class names
212 |         self.xyxy = pred  # xyxy pixels
213 |         self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
214 |         d = pred[0].device  # device
215 |         gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
216 |         self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
217 |         self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
218 |         self.n = len(self.pred)
219 | 
220 |     def display(self, pprint=False, show=False, save=False):
221 |         colors = color_list()
222 |         for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
223 |             str = f'Image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
224 |             if pred is not None:
225 |                 for c in pred[:, -1].unique():
226 |                     n = (pred[:, -1] == c).sum()  # detections per class
227 |                     str += f'{n} {self.names[int(c)]}s, '  # add to string
228 |                 if show or save:
229 |                     img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
230 |                     for *box, conf, cls in pred:  # xyxy, confidence, class
231 |                         # str += '%s %.2f, ' % (names[int(cls)], conf)  # label
232 |                         ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10])  # plot
233 |             if save:
234 |                 f = f'results{i}.jpg'
235 |                 str += f"saved to '{f}'"
236 |                 img.save(f)  # save
237 |             if show:
238 |                 img.show(f'Image {i}')  # show
239 |             if pprint:
240 |                 print(str)
241 | 
242 |     def print(self):
243 |         self.display(pprint=True)  # print results
244 | 
245 |     def show(self):
246 |         self.display(show=True)  # show results
247 | 
248 |     def save(self):
249 |         self.display(save=True)  # save results
250 | 
251 |     def __len__(self):
252 |         return self.n
253 | 
254 |     def tolist(self):
255 |         # return a list of Detections objects, i.e. 'for result in results.tolist():'
256 |         x = [Detections([self.imgs[i]], [self.pred[i]], self.names) for i in range(self.n)]
257 |         for d in x:
258 |             for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
259 |                 setattr(d, k, getattr(d, k)[0])  # pop out of list
260 |         return x
261 | 
262 | 
263 | class Flatten(nn.Module):
264 |     # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
265 |     @staticmethod
266 |     def forward(x):
267 |         return x.view(x.size(0), -1)
268 | 
269 | 
270 | class Classify(nn.Module):
271 |     # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
272 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
273 |         super(Classify, self).__init__()
274 |         self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
275 |         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
276 |         self.flat = Flatten()
277 | 
278 |     def forward(self, x):
279 |         z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
280 |         return self.flat(self.conv(z))  # flatten to x(b,c2)
281 | 


--------------------------------------------------------------------------------
/include/ncnn/allocator.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef NCNN_ALLOCATOR_H
 16 | #define NCNN_ALLOCATOR_H
 17 | 
 18 | #ifdef _WIN32
 19 | #define WIN32_LEAN_AND_MEAN
 20 | #include <windows.h>
 21 | #endif
 22 | 
 23 | #include "platform.h"
 24 | 
 25 | #include <stdlib.h>
 26 | 
 27 | #if NCNN_VULKAN
 28 | #include <vulkan/vulkan.h>
 29 | #endif // NCNN_VULKAN
 30 | 
 31 | #if __ANDROID_API__ >= 26
 32 | #include <android/hardware_buffer.h>
 33 | #endif // __ANDROID_API__ >= 26
 34 | 
 35 | namespace ncnn {
 36 | 
 37 | #if __AVX__
 38 | // the alignment of all the allocated buffers
 39 | #define MALLOC_ALIGN 256
 40 | #else
 41 | // the alignment of all the allocated buffers
 42 | #define MALLOC_ALIGN 16
 43 | #endif
 44 | 
 45 | // Rounds a pointer up to the next multiple of n bytes
 46 | // ptr Pointer to align; returned unchanged when it is already aligned
 47 | // n Alignment in bytes, must be a power of two (defaults to sizeof(_Tp))
 48 | template<typename _Tp>
 49 | static inline _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
 50 | {
 51 |     return (_Tp*)(((size_t)ptr + (n - 1)) & ~(size_t)(n - 1));
 52 | }
 53 | 
 54 | // Rounds a buffer size up to a multiple of n bytes
 55 | // Returns the smallest value that is greater or equal to sz and divisible by n
 56 | // sz Buffer size to round up
 57 | // n Alignment in bytes, must be a power of two
 58 | static inline size_t alignSize(size_t sz, int n)
 59 | {
 60 |     return (sz + (n - 1)) & ~(size_t)(n - 1);
 61 | }
 62 | 
 63 | static inline void* fastMalloc(size_t size) // allocate `size` bytes aligned to MALLOC_ALIGN; the branches mirror fastFree()
 64 | {
 65 | #if _MSC_VER
 66 |     return _aligned_malloc(size, MALLOC_ALIGN);
 67 | #elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
 68 |     void* ptr = 0;
 69 |     if (posix_memalign(&ptr, MALLOC_ALIGN, size)) // a non-zero return is treated as failure
 70 |         ptr = 0;
 71 |     return ptr;
 72 | #elif __ANDROID__ && __ANDROID_API__ < 17
 73 |     return memalign(MALLOC_ALIGN, size);
 74 | #else // generic fallback: over-allocate with malloc() and align by hand
 75 |     unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + MALLOC_ALIGN); // extra room for the saved pointer plus alignment slack
 76 |     if (!udata)
 77 |         return 0;
 78 |     unsigned char** adata = alignPtr((unsigned char**)udata + 1, MALLOC_ALIGN); // skip one pointer slot, then round up
 79 |     adata[-1] = udata; // stash the malloc() result just below the aligned block; fastFree() reads it back
 80 |     return adata;
 81 | #endif
 82 | }
 83 | 
 84 | static inline void fastFree(void* ptr) // release a block obtained from fastMalloc(); a null ptr is ignored
 85 | {
 86 |     if (ptr)
 87 |     {
 88 | #if _MSC_VER
 89 |         _aligned_free(ptr);
 90 | #elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
 91 |         free(ptr);
 92 | #elif __ANDROID__ && __ANDROID_API__ < 17
 93 |         free(ptr);
 94 | #else // generic fallback: the pointer returned by malloc() was stored one slot below the aligned block
 95 |         unsigned char* udata = ((unsigned char**)ptr)[-1];
 96 |         free(udata);
 97 | #endif
 98 |     }
 99 | }
100 | 
101 | #if NCNN_THREADS
102 | // NCNN_XADD(addr, delta): exchange-add used for reference counters; adds delta to *addr and yields the previous value
103 | #if defined __riscv && !defined __riscv_atomic
104 | // riscv target without A extension: plain, non-atomic read-modify-write
105 | static inline int NCNN_XADD(int* addr, int delta)
106 | {
107 |     int tmp = *addr;
108 |     *addr += delta;
109 |     return tmp;
110 | }
111 | #elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
112 | // atomic increment on the linux version of the Intel(tm) compiler
113 | #define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
114 | #elif defined __GNUC__
115 | #if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
116 | #ifdef __ATOMIC_ACQ_REL
117 | #define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
118 | #else
119 | #define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
120 | #endif
121 | #else
122 | #if defined __ATOMIC_ACQ_REL && !defined __clang__
123 | // version for gcc >= 4.7
124 | #define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
125 | #else
126 | #define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
127 | #endif
128 | #endif
129 | #elif defined _MSC_VER && !defined RC_INVOKED
130 | #define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
131 | #else
132 | // thread-unsafe branch: no known atomic primitive for this compiler, plain read-modify-write
133 | static inline int NCNN_XADD(int* addr, int delta)
134 | {
135 |     int tmp = *addr;
136 |     *addr += delta;
137 |     return tmp;
138 | }
139 | #endif
140 | #else  // NCNN_THREADS disabled: plain, non-atomic add that returns the previous value
141 | static inline int NCNN_XADD(int* addr, int delta)
142 | {
143 |     int tmp = *addr;
144 |     *addr += delta;
145 |     return tmp;
146 | }
147 | #endif // NCNN_THREADS
148 | 
149 | class Allocator // abstract allocation interface; concrete allocators implement fastMalloc/fastFree
150 | {
151 | public:
152 |     virtual ~Allocator();
153 |     virtual void* fastMalloc(size_t size) = 0; // pure virtual: obtain a block of `size` bytes
154 |     virtual void fastFree(void* ptr) = 0;      // pure virtual: give back a block obtained from fastMalloc
155 | };
156 | 
157 | class PoolAllocator : public Allocator // Allocator that keeps two (size, pointer) lists, each paired with a Mutex
158 | {
159 | public:
160 |     PoolAllocator();
161 |     ~PoolAllocator();
162 | 
163 |     // scr is a ratio in the range 0 ~ 1
164 |     // default cr = 0.75
165 |     void set_size_compare_ratio(float scr);
166 | 
167 |     // release all budgets immediately
168 |     void clear();
169 | 
170 |     virtual void* fastMalloc(size_t size);
171 |     virtual void fastFree(void* ptr);
172 | 
173 | private:
174 |     Mutex budgets_lock; // NOTE(review): Mutex, std::list and std::pair are not included by this header - presumably supplied via platform.h, confirm
175 |     Mutex payouts_lock;
176 |     unsigned int size_compare_ratio; // 0~256; presumably the 0 ~ 1 ratio in fixed point - confirm against the implementation
177 |     std::list<std::pair<size_t, void*> > budgets; // (size, pointer) entries
178 |     std::list<std::pair<size_t, void*> > payouts; // (size, pointer) entries
179 | };
180 | 
181 | class UnlockedPoolAllocator : public Allocator // same members as PoolAllocator but without the two Mutex fields
182 | {
183 | public:
184 |     UnlockedPoolAllocator();
185 |     ~UnlockedPoolAllocator();
186 | 
187 |     // scr is a ratio in the range 0 ~ 1
188 |     // default cr = 0.75
189 |     void set_size_compare_ratio(float scr);
190 | 
191 |     // release all budgets immediately
192 |     void clear();
193 | 
194 |     virtual void* fastMalloc(size_t size);
195 |     virtual void fastFree(void* ptr);
196 | 
197 | private:
198 |     unsigned int size_compare_ratio; // 0~256; presumably the 0 ~ 1 ratio in fixed point - confirm against the implementation
199 |     std::list<std::pair<size_t, void*> > budgets; // (size, pointer) entries
200 |     std::list<std::pair<size_t, void*> > payouts; // (size, pointer) entries
201 | };
202 | 
203 | #if NCNN_VULKAN
204 | 
205 | class VulkanDevice;
206 | 
207 | class VkBufferMemory // plain record describing one Vulkan buffer allocation (all members public)
208 | {
209 | public:
210 |     VkBuffer buffer;
211 | 
212 |     // the base offset assigned by allocator
213 |     size_t offset;
214 |     size_t capacity;
215 | 
216 |     VkDeviceMemory memory;
217 |     void* mapped_ptr;
218 | 
219 |     // buffer state, modified by command functions internally (mutable so it can change through a const object)
220 |     mutable VkAccessFlags access_flags;
221 |     mutable VkPipelineStageFlags stage_flags;
222 | 
223 |     // initialized and modified by mat
224 |     int refcount;
225 | };
226 | 
227 | class VkImageMemory // plain record describing one Vulkan image allocation (all members public)
228 | {
229 | public:
230 |     VkImage image;
231 |     VkImageView imageview;
232 | 
233 |     // underlying info assigned by allocator
234 |     int width;
235 |     int height;
236 |     int depth;
237 |     VkFormat format;
238 | 
239 |     VkDeviceMemory memory;
240 |     void* mapped_ptr;
241 | 
242 |     // the base offset assigned by allocator
243 |     size_t bind_offset;
244 |     size_t bind_capacity;
245 | 
246 |     // image state, modified by command functions internally (mutable so it can change through a const object)
247 |     mutable VkAccessFlags access_flags;
248 |     mutable VkImageLayout image_layout;
249 |     mutable VkPipelineStageFlags stage_flags;
250 | 
251 |     // in-execution state, modified by command functions internally
252 |     mutable int command_refcount;
253 | 
254 |     // initialized and modified by mat
255 |     int refcount;
256 | };
257 | 
258 | class VkAllocator // abstract base of the Vulkan allocators: buffer and image fastMalloc/fastFree are pure virtual
259 | {
260 | public:
261 |     VkAllocator(const VulkanDevice* _vkdev);
262 |     virtual ~VkAllocator()
263 |     {
264 |         clear(); // resolves to VkAllocator::clear() only: overrides are not dispatched from a base destructor, so subclasses must release their own resources
265 |     }
266 |     virtual void clear() // no-op in the base class; several subclasses in this header override it
267 |     {
268 |     }
269 | 
270 |     virtual VkBufferMemory* fastMalloc(size_t size) = 0; // buffer allocation, implemented by subclasses
271 |     virtual void fastFree(VkBufferMemory* ptr) = 0;
272 |     virtual int flush(VkBufferMemory* ptr); // NOTE(review): presumably flushes host writes for non-coherent mapped memory -- confirm
273 |     virtual int invalidate(VkBufferMemory* ptr); // NOTE(review): presumably invalidates host caches before reading mapped memory -- confirm
274 | 
275 |     virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0; // image allocation, implemented by subclasses
276 |     virtual void fastFree(VkImageMemory* ptr) = 0;
277 | 
278 | public:
279 |     const VulkanDevice* vkdev;
280 |     uint32_t buffer_memory_type_index;
281 |     uint32_t image_memory_type_index;
282 |     bool mappable; // NOTE(review): presumably whether the chosen memory type is host-visible -- confirm
283 |     bool coherent; // NOTE(review): presumably whether the chosen memory type is host-coherent -- confirm
284 | 
285 | protected:
286 |     VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage); // helpers below are available to subclasses only
287 |     VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index);
288 |     VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer);
289 | 
290 |     VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage);
291 |     VkImageView create_imageview(VkImage image, VkFormat format);
292 | };
293 | 
294 | class VkBlobAllocator : public VkAllocator // VkAllocator implementation keeping per-block budget lists for buffers and image memory
295 | {
296 | public:
297 |     VkBlobAllocator(const VulkanDevice* vkdev);
298 |     virtual ~VkBlobAllocator();
299 | 
300 | public:
301 |     // release all budgets immediately
302 |     virtual void clear();
303 | 
304 |     virtual VkBufferMemory* fastMalloc(size_t size);
305 |     virtual void fastFree(VkBufferMemory* ptr);
306 |     virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
307 |     virtual void fastFree(VkImageMemory* ptr);
308 | 
309 | protected:
310 |     size_t block_size;
311 |     size_t buffer_offset_alignment;
312 |     size_t bind_memory_offset_alignment;
313 |     std::vector<std::list<std::pair<size_t, size_t> > > buffer_budgets; // NOTE(review): presumably one list of free (offset, size) ranges per entry of buffer_blocks -- confirm
314 |     std::vector<VkBufferMemory*> buffer_blocks;
315 |     std::vector<std::list<std::pair<size_t, size_t> > > image_memory_budgets; // NOTE(review): presumably one list of free (offset, size) ranges per entry of image_memory_blocks -- confirm
316 |     std::vector<VkDeviceMemory> image_memory_blocks;
317 | };
318 | 
319 | class VkWeightAllocator : public VkAllocator // VkAllocator implementation tracking shared blocks, their free space, and dedicated blocks
320 | {
321 | public:
322 |     VkWeightAllocator(const VulkanDevice* vkdev);
323 |     virtual ~VkWeightAllocator();
324 | 
325 | public:
326 |     // release all blocks immediately
327 |     virtual void clear();
328 | 
329 | public:
330 |     virtual VkBufferMemory* fastMalloc(size_t size);
331 |     virtual void fastFree(VkBufferMemory* ptr);
332 |     virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
333 |     virtual void fastFree(VkImageMemory* ptr);
334 | 
335 | protected:
336 |     size_t block_size;
337 |     size_t buffer_offset_alignment;
338 |     size_t bind_memory_offset_alignment;
339 |     std::vector<size_t> buffer_block_free_spaces; // NOTE(review): presumably remaining bytes, parallel to buffer_blocks -- confirm
340 |     std::vector<VkBufferMemory*> buffer_blocks;
341 |     std::vector<VkBufferMemory*> dedicated_buffer_blocks;
342 |     std::vector<size_t> image_memory_block_free_spaces; // NOTE(review): presumably remaining bytes, parallel to image_memory_blocks -- confirm
343 |     std::vector<VkDeviceMemory> image_memory_blocks;
344 |     std::vector<VkDeviceMemory> dedicated_image_memory_blocks;
345 | };
346 | 
347 | class VkStagingAllocator : public VkAllocator // VkAllocator implementation keeping a list of reusable buffers (buffer_budgets)
348 | {
349 | public:
350 |     VkStagingAllocator(const VulkanDevice* vkdev);
351 |     virtual ~VkStagingAllocator();
352 | 
353 | public:
354 |     // size compare ratio, accepted range 0 ~ 1
355 |     // default ratio = 0.75
356 |     void set_size_compare_ratio(float scr);
357 | 
358 |     // release all budgets immediately
359 |     virtual void clear();
360 | 
361 |     virtual VkBufferMemory* fastMalloc(size_t size);
362 |     virtual void fastFree(VkBufferMemory* ptr);
363 |     virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
364 |     virtual void fastFree(VkImageMemory* ptr);
365 | 
366 | protected:
367 |     unsigned int size_compare_ratio; // 0~256 (integer form of the 0 ~ 1 ratio; presumably scr * 256 -- confirm)
368 |     std::list<VkBufferMemory*> buffer_budgets; // NOTE(review): presumably freed buffers kept for reuse -- confirm
369 | };
370 | 
371 | class VkWeightStagingAllocator : public VkAllocator // buffer-only allocator: the image overloads below are inline stubs
372 | {
373 | public:
374 |     VkWeightStagingAllocator(const VulkanDevice* vkdev);
375 |     virtual ~VkWeightStagingAllocator();
376 | 
377 | public:
378 |     virtual VkBufferMemory* fastMalloc(size_t size);
379 |     virtual void fastFree(VkBufferMemory* ptr);
380 |     virtual VkImageMemory* fastMalloc(int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/) // image allocation is not provided
381 |     {
382 |         return 0; // always a null pointer
383 |     }
384 |     virtual void fastFree(VkImageMemory* /*ptr*/) // intentionally does nothing
385 |     {
386 |     }
387 | 
388 | protected: // no protected members are declared
389 | };
390 | 
391 | #if __ANDROID_API__ >= 26
392 | class ImportAndroidHardwareBufferPipeline; // forward declaration only
393 | class VkAndroidHardwareBufferImageAllocator : public VkAllocator // image-only allocator built around an AHardwareBuffer: the buffer overloads below are inline stubs
394 | {
395 | public:
396 |     VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb);
397 |     virtual ~VkAndroidHardwareBufferImageAllocator();
398 | 
399 | public:
400 |     virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
401 |     virtual void fastFree(VkImageMemory* ptr);
402 |     virtual VkBufferMemory* fastMalloc(size_t /*size*/) // buffer allocation is not provided
403 |     {
404 |         return 0; // always a null pointer
405 |     }
406 |     virtual void fastFree(VkBufferMemory* /*ptr*/) // intentionally does nothing
407 |     {
408 |     }
409 | 
410 | public:
411 |     int init(); // NOTE(review): presumably fills the buffer*/sampler members below and returns 0 on success -- confirm
412 | 
413 |     int width() const;
414 |     int height() const;
415 |     uint64_t external_format() const;
416 | 
417 | public:
418 |     AHardwareBuffer* hb;
419 |     AHardwareBuffer_Desc bufferDesc;
420 |     VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties;
421 |     VkAndroidHardwareBufferPropertiesANDROID bufferProperties;
422 |     VkSamplerYcbcrConversionKHR samplerYcbcrConversion;
423 | };
424 | #endif // __ANDROID_API__ >= 26
425 | 
426 | #endif // NCNN_VULKAN
427 | 
428 | } // namespace ncnn
429 | 
430 | #endif // NCNN_ALLOCATOR_H
431 | 


--------------------------------------------------------------------------------
/include/ncnn/benchmark.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_BENCHMARK_H
16 | #define NCNN_BENCHMARK_H
17 | 
18 | #include "layer.h"
19 | #include "mat.h"
20 | #include "platform.h"
21 | 
22 | namespace ncnn {
23 | 
24 | // get the current timestamp in milliseconds
25 | double get_current_time();
26 | 
27 | #if NCNN_BENCHMARK
28 | 
29 | void benchmark(const Layer* layer, double start, double end); // NOTE(review): presumably reports the time between start and end for layer -- confirm
30 | void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_blob, double start, double end); // overload that also receives the layer's input and output blobs
31 | 
32 | #endif // NCNN_BENCHMARK
33 | 
34 | } // namespace ncnn
35 | 
36 | #endif // NCNN_BENCHMARK_H
37 | 


--------------------------------------------------------------------------------
/include/ncnn/blob.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_BLOB_H
16 | #define NCNN_BLOB_H
17 | 
18 | #include "mat.h"
19 | #include "platform.h"
20 | 
21 | namespace ncnn {
22 | 
23 | class Blob // graph edge description: name (optional), producing layer, consuming layers and a shape hint
24 | {
25 | public:
26 |     // empty
27 |     Blob();
28 | 
29 | public:
30 | #if NCNN_STRING
31 |     // blob name (only present when NCNN_STRING is enabled)
32 |     std::string name;
33 | #endif // NCNN_STRING
34 |     // index of the layer that produces this blob as output
35 |     int producer;
36 |     // indices of the layers that need this blob as input
37 |     std::vector<int> consumers;
38 |     // shape hint
39 |     Mat shape;
40 | };
41 | 
42 | } // namespace ncnn
43 | 
44 | #endif // NCNN_BLOB_H
45 | 


--------------------------------------------------------------------------------
/include/ncnn/c_api.h:
--------------------------------------------------------------------------------
  1 | /* Tencent is pleased to support the open source community by making ncnn available.
  2 |  *
  3 |  * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
  4 |  *
  5 |  * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 |  * in compliance with the License. You may obtain a copy of the License at
  7 |  *
  8 |  * https://opensource.org/licenses/BSD-3-Clause
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software distributed
 11 |  * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 |  * CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 |  * specific language governing permissions and limitations under the License.
 14 |  */
 15 | 
 16 | #ifndef NCNN_C_API_H
 17 | #define NCNN_C_API_H
 18 | 
 19 | #include <stddef.h>
 20 | #include "platform.h"
 21 | 
 22 | #ifdef __cplusplus
 23 | extern "C" {
 24 | #endif
 25 | 
 26 | /* mat api: opaque handle plus create/destroy and read-only accessors */
 27 | typedef struct __ncnn_mat_t* ncnn_mat_t; /* opaque: the struct is not defined in this header */
 28 | 
 29 | ncnn_mat_t ncnn_mat_create(void); /* (void) gives C callers a real prototype; () would accept any arguments */
 30 | ncnn_mat_t ncnn_mat_create_1d(int w);
 31 | ncnn_mat_t ncnn_mat_create_2d(int w, int h);
 32 | ncnn_mat_t ncnn_mat_create_3d(int w, int h, int c);
 33 | ncnn_mat_t ncnn_mat_create_1d_packed(int w, size_t elemsize, int elempack);
 34 | ncnn_mat_t ncnn_mat_create_2d_packed(int w, int h, size_t elemsize, int elempack);
 35 | ncnn_mat_t ncnn_mat_create_3d_packed(int w, int h, int c, size_t elemsize, int elempack);
 36 | void ncnn_mat_destroy(ncnn_mat_t mat); /* presumably must be called once per handle returned by a create function -- confirm */
 37 | 
 38 | int ncnn_mat_get_dims(ncnn_mat_t mat);
 39 | int ncnn_mat_get_w(ncnn_mat_t mat);
 40 | int ncnn_mat_get_h(ncnn_mat_t mat);
 41 | int ncnn_mat_get_c(ncnn_mat_t mat);
 42 | size_t ncnn_mat_get_elemsize(ncnn_mat_t mat);
 43 | int ncnn_mat_get_elempack(ncnn_mat_t mat);
 44 | size_t ncnn_mat_get_cstep(ncnn_mat_t mat);
 45 | void* ncnn_mat_get_data(ncnn_mat_t mat); /* presumably points into storage owned by mat -- confirm before freeing or outliving mat */
 46 | 
 47 | #if NCNN_PIXEL
 48 | 
 49 | /* mat pixel api: pixel type codes for the `type` argument of the pixel functions */
 50 | #define NCNN_MAT_PIXEL_RGB       1
 51 | #define NCNN_MAT_PIXEL_BGR       2
 52 | #define NCNN_MAT_PIXEL_GRAY      3
 53 | #define NCNN_MAT_PIXEL_RGBA      4
 54 | #define NCNN_MAT_PIXEL_BGRA      5
 55 | #define NCNN_MAT_PIXEL_X2Y(X, Y) ((X) | ((Y) << 16)) /* X in the low bits, Y shifted left by 16; arguments parenthesized so expressions expand correctly */
 56 | ncnn_mat_t ncnn_mat_from_pixels(const unsigned char* pixels, int type, int w, int h, int stride); /* type: presumably one of the NCNN_MAT_PIXEL_* codes or an X2Y combination -- confirm */
 57 | ncnn_mat_t ncnn_mat_from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height); /* as above, with a target size */
 58 | void ncnn_mat_to_pixels(ncnn_mat_t mat, unsigned char* pixels, int type, int stride); /* writes into the caller-provided pixels buffer */
 59 | void ncnn_mat_to_pixels_resize(ncnn_mat_t mat, unsigned char* pixels, int type, int target_width, int target_height, int target_stride); /* as above, with a target size */
 60 | 
 61 | #endif // NCNN_PIXEL
 62 | 
 63 | void ncnn_mat_substract_mean_normalize(ncnn_mat_t mat, const float* mean_vals, const float* norm_vals); /* "substract" (sic) is part of the exported name; do not respell */
 64 | 
 65 | /* option api: opaque handle plus getters/setters for individual options */
 66 | typedef struct __ncnn_option_t* ncnn_option_t; /* opaque: the struct is not defined in this header */
 67 | 
 68 | ncnn_option_t ncnn_option_create(void); /* (void) gives C callers a real prototype; () would accept any arguments */
 69 | void ncnn_option_destroy(ncnn_option_t opt);
 70 | 
 71 | int ncnn_option_get_num_threads(ncnn_option_t opt);
 72 | void ncnn_option_set_num_threads(ncnn_option_t opt, int num_threads);
 73 | 
 74 | int ncnn_option_get_use_vulkan_compute(ncnn_option_t opt); /* int used as a boolean flag -- presumably 0 or 1, confirm */
 75 | void ncnn_option_set_use_vulkan_compute(ncnn_option_t opt, int use_vulkan_compute);
 76 | 
 77 | /* blob api: read-only queries on an opaque blob handle */
 78 | typedef struct __ncnn_blob_t* ncnn_blob_t;
 79 | 
 80 | const char* ncnn_blob_get_name(ncnn_blob_t blob);
 81 | 
 82 | int ncnn_blob_get_producer(ncnn_blob_t blob);
 83 | int ncnn_blob_get_consumer_count(ncnn_blob_t blob);
 84 | int ncnn_blob_get_consumer(ncnn_blob_t blob, int i); /* i: presumably in [0, consumer_count) -- confirm bounds handling */
 85 | 
 86 | void ncnn_blob_get_shape(ncnn_blob_t blob, int* dims, int* w, int* h, int* c); /* presumably writes the shape through the four out-pointers -- confirm */
 87 | 
 88 | /* layer api: read-only queries on an opaque layer handle */
 89 | typedef struct __ncnn_layer_t* ncnn_layer_t;
 90 | 
 91 | const char* ncnn_layer_get_name(ncnn_layer_t layer);
 92 | 
 93 | int ncnn_layer_get_typeindex(ncnn_layer_t layer);
 94 | const char* ncnn_layer_get_type(ncnn_layer_t layer);
 95 | 
 96 | int ncnn_layer_get_bottom_count(ncnn_layer_t layer);
 97 | int ncnn_layer_get_bottom(ncnn_layer_t layer, int i);
 98 | int ncnn_layer_get_top_count(ncnn_layer_t layer);
 99 | int ncnn_layer_get_top(ncnn_layer_t layer, int i);
100 | 
101 | void ncnn_blob_get_bottom_shape(ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); /* takes a layer handle despite the ncnn_blob_ prefix */
102 | void ncnn_blob_get_top_shape(ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); /* takes a layer handle despite the ncnn_blob_ prefix */
103 | 
104 | /* net api: opaque network handle, loading, and layer/blob enumeration */
105 | typedef struct __ncnn_net_t* ncnn_net_t; /* opaque: the struct is not defined in this header */
106 | 
107 | ncnn_net_t ncnn_net_create(void); /* (void) gives C callers a real prototype; () would accept any arguments */
108 | void ncnn_net_destroy(ncnn_net_t net);
109 | 
110 | void ncnn_net_set_option(ncnn_net_t net, ncnn_option_t opt);
111 | 
112 | int ncnn_net_load_param(ncnn_net_t net, const char* path); /* presumably returns 0 on success -- confirm */
113 | int ncnn_net_load_model(ncnn_net_t net, const char* path); /* presumably returns 0 on success -- confirm */
114 | 
115 | int ncnn_net_get_layer_count(ncnn_net_t net);
116 | ncnn_layer_t ncnn_net_get_layer(ncnn_net_t net, int i);
117 | int ncnn_net_get_blob_count(ncnn_net_t net);
118 | ncnn_blob_t ncnn_net_get_blob(ncnn_net_t net, int i);
119 | 
120 | /* extractor api: opaque extractor handle created from a net */
121 | typedef struct __ncnn_extractor_t* ncnn_extractor_t;
122 | 
123 | ncnn_extractor_t ncnn_extractor_create(ncnn_net_t net);
124 | void ncnn_extractor_destroy(ncnn_extractor_t ex);
125 | 
126 | void ncnn_extractor_set_option(ncnn_extractor_t ex, ncnn_option_t opt);
127 | 
128 | int ncnn_extractor_input(ncnn_extractor_t ex, const char* name, ncnn_mat_t mat); /* name: presumably a blob name -- confirm */
129 | int ncnn_extractor_extract(ncnn_extractor_t ex, const char* name, ncnn_mat_t* mat); /* presumably stores the result handle in *mat; ownership not visible here -- confirm */
130 | 
131 | #ifdef __cplusplus
132 | } /* extern "C" */
133 | #endif
134 | 
135 | #endif // NCNN_C_API_H
136 | 


--------------------------------------------------------------------------------
/include/ncnn/command.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef NCNN_COMMAND_H
 16 | #define NCNN_COMMAND_H
 17 | 
 18 | #include "platform.h"
 19 | 
 20 | #if NCNN_VULKAN
 21 | 
 22 | #include "mat.h"
 23 | 
 24 | #include <vulkan/vulkan.h>
 25 | 
 26 | namespace ncnn {
 27 | 
 28 | class Pipeline;
 29 | class VkCompute // NOTE(review): record_* methods appear to queue commands that submit_and_wait() later executes -- confirm in the .cpp
 30 | {
 31 | public:
 32 |     VkCompute(const VulkanDevice* vkdev);
 33 |     virtual ~VkCompute();
 34 | 
 35 | public:
 36 |     void record_upload(const Mat& src, VkMat& dst, const Option& opt); // host Mat -> device buffer
 37 | 
 38 |     void record_upload(const Mat& src, VkImageMat& dst, const Option& opt); // host Mat -> device image
 39 | 
 40 |     void record_download(const VkMat& src, Mat& dst, const Option& opt); // device buffer -> host Mat
 41 | 
 42 |     void record_download(const VkImageMat& src, Mat& dst, const Option& opt); // device image -> host Mat
 43 | 
 44 |     void record_buffer_to_image(const VkMat& src, VkImageMat& dst, const Option& opt);
 45 | 
 46 |     void record_image_to_buffer(const VkImageMat& src, VkMat& dst, const Option& opt);
 47 | 
 48 |     void record_clone(const Mat& src, VkMat& dst, const Option& opt); // record_clone is overloaded for every Mat/VkMat/VkImageMat pairing declared here
 49 | 
 50 |     void record_clone(const Mat& src, VkImageMat& dst, const Option& opt);
 51 | 
 52 |     void record_clone(const VkMat& src, Mat& dst, const Option& opt);
 53 | 
 54 |     void record_clone(const VkImageMat& src, Mat& dst, const Option& opt);
 55 | 
 56 |     void record_clone(const VkMat& src, VkMat& dst, const Option& opt);
 57 | 
 58 |     void record_clone(const VkImageMat& src, VkImageMat& dst, const Option& opt);
 59 | 
 60 |     void record_clone(const VkMat& src, VkImageMat& dst, const Option& opt);
 61 | 
 62 |     void record_clone(const VkImageMat& src, VkMat& dst, const Option& opt);
 63 | 
 64 |     void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher); // buffer bindings only
 65 | 
 66 |     void record_pipeline(const Pipeline* pipeline, const std::vector<VkImageMat>& bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher); // image bindings only
 67 | 
 68 |     void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher); // mixed bindings; the three overloads differ only in dispatcher type
 69 |     void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher);
 70 |     void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const Mat& dispatcher);
 71 | 
 72 | #if NCNN_BENCHMARK
 73 |     void record_write_timestamp(uint32_t query);
 74 | #endif // NCNN_BENCHMARK
 75 | 
 76 | #if __ANDROID_API__ >= 26
 77 |     void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkMat& dst);
 78 | 
 79 |     void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkImageMat& dst);
 80 | #endif // __ANDROID_API__ >= 26
 81 | 
 82 |     int submit_and_wait(); // NOTE(review): int results in this class presumably use 0 for success -- confirm
 83 | 
 84 |     int reset();
 85 | 
 86 | #if NCNN_BENCHMARK
 87 |     int create_query_pool(uint32_t query_count);
 88 | 
 89 |     int get_query_pool_results(uint32_t first_query, uint32_t query_count, std::vector<uint64_t>& results);
 90 | #endif // NCNN_BENCHMARK
 91 | 
 92 | protected:
 93 |     int init();
 94 |     int begin_command_buffer();
 95 |     int end_command_buffer();
 96 | 
 97 | protected:
 98 |     const VulkanDevice* vkdev;
 99 | 
100 |     VkCommandPool compute_command_pool;
101 | 
102 |     VkCommandBuffer compute_command_buffer;
103 | 
104 |     VkFence compute_command_fence;
105 | 
106 |     std::vector<VkMat> upload_staging_buffers;
107 |     std::vector<VkMat> download_post_buffers;
108 |     std::vector<Mat> download_post_mats_fp16;
109 |     std::vector<Mat> download_post_mats;
110 | 
111 |     std::vector<VkImageMemory*> image_blocks_to_destroy;
112 | 
113 |     // the good-old path for devices without VK_KHR_push_descriptor
114 |     std::vector<VkDescriptorPool> descriptor_pools;
115 |     std::vector<VkDescriptorSet> descriptorsets;
116 | 
117 |     struct record // one entry of delayed_records; NOTE(review): type presumably selects the active union member below -- confirm
118 |     {
119 |         enum
120 |         {
121 |             TYPE_copy_buffer,
122 |             TYPE_copy_image,
123 |             TYPE_copy_buffer_to_image,
124 |             TYPE_copy_image_to_buffer,
125 |             TYPE_bind_pipeline,
126 |             TYPE_bind_descriptorsets,
127 |             TYPE_push_constants,
128 |             TYPE_dispatch,
129 |             TYPE_memory_barrers, // "barrers" (sic): the spelling is part of the identifier
130 |             TYPE_buffer_barrers,
131 |             TYPE_image_barrers,
132 | 
133 | #if NCNN_BENCHMARK
134 |             TYPE_write_timestamp,
135 | #endif // NCNN_BENCHMARK
136 | 
137 |             TYPE_post_download,
138 |             TYPE_post_cast_float16_to_float32,
139 |         };
140 | 
141 |         int type; // holds one of the TYPE_* enumerators above
142 |         VkCommandBuffer command_buffer;
143 | 
144 |         union // anonymous union: the per-type payload structs below share storage
145 |         {
146 |             struct
147 |             {
148 |                 VkBuffer src;
149 |                 VkBuffer dst;
150 |                 uint32_t region_count;
151 |                 const VkBufferCopy* regions; // raw pointer; lifetime of the pointed-to array is managed outside this struct
152 |             } copy_buffer;
153 |             struct
154 |             {
155 |                 VkImage src;
156 |                 VkImageLayout src_layout;
157 |                 VkImage dst;
158 |                 VkImageLayout dst_layout;
159 |                 uint32_t region_count;
160 |                 const VkImageCopy* regions;
161 |             } copy_image;
162 |             struct
163 |             {
164 |                 VkBuffer src;
165 |                 VkImage dst;
166 |                 VkImageLayout layout;
167 |                 uint32_t region_count;
168 |                 const VkBufferImageCopy* regions;
169 |             } copy_buffer_to_image;
170 |             struct
171 |             {
172 |                 VkImage src;
173 |                 VkImageLayout layout;
174 |                 VkBuffer dst;
175 |                 uint32_t region_count;
176 |                 const VkBufferImageCopy* regions;
177 |             } copy_image_to_buffer;
178 | 
179 |             struct
180 |             {
181 |                 VkPipelineBindPoint bind_point;
182 |                 VkPipeline pipeline;
183 |             } bind_pipeline;
184 |             struct
185 |             {
186 |                 VkPipelineBindPoint bind_point;
187 |                 VkPipelineLayout pipeline_layout;
188 |                 uint32_t descriptorset_count;
189 |                 uint32_t descriptorset_offset; // NOTE(review): presumably an index into descriptorsets -- confirm
190 |             } bind_descriptorsets;
191 |             struct
192 |             {
193 |                 VkPipelineLayout pipeline_layout;
194 |                 VkShaderStageFlags stage_flags;
195 |                 uint32_t size;
196 |                 const void* values;
197 |             } push_constants;
198 | 
199 |             struct
200 |             {
201 |                 uint32_t group_count_x;
202 |                 uint32_t group_count_y;
203 |                 uint32_t group_count_z;
204 |             } dispatch;
205 | 
206 |             struct
207 |             {
208 |                 VkPipelineStageFlags src_stage;
209 |                 VkPipelineStageFlags dst_stage;
210 |                 uint32_t barrier_count;
211 |                 const VkMemoryBarrier* barriers;
212 |             } memory_barrers;
213 |             struct
214 |             {
215 |                 VkPipelineStageFlags src_stage;
216 |                 VkPipelineStageFlags dst_stage;
217 |                 uint32_t barrier_count;
218 |                 const VkBufferMemoryBarrier* barriers;
219 |             } buffer_barrers;
220 |             struct
221 |             {
222 |                 VkPipelineStageFlags src_stage;
223 |                 VkPipelineStageFlags dst_stage;
224 |                 uint32_t barrier_count;
225 |                 const VkImageMemoryBarrier* barriers;
226 |             } image_barrers;
227 | 
228 | #if NCNN_BENCHMARK
229 |             struct
230 |             {
231 |                 uint32_t query;
232 |             } write_timestamp;
233 | #endif // NCNN_BENCHMARK
234 | 
235 |             struct
236 |             {
237 |                 uint32_t download_post_buffer_mat_offset; // NOTE(review): presumably an index into download_post_buffers -- confirm
238 |                 uint32_t download_post_mat_fp16_offset; // NOTE(review): presumably an index into download_post_mats_fp16 -- confirm
239 |             } post_download;
240 |             struct
241 |             {
242 |                 uint32_t download_post_mat_fp16_offset;
243 |                 uint32_t download_post_mat_offset; // NOTE(review): presumably an index into download_post_mats -- confirm
244 |             } post_cast_float16_to_float32;
245 |         };
246 |     };
247 | 
248 |     std::vector<record> delayed_records;
249 | 
250 | #if NCNN_BENCHMARK
251 |     uint32_t query_count;
252 |     VkQueryPool query_pool;
253 | #endif // NCNN_BENCHMARK
254 | };
255 | 
256 | class VkTransfer // upload-only counterpart of VkCompute: declares record_upload and submit_and_wait, with separate transfer and compute command resources
257 | {
258 | public:
259 |     VkTransfer(const VulkanDevice* vkdev);
260 |     ~VkTransfer(); // non-virtual destructor, unlike VkCompute
261 | 
262 | public:
263 |     void record_upload(const Mat& src, VkMat& dst, const Option& opt, bool flatten = true); // flatten defaults to true; its effect is defined in the .cpp
264 | 
265 |     void record_upload(const Mat& src, VkImageMat& dst, const Option& opt);
266 | 
267 |     int submit_and_wait(); // NOTE(review): presumably returns 0 on success -- confirm
268 | 
269 | protected:
270 |     int init();
271 |     int begin_command_buffer();
272 |     int end_command_buffer();
273 | 
274 | protected:
275 |     const VulkanDevice* vkdev;
276 | 
277 |     VkCommandPool compute_command_pool;
278 |     VkCommandPool transfer_command_pool;
279 | 
280 |     VkCommandBuffer upload_command_buffer;
281 |     VkCommandBuffer compute_command_buffer;
282 | 
283 |     VkSemaphore upload_compute_semaphore; // NOTE(review): presumably orders the upload submission before the compute submission -- confirm
284 | 
285 |     VkFence upload_command_fence;
286 |     VkFence compute_command_fence;
287 | 
288 |     std::vector<VkMat> upload_staging_buffers;
289 | };
290 | 
291 | } // namespace ncnn
292 | 
293 | #endif // NCNN_VULKAN
294 | 
295 | #endif // NCNN_COMMAND_H
296 | 


--------------------------------------------------------------------------------
/include/ncnn/cpu.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_CPU_H
16 | #define NCNN_CPU_H
17 | 
18 | #include <stddef.h>
19 | 
20 | #if defined __ANDROID__ || defined __linux__
21 | #include <sched.h> // cpu_set_t
22 | #endif
23 | 
24 | namespace ncnn {
25 | 
26 | class CpuSet // set of cpu indices; the data member is platform-specific (see the #if blocks below)
27 | {
28 | public:
29 |     CpuSet();
30 |     void enable(int cpu);
31 |     void disable(int cpu);
32 |     void disable_all();
33 |     bool is_enabled(int cpu) const;
34 |     int num_enabled() const;
35 | 
36 | public:
37 | #if defined __ANDROID__ || defined __linux__
38 |     cpu_set_t cpu_set; // Android/Linux: backed by the cpu_set_t from <sched.h>
39 | #endif
40 | #if __APPLE__
41 |     unsigned int policy; // Apple: a single unsigned value; its encoding is defined in the .cpp
42 | #endif
43 | };
44 | 
45 | // test optional cpu features
46 | // neon = armv7 neon or aarch64 asimd
47 | int cpu_support_arm_neon();
48 | // vfpv4 = armv7 fp16 + fma
49 | int cpu_support_arm_vfpv4();
50 | // asimdhp = aarch64 asimd half precision
51 | int cpu_support_arm_asimdhp();
52 | 
53 | // avx2 = x86_64 avx2 + fma + f16c
54 | int cpu_support_x86_avx2();
55 | 
56 | // cpu info
57 | int get_cpu_count();
58 | int get_little_cpu_count();
59 | int get_big_cpu_count();
60 | 
61 | // bind all threads on little clusters if powersave is enabled
62 | // affects HMP arch cpus like ARM big.LITTLE
63 | // only implemented on android at the moment
64 | // switching powersave is expensive and not thread-safe
65 | // 0 = all cores enabled (default)
66 | // 1 = only little clusters enabled
67 | // 2 = only big clusters enabled
68 | // the setter function returns 0 on success
69 | int get_cpu_powersave();
70 | int set_cpu_powersave(int powersave);
71 | 
72 | // convenient wrapper: affinity mask for a powersave value (0, 1 or 2 as listed above)
73 | const CpuSet& get_cpu_thread_affinity_mask(int powersave);
74 | 
75 | // set explicit thread affinity
76 | int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask);
77 | 
78 | // misc function wrappers for openmp routines
79 | int get_omp_num_threads();
80 | void set_omp_num_threads(int num_threads);
81 | 
82 | int get_omp_dynamic();
83 | void set_omp_dynamic(int dynamic);
84 | 
85 | int get_omp_thread_num();
86 | 
87 | int get_kmp_blocktime();
88 | void set_kmp_blocktime(int time_ms);
89 | 
90 | } // namespace ncnn
91 | 
92 | #endif // NCNN_CPU_H
93 | 


--------------------------------------------------------------------------------
/include/ncnn/datareader.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_DATAREADER_H
16 | #define NCNN_DATAREADER_H
17 | 
18 | #include "platform.h"
19 | #if NCNN_STDIO || NCNN_STRING
20 | #include <stdio.h>
21 | #endif
22 | 
23 | #if __ANDROID_API__ >= 9
24 | #include <android/asset_manager.h>
25 | #endif
26 | 
27 | namespace ncnn {
28 | 
29 | // data read wrapper: polymorphic base class, subclasses override scan() and read()
30 | class DataReader
31 | {
32 | public:
33 |     virtual ~DataReader();
34 | 
35 | #if NCNN_STRING
36 |     // parse plain param text (only declared when NCNN_STRING is enabled)
37 |     // return 1 if scan success
38 |     virtual int scan(const char* format, void* p) const;
39 | #endif // NCNN_STRING
40 | 
41 |     // read binary param and model data into buf
42 |     // return bytes read
43 |     virtual size_t read(void* buf, size_t size) const;
44 | };
45 | 
46 | #if NCNN_STDIO
47 | class DataReaderFromStdio : public DataReader // DataReader backed by a caller-supplied FILE*
48 | {
49 | public:
50 |     DataReaderFromStdio(FILE* fp); // NOTE(review): nothing in this header closes fp (no destructor is declared) -- presumably the caller keeps ownership, confirm
51 | 
52 | #if NCNN_STRING
53 |     virtual int scan(const char* format, void* p) const;
54 | #endif // NCNN_STRING
55 |     virtual size_t read(void* buf, size_t size) const;
56 | 
57 | protected:
58 |     FILE* fp;
59 | };
60 | #endif // NCNN_STDIO
61 | 
62 | class DataReaderFromMemory : public DataReader // DataReader backed by a caller-owned memory pointer held by reference
63 | {
64 | public:
65 |     DataReaderFromMemory(const unsigned char*& mem); // takes the pointer by reference: the caller's pointer variable must outlive this reader
66 | 
67 | #if NCNN_STRING
68 |     virtual int scan(const char* format, void* p) const;
69 | #endif // NCNN_STRING
70 |     virtual size_t read(void* buf, size_t size) const;
71 | 
72 | protected:
73 |     const unsigned char*& mem; // reference member, so const methods can still modify the caller's pointer; presumably advanced as data is consumed -- confirm
74 | };
75 | 
76 | #if __ANDROID_API__ >= 9
77 | class DataReaderFromAndroidAsset : public DataReader // DataReader over an Android AAsset; only declared when __ANDROID_API__ >= 9
78 | {
79 | public:
80 |     DataReaderFromAndroidAsset(AAsset* asset); // NOTE(review): no destructor is declared here, so the asset is presumably closed by the caller -- confirm
81 | 
82 | #if NCNN_STRING
83 |     virtual int scan(const char* format, void* p) const; // redeclares DataReader::scan
84 | #endif // NCNN_STRING
85 |     virtual size_t read(void* buf, size_t size) const; // redeclares DataReader::read
86 | 
87 | protected:
88 |     AAsset* asset; // asset handle (presumably the constructor argument; the constructor body is not in this header)
89 |     mutable const unsigned char* mem; // mutable, so the const scan()/read() methods are allowed to reassign it
90 | };
91 | #endif // __ANDROID_API__ >= 9
92 | 
93 | } // namespace ncnn
94 | 
95 | #endif // NCNN_DATAREADER_H
96 | 


--------------------------------------------------------------------------------
/include/ncnn/gpu.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef NCNN_GPU_H
 16 | #define NCNN_GPU_H
 17 | 
 18 | #include "platform.h"
 19 | 
 20 | #if NCNN_VULKAN
 21 | 
 22 | #include "mat.h"
 23 | 
 24 | #include <vulkan/vulkan.h>
 25 | 
 26 | namespace ncnn {
 27 | 
 28 | // instance: create / destroy the gpu instance (NOTE(review): the return convention of create_gpu_instance is not visible here; presumably 0 on success)
 29 | int create_gpu_instance();
 30 | void destroy_gpu_instance();
 31 | 
 32 | // instance extension capability flags, one per extension named in the identifier (declared extern here, defined outside this header)
 33 | extern int support_VK_KHR_external_memory_capabilities;
 34 | extern int support_VK_KHR_get_physical_device_properties2;
 35 | extern int support_VK_KHR_get_surface_capabilities2;
 36 | extern int support_VK_KHR_surface;
 37 | extern int support_VK_EXT_debug_utils;
 38 | #if __ANDROID_API__ >= 26
 39 | extern int support_VK_KHR_android_surface;
 40 | #endif // __ANDROID_API__ >= 26
 41 | 
 42 | // VK_KHR_external_memory_capabilities entry point (function pointer, defined outside this header)
 43 | extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR;
 44 | 
 45 | // VK_KHR_get_physical_device_properties2 entry points
 46 | extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
 47 | extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
 48 | extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR;
 49 | extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR;
 50 | extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR;
 51 | extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR;
 52 | extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparseImageFormatProperties2KHR;
 53 | 
 54 | // VK_KHR_get_surface_capabilities2 entry points
 55 | extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR;
 56 | extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR;
 57 | 
 58 | // VK_KHR_surface entry points
 59 | extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
 60 | extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR;
 61 | extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR;
 62 | extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR;
 63 | extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR;
 64 | 
 65 | #if __ANDROID_API__ >= 26
 66 | // VK_KHR_android_surface entry point
 67 | extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR;
 68 | #endif // __ANDROID_API__ >= 26
 69 | 
 70 | // get info: device count and the index used as the default device_index argument elsewhere in this header
 71 | int get_gpu_count();
 72 | int get_default_gpu_index();
 73 | 
 74 | class GpuInfo // all-public record describing one gpu; exposed through get_gpu_info() and VulkanDevice::info
 75 | {
 76 | public:
 77 |     // vulkan physical device handle this record describes
 78 |     VkPhysicalDevice physical_device;
 79 | 
 80 |     // memory properties (Vulkan memory heaps and memory types)
 81 |     VkPhysicalDeviceMemoryProperties physicalDeviceMemoryProperties;
 82 | 
 83 |     // info: api / driver versions, vendor / device ids, name and pipeline cache uuid
 84 |     uint32_t api_version;
 85 |     uint32_t driver_version;
 86 |     uint32_t vendor_id;
 87 |     uint32_t device_id;
 88 |     std::string device_name;
 89 |     uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
 90 | 
 91 |     // 0 = discrete gpu
 92 |     // 1 = integrated gpu
 93 |     // 2 = virtual gpu
 94 |     // 3 = cpu
 95 |     int type; // device type, one of the codes listed above
 96 | 
 97 |     // hardware limit (presumably mirrors the corresponding VkPhysicalDeviceLimits fields -- confirm where these are filled in)
 98 |     uint32_t max_shared_memory_size;
 99 |     uint32_t max_workgroup_count[3];
100 |     uint32_t max_workgroup_invocations;
101 |     uint32_t max_workgroup_size[3];
102 |     size_t memory_map_alignment;
103 |     size_t buffer_offset_alignment;
104 |     size_t non_coherent_atom_size;
105 |     size_t buffer_image_granularity;
106 |     uint32_t max_image_dimension_1d;
107 |     uint32_t max_image_dimension_2d;
108 |     uint32_t max_image_dimension_3d;
109 |     float timestamp_period;
110 | 
111 |     // runtime: queue family index for each queue role (compute / graphics / transfer)
112 |     uint32_t compute_queue_family_index;
113 |     uint32_t graphics_queue_family_index;
114 |     uint32_t transfer_queue_family_index;
115 | 
116 |     uint32_t compute_queue_count; // queue counts for the same three roles
117 |     uint32_t graphics_queue_count;
118 |     uint32_t transfer_queue_count;
119 | 
120 |     // property (presumably true when compute and transfer share one queue family -- confirm)
121 |     bool unified_compute_transfer_queue;
122 | 
123 |     // subgroup size and which subgroup operation classes are supported
124 |     uint32_t subgroup_size;
125 |     bool support_subgroup_basic;
126 |     bool support_subgroup_vote;
127 |     bool support_subgroup_ballot;
128 |     bool support_subgroup_shuffle;
129 | 
130 |     // bug is not feature: bug flags (the exact condition behind each flag is decided where it is set, not in this header)
131 |     bool bug_storage_buffer_no_l1;
132 |     bool bug_corrupted_online_pipeline_cache;
133 | 
134 |     // but sometimes bug is a feature
135 |     bool bug_implicit_fp16_arithmetic;
136 | 
137 |     // fp16 and int8 feature
138 |     bool support_fp16_packed;
139 |     bool support_fp16_storage;
140 |     bool support_fp16_arithmetic;
141 |     bool support_int8_storage;
142 |     bool support_int8_arithmetic;
143 | 
144 |     // ycbcr conversion feature
145 |     bool support_ycbcr_conversion;
146 | 
147 |     // extension capability, one int per device extension (NOTE(review): int rather than bool -- presumably 0 means unsupported; confirm what nonzero values encode)
148 |     int support_VK_KHR_8bit_storage;
149 |     int support_VK_KHR_16bit_storage;
150 |     int support_VK_KHR_bind_memory2;
151 |     int support_VK_KHR_dedicated_allocation;
152 |     int support_VK_KHR_descriptor_update_template;
153 |     int support_VK_KHR_external_memory;
154 |     int support_VK_KHR_get_memory_requirements2;
155 |     int support_VK_KHR_maintenance1;
156 |     int support_VK_KHR_push_descriptor;
157 |     int support_VK_KHR_sampler_ycbcr_conversion;
158 |     int support_VK_KHR_shader_float16_int8;
159 |     int support_VK_KHR_shader_float_controls;
160 |     int support_VK_KHR_storage_buffer_storage_class;
161 |     int support_VK_KHR_swapchain;
162 |     int support_VK_EXT_memory_budget;
163 |     int support_VK_EXT_queue_family_foreign;
164 | #if __ANDROID_API__ >= 26
165 |     int support_VK_ANDROID_external_memory_android_hardware_buffer;
166 | #endif // __ANDROID_API__ >= 26
167 | };
168 | 
169 | const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index()); // returns a reference, not a copy; an omitted device_index is filled in by calling get_default_gpu_index()
170 | 
171 | class VkAllocator;
172 | class VkCompute;
173 | class Layer;
174 | class Packing_vulkan;
175 | class Option;
176 | class PipelineCache;
177 | class VulkanDevice // wraps one logical VkDevice together with its queues, default allocators, sampler, dummy resources, pipeline cache and device extension entry points
178 | {
179 | public:
180 |     VulkanDevice(int device_index = get_default_gpu_index()); // an omitted device_index is filled in by calling get_default_gpu_index()
181 |     ~VulkanDevice();
182 | 
183 |     const GpuInfo& info; // reference member: the GpuInfo record itself is stored elsewhere
184 | 
185 |     VkDevice vkdevice() const // accessor for the private logical device handle
186 |     {
187 |         return device;
188 |     }
189 | 
190 |     VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const;
191 | 
192 |     // with fixed workgroup size
193 |     VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) const;
194 | 
195 |     // helper for creating pipeline objects; each result is written through the trailing out-pointer (the int is a status code, presumably 0 on success -- confirm)
196 |     int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const;
197 |     int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const;
198 |     int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, VkPipeline* pipeline) const;
199 |     int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const;
200 | 
201 |     uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not) const;
202 |     bool is_mappable(uint32_t memory_type_index) const;
203 |     bool is_coherent(uint32_t memory_type_index) const;
204 |     // queue borrowing by family index (acquire and reclaim are presumably meant to be paired -- confirm)
205 |     VkQueue acquire_queue(uint32_t queue_family_index) const;
206 |     void reclaim_queue(uint32_t queue_family_index, VkQueue queue) const;
207 | 
208 |     // allocator on this device: acquire_* hands out a VkAllocator, reclaim_* takes it back
209 |     VkAllocator* acquire_blob_allocator() const;
210 |     void reclaim_blob_allocator(VkAllocator* allocator) const;
211 | 
212 |     VkAllocator* acquire_staging_allocator() const;
213 |     void reclaim_staging_allocator(VkAllocator* allocator) const;
214 | 
215 |     // immutable sampler for texelfetch
216 |     const VkSampler* immutable_texelfetch_sampler() const;
217 | 
218 |     // dummy buffer image (both returned by value)
219 |     VkMat get_dummy_buffer() const;
220 |     VkImageMat get_dummy_image() const;
221 | 
222 |     // pipeline cache on this device
223 |     const PipelineCache* get_pipeline_cache() const;
224 | 
225 |     // test image allocation for a blob of the given shape
226 |     bool shape_support_image_storage(const Mat& shape) const;
227 | 
228 |     // current gpu heap memory budget in MB
229 |     uint32_t get_heap_budget() const;
230 | 
231 |     // utility operator: convert src into dst with dst_elempack, one overload per VkMat / VkImageMat combination of src and dst
232 |     void convert_packing(const VkMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
233 |     void convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
234 |     void convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
235 |     void convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
236 | 
237 |     // VK_KHR_bind_memory2 (public per-device function pointers; NOTE(review): presumably null when the extension is unavailable -- confirm before calling)
238 |     PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR;
239 |     PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR;
240 | 
241 |     // VK_KHR_descriptor_update_template
242 |     PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
243 |     PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
244 |     PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
245 | 
246 |     // VK_KHR_get_memory_requirements2
247 |     PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;
248 |     PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR;
249 |     PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR;
250 | 
251 |     // VK_KHR_maintenance1
252 |     PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR;
253 | 
254 |     // VK_KHR_push_descriptor
255 |     PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR;
256 |     PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR;
257 | 
258 |     // VK_KHR_sampler_ycbcr_conversion
259 |     PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR;
260 |     PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR;
261 | 
262 |     // VK_KHR_swapchain
263 |     PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR;
264 |     PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR;
265 |     PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR;
266 |     PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR;
267 |     PFN_vkQueuePresentKHR vkQueuePresentKHR;
268 | 
269 | #if __ANDROID_API__ >= 26
270 |     // VK_ANDROID_external_memory_android_hardware_buffer
271 |     PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID;
272 |     PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID;
273 | #endif // __ANDROID_API__ >= 26
274 | 
275 | protected:
276 |     // device extension (presumably fills in the extension function pointers declared above -- confirm)
277 |     int init_device_extension();
278 | 
279 |     // dummy buffer and image
280 |     int create_dummy_buffer_image();
281 |     void destroy_dummy_buffer_image();
282 | 
283 |     // utility operator lookup; the five arguments line up with the five dimensions of uop_packing below
284 |     const ncnn::Packing_vulkan* get_utility_operator(int storage_type_from, int storage_type_to, int cast_type_from_index, int cast_type_to_index, int packing_type_to_index) const;
285 |     void destroy_utility_operator();
286 | 
287 | private:
288 |     VkDevice device;
289 | 
290 |     // hardware queue; mutable so the const acquire_queue / reclaim_queue can update them (queue_lock presumably guards all three vectors -- confirm)
291 |     mutable std::vector<VkQueue> compute_queues;
292 |     mutable std::vector<VkQueue> graphics_queues;
293 |     mutable std::vector<VkQueue> transfer_queues;
294 |     mutable Mutex queue_lock;
295 | 
296 |     // default blob allocator for each queue
297 |     mutable std::vector<VkAllocator*> blob_allocators;
298 |     mutable Mutex blob_allocator_lock;
299 | 
300 |     // default staging allocator for each queue
301 |     mutable std::vector<VkAllocator*> staging_allocators;
302 |     mutable Mutex staging_allocator_lock;
303 | 
304 |     // nearest sampler for texelfetch
305 |     VkSampler texelfetch_sampler;
306 | 
307 |     // dummy buffer and image
308 |     VkAllocator* dummy_allocator;
309 |     VkMat dummy_buffer;
310 |     VkImageMat dummy_image;
311 | 
312 |     // device-wide pipeline cache
313 |     PipelineCache* pipeline_cache;
314 | 
315 |     // utility operator table; the five array dimensions below are, in order:
316 |     // from buffer | image
317 |     // to buffer | image
318 |     // from fp32-b/i | fp16p-b/i | fp16s-b/i
319 |     // to fp32-b/i | fp16p-b/i | fp16s-b/i
320 |     // to pack1 | pack4 | pack8
321 |     mutable ncnn::Packing_vulkan* uop_packing[2][2][3][3][3];
322 |     mutable Mutex uop_lock;
323 | };
324 | 
325 | VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index()); // NOTE(review): ownership of the returned pointer is not visible here; presumably library-owned and not to be deleted by the caller -- confirm
326 | 
327 | // online spirv compilation into the spirv output vector, either from shader source text or from a shader type index (int is a status code, presumably 0 on success -- confirm)
328 | int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector<uint32_t>& spirv);
329 | int compile_spirv_module(int shader_type_index, const Option& opt, std::vector<uint32_t>& spirv);
330 | 
331 | // info from spirv: the output record of resolve_shader_info()
332 | class ShaderInfo
333 | {
334 | public:
335 |     int specialization_count;
336 |     int binding_count;
337 |     int push_constant_count;
338 | 
339 |     // 0 = null
340 |     // 1 = storage buffer
341 |     // 2 = storage image
342 |     // 3 = combined image sampler
343 |     int binding_types[16]; // 16 is large enough I think ... (fixed capacity; each entry uses the codes listed above)
344 | };
345 | 
346 | int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info); // fills shader_info from the spirv in spv_data (NOTE(review): unit of spv_data_size and the return convention are not visible here -- confirm)
347 | 
348 | } // namespace ncnn
349 | 
350 | #endif // NCNN_VULKAN
351 | 
352 | #endif // NCNN_GPU_H
353 | 


--------------------------------------------------------------------------------
/include/ncnn/layer.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef NCNN_LAYER_H
 16 | #define NCNN_LAYER_H
 17 | 
 18 | #include "mat.h"
 19 | #include "modelbin.h"
 20 | #include "option.h"
 21 | #include "paramdict.h"
 22 | #include "platform.h"
 23 | 
 24 | #include <math.h>
 25 | 
 26 | #if NCNN_VULKAN
 27 | #include "command.h"
 28 | #include "pipeline.h"
 29 | 
 30 | #include <vulkan/vulkan.h>
 31 | #endif // NCNN_VULKAN
 32 | 
 33 | namespace ncnn {
 34 | 
 35 | class Layer // base class for layers: virtual load / setup / forward hooks plus public capability flags and blob wiring
 36 | {
 37 | public:
 38 |     // default constructor (initial flag values are set in the implementation, not visible in this header)
 39 |     Layer();
 40 |     // virtual destructor
 41 |     virtual ~Layer();
 42 | 
 43 |     // load layer specific parameter from parsed dict
 44 |     // return 0 if success
 45 |     virtual int load_param(const ParamDict& pd);
 46 | 
 47 |     // load layer specific weight data from model binary
 48 |     // return 0 if success
 49 |     virtual int load_model(const ModelBin& mb);
 50 | 
 51 |     // layer implementation specific setup
 52 |     // return 0 if success
 53 |     virtual int create_pipeline(const Option& opt);
 54 | 
 55 |     // layer implementation specific clean
 56 |     // return 0 if success
 57 |     virtual int destroy_pipeline(const Option& opt);
 58 | 
 59 | public:
 60 |     // one input and one output blob
 61 |     bool one_blob_only;
 62 | 
 63 |     // support inplace inference
 64 |     bool support_inplace;
 65 | 
 66 |     // support vulkan compute
 67 |     bool support_vulkan;
 68 | 
 69 |     // accept input blob with packed storage
 70 |     bool support_packing;
 71 | 
 72 |     // accept bf16
 73 |     bool support_bf16_storage;
 74 | 
 75 |     // accept fp16
 76 |     bool support_fp16_storage;
 77 | 
 78 |     // shader image storage
 79 |     bool support_image_storage;
 80 | 
 81 |     // TODO drop these fields
 82 |     bool use_int8_inference;
 83 |     bool support_weight_fp16_storage;
 84 | 
 85 | public:
 86 |     // implement inference: multi-blob overload first, single-blob overload second
 87 |     // return 0 if success
 88 |     virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
 89 |     virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
 90 | 
 91 |     // implement inplace inference: the same blob argument is both input and output
 92 |     // return 0 if success
 93 |     virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const;
 94 |     virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;
 95 | 
 96 | #if NCNN_VULKAN
 97 | public:
 98 |     // upload weight blob from host to device
 99 |     virtual int upload_model(VkTransfer& cmd, const Option& opt);
100 | 
101 | public:
102 |     // implement inference on VkMat blobs
103 |     // return 0 if success
104 |     virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const;
105 |     virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const;
106 | 
107 |     // implement inference on VkImageMat blobs
108 |     // return 0 if success
109 |     virtual int forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const;
110 |     virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const;
111 | 
112 |     // implement inplace inference on VkMat blobs
113 |     // return 0 if success
114 |     virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const;
115 |     virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const;
116 | 
117 |     // implement inplace inference on VkImageMat blobs
118 |     // return 0 if success
119 |     virtual int forward_inplace(std::vector<VkImageMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const;
120 |     virtual int forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const;
121 | 
122 | public:
123 |     // assigned immediately after creating this layer
124 |     const VulkanDevice* vkdev;
125 | #endif // NCNN_VULKAN
126 | 
127 | public:
128 |     // layer type index (presumably a LayerType value, possibly combined with LayerType::CustomBit -- confirm)
129 |     int typeindex;
130 | #if NCNN_STRING
131 |     // layer type name
132 |     std::string type;
133 |     // layer name
134 |     std::string name;
135 | #endif // NCNN_STRING
136 |     // blob index which this layer needs as input
137 |     std::vector<int> bottoms;
138 |     // blob index which this layer produces as output
139 |     std::vector<int> tops;
140 |     // shape hint for the bottom and top blobs
141 |     std::vector<Mat> bottom_shapes;
142 |     std::vector<Mat> top_shapes;
143 | };
144 | 
145 | // layer factory function: takes no arguments and returns a Layer* (the signature DEFINE_LAYER_CREATOR generates)
146 | typedef Layer* (*layer_creator_func)();
147 | 
148 | struct layer_registry_entry // one registry row: optional type name plus the factory that creates that layer
149 | {
150 | #if NCNN_STRING
151 |     // layer type name (member only exists when NCNN_STRING is enabled, so the struct layout depends on that setting)
152 |     const char* name;
153 | #endif // NCNN_STRING
154 |     // layer factory entry
155 |     layer_creator_func creator;
156 | };
157 | 
158 | #if NCNN_STRING
159 | // get layer type from type name (NOTE(review): the value returned for an unknown name is not visible here -- confirm)
160 | int layer_to_index(const char* type);
161 | // create layer from type name
162 | Layer* create_layer(const char* type);
163 | #endif // NCNN_STRING
164 | // create layer from layer type (NOTE(review): presumably the caller owns the returned Layer -- confirm)
165 | Layer* create_layer(int index);
166 | // defines name##_layer_creator(), a factory with the layer_creator_func signature that returns `new name`; the generated function is not inline, so expanding the macro for the same name in two translation units yields duplicate definitions
167 | #define DEFINE_LAYER_CREATOR(name)        \
168 |     ::ncnn::Layer* name##_layer_creator() \
169 |     {                                     \
170 |         return new name;                  \
171 |     }
172 | 
173 | } // namespace ncnn
174 | 
175 | #endif // NCNN_LAYER_H
176 | 


--------------------------------------------------------------------------------
/include/ncnn/layer_shader_type.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_LAYER_SHADER_TYPE_H
16 | #define NCNN_LAYER_SHADER_TYPE_H
17 | 
18 | namespace ncnn {
19 | 
20 | namespace LayerShaderType { // enum wrapped in a same-named namespace, so enumerators are written LayerShaderType::X
21 | enum LayerShaderType
22 | {
23 | #include "layer_shader_type_enum.h" // generated enumerator list; in this tree that file contains only comments, so the enum has no enumerators
24 | };
25 | } // namespace LayerShaderType
26 | 
27 | } // namespace ncnn
28 | 
29 | #endif // NCNN_LAYER_SHADER_TYPE_H
30 | 


--------------------------------------------------------------------------------
/include/ncnn/layer_shader_type_enum.h:
--------------------------------------------------------------------------------
1 | // Layer Shader Enum header
2 | //
3 | // This file is auto-generated by cmake, don't edit it.
4 | 
5 | 
6 | 


--------------------------------------------------------------------------------
/include/ncnn/layer_type.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_LAYER_TYPE_H
16 | #define NCNN_LAYER_TYPE_H
17 | 
18 | namespace ncnn {
19 | 
20 | namespace LayerType { // enum wrapped in a same-named namespace, so enumerators are written LayerType::X
21 | enum LayerType
22 | {
23 | #include "layer_type_enum.h" // generated list of built-in layer indices; every line there ends with a comma, so CustomBit can follow directly
24 |     CustomBit = (1 << 8), // 256, above every built-in index in this tree (presumably marks custom layers registered through Net::register_custom_layer -- confirm)
25 | };
26 | } // namespace LayerType
27 | 
28 | } // namespace ncnn
29 | 
30 | #endif // NCNN_LAYER_TYPE_H
31 | 


--------------------------------------------------------------------------------
/include/ncnn/layer_type_enum.h:
--------------------------------------------------------------------------------
 1 | // Layer Type Enum header
 2 | //
 3 | // This file is auto-generated by cmake, don't edit it.
 4 | 
 5 | AbsVal = 0,
 6 | ArgMax = 1,
 7 | BatchNorm = 2,
 8 | Bias = 3,
 9 | BNLL = 4,
10 | Concat = 5,
11 | Convolution = 6,
12 | Crop = 7,
13 | Deconvolution = 8,
14 | Dropout = 9,
15 | Eltwise = 10,
16 | ELU = 11,
17 | Embed = 12,
18 | Exp = 13,
19 | Flatten = 14,
20 | InnerProduct = 15,
21 | Input = 16,
22 | Log = 17,
23 | LRN = 18,
24 | MemoryData = 19,
25 | MVN = 20,
26 | Pooling = 21,
27 | Power = 22,
28 | PReLU = 23,
29 | Proposal = 24,
30 | Reduction = 25,
31 | ReLU = 26,
32 | Reshape = 27,
33 | ROIPooling = 28,
34 | Scale = 29,
35 | Sigmoid = 30,
36 | Slice = 31,
37 | Softmax = 32,
38 | Split = 33,
39 | SPP = 34,
40 | TanH = 35,
41 | Threshold = 36,
42 | Tile = 37,
43 | RNN = 38,
44 | LSTM = 39,
45 | BinaryOp = 40,
46 | UnaryOp = 41,
47 | ConvolutionDepthWise = 42,
48 | Padding = 43,
49 | Squeeze = 44,
50 | ExpandDims = 45,
51 | Normalize = 46,
52 | Permute = 47,
53 | PriorBox = 48,
54 | DetectionOutput = 49,
55 | Interp = 50,
56 | DeconvolutionDepthWise = 51,
57 | ShuffleChannel = 52,
58 | InstanceNorm = 53,
59 | Clip = 54,
60 | Reorg = 55,
61 | YoloDetectionOutput = 56,
62 | Quantize = 57,
63 | Dequantize = 58,
64 | Yolov3DetectionOutput = 59,
65 | PSROIPooling = 60,
66 | ROIAlign = 61,
67 | Packing = 62,
68 | Requantize = 63,
69 | Cast = 64,
70 | HardSigmoid = 65,
71 | SELU = 66,
72 | HardSwish = 67,
73 | Noop = 68,
74 | PixelShuffle = 69,
75 | DeepCopy = 70,
76 | Mish = 71,
77 | StatisticsPooling = 72,
78 | Swish = 73,
79 | Gemm = 74,
80 | GroupNorm = 75,
81 | LayerNorm = 76,
82 | Softplus = 77,
83 | 
84 | 


--------------------------------------------------------------------------------
/include/ncnn/modelbin.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_MODELBIN_H
16 | #define NCNN_MODELBIN_H
17 | 
18 | #include "mat.h"
19 | 
20 | namespace ncnn {
21 | 
22 | class DataReader;
23 | class ModelBin // weight loading interface passed to Layer::load_model; abstract because load(w, type) is pure virtual
24 | {
25 | public:
26 |     virtual ~ModelBin();
27 |     // element type (codes for the `type` argument of every load overload)
28 |     // 0 = auto
29 |     // 1 = float32
30 |     // 2 = float16
31 |     // 3 = int8
32 |     // load vec of w elements; pure virtual, every concrete subclass must implement it
33 |     virtual Mat load(int w, int type) const = 0;
34 |     // load image (w, h); not pure, the base class provides an implementation outside this header
35 |     virtual Mat load(int w, int h, int type) const;
36 |     // load dim (w, h, c); not pure, the base class provides an implementation outside this header
37 |     virtual Mat load(int w, int h, int c, int type) const;
38 | };
39 | 
40 | class ModelBinFromDataReader : public ModelBin // ModelBin that pulls weight data through a DataReader
41 | {
42 | public:
43 |     ModelBinFromDataReader(const DataReader& dr);
44 | 
45 |     virtual Mat load(int w, int type) const; // implements the pure virtual ModelBin::load(w, type)
46 | 
47 | protected:
48 |     const DataReader& dr; // reference member, not a copy (presumably bound to the constructor argument, which must then outlive this object -- confirm)
49 | };
50 | 
51 | class ModelBinFromMatArray : public ModelBin // ModelBin that serves weights from an existing array of Mat
52 | {
53 | public:
54 |     // construct from weight blob array (NOTE(review): no length is passed, so the caller must supply enough entries for every load -- confirm)
55 |     ModelBinFromMatArray(const Mat* weights);
56 | 
57 |     virtual Mat load(int w, int type) const; // implements the pure virtual ModelBin::load(w, type)
58 | 
59 | protected:
60 |     mutable const Mat* weights; // mutable, so the const load() is allowed to move this pointer (presumably advanced one Mat per load -- confirm)
61 | };
62 | 
63 | } // namespace ncnn
64 | 
65 | #endif // NCNN_MODELBIN_H
66 | 


--------------------------------------------------------------------------------
/include/ncnn/net.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef NCNN_NET_H
 16 | #define NCNN_NET_H
 17 | 
 18 | #include "blob.h"
 19 | #include "layer.h"
 20 | #include "mat.h"
 21 | #include "option.h"
 22 | #include "platform.h"
 23 | 
 24 | #if __ANDROID_API__ >= 9
 25 | #include <android/asset_manager.h>
 26 | #endif // __ANDROID_API__ >= 9
 27 | 
 28 | namespace ncnn {
 29 | 
 30 | #if NCNN_VULKAN
 31 | class VkCompute;
 32 | class PipelineCache;
 33 | #endif // NCNN_VULKAN
 34 | class DataReader;
 35 | class Extractor;
 36 | class Net
 37 | {
 38 | public:
 39 |     // construct an empty net
 40 |     Net();
 41 |     // clear and destroy
 42 |     ~Net();
 43 | 
 44 | public:
 45 |     // option can be changed before loading
 46 |     Option opt;
 47 | 
 48 | #if NCNN_VULKAN
 49 |     // set gpu device by index
 50 |     void set_vulkan_device(int device_index);
 51 | 
 52 |     // set gpu device by device handle, no owner transfer
 53 |     void set_vulkan_device(const VulkanDevice* vkdev);
 54 | 
 55 |     const VulkanDevice* vulkan_device() const; // getter; presumably returns the device set above - confirm in net.cpp
 56 | #endif // NCNN_VULKAN
 57 | 
 58 | #if NCNN_STRING
 59 |     // register custom layer by layer type name
 60 |     // return 0 if success
 61 |     int register_custom_layer(const char* type, layer_creator_func creator);
 62 | #endif // NCNN_STRING
 63 |     // register custom layer by layer type index
 64 |     // return 0 if success
 65 |     int register_custom_layer(int index, layer_creator_func creator);
 66 | 
 67 | #if NCNN_STRING
 68 |     int load_param(const DataReader& dr); // only with NCNN_STRING, like the plain param loaders below
 69 | #endif // NCNN_STRING
 70 | 
 71 |     int load_param_bin(const DataReader& dr);
 72 | 
 73 |     int load_model(const DataReader& dr);
 74 | 
 75 | #if NCNN_STDIO
 76 | #if NCNN_STRING
 77 |     // load network structure from plain param file
 78 |     // return 0 if success
 79 |     int load_param(FILE* fp);
 80 |     int load_param(const char* protopath);
 81 |     int load_param_mem(const char* mem);
 82 | #endif // NCNN_STRING
 83 |     // load network structure from binary param file
 84 |     // return 0 if success
 85 |     int load_param_bin(FILE* fp);
 86 |     int load_param_bin(const char* protopath);
 87 | 
 88 |     // load network weight data from model file
 89 |     // return 0 if success
 90 |     int load_model(FILE* fp);
 91 |     int load_model(const char* modelpath);
 92 | #endif // NCNN_STDIO
 93 | 
 94 |     // load network structure from external memory
 95 |     // memory pointer must be 32-bit aligned
 96 |     // return bytes consumed (not a 0-on-success status like the loaders above)
 97 |     int load_param(const unsigned char* mem);
 98 | 
 99 |     // reference network weight data from external memory
100 |     // weight data is not copied but referenced
101 |     // so the external memory must stay valid while the net is in use
102 |     // memory pointer must be 32-bit aligned
103 |     // return bytes consumed (not a 0-on-success status like the loaders above)
104 |     int load_model(const unsigned char* mem);
105 | 
106 | #if __ANDROID_API__ >= 9
107 | #if NCNN_STRING
108 |     // convenient load network structure from android asset plain param file
109 |     int load_param(AAsset* asset);
110 |     int load_param(AAssetManager* mgr, const char* assetpath);
111 | #endif // NCNN_STRING
112 |     // convenient load network structure from android asset binary param file
113 |     int load_param_bin(AAsset* asset);
114 |     int load_param_bin(AAssetManager* mgr, const char* assetpath);
115 | 
116 |     // convenient load network weight data from android asset model file
117 |     int load_model(AAsset* asset);
118 |     int load_model(AAssetManager* mgr, const char* assetpath);
119 | #endif // __ANDROID_API__ >= 9
120 | 
121 |     // unload network structure and weight data
122 |     void clear();
123 | 
124 |     // construct an Extractor from network
125 |     Extractor create_extractor() const;
126 | 
127 | public:
128 |     std::vector<Blob> blobs;
129 |     std::vector<Layer*> layers; // NOTE(review): raw pointers; ownership is not visible in this header
130 | 
131 | protected:
132 |     // parse the structure of network
133 |     // fuse int8 op dequantize and quantize by requantize
134 |     int fuse_network();
135 | 
136 | #if NCNN_VULKAN
137 | 
138 |     int upload_model();
139 | 
140 |     int create_pipeline();
141 | 
142 |     int destroy_pipeline();
143 | 
144 | #endif // NCNN_VULKAN
145 | 
146 |     friend class Extractor; // grants Extractor access to the protected members of Net
147 | #if NCNN_STRING
148 |     int find_blob_index_by_name(const char* name) const;
149 |     int find_layer_index_by_name(const char* name) const;
150 |     int custom_layer_to_index(const char* type);
151 |     Layer* create_custom_layer(const char* type);
152 | #endif // NCNN_STRING
153 |     Layer* create_custom_layer(int index);
154 |     int forward_layer(int layer_index, std::vector<Mat>& blob_mats, const Option& opt) const;
155 | 
156 | #if NCNN_VULKAN
157 |     int forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, VkCompute& cmd, const Option& opt) const;
158 |     int forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, std::vector<VkImageMat>& blob_mats_gpu_image, VkCompute& cmd, const Option& opt) const;
159 | #endif // NCNN_VULKAN
160 | 
161 | protected:
162 |     std::vector<layer_registry_entry> custom_layer_registry; // presumably filled by register_custom_layer() - confirm in net.cpp
163 | 
164 | #if NCNN_VULKAN
165 |     const VulkanDevice* vkdev;
166 | 
167 |     VkAllocator* weight_vkallocator;
168 |     VkAllocator* weight_staging_vkallocator;
169 | 
170 |     PipelineCache* pipeline_cache;
171 | #endif // NCNN_VULKAN
172 | };
173 | 
174 | class Extractor
175 | {
176 | public:
177 |     ~Extractor();
178 | 
179 |     // enable light mode
180 |     // intermediate blob will be recycled when enabled
181 |     // enabled by default
182 |     void set_light_mode(bool enable);
183 | 
184 |     // set thread count for this extractor
185 |     // this will overwrite the global setting
186 |     // default count is system dependent
187 |     void set_num_threads(int num_threads);
188 | 
189 |     // set blob memory allocator
190 |     void set_blob_allocator(Allocator* allocator);
191 | 
192 |     // set workspace memory allocator
193 |     void set_workspace_allocator(Allocator* allocator);
194 | 
195 | #if NCNN_VULKAN
196 |     void set_vulkan_compute(bool enable);
197 | 
198 |     void set_blob_vkallocator(VkAllocator* allocator);
199 | 
200 |     void set_workspace_vkallocator(VkAllocator* allocator);
201 | 
202 |     void set_staging_vkallocator(VkAllocator* allocator);
203 | #endif // NCNN_VULKAN
204 | 
205 | #if NCNN_STRING
206 |     // set input by blob name
207 |     // return 0 if success
208 |     int input(const char* blob_name, const Mat& in);
209 | 
210 |     // get result by blob name
211 |     // return 0 if success
212 |     // type = 0, default
213 |     // type = 1, do not convert fp16/bf16 and/or packing
214 |     int extract(const char* blob_name, Mat& feat, int type = 0);
215 | #endif // NCNN_STRING
216 | 
217 |     // set input by blob index
218 |     // return 0 if success
219 |     int input(int blob_index, const Mat& in);
220 | 
221 |     // get result by blob index
222 |     // return 0 if success
223 |     // type = 0, default
224 |     // type = 1, do not convert fp16/bf16 and/or packing
225 |     int extract(int blob_index, Mat& feat, int type = 0);
226 | 
227 | #if NCNN_VULKAN
228 | #if NCNN_STRING
229 |     // set input by blob name (VkMat overload)
230 |     // return 0 if success
231 |     int input(const char* blob_name, const VkMat& in);
232 | 
233 |     // get result by blob name (VkMat overload)
234 |     // return 0 if success
235 |     int extract(const char* blob_name, VkMat& feat, VkCompute& cmd);
236 | 
237 |     // set input by blob name (VkImageMat overload)
238 |     // return 0 if success
239 |     int input(const char* blob_name, const VkImageMat& in);
240 | 
241 |     // get result by blob name (VkImageMat overload)
242 |     // return 0 if success
243 |     int extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd);
244 | #endif // NCNN_STRING
245 | 
246 |     // set input by blob index (VkMat overload)
247 |     // return 0 if success
248 |     int input(int blob_index, const VkMat& in);
249 | 
250 |     // get result by blob index (VkMat overload)
251 |     // return 0 if success
252 |     int extract(int blob_index, VkMat& feat, VkCompute& cmd);
253 | 
254 |     // set input by blob index (VkImageMat overload)
255 |     // return 0 if success
256 |     int input(int blob_index, const VkImageMat& in);
257 | 
258 |     // get result by blob index (VkImageMat overload)
259 |     // return 0 if success
260 |     int extract(int blob_index, VkImageMat& feat, VkCompute& cmd);
261 | #endif // NCNN_VULKAN
262 | 
263 | protected:
264 |     friend Extractor Net::create_extractor() const; // lets Net::create_extractor() call the protected constructor
265 |     Extractor(const Net* net, size_t blob_count); // not public: obtain instances through Net::create_extractor()
266 | 
267 | private:
268 |     const Net* net;
269 |     std::vector<Mat> blob_mats;
270 |     Option opt; // per-extractor options; presumably what the set_* methods above modify - confirm in net.cpp
271 | 
272 | #if NCNN_VULKAN
273 |     VkAllocator* local_blob_vkallocator;
274 |     VkAllocator* local_staging_vkallocator;
275 | 
276 |     std::vector<VkMat> blob_mats_gpu;
277 |     std::vector<VkImageMat> blob_mats_gpu_image;
278 | #endif // NCNN_VULKAN
279 | };
280 | 
281 | } // namespace ncnn
282 | 
283 | #endif // NCNN_NET_H
284 | 


--------------------------------------------------------------------------------
/include/ncnn/option.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef NCNN_OPTION_H
 16 | #define NCNN_OPTION_H
 17 | 
 18 | #include "platform.h"
 19 | 
 20 | namespace ncnn {
 21 | 
 22 | #if NCNN_VULKAN
 23 | class VkAllocator;
 24 | class PipelineCache;
 25 | #endif // NCNN_VULKAN
 26 | 
 27 | class Allocator;
 28 | class Option
 29 | {
 30 | public:
 31 |     // default option
 32 |     Option();
 33 | 
 34 | public:
 35 |     // light mode
 36 |     // intermediate blob will be recycled when enabled
 37 |     // enabled by default
 38 |     bool lightmode;
 39 | 
 40 |     // thread count
 41 |     // default value is the one returned by get_cpu_count()
 42 |     int num_threads;
 43 | 
 44 |     // blob memory allocator
 45 |     Allocator* blob_allocator;
 46 | 
 47 |     // workspace memory allocator
 48 |     Allocator* workspace_allocator;
 49 | 
 50 | #if NCNN_VULKAN
 51 |     // blob memory allocator (vulkan)
 52 |     VkAllocator* blob_vkallocator;
 53 | 
 54 |     // workspace memory allocator (vulkan)
 55 |     VkAllocator* workspace_vkallocator;
 56 | 
 57 |     // staging memory allocator (vulkan)
 58 |     VkAllocator* staging_vkallocator;
 59 | 
 60 |     // pipeline cache
 61 |     PipelineCache* pipeline_cache;
 62 | #endif // NCNN_VULKAN
 63 | 
 64 |     // the time openmp threads busy-wait for more work before going to sleep
 65 |     // default value is 20ms to keep the cores enabled
 66 |     // without too much extra power consumption afterwards
 67 |     int openmp_blocktime;
 68 | 
 69 |     // enable winograd convolution optimization
 70 |     // improve convolution 3x3 stride1 performance, may consume more memory
 71 |     // changes should be applied before loading network structure and weight
 72 |     // enabled by default
 73 |     bool use_winograd_convolution;
 74 | 
 75 |     // enable sgemm convolution optimization
 76 |     // improve convolution 1x1 stride1 performance, may consume more memory
 77 |     // changes should be applied before loading network structure and weight
 78 |     // enabled by default
 79 |     bool use_sgemm_convolution;
 80 | 
 81 |     // enable quantized int8 inference
 82 |     // use low-precision int8 path for quantized model
 83 |     // changes should be applied before loading network structure and weight
 84 |     // enabled by default
 85 |     bool use_int8_inference;
 86 | 
 87 |     // enable vulkan compute
 88 |     bool use_vulkan_compute;
 89 | 
 90 |     // enable options for gpu inference
 91 |     bool use_fp16_packed;
 92 |     bool use_fp16_storage;
 93 |     bool use_fp16_arithmetic;
 94 |     bool use_int8_storage;
 95 |     bool use_int8_arithmetic;
 96 | 
 97 |     // enable simd-friendly packed memory layout
 98 |     // improve all operator performance on all arm devices, will consume more memory
 99 |     // changes should be applied before loading network structure and weight
100 |     // enabled by default
101 |     bool use_packing_layout;
102 | 
103 |     bool use_shader_pack8; // NOTE(review): undocumented here; presumably a pack8 switch for vulkan shaders - confirm
104 | 
105 |     // subgroup options
106 |     bool use_subgroup_basic;
107 |     bool use_subgroup_vote;
108 |     bool use_subgroup_ballot;
109 |     bool use_subgroup_shuffle;
110 | 
111 |     // turn on for adreno
112 |     bool use_image_storage;
113 | 
114 |     // enable bf16 data type for storage
115 |     // improve most operator performance on all arm devices, may consume more memory
116 |     bool use_bf16_storage;
117 | 
118 |     // used for fp16 weight storage in AVX
119 |     // TODO drop this option
120 |     bool use_weight_fp16_storage;
121 | };
122 | 
123 | } // namespace ncnn
124 | 
125 | #endif // NCNN_OPTION_H
126 | 


--------------------------------------------------------------------------------
/include/ncnn/paramdict.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_PARAMDICT_H
16 | #define NCNN_PARAMDICT_H
17 | 
18 | #include "mat.h"
19 | 
20 | // at most 32 parameters
21 | #define NCNN_MAX_PARAM_COUNT 32
22 | 
23 | namespace ncnn {
24 | 
25 | class DataReader;
26 | class Net;
27 | class ParamDict
28 | {
29 | public:
30 |     // construct an empty dictionary
31 |     ParamDict();
32 | 
33 |     // get int, def is the fallback value
34 |     int get(int id, int def) const;
35 |     // get float, def is the fallback value
36 |     float get(int id, float def) const;
37 |     // get array, def is the fallback value
38 |     Mat get(int id, const Mat& def) const;
39 | 
40 |     // set int
41 |     void set(int id, int i);
42 |     // set float
43 |     void set(int id, float f);
44 |     // set array
45 |     void set(int id, const Mat& v);
46 | 
47 | protected:
48 |     friend class Net; // Net may call the protected clear()/load_param*() below
49 | 
50 |     void clear();
51 | 
52 |     int load_param(const DataReader& dr);
53 |     int load_param_bin(const DataReader& dr);
54 | 
55 | protected:
56 |     struct
57 |     {
58 |         // 0 = null
59 |         // 1 = int/float
60 |         // 2 = int
61 |         // 3 = float
62 |         // 4 = array of int/float
63 |         // 5 = array of int
64 |         // 6 = array of float
65 |         int type;
66 |         union // scalar payload: i and f share the same storage
67 |         {
68 |             int i;
69 |             float f;
70 |         };
71 |         Mat v; // array payload
72 |     } params[NCNN_MAX_PARAM_COUNT]; // fixed-size table; presumably indexed by the id passed to get()/set() - confirm
73 | };
74 | 
75 | } // namespace ncnn
76 | 
77 | #endif // NCNN_PARAMDICT_H
78 | 


--------------------------------------------------------------------------------
/include/ncnn/pipeline.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_PIPELINE_H
16 | #define NCNN_PIPELINE_H
17 | 
18 | #include "mat.h"
19 | #include "platform.h"
20 | #if NCNN_VULKAN
21 | #include "gpu.h"
22 | 
23 | #include <vulkan/vulkan.h>
24 | #endif // NCNN_VULKAN
25 | 
26 | namespace ncnn {
27 | 
28 | #if NCNN_VULKAN
29 | class Option;
30 | class Pipeline
31 | {
32 | public:
33 |     Pipeline(const VulkanDevice* vkdev);
34 |     virtual ~Pipeline(); // virtual: Pipeline is used as a base class
35 | 
36 | public:
37 |     void set_optimal_local_size_xyz(int w = 4, int h = 4, int c = 4); // all three dimensions default to 4
38 |     void set_optimal_local_size_xyz(const Mat& local_size_xyz);
39 |     void set_local_size_xyz(int w, int h, int c);
40 | 
41 |     int create(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations); // from raw spir-v data
42 | 
43 |     int create(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations); // from a shader type index
44 | 
45 | public:
46 |     const VulkanDevice* vkdev;
47 | 
48 |     VkShaderModule shader_module;
49 |     VkDescriptorSetLayout descriptorset_layout;
50 |     VkPipelineLayout pipeline_layout;
51 |     VkPipeline pipeline;
52 |     VkDescriptorUpdateTemplateKHR descriptor_update_template;
53 | 
54 |     ShaderInfo shader_info;
55 | 
56 |     uint32_t local_size_x; // presumably written by the set_*local_size_xyz() methods above - confirm in pipeline.cpp
57 |     uint32_t local_size_y;
58 |     uint32_t local_size_z;
59 | };
60 | 
61 | #if __ANDROID_API__ >= 26
62 | class VkCompute;
63 | class ImportAndroidHardwareBufferPipeline : private Pipeline // private base: Pipeline's members are not exposed to users
64 | {
65 | public:
66 |     ImportAndroidHardwareBufferPipeline(const VulkanDevice* vkdev);
67 |     ~ImportAndroidHardwareBufferPipeline();
68 | 
69 |     int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, const Option& opt);
70 |     int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, int target_width, int target_height, const Option& opt); // overload with an explicit target size
71 |     void destroy();
72 | 
73 |     friend class VkCompute; // VkCompute may access the privately inherited Pipeline members
74 | 
75 | protected:
76 |     int create_shader_module(const Option& opt);
77 |     int create_sampler(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator);
78 |     int create_descriptorset_layout();
79 | 
80 | public:
81 |     int type_to;
82 |     int rotate_from;
83 |     bool need_resize; // presumably set by the create() overload taking a target size - confirm in pipeline.cpp
84 | 
85 |     VkSampler sampler;
86 | };
87 | #endif // __ANDROID_API__ >= 26
88 | #endif // NCNN_VULKAN
89 | 
90 | } // namespace ncnn
91 | 
92 | #endif // NCNN_PIPELINE_H
93 | 


--------------------------------------------------------------------------------
/include/ncnn/pipelinecache.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef NCNN_PIPELINECACHE_H
 16 | #define NCNN_PIPELINECACHE_H
 17 | 
 18 | #include "platform.h"
 19 | 
 20 | #if NCNN_VULKAN
 21 | #include <vulkan/vulkan.h>
 22 | #endif // NCNN_VULKAN
 23 | 
 24 | #include "mat.h"
 25 | #include "gpu.h"
 26 | 
 27 | namespace ncnn {
 28 | 
 29 | #if NCNN_VULKAN
 30 | 
 31 | class VulkanDevice;
 32 | 
 33 | class PipelineCache
 34 | {
 35 | public:
 36 |     PipelineCache(const VulkanDevice* _vkdev);
 37 | 
 38 |     ~PipelineCache();
 39 | 
 40 |     void clear();
 41 | 
 42 |     int get_pipeline(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations,
 43 |                      uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
 44 |                      VkShaderModule* shader_module,
 45 |                      VkDescriptorSetLayout* descriptorset_layout,
 46 |                      VkPipelineLayout* pipeline_layout,
 47 |                      VkPipeline* pipeline,
 48 |                      VkDescriptorUpdateTemplateKHR* descriptor_update_template,
 49 |                      ShaderInfo& shader_info) const; // lookup keyed by raw spir-v data; results come back through the pointer/reference parameters
 50 | 
 51 |     int get_pipeline(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations,
 52 |                      uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
 53 |                      VkShaderModule* shader_module,
 54 |                      VkDescriptorSetLayout* descriptorset_layout,
 55 |                      VkPipelineLayout* pipeline_layout,
 56 |                      VkPipeline* pipeline,
 57 |                      VkDescriptorUpdateTemplateKHR* descriptor_update_template,
 58 |                      ShaderInfo& shader_info) const; // same, keyed by shader type index and option
 59 | 
 60 | protected:
 61 |     int create_shader_module(int shader_type_index, const Option& opt, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
 62 |                              VkShaderModule* _shader_module, ShaderInfo& si) const;
 63 | 
 64 |     int new_pipeline(VkShaderModule shader_module, const ShaderInfo& shader_info, const std::vector<vk_specialization_type>& specializations,
 65 |                      VkDescriptorSetLayout* descriptorset_layout,
 66 |                      VkPipelineLayout* pipeline_layout,
 67 |                      VkPipeline* pipeline,
 68 |                      VkDescriptorUpdateTemplateKHR* descriptor_update_template) const;
 69 | 
 70 | public:
 71 |     const VulkanDevice* vkdev;
 72 | 
 73 |     // digest -> artifact; presumably cache_digests[i] is the key of cache_artifacts[i] - confirm in pipelinecache.cpp
 74 |     struct pipeline_cache_digest
 75 |     {
 76 |         pipeline_cache_digest(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations,
 77 |                               uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z);
 78 |         pipeline_cache_digest(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations,
 79 |                               uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z);
 80 | 
 81 |         bool operator==(const pipeline_cache_digest& rhs) const // equal when both 64-bit words match
 82 |         {
 83 |             return d0 == rhs.d0 && d1 == rhs.d1;
 84 |         }
 85 | 
 86 |         bool operator!=(const pipeline_cache_digest& rhs) const // negation of operator==
 87 |         {
 88 |             return d0 != rhs.d0 || d1 != rhs.d1;
 89 |         }
 90 | 
 91 |         union
 92 |         {
 93 |             struct
 94 |             {
 95 |                 union // one 32-bit slot: a spir-v hash or a shader type index, depending on the constructor used (presumably)
 96 |                 {
 97 |                     uint32_t spv_data_murmur3;
 98 |                     int shader_type_index;
 99 |                 };
100 |                 unsigned char opt_local_size_bits[4];
101 |             };
102 | 
103 |             uint64_t d0; // 64-bit view of the struct above, used by the comparison operators
104 |         };
105 | 
106 |         union
107 |         {
108 |             struct
109 |             {
110 |                 uint32_t specializations_murmur3;
111 |                 uint32_t specializations_fnv1a;
112 |             };
113 | 
114 |             uint64_t d1; // 64-bit view of the two specialization hashes, used by the comparison operators
115 |         };
116 |     };
117 | 
118 |     struct pipeline_cache_artifact
119 |     {
120 |         VkShaderModule shader_module;
121 |         VkDescriptorSetLayout descriptorset_layout;
122 |         VkPipelineLayout pipeline_layout;
123 |         VkPipeline pipeline;
124 |         VkDescriptorUpdateTemplateKHR descriptor_update_template;
125 |         ShaderInfo shader_info; // TODO use pointer ?
126 |     };
127 | 
128 |     mutable std::vector<pipeline_cache_digest> cache_digests; // mutable: modifiable from the const get_pipeline() methods
129 |     mutable std::vector<pipeline_cache_artifact> cache_artifacts;
130 |     mutable Mutex cache_lock; // presumably guards the two vectors above - confirm in pipelinecache.cpp
131 | };
132 | 
133 | #endif // NCNN_VULKAN
134 | 
135 | } // namespace ncnn
136 | 
137 | #endif // NCNN_PIPELINECACHE_H
138 | 


--------------------------------------------------------------------------------
/include/ncnn/platform.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef NCNN_PLATFORM_H
 16 | #define NCNN_PLATFORM_H
 17 | 
 18 | #define NCNN_STDIO 1
 19 | #define NCNN_STRING 1
 20 | #define NCNN_SIMPLEOCV 0
 21 | #define NCNN_SIMPLEOMP 0
 22 | #define NCNN_SIMPLESTL 0
 23 | #define NCNN_THREADS 1
 24 | #define NCNN_BENCHMARK 0
 25 | #define NCNN_PIXEL 1
 26 | #define NCNN_PIXEL_ROTATE 1
 27 | #define NCNN_PIXEL_AFFINE 1
 28 | #define NCNN_VULKAN 0
 29 | #define NCNN_REQUANT 0
 30 | #define NCNN_RUNTIME_CPU 1
 31 | #define NCNN_AVX2 1
 32 | #define NCNN_ARM82 0
 33 | 
 34 | #ifdef __cplusplus
 35 | 
 36 | #if NCNN_THREADS
 37 | #if (defined _WIN32 && !(defined __MINGW32__))
 38 | #define WIN32_LEAN_AND_MEAN
 39 | #include <windows.h>
 40 | #include <process.h>
 41 | #else
 42 | #include <pthread.h>
 43 | #endif
 44 | #endif // NCNN_THREADS
 45 | 
 46 | #if __ANDROID_API__ >= 26
 47 | #define VK_USE_PLATFORM_ANDROID_KHR
 48 | #endif // __ANDROID_API__ >= 26
 49 | 
 50 | namespace ncnn {
 51 | 
 52 | #if NCNN_THREADS
 53 | #if (defined _WIN32 && !(defined __MINGW32__))
 54 | class Mutex // windows implementation: exclusive lock on top of an SRWLOCK
 55 | {
 56 | public:
 57 |     Mutex() { InitializeSRWLock(&srwlock); }
 58 |     ~Mutex() {} // no cleanup call is made for the SRWLOCK
 59 |     void lock() { AcquireSRWLockExclusive(&srwlock); }
 60 |     void unlock() { ReleaseSRWLockExclusive(&srwlock); }
 61 | private:
 62 |     friend class ConditionVariable; // ConditionVariable::wait() needs direct access to srwlock
 63 |     // NOTE SRWLock is available from windows vista
 64 |     SRWLOCK srwlock;
 65 | };
 66 | 
 67 | class ConditionVariable // windows implementation on top of CONDITION_VARIABLE
 68 | {
 69 | public:
 70 |     ConditionVariable() { InitializeConditionVariable(&condvar); }
 71 |     ~ConditionVariable() {} // no cleanup call is made for the CONDITION_VARIABLE
 72 |     void wait(Mutex& mutex) { SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); } // no timeout; return value is ignored
 73 |     void broadcast() { WakeAllConditionVariable(&condvar); } // wake all waiters
 74 |     void signal() { WakeConditionVariable(&condvar); } // wake one waiter
 75 | private:
 76 |     CONDITION_VARIABLE condvar;
 77 | };
 78 | 
 79 | static unsigned __stdcall start_wrapper(void* args); // forward declaration; defined as a friend inside Thread below
 80 | class Thread // windows implementation on top of _beginthreadex
 81 | {
 82 | public:
 83 |     Thread(void* (*start)(void*), void* args = 0) { _start = start; _args = args; handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); }
 84 |     ~Thread() {} // the handle is closed in join(), not here
 85 |     void join() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); } // closes the handle after waiting, so call it only once
 86 | private:
 87 |     friend unsigned __stdcall start_wrapper(void* args) // adapts the pthread-style start routine to the _beginthreadex signature
 88 |     {
 89 |         Thread* t = (Thread*)args;
 90 |         t->_start(t->_args); // the start routine's return value is discarded
 91 |         return 0;
 92 |     }
 93 |     HANDLE handle;
 94 |     void* (*_start)(void*);
 95 |     void* _args;
 96 | };
 97 | 
 98 | class ThreadLocalStorage // windows implementation: one TLS slot holding a void* per thread
 99 | {
100 | public:
101 |     ThreadLocalStorage() { key = TlsAlloc(); } // the result of TlsAlloc() is not checked for failure
102 |     ~ThreadLocalStorage() { TlsFree(key); }
103 |     void set(void* value) { TlsSetValue(key, (LPVOID)value); }
104 |     void* get() { return (void*)TlsGetValue(key); }
105 | private:
106 |     DWORD key;
107 | };
108 | #else // (defined _WIN32 && !(defined __MINGW32__))
109 | class Mutex // pthread implementation
110 | {
111 | public:
112 |     Mutex() { pthread_mutex_init(&mutex, 0); } // default attributes
113 |     ~Mutex() { pthread_mutex_destroy(&mutex); }
114 |     void lock() { pthread_mutex_lock(&mutex); }
115 |     void unlock() { pthread_mutex_unlock(&mutex); }
116 | private:
117 |     friend class ConditionVariable; // ConditionVariable::wait() needs direct access to mutex
118 |     pthread_mutex_t mutex;
119 | };
120 | 
121 | class ConditionVariable // pthread implementation
122 | {
123 | public:
124 |     ConditionVariable() { pthread_cond_init(&cond, 0); } // default attributes
125 |     ~ConditionVariable() { pthread_cond_destroy(&cond); }
126 |     void wait(Mutex& mutex) { pthread_cond_wait(&cond, &mutex.mutex); } // no timeout; return value is ignored
127 |     void broadcast() { pthread_cond_broadcast(&cond); } // wake all waiters
128 |     void signal() { pthread_cond_signal(&cond); } // wake one waiter
129 | private:
130 |     pthread_cond_t cond;
131 | };
132 | 
133 | class Thread // pthread implementation
134 | {
135 | public:
136 |     Thread(void* (*start)(void*), void* args = 0) { pthread_create(&t, 0, start, args); } // the result of pthread_create() is not checked
137 |     ~Thread() {} // neither joins nor detaches; call join() explicitly
138 |     void join() { pthread_join(t, 0); } // the thread's return value is discarded
139 | private:
140 |     pthread_t t;
141 | };
142 | 
143 | class ThreadLocalStorage // pthread implementation: one key holding a void* per thread
144 | {
145 | public:
146 |     ThreadLocalStorage() { pthread_key_create(&key, 0); } // no destructor is registered for stored values
147 |     ~ThreadLocalStorage() { pthread_key_delete(key); }
148 |     void set(void* value) { pthread_setspecific(key, value); }
149 |     void* get() { return pthread_getspecific(key); }
150 | private:
151 |     pthread_key_t key;
152 | };
153 | #endif // (defined _WIN32 && !(defined __MINGW32__))
154 | #else // NCNN_THREADS
155 | class Mutex // no-op stand-in used when NCNN_THREADS is 0
156 | {
157 | public:
158 |     Mutex() {}
159 |     ~Mutex() {}
160 |     void lock() {} // does nothing
161 |     void unlock() {} // does nothing
162 | };
163 | 
164 | class ConditionVariable // no-op stand-in used when NCNN_THREADS is 0
165 | {
166 | public:
167 |     ConditionVariable() {}
168 |     ~ConditionVariable() {}
169 |     void wait(Mutex& /*mutex*/) {} // returns immediately
170 |     void broadcast() {} // does nothing
171 |     void signal() {} // does nothing
172 | };
173 | 
174 | class Thread // no-op stand-in used when NCNN_THREADS is 0
175 | {
176 | public:
177 |     Thread(void* (*/*start*/)(void*), void* /*args*/ = 0) {} // the start routine is never invoked
178 |     ~Thread() {}
179 |     void join() {} // returns immediately
180 | };
181 | 
182 | class ThreadLocalStorage // no-op stand-in used when NCNN_THREADS is 0
183 | {
184 | public:
185 |     ThreadLocalStorage() {}
186 |     ~ThreadLocalStorage() {}
187 |     void set(void* /*value*/) {} // the value is dropped
188 |     void* get() { return 0; } // always returns a null pointer
189 | };
190 | #endif // NCNN_THREADS
191 | 
192 | class MutexLockGuard // scoped lock: locks the mutex on construction, unlocks it on destruction
193 | {
194 | public:
195 |     MutexLockGuard(Mutex& _mutex) : mutex(_mutex) { mutex.lock(); }
196 |     ~MutexLockGuard() { mutex.unlock(); }
197 | private:
198 |     Mutex& mutex; // reference: the guarded Mutex must outlive the guard
199 | };
200 | 
201 | } // namespace ncnn
202 | 
203 | #if NCNN_SIMPLESTL
204 | #include "simplestl.h"
205 | #else
206 | #include <algorithm>
207 | #include <list>
208 | #include <vector>
209 | #include <string>
210 | #endif
211 | 
212 | #endif // __cplusplus
213 | 
214 | #if NCNN_STDIO
215 | #if __ANDROID_API__ >= 8
216 | #include <android/log.h> // NOTE(review): <stdio.h> is not included on this branch although fprintf is used - confirm
217 | #define NCNN_LOGE(...) do { \
218 |     fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); \
219 |     __android_log_print(ANDROID_LOG_WARN, "ncnn", ##__VA_ARGS__); } while(0)
220 | #else // __ANDROID_API__ >= 8
221 | #include <stdio.h>
222 | #define NCNN_LOGE(...) do { \
223 |     fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); } while(0)
224 | #endif // __ANDROID_API__ >= 8
225 | #else // NCNN_STDIO
226 | #define NCNN_LOGE(...) // expands to nothing when NCNN_STDIO is 0
227 | #endif // NCNN_STDIO
228 | 
229 | #endif // NCNN_PLATFORM_H
230 | 


--------------------------------------------------------------------------------
/include/ncnn/simpleocv.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef NCNN_SIMPLEOCV_H
 16 | #define NCNN_SIMPLEOCV_H
 17 | 
 18 | #include "platform.h"
 19 | 
 20 | #if NCNN_SIMPLEOCV
 21 | 
 22 | #include "mat.h"
 23 | 
 24 | #if defined(_MSC_VER) || defined(__GNUC__)
 25 | #pragma push_macro("min")
 26 | #pragma push_macro("max")
 27 | #undef min
 28 | #undef max
 29 | #endif
 30 | 
 31 | // minimal opencv style data structure implementation
 32 | namespace cv {
 33 | 
 34 | struct Size
 35 | {
 36 |     Size()
 37 |         : width(0), height(0)
 38 |     {
 39 |     }
 40 |     Size(int _w, int _h)
 41 |         : width(_w), height(_h)
 42 |     {
 43 |     }
 44 | 
 45 |     int width;
 46 |     int height;
 47 | };
 48 | 
 49 | template<typename _Tp>
 50 | struct Rect_
 51 | {
 52 |     Rect_()
 53 |         : x(0), y(0), width(0), height(0)
 54 |     {
 55 |     }
 56 |     Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h)
 57 |         : x(_x), y(_y), width(_w), height(_h)
 58 |     {
 59 |     }
 60 | 
 61 |     _Tp x;
 62 |     _Tp y;
 63 |     _Tp width;
 64 |     _Tp height;
 65 | 
 66 |     // area
 67 |     _Tp area() const
 68 |     {
 69 |         return width * height;
 70 |     }
 71 | };
 72 | 
 73 | template<typename _Tp>
 74 | static inline Rect_<_Tp>& operator&=(Rect_<_Tp>& a, const Rect_<_Tp>& b)
 75 | {
 76 |     _Tp x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y);
 77 |     a.width = std::min(a.x + a.width, b.x + b.width) - x1;
 78 |     a.height = std::min(a.y + a.height, b.y + b.height) - y1;
 79 |     a.x = x1;
 80 |     a.y = y1;
 81 |     if (a.width <= 0 || a.height <= 0)
 82 |         a = Rect_<_Tp>();
 83 |     return a;
 84 | }
 85 | 
 86 | template<typename _Tp>
 87 | static inline Rect_<_Tp>& operator|=(Rect_<_Tp>& a, const Rect_<_Tp>& b)
 88 | {
 89 |     _Tp x1 = std::min(a.x, b.x), y1 = std::min(a.y, b.y);
 90 |     a.width = std::max(a.x + a.width, b.x + b.width) - x1;
 91 |     a.height = std::max(a.y + a.height, b.y + b.height) - y1;
 92 |     a.x = x1;
 93 |     a.y = y1;
 94 |     return a;
 95 | }
 96 | 
 97 | template<typename _Tp>
 98 | static inline Rect_<_Tp> operator&(const Rect_<_Tp>& a, const Rect_<_Tp>& b)
 99 | {
100 |     Rect_<_Tp> c = a;
101 |     return c &= b;
102 | }
103 | 
104 | template<typename _Tp>
105 | static inline Rect_<_Tp> operator|(const Rect_<_Tp>& a, const Rect_<_Tp>& b)
106 | {
107 |     Rect_<_Tp> c = a;
108 |     return c |= b;
109 | }
110 | 
111 | typedef Rect_<int> Rect;
112 | typedef Rect_<float> Rect2f;
113 | 
// 2D point with coordinates of type _Tp.
template<typename _Tp>
struct Point_
{
    _Tp x;
    _Tp y;

    // The default point is the origin.
    Point_()
        : x(0), y(0)
    {
    }

    // Point at (_x, _y).
    Point_(_Tp _x, _Tp _y)
        : x(_x), y(_y)
    {
    }
};
129 | 
130 | typedef Point_<int> Point;
131 | typedef Point_<float> Point2f;
132 | 
133 | #define CV_8UC1  1
134 | #define CV_8UC3  3
135 | #define CV_8UC4  4
136 | #define CV_32FC1 4
137 | 
138 | struct Mat
139 | {
140 |     Mat()
141 |         : data(0), refcount(0), rows(0), cols(0), c(0)
142 |     {
143 |     }
144 | 
145 |     Mat(int _rows, int _cols, int flags)
146 |         : data(0), refcount(0)
147 |     {
148 |         create(_rows, _cols, flags);
149 |     }
150 | 
151 |     // copy
152 |     Mat(const Mat& m)
153 |         : data(m.data), refcount(m.refcount)
154 |     {
155 |         if (refcount)
156 |             NCNN_XADD(refcount, 1);
157 | 
158 |         rows = m.rows;
159 |         cols = m.cols;
160 |         c = m.c;
161 |     }
162 | 
163 |     Mat(int _rows, int _cols, int flags, void* _data)
164 |         : data((unsigned char*)_data), refcount(0)
165 |     {
166 |         rows = _rows;
167 |         cols = _cols;
168 |         c = flags;
169 |     }
170 | 
171 |     ~Mat()
172 |     {
173 |         release();
174 |     }
175 | 
176 |     // assign
177 |     Mat& operator=(const Mat& m)
178 |     {
179 |         if (this == &m)
180 |             return *this;
181 | 
182 |         if (m.refcount)
183 |             NCNN_XADD(m.refcount, 1);
184 | 
185 |         release();
186 | 
187 |         data = m.data;
188 |         refcount = m.refcount;
189 | 
190 |         rows = m.rows;
191 |         cols = m.cols;
192 |         c = m.c;
193 | 
194 |         return *this;
195 |     }
196 | 
197 |     void create(int _rows, int _cols, int flags)
198 |     {
199 |         release();
200 | 
201 |         rows = _rows;
202 |         cols = _cols;
203 |         c = flags;
204 | 
205 |         if (total() > 0)
206 |         {
207 |             // refcount address must be aligned, so we expand totalsize here
208 |             size_t totalsize = (total() + 3) >> 2 << 2;
209 |             data = (unsigned char*)ncnn::fastMalloc(totalsize + (int)sizeof(*refcount));
210 |             refcount = (int*)(((unsigned char*)data) + totalsize);
211 |             *refcount = 1;
212 |         }
213 |     }
214 | 
215 |     void release()
216 |     {
217 |         if (refcount && NCNN_XADD(refcount, -1) == 1)
218 |             ncnn::fastFree(data);
219 | 
220 |         data = 0;
221 | 
222 |         rows = 0;
223 |         cols = 0;
224 |         c = 0;
225 | 
226 |         refcount = 0;
227 |     }
228 | 
229 |     Mat clone() const
230 |     {
231 |         if (empty())
232 |             return Mat();
233 | 
234 |         Mat m(rows, cols, c);
235 | 
236 |         if (total() > 0)
237 |         {
238 |             memcpy(m.data, data, total());
239 |         }
240 | 
241 |         return m;
242 |     }
243 | 
244 |     bool empty() const
245 |     {
246 |         return data == 0 || total() == 0;
247 |     }
248 | 
249 |     int channels() const
250 |     {
251 |         return c;
252 |     }
253 | 
254 |     size_t total() const
255 |     {
256 |         return cols * rows * c;
257 |     }
258 | 
259 |     const unsigned char* ptr(int y) const
260 |     {
261 |         return data + y * cols * c;
262 |     }
263 | 
264 |     unsigned char* ptr(int y)
265 |     {
266 |         return data + y * cols * c;
267 |     }
268 | 
269 |     // roi
270 |     Mat operator()(const Rect& roi) const
271 |     {
272 |         if (empty())
273 |             return Mat();
274 | 
275 |         Mat m(roi.height, roi.width, c);
276 | 
277 |         int sy = roi.y;
278 |         for (int y = 0; y < roi.height; y++)
279 |         {
280 |             const unsigned char* sptr = ptr(sy) + roi.x * c;
281 |             unsigned char* dptr = m.ptr(y);
282 |             memcpy(dptr, sptr, roi.width * c);
283 |             sy++;
284 |         }
285 | 
286 |         return m;
287 |     }
288 | 
289 |     unsigned char* data;
290 | 
291 |     // pointer to the reference counter;
292 |     // when points to user-allocated data, the pointer is NULL
293 |     int* refcount;
294 | 
295 |     int rows;
296 |     int cols;
297 | 
298 |     int c;
299 | };
300 | 
// Values for the `flags` argument of imread().
// NOTE(review): the values presumably equal the channel count of the decoded
// image (1 = gray, 3 = color) — confirm against the implementation.
#define CV_LOAD_IMAGE_GRAYSCALE 1
#define CV_LOAD_IMAGE_COLOR     3
// Image file input/output. Only the declarations live in this header.
Mat imread(const std::string& path, int flags);
void imwrite(const std::string& path, const Mat& m);

#if NCNN_PIXEL
// Declared only when NCNN_PIXEL is enabled. sw, sh and flags are optional
// and default to zero.
void resize(const Mat& src, Mat& dst, const Size& size, float sw = 0.f, float sh = 0.f, int flags = 0);
#endif // NCNN_PIXEL
309 | 
310 | } // namespace cv
311 | 
312 | #if defined(_MSC_VER) || defined(__GNUC__)
313 | #pragma pop_macro("min")
314 | #pragma pop_macro("max")
315 | #endif
316 | 
317 | #endif // NCNN_SIMPLEOCV
318 | 
319 | #endif // NCNN_SIMPLEOCV_H
320 | 


--------------------------------------------------------------------------------
/include/ncnn/simpleomp.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NCNN_SIMPLEOMP_H
16 | #define NCNN_SIMPLEOMP_H
17 | 
18 | #include "platform.h"
19 | 
20 | #if NCNN_SIMPLEOMP
21 | 
22 | #include <stdint.h>
23 | 
24 | // This minimal openmp runtime implementation only supports the llvm openmp abi
25 | // and only supports #pragma omp parallel for num_threads(X)
26 | 
// Entry points of the minimal OpenMP runtime. They get C linkage when this
// header is compiled as C++.
// NOTE(review): the functions presumably follow the semantics of the
// OpenMP / Intel KMP functions of the same name — confirm against the
// implementation.
#ifdef __cplusplus
extern "C" {
#endif

int omp_get_max_threads();

void omp_set_num_threads(int num_threads);

int omp_get_dynamic();

void omp_set_dynamic(int dynamic);

int omp_get_num_threads();

int omp_get_thread_num();

// kmp_* names are not part of the omp_* family above; see the header note
// about the llvm openmp abi.
int kmp_get_blocktime();

void kmp_set_blocktime(int blocktime);

#ifdef __cplusplus
}
#endif
50 | 
51 | #endif // NCNN_SIMPLEOMP
52 | 
53 | #endif // NCNN_SIMPLEOMP_H
54 | 


--------------------------------------------------------------------------------
/include/ncnn/simplestl.h:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #ifndef NCNN_SIMPLESTL_H
 16 | #define NCNN_SIMPLESTL_H
 17 | 
 18 | #include <stddef.h>
 19 | #include <stdint.h>
 20 | #include <string.h>
 21 | 
#if !NCNN_SIMPLESTL

// the toolchain's <new> is available and declares the placement forms
#include <new>

#else

// <new> is not included in this configuration, so the allocation and
// deallocation functions used by the containers below are declared here.

// allocation functions
void* operator new(size_t size);
void* operator new[](size_t size);
// placement allocation functions
void* operator new(size_t size, void* ptr);
void* operator new[](size_t size, void* ptr);
// deallocation functions
void operator delete(void* ptr);
void operator delete[](void* ptr);
// deallocation functions since c++14 (sized forms)
#if __cplusplus >= 201402L
void operator delete(void* ptr, size_t sz);
void operator delete[](void* ptr, size_t sz);
#endif
// placement deallocation functions
void operator delete(void* ptr, void* voidptr2);
void operator delete[](void* ptr, void* voidptr2);

#endif
 47 | 
 48 | // minimal stl data structure implementation
 49 | namespace std {
 50 | 
 51 | template<typename T>
 52 | const T& max(const T& a, const T& b)
 53 | {
 54 |     return (a < b) ? b : a;
 55 | }
 56 | 
 57 | template<typename T>
 58 | const T& min(const T& a, const T& b)
 59 | {
 60 |     return (a > b) ? b : a;
 61 | }
 62 | 
 63 | template<typename T>
 64 | void swap(T& a, T& b)
 65 | {
 66 |     T temp(a);
 67 |     a = b;
 68 |     b = temp;
 69 | }
 70 | 
 71 | template<typename T1, typename T2>
 72 | struct pair
 73 | {
 74 |     pair()
 75 |         : first(), second()
 76 |     {
 77 |     }
 78 |     pair(const T1& t1, const T2& t2)
 79 |         : first(t1), second(t2)
 80 |     {
 81 |     }
 82 | 
 83 |     T1 first;
 84 |     T2 second;
 85 | };
 86 | 
 87 | template<typename T1, typename T2>
 88 | bool operator==(const pair<T1, T2>& x, const pair<T1, T2>& y)
 89 | {
 90 |     return (x.first == y.first && x.second == y.second);
 91 | }
 92 | template<typename T1, typename T2>
 93 | bool operator<(const pair<T1, T2>& x, const pair<T1, T2>& y)
 94 | {
 95 |     return x.first < y.first || (!(y.first < x.first) && x.second < y.second);
 96 | }
 97 | template<typename T1, typename T2>
 98 | bool operator!=(const pair<T1, T2>& x, const pair<T1, T2>& y)
 99 | {
100 |     return !(x == y);
101 | }
102 | template<typename T1, typename T2>
103 | bool operator>(const pair<T1, T2>& x, const pair<T1, T2>& y)
104 | {
105 |     return y < x;
106 | }
107 | template<typename T1, typename T2>
108 | bool operator<=(const pair<T1, T2>& x, const pair<T1, T2>& y)
109 | {
110 |     return !(y < x);
111 | }
112 | template<typename T1, typename T2>
113 | bool operator>=(const pair<T1, T2>& x, const pair<T1, T2>& y)
114 | {
115 |     return !(x < y);
116 | }
117 | 
118 | template<typename T1, typename T2>
119 | pair<T1, T2> make_pair(const T1& t1, const T2& t2)
120 | {
121 |     return pair<T1, T2>(t1, t2);
122 | }
123 | 
// Doubly linked list node: the stored value plus links to both neighbours.
template<typename T>
struct node
{
    node* prev_;
    node* next_;
    T data_;

    // Unlinked node holding a value-initialized T.
    node()
        : prev_(0), next_(0), data_()
    {
    }
    // Unlinked node holding a copy of t.
    node(const T& t)
        : prev_(0), next_(0), data_(t)
    {
    }
};

// Bidirectional iterator over a chain of node<T>. It is a thin wrapper
// around a node pointer and performs no validity checks.
template<typename T>
struct iter_list
{
    iter_list()
        : curr_(0)
    {
    }
    iter_list(node<T>* n)
        : curr_(n)
    {
    }
    iter_list(const iter_list& i)
        : curr_(i.curr_)
    {
    }
    ~iter_list()
    {
    }

    iter_list& operator=(const iter_list& i)
    {
        curr_ = i.curr_;
        return *this;
    }

    // Element access. These do not modify the iterator itself, so they are
    // const-qualified and usable on const iterators and temporaries.
    T& operator*() const
    {
        return curr_->data_;
    }
    T* operator->() const
    {
        return &(curr_->data_);
    }

    // Two iterators are equal when they refer to the same node. Const-qualified
    // so that const iterators can be compared.
    bool operator==(const iter_list& i) const
    {
        return curr_ == i.curr_;
    }
    bool operator!=(const iter_list& i) const
    {
        return curr_ != i.curr_;
    }

    // Pre-increment / pre-decrement: follow the next_ / prev_ link.
    iter_list& operator++()
    {
        curr_ = curr_->next_;
        return *this;
    }
    iter_list& operator--()
    {
        curr_ = curr_->prev_;
        return *this;
    }

    node<T>* curr_;
};
197 | 
198 | template<typename T>
199 | struct list
200 | {
201 |     typedef iter_list<T> iterator;
202 | 
203 |     list()
204 |     {
205 |         head_ = new node<T>();
206 |         tail_ = head_;
207 |         count_ = 0;
208 |     }
209 |     ~list()
210 |     {
211 |         clear();
212 |         delete head_;
213 |     }
214 |     list(const list& l)
215 |     {
216 |         head_ = new node<T>();
217 |         tail_ = head_;
218 |         count_ = 0;
219 | 
220 |         for (iter_list<T> i = l.begin(); i != l.end(); ++i)
221 |         {
222 |             push_back(*i);
223 |         }
224 |     }
225 | 
226 |     list& operator=(const list& l)
227 |     {
228 |         if (this == &l)
229 |         {
230 |             return *this;
231 |         }
232 |         clear();
233 | 
234 |         for (iter_list<T> i = l.begin(); i != l.end(); ++i)
235 |         {
236 |             push_back(*i);
237 |         }
238 |         return *this;
239 |     }
240 | 
241 |     void clear()
242 |     {
243 |         while (count_ > 0)
244 |         {
245 |             pop_front();
246 |         }
247 |     }
248 | 
249 |     void pop_front()
250 |     {
251 |         if (count_ > 0)
252 |         {
253 |             head_ = head_->next_;
254 |             delete head_->prev_;
255 |             head_->prev_ = 0;
256 |             --count_;
257 |         }
258 |     }
259 | 
260 |     size_t size() const
261 |     {
262 |         return count_;
263 |     }
264 |     iter_list<T> begin() const
265 |     {
266 |         return iter_list<T>(head_);
267 |     }
268 |     iter_list<T> end() const
269 |     {
270 |         return iter_list<T>(tail_);
271 |     }
272 |     bool empty() const
273 |     {
274 |         return count_ == 0;
275 |     }
276 | 
277 |     void push_back(const T& t)
278 |     {
279 |         if (count_ == 0)
280 |         {
281 |             head_ = new node<T>(t);
282 |             head_->prev_ = 0;
283 |             head_->next_ = tail_;
284 |             tail_->prev_ = head_;
285 |             count_ = 1;
286 |         }
287 |         else
288 |         {
289 |             node<T>* temp = new node<T>(t);
290 |             temp->prev_ = tail_->prev_;
291 |             temp->next_ = tail_;
292 |             tail_->prev_->next_ = temp;
293 |             tail_->prev_ = temp;
294 |             ++count_;
295 |         }
296 |     }
297 | 
298 |     iter_list<T> erase(iter_list<T> pos)
299 |     {
300 |         if (pos != end())
301 |         {
302 |             node<T>* temp = pos.curr_;
303 |             if (temp == head_)
304 |             {
305 |                 ++pos;
306 |                 temp->next_->prev_ = 0;
307 |                 head_ = temp->next_;
308 |             }
309 |             else
310 |             {
311 |                 --pos;
312 |                 temp->next_->prev_ = temp->prev_;
313 |                 temp->prev_->next_ = temp->next_;
314 |                 ++pos;
315 |             }
316 |             delete temp;
317 |             --count_;
318 |         }
319 |         return pos;
320 |     }
321 | 
322 | protected:
323 |     node<T>* head_;
324 |     node<T>* tail_;
325 |     size_t count_;
326 | };
327 | 
// Function object that forwards to operator> of T.
template<typename T>
struct greater
{
    bool operator()(const T& x, const T& y) const
    {
        const bool x_is_greater = (x > y);
        return x_is_greater;
    }
};
336 | 
// Function object that forwards to operator< of T.
template<typename T>
struct less
{
    bool operator()(const T& x, const T& y) const
    {
        const bool x_is_less = (x < y);
        return x_is_less;
    }
};
345 | 
// Rearranges [first, last) so that [first, middle) holds the smallest
// elements according to comp, in sorted order. The order of the remaining
// elements in [middle, last) is unspecified.
template<typename RandomAccessIter, typename Compare>
void partial_sort(RandomAccessIter first, RandomAccessIter middle, RandomAccessIter last, Compare comp)
{
    // [TODO] heap sort should be used here, but we simply use bubble sort now
    for (RandomAccessIter i = first; i < middle; ++i)
    {
        // One bubble pass moves the smallest element of [i, last) to position i.
        // Positions before i already hold their final values from earlier
        // passes, so the pass stops at i instead of walking back to first.
        for (RandomAccessIter j = last - 1; j > i; --j)
        {
            if (comp(*j, *(j - 1)))
            {
                swap(*j, *(j - 1));
            }
        }
    }
}
362 | 
// Minimal growable array. Storage is a raw char buffer; elements are created
// with placement new and destroyed explicitly. When the buffer grows, and
// when insert()/erase() shift elements, the objects are relocated with
// memmove rather than copy-constructed.
template<typename T>
struct vector
{
    vector()
        : data_(0), size_(0), capacity_(0)
    {
    }
    // new_size copies of value.
    vector(const size_t new_size, const T& value = T())
        : data_(0), size_(0), capacity_(0)
    {
        resize(new_size, value);
    }
    ~vector()
    {
        clear();
    }
    // Deep copy: default-constructs v.size() elements, then assigns each one.
    vector(const vector& v)
        : data_(0), size_(0), capacity_(0)
    {
        resize(v.size());
        for (size_t i = 0; i < size_; i++)
        {
            data_[i] = v.data_[i];
        }
    }

    vector& operator=(const vector& v)
    {
        if (this == &v)
        {
            return *this;
        }
        // destroy the current elements, then rebuild from v
        resize(0);
        resize(v.size());
        for (size_t i = 0; i < size_; i++)
        {
            data_[i] = v.data_[i];
        }
        return *this;
    }

    // Grows with copies of value or shrinks by destroying the trailing elements.
    void resize(const size_t new_size, const T& value = T())
    {
        try_alloc(new_size);
        if (new_size > size_)
        {
            for (size_t i = size_; i < new_size; i++)
            {
                new (&data_[i]) T(value);
            }
        }
        else if (new_size < size_)
        {
            for (size_t i = new_size; i < size_; i++)
            {
                data_[i].~T();
            }
        }
        size_ = new_size;
    }

    // Destroys all elements and frees the buffer.
    void clear()
    {
        for (size_t i = 0; i < size_; i++)
        {
            data_[i].~T();
        }
        delete[](char*) data_;
        data_ = 0;
        size_ = 0;
        capacity_ = 0;
    }

    T* data() const
    {
        return data_;
    }
    size_t size() const
    {
        return size_;
    }
    T& operator[](size_t i) const
    {
        return data_[i];
    }
    T* begin() const
    {
        return &data_[0];
    }
    T* end() const
    {
        return &data_[size_];
    }
    bool empty() const
    {
        return size_ == 0;
    }

    void push_back(const T& t)
    {
        try_alloc(size_ + 1);
        new (&data_[size_]) T(t);
        size_++;
    }

    // Inserts copies of [b, e) in front of pos. The source range may lie
    // inside this vector.
    void insert(T* pos, T* b, T* e)
    {
        vector* v = 0;
        if (b >= begin() && b < end())
        {
            // the same vector: read from a snapshot, because the buffer may be
            // reallocated and the tail is shifted below
            v = new vector(*this);
            b = v->begin() + (b - begin());
            e = v->begin() + (e - begin());
        }
        size_t diff = pos - begin();
        size_t len = e - b;
        try_alloc(size_ + len);
        pos = begin() + diff; // try_alloc may have moved the buffer
        memmove(static_cast<void*>(pos + len), static_cast<const void*>(pos), (end() - pos) * sizeof(T));
        size_ += len;
        for (size_t i = 0; i < len; i++)
        {
            // The gap is raw storage: its slots are either zero-filled or still
            // hold the bytes of elements that were just relocated. Assigning
            // into them would run operator= on something that is not a live
            // object, so each element is copy-constructed in place instead.
            new (pos) T(*b);
            pos++;
            b++;
        }
        delete v;
    }

    // Destroys the element at pos, closes the gap and returns pos, which now
    // refers to the following element.
    T* erase(T* pos)
    {
        pos->~T();
        memmove(static_cast<void*>(pos), static_cast<const void*>(pos + 1), (end() - pos - 1) * sizeof(T));
        size_--;
        return pos;
    }

protected:
    T* data_;
    size_t size_;
    size_t capacity_;
    // Reallocates to 2 * new_size slots whenever new_size * 3 / 2 exceeds half
    // of the current capacity. The new buffer is zero-filled before the
    // existing elements are moved over.
    void try_alloc(size_t new_size)
    {
        if (new_size * 3 / 2 > capacity_ / 2)
        {
            capacity_ = new_size * 2;
            T* new_data = (T*)new char[capacity_ * sizeof(T)];
            memset(static_cast<void*>(new_data), 0, capacity_ * sizeof(T));
            if (data_)
            {
                memmove(static_cast<void*>(new_data), static_cast<const void*>(data_), sizeof(T) * size_);
                delete[](char*) data_;
            }
            data_ = new_data;
        }
    }
};
521 | 
522 | struct string : public vector<char>
523 | {
524 |     string()
525 |     {
526 |     }
527 |     string(const char* str)
528 |     {
529 |         size_t len = strlen(str);
530 |         resize(len);
531 |         memcpy(data_, str, len);
532 |     }
533 |     const char* c_str() const
534 |     {
535 |         return (const char*)data_;
536 |     }
537 |     bool operator==(const string& str2) const
538 |     {
539 |         return strcmp(data_, str2.data_) == 0;
540 |     }
541 |     bool operator==(const char* str2) const
542 |     {
543 |         return strcmp(data_, str2) == 0;
544 |     }
545 |     bool operator!=(const char* str2) const
546 |     {
547 |         return strcmp(data_, str2) != 0;
548 |     }
549 |     string& operator+=(const string& str1)
550 |     {
551 |         insert(end(), str1.begin(), str1.end());
552 |         return *this;
553 |     }
554 | };
555 | 
556 | inline string operator+(const string& str1, const string& str2)
557 | {
558 |     string str(str1);
559 |     str.insert(str.end(), str2.begin(), str2.end());
560 |     return str;
561 | }
562 | 
563 | } // namespace std
564 | 
565 | #endif // NCNN_SIMPLESTL_H
566 | 


--------------------------------------------------------------------------------
/lib/cmake/ncnn/ncnn-release.cmake:
--------------------------------------------------------------------------------
 1 | #----------------------------------------------------------------
 2 | # Generated CMake target import file for configuration "Release".
 3 | #----------------------------------------------------------------
 4 | 
 5 | # Commands may need to know the format version.
 6 | set(CMAKE_IMPORT_FILE_VERSION 1)
 7 | 
 8 | # Import target "ncnn" for configuration "Release"
 9 | set_property(TARGET ncnn APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
10 | set_target_properties(ncnn PROPERTIES
11 |   IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX"
12 |   IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libncnn.a"
13 |   )
14 | 
15 | list(APPEND _IMPORT_CHECK_TARGETS ncnn )
16 | list(APPEND _IMPORT_CHECK_FILES_FOR_ncnn "${_IMPORT_PREFIX}/lib/libncnn.a" )
17 | 
18 | # Commands beyond this point should not need to know the version.
19 | set(CMAKE_IMPORT_FILE_VERSION)
20 | 


--------------------------------------------------------------------------------
/lib/cmake/ncnn/ncnn.cmake:
--------------------------------------------------------------------------------
# Generated by CMake
#
# Defines the imported static library target "ncnn", then loads every
# per-configuration file (ncnn-*.cmake) found next to this one and checks
# that the files they reference exist.

if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5)
   message(FATAL_ERROR "CMake >= 2.6.0 required")
endif()
# Policy settings are scoped to this file; every exit path below pops them again.
cmake_policy(PUSH)
cmake_policy(VERSION 2.6)
#----------------------------------------------------------------
# Generated CMake target import file.
#----------------------------------------------------------------

# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)

# Protect against multiple inclusion, which would fail when already imported targets are added once more.
set(_targetsDefined)
set(_targetsNotDefined)
set(_expectedTargets)
foreach(_expectedTarget ncnn)
  list(APPEND _expectedTargets ${_expectedTarget})
  if(NOT TARGET ${_expectedTarget})
    list(APPEND _targetsNotDefined ${_expectedTarget})
  endif()
  if(TARGET ${_expectedTarget})
    list(APPEND _targetsDefined ${_expectedTarget})
  endif()
endforeach()
# Every expected target already exists: clean up and leave without redefining anything.
if("${_targetsDefined}" STREQUAL "${_expectedTargets}")
  unset(_targetsDefined)
  unset(_targetsNotDefined)
  unset(_expectedTargets)
  set(CMAKE_IMPORT_FILE_VERSION)
  cmake_policy(POP)
  return()
endif()
# Only part of the export set exists already: refuse to continue.
if(NOT "${_targetsDefined}" STREQUAL "")
  message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n")
endif()
unset(_targetsDefined)
unset(_targetsNotDefined)
unset(_expectedTargets)


# Compute the installation prefix relative to this file.
# The first call yields this file's directory; the three following calls each
# strip one more path component.
get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
if(_IMPORT_PREFIX STREQUAL "/")
  set(_IMPORT_PREFIX "")
endif()

# Create imported target ncnn
add_library(ncnn STATIC IMPORTED)

# Usage requirements handed to consumers: the header directory, the OpenMP
# imported target and position independent code.
set_target_properties(ncnn PROPERTIES
  INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/ncnn"
  INTERFACE_LINK_LIBRARIES "OpenMP::OpenMP_CXX"
  INTERFACE_POSITION_INDEPENDENT_CODE "ON"
)

if(CMAKE_VERSION VERSION_LESS 2.8.12)
  message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.")
endif()

# Load information for each installed configuration.
get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
file(GLOB CONFIG_FILES "${_DIR}/ncnn-*.cmake")
foreach(f ${CONFIG_FILES})
  include(${f})
endforeach()

# Cleanup temporary variables.
set(_IMPORT_PREFIX)

# Loop over all imported files and verify that they actually exist
foreach(target ${_IMPORT_CHECK_TARGETS} )
  foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} )
    if(NOT EXISTS "${file}" )
      message(FATAL_ERROR "The imported target \"${target}\" references the file
   \"${file}\"
but this file does not exist.  Possible reasons include:
* The file was deleted, renamed, or moved to another location.
* An install or uninstall procedure did not complete successfully.
* The installation package was faulty and contained
   \"${CMAKE_CURRENT_LIST_FILE}\"
but not all the files it references.
")
    endif()
  endforeach()
  unset(_IMPORT_CHECK_FILES_FOR_${target})
endforeach()
unset(_IMPORT_CHECK_TARGETS)

# This file does not depend on other imported targets which have
# been exported from the same project but in a separate export set.

# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
101 | 


--------------------------------------------------------------------------------
/lib/cmake/ncnn/ncnnConfig.cmake:
--------------------------------------------------------------------------------
 1 | set(NCNN_OPENMP ON)
 2 | set(NCNN_VULKAN OFF)
 3 | set(NCNN_SYSTEM_GLSLANG ON)
 4 | 
 5 | if(NCNN_OPENMP)
 6 |     find_package(OpenMP)
 7 | endif()
 8 | 
 9 | if(NCNN_VULKAN)
10 |     find_package(Vulkan REQUIRED)
11 | 
12 |     if(NCNN_SYSTEM_GLSLANG)
13 |         set(GLSLANG_TARGET_DIR "")
14 |     else()
15 |         set(GLSLANG_TARGET_DIR "${CMAKE_CURRENT_LIST_DIR}/..")
16 |     endif(NCNN_SYSTEM_GLSLANG)
17 | 
18 |     find_package(Threads)
19 | 
20 |     include(${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake)
21 |     include(${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake)
22 |     if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
23 |         # hlsl support can be optional
24 |         include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
25 |     endif()
26 |     include(${GLSLANG_TARGET_DIR}/glslangTargets.cmake)
27 |     include(${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake)
28 | endif(NCNN_VULKAN)
29 | 
30 | include(${CMAKE_CURRENT_LIST_DIR}/ncnn.cmake)
31 | 


--------------------------------------------------------------------------------
/lib/libncnn.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/lib/libncnn.a


--------------------------------------------------------------------------------
/test_jpg/10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/test_jpg/10.jpg


--------------------------------------------------------------------------------
/test_jpg/105.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/test_jpg/105.jpg


--------------------------------------------------------------------------------
/test_jpg/106.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/test_jpg/106.jpg


--------------------------------------------------------------------------------
/test_jpg/107.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/test_jpg/107.jpg


--------------------------------------------------------------------------------
/yolov5.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <dirent.h>
  3 | #include <string.h>
  4 | #include <stdlib.h>
  5 | #include <fstream>
  6 | #include <chrono>
  7 | #include <algorithm>
  8 | #include <vector>
  9 | #include <opencv2/core/core.hpp>
 10 | #include <opencv2/highgui/highgui.hpp>
 11 | #include <opencv2/imgproc/imgproc.hpp>
 12 | #include <opencv2/opencv.hpp>
 13 | // #include "vulkan/vulkan.hpp"
 14 | #include "net.h"
 15 | 
 16 | using namespace std;
 17 | 
 18 | 
 19 | const int numCLasses = 2;
 20 | //const int frameWidth = 1920;
 21 | //const int frameHeight = 1080;
 22 | std::vector<std::string> classnames;
 23 | 
 24 | vector<vector<vector<int>>> anchors{{{10, 13}, {16, 30}, {33, 23}},
 25 |                                     {{30, 61}, {62, 45}, {59, 119}},
 26 |                                     {{116, 90}, {156, 198}, {373, 326}}};
 27 | vector<vector<float>> dets;
 28 | 
 29 | inline float sigmoid(float x){
 30 |     return 1.0f / (1.0f + exp(-x));
 31 | }
 32 | 
 33 | void decodeResult(const ncnn::Mat& data, int stride, vector<vector<int>> anchors, float scoreThresh, int frameWidth, int frameHeight)
 34 | {
 35 |     for (int c=0; c<data.c; c++)
 36 |     {
 37 |         const float* ptr = data.channel(c);
 38 |         for (int y=0; y<data.h; y++)
 39 |         {
 40 |             float score = sigmoid(ptr[4]);
 41 |             if (score > scoreThresh)
 42 |             {
 43 |                 vector<float> det(6);
 44 |                 det[1] = (sigmoid(ptr[0] )* 2 - 0.5 + y % (int)(640 / stride)) * stride * frameWidth / 640; //center_x
 45 |                 det[2] = (sigmoid(ptr[1]) * 2 - 0.5 + (int)(y / (640 / stride))) * stride * frameHeight / 640; //center_y
 46 |                 det[3] = pow((sigmoid(ptr[2]) * 2), 2) * anchors[c][0] * frameWidth / 640; //w
 47 |                 det[4] = pow((sigmoid(ptr[3]) * 2), 2) * anchors[c][1] * frameHeight / 640; //h
 48 | 
 49 |                 det[1] = det[1] - det[3] / 2; //left
 50 |                 det[2] = det[2] - det[4] / 2; //top
 51 |                 det[3] = det[1] + det[3]; //right
 52 |                 det[4] = det[2] + det[4]; //bottom
 53 | 
 54 |                 for (int i=5; i<numCLasses+5; i++)
 55 |                 {
 56 |                     float conf = sigmoid(ptr[i]);
 57 |                     if (conf * score > det[0])
 58 |                     {
 59 |                         det[0] = conf * score; //score
 60 |                         det[5] = i - 5; //class_id
 61 |                     }
 62 |                 }
 63 |                 dets.push_back(det);
 64 |             }
 65 |             ptr += data.w;
 66 |         }
 67 |     }
 68 |     cout << "decodeResult done" << endl;
 69 | }
 70 | 
 71 | void nonMaxSuppression(float iouThresh)
 72 | {
 73 |     int length = dets.size();
 74 |     int index = length - 1;
 75 | 
 76 |     sort(dets.begin(), dets.end());
 77 |     vector<float> areas(length);
 78 |     for (int i=0; i<length; i++)
 79 |     {
 80 |         areas[i] = (dets[i][4] - dets[i][2]) * (dets[i][3] - dets[i][1]);
 81 |     }
 82 |     
 83 |     while (index  > 0)
 84 |     {
 85 |         int i = 0;
 86 |         while (i < index)
 87 |         {
 88 |             float left = max(dets[index][1], dets[i][1]);
 89 |             float top = max(dets[index][2], dets[i][2]);
 90 |             float right = min(dets[index][3], dets[i][3]);
 91 |             float bottom = min(dets[index][4], dets[i][4]);
 92 |             float overlap = max(0.0f, right - left) * max(0.0f, bottom - top);
 93 |             if (overlap / (areas[index] + areas[i] - overlap) > iouThresh)
 94 |             {
 95 |                 areas.erase(areas.begin() + i);
 96 |                 dets.erase(dets.begin() + i);
 97 |                 index --;
 98 |             }
 99 |             else
100 |             {
101 |                 i++;
102 |             }
103 |         }
104 |         index--;
105 |     }
106 |     cout << "nonMaxSuppression done" << endl;
107 | }
108 | 
109 | bool detect(ncnn::Net& net, string img_path)
110 | {
111 |     cv::Mat frame, img;
112 |     frame = cv::imread(img_path);
113 |     if(frame.empty())
114 |     {
115 |         cout << "img is None : " << img_path << endl;
116 |         return false;
117 |     }
118 |     const auto t0 = std::chrono::system_clock::now(); 
119 |     
120 |     ncnn::Mat input, output0, output1, output2;
121 |     ncnn::Extractor extractor = net.create_extractor();
122 |     cv::resize(frame, img, cv::Size(640, 640));
123 |     cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
124 |         
125 |     cout << "resize cvtColor done......"<<endl;
126 |         
127 |     const auto t1 = std::chrono::system_clock::now();
128 |     input = ncnn::Mat::from_pixels(img.data, ncnn::Mat::PIXEL_RGB, img.cols, img.rows);
129 |     float norm[3] = {1/255.f, 1/255.f, 1/255.f};
130 |     float mean[3] = {0, 0, 0};
131 |     input.substract_mean_normalize(mean,norm);   
132 |     
133 |     extractor.input("images", input);   
134 |     extractor.extract("output", output0);    
135 |     extractor.extract("857", output1);
136 |     extractor.extract("877", output2);
137 |     
138 |     cout << "extractor.extract 877 done......"<<endl;
139 | 
140 |     const auto t2 = std::chrono::system_clock::now();
141 |     
142 |     dets.clear();
143 |     decodeResult(output0, 8,  anchors[0], 0.6, frame.cols, frame.rows);
144 |     decodeResult(output1, 16, anchors[1], 0.6, frame.cols, frame.rows);
145 |     decodeResult(output2, 32, anchors[2], 0.6, frame.cols, frame.rows);
146 |     nonMaxSuppression(0.5);
147 | 
148 |     const auto t3 = std::chrono::system_clock::now();
149 | 
150 |     for (int i=0; i<dets.size(); i++)
151 |     {
152 |               
153 |         float left = dets[i][1];
154 |         float top = dets[i][2];
155 |         float right = dets[i][3];
156 |         float bottom = dets[i][4];
157 |         float score = dets[i][0];
158 |         int classID = dets[i][5];
159 |             
160 |         cout << "dets[" << i << "]: " << score << "," <<classID << "," <<left << "," <<top << "," <<right << "," << bottom <<endl;
161 |             
162 |         cv::rectangle(frame, cv::Rect(left, top, (right - left), (bottom - top)), cv::Scalar(255, 255, 0), 2);
163 |         cv::putText(frame,
164 |                     classnames[classID] + ": " + cv::format("%.2f", score),
165 |                     cv::Point(left, top),
166 |                     cv::FONT_HERSHEY_SIMPLEX, (right - left) / 200, cv::Scalar(255, 255, 0), 2);
167 |     }  
168 |     //cv::imshow("", frame);
169 |     //if(cv::waitKey(1)== 27) break;
170 |     int pos1 = img_path.find_last_of('/');
171 |     string name = img_path.substr(pos1 + 1);
172 |     cv::imwrite("result/" + name, frame);
173 |     const auto t4 = std::chrono::system_clock::now();
174 |     
175 |     cout << "Inference Time: " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() * 1e-3 << "ms  ";
176 |     cout << "Post-processing Time: " << std::chrono::duration_cast<std::chrono::microseconds>(t3 - t2).count() * 1e-3 << "ms  ";
177 |     cout << "Total Time: " << std::chrono::duration_cast<std::chrono::microseconds>(t4 - t0).count() * 1e-3 << "ms" << endl;
178 |     
179 |     return true;
180 | }
181 | 
/**
 * Append the name of every entry of a directory, except "." and "..",
 * to `file_names`.
 *
 * @param p_dir_name  directory to list
 * @param file_names  receives the entry names (names only, no directory prefix)
 * @return 0 on success, -1 if the directory could not be opened
 */
int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names)
{
    DIR *dir_handle = opendir(p_dir_name);
    if (!dir_handle)
        return -1;

    for (struct dirent *entry = readdir(dir_handle); entry != nullptr; entry = readdir(dir_handle))
    {
        const char *entry_name = entry->d_name;
        const bool is_self = (strcmp(entry_name, ".") == 0);
        const bool is_parent = (strcmp(entry_name, "..") == 0);
        if (is_self || is_parent)
            continue;

        file_names.push_back(std::string(entry_name));
    }

    closedir(dir_handle);
    return 0;
}
203 | 
204 | int main0()
205 | {
206 |     std::ifstream f("/zhanghao/code/yolov5_to_ncnn/ncnn/yw_5s/voc.names");
207 |     std::string name = "";
208 |     while (std::getline(f, name))
209 |     {
210 |         classnames.push_back(name);
211 |     }
212 | 
213 |     ncnn::Net net;
214 |     net.opt.num_threads=12;
215 | //     net.opt.use_vulkan_compute = 1;
216 | //     net.set_vulkan_device(0);
217 | 
218 |     net.load_param("/zhanghao/code/yolov5_to_ncnn/ncnn/yw_5s/yw_5s.param");
219 |     net.load_model("/zhanghao/code/yolov5_to_ncnn/ncnn/yw_5s/yw_5s.bin");
220 |     
221 |     cout << "load param && model done......"<<endl;
222 |     
223 |     for(int j=0;j<100;j++)
224 |         detect(net, "/zhanghao/code/yolov5_to_ncnn/ncnn/test_jpg/100.jpg");
225 | 
226 |     return 0;
227 | }
228 | 
229 | int main(int argc, char** argv)
230 | {
231 |     if (argc != 2)
232 |     {
233 |         fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
234 |         return -1;
235 |     }
236 |     
237 |     std::ifstream f("/zhanghao/code/yolov5_to_ncnn/ncnn/yw_5s/voc.names");
238 |     std::string name = "";
239 |     while (std::getline(f, name))
240 |     {
241 |         classnames.push_back(name);
242 |     }
243 |     ncnn::Net net;
244 |     net.opt.num_threads=12;
245 | //     net.opt.use_vulkan_compute = 1;
246 | //     net.set_vulkan_device(0);
247 | 
248 |     net.load_param("/zhanghao/code/yolov5_to_ncnn/ncnn/yw_5s/yw_5s.param");
249 |     net.load_model("/zhanghao/code/yolov5_to_ncnn/ncnn/yw_5s/yw_5s.bin");
250 |     
251 |     cout << "load param && model done......"<<endl;
252 |     
253 |     const char* imagepath = argv[1];
254 |     std::vector<std::string> file_names;
255 |     if (read_files_in_dir(imagepath, file_names) < 0) 
256 |     {
257 |         printf("read_files_in_dir failed.");
258 |         return -1;
259 |     }
260 |     
261 |     for (int f = 0; f < (int)file_names.size(); f++) 
262 |     {
263 |         detect(net, std::string(imagepath) + "/" + file_names[f]);
264 |     }
265 | 
266 |     return 0;
267 | }


--------------------------------------------------------------------------------
/yw_5s/voc.names:
--------------------------------------------------------------------------------
1 | yw_gkxfw
2 | yw_nc


--------------------------------------------------------------------------------
/yw_5s/yw_5s.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/yw_5s/yw_5s.bin


--------------------------------------------------------------------------------
/yw_5s/yw_5s.param:
--------------------------------------------------------------------------------
  1 | 7767517
  2 | 197 224
  3 | Input            images                   0 1 images
  4 | Split            splitncnn_input0         1 4 images images_splitncnn_0 images_splitncnn_1 images_splitncnn_2 images_splitncnn_3
  5 | Interp           Resize_28                1 1 images_splitncnn_3 195 0=1 1=1.000000e+00 2=1.000000e+00 3=320 4=320 6=0
  6 | Interp           Resize_57                1 1 images_splitncnn_2 224 0=1 1=1.000000e+00 2=1.000000e+00 3=320 4=320 6=0
  7 | Interp           Resize_86                1 1 images_splitncnn_1 253 0=1 1=1.000000e+00 2=1.000000e+00 3=320 4=320 6=0
  8 | Interp           Resize_115               1 1 images_splitncnn_0 282 0=1 1=1.000000e+00 2=1.000000e+00 3=320 4=320 6=0
  9 | Concat           Concat_116               4 1 195 224 253 282 283 0=0
 10 | Convolution      Conv_117                 1 1 283 284 0=32 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=3456
 11 | HardSwish        Div_125                  1 1 284 292 0=1.666667e-01 1=5.000000e-01
 12 | Convolution      Conv_126                 1 1 292 293 0=64 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=18432
 13 | HardSwish        Div_134                  1 1 293 301 0=1.666667e-01 1=5.000000e-01
 14 | Split            splitncnn_0              1 2 301 301_splitncnn_0 301_splitncnn_1
 15 | Convolution      Conv_135                 1 1 301_splitncnn_1 302 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=2048
 16 | HardSwish        Div_143                  1 1 302 310 0=1.666667e-01 1=5.000000e-01
 17 | Split            splitncnn_1              1 2 310 310_splitncnn_0 310_splitncnn_1
 18 | Convolution      Conv_144                 1 1 310_splitncnn_1 311 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=1024
 19 | HardSwish        Div_152                  1 1 311 319 0=1.666667e-01 1=5.000000e-01
 20 | Convolution      Conv_153                 1 1 319 320 0=32 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=9216
 21 | HardSwish        Div_161                  1 1 320 328 0=1.666667e-01 1=5.000000e-01
 22 | BinaryOp         Add_162                  2 1 310_splitncnn_0 328 329 0=0
 23 | Convolution      Conv_163                 1 1 329 330 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=1024
 24 | Convolution      Conv_164                 1 1 301_splitncnn_0 331 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=2048
 25 | Concat           Concat_165               2 1 330 331 332 0=0
 26 | BatchNorm        BatchNormalization_166   1 1 332 333 0=64
 27 | ReLU             LeakyRelu_167            1 1 333 334 0=1.000000e-01
 28 | Convolution      Conv_168                 1 1 334 335 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4096
 29 | HardSwish        Div_176                  1 1 335 343 0=1.666667e-01 1=5.000000e-01
 30 | Convolution      Conv_177                 1 1 343 344 0=128 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=73728
 31 | HardSwish        Div_185                  1 1 344 352 0=1.666667e-01 1=5.000000e-01
 32 | Split            splitncnn_2              1 2 352 352_splitncnn_0 352_splitncnn_1
 33 | Convolution      Conv_186                 1 1 352_splitncnn_1 353 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=8192
 34 | HardSwish        Div_194                  1 1 353 361 0=1.666667e-01 1=5.000000e-01
 35 | Split            splitncnn_3              1 2 361 361_splitncnn_0 361_splitncnn_1
 36 | Convolution      Conv_195                 1 1 361_splitncnn_1 362 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4096
 37 | HardSwish        Div_203                  1 1 362 370 0=1.666667e-01 1=5.000000e-01
 38 | Convolution      Conv_204                 1 1 370 371 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
 39 | HardSwish        Div_212                  1 1 371 379 0=1.666667e-01 1=5.000000e-01
 40 | BinaryOp         Add_213                  2 1 361_splitncnn_0 379 380 0=0
 41 | Split            splitncnn_4              1 2 380 380_splitncnn_0 380_splitncnn_1
 42 | Convolution      Conv_214                 1 1 380_splitncnn_1 381 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4096
 43 | HardSwish        Div_222                  1 1 381 389 0=1.666667e-01 1=5.000000e-01
 44 | Convolution      Conv_223                 1 1 389 390 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
 45 | HardSwish        Div_231                  1 1 390 398 0=1.666667e-01 1=5.000000e-01
 46 | BinaryOp         Add_232                  2 1 380_splitncnn_0 398 399 0=0
 47 | Split            splitncnn_5              1 2 399 399_splitncnn_0 399_splitncnn_1
 48 | Convolution      Conv_233                 1 1 399_splitncnn_1 400 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4096
 49 | HardSwish        Div_241                  1 1 400 408 0=1.666667e-01 1=5.000000e-01
 50 | Convolution      Conv_242                 1 1 408 409 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
 51 | HardSwish        Div_250                  1 1 409 417 0=1.666667e-01 1=5.000000e-01
 52 | BinaryOp         Add_251                  2 1 399_splitncnn_0 417 418 0=0
 53 | Convolution      Conv_252                 1 1 418 419 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=4096
 54 | Convolution      Conv_253                 1 1 352_splitncnn_0 420 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=8192
 55 | Concat           Concat_254               2 1 419 420 421 0=0
 56 | BatchNorm        BatchNormalization_255   1 1 421 422 0=128
 57 | ReLU             LeakyRelu_256            1 1 422 423 0=1.000000e-01
 58 | Convolution      Conv_257                 1 1 423 424 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384
 59 | HardSwish        Div_265                  1 1 424 432 0=1.666667e-01 1=5.000000e-01
 60 | Split            splitncnn_6              1 2 432 432_splitncnn_0 432_splitncnn_1
 61 | Convolution      Conv_266                 1 1 432_splitncnn_1 433 0=256 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=294912
 62 | HardSwish        Div_274                  1 1 433 441 0=1.666667e-01 1=5.000000e-01
 63 | Split            splitncnn_7              1 2 441 441_splitncnn_0 441_splitncnn_1
 64 | Convolution      Conv_275                 1 1 441_splitncnn_1 442 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=32768
 65 | HardSwish        Div_283                  1 1 442 450 0=1.666667e-01 1=5.000000e-01
 66 | Split            splitncnn_8              1 2 450 450_splitncnn_0 450_splitncnn_1
 67 | Convolution      Conv_284                 1 1 450_splitncnn_1 451 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384
 68 | HardSwish        Div_292                  1 1 451 459 0=1.666667e-01 1=5.000000e-01
 69 | Convolution      Conv_293                 1 1 459 460 0=128 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=147456
 70 | HardSwish        Div_301                  1 1 460 468 0=1.666667e-01 1=5.000000e-01
 71 | BinaryOp         Add_302                  2 1 450_splitncnn_0 468 469 0=0
 72 | Split            splitncnn_9              1 2 469 469_splitncnn_0 469_splitncnn_1
 73 | Convolution      Conv_303                 1 1 469_splitncnn_1 470 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384
 74 | HardSwish        Div_311                  1 1 470 478 0=1.666667e-01 1=5.000000e-01
 75 | Convolution      Conv_312                 1 1 478 479 0=128 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=147456
 76 | HardSwish        Div_320                  1 1 479 487 0=1.666667e-01 1=5.000000e-01
 77 | BinaryOp         Add_321                  2 1 469_splitncnn_0 487 488 0=0
 78 | Split            splitncnn_10             1 2 488 488_splitncnn_0 488_splitncnn_1
 79 | Convolution      Conv_322                 1 1 488_splitncnn_1 489 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384
 80 | HardSwish        Div_330                  1 1 489 497 0=1.666667e-01 1=5.000000e-01
 81 | Convolution      Conv_331                 1 1 497 498 0=128 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=147456
 82 | HardSwish        Div_339                  1 1 498 506 0=1.666667e-01 1=5.000000e-01
 83 | BinaryOp         Add_340                  2 1 488_splitncnn_0 506 507 0=0
 84 | Convolution      Conv_341                 1 1 507 508 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=16384
 85 | Convolution      Conv_342                 1 1 441_splitncnn_0 509 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=32768
 86 | Concat           Concat_343               2 1 508 509 510 0=0
 87 | BatchNorm        BatchNormalization_344   1 1 510 511 0=256
 88 | ReLU             LeakyRelu_345            1 1 511 512 0=1.000000e-01
 89 | Convolution      Conv_346                 1 1 512 513 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536
 90 | HardSwish        Div_354                  1 1 513 521 0=1.666667e-01 1=5.000000e-01
 91 | Split            splitncnn_11             1 2 521 521_splitncnn_0 521_splitncnn_1
 92 | Convolution      Conv_355                 1 1 521_splitncnn_1 522 0=512 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=1179648
 93 | HardSwish        Div_363                  1 1 522 530 0=1.666667e-01 1=5.000000e-01
 94 | Convolution      Conv_364                 1 1 530 531 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=131072
 95 | HardSwish        Div_372                  1 1 531 539 0=1.666667e-01 1=5.000000e-01
 96 | Split            splitncnn_12             1 4 539 539_splitncnn_0 539_splitncnn_1 539_splitncnn_2 539_splitncnn_3
 97 | Pooling          MaxPool_373              1 1 539_splitncnn_3 540 0=0 1=5 11=5 2=1 12=1 3=2 13=2 14=2 15=2 5=1
 98 | Pooling          MaxPool_374              1 1 539_splitncnn_2 541 0=0 1=9 11=9 2=1 12=1 3=4 13=4 14=4 15=4 5=1
 99 | Pooling          MaxPool_375              1 1 539_splitncnn_1 542 0=0 1=13 11=13 2=1 12=1 3=6 13=6 14=6 15=6 5=1
100 | Concat           Concat_376               4 1 539_splitncnn_0 540 541 542 543 0=0
101 | Convolution      Conv_377                 1 1 543 544 0=512 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=524288
102 | HardSwish        Div_385                  1 1 544 552 0=1.666667e-01 1=5.000000e-01
103 | Split            splitncnn_13             1 2 552 552_splitncnn_0 552_splitncnn_1
104 | Convolution      Conv_386                 1 1 552_splitncnn_1 553 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=131072
105 | HardSwish        Div_394                  1 1 553 561 0=1.666667e-01 1=5.000000e-01
106 | Convolution      Conv_395                 1 1 561 562 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536
107 | HardSwish        Div_403                  1 1 562 570 0=1.666667e-01 1=5.000000e-01
108 | Convolution      Conv_404                 1 1 570 571 0=256 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=589824
109 | HardSwish        Div_412                  1 1 571 579 0=1.666667e-01 1=5.000000e-01
110 | Convolution      Conv_413                 1 1 579 580 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=65536
111 | Convolution      Conv_414                 1 1 552_splitncnn_0 581 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=131072
112 | Concat           Concat_415               2 1 580 581 582 0=0
113 | BatchNorm        BatchNormalization_416   1 1 582 583 0=512
114 | ReLU             LeakyRelu_417            1 1 583 584 0=1.000000e-01
115 | Convolution      Conv_418                 1 1 584 585 0=512 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=262144
116 | HardSwish        Div_426                  1 1 585 593 0=1.666667e-01 1=5.000000e-01
117 | Convolution      Conv_427                 1 1 593 594 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=131072
118 | HardSwish        Div_435                  1 1 594 602 0=1.666667e-01 1=5.000000e-01
119 | Split            splitncnn_14             1 2 602 602_splitncnn_0 602_splitncnn_1
120 | Interp           Resize_437               1 1 602_splitncnn_1 612 0=1 1=2.000000e+00 2=2.000000e+00 3=0 4=0 6=0
121 | Concat           Concat_438               2 1 612 521_splitncnn_0 613 0=0
122 | Split            splitncnn_15             1 2 613 613_splitncnn_0 613_splitncnn_1
123 | Convolution      Conv_439                 1 1 613_splitncnn_1 614 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536
124 | HardSwish        Div_447                  1 1 614 622 0=1.666667e-01 1=5.000000e-01
125 | Convolution      Conv_448                 1 1 622 623 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384
126 | HardSwish        Div_456                  1 1 623 631 0=1.666667e-01 1=5.000000e-01
127 | Convolution      Conv_457                 1 1 631 632 0=128 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=147456
128 | HardSwish        Div_465                  1 1 632 640 0=1.666667e-01 1=5.000000e-01
129 | Convolution      Conv_466                 1 1 640 641 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=16384
130 | Convolution      Conv_467                 1 1 613_splitncnn_0 642 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=65536
131 | Concat           Concat_468               2 1 641 642 643 0=0
132 | BatchNorm        BatchNormalization_469   1 1 643 644 0=256
133 | ReLU             LeakyRelu_470            1 1 644 645 0=1.000000e-01
134 | Convolution      Conv_471                 1 1 645 646 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536
135 | HardSwish        Div_479                  1 1 646 654 0=1.666667e-01 1=5.000000e-01
136 | Convolution      Conv_480                 1 1 654 655 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=32768
137 | HardSwish        Div_488                  1 1 655 663 0=1.666667e-01 1=5.000000e-01
138 | Split            splitncnn_16             1 2 663 663_splitncnn_0 663_splitncnn_1
139 | Interp           Resize_490               1 1 663_splitncnn_1 673 0=1 1=2.000000e+00 2=2.000000e+00 3=0 4=0 6=0
140 | Concat           Concat_491               2 1 673 432_splitncnn_0 674 0=0
141 | Split            splitncnn_17             1 2 674 674_splitncnn_0 674_splitncnn_1
142 | Convolution      Conv_492                 1 1 674_splitncnn_1 675 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384
143 | HardSwish        Div_500                  1 1 675 683 0=1.666667e-01 1=5.000000e-01
144 | Convolution      Conv_501                 1 1 683 684 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4096
145 | HardSwish        Div_509                  1 1 684 692 0=1.666667e-01 1=5.000000e-01
146 | Convolution      Conv_510                 1 1 692 693 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864
147 | HardSwish        Div_518                  1 1 693 701 0=1.666667e-01 1=5.000000e-01
148 | Convolution      Conv_519                 1 1 701 702 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=4096
149 | Convolution      Conv_520                 1 1 674_splitncnn_0 703 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=16384
150 | Concat           Concat_521               2 1 702 703 704 0=0
151 | BatchNorm        BatchNormalization_522   1 1 704 705 0=128
152 | ReLU             LeakyRelu_523            1 1 705 706 0=1.000000e-01
153 | Convolution      Conv_524                 1 1 706 707 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384
154 | HardSwish        Div_532                  1 1 707 715 0=1.666667e-01 1=5.000000e-01
155 | Split            splitncnn_18             1 2 715 715_splitncnn_0 715_splitncnn_1
156 | Convolution      Conv_533                 1 1 715_splitncnn_1 716 0=128 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=147456
157 | HardSwish        Div_541                  1 1 716 724 0=1.666667e-01 1=5.000000e-01
158 | Concat           Concat_542               2 1 724 663_splitncnn_0 725 0=0
159 | Split            splitncnn_19             1 2 725 725_splitncnn_0 725_splitncnn_1
160 | Convolution      Conv_543                 1 1 725_splitncnn_1 726 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=32768
161 | HardSwish        Div_551                  1 1 726 734 0=1.666667e-01 1=5.000000e-01
162 | Convolution      Conv_552                 1 1 734 735 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384
163 | HardSwish        Div_560                  1 1 735 743 0=1.666667e-01 1=5.000000e-01
164 | Convolution      Conv_561                 1 1 743 744 0=128 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=147456
165 | HardSwish        Div_569                  1 1 744 752 0=1.666667e-01 1=5.000000e-01
166 | Convolution      Conv_570                 1 1 752 753 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=16384
167 | Convolution      Conv_571                 1 1 725_splitncnn_0 754 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=32768
168 | Concat           Concat_572               2 1 753 754 755 0=0
169 | BatchNorm        BatchNormalization_573   1 1 755 756 0=256
170 | ReLU             LeakyRelu_574            1 1 756 757 0=1.000000e-01
171 | Convolution      Conv_575                 1 1 757 758 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536
172 | HardSwish        Div_583                  1 1 758 766 0=1.666667e-01 1=5.000000e-01
173 | Split            splitncnn_20             1 2 766 766_splitncnn_0 766_splitncnn_1
174 | Convolution      Conv_584                 1 1 766_splitncnn_1 767 0=256 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=589824
175 | HardSwish        Div_592                  1 1 767 775 0=1.666667e-01 1=5.000000e-01
176 | Concat           Concat_593               2 1 775 602_splitncnn_0 776 0=0
177 | Split            splitncnn_21             1 2 776 776_splitncnn_0 776_splitncnn_1
178 | Convolution      Conv_594                 1 1 776_splitncnn_1 777 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=131072
179 | HardSwish        Div_602                  1 1 777 785 0=1.666667e-01 1=5.000000e-01
180 | Convolution      Conv_603                 1 1 785 786 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536
181 | HardSwish        Div_611                  1 1 786 794 0=1.666667e-01 1=5.000000e-01
182 | Convolution      Conv_612                 1 1 794 795 0=256 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=589824
183 | HardSwish        Div_620                  1 1 795 803 0=1.666667e-01 1=5.000000e-01
184 | Convolution      Conv_621                 1 1 803 804 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=65536
185 | Convolution      Conv_622                 1 1 776_splitncnn_0 805 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=131072
186 | Concat           Concat_623               2 1 804 805 806 0=0
187 | BatchNorm        BatchNormalization_624   1 1 806 807 0=512
188 | ReLU             LeakyRelu_625            1 1 807 808 0=1.000000e-01
189 | Convolution      Conv_626                 1 1 808 809 0=512 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=262144
190 | HardSwish        Div_634                  1 1 809 817 0=1.666667e-01 1=5.000000e-01
191 | Convolution      Conv_635                 1 1 715_splitncnn_0 818 0=21 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=2688
192 | Reshape          Reshape_649              1 1 818 836 0=6400 1=7 2=3
193 | Permute          Transpose_650            1 1 836 output 0=1
194 | Convolution      Conv_651                 1 1 766_splitncnn_0 838 0=21 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=5376
195 | Reshape          Reshape_665              1 1 838 856 0=1600 1=7 2=3
196 | Permute          Transpose_666            1 1 856 857 0=1
197 | Convolution      Conv_667                 1 1 817 858 0=21 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=10752
198 | Reshape          Reshape_681              1 1 858 876 0=400 1=7 2=3
199 | Permute          Transpose_682            1 1 876 877 0=1
200 | 


--------------------------------------------------------------------------------