├── CMakeLists.txt ├── README.md ├── build ├── Makefile └── yolov5.test ├── common.py ├── include └── ncnn │ ├── allocator.h │ ├── benchmark.h │ ├── blob.h │ ├── c_api.h │ ├── command.h │ ├── cpu.h │ ├── datareader.h │ ├── gpu.h │ ├── layer.h │ ├── layer_shader_type.h │ ├── layer_shader_type_enum.h │ ├── layer_type.h │ ├── layer_type_enum.h │ ├── mat.h │ ├── modelbin.h │ ├── net.h │ ├── option.h │ ├── paramdict.h │ ├── pipeline.h │ ├── pipelinecache.h │ ├── platform.h │ ├── simpleocv.h │ ├── simpleomp.h │ └── simplestl.h ├── lib ├── cmake │ └── ncnn │ │ ├── ncnn-release.cmake │ │ ├── ncnn.cmake │ │ └── ncnnConfig.cmake └── libncnn.a ├── test_jpg ├── 10.jpg ├── 105.jpg ├── 106.jpg └── 107.jpg ├── yolov5.cpp └── yw_5s ├── voc.names ├── yw_5s.bin └── yw_5s.param

/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Builds yolov5.test, the YOLOv5s ncnn inference demo (yolov5.cpp).
2 | #
3 | # Dependencies: OpenMP, OpenCV, and the prebuilt ncnn static library shipped in
4 | # this repository (lib/libncnn.a, headers under include/ncnn).
5 | #
6 | # CMake 3.9 is the first release that provides the OpenMP::OpenMP_CXX imported
7 | # target; 3.10 is the version the checked-in build/Makefile was generated with.
8 | cmake_minimum_required(VERSION 3.10)
9 | project(yolov5 LANGUAGES CXX)
10 |
11 | # REQUIRED aborts configuration when OpenMP is missing, so no *_FOUND check is needed.
12 | find_package(OpenMP REQUIRED)
13 | message("OPENMP FOUND")
14 |
15 | find_package(OpenCV REQUIRED)
16 | message(STATUS "Opencv library status: ")
17 | message(STATUS "> version: ${OpenCV_VERSION} ")
18 | message(STATUS "> libraries: ${OpenCV_LIBS}")
19 | message(STATUS "> include: ${OpenCV_INCLUDE_DIRS} ")
20 |
21 | # Wrap the vendored ncnn archive in an imported target so that its location and
22 | # header directory travel with the target instead of relying on the
23 | # directory-wide link_directories()/include_directories() search paths.
24 | add_library(ncnn::ncnn STATIC IMPORTED)
25 | set_target_properties(ncnn::ncnn PROPERTIES
26 |   IMPORTED_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}/lib/libncnn.a"
27 |   INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/ncnn"
28 | )
29 |
30 | add_executable(yolov5.test yolov5.cpp)
31 |
32 | # Target-scoped equivalent of the former global "-std=c++11" flag.
33 | set_target_properties(yolov5.test PROPERTIES
34 |   CXX_STANDARD 11
35 |   CXX_STANDARD_REQUIRED ON
36 |   CXX_EXTENSIONS OFF
37 | )
38 |
39 | target_include_directories(yolov5.test PRIVATE ${OpenCV_INCLUDE_DIRS})
40 |
41 | # OpenMP::OpenMP_CXX supplies both the compile option and the runtime library.
42 | # It is listed after ncnn::ncnn in case the static archive itself references
43 | # the OpenMP runtime (static libraries are resolved left to right).
44 | target_link_libraries(yolov5.test PRIVATE
45 |   ${OpenCV_LIBS}
46 |   ncnn::ncnn
47 |   OpenMP::OpenMP_CXX
48 | )

/README.md:
-------------------------------------------------------------------------------- 1 | # Yolo-V5s转换ncnn并推理测试Pipeline 2 | 3 | 4 | ## 步骤 5 | 6 | ### 1、yolov5代码修改: 7 | 8 | common.py中的class Focus(nn.Module)中,切片操作改为下采样 9 | 10 | 要修改的common.py就在本文件夹下,可以替换到yolov5/models下面 11 | 12 | 修改后,需要加载原来的模型重新训练一遍,否则推理结果不可用(换成下采样后精度会有所下降,在我自己数据集的测试集上 mAP 下降了约 0.03-0.04)。 13 | 14 | 15 | ### 2、yolov5模型转换成onnx: 16 | 17 | 参考[官方链接](https://github.com/ultralytics/yolov5/issues/251) 18 | ``` 19 | python models/export.py --weights yolov5s.pt --img 640 --batch 1 # export at 640x640 with batch size 1 20 | ``` 21 | (注意代码中设置 model.model[-1].export = True) 22 | 23 | 24 | ### 3、onnx模型简化: 25 | ``` 26 | pip3 install onnx-simplifier 27 | python3 -m onnxsim yolov5s.onnx yolov5s.onnx 28 | ``` 29 | 30 | 31 | ### 4、编译安装ncnn: 32 | 33 | 参考[官方链接](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux) 34 | 35 | 注意其中的依赖项,尤其是protobuf:亲测 2.5.0 版本不可用,需要从源码升级到 3.0 以上(下载源码时选择 cpp 版即可) 36 | ``` 37 | make && make install 38 | ``` 39 | 40 | 41 | ### 5、在ncnn中转换onnx为bin/param: 42 | 43 | ncnn安装后,在 ncnn/build/tools/onnx 下面 找到 onnx2ncnn 44 | 45 | 执行下面命令即可: 46 | ``` 47 | ./onnx2ncnn yolov5s.onnx yolov5s.param yolov5s.bin 48 | ``` 49 | 50 | 51 | ### 6、C++推理代码: 52 | 53 | 推理代码参考[本github yolov5.cpp](https://github.com/a954217436/yolov5s_ncnn_inference/blob/main/yolov5.cpp) 54 | 55 | 输出层名字可以使用netron查看,我查到的是output、857、877,不同模型可能不一样 56 | 57 | 注意:CMakeLists.txt 依赖 OpenMP、OpenCV 和 libncnn.a 等 58 | 59 | ``` 60 | mkdir build && cd build 61 | cmake .. 62 | make 63 | ./yolov5.test ../test_jpg 64 | ``` 65 | 66 | 67 | ### TODO: 68 | 69 | * vulkan 推理测试 70 | * 5m/5l/5x 转换 71 | * 区分类别进行NMS 72 | * 测试输入分辨率w!=h时的效果 73 | 74 | -------------------------------------------------------------------------------- /build/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.10 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | 7 | .PHONY : default_target 8 | 9 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 10 | .NOTPARALLEL: 11 | 12 | 13 | #============================================================================= 14 | # Special targets provided by cmake. 15 | 16 | # Disable implicit rules so canonical targets will work. 17 | .SUFFIXES: 18 | 19 | 20 | # Remove some rules from gmake that .SUFFIXES does not remove. 21 | SUFFIXES = 22 | 23 | .SUFFIXES: .hpux_make_needs_suffix_list 24 | 25 | 26 | # Suppress display of executed commands. 27 | $(VERBOSE).SILENT: 28 | 29 | 30 | # A target that is always out of date. 31 | cmake_force: 32 | 33 | .PHONY : cmake_force 34 | 35 | #============================================================================= 36 | # Set environment variables for the build. 37 | 38 | # The shell in which to execute make rules. 39 | SHELL = /bin/sh 40 | 41 | # The CMake executable. 42 | CMAKE_COMMAND = /usr/bin/cmake 43 | 44 | # The command to remove a file. 45 | RM = /usr/bin/cmake -E remove -f 46 | 47 | # Escaping for special characters. 48 | EQUALS = = 49 | 50 | # The top-level source directory on which CMake was run. 51 | CMAKE_SOURCE_DIR = /zhanghao/code/yolov5_to_ncnn/ncnn 52 | 53 | # The top-level build directory on which CMake was run. 54 | CMAKE_BINARY_DIR = /zhanghao/code/yolov5_to_ncnn/ncnn/build 55 | 56 | #============================================================================= 57 | # Targets provided globally by CMake. 58 | 59 | # Special rule for the target rebuild_cache 60 | rebuild_cache: 61 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 
62 | /usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 63 | .PHONY : rebuild_cache 64 | 65 | # Special rule for the target rebuild_cache 66 | rebuild_cache/fast: rebuild_cache 67 | 68 | .PHONY : rebuild_cache/fast 69 | 70 | # Special rule for the target edit_cache 71 | edit_cache: 72 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." 73 | /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. 74 | .PHONY : edit_cache 75 | 76 | # Special rule for the target edit_cache 77 | edit_cache/fast: edit_cache 78 | 79 | .PHONY : edit_cache/fast 80 | 81 | # The main all target 82 | all: cmake_check_build_system 83 | $(CMAKE_COMMAND) -E cmake_progress_start /zhanghao/code/yolov5_to_ncnn/ncnn/build/CMakeFiles /zhanghao/code/yolov5_to_ncnn/ncnn/build/CMakeFiles/progress.marks 84 | $(MAKE) -f CMakeFiles/Makefile2 all 85 | $(CMAKE_COMMAND) -E cmake_progress_start /zhanghao/code/yolov5_to_ncnn/ncnn/build/CMakeFiles 0 86 | .PHONY : all 87 | 88 | # The main clean target 89 | clean: 90 | $(MAKE) -f CMakeFiles/Makefile2 clean 91 | .PHONY : clean 92 | 93 | # The main clean target 94 | clean/fast: clean 95 | 96 | .PHONY : clean/fast 97 | 98 | # Prepare targets for installation. 99 | preinstall: all 100 | $(MAKE) -f CMakeFiles/Makefile2 preinstall 101 | .PHONY : preinstall 102 | 103 | # Prepare targets for installation. 104 | preinstall/fast: 105 | $(MAKE) -f CMakeFiles/Makefile2 preinstall 106 | .PHONY : preinstall/fast 107 | 108 | # clear depends 109 | depend: 110 | $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 111 | .PHONY : depend 112 | 113 | #============================================================================= 114 | # Target rules for targets named yolov5.test 115 | 116 | # Build rule for target. 
117 | yolov5.test: cmake_check_build_system 118 | $(MAKE) -f CMakeFiles/Makefile2 yolov5.test 119 | .PHONY : yolov5.test 120 | 121 | # fast build rule for target. 122 | yolov5.test/fast: 123 | $(MAKE) -f CMakeFiles/yolov5.test.dir/build.make CMakeFiles/yolov5.test.dir/build 124 | .PHONY : yolov5.test/fast 125 | 126 | yolov5.o: yolov5.cpp.o 127 | 128 | .PHONY : yolov5.o 129 | 130 | # target to build an object file 131 | yolov5.cpp.o: 132 | $(MAKE) -f CMakeFiles/yolov5.test.dir/build.make CMakeFiles/yolov5.test.dir/yolov5.cpp.o 133 | .PHONY : yolov5.cpp.o 134 | 135 | yolov5.i: yolov5.cpp.i 136 | 137 | .PHONY : yolov5.i 138 | 139 | # target to preprocess a source file 140 | yolov5.cpp.i: 141 | $(MAKE) -f CMakeFiles/yolov5.test.dir/build.make CMakeFiles/yolov5.test.dir/yolov5.cpp.i 142 | .PHONY : yolov5.cpp.i 143 | 144 | yolov5.s: yolov5.cpp.s 145 | 146 | .PHONY : yolov5.s 147 | 148 | # target to generate assembly for a file 149 | yolov5.cpp.s: 150 | $(MAKE) -f CMakeFiles/yolov5.test.dir/build.make CMakeFiles/yolov5.test.dir/yolov5.cpp.s 151 | .PHONY : yolov5.cpp.s 152 | 153 | # Help Target 154 | help: 155 | @echo "The following are some of the valid targets for this Makefile:" 156 | @echo "... all (the default if no target is provided)" 157 | @echo "... clean" 158 | @echo "... depend" 159 | @echo "... rebuild_cache" 160 | @echo "... yolov5.test" 161 | @echo "... edit_cache" 162 | @echo "... yolov5.o" 163 | @echo "... yolov5.i" 164 | @echo "... yolov5.s" 165 | .PHONY : help 166 | 167 | 168 | 169 | #============================================================================= 170 | # Special targets to cleanup operation of make. 171 | 172 | # Special rule to run CMake to check the build system integrity. 173 | # No rule that depends on this can have commands that come from listfiles 174 | # because they might be regenerated. 
175 | cmake_check_build_system: 176 | $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 177 | .PHONY : cmake_check_build_system 178 | 179 | -------------------------------------------------------------------------------- /build/yolov5.test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/build/yolov5.test -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | # This file contains modules common to various models 2 | 3 | import math 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | from PIL import Image, ImageDraw 8 | 9 | from utils.datasets import letterbox 10 | from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh 11 | from utils.plots import color_list 12 | 13 | 14 | def autopad(k, p=None): # kernel, padding 15 | # Pad to 'same' 16 | if p is None: 17 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 18 | return p 19 | 20 | 21 | def DWConv(c1, c2, k=1, s=1, act=True): 22 | # Depthwise convolution 23 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 24 | 25 | 26 | class Conv(nn.Module): 27 | # Standard convolution 28 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 29 | super(Conv, self).__init__() 30 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 31 | self.bn = nn.BatchNorm2d(c2) 32 | self.act = nn.Hardswish() if act else nn.Identity() 33 | 34 | def forward(self, x): 35 | return self.act(self.bn(self.conv(x))) 36 | 37 | def fuseforward(self, x): 38 | return self.act(self.conv(x)) 39 | 40 | 41 | class Bottleneck(nn.Module): 42 | # Standard bottleneck 43 | def __init__(self, c1, 
c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 44 | super(Bottleneck, self).__init__() 45 | c_ = int(c2 * e) # hidden channels 46 | self.cv1 = Conv(c1, c_, 1, 1) 47 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 48 | self.add = shortcut and c1 == c2 49 | 50 | def forward(self, x): 51 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 52 | 53 | 54 | class BottleneckCSP(nn.Module): 55 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 56 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 57 | super(BottleneckCSP, self).__init__() 58 | c_ = int(c2 * e) # hidden channels 59 | self.cv1 = Conv(c1, c_, 1, 1) 60 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 61 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 62 | self.cv4 = Conv(2 * c_, c2, 1, 1) 63 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 64 | self.act = nn.LeakyReLU(0.1, inplace=True) 65 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 66 | 67 | def forward(self, x): 68 | y1 = self.cv3(self.m(self.cv1(x))) 69 | y2 = self.cv2(x) 70 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 71 | 72 | 73 | class SPP(nn.Module): 74 | # Spatial pyramid pooling layer used in YOLOv3-SPP 75 | def __init__(self, c1, c2, k=(5, 9, 13)): 76 | super(SPP, self).__init__() 77 | c_ = c1 // 2 # hidden channels 78 | self.cv1 = Conv(c1, c_, 1, 1) 79 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 80 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 81 | 82 | def forward(self, x): 83 | x = self.cv1(x) 84 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 85 | 86 | ''' 87 | #张昊注释 88 | class Focus(nn.Module): 89 | # Focus wh information into c-space 90 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 91 | super(Focus, self).__init__() 
92 |         self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
93 |
94 |     def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
95 |         y = self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
96 |         print(" ======== >>>>>>>> Focus : ", y.shape)
97 |         return y
98 | '''
99 | # Focus with the pixel slicing replaced by 0.5x interpolation (README step 1); the slicing version is kept, disabled, in the string above. Per the README, a model trained with the slicing version must be retrained after this change.
100 | class Focus(nn.Module):  # same constructor signature as the slicing version
101 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
102 |         super(Focus, self).__init__()
103 |         self.conv = Conv(c1 * 4, c2, k, s, p, g, act)  # c1 * 4 input channels: forward() concatenates four tensors along dim 1
104 |     # NOTE(review): the four interpolate() calls below take identical arguments, so the four concatenated tensors are identical; presumably they stay separate so the traced/exported graph is unchanged -- confirm before merging them.
105 |     def forward(self, x):  # each interpolate() scales the spatial dims by 0.5; presumably nearest-neighbour (no mode is passed) -- TODO confirm
106 |         y = self.conv(torch.cat([torch.nn.functional.interpolate(x, scale_factor=0.5, recompute_scale_factor=True),
107 |                                  torch.nn.functional.interpolate(x, scale_factor=0.5, recompute_scale_factor=True),
108 |                                  torch.nn.functional.interpolate(x, scale_factor=0.5, recompute_scale_factor=True),
109 |                                  torch.nn.functional.interpolate(x, scale_factor=0.5, recompute_scale_factor=True)], 1))
110 |         #print(" ======== >>>>>>>> Focus : ", y.shape)
111 |         return y
112 |
113 | '''
114 | UserWarning:
115 | The default behavior for interpolate/upsample with float scale_factor changed in 1.6.0 to align with other frameworks/libraries,
116 |
117 | and uses scale_factor directly, instead of relying on the computed output size.
118 |
119 | If you wish to keep the old behavior, please set recompute_scale_factor=True.
120 |
121 | See the documentation of nn.Upsample for details.
122 | 123 | warnings.warn("The default behavior for interpolate/upsample with float scale_factor changed " 124 | ''' 125 | 126 | class Concat(nn.Module): 127 | # Concatenate a list of tensors along dimension 128 | def __init__(self, dimension=1): 129 | super(Concat, self).__init__() 130 | self.d = dimension 131 | 132 | def forward(self, x): 133 | return torch.cat(x, self.d) 134 | 135 | 136 | class NMS(nn.Module): 137 | # Non-Maximum Suppression (NMS) module 138 | conf = 0.25 # confidence threshold 139 | iou = 0.45 # IoU threshold 140 | classes = None # (optional list) filter by class 141 | 142 | def __init__(self): 143 | super(NMS, self).__init__() 144 | 145 | def forward(self, x): 146 | return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) 147 | 148 | 149 | class autoShape(nn.Module): 150 | # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS 151 | img_size = 640 # inference size (pixels) 152 | conf = 0.25 # NMS confidence threshold 153 | iou = 0.45 # NMS IoU threshold 154 | classes = None # (optional list) filter by class 155 | 156 | def __init__(self, model): 157 | super(autoShape, self).__init__() 158 | self.model = model.eval() 159 | 160 | def forward(self, imgs, size=640, augment=False, profile=False): 161 | # supports inference from various sources. For height=720, width=1280, RGB images example inputs are: 162 | # opencv: imgs = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3) 163 | # PIL: imgs = Image.open('image.jpg') # HWC x(720,1280,3) 164 | # numpy: imgs = np.zeros((720,1280,3)) # HWC 165 | # torch: imgs = torch.zeros(16,3,720,1280) # BCHW 166 | # multiple: imgs = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] 
# list of images 167 | 168 | p = next(self.model.parameters()) # for device and type 169 | if isinstance(imgs, torch.Tensor): # torch 170 | return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference 171 | 172 | # Pre-process 173 | if not isinstance(imgs, list): 174 | imgs = [imgs] 175 | shape0, shape1 = [], [] # image and inference shapes 176 | batch = range(len(imgs)) # batch size 177 | for i in batch: 178 | imgs[i] = np.array(imgs[i]) # to numpy 179 | if imgs[i].shape[0] < 5: # image in CHW 180 | imgs[i] = imgs[i].transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) 181 | imgs[i] = imgs[i][:, :, :3] if imgs[i].ndim == 3 else np.tile(imgs[i][:, :, None], 3) # enforce 3ch input 182 | s = imgs[i].shape[:2] # HWC 183 | shape0.append(s) # image shape 184 | g = (size / max(s)) # gain 185 | shape1.append([y * g for y in s]) 186 | shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape 187 | x = [letterbox(imgs[i], new_shape=shape1, auto=False)[0] for i in batch] # pad 188 | x = np.stack(x, 0) if batch[-1] else x[0][None] # stack 189 | x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW 190 | x = torch.from_numpy(x).to(p.device).type_as(p) / 255. 
# uint8 to fp16/32 191 | 192 | # Inference 193 | with torch.no_grad(): 194 | y = self.model(x, augment, profile)[0] # forward 195 | y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS 196 | 197 | # Post-process 198 | for i in batch: 199 | if y[i] is not None: 200 | y[i][:, :4] = scale_coords(shape1, y[i][:, :4], shape0[i]) 201 | 202 | return Detections(imgs, y, self.names) 203 | 204 | 205 | class Detections: 206 | # detections class for YOLOv5 inference results 207 | def __init__(self, imgs, pred, names=None): 208 | super(Detections, self).__init__() 209 | self.imgs = imgs # list of images as numpy arrays 210 | self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) 211 | self.names = names # class names 212 | self.xyxy = pred # xyxy pixels 213 | self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels 214 | d = pred[0].device # device 215 | gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations 216 | self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized 217 | self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized 218 | self.n = len(self.pred) 219 | 220 | def display(self, pprint=False, show=False, save=False): 221 | colors = color_list() 222 | for i, (img, pred) in enumerate(zip(self.imgs, self.pred)): 223 | str = f'Image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} ' 224 | if pred is not None: 225 | for c in pred[:, -1].unique(): 226 | n = (pred[:, -1] == c).sum() # detections per class 227 | str += f'{n} {self.names[int(c)]}s, ' # add to string 228 | if show or save: 229 | img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np 230 | for *box, conf, cls in pred: # xyxy, confidence, class 231 | # str += '%s %.2f, ' % (names[int(cls)], conf) # label 232 | ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10]) # plot 233 | if save: 234 | f = f'results{i}.jpg' 235 
| str += f"saved to '{f}'" 236 | img.save(f) # save 237 | if show: 238 | img.show(f'Image {i}') # show 239 | if pprint: 240 | print(str) 241 | 242 | def print(self): 243 | self.display(pprint=True) # print results 244 | 245 | def show(self): 246 | self.display(show=True) # show results 247 | 248 | def save(self): 249 | self.display(save=True) # save results 250 | 251 | def __len__(self): 252 | return self.n 253 | 254 | def tolist(self): 255 | # return a list of Detections objects, i.e. 'for result in results.tolist():' 256 | x = [Detections([self.imgs[i]], [self.pred[i]], self.names) for i in range(self.n)] 257 | for d in x: 258 | for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: 259 | setattr(d, k, getattr(d, k)[0]) # pop out of list 260 | return x 261 | 262 | 263 | class Flatten(nn.Module): 264 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions 265 | @staticmethod 266 | def forward(x): 267 | return x.view(x.size(0), -1) 268 | 269 | 270 | class Classify(nn.Module): 271 | # Classification head, i.e. x(b,c1,20,20) to x(b,c2) 272 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 273 | super(Classify, self).__init__() 274 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 275 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1) 276 | self.flat = Flatten() 277 | 278 | def forward(self, x): 279 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list 280 | return self.flat(self.conv(z)) # flatten to x(b,c2) 281 | -------------------------------------------------------------------------------- /include/ncnn/allocator.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 
4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_ALLOCATOR_H 16 | #define NCNN_ALLOCATOR_H 17 | 18 | #ifdef _WIN32 19 | #define WIN32_LEAN_AND_MEAN 20 | #include 21 | #endif 22 | 23 | #include "platform.h" 24 | 25 | #include 26 | 27 | #if NCNN_VULKAN 28 | #include 29 | #endif // NCNN_VULKAN 30 | 31 | #if __ANDROID_API__ >= 26 32 | #include 33 | #endif // __ANDROID_API__ >= 26 34 | 35 | namespace ncnn { 36 | 37 | #if __AVX__ 38 | // the alignment of all the allocated buffers 39 | #define MALLOC_ALIGN 256 40 | #else 41 | // the alignment of all the allocated buffers 42 | #define MALLOC_ALIGN 16 43 | #endif 44 | 45 | // Aligns a pointer to the specified number of bytes 46 | // ptr Aligned pointer 47 | // n Alignment size that must be a power of two 48 | template 49 | static inline _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp)) 50 | { 51 | return (_Tp*)(((size_t)ptr + n - 1) & -n); 52 | } 53 | 54 | // Aligns a buffer size to the specified number of bytes 55 | // The function returns the minimum number that is greater or equal to sz and is divisible by n 56 | // sz Buffer size to align 57 | // n Alignment size that must be a power of two 58 | static inline size_t alignSize(size_t sz, int n) 59 | { 60 | return (sz + n - 1) & -n; 61 | } 62 | 63 | static inline void* fastMalloc(size_t size) 64 | { 65 | #if _MSC_VER 66 | return _aligned_malloc(size, MALLOC_ALIGN); 67 | #elif (defined(__unix__) || defined(__APPLE__)) && 
_POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17) 68 | void* ptr = 0; 69 | if (posix_memalign(&ptr, MALLOC_ALIGN, size)) 70 | ptr = 0; 71 | return ptr; 72 | #elif __ANDROID__ && __ANDROID_API__ < 17 73 | return memalign(MALLOC_ALIGN, size); 74 | #else 75 | unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + MALLOC_ALIGN); 76 | if (!udata) 77 | return 0; 78 | unsigned char** adata = alignPtr((unsigned char**)udata + 1, MALLOC_ALIGN); 79 | adata[-1] = udata; 80 | return adata; 81 | #endif 82 | } 83 | 84 | static inline void fastFree(void* ptr) 85 | { 86 | if (ptr) 87 | { 88 | #if _MSC_VER 89 | _aligned_free(ptr); 90 | #elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17) 91 | free(ptr); 92 | #elif __ANDROID__ && __ANDROID_API__ < 17 93 | free(ptr); 94 | #else 95 | unsigned char* udata = ((unsigned char**)ptr)[-1]; 96 | free(udata); 97 | #endif 98 | } 99 | } 100 | 101 | #if NCNN_THREADS 102 | // exchange-add operation for atomic operations on reference counters 103 | #if defined __riscv && !defined __riscv_atomic 104 | // riscv target without A extension 105 | static inline int NCNN_XADD(int* addr, int delta) 106 | { 107 | int tmp = *addr; 108 | *addr += delta; 109 | return tmp; 110 | } 111 | #elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32) 112 | // atomic increment on the linux version of the Intel(tm) compiler 113 | #define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast(reinterpret_cast(addr)), delta) 114 | #elif defined __GNUC__ 115 | #if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) 116 | #ifdef __ATOMIC_ACQ_REL 117 | #define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL) 118 | #else 119 | #define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4) 120 | #endif 121 | #else 122 | #if defined 
__ATOMIC_ACQ_REL && !defined __clang__ 123 | // version for gcc >= 4.7 124 | #define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL) 125 | #else 126 | #define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta)) 127 | #endif 128 | #endif 129 | #elif defined _MSC_VER && !defined RC_INVOKED 130 | #define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta) 131 | #else 132 | // thread-unsafe branch 133 | static inline int NCNN_XADD(int* addr, int delta) 134 | { 135 | int tmp = *addr; 136 | *addr += delta; 137 | return tmp; 138 | } 139 | #endif 140 | #else // NCNN_THREADS 141 | static inline int NCNN_XADD(int* addr, int delta) 142 | { 143 | int tmp = *addr; 144 | *addr += delta; 145 | return tmp; 146 | } 147 | #endif // NCNN_THREADS 148 | 149 | class Allocator 150 | { 151 | public: 152 | virtual ~Allocator(); 153 | virtual void* fastMalloc(size_t size) = 0; 154 | virtual void fastFree(void* ptr) = 0; 155 | }; 156 | 157 | class PoolAllocator : public Allocator 158 | { 159 | public: 160 | PoolAllocator(); 161 | ~PoolAllocator(); 162 | 163 | // ratio range 0 ~ 1 164 | // default cr = 0.75 165 | void set_size_compare_ratio(float scr); 166 | 167 | // release all budgets immediately 168 | void clear(); 169 | 170 | virtual void* fastMalloc(size_t size); 171 | virtual void fastFree(void* ptr); 172 | 173 | private: 174 | Mutex budgets_lock; 175 | Mutex payouts_lock; 176 | unsigned int size_compare_ratio; // 0~256 177 | std::list > budgets; 178 | std::list > payouts; 179 | }; 180 | 181 | class UnlockedPoolAllocator : public Allocator 182 | { 183 | public: 184 | UnlockedPoolAllocator(); 185 | ~UnlockedPoolAllocator(); 186 | 187 | // ratio range 0 ~ 1 188 | // default cr = 0.75 189 | void set_size_compare_ratio(float scr); 190 | 191 | // release all budgets immediately 192 | void clear(); 193 | 194 | virtual void* fastMalloc(size_t size); 195 | virtual void 
fastFree(void* ptr); 196 | 197 | private: 198 | unsigned int size_compare_ratio; // 0~256 199 | std::list > budgets; 200 | std::list > payouts; 201 | }; 202 | 203 | #if NCNN_VULKAN 204 | 205 | class VulkanDevice; 206 | 207 | class VkBufferMemory 208 | { 209 | public: 210 | VkBuffer buffer; 211 | 212 | // the base offset assigned by allocator 213 | size_t offset; 214 | size_t capacity; 215 | 216 | VkDeviceMemory memory; 217 | void* mapped_ptr; 218 | 219 | // buffer state, modified by command functions internally 220 | mutable VkAccessFlags access_flags; 221 | mutable VkPipelineStageFlags stage_flags; 222 | 223 | // initialize and modified by mat 224 | int refcount; 225 | }; 226 | 227 | class VkImageMemory 228 | { 229 | public: 230 | VkImage image; 231 | VkImageView imageview; 232 | 233 | // underlying info assigned by allocator 234 | int width; 235 | int height; 236 | int depth; 237 | VkFormat format; 238 | 239 | VkDeviceMemory memory; 240 | void* mapped_ptr; 241 | 242 | // the base offset assigned by allocator 243 | size_t bind_offset; 244 | size_t bind_capacity; 245 | 246 | // image state, modified by command functions internally 247 | mutable VkAccessFlags access_flags; 248 | mutable VkImageLayout image_layout; 249 | mutable VkPipelineStageFlags stage_flags; 250 | 251 | // in-execution state, modified by command functions internally 252 | mutable int command_refcount; 253 | 254 | // initialize and modified by mat 255 | int refcount; 256 | }; 257 | 258 | class VkAllocator 259 | { 260 | public: 261 | VkAllocator(const VulkanDevice* _vkdev); 262 | virtual ~VkAllocator() 263 | { 264 | clear(); 265 | } 266 | virtual void clear() 267 | { 268 | } 269 | 270 | virtual VkBufferMemory* fastMalloc(size_t size) = 0; 271 | virtual void fastFree(VkBufferMemory* ptr) = 0; 272 | virtual int flush(VkBufferMemory* ptr); 273 | virtual int invalidate(VkBufferMemory* ptr); 274 | 275 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0; 276 | 
virtual void fastFree(VkImageMemory* ptr) = 0; 277 | 278 | public: 279 | const VulkanDevice* vkdev; 280 | uint32_t buffer_memory_type_index; 281 | uint32_t image_memory_type_index; 282 | bool mappable; 283 | bool coherent; 284 | 285 | protected: 286 | VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage); 287 | VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index); 288 | VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer); 289 | 290 | VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage); 291 | VkImageView create_imageview(VkImage image, VkFormat format); 292 | }; 293 | 294 | class VkBlobAllocator : public VkAllocator 295 | { 296 | public: 297 | VkBlobAllocator(const VulkanDevice* vkdev); 298 | virtual ~VkBlobAllocator(); 299 | 300 | public: 301 | // release all budgets immediately 302 | virtual void clear(); 303 | 304 | virtual VkBufferMemory* fastMalloc(size_t size); 305 | virtual void fastFree(VkBufferMemory* ptr); 306 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); 307 | virtual void fastFree(VkImageMemory* ptr); 308 | 309 | protected: 310 | size_t block_size; 311 | size_t buffer_offset_alignment; 312 | size_t bind_memory_offset_alignment; 313 | std::vector > > buffer_budgets; 314 | std::vector buffer_blocks; 315 | std::vector > > image_memory_budgets; 316 | std::vector image_memory_blocks; 317 | }; 318 | 319 | class VkWeightAllocator : public VkAllocator 320 | { 321 | public: 322 | VkWeightAllocator(const VulkanDevice* vkdev); 323 | virtual ~VkWeightAllocator(); 324 | 325 | public: 326 | // release all blocks immediately 327 | virtual void clear(); 328 | 329 | public: 330 | virtual VkBufferMemory* fastMalloc(size_t size); 331 | virtual void fastFree(VkBufferMemory* ptr); 332 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); 333 | 
virtual void fastFree(VkImageMemory* ptr); 334 | 335 | protected: 336 | size_t block_size; 337 | size_t buffer_offset_alignment; 338 | size_t bind_memory_offset_alignment; 339 | std::vector buffer_block_free_spaces; 340 | std::vector buffer_blocks; 341 | std::vector dedicated_buffer_blocks; 342 | std::vector image_memory_block_free_spaces; 343 | std::vector image_memory_blocks; 344 | std::vector dedicated_image_memory_blocks; 345 | }; 346 | 347 | class VkStagingAllocator : public VkAllocator 348 | { 349 | public: 350 | VkStagingAllocator(const VulkanDevice* vkdev); 351 | virtual ~VkStagingAllocator(); 352 | 353 | public: 354 | // ratio range 0 ~ 1 355 | // default cr = 0.75 356 | void set_size_compare_ratio(float scr); 357 | 358 | // release all budgets immediately 359 | virtual void clear(); 360 | 361 | virtual VkBufferMemory* fastMalloc(size_t size); 362 | virtual void fastFree(VkBufferMemory* ptr); 363 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); 364 | virtual void fastFree(VkImageMemory* ptr); 365 | 366 | protected: 367 | unsigned int size_compare_ratio; // 0~256 368 | std::list buffer_budgets; 369 | }; 370 | 371 | class VkWeightStagingAllocator : public VkAllocator 372 | { 373 | public: 374 | VkWeightStagingAllocator(const VulkanDevice* vkdev); 375 | virtual ~VkWeightStagingAllocator(); 376 | 377 | public: 378 | virtual VkBufferMemory* fastMalloc(size_t size); 379 | virtual void fastFree(VkBufferMemory* ptr); 380 | virtual VkImageMemory* fastMalloc(int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/) 381 | { 382 | return 0; 383 | } 384 | virtual void fastFree(VkImageMemory* /*ptr*/) 385 | { 386 | } 387 | 388 | protected: 389 | }; 390 | 391 | #if __ANDROID_API__ >= 26 392 | class ImportAndroidHardwareBufferPipeline; 393 | class VkAndroidHardwareBufferImageAllocator : public VkAllocator 394 | { 395 | public: 396 | VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* 
_hb); 397 | virtual ~VkAndroidHardwareBufferImageAllocator(); 398 | 399 | public: 400 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); 401 | virtual void fastFree(VkImageMemory* ptr); 402 | virtual VkBufferMemory* fastMalloc(size_t /*size*/) 403 | { 404 | return 0; 405 | } 406 | virtual void fastFree(VkBufferMemory* /*ptr*/) 407 | { 408 | } 409 | 410 | public: 411 | int init(); 412 | 413 | int width() const; 414 | int height() const; 415 | uint64_t external_format() const; 416 | 417 | public: 418 | AHardwareBuffer* hb; 419 | AHardwareBuffer_Desc bufferDesc; 420 | VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties; 421 | VkAndroidHardwareBufferPropertiesANDROID bufferProperties; 422 | VkSamplerYcbcrConversionKHR samplerYcbcrConversion; 423 | }; 424 | #endif // __ANDROID_API__ >= 26 425 | 426 | #endif // NCNN_VULKAN 427 | 428 | } // namespace ncnn 429 | 430 | #endif // NCNN_ALLOCATOR_H 431 | -------------------------------------------------------------------------------- /include/ncnn/benchmark.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_BENCHMARK_H 16 | #define NCNN_BENCHMARK_H 17 | 18 | #include "layer.h" 19 | #include "mat.h" 20 | #include "platform.h" 21 | 22 | namespace ncnn { 23 | 24 | // get the current timestamp in milliseconds 25 | double get_current_time(); 26 | 27 | #if NCNN_BENCHMARK 28 | 29 | void benchmark(const Layer* layer, double start, double end); 30 | void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_blob, double start, double end); 31 | 32 | #endif // NCNN_BENCHMARK 33 | 34 | } // namespace ncnn 35 | 36 | #endif // NCNN_BENCHMARK_H 37 | -------------------------------------------------------------------------------- /include/ncnn/blob.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_BLOB_H 16 | #define NCNN_BLOB_H 17 | 18 | #include "mat.h" 19 | #include "platform.h" 20 | 21 | namespace ncnn { 22 | 23 | class Blob 24 | { 25 | public: 26 | // empty 27 | Blob(); 28 | 29 | public: 30 | #if NCNN_STRING 31 | // blob name 32 | std::string name; 33 | #endif // NCNN_STRING 34 | // index of the layer which produces this blob as output 35 | int producer; 36 | // indexes of the layers which need this blob as input 37 | std::vector consumers; 38 | // shape hint 39 | Mat shape; 40 | }; 41 | 42 | } // namespace ncnn 43 | 44 | #endif // NCNN_BLOB_H 45 | -------------------------------------------------------------------------------- /include/ncnn/c_api.h: -------------------------------------------------------------------------------- 1 | /* Tencent is pleased to support the open source community by making ncnn available. 2 | * 3 | * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | * 5 | * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | * in compliance with the License. You may obtain a copy of the License at 7 | * 8 | * https://opensource.org/licenses/BSD-3-Clause 9 | * 10 | * Unless required by applicable law or agreed to in writing, software distributed 11 | * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | * CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | * specific language governing permissions and limitations under the License. 
14 | */ 15 | 16 | #ifndef NCNN_C_API_H 17 | #define NCNN_C_API_H 18 | 19 | #include 20 | #include "platform.h" 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | /* mat api: handle type, creation, destruction and accessors */ 27 | typedef struct __ncnn_mat_t* ncnn_mat_t; 28 | 29 | ncnn_mat_t ncnn_mat_create(); 30 | ncnn_mat_t ncnn_mat_create_1d(int w); 31 | ncnn_mat_t ncnn_mat_create_2d(int w, int h); 32 | ncnn_mat_t ncnn_mat_create_3d(int w, int h, int c); 33 | ncnn_mat_t ncnn_mat_create_1d_packed(int w, size_t elemsize, int elempack); 34 | ncnn_mat_t ncnn_mat_create_2d_packed(int w, int h, size_t elemsize, int elempack); 35 | ncnn_mat_t ncnn_mat_create_3d_packed(int w, int h, int c, size_t elemsize, int elempack); 36 | void ncnn_mat_destroy(ncnn_mat_t mat); 37 | 38 | int ncnn_mat_get_dims(ncnn_mat_t mat); 39 | int ncnn_mat_get_w(ncnn_mat_t mat); 40 | int ncnn_mat_get_h(ncnn_mat_t mat); 41 | int ncnn_mat_get_c(ncnn_mat_t mat); 42 | size_t ncnn_mat_get_elemsize(ncnn_mat_t mat); 43 | int ncnn_mat_get_elempack(ncnn_mat_t mat); 44 | size_t ncnn_mat_get_cstep(ncnn_mat_t mat); 45 | void* ncnn_mat_get_data(ncnn_mat_t mat); 46 | 47 | #if NCNN_PIXEL 48 | 49 | /* mat pixel api: pixel type constants and conversion between pixel buffers and mats */ 50 | #define NCNN_MAT_PIXEL_RGB 1 51 | #define NCNN_MAT_PIXEL_BGR 2 52 | #define NCNN_MAT_PIXEL_GRAY 3 53 | #define NCNN_MAT_PIXEL_RGBA 4 54 | #define NCNN_MAT_PIXEL_BGRA 5 55 | #define NCNN_MAT_PIXEL_X2Y(X, Y) (X | (Y << 16)) 56 | ncnn_mat_t ncnn_mat_from_pixels(const unsigned char* pixels, int type, int w, int h, int stride); 57 | ncnn_mat_t ncnn_mat_from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height); 58 | void ncnn_mat_to_pixels(ncnn_mat_t mat, unsigned char* pixels, int type, int stride); 59 | void ncnn_mat_to_pixels_resize(ncnn_mat_t mat, unsigned char* pixels, int type, int target_width, int target_height, int target_stride); 60 | 61 | #endif // NCNN_PIXEL 62 | 63 | void ncnn_mat_substract_mean_normalize(ncnn_mat_t mat, const float* mean_vals, const
float* norm_vals); 64 | 65 | /* option api: thread count and vulkan compute toggle */ 66 | typedef struct __ncnn_option_t* ncnn_option_t; 67 | 68 | ncnn_option_t ncnn_option_create(); 69 | void ncnn_option_destroy(ncnn_option_t opt); 70 | 71 | int ncnn_option_get_num_threads(ncnn_option_t opt); 72 | void ncnn_option_set_num_threads(ncnn_option_t opt, int num_threads); 73 | 74 | int ncnn_option_get_use_vulkan_compute(ncnn_option_t opt); 75 | void ncnn_option_set_use_vulkan_compute(ncnn_option_t opt, int use_vulkan_compute); 76 | 77 | /* blob api: name, producer/consumer layers and shape queries */ 78 | typedef struct __ncnn_blob_t* ncnn_blob_t; 79 | 80 | const char* ncnn_blob_get_name(ncnn_blob_t blob); 81 | 82 | int ncnn_blob_get_producer(ncnn_blob_t blob); 83 | int ncnn_blob_get_consumer_count(ncnn_blob_t blob); 84 | int ncnn_blob_get_consumer(ncnn_blob_t blob, int i); 85 | 86 | void ncnn_blob_get_shape(ncnn_blob_t blob, int* dims, int* w, int* h, int* c); 87 | 88 | /* layer api: name, type, and bottom/top blob queries */ 89 | typedef struct __ncnn_layer_t* ncnn_layer_t; 90 | 91 | const char* ncnn_layer_get_name(ncnn_layer_t layer); 92 | 93 | int ncnn_layer_get_typeindex(ncnn_layer_t layer); 94 | const char* ncnn_layer_get_type(ncnn_layer_t layer); 95 | 96 | int ncnn_layer_get_bottom_count(ncnn_layer_t layer); 97 | int ncnn_layer_get_bottom(ncnn_layer_t layer, int i); 98 | int ncnn_layer_get_top_count(ncnn_layer_t layer); 99 | int ncnn_layer_get_top(ncnn_layer_t layer, int i); 100 | 101 | void ncnn_blob_get_bottom_shape(ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); 102 | void ncnn_blob_get_top_shape(ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); 103 | 104 | /* net api: lifetime, option, param/model loading, layer/blob enumeration */ 105 | typedef struct __ncnn_net_t* ncnn_net_t; 106 | 107 | ncnn_net_t ncnn_net_create(); 108 | void ncnn_net_destroy(ncnn_net_t net); 109 | 110 | void ncnn_net_set_option(ncnn_net_t net, ncnn_option_t opt); 111 | 112 | int ncnn_net_load_param(ncnn_net_t net, const char* path); 113 | int ncnn_net_load_model(ncnn_net_t net, const char* path); 114 | 115 | int ncnn_net_get_layer_count(ncnn_net_t
net); 116 | ncnn_layer_t ncnn_net_get_layer(ncnn_net_t net, int i); 117 | int ncnn_net_get_blob_count(ncnn_net_t net); 118 | ncnn_blob_t ncnn_net_get_blob(ncnn_net_t net, int i); 119 | 120 | /* extractor api: created from a net; feeds inputs and extracts outputs by name */ 121 | typedef struct __ncnn_extractor_t* ncnn_extractor_t; 122 | 123 | ncnn_extractor_t ncnn_extractor_create(ncnn_net_t net); 124 | void ncnn_extractor_destroy(ncnn_extractor_t ex); 125 | 126 | void ncnn_extractor_set_option(ncnn_extractor_t ex, ncnn_option_t opt); 127 | 128 | int ncnn_extractor_input(ncnn_extractor_t ex, const char* name, ncnn_mat_t mat); 129 | int ncnn_extractor_extract(ncnn_extractor_t ex, const char* name, ncnn_mat_t* mat); 130 | 131 | #ifdef __cplusplus 132 | } /* extern "C" */ 133 | #endif 134 | 135 | #endif // NCNN_C_API_H 136 | -------------------------------------------------------------------------------- /include/ncnn/command.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_COMMAND_H 16 | #define NCNN_COMMAND_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_VULKAN 21 | 22 | #include "mat.h" 23 | 24 | #include 25 | 26 | namespace ncnn { 27 | 28 | class Pipeline; 29 | class VkCompute 30 | { 31 | public: 32 | VkCompute(const VulkanDevice* vkdev); 33 | virtual ~VkCompute(); 34 | 35 | public: 36 | void record_upload(const Mat& src, VkMat& dst, const Option& opt); 37 | 38 | void record_upload(const Mat& src, VkImageMat& dst, const Option& opt); 39 | 40 | void record_download(const VkMat& src, Mat& dst, const Option& opt); 41 | 42 | void record_download(const VkImageMat& src, Mat& dst, const Option& opt); 43 | 44 | void record_buffer_to_image(const VkMat& src, VkImageMat& dst, const Option& opt); 45 | 46 | void record_image_to_buffer(const VkImageMat& src, VkMat& dst, const Option& opt); 47 | 48 | void record_clone(const Mat& src, VkMat& dst, const Option& opt); 49 | 50 | void record_clone(const Mat& src, VkImageMat& dst, const Option& opt); 51 | 52 | void record_clone(const VkMat& src, Mat& dst, const Option& opt); 53 | 54 | void record_clone(const VkImageMat& src, Mat& dst, const Option& opt); 55 | 56 | void record_clone(const VkMat& src, VkMat& dst, const Option& opt); 57 | 58 | void record_clone(const VkImageMat& src, VkImageMat& dst, const Option& opt); 59 | 60 | void record_clone(const VkMat& src, VkImageMat& dst, const Option& opt); 61 | 62 | void record_clone(const VkImageMat& src, VkMat& dst, const Option& opt); 63 | 64 | void record_pipeline(const Pipeline* pipeline, const std::vector& bindings, const std::vector& constants, const VkMat& dispatcher); 65 | 66 | void record_pipeline(const Pipeline* pipeline, const std::vector& bindings, const std::vector& constants, const VkImageMat& dispatcher); 67 | 68 | void record_pipeline(const Pipeline* pipeline, const std::vector& buffer_bindings, const std::vector& image_bindings, const std::vector& constants, const VkMat& dispatcher); 69 | void
record_pipeline(const Pipeline* pipeline, const std::vector& buffer_bindings, const std::vector& image_bindings, const std::vector& constants, const VkImageMat& dispatcher); 70 | void record_pipeline(const Pipeline* pipeline, const std::vector& buffer_bindings, const std::vector& image_bindings, const std::vector& constants, const Mat& dispatcher); 71 | 72 | #if NCNN_BENCHMARK 73 | void record_write_timestamp(uint32_t query); 74 | #endif // NCNN_BENCHMARK 75 | 76 | #if __ANDROID_API__ >= 26 77 | void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkMat& dst); 78 | 79 | void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkImageMat& dst); 80 | #endif // __ANDROID_API__ >= 26 81 | 82 | int submit_and_wait(); 83 | 84 | int reset(); 85 | 86 | #if NCNN_BENCHMARK 87 | int create_query_pool(uint32_t query_count); 88 | 89 | int get_query_pool_results(uint32_t first_query, uint32_t query_count, std::vector& results); 90 | #endif // NCNN_BENCHMARK 91 | 92 | protected: 93 | int init(); 94 | int begin_command_buffer(); 95 | int end_command_buffer(); 96 | 97 | protected: 98 | const VulkanDevice* vkdev; 99 | 100 | VkCommandPool compute_command_pool; 101 | 102 | VkCommandBuffer compute_command_buffer; 103 | 104 | VkFence compute_command_fence; 105 | 106 | std::vector upload_staging_buffers; 107 | std::vector download_post_buffers; 108 | std::vector download_post_mats_fp16; 109 | std::vector download_post_mats; 110 | 111 | std::vector image_blocks_to_destroy; 112 | 113 | // the good-old path for devices without VK_KHR_push_descriptor 114 | std::vector descriptor_pools; 115 | std::vector descriptorsets; 116 | 117 | struct record 118 | { 119 | enum 120 | { 121 | TYPE_copy_buffer, 122 | TYPE_copy_image, 123 | TYPE_copy_buffer_to_image, 124 | TYPE_copy_image_to_buffer, 125 | TYPE_bind_pipeline, 126 | TYPE_bind_descriptorsets, 127 | 
TYPE_push_constants, 128 | TYPE_dispatch, 129 | TYPE_memory_barrers, 130 | TYPE_buffer_barrers, 131 | TYPE_image_barrers, 132 | 133 | #if NCNN_BENCHMARK 134 | TYPE_write_timestamp, 135 | #endif // NCNN_BENCHMARK 136 | 137 | TYPE_post_download, 138 | TYPE_post_cast_float16_to_float32, 139 | }; 140 | 141 | int type; 142 | VkCommandBuffer command_buffer; 143 | 144 | union 145 | { 146 | struct 147 | { 148 | VkBuffer src; 149 | VkBuffer dst; 150 | uint32_t region_count; 151 | const VkBufferCopy* regions; 152 | } copy_buffer; 153 | struct 154 | { 155 | VkImage src; 156 | VkImageLayout src_layout; 157 | VkImage dst; 158 | VkImageLayout dst_layout; 159 | uint32_t region_count; 160 | const VkImageCopy* regions; 161 | } copy_image; 162 | struct 163 | { 164 | VkBuffer src; 165 | VkImage dst; 166 | VkImageLayout layout; 167 | uint32_t region_count; 168 | const VkBufferImageCopy* regions; 169 | } copy_buffer_to_image; 170 | struct 171 | { 172 | VkImage src; 173 | VkImageLayout layout; 174 | VkBuffer dst; 175 | uint32_t region_count; 176 | const VkBufferImageCopy* regions; 177 | } copy_image_to_buffer; 178 | 179 | struct 180 | { 181 | VkPipelineBindPoint bind_point; 182 | VkPipeline pipeline; 183 | } bind_pipeline; 184 | struct 185 | { 186 | VkPipelineBindPoint bind_point; 187 | VkPipelineLayout pipeline_layout; 188 | uint32_t descriptorset_count; 189 | uint32_t descriptorset_offset; 190 | } bind_descriptorsets; 191 | struct 192 | { 193 | VkPipelineLayout pipeline_layout; 194 | VkShaderStageFlags stage_flags; 195 | uint32_t size; 196 | const void* values; 197 | } push_constants; 198 | 199 | struct 200 | { 201 | uint32_t group_count_x; 202 | uint32_t group_count_y; 203 | uint32_t group_count_z; 204 | } dispatch; 205 | 206 | struct 207 | { 208 | VkPipelineStageFlags src_stage; 209 | VkPipelineStageFlags dst_stage; 210 | uint32_t barrier_count; 211 | const VkMemoryBarrier* barriers; 212 | } memory_barrers; 213 | struct 214 | { 215 | VkPipelineStageFlags src_stage; 216 | 
VkPipelineStageFlags dst_stage; 217 | uint32_t barrier_count; 218 | const VkBufferMemoryBarrier* barriers; 219 | } buffer_barrers; 220 | struct 221 | { 222 | VkPipelineStageFlags src_stage; 223 | VkPipelineStageFlags dst_stage; 224 | uint32_t barrier_count; 225 | const VkImageMemoryBarrier* barriers; 226 | } image_barrers; 227 | 228 | #if NCNN_BENCHMARK 229 | struct 230 | { 231 | uint32_t query; 232 | } write_timestamp; 233 | #endif // NCNN_BENCHMARK 234 | 235 | struct 236 | { 237 | uint32_t download_post_buffer_mat_offset; 238 | uint32_t download_post_mat_fp16_offset; 239 | } post_download; 240 | struct 241 | { 242 | uint32_t download_post_mat_fp16_offset; 243 | uint32_t download_post_mat_offset; 244 | } post_cast_float16_to_float32; 245 | }; 246 | }; 247 | 248 | std::vector delayed_records; 249 | 250 | #if NCNN_BENCHMARK 251 | uint32_t query_count; 252 | VkQueryPool query_pool; 253 | #endif // NCNN_BENCHMARK 254 | }; 255 | 256 | class VkTransfer 257 | { 258 | public: 259 | VkTransfer(const VulkanDevice* vkdev); 260 | ~VkTransfer(); 261 | 262 | public: 263 | void record_upload(const Mat& src, VkMat& dst, const Option& opt, bool flatten = true); 264 | 265 | void record_upload(const Mat& src, VkImageMat& dst, const Option& opt); 266 | 267 | int submit_and_wait(); 268 | 269 | protected: 270 | int init(); 271 | int begin_command_buffer(); 272 | int end_command_buffer(); 273 | 274 | protected: 275 | const VulkanDevice* vkdev; 276 | 277 | VkCommandPool compute_command_pool; 278 | VkCommandPool transfer_command_pool; 279 | 280 | VkCommandBuffer upload_command_buffer; 281 | VkCommandBuffer compute_command_buffer; 282 | 283 | VkSemaphore upload_compute_semaphore; 284 | 285 | VkFence upload_command_fence; 286 | VkFence compute_command_fence; 287 | 288 | std::vector upload_staging_buffers; 289 | }; 290 | 291 | } // namespace ncnn 292 | 293 | #endif // NCNN_VULKAN 294 | 295 | #endif // NCNN_COMMAND_H 296 | 
-------------------------------------------------------------------------------- /include/ncnn/cpu.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_CPU_H 16 | #define NCNN_CPU_H 17 | 18 | #include 19 | 20 | #if defined __ANDROID__ || defined __linux__ 21 | #include // cpu_set_t 22 | #endif 23 | 24 | namespace ncnn { 25 | 26 | class CpuSet 27 | { 28 | public: 29 | CpuSet(); 30 | void enable(int cpu); 31 | void disable(int cpu); 32 | void disable_all(); 33 | bool is_enabled(int cpu) const; 34 | int num_enabled() const; 35 | 36 | public: 37 | #if defined __ANDROID__ || defined __linux__ 38 | cpu_set_t cpu_set; 39 | #endif 40 | #if __APPLE__ 41 | unsigned int policy; 42 | #endif 43 | }; 44 | 45 | // test optional cpu features 46 | // neon = armv7 neon or aarch64 asimd 47 | int cpu_support_arm_neon(); 48 | // vfpv4 = armv7 fp16 + fma 49 | int cpu_support_arm_vfpv4(); 50 | // asimdhp = aarch64 asimd half precision 51 | int cpu_support_arm_asimdhp(); 52 | 53 | // avx2 = x86_64 avx2 + fma + f16c 54 | int cpu_support_x86_avx2(); 55 | 56 | // cpu info 57 | int get_cpu_count(); 58 | int get_little_cpu_count(); 59 | int get_big_cpu_count(); 60 | 61 | // bind all 
threads on little clusters if powersave enabled 62 | // affects HMP arch cpu like ARM big.LITTLE 63 | // only implemented on android at the moment 64 | // switching powersave is expensive and not thread-safe 65 | // 0 = all cores enabled(default) 66 | // 1 = only little clusters enabled 67 | // 2 = only big clusters enabled 68 | // the setter function returns 0 on success 69 | int get_cpu_powersave(); 70 | int set_cpu_powersave(int powersave); 71 | 72 | // convenience wrapper 73 | const CpuSet& get_cpu_thread_affinity_mask(int powersave); 74 | 75 | // set explicit thread affinity 76 | int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask); 77 | 78 | // misc function wrappers for openmp routines 79 | int get_omp_num_threads(); 80 | void set_omp_num_threads(int num_threads); 81 | 82 | int get_omp_dynamic(); 83 | void set_omp_dynamic(int dynamic); 84 | 85 | int get_omp_thread_num(); 86 | 87 | int get_kmp_blocktime(); 88 | void set_kmp_blocktime(int time_ms); 89 | 90 | } // namespace ncnn 91 | 92 | #endif // NCNN_CPU_H 93 | -------------------------------------------------------------------------------- /include/ncnn/datareader.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_DATAREADER_H 16 | #define NCNN_DATAREADER_H 17 | 18 | #include "platform.h" 19 | #if NCNN_STDIO || NCNN_STRING 20 | #include 21 | #endif 22 | 23 | #if __ANDROID_API__ >= 9 24 | #include 25 | #endif 26 | 27 | namespace ncnn { 28 | 29 | // data read wrapper 30 | class DataReader 31 | { 32 | public: 33 | virtual ~DataReader(); 34 | 35 | #if NCNN_STRING 36 | // parse plain param text 37 | // return 1 if the scan succeeds 38 | virtual int scan(const char* format, void* p) const; 39 | #endif // NCNN_STRING 40 | 41 | // read binary param and model data 42 | // return the number of bytes read 43 | virtual size_t read(void* buf, size_t size) const; 44 | }; 45 | 46 | #if NCNN_STDIO 47 | class DataReaderFromStdio : public DataReader 48 | { 49 | public: 50 | DataReaderFromStdio(FILE* fp); 51 | 52 | #if NCNN_STRING 53 | virtual int scan(const char* format, void* p) const; 54 | #endif // NCNN_STRING 55 | virtual size_t read(void* buf, size_t size) const; 56 | 57 | protected: 58 | FILE* fp; 59 | }; 60 | #endif // NCNN_STDIO 61 | 62 | class DataReaderFromMemory : public DataReader 63 | { 64 | public: 65 | DataReaderFromMemory(const unsigned char*& mem); 66 | 67 | #if NCNN_STRING 68 | virtual int scan(const char* format, void* p) const; 69 | #endif // NCNN_STRING 70 | virtual size_t read(void* buf, size_t size) const; 71 | 72 | protected: 73 | const unsigned char*& mem; 74 | }; 75 | 76 | #if __ANDROID_API__ >= 9 77 | class DataReaderFromAndroidAsset : public DataReader 78 | { 79 | public: 80 | DataReaderFromAndroidAsset(AAsset* asset); 81 | 82 | #if NCNN_STRING 83 | virtual int scan(const char* format, void* p) const; 84 | #endif // NCNN_STRING 85 | virtual size_t read(void* buf, size_t size) const; 86 | 87 | protected: 88 | AAsset* asset; 89 | mutable const unsigned char* mem; 90 | }; 91 | #endif // __ANDROID_API__ >= 9 92 | 93 | } // namespace ncnn 94 | 95 | #endif // NCNN_DATAREADER_H 96 | -------------------------------------------------------------------------------- 
/include/ncnn/gpu.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_GPU_H 16 | #define NCNN_GPU_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_VULKAN 21 | 22 | #include "mat.h" 23 | 24 | #include 25 | 26 | namespace ncnn { 27 | 28 | // instance 29 | int create_gpu_instance(); 30 | void destroy_gpu_instance(); 31 | 32 | // instance extension capability 33 | extern int support_VK_KHR_external_memory_capabilities; 34 | extern int support_VK_KHR_get_physical_device_properties2; 35 | extern int support_VK_KHR_get_surface_capabilities2; 36 | extern int support_VK_KHR_surface; 37 | extern int support_VK_EXT_debug_utils; 38 | #if __ANDROID_API__ >= 26 39 | extern int support_VK_KHR_android_surface; 40 | #endif // __ANDROID_API__ >= 26 41 | 42 | // VK_KHR_external_memory_capabilities 43 | extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR; 44 | 45 | // VK_KHR_get_physical_device_properties2 46 | extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR; 47 | extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR; 48 | extern PFN_vkGetPhysicalDeviceFormatProperties2KHR 
vkGetPhysicalDeviceFormatProperties2KHR; 49 | extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR; 50 | extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR; 51 | extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR; 52 | extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparseImageFormatProperties2KHR; 53 | 54 | // VK_KHR_get_surface_capabilities2 55 | extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR; 56 | extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR; 57 | 58 | // VK_KHR_surface 59 | extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR; 60 | extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR; 61 | extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR; 62 | extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR; 63 | extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR; 64 | 65 | #if __ANDROID_API__ >= 26 66 | // VK_KHR_android_surface 67 | extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR; 68 | #endif // __ANDROID_API__ >= 26 69 | 70 | // get info 71 | int get_gpu_count(); 72 | int get_default_gpu_index(); 73 | 74 | class GpuInfo 75 | { 76 | public: 77 | // vulkan physical device 78 | VkPhysicalDevice physical_device; 79 | 80 | // memory properties 81 | VkPhysicalDeviceMemoryProperties physicalDeviceMemoryProperties; 82 | 83 | // info 84 | uint32_t api_version; 85 | uint32_t driver_version; 86 | uint32_t vendor_id; 87 | uint32_t device_id; 88 | std::string device_name; 89 | uint8_t pipeline_cache_uuid[VK_UUID_SIZE]; 90 | 91 | // 0 = discrete gpu 92 | // 1 = integrated gpu 93 | // 2 = virtual gpu 94 | // 3 = cpu 95 | int type; 96 | 97 | // hardware limit 98 | 
uint32_t max_shared_memory_size; 99 | uint32_t max_workgroup_count[3]; 100 | uint32_t max_workgroup_invocations; 101 | uint32_t max_workgroup_size[3]; 102 | size_t memory_map_alignment; 103 | size_t buffer_offset_alignment; 104 | size_t non_coherent_atom_size; 105 | size_t buffer_image_granularity; 106 | uint32_t max_image_dimension_1d; 107 | uint32_t max_image_dimension_2d; 108 | uint32_t max_image_dimension_3d; 109 | float timestamp_period; 110 | 111 | // runtime 112 | uint32_t compute_queue_family_index; 113 | uint32_t graphics_queue_family_index; 114 | uint32_t transfer_queue_family_index; 115 | 116 | uint32_t compute_queue_count; 117 | uint32_t graphics_queue_count; 118 | uint32_t transfer_queue_count; 119 | 120 | // property 121 | bool unified_compute_transfer_queue; 122 | 123 | // subgroup 124 | uint32_t subgroup_size; 125 | bool support_subgroup_basic; 126 | bool support_subgroup_vote; 127 | bool support_subgroup_ballot; 128 | bool support_subgroup_shuffle; 129 | 130 | // bug is not feature 131 | bool bug_storage_buffer_no_l1; 132 | bool bug_corrupted_online_pipeline_cache; 133 | 134 | // but sometimes bug is a feature 135 | bool bug_implicit_fp16_arithmetic; 136 | 137 | // fp16 and int8 feature 138 | bool support_fp16_packed; 139 | bool support_fp16_storage; 140 | bool support_fp16_arithmetic; 141 | bool support_int8_storage; 142 | bool support_int8_arithmetic; 143 | 144 | // ycbcr conversion feature 145 | bool support_ycbcr_conversion; 146 | 147 | // extension capability 148 | int support_VK_KHR_8bit_storage; 149 | int support_VK_KHR_16bit_storage; 150 | int support_VK_KHR_bind_memory2; 151 | int support_VK_KHR_dedicated_allocation; 152 | int support_VK_KHR_descriptor_update_template; 153 | int support_VK_KHR_external_memory; 154 | int support_VK_KHR_get_memory_requirements2; 155 | int support_VK_KHR_maintenance1; 156 | int support_VK_KHR_push_descriptor; 157 | int support_VK_KHR_sampler_ycbcr_conversion; 158 | int support_VK_KHR_shader_float16_int8; 159 
| int support_VK_KHR_shader_float_controls; 160 | int support_VK_KHR_storage_buffer_storage_class; 161 | int support_VK_KHR_swapchain; 162 | int support_VK_EXT_memory_budget; 163 | int support_VK_EXT_queue_family_foreign; 164 | #if __ANDROID_API__ >= 26 165 | int support_VK_ANDROID_external_memory_android_hardware_buffer; 166 | #endif // __ANDROID_API__ >= 26 167 | }; 168 | // GpuInfo lookup by device index; device_index defaults to get_default_gpu_index() 169 | const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index()); 170 | // forward declarations 171 | class VkAllocator; 172 | class VkCompute; 173 | class Layer; 174 | class Packing_vulkan; 175 | class Option; 176 | class PipelineCache; 177 | class VulkanDevice 178 | { 179 | public: 180 | VulkanDevice(int device_index = get_default_gpu_index()); 181 | ~VulkanDevice(); 182 | // public const GpuInfo reference (NOTE(review): presumably the info for device_index - confirm) 183 | const GpuInfo& info; 184 | // accessor for the private VkDevice member `device` 185 | VkDevice vkdevice() const 186 | { 187 | return device; 188 | } 189 | // overload without local size arguments 190 | VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const; 191 | 192 | // with fixed workgroup size 193 | VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) const; 194 | 195 | // helper for creating pipeline 196 | int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const; 197 | int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const; 198 | int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector& specializations, VkPipeline* pipeline) const; 199 | int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; 200 | 201 | uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not)
const; 202 | bool is_mappable(uint32_t memory_type_index) const; 203 | bool is_coherent(uint32_t memory_type_index) const; 204 | 205 | VkQueue acquire_queue(uint32_t queue_family_index) const; 206 | void reclaim_queue(uint32_t queue_family_index, VkQueue queue) const; 207 | 208 | // allocator on this device; each acquire_* call has a matching reclaim_* call 209 | VkAllocator* acquire_blob_allocator() const; 210 | void reclaim_blob_allocator(VkAllocator* allocator) const; 211 | 212 | VkAllocator* acquire_staging_allocator() const; 213 | void reclaim_staging_allocator(VkAllocator* allocator) const; 214 | 215 | // immutable sampler for texelfetch 216 | const VkSampler* immutable_texelfetch_sampler() const; 217 | 218 | // dummy buffer image 219 | VkMat get_dummy_buffer() const; 220 | VkImageMat get_dummy_image() const; 221 | 222 | // pipeline cache on this device 223 | const PipelineCache* get_pipeline_cache() const; 224 | 225 | // test image allocation 226 | bool shape_support_image_storage(const Mat& shape) const; 227 | 228 | // current gpu heap memory budget in MB 229 | uint32_t get_heap_budget() const; 230 | 231 | // utility operator: one convert_packing overload per VkMat/VkImageMat source and destination combination 232 | void convert_packing(const VkMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; 233 | void convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; 234 | void convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; 235 | void convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; 236 | // public extension function pointers, grouped by extension (NOTE(review): presumably filled in by init_device_extension() - confirm) 237 | // VK_KHR_bind_memory2 238 | PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; 239 | PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; 240 | 241 | // VK_KHR_descriptor_update_template 242 | PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; 243 | PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; 244 | PFN_vkUpdateDescriptorSetWithTemplateKHR
vkUpdateDescriptorSetWithTemplateKHR; 245 | 246 | // VK_KHR_get_memory_requirements2 247 | PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR; 248 | PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; 249 | PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR; 250 | 251 | // VK_KHR_maintenance1 252 | PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR; 253 | 254 | // VK_KHR_push_descriptor 255 | PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR; 256 | PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; 257 | 258 | // VK_KHR_sampler_ycbcr_conversion 259 | PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR; 260 | PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR; 261 | 262 | // VK_KHR_swapchain 263 | PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; 264 | PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; 265 | PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; 266 | PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; 267 | PFN_vkQueuePresentKHR vkQueuePresentKHR; 268 | 269 | #if __ANDROID_API__ >= 26 270 | // VK_ANDROID_external_memory_android_hardware_buffer 271 | PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; 272 | PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID; 273 | #endif // __ANDROID_API__ >= 26 274 | 275 | protected: 276 | // device extension 277 | int init_device_extension(); 278 | 279 | // dummy buffer and image 280 | int create_dummy_buffer_image(); 281 | void destroy_dummy_buffer_image(); 282 | 283 | // utility operator 284 | const ncnn::Packing_vulkan* get_utility_operator(int storage_type_from, int storage_type_to, int cast_type_from_index, int cast_type_to_index, int packing_type_to_index) const; 285 | void destroy_utility_operator(); 286 | 287 | private: 288 | VkDevice device; 289 | 290 | // hardware queue (NOTE(review): the element types after std::vector look stripped in this dump - confirm against upstream) 291 | mutable std::vector
compute_queues; 292 | mutable std::vector graphics_queues; 293 | mutable std::vector transfer_queues; 294 | mutable Mutex queue_lock; 295 | 296 | // default blob allocator for each queue 297 | mutable std::vector blob_allocators; 298 | mutable Mutex blob_allocator_lock; 299 | 300 | // default staging allocator for each queue 301 | mutable std::vector staging_allocators; 302 | mutable Mutex staging_allocator_lock; 303 | 304 | // nearest sampler for texelfetch 305 | VkSampler texelfetch_sampler; 306 | 307 | // dummy buffer and image 308 | VkAllocator* dummy_allocator; 309 | VkMat dummy_buffer; 310 | VkImageMat dummy_image; 311 | 312 | // device-wide pipeline cache 313 | PipelineCache* pipeline_cache; 314 | 315 | // utility operator table; the five lines below describe the five array dimensions (sizes 2, 2, 3, 3, 3) 316 | // from buffer | image 317 | // to buffer | image 318 | // from fp32-b/i | fp16p-b/i | fp16s-b/i 319 | // to fp32-b/i | fp16p-b/i | fp16s-b/i 320 | // to pack1 | pack4 | pack8 321 | mutable ncnn::Packing_vulkan* uop_packing[2][2][3][3][3]; 322 | mutable Mutex uop_lock; 323 | }; 324 | // VulkanDevice lookup by device index; device_index defaults to get_default_gpu_index() 325 | VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index()); 326 | 327 | // online spirv compilation 328 | int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector& spirv); 329 | int compile_spirv_module(int shader_type_index, const Option& opt, std::vector& spirv); 330 | 331 | // info from spirv 332 | class ShaderInfo 333 | { 334 | public: 335 | int specialization_count; 336 | int binding_count; 337 | int push_constant_count; 338 | // value codes stored in binding_types[]: 339 | // 0 = null 340 | // 1 = storage buffer 341 | // 2 = storage image 342 | // 3 = combined image sampler 343 | int binding_types[16]; // 16 is large enough I think ...
344 | }; 345 | 346 | int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info); 347 | 348 | } // namespace ncnn 349 | 350 | #endif // NCNN_VULKAN 351 | 352 | #endif // NCNN_GPU_H 353 | -------------------------------------------------------------------------------- /include/ncnn/layer.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License.
14 | 15 | #ifndef NCNN_LAYER_H 16 | #define NCNN_LAYER_H 17 | 18 | #include "mat.h" 19 | #include "modelbin.h" 20 | #include "option.h" 21 | #include "paramdict.h" 22 | #include "platform.h" 23 | 24 | #include 25 | 26 | #if NCNN_VULKAN 27 | #include "command.h" 28 | #include "pipeline.h" 29 | 30 | #include 31 | #endif // NCNN_VULKAN 32 | // NOTE(review): the bare `#include` lines above and the bare `std::vector` types below look like angle-bracket text lost in this dump - confirm against upstream ncnn 33 | namespace ncnn { 34 | // base class for layers: parameter/weight loading, pipeline setup/teardown and the forward entry points are all virtual 35 | class Layer 36 | { 37 | public: 38 | // empty 39 | Layer(); 40 | // virtual destructor 41 | virtual ~Layer(); 42 | 43 | // load layer specific parameter from parsed dict 44 | // return 0 if success 45 | virtual int load_param(const ParamDict& pd); 46 | 47 | // load layer specific weight data from model binary 48 | // return 0 if success 49 | virtual int load_model(const ModelBin& mb); 50 | 51 | // layer implementation specific setup 52 | // return 0 if success 53 | virtual int create_pipeline(const Option& opt); 54 | 55 | // layer implementation specific clean 56 | // return 0 if success 57 | virtual int destroy_pipeline(const Option& opt); 58 | // capability flags, exposed as public data members 59 | public: 60 | // one input and one output blob 61 | bool one_blob_only; 62 | 63 | // support inplace inference 64 | bool support_inplace; 65 | 66 | // support vulkan compute 67 | bool support_vulkan; 68 | 69 | // accept input blob with packed storage 70 | bool support_packing; 71 | 72 | // accept bf16 73 | bool support_bf16_storage; 74 | 75 | // accept fp16 76 | bool support_fp16_storage; 77 | 78 | // shader image storage 79 | bool support_image_storage; 80 | 81 | // TODO drop these fields 82 | bool use_int8_inference; 83 | bool support_weight_fp16_storage; 84 | 85 | public: 86 | // implement inference (multi-blob overload first, single-blob overload second) 87 | // return 0 if success 88 | virtual int forward(const std::vector& bottom_blobs, std::vector& top_blobs, const Option& opt) const; 89 | virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; 90 | 91 | // implement inplace inference (multi-blob overload first, single-blob overload second) 92 | // return 0 if success 93 | virtual int forward_inplace(std::vector& bottom_top_blobs,
const Option& opt) const; 94 | virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; 95 | 96 | #if NCNN_VULKAN 97 | public: 98 | // upload weight blob from host to device 99 | virtual int upload_model(VkTransfer& cmd, const Option& opt); 100 | 101 | public: 102 | // implement inference (VkMat variant, going by the single-blob overload) 103 | // return 0 if success 104 | virtual int forward(const std::vector& bottom_blobs, std::vector& top_blobs, VkCompute& cmd, const Option& opt) const; 105 | virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const; 106 | 107 | // implement inference (VkImageMat variant, going by the single-blob overload) 108 | // return 0 if success 109 | virtual int forward(const std::vector& bottom_blobs, std::vector& top_blobs, VkCompute& cmd, const Option& opt) const; 110 | virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const; 111 | 112 | // implement inplace inference (VkMat variant, going by the single-blob overload) 113 | // return 0 if success 114 | virtual int forward_inplace(std::vector& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; 115 | virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; 116 | 117 | // implement inplace inference (VkImageMat variant, going by the single-blob overload) 118 | // return 0 if success 119 | virtual int forward_inplace(std::vector& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; 120 | virtual int forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; 121 | 122 | public: 123 | // assigned immediately after creating this layer 124 | const VulkanDevice* vkdev; 125 | #endif // NCNN_VULKAN 126 | 127 | public: 128 | // layer type index 129 | int typeindex; 130 | #if NCNN_STRING 131 | // layer type name 132 | std::string type; 133 | // layer name 134 | std::string name; 135 | #endif // NCNN_STRING 136 | // blob index which this layer needs as input 137 | std::vector bottoms; 138 | // blob index which this layer produces as output 139 | std::vector tops; 140 | // shape hint 141 | std::vector
bottom_shapes; 142 | std::vector top_shapes; 143 | }; 144 | 145 | // layer factory function 146 | typedef Layer* (*layer_creator_func)(); 147 | // registry entry: a factory pointer, plus the type name when NCNN_STRING is enabled 148 | struct layer_registry_entry 149 | { 150 | #if NCNN_STRING 151 | // layer type name 152 | const char* name; 153 | #endif // NCNN_STRING 154 | // layer factory entry 155 | layer_creator_func creator; 156 | }; 157 | 158 | #if NCNN_STRING 159 | // get layer type from type name 160 | int layer_to_index(const char* type); 161 | // create layer from type name 162 | Layer* create_layer(const char* type); 163 | #endif // NCNN_STRING 164 | // create layer from layer type 165 | Layer* create_layer(int index); 166 | // defines name##_layer_creator(), a function returning `new name` as ::ncnn::Layer* 167 | #define DEFINE_LAYER_CREATOR(name) \ 168 | ::ncnn::Layer* name##_layer_creator() \ 169 | { \ 170 | return new name; \ 171 | } 172 | 173 | } // namespace ncnn 174 | 175 | #endif // NCNN_LAYER_H 176 | -------------------------------------------------------------------------------- /include/ncnn/layer_shader_type.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License.
14 | 15 | #ifndef NCNN_LAYER_SHADER_TYPE_H 16 | #define NCNN_LAYER_SHADER_TYPE_H 17 | 18 | namespace ncnn { 19 | 20 | namespace LayerShaderType { 21 | enum LayerShaderType 22 | { 23 | #include "layer_shader_type_enum.h" 24 | }; 25 | } // namespace LayerShaderType 26 | // the enumerators are pulled in textually from layer_shader_type_enum.h (that file lists none in this copy) 27 | } // namespace ncnn 28 | 29 | #endif // NCNN_LAYER_SHADER_TYPE_H 30 | -------------------------------------------------------------------------------- /include/ncnn/layer_shader_type_enum.h: -------------------------------------------------------------------------------- 1 | // Layer Shader Enum header 2 | // 3 | // This file is auto-generated by cmake, don't edit it. 4 | 5 | 6 | -------------------------------------------------------------------------------- /include/ncnn/layer_type.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License.
14 | 15 | #ifndef NCNN_LAYER_TYPE_H 16 | #define NCNN_LAYER_TYPE_H 17 | 18 | namespace ncnn { 19 | 20 | namespace LayerType { 21 | enum LayerType 22 | { 23 | #include "layer_type_enum.h" 24 | CustomBit = (1 << 8), 25 | }; 26 | } // namespace LayerType 27 | // CustomBit is 1 << 8 = 256, above every index listed in layer_type_enum.h (highest listed: Softplus = 77); NOTE(review): presumably marks custom layer indices - confirm 28 | } // namespace ncnn 29 | 30 | #endif // NCNN_LAYER_TYPE_H 31 | -------------------------------------------------------------------------------- /include/ncnn/layer_type_enum.h: -------------------------------------------------------------------------------- 1 | // Layer Type Enum header 2 | // 3 | // This file is auto-generated by cmake, don't edit it. 4 | 5 | AbsVal = 0, 6 | ArgMax = 1, 7 | BatchNorm = 2, 8 | Bias = 3, 9 | BNLL = 4, 10 | Concat = 5, 11 | Convolution = 6, 12 | Crop = 7, 13 | Deconvolution = 8, 14 | Dropout = 9, 15 | Eltwise = 10, 16 | ELU = 11, 17 | Embed = 12, 18 | Exp = 13, 19 | Flatten = 14, 20 | InnerProduct = 15, 21 | Input = 16, 22 | Log = 17, 23 | LRN = 18, 24 | MemoryData = 19, 25 | MVN = 20, 26 | Pooling = 21, 27 | Power = 22, 28 | PReLU = 23, 29 | Proposal = 24, 30 | Reduction = 25, 31 | ReLU = 26, 32 | Reshape = 27, 33 | ROIPooling = 28, 34 | Scale = 29, 35 | Sigmoid = 30, 36 | Slice = 31, 37 | Softmax = 32, 38 | Split = 33, 39 | SPP = 34, 40 | TanH = 35, 41 | Threshold = 36, 42 | Tile = 37, 43 | RNN = 38, 44 | LSTM = 39, 45 | BinaryOp = 40, 46 | UnaryOp = 41, 47 | ConvolutionDepthWise = 42, 48 | Padding = 43, 49 | Squeeze = 44, 50 | ExpandDims = 45, 51 | Normalize = 46, 52 | Permute = 47, 53 | PriorBox = 48, 54 | DetectionOutput = 49, 55 | Interp = 50, 56 | DeconvolutionDepthWise = 51, 57 | ShuffleChannel = 52, 58 | InstanceNorm = 53, 59 | Clip = 54, 60 | Reorg = 55, 61 | YoloDetectionOutput = 56, 62 | Quantize = 57, 63 | Dequantize = 58, 64 | Yolov3DetectionOutput = 59, 65 | PSROIPooling = 60, 66 | ROIAlign = 61, 67 | Packing = 62, 68 | Requantize = 63, 69 | Cast = 64, 70 | HardSigmoid = 65, 71 | SELU = 66, 72 | HardSwish = 67, 73 | Noop = 68, 74 | PixelShuffle = 69, 75 |
DeepCopy = 70, 76 | Mish = 71, 77 | StatisticsPooling = 72, 78 | Swish = 73, 79 | Gemm = 74, 80 | GroupNorm = 75, 81 | LayerNorm = 76, 82 | Softplus = 77, 83 | 84 | -------------------------------------------------------------------------------- /include/ncnn/modelbin.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_MODELBIN_H 16 | #define NCNN_MODELBIN_H 17 | 18 | #include "mat.h" 19 | 20 | namespace ncnn { 21 | 22 | class DataReader; 23 | class ModelBin 24 | { 25 | public: 26 | virtual ~ModelBin(); 27 | // element type (NOTE(review): presumably the values accepted by the `type` argument of load() - confirm) 28 | // 0 = auto 29 | // 1 = float32 30 | // 2 = float16 31 | // 3 = int8 32 | // load vec (pure virtual; the only load() overload subclasses must provide) 33 | virtual Mat load(int w, int type) const = 0; 34 | // load image 35 | virtual Mat load(int w, int h, int type) const; 36 | // load dim 37 | virtual Mat load(int w, int h, int c, int type) const; 38 | }; 39 | // ModelBin that keeps a reference to a DataReader; declares only the 1-D load() override 40 | class ModelBinFromDataReader : public ModelBin 41 | { 42 | public: 43 | ModelBinFromDataReader(const DataReader& dr); 44 | 45 | virtual Mat load(int w, int type) const; 46 | 47 | protected: 48 | const DataReader& dr; 49 | }; 50 | // ModelBin over a pointer to Mat weights; declares only the 1-D load() override 51 | class ModelBinFromMatArray : public ModelBin 52 | { 53 | public: 54 | // construct from weight blob array 55 | ModelBinFromMatArray(const Mat* weights); 56 | 57 | virtual Mat load(int w, int type) const; 58 | 59 | protected: 60 | mutable const Mat* weights; 61 | }; 62 | 63 | } // namespace ncnn 64 | 65 | #endif // NCNN_MODELBIN_H 66 | -------------------------------------------------------------------------------- /include/ncnn/net.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied.
See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_NET_H 16 | #define NCNN_NET_H 17 | 18 | #include "blob.h" 19 | #include "layer.h" 20 | #include "mat.h" 21 | #include "option.h" 22 | #include "platform.h" 23 | // NOTE(review): the include target inside the Android guard below appears to have been lost in this dump - confirm against upstream ncnn 24 | #if __ANDROID_API__ >= 9 25 | #include 26 | #endif // __ANDROID_API__ >= 9 27 | 28 | namespace ncnn { 29 | 30 | #if NCNN_VULKAN 31 | class VkCompute; 32 | class PipelineCache; 33 | #endif // NCNN_VULKAN 34 | class DataReader; 35 | class Extractor; 36 | class Net 37 | { 38 | public: 39 | // empty init 40 | Net(); 41 | // clear and destroy 42 | ~Net(); 43 | 44 | public: 45 | // option can be changed before loading 46 | Option opt; 47 | 48 | #if NCNN_VULKAN 49 | // set gpu device by index 50 | void set_vulkan_device(int device_index); 51 | 52 | // set gpu device by device handle, no owner transfer 53 | void set_vulkan_device(const VulkanDevice* vkdev); 54 | // getter counterpart of set_vulkan_device() 55 | const VulkanDevice* vulkan_device() const; 56 | #endif // NCNN_VULKAN 57 | 58 | #if NCNN_STRING 59 | // register custom layer by layer type name 60 | // return 0 if success 61 | int register_custom_layer(const char* type, layer_creator_func creator); 62 | #endif // NCNN_STRING 63 | // register custom layer by layer type 64 | // return 0 if success 65 | int register_custom_layer(int index, layer_creator_func creator); 66 | // DataReader-based loaders; the plain-param variant exists only when NCNN_STRING is enabled 67 | #if NCNN_STRING 68 | int load_param(const DataReader& dr); 69 | #endif // NCNN_STRING 70 | 71 | int load_param_bin(const DataReader& dr); 72 | 73 | int load_model(const DataReader& dr); 74 | 75 | #if NCNN_STDIO 76 | #if NCNN_STRING 77 | // load network structure from plain param file 78 | // return 0 if success 79 | int load_param(FILE* fp); 80 | int load_param(const char* protopath); 81 | int load_param_mem(const char* mem); 82 | #endif // NCNN_STRING 83 | // load network structure from binary param file 84 | // return 0 if success 85 | int load_param_bin(FILE* fp); 86 | int load_param_bin(const
char* protopath); 87 | 88 | // load network weight data from model file 89 | // return 0 if success 90 | int load_model(FILE* fp); 91 | int load_model(const char* modelpath); 92 | #endif // NCNN_STDIO 93 | 94 | // load network structure from external memory 95 | // memory pointer must be 32-bit aligned 96 | // return bytes consumed (note: the file loaders above are documented as returning 0 if success) 97 | int load_param(const unsigned char* mem); 98 | 99 | // reference network weight data from external memory 100 | // weight data is not copied but referenced 101 | // so external memory should be retained when used 102 | // memory pointer must be 32-bit aligned 103 | // return bytes consumed (note: the file loaders above are documented as returning 0 if success) 104 | int load_model(const unsigned char* mem); 105 | 106 | #if __ANDROID_API__ >= 9 107 | #if NCNN_STRING 108 | // convenient load network structure from android asset plain param file 109 | int load_param(AAsset* asset); 110 | int load_param(AAssetManager* mgr, const char* assetpath); 111 | #endif // NCNN_STRING 112 | // convenient load network structure from android asset binary param file 113 | int load_param_bin(AAsset* asset); 114 | int load_param_bin(AAssetManager* mgr, const char* assetpath); 115 | 116 | // convenient load network weight data from android asset model file 117 | int load_model(AAsset* asset); 118 | int load_model(AAssetManager* mgr, const char* assetpath); 119 | #endif // __ANDROID_API__ >= 9 120 | 121 | // unload network structure and weight data 122 | void clear(); 123 | 124 | // construct an Extractor from network 125 | Extractor create_extractor() const; 126 | 127 | public: 128 | std::vector blobs; 129 | std::vector layers; 130 | 131 | protected: 132 | // parse the structure of network 133 | // fuse int8 op dequantize and quantize by requantize 134 | int fuse_network(); 135 | 136 | #if NCNN_VULKAN 137 | 138 | int upload_model(); 139 | 140 | int create_pipeline(); 141 | 142 | int destroy_pipeline(); 143 | 144 | #endif // NCNN_VULKAN 145 | // the friend declaration gives Extractor access to the protected members of Net 146 | friend class Extractor; 147 | #if NCNN_STRING 148 | int
find_blob_index_by_name(const char* name) const; 149 | int find_layer_index_by_name(const char* name) const; 150 | int custom_layer_to_index(const char* type); 151 | Layer* create_custom_layer(const char* type); 152 | #endif // NCNN_STRING 153 | Layer* create_custom_layer(int index); 154 | int forward_layer(int layer_index, std::vector& blob_mats, const Option& opt) const; 155 | 156 | #if NCNN_VULKAN 157 | int forward_layer(int layer_index, std::vector& blob_mats, std::vector& blob_mats_gpu, VkCompute& cmd, const Option& opt) const; 158 | int forward_layer(int layer_index, std::vector& blob_mats, std::vector& blob_mats_gpu, std::vector& blob_mats_gpu_image, VkCompute& cmd, const Option& opt) const; 159 | #endif // NCNN_VULKAN 160 | 161 | protected: 162 | std::vector custom_layer_registry; 163 | 164 | #if NCNN_VULKAN 165 | const VulkanDevice* vkdev; 166 | 167 | VkAllocator* weight_vkallocator; 168 | VkAllocator* weight_staging_vkallocator; 169 | 170 | PipelineCache* pipeline_cache; 171 | #endif // NCNN_VULKAN 172 | }; 173 | // handle returned by Net::create_extractor(); its constructor is protected, so it is not constructed directly 174 | class Extractor 175 | { 176 | public: 177 | ~Extractor(); 178 | 179 | // enable light mode 180 | // intermediate blob will be recycled when enabled 181 | // enabled by default 182 | void set_light_mode(bool enable); 183 | 184 | // set thread count for this extractor 185 | // this will overwrite the global setting 186 | // default count is system dependent 187 | void set_num_threads(int num_threads); 188 | 189 | // set blob memory allocator 190 | void set_blob_allocator(Allocator* allocator); 191 | 192 | // set workspace memory allocator 193 | void set_workspace_allocator(Allocator* allocator); 194 | 195 | #if NCNN_VULKAN 196 | void set_vulkan_compute(bool enable); 197 | 198 | void set_blob_vkallocator(VkAllocator* allocator); 199 | 200 | void set_workspace_vkallocator(VkAllocator* allocator); 201 | 202 | void set_staging_vkallocator(VkAllocator* allocator); 203 | #endif // NCNN_VULKAN 204 | 205 | #if NCNN_STRING 206 | // set input by blob
name 207 | // return 0 if success 208 | int input(const char* blob_name, const Mat& in); 209 | 210 | // get result by blob name 211 | // return 0 if success 212 | // type = 0, default 213 | // type = 1, do not convert fp16/bf16 and/or packing 214 | int extract(const char* blob_name, Mat& feat, int type = 0); 215 | #endif // NCNN_STRING 216 | 217 | // set input by blob index 218 | // return 0 if success 219 | int input(int blob_index, const Mat& in); 220 | 221 | // get result by blob index 222 | // return 0 if success 223 | // type = 0, default 224 | // type = 1, do not convert fp16/bf16 and/or packing 225 | int extract(int blob_index, Mat& feat, int type = 0); 226 | 227 | #if NCNN_VULKAN 228 | #if NCNN_STRING 229 | // set input by blob name 230 | // return 0 if success 231 | int input(const char* blob_name, const VkMat& in); 232 | 233 | // get result by blob name 234 | // return 0 if success 235 | int extract(const char* blob_name, VkMat& feat, VkCompute& cmd); 236 | 237 | // set input by blob name 238 | // return 0 if success 239 | int input(const char* blob_name, const VkImageMat& in); 240 | 241 | // get result by blob name 242 | // return 0 if success 243 | int extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd); 244 | #endif // NCNN_STRING 245 | 246 | // set input by blob index 247 | // return 0 if success 248 | int input(int blob_index, const VkMat& in); 249 | 250 | // get result by blob index 251 | // return 0 if success 252 | int extract(int blob_index, VkMat& feat, VkCompute& cmd); 253 | 254 | // set input by blob index 255 | // return 0 if success 256 | int input(int blob_index, const VkImageMat& in); 257 | 258 | // get result by blob index 259 | // return 0 if success 260 | int extract(int blob_index, VkImageMat& feat, VkCompute& cmd); 261 | #endif // NCNN_VULKAN 262 | // the constructor is protected; Net::create_extractor() is befriended so it can call it 263 | protected: 264 | friend Extractor Net::create_extractor() const; 265 | Extractor(const Net* net, size_t blob_count); 266 | 267 | private: 268 | const Net* net; 269 |
std::vector blob_mats; 270 | Option opt; 271 | 272 | #if NCNN_VULKAN 273 | VkAllocator* local_blob_vkallocator; 274 | VkAllocator* local_staging_vkallocator; 275 | 276 | std::vector blob_mats_gpu; 277 | std::vector blob_mats_gpu_image; 278 | #endif // NCNN_VULKAN 279 | }; 280 | 281 | } // namespace ncnn 282 | 283 | #endif // NCNN_NET_H 284 | -------------------------------------------------------------------------------- /include/ncnn/option.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License.
14 | 15 | #ifndef NCNN_OPTION_H 16 | #define NCNN_OPTION_H 17 | 18 | #include "platform.h" 19 | 20 | namespace ncnn { 21 | 22 | #if NCNN_VULKAN 23 | class VkAllocator; 24 | class PipelineCache; 25 | #endif // NCNN_VULKAN 26 | // plain settings object: every option below is a public data member 27 | class Allocator; 28 | class Option 29 | { 30 | public: 31 | // default option 32 | Option(); 33 | 34 | public: 35 | // light mode 36 | // intermediate blob will be recycled when enabled 37 | // enabled by default 38 | bool lightmode; 39 | 40 | // thread count 41 | // default value is the one returned by get_cpu_count() 42 | int num_threads; 43 | 44 | // blob memory allocator 45 | Allocator* blob_allocator; 46 | 47 | // workspace memory allocator 48 | Allocator* workspace_allocator; 49 | 50 | #if NCNN_VULKAN 51 | // blob memory allocator (VkAllocator, only present in NCNN_VULKAN builds) 52 | VkAllocator* blob_vkallocator; 53 | 54 | // workspace memory allocator (VkAllocator, only present in NCNN_VULKAN builds) 55 | VkAllocator* workspace_vkallocator; 56 | 57 | // staging memory allocator 58 | VkAllocator* staging_vkallocator; 59 | 60 | // pipeline cache 61 | PipelineCache* pipeline_cache; 62 | #endif // NCNN_VULKAN 63 | 64 | // the time openmp threads busy-wait for more work before going to sleep 65 | // default value is 20ms to keep the cores enabled 66 | // without too much extra power consumption afterwards 67 | int openmp_blocktime; 68 | 69 | // enable winograd convolution optimization 70 | // improve convolution 3x3 stride1 performance, may consume more memory 71 | // changes should be applied before loading network structure and weight 72 | // enabled by default 73 | bool use_winograd_convolution; 74 | 75 | // enable sgemm convolution optimization 76 | // improve convolution 1x1 stride1 performance, may consume more memory 77 | // changes should be applied before loading network structure and weight 78 | // enabled by default 79 | bool use_sgemm_convolution; 80 | 81 | // enable quantized int8 inference 82 | // use low-precision int8 path for quantized model 83 | // changes should be applied before loading network structure and
weight 84 | // enabled by default 85 | bool use_int8_inference; 86 | 87 | // enable vulkan compute 88 | bool use_vulkan_compute; 89 | 90 | // enable options for gpu inference 91 | bool use_fp16_packed; 92 | bool use_fp16_storage; 93 | bool use_fp16_arithmetic; 94 | bool use_int8_storage; 95 | bool use_int8_arithmetic; 96 | 97 | // enable simd-friendly packed memory layout 98 | // improve all operator performance on all arm devices, will consume more memory 99 | // changes should be applied before loading network structure and weight 100 | // enabled by default 101 | bool use_packing_layout; 102 | 103 | bool use_shader_pack8; 104 | 105 | // subgroup option 106 | bool use_subgroup_basic; 107 | bool use_subgroup_vote; 108 | bool use_subgroup_ballot; 109 | bool use_subgroup_shuffle; 110 | 111 | // turn on for adreno 112 | bool use_image_storage; 113 | 114 | // enable bf16 data type for storage 115 | // improve most operator performance on all arm devices, may consume more memory 116 | bool use_bf16_storage; 117 | 118 | // used for fp16 weight storage in AVX 119 | // TODO drop this option 120 | bool use_weight_fp16_storage; 121 | }; 122 | 123 | } // namespace ncnn 124 | 125 | #endif // NCNN_OPTION_H 126 | -------------------------------------------------------------------------------- /include/ncnn/paramdict.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License.
// You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef NCNN_PARAMDICT_H
#define NCNN_PARAMDICT_H

#include "mat.h"

// at most 32 parameters
// (this is the fixed size of ParamDict::params below)
#define NCNN_MAX_PARAM_COUNT 32

namespace ncnn {

class DataReader;
class Net;

// ParamDict holds NCNN_MAX_PARAM_COUNT typed slots. Each slot carries a type
// tag, a scalar (int or float sharing storage) and a Mat for array values.
// NOTE(review): `id` presumably indexes params[] directly and `def` is the
// fallback returned for an unset slot; the definitions are not in this header,
// so confirm before relying on either.
class ParamDict
{
public:
    // empty
    ParamDict();

    // get int
    int get(int id, int def) const;
    // get float
    float get(int id, float def) const;
    // get array
    Mat get(int id, const Mat& def) const;

    // set int
    void set(int id, int i);
    // set float
    void set(int id, float f);
    // set array
    void set(int id, const Mat& v);

protected:
    // Net is the only outside class allowed to call clear() and the loaders.
    friend class Net;

    void clear();

    // Both loaders take a DataReader and return an int status.
    // NOTE(review): presumably 0 means success - confirm in the implementation.
    int load_param(const DataReader& dr);
    int load_param_bin(const DataReader& dr);

protected:
    struct
    {
        // 0 = null
        // 1 = int/float
        // 2 = int
        // 3 = float
        // 4 = array of int/float
        // 5 = array of int
        // 6 = array of float
        int type;
        // scalar payload; i and f occupy the same storage
        union
        {
            int i;
            float f;
        };
        // array payload
        Mat v;
    } params[NCNN_MAX_PARAM_COUNT];
};

} // namespace ncnn

#endif // NCNN_PARAMDICT_H

// --------------------------------------------------------------------------------
// /include/ncnn/pipeline.h:
// --------------------------------------------------------------------------------
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company.
All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_PIPELINE_H 16 | #define NCNN_PIPELINE_H 17 | 18 | #include "mat.h" 19 | #include "platform.h" 20 | #if NCNN_VULKAN 21 | #include "gpu.h" 22 | 23 | #include 24 | #endif // NCNN_VULKAN 25 | 26 | namespace ncnn { 27 | 28 | #if NCNN_VULKAN 29 | class Option; 30 | class Pipeline 31 | { 32 | public: 33 | Pipeline(const VulkanDevice* vkdev); 34 | virtual ~Pipeline(); 35 | 36 | public: 37 | void set_optimal_local_size_xyz(int w = 4, int h = 4, int c = 4); 38 | void set_optimal_local_size_xyz(const Mat& local_size_xyz); 39 | void set_local_size_xyz(int w, int h, int c); 40 | 41 | int create(const uint32_t* spv_data, size_t spv_data_size, const std::vector& specializations); 42 | 43 | int create(int shader_type_index, const Option& opt, const std::vector& specializations); 44 | 45 | public: 46 | const VulkanDevice* vkdev; 47 | 48 | VkShaderModule shader_module; 49 | VkDescriptorSetLayout descriptorset_layout; 50 | VkPipelineLayout pipeline_layout; 51 | VkPipeline pipeline; 52 | VkDescriptorUpdateTemplateKHR descriptor_update_template; 53 | 54 | ShaderInfo shader_info; 55 | 56 | uint32_t local_size_x; 57 | uint32_t local_size_y; 58 | uint32_t local_size_z; 59 | }; 60 | 61 | #if __ANDROID_API__ >= 26 62 | class VkCompute; 63 | class ImportAndroidHardwareBufferPipeline : private Pipeline 64 | { 65 | public: 66 | ImportAndroidHardwareBufferPipeline(const 
VulkanDevice* vkdev); 67 | ~ImportAndroidHardwareBufferPipeline(); 68 | 69 | int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, const Option& opt); 70 | int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, int target_width, int target_height, const Option& opt); 71 | void destroy(); 72 | 73 | friend class VkCompute; 74 | 75 | protected: 76 | int create_shader_module(const Option& opt); 77 | int create_sampler(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator); 78 | int create_descriptorset_layout(); 79 | 80 | public: 81 | int type_to; 82 | int rotate_from; 83 | bool need_resize; 84 | 85 | VkSampler sampler; 86 | }; 87 | #endif // __ANDROID_API__ >= 26 88 | #endif // NCNN_VULKAN 89 | 90 | } // namespace ncnn 91 | 92 | #endif // NCNN_PIPELINE_H 93 | -------------------------------------------------------------------------------- /include/ncnn/pipelinecache.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_PIPELINECACHE_H 16 | #define NCNN_PIPELINECACHE_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_VULKAN 21 | #include 22 | #endif // NCNN_VULKAN 23 | 24 | #include "mat.h" 25 | #include "gpu.h" 26 | 27 | namespace ncnn { 28 | 29 | #if NCNN_VULKAN 30 | 31 | class VulkanDevice; 32 | 33 | class PipelineCache 34 | { 35 | public: 36 | PipelineCache(const VulkanDevice* _vkdev); 37 | 38 | ~PipelineCache(); 39 | 40 | void clear(); 41 | 42 | int get_pipeline(const uint32_t* spv_data, size_t spv_data_size, const std::vector& specializations, 43 | uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, 44 | VkShaderModule* shader_module, 45 | VkDescriptorSetLayout* descriptorset_layout, 46 | VkPipelineLayout* pipeline_layout, 47 | VkPipeline* pipeline, 48 | VkDescriptorUpdateTemplateKHR* descriptor_update_template, 49 | ShaderInfo& shader_info) const; 50 | 51 | int get_pipeline(int shader_type_index, const Option& opt, const std::vector& specializations, 52 | uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, 53 | VkShaderModule* shader_module, 54 | VkDescriptorSetLayout* descriptorset_layout, 55 | VkPipelineLayout* pipeline_layout, 56 | VkPipeline* pipeline, 57 | VkDescriptorUpdateTemplateKHR* descriptor_update_template, 58 | ShaderInfo& shader_info) const; 59 | 60 | protected: 61 | int create_shader_module(int shader_type_index, const Option& opt, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, 62 | VkShaderModule* _shader_module, ShaderInfo& si) const; 63 | 64 | int new_pipeline(VkShaderModule shader_module, const ShaderInfo& shader_info, const std::vector& specializations, 65 | VkDescriptorSetLayout* descriptorset_layout, 66 | VkPipelineLayout* pipeline_layout, 67 | VkPipeline* pipeline, 68 | VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; 69 | 70 | public: 71 | const VulkanDevice* vkdev; 72 | 73 | // digest -> artifact 74 | struct pipeline_cache_digest 75 | { 76 | 
pipeline_cache_digest(const uint32_t* spv_data, size_t spv_data_size, const std::vector& specializations, 77 | uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z); 78 | pipeline_cache_digest(int shader_type_index, const Option& opt, const std::vector& specializations, 79 | uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z); 80 | 81 | bool operator==(const pipeline_cache_digest& rhs) const 82 | { 83 | return d0 == rhs.d0 && d1 == rhs.d1; 84 | } 85 | 86 | bool operator!=(const pipeline_cache_digest& rhs) const 87 | { 88 | return d0 != rhs.d0 || d1 != rhs.d1; 89 | } 90 | 91 | union 92 | { 93 | struct 94 | { 95 | union 96 | { 97 | uint32_t spv_data_murmur3; 98 | int shader_type_index; 99 | }; 100 | unsigned char opt_local_size_bits[4]; 101 | }; 102 | 103 | uint64_t d0; 104 | }; 105 | 106 | union 107 | { 108 | struct 109 | { 110 | uint32_t specializations_murmur3; 111 | uint32_t specializations_fnv1a; 112 | }; 113 | 114 | uint64_t d1; 115 | }; 116 | }; 117 | 118 | struct pipeline_cache_artifact 119 | { 120 | VkShaderModule shader_module; 121 | VkDescriptorSetLayout descriptorset_layout; 122 | VkPipelineLayout pipeline_layout; 123 | VkPipeline pipeline; 124 | VkDescriptorUpdateTemplateKHR descriptor_update_template; 125 | ShaderInfo shader_info; // TODO use pointer ? 126 | }; 127 | 128 | mutable std::vector cache_digests; 129 | mutable std::vector cache_artifacts; 130 | mutable Mutex cache_lock; 131 | }; 132 | 133 | #endif // NCNN_VULKAN 134 | 135 | } // namespace ncnn 136 | 137 | #endif // NCNN_PIPELINECACHE_H 138 | -------------------------------------------------------------------------------- /include/ncnn/platform.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef NCNN_PLATFORM_H
#define NCNN_PLATFORM_H

// Build-time feature switches. The other ncnn headers test these with #if, so
// they must match the configuration the linked library was built with.
#define NCNN_STDIO 1
#define NCNN_STRING 1
#define NCNN_SIMPLEOCV 0
#define NCNN_SIMPLEOMP 0
#define NCNN_SIMPLESTL 0
#define NCNN_THREADS 1
#define NCNN_BENCHMARK 0
#define NCNN_PIXEL 1
#define NCNN_PIXEL_ROTATE 1
#define NCNN_PIXEL_AFFINE 1
#define NCNN_VULKAN 0
#define NCNN_REQUANT 0
#define NCNN_RUNTIME_CPU 1
#define NCNN_AVX2 1
#define NCNN_ARM82 0

#ifdef __cplusplus

// Native threading headers: Win32 API for MSVC-style Windows builds, pthreads
// everywhere else (including MinGW).
#if NCNN_THREADS
#if (defined _WIN32 && !(defined __MINGW32__))
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <process.h>
#else
#include <pthread.h>
#endif
#endif // NCNN_THREADS

#if __ANDROID_API__ >= 26
#define VK_USE_PLATFORM_ANDROID_KHR
#endif // __ANDROID_API__ >= 26

namespace ncnn {

#if NCNN_THREADS
#if (defined _WIN32 && !(defined __MINGW32__))
// Exclusive lock backed by a Win32 slim reader/writer lock.
class Mutex
{
public:
    Mutex() { InitializeSRWLock(&srwlock); }
    ~Mutex() {}
    void lock() { AcquireSRWLockExclusive(&srwlock); }
    void unlock() { ReleaseSRWLockExclusive(&srwlock); }
private:
    friend class ConditionVariable;
    // NOTE SRWLock is available from windows vista
    SRWLOCK srwlock;
};

// Condition variable paired with Mutex; wait() blocks without a timeout.
class ConditionVariable
{
public:
    ConditionVariable() { InitializeConditionVariable(&condvar); }
    ~ConditionVariable() {}
    void wait(Mutex& mutex) { SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); }
    void broadcast() { WakeAllConditionVariable(&condvar); }
    void signal() { WakeConditionVariable(&condvar); }
private:
    CONDITION_VARIABLE condvar;
};

// Thread starts running in its constructor. start_wrapper adapts the
// pthread-style entry point (void* (*)(void*)) to the signature expected by
// _beginthreadex; the entry point's return value is discarded.
// join() waits for the thread and closes the handle; the destructor does not.
static unsigned __stdcall start_wrapper(void* args);
class Thread
{
public:
    Thread(void* (*start)(void*), void* args = 0) { _start = start; _args = args; handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); }
    ~Thread() {}
    void join() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); }
private:
    friend unsigned __stdcall start_wrapper(void* args)
    {
        Thread* t = (Thread*)args;
        t->_start(t->_args);
        return 0;
    }
    HANDLE handle;
    void* (*_start)(void*);
    void* _args;
};

// One per-thread void* slot backed by a Win32 TLS index.
class ThreadLocalStorage
{
public:
    ThreadLocalStorage() { key = TlsAlloc(); }
    ~ThreadLocalStorage() { TlsFree(key); }
    void set(void* value) { TlsSetValue(key, (LPVOID)value); }
    void* get() { return (void*)TlsGetValue(key); }
private:
    DWORD key;
};
#else // (defined _WIN32 && !(defined __MINGW32__))
// Exclusive lock backed by a pthread mutex with default attributes.
class Mutex
{
public:
    Mutex() { pthread_mutex_init(&mutex, 0); }
    ~Mutex() { pthread_mutex_destroy(&mutex); }
    void lock() { pthread_mutex_lock(&mutex); }
    void unlock() { pthread_mutex_unlock(&mutex); }
private:
    friend class ConditionVariable;
    pthread_mutex_t mutex;
};

// Condition variable paired with Mutex; wait() blocks without a timeout.
class ConditionVariable
{
public:
    ConditionVariable() { pthread_cond_init(&cond, 0); }
    ~ConditionVariable() { pthread_cond_destroy(&cond); }
    void wait(Mutex& mutex) { pthread_cond_wait(&cond, &mutex.mutex); }
    void broadcast() { pthread_cond_broadcast(&cond); }
    void signal() { pthread_cond_signal(&cond); }
private:
    pthread_cond_t cond;
};

// Thread starts running in its constructor. The destructor neither joins nor
// detaches, so callers are expected to call join() themselves.
class Thread
{
public:
    Thread(void* (*start)(void*), void* args = 0) { pthread_create(&t, 0, start, args); }
    ~Thread() {}
    void join() { pthread_join(t, 0); }
private:
    pthread_t t;
};

// One per-thread void* slot backed by a pthread key (no destructor callback).
class ThreadLocalStorage
{
public:
    ThreadLocalStorage() { pthread_key_create(&key, 0); }
    ~ThreadLocalStorage() { pthread_key_delete(key); }
    void set(void* value) { pthread_setspecific(key, value); }
    void* get() { return pthread_getspecific(key); }
private:
    pthread_key_t key;
};
#endif // (defined _WIN32 && !(defined __MINGW32__))
#else // NCNN_THREADS
// Threading disabled: same interfaces, every operation is a no-op.
class Mutex
{
public:
    Mutex() {}
    ~Mutex() {}
    void lock() {}
    void unlock() {}
};

class ConditionVariable
{
public:
    ConditionVariable() {}
    ~ConditionVariable() {}
    void wait(Mutex& /*mutex*/) {}
    void broadcast() {}
    void signal() {}
};

// NOTE: this stub never invokes the start function.
class Thread
{
public:
    Thread(void* (*/*start*/)(void*), void* /*args*/ = 0) {}
    ~Thread() {}
    void join() {}
};

// NOTE: this stub stores nothing; get() always returns 0.
class ThreadLocalStorage
{
public:
    ThreadLocalStorage() {}
    ~ThreadLocalStorage() {}
    void set(void* /*value*/) {}
    void* get() { return 0; }
};
#endif // NCNN_THREADS

// Scope guard: locks the mutex on construction, unlocks it on destruction.
class MutexLockGuard
{
public:
    MutexLockGuard(Mutex& _mutex) : mutex(_mutex) { mutex.lock(); }
    ~MutexLockGuard() { mutex.unlock(); }
private:
    Mutex& mutex;
};

} // namespace ncnn

// Container / algorithm support: either the bundled minimal implementation or
// the real standard library headers.
#if NCNN_SIMPLESTL
#include "simplestl.h"
#else
#include <algorithm>
#include <list>
#include <vector>
#include <string>
#endif

#endif // __cplusplus

// NCNN_LOGE(fmt, ...) prints one line to stderr (and, on Android, also to the
// system log with tag "ncnn"). It expands to nothing when NCNN_STDIO is 0.
#if NCNN_STDIO
#if __ANDROID_API__ >= 8
#include <android/log.h>
#define NCNN_LOGE(...) do { \
    fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); \
    __android_log_print(ANDROID_LOG_WARN, "ncnn", ##__VA_ARGS__); } while(0)
#else // __ANDROID_API__ >= 8
#include <stdio.h>
#define NCNN_LOGE(...) do { \
    fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); } while(0)
#endif // __ANDROID_API__ >= 8
#else
#define NCNN_LOGE(...)
#endif

#endif // NCNN_PLATFORM_H

// --------------------------------------------------------------------------------
// /include/ncnn/simpleocv.h:
// --------------------------------------------------------------------------------
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
14 | 15 | #ifndef NCNN_SIMPLEOCV_H 16 | #define NCNN_SIMPLEOCV_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_SIMPLEOCV 21 | 22 | #include "mat.h" 23 | 24 | #if defined(_MSC_VER) || defined(__GNUC__) 25 | #pragma push_macro("min") 26 | #pragma push_macro("max") 27 | #undef min 28 | #undef max 29 | #endif 30 | 31 | // minimal opencv style data structure implementation 32 | namespace cv { 33 | 34 | struct Size 35 | { 36 | Size() 37 | : width(0), height(0) 38 | { 39 | } 40 | Size(int _w, int _h) 41 | : width(_w), height(_h) 42 | { 43 | } 44 | 45 | int width; 46 | int height; 47 | }; 48 | 49 | template 50 | struct Rect_ 51 | { 52 | Rect_() 53 | : x(0), y(0), width(0), height(0) 54 | { 55 | } 56 | Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h) 57 | : x(_x), y(_y), width(_w), height(_h) 58 | { 59 | } 60 | 61 | _Tp x; 62 | _Tp y; 63 | _Tp width; 64 | _Tp height; 65 | 66 | // area 67 | _Tp area() const 68 | { 69 | return width * height; 70 | } 71 | }; 72 | 73 | template 74 | static inline Rect_<_Tp>& operator&=(Rect_<_Tp>& a, const Rect_<_Tp>& b) 75 | { 76 | _Tp x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y); 77 | a.width = std::min(a.x + a.width, b.x + b.width) - x1; 78 | a.height = std::min(a.y + a.height, b.y + b.height) - y1; 79 | a.x = x1; 80 | a.y = y1; 81 | if (a.width <= 0 || a.height <= 0) 82 | a = Rect_<_Tp>(); 83 | return a; 84 | } 85 | 86 | template 87 | static inline Rect_<_Tp>& operator|=(Rect_<_Tp>& a, const Rect_<_Tp>& b) 88 | { 89 | _Tp x1 = std::min(a.x, b.x), y1 = std::min(a.y, b.y); 90 | a.width = std::max(a.x + a.width, b.x + b.width) - x1; 91 | a.height = std::max(a.y + a.height, b.y + b.height) - y1; 92 | a.x = x1; 93 | a.y = y1; 94 | return a; 95 | } 96 | 97 | template 98 | static inline Rect_<_Tp> operator&(const Rect_<_Tp>& a, const Rect_<_Tp>& b) 99 | { 100 | Rect_<_Tp> c = a; 101 | return c &= b; 102 | } 103 | 104 | template 105 | static inline Rect_<_Tp> operator|(const Rect_<_Tp>& a, const Rect_<_Tp>& b) 106 | { 107 | Rect_<_Tp> c = a; 108 | 
return c |= b; 109 | } 110 | 111 | typedef Rect_ Rect; 112 | typedef Rect_ Rect2f; 113 | 114 | template 115 | struct Point_ 116 | { 117 | Point_() 118 | : x(0), y(0) 119 | { 120 | } 121 | Point_(_Tp _x, _Tp _y) 122 | : x(_x), y(_y) 123 | { 124 | } 125 | 126 | _Tp x; 127 | _Tp y; 128 | }; 129 | 130 | typedef Point_ Point; 131 | typedef Point_ Point2f; 132 | 133 | #define CV_8UC1 1 134 | #define CV_8UC3 3 135 | #define CV_8UC4 4 136 | #define CV_32FC1 4 137 | 138 | struct Mat 139 | { 140 | Mat() 141 | : data(0), refcount(0), rows(0), cols(0), c(0) 142 | { 143 | } 144 | 145 | Mat(int _rows, int _cols, int flags) 146 | : data(0), refcount(0) 147 | { 148 | create(_rows, _cols, flags); 149 | } 150 | 151 | // copy 152 | Mat(const Mat& m) 153 | : data(m.data), refcount(m.refcount) 154 | { 155 | if (refcount) 156 | NCNN_XADD(refcount, 1); 157 | 158 | rows = m.rows; 159 | cols = m.cols; 160 | c = m.c; 161 | } 162 | 163 | Mat(int _rows, int _cols, int flags, void* _data) 164 | : data((unsigned char*)_data), refcount(0) 165 | { 166 | rows = _rows; 167 | cols = _cols; 168 | c = flags; 169 | } 170 | 171 | ~Mat() 172 | { 173 | release(); 174 | } 175 | 176 | // assign 177 | Mat& operator=(const Mat& m) 178 | { 179 | if (this == &m) 180 | return *this; 181 | 182 | if (m.refcount) 183 | NCNN_XADD(m.refcount, 1); 184 | 185 | release(); 186 | 187 | data = m.data; 188 | refcount = m.refcount; 189 | 190 | rows = m.rows; 191 | cols = m.cols; 192 | c = m.c; 193 | 194 | return *this; 195 | } 196 | 197 | void create(int _rows, int _cols, int flags) 198 | { 199 | release(); 200 | 201 | rows = _rows; 202 | cols = _cols; 203 | c = flags; 204 | 205 | if (total() > 0) 206 | { 207 | // refcount address must be aligned, so we expand totalsize here 208 | size_t totalsize = (total() + 3) >> 2 << 2; 209 | data = (unsigned char*)ncnn::fastMalloc(totalsize + (int)sizeof(*refcount)); 210 | refcount = (int*)(((unsigned char*)data) + totalsize); 211 | *refcount = 1; 212 | } 213 | } 214 | 215 | void 
release() 216 | { 217 | if (refcount && NCNN_XADD(refcount, -1) == 1) 218 | ncnn::fastFree(data); 219 | 220 | data = 0; 221 | 222 | rows = 0; 223 | cols = 0; 224 | c = 0; 225 | 226 | refcount = 0; 227 | } 228 | 229 | Mat clone() const 230 | { 231 | if (empty()) 232 | return Mat(); 233 | 234 | Mat m(rows, cols, c); 235 | 236 | if (total() > 0) 237 | { 238 | memcpy(m.data, data, total()); 239 | } 240 | 241 | return m; 242 | } 243 | 244 | bool empty() const 245 | { 246 | return data == 0 || total() == 0; 247 | } 248 | 249 | int channels() const 250 | { 251 | return c; 252 | } 253 | 254 | size_t total() const 255 | { 256 | return cols * rows * c; 257 | } 258 | 259 | const unsigned char* ptr(int y) const 260 | { 261 | return data + y * cols * c; 262 | } 263 | 264 | unsigned char* ptr(int y) 265 | { 266 | return data + y * cols * c; 267 | } 268 | 269 | // roi 270 | Mat operator()(const Rect& roi) const 271 | { 272 | if (empty()) 273 | return Mat(); 274 | 275 | Mat m(roi.height, roi.width, c); 276 | 277 | int sy = roi.y; 278 | for (int y = 0; y < roi.height; y++) 279 | { 280 | const unsigned char* sptr = ptr(sy) + roi.x * c; 281 | unsigned char* dptr = m.ptr(y); 282 | memcpy(dptr, sptr, roi.width * c); 283 | sy++; 284 | } 285 | 286 | return m; 287 | } 288 | 289 | unsigned char* data; 290 | 291 | // pointer to the reference counter; 292 | // when points to user-allocated data, the pointer is NULL 293 | int* refcount; 294 | 295 | int rows; 296 | int cols; 297 | 298 | int c; 299 | }; 300 | 301 | #define CV_LOAD_IMAGE_GRAYSCALE 1 302 | #define CV_LOAD_IMAGE_COLOR 3 303 | Mat imread(const std::string& path, int flags); 304 | void imwrite(const std::string& path, const Mat& m); 305 | 306 | #if NCNN_PIXEL 307 | void resize(const Mat& src, Mat& dst, const Size& size, float sw = 0.f, float sh = 0.f, int flags = 0); 308 | #endif // NCNN_PIXEL 309 | 310 | } // namespace cv 311 | 312 | #if defined(_MSC_VER) || defined(__GNUC__) 313 | #pragma pop_macro("min") 314 | #pragma 
pop_macro("max") 315 | #endif 316 | 317 | #endif // NCNN_SIMPLEOCV 318 | 319 | #endif // NCNN_SIMPLEOCV_H 320 | -------------------------------------------------------------------------------- /include/ncnn/simpleomp.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_SIMPLEOMP_H 16 | #define NCNN_SIMPLEOMP_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_SIMPLEOMP 21 | 22 | #include 23 | 24 | // This minimal openmp runtime implementation only supports the llvm openmp abi 25 | // and only supports #pragma omp parallel for num_threads(X) 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | int omp_get_max_threads(); 32 | 33 | void omp_set_num_threads(int num_threads); 34 | 35 | int omp_get_dynamic(); 36 | 37 | void omp_set_dynamic(int dynamic); 38 | 39 | int omp_get_num_threads(); 40 | 41 | int omp_get_thread_num(); 42 | 43 | int kmp_get_blocktime(); 44 | 45 | void kmp_set_blocktime(int blocktime); 46 | 47 | #ifdef __cplusplus 48 | } 49 | #endif 50 | 51 | #endif // NCNN_SIMPLEOMP 52 | 53 | #endif // NCNN_SIMPLEOMP_H 54 | -------------------------------------------------------------------------------- /include/ncnn/simplestl.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_SIMPLESTL_H 16 | #define NCNN_SIMPLESTL_H 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #if !NCNN_SIMPLESTL 23 | 24 | #include 25 | 26 | #else 27 | 28 | // allocation functions 29 | void* operator new(size_t size); 30 | void* operator new[](size_t size); 31 | // placement allocation functions 32 | void* operator new(size_t size, void* ptr); 33 | void* operator new[](size_t size, void* ptr); 34 | // deallocation functions 35 | void operator delete(void* ptr); 36 | void operator delete[](void* ptr); 37 | // deallocation functions since c++14 38 | #if __cplusplus >= 201402L 39 | void operator delete(void* ptr, size_t sz); 40 | void operator delete[](void* ptr, size_t sz); 41 | #endif 42 | // placement deallocation functions 43 | void operator delete(void* ptr, void* voidptr2); 44 | void operator delete[](void* ptr, void* voidptr2); 45 | 46 | #endif 47 | 48 | // minimal stl data structure implementation 49 | namespace std { 50 | 51 | template 52 | const T& max(const T& a, const T& b) 53 | { 54 | return (a < b) ? b : a; 55 | } 56 | 57 | template 58 | const T& min(const T& a, const T& b) 59 | { 60 | return (a > b) ? 
b : a; 61 | } 62 | 63 | template 64 | void swap(T& a, T& b) 65 | { 66 | T temp(a); 67 | a = b; 68 | b = temp; 69 | } 70 | 71 | template 72 | struct pair 73 | { 74 | pair() 75 | : first(), second() 76 | { 77 | } 78 | pair(const T1& t1, const T2& t2) 79 | : first(t1), second(t2) 80 | { 81 | } 82 | 83 | T1 first; 84 | T2 second; 85 | }; 86 | 87 | template 88 | bool operator==(const pair& x, const pair& y) 89 | { 90 | return (x.first == y.first && x.second == y.second); 91 | } 92 | template 93 | bool operator<(const pair& x, const pair& y) 94 | { 95 | return x.first < y.first || (!(y.first < x.first) && x.second < y.second); 96 | } 97 | template 98 | bool operator!=(const pair& x, const pair& y) 99 | { 100 | return !(x == y); 101 | } 102 | template 103 | bool operator>(const pair& x, const pair& y) 104 | { 105 | return y < x; 106 | } 107 | template 108 | bool operator<=(const pair& x, const pair& y) 109 | { 110 | return !(y < x); 111 | } 112 | template 113 | bool operator>=(const pair& x, const pair& y) 114 | { 115 | return !(x < y); 116 | } 117 | 118 | template 119 | pair make_pair(const T1& t1, const T2& t2) 120 | { 121 | return pair(t1, t2); 122 | } 123 | 124 | template 125 | struct node 126 | { 127 | node* prev_; 128 | node* next_; 129 | T data_; 130 | 131 | node() 132 | : prev_(0), next_(0), data_() 133 | { 134 | } 135 | node(const T& t) 136 | : prev_(0), next_(0), data_(t) 137 | { 138 | } 139 | }; 140 | 141 | template 142 | struct iter_list 143 | { 144 | iter_list() 145 | : curr_(0) 146 | { 147 | } 148 | iter_list(node* n) 149 | : curr_(n) 150 | { 151 | } 152 | iter_list(const iter_list& i) 153 | : curr_(i.curr_) 154 | { 155 | } 156 | ~iter_list() 157 | { 158 | } 159 | 160 | iter_list& operator=(const iter_list& i) 161 | { 162 | curr_ = i.curr_; 163 | return *this; 164 | } 165 | 166 | T& operator*() 167 | { 168 | return curr_->data_; 169 | } 170 | T* operator->() 171 | { 172 | return &(curr_->data_); 173 | } 174 | 175 | bool operator==(const iter_list& i) 176 | { 
177 | return curr_ == i.curr_; 178 | } 179 | bool operator!=(const iter_list& i) 180 | { 181 | return curr_ != i.curr_; 182 | } 183 | 184 | iter_list& operator++() 185 | { 186 | curr_ = curr_->next_; 187 | return *this; 188 | } 189 | iter_list& operator--() 190 | { 191 | curr_ = curr_->prev_; 192 | return *this; 193 | } 194 | 195 | node* curr_; 196 | }; 197 | 198 | template 199 | struct list 200 | { 201 | typedef iter_list iterator; 202 | 203 | list() 204 | { 205 | head_ = new node(); 206 | tail_ = head_; 207 | count_ = 0; 208 | } 209 | ~list() 210 | { 211 | clear(); 212 | delete head_; 213 | } 214 | list(const list& l) 215 | { 216 | head_ = new node(); 217 | tail_ = head_; 218 | count_ = 0; 219 | 220 | for (iter_list i = l.begin(); i != l.end(); ++i) 221 | { 222 | push_back(*i); 223 | } 224 | } 225 | 226 | list& operator=(const list& l) 227 | { 228 | if (this == &l) 229 | { 230 | return *this; 231 | } 232 | clear(); 233 | 234 | for (iter_list i = l.begin(); i != l.end(); ++i) 235 | { 236 | push_back(*i); 237 | } 238 | return *this; 239 | } 240 | 241 | void clear() 242 | { 243 | while (count_ > 0) 244 | { 245 | pop_front(); 246 | } 247 | } 248 | 249 | void pop_front() 250 | { 251 | if (count_ > 0) 252 | { 253 | head_ = head_->next_; 254 | delete head_->prev_; 255 | head_->prev_ = 0; 256 | --count_; 257 | } 258 | } 259 | 260 | size_t size() const 261 | { 262 | return count_; 263 | } 264 | iter_list begin() const 265 | { 266 | return iter_list(head_); 267 | } 268 | iter_list end() const 269 | { 270 | return iter_list(tail_); 271 | } 272 | bool empty() const 273 | { 274 | return count_ == 0; 275 | } 276 | 277 | void push_back(const T& t) 278 | { 279 | if (count_ == 0) 280 | { 281 | head_ = new node(t); 282 | head_->prev_ = 0; 283 | head_->next_ = tail_; 284 | tail_->prev_ = head_; 285 | count_ = 1; 286 | } 287 | else 288 | { 289 | node* temp = new node(t); 290 | temp->prev_ = tail_->prev_; 291 | temp->next_ = tail_; 292 | tail_->prev_->next_ = temp; 293 | tail_->prev_ 
= temp; 294 | ++count_; 295 | } 296 | } 297 | 298 | iter_list erase(iter_list pos) 299 | { 300 | if (pos != end()) 301 | { 302 | node* temp = pos.curr_; 303 | if (temp == head_) 304 | { 305 | ++pos; 306 | temp->next_->prev_ = 0; 307 | head_ = temp->next_; 308 | } 309 | else 310 | { 311 | --pos; 312 | temp->next_->prev_ = temp->prev_; 313 | temp->prev_->next_ = temp->next_; 314 | ++pos; 315 | } 316 | delete temp; 317 | --count_; 318 | } 319 | return pos; 320 | } 321 | 322 | protected: 323 | node* head_; 324 | node* tail_; 325 | size_t count_; 326 | }; 327 | 328 | template 329 | struct greater 330 | { 331 | bool operator()(const T& x, const T& y) const 332 | { 333 | return (x > y); 334 | } 335 | }; 336 | 337 | template 338 | struct less 339 | { 340 | bool operator()(const T& x, const T& y) const 341 | { 342 | return (x < y); 343 | } 344 | }; 345 | 346 | template 347 | void partial_sort(RandomAccessIter first, RandomAccessIter middle, RandomAccessIter last, Compare comp) 348 | { 349 | // [TODO] heap sort should be used here, but we simply use bubble sort now 350 | for (RandomAccessIter i = first; i < middle; ++i) 351 | { 352 | // bubble sort 353 | for (RandomAccessIter j = last - 1; j > first; --j) 354 | { 355 | if (comp(*j, *(j - 1))) 356 | { 357 | swap(*j, *(j - 1)); 358 | } 359 | } 360 | } 361 | } 362 | 363 | template 364 | struct vector 365 | { 366 | vector() 367 | : data_(0), size_(0), capacity_(0) 368 | { 369 | } 370 | vector(const size_t new_size, const T& value = T()) 371 | : data_(0), size_(0), capacity_(0) 372 | { 373 | resize(new_size, value); 374 | } 375 | ~vector() 376 | { 377 | clear(); 378 | } 379 | vector(const vector& v) 380 | : data_(0), size_(0), capacity_(0) 381 | { 382 | resize(v.size()); 383 | for (size_t i = 0; i < size_; i++) 384 | { 385 | data_[i] = v.data_[i]; 386 | } 387 | } 388 | 389 | vector& operator=(const vector& v) 390 | { 391 | if (this == &v) 392 | { 393 | return *this; 394 | } 395 | resize(0); 396 | resize(v.size()); 397 | for 
(size_t i = 0; i < size_; i++) 398 | { 399 | data_[i] = v.data_[i]; 400 | } 401 | return *this; 402 | } 403 | 404 | void resize(const size_t new_size, const T& value = T()) 405 | { 406 | try_alloc(new_size); 407 | if (new_size > size_) 408 | { 409 | for (size_t i = size_; i < new_size; i++) 410 | { 411 | new (&data_[i]) T(value); 412 | } 413 | } 414 | else if (new_size < size_) 415 | { 416 | for (size_t i = new_size; i < size_; i++) 417 | { 418 | data_[i].~T(); 419 | } 420 | } 421 | size_ = new_size; 422 | } 423 | 424 | void clear() 425 | { 426 | for (size_t i = 0; i < size_; i++) 427 | { 428 | data_[i].~T(); 429 | } 430 | delete[](char*) data_; 431 | data_ = 0; 432 | size_ = 0; 433 | capacity_ = 0; 434 | } 435 | 436 | T* data() const 437 | { 438 | return data_; 439 | } 440 | size_t size() const 441 | { 442 | return size_; 443 | } 444 | T& operator[](size_t i) const 445 | { 446 | return data_[i]; 447 | } 448 | T* begin() const 449 | { 450 | return &data_[0]; 451 | } 452 | T* end() const 453 | { 454 | return &data_[size_]; 455 | } 456 | bool empty() const 457 | { 458 | return size_ == 0; 459 | } 460 | 461 | void push_back(const T& t) 462 | { 463 | try_alloc(size_ + 1); 464 | new (&data_[size_]) T(t); 465 | size_++; 466 | } 467 | 468 | void insert(T* pos, T* b, T* e) 469 | { 470 | vector* v = 0; 471 | if (b >= begin() && b < end()) 472 | { 473 | //the same vector 474 | v = new vector(*this); 475 | b = v->begin() + (b - begin()); 476 | e = v->begin() + (e - begin()); 477 | } 478 | size_t diff = pos - begin(); 479 | try_alloc(size_ + (e - b)); 480 | pos = begin() + diff; 481 | memmove(pos + (e - b), pos, (end() - pos) * sizeof(T)); 482 | size_t len = e - b; 483 | size_ += len; 484 | for (size_t i = 0; i < len; i++) 485 | { 486 | *pos = *b; 487 | pos++; 488 | b++; 489 | } 490 | delete v; 491 | } 492 | 493 | T* erase(T* pos) 494 | { 495 | pos->~T(); 496 | memmove(pos, pos + 1, (end() - pos - 1) * sizeof(T)); 497 | size_--; 498 | return pos; 499 | } 500 | 501 | protected: 
502 | T* data_; 503 | size_t size_; 504 | size_t capacity_; 505 | void try_alloc(size_t new_size) 506 | { 507 | if (new_size * 3 / 2 > capacity_ / 2) 508 | { 509 | capacity_ = new_size * 2; 510 | T* new_data = (T*)new char[capacity_ * sizeof(T)]; 511 | memset(new_data, 0, capacity_ * sizeof(T)); 512 | if (data_) 513 | { 514 | memmove(new_data, data_, sizeof(T) * size_); 515 | delete[](char*) data_; 516 | } 517 | data_ = new_data; 518 | } 519 | } 520 | }; 521 | 522 | struct string : public vector 523 | { 524 | string() 525 | { 526 | } 527 | string(const char* str) 528 | { 529 | size_t len = strlen(str); 530 | resize(len); 531 | memcpy(data_, str, len); 532 | } 533 | const char* c_str() const 534 | { 535 | return (const char*)data_; 536 | } 537 | bool operator==(const string& str2) const 538 | { 539 | return strcmp(data_, str2.data_) == 0; 540 | } 541 | bool operator==(const char* str2) const 542 | { 543 | return strcmp(data_, str2) == 0; 544 | } 545 | bool operator!=(const char* str2) const 546 | { 547 | return strcmp(data_, str2) != 0; 548 | } 549 | string& operator+=(const string& str1) 550 | { 551 | insert(end(), str1.begin(), str1.end()); 552 | return *this; 553 | } 554 | }; 555 | 556 | inline string operator+(const string& str1, const string& str2) 557 | { 558 | string str(str1); 559 | str.insert(str.end(), str2.begin(), str2.end()); 560 | return str; 561 | } 562 | 563 | } // namespace std 564 | 565 | #endif // NCNN_SIMPLESTL_H 566 | -------------------------------------------------------------------------------- /lib/cmake/ncnn/ncnn-release.cmake: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------- 2 | # Generated CMake target import file for configuration "Release". 3 | #---------------------------------------------------------------- 4 | 5 | # Commands may need to know the format version. 
set(CMAKE_IMPORT_FILE_VERSION 1)

# Import target "ncnn" for configuration "Release"
# Registers RELEASE as an available configuration of the imported target and
# records where its static archive lives. _IMPORT_PREFIX is computed by
# ncnn.cmake, which includes this file through its ncnn-*.cmake glob.
set_property(TARGET ncnn APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(ncnn PROPERTIES
  IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX"
  IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libncnn.a"
  )

# Queue the archive for the existence check that ncnn.cmake runs after all
# per-configuration files have been included.
list(APPEND _IMPORT_CHECK_TARGETS ncnn )
list(APPEND _IMPORT_CHECK_FILES_FOR_ncnn "${_IMPORT_PREFIX}/lib/libncnn.a" )

# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)

--------------------------------------------------------------------------------
/lib/cmake/ncnn/ncnn.cmake:
--------------------------------------------------------------------------------
# Generated by CMake

# Refuse to run on CMake older than 2.6, then open a policy scope pinned to
# 2.6 behaviour. Every exit path of this file must pair it with cmake_policy(POP).
if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5)
   message(FATAL_ERROR "CMake >= 2.6.0 required")
endif()
cmake_policy(PUSH)
cmake_policy(VERSION 2.6)
#----------------------------------------------------------------
# Generated CMake target import file.
#----------------------------------------------------------------

# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)

# Protect against multiple inclusion, which would fail when already imported targets are added once more.
# Start from a clean slate for the three bookkeeping lists.
unset(_targetsDefined)
unset(_targetsNotDefined)
unset(_expectedTargets)

# Sort every target of this export set into "already defined" or "still missing".
foreach(_ncnn_target ncnn)
  list(APPEND _expectedTargets ${_ncnn_target})
  if(TARGET ${_ncnn_target})
    list(APPEND _targetsDefined ${_ncnn_target})
  else()
    list(APPEND _targetsNotDefined ${_ncnn_target})
  endif()
endforeach()

# Everything already imported: undo the setup done so far and leave quietly.
if("${_targetsDefined}" STREQUAL "${_expectedTargets}")
  unset(_expectedTargets)
  unset(_targetsNotDefined)
  unset(_targetsDefined)
  set(CMAKE_IMPORT_FILE_VERSION)
  cmake_policy(POP)
  return()
endif()

# Only part of the set imported: the caller's state is inconsistent, so stop.
if(NOT "${_targetsDefined}" STREQUAL "")
  message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n")
endif()
unset(_expectedTargets)
unset(_targetsNotDefined)
unset(_targetsDefined)


# Compute the installation prefix relative to this file.
# Taking the parent path four times strips the file name and three directory levels.
get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
if(_IMPORT_PREFIX STREQUAL "/")
  set(_IMPORT_PREFIX "")
endif()

# Create imported target ncnn
add_library(ncnn STATIC IMPORTED)

# Usage requirements handed to anything that links against ncnn.
set_property(TARGET ncnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/ncnn")
set_property(TARGET ncnn PROPERTY INTERFACE_LINK_LIBRARIES "OpenMP::OpenMP_CXX")
set_property(TARGET ncnn PROPERTY INTERFACE_POSITION_INDEPENDENT_CODE "ON")

if(CMAKE_VERSION VERSION_LESS 2.8.12)
  message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.")
endif()

# Load information for each installed configuration.
# Include every ncnn-*.cmake file that sits next to this one. Each of them adds
# one configuration to the imported target and appends to the
# _IMPORT_CHECK_* lists that are consumed below.
get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
file(GLOB CONFIG_FILES "${_DIR}/ncnn-*.cmake")
foreach(f ${CONFIG_FILES})
  include(${f})
endforeach()

# Cleanup temporary variables.
set(_IMPORT_PREFIX)

# Loop over all imported files and verify that they actually exist
# A missing file aborts configuration. The per-target list is dropped once
# it has been checked.
foreach(target ${_IMPORT_CHECK_TARGETS} )
  foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} )
    if(NOT EXISTS "${file}" )
      message(FATAL_ERROR "The imported target \"${target}\" references the file
\"${file}\"
but this file does not exist. Possible reasons include:
* The file was deleted, renamed, or moved to another location.
* An install or uninstall procedure did not complete successfully.
* The installation package was faulty and contained
\"${CMAKE_CURRENT_LIST_FILE}\"
but not all the files it references.
")
    endif()
  endforeach()
  unset(_IMPORT_CHECK_FILES_FOR_${target})
endforeach()
unset(_IMPORT_CHECK_TARGETS)

# This file does not depend on other imported targets which have
# been exported from the same project but in a separate export set.

# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)

--------------------------------------------------------------------------------
/lib/cmake/ncnn/ncnnConfig.cmake:
--------------------------------------------------------------------------------
# Package configuration file for ncnn.
# It records the feature switches set below, locates the packages those
# features need, and then defines the imported `ncnn` target by including
# ncnn.cmake from this directory.
set(NCNN_OPENMP ON)
set(NCNN_VULKAN OFF)
set(NCNN_SYSTEM_GLSLANG ON)

include(CMakeFindDependencyMacro)

if(NCNN_OPENMP)
  # The imported target lists OpenMP::OpenMP_CXX in INTERFACE_LINK_LIBRARIES,
  # so OpenMP is a real dependency of this package. find_dependency forwards
  # the caller's QUIET/REQUIRED and marks ncnn itself as not found when OpenMP
  # is missing. The consumer then gets a clear configure-time result instead
  # of a later "target not found" error.
  find_dependency(OpenMP)
endif()

if(NCNN_VULKAN)
  find_package(Vulkan REQUIRED)

  if(NCNN_SYSTEM_GLSLANG)
    # NOTE(review): with an empty directory the includes below resolve to
    # "/<name>.cmake"; confirm the intended location of the system glslang
    # target files before enabling NCNN_VULKAN.
    set(GLSLANG_TARGET_DIR "")
  else()
    set(GLSLANG_TARGET_DIR "${CMAKE_CURRENT_LIST_DIR}/..")
  endif()

  find_package(Threads)

  include("${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake")
  include("${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake")
  if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
    # hlsl support can be optional
    include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
  endif()
  include("${GLSLANG_TARGET_DIR}/glslangTargets.cmake")
  include("${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake")
endif()

include("${CMAKE_CURRENT_LIST_DIR}/ncnn.cmake")

--------------------------------------------------------------------------------
/lib/libncnn.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/lib/libncnn.a
--------------------------------------------------------------------------------
/test_jpg/10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/test_jpg/10.jpg
--------------------------------------------------------------------------------
/test_jpg/105.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/test_jpg/105.jpg -------------------------------------------------------------------------------- /test_jpg/106.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/test_jpg/106.jpg -------------------------------------------------------------------------------- /test_jpg/107.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/test_jpg/107.jpg -------------------------------------------------------------------------------- /yolov5.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | // #include "vulkan/vulkan.hpp" 14 | #include "net.h" 15 | 16 | using namespace std; 17 | 18 | 19 | const int numCLasses = 2; 20 | //const int frameWidth = 1920; 21 | //const int frameHeight = 1080; 22 | std::vector classnames; 23 | 24 | vector>> anchors{{{10, 13}, {16, 30}, {33, 23}}, 25 | {{30, 61}, {62, 45}, {59, 119}}, 26 | {{116, 90}, {156, 198}, {373, 326}}}; 27 | vector> dets; 28 | 29 | inline float sigmoid(float x){ 30 | return 1.0f / (1.0f + exp(-x)); 31 | } 32 | 33 | void decodeResult(const ncnn::Mat& data, int stride, vector> anchors, float scoreThresh, int frameWidth, int frameHeight) 34 | { 35 | for (int c=0; c scoreThresh) 42 | { 43 | vector det(6); 44 | det[1] = (sigmoid(ptr[0] )* 2 - 0.5 + y % (int)(640 / stride)) * stride * frameWidth / 640; //center_x 45 | det[2] = (sigmoid(ptr[1]) * 2 - 0.5 + (int)(y / (640 / stride))) * stride * frameHeight / 640; //center_y 46 | det[3] = 
pow((sigmoid(ptr[2]) * 2), 2) * anchors[c][0] * frameWidth / 640; //w 47 | det[4] = pow((sigmoid(ptr[3]) * 2), 2) * anchors[c][1] * frameHeight / 640; //h 48 | 49 | det[1] = det[1] - det[3] / 2; //left 50 | det[2] = det[2] - det[4] / 2; //top 51 | det[3] = det[1] + det[3]; //right 52 | det[4] = det[2] + det[4]; //bottom 53 | 54 | for (int i=5; i det[0]) 58 | { 59 | det[0] = conf * score; //score 60 | det[5] = i - 5; //class_id 61 | } 62 | } 63 | dets.push_back(det); 64 | } 65 | ptr += data.w; 66 | } 67 | } 68 | cout << "decodeResult done" << endl; 69 | } 70 | 71 | void nonMaxSuppression(float iouThresh) 72 | { 73 | int length = dets.size(); 74 | int index = length - 1; 75 | 76 | sort(dets.begin(), dets.end()); 77 | vector areas(length); 78 | for (int i=0; i 0) 84 | { 85 | int i = 0; 86 | while (i < index) 87 | { 88 | float left = max(dets[index][1], dets[i][1]); 89 | float top = max(dets[index][2], dets[i][2]); 90 | float right = min(dets[index][3], dets[i][3]); 91 | float bottom = min(dets[index][4], dets[i][4]); 92 | float overlap = max(0.0f, right - left) * max(0.0f, bottom - top); 93 | if (overlap / (areas[index] + areas[i] - overlap) > iouThresh) 94 | { 95 | areas.erase(areas.begin() + i); 96 | dets.erase(dets.begin() + i); 97 | index --; 98 | } 99 | else 100 | { 101 | i++; 102 | } 103 | } 104 | index--; 105 | } 106 | cout << "nonMaxSuppression done" << endl; 107 | } 108 | 109 | bool detect(ncnn::Net& net, string img_path) 110 | { 111 | cv::Mat frame, img; 112 | frame = cv::imread(img_path); 113 | if(frame.empty()) 114 | { 115 | cout << "img is None : " << img_path << endl; 116 | return false; 117 | } 118 | const auto t0 = std::chrono::system_clock::now(); 119 | 120 | ncnn::Mat input, output0, output1, output2; 121 | ncnn::Extractor extractor = net.create_extractor(); 122 | cv::resize(frame, img, cv::Size(640, 640)); 123 | cv::cvtColor(img, img, cv::COLOR_BGR2RGB); 124 | 125 | cout << "resize cvtColor done......"<(t2 - t1).count() * 1e-3 << "ms "; 176 | cout 
<< "Post-processing Time: " << std::chrono::duration_cast(t3 - t2).count() * 1e-3 << "ms "; 177 | cout << "Total Time: " << std::chrono::duration_cast(t4 - t0).count() * 1e-3 << "ms" << endl; 178 | 179 | return true; 180 | } 181 | 182 | int read_files_in_dir(const char *p_dir_name, std::vector &file_names) 183 | { 184 | DIR *p_dir = opendir(p_dir_name); 185 | if (p_dir == nullptr) 186 | { 187 | return -1; 188 | } 189 | 190 | struct dirent* p_file = nullptr; 191 | while ((p_file = readdir(p_dir)) != nullptr) 192 | { 193 | if (strcmp(p_file->d_name, ".") != 0 && strcmp(p_file->d_name, "..") != 0) 194 | { 195 | std::string cur_file_name(p_file->d_name); 196 | file_names.push_back(cur_file_name); 197 | } 198 | } 199 | 200 | closedir(p_dir); 201 | return 0; 202 | } 203 | 204 | int main0() 205 | { 206 | std::ifstream f("/zhanghao/code/yolov5_to_ncnn/ncnn/yw_5s/voc.names"); 207 | std::string name = ""; 208 | while (std::getline(f, name)) 209 | { 210 | classnames.push_back(name); 211 | } 212 | 213 | ncnn::Net net; 214 | net.opt.num_threads=12; 215 | // net.opt.use_vulkan_compute = 1; 216 | // net.set_vulkan_device(0); 217 | 218 | net.load_param("/zhanghao/code/yolov5_to_ncnn/ncnn/yw_5s/yw_5s.param"); 219 | net.load_model("/zhanghao/code/yolov5_to_ncnn/ncnn/yw_5s/yw_5s.bin"); 220 | 221 | cout << "load param && model done......"< file_names; 255 | if (read_files_in_dir(imagepath, file_names) < 0) 256 | { 257 | printf("read_files_in_dir failed."); 258 | return -1; 259 | } 260 | 261 | for (int f = 0; f < (int)file_names.size(); f++) 262 | { 263 | detect(net, std::string(imagepath) + "/" + file_names[f]); 264 | } 265 | 266 | return 0; 267 | } -------------------------------------------------------------------------------- /yw_5s/voc.names: -------------------------------------------------------------------------------- 1 | yw_gkxfw 2 | yw_nc -------------------------------------------------------------------------------- /yw_5s/yw_5s.bin: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sean-wade/yolov5s_ncnn_inference/8a4b7a019303f3cc243adcb7c73847234506a712/yw_5s/yw_5s.bin -------------------------------------------------------------------------------- /yw_5s/yw_5s.param: -------------------------------------------------------------------------------- 1 | 7767517 2 | 197 224 3 | Input images 0 1 images 4 | Split splitncnn_input0 1 4 images images_splitncnn_0 images_splitncnn_1 images_splitncnn_2 images_splitncnn_3 5 | Interp Resize_28 1 1 images_splitncnn_3 195 0=1 1=1.000000e+00 2=1.000000e+00 3=320 4=320 6=0 6 | Interp Resize_57 1 1 images_splitncnn_2 224 0=1 1=1.000000e+00 2=1.000000e+00 3=320 4=320 6=0 7 | Interp Resize_86 1 1 images_splitncnn_1 253 0=1 1=1.000000e+00 2=1.000000e+00 3=320 4=320 6=0 8 | Interp Resize_115 1 1 images_splitncnn_0 282 0=1 1=1.000000e+00 2=1.000000e+00 3=320 4=320 6=0 9 | Concat Concat_116 4 1 195 224 253 282 283 0=0 10 | Convolution Conv_117 1 1 283 284 0=32 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=3456 11 | HardSwish Div_125 1 1 284 292 0=1.666667e-01 1=5.000000e-01 12 | Convolution Conv_126 1 1 292 293 0=64 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=18432 13 | HardSwish Div_134 1 1 293 301 0=1.666667e-01 1=5.000000e-01 14 | Split splitncnn_0 1 2 301 301_splitncnn_0 301_splitncnn_1 15 | Convolution Conv_135 1 1 301_splitncnn_1 302 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=2048 16 | HardSwish Div_143 1 1 302 310 0=1.666667e-01 1=5.000000e-01 17 | Split splitncnn_1 1 2 310 310_splitncnn_0 310_splitncnn_1 18 | Convolution Conv_144 1 1 310_splitncnn_1 311 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=1024 19 | HardSwish Div_152 1 1 311 319 0=1.666667e-01 1=5.000000e-01 20 | Convolution Conv_153 1 1 319 320 0=32 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=9216 21 | HardSwish Div_161 1 1 320 328 0=1.666667e-01 1=5.000000e-01 22 | BinaryOp Add_162 2 1 
310_splitncnn_0 328 329 0=0 23 | Convolution Conv_163 1 1 329 330 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=1024 24 | Convolution Conv_164 1 1 301_splitncnn_0 331 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=2048 25 | Concat Concat_165 2 1 330 331 332 0=0 26 | BatchNorm BatchNormalization_166 1 1 332 333 0=64 27 | ReLU LeakyRelu_167 1 1 333 334 0=1.000000e-01 28 | Convolution Conv_168 1 1 334 335 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4096 29 | HardSwish Div_176 1 1 335 343 0=1.666667e-01 1=5.000000e-01 30 | Convolution Conv_177 1 1 343 344 0=128 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=73728 31 | HardSwish Div_185 1 1 344 352 0=1.666667e-01 1=5.000000e-01 32 | Split splitncnn_2 1 2 352 352_splitncnn_0 352_splitncnn_1 33 | Convolution Conv_186 1 1 352_splitncnn_1 353 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=8192 34 | HardSwish Div_194 1 1 353 361 0=1.666667e-01 1=5.000000e-01 35 | Split splitncnn_3 1 2 361 361_splitncnn_0 361_splitncnn_1 36 | Convolution Conv_195 1 1 361_splitncnn_1 362 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4096 37 | HardSwish Div_203 1 1 362 370 0=1.666667e-01 1=5.000000e-01 38 | Convolution Conv_204 1 1 370 371 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864 39 | HardSwish Div_212 1 1 371 379 0=1.666667e-01 1=5.000000e-01 40 | BinaryOp Add_213 2 1 361_splitncnn_0 379 380 0=0 41 | Split splitncnn_4 1 2 380 380_splitncnn_0 380_splitncnn_1 42 | Convolution Conv_214 1 1 380_splitncnn_1 381 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4096 43 | HardSwish Div_222 1 1 381 389 0=1.666667e-01 1=5.000000e-01 44 | Convolution Conv_223 1 1 389 390 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864 45 | HardSwish Div_231 1 1 390 398 0=1.666667e-01 1=5.000000e-01 46 | BinaryOp Add_232 2 1 380_splitncnn_0 398 399 0=0 47 | Split splitncnn_5 1 2 399 399_splitncnn_0 399_splitncnn_1 48 | Convolution Conv_233 1 1 399_splitncnn_1 400 0=64 
1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4096 49 | HardSwish Div_241 1 1 400 408 0=1.666667e-01 1=5.000000e-01 50 | Convolution Conv_242 1 1 408 409 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864 51 | HardSwish Div_250 1 1 409 417 0=1.666667e-01 1=5.000000e-01 52 | BinaryOp Add_251 2 1 399_splitncnn_0 417 418 0=0 53 | Convolution Conv_252 1 1 418 419 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=4096 54 | Convolution Conv_253 1 1 352_splitncnn_0 420 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=8192 55 | Concat Concat_254 2 1 419 420 421 0=0 56 | BatchNorm BatchNormalization_255 1 1 421 422 0=128 57 | ReLU LeakyRelu_256 1 1 422 423 0=1.000000e-01 58 | Convolution Conv_257 1 1 423 424 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384 59 | HardSwish Div_265 1 1 424 432 0=1.666667e-01 1=5.000000e-01 60 | Split splitncnn_6 1 2 432 432_splitncnn_0 432_splitncnn_1 61 | Convolution Conv_266 1 1 432_splitncnn_1 433 0=256 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=294912 62 | HardSwish Div_274 1 1 433 441 0=1.666667e-01 1=5.000000e-01 63 | Split splitncnn_7 1 2 441 441_splitncnn_0 441_splitncnn_1 64 | Convolution Conv_275 1 1 441_splitncnn_1 442 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=32768 65 | HardSwish Div_283 1 1 442 450 0=1.666667e-01 1=5.000000e-01 66 | Split splitncnn_8 1 2 450 450_splitncnn_0 450_splitncnn_1 67 | Convolution Conv_284 1 1 450_splitncnn_1 451 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384 68 | HardSwish Div_292 1 1 451 459 0=1.666667e-01 1=5.000000e-01 69 | Convolution Conv_293 1 1 459 460 0=128 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=147456 70 | HardSwish Div_301 1 1 460 468 0=1.666667e-01 1=5.000000e-01 71 | BinaryOp Add_302 2 1 450_splitncnn_0 468 469 0=0 72 | Split splitncnn_9 1 2 469 469_splitncnn_0 469_splitncnn_1 73 | Convolution Conv_303 1 1 469_splitncnn_1 470 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384 74 
| HardSwish Div_311 1 1 470 478 0=1.666667e-01 1=5.000000e-01 75 | Convolution Conv_312 1 1 478 479 0=128 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=147456 76 | HardSwish Div_320 1 1 479 487 0=1.666667e-01 1=5.000000e-01 77 | BinaryOp Add_321 2 1 469_splitncnn_0 487 488 0=0 78 | Split splitncnn_10 1 2 488 488_splitncnn_0 488_splitncnn_1 79 | Convolution Conv_322 1 1 488_splitncnn_1 489 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384 80 | HardSwish Div_330 1 1 489 497 0=1.666667e-01 1=5.000000e-01 81 | Convolution Conv_331 1 1 497 498 0=128 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=147456 82 | HardSwish Div_339 1 1 498 506 0=1.666667e-01 1=5.000000e-01 83 | BinaryOp Add_340 2 1 488_splitncnn_0 506 507 0=0 84 | Convolution Conv_341 1 1 507 508 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=16384 85 | Convolution Conv_342 1 1 441_splitncnn_0 509 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=32768 86 | Concat Concat_343 2 1 508 509 510 0=0 87 | BatchNorm BatchNormalization_344 1 1 510 511 0=256 88 | ReLU LeakyRelu_345 1 1 511 512 0=1.000000e-01 89 | Convolution Conv_346 1 1 512 513 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536 90 | HardSwish Div_354 1 1 513 521 0=1.666667e-01 1=5.000000e-01 91 | Split splitncnn_11 1 2 521 521_splitncnn_0 521_splitncnn_1 92 | Convolution Conv_355 1 1 521_splitncnn_1 522 0=512 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=1179648 93 | HardSwish Div_363 1 1 522 530 0=1.666667e-01 1=5.000000e-01 94 | Convolution Conv_364 1 1 530 531 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=131072 95 | HardSwish Div_372 1 1 531 539 0=1.666667e-01 1=5.000000e-01 96 | Split splitncnn_12 1 4 539 539_splitncnn_0 539_splitncnn_1 539_splitncnn_2 539_splitncnn_3 97 | Pooling MaxPool_373 1 1 539_splitncnn_3 540 0=0 1=5 11=5 2=1 12=1 3=2 13=2 14=2 15=2 5=1 98 | Pooling MaxPool_374 1 1 539_splitncnn_2 541 0=0 1=9 11=9 2=1 12=1 3=4 13=4 14=4 15=4 5=1 99 | Pooling 
MaxPool_375 1 1 539_splitncnn_1 542 0=0 1=13 11=13 2=1 12=1 3=6 13=6 14=6 15=6 5=1 100 | Concat Concat_376 4 1 539_splitncnn_0 540 541 542 543 0=0 101 | Convolution Conv_377 1 1 543 544 0=512 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=524288 102 | HardSwish Div_385 1 1 544 552 0=1.666667e-01 1=5.000000e-01 103 | Split splitncnn_13 1 2 552 552_splitncnn_0 552_splitncnn_1 104 | Convolution Conv_386 1 1 552_splitncnn_1 553 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=131072 105 | HardSwish Div_394 1 1 553 561 0=1.666667e-01 1=5.000000e-01 106 | Convolution Conv_395 1 1 561 562 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536 107 | HardSwish Div_403 1 1 562 570 0=1.666667e-01 1=5.000000e-01 108 | Convolution Conv_404 1 1 570 571 0=256 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=589824 109 | HardSwish Div_412 1 1 571 579 0=1.666667e-01 1=5.000000e-01 110 | Convolution Conv_413 1 1 579 580 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=65536 111 | Convolution Conv_414 1 1 552_splitncnn_0 581 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=131072 112 | Concat Concat_415 2 1 580 581 582 0=0 113 | BatchNorm BatchNormalization_416 1 1 582 583 0=512 114 | ReLU LeakyRelu_417 1 1 583 584 0=1.000000e-01 115 | Convolution Conv_418 1 1 584 585 0=512 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=262144 116 | HardSwish Div_426 1 1 585 593 0=1.666667e-01 1=5.000000e-01 117 | Convolution Conv_427 1 1 593 594 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=131072 118 | HardSwish Div_435 1 1 594 602 0=1.666667e-01 1=5.000000e-01 119 | Split splitncnn_14 1 2 602 602_splitncnn_0 602_splitncnn_1 120 | Interp Resize_437 1 1 602_splitncnn_1 612 0=1 1=2.000000e+00 2=2.000000e+00 3=0 4=0 6=0 121 | Concat Concat_438 2 1 612 521_splitncnn_0 613 0=0 122 | Split splitncnn_15 1 2 613 613_splitncnn_0 613_splitncnn_1 123 | Convolution Conv_439 1 1 613_splitncnn_1 614 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 
5=1 6=65536 124 | HardSwish Div_447 1 1 614 622 0=1.666667e-01 1=5.000000e-01 125 | Convolution Conv_448 1 1 622 623 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384 126 | HardSwish Div_456 1 1 623 631 0=1.666667e-01 1=5.000000e-01 127 | Convolution Conv_457 1 1 631 632 0=128 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=147456 128 | HardSwish Div_465 1 1 632 640 0=1.666667e-01 1=5.000000e-01 129 | Convolution Conv_466 1 1 640 641 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=16384 130 | Convolution Conv_467 1 1 613_splitncnn_0 642 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=65536 131 | Concat Concat_468 2 1 641 642 643 0=0 132 | BatchNorm BatchNormalization_469 1 1 643 644 0=256 133 | ReLU LeakyRelu_470 1 1 644 645 0=1.000000e-01 134 | Convolution Conv_471 1 1 645 646 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536 135 | HardSwish Div_479 1 1 646 654 0=1.666667e-01 1=5.000000e-01 136 | Convolution Conv_480 1 1 654 655 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=32768 137 | HardSwish Div_488 1 1 655 663 0=1.666667e-01 1=5.000000e-01 138 | Split splitncnn_16 1 2 663 663_splitncnn_0 663_splitncnn_1 139 | Interp Resize_490 1 1 663_splitncnn_1 673 0=1 1=2.000000e+00 2=2.000000e+00 3=0 4=0 6=0 140 | Concat Concat_491 2 1 673 432_splitncnn_0 674 0=0 141 | Split splitncnn_17 1 2 674 674_splitncnn_0 674_splitncnn_1 142 | Convolution Conv_492 1 1 674_splitncnn_1 675 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384 143 | HardSwish Div_500 1 1 675 683 0=1.666667e-01 1=5.000000e-01 144 | Convolution Conv_501 1 1 683 684 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4096 145 | HardSwish Div_509 1 1 684 692 0=1.666667e-01 1=5.000000e-01 146 | Convolution Conv_510 1 1 692 693 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=36864 147 | HardSwish Div_518 1 1 693 701 0=1.666667e-01 1=5.000000e-01 148 | Convolution Conv_519 1 1 701 702 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 
14=0 15=0 16=0 5=0 6=4096 149 | Convolution Conv_520 1 1 674_splitncnn_0 703 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=16384 150 | Concat Concat_521 2 1 702 703 704 0=0 151 | BatchNorm BatchNormalization_522 1 1 704 705 0=128 152 | ReLU LeakyRelu_523 1 1 705 706 0=1.000000e-01 153 | Convolution Conv_524 1 1 706 707 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384 154 | HardSwish Div_532 1 1 707 715 0=1.666667e-01 1=5.000000e-01 155 | Split splitncnn_18 1 2 715 715_splitncnn_0 715_splitncnn_1 156 | Convolution Conv_533 1 1 715_splitncnn_1 716 0=128 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=147456 157 | HardSwish Div_541 1 1 716 724 0=1.666667e-01 1=5.000000e-01 158 | Concat Concat_542 2 1 724 663_splitncnn_0 725 0=0 159 | Split splitncnn_19 1 2 725 725_splitncnn_0 725_splitncnn_1 160 | Convolution Conv_543 1 1 725_splitncnn_1 726 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=32768 161 | HardSwish Div_551 1 1 726 734 0=1.666667e-01 1=5.000000e-01 162 | Convolution Conv_552 1 1 734 735 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=16384 163 | HardSwish Div_560 1 1 735 743 0=1.666667e-01 1=5.000000e-01 164 | Convolution Conv_561 1 1 743 744 0=128 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=147456 165 | HardSwish Div_569 1 1 744 752 0=1.666667e-01 1=5.000000e-01 166 | Convolution Conv_570 1 1 752 753 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=16384 167 | Convolution Conv_571 1 1 725_splitncnn_0 754 0=128 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=32768 168 | Concat Concat_572 2 1 753 754 755 0=0 169 | BatchNorm BatchNormalization_573 1 1 755 756 0=256 170 | ReLU LeakyRelu_574 1 1 756 757 0=1.000000e-01 171 | Convolution Conv_575 1 1 757 758 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536 172 | HardSwish Div_583 1 1 758 766 0=1.666667e-01 1=5.000000e-01 173 | Split splitncnn_20 1 2 766 766_splitncnn_0 766_splitncnn_1 174 | Convolution Conv_584 1 1 
766_splitncnn_1 767 0=256 1=3 11=3 2=1 12=1 3=2 13=2 4=1 14=1 15=1 16=1 5=1 6=589824 175 | HardSwish Div_592 1 1 767 775 0=1.666667e-01 1=5.000000e-01 176 | Concat Concat_593 2 1 775 602_splitncnn_0 776 0=0 177 | Split splitncnn_21 1 2 776 776_splitncnn_0 776_splitncnn_1 178 | Convolution Conv_594 1 1 776_splitncnn_1 777 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=131072 179 | HardSwish Div_602 1 1 777 785 0=1.666667e-01 1=5.000000e-01 180 | Convolution Conv_603 1 1 785 786 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=65536 181 | HardSwish Div_611 1 1 786 794 0=1.666667e-01 1=5.000000e-01 182 | Convolution Conv_612 1 1 794 795 0=256 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=589824 183 | HardSwish Div_620 1 1 795 803 0=1.666667e-01 1=5.000000e-01 184 | Convolution Conv_621 1 1 803 804 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=65536 185 | Convolution Conv_622 1 1 776_splitncnn_0 805 0=256 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=0 6=131072 186 | Concat Concat_623 2 1 804 805 806 0=0 187 | BatchNorm BatchNormalization_624 1 1 806 807 0=512 188 | ReLU LeakyRelu_625 1 1 807 808 0=1.000000e-01 189 | Convolution Conv_626 1 1 808 809 0=512 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=262144 190 | HardSwish Div_634 1 1 809 817 0=1.666667e-01 1=5.000000e-01 191 | Convolution Conv_635 1 1 715_splitncnn_0 818 0=21 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=2688 192 | Reshape Reshape_649 1 1 818 836 0=6400 1=7 2=3 193 | Permute Transpose_650 1 1 836 output 0=1 194 | Convolution Conv_651 1 1 766_splitncnn_0 838 0=21 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=5376 195 | Reshape Reshape_665 1 1 838 856 0=1600 1=7 2=3 196 | Permute Transpose_666 1 1 856 857 0=1 197 | Convolution Conv_667 1 1 817 858 0=21 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=10752 198 | Reshape Reshape_681 1 1 858 876 0=400 1=7 2=3 199 | Permute Transpose_682 1 1 876 877 0=1 200 | 
--------------------------------------------------------------------------------