├── CMakeLists.txt ├── README.md ├── data ├── bus.jpg └── output.jpg ├── include └── ncnn │ ├── allocator.h │ ├── benchmark.h │ ├── blob.h │ ├── c_api.h │ ├── command.h │ ├── cpu.h │ ├── datareader.h │ ├── gpu.h │ ├── layer.h │ ├── layer_shader_type.h │ ├── layer_shader_type_enum.h │ ├── layer_type.h │ ├── layer_type_enum.h │ ├── mat.h │ ├── modelbin.h │ ├── ncnn_export.h │ ├── net.h │ ├── option.h │ ├── paramdict.h │ ├── pipeline.h │ ├── pipelinecache.h │ ├── platform.h │ ├── simplemath.h │ ├── simpleocv.h │ ├── simpleomp.h │ ├── simplestl.h │ ├── simplevk.h │ └── vulkan_header_fix.h ├── lib ├── cmake │ └── ncnn │ │ ├── ncnn-release.cmake │ │ ├── ncnn.cmake │ │ └── ncnnConfig.cmake ├── libncnn.a └── pkgconfig │ └── ncnn.pc ├── models ├── yolo11n.ncnn.bin └── yolo11n.ncnn.param └── src └── yolov11.cpp /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.11) 2 | project(yolov11_ncnn) 3 | set(CMAKE_CXX_STANDARD 14) 4 | set(CMAKE_BUILD_TYPE Release) 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pie -fPIE -fPIC -Wall") 6 | 7 | find_package(OpenCV REQUIRED) 8 | if (OpenCV_FOUND) 9 | message(STATUS "OpenCV_LIBS: ${OpenCV_LIBS}") 10 | message(STATUS "OpenCV_INCLUDE_DIRS: ${OpenCV_INCLUDE_DIRS}") 11 | else () 12 | message(FATAL_ERROR "opencv Not Found!") 13 | endif (OpenCV_FOUND) 14 | 15 | find_package(OpenMP REQUIRED) 16 | if (OPENMP_FOUND) 17 | message("OPENMP FOUND") 18 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 20 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") 21 | else () 22 | message(FATAL_ERROR "OpenMP Not Found!") 23 | endif () 24 | 25 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 26 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include/ncnn) 27 | link_directories(${CMAKE_CURRENT_SOURCE_DIR}/lib) 28 | 29 | add_executable(yolov11_ncnn 
src/yolov11.cpp) 30 | target_link_libraries(yolov11_ncnn ncnn ${OpenCV_LIBS}) 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolo11-ncnn 2 | 3 | ## Benchmark. 4 | | Model | Infer time| 5 | | ------------- | :-------------: | 6 | | YOLOv5s | 54 ms | 7 | | YOLO11n | 48 ms | 8 | 9 | ## How to build and run 10 | ``` shell 11 | mkdir build 12 | cd build 13 | cmake .. 14 | make 15 | ./yolov11_ncnn ../data/bus.jpg 16 | ``` 17 | 18 | ## Show 19 | ![show](./data/output.jpg) 20 | 21 | ## reference 22 | 23 |
24 |
25 | 26 | -------------------------------------------------------------------------------- /data/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhouweigogogo/yolo11-ncnn/af1b6036af4a8098a74b02acade01d99a70fb4aa/data/bus.jpg -------------------------------------------------------------------------------- /data/output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhouweigogogo/yolo11-ncnn/af1b6036af4a8098a74b02acade01d99a70fb4aa/data/output.jpg -------------------------------------------------------------------------------- /include/ncnn/allocator.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_ALLOCATOR_H 16 | #define NCNN_ALLOCATOR_H 17 | 18 | #ifdef _WIN32 19 | #define WIN32_LEAN_AND_MEAN 20 | #include 21 | #endif 22 | 23 | #include "platform.h" 24 | 25 | #include 26 | 27 | #if NCNN_PLATFORM_API 28 | #if __ANDROID_API__ >= 26 29 | #include 30 | #endif // __ANDROID_API__ >= 26 31 | #endif // NCNN_PLATFORM_API 32 | 33 | namespace ncnn { 34 | 35 | // the alignment of all the allocated buffers 36 | #if NCNN_AVX512 37 | #define NCNN_MALLOC_ALIGN 64 38 | #elif NCNN_AVX 39 | #define NCNN_MALLOC_ALIGN 32 40 | #else 41 | #define NCNN_MALLOC_ALIGN 16 42 | #endif 43 | 44 | // we have some optimized kernels that may overread buffer a bit in loop 45 | // it is common to interleave next-loop data load with arithmetic instructions 46 | // allocating more bytes keeps us safe from SEGV_ACCERR failure 47 | #define NCNN_MALLOC_OVERREAD 64 48 | 49 | // Aligns a pointer to the specified number of bytes 50 | // ptr Aligned pointer 51 | // n Alignment size that must be a power of two 52 | template 53 | static NCNN_FORCEINLINE _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp)) 54 | { 55 | return (_Tp*)(((size_t)ptr + n - 1) & -n); 56 | } 57 | 58 | // Aligns a buffer size to the specified number of bytes 59 | // The function returns the minimum number that is greater or equal to sz and is divisible by n 60 | // sz Buffer size to align 61 | // n Alignment size that must be a power of two 62 | static NCNN_FORCEINLINE size_t alignSize(size_t sz, int n) 63 | { 64 | return (sz + n - 1) & -n; 65 | } 66 | 67 | static NCNN_FORCEINLINE void* fastMalloc(size_t size) 68 | { 69 | #if _MSC_VER 70 | return _aligned_malloc(size, NCNN_MALLOC_ALIGN); 71 | #elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17) 72 | void* ptr = 0; 73 | if (posix_memalign(&ptr, NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD)) 74 | ptr = 0; 75 | return ptr; 76 | #elif __ANDROID__ && __ANDROID_API__ < 17 77 | return 
memalign(NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD); 78 | #else 79 | unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + NCNN_MALLOC_ALIGN + NCNN_MALLOC_OVERREAD); 80 | if (!udata) 81 | return 0; 82 | unsigned char** adata = alignPtr((unsigned char**)udata + 1, NCNN_MALLOC_ALIGN); 83 | adata[-1] = udata; 84 | return adata; 85 | #endif 86 | } 87 | 88 | static NCNN_FORCEINLINE void fastFree(void* ptr) 89 | { 90 | if (ptr) 91 | { 92 | #if _MSC_VER 93 | _aligned_free(ptr); 94 | #elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17) 95 | free(ptr); 96 | #elif __ANDROID__ && __ANDROID_API__ < 17 97 | free(ptr); 98 | #else 99 | unsigned char* udata = ((unsigned char**)ptr)[-1]; 100 | free(udata); 101 | #endif 102 | } 103 | } 104 | 105 | #if NCNN_THREADS 106 | // exchange-add operation for atomic operations on reference counters 107 | #if defined __riscv && !defined __riscv_atomic 108 | // riscv target without A extension 109 | static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) 110 | { 111 | int tmp = *addr; 112 | *addr += delta; 113 | return tmp; 114 | } 115 | #elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32) 116 | // atomic increment on the linux version of the Intel(tm) compiler 117 | #define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast(reinterpret_cast(addr)), delta) 118 | #elif defined __GNUC__ 119 | #if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) 120 | #ifdef __ATOMIC_ACQ_REL 121 | #define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL) 122 | #else 123 | #define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4) 124 | #endif 125 | #else 126 | #if defined __ATOMIC_ACQ_REL && !defined __clang__ 127 | // version for gcc >= 4.7 128 | #define NCNN_XADD(addr, delta) 
(int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL) 129 | #else 130 | #define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta)) 131 | #endif 132 | #endif 133 | #elif defined _MSC_VER && !defined RC_INVOKED 134 | #define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta) 135 | #else 136 | // thread-unsafe branch 137 | static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) 138 | { 139 | int tmp = *addr; 140 | *addr += delta; 141 | return tmp; 142 | } 143 | #endif 144 | #else // NCNN_THREADS 145 | static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) 146 | { 147 | int tmp = *addr; 148 | *addr += delta; 149 | return tmp; 150 | } 151 | #endif // NCNN_THREADS 152 | 153 | class NCNN_EXPORT Allocator 154 | { 155 | public: 156 | virtual ~Allocator(); 157 | virtual void* fastMalloc(size_t size) = 0; 158 | virtual void fastFree(void* ptr) = 0; 159 | }; 160 | 161 | class PoolAllocatorPrivate; 162 | class NCNN_EXPORT PoolAllocator : public Allocator 163 | { 164 | public: 165 | PoolAllocator(); 166 | ~PoolAllocator(); 167 | 168 | // ratio range 0 ~ 1 169 | // default cr = 0 170 | void set_size_compare_ratio(float scr); 171 | 172 | // budget drop threshold 173 | // default threshold = 10 174 | void set_size_drop_threshold(size_t); 175 | 176 | // release all budgets immediately 177 | void clear(); 178 | 179 | virtual void* fastMalloc(size_t size); 180 | virtual void fastFree(void* ptr); 181 | 182 | private: 183 | PoolAllocator(const PoolAllocator&); 184 | PoolAllocator& operator=(const PoolAllocator&); 185 | 186 | private: 187 | PoolAllocatorPrivate* const d; 188 | }; 189 | 190 | class UnlockedPoolAllocatorPrivate; 191 | class NCNN_EXPORT UnlockedPoolAllocator : public Allocator 192 | { 193 | public: 194 | UnlockedPoolAllocator(); 195 | ~UnlockedPoolAllocator(); 196 | 197 | // ratio range 0 ~ 1 198 | // default cr = 0 199 | void set_size_compare_ratio(float scr); 200 | 201 | 
// budget drop threshold 202 | // default threshold = 10 203 | void set_size_drop_threshold(size_t); 204 | 205 | // release all budgets immediately 206 | void clear(); 207 | 208 | virtual void* fastMalloc(size_t size); 209 | virtual void fastFree(void* ptr); 210 | 211 | private: 212 | UnlockedPoolAllocator(const UnlockedPoolAllocator&); 213 | UnlockedPoolAllocator& operator=(const UnlockedPoolAllocator&); 214 | 215 | private: 216 | UnlockedPoolAllocatorPrivate* const d; 217 | }; 218 | 219 | #if NCNN_VULKAN 220 | 221 | class VulkanDevice; 222 | 223 | class NCNN_EXPORT VkBufferMemory 224 | { 225 | public: 226 | VkBuffer buffer; 227 | 228 | // the base offset assigned by allocator 229 | size_t offset; 230 | size_t capacity; 231 | 232 | VkDeviceMemory memory; 233 | void* mapped_ptr; 234 | 235 | // buffer state, modified by command functions internally 236 | mutable VkAccessFlags access_flags; 237 | mutable VkPipelineStageFlags stage_flags; 238 | 239 | // initialize and modified by mat 240 | int refcount; 241 | }; 242 | 243 | class NCNN_EXPORT VkImageMemory 244 | { 245 | public: 246 | VkImage image; 247 | VkImageView imageview; 248 | 249 | // underlying info assigned by allocator 250 | int width; 251 | int height; 252 | int depth; 253 | VkFormat format; 254 | 255 | VkDeviceMemory memory; 256 | void* mapped_ptr; 257 | 258 | // the base offset assigned by allocator 259 | size_t bind_offset; 260 | size_t bind_capacity; 261 | 262 | // image state, modified by command functions internally 263 | mutable VkAccessFlags access_flags; 264 | mutable VkImageLayout image_layout; 265 | mutable VkPipelineStageFlags stage_flags; 266 | 267 | // in-execution state, modified by command functions internally 268 | mutable int command_refcount; 269 | 270 | // initialize and modified by mat 271 | int refcount; 272 | }; 273 | 274 | class NCNN_EXPORT VkAllocator 275 | { 276 | public: 277 | explicit VkAllocator(const VulkanDevice* _vkdev); 278 | virtual ~VkAllocator(); 279 | 280 | virtual void 
clear(); 281 | 282 | virtual VkBufferMemory* fastMalloc(size_t size) = 0; 283 | virtual void fastFree(VkBufferMemory* ptr) = 0; 284 | virtual int flush(VkBufferMemory* ptr); 285 | virtual int invalidate(VkBufferMemory* ptr); 286 | 287 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0; 288 | virtual void fastFree(VkImageMemory* ptr) = 0; 289 | 290 | public: 291 | const VulkanDevice* vkdev; 292 | uint32_t buffer_memory_type_index; 293 | uint32_t image_memory_type_index; 294 | uint32_t reserved_type_index; 295 | bool mappable; 296 | bool coherent; 297 | 298 | protected: 299 | VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage); 300 | VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index); 301 | VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer); 302 | 303 | VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage); 304 | VkImageView create_imageview(VkImage image, VkFormat format); 305 | }; 306 | 307 | class VkBlobAllocatorPrivate; 308 | class NCNN_EXPORT VkBlobAllocator : public VkAllocator 309 | { 310 | public: 311 | explicit VkBlobAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 16 * 1024 * 1024); // 16M 312 | virtual ~VkBlobAllocator(); 313 | 314 | public: 315 | // release all budgets immediately 316 | virtual void clear(); 317 | 318 | virtual VkBufferMemory* fastMalloc(size_t size); 319 | virtual void fastFree(VkBufferMemory* ptr); 320 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); 321 | virtual void fastFree(VkImageMemory* ptr); 322 | 323 | private: 324 | VkBlobAllocator(const VkBlobAllocator&); 325 | VkBlobAllocator& operator=(const VkBlobAllocator&); 326 | 327 | private: 328 | VkBlobAllocatorPrivate* const d; 329 | }; 330 | 331 | class VkWeightAllocatorPrivate; 332 | class NCNN_EXPORT VkWeightAllocator : public 
VkAllocator 333 | { 334 | public: 335 | explicit VkWeightAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024); // 8M 336 | virtual ~VkWeightAllocator(); 337 | 338 | public: 339 | // release all blocks immediately 340 | virtual void clear(); 341 | 342 | public: 343 | virtual VkBufferMemory* fastMalloc(size_t size); 344 | virtual void fastFree(VkBufferMemory* ptr); 345 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); 346 | virtual void fastFree(VkImageMemory* ptr); 347 | 348 | private: 349 | VkWeightAllocator(const VkWeightAllocator&); 350 | VkWeightAllocator& operator=(const VkWeightAllocator&); 351 | 352 | private: 353 | VkWeightAllocatorPrivate* const d; 354 | }; 355 | 356 | class VkStagingAllocatorPrivate; 357 | class NCNN_EXPORT VkStagingAllocator : public VkAllocator 358 | { 359 | public: 360 | explicit VkStagingAllocator(const VulkanDevice* vkdev); 361 | virtual ~VkStagingAllocator(); 362 | 363 | public: 364 | // ratio range 0 ~ 1 365 | // default cr = 0.75 366 | void set_size_compare_ratio(float scr); 367 | 368 | // release all budgets immediately 369 | virtual void clear(); 370 | 371 | virtual VkBufferMemory* fastMalloc(size_t size); 372 | virtual void fastFree(VkBufferMemory* ptr); 373 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); 374 | virtual void fastFree(VkImageMemory* ptr); 375 | 376 | private: 377 | VkStagingAllocator(const VkStagingAllocator&); 378 | VkStagingAllocator& operator=(const VkStagingAllocator&); 379 | 380 | private: 381 | VkStagingAllocatorPrivate* const d; 382 | }; 383 | 384 | class VkWeightStagingAllocatorPrivate; 385 | class NCNN_EXPORT VkWeightStagingAllocator : public VkAllocator 386 | { 387 | public: 388 | explicit VkWeightStagingAllocator(const VulkanDevice* vkdev); 389 | virtual ~VkWeightStagingAllocator(); 390 | 391 | public: 392 | virtual VkBufferMemory* fastMalloc(size_t size); 393 | virtual void 
fastFree(VkBufferMemory* ptr); 394 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); 395 | virtual void fastFree(VkImageMemory* ptr); 396 | 397 | private: 398 | VkWeightStagingAllocator(const VkWeightStagingAllocator&); 399 | VkWeightStagingAllocator& operator=(const VkWeightStagingAllocator&); 400 | 401 | private: 402 | VkWeightStagingAllocatorPrivate* const d; 403 | }; 404 | 405 | #if NCNN_PLATFORM_API 406 | #if __ANDROID_API__ >= 26 407 | class NCNN_EXPORT VkAndroidHardwareBufferImageAllocator : public VkAllocator 408 | { 409 | public: 410 | VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb); 411 | virtual ~VkAndroidHardwareBufferImageAllocator(); 412 | 413 | public: 414 | virtual VkBufferMemory* fastMalloc(size_t size); 415 | virtual void fastFree(VkBufferMemory* ptr); 416 | virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); 417 | virtual void fastFree(VkImageMemory* ptr); 418 | 419 | private: 420 | VkAndroidHardwareBufferImageAllocator(const VkAndroidHardwareBufferImageAllocator&); 421 | VkAndroidHardwareBufferImageAllocator& operator=(const VkAndroidHardwareBufferImageAllocator&); 422 | 423 | public: 424 | int init(); 425 | 426 | int width() const; 427 | int height() const; 428 | uint64_t external_format() const; 429 | 430 | public: 431 | AHardwareBuffer* hb; 432 | AHardwareBuffer_Desc bufferDesc; 433 | VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties; 434 | VkAndroidHardwareBufferPropertiesANDROID bufferProperties; 435 | VkSamplerYcbcrConversionKHR samplerYcbcrConversion; 436 | }; 437 | #endif // __ANDROID_API__ >= 26 438 | #endif // NCNN_PLATFORM_API 439 | 440 | #endif // NCNN_VULKAN 441 | 442 | } // namespace ncnn 443 | 444 | #endif // NCNN_ALLOCATOR_H 445 | -------------------------------------------------------------------------------- /include/ncnn/benchmark.h: 
-------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_BENCHMARK_H 16 | #define NCNN_BENCHMARK_H 17 | 18 | #include "layer.h" 19 | #include "mat.h" 20 | #include "platform.h" 21 | 22 | namespace ncnn { 23 | 24 | // get now timestamp in ms 25 | NCNN_EXPORT double get_current_time(); 26 | 27 | // sleep milliseconds 28 | NCNN_EXPORT void sleep(unsigned long long int milliseconds = 1000); 29 | 30 | #if NCNN_BENCHMARK 31 | 32 | NCNN_EXPORT void benchmark(const Layer* layer, double start, double end); 33 | NCNN_EXPORT void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_blob, double start, double end); 34 | 35 | #endif // NCNN_BENCHMARK 36 | 37 | } // namespace ncnn 38 | 39 | #endif // NCNN_BENCHMARK_H 40 | -------------------------------------------------------------------------------- /include/ncnn/blob.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 
4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_BLOB_H 16 | #define NCNN_BLOB_H 17 | 18 | #include "mat.h" 19 | #include "platform.h" 20 | 21 | namespace ncnn { 22 | 23 | class NCNN_EXPORT Blob 24 | { 25 | public: 26 | // empty 27 | Blob(); 28 | 29 | public: 30 | #if NCNN_STRING 31 | // blob name 32 | std::string name; 33 | #endif // NCNN_STRING 34 | // layer index which produce this blob as output 35 | int producer; 36 | // layer index which need this blob as input 37 | int consumer; 38 | // shape hint 39 | Mat shape; 40 | }; 41 | 42 | } // namespace ncnn 43 | 44 | #endif // NCNN_BLOB_H 45 | -------------------------------------------------------------------------------- /include/ncnn/c_api.h: -------------------------------------------------------------------------------- 1 | /* Tencent is pleased to support the open source community by making ncnn available. 2 | * 3 | * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | * 5 | * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | * in compliance with the License. You may obtain a copy of the License at 7 | * 8 | * https://opensource.org/licenses/BSD-3-Clause 9 | * 10 | * Unless required by applicable law or agreed to in writing, software distributed 11 | * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | * CONDITIONS OF ANY KIND, either express or implied. 
See the License for the 13 | * specific language governing permissions and limitations under the License. 14 | */ 15 | 16 | #ifndef NCNN_C_API_H 17 | #define NCNN_C_API_H 18 | 19 | #include "platform.h" 20 | 21 | #if NCNN_C_API 22 | 23 | #include 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | NCNN_EXPORT const char* ncnn_version(void); 30 | 31 | /* allocator api */ 32 | typedef struct __ncnn_allocator_t* ncnn_allocator_t; 33 | struct NCNN_EXPORT __ncnn_allocator_t 34 | { 35 | void* pthis; 36 | 37 | void* (*fast_malloc)(ncnn_allocator_t allocator, size_t size); 38 | void (*fast_free)(ncnn_allocator_t allocator, void* ptr); 39 | }; 40 | 41 | NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_pool_allocator(void); 42 | NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_unlocked_pool_allocator(void); 43 | NCNN_EXPORT void ncnn_allocator_destroy(ncnn_allocator_t allocator); 44 | 45 | /* option api */ 46 | typedef struct __ncnn_option_t* ncnn_option_t; 47 | 48 | NCNN_EXPORT ncnn_option_t ncnn_option_create(void); 49 | NCNN_EXPORT void ncnn_option_destroy(ncnn_option_t opt); 50 | 51 | NCNN_EXPORT int ncnn_option_get_num_threads(const ncnn_option_t opt); 52 | NCNN_EXPORT void ncnn_option_set_num_threads(ncnn_option_t opt, int num_threads); 53 | 54 | NCNN_EXPORT int ncnn_option_get_use_local_pool_allocator(const ncnn_option_t opt); 55 | NCNN_EXPORT void ncnn_option_set_use_local_pool_allocator(ncnn_option_t opt, int use_local_pool_allocator); 56 | 57 | NCNN_EXPORT void ncnn_option_set_blob_allocator(ncnn_option_t opt, ncnn_allocator_t allocator); 58 | NCNN_EXPORT void ncnn_option_set_workspace_allocator(ncnn_option_t opt, ncnn_allocator_t allocator); 59 | 60 | NCNN_EXPORT int ncnn_option_get_use_vulkan_compute(const ncnn_option_t opt); 61 | NCNN_EXPORT void ncnn_option_set_use_vulkan_compute(ncnn_option_t opt, int use_vulkan_compute); 62 | 63 | /* mat api */ 64 | typedef struct __ncnn_mat_t* ncnn_mat_t; 65 | 66 | NCNN_EXPORT ncnn_mat_t 
ncnn_mat_create(void); 67 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d(int w, ncnn_allocator_t allocator); 68 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d(int w, int h, ncnn_allocator_t allocator); 69 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d(int w, int h, int c, ncnn_allocator_t allocator); 70 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d(int w, int h, int d, int c, ncnn_allocator_t allocator); 71 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d(int w, void* data, ncnn_allocator_t allocator); 72 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d(int w, int h, void* data, ncnn_allocator_t allocator); 73 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d(int w, int h, int c, void* data, ncnn_allocator_t allocator); 74 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d(int w, int h, int d, int c, void* data, ncnn_allocator_t allocator); 75 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d_elem(int w, size_t elemsize, int elempack, ncnn_allocator_t allocator); 76 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d_elem(int w, int h, size_t elemsize, int elempack, ncnn_allocator_t allocator); 77 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d_elem(int w, int h, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator); 78 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d_elem(int w, int h, int d, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator); 79 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d_elem(int w, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator); 80 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d_elem(int w, int h, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator); 81 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d_elem(int w, int h, int c, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator); 82 | NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d_elem(int w, int h, int d, int c, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator); 83 | NCNN_EXPORT void 
ncnn_mat_destroy(ncnn_mat_t mat); 84 | 85 | NCNN_EXPORT void ncnn_mat_fill_float(ncnn_mat_t mat, float v); 86 | 87 | NCNN_EXPORT ncnn_mat_t ncnn_mat_clone(const ncnn_mat_t mat, ncnn_allocator_t allocator); 88 | NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_1d(const ncnn_mat_t mat, int w, ncnn_allocator_t allocator); 89 | NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_2d(const ncnn_mat_t mat, int w, int h, ncnn_allocator_t allocator); 90 | NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_3d(const ncnn_mat_t mat, int w, int h, int c, ncnn_allocator_t allocator); 91 | NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_4d(const ncnn_mat_t mat, int w, int h, int d, int c, ncnn_allocator_t allocator); 92 | 93 | NCNN_EXPORT int ncnn_mat_get_dims(const ncnn_mat_t mat); 94 | NCNN_EXPORT int ncnn_mat_get_w(const ncnn_mat_t mat); 95 | NCNN_EXPORT int ncnn_mat_get_h(const ncnn_mat_t mat); 96 | NCNN_EXPORT int ncnn_mat_get_d(const ncnn_mat_t mat); 97 | NCNN_EXPORT int ncnn_mat_get_c(const ncnn_mat_t mat); 98 | NCNN_EXPORT size_t ncnn_mat_get_elemsize(const ncnn_mat_t mat); 99 | NCNN_EXPORT int ncnn_mat_get_elempack(const ncnn_mat_t mat); 100 | NCNN_EXPORT size_t ncnn_mat_get_cstep(const ncnn_mat_t mat); 101 | NCNN_EXPORT void* ncnn_mat_get_data(const ncnn_mat_t mat); 102 | 103 | NCNN_EXPORT void* ncnn_mat_get_channel_data(const ncnn_mat_t mat, int c); 104 | 105 | #if NCNN_PIXEL 106 | 107 | /* mat pixel api */ 108 | #define NCNN_MAT_PIXEL_RGB 1 109 | #define NCNN_MAT_PIXEL_BGR 2 110 | #define NCNN_MAT_PIXEL_GRAY 3 111 | #define NCNN_MAT_PIXEL_RGBA 4 112 | #define NCNN_MAT_PIXEL_BGRA 5 113 | #define NCNN_MAT_PIXEL_X2Y(X, Y) (X | (Y << 16)) 114 | NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, ncnn_allocator_t allocator); 115 | NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, ncnn_allocator_t allocator); 116 | NCNN_EXPORT ncnn_mat_t 
ncnn_mat_from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, ncnn_allocator_t allocator); 117 | NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, ncnn_allocator_t allocator); 118 | NCNN_EXPORT void ncnn_mat_to_pixels(const ncnn_mat_t mat, unsigned char* pixels, int type, int stride); 119 | NCNN_EXPORT void ncnn_mat_to_pixels_resize(const ncnn_mat_t mat, unsigned char* pixels, int type, int target_width, int target_height, int target_stride); 120 | 121 | #endif /* NCNN_PIXEL */ 122 | 123 | NCNN_EXPORT void ncnn_mat_substract_mean_normalize(ncnn_mat_t mat, const float* mean_vals, const float* norm_vals); 124 | 125 | NCNN_EXPORT void ncnn_convert_packing(const ncnn_mat_t src, ncnn_mat_t* dst, int elempack, const ncnn_option_t opt); 126 | NCNN_EXPORT void ncnn_flatten(const ncnn_mat_t src, ncnn_mat_t* dst, const ncnn_option_t opt); 127 | 128 | /* blob api */ 129 | typedef struct __ncnn_blob_t* ncnn_blob_t; 130 | 131 | #if NCNN_STRING 132 | NCNN_EXPORT const char* ncnn_blob_get_name(const ncnn_blob_t blob); 133 | #endif /* NCNN_STRING */ 134 | 135 | NCNN_EXPORT int ncnn_blob_get_producer(const ncnn_blob_t blob); 136 | NCNN_EXPORT int ncnn_blob_get_consumer(const ncnn_blob_t blob); 137 | 138 | NCNN_EXPORT void ncnn_blob_get_shape(const ncnn_blob_t blob, int* dims, int* w, int* h, int* c); 139 | 140 | /* paramdict api */ 141 | typedef struct __ncnn_paramdict_t* ncnn_paramdict_t; 142 | 143 | NCNN_EXPORT ncnn_paramdict_t ncnn_paramdict_create(void); 144 | NCNN_EXPORT void ncnn_paramdict_destroy(ncnn_paramdict_t pd); 145 | 146 | NCNN_EXPORT int ncnn_paramdict_get_type(const ncnn_paramdict_t pd, int id); 147 | 148 | NCNN_EXPORT int ncnn_paramdict_get_int(const ncnn_paramdict_t pd, int id, int def); 149 | NCNN_EXPORT float ncnn_paramdict_get_float(const 
ncnn_paramdict_t pd, int id, float def); 150 | NCNN_EXPORT ncnn_mat_t ncnn_paramdict_get_array(const ncnn_paramdict_t pd, int id, const ncnn_mat_t def); 151 | 152 | NCNN_EXPORT void ncnn_paramdict_set_int(ncnn_paramdict_t pd, int id, int i); 153 | NCNN_EXPORT void ncnn_paramdict_set_float(ncnn_paramdict_t pd, int id, float f); 154 | NCNN_EXPORT void ncnn_paramdict_set_array(ncnn_paramdict_t pd, int id, const ncnn_mat_t v); 155 | 156 | /* datareader api */ 157 | typedef struct __ncnn_datareader_t* ncnn_datareader_t; 158 | struct NCNN_EXPORT __ncnn_datareader_t 159 | { 160 | void* pthis; 161 | 162 | #if NCNN_STRING 163 | int (*scan)(ncnn_datareader_t dr, const char* format, void* p); 164 | #endif /* NCNN_STRING */ 165 | size_t (*read)(ncnn_datareader_t dr, void* buf, size_t size); 166 | }; 167 | 168 | NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create(void); 169 | #if NCNN_STDIO 170 | NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_stdio(FILE* fp); 171 | #endif /* NCNN_STDIO */ 172 | NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_memory(const unsigned char** mem); 173 | NCNN_EXPORT void ncnn_datareader_destroy(ncnn_datareader_t dr); 174 | 175 | /* modelbin api */ 176 | typedef struct __ncnn_modelbin_t* ncnn_modelbin_t; 177 | struct NCNN_EXPORT __ncnn_modelbin_t 178 | { 179 | void* pthis; 180 | 181 | ncnn_mat_t (*load_1d)(const ncnn_modelbin_t mb, int w, int type); 182 | ncnn_mat_t (*load_2d)(const ncnn_modelbin_t mb, int w, int h, int type); 183 | ncnn_mat_t (*load_3d)(const ncnn_modelbin_t mb, int w, int h, int c, int type); 184 | }; 185 | 186 | NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_datareader(const ncnn_datareader_t dr); 187 | NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_mat_array(const ncnn_mat_t* weights, int n); 188 | NCNN_EXPORT void ncnn_modelbin_destroy(ncnn_modelbin_t mb); 189 | 190 | /* layer api */ 191 | typedef struct __ncnn_layer_t* ncnn_layer_t; 192 | struct NCNN_EXPORT __ncnn_layer_t 193 | { 194 | void* 
pthis; 195 | 196 | int (*load_param)(ncnn_layer_t layer, const ncnn_paramdict_t pd); 197 | int (*load_model)(ncnn_layer_t layer, const ncnn_modelbin_t mb); 198 | 199 | int (*create_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt); 200 | int (*destroy_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt); 201 | 202 | int (*forward_1)(const ncnn_layer_t layer, const ncnn_mat_t bottom_blob, ncnn_mat_t* top_blob, const ncnn_option_t opt); 203 | int (*forward_n)(const ncnn_layer_t layer, const ncnn_mat_t* bottom_blobs, int n, ncnn_mat_t* top_blobs, int n2, const ncnn_option_t opt); 204 | 205 | int (*forward_inplace_1)(const ncnn_layer_t layer, ncnn_mat_t bottom_top_blob, const ncnn_option_t opt); 206 | int (*forward_inplace_n)(const ncnn_layer_t layer, ncnn_mat_t* bottom_top_blobs, int n, const ncnn_option_t opt); 207 | }; 208 | 209 | NCNN_EXPORT ncnn_layer_t ncnn_layer_create(void); 210 | NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_typeindex(int typeindex); 211 | #if NCNN_STRING 212 | NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_type(const char* type); 213 | NCNN_EXPORT int ncnn_layer_type_to_index(const char* type); 214 | #endif /* NCNN_STRING */ 215 | NCNN_EXPORT void ncnn_layer_destroy(ncnn_layer_t layer); 216 | 217 | #if NCNN_STRING 218 | NCNN_EXPORT const char* ncnn_layer_get_name(const ncnn_layer_t layer); 219 | #endif /* NCNN_STRING */ 220 | 221 | NCNN_EXPORT int ncnn_layer_get_typeindex(const ncnn_layer_t layer); 222 | #if NCNN_STRING 223 | NCNN_EXPORT const char* ncnn_layer_get_type(const ncnn_layer_t layer); 224 | #endif /* NCNN_STRING */ 225 | 226 | NCNN_EXPORT int ncnn_layer_get_one_blob_only(const ncnn_layer_t layer); 227 | NCNN_EXPORT int ncnn_layer_get_support_inplace(const ncnn_layer_t layer); 228 | NCNN_EXPORT int ncnn_layer_get_support_vulkan(const ncnn_layer_t layer); 229 | NCNN_EXPORT int ncnn_layer_get_support_packing(const ncnn_layer_t layer); 230 | NCNN_EXPORT int ncnn_layer_get_support_bf16_storage(const ncnn_layer_t layer); 231 | 
NCNN_EXPORT int ncnn_layer_get_support_fp16_storage(const ncnn_layer_t layer); 232 | NCNN_EXPORT int ncnn_layer_get_support_image_storage(const ncnn_layer_t layer); 233 | 234 | NCNN_EXPORT void ncnn_layer_set_one_blob_only(ncnn_layer_t layer, int enable); 235 | NCNN_EXPORT void ncnn_layer_set_support_inplace(ncnn_layer_t layer, int enable); 236 | NCNN_EXPORT void ncnn_layer_set_support_vulkan(ncnn_layer_t layer, int enable); 237 | NCNN_EXPORT void ncnn_layer_set_support_packing(ncnn_layer_t layer, int enable); 238 | NCNN_EXPORT void ncnn_layer_set_support_bf16_storage(ncnn_layer_t layer, int enable); 239 | NCNN_EXPORT void ncnn_layer_set_support_fp16_storage(ncnn_layer_t layer, int enable); 240 | NCNN_EXPORT void ncnn_layer_set_support_image_storage(ncnn_layer_t layer, int enable); 241 | 242 | NCNN_EXPORT int ncnn_layer_get_bottom_count(const ncnn_layer_t layer); 243 | NCNN_EXPORT int ncnn_layer_get_bottom(const ncnn_layer_t layer, int i); 244 | NCNN_EXPORT int ncnn_layer_get_top_count(const ncnn_layer_t layer); 245 | NCNN_EXPORT int ncnn_layer_get_top(const ncnn_layer_t layer, int i); 246 | 247 | NCNN_EXPORT void ncnn_blob_get_bottom_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); 248 | NCNN_EXPORT void ncnn_blob_get_top_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); 249 | 250 | /* layer factory function */ 251 | typedef ncnn_layer_t (*ncnn_layer_creator_t)(void* userdata); 252 | typedef void (*ncnn_layer_destroyer_t)(ncnn_layer_t layer, void* userdata); 253 | 254 | typedef struct __ncnn_net_custom_layer_factory_t* ncnn_net_custom_layer_factory_t; 255 | struct __ncnn_net_custom_layer_factory_t 256 | { 257 | ncnn_layer_creator_t creator; 258 | ncnn_layer_destroyer_t destroyer; 259 | void* userdata; 260 | ncnn_net_custom_layer_factory_t next; 261 | }; 262 | 263 | /* net api */ 264 | typedef struct __ncnn_net_t* ncnn_net_t; 265 | struct __ncnn_net_t 266 | { 267 | void* pthis; 268 | 269 | 
ncnn_net_custom_layer_factory_t custom_layer_factory; 270 | }; 271 | 272 | NCNN_EXPORT ncnn_net_t ncnn_net_create(void); 273 | NCNN_EXPORT void ncnn_net_destroy(ncnn_net_t net); 274 | 275 | NCNN_EXPORT ncnn_option_t ncnn_net_get_option(ncnn_net_t net); 276 | NCNN_EXPORT void ncnn_net_set_option(ncnn_net_t net, ncnn_option_t opt); 277 | 278 | #if NCNN_VULKAN 279 | NCNN_EXPORT void ncnn_net_set_vulkan_device(ncnn_net_t net, int device_index); 280 | #endif 281 | 282 | #if NCNN_STRING 283 | NCNN_EXPORT void ncnn_net_register_custom_layer_by_type(ncnn_net_t net, const char* type, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata); 284 | #endif /* NCNN_STRING */ 285 | NCNN_EXPORT void ncnn_net_register_custom_layer_by_typeindex(ncnn_net_t net, int typeindex, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata); 286 | 287 | #if NCNN_STDIO 288 | #if NCNN_STRING 289 | NCNN_EXPORT int ncnn_net_load_param(ncnn_net_t net, const char* path); 290 | #endif /* NCNN_STRING */ 291 | NCNN_EXPORT int ncnn_net_load_param_bin(ncnn_net_t net, const char* path); 292 | NCNN_EXPORT int ncnn_net_load_model(ncnn_net_t net, const char* path); 293 | #endif /* NCNN_STDIO */ 294 | 295 | #if NCNN_STDIO 296 | #if NCNN_STRING 297 | NCNN_EXPORT int ncnn_net_load_param_memory(ncnn_net_t net, const char* mem); 298 | #endif /* NCNN_STRING */ 299 | #endif /* NCNN_STDIO */ 300 | NCNN_EXPORT int ncnn_net_load_param_bin_memory(ncnn_net_t net, const unsigned char* mem); 301 | NCNN_EXPORT int ncnn_net_load_model_memory(ncnn_net_t net, const unsigned char* mem); 302 | 303 | #if NCNN_STRING 304 | NCNN_EXPORT int ncnn_net_load_param_datareader(ncnn_net_t net, const ncnn_datareader_t dr); 305 | #endif /* NCNN_STRING */ 306 | NCNN_EXPORT int ncnn_net_load_param_bin_datareader(ncnn_net_t net, const ncnn_datareader_t dr); 307 | NCNN_EXPORT int ncnn_net_load_model_datareader(ncnn_net_t net, const ncnn_datareader_t dr); 308 | 309 | NCNN_EXPORT void 
ncnn_net_clear(ncnn_net_t net); 310 | 311 | NCNN_EXPORT int ncnn_net_get_input_count(const ncnn_net_t net); 312 | NCNN_EXPORT int ncnn_net_get_output_count(const ncnn_net_t net); 313 | #if NCNN_STRING 314 | NCNN_EXPORT const char* ncnn_net_get_input_name(const ncnn_net_t net, int i); 315 | NCNN_EXPORT const char* ncnn_net_get_output_name(const ncnn_net_t net, int i); 316 | #endif /* NCNN_STRING */ 317 | NCNN_EXPORT int ncnn_net_get_input_index(const ncnn_net_t net, int i); 318 | NCNN_EXPORT int ncnn_net_get_output_index(const ncnn_net_t net, int i); 319 | 320 | /* extractor api */ 321 | typedef struct __ncnn_extractor_t* ncnn_extractor_t; 322 | 323 | NCNN_EXPORT ncnn_extractor_t ncnn_extractor_create(ncnn_net_t net); 324 | NCNN_EXPORT void ncnn_extractor_destroy(ncnn_extractor_t ex); 325 | 326 | NCNN_EXPORT void ncnn_extractor_set_option(ncnn_extractor_t ex, const ncnn_option_t opt); 327 | 328 | #if NCNN_STRING 329 | NCNN_EXPORT int ncnn_extractor_input(ncnn_extractor_t ex, const char* name, const ncnn_mat_t mat); 330 | NCNN_EXPORT int ncnn_extractor_extract(ncnn_extractor_t ex, const char* name, ncnn_mat_t* mat); 331 | #endif /* NCNN_STRING */ 332 | NCNN_EXPORT int ncnn_extractor_input_index(ncnn_extractor_t ex, int index, const ncnn_mat_t mat); 333 | NCNN_EXPORT int ncnn_extractor_extract_index(ncnn_extractor_t ex, int index, ncnn_mat_t* mat); 334 | 335 | /* mat process api */ 336 | #define NCNN_BORDER_CONSTANT 0 337 | #define NCNN_BORDER_REPLICATE 1 338 | #define NCNN_BORDER_REFLECT 2 339 | #define NCNN_BORDER_TRANSPARENT -233 340 | NCNN_EXPORT void ncnn_copy_make_border(const ncnn_mat_t src, ncnn_mat_t dst, int top, int bottom, int left, int right, int type, float v, const ncnn_option_t opt); 341 | NCNN_EXPORT void ncnn_copy_make_border_3d(const ncnn_mat_t src, ncnn_mat_t dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const ncnn_option_t opt); 342 | NCNN_EXPORT void ncnn_copy_cut_border(const ncnn_mat_t src, ncnn_mat_t 
dst, int top, int bottom, int left, int right, const ncnn_option_t opt); 343 | NCNN_EXPORT void ncnn_copy_cut_border_3d(const ncnn_mat_t src, ncnn_mat_t dst, int top, int bottom, int left, int right, int front, int behind, const ncnn_option_t opt); 344 | 345 | #if NCNN_PIXEL_DRAWING 346 | /* mat pixel drawing api*/ 347 | NCNN_EXPORT void ncnn_draw_rectangle_c1(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); 348 | NCNN_EXPORT void ncnn_draw_rectangle_c2(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); 349 | NCNN_EXPORT void ncnn_draw_rectangle_c3(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); 350 | NCNN_EXPORT void ncnn_draw_rectangle_c4(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); 351 | 352 | NCNN_EXPORT void ncnn_draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); 353 | NCNN_EXPORT void ncnn_draw_text_c2(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); 354 | NCNN_EXPORT void ncnn_draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); 355 | NCNN_EXPORT void ncnn_draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); 356 | 357 | NCNN_EXPORT void ncnn_draw_circle_c1(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); 358 | NCNN_EXPORT void ncnn_draw_circle_c2(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); 359 | NCNN_EXPORT void ncnn_draw_circle_c3(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); 360 | NCNN_EXPORT void 
ncnn_draw_circle_c4(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); 361 | 362 | NCNN_EXPORT void ncnn_draw_line_c1(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); 363 | NCNN_EXPORT void ncnn_draw_line_c2(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); 364 | NCNN_EXPORT void ncnn_draw_line_c3(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); 365 | NCNN_EXPORT void ncnn_draw_line_c4(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); 366 | #endif /* NCNN_PIXEL_DRAWING */ 367 | 368 | #ifdef __cplusplus 369 | } /* extern "C" */ 370 | #endif 371 | 372 | #endif /* NCNN_C_API */ 373 | 374 | #endif /* NCNN_C_API_H */ 375 | -------------------------------------------------------------------------------- /include/ncnn/command.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_COMMAND_H 16 | #define NCNN_COMMAND_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_VULKAN 21 | 22 | #include "mat.h" 23 | 24 | namespace ncnn { 25 | 26 | class Pipeline; 27 | #if NCNN_PLATFORM_API 28 | #if __ANDROID_API__ >= 26 29 | class ImportAndroidHardwareBufferPipeline; 30 | #endif // __ANDROID_API__ >= 26 31 | #endif // NCNN_PLATFORM_API 32 | class VkComputePrivate; 33 | class NCNN_EXPORT VkCompute 34 | { 35 | public: 36 | explicit VkCompute(const VulkanDevice* vkdev); 37 | virtual ~VkCompute(); 38 | 39 | public: 40 | void record_upload(const Mat& src, VkMat& dst, const Option& opt); 41 | 42 | void record_upload(const Mat& src, VkImageMat& dst, const Option& opt); 43 | 44 | void record_download(const VkMat& src, Mat& dst, const Option& opt); 45 | 46 | void record_download(const VkImageMat& src, Mat& dst, const Option& opt); 47 | 48 | void record_buffer_to_image(const VkMat& src, VkImageMat& dst, const Option& opt); 49 | 50 | void record_image_to_buffer(const VkImageMat& src, VkMat& dst, const Option& opt); 51 | 52 | void record_clone(const Mat& src, VkMat& dst, const Option& opt); 53 | 54 | void record_clone(const Mat& src, VkImageMat& dst, const Option& opt); 55 | 56 | void record_clone(const VkMat& src, Mat& dst, const Option& opt); 57 | 58 | void record_clone(const VkImageMat& src, Mat& dst, const Option& opt); 59 | 60 | void record_clone(const VkMat& src, VkMat& dst, const Option& opt); 61 | 62 | void record_clone(const VkImageMat& src, VkImageMat& dst, const Option& opt); 63 | 64 | void record_clone(const VkMat& src, VkImageMat& dst, const Option& opt); 65 | 66 | void record_clone(const VkImageMat& src, VkMat& dst, const Option& opt); 67 | 68 | void record_pipeline(const Pipeline* pipeline, const std::vector& bindings, const std::vector& constants, const VkMat& dispatcher); 69 | 70 | void record_pipeline(const Pipeline* pipeline, const std::vector& bindings, const std::vector& constants, const VkImageMat& dispatcher); 71 | 72 
| void record_pipeline(const Pipeline* pipeline, const std::vector& buffer_bindings, const std::vector& image_bindings, const std::vector& constants, const VkMat& dispatcher); 73 | void record_pipeline(const Pipeline* pipeline, const std::vector& buffer_bindings, const std::vector& image_bindings, const std::vector& constants, const VkImageMat& dispatcher); 74 | void record_pipeline(const Pipeline* pipeline, const std::vector& buffer_bindings, const std::vector& image_bindings, const std::vector& constants, const Mat& dispatcher); 75 | 76 | #if NCNN_BENCHMARK 77 | void record_write_timestamp(uint32_t query); 78 | #endif // NCNN_BENCHMARK 79 | 80 | #if NCNN_PLATFORM_API 81 | #if __ANDROID_API__ >= 26 82 | void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkMat& dst); 83 | 84 | void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkImageMat& dst); 85 | #endif // __ANDROID_API__ >= 26 86 | #endif // NCNN_PLATFORM_API 87 | 88 | int submit_and_wait(); 89 | 90 | int reset(); 91 | 92 | #if NCNN_BENCHMARK 93 | int create_query_pool(uint32_t query_count); 94 | 95 | int get_query_pool_results(uint32_t first_query, uint32_t query_count, std::vector& results); 96 | #endif // NCNN_BENCHMARK 97 | 98 | protected: 99 | const VulkanDevice* vkdev; 100 | 101 | void barrier_readwrite(const VkMat& binding); 102 | void barrier_readwrite(const VkImageMat& binding); 103 | void barrier_readonly(const VkImageMat& binding); 104 | 105 | private: 106 | VkComputePrivate* const d; 107 | }; 108 | 109 | class VkTransferPrivate; 110 | class NCNN_EXPORT VkTransfer 111 | { 112 | public: 113 | explicit VkTransfer(const VulkanDevice* vkdev); 114 | virtual ~VkTransfer(); 115 | 116 | public: 117 | void record_upload(const Mat& src, VkMat& dst, const Option& opt, bool flatten = true); 118 | 119 | void record_upload(const Mat& src, VkImageMat& dst, const Option& 
opt); 120 | 121 | int submit_and_wait(); 122 | 123 | protected: 124 | const VulkanDevice* vkdev; 125 | 126 | private: 127 | VkTransferPrivate* const d; 128 | }; 129 | 130 | } // namespace ncnn 131 | 132 | #endif // NCNN_VULKAN 133 | 134 | #endif // NCNN_COMMAND_H 135 | -------------------------------------------------------------------------------- /include/ncnn/cpu.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_CPU_H 16 | #define NCNN_CPU_H 17 | 18 | #include 19 | 20 | #if defined _WIN32 21 | #define WIN32_LEAN_AND_MEAN 22 | #include 23 | #endif 24 | #if defined __ANDROID__ || defined __linux__ 25 | #include // cpu_set_t 26 | #endif 27 | 28 | #include "platform.h" 29 | 30 | namespace ncnn { 31 | 32 | class NCNN_EXPORT CpuSet 33 | { 34 | public: 35 | CpuSet(); 36 | void enable(int cpu); 37 | void disable(int cpu); 38 | void disable_all(); 39 | bool is_enabled(int cpu) const; 40 | int num_enabled() const; 41 | 42 | public: 43 | #if defined _WIN32 44 | ULONG_PTR mask; 45 | #endif 46 | #if defined __ANDROID__ || defined __linux__ 47 | cpu_set_t cpu_set; 48 | #endif 49 | #if __APPLE__ 50 | unsigned int policy; 51 | #endif 52 | }; 53 | 54 | // test optional cpu features 55 | // edsp = armv7 edsp 56 | NCNN_EXPORT int cpu_support_arm_edsp(); 57 | // neon = armv7 neon or aarch64 asimd 58 | NCNN_EXPORT int cpu_support_arm_neon(); 59 | // vfpv4 = armv7 fp16 + fma 60 | NCNN_EXPORT int cpu_support_arm_vfpv4(); 61 | // asimdhp = aarch64 asimd half precision 62 | NCNN_EXPORT int cpu_support_arm_asimdhp(); 63 | // cpuid = aarch64 cpuid info 64 | NCNN_EXPORT int cpu_support_arm_cpuid(); 65 | // asimddp = aarch64 asimd dot product 66 | NCNN_EXPORT int cpu_support_arm_asimddp(); 67 | // asimdfhm = aarch64 asimd fhm 68 | NCNN_EXPORT int cpu_support_arm_asimdfhm(); 69 | // bf16 = aarch64 bf16 70 | NCNN_EXPORT int cpu_support_arm_bf16(); 71 | // i8mm = aarch64 i8mm 72 | NCNN_EXPORT int cpu_support_arm_i8mm(); 73 | // sve = aarch64 sve 74 | NCNN_EXPORT int cpu_support_arm_sve(); 75 | // sve2 = aarch64 sve2 76 | NCNN_EXPORT int cpu_support_arm_sve2(); 77 | // svebf16 = aarch64 svebf16 78 | NCNN_EXPORT int cpu_support_arm_svebf16(); 79 | // svei8mm = aarch64 svei8mm 80 | NCNN_EXPORT int cpu_support_arm_svei8mm(); 81 | // svef32mm = aarch64 svef32mm 82 | NCNN_EXPORT int cpu_support_arm_svef32mm(); 83 | 84 | // avx = x86 avx 85 | NCNN_EXPORT int cpu_support_x86_avx(); 86 | // 
fma = x86 fma 87 | NCNN_EXPORT int cpu_support_x86_fma(); 88 | // xop = x86 xop 89 | NCNN_EXPORT int cpu_support_x86_xop(); 90 | // f16c = x86 f16c 91 | NCNN_EXPORT int cpu_support_x86_f16c(); 92 | // avx2 = x86 avx2 + fma + f16c 93 | NCNN_EXPORT int cpu_support_x86_avx2(); 94 | // avx_vnni = x86 avx vnni 95 | NCNN_EXPORT int cpu_support_x86_avx_vnni(); 96 | // avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl 97 | NCNN_EXPORT int cpu_support_x86_avx512(); 98 | // avx512_vnni = x86 avx512 vnni 99 | NCNN_EXPORT int cpu_support_x86_avx512_vnni(); 100 | // avx512_bf16 = x86 avx512 bf16 101 | NCNN_EXPORT int cpu_support_x86_avx512_bf16(); 102 | // avx512_fp16 = x86 avx512 fp16 103 | NCNN_EXPORT int cpu_support_x86_avx512_fp16(); 104 | 105 | // lsx = loongarch lsx 106 | NCNN_EXPORT int cpu_support_loongarch_lsx(); 107 | // lasx = loongarch lasx 108 | NCNN_EXPORT int cpu_support_loongarch_lasx(); 109 | 110 | // msa = mips mas 111 | NCNN_EXPORT int cpu_support_mips_msa(); 112 | // mmi = loongson mmi 113 | NCNN_EXPORT int cpu_support_loongson_mmi(); 114 | 115 | // v = riscv vector 116 | NCNN_EXPORT int cpu_support_riscv_v(); 117 | // zfh = riscv half-precision float 118 | NCNN_EXPORT int cpu_support_riscv_zfh(); 119 | // vlenb = riscv vector length in bytes 120 | NCNN_EXPORT int cpu_riscv_vlenb(); 121 | 122 | // cpu info 123 | NCNN_EXPORT int get_cpu_count(); 124 | NCNN_EXPORT int get_little_cpu_count(); 125 | NCNN_EXPORT int get_big_cpu_count(); 126 | 127 | NCNN_EXPORT int get_physical_cpu_count(); 128 | NCNN_EXPORT int get_physical_little_cpu_count(); 129 | NCNN_EXPORT int get_physical_big_cpu_count(); 130 | 131 | // cpu l2 varies from 64k to 1M, but l3 can be zero 132 | NCNN_EXPORT int get_cpu_level2_cache_size(); 133 | NCNN_EXPORT int get_cpu_level3_cache_size(); 134 | 135 | // bind all threads on little clusters if powersave enabled 136 | // affects HMP arch cpu like ARM big.LITTLE 137 | // only implemented on android at the moment 138 | // switching 
powersave is expensive and not thread-safe 139 | // 0 = all cores enabled(default) 140 | // 1 = only little clusters enabled 141 | // 2 = only big clusters enabled 142 | // return 0 if success for setter function 143 | NCNN_EXPORT int get_cpu_powersave(); 144 | NCNN_EXPORT int set_cpu_powersave(int powersave); 145 | 146 | // convenient wrapper 147 | NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave); 148 | 149 | // set explicit thread affinity 150 | NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask); 151 | 152 | // runtime thread affinity info 153 | NCNN_EXPORT int is_current_thread_running_on_a53_a55(); 154 | 155 | // misc function wrapper for openmp routines 156 | NCNN_EXPORT int get_omp_num_threads(); 157 | NCNN_EXPORT void set_omp_num_threads(int num_threads); 158 | 159 | NCNN_EXPORT int get_omp_dynamic(); 160 | NCNN_EXPORT void set_omp_dynamic(int dynamic); 161 | 162 | NCNN_EXPORT int get_omp_thread_num(); 163 | 164 | NCNN_EXPORT int get_kmp_blocktime(); 165 | NCNN_EXPORT void set_kmp_blocktime(int time_ms); 166 | 167 | // need to flush denormals on Intel Chipset. 168 | // Other architectures such as ARM can be added as needed. 169 | // 0 = DAZ OFF, FTZ OFF 170 | // 1 = DAZ ON , FTZ OFF 171 | // 2 = DAZ OFF, FTZ ON 172 | // 3 = DAZ ON, FTZ ON 173 | NCNN_EXPORT int get_flush_denormals(); 174 | NCNN_EXPORT int set_flush_denormals(int flush_denormals); 175 | 176 | } // namespace ncnn 177 | 178 | #endif // NCNN_CPU_H 179 | -------------------------------------------------------------------------------- /include/ncnn/datareader.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 
4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_DATAREADER_H 16 | #define NCNN_DATAREADER_H 17 | 18 | #include "platform.h" 19 | #if NCNN_STDIO 20 | #include 21 | #endif 22 | 23 | #if NCNN_PLATFORM_API 24 | #if __ANDROID_API__ >= 9 25 | #include 26 | #endif 27 | #endif // NCNN_PLATFORM_API 28 | 29 | namespace ncnn { 30 | 31 | // data read wrapper 32 | class NCNN_EXPORT DataReader 33 | { 34 | public: 35 | DataReader(); 36 | virtual ~DataReader(); 37 | 38 | #if NCNN_STRING 39 | // parse plain param text 40 | // return 1 if scan success 41 | virtual int scan(const char* format, void* p) const; 42 | #endif // NCNN_STRING 43 | 44 | // read binary param and model data 45 | // return bytes read 46 | virtual size_t read(void* buf, size_t size) const; 47 | 48 | // get model data reference 49 | // return bytes referenced 50 | virtual size_t reference(size_t size, const void** buf) const; 51 | }; 52 | 53 | #if NCNN_STDIO 54 | class DataReaderFromStdioPrivate; 55 | class NCNN_EXPORT DataReaderFromStdio : public DataReader 56 | { 57 | public: 58 | explicit DataReaderFromStdio(FILE* fp); 59 | virtual ~DataReaderFromStdio(); 60 | 61 | #if NCNN_STRING 62 | virtual int scan(const char* format, void* p) const; 63 | #endif // NCNN_STRING 64 | virtual size_t read(void* buf, size_t size) const; 65 | 66 | private: 67 | DataReaderFromStdio(const DataReaderFromStdio&); 68 | DataReaderFromStdio& operator=(const DataReaderFromStdio&); 69 | 
70 | private: 71 | DataReaderFromStdioPrivate* const d; 72 | }; 73 | #endif // NCNN_STDIO 74 | 75 | class DataReaderFromMemoryPrivate; 76 | class NCNN_EXPORT DataReaderFromMemory : public DataReader 77 | { 78 | public: 79 | explicit DataReaderFromMemory(const unsigned char*& mem); 80 | virtual ~DataReaderFromMemory(); 81 | 82 | #if NCNN_STRING 83 | virtual int scan(const char* format, void* p) const; 84 | #endif // NCNN_STRING 85 | virtual size_t read(void* buf, size_t size) const; 86 | virtual size_t reference(size_t size, const void** buf) const; 87 | 88 | private: 89 | DataReaderFromMemory(const DataReaderFromMemory&); 90 | DataReaderFromMemory& operator=(const DataReaderFromMemory&); 91 | 92 | private: 93 | DataReaderFromMemoryPrivate* const d; 94 | }; 95 | 96 | #if NCNN_PLATFORM_API 97 | #if __ANDROID_API__ >= 9 98 | class DataReaderFromAndroidAssetPrivate; 99 | class NCNN_EXPORT DataReaderFromAndroidAsset : public DataReader 100 | { 101 | public: 102 | explicit DataReaderFromAndroidAsset(AAsset* asset); 103 | virtual ~DataReaderFromAndroidAsset(); 104 | 105 | #if NCNN_STRING 106 | virtual int scan(const char* format, void* p) const; 107 | #endif // NCNN_STRING 108 | virtual size_t read(void* buf, size_t size) const; 109 | 110 | private: 111 | DataReaderFromAndroidAsset(const DataReaderFromAndroidAsset&); 112 | DataReaderFromAndroidAsset& operator=(const DataReaderFromAndroidAsset&); 113 | 114 | private: 115 | DataReaderFromAndroidAssetPrivate* const d; 116 | }; 117 | #endif // __ANDROID_API__ >= 9 118 | #endif // NCNN_PLATFORM_API 119 | 120 | } // namespace ncnn 121 | 122 | #endif // NCNN_DATAREADER_H 123 | -------------------------------------------------------------------------------- /include/ncnn/gpu.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2018 THL A29 Limited, a Tencent company. 
All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_GPU_H 16 | #define NCNN_GPU_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_VULKAN 21 | 22 | #include "mat.h" 23 | 24 | namespace ncnn { 25 | 26 | // instance 27 | 28 | // Create VkInstance and initialize some objects that need to be calculated by GPU 29 | // Creates a VkInstance object, Checks the extended attributes supported by the Vulkan instance concerned, 30 | // Initializes, and creates Vulkan validation layers (if ENABLE_VALIDATION_LAYER is enabled), 31 | // Iterates over all supported physical devices, etc. 32 | NCNN_EXPORT int create_gpu_instance(const char* driver_path = 0); 33 | 34 | // Get global VkInstance variable 35 | // Must be called after create_gpu_instance() and before destroy_gpu_instance() 36 | NCNN_EXPORT VkInstance get_gpu_instance(); 37 | 38 | // Destroy VkInstance object and free the memory of the associated object 39 | // Usually called in the destructor of the main program exit 40 | // The function will internally ensure that all vulkan devices are idle before proceeding with destruction. 
41 | NCNN_EXPORT void destroy_gpu_instance(); 42 | 43 | // vulkan core 44 | extern PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers; 45 | extern PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets; 46 | extern PFN_vkAllocateMemory vkAllocateMemory; 47 | extern PFN_vkBeginCommandBuffer vkBeginCommandBuffer; 48 | extern PFN_vkBindBufferMemory vkBindBufferMemory; 49 | extern PFN_vkBindImageMemory vkBindImageMemory; 50 | extern PFN_vkCmdBeginQuery vkCmdBeginQuery; 51 | extern PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; 52 | extern PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; 53 | extern PFN_vkCmdBindPipeline vkCmdBindPipeline; 54 | extern PFN_vkCmdCopyBuffer vkCmdCopyBuffer; 55 | extern PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage; 56 | extern PFN_vkCmdCopyImage vkCmdCopyImage; 57 | extern PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer; 58 | extern PFN_vkCmdCopyQueryPoolResults vkCmdCopyQueryPoolResults; 59 | extern PFN_vkCmdDispatch vkCmdDispatch; 60 | extern PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect; 61 | extern PFN_vkCmdEndQuery vkCmdEndQuery; 62 | extern PFN_vkCmdExecuteCommands vkCmdExecuteCommands; 63 | extern PFN_vkCmdFillBuffer vkCmdFillBuffer; 64 | extern PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; 65 | extern PFN_vkCmdPushConstants vkCmdPushConstants; 66 | extern PFN_vkCmdResetQueryPool vkCmdResetQueryPool; 67 | extern PFN_vkCmdResolveImage vkCmdResolveImage; 68 | extern PFN_vkCmdUpdateBuffer vkCmdUpdateBuffer; 69 | extern PFN_vkCmdWriteTimestamp vkCmdWriteTimestamp; 70 | extern PFN_vkCreateBuffer vkCreateBuffer; 71 | extern PFN_vkCreateBufferView vkCreateBufferView; 72 | extern PFN_vkCreateCommandPool vkCreateCommandPool; 73 | extern PFN_vkCreateComputePipelines vkCreateComputePipelines; 74 | extern PFN_vkCreateDescriptorPool vkCreateDescriptorPool; 75 | extern PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; 76 | extern PFN_vkCreateDevice vkCreateDevice; 77 | extern PFN_vkCreateFence vkCreateFence; 78 | extern 
PFN_vkCreateImage vkCreateImage; 79 | extern PFN_vkCreateImageView vkCreateImageView; 80 | extern PFN_vkCreatePipelineCache vkCreatePipelineCache; 81 | extern PFN_vkCreatePipelineLayout vkCreatePipelineLayout; 82 | extern PFN_vkCreateQueryPool vkCreateQueryPool; 83 | extern PFN_vkCreateSampler vkCreateSampler; 84 | extern PFN_vkCreateSemaphore vkCreateSemaphore; 85 | extern PFN_vkCreateShaderModule vkCreateShaderModule; 86 | extern PFN_vkDestroyBuffer vkDestroyBuffer; 87 | extern PFN_vkDestroyBufferView vkDestroyBufferView; 88 | extern PFN_vkDestroyCommandPool vkDestroyCommandPool; 89 | extern PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; 90 | extern PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; 91 | extern PFN_vkDestroyDevice vkDestroyDevice; 92 | extern PFN_vkDestroyFence vkDestroyFence; 93 | extern PFN_vkDestroyImage vkDestroyImage; 94 | extern PFN_vkDestroyImageView vkDestroyImageView; 95 | extern PFN_vkDestroyInstance vkDestroyInstance; 96 | extern PFN_vkDestroyPipeline vkDestroyPipeline; 97 | extern PFN_vkDestroyPipelineCache vkDestroyPipelineCache; 98 | extern PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout; 99 | extern PFN_vkDestroyQueryPool vkDestroyQueryPool; 100 | extern PFN_vkDestroySampler vkDestroySampler; 101 | extern PFN_vkDestroySemaphore vkDestroySemaphore; 102 | extern PFN_vkDestroyShaderModule vkDestroyShaderModule; 103 | extern PFN_vkDeviceWaitIdle vkDeviceWaitIdle; 104 | extern PFN_vkEndCommandBuffer vkEndCommandBuffer; 105 | extern PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties; 106 | extern PFN_vkEnumerateDeviceLayerProperties vkEnumerateDeviceLayerProperties; 107 | extern PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices; 108 | extern PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges; 109 | extern PFN_vkFreeCommandBuffers vkFreeCommandBuffers; 110 | extern PFN_vkFreeDescriptorSets vkFreeDescriptorSets; 111 | extern PFN_vkFreeMemory vkFreeMemory; 112 | extern 
PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; 113 | extern PFN_vkGetDeviceMemoryCommitment vkGetDeviceMemoryCommitment; 114 | extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; 115 | extern PFN_vkGetDeviceQueue vkGetDeviceQueue; 116 | extern PFN_vkGetFenceStatus vkGetFenceStatus; 117 | extern PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; 118 | extern PFN_vkGetImageSubresourceLayout vkGetImageSubresourceLayout; 119 | extern PFN_vkGetPhysicalDeviceFeatures vkGetPhysicalDeviceFeatures; 120 | extern PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties; 121 | extern PFN_vkGetPhysicalDeviceImageFormatProperties vkGetPhysicalDeviceImageFormatProperties; 122 | extern PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; 123 | extern PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; 124 | extern PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties; 125 | extern PFN_vkGetPipelineCacheData vkGetPipelineCacheData; 126 | extern PFN_vkGetQueryPoolResults vkGetQueryPoolResults; 127 | extern PFN_vkInvalidateMappedMemoryRanges vkInvalidateMappedMemoryRanges; 128 | extern PFN_vkMapMemory vkMapMemory; 129 | extern PFN_vkMergePipelineCaches vkMergePipelineCaches; 130 | extern PFN_vkQueueSubmit vkQueueSubmit; 131 | extern PFN_vkQueueWaitIdle vkQueueWaitIdle; 132 | extern PFN_vkResetCommandBuffer vkResetCommandBuffer; 133 | extern PFN_vkResetCommandPool vkResetCommandPool; 134 | extern PFN_vkResetDescriptorPool vkResetDescriptorPool; 135 | extern PFN_vkResetFences vkResetFences; 136 | extern PFN_vkUnmapMemory vkUnmapMemory; 137 | extern PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; 138 | extern PFN_vkWaitForFences vkWaitForFences; 139 | 140 | // instance extension capability 141 | extern int support_VK_KHR_external_memory_capabilities; 142 | extern int support_VK_KHR_get_physical_device_properties2; 143 | extern int 
support_VK_KHR_get_surface_capabilities2; 144 | extern int support_VK_KHR_surface; 145 | extern int support_VK_EXT_debug_utils; 146 | extern int support_VK_EXT_validation_features; 147 | extern int support_VK_EXT_validation_flags; 148 | #if __ANDROID_API__ >= 26 149 | extern int support_VK_KHR_android_surface; 150 | #endif // __ANDROID_API__ >= 26 151 | 152 | // VK_KHR_cooperative_matrix 153 | extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR; 154 | 155 | // VK_KHR_external_memory_capabilities 156 | extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR; 157 | 158 | // VK_KHR_get_physical_device_properties2 159 | extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR; 160 | extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR; 161 | extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR; 162 | extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR; 163 | extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR; 164 | extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR; 165 | 166 | // VK_KHR_get_surface_capabilities2 167 | extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR; 168 | extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR; 169 | 170 | // VK_KHR_surface 171 | extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR; 172 | extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR; 173 | extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR; 174 | extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR; 175 | extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR 
vkGetPhysicalDeviceSurfacePresentModesKHR; 176 | 177 | #if __ANDROID_API__ >= 26 178 | // VK_KHR_android_surface 179 | extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR; 180 | #endif // __ANDROID_API__ >= 26 181 | 182 | // VK_NV_cooperative_matrix 183 | extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV; 184 | 185 | // get info 186 | NCNN_EXPORT int get_gpu_count(); 187 | NCNN_EXPORT int get_default_gpu_index(); 188 | 189 | class GpuInfoPrivate; 190 | class NCNN_EXPORT GpuInfo 191 | { 192 | public: 193 | explicit GpuInfo(); 194 | virtual ~GpuInfo(); 195 | 196 | // vulkan physical device 197 | VkPhysicalDevice physical_device() const; 198 | 199 | // memory properties 200 | const VkPhysicalDeviceMemoryProperties& physical_device_memory_properties() const; 201 | 202 | // info 203 | uint32_t api_version() const; 204 | uint32_t driver_version() const; 205 | uint32_t vendor_id() const; 206 | uint32_t device_id() const; 207 | const char* device_name() const; 208 | uint8_t* pipeline_cache_uuid() const; 209 | 210 | // 0 = discrete gpu 211 | // 1 = integrated gpu 212 | // 2 = virtual gpu 213 | // 3 = cpu 214 | int type() const; 215 | 216 | // hardware limit 217 | uint32_t max_shared_memory_size() const; 218 | uint32_t max_workgroup_count_x() const; 219 | uint32_t max_workgroup_count_y() const; 220 | uint32_t max_workgroup_count_z() const; 221 | uint32_t max_workgroup_invocations() const; 222 | uint32_t max_workgroup_size_x() const; 223 | uint32_t max_workgroup_size_y() const; 224 | uint32_t max_workgroup_size_z() const; 225 | size_t memory_map_alignment() const; 226 | size_t buffer_offset_alignment() const; 227 | size_t non_coherent_atom_size() const; 228 | size_t buffer_image_granularity() const; 229 | uint32_t max_image_dimension_1d() const; 230 | uint32_t max_image_dimension_2d() const; 231 | uint32_t max_image_dimension_3d() const; 232 | float timestamp_period() const; 233 | 234 | // runtime 235 | 
uint32_t compute_queue_family_index() const; 236 | uint32_t graphics_queue_family_index() const; 237 | uint32_t transfer_queue_family_index() const; 238 | 239 | uint32_t compute_queue_count() const; 240 | uint32_t graphics_queue_count() const; 241 | uint32_t transfer_queue_count() const; 242 | 243 | // property 244 | bool unified_compute_transfer_queue() const; 245 | 246 | // subgroup 247 | uint32_t subgroup_size() const; 248 | bool support_subgroup_basic() const; 249 | bool support_subgroup_vote() const; 250 | bool support_subgroup_ballot() const; 251 | bool support_subgroup_shuffle() const; 252 | 253 | // bug is not feature 254 | bool bug_storage_buffer_no_l1() const; 255 | bool bug_corrupted_online_pipeline_cache() const; 256 | bool bug_buffer_image_load_zero() const; 257 | 258 | // but sometimes bug is a feature 259 | bool bug_implicit_fp16_arithmetic() const; 260 | 261 | // fp16 and int8 feature 262 | bool support_fp16_packed() const; 263 | bool support_fp16_storage() const; 264 | bool support_fp16_uniform() const; 265 | bool support_fp16_arithmetic() const; 266 | bool support_int8_packed() const; 267 | bool support_int8_storage() const; 268 | bool support_int8_uniform() const; 269 | bool support_int8_arithmetic() const; 270 | 271 | // ycbcr conversion feature 272 | bool support_ycbcr_conversion() const; 273 | 274 | // cooperative matrix feature 275 | bool support_cooperative_matrix() const; 276 | bool support_cooperative_matrix_8_8_16() const; 277 | bool support_cooperative_matrix_16_8_8() const; 278 | bool support_cooperative_matrix_16_8_16() const; 279 | bool support_cooperative_matrix_16_16_16() const; 280 | 281 | // extension capability 282 | int support_VK_KHR_8bit_storage() const; 283 | int support_VK_KHR_16bit_storage() const; 284 | int support_VK_KHR_bind_memory2() const; 285 | int support_VK_KHR_buffer_device_address() const; 286 | int support_VK_KHR_create_renderpass2() const; 287 | int support_VK_KHR_cooperative_matrix() const; 288 | int 
support_VK_KHR_dedicated_allocation() const; 289 | int support_VK_KHR_descriptor_update_template() const; 290 | int support_VK_KHR_external_memory() const; 291 | int support_VK_KHR_get_memory_requirements2() const; 292 | int support_VK_KHR_maintenance1() const; 293 | int support_VK_KHR_maintenance2() const; 294 | int support_VK_KHR_maintenance3() const; 295 | int support_VK_KHR_multiview() const; 296 | int support_VK_KHR_portability_subset() const; 297 | int support_VK_KHR_push_descriptor() const; 298 | int support_VK_KHR_sampler_ycbcr_conversion() const; 299 | int support_VK_KHR_shader_float16_int8() const; 300 | int support_VK_KHR_shader_float_controls() const; 301 | int support_VK_KHR_storage_buffer_storage_class() const; 302 | int support_VK_KHR_swapchain() const; 303 | int support_VK_EXT_buffer_device_address() const; 304 | int support_VK_EXT_descriptor_indexing() const; 305 | int support_VK_EXT_memory_budget() const; 306 | int support_VK_EXT_memory_priority() const; 307 | int support_VK_EXT_queue_family_foreign() const; 308 | int support_VK_AMD_device_coherent_memory() const; 309 | #if __ANDROID_API__ >= 26 310 | int support_VK_ANDROID_external_memory_android_hardware_buffer() const; 311 | #endif // __ANDROID_API__ >= 26 312 | int support_VK_NV_cooperative_matrix() const; 313 | 314 | private: 315 | GpuInfo(const GpuInfo&); 316 | GpuInfo& operator=(const GpuInfo&); 317 | 318 | private: 319 | friend int create_gpu_instance(const char* driver_path); 320 | GpuInfoPrivate* const d; 321 | }; 322 | 323 | NCNN_EXPORT const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index()); 324 | 325 | class VkAllocator; 326 | class VkCompute; 327 | class Option; 328 | class PipelineCache; 329 | class VulkanDevicePrivate; 330 | class NCNN_EXPORT VulkanDevice 331 | { 332 | public: 333 | VulkanDevice(int device_index = get_default_gpu_index()); 334 | ~VulkanDevice(); 335 | 336 | const GpuInfo& info; 337 | 338 | VkDevice vkdevice() const; 339 | 340 | VkShaderModule 
compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const; 341 | 342 | // with fixed workgroup size 343 | VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) const; 344 | 345 | // helper for creating pipeline 346 | int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const; 347 | int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const; 348 | int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector& specializations, VkPipeline* pipeline) const; 349 | int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; 350 | 351 | uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not) const; 352 | bool is_mappable(uint32_t memory_type_index) const; 353 | bool is_coherent(uint32_t memory_type_index) const; 354 | 355 | VkQueue acquire_queue(uint32_t queue_family_index) const; 356 | void reclaim_queue(uint32_t queue_family_index, VkQueue queue) const; 357 | 358 | // allocator on this device 359 | VkAllocator* acquire_blob_allocator() const; 360 | void reclaim_blob_allocator(VkAllocator* allocator) const; 361 | 362 | VkAllocator* acquire_staging_allocator() const; 363 | void reclaim_staging_allocator(VkAllocator* allocator) const; 364 | 365 | // immutable sampler for texelfetch 366 | const VkSampler* immutable_texelfetch_sampler() const; 367 | 368 | // dummy buffer image 369 | VkMat get_dummy_buffer() const; 370 | VkImageMat get_dummy_image() const; 371 | VkImageMat get_dummy_image_readonly() const; 372 | 373 | // pipeline cache on this device 374 | 
const PipelineCache* get_pipeline_cache() const; 375 | 376 | // test image allocation 377 | bool shape_support_image_storage(const Mat& shape) const; 378 | 379 | // current gpu heap memory budget in MB 380 | uint32_t get_heap_budget() const; 381 | 382 | // utility operator 383 | void convert_packing(const VkMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; 384 | void convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; 385 | void convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; 386 | void convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; 387 | 388 | // VK_KHR_bind_memory2 389 | PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; 390 | PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; 391 | 392 | // VK_KHR_buffer_device_address 393 | PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR; 394 | PFN_vkGetBufferOpaqueCaptureAddressKHR vkGetBufferOpaqueCaptureAddressKHR; 395 | PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR vkGetDeviceMemoryOpaqueCaptureAddressKHR; 396 | 397 | // VK_KHR_descriptor_update_template 398 | PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; 399 | PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; 400 | PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; 401 | 402 | // VK_KHR_get_memory_requirements2 403 | PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR; 404 | PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; 405 | 406 | // VK_KHR_maintenance1 407 | PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR; 408 | 409 | // VK_KHR_maintenance3 410 | PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR; 411 | 412 | // VK_KHR_push_descriptor 413 | PFN_vkCmdPushDescriptorSetWithTemplateKHR 
vkCmdPushDescriptorSetWithTemplateKHR; 414 | PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; 415 | 416 | // VK_KHR_sampler_ycbcr_conversion 417 | PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR; 418 | PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR; 419 | 420 | // VK_KHR_swapchain 421 | PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; 422 | PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; 423 | PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; 424 | PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; 425 | PFN_vkQueuePresentKHR vkQueuePresentKHR; 426 | 427 | // VK_EXT_buffer_device_address 428 | PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; 429 | 430 | #if __ANDROID_API__ >= 26 431 | // VK_ANDROID_external_memory_android_hardware_buffer 432 | PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; 433 | PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID; 434 | #endif // __ANDROID_API__ >= 26 435 | 436 | protected: 437 | // device extension 438 | int init_device_extension(); 439 | 440 | private: 441 | VulkanDevice(const VulkanDevice&); 442 | VulkanDevice& operator=(const VulkanDevice&); 443 | 444 | private: 445 | VulkanDevicePrivate* const d; 446 | }; 447 | 448 | NCNN_EXPORT VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index()); 449 | 450 | // online spirv compilation 451 | NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector& spirv); 452 | NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector& spirv); 453 | NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector& spirv); 454 | 455 | // info from spirv 456 | class NCNN_EXPORT ShaderInfo 457 | { 458 | public: 459 | int specialization_count; 460 | int binding_count; 461 | int push_constant_count; 462 | 463 | // 0 = null 464 | // 1 
= storage buffer 465 | // 2 = storage image 466 | // 3 = combined image sampler 467 | int binding_types[16]; // 16 is large enough I think ... 468 | 469 | int reserved_0; 470 | int reserved_1; 471 | int reserved_2; 472 | int reserved_3; 473 | }; 474 | 475 | NCNN_EXPORT int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info); 476 | 477 | } // namespace ncnn 478 | 479 | #endif // NCNN_VULKAN 480 | 481 | #endif // NCNN_GPU_H 482 | -------------------------------------------------------------------------------- /include/ncnn/layer.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_LAYER_H 16 | #define NCNN_LAYER_H 17 | 18 | #include "mat.h" 19 | #include "modelbin.h" 20 | #include "option.h" 21 | #include "paramdict.h" 22 | #include "platform.h" 23 | 24 | #if NCNN_VULKAN 25 | #include "command.h" 26 | #include "pipeline.h" 27 | #endif // NCNN_VULKAN 28 | 29 | namespace ncnn { 30 | 31 | class NCNN_EXPORT Layer 32 | { 33 | public: 34 | // empty 35 | Layer(); 36 | // virtual destructor 37 | virtual ~Layer(); 38 | 39 | // load layer specific parameter from parsed dict 40 | // return 0 if success 41 | virtual int load_param(const ParamDict& pd); 42 | 43 | // load layer specific weight data from model binary 44 | // return 0 if success 45 | virtual int load_model(const ModelBin& mb); 46 | 47 | // layer implementation specific setup 48 | // return 0 if success 49 | virtual int create_pipeline(const Option& opt); 50 | 51 | // layer implementation specific clean 52 | // return 0 if success 53 | virtual int destroy_pipeline(const Option& opt); 54 | 55 | public: 56 | // one input and one output blob 57 | bool one_blob_only; 58 | 59 | // support inplace inference 60 | bool support_inplace; 61 | 62 | // support vulkan compute 63 | bool support_vulkan; 64 | 65 | // accept input blob with packed storage 66 | bool support_packing; 67 | 68 | // accept bf16 69 | bool support_bf16_storage; 70 | 71 | // accept fp16 72 | bool support_fp16_storage; 73 | 74 | // accept int8 75 | bool support_int8_storage; 76 | 77 | // shader image storage 78 | bool support_image_storage; 79 | 80 | // shader tensor storage 81 | bool support_tensor_storage; 82 | 83 | bool support_reserved_00; 84 | 85 | bool support_reserved_0; 86 | bool support_reserved_1; 87 | bool support_reserved_2; 88 | bool support_reserved_3; 89 | bool support_reserved_4; 90 | bool support_reserved_5; 91 | bool support_reserved_6; 92 | bool support_reserved_7; 93 | bool support_reserved_8; 94 | bool support_reserved_9; 95 | 96 | // feature disabled set 97 | int featmask; 98 | 99 | 
public: 100 | // implement inference 101 | // return 0 if success 102 | virtual int forward(const std::vector& bottom_blobs, std::vector& top_blobs, const Option& opt) const; 103 | virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; 104 | 105 | // implement inplace inference 106 | // return 0 if success 107 | virtual int forward_inplace(std::vector& bottom_top_blobs, const Option& opt) const; 108 | virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; 109 | 110 | #if NCNN_VULKAN 111 | public: 112 | // upload weight blob from host to device 113 | virtual int upload_model(VkTransfer& cmd, const Option& opt); 114 | 115 | public: 116 | // implement inference 117 | // return 0 if success 118 | virtual int forward(const std::vector& bottom_blobs, std::vector& top_blobs, VkCompute& cmd, const Option& opt) const; 119 | virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const; 120 | 121 | // implement inference 122 | // return 0 if success 123 | virtual int forward(const std::vector& bottom_blobs, std::vector& top_blobs, VkCompute& cmd, const Option& opt) const; 124 | virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const; 125 | 126 | // implement inplace inference 127 | // return 0 if success 128 | virtual int forward_inplace(std::vector& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; 129 | virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; 130 | 131 | // implement inplace inference 132 | // return 0 if success 133 | virtual int forward_inplace(std::vector& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; 134 | virtual int forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; 135 | 136 | public: 137 | // assigned immediately after creating this layer 138 | const VulkanDevice* vkdev; 139 | #endif // NCNN_VULKAN 140 | 
141 | public: 142 | // custom user data 143 | void* userdata; 144 | // layer type index 145 | int typeindex; 146 | #if NCNN_STRING 147 | // layer type name 148 | std::string type; 149 | // layer name 150 | std::string name; 151 | #endif // NCNN_STRING 152 | // blob index which this layer needs as input 153 | std::vector bottoms; 154 | // blob index which this layer produces as output 155 | std::vector tops; 156 | // shape hint 157 | std::vector bottom_shapes; 158 | std::vector top_shapes; 159 | }; 160 | 161 | // layer factory function 162 | typedef Layer* (*layer_creator_func)(void*); 163 | typedef void (*layer_destroyer_func)(Layer*, void*); 164 | 165 | struct layer_registry_entry 166 | { 167 | #if NCNN_STRING 168 | // layer type name 169 | const char* name; 170 | #endif // NCNN_STRING 171 | // layer factory entry 172 | layer_creator_func creator; 173 | }; 174 | 175 | struct custom_layer_registry_entry 176 | { 177 | #if NCNN_STRING 178 | // layer type name 179 | const char* name; 180 | #endif // NCNN_STRING 181 | // layer factory entry 182 | layer_creator_func creator; 183 | layer_destroyer_func destroyer; 184 | void* userdata; 185 | }; 186 | 187 | struct overwrite_builtin_layer_registry_entry 188 | { 189 | // layer type index 190 | int typeindex; 191 | // layer factory entry 192 | layer_creator_func creator; 193 | layer_destroyer_func destroyer; 194 | void* userdata; 195 | }; 196 | 197 | #if NCNN_STRING 198 | // get layer type from type name 199 | NCNN_EXPORT int layer_to_index(const char* type); 200 | // create layer from type name 201 | NCNN_EXPORT Layer* create_layer(const char* type); 202 | NCNN_EXPORT Layer* create_layer_naive(const char* type); 203 | NCNN_EXPORT Layer* create_layer_cpu(const char* type); 204 | #if NCNN_VULKAN 205 | NCNN_EXPORT Layer* create_layer_vulkan(const char* type); 206 | #endif // NCNN_VULKAN 207 | #endif // NCNN_STRING 208 | // create layer from layer type 209 | NCNN_EXPORT Layer* create_layer(int index); 210 | NCNN_EXPORT Layer* 
create_layer_naive(int index); 211 | NCNN_EXPORT Layer* create_layer_cpu(int index); 212 | #if NCNN_VULKAN 213 | NCNN_EXPORT Layer* create_layer_vulkan(int index); 214 | #endif // NCNN_VULKAN 215 | 216 | #define DEFINE_LAYER_CREATOR(name) \ 217 | ::ncnn::Layer* name##_layer_creator(void* /*userdata*/) \ 218 | { \ 219 | return new name; \ 220 | } 221 | 222 | #define DEFINE_LAYER_DESTROYER(name) \ 223 | void name##_layer_destroyer(::ncnn::Layer* layer, void* /*userdata*/) \ 224 | { \ 225 | delete layer; \ 226 | } 227 | 228 | } // namespace ncnn 229 | 230 | #endif // NCNN_LAYER_H 231 | -------------------------------------------------------------------------------- /include/ncnn/layer_shader_type.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
#ifndef NCNN_LAYER_SHADER_TYPE_H
#define NCNN_LAYER_SHADER_TYPE_H

namespace ncnn {

namespace LayerShaderType {
// shader type ids; the enum body is textually pulled in from the
// cmake-generated layer_shader_type_enum.h
enum LayerShaderType
{
#include "layer_shader_type_enum.h"
};
} // namespace LayerShaderType

} // namespace ncnn

#endif // NCNN_LAYER_SHADER_TYPE_H
--------------------------------------------------------------------------------
/include/ncnn/layer_shader_type_enum.h:
--------------------------------------------------------------------------------
// Layer Shader Enum header
//
// This file is auto-generated by cmake, don't edit it.
// (empty here: this build was made without vulkan shaders)


--------------------------------------------------------------------------------
/include/ncnn/layer_type.h:
--------------------------------------------------------------------------------
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_LAYER_TYPE_H
#define NCNN_LAYER_TYPE_H

namespace ncnn {

namespace LayerType {
// built-in layer type ids; the enum body is textually pulled in from the
// cmake-generated layer_type_enum.h
enum LayerType
{
#include "layer_type_enum.h"
    // flag bit marking custom (user-registered) layer type indices
    CustomBit = (1 << 8),
};
} // namespace LayerType

} // namespace ncnn

#endif // NCNN_LAYER_TYPE_H
--------------------------------------------------------------------------------
/include/ncnn/layer_type_enum.h:
--------------------------------------------------------------------------------
// Layer Type Enum header
//
// This file is auto-generated by cmake, don't edit it.

AbsVal = 0,
ArgMax = 1,
BatchNorm = 2,
Bias = 3,
BNLL = 4,
Concat = 5,
Convolution = 6,
Crop = 7,
Deconvolution = 8,
Dropout = 9,
Eltwise = 10,
ELU = 11,
Embed = 12,
Exp = 13,
Flatten = 14,
InnerProduct = 15,
Input = 16,
Log = 17,
LRN = 18,
MemoryData = 19,
MVN = 20,
Pooling = 21,
Power = 22,
PReLU = 23,
Proposal = 24,
Reduction = 25,
ReLU = 26,
Reshape = 27,
ROIPooling = 28,
Scale = 29,
Sigmoid = 30,
Slice = 31,
Softmax = 32,
Split = 33,
SPP = 34,
TanH = 35,
Threshold = 36,
Tile = 37,
RNN = 38,
LSTM = 39,
BinaryOp = 40,
UnaryOp = 41,
ConvolutionDepthWise = 42,
Padding = 43,
Squeeze = 44,
ExpandDims = 45,
Normalize = 46,
Permute = 47,
PriorBox = 48,
DetectionOutput = 49,
Interp = 50,
DeconvolutionDepthWise = 51,
ShuffleChannel = 52,
InstanceNorm = 53,
Clip = 54,
Reorg = 55,
YoloDetectionOutput = 56,
Quantize = 57,
Dequantize = 58,
Yolov3DetectionOutput = 59,
PSROIPooling = 60,
ROIAlign = 61,
Packing = 62,
Requantize = 63,
Cast = 64,
HardSigmoid = 65,
SELU = 66,
HardSwish = 67,
Noop = 68,
PixelShuffle = 69,
DeepCopy = 70,
Mish = 71,
StatisticsPooling = 72,
Swish = 73,
Gemm = 74,
GroupNorm = 75,
LayerNorm = 76,
Softplus = 77,
GRU = 78,
MultiHeadAttention = 79,
GELU = 80,
Convolution1D = 81,
Pooling1D = 82,
ConvolutionDepthWise1D = 83,
Convolution3D = 84,
ConvolutionDepthWise3D = 85,
Pooling3D = 86,
MatMul = 87,
Deconvolution1D = 88,
DeconvolutionDepthWise1D = 89,
Deconvolution3D = 90,
DeconvolutionDepthWise3D = 91,
Einsum = 92,
DeformableConv2D = 93,
GLU = 94,
Fold = 95,
Unfold = 96,
GridSample = 97,
CumulativeSum = 98,
CopyTo = 99,
Erf = 100,
Diag = 101,
CELU = 102,
Shrink = 103,
RMSNorm = 104,

--------------------------------------------------------------------------------
/include/ncnn/modelbin.h:
--------------------------------------------------------------------------------
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_MODELBIN_H
#define NCNN_MODELBIN_H

#include "mat.h"

namespace ncnn {

class DataReader;

// Abstract weight-blob loader; layers pull their weights through the
// load() overloads during Layer::load_model.
class NCNN_EXPORT ModelBin
{
public:
    ModelBin();
    virtual ~ModelBin();
    // element type
    // 0 = auto
    // 1 = float32
    // 2 = float16
    // 3 = int8
    // load vec
    virtual Mat load(int w, int type) const;
    // load image
    virtual Mat load(int w, int h, int type) const;
    // load dim
    virtual Mat load(int w, int h, int c, int type) const;
    // load cube
    virtual Mat load(int w, int h, int d, int c, int type) const;
};

class ModelBinFromDataReaderPrivate;

// ModelBin backed by a DataReader (e.g. a .bin file stream).
class NCNN_EXPORT ModelBinFromDataReader : public ModelBin
{
public:
    explicit ModelBinFromDataReader(const DataReader& dr);
    virtual ~ModelBinFromDataReader();

    virtual Mat load(int w, int type) const;

private:
    // non-copyable (pimpl owns reader state)
    ModelBinFromDataReader(const ModelBinFromDataReader&);
    ModelBinFromDataReader& operator=(const ModelBinFromDataReader&);

private:
    ModelBinFromDataReaderPrivate* const d;
};

class ModelBinFromMatArrayPrivate;

// ModelBin backed by an in-memory array of Mat weight blobs.
class NCNN_EXPORT ModelBinFromMatArray : public ModelBin
{
public:
    // construct from weight blob array
    explicit ModelBinFromMatArray(const Mat* weights);
    virtual ~ModelBinFromMatArray();

    virtual Mat load(int w, int type) const;

private:
    // non-copyable (pimpl owns array cursor)
    ModelBinFromMatArray(const ModelBinFromMatArray&);
    ModelBinFromMatArray& operator=(const ModelBinFromMatArray&);

private:
    ModelBinFromMatArrayPrivate* const d;
};

} // namespace ncnn

#endif // NCNN_MODELBIN_H
--------------------------------------------------------------------------------
/include/ncnn/ncnn_export.h:
--------------------------------------------------------------------------------

#ifndef NCNN_EXPORT_H
#define NCNN_EXPORT_H

// CMake GenerateExportHeader output: symbol visibility macros.
// This copy was generated for a static build, so NCNN_EXPORT expands empty
// in every branch (no __declspec/visibility attributes needed).
#ifdef NCNN_STATIC_DEFINE
#  define NCNN_EXPORT
#  define NCNN_NO_EXPORT
#else
#  ifndef NCNN_EXPORT
#    ifdef ncnn_EXPORTS
        /* We are building this library */
#      define NCNN_EXPORT
#    else
        /* We are using this library */
#      define NCNN_EXPORT
#    endif
#  endif

#  ifndef NCNN_NO_EXPORT
#    define NCNN_NO_EXPORT
#  endif
#endif

// NOTE(review): __attribute__((__deprecated__)) is gcc/clang syntax; an MSVC
// build would regenerate this header with __declspec(deprecated) instead.
#ifndef NCNN_DEPRECATED
#  define NCNN_DEPRECATED __attribute__ ((__deprecated__))
#endif

#ifndef NCNN_DEPRECATED_EXPORT
#  define NCNN_DEPRECATED_EXPORT NCNN_EXPORT NCNN_DEPRECATED
#endif

#ifndef NCNN_DEPRECATED_NO_EXPORT
#  define NCNN_DEPRECATED_NO_EXPORT NCNN_NO_EXPORT NCNN_DEPRECATED
#endif

#if 0 /* DEFINE_NO_DEPRECATED */
#  ifndef NCNN_NO_DEPRECATED
#    define NCNN_NO_DEPRECATED
#  endif
#endif

#endif /* NCNN_EXPORT_H */
--------------------------------------------------------------------------------
/include/ncnn/net.h:
--------------------------------------------------------------------------------
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
14 | 15 | #ifndef NCNN_NET_H 16 | #define NCNN_NET_H 17 | 18 | #include "blob.h" 19 | #include "layer.h" 20 | #include "mat.h" 21 | #include "option.h" 22 | #include "platform.h" 23 | 24 | #if NCNN_PLATFORM_API 25 | #if __ANDROID_API__ >= 9 26 | #include 27 | #endif // __ANDROID_API__ >= 9 28 | #endif // NCNN_PLATFORM_API 29 | 30 | namespace ncnn { 31 | 32 | #if NCNN_VULKAN 33 | class VkCompute; 34 | #endif // NCNN_VULKAN 35 | class DataReader; 36 | class Extractor; 37 | class NetPrivate; 38 | class NCNN_EXPORT Net 39 | { 40 | public: 41 | // empty init 42 | Net(); 43 | // clear and destroy 44 | virtual ~Net(); 45 | 46 | public: 47 | // option can be changed before loading 48 | Option opt; 49 | 50 | #if NCNN_VULKAN 51 | // set gpu device by index 52 | void set_vulkan_device(int device_index); 53 | 54 | // set gpu device by device handle, no owner transfer 55 | void set_vulkan_device(const VulkanDevice* vkdev); 56 | 57 | const VulkanDevice* vulkan_device() const; 58 | #endif // NCNN_VULKAN 59 | 60 | #if NCNN_STRING 61 | // register custom layer or overwrite built-in layer by layer type name 62 | // return 0 if success 63 | int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); 64 | virtual int custom_layer_to_index(const char* type); 65 | #endif // NCNN_STRING 66 | // register custom layer or overwrite built-in layer by layer type 67 | // return 0 if success 68 | int register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); 69 | 70 | #if NCNN_STRING 71 | int load_param(const DataReader& dr); 72 | #endif // NCNN_STRING 73 | 74 | int load_param_bin(const DataReader& dr); 75 | 76 | int load_model(const DataReader& dr); 77 | 78 | #if NCNN_STDIO 79 | #if NCNN_STRING 80 | // load network structure from plain param file 81 | // return 0 if success 82 | int load_param(FILE* fp); 83 | int load_param(const char* protopath); 84 | int 
load_param_mem(const char* mem); 85 | #endif // NCNN_STRING 86 | // load network structure from binary param file 87 | // return 0 if success 88 | int load_param_bin(FILE* fp); 89 | int load_param_bin(const char* protopath); 90 | 91 | // load network weight data from model file 92 | // return 0 if success 93 | int load_model(FILE* fp); 94 | int load_model(const char* modelpath); 95 | #endif // NCNN_STDIO 96 | 97 | // load network structure from external memory 98 | // memory pointer must be 32-bit aligned 99 | // return bytes consumed 100 | int load_param(const unsigned char* mem); 101 | 102 | // reference network weight data from external memory 103 | // weight data is not copied but referenced 104 | // so external memory should be retained when used 105 | // memory pointer must be 32-bit aligned 106 | // return bytes consumed 107 | int load_model(const unsigned char* mem); 108 | 109 | #if NCNN_PLATFORM_API 110 | #if __ANDROID_API__ >= 9 111 | #if NCNN_STRING 112 | // convenient load network structure from android asset plain param file 113 | int load_param(AAsset* asset); 114 | int load_param(AAssetManager* mgr, const char* assetpath); 115 | #endif // NCNN_STRING 116 | // convenient load network structure from android asset binary param file 117 | int load_param_bin(AAsset* asset); 118 | int load_param_bin(AAssetManager* mgr, const char* assetpath); 119 | 120 | // convenient load network weight data from android asset model file 121 | int load_model(AAsset* asset); 122 | int load_model(AAssetManager* mgr, const char* assetpath); 123 | #endif // __ANDROID_API__ >= 9 124 | #endif // NCNN_PLATFORM_API 125 | 126 | // unload network structure and weight data 127 | void clear(); 128 | 129 | // construct an Extractor from network 130 | Extractor create_extractor() const; 131 | 132 | // get input/output indexes/names 133 | const std::vector& input_indexes() const; 134 | const std::vector& output_indexes() const; 135 | #if NCNN_STRING 136 | const std::vector& 
input_names() const; 137 | const std::vector& output_names() const; 138 | #endif 139 | 140 | const std::vector& blobs() const; 141 | const std::vector& layers() const; 142 | 143 | std::vector& mutable_blobs(); 144 | std::vector& mutable_layers(); 145 | 146 | protected: 147 | friend class Extractor; 148 | #if NCNN_STRING 149 | int find_blob_index_by_name(const char* name) const; 150 | int find_layer_index_by_name(const char* name) const; 151 | virtual Layer* create_custom_layer(const char* type); 152 | virtual Layer* create_overwrite_builtin_layer(const char* type); 153 | #endif // NCNN_STRING 154 | virtual Layer* create_custom_layer(int index); 155 | virtual Layer* create_overwrite_builtin_layer(int typeindex); 156 | 157 | private: 158 | Net(const Net&); 159 | Net& operator=(const Net&); 160 | 161 | private: 162 | NetPrivate* const d; 163 | }; 164 | 165 | class ExtractorPrivate; 166 | class NCNN_EXPORT Extractor 167 | { 168 | public: 169 | virtual ~Extractor(); 170 | 171 | // copy 172 | Extractor(const Extractor&); 173 | 174 | // assign 175 | Extractor& operator=(const Extractor&); 176 | 177 | // clear blob mats and alloctors 178 | void clear(); 179 | 180 | // enable light mode 181 | // intermediate blob will be recycled when enabled 182 | // enabled by default 183 | void set_light_mode(bool enable); 184 | 185 | // deprecated, no-op 186 | // instead, set net.opt.num_threads before net.load_param() 187 | void set_num_threads(int num_threads); 188 | 189 | // set blob memory allocator 190 | void set_blob_allocator(Allocator* allocator); 191 | 192 | // set workspace memory allocator 193 | void set_workspace_allocator(Allocator* allocator); 194 | 195 | #if NCNN_VULKAN 196 | // deprecated, no-op 197 | // instead, set net.opt.use_vulkan_compute before net.load_param() 198 | void set_vulkan_compute(bool enable); 199 | 200 | void set_blob_vkallocator(VkAllocator* allocator); 201 | 202 | void set_workspace_vkallocator(VkAllocator* allocator); 203 | 204 | void 
set_staging_vkallocator(VkAllocator* allocator); 205 | #endif // NCNN_VULKAN 206 | 207 | #if NCNN_STRING 208 | // set input by blob name 209 | // return 0 if success 210 | int input(const char* blob_name, const Mat& in); 211 | 212 | // get result by blob name 213 | // return 0 if success 214 | // type = 0, default 215 | // type = 1, do not convert fp16/bf16 or / and packing 216 | int extract(const char* blob_name, Mat& feat, int type = 0); 217 | #endif // NCNN_STRING 218 | 219 | // set input by blob index 220 | // return 0 if success 221 | int input(int blob_index, const Mat& in); 222 | 223 | // get result by blob index 224 | // return 0 if success 225 | // type = 0, default 226 | // type = 1, do not convert fp16/bf16 or / and packing 227 | int extract(int blob_index, Mat& feat, int type = 0); 228 | 229 | #if NCNN_VULKAN 230 | #if NCNN_STRING 231 | // set input by blob name 232 | // return 0 if success 233 | int input(const char* blob_name, const VkMat& in); 234 | 235 | // get result by blob name 236 | // return 0 if success 237 | int extract(const char* blob_name, VkMat& feat, VkCompute& cmd); 238 | 239 | // set input by blob name 240 | // return 0 if success 241 | int input(const char* blob_name, const VkImageMat& in); 242 | 243 | // get result by blob name 244 | // return 0 if success 245 | int extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd); 246 | #endif // NCNN_STRING 247 | 248 | // set input by blob index 249 | // return 0 if success 250 | int input(int blob_index, const VkMat& in); 251 | 252 | // get result by blob index 253 | // return 0 if success 254 | int extract(int blob_index, VkMat& feat, VkCompute& cmd); 255 | 256 | // set input by blob index 257 | // return 0 if success 258 | int input(int blob_index, const VkImageMat& in); 259 | 260 | // get result by blob index 261 | // return 0 if success 262 | int extract(int blob_index, VkImageMat& feat, VkCompute& cmd); 263 | #endif // NCNN_VULKAN 264 | 265 | protected: 266 | friend Extractor 
Net::create_extractor() const; 267 | Extractor(const Net* net, size_t blob_count); 268 | 269 | private: 270 | ExtractorPrivate* const d; 271 | }; 272 | 273 | } // namespace ncnn 274 | 275 | #endif // NCNN_NET_H 276 | -------------------------------------------------------------------------------- /include/ncnn/option.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
#ifndef NCNN_OPTION_H
#define NCNN_OPTION_H

#include "platform.h"

namespace ncnn {

#if NCNN_VULKAN
class VkAllocator;
class PipelineCache;
#endif // NCNN_VULKAN

class Allocator;
// Inference tuning knobs. Copied into Net::opt; every field must be set
// before Net::load_param()/load_model() to take effect.
class NCNN_EXPORT Option
{
public:
    // default option
    Option();

public:
    // light mode
    // intermediate blob will be recycled when enabled
    // enabled by default
    bool lightmode;

    // thread count
    // default value is the one returned by get_cpu_count()
    int num_threads;

    // blob memory allocator
    Allocator* blob_allocator;

    // workspace memory allocator
    Allocator* workspace_allocator;

#if NCNN_VULKAN
    // blob memory allocator
    VkAllocator* blob_vkallocator;

    // workspace memory allocator
    VkAllocator* workspace_vkallocator;

    // staging memory allocator
    VkAllocator* staging_vkallocator;

    // pipeline cache
    PipelineCache* pipeline_cache;
#endif // NCNN_VULKAN

    // the time openmp threads busy-wait for more work before going to sleep
    // default value is 20ms to keep the cores enabled
    // without too much extra power consumption afterwards
    int openmp_blocktime;

    // enable winograd convolution optimization
    // improve convolution 3x3 stride1 performance, may consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_winograd_convolution;

    // enable sgemm convolution optimization
    // improve convolution 1x1 stride1 performance, may consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_sgemm_convolution;

    // enable quantized int8 inference
    // use low-precision int8 path for quantized model
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_int8_inference;

    // enable vulkan compute
    bool use_vulkan_compute;

    // enable bf16 data type for storage
    // improve most operator performance on all arm devices, may consume more memory
    bool use_bf16_storage;

    // enable options for gpu inference
    bool use_fp16_packed;
    bool use_fp16_storage;
    bool use_fp16_arithmetic;
    bool use_int8_packed;
    bool use_int8_storage;
    bool use_int8_arithmetic;

    // enable simd-friendly packed memory layout
    // improve all operator performance on all arm devices, will consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    bool use_packing_layout;

    bool use_shader_pack8;

    // subgroup option
    bool use_subgroup_basic;
    bool use_subgroup_vote;
    bool use_subgroup_ballot;
    bool use_subgroup_shuffle;

    // turn on for adreno
    bool use_image_storage;
    bool use_tensor_storage;

    // reserved slot keeping struct layout stable across releases
    bool use_reserved_0;

    // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero)
    // default value is 3
    // 0 = DAZ OFF, FTZ OFF
    // 1 = DAZ ON , FTZ OFF
    // 2 = DAZ OFF, FTZ ON
    // 3 = DAZ ON, FTZ ON
    int flush_denormals;

    bool use_local_pool_allocator;

    // enable local memory optimization for gpu inference
    bool use_shader_local_memory;

    // enable cooperative matrix optimization for gpu inference
    bool use_cooperative_matrix;

    // more fine-grained control of winograd convolution
    bool use_winograd23_convolution;
    bool use_winograd43_convolution;
    bool use_winograd63_convolution;

    // this option is turned on for A53/A55 automatically
    // but you can force this on/off if you wish
    bool use_a53_a55_optimized_kernel;

    // enable options for shared variables in gpu shader
    bool use_fp16_uniform;
    bool use_int8_uniform;

    // reserved slots keeping struct layout stable across releases
    bool use_reserved_9;
    bool use_reserved_10;
    bool use_reserved_11;
};

} // namespace ncnn

#endif // NCNN_OPTION_H
--------------------------------------------------------------------------------
/include/ncnn/paramdict.h:
--------------------------------------------------------------------------------
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_PARAMDICT_H
#define NCNN_PARAMDICT_H

#include "mat.h"

// at most 32 parameters
#define NCNN_MAX_PARAM_COUNT 32

namespace ncnn {

class DataReader;
class Net;
class ParamDictPrivate;
// Per-layer parameter table parsed from a .param file: up to
// NCNN_MAX_PARAM_COUNT id-indexed slots, each an int, a float or a Mat array.
// Layer::load_param() reads its settings from one of these.
class NCNN_EXPORT ParamDict
{
public:
    // empty
    ParamDict();

    virtual ~ParamDict();

    // copy
    ParamDict(const ParamDict&);

    // assign
    ParamDict& operator=(const ParamDict&);

    // get type
    int type(int id) const;

    // get int
    int get(int id, int def) const;
    // get float
    float get(int id, float def) const;
    // get array
    Mat get(int id, const Mat& def) const;

    // set int
    void set(int id, int i);
    // set float
    void set(int id, float f);
    // set array
    void set(int id, const Mat& v);

protected:
    friend class Net;

    void clear();

    int load_param(const DataReader& dr);
    int load_param_bin(const DataReader& dr);

private:
    ParamDictPrivate* const d;
};

} // namespace ncnn

#endif // NCNN_PARAMDICT_H
--------------------------------------------------------------------------------
/include/ncnn/pipeline.h:
--------------------------------------------------------------------------------
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License.
You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_PIPELINE_H 16 | #define NCNN_PIPELINE_H 17 | 18 | #include "mat.h" 19 | #include "platform.h" 20 | #if NCNN_VULKAN 21 | #include "gpu.h" 22 | #endif // NCNN_VULKAN 23 | 24 | namespace ncnn { 25 | 26 | #if NCNN_VULKAN 27 | class Option; 28 | class PipelinePrivate; 29 | class NCNN_EXPORT Pipeline 30 | { 31 | public: 32 | explicit Pipeline(const VulkanDevice* vkdev); 33 | virtual ~Pipeline(); 34 | 35 | public: 36 | void set_optimal_local_size_xyz(int w = 4, int h = 4, int c = 4); 37 | void set_optimal_local_size_xyz(const Mat& local_size_xyz); 38 | void set_local_size_xyz(int w, int h, int c); 39 | 40 | int create(const uint32_t* spv_data, size_t spv_data_size, const std::vector& specializations); 41 | 42 | int create(int shader_type_index, const Option& opt, const std::vector& specializations); 43 | 44 | public: 45 | VkShaderModule shader_module() const; 46 | VkDescriptorSetLayout descriptorset_layout() const; 47 | VkPipelineLayout pipeline_layout() const; 48 | VkPipeline pipeline() const; 49 | VkDescriptorUpdateTemplateKHR descriptor_update_template() const; 50 | 51 | const ShaderInfo& shader_info() const; 52 | 53 | uint32_t local_size_x() const; 54 | uint32_t local_size_y() const; 55 | uint32_t local_size_z() const; 56 | 57 | protected: 58 | void set_shader_module(VkShaderModule shader_module); 59 | void set_descriptorset_layout(VkDescriptorSetLayout descriptorset_layout); 60 | void set_pipeline_layout(VkPipelineLayout pipeline_layout); 61 | void set_pipeline(VkPipeline pipeline); 62 | void 
set_descriptor_update_template(VkDescriptorUpdateTemplateKHR descriptor_update_template); 63 | 64 | void set_shader_info(const ShaderInfo& shader_info); 65 | 66 | public: 67 | const VulkanDevice* vkdev; 68 | 69 | private: 70 | Pipeline(const Pipeline&); 71 | Pipeline& operator=(const Pipeline&); 72 | 73 | private: 74 | PipelinePrivate* const d; 75 | }; 76 | 77 | #if NCNN_PLATFORM_API 78 | #if __ANDROID_API__ >= 26 79 | class VkCompute; 80 | class NCNN_EXPORT ImportAndroidHardwareBufferPipeline : private Pipeline 81 | { 82 | public: 83 | explicit ImportAndroidHardwareBufferPipeline(const VulkanDevice* vkdev); 84 | virtual ~ImportAndroidHardwareBufferPipeline(); 85 | 86 | int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, const Option& opt); 87 | int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, int target_width, int target_height, const Option& opt); 88 | void destroy(); 89 | 90 | friend class VkCompute; 91 | 92 | protected: 93 | int create_shader_module(const Option& opt); 94 | int create_sampler(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator); 95 | int create_descriptorset_layout(); 96 | 97 | public: 98 | int type_to; 99 | int rotate_from; 100 | bool need_resize; 101 | 102 | VkSampler sampler; 103 | }; 104 | #endif // __ANDROID_API__ >= 26 105 | #endif // NCNN_PLATFORM_API 106 | 107 | #endif // NCNN_VULKAN 108 | 109 | } // namespace ncnn 110 | 111 | #endif // NCNN_PIPELINE_H 112 | -------------------------------------------------------------------------------- /include/ncnn/pipelinecache.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 
4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_PIPELINECACHE_H 16 | #define NCNN_PIPELINECACHE_H 17 | 18 | #include "platform.h" 19 | 20 | #include "mat.h" 21 | #include "gpu.h" 22 | 23 | namespace ncnn { 24 | 25 | #if NCNN_VULKAN 26 | 27 | class VulkanDevice; 28 | class PipelineCachePrivate; 29 | class NCNN_EXPORT PipelineCache 30 | { 31 | public: 32 | explicit PipelineCache(const VulkanDevice* _vkdev); 33 | 34 | virtual ~PipelineCache(); 35 | 36 | void clear(); 37 | 38 | int get_pipeline(const uint32_t* spv_data, size_t spv_data_size, const std::vector& specializations, 39 | uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, 40 | VkShaderModule* shader_module, 41 | VkDescriptorSetLayout* descriptorset_layout, 42 | VkPipelineLayout* pipeline_layout, 43 | VkPipeline* pipeline, 44 | VkDescriptorUpdateTemplateKHR* descriptor_update_template, 45 | ShaderInfo& shader_info) const; 46 | 47 | int get_pipeline(int shader_type_index, const Option& opt, const std::vector& specializations, 48 | uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, 49 | VkShaderModule* shader_module, 50 | VkDescriptorSetLayout* descriptorset_layout, 51 | VkPipelineLayout* pipeline_layout, 52 | VkPipeline* pipeline, 53 | VkDescriptorUpdateTemplateKHR* descriptor_update_template, 54 | ShaderInfo& shader_info) const; 55 | 56 | protected: 57 | int create_shader_module(int shader_type_index, const Option& opt, 
uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, 58 | VkShaderModule* _shader_module, ShaderInfo& si) const; 59 | 60 | int new_pipeline(VkShaderModule shader_module, const ShaderInfo& shader_info, const std::vector& specializations, 61 | VkDescriptorSetLayout* descriptorset_layout, 62 | VkPipelineLayout* pipeline_layout, 63 | VkPipeline* pipeline, 64 | VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; 65 | 66 | protected: 67 | const VulkanDevice* vkdev; 68 | 69 | private: 70 | PipelineCache(const PipelineCache&); 71 | PipelineCache& operator=(const PipelineCache&); 72 | 73 | private: 74 | PipelineCachePrivate* const d; 75 | }; 76 | 77 | #endif // NCNN_VULKAN 78 | 79 | } // namespace ncnn 80 | 81 | #endif // NCNN_PIPELINECACHE_H 82 | -------------------------------------------------------------------------------- /include/ncnn/platform.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_PLATFORM_H 16 | #define NCNN_PLATFORM_H 17 | 18 | #define NCNN_STDIO 1 19 | #define NCNN_STRING 1 20 | #define NCNN_SIMPLEOCV 0 21 | #define NCNN_SIMPLEOMP 0 22 | #define NCNN_SIMPLESTL 0 23 | #define NCNN_SIMPLEMATH 0 24 | #define NCNN_THREADS 1 25 | #define NCNN_BENCHMARK 0 26 | #define NCNN_C_API 1 27 | #define NCNN_PLATFORM_API 1 28 | #define NCNN_PIXEL 1 29 | #define NCNN_PIXEL_ROTATE 1 30 | #define NCNN_PIXEL_AFFINE 1 31 | #define NCNN_PIXEL_DRAWING 1 32 | #define NCNN_VULKAN 0 33 | #define NCNN_SIMPLEVK 1 34 | #define NCNN_SYSTEM_GLSLANG 0 35 | #define NCNN_RUNTIME_CPU 1 36 | #define NCNN_GNU_INLINE_ASM 1 37 | #define NCNN_AVX 1 38 | #define NCNN_XOP 1 39 | #define NCNN_FMA 1 40 | #define NCNN_F16C 1 41 | #define NCNN_AVX2 1 42 | #define NCNN_AVXVNNI 1 43 | #define NCNN_AVX512 1 44 | #define NCNN_AVX512VNNI 1 45 | #define NCNN_AVX512BF16 0 46 | #define NCNN_AVX512FP16 0 47 | #define NCNN_VFPV4 0 48 | #define NCNN_ARM82 0 49 | #define NCNN_ARM82DOT 0 50 | #define NCNN_ARM82FP16FML 0 51 | #define NCNN_ARM84BF16 0 52 | #define NCNN_ARM84I8MM 0 53 | #define NCNN_ARM86SVE 0 54 | #define NCNN_ARM86SVE2 0 55 | #define NCNN_ARM86SVEBF16 0 56 | #define NCNN_ARM86SVEI8MM 0 57 | #define NCNN_ARM86SVEF32MM 0 58 | #define NCNN_MSA 0 59 | #define NCNN_LSX 0 60 | #define NCNN_MMI 0 61 | #define NCNN_RVV 0 62 | #define NCNN_INT8 1 63 | #define NCNN_BF16 1 64 | #define NCNN_FORCE_INLINE 1 65 | 66 | #define NCNN_VERSION_STRING "1.0.20240926" 67 | 68 | #include "ncnn_export.h" 69 | 70 | #ifdef __cplusplus 71 | 72 | #if NCNN_THREADS 73 | #if defined _WIN32 74 | #define WIN32_LEAN_AND_MEAN 75 | #include 76 | #include 77 | #else 78 | #include 79 | #endif 80 | #endif // NCNN_THREADS 81 | 82 | #if __ANDROID_API__ >= 26 83 | #define VK_USE_PLATFORM_ANDROID_KHR 84 | #endif // __ANDROID_API__ >= 26 85 | 86 | namespace ncnn { 87 | 88 | #if NCNN_THREADS 89 | #if defined _WIN32 90 | class NCNN_EXPORT Mutex 91 | { 92 | public: 93 | Mutex() { 
InitializeSRWLock(&srwlock); } 94 | ~Mutex() {} 95 | void lock() { AcquireSRWLockExclusive(&srwlock); } 96 | void unlock() { ReleaseSRWLockExclusive(&srwlock); } 97 | private: 98 | friend class ConditionVariable; 99 | // NOTE SRWLock is available from windows vista 100 | SRWLOCK srwlock; 101 | }; 102 | 103 | class NCNN_EXPORT ConditionVariable 104 | { 105 | public: 106 | ConditionVariable() { InitializeConditionVariable(&condvar); } 107 | ~ConditionVariable() {} 108 | void wait(Mutex& mutex) { SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); } 109 | void broadcast() { WakeAllConditionVariable(&condvar); } 110 | void signal() { WakeConditionVariable(&condvar); } 111 | private: 112 | CONDITION_VARIABLE condvar; 113 | }; 114 | 115 | static unsigned __stdcall start_wrapper(void* args); 116 | class NCNN_EXPORT Thread 117 | { 118 | public: 119 | Thread(void* (*start)(void*), void* args = 0) { _start = start; _args = args; handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); } 120 | ~Thread() {} 121 | void join() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); } 122 | private: 123 | friend unsigned __stdcall start_wrapper(void* args) 124 | { 125 | Thread* t = (Thread*)args; 126 | t->_start(t->_args); 127 | return 0; 128 | } 129 | HANDLE handle; 130 | void* (*_start)(void*); 131 | void* _args; 132 | }; 133 | 134 | class NCNN_EXPORT ThreadLocalStorage 135 | { 136 | public: 137 | ThreadLocalStorage() { key = TlsAlloc(); } 138 | ~ThreadLocalStorage() { TlsFree(key); } 139 | void set(void* value) { TlsSetValue(key, (LPVOID)value); } 140 | void* get() { return (void*)TlsGetValue(key); } 141 | private: 142 | DWORD key; 143 | }; 144 | #else // defined _WIN32 145 | class NCNN_EXPORT Mutex 146 | { 147 | public: 148 | Mutex() { pthread_mutex_init(&mutex, 0); } 149 | ~Mutex() { pthread_mutex_destroy(&mutex); } 150 | void lock() { pthread_mutex_lock(&mutex); } 151 | void unlock() { pthread_mutex_unlock(&mutex); } 152 | private: 153 | friend 
class ConditionVariable; 154 | pthread_mutex_t mutex; 155 | }; 156 | 157 | class NCNN_EXPORT ConditionVariable 158 | { 159 | public: 160 | ConditionVariable() { pthread_cond_init(&cond, 0); } 161 | ~ConditionVariable() { pthread_cond_destroy(&cond); } 162 | void wait(Mutex& mutex) { pthread_cond_wait(&cond, &mutex.mutex); } 163 | void broadcast() { pthread_cond_broadcast(&cond); } 164 | void signal() { pthread_cond_signal(&cond); } 165 | private: 166 | pthread_cond_t cond; 167 | }; 168 | 169 | class NCNN_EXPORT Thread 170 | { 171 | public: 172 | Thread(void* (*start)(void*), void* args = 0) { pthread_create(&t, 0, start, args); } 173 | ~Thread() {} 174 | void join() { pthread_join(t, 0); } 175 | private: 176 | pthread_t t; 177 | }; 178 | 179 | class NCNN_EXPORT ThreadLocalStorage 180 | { 181 | public: 182 | ThreadLocalStorage() { pthread_key_create(&key, 0); } 183 | ~ThreadLocalStorage() { pthread_key_delete(key); } 184 | void set(void* value) { pthread_setspecific(key, value); } 185 | void* get() { return pthread_getspecific(key); } 186 | private: 187 | pthread_key_t key; 188 | }; 189 | #endif // defined _WIN32 190 | #else // NCNN_THREADS 191 | class NCNN_EXPORT Mutex 192 | { 193 | public: 194 | Mutex() {} 195 | ~Mutex() {} 196 | void lock() {} 197 | void unlock() {} 198 | }; 199 | 200 | class NCNN_EXPORT ConditionVariable 201 | { 202 | public: 203 | ConditionVariable() {} 204 | ~ConditionVariable() {} 205 | void wait(Mutex& /*mutex*/) {} 206 | void broadcast() {} 207 | void signal() {} 208 | }; 209 | 210 | class NCNN_EXPORT Thread 211 | { 212 | public: 213 | Thread(void* (*/*start*/)(void*), void* /*args*/ = 0) {} 214 | ~Thread() {} 215 | void join() {} 216 | }; 217 | 218 | class NCNN_EXPORT ThreadLocalStorage 219 | { 220 | public: 221 | ThreadLocalStorage() { data = 0; } 222 | ~ThreadLocalStorage() {} 223 | void set(void* value) { data = value; } 224 | void* get() { return data; } 225 | private: 226 | void* data; 227 | }; 228 | #endif // NCNN_THREADS 229 | 230 | 
class NCNN_EXPORT MutexLockGuard 231 | { 232 | public: 233 | MutexLockGuard(Mutex& _mutex) : mutex(_mutex) { mutex.lock(); } 234 | ~MutexLockGuard() { mutex.unlock(); } 235 | private: 236 | Mutex& mutex; 237 | }; 238 | 239 | static inline void swap_endianness_16(void* x) 240 | { 241 | unsigned char* xx = (unsigned char*)x; 242 | unsigned char x0 = xx[0]; 243 | unsigned char x1 = xx[1]; 244 | xx[0] = x1; 245 | xx[1] = x0; 246 | } 247 | 248 | static inline void swap_endianness_32(void* x) 249 | { 250 | unsigned char* xx = (unsigned char*)x; 251 | unsigned char x0 = xx[0]; 252 | unsigned char x1 = xx[1]; 253 | unsigned char x2 = xx[2]; 254 | unsigned char x3 = xx[3]; 255 | xx[0] = x3; 256 | xx[1] = x2; 257 | xx[2] = x1; 258 | xx[3] = x0; 259 | } 260 | 261 | } // namespace ncnn 262 | 263 | #if NCNN_SIMPLESTL 264 | #include "simplestl.h" 265 | #else 266 | #include 267 | #include 268 | #include 269 | #include 270 | #endif 271 | 272 | // simplemath 273 | #if NCNN_SIMPLEMATH 274 | #include "simplemath.h" 275 | #else 276 | #include 277 | #include 278 | #endif 279 | 280 | #if NCNN_VULKAN 281 | #if NCNN_SIMPLEVK 282 | #include "simplevk.h" 283 | #else 284 | #include 285 | #endif 286 | #include "vulkan_header_fix.h" 287 | #endif // NCNN_VULKAN 288 | 289 | #endif // __cplusplus 290 | 291 | #if NCNN_STDIO 292 | #if NCNN_PLATFORM_API && __ANDROID_API__ >= 8 293 | #include 294 | #define NCNN_LOGE(...) do { \ 295 | fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); \ 296 | __android_log_print(ANDROID_LOG_WARN, "ncnn", ##__VA_ARGS__); } while(0) 297 | #else // NCNN_PLATFORM_API && __ANDROID_API__ >= 8 298 | #include 299 | #define NCNN_LOGE(...) do { \ 300 | fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); } while(0) 301 | #endif // NCNN_PLATFORM_API && __ANDROID_API__ >= 8 302 | #else 303 | #define NCNN_LOGE(...) 
304 | #endif 305 | 306 | 307 | #if NCNN_FORCE_INLINE 308 | #ifdef _MSC_VER 309 | #define NCNN_FORCEINLINE __forceinline 310 | #elif defined(__GNUC__) 311 | #define NCNN_FORCEINLINE inline __attribute__((__always_inline__)) 312 | #elif defined(__CLANG__) 313 | #if __has_attribute(__always_inline__) 314 | #define NCNN_FORCEINLINE inline __attribute__((__always_inline__)) 315 | #else 316 | #define NCNN_FORCEINLINE inline 317 | #endif 318 | #else 319 | #define NCNN_FORCEINLINE inline 320 | #endif 321 | #else 322 | #define NCNN_FORCEINLINE inline 323 | #endif 324 | 325 | #endif // NCNN_PLATFORM_H 326 | -------------------------------------------------------------------------------- /include/ncnn/simplemath.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_SIMPLEMATH_H 16 | #define NCNN_SIMPLEMATH_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_SIMPLEMATH 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | /* 26 | * ==================================================== 27 | * discrete functions 28 | * ==================================================== 29 | */ 30 | NCNN_EXPORT float fabs(float); 31 | NCNN_EXPORT float fabsf(float); 32 | NCNN_EXPORT float fmod(float, float); 33 | NCNN_EXPORT float floor(float); 34 | NCNN_EXPORT float floorf(float); 35 | NCNN_EXPORT float round(float); 36 | NCNN_EXPORT float roundf(float); 37 | NCNN_EXPORT float ceil(float); 38 | NCNN_EXPORT float ceilf(float); 39 | NCNN_EXPORT float fmaxf(float, float); 40 | NCNN_EXPORT float truncf(float); 41 | NCNN_EXPORT float frac(float); 42 | /* 43 | * ==================================================== 44 | * trigonometric functions 45 | * ==================================================== 46 | */ 47 | NCNN_EXPORT float sinf(float); 48 | NCNN_EXPORT float cosf(float); 49 | NCNN_EXPORT float tanf(float); 50 | NCNN_EXPORT float asinf(float); 51 | NCNN_EXPORT float acosf(float); 52 | NCNN_EXPORT float atanf(float); 53 | NCNN_EXPORT float atan2f(float, float); 54 | NCNN_EXPORT float tanhf(float); 55 | 56 | /* 57 | * ==================================================== 58 | * power functions 59 | * ==================================================== 60 | */ 61 | NCNN_EXPORT float sqrtf(float); 62 | NCNN_EXPORT float sqrt(float); 63 | NCNN_EXPORT float powf(float, float); 64 | 65 | /* 66 | * ==================================================== 67 | * exponential and logarithm functions 68 | * ==================================================== 69 | */ 70 | NCNN_EXPORT float expf(float); 71 | NCNN_EXPORT float frexp(float, int*); 72 | NCNN_EXPORT float logf(float); 73 | NCNN_EXPORT float log(float); 74 | NCNN_EXPORT float log10f(float); 75 | 76 | /* 77 | * ==================================================== 
78 | * probability functions 79 | * ==================================================== 80 | */ 81 | NCNN_EXPORT float erf(float); 82 | NCNN_EXPORT float erff(float); 83 | NCNN_EXPORT float erfcf(float); 84 | 85 | /* 86 | * ==================================================== 87 | * other functions 88 | * ==================================================== 89 | */ 90 | NCNN_EXPORT int msb(unsigned int); 91 | NCNN_EXPORT float fmaf(float, float, float); 92 | NCNN_EXPORT float copysignf(float, float); 93 | NCNN_EXPORT void fesetround(int); 94 | NCNN_EXPORT int fegetround(); 95 | NCNN_EXPORT float nearbyintf(float); 96 | 97 | #ifdef __cplusplus 98 | } // extern "C" 99 | #endif 100 | 101 | #endif // NCNN_SIMPLEMATH 102 | 103 | #endif // NCNN_SIMPLEMATH_H 104 | -------------------------------------------------------------------------------- /include/ncnn/simpleocv.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_SIMPLEOCV_H 16 | #define NCNN_SIMPLEOCV_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_SIMPLEOCV 21 | 22 | #include 23 | #include 24 | #include "allocator.h" 25 | #include "mat.h" 26 | 27 | #if defined(_MSC_VER) || defined(__GNUC__) 28 | #pragma push_macro("min") 29 | #pragma push_macro("max") 30 | #undef min 31 | #undef max 32 | #endif 33 | 34 | #ifndef NCNN_XADD 35 | using ncnn::NCNN_XADD; 36 | #endif 37 | 38 | typedef unsigned char uchar; 39 | typedef unsigned short ushort; 40 | typedef unsigned int uint; 41 | 42 | enum 43 | { 44 | CV_LOAD_IMAGE_UNCHANGED = -1, 45 | CV_LOAD_IMAGE_GRAYSCALE = 0, 46 | CV_LOAD_IMAGE_COLOR = 1, 47 | }; 48 | 49 | enum 50 | { 51 | CV_IMWRITE_JPEG_QUALITY = 1 52 | }; 53 | 54 | // minimal opencv style data structure implementation 55 | namespace cv { 56 | 57 | template 58 | static inline _Tp saturate_cast(int v) 59 | { 60 | return _Tp(v); 61 | } 62 | template<> 63 | inline uchar saturate_cast(int v) 64 | { 65 | return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? 
UCHAR_MAX : 0); 66 | } 67 | 68 | template 69 | struct Scalar_ 70 | { 71 | Scalar_() 72 | { 73 | v[0] = 0; 74 | v[1] = 0; 75 | v[2] = 0; 76 | v[3] = 0; 77 | } 78 | Scalar_(_Tp _v0) 79 | { 80 | v[0] = _v0; 81 | v[1] = 0; 82 | v[2] = 0; 83 | v[3] = 0; 84 | } 85 | Scalar_(_Tp _v0, _Tp _v1, _Tp _v2) 86 | { 87 | v[0] = _v0; 88 | v[1] = _v1; 89 | v[2] = _v2; 90 | v[3] = 0; 91 | } 92 | Scalar_(_Tp _v0, _Tp _v1, _Tp _v2, _Tp _v3) 93 | { 94 | v[0] = _v0; 95 | v[1] = _v1; 96 | v[2] = _v2; 97 | v[3] = _v3; 98 | } 99 | 100 | const _Tp operator[](const int i) const 101 | { 102 | return v[i]; 103 | } 104 | 105 | _Tp operator[](const int i) 106 | { 107 | return v[i]; 108 | } 109 | 110 | _Tp v[4]; 111 | }; 112 | 113 | typedef Scalar_ Scalar; 114 | 115 | template 116 | struct Point_ 117 | { 118 | Point_() 119 | : x(0), y(0) 120 | { 121 | } 122 | Point_(_Tp _x, _Tp _y) 123 | : x(_x), y(_y) 124 | { 125 | } 126 | 127 | template 128 | operator Point_<_Tp2>() const 129 | { 130 | return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y)); 131 | } 132 | 133 | _Tp x; 134 | _Tp y; 135 | }; 136 | 137 | typedef Point_ Point; 138 | typedef Point_ Point2f; 139 | 140 | template 141 | struct Size_ 142 | { 143 | Size_() 144 | : width(0), height(0) 145 | { 146 | } 147 | Size_(_Tp _w, _Tp _h) 148 | : width(_w), height(_h) 149 | { 150 | } 151 | 152 | template 153 | operator Size_<_Tp2>() const 154 | { 155 | return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); 156 | } 157 | 158 | _Tp width; 159 | _Tp height; 160 | }; 161 | 162 | typedef Size_ Size; 163 | typedef Size_ Size2f; 164 | 165 | template 166 | struct Rect_ 167 | { 168 | Rect_() 169 | : x(0), y(0), width(0), height(0) 170 | { 171 | } 172 | Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h) 173 | : x(_x), y(_y), width(_w), height(_h) 174 | { 175 | } 176 | Rect_(Point_<_Tp> _p, Size_<_Tp> _size) 177 | : x(_p.x), y(_p.y), width(_size.width), height(_size.height) 178 | { 179 | } 180 | 181 | template 182 | operator Rect_<_Tp2>() 
const 183 | { 184 | return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); 185 | } 186 | 187 | _Tp x; 188 | _Tp y; 189 | _Tp width; 190 | _Tp height; 191 | 192 | // area 193 | _Tp area() const 194 | { 195 | return width * height; 196 | } 197 | }; 198 | 199 | template 200 | static inline Rect_<_Tp>& operator&=(Rect_<_Tp>& a, const Rect_<_Tp>& b) 201 | { 202 | _Tp x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y); 203 | a.width = std::min(a.x + a.width, b.x + b.width) - x1; 204 | a.height = std::min(a.y + a.height, b.y + b.height) - y1; 205 | a.x = x1; 206 | a.y = y1; 207 | if (a.width <= 0 || a.height <= 0) 208 | a = Rect_<_Tp>(); 209 | return a; 210 | } 211 | 212 | template 213 | static inline Rect_<_Tp>& operator|=(Rect_<_Tp>& a, const Rect_<_Tp>& b) 214 | { 215 | _Tp x1 = std::min(a.x, b.x), y1 = std::min(a.y, b.y); 216 | a.width = std::max(a.x + a.width, b.x + b.width) - x1; 217 | a.height = std::max(a.y + a.height, b.y + b.height) - y1; 218 | a.x = x1; 219 | a.y = y1; 220 | return a; 221 | } 222 | 223 | template 224 | static inline Rect_<_Tp> operator&(const Rect_<_Tp>& a, const Rect_<_Tp>& b) 225 | { 226 | Rect_<_Tp> c = a; 227 | return c &= b; 228 | } 229 | 230 | template 231 | static inline Rect_<_Tp> operator|(const Rect_<_Tp>& a, const Rect_<_Tp>& b) 232 | { 233 | Rect_<_Tp> c = a; 234 | return c |= b; 235 | } 236 | 237 | typedef Rect_ Rect; 238 | typedef Rect_ Rect2f; 239 | 240 | #define CV_8UC1 1 241 | #define CV_8UC3 3 242 | #define CV_8UC4 4 243 | #define CV_32FC1 4 244 | 245 | struct NCNN_EXPORT Mat 246 | { 247 | Mat() 248 | : data(0), refcount(0), rows(0), cols(0), c(0) 249 | { 250 | } 251 | 252 | Mat(int _rows, int _cols, int flags) 253 | : data(0), refcount(0) 254 | { 255 | create(_rows, _cols, flags); 256 | } 257 | 258 | // copy 259 | Mat(const Mat& m) 260 | : data(m.data), refcount(m.refcount) 261 | { 262 | if (refcount) 263 | NCNN_XADD(refcount, 1); 264 | 265 | rows = m.rows; 
266 | cols = m.cols; 267 | c = m.c; 268 | } 269 | 270 | Mat(int _rows, int _cols, int flags, void* _data) 271 | : data((unsigned char*)_data), refcount(0) 272 | { 273 | rows = _rows; 274 | cols = _cols; 275 | c = flags; 276 | } 277 | 278 | ~Mat() 279 | { 280 | release(); 281 | } 282 | 283 | // assign 284 | Mat& operator=(const Mat& m) 285 | { 286 | if (this == &m) 287 | return *this; 288 | 289 | if (m.refcount) 290 | NCNN_XADD(m.refcount, 1); 291 | 292 | release(); 293 | 294 | data = m.data; 295 | refcount = m.refcount; 296 | 297 | rows = m.rows; 298 | cols = m.cols; 299 | c = m.c; 300 | 301 | return *this; 302 | } 303 | 304 | Mat& operator=(const Scalar& s) 305 | { 306 | if (total() > 0) 307 | { 308 | uchar* p = data; 309 | for (int i = 0; i < cols * rows; i++) 310 | { 311 | for (int j = 0; j < c; j++) 312 | { 313 | *p++ = s[j]; 314 | } 315 | } 316 | } 317 | 318 | return *this; 319 | } 320 | 321 | void create(int _rows, int _cols, int flags) 322 | { 323 | release(); 324 | 325 | rows = _rows; 326 | cols = _cols; 327 | c = flags; 328 | 329 | if (total() > 0) 330 | { 331 | // refcount address must be aligned, so we expand totalsize here 332 | size_t totalsize = (total() + 3) >> 2 << 2; 333 | data = (uchar*)ncnn::fastMalloc(totalsize + (int)sizeof(*refcount)); 334 | refcount = (int*)(((uchar*)data) + totalsize); 335 | *refcount = 1; 336 | } 337 | } 338 | 339 | void release() 340 | { 341 | if (refcount && NCNN_XADD(refcount, -1) == 1) 342 | ncnn::fastFree(data); 343 | 344 | data = 0; 345 | 346 | rows = 0; 347 | cols = 0; 348 | c = 0; 349 | 350 | refcount = 0; 351 | } 352 | 353 | Mat clone() const 354 | { 355 | if (empty()) 356 | return Mat(); 357 | 358 | Mat m(rows, cols, c); 359 | 360 | if (total() > 0) 361 | { 362 | memcpy(m.data, data, total()); 363 | } 364 | 365 | return m; 366 | } 367 | 368 | bool empty() const 369 | { 370 | return data == 0 || total() == 0; 371 | } 372 | 373 | int channels() const 374 | { 375 | return c; 376 | } 377 | 378 | int type() const 379 | 
{ 380 | return c; 381 | } 382 | 383 | size_t total() const 384 | { 385 | return cols * rows * c; 386 | } 387 | 388 | const uchar* ptr(int y) const 389 | { 390 | return data + y * cols * c; 391 | } 392 | 393 | uchar* ptr(int y) 394 | { 395 | return data + y * cols * c; 396 | } 397 | 398 | template 399 | const _Tp* ptr(int y) const 400 | { 401 | return (const _Tp*)(data + y * cols * c); 402 | } 403 | 404 | template 405 | _Tp* ptr(int y) 406 | { 407 | return (_Tp*)(data + y * cols * c); 408 | } 409 | 410 | // roi 411 | Mat operator()(const Rect& roi) const 412 | { 413 | if (empty()) 414 | return Mat(); 415 | 416 | Mat m(roi.height, roi.width, c); 417 | 418 | int sy = roi.y; 419 | for (int y = 0; y < roi.height; y++) 420 | { 421 | const uchar* sptr = ptr(sy) + roi.x * c; 422 | uchar* dptr = m.ptr(y); 423 | memcpy(dptr, sptr, roi.width * c); 424 | sy++; 425 | } 426 | 427 | return m; 428 | } 429 | 430 | uchar* data; 431 | 432 | // pointer to the reference counter; 433 | // when points to user-allocated data, the pointer is NULL 434 | int* refcount; 435 | 436 | int rows; 437 | int cols; 438 | 439 | int c; 440 | }; 441 | 442 | enum ImreadModes 443 | { 444 | IMREAD_UNCHANGED = -1, 445 | IMREAD_GRAYSCALE = 0, 446 | IMREAD_COLOR = 1 447 | }; 448 | 449 | NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR); 450 | 451 | NCNN_EXPORT Mat imdecode(const std::vector& buf, int flags = IMREAD_COLOR); 452 | 453 | enum ImwriteFlags 454 | { 455 | IMWRITE_JPEG_QUALITY = 1 456 | }; 457 | 458 | NCNN_EXPORT bool imwrite(const std::string& path, const Mat& m, const std::vector& params = std::vector()); 459 | 460 | NCNN_EXPORT void imshow(const std::string& name, const Mat& m); 461 | 462 | NCNN_EXPORT int waitKey(int delay = 0); 463 | 464 | #if NCNN_PIXEL 465 | NCNN_EXPORT void resize(const Mat& src, Mat& dst, const Size& size, float sw = 0.f, float sh = 0.f, int flags = 0); 466 | #endif // NCNN_PIXEL 467 | 468 | #if NCNN_PIXEL_DRAWING 469 | 470 | enum 471 | { 472 | 
FILLED = -1 473 | }; 474 | 475 | NCNN_EXPORT void rectangle(Mat& img, Point pt1, Point pt2, const Scalar& color, int thickness = 1); 476 | 477 | NCNN_EXPORT void rectangle(Mat& img, Rect rec, const Scalar& color, int thickness = 1); 478 | 479 | NCNN_EXPORT void circle(Mat& img, Point center, int radius, const Scalar& color, int thickness = 1); 480 | 481 | NCNN_EXPORT void line(Mat& img, Point p0, Point p1, const Scalar& color, int thickness = 1); 482 | 483 | enum 484 | { 485 | FONT_HERSHEY_SIMPLEX = 0 486 | }; 487 | 488 | NCNN_EXPORT void putText(Mat& img, const std::string& text, Point org, int fontFace, double fontScale, Scalar color, int thickness = 1); 489 | 490 | NCNN_EXPORT Size getTextSize(const std::string& text, int fontFace, double fontScale, int thickness, int* baseLine); 491 | 492 | #endif // NCNN_PIXEL_DRAWING 493 | 494 | } // namespace cv 495 | 496 | #if defined(_MSC_VER) || defined(__GNUC__) 497 | #pragma pop_macro("min") 498 | #pragma pop_macro("max") 499 | #endif 500 | 501 | #endif // NCNN_SIMPLEOCV 502 | 503 | #endif // NCNN_SIMPLEOCV_H 504 | -------------------------------------------------------------------------------- /include/ncnn/simpleomp.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. 
See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #ifndef NCNN_SIMPLEOMP_H 16 | #define NCNN_SIMPLEOMP_H 17 | 18 | #include "platform.h" 19 | 20 | #if NCNN_SIMPLEOMP 21 | 22 | #include 23 | 24 | // This minimal openmp runtime implementation only supports the llvm openmp abi 25 | // and only supports #pragma omp parallel for num_threads(X) 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | NCNN_EXPORT int omp_get_max_threads(); 32 | 33 | NCNN_EXPORT void omp_set_num_threads(int num_threads); 34 | 35 | NCNN_EXPORT int omp_get_dynamic(); 36 | 37 | NCNN_EXPORT void omp_set_dynamic(int dynamic); 38 | 39 | NCNN_EXPORT int omp_get_num_threads(); 40 | 41 | NCNN_EXPORT int omp_get_thread_num(); 42 | 43 | NCNN_EXPORT int kmp_get_blocktime(); 44 | 45 | NCNN_EXPORT void kmp_set_blocktime(int blocktime); 46 | 47 | #ifdef __cplusplus 48 | } 49 | #endif 50 | 51 | #endif // NCNN_SIMPLEOMP 52 | 53 | #endif // NCNN_SIMPLEOMP_H 54 | -------------------------------------------------------------------------------- /include/ncnn/simplestl.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NCNN_SIMPLESTL_H 16 | #define NCNN_SIMPLESTL_H 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #if !NCNN_SIMPLESTL 23 | 24 | #include 25 | 26 | #else 27 | 28 | // allocation functions 29 | NCNN_EXPORT void* operator new(size_t size); 30 | NCNN_EXPORT void* operator new[](size_t size); 31 | // placement allocation functions 32 | NCNN_EXPORT void* operator new(size_t size, void* ptr); 33 | NCNN_EXPORT void* operator new[](size_t size, void* ptr); 34 | // deallocation functions 35 | NCNN_EXPORT void operator delete(void* ptr); 36 | NCNN_EXPORT void operator delete[](void* ptr); 37 | // deallocation functions since c++14 38 | #if __cplusplus >= 201402L 39 | NCNN_EXPORT void operator delete(void* ptr, size_t sz); 40 | NCNN_EXPORT void operator delete[](void* ptr, size_t sz); 41 | #endif 42 | // placement deallocation functions 43 | NCNN_EXPORT void operator delete(void* ptr, void* voidptr2); 44 | NCNN_EXPORT void operator delete[](void* ptr, void* voidptr2); 45 | 46 | #endif 47 | 48 | // minimal stl data structure implementation 49 | namespace std { 50 | 51 | template 52 | const T& max(const T& a, const T& b) 53 | { 54 | return (a < b) ? b : a; 55 | } 56 | 57 | template 58 | const T& min(const T& a, const T& b) 59 | { 60 | return (a > b) ? 
b : a; 61 | } 62 | 63 | template 64 | void swap(T& a, T& b) 65 | { 66 | T temp(a); 67 | a = b; 68 | b = temp; 69 | } 70 | 71 | template 72 | struct pair 73 | { 74 | pair() 75 | : first(), second() 76 | { 77 | } 78 | pair(const T1& t1, const T2& t2) 79 | : first(t1), second(t2) 80 | { 81 | } 82 | 83 | T1 first; 84 | T2 second; 85 | }; 86 | 87 | template 88 | bool operator==(const pair& x, const pair& y) 89 | { 90 | return (x.first == y.first && x.second == y.second); 91 | } 92 | template 93 | bool operator<(const pair& x, const pair& y) 94 | { 95 | return x.first < y.first || (!(y.first < x.first) && x.second < y.second); 96 | } 97 | template 98 | bool operator!=(const pair& x, const pair& y) 99 | { 100 | return !(x == y); 101 | } 102 | template 103 | bool operator>(const pair& x, const pair& y) 104 | { 105 | return y < x; 106 | } 107 | template 108 | bool operator<=(const pair& x, const pair& y) 109 | { 110 | return !(y < x); 111 | } 112 | template 113 | bool operator>=(const pair& x, const pair& y) 114 | { 115 | return !(x < y); 116 | } 117 | 118 | template 119 | pair make_pair(const T1& t1, const T2& t2) 120 | { 121 | return pair(t1, t2); 122 | } 123 | 124 | template 125 | struct node 126 | { 127 | node* prev_; 128 | node* next_; 129 | T data_; 130 | 131 | node() 132 | : prev_(0), next_(0), data_() 133 | { 134 | } 135 | node(const T& t) 136 | : prev_(0), next_(0), data_(t) 137 | { 138 | } 139 | }; 140 | 141 | template 142 | struct iter_list 143 | { 144 | iter_list() 145 | : curr_(0) 146 | { 147 | } 148 | iter_list(node* n) 149 | : curr_(n) 150 | { 151 | } 152 | iter_list(const iter_list& i) 153 | : curr_(i.curr_) 154 | { 155 | } 156 | ~iter_list() 157 | { 158 | } 159 | 160 | iter_list& operator=(const iter_list& i) 161 | { 162 | curr_ = i.curr_; 163 | return *this; 164 | } 165 | 166 | T& operator*() 167 | { 168 | return curr_->data_; 169 | } 170 | T* operator->() 171 | { 172 | return &(curr_->data_); 173 | } 174 | 175 | bool operator==(const iter_list& i) 176 | { 
177 | return curr_ == i.curr_; 178 | } 179 | bool operator!=(const iter_list& i) 180 | { 181 | return curr_ != i.curr_; 182 | } 183 | 184 | iter_list& operator++() 185 | { 186 | curr_ = curr_->next_; 187 | return *this; 188 | } 189 | iter_list& operator--() 190 | { 191 | curr_ = curr_->prev_; 192 | return *this; 193 | } 194 | 195 | node* curr_; 196 | }; 197 | 198 | template 199 | struct list 200 | { 201 | typedef iter_list iterator; 202 | 203 | list() 204 | { 205 | head_ = new node(); 206 | tail_ = head_; 207 | count_ = 0; 208 | } 209 | ~list() 210 | { 211 | clear(); 212 | delete head_; 213 | } 214 | list(const list& l) 215 | { 216 | head_ = new node(); 217 | tail_ = head_; 218 | count_ = 0; 219 | 220 | for (iter_list i = l.begin(); i != l.end(); ++i) 221 | { 222 | push_back(*i); 223 | } 224 | } 225 | 226 | list& operator=(const list& l) 227 | { 228 | if (this == &l) 229 | { 230 | return *this; 231 | } 232 | clear(); 233 | 234 | for (iter_list i = l.begin(); i != l.end(); ++i) 235 | { 236 | push_back(*i); 237 | } 238 | return *this; 239 | } 240 | 241 | void clear() 242 | { 243 | while (count_ > 0) 244 | { 245 | pop_front(); 246 | } 247 | } 248 | 249 | void pop_front() 250 | { 251 | if (count_ > 0) 252 | { 253 | head_ = head_->next_; 254 | delete head_->prev_; 255 | head_->prev_ = 0; 256 | --count_; 257 | } 258 | } 259 | 260 | size_t size() const 261 | { 262 | return count_; 263 | } 264 | iter_list begin() const 265 | { 266 | return iter_list(head_); 267 | } 268 | iter_list end() const 269 | { 270 | return iter_list(tail_); 271 | } 272 | bool empty() const 273 | { 274 | return count_ == 0; 275 | } 276 | 277 | void push_back(const T& t) 278 | { 279 | if (count_ == 0) 280 | { 281 | head_ = new node(t); 282 | head_->prev_ = 0; 283 | head_->next_ = tail_; 284 | tail_->prev_ = head_; 285 | count_ = 1; 286 | } 287 | else 288 | { 289 | node* temp = new node(t); 290 | temp->prev_ = tail_->prev_; 291 | temp->next_ = tail_; 292 | tail_->prev_->next_ = temp; 293 | tail_->prev_ 
= temp; 294 | ++count_; 295 | } 296 | } 297 | 298 | iter_list erase(iter_list pos) 299 | { 300 | if (pos != end()) 301 | { 302 | node* temp = pos.curr_; 303 | if (temp == head_) 304 | { 305 | ++pos; 306 | temp->next_->prev_ = 0; 307 | head_ = temp->next_; 308 | } 309 | else 310 | { 311 | --pos; 312 | temp->next_->prev_ = temp->prev_; 313 | temp->prev_->next_ = temp->next_; 314 | ++pos; 315 | } 316 | delete temp; 317 | --count_; 318 | } 319 | return pos; 320 | } 321 | 322 | protected: 323 | node* head_; 324 | node* tail_; 325 | size_t count_; 326 | }; 327 | 328 | template 329 | struct greater 330 | { 331 | bool operator()(const T& x, const T& y) const 332 | { 333 | return (x > y); 334 | } 335 | }; 336 | 337 | template 338 | struct less 339 | { 340 | bool operator()(const T& x, const T& y) const 341 | { 342 | return (x < y); 343 | } 344 | }; 345 | 346 | template 347 | void partial_sort(RandomAccessIter first, RandomAccessIter middle, RandomAccessIter last, Compare comp) 348 | { 349 | // [TODO] heap sort should be used here, but we simply use bubble sort now 350 | for (RandomAccessIter i = first; i < middle; ++i) 351 | { 352 | // bubble sort 353 | for (RandomAccessIter j = last - 1; j > first; --j) 354 | { 355 | if (comp(*j, *(j - 1))) 356 | { 357 | swap(*j, *(j - 1)); 358 | } 359 | } 360 | } 361 | } 362 | 363 | template 364 | struct vector 365 | { 366 | vector() 367 | : data_(0), size_(0), capacity_(0) 368 | { 369 | } 370 | vector(const size_t new_size, const T& value = T()) 371 | : data_(0), size_(0), capacity_(0) 372 | { 373 | resize(new_size, value); 374 | } 375 | ~vector() 376 | { 377 | clear(); 378 | } 379 | vector(const vector& v) 380 | : data_(0), size_(0), capacity_(0) 381 | { 382 | resize(v.size()); 383 | for (size_t i = 0; i < size_; i++) 384 | { 385 | data_[i] = v.data_[i]; 386 | } 387 | } 388 | 389 | vector& operator=(const vector& v) 390 | { 391 | if (this == &v) 392 | { 393 | return *this; 394 | } 395 | resize(0); 396 | resize(v.size()); 397 | for 
(size_t i = 0; i < size_; i++) 398 | { 399 | data_[i] = v.data_[i]; 400 | } 401 | return *this; 402 | } 403 | 404 | void resize(const size_t new_size, const T& value = T()) 405 | { 406 | try_alloc(new_size); 407 | if (new_size > size_) 408 | { 409 | for (size_t i = size_; i < new_size; i++) 410 | { 411 | new (&data_[i]) T(value); 412 | } 413 | } 414 | else if (new_size < size_) 415 | { 416 | for (size_t i = new_size; i < size_; i++) 417 | { 418 | data_[i].~T(); 419 | } 420 | } 421 | size_ = new_size; 422 | } 423 | 424 | void clear() 425 | { 426 | for (size_t i = 0; i < size_; i++) 427 | { 428 | data_[i].~T(); 429 | } 430 | delete[](char*) data_; 431 | data_ = 0; 432 | size_ = 0; 433 | capacity_ = 0; 434 | } 435 | 436 | T* data() const 437 | { 438 | return data_; 439 | } 440 | size_t size() const 441 | { 442 | return size_; 443 | } 444 | T& operator[](size_t i) const 445 | { 446 | return data_[i]; 447 | } 448 | T* begin() const 449 | { 450 | return &data_[0]; 451 | } 452 | T* end() const 453 | { 454 | return &data_[size_]; 455 | } 456 | bool empty() const 457 | { 458 | return size_ == 0; 459 | } 460 | 461 | void push_back(const T& t) 462 | { 463 | try_alloc(size_ + 1); 464 | new (&data_[size_]) T(t); 465 | size_++; 466 | } 467 | 468 | void insert(T* pos, T* b, T* e) 469 | { 470 | vector* v = 0; 471 | if (b >= begin() && b < end()) 472 | { 473 | //the same vector 474 | v = new vector(*this); 475 | b = v->begin() + (b - begin()); 476 | e = v->begin() + (e - begin()); 477 | } 478 | size_t diff = pos - begin(); 479 | try_alloc(size_ + (e - b)); 480 | pos = begin() + diff; 481 | memmove(pos + (e - b), pos, (end() - pos) * sizeof(T)); 482 | size_t len = e - b; 483 | size_ += len; 484 | for (size_t i = 0; i < len; i++) 485 | { 486 | *pos = *b; 487 | pos++; 488 | b++; 489 | } 490 | delete v; 491 | } 492 | 493 | T* erase(T* pos) 494 | { 495 | pos->~T(); 496 | memmove(pos, pos + 1, (end() - pos - 1) * sizeof(T)); 497 | size_--; 498 | return pos; 499 | } 500 | 501 | protected: 
502 | T* data_; 503 | size_t size_; 504 | size_t capacity_; 505 | void try_alloc(size_t new_size) 506 | { 507 | if (new_size * 3 / 2 > capacity_ / 2) 508 | { 509 | capacity_ = new_size * 2; 510 | T* new_data = (T*)new char[capacity_ * sizeof(T)]; 511 | memset(static_cast(new_data), 0, capacity_ * sizeof(T)); 512 | if (data_) 513 | { 514 | memmove(new_data, data_, sizeof(T) * size_); 515 | delete[](char*) data_; 516 | } 517 | data_ = new_data; 518 | } 519 | } 520 | }; 521 | 522 | struct NCNN_EXPORT string : public vector 523 | { 524 | string() 525 | { 526 | } 527 | string(const char* str) 528 | { 529 | size_t len = strlen(str); 530 | resize(len); 531 | memcpy(data_, str, len); 532 | } 533 | const char* c_str() const 534 | { 535 | return (const char*)data_; 536 | } 537 | bool operator==(const string& str2) const 538 | { 539 | return strcmp(data_, str2.data_) == 0; 540 | } 541 | bool operator==(const char* str2) const 542 | { 543 | return strcmp(data_, str2) == 0; 544 | } 545 | bool operator!=(const char* str2) const 546 | { 547 | return strcmp(data_, str2) != 0; 548 | } 549 | string& operator+=(const string& str1) 550 | { 551 | insert(end(), str1.begin(), str1.end()); 552 | return *this; 553 | } 554 | }; 555 | 556 | inline string operator+(const string& str1, const string& str2) 557 | { 558 | string str(str1); 559 | str.insert(str.end(), str2.begin(), str2.end()); 560 | return str; 561 | } 562 | 563 | } // namespace std 564 | 565 | #endif // NCNN_SIMPLESTL_H 566 | -------------------------------------------------------------------------------- /lib/cmake/ncnn/ncnn-release.cmake: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------- 2 | # Generated CMake target import file for configuration "release". 3 | #---------------------------------------------------------------- 4 | 5 | # Commands may need to know the format version. 
6 | set(CMAKE_IMPORT_FILE_VERSION 1) 7 | 8 | # Import target "ncnn" for configuration "release" 9 | set_property(TARGET ncnn APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) 10 | set_target_properties(ncnn PROPERTIES 11 | IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" 12 | IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libncnn.a" 13 | ) 14 | 15 | list(APPEND _IMPORT_CHECK_TARGETS ncnn ) 16 | list(APPEND _IMPORT_CHECK_FILES_FOR_ncnn "${_IMPORT_PREFIX}/lib/libncnn.a" ) 17 | 18 | # Commands beyond this point should not need to know the version. 19 | set(CMAKE_IMPORT_FILE_VERSION) 20 | -------------------------------------------------------------------------------- /lib/cmake/ncnn/ncnn.cmake: -------------------------------------------------------------------------------- 1 | # Generated by CMake 2 | 3 | if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.6) 4 | message(FATAL_ERROR "CMake >= 2.6.0 required") 5 | endif() 6 | cmake_policy(PUSH) 7 | cmake_policy(VERSION 2.6...3.20) 8 | #---------------------------------------------------------------- 9 | # Generated CMake target import file. 10 | #---------------------------------------------------------------- 11 | 12 | # Commands may need to know the format version. 13 | set(CMAKE_IMPORT_FILE_VERSION 1) 14 | 15 | # Protect against multiple inclusion, which would fail when already imported targets are added once more. 
16 | set(_targetsDefined) 17 | set(_targetsNotDefined) 18 | set(_expectedTargets) 19 | foreach(_expectedTarget ncnn) 20 | list(APPEND _expectedTargets ${_expectedTarget}) 21 | if(NOT TARGET ${_expectedTarget}) 22 | list(APPEND _targetsNotDefined ${_expectedTarget}) 23 | endif() 24 | if(TARGET ${_expectedTarget}) 25 | list(APPEND _targetsDefined ${_expectedTarget}) 26 | endif() 27 | endforeach() 28 | if("${_targetsDefined}" STREQUAL "${_expectedTargets}") 29 | unset(_targetsDefined) 30 | unset(_targetsNotDefined) 31 | unset(_expectedTargets) 32 | set(CMAKE_IMPORT_FILE_VERSION) 33 | cmake_policy(POP) 34 | return() 35 | endif() 36 | if(NOT "${_targetsDefined}" STREQUAL "") 37 | message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n") 38 | endif() 39 | unset(_targetsDefined) 40 | unset(_targetsNotDefined) 41 | unset(_expectedTargets) 42 | 43 | 44 | # Compute the installation prefix relative to this file. 45 | get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) 46 | get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) 47 | get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) 48 | get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) 49 | if(_IMPORT_PREFIX STREQUAL "/") 50 | set(_IMPORT_PREFIX "") 51 | endif() 52 | 53 | # Create imported target ncnn 54 | add_library(ncnn STATIC IMPORTED) 55 | 56 | set_target_properties(ncnn PROPERTIES 57 | INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/ncnn" 58 | INTERFACE_LINK_LIBRARIES "OpenMP::OpenMP_CXX;Threads::Threads" 59 | INTERFACE_POSITION_INDEPENDENT_CODE "ON" 60 | ) 61 | 62 | if(CMAKE_VERSION VERSION_LESS 2.8.12) 63 | message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") 64 | endif() 65 | 66 | # Load information for each installed configuration. 
67 | get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 68 | file(GLOB CONFIG_FILES "${_DIR}/ncnn-*.cmake") 69 | foreach(f ${CONFIG_FILES}) 70 | include(${f}) 71 | endforeach() 72 | 73 | # Cleanup temporary variables. 74 | set(_IMPORT_PREFIX) 75 | 76 | # Loop over all imported files and verify that they actually exist 77 | foreach(target ${_IMPORT_CHECK_TARGETS} ) 78 | foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} ) 79 | if(NOT EXISTS "${file}" ) 80 | message(FATAL_ERROR "The imported target \"${target}\" references the file 81 | \"${file}\" 82 | but this file does not exist. Possible reasons include: 83 | * The file was deleted, renamed, or moved to another location. 84 | * An install or uninstall procedure did not complete successfully. 85 | * The installation package was faulty and contained 86 | \"${CMAKE_CURRENT_LIST_FILE}\" 87 | but not all the files it references. 88 | ") 89 | endif() 90 | endforeach() 91 | unset(_IMPORT_CHECK_FILES_FOR_${target}) 92 | endforeach() 93 | unset(_IMPORT_CHECK_TARGETS) 94 | 95 | # This file does not depend on other imported targets which have 96 | # been exported from the same project but in a separate export set. 97 | 98 | # Commands beyond this point should not need to know the version. 
99 | set(CMAKE_IMPORT_FILE_VERSION) 100 | cmake_policy(POP) 101 | -------------------------------------------------------------------------------- /lib/cmake/ncnn/ncnnConfig.cmake: -------------------------------------------------------------------------------- 1 | set(NCNN_OPENMP ON) 2 | set(NCNN_THREADS ON) 3 | set(NCNN_VULKAN OFF) 4 | set(NCNN_SHARED_LIB OFF) 5 | set(NCNN_SYSTEM_GLSLANG OFF) 6 | set(NCNN_SIMPLEVK ON) 7 | 8 | if(NCNN_OPENMP) 9 | find_package(OpenMP) 10 | endif() 11 | 12 | if(NCNN_THREADS) 13 | set(CMAKE_THREAD_PREFER_PTHREAD TRUE) 14 | set(THREADS_PREFER_PTHREAD_FLAG TRUE) 15 | find_package(Threads REQUIRED) 16 | endif() 17 | 18 | if(NCNN_VULKAN) 19 | if(NOT NCNN_SIMPLEVK) 20 | find_package(Vulkan REQUIRED) 21 | endif() 22 | 23 | if(NOT NCNN_SHARED_LIB) 24 | if(NCNN_SYSTEM_GLSLANG) 25 | find_package(glslang QUIET) 26 | if(NOT glslang_FOUND) 27 | set(GLSLANG_TARGET_DIR "") 28 | include(${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake) 29 | include(${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake) 30 | if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") 31 | # hlsl support can be optional 32 | include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") 33 | endif() 34 | include(${GLSLANG_TARGET_DIR}/glslangTargets.cmake) 35 | include(${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake) 36 | endif() 37 | else() 38 | set(glslang_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/glslang") 39 | find_package(glslang QUIET) 40 | endif() 41 | endif() 42 | endif() 43 | 44 | include(${CMAKE_CURRENT_LIST_DIR}/ncnn.cmake) 45 | -------------------------------------------------------------------------------- /lib/libncnn.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhouweigogogo/yolo11-ncnn/af1b6036af4a8098a74b02acade01d99a70fb4aa/lib/libncnn.a -------------------------------------------------------------------------------- /lib/pkgconfig/ncnn.pc: 
-------------------------------------------------------------------------------- 1 | prefix=${pcfiledir}/../.. 2 | librarydir=${prefix}/lib 3 | includedir=${prefix}/include 4 | 5 | Name: ncnn 6 | Description: high-performance neural network inference framework optimized for the mobile platform 7 | Version: 1.0.20240926 8 | URL: https://github.com/Tencent/ncnn 9 | Libs: -L"${librarydir}" -lncnn 10 | Cflags: -I"${includedir}" 11 | 12 | -------------------------------------------------------------------------------- /models/yolo11n.ncnn.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhouweigogogo/yolo11-ncnn/af1b6036af4a8098a74b02acade01d99a70fb4aa/models/yolo11n.ncnn.bin -------------------------------------------------------------------------------- /models/yolo11n.ncnn.param: -------------------------------------------------------------------------------- 1 | 7767517 2 | 255 303 3 | Input in0 0 1 in0 4 | Convolution conv_4 1 1 in0 1 0=16 1=3 11=3 12=1 13=2 14=1 2=1 3=2 4=1 5=1 6=432 5 | Swish silu_87 1 1 1 2 6 | Convolution conv_5 1 1 2 3 0=32 1=3 11=3 12=1 13=2 14=1 2=1 3=2 4=1 5=1 6=4608 7 | Swish silu_88 1 1 3 4 8 | Convolution conv_6 1 1 4 5 0=32 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=1024 9 | Swish silu_89 1 1 5 6 10 | Slice split_0 1 2 6 7 8 -23300=2,16,16 1=0 11 | Split splitncnn_0 1 3 8 9 10 11 12 | Convolution conv_7 1 1 11 12 0=8 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=1152 13 | Swish silu_90 1 1 12 13 14 | Convolution conv_8 1 1 13 14 0=16 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=1152 15 | Swish silu_91 1 1 14 15 16 | BinaryOp add_0 2 1 10 15 16 0=0 17 | Concat cat_0 3 1 7 9 16 17 0=0 18 | Convolution conv_9 1 1 17 18 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=3072 19 | Swish silu_92 1 1 18 19 20 | Convolution conv_10 1 1 19 20 0=64 1=3 11=3 12=1 13=2 14=1 2=1 3=2 4=1 5=1 6=36864 21 | Swish silu_93 1 1 20 21 22 | Convolution conv_11 1 1 21 22 0=64 1=1 11=1 12=1 13=1 14=0 
2=1 3=1 4=0 5=1 6=4096 23 | Swish silu_94 1 1 22 23 24 | Slice split_1 1 2 23 24 25 -23300=2,32,32 1=0 25 | Split splitncnn_1 1 3 25 26 27 28 26 | Convolution conv_12 1 1 28 29 0=16 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=4608 27 | Swish silu_95 1 1 29 30 28 | Convolution conv_13 1 1 30 31 0=32 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=4608 29 | Swish silu_96 1 1 31 32 30 | BinaryOp add_1 2 1 27 32 33 0=0 31 | Concat cat_1 3 1 24 26 33 34 0=0 32 | Convolution conv_14 1 1 34 35 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=12288 33 | Swish silu_97 1 1 35 36 34 | Split splitncnn_2 1 2 36 37 38 35 | Convolution conv_15 1 1 38 39 0=128 1=3 11=3 12=1 13=2 14=1 2=1 3=2 4=1 5=1 6=147456 36 | Swish silu_98 1 1 39 40 37 | Convolution conv_16 1 1 40 41 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=16384 38 | Swish silu_99 1 1 41 42 39 | Slice split_2 1 2 42 43 44 -23300=2,64,64 1=0 40 | Split splitncnn_3 1 3 44 45 46 47 41 | Convolution conv_17 1 1 47 48 0=32 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=2048 42 | Swish silu_100 1 1 48 49 43 | Split splitncnn_4 1 2 49 50 51 44 | Convolution conv_18 1 1 51 52 0=32 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=9216 45 | Swish silu_101 1 1 52 53 46 | Convolution conv_19 1 1 53 54 0=32 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=9216 47 | Swish silu_102 1 1 54 55 48 | BinaryOp add_2 2 1 50 55 56 0=0 49 | Split splitncnn_5 1 2 56 57 58 50 | Convolution conv_20 1 1 58 59 0=32 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=9216 51 | Swish silu_103 1 1 59 60 52 | Convolution conv_21 1 1 60 61 0=32 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=9216 53 | Swish silu_104 1 1 61 62 54 | BinaryOp add_3 2 1 57 62 63 0=0 55 | Convolution conv_22 1 1 46 64 0=32 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=2048 56 | Swish silu_105 1 1 64 65 57 | Concat cat_2 2 1 63 65 66 0=0 58 | Convolution conv_23 1 1 66 67 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=4096 59 | Swish silu_106 1 1 67 68 60 | Concat cat_3 3 1 43 45 68 69 0=0 61 | Convolution conv_24 1 1 69 
70 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=24576 62 | Swish silu_107 1 1 70 71 63 | Split splitncnn_6 1 2 71 72 73 64 | Convolution conv_25 1 1 73 74 0=256 1=3 11=3 12=1 13=2 14=1 2=1 3=2 4=1 5=1 6=294912 65 | Swish silu_108 1 1 74 75 66 | Convolution conv_26 1 1 75 76 0=256 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=65536 67 | Swish silu_109 1 1 76 77 68 | Slice split_3 1 2 77 78 79 -23300=2,128,128 1=0 69 | Split splitncnn_7 1 3 79 80 81 82 70 | Convolution conv_27 1 1 82 83 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=8192 71 | Swish silu_110 1 1 83 84 72 | Split splitncnn_8 1 2 84 85 86 73 | Convolution conv_28 1 1 86 87 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 74 | Swish silu_111 1 1 87 88 75 | Convolution conv_29 1 1 88 89 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 76 | Swish silu_112 1 1 89 90 77 | BinaryOp add_4 2 1 85 90 91 0=0 78 | Split splitncnn_9 1 2 91 92 93 79 | Convolution conv_30 1 1 93 94 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 80 | Swish silu_113 1 1 94 95 81 | Convolution conv_31 1 1 95 96 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 82 | Swish silu_114 1 1 96 97 83 | BinaryOp add_5 2 1 92 97 98 0=0 84 | Convolution conv_32 1 1 81 99 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=8192 85 | Swish silu_115 1 1 99 100 86 | Concat cat_4 2 1 98 100 101 0=0 87 | Convolution conv_33 1 1 101 102 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=16384 88 | Swish silu_116 1 1 102 103 89 | Concat cat_5 3 1 78 80 103 104 0=0 90 | Convolution conv_34 1 1 104 105 0=256 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=98304 91 | Swish silu_117 1 1 105 106 92 | Convolution conv_35 1 1 106 107 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=32768 93 | Swish silu_118 1 1 107 108 94 | Split splitncnn_10 1 2 108 109 110 95 | Pooling maxpool2d_84 1 1 110 111 0=0 1=5 11=5 12=1 13=2 2=1 3=2 5=1 96 | Split splitncnn_11 1 2 111 112 113 97 | Pooling maxpool2d_85 1 1 113 114 0=0 1=5 11=5 12=1 13=2 2=1 3=2 5=1 98 | Split 
splitncnn_12 1 2 114 115 116 99 | Pooling maxpool2d_86 1 1 116 117 0=0 1=5 11=5 12=1 13=2 2=1 3=2 5=1 100 | Concat cat_6 4 1 109 112 115 117 118 0=0 101 | Convolution conv_36 1 1 118 119 0=256 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=131072 102 | Swish silu_119 1 1 119 120 103 | Convolution conv_37 1 1 120 121 0=256 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=65536 104 | Swish silu_120 1 1 121 122 105 | Slice split_4 1 2 122 123 124 -23300=2,128,128 1=0 106 | Split splitncnn_13 1 2 124 125 126 107 | Convolution conv_38 1 1 126 127 0=256 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=32768 108 | Reshape view_167 1 1 127 128 0=400 1=128 2=2 109 | Slice split_5 1 3 128 129 130 131 -23300=3,32,32,64 1=1 110 | Split splitncnn_14 1 2 131 132 133 111 | Permute transpose_177 1 1 129 134 0=1 112 | MatMul matmul_169 2 1 134 130 135 113 | BinaryOp mul_6 1 1 135 136 0=2 1=1 2=1.767767e-01 114 | Reshape reshape_166 1 1 132 137 0=20 1=20 2=128 115 | Softmax softmax_3 1 1 136 138 0=2 1=1 116 | MatMul matmultransb_0 2 1 133 138 139 0=1 117 | Reshape view_168 1 1 139 140 0=20 1=20 2=128 118 | ConvolutionDepthWise convdw_179 1 1 137 141 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=1152 7=128 119 | BinaryOp add_7 2 1 140 141 142 0=0 120 | Convolution conv_39 1 1 142 143 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=16384 121 | BinaryOp add_8 2 1 125 143 144 0=0 122 | Split splitncnn_15 1 2 144 145 146 123 | Convolution conv_40 1 1 146 147 0=256 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=32768 124 | Swish silu_121 1 1 147 148 125 | Convolution conv_41 1 1 148 149 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=32768 126 | BinaryOp add_9 2 1 145 149 150 0=0 127 | Concat cat_7 2 1 123 150 151 0=0 128 | Convolution conv_42 1 1 151 152 0=256 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=65536 129 | Swish silu_122 1 1 152 153 130 | Split splitncnn_16 1 2 153 154 155 131 | Interp upsample_164 1 1 155 156 0=1 1=2.000000e+00 2=2.000000e+00 6=0 132 | Concat cat_8 2 1 156 72 157 0=0 133 | Convolution 
conv_43 1 1 157 158 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=49152 134 | Swish silu_123 1 1 158 159 135 | Slice split_6 1 2 159 160 161 -23300=2,64,64 1=0 136 | Split splitncnn_17 1 3 161 162 163 164 137 | Convolution conv_44 1 1 164 165 0=32 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=18432 138 | Swish silu_124 1 1 165 166 139 | Convolution conv_45 1 1 166 167 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=18432 140 | Swish silu_125 1 1 167 168 141 | BinaryOp add_10 2 1 163 168 169 0=0 142 | Concat cat_9 3 1 160 162 169 170 0=0 143 | Convolution conv_46 1 1 170 171 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=24576 144 | Swish silu_126 1 1 171 172 145 | Split splitncnn_18 1 2 172 173 174 146 | Interp upsample_165 1 1 174 175 0=1 1=2.000000e+00 2=2.000000e+00 6=0 147 | Concat cat_10 2 1 175 37 176 0=0 148 | Convolution conv_47 1 1 176 177 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=16384 149 | Swish silu_127 1 1 177 178 150 | Slice split_7 1 2 178 179 180 -23300=2,32,32 1=0 151 | Split splitncnn_19 1 3 180 181 182 183 152 | Convolution conv_48 1 1 183 184 0=16 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=4608 153 | Swish silu_128 1 1 184 185 154 | Convolution conv_49 1 1 185 186 0=32 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=4608 155 | Swish silu_129 1 1 186 187 156 | BinaryOp add_11 2 1 182 187 188 0=0 157 | Concat cat_11 3 1 179 181 188 189 0=0 158 | Convolution conv_50 1 1 189 190 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=6144 159 | Swish silu_130 1 1 190 191 160 | Split splitncnn_20 1 3 191 192 193 194 161 | Convolution conv_51 1 1 193 195 0=64 1=3 11=3 12=1 13=2 14=1 2=1 3=2 4=1 5=1 6=36864 162 | Swish silu_131 1 1 195 196 163 | Concat cat_12 2 1 196 173 197 0=0 164 | Convolution conv_52 1 1 197 198 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=24576 165 | Swish silu_132 1 1 198 199 166 | Slice split_8 1 2 199 200 201 -23300=2,64,64 1=0 167 | Split splitncnn_21 1 3 201 202 203 204 168 | Convolution conv_53 1 1 204 205 0=32 1=3 11=3 12=1 13=1 
14=1 2=1 3=1 4=1 5=1 6=18432 169 | Swish silu_133 1 1 205 206 170 | Convolution conv_54 1 1 206 207 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=18432 171 | Swish silu_134 1 1 207 208 172 | BinaryOp add_12 2 1 203 208 209 0=0 173 | Concat cat_13 3 1 200 202 209 210 0=0 174 | Convolution conv_55 1 1 210 211 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=24576 175 | Swish silu_135 1 1 211 212 176 | Split splitncnn_22 1 3 212 213 214 215 177 | Convolution conv_56 1 1 214 216 0=128 1=3 11=3 12=1 13=2 14=1 2=1 3=2 4=1 5=1 6=147456 178 | Swish silu_136 1 1 216 217 179 | Concat cat_14 2 1 217 154 218 0=0 180 | Convolution conv_57 1 1 218 219 0=256 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=98304 181 | Swish silu_137 1 1 219 220 182 | Slice split_9 1 2 220 221 222 -23300=2,128,128 1=0 183 | Split splitncnn_23 1 3 222 223 224 225 184 | Convolution conv_58 1 1 225 226 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=8192 185 | Swish silu_138 1 1 226 227 186 | Split splitncnn_24 1 2 227 228 229 187 | Convolution conv_59 1 1 229 230 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 188 | Swish silu_139 1 1 230 231 189 | Convolution conv_60 1 1 231 232 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 190 | Swish silu_140 1 1 232 233 191 | BinaryOp add_13 2 1 228 233 234 0=0 192 | Split splitncnn_25 1 2 234 235 236 193 | Convolution conv_61 1 1 236 237 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 194 | Swish silu_141 1 1 237 238 195 | Convolution conv_62 1 1 238 239 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 196 | Swish silu_142 1 1 239 240 197 | BinaryOp add_14 2 1 235 240 241 0=0 198 | Convolution conv_63 1 1 224 242 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=8192 199 | Swish silu_143 1 1 242 243 200 | Concat cat_15 2 1 241 243 244 0=0 201 | Convolution conv_64 1 1 244 245 0=128 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=16384 202 | Swish silu_144 1 1 245 246 203 | Concat cat_16 3 1 221 223 246 247 0=0 204 | Convolution conv_65 1 1 247 248 0=256 1=1 
11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=98304 205 | Convolution conv_66 1 1 192 249 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 206 | Swish silu_146 1 1 249 250 207 | Convolution conv_67 1 1 250 251 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 208 | ConvolutionDepthWise convdw_180 1 1 194 252 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=576 7=64 209 | Swish silu_148 1 1 252 253 210 | Convolution conv_69 1 1 253 254 0=80 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=5120 211 | Swish silu_149 1 1 254 255 212 | ConvolutionDepthWise convdw_181 1 1 255 256 0=80 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=720 7=80 213 | Swish silu_150 1 1 256 257 214 | Convolution conv_70 1 1 257 258 0=80 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=6400 215 | Swish silu_151 1 1 258 259 216 | Swish silu_147 1 1 251 260 217 | Convolution conv_68 1 1 260 261 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=4096 218 | Convolution conv_72 1 1 213 262 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=73728 219 | Swish silu_152 1 1 262 263 220 | Convolution conv_73 1 1 263 264 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 221 | ConvolutionDepthWise convdw_182 1 1 215 265 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=1152 7=128 222 | Swish silu_154 1 1 265 266 223 | Convolution conv_75 1 1 266 267 0=80 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=10240 224 | Swish silu_155 1 1 267 268 225 | ConvolutionDepthWise convdw_183 1 1 268 269 0=80 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=720 7=80 226 | Swish silu_156 1 1 269 270 227 | Convolution conv_76 1 1 270 271 0=80 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=6400 228 | Swish silu_157 1 1 271 272 229 | Swish silu_153 1 1 264 273 230 | Convolution conv_74 1 1 273 274 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=4096 231 | Swish silu_145 1 1 248 275 232 | Split splitncnn_26 1 2 275 276 277 233 | Convolution conv_78 1 1 276 278 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=147456 234 | Swish silu_158 1 1 278 279 235 | Convolution conv_79 1 
1 279 280 0=64 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 236 | ConvolutionDepthWise convdw_184 1 1 277 281 0=256 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2304 7=256 237 | Swish silu_160 1 1 281 282 238 | Convolution conv_81 1 1 282 283 0=80 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=20480 239 | Swish silu_161 1 1 283 284 240 | ConvolutionDepthWise convdw_185 1 1 284 285 0=80 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=720 7=80 241 | Swish silu_162 1 1 285 286 242 | Convolution conv_82 1 1 286 287 0=80 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=6400 243 | Swish silu_163 1 1 287 288 244 | Swish silu_159 1 1 280 289 245 | Convolution conv_80 1 1 289 290 0=64 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=4096 246 | Permute permute_176 1 1 290 291 0=3 247 | Convolution convsigmoid_2 1 1 288 292 0=80 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=6400 9=4 248 | Permute permute_175 1 1 292 293 0=3 249 | Concat cat_17 2 1 291 293 out2 0=2 250 | Permute permute_174 1 1 274 295 0=3 251 | Convolution convsigmoid_1 1 1 272 296 0=80 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=6400 9=4 252 | Permute permute_173 1 1 296 297 0=3 253 | Concat cat_18 2 1 295 297 out1 0=2 254 | Permute permute_172 1 1 261 299 0=3 255 | Convolution convsigmoid_0 1 1 259 300 0=80 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=6400 9=4 256 | Permute permute_171 1 1 300 301 0=3 257 | Concat cat_19 2 1 299 301 out0 0=2 258 | -------------------------------------------------------------------------------- /src/yolov11.cpp: -------------------------------------------------------------------------------- 1 | #include "layer.h" 2 | #include "net.h" 3 | 4 | #include "opencv2/opencv.hpp" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define MAX_STRIDE 32 12 | 13 | struct Object 14 | { 15 | cv::Rect_ rect; 16 | int label; 17 | float prob; 18 | }; 19 | 20 | static float softmax( 21 | const float* src, 22 | float* dst, 23 | int length 24 | ) 25 | { 26 | float alpha = -FLT_MAX; 27 | for (int c = 0; c < 
length; c++) 28 | { 29 | float score = src[c]; 30 | if (score > alpha) 31 | { 32 | alpha = score; 33 | } 34 | } 35 | 36 | float denominator = 0; 37 | float dis_sum = 0; 38 | for (int i = 0; i < length; ++i) 39 | { 40 | dst[i] = expf(src[i] - alpha); 41 | denominator += dst[i]; 42 | } 43 | for (int i = 0; i < length; ++i) 44 | { 45 | dst[i] /= denominator; 46 | dis_sum += i * dst[i]; 47 | } 48 | return dis_sum; 49 | } 50 | static float clamp( 51 | float val, 52 | float min = 0.f, 53 | float max = 1280.f 54 | ) 55 | { 56 | return val > min ? (val < max ? val : max) : min; 57 | } 58 | static void non_max_suppression( 59 | std::vector& proposals, 60 | std::vector& results, 61 | int orin_h, 62 | int orin_w, 63 | float dh = 0, 64 | float dw = 0, 65 | float ratio_h = 1.0f, 66 | float ratio_w = 1.0f, 67 | float conf_thres = 0.25f, 68 | float iou_thres = 0.65f 69 | ) 70 | { 71 | results.clear(); 72 | std::vector bboxes; 73 | std::vector scores; 74 | std::vector labels; 75 | std::vector indices; 76 | 77 | for (auto& pro : proposals) 78 | { 79 | bboxes.push_back(pro.rect); 80 | scores.push_back(pro.prob); 81 | labels.push_back(pro.label); 82 | } 83 | 84 | cv::dnn::NMSBoxes( 85 | bboxes, 86 | scores, 87 | conf_thres, 88 | iou_thres, 89 | indices 90 | ); 91 | 92 | for (auto i : indices) 93 | { 94 | auto& bbox = bboxes[i]; 95 | float x0 = bbox.x; 96 | float y0 = bbox.y; 97 | float x1 = bbox.x + bbox.width; 98 | float y1 = bbox.y + bbox.height; 99 | float& score = scores[i]; 100 | int& label = labels[i]; 101 | 102 | x0 = (x0 - dw) / ratio_w; 103 | y0 = (y0 - dh) / ratio_h; 104 | x1 = (x1 - dw) / ratio_w; 105 | y1 = (y1 - dh) / ratio_h; 106 | 107 | x0 = clamp(x0, 0.f, orin_w); 108 | y0 = clamp(y0, 0.f, orin_h); 109 | x1 = clamp(x1, 0.f, orin_w); 110 | y1 = clamp(y1, 0.f, orin_h); 111 | 112 | Object obj; 113 | obj.rect.x = x0; 114 | obj.rect.y = y0; 115 | obj.rect.width = x1 - x0; 116 | obj.rect.height = y1 - y0; 117 | obj.prob = score; 118 | obj.label = label; 119 | 
results.push_back(obj); 120 | } 121 | } 122 | 123 | static void generate_proposals( 124 | int stride, 125 | const ncnn::Mat& feat_blob, 126 | const float prob_threshold, 127 | std::vector& objects 128 | ) 129 | { 130 | const int reg_max = 16; 131 | float dst[16]; 132 | const int num_w = feat_blob.w; 133 | const int num_grid_y = feat_blob.c; 134 | const int num_grid_x = feat_blob.h; 135 | 136 | const int num_class = num_w - 4 * reg_max; 137 | 138 | for (int i = 0; i < num_grid_y; i++) 139 | { 140 | for (int j = 0; j < num_grid_x; j++) 141 | { 142 | 143 | const float* matat = feat_blob.channel(i).row(j); 144 | 145 | int class_index = 0; 146 | float class_score = -FLT_MAX; 147 | for (int c = 0; c < num_class; c++) 148 | { 149 | float score = matat[4 * reg_max + c]; 150 | if (score > class_score) 151 | { 152 | class_index = c; 153 | class_score = score; 154 | } 155 | } 156 | 157 | if (class_score >= prob_threshold) 158 | { 159 | 160 | float x0 = j + 0.5f - softmax(matat, dst, 16); 161 | float y0 = i + 0.5f - softmax(matat + 16, dst, 16); 162 | float x1 = j + 0.5f + softmax(matat + 2 * 16, dst, 16); 163 | float y1 = i + 0.5f + softmax(matat + 3 * 16, dst, 16); 164 | 165 | x0 *= stride; 166 | y0 *= stride; 167 | x1 *= stride; 168 | y1 *= stride; 169 | 170 | Object obj; 171 | obj.rect.x = x0; 172 | obj.rect.y = y0; 173 | obj.rect.width = x1 - x0; 174 | obj.rect.height = y1 - y0; 175 | obj.label = class_index; 176 | obj.prob = class_score; 177 | objects.push_back(obj); 178 | 179 | } 180 | } 181 | } 182 | } 183 | 184 | 185 | 186 | static int detect_yolov11(const cv::Mat& bgr, std::vector& objects) 187 | { 188 | ncnn::Net yolov11; 189 | 190 | // yolov11.opt.use_vulkan_compute = true; 191 | // yolov10.opt.use_bf16_storage = true; 192 | 193 | // original pretrained model from https://github.com/ultralytics/ultralytics 194 | // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models 195 | if (yolov11.load_param("../models/yolov11n.ncnn.param")) 196 | exit(-1); 
197 | if (yolov11.load_model("../models/yolov11n.ncnn.bin")) 198 | exit(-1); 199 | 200 | const int target_size = 640; 201 | const float prob_threshold = 0.25f; 202 | const float nms_threshold = 0.45f; 203 | 204 | int img_w = bgr.cols; 205 | int img_h = bgr.rows; 206 | 207 | // letterbox pad to multiple of MAX_STRIDE 208 | int w = img_w; 209 | int h = img_h; 210 | float scale = 1.f; 211 | if (w > h) 212 | { 213 | scale = (float)target_size / w; 214 | w = target_size; 215 | h = h * scale; 216 | } 217 | else 218 | { 219 | scale = (float)target_size / h; 220 | h = target_size; 221 | w = w * scale; 222 | } 223 | 224 | ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h); 225 | 226 | // pad to target_size rectangle 227 | // ultralytics/yolo/data/dataloaders/v5augmentations.py letterbox 228 | // int wpad = (w + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w; 229 | // int hpad = (h + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h; 230 | 231 | int wpad = target_size - w; 232 | int hpad = target_size - h; 233 | 234 | int top = hpad / 2; 235 | int bottom = hpad - hpad / 2; 236 | int left = wpad / 2; 237 | int right = wpad - wpad / 2; 238 | 239 | ncnn::Mat in_pad; 240 | ncnn::copy_make_border(in, 241 | in_pad, 242 | top, 243 | bottom, 244 | left, 245 | right, 246 | ncnn::BORDER_CONSTANT, 247 | 114.f); 248 | 249 | const float norm_vals[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f }; 250 | in_pad.substract_mean_normalize(0, norm_vals); 251 | 252 | ncnn::Extractor ex = yolov11.create_extractor(); 253 | 254 | ex.input("in0", in_pad); 255 | 256 | std::vector proposals; 257 | 258 | 259 | // stride 8 260 | { 261 | ncnn::Mat out; 262 | ex.extract("out0", out); 263 | 264 | std::vector objects8; 265 | generate_proposals(8, out, prob_threshold, objects8); 266 | 267 | proposals.insert(proposals.end(), objects8.begin(), objects8.end()); 268 | } 269 | 270 | // stride 16 271 | { 272 | ncnn::Mat out; 273 | 274 | ex.extract("out1", out); 275 | 276 | 
std::vector objects16; 277 | generate_proposals(16, out, prob_threshold, objects16); 278 | 279 | proposals.insert(proposals.end(), objects16.begin(), objects16.end()); 280 | } 281 | 282 | // stride 32 283 | { 284 | ncnn::Mat out; 285 | 286 | ex.extract("out2", out); 287 | 288 | std::vector objects32; 289 | generate_proposals(32, out, prob_threshold, objects32); 290 | 291 | proposals.insert(proposals.end(), objects32.begin(), objects32.end()); 292 | } 293 | // objects = proposals; 294 | for (auto& pro : proposals) 295 | { 296 | float x0 = pro.rect.x; 297 | float y0 = pro.rect.y; 298 | float x1 = pro.rect.x + pro.rect.width; 299 | float y1 = pro.rect.y + pro.rect.height; 300 | float& score = pro.prob; 301 | int& label = pro.label; 302 | 303 | x0 = (x0 - (wpad / 2)) / scale; 304 | y0 = (y0 - (hpad / 2)) / scale; 305 | x1 = (x1 - (wpad / 2)) / scale; 306 | y1 = (y1 - (hpad / 2)) / scale; 307 | 308 | x0 = clamp(x0, 0.f, img_w); 309 | y0 = clamp(y0, 0.f, img_h); 310 | x1 = clamp(x1, 0.f, img_w); 311 | y1 = clamp(y1, 0.f, img_h); 312 | 313 | Object obj; 314 | obj.rect.x = x0; 315 | obj.rect.y = y0; 316 | obj.rect.width = x1 - x0; 317 | obj.rect.height = y1 - y0; 318 | obj.prob = score; 319 | obj.label = label; 320 | objects.push_back(obj); 321 | } 322 | non_max_suppression(proposals, objects, 323 | img_h, img_w, hpad / 2, wpad / 2, 324 | scale, scale, prob_threshold, nms_threshold); 325 | return 0; 326 | } 327 | 328 | static void draw_objects(const cv::Mat& bgr, const std::vector& objects) 329 | { 330 | static const char* class_names[] = { 331 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", 332 | "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", 333 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", 334 | "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", 
335 | "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", 336 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", 337 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", 338 | "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", 339 | "hair drier", "toothbrush" 340 | }; 341 | 342 | cv::Mat image = bgr.clone(); 343 | 344 | for (size_t i = 0; i < objects.size(); i++) 345 | { 346 | const Object& obj = objects[i]; 347 | 348 | fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, 349 | obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); 350 | 351 | cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); 352 | 353 | char text[256]; 354 | sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); 355 | 356 | int baseLine = 0; 357 | cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); 358 | 359 | int x = obj.rect.x; 360 | int y = obj.rect.y - label_size.height - baseLine; 361 | if (y < 0) 362 | y = 0; 363 | if (x + label_size.width > image.cols) 364 | x = image.cols - label_size.width; 365 | 366 | cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), 367 | cv::Scalar(255, 255, 255), -1); 368 | 369 | cv::putText(image, text, cv::Point(x, y + label_size.height), 370 | cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); 371 | } 372 | cv::imwrite("output.jpg", image); 373 | // cv::imshow("image", image); 374 | // cv::waitKey(0); 375 | } 376 | 377 | int main(int argc, char** argv) 378 | { 379 | if (argc != 2) 380 | { 381 | fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); 382 | return -1; 383 | } 384 | 385 | const char* imagepath = argv[1]; 386 | 387 | cv::Mat m = cv::imread(imagepath, 1); 388 | // cv::resize(m, m, cv::Size(640,640)); 389 | 
if (m.empty()) 390 | { 391 | fprintf(stderr, "cv::imread %s failed\n", imagepath); 392 | return -1; 393 | } 394 | 395 | std::vector objects; 396 | detect_yolov11(m, objects); 397 | 398 | draw_objects(m, objects); 399 | 400 | return 0; 401 | } 402 | --------------------------------------------------------------------------------