├── assets ├── coco.torchscript.ncnn.bin └── coco.torchscript.ncnn.param ├── .gitignore ├── CMakeLists.txt ├── .github └── workflows │ └── release.yml ├── nanodet.h ├── README.md ├── wasmFeatureDetect.js ├── nanodetncnn.cpp ├── index.html └── nanodet.cpp /assets/coco.torchscript.ncnn.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nihui/ncnn-webassembly-nanodet/HEAD/assets/coco.torchscript.ncnn.bin -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | .vscode 3 | 4 | # CMake build directory 5 | build/ 6 | 7 | # files 8 | deploy/ 9 | model/ 10 | ncnn-20220216-webassembly/ 11 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
1 | # Build script for the ncnn WebAssembly nanodet demo.
2 | # Configure with -DWASM_FEATURE=<basic|simd|threads|simd-threads>; the value selects
3 | # the matching prebuilt ncnn package directory and names the output nanodet-<WASM_FEATURE>.
4 | 
5 | # cmake_minimum_required() must precede project() so that policy defaults are
6 | # in place before project() enables the languages.
7 | cmake_minimum_required(VERSION 3.10)
8 | 
9 | project(ncnn-webassembly-nanodet)
10 | 
11 | # Default to an optimized build, but keep an explicit -DCMAKE_BUILD_TYPE=... from the user.
12 | if(NOT CMAKE_BUILD_TYPE)
13 |   set(CMAKE_BUILD_TYPE Release)
14 | endif()
15 | 
16 | if(NOT WASM_FEATURE)
17 |   message(FATAL_ERROR "You must pass cmake option -DWASM_FEATURE and possible values are basic, simd, threads and simd-threads")
18 | endif()
19 | 
20 | # Point find_package() at the ncnn package config shipped for the selected feature variant.
21 | set(ncnn_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ncnn-20230223-webassembly/${WASM_FEATURE}/lib/cmake/ncnn")
22 | find_package(ncnn REQUIRED)
23 | 
24 | # Emscripten settings shared by every variant: runtime/filesystem settings, the
25 | # symbols exported to JavaScript, and the assets directory passed to --preload-file.
26 | set(NANODET_EMCC_FLAGS "-s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=256MB -s EXIT_RUNTIME=1 -sEXPORTED_FUNCTIONS=['_nanodet_ncnn','_malloc','_free'] --preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@.")
27 | string(APPEND CMAKE_C_FLAGS " ${NANODET_EMCC_FLAGS}")
28 | string(APPEND CMAKE_CXX_FLAGS " ${NANODET_EMCC_FLAGS}")
29 | # CMAKE_EXE_LINKER_FLAGS is the variable CMake reads when linking executables; the
30 | # previous spelling (CMAKE_EXECUTBLE_LINKER_FLAGS) only set an unused variable.
31 | string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NANODET_EMCC_FLAGS}")
32 | 
33 | # Variants whose name contains "threads" (threads, simd-threads) also need OpenMP/pthread flags.
34 | # The expansion is quoted so the feature name is compared as a string and is never
35 | # re-interpreted as the name of another variable.
36 | if("${WASM_FEATURE}" MATCHES "threads")
37 |   set(NANODET_THREAD_FLAGS "-fopenmp -pthread -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=4")
38 |   string(APPEND CMAKE_C_FLAGS " ${NANODET_THREAD_FLAGS}")
39 |   string(APPEND CMAKE_CXX_FLAGS " ${NANODET_THREAD_FLAGS}")
40 |   string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NANODET_THREAD_FLAGS}")
41 | endif()
42 | 
43 | # One executable per configured feature, e.g. nanodet-simd-threads.
44 | add_executable(nanodet-${WASM_FEATURE} nanodet.cpp nanodetncnn.cpp)
45 | target_link_libraries(nanodet-${WASM_FEATURE} PRIVATE ncnn)
46 | 
-------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: build-and-deploy 2 | on: 3 | push: 4 | branches: 5 | - master 6 | 7 | env: 8 | EMSCRIPTEN_VERSION: 3.1.28 9 | 10 | jobs: 11 | build-and-deploy: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: emsdk 16 | run: | 17 | git clone https://github.com/emscripten-core/emsdk.git 18 | cd emsdk 19 | ./emsdk install $EMSCRIPTEN_VERSION 20 | ./emsdk activate $EMSCRIPTEN_VERSION 21 | 22 | - name: ncnn 23 | run: | 24 | wget -q https://github.com/Tencent/ncnn/releases/download/20230223/ncnn-20230223-webassembly.zip 25 | unzip -q ncnn-20230223-webassembly.zip 26 | 27 | - name: build 28 | run: | 29 | source emsdk/emsdk_env.sh 30 | mkdir build && cd build 31 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=basic .. 32 | make -j4 33 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=simd .. 
34 | make -j4 35 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=threads .. 36 | make -j4 37 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=simd-threads .. 38 | make -j4 39 | 40 | - name: collect-deploy-files 41 | run: | 42 | mkdir deploy && cd deploy 43 | cp ../build/*.data . 44 | cp ../build/*.js . 45 | cp ../build/*.wasm . 46 | cp ../*.html . 47 | cp ../*.js . 48 | 49 | - name: deploy 50 | uses: JamesIves/github-pages-deploy-action@4.1.1 51 | with: 52 | branch: gh-pages 53 | folder: deploy 54 | single-commit: true 55 | -------------------------------------------------------------------------------- /nanodet.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #ifndef NANODET_H 16 | #define NANODET_H 17 | 18 | #include 19 | #include 20 | 21 | struct Object 22 | { 23 | cv::Rect_ rect; 24 | int label; 25 | float prob; 26 | }; 27 | 28 | class NanoDet 29 | { 30 | public: 31 | NanoDet(); 32 | 33 | int load(const char* modeltype, bool use_gpu = false); 34 | 35 | int detect(const cv::Mat& rgba, std::vector& objects, float prob_threshold = 0.4f, float nms_threshold = 0.5f); 36 | 37 | int draw(cv::Mat& rgba, const std::vector& objects); 38 | 39 | private: 40 | ncnn::Net nanodet; 41 | int target_size = 416; 42 | std::vector strides = {8, 16, 32, 64}; 43 | std::vector class_names{ 44 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", 45 | "train", "truck", "boat", "traffic light", "fire hydrant", 46 | "stop sign", "parking meter", "bench", "bird", "cat", "dog", 47 | "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", 48 | "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", 49 | "skis", "snowboard", "sports ball", "kite", "baseball bat", 50 | "baseball glove", "skateboard", "surfboard", "tennis racket", 51 | "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", 52 | "banana", "apple", "sandwich", "orange", "broccoli", "carrot", 53 | "hot dog", "pizza", "donut", "cake", "chair", "couch", 54 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", 55 | "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", 56 | "toaster", "sink", "refrigerator", "book", "clock", "vase", 57 | "scissors", "teddy bear", "hair drier", "toothbrush"}; 58 | float mean_vals[3] = {103.53f, 116.28f, 123.675f}; 59 | float norm_vals[3] = {1.f / 57.375f, 1.f / 57.12f, 1.f / 58.395f}; 60 | ncnn::UnlockedPoolAllocator blob_pool_allocator; 61 | ncnn::PoolAllocator workspace_pool_allocator; 62 | }; 63 | 64 | #endif // NANODET_H 65 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # ncnn-webassembly-nanodet 2 | 3 | # 更新 nanodet-plus 版本 4 | 5 | 站在巨人的肩膀上,感谢 nihui 大佬 6 | 7 | 参考 https://github.com/Tencent/ncnn/blob/master/examples/nanodetplus_pnnx.cpp 修改了 nanodet-plus 版本的 Web 版 8 | 9 | open https://nihui.github.io/ncnn-webassembly-nanodet and enjoy 10 | 11 | # build and deploy 12 | 13 | 1. Install emscripten 14 | 15 | ```shell 16 | git clone https://github.com/emscripten-core/emsdk.git 17 | cd emsdk 18 | ./emsdk install 3.1.28 19 | ./emsdk activate 3.1.28 20 | 21 | source emsdk/emsdk_env.sh 22 | ``` 23 | 24 | 2. clone this repo 25 | 26 | ```shell 27 | git clone https://github.com/nihui/ncnn-webassembly-nanodet.git 28 | ``` 29 | 30 | 3. Enter the repo directory, then download and extract the ncnn webassembly package 31 | 32 | ```shell 33 | wget https://github.com/Tencent/ncnn/releases/download/20230223/ncnn-20230223-webassembly.zip 34 | unzip ncnn-20230223-webassembly.zip 35 | ``` 36 | 37 | 4. Build four WASM feature variants 38 | 39 | ```shell 40 | mkdir build # in ncnn-webassembly-nanodet directory 41 | cd build 42 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=basic .. 43 | make -j4 44 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=simd .. 45 | make -j4 46 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=threads .. 47 | make -j4 48 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=simd-threads .. 49 | make -j4 50 | ``` 51 | 52 | 5. 
Deploy the *.data, *.js, *.wasm and *.html files to your web server (you can find them in the build directory and in this repo) 53 | 54 | ``` 55 | # deploy files 56 | deploy/ 57 | ├── index.html 58 | ├── nanodet-basic.data 59 | ├── nanodet-basic.js 60 | ├── nanodet-basic.wasm 61 | ├── nanodet-simd.data 62 | ├── nanodet-simd.js 63 | ├── nanodet-simd-threads.data 64 | ├── nanodet-simd-threads.js 65 | ├── nanodet-simd-threads.wasm 66 | ├── nanodet-simd-threads.worker.js 67 | ├── nanodet-simd.wasm 68 | ├── nanodet-threads.data 69 | ├── nanodet-threads.js 70 | ├── nanodet-threads.wasm 71 | ├── nanodet-threads.worker.js 72 | └── wasmFeatureDetect.js 73 | ``` 74 | 75 | 6. Start a local server (python3 as an example) 76 | 77 | ``` 78 | python3 -m http.server --directory deploy 79 | ``` 80 | 81 | 7. Access the local server (Chrome as an example) 82 | 83 | ``` 84 | # launch chrome browser, enter following command to address bar and press ENTER: 85 | chrome://flags/#unsafely-treat-insecure-origin-as-secure 86 | 87 | # enter following keyword to "Search flags" and press ENTER: 88 | "insecure origins" 89 | you will find "Insecure origins treated as secure" key 90 | 91 | #enter local server url and click right side dropdown list, select "Enabled" 92 | url example: http://192.168.1.100:8000 93 | 94 | #relaunch chrome browser and access http://192.168.1.100:8000 (replace 192.168.1.100 with your local ip) 95 | ``` 96 | -------------------------------------------------------------------------------- /wasmFeatureDetect.js: -------------------------------------------------------------------------------- 1 | !(function (e, n) { 2 | "object" == typeof exports && "undefined" != typeof module 3 | ? (module.exports = n()) 4 | : "function" == typeof define && define.amd 5 | ? 
define(n) 6 | : ((e = e || self).wasmFeatureDetect = n()); 7 | })(this, function () { 8 | "use strict"; 9 | return { 10 | bigInt: () => 11 | (async (e) => { 12 | try { 13 | return ( 14 | (await WebAssembly.instantiate(e)).instance.exports.b(BigInt(0)) === 15 | BigInt(0) 16 | ); 17 | } catch (e) { 18 | return !1; 19 | } 20 | })( 21 | new Uint8Array([ 22 | 0, 97, 115, 109, 1, 0, 0, 0, 1, 6, 1, 96, 1, 126, 1, 126, 3, 2, 1, 0, 23 | 7, 5, 1, 1, 98, 0, 0, 10, 6, 1, 4, 0, 32, 0, 11, 24 | ]) 25 | ), 26 | bulkMemory: async () => 27 | WebAssembly.validate( 28 | new Uint8Array([ 29 | 0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 5, 3, 1, 30 | 0, 1, 10, 14, 1, 12, 0, 65, 0, 65, 0, 65, 0, 252, 10, 0, 0, 11, 31 | ]) 32 | ), 33 | exceptions: async () => 34 | WebAssembly.validate( 35 | new Uint8Array([ 36 | 0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 8, 1, 37 | 6, 0, 6, 64, 25, 11, 11, 38 | ]) 39 | ), 40 | multiValue: async () => 41 | WebAssembly.validate( 42 | new Uint8Array([ 43 | 0, 97, 115, 109, 1, 0, 0, 0, 1, 6, 1, 96, 0, 2, 127, 127, 3, 2, 1, 0, 44 | 10, 8, 1, 6, 0, 65, 0, 65, 0, 11, 45 | ]) 46 | ), 47 | mutableGlobals: async () => 48 | WebAssembly.validate( 49 | new Uint8Array([ 50 | 0, 97, 115, 109, 1, 0, 0, 0, 2, 8, 1, 1, 97, 1, 98, 3, 127, 1, 6, 6, 51 | 1, 127, 1, 65, 0, 11, 7, 5, 1, 1, 97, 3, 1, 52 | ]) 53 | ), 54 | referenceTypes: async () => 55 | WebAssembly.validate( 56 | new Uint8Array([ 57 | 0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 7, 1, 58 | 5, 0, 208, 112, 26, 11, 59 | ]) 60 | ), 61 | saturatedFloatToInt: async () => 62 | WebAssembly.validate( 63 | new Uint8Array([ 64 | 0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 12, 1, 65 | 10, 0, 67, 0, 0, 0, 0, 252, 0, 26, 11, 66 | ]) 67 | ), 68 | signExtensions: async () => 69 | WebAssembly.validate( 70 | new Uint8Array([ 71 | 0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 8, 1, 72 | 6, 0, 65, 0, 192, 26, 11, 73 | ]) 74 | ), 75 | 
simd: async () => 76 | WebAssembly.validate( 77 | new Uint8Array([ 78 | 0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 9, 1, 79 | 7, 0, 65, 0, 253, 15, 26, 11, 80 | ]) 81 | ), 82 | tailCall: async () => 83 | WebAssembly.validate( 84 | new Uint8Array([ 85 | 0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 6, 1, 86 | 4, 0, 18, 0, 11, 87 | ]) 88 | ), 89 | threads: () => 90 | (async (e) => { 91 | try { 92 | return ( 93 | "undefined" != typeof MessageChannel && 94 | new MessageChannel().port1.postMessage(new SharedArrayBuffer(1)), 95 | WebAssembly.validate(e) 96 | ); 97 | } catch (e) { 98 | return !1; 99 | } 100 | })( 101 | new Uint8Array([ 102 | 0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 5, 4, 1, 103 | 3, 1, 1, 10, 11, 1, 9, 0, 65, 0, 254, 16, 2, 0, 26, 11, 104 | ]) 105 | ), 106 | }; 107 | }); 108 | -------------------------------------------------------------------------------- /nanodetncnn.cpp: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 
14 | 15 | #include 16 | #include 17 | #include "nanodet.h" 18 | 19 | static int draw_fps(cv::Mat& rgba) 20 | { 21 | // resolve moving average 22 | float avg_fps = 0.f; 23 | { 24 | static double t0 = 0.f; 25 | static float fps_history[10] = {0.f}; 26 | 27 | double t1 = ncnn::get_current_time(); 28 | if (t0 == 0.f) 29 | { 30 | t0 = t1; 31 | return 0; 32 | } 33 | 34 | float fps = 1000.f / (t1 - t0); 35 | t0 = t1; 36 | 37 | for (int i = 9; i >= 1; i--) 38 | { 39 | fps_history[i] = fps_history[i - 1]; 40 | } 41 | fps_history[0] = fps; 42 | 43 | if (fps_history[9] == 0.f) 44 | { 45 | return 0; 46 | } 47 | 48 | for (int i = 0; i < 10; i++) 49 | { 50 | avg_fps += fps_history[i]; 51 | } 52 | avg_fps /= 10.f; 53 | } 54 | 55 | char text[32]; 56 | sprintf(text, "FPS=%.2f", avg_fps); 57 | 58 | int baseLine = 0; 59 | cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); 60 | 61 | int y = 0; 62 | int x = rgba.cols - label_size.width; 63 | 64 | cv::rectangle(rgba, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), 65 | cv::Scalar(255, 255, 255, 255), -1); 66 | 67 | cv::putText(rgba, text, cv::Point(x, y + label_size.height), 68 | cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0, 255)); 69 | 70 | return 0; 71 | } 72 | 73 | static NanoDet* g_nanodet = 0; 74 | 75 | static void on_image_render(cv::Mat& rgba) 76 | { 77 | if (!g_nanodet) 78 | { 79 | g_nanodet = new NanoDet; 80 | g_nanodet->load("coco.torchscript.ncnn"); 81 | } 82 | 83 | std::vector objects; 84 | g_nanodet->detect(rgba, objects); 85 | 86 | g_nanodet->draw(rgba, objects); 87 | 88 | draw_fps(rgba); 89 | } 90 | 91 | #ifdef __EMSCRIPTEN_PTHREADS__ 92 | 93 | static const unsigned char* rgba_data = 0; 94 | static int w = 0; 95 | static int h = 0; 96 | 97 | static ncnn::Mutex lock; 98 | static ncnn::ConditionVariable condition; 99 | 100 | static ncnn::Mutex finish_lock; 101 | static ncnn::ConditionVariable finish_condition; 102 | 103 | static void worker() 
104 | { 105 | while (1) 106 | { 107 | lock.lock(); 108 | while (rgba_data == 0) 109 | { 110 | condition.wait(lock); 111 | } 112 | 113 | cv::Mat rgba(h, w, CV_8UC4, (void*)rgba_data); 114 | 115 | on_image_render(rgba); 116 | 117 | rgba_data = 0; 118 | 119 | lock.unlock(); 120 | 121 | finish_lock.lock(); 122 | finish_condition.signal(); 123 | finish_lock.unlock(); 124 | } 125 | } 126 | 127 | #include 128 | static std::thread t(worker); 129 | 130 | extern "C" { 131 | 132 | void nanodet_ncnn(unsigned char* _rgba_data, int _w, int _h) 133 | { 134 | lock.lock(); 135 | while (rgba_data != 0) 136 | { 137 | condition.wait(lock); 138 | } 139 | 140 | rgba_data = _rgba_data; 141 | w = _w; 142 | h = _h; 143 | 144 | lock.unlock(); 145 | 146 | condition.signal(); 147 | 148 | // wait for finished 149 | finish_lock.lock(); 150 | while (rgba_data != 0) 151 | { 152 | finish_condition.wait(finish_lock); 153 | } 154 | finish_lock.unlock(); 155 | } 156 | 157 | } 158 | 159 | #else // __EMSCRIPTEN_PTHREADS__ 160 | 161 | extern "C" { 162 | 163 | void nanodet_ncnn(unsigned char* rgba_data, int w, int h) 164 | { 165 | cv::Mat rgba(h, w, CV_8UC4, (void*)rgba_data); 166 | 167 | on_image_render(rgba); 168 | } 169 | 170 | } 171 | 172 | #endif // __EMSCRIPTEN_PTHREADS__ 173 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | ncnn webassembly nanodet 7 | 16 | 17 | 18 | 19 | 20 |
21 |

ncnn webassembly nanodet

22 |
23 | 24 |
25 |
26 | 27 |
28 | 29 |
30 | 31 | 32 | 33 | 218 | 219 | 220 | 221 | 222 | -------------------------------------------------------------------------------- /nanodet.cpp: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #include "nanodet.h" 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | static inline float intersection_area(const Object& a, const Object& b) 22 | { 23 | cv::Rect_ inter = a.rect & b.rect; 24 | return inter.area(); 25 | } 26 | 27 | static void qsort_descent_inplace(std::vector& faceobjects, int left, int right) 28 | { 29 | int i = left; 30 | int j = right; 31 | float p = faceobjects[(left + right) / 2].prob; 32 | 33 | while (i <= j) 34 | { 35 | while (faceobjects[i].prob > p) 36 | i++; 37 | 38 | while (faceobjects[j].prob < p) 39 | j--; 40 | 41 | if (i <= j) 42 | { 43 | // swap 44 | std::swap(faceobjects[i], faceobjects[j]); 45 | 46 | i++; 47 | j--; 48 | } 49 | } 50 | 51 | // #pragma omp parallel sections 52 | { 53 | // #pragma omp section 54 | { 55 | if (left < j) qsort_descent_inplace(faceobjects, left, j); 56 | } 57 | // #pragma omp section 58 | { 59 | if (i < right) qsort_descent_inplace(faceobjects, i, right); 60 | } 61 | } 62 | } 63 | 64 | static void 
qsort_descent_inplace(std::vector& faceobjects) 65 | { 66 | if (faceobjects.empty()) 67 | return; 68 | 69 | qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1); 70 | } 71 | 72 | static void nms_sorted_bboxes(const std::vector& faceobjects, std::vector& picked, float nms_threshold) 73 | { 74 | picked.clear(); 75 | 76 | const int n = faceobjects.size(); 77 | 78 | std::vector areas(n); 79 | for (int i = 0; i < n; i++) 80 | { 81 | areas[i] = faceobjects[i].rect.width * faceobjects[i].rect.height; 82 | } 83 | 84 | for (int i = 0; i < n; i++) 85 | { 86 | const Object& a = faceobjects[i]; 87 | 88 | int keep = 1; 89 | for (int j = 0; j < (int)picked.size(); j++) 90 | { 91 | const Object& b = faceobjects[picked[j]]; 92 | 93 | // intersection over union 94 | float inter_area = intersection_area(a, b); 95 | float union_area = areas[i] + areas[picked[j]] - inter_area; 96 | if (inter_area / union_area > nms_threshold) 97 | keep = 0; 98 | } 99 | 100 | if (keep) 101 | picked.push_back(i); 102 | } 103 | } 104 | 105 | static inline float sigmoid(float x) 106 | { 107 | return 1.0f / (1.0f + exp(-x)); 108 | } 109 | 110 | static void generate_proposals(const ncnn::Mat& pred, int stride, int _num_class, const ncnn::Mat& in_pad, float prob_threshold, std::vector& objects) 111 | { 112 | const int num_grid = pred.h; 113 | 114 | int num_grid_x = pred.w; 115 | int num_grid_y = pred.h; 116 | 117 | const int num_class = _num_class; // number of classes. 
80 for COCO 118 | const int reg_max_1 = (pred.c - num_class) / 4; 119 | 120 | for (int i = 0; i < num_grid_y; i++) 121 | { 122 | for (int j = 0; j < num_grid_x; j++) 123 | { 124 | // find label with max score 125 | int label = -1; 126 | float score = -FLT_MAX; 127 | for (int k = 0; k < num_class; k++) 128 | { 129 | float s = pred.channel(k).row(i)[j]; 130 | if (s > score) 131 | { 132 | label = k; 133 | score = s; 134 | } 135 | } 136 | 137 | score = sigmoid(score); 138 | if (score >= prob_threshold) 139 | { 140 | ncnn::Mat bbox_pred(reg_max_1, 4); 141 | for (int k = 0; k < reg_max_1 * 4; k++) 142 | { 143 | bbox_pred[k] = pred.channel(num_class + k).row(i)[j]; 144 | } 145 | { 146 | ncnn::Layer* softmax = ncnn::create_layer("Softmax"); 147 | 148 | ncnn::ParamDict pd; 149 | pd.set(0, 1); // axis 150 | pd.set(1, 1); 151 | softmax->load_param(pd); 152 | 153 | ncnn::Option opt; 154 | opt.num_threads = 1; 155 | opt.use_packing_layout = false; 156 | 157 | softmax->create_pipeline(opt); 158 | 159 | softmax->forward_inplace(bbox_pred, opt); 160 | 161 | softmax->destroy_pipeline(opt); 162 | 163 | delete softmax; 164 | } 165 | 166 | float pred_ltrb[4]; 167 | for (int k = 0; k < 4; k++) 168 | { 169 | float dis = 0.f; 170 | const float* dis_after_sm = bbox_pred.row(k); 171 | for (int l = 0; l < reg_max_1; l++) 172 | { 173 | dis += l * dis_after_sm[l]; 174 | } 175 | 176 | pred_ltrb[k] = dis * stride; 177 | } 178 | 179 | float pb_cx = j * stride; 180 | float pb_cy = i * stride; 181 | 182 | float x0 = pb_cx - pred_ltrb[0]; 183 | float y0 = pb_cy - pred_ltrb[1]; 184 | float x1 = pb_cx + pred_ltrb[2]; 185 | float y1 = pb_cy + pred_ltrb[3]; 186 | 187 | Object obj; 188 | obj.rect.x = x0; 189 | obj.rect.y = y0; 190 | obj.rect.width = x1 - x0; 191 | obj.rect.height = y1 - y0; 192 | obj.label = label; 193 | obj.prob = score; 194 | 195 | objects.push_back(obj); 196 | } 197 | } 198 | } 199 | } 200 | 201 | NanoDet::NanoDet() 202 | { 203 | blob_pool_allocator.set_size_compare_ratio(0.f); 204 | 
workspace_pool_allocator.set_size_compare_ratio(0.f); 205 | } 206 | 207 | int NanoDet::load(const char* modeltype, bool use_gpu) 208 | { 209 | nanodet.clear(); 210 | blob_pool_allocator.clear(); 211 | workspace_pool_allocator.clear(); 212 | 213 | ncnn::set_cpu_powersave(2); 214 | ncnn::set_omp_num_threads(ncnn::get_big_cpu_count()); 215 | 216 | nanodet.opt = ncnn::Option(); 217 | 218 | #if NCNN_VULKAN 219 | nanodet.opt.use_vulkan_compute = use_gpu; 220 | #endif 221 | 222 | nanodet.opt.num_threads = ncnn::get_big_cpu_count(); 223 | nanodet.opt.blob_allocator = &blob_pool_allocator; 224 | nanodet.opt.workspace_allocator = &workspace_pool_allocator; 225 | 226 | char parampath[256]; 227 | char modelpath[256]; 228 | sprintf(parampath, "%s.param", modeltype); 229 | sprintf(modelpath, "%s.bin", modeltype); 230 | 231 | nanodet.load_param(parampath); 232 | nanodet.load_model(modelpath); 233 | 234 | return 0; 235 | } 236 | 237 | int NanoDet::detect(const cv::Mat& rgba, std::vector& objects, float prob_threshold, float nms_threshold) 238 | { 239 | int width = rgba.cols; 240 | int height = rgba.rows; 241 | 242 | // pad to multiple of 32 243 | int num_classes = class_names.size(); 244 | int w = width; 245 | int h = height; 246 | float scale = 1.f; 247 | if (w > h) 248 | { 249 | scale = (float)target_size / w; 250 | w = target_size; 251 | h = h * scale; 252 | } 253 | else 254 | { 255 | scale = (float)target_size / h; 256 | h = target_size; 257 | w = w * scale; 258 | } 259 | 260 | ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgba.data, ncnn::Mat::PIXEL_RGBA2BGR, width, height, w, h); 261 | 262 | // pad to target_size rectangle 263 | int wpad = (w + 31) / 32 * 32 - w; 264 | int hpad = (h + 31) / 32 * 32 - h; 265 | ncnn::Mat in_pad; 266 | ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); 267 | 268 | in_pad.substract_mean_normalize(mean_vals, norm_vals); 269 | 270 | ncnn::Extractor ex = nanodet.create_extractor(); 
271 | 272 | ex.input("in0", in_pad); 273 | 274 | std::vector proposals; 275 | 276 | // stride 8 277 | { 278 | ncnn::Mat pred; 279 | ex.extract("231", pred); 280 | 281 | std::vector objects8; 282 | generate_proposals(pred, 8, num_classes, in_pad, prob_threshold, objects8); 283 | 284 | proposals.insert(proposals.end(), objects8.begin(), objects8.end()); 285 | } 286 | 287 | // stride 16 288 | { 289 | ncnn::Mat pred; 290 | ex.extract("228", pred); 291 | 292 | std::vector objects16; 293 | generate_proposals(pred, 16, num_classes, in_pad, prob_threshold, objects16); 294 | 295 | proposals.insert(proposals.end(), objects16.begin(), objects16.end()); 296 | } 297 | 298 | // stride 32 299 | { 300 | ncnn::Mat pred; 301 | ex.extract("225", pred); 302 | 303 | std::vector objects32; 304 | generate_proposals(pred, 32, num_classes, in_pad, prob_threshold, objects32); 305 | 306 | proposals.insert(proposals.end(), objects32.begin(), objects32.end()); 307 | } 308 | 309 | // stride 64 310 | { 311 | ncnn::Mat pred; 312 | ex.extract("222", pred); 313 | 314 | std::vector objects64; 315 | generate_proposals(pred, 64, num_classes, in_pad, prob_threshold, objects64); 316 | 317 | proposals.insert(proposals.end(), objects64.begin(), objects64.end()); 318 | } 319 | 320 | // sort all proposals by score from highest to lowest 321 | qsort_descent_inplace(proposals); 322 | 323 | // apply nms with nms_threshold 324 | std::vector picked; 325 | nms_sorted_bboxes(proposals, picked, nms_threshold); 326 | 327 | int count = picked.size(); 328 | 329 | objects.resize(count); 330 | for (int i = 0; i < count; i++) 331 | { 332 | objects[i] = proposals[picked[i]]; 333 | 334 | // adjust offset to original unpadded 335 | float x0 = (objects[i].rect.x - (wpad / 2)) / scale; 336 | float y0 = (objects[i].rect.y - (hpad / 2)) / scale; 337 | float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; 338 | float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; 339 | 340 | // 
clip 341 | x0 = std::max(std::min(x0, (float)(width - 1)), 0.f); 342 | y0 = std::max(std::min(y0, (float)(height - 1)), 0.f); 343 | x1 = std::max(std::min(x1, (float)(width - 1)), 0.f); 344 | y1 = std::max(std::min(y1, (float)(height - 1)), 0.f); 345 | 346 | objects[i].rect.x = x0; 347 | objects[i].rect.y = y0; 348 | objects[i].rect.width = x1 - x0; 349 | objects[i].rect.height = y1 - y0; 350 | } 351 | 352 | // sort objects by area 353 | struct 354 | { 355 | bool operator()(const Object& a, const Object& b) const 356 | { 357 | return a.rect.area() > b.rect.area(); 358 | } 359 | } objects_area_greater; 360 | std::sort(objects.begin(), objects.end(), objects_area_greater); 361 | 362 | return 0; 363 | } 364 | 365 | int NanoDet::draw(cv::Mat& rgba, const std::vector& objects) 366 | { 367 | static const unsigned char colors[19][3] = { 368 | { 54, 67, 244}, 369 | { 99, 30, 233}, 370 | {176, 39, 156}, 371 | {183, 58, 103}, 372 | {181, 81, 63}, 373 | {243, 150, 33}, 374 | {244, 169, 3}, 375 | {212, 188, 0}, 376 | {136, 150, 0}, 377 | { 80, 175, 76}, 378 | { 74, 195, 139}, 379 | { 57, 220, 205}, 380 | { 59, 235, 255}, 381 | { 7, 193, 255}, 382 | { 0, 152, 255}, 383 | { 34, 87, 255}, 384 | { 72, 85, 121}, 385 | {158, 158, 158}, 386 | {139, 125, 96} 387 | }; 388 | 389 | int color_index = 0; 390 | 391 | for (size_t i = 0; i < objects.size(); i++) 392 | { 393 | const Object& obj = objects[i]; 394 | 395 | // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, 396 | // obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); 397 | 398 | const unsigned char* color = colors[color_index % 19]; 399 | color_index++; 400 | 401 | cv::Scalar cc(color[0], color[1], color[2], 255); 402 | 403 | cv::rectangle(rgba, cv::Rect(obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height), cc, 2); 404 | 405 | char text[256]; 406 | sprintf(text, "%s %.1f%%", class_names[obj.label].c_str(), obj.prob * 100); 407 | 408 | int baseLine = 0; 409 | cv::Size label_size = 
cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); 410 | 411 | int x = obj.rect.x; 412 | int y = obj.rect.y - label_size.height - baseLine; 413 | if (y < 0) 414 | y = 0; 415 | if (x + label_size.width > rgba.cols) 416 | x = rgba.cols - label_size.width; 417 | 418 | cv::rectangle(rgba, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), cc, -1); 419 | 420 | cv::Scalar textcc = (color[0] + color[1] + color[2] >= 381) ? cv::Scalar(0, 0, 0, 255) : cv::Scalar(255, 255, 255, 255); 421 | 422 | cv::putText(rgba, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.5, textcc, 1); 423 | } 424 | 425 | return 0; 426 | } 427 | -------------------------------------------------------------------------------- /assets/coco.torchscript.ncnn.param: -------------------------------------------------------------------------------- 1 | 7767517 2 | 201 237 3 | Input in0 0 1 in0 4 | Convolution convrelu_0 1 1 in0 1 0=24 1=3 3=2 4=1 5=1 6=648 9=2 -23310=1,1.000000e-01 5 | Pooling maxpool2d_154 1 1 1 2 1=3 2=2 3=1 5=1 6 | Split splitncnn_0 1 2 2 3 4 7 | ConvolutionDepthWise convdw_162 1 1 4 5 0=24 1=3 3=2 4=1 5=1 6=216 7=24 8 | Convolution convrelu_1 1 1 3 6 0=58 1=1 5=1 6=1392 9=2 -23310=1,1.000000e-01 9 | ConvolutionDepthWise convdw_163 1 1 6 7 0=58 1=3 3=2 4=1 5=1 6=522 7=58 10 | Convolution convrelu_3 1 1 5 8 0=58 1=1 5=1 6=1392 9=2 -23310=1,1.000000e-01 11 | Convolution convrelu_2 1 1 7 9 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01 12 | Concat cat_0 2 1 8 9 10 13 | ShuffleChannel channelshuffle_0 1 1 10 11 0=2 14 | Slice chunk_0 1 2 11 12 13 -23300=2,-233,-233 15 | Convolution convrelu_4 1 1 13 14 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01 16 | ConvolutionDepthWise convdw_164 1 1 14 15 0=58 1=3 4=1 5=1 6=522 7=58 17 | Convolution convrelu_5 1 1 15 16 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01 18 | Concat cat_1 2 1 12 16 17 19 | ShuffleChannel channelshuffle_1 1 1 17 18 0=2 20 | Slice chunk_1 1 2 18 19 20 
-23300=2,-233,-233 21 | Convolution convrelu_6 1 1 20 21 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01 22 | ConvolutionDepthWise convdw_165 1 1 21 22 0=58 1=3 4=1 5=1 6=522 7=58 23 | Convolution convrelu_7 1 1 22 23 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01 24 | Concat cat_2 2 1 19 23 24 25 | ShuffleChannel channelshuffle_2 1 1 24 25 0=2 26 | Slice chunk_2 1 2 25 26 27 -23300=2,-233,-233 27 | Convolution convrelu_8 1 1 27 28 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01 28 | ConvolutionDepthWise convdw_166 1 1 28 29 0=58 1=3 4=1 5=1 6=522 7=58 29 | Convolution convrelu_9 1 1 29 30 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01 30 | Concat cat_3 2 1 26 30 31 31 | ShuffleChannel channelshuffle_3 1 1 31 32 0=2 32 | Split splitncnn_1 1 3 32 33 34 35 33 | ConvolutionDepthWise convdw_167 1 1 35 36 0=116 1=3 3=2 4=1 5=1 6=1044 7=116 34 | Convolution convrelu_10 1 1 34 37 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 35 | ConvolutionDepthWise convdw_168 1 1 37 38 0=116 1=3 3=2 4=1 5=1 6=1044 7=116 36 | Convolution convrelu_12 1 1 36 39 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 37 | Convolution convrelu_11 1 1 38 40 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 38 | Concat cat_4 2 1 39 40 41 39 | ShuffleChannel channelshuffle_4 1 1 41 42 0=2 40 | Slice chunk_3 1 2 42 43 44 -23300=2,-233,-233 41 | Convolution convrelu_13 1 1 44 45 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 42 | ConvolutionDepthWise convdw_169 1 1 45 46 0=116 1=3 4=1 5=1 6=1044 7=116 43 | Convolution convrelu_14 1 1 46 47 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 44 | Concat cat_5 2 1 43 47 48 45 | ShuffleChannel channelshuffle_5 1 1 48 49 0=2 46 | Slice chunk_4 1 2 49 50 51 -23300=2,-233,-233 47 | Convolution convrelu_15 1 1 51 52 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 48 | ConvolutionDepthWise convdw_170 1 1 52 53 0=116 1=3 4=1 5=1 6=1044 7=116 49 | Convolution convrelu_16 1 1 53 54 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 50 | Concat cat_6 2 1 50 54 55 51 | 
ShuffleChannel channelshuffle_6 1 1 55 56 0=2 52 | Slice chunk_5 1 2 56 57 58 -23300=2,-233,-233 53 | Convolution convrelu_17 1 1 58 59 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 54 | ConvolutionDepthWise convdw_171 1 1 59 60 0=116 1=3 4=1 5=1 6=1044 7=116 55 | Convolution convrelu_18 1 1 60 61 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 56 | Concat cat_7 2 1 57 61 62 57 | ShuffleChannel channelshuffle_7 1 1 62 63 0=2 58 | Slice chunk_6 1 2 63 64 65 -23300=2,-233,-233 59 | Convolution convrelu_19 1 1 65 66 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 60 | ConvolutionDepthWise convdw_172 1 1 66 67 0=116 1=3 4=1 5=1 6=1044 7=116 61 | Convolution convrelu_20 1 1 67 68 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 62 | Concat cat_8 2 1 64 68 69 63 | ShuffleChannel channelshuffle_8 1 1 69 70 0=2 64 | Slice chunk_7 1 2 70 71 72 -23300=2,-233,-233 65 | Convolution convrelu_21 1 1 72 73 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 66 | ConvolutionDepthWise convdw_173 1 1 73 74 0=116 1=3 4=1 5=1 6=1044 7=116 67 | Convolution convrelu_22 1 1 74 75 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 68 | Concat cat_9 2 1 71 75 76 69 | ShuffleChannel channelshuffle_9 1 1 76 77 0=2 70 | Slice chunk_8 1 2 77 78 79 -23300=2,-233,-233 71 | Convolution convrelu_23 1 1 79 80 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 72 | ConvolutionDepthWise convdw_174 1 1 80 81 0=116 1=3 4=1 5=1 6=1044 7=116 73 | Convolution convrelu_24 1 1 81 82 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 74 | Concat cat_10 2 1 78 82 83 75 | ShuffleChannel channelshuffle_10 1 1 83 84 0=2 76 | Slice chunk_9 1 2 84 85 86 -23300=2,-233,-233 77 | Convolution convrelu_25 1 1 86 87 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 78 | ConvolutionDepthWise convdw_175 1 1 87 88 0=116 1=3 4=1 5=1 6=1044 7=116 79 | Convolution convrelu_26 1 1 88 89 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01 80 | Concat cat_11 2 1 85 89 90 81 | ShuffleChannel channelshuffle_11 1 1 90 91 0=2 82 | Split splitncnn_2 1 
3 91 92 93 94 83 | ConvolutionDepthWise convdw_176 1 1 94 95 0=232 1=3 3=2 4=1 5=1 6=2088 7=232 84 | Convolution convrelu_27 1 1 93 96 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01 85 | ConvolutionDepthWise convdw_177 1 1 96 97 0=232 1=3 3=2 4=1 5=1 6=2088 7=232 86 | Convolution convrelu_29 1 1 95 98 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01 87 | Convolution convrelu_28 1 1 97 99 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01 88 | Concat cat_12 2 1 98 99 100 89 | ShuffleChannel channelshuffle_12 1 1 100 101 0=2 90 | Slice chunk_10 1 2 101 102 103 -23300=2,-233,-233 91 | Convolution convrelu_30 1 1 103 104 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01 92 | ConvolutionDepthWise convdw_178 1 1 104 105 0=232 1=3 4=1 5=1 6=2088 7=232 93 | Convolution convrelu_31 1 1 105 106 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01 94 | Concat cat_13 2 1 102 106 107 95 | ShuffleChannel channelshuffle_13 1 1 107 108 0=2 96 | Slice chunk_11 1 2 108 109 110 -23300=2,-233,-233 97 | Convolution convrelu_32 1 1 110 111 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01 98 | ConvolutionDepthWise convdw_179 1 1 111 112 0=232 1=3 4=1 5=1 6=2088 7=232 99 | Convolution convrelu_33 1 1 112 113 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01 100 | Concat cat_14 2 1 109 113 114 101 | ShuffleChannel channelshuffle_14 1 1 114 115 0=2 102 | Slice chunk_12 1 2 115 116 117 -23300=2,-233,-233 103 | Convolution convrelu_34 1 1 117 118 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01 104 | ConvolutionDepthWise convdw_180 1 1 118 119 0=232 1=3 4=1 5=1 6=2088 7=232 105 | Convolution convrelu_35 1 1 119 120 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01 106 | Concat cat_15 2 1 116 120 121 107 | ShuffleChannel channelshuffle_15 1 1 121 122 0=2 108 | Convolution convrelu_36 1 1 122 123 0=96 1=1 5=1 6=44544 9=2 -23310=1,1.000000e-01 109 | Split splitncnn_3 1 3 123 124 125 126 110 | Interp upsample_155 1 1 125 127 0=2 1=2.000000e+00 2=2.000000e+00 111 | Convolution convrelu_37 1 1 92 128 0=96 1=1 5=1 
6=22272 9=2 -23310=1,1.000000e-01 112 | Concat cat_16 2 1 127 128 129 113 | Split splitncnn_4 1 2 129 130 131 114 | Convolution convrelu_38 1 1 130 132 0=48 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 115 | Split splitncnn_5 1 2 132 133 134 116 | ConvolutionDepthWise convdwrelu_0 1 1 134 135 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01 117 | Concat cat_17 2 1 133 135 136 118 | Convolution conv_56 1 1 136 137 0=48 1=1 5=1 6=4608 119 | Split splitncnn_6 1 2 137 138 139 120 | ConvolutionDepthWise convdw_182 1 1 139 140 0=48 1=3 4=1 5=1 6=432 7=48 121 | Concat cat_18 2 1 138 140 141 122 | ConvolutionDepthWise convdw_183 1 1 131 142 0=192 1=5 4=2 5=1 6=4800 7=192 123 | Convolution conv_57 1 1 142 143 0=96 1=1 5=1 6=18432 124 | BinaryOp add_0 2 1 141 143 144 125 | Split splitncnn_7 1 2 144 145 146 126 | Interp upsample_156 1 1 146 147 0=2 1=2.000000e+00 2=2.000000e+00 127 | Convolution convrelu_39 1 1 33 148 0=96 1=1 5=1 6=11136 9=2 -23310=1,1.000000e-01 128 | Concat cat_19 2 1 147 148 149 129 | Split splitncnn_8 1 2 149 150 151 130 | Convolution convrelu_40 1 1 150 152 0=48 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 131 | Split splitncnn_9 1 2 152 153 154 132 | ConvolutionDepthWise convdwrelu_1 1 1 154 155 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01 133 | Concat cat_20 2 1 153 155 156 134 | Convolution conv_59 1 1 156 157 0=48 1=1 5=1 6=4608 135 | Split splitncnn_10 1 2 157 158 159 136 | ConvolutionDepthWise convdw_185 1 1 159 160 0=48 1=3 4=1 5=1 6=432 7=48 137 | Concat cat_21 2 1 158 160 161 138 | ConvolutionDepthWise convdw_186 1 1 151 162 0=192 1=5 4=2 5=1 6=4800 7=192 139 | Convolution conv_60 1 1 162 163 0=96 1=1 5=1 6=18432 140 | BinaryOp add_1 2 1 161 163 164 141 | Split splitncnn_11 1 2 164 165 166 142 | ConvolutionDepthWise convdwrelu_2 1 1 166 167 0=96 1=5 3=2 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 143 | Convolution convrelu_41 1 1 167 168 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 144 | Concat cat_22 2 1 168 145 169 145 | Split 
splitncnn_12 1 2 169 170 171 146 | Convolution convrelu_42 1 1 170 172 0=48 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 147 | Split splitncnn_13 1 2 172 173 174 148 | ConvolutionDepthWise convdwrelu_3 1 1 174 175 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01 149 | Concat cat_23 2 1 173 175 176 150 | Convolution conv_63 1 1 176 177 0=48 1=1 5=1 6=4608 151 | Split splitncnn_14 1 2 177 178 179 152 | ConvolutionDepthWise convdw_189 1 1 179 180 0=48 1=3 4=1 5=1 6=432 7=48 153 | Concat cat_24 2 1 178 180 181 154 | ConvolutionDepthWise convdw_190 1 1 171 182 0=192 1=5 4=2 5=1 6=4800 7=192 155 | Convolution conv_64 1 1 182 183 0=96 1=1 5=1 6=18432 156 | BinaryOp add_2 2 1 181 183 184 157 | Split splitncnn_15 1 2 184 185 186 158 | ConvolutionDepthWise convdwrelu_4 1 1 186 187 0=96 1=5 3=2 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 159 | Convolution convrelu_43 1 1 187 188 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 160 | Concat cat_25 2 1 188 124 189 161 | Split splitncnn_16 1 2 189 190 191 162 | Convolution convrelu_44 1 1 190 192 0=48 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 163 | Split splitncnn_17 1 2 192 193 194 164 | ConvolutionDepthWise convdwrelu_5 1 1 194 195 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01 165 | Concat cat_26 2 1 193 195 196 166 | Convolution conv_67 1 1 196 197 0=48 1=1 5=1 6=4608 167 | Split splitncnn_18 1 2 197 198 199 168 | ConvolutionDepthWise convdw_193 1 1 199 200 0=48 1=3 4=1 5=1 6=432 7=48 169 | Concat cat_27 2 1 198 200 201 170 | ConvolutionDepthWise convdw_194 1 1 191 202 0=192 1=5 4=2 5=1 6=4800 7=192 171 | Convolution conv_68 1 1 202 203 0=96 1=1 5=1 6=18432 172 | BinaryOp add_3 2 1 201 203 204 173 | Split splitncnn_19 1 2 204 205 206 174 | ConvolutionDepthWise convdwrelu_6 1 1 126 207 0=96 1=5 3=2 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 175 | Convolution convrelu_46 1 1 207 208 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 176 | ConvolutionDepthWise convdwrelu_7 1 1 206 209 0=96 1=5 3=2 4=2 5=1 6=2400 7=96 9=2 
-23310=1,1.000000e-01 177 | Convolution convrelu_45 1 1 209 210 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 178 | BinaryOp add_4 2 1 208 210 211 179 | ConvolutionDepthWise convdwrelu_8 1 1 165 212 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 180 | Convolution convrelu_47 1 1 212 213 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 181 | ConvolutionDepthWise convdwrelu_10 1 1 185 214 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 182 | Convolution convrelu_48 1 1 214 215 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 183 | ConvolutionDepthWise convdwrelu_12 1 1 205 216 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 184 | Convolution convrelu_49 1 1 216 217 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 185 | ConvolutionDepthWise convdwrelu_14 1 1 211 218 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 186 | Convolution convrelu_50 1 1 218 219 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 187 | ConvolutionDepthWise convdwrelu_15 1 1 219 220 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 188 | Convolution convrelu_51 1 1 220 221 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 189 | Convolution conv_82 1 1 221 222 0=112 1=1 5=1 6=10752 190 | ConvolutionDepthWise convdwrelu_13 1 1 217 223 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 191 | Convolution convrelu_52 1 1 223 224 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 192 | Convolution conv_79 1 1 224 225 0=112 1=1 5=1 6=10752 193 | ConvolutionDepthWise convdwrelu_11 1 1 215 226 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 194 | Convolution convrelu_53 1 1 226 227 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 195 | Convolution conv_76 1 1 227 228 0=112 1=1 5=1 6=10752 196 | ConvolutionDepthWise convdwrelu_9 1 1 213 229 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01 197 | Convolution convrelu_54 1 1 229 230 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01 198 | Convolution conv_73 1 1 230 231 0=112 1=1 5=1 6=10752 199 | Reshape flatten_160 1 1 231 232 0=-1 
1=112 200 | Reshape flatten_159 1 1 228 233 0=-1 1=112 201 | Reshape flatten_158 1 1 225 234 0=-1 1=112 202 | Reshape flatten_157 1 1 222 235 0=-1 1=112 203 | Concat cat_28 4 1 232 233 234 235 out0 0=1 204 | --------------------------------------------------------------------------------