├── assets
    ├── coco.torchscript.ncnn.bin
    └── coco.torchscript.ncnn.param
├── .gitignore
├── CMakeLists.txt
├── .github
    └── workflows
    │   └── release.yml
├── nanodet.h
├── README.md
├── wasmFeatureDetect.js
├── nanodetncnn.cpp
├── index.html
└── nanodet.cpp


/assets/coco.torchscript.ncnn.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nihui/ncnn-webassembly-nanodet/HEAD/assets/coco.torchscript.ncnn.bin


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # IDE
 2 | .vscode
 3 | 
 4 | # CMake build directory
 5 | build/
 6 | 
 7 | # generated or downloaded artifacts
 8 | deploy/
 9 | model/
10 | ncnn-*-webassembly/
11 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.10)
 2 | 
 3 | project(ncnn-webassembly-nanodet)
 4 | if(NOT CMAKE_BUILD_TYPE) # default to an optimized build unless the user picked a configuration
 5 |     set(CMAKE_BUILD_TYPE release)
 6 | endif()
 7 | if(NOT WASM_FEATURE)
 8 |     message(FATAL_ERROR "You must pass cmake option -DWASM_FEATURE and possible values are basic, simd, threads and simd-threads")
 9 | endif()
10 | # Use the prebuilt ncnn package that matches the requested WebAssembly feature set.
11 | set(ncnn_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ncnn-20230223-webassembly/${WASM_FEATURE}/lib/cmake/ncnn")
12 | find_package(ncnn REQUIRED)
13 | # Emscripten runtime settings, appended to the compile flags and to the executable link flags.
14 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=256MB -s EXIT_RUNTIME=1")
15 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=256MB -s EXIT_RUNTIME=1")
16 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=256MB -s EXIT_RUNTIME=1")
17 | # Export the entry point called from index.html plus malloc/free, and preload the assets directory.
18 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sEXPORTED_FUNCTIONS=['_nanodet_ncnn','_malloc','_free'] --preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@.")
19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -sEXPORTED_FUNCTIONS=['_nanodet_ncnn','_malloc','_free'] --preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@.")
20 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -sEXPORTED_FUNCTIONS=['_nanodet_ncnn','_malloc','_free'] --preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@.")
21 | # Feature names containing "threads" additionally enable OpenMP and a pool of 4 pthreads.
22 | if("${WASM_FEATURE}" MATCHES "threads")
23 |     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp -pthread -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=4")
24 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -pthread -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=4")
25 |     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fopenmp -pthread -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=4")
26 | endif()
27 | 
28 | add_executable(nanodet-${WASM_FEATURE} nanodet.cpp nanodetncnn.cpp)
29 | target_link_libraries(nanodet-${WASM_FEATURE} PRIVATE ncnn)
30 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: build-and-deploy
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - master
 6 | # Emscripten SDK version that the emsdk step installs and activates.
 7 | env:
 8 |   EMSCRIPTEN_VERSION: 3.1.28
 9 | 
10 | jobs:
11 |   build-and-deploy:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - uses: actions/checkout@v2
15 |     - name: emsdk
16 |       run: |
17 |         git clone https://github.com/emscripten-core/emsdk.git
18 |         cd emsdk
19 |         ./emsdk install $EMSCRIPTEN_VERSION
20 |         ./emsdk activate $EMSCRIPTEN_VERSION
21 |     # Download and unpack the prebuilt ncnn WebAssembly package (same release that CMakeLists.txt points at).
22 |     - name: ncnn
23 |       run: |
24 |         wget -q https://github.com/Tencent/ncnn/releases/download/20230223/ncnn-20230223-webassembly.zip
25 |         unzip -q ncnn-20230223-webassembly.zip
26 |     # Configure and build the four WASM_FEATURE variants one after another in the same build directory.
27 |     - name: build
28 |       run: |
29 |         source emsdk/emsdk_env.sh
30 |         mkdir build && cd build
31 |         cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=basic ..
32 |         make -j4
33 |         cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=simd ..
34 |         make -j4
35 |         cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=threads ..
36 |         make -j4
37 |         cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=simd-threads ..
38 |         make -j4
39 |     # Gather the build outputs and the repository's top-level HTML/JS files into deploy/.
40 |     - name: collect-deploy-files
41 |       run: |
42 |         mkdir deploy && cd deploy
43 |         cp ../build/*.data .
44 |         cp ../build/*.js .
45 |         cp ../build/*.wasm .
46 |         cp ../*.html .
47 |         cp ../*.js .
48 |     # Publish the deploy folder to the gh-pages branch as a single commit.
49 |     - name: deploy
50 |       uses: JamesIves/github-pages-deploy-action@4.1.1
51 |       with:
52 |         branch: gh-pages
53 |         folder: deploy
54 |         single-commit: true
55 | 


--------------------------------------------------------------------------------
/nanodet.h:
--------------------------------------------------------------------------------
 1 | // Tencent is pleased to support the open source community by making ncnn available.
 2 | //
 3 | // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
 4 | //
 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 6 | // in compliance with the License. You may obtain a copy of the License at
 7 | //
 8 | // https://opensource.org/licenses/BSD-3-Clause
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software distributed
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 | // specific language governing permissions and limitations under the License.
14 | 
15 | #ifndef NANODET_H
16 | #define NANODET_H
17 | 
18 | #include <net.h>
19 | #include <simpleocv.h>
20 | 
21 | struct Object // one detection result
22 | {
23 |     cv::Rect_<float> rect; // bounding box; NOTE(review): coordinate space is decided by NanoDet::detect — confirm there
24 |     int label; // class id; presumably an index into NanoDet::class_names — verify against detect()/draw()
25 |     float prob; // detection score; the sort key used by qsort_descent_inplace in nanodet.cpp
26 | };
27 | 
28 | class NanoDet // detector wrapper around an ncnn::Net; used through load(), detect() and draw()
29 | {
30 | public:
31 |     NanoDet();
32 |     // Loads the model named by modeltype; nanodetncnn.cpp passes "coco.torchscript.ncnn". NOTE(review): return convention not visible here.
33 |     int load(const char* modeltype, bool use_gpu = false);
34 |     // Presumably fills objects with the detections found in rgba, using the given score and NMS thresholds — verify in nanodet.cpp.
35 |     int detect(const cv::Mat& rgba, std::vector<Object>& objects, float prob_threshold = 0.4f, float nms_threshold = 0.5f);
36 |     // Presumably renders the given objects onto rgba; nanodetncnn.cpp calls it right after detect().
37 |     int draw(cv::Mat& rgba, const std::vector<Object>& objects);
38 | 
39 | private:
40 |     ncnn::Net nanodet;
41 |     int target_size = 416; // NOTE(review): presumably the network input size in pixels — confirm in detect()
42 |     std::vector<int> strides = {8, 16, 32, 64}; // NOTE(review): presumably one entry per output feature level — confirm in detect()
43 |     std::vector<std::string> class_names{
44 |         "person", "bicycle", "car", "motorcycle", "airplane", "bus",
45 |         "train", "truck", "boat", "traffic light", "fire hydrant",
46 |         "stop sign", "parking meter", "bench", "bird", "cat", "dog",
47 |         "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
48 |         "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
49 |         "skis", "snowboard", "sports ball", "kite", "baseball bat",
50 |         "baseball glove", "skateboard", "surfboard", "tennis racket",
51 |         "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
52 |         "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
53 |         "hot dog", "pizza", "donut", "cake", "chair", "couch",
54 |         "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
55 |         "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
56 |         "toaster", "sink", "refrigerator", "book", "clock", "vase",
57 |         "scissors", "teddy bear", "hair drier", "toothbrush"};
58 |     float mean_vals[3] = {103.53f, 116.28f, 123.675f}; // NOTE(review): presumably per-channel means for input normalization; channel order unverified
59 |     float norm_vals[3] = {1.f / 57.375f, 1.f / 57.12f, 1.f / 58.395f}; // NOTE(review): presumably per-channel scale factors paired with mean_vals
60 |     ncnn::UnlockedPoolAllocator blob_pool_allocator;
61 |     ncnn::PoolAllocator workspace_pool_allocator;
62 | };
63 | 
64 | #endif // NANODET_H
65 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ncnn-webassembly-nanodet
 2 | 
 3 | # 更新 nanodet-plus 版本
 4 | 
 5 | 站在巨人的肩膀上，感谢 nihui 大佬
 6 | 
 7 | 参考 https://github.com/Tencent/ncnn/blob/master/examples/nanodetplus_pnnx.cpp 修改了 nanodet-plus 版本的 Web 版
 8 | 
 9 | open https://nihui.github.io/ncnn-webassembly-nanodet and enjoy
10 | 
11 | # build and deploy
12 | 
13 | 1. Install emscripten
14 | 
15 | ```shell
16 | git clone https://github.com/emscripten-core/emsdk.git
17 | cd emsdk
18 | ./emsdk install 3.1.28
19 | ./emsdk activate 3.1.28
20 | 
21 | source emsdk/emsdk_env.sh
22 | ```
23 | 
24 | 2. clone this repo
25 |    
26 | ```shell
27 | git clone https://github.com/nihui/ncnn-webassembly-nanodet.git
28 | ```
29 | 
30 | 3. Enter the repo directory, then download and extract the ncnn webassembly package
31 | 
32 | ```shell
33 | wget https://github.com/Tencent/ncnn/releases/download/20230223/ncnn-20230223-webassembly.zip
34 | unzip ncnn-20230223-webassembly.zip
35 | ```
36 | 
37 | 4. Build four WASM feature variants
38 | 
39 | ```shell
40 | mkdir build # in ncnn-webassembly-nanodet directory
41 | cd build
42 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=basic ..
43 | make -j4
44 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=simd ..
45 | make -j4
46 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=threads ..
47 | make -j4
48 | cmake -DCMAKE_TOOLCHAIN_FILE=$EMSDK/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -DWASM_FEATURE=simd-threads ..
49 | make -j4
50 | ```
51 | 
52 | 5. Deploy the `*.data`, `*.js`, `*.wasm` and `*.html` files to your web server (you can find them in the build directory and in this repo)
53 | 
54 | ```
55 | # deploy files
56 | deploy/
57 | ├── index.html
58 | ├── nanodet-basic.data
59 | ├── nanodet-basic.js
60 | ├── nanodet-basic.wasm
61 | ├── nanodet-simd.data
62 | ├── nanodet-simd.js
63 | ├── nanodet-simd-threads.data
64 | ├── nanodet-simd-threads.js
65 | ├── nanodet-simd-threads.wasm
66 | ├── nanodet-simd-threads.worker.js
67 | ├── nanodet-simd.wasm
68 | ├── nanodet-threads.data
69 | ├── nanodet-threads.js
70 | ├── nanodet-threads.wasm
71 | ├── nanodet-threads.worker.js
72 | └── wasmFeatureDetect.js
73 | ```
74 | 
75 | 6. Start a local server (python3 as an example)
76 | 
77 | ```
78 | python3 -m http.server --directory deploy
79 | ```
80 | 
81 | 7. Access the local server (Chrome as an example)
82 | 
83 | ```
84 | # launch chrome browser, enter following command to address bar and press ENTER:
85 | chrome://flags/#unsafely-treat-insecure-origin-as-secure
86 | 
87 | # enter following keyword to "Search flags" and press ENTER:
88 | "insecure origins"
89 | you will find "Insecure origins treated as secure" key
90 | 
91 | #enter local server url and click right side dropdown list, select "Enabled"
92 | url example: http://192.168.1.100:8000
93 | 
94 | #relaunch chrome browser and access http://192.168.1.100:8000 (replace 192.168.1.100 with your local ip)
95 | ```
96 | 


--------------------------------------------------------------------------------
/wasmFeatureDetect.js:
--------------------------------------------------------------------------------
  1 | !(function (e, n) {
  2 |   "object" == typeof exports && "undefined" != typeof module
  3 |     ? (module.exports = n())
  4 |     : "function" == typeof define && define.amd
  5 |     ? define(n)
  6 |     : ((e = e || self).wasmFeatureDetect = n());
  7 | })(this, function () {
  8 |   "use strict";
  9 |   return {
 10 |     bigInt: () =>
 11 |       (async (e) => {
 12 |         try {
 13 |           return (
 14 |             (await WebAssembly.instantiate(e)).instance.exports.b(BigInt(0)) ===
 15 |             BigInt(0)
 16 |           );
 17 |         } catch (e) {
 18 |           return !1;
 19 |         }
 20 |       })(
 21 |         new Uint8Array([
 22 |           0, 97, 115, 109, 1, 0, 0, 0, 1, 6, 1, 96, 1, 126, 1, 126, 3, 2, 1, 0,
 23 |           7, 5, 1, 1, 98, 0, 0, 10, 6, 1, 4, 0, 32, 0, 11,
 24 |         ])
 25 |       ),
 26 |     bulkMemory: async () =>
 27 |       WebAssembly.validate(
 28 |         new Uint8Array([
 29 |           0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 5, 3, 1,
 30 |           0, 1, 10, 14, 1, 12, 0, 65, 0, 65, 0, 65, 0, 252, 10, 0, 0, 11,
 31 |         ])
 32 |       ),
 33 |     exceptions: async () =>
 34 |       WebAssembly.validate(
 35 |         new Uint8Array([
 36 |           0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 8, 1,
 37 |           6, 0, 6, 64, 25, 11, 11,
 38 |         ])
 39 |       ),
 40 |     multiValue: async () =>
 41 |       WebAssembly.validate(
 42 |         new Uint8Array([
 43 |           0, 97, 115, 109, 1, 0, 0, 0, 1, 6, 1, 96, 0, 2, 127, 127, 3, 2, 1, 0,
 44 |           10, 8, 1, 6, 0, 65, 0, 65, 0, 11,
 45 |         ])
 46 |       ),
 47 |     mutableGlobals: async () =>
 48 |       WebAssembly.validate(
 49 |         new Uint8Array([
 50 |           0, 97, 115, 109, 1, 0, 0, 0, 2, 8, 1, 1, 97, 1, 98, 3, 127, 1, 6, 6,
 51 |           1, 127, 1, 65, 0, 11, 7, 5, 1, 1, 97, 3, 1,
 52 |         ])
 53 |       ),
 54 |     referenceTypes: async () =>
 55 |       WebAssembly.validate(
 56 |         new Uint8Array([
 57 |           0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 7, 1,
 58 |           5, 0, 208, 112, 26, 11,
 59 |         ])
 60 |       ),
 61 |     saturatedFloatToInt: async () =>
 62 |       WebAssembly.validate(
 63 |         new Uint8Array([
 64 |           0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 12, 1,
 65 |           10, 0, 67, 0, 0, 0, 0, 252, 0, 26, 11,
 66 |         ])
 67 |       ),
 68 |     signExtensions: async () =>
 69 |       WebAssembly.validate(
 70 |         new Uint8Array([
 71 |           0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 8, 1,
 72 |           6, 0, 65, 0, 192, 26, 11,
 73 |         ])
 74 |       ),
 75 |     simd: async () =>
 76 |       WebAssembly.validate(
 77 |         new Uint8Array([
 78 |           0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 9, 1,
 79 |           7, 0, 65, 0, 253, 15, 26, 11,
 80 |         ])
 81 |       ),
 82 |     tailCall: async () =>
 83 |       WebAssembly.validate(
 84 |         new Uint8Array([
 85 |           0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 10, 6, 1,
 86 |           4, 0, 18, 0, 11,
 87 |         ])
 88 |       ),
 89 |     threads: () =>
 90 |       (async (e) => {
 91 |         try {
 92 |           return (
 93 |             "undefined" != typeof MessageChannel &&
 94 |               new MessageChannel().port1.postMessage(new SharedArrayBuffer(1)),
 95 |             WebAssembly.validate(e)
 96 |           );
 97 |         } catch (e) {
 98 |           return !1;
 99 |         }
100 |       })(
101 |         new Uint8Array([
102 |           0, 97, 115, 109, 1, 0, 0, 0, 1, 4, 1, 96, 0, 0, 3, 2, 1, 0, 5, 4, 1,
103 |           3, 1, 1, 10, 11, 1, 9, 0, 65, 0, 254, 16, 2, 0, 26, 11,
104 |         ])
105 |       ),
106 |   };
107 | });
108 | 


--------------------------------------------------------------------------------
/nanodetncnn.cpp:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #include <benchmark.h>
 16 | #include <simpleocv.h>
 17 | #include "nanodet.h"
 18 | // Draws a 10-sample moving-average FPS label at the top-right corner of rgba; always returns 0.
 19 | static int draw_fps(cv::Mat& rgba)
 20 | {
 21 |     // resolve moving average of the rate measured between successive calls
 22 |     float avg_fps = 0.f;
 23 |     {
 24 |         static double t0 = 0.f; // time of the previous call; 0 means there was no previous call yet
 25 |         static float fps_history[10] = {0.f}; // newest sample lives at index 0
 26 | 
 27 |         double t1 = ncnn::get_current_time();
 28 |         if (t0 == 0.f)
 29 |         {
 30 |             t0 = t1;
 31 |             return 0;
 32 |         }
 33 | 
 34 |         float fps = 1000.f / (t1 - t0); // NOTE(review): assumes get_current_time() reports milliseconds — confirm
 35 |         t0 = t1;
 36 | 
 37 |         for (int i = 9; i >= 1; i--)
 38 |         {
 39 |             fps_history[i] = fps_history[i - 1];
 40 |         }
 41 |         fps_history[0] = fps;
 42 | 
 43 |         if (fps_history[9] == 0.f) // history not filled yet: draw nothing
 44 |         {
 45 |             return 0;
 46 |         }
 47 | 
 48 |         for (int i = 0; i < 10; i++)
 49 |         {
 50 |             avg_fps += fps_history[i];
 51 |         }
 52 |         avg_fps /= 10.f;
 53 |     }
 54 | 
 55 |     char text[32];
 56 |     snprintf(text, sizeof(text), "FPS=%.2f", avg_fps); // bounded write: an extreme value cannot overrun text
 57 | 
 58 |     int baseLine = 0;
 59 |     cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
 60 | 
 61 |     int y = 0;
 62 |     int x = rgba.cols - label_size.width;
 63 | 
 64 |     cv::rectangle(rgba, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
 65 |                   cv::Scalar(255, 255, 255, 255), -1);
 66 | 
 67 |     cv::putText(rgba, text, cv::Point(x, y + label_size.height),
 68 |                 cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0, 255));
 69 | 
 70 |     return 0;
 71 | }
 72 | 
 73 | static NanoDet* g_nanodet = 0; // detector instance, created lazily by on_image_render; not deleted anywhere in this file
 74 | // Runs detection on one frame and draws the detections and the FPS label into it.
 75 | static void on_image_render(cv::Mat& rgba)
 76 | {
 77 |     if (!g_nanodet)
 78 |     {
 79 |         g_nanodet = new NanoDet;
 80 |         g_nanodet->load("coco.torchscript.ncnn"); // NOTE(review): return value is ignored — confirm how load() reports failure
 81 |     }
 82 | 
 83 |     std::vector<Object> objects;
 84 |     g_nanodet->detect(rgba, objects);
 85 | 
 86 |     g_nanodet->draw(rgba, objects);
 87 | 
 88 |     draw_fps(rgba);
 89 | }
 90 | 
 91 | #ifdef __EMSCRIPTEN_PTHREADS__
 92 | 
 93 | static const unsigned char* rgba_data = 0; // pending frame; 0 means no frame is waiting
 94 | static int w = 0;
 95 | static int h = 0;
 96 | // lock/condition guard the hand-off of a frame (rgba_data, w, h) to the worker thread.
 97 | static ncnn::Mutex lock;
 98 | static ncnn::ConditionVariable condition;
 99 | // finish_lock/finish_condition let nanodet_ncnn wait until the worker has processed the frame.
100 | static ncnn::Mutex finish_lock;
101 | static ncnn::ConditionVariable finish_condition;
102 | // Worker loop: waits for a pending frame, runs on_image_render on it, clears rgba_data and signals completion.
103 | static void worker()
104 | {
105 |     while (1)
106 |     {
107 |         lock.lock();
108 |         while (rgba_data == 0)
109 |         {
110 |             condition.wait(lock);
111 |         }
112 | 
113 |         cv::Mat rgba(h, w, CV_8UC4, (void*)rgba_data); // NOTE(review): presumably wraps the caller's buffer without copying — confirm in simpleocv
114 | 
115 |         on_image_render(rgba);
116 | 
117 |         rgba_data = 0;
118 | 
119 |         lock.unlock();
120 | 
121 |         finish_lock.lock();
122 |         finish_condition.signal();
123 |         finish_lock.unlock();
124 |     }
125 | }
126 | 
127 | #include <thread>
128 | static std::thread t(worker);
129 | 
130 | extern "C" {
131 | // Entry point called from JavaScript: hands the frame to the worker and blocks until rgba_data is cleared again.
132 | void nanodet_ncnn(unsigned char* _rgba_data, int _w, int _h)
133 |     {
134 |         lock.lock();
135 |         while (rgba_data != 0) // NOTE(review): nothing in this file signals `condition` once the worker clears rgba_data
136 |         {
137 |             condition.wait(lock);
138 |         }
139 | 
140 |         rgba_data = _rgba_data;
141 |         w = _w;
142 |         h = _h;
143 | 
144 |         lock.unlock();
145 | 
146 |         condition.signal();
147 | 
148 |         // wait for finished
149 |         finish_lock.lock();
150 |         while (rgba_data != 0) // NOTE(review): read under finish_lock, while the worker writes rgba_data under lock
151 |         {
152 |             finish_condition.wait(finish_lock);
153 |         }
154 |         finish_lock.unlock();
155 |     }
156 | 
157 | }
158 | 
159 | #else // __EMSCRIPTEN_PTHREADS__
160 | 
161 | extern "C" {
162 | // Entry point called from JavaScript when pthreads are unavailable: processes the frame on the calling thread.
163 | void nanodet_ncnn(unsigned char* rgba_data, int w, int h)
164 | {
165 |     cv::Mat rgba(h, w, CV_8UC4, (void*)rgba_data);
166 | 
167 |     on_image_render(rgba);
168 | }
169 | 
170 | }
171 | 
172 | #endif // __EMSCRIPTEN_PTHREADS__
173 | 


--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
  1 | <html lang="en">
  2 | 
  3 | <head>
  4 |     <meta charset="utf-8">
  5 |     <meta name="viewport" content="width=device-width" />
  6 |     <title>ncnn webassembly nanodet</title>
  7 |     <style>
  8 |         video {
  9 | /*             position: absolute; */
 10 | /*             visibility: hidden; */
 11 |         }
 12 |         canvas {
 13 |             border: 1px solid black;
 14 |         }
 15 |     </style>
 16 | 
 17 | </head>
 18 | 
 19 | <body>
 20 |     <div>
 21 |         <h1>ncnn webassembly nanodet</h1>
 22 |         <div>
 23 |             <button disabled id="switch-camera-btn" style="height:48px">Switch Camera</button>
 24 |         </div>
 25 |         <div>
 26 |             <canvas id="canvas" width="640"></canvas>
 27 |         </div>
 28 |         <video id="video" playsinline autoplay></video>
 29 |     </div>
 30 | 
 31 |     <script src="wasmFeatureDetect.js"></script>
 32 | 
 33 |     <script type='text/javascript'>
 34 |         var Module = {};
 35 | 
 36 |         var has_simd;
 37 |         var has_threads;
 38 | 
 39 |         var wasmModuleLoaded = false;
 40 |         var wasmModuleLoadedCallbacks = [];
 41 | 
 42 |         Module.onRuntimeInitialized = function() {
 43 |             wasmModuleLoaded = true;
 44 |             for (var i = 0; i < wasmModuleLoadedCallbacks.length; i++) {
 45 |                 wasmModuleLoadedCallbacks[i]();
 46 |             }
 47 |         }
 48 | 
 49 |         wasmFeatureDetect.simd().then(simdSupported => {
 50 |             has_simd = simdSupported;
 51 | 
 52 |             wasmFeatureDetect.threads().then(threadsSupported => {
 53 |                 has_threads = threadsSupported;
 54 | 
 55 |                 if (has_simd)
 56 |                 {
 57 |                     if (has_threads)
 58 |                     {
 59 |                         nanodet_module_name = 'nanodet-simd-threads';
 60 |                     }
 61 |                     else
 62 |                     {
 63 |                         nanodet_module_name = 'nanodet-simd';
 64 |                     }
 65 |                 }
 66 |                 else
 67 |                 {
 68 |                     if (has_threads)
 69 |                     {
 70 |                         nanodet_module_name = 'nanodet-threads';
 71 |                     }
 72 |                     else
 73 |                     {
 74 |                         nanodet_module_name = 'nanodet-basic';
 75 |                     }
 76 |                 }
 77 | 
 78 |                 console.log('load ' + nanodet_module_name);
 79 | 
 80 |                 var nanodetwasm = nanodet_module_name + '.wasm';
 81 |                 var nanodetjs = nanodet_module_name + '.js';
 82 | 
 83 |                 fetch(nanodetwasm)
 84 |                     .then(response => response.arrayBuffer())
 85 |                     .then(buffer => {
 86 |                         Module.wasmBinary = buffer;
 87 |                         var script = document.createElement('script');
 88 |                         script.src = nanodetjs;
 89 |                         script.onload = function() {
 90 |                             console.log('Emscripten boilerplate loaded.');
 91 |                         }
 92 |                         document.body.appendChild(script);
 93 |                     });
 94 | 
 95 |             });
 96 |         });
 97 | 
 98 |         var shouldFaceUser = true;
 99 |         var stream = null;
100 |         var w = 640;
101 |         var h = 480;
102 | 
103 |         var dst = null;
104 |         var resultarray = null;
105 |         var resultbuffer = null;
106 |         window.addEventListener('DOMContentLoaded', function() {
107 |             var isStreaming = false;
108 |             switchcamerabtn = document.getElementById('switch-camera-btn');
109 |             video = document.getElementById('video');
110 |             canvas = document.getElementById('canvas');
111 |             ctx = canvas.getContext('2d');
112 | 
113 |             // Wait until the video stream canvas play
114 |             video.addEventListener('canplay', function(e) {
115 |                 if (!isStreaming) {
116 |                     // videoWidth isn't always set correctly in all browsers
117 |                     if (video.videoWidth > 0) h = video.videoHeight / (video.videoWidth / w);
118 |                     canvas.setAttribute('width', w);
119 |                     canvas.setAttribute('height', h);
120 |                     isStreaming = true;
121 |                 }
122 |             }, false);
123 | 
124 |             // Wait for the video to start to play
125 |             video.addEventListener('play', function() {
126 |                 //Setup image memory
127 |                 var id = ctx.getImageData(0, 0, canvas.width, canvas.height);
128 |                 var d = id.data;
129 | 
130 |                 if (wasmModuleLoaded) {
131 |                     mallocAndCallSFilter();
132 |                 } else {
133 |                     wasmModuleLoadedCallbacks.push(mallocAndCallSFilter);
134 |                 }
135 | 
136 |                 function mallocAndCallSFilter() {
137 |                     if (dst != null)
138 |                     {
139 |                         _free(dst);
140 |                         dst = null;
141 |                     }
142 | 
143 |                     dst = _malloc(d.length);
144 | 
145 |                     //console.log("What " + d.length);
146 | 
147 |                     sFilter();
148 |                 }
149 |             });
150 | 
151 |             // check whether we can use facingMode
152 |             var supports = navigator.mediaDevices.getSupportedConstraints();
153 |             if (supports['facingMode'] === true) {
154 |                 switchcamerabtn.disabled = false;
155 |             }
156 | 
157 |             switchcamerabtn.addEventListener('click', function() {
158 |                 if (stream == null)
159 |                     return
160 | 
161 |                 stream.getTracks().forEach(t => {
162 |                     t.stop();
163 |                 });
164 | 
165 |                 shouldFaceUser = !shouldFaceUser;
166 |                 capture();
167 |             });
168 | 
169 |             capture();
170 |         });
171 |         // Requests a 640x480 camera stream for the current facing mode and starts playing it in the <video> element.
172 |         function capture() {
173 |             var constraints = { audio: false, video: { width: 640, height: 480, facingMode: shouldFaceUser ? 'user' : 'environment' } };
174 |             navigator.mediaDevices.getUserMedia(constraints)
175 |                 .then(function(mediaStream) {
176 |                     var video = document.querySelector('video');
177 |                     stream = mediaStream; // kept so the switch-camera handler can stop its tracks
178 |                     video.srcObject = mediaStream;
179 |                     video.onloadedmetadata = function(e) {
180 |                         video.play();
181 |                     };
182 |                 })
183 |                 .catch(function(err) {
184 |                     console.log(err.message); // failures are only logged; nothing is shown on the page
185 |                 });
186 |         }
187 | 
188 | 
189 |         function ncnn_nanodet() {
190 |             var canvas = document.getElementById('canvas');
191 |             var ctx = canvas.getContext('2d');
192 | 
193 |             var imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
194 |             var data = imageData.data;
195 | 
196 |             HEAPU8.set(data, dst);
197 | 
198 |             _nanodet_ncnn(dst, canvas.width, canvas.height);
199 | 
200 |             var result = HEAPU8.subarray(dst, dst + data.length);
201 |             imageData.data.set(result);
202 |             ctx.putImageData(imageData, 0, 0);
203 |         }
204 | 
205 |         // Per-frame loop: draws the current video frame to the canvas, runs detection, then reschedules itself via requestAnimationFrame.
206 |         var sFilter = function() {
207 |             if (video.paused || video.ended) return; // ends the loop; the 'play' handler starts it again
208 | 
209 |             ctx.fillRect(0, 0, w, h);
210 |             ctx.drawImage(video, 0, 0, w, h);
211 | 
212 |             ncnn_nanodet();
213 | 
214 |             window.requestAnimationFrame(sFilter);
215 |         }
216 | 
217 |     </script>
218 | 
219 | </body>
220 | 
221 | </html>
222 | 


--------------------------------------------------------------------------------
/nanodet.cpp:
--------------------------------------------------------------------------------
  1 | // Tencent is pleased to support the open source community by making ncnn available.
  2 | //
  3 | // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
  4 | //
  5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6 | // in compliance with the License. You may obtain a copy of the License at
  7 | //
  8 | // https://opensource.org/licenses/BSD-3-Clause
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software distributed
 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 13 | // specific language governing permissions and limitations under the License.
 14 | 
 15 | #include "nanodet.h"
 16 | 
 17 | #include <float.h>
 18 | #include <cpu.h>
 19 | #include <simpleocv.h>
 20 | // Area of the overlap between the two objects' rectangles, obtained from the rect `&` operator.
 21 | static inline float intersection_area(const Object& a, const Object& b)
 22 | {
 23 |     cv::Rect_<float> inter = a.rect & b.rect;
 24 |     return inter.area();
 25 | }
 26 | // Sorts faceobjects[left..right] (both inclusive) in place by descending prob, using a recursive quicksort.
 27 | static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
 28 | {
 29 |     int i = left;
 30 |     int j = right;
 31 |     float p = faceobjects[(left + right) / 2].prob; // pivot: score of the middle element
 32 | 
 33 |     while (i <= j)
 34 |     {
 35 |         while (faceobjects[i].prob > p)
 36 |             i++;
 37 | 
 38 |         while (faceobjects[j].prob < p)
 39 |             j--;
 40 | 
 41 |         if (i <= j)
 42 |         {
 43 |             // swap
 44 |             std::swap(faceobjects[i], faceobjects[j]);
 45 | 
 46 |             i++;
 47 |             j--;
 48 |         }
 49 |     }
 50 | 
 51 |     //     #pragma omp parallel sections
 52 |     {
 53 |         //         #pragma omp section
 54 |         {
 55 |             if (left < j) qsort_descent_inplace(faceobjects, left, j); // recurse into the left partition
 56 |         }
 57 |         //         #pragma omp section
 58 |         {
 59 |             if (i < right) qsort_descent_inplace(faceobjects, i, right); // recurse into the right partition
 60 |         }
 61 |     }
 62 | }
 63 | // Sorts the whole vector by descending prob; an empty vector is left untouched.
 64 | static void qsort_descent_inplace(std::vector<Object>& faceobjects)
 65 | {
 66 |     if (faceobjects.empty())
 67 |         return;
 68 | 
 69 |     qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
 70 | }
 71 | // Greedy NMS: index i is kept unless its IoU with an already picked box exceeds nms_threshold; picked receives the kept indices.
 72 | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
 73 | {
 74 |     picked.clear();
 75 | 
 76 |     const int n = faceobjects.size(); // NOTE(review): input is presumably pre-sorted by descending prob (see name) — verify at the call site
 77 | 
 78 |     std::vector<float> areas(n);
 79 |     for (int i = 0; i < n; i++)
 80 |     {
 81 |         areas[i] = faceobjects[i].rect.width * faceobjects[i].rect.height;
 82 |     }
 83 | 
 84 |     for (int i = 0; i < n; i++)
 85 |     {
 86 |         const Object& a = faceobjects[i];
 87 | 
 88 |         int keep = 1;
 89 |         for (int j = 0; j < (int)picked.size(); j++)
 90 |         {
 91 |             const Object& b = faceobjects[picked[j]];
 92 | 
 93 |             // intersection over union
 94 |             float inter_area = intersection_area(a, b);
 95 |             float union_area = areas[i] + areas[picked[j]] - inter_area;
 96 |             if (inter_area / union_area > nms_threshold)
 97 |                 keep = 0; // the loop keeps scanning the remaining picked boxes; the result is unaffected
 98 |         }
 99 | 
100 |         if (keep)
101 |             picked.push_back(i);
102 |     }
103 | }
104 | 
// Logistic function: maps x to 1 / (1 + e^-x), a value in [0, 1].
static inline float sigmoid(float x)
{
    const float neg_x = -x;
    return 1.0f / (1.0f + exp(neg_x));
}
109 | 
110 | static void generate_proposals(const ncnn::Mat& pred, int stride, int _num_class, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
111 | {
112 |     const int num_grid = pred.h;
113 | 
114 |     int num_grid_x = pred.w;
115 |     int num_grid_y = pred.h;
116 | 
117 |     const int num_class = _num_class; // number of classes. 80 for COCO
118 |     const int reg_max_1 = (pred.c - num_class) / 4;
119 | 
120 |     for (int i = 0; i < num_grid_y; i++)
121 |     {
122 |         for (int j = 0; j < num_grid_x; j++)
123 |         {
124 |             // find label with max score
125 |             int label = -1;
126 |             float score = -FLT_MAX;
127 |             for (int k = 0; k < num_class; k++)
128 |             {
129 |                 float s = pred.channel(k).row(i)[j];
130 |                 if (s > score)
131 |                 {
132 |                     label = k;
133 |                     score = s;
134 |                 }
135 |             }
136 | 
137 |             score = sigmoid(score);
138 |             if (score >= prob_threshold)
139 |             {
140 |                 ncnn::Mat bbox_pred(reg_max_1, 4);
141 |                 for (int k = 0; k < reg_max_1 * 4; k++)
142 |                 {
143 |                     bbox_pred[k] = pred.channel(num_class + k).row(i)[j];
144 |                 }
145 |                 {
146 |                     ncnn::Layer* softmax = ncnn::create_layer("Softmax");
147 | 
148 |                     ncnn::ParamDict pd;
149 |                     pd.set(0, 1); // axis
150 |                     pd.set(1, 1);
151 |                     softmax->load_param(pd);
152 | 
153 |                     ncnn::Option opt;
154 |                     opt.num_threads = 1;
155 |                     opt.use_packing_layout = false;
156 | 
157 |                     softmax->create_pipeline(opt);
158 | 
159 |                     softmax->forward_inplace(bbox_pred, opt);
160 | 
161 |                     softmax->destroy_pipeline(opt);
162 | 
163 |                     delete softmax;
164 |                 }
165 | 
166 |                 float pred_ltrb[4];
167 |                 for (int k = 0; k < 4; k++)
168 |                 {
169 |                     float dis = 0.f;
170 |                     const float* dis_after_sm = bbox_pred.row(k);
171 |                     for (int l = 0; l < reg_max_1; l++)
172 |                     {
173 |                         dis += l * dis_after_sm[l];
174 |                     }
175 | 
176 |                     pred_ltrb[k] = dis * stride;
177 |                 }
178 | 
179 |                 float pb_cx = j * stride;
180 |                 float pb_cy = i * stride;
181 | 
182 |                 float x0 = pb_cx - pred_ltrb[0];
183 |                 float y0 = pb_cy - pred_ltrb[1];
184 |                 float x1 = pb_cx + pred_ltrb[2];
185 |                 float y1 = pb_cy + pred_ltrb[3];
186 | 
187 |                 Object obj;
188 |                 obj.rect.x = x0;
189 |                 obj.rect.y = y0;
190 |                 obj.rect.width = x1 - x0;
191 |                 obj.rect.height = y1 - y0;
192 |                 obj.label = label;
193 |                 obj.prob = score;
194 | 
195 |                 objects.push_back(obj);
196 |             }
197 |         }
198 |     }
199 | }
200 | 
201 | NanoDet::NanoDet()
202 | {
203 |     blob_pool_allocator.set_size_compare_ratio(0.f);
204 |     workspace_pool_allocator.set_size_compare_ratio(0.f);
205 | }
206 | 
207 | int NanoDet::load(const char* modeltype, bool use_gpu)
208 | {
209 |     nanodet.clear();
210 |     blob_pool_allocator.clear();
211 |     workspace_pool_allocator.clear();
212 | 
213 |     ncnn::set_cpu_powersave(2);
214 |     ncnn::set_omp_num_threads(ncnn::get_big_cpu_count());
215 | 
216 |     nanodet.opt = ncnn::Option();
217 | 
218 | #if NCNN_VULKAN
219 |     nanodet.opt.use_vulkan_compute = use_gpu;
220 | #endif
221 | 
222 |     nanodet.opt.num_threads = ncnn::get_big_cpu_count();
223 |     nanodet.opt.blob_allocator = &blob_pool_allocator;
224 |     nanodet.opt.workspace_allocator = &workspace_pool_allocator;
225 | 
226 |     char parampath[256];
227 |     char modelpath[256];
228 |     sprintf(parampath, "%s.param", modeltype);
229 |     sprintf(modelpath, "%s.bin", modeltype);
230 | 
231 |     nanodet.load_param(parampath);
232 |     nanodet.load_model(modelpath);
233 | 
234 |     return 0;
235 | }
236 | 
237 | int NanoDet::detect(const cv::Mat& rgba, std::vector<Object>& objects, float prob_threshold, float nms_threshold)
238 | {
239 |     int width = rgba.cols;
240 |     int height = rgba.rows;
241 | 
242 |     // pad to multiple of 32
243 |     int num_classes = class_names.size();
244 |     int w = width;
245 |     int h = height;
246 |     float scale = 1.f;
247 |     if (w > h)
248 |     {
249 |         scale = (float)target_size / w;
250 |         w = target_size;
251 |         h = h * scale;
252 |     }
253 |     else
254 |     {
255 |         scale = (float)target_size / h;
256 |         h = target_size;
257 |         w = w * scale;
258 |     }
259 | 
260 |     ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgba.data, ncnn::Mat::PIXEL_RGBA2BGR, width, height, w, h);
261 | 
262 |     // pad to target_size rectangle
263 |     int wpad = (w + 31) / 32 * 32 - w;
264 |     int hpad = (h + 31) / 32 * 32 - h;
265 |     ncnn::Mat in_pad;
266 |     ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
267 | 
268 |     in_pad.substract_mean_normalize(mean_vals, norm_vals);
269 | 
270 |     ncnn::Extractor ex = nanodet.create_extractor();
271 | 
272 |     ex.input("in0", in_pad);
273 | 
274 |     std::vector<Object> proposals;
275 | 
276 |     // stride 8
277 |     {
278 |         ncnn::Mat pred;
279 |         ex.extract("231", pred);
280 | 
281 |         std::vector<Object> objects8;
282 |         generate_proposals(pred, 8, num_classes, in_pad, prob_threshold, objects8);
283 | 
284 |         proposals.insert(proposals.end(), objects8.begin(), objects8.end());
285 |     }
286 | 
287 |     // stride 16
288 |     {
289 |         ncnn::Mat pred;
290 |         ex.extract("228", pred);
291 | 
292 |         std::vector<Object> objects16;
293 |         generate_proposals(pred, 16, num_classes, in_pad, prob_threshold, objects16);
294 | 
295 |         proposals.insert(proposals.end(), objects16.begin(), objects16.end());
296 |     }
297 | 
298 |     // stride 32
299 |     {
300 |         ncnn::Mat pred;
301 |         ex.extract("225", pred);
302 | 
303 |         std::vector<Object> objects32;
304 |         generate_proposals(pred, 32, num_classes, in_pad, prob_threshold, objects32);
305 | 
306 |         proposals.insert(proposals.end(), objects32.begin(), objects32.end());
307 |     }
308 | 
309 |     // stride 64
310 |     {
311 |         ncnn::Mat pred;
312 |         ex.extract("222", pred);
313 | 
314 |         std::vector<Object> objects64;
315 |         generate_proposals(pred, 64, num_classes, in_pad, prob_threshold, objects64);
316 | 
317 |         proposals.insert(proposals.end(), objects64.begin(), objects64.end());
318 |     }
319 | 
320 |     // sort all proposals by score from highest to lowest
321 |     qsort_descent_inplace(proposals);
322 | 
323 |     // apply nms with nms_threshold
324 |     std::vector<int> picked;
325 |     nms_sorted_bboxes(proposals, picked, nms_threshold);
326 | 
327 |     int count = picked.size();
328 | 
329 |     objects.resize(count);
330 |     for (int i = 0; i < count; i++)
331 |     {
332 |         objects[i] = proposals[picked[i]];
333 | 
334 |         // adjust offset to original unpadded
335 |         float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
336 |         float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
337 |         float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
338 |         float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
339 | 
340 |         // clip
341 |         x0 = std::max(std::min(x0, (float)(width - 1)), 0.f);
342 |         y0 = std::max(std::min(y0, (float)(height - 1)), 0.f);
343 |         x1 = std::max(std::min(x1, (float)(width - 1)), 0.f);
344 |         y1 = std::max(std::min(y1, (float)(height - 1)), 0.f);
345 | 
346 |         objects[i].rect.x = x0;
347 |         objects[i].rect.y = y0;
348 |         objects[i].rect.width = x1 - x0;
349 |         objects[i].rect.height = y1 - y0;
350 |     }
351 | 
352 |     // sort objects by area
353 |     struct
354 |     {
355 |         bool operator()(const Object& a, const Object& b) const
356 |         {
357 |             return a.rect.area() > b.rect.area();
358 |         }
359 |     } objects_area_greater;
360 |     std::sort(objects.begin(), objects.end(), objects_area_greater);
361 | 
362 |     return 0;
363 | }
364 | 
365 | int NanoDet::draw(cv::Mat& rgba, const std::vector<Object>& objects)
366 | {
367 |     static const unsigned char colors[19][3] = {
368 |         { 54,  67, 244},
369 |         { 99,  30, 233},
370 |         {176,  39, 156},
371 |         {183,  58, 103},
372 |         {181,  81,  63},
373 |         {243, 150,  33},
374 |         {244, 169,   3},
375 |         {212, 188,   0},
376 |         {136, 150,   0},
377 |         { 80, 175,  76},
378 |         { 74, 195, 139},
379 |         { 57, 220, 205},
380 |         { 59, 235, 255},
381 |         {  7, 193, 255},
382 |         {  0, 152, 255},
383 |         { 34,  87, 255},
384 |         { 72,  85, 121},
385 |         {158, 158, 158},
386 |         {139, 125,  96}
387 |     };
388 | 
389 |     int color_index = 0;
390 | 
391 |     for (size_t i = 0; i < objects.size(); i++)
392 |     {
393 |         const Object& obj = objects[i];
394 | 
395 |         //         fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
396 |         //                 obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
397 | 
398 |         const unsigned char* color = colors[color_index % 19];
399 |         color_index++;
400 | 
401 |         cv::Scalar cc(color[0], color[1], color[2], 255);
402 | 
403 |         cv::rectangle(rgba, cv::Rect(obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height), cc, 2);
404 | 
405 |         char text[256];
406 |         sprintf(text, "%s %.1f%%", class_names[obj.label].c_str(), obj.prob * 100);
407 | 
408 |         int baseLine = 0;
409 |         cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
410 | 
411 |         int x = obj.rect.x;
412 |         int y = obj.rect.y - label_size.height - baseLine;
413 |         if (y < 0)
414 |             y = 0;
415 |         if (x + label_size.width > rgba.cols)
416 |             x = rgba.cols - label_size.width;
417 | 
418 |         cv::rectangle(rgba, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), cc, -1);
419 | 
420 |         cv::Scalar textcc = (color[0] + color[1] + color[2] >= 381) ? cv::Scalar(0, 0, 0, 255) : cv::Scalar(255, 255, 255, 255);
421 | 
422 |         cv::putText(rgba, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.5, textcc, 1);
423 |     }
424 | 
425 |     return 0;
426 | }
427 | 


--------------------------------------------------------------------------------
/assets/coco.torchscript.ncnn.param:
--------------------------------------------------------------------------------
  1 | 7767517
  2 | 201 237
  3 | Input                    in0                      0 1 in0
  4 | Convolution              convrelu_0               1 1 in0 1 0=24 1=3 3=2 4=1 5=1 6=648 9=2 -23310=1,1.000000e-01
  5 | Pooling                  maxpool2d_154            1 1 1 2 1=3 2=2 3=1 5=1
  6 | Split                    splitncnn_0              1 2 2 3 4
  7 | ConvolutionDepthWise     convdw_162               1 1 4 5 0=24 1=3 3=2 4=1 5=1 6=216 7=24
  8 | Convolution              convrelu_1               1 1 3 6 0=58 1=1 5=1 6=1392 9=2 -23310=1,1.000000e-01
  9 | ConvolutionDepthWise     convdw_163               1 1 6 7 0=58 1=3 3=2 4=1 5=1 6=522 7=58
 10 | Convolution              convrelu_3               1 1 5 8 0=58 1=1 5=1 6=1392 9=2 -23310=1,1.000000e-01
 11 | Convolution              convrelu_2               1 1 7 9 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
 12 | Concat                   cat_0                    2 1 8 9 10
 13 | ShuffleChannel           channelshuffle_0         1 1 10 11 0=2
 14 | Slice                    chunk_0                  1 2 11 12 13 -23300=2,-233,-233
 15 | Convolution              convrelu_4               1 1 13 14 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
 16 | ConvolutionDepthWise     convdw_164               1 1 14 15 0=58 1=3 4=1 5=1 6=522 7=58
 17 | Convolution              convrelu_5               1 1 15 16 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
 18 | Concat                   cat_1                    2 1 12 16 17
 19 | ShuffleChannel           channelshuffle_1         1 1 17 18 0=2
 20 | Slice                    chunk_1                  1 2 18 19 20 -23300=2,-233,-233
 21 | Convolution              convrelu_6               1 1 20 21 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
 22 | ConvolutionDepthWise     convdw_165               1 1 21 22 0=58 1=3 4=1 5=1 6=522 7=58
 23 | Convolution              convrelu_7               1 1 22 23 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
 24 | Concat                   cat_2                    2 1 19 23 24
 25 | ShuffleChannel           channelshuffle_2         1 1 24 25 0=2
 26 | Slice                    chunk_2                  1 2 25 26 27 -23300=2,-233,-233
 27 | Convolution              convrelu_8               1 1 27 28 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
 28 | ConvolutionDepthWise     convdw_166               1 1 28 29 0=58 1=3 4=1 5=1 6=522 7=58
 29 | Convolution              convrelu_9               1 1 29 30 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
 30 | Concat                   cat_3                    2 1 26 30 31
 31 | ShuffleChannel           channelshuffle_3         1 1 31 32 0=2
 32 | Split                    splitncnn_1              1 3 32 33 34 35
 33 | ConvolutionDepthWise     convdw_167               1 1 35 36 0=116 1=3 3=2 4=1 5=1 6=1044 7=116
 34 | Convolution              convrelu_10              1 1 34 37 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 35 | ConvolutionDepthWise     convdw_168               1 1 37 38 0=116 1=3 3=2 4=1 5=1 6=1044 7=116
 36 | Convolution              convrelu_12              1 1 36 39 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 37 | Convolution              convrelu_11              1 1 38 40 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 38 | Concat                   cat_4                    2 1 39 40 41
 39 | ShuffleChannel           channelshuffle_4         1 1 41 42 0=2
 40 | Slice                    chunk_3                  1 2 42 43 44 -23300=2,-233,-233
 41 | Convolution              convrelu_13              1 1 44 45 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 42 | ConvolutionDepthWise     convdw_169               1 1 45 46 0=116 1=3 4=1 5=1 6=1044 7=116
 43 | Convolution              convrelu_14              1 1 46 47 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 44 | Concat                   cat_5                    2 1 43 47 48
 45 | ShuffleChannel           channelshuffle_5         1 1 48 49 0=2
 46 | Slice                    chunk_4                  1 2 49 50 51 -23300=2,-233,-233
 47 | Convolution              convrelu_15              1 1 51 52 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 48 | ConvolutionDepthWise     convdw_170               1 1 52 53 0=116 1=3 4=1 5=1 6=1044 7=116
 49 | Convolution              convrelu_16              1 1 53 54 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 50 | Concat                   cat_6                    2 1 50 54 55
 51 | ShuffleChannel           channelshuffle_6         1 1 55 56 0=2
 52 | Slice                    chunk_5                  1 2 56 57 58 -23300=2,-233,-233
 53 | Convolution              convrelu_17              1 1 58 59 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 54 | ConvolutionDepthWise     convdw_171               1 1 59 60 0=116 1=3 4=1 5=1 6=1044 7=116
 55 | Convolution              convrelu_18              1 1 60 61 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 56 | Concat                   cat_7                    2 1 57 61 62
 57 | ShuffleChannel           channelshuffle_7         1 1 62 63 0=2
 58 | Slice                    chunk_6                  1 2 63 64 65 -23300=2,-233,-233
 59 | Convolution              convrelu_19              1 1 65 66 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 60 | ConvolutionDepthWise     convdw_172               1 1 66 67 0=116 1=3 4=1 5=1 6=1044 7=116
 61 | Convolution              convrelu_20              1 1 67 68 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 62 | Concat                   cat_8                    2 1 64 68 69
 63 | ShuffleChannel           channelshuffle_8         1 1 69 70 0=2
 64 | Slice                    chunk_7                  1 2 70 71 72 -23300=2,-233,-233
 65 | Convolution              convrelu_21              1 1 72 73 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 66 | ConvolutionDepthWise     convdw_173               1 1 73 74 0=116 1=3 4=1 5=1 6=1044 7=116
 67 | Convolution              convrelu_22              1 1 74 75 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 68 | Concat                   cat_9                    2 1 71 75 76
 69 | ShuffleChannel           channelshuffle_9         1 1 76 77 0=2
 70 | Slice                    chunk_8                  1 2 77 78 79 -23300=2,-233,-233
 71 | Convolution              convrelu_23              1 1 79 80 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 72 | ConvolutionDepthWise     convdw_174               1 1 80 81 0=116 1=3 4=1 5=1 6=1044 7=116
 73 | Convolution              convrelu_24              1 1 81 82 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 74 | Concat                   cat_10                   2 1 78 82 83
 75 | ShuffleChannel           channelshuffle_10        1 1 83 84 0=2
 76 | Slice                    chunk_9                  1 2 84 85 86 -23300=2,-233,-233
 77 | Convolution              convrelu_25              1 1 86 87 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 78 | ConvolutionDepthWise     convdw_175               1 1 87 88 0=116 1=3 4=1 5=1 6=1044 7=116
 79 | Convolution              convrelu_26              1 1 88 89 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
 80 | Concat                   cat_11                   2 1 85 89 90
 81 | ShuffleChannel           channelshuffle_11        1 1 90 91 0=2
 82 | Split                    splitncnn_2              1 3 91 92 93 94
 83 | ConvolutionDepthWise     convdw_176               1 1 94 95 0=232 1=3 3=2 4=1 5=1 6=2088 7=232
 84 | Convolution              convrelu_27              1 1 93 96 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
 85 | ConvolutionDepthWise     convdw_177               1 1 96 97 0=232 1=3 3=2 4=1 5=1 6=2088 7=232
 86 | Convolution              convrelu_29              1 1 95 98 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
 87 | Convolution              convrelu_28              1 1 97 99 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
 88 | Concat                   cat_12                   2 1 98 99 100
 89 | ShuffleChannel           channelshuffle_12        1 1 100 101 0=2
 90 | Slice                    chunk_10                 1 2 101 102 103 -23300=2,-233,-233
 91 | Convolution              convrelu_30              1 1 103 104 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
 92 | ConvolutionDepthWise     convdw_178               1 1 104 105 0=232 1=3 4=1 5=1 6=2088 7=232
 93 | Convolution              convrelu_31              1 1 105 106 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
 94 | Concat                   cat_13                   2 1 102 106 107
 95 | ShuffleChannel           channelshuffle_13        1 1 107 108 0=2
 96 | Slice                    chunk_11                 1 2 108 109 110 -23300=2,-233,-233
 97 | Convolution              convrelu_32              1 1 110 111 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
 98 | ConvolutionDepthWise     convdw_179               1 1 111 112 0=232 1=3 4=1 5=1 6=2088 7=232
 99 | Convolution              convrelu_33              1 1 112 113 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
100 | Concat                   cat_14                   2 1 109 113 114
101 | ShuffleChannel           channelshuffle_14        1 1 114 115 0=2
102 | Slice                    chunk_12                 1 2 115 116 117 -23300=2,-233,-233
103 | Convolution              convrelu_34              1 1 117 118 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
104 | ConvolutionDepthWise     convdw_180               1 1 118 119 0=232 1=3 4=1 5=1 6=2088 7=232
105 | Convolution              convrelu_35              1 1 119 120 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
106 | Concat                   cat_15                   2 1 116 120 121
107 | ShuffleChannel           channelshuffle_15        1 1 121 122 0=2
108 | Convolution              convrelu_36              1 1 122 123 0=96 1=1 5=1 6=44544 9=2 -23310=1,1.000000e-01
109 | Split                    splitncnn_3              1 3 123 124 125 126
110 | Interp                   upsample_155             1 1 125 127 0=2 1=2.000000e+00 2=2.000000e+00
111 | Convolution              convrelu_37              1 1 92 128 0=96 1=1 5=1 6=22272 9=2 -23310=1,1.000000e-01
112 | Concat                   cat_16                   2 1 127 128 129
113 | Split                    splitncnn_4              1 2 129 130 131
114 | Convolution              convrelu_38              1 1 130 132 0=48 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
115 | Split                    splitncnn_5              1 2 132 133 134
116 | ConvolutionDepthWise     convdwrelu_0             1 1 134 135 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01
117 | Concat                   cat_17                   2 1 133 135 136
118 | Convolution              conv_56                  1 1 136 137 0=48 1=1 5=1 6=4608
119 | Split                    splitncnn_6              1 2 137 138 139
120 | ConvolutionDepthWise     convdw_182               1 1 139 140 0=48 1=3 4=1 5=1 6=432 7=48
121 | Concat                   cat_18                   2 1 138 140 141
122 | ConvolutionDepthWise     convdw_183               1 1 131 142 0=192 1=5 4=2 5=1 6=4800 7=192
123 | Convolution              conv_57                  1 1 142 143 0=96 1=1 5=1 6=18432
124 | BinaryOp                 add_0                    2 1 141 143 144
125 | Split                    splitncnn_7              1 2 144 145 146
126 | Interp                   upsample_156             1 1 146 147 0=2 1=2.000000e+00 2=2.000000e+00
127 | Convolution              convrelu_39              1 1 33 148 0=96 1=1 5=1 6=11136 9=2 -23310=1,1.000000e-01
128 | Concat                   cat_19                   2 1 147 148 149
129 | Split                    splitncnn_8              1 2 149 150 151
130 | Convolution              convrelu_40              1 1 150 152 0=48 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
131 | Split                    splitncnn_9              1 2 152 153 154
132 | ConvolutionDepthWise     convdwrelu_1             1 1 154 155 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01
133 | Concat                   cat_20                   2 1 153 155 156
134 | Convolution              conv_59                  1 1 156 157 0=48 1=1 5=1 6=4608
135 | Split                    splitncnn_10             1 2 157 158 159
136 | ConvolutionDepthWise     convdw_185               1 1 159 160 0=48 1=3 4=1 5=1 6=432 7=48
137 | Concat                   cat_21                   2 1 158 160 161
138 | ConvolutionDepthWise     convdw_186               1 1 151 162 0=192 1=5 4=2 5=1 6=4800 7=192
139 | Convolution              conv_60                  1 1 162 163 0=96 1=1 5=1 6=18432
140 | BinaryOp                 add_1                    2 1 161 163 164
141 | Split                    splitncnn_11             1 2 164 165 166
142 | ConvolutionDepthWise     convdwrelu_2             1 1 166 167 0=96 1=5 3=2 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
143 | Convolution              convrelu_41              1 1 167 168 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
144 | Concat                   cat_22                   2 1 168 145 169
145 | Split                    splitncnn_12             1 2 169 170 171
146 | Convolution              convrelu_42              1 1 170 172 0=48 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
147 | Split                    splitncnn_13             1 2 172 173 174
148 | ConvolutionDepthWise     convdwrelu_3             1 1 174 175 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01
149 | Concat                   cat_23                   2 1 173 175 176
150 | Convolution              conv_63                  1 1 176 177 0=48 1=1 5=1 6=4608
151 | Split                    splitncnn_14             1 2 177 178 179
152 | ConvolutionDepthWise     convdw_189               1 1 179 180 0=48 1=3 4=1 5=1 6=432 7=48
153 | Concat                   cat_24                   2 1 178 180 181
154 | ConvolutionDepthWise     convdw_190               1 1 171 182 0=192 1=5 4=2 5=1 6=4800 7=192
155 | Convolution              conv_64                  1 1 182 183 0=96 1=1 5=1 6=18432
156 | BinaryOp                 add_2                    2 1 181 183 184
157 | Split                    splitncnn_15             1 2 184 185 186
158 | ConvolutionDepthWise     convdwrelu_4             1 1 186 187 0=96 1=5 3=2 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
159 | Convolution              convrelu_43              1 1 187 188 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
160 | Concat                   cat_25                   2 1 188 124 189
161 | Split                    splitncnn_16             1 2 189 190 191
162 | Convolution              convrelu_44              1 1 190 192 0=48 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
163 | Split                    splitncnn_17             1 2 192 193 194
164 | ConvolutionDepthWise     convdwrelu_5             1 1 194 195 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01
165 | Concat                   cat_26                   2 1 193 195 196
166 | Convolution              conv_67                  1 1 196 197 0=48 1=1 5=1 6=4608
167 | Split                    splitncnn_18             1 2 197 198 199
168 | ConvolutionDepthWise     convdw_193               1 1 199 200 0=48 1=3 4=1 5=1 6=432 7=48
169 | Concat                   cat_27                   2 1 198 200 201
170 | ConvolutionDepthWise     convdw_194               1 1 191 202 0=192 1=5 4=2 5=1 6=4800 7=192
171 | Convolution              conv_68                  1 1 202 203 0=96 1=1 5=1 6=18432
172 | BinaryOp                 add_3                    2 1 201 203 204
173 | Split                    splitncnn_19             1 2 204 205 206
174 | ConvolutionDepthWise     convdwrelu_6             1 1 126 207 0=96 1=5 3=2 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
175 | Convolution              convrelu_46              1 1 207 208 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
176 | ConvolutionDepthWise     convdwrelu_7             1 1 206 209 0=96 1=5 3=2 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
177 | Convolution              convrelu_45              1 1 209 210 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
178 | BinaryOp                 add_4                    2 1 208 210 211
179 | ConvolutionDepthWise     convdwrelu_8             1 1 165 212 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
180 | Convolution              convrelu_47              1 1 212 213 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
181 | ConvolutionDepthWise     convdwrelu_10            1 1 185 214 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
182 | Convolution              convrelu_48              1 1 214 215 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
183 | ConvolutionDepthWise     convdwrelu_12            1 1 205 216 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
184 | Convolution              convrelu_49              1 1 216 217 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
185 | ConvolutionDepthWise     convdwrelu_14            1 1 211 218 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
186 | Convolution              convrelu_50              1 1 218 219 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
187 | ConvolutionDepthWise     convdwrelu_15            1 1 219 220 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
188 | Convolution              convrelu_51              1 1 220 221 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
189 | Convolution              conv_82                  1 1 221 222 0=112 1=1 5=1 6=10752
190 | ConvolutionDepthWise     convdwrelu_13            1 1 217 223 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
191 | Convolution              convrelu_52              1 1 223 224 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
192 | Convolution              conv_79                  1 1 224 225 0=112 1=1 5=1 6=10752
193 | ConvolutionDepthWise     convdwrelu_11            1 1 215 226 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
194 | Convolution              convrelu_53              1 1 226 227 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
195 | Convolution              conv_76                  1 1 227 228 0=112 1=1 5=1 6=10752
196 | ConvolutionDepthWise     convdwrelu_9             1 1 213 229 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
197 | Convolution              convrelu_54              1 1 229 230 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
198 | Convolution              conv_73                  1 1 230 231 0=112 1=1 5=1 6=10752
199 | Reshape                  flatten_160              1 1 231 232 0=-1 1=112
200 | Reshape                  flatten_159              1 1 228 233 0=-1 1=112
201 | Reshape                  flatten_158              1 1 225 234 0=-1 1=112
202 | Reshape                  flatten_157              1 1 222 235 0=-1 1=112
203 | Concat                   cat_28                   4 1 232 233 234 235 out0 0=1
204 | 


--------------------------------------------------------------------------------