├── .gitignore ├── CMakeLists.txt ├── build.sh ├── data └── a.png ├── examples ├── CMakeLists.txt └── demo_color.cc ├── include └── simpleocv.h ├── readme.md └── src ├── allocator.cc ├── allocator.h ├── cpu.cpp ├── cpu.h ├── mat.cpp ├── mat.h ├── mat_pixel.cpp ├── mat_pixel_affine.cpp ├── mat_pixel_android.cpp ├── mat_pixel_drawing.cpp ├── mat_pixel_drawing_font.h ├── mat_pixel_reisze_flycv.cpp ├── mat_pixel_resize.cpp ├── mat_pixel_rotate.cpp ├── platform.h ├── simpleocv.cpp ├── stb_image.h └── stb_image_write.h /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .idea/ 3 | Thor.* 4 | cmake-build-debug/ 5 | .vscode/ 6 | build/ 7 | cmake-build-debug/ 8 | vendor/ 9 | vendor/ 10 | .DS_Store 11 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(simpleocv) 3 | 4 | set(CMAKE_CXX_STANDARD 11) 5 | 6 | option(BUILD_STATIC "enable to build static simpleocv lib" ON) 7 | option(DEBUG "Enable debug mode, for develop" ON) 8 | 9 | add_compile_definitions(NCNN_PIXEL) 10 | add_compile_definitions(NCNN_THREADS) 11 | 12 | include(GNUInstallDirs) 13 | include_directories(${PROJECT_SOURCE_DIR}/include) 14 | include_directories(${PROJECT_SOURCE_DIR}/src) 15 | 16 | file(GLOB_RECURSE SRCS "src/*.cpp" "src/*.cc" "src/*.hpp" "src/*.h") 17 | file(GLOB_RECURSE HEADERS "include/*.h" "include/*.hpp") 18 | 19 | if (BUILD_STATIC) 20 | add_library(simpleocv STATIC ${SRCS} ${HEADERS}) 21 | else() 22 | add_library(simpleocv SHARED ${SRCS} ${HEADERS}) 23 | endif() 24 | 25 | set_target_properties( 26 | simpleocv 27 | PROPERTIES 28 | PUBLIC_HEADER "${HEADERS}") 29 | target_include_directories(simpleocv PRIVATE .) 
30 | 31 | add_subdirectory(examples/) 32 | 33 | install(TARGETS simpleocv 34 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 35 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 36 | PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 37 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | mkdir build 2 | cd build 3 | cmake .. 4 | make -j8 5 | make install -------------------------------------------------------------------------------- /data/a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/simpleocv/09496ec534bab998cd6c6cfc425fc86cd60855a5/data/a.png -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(demo_color demo_color.cc) 2 | target_link_libraries(demo_color simpleocv) -------------------------------------------------------------------------------- /examples/demo_color.cc: -------------------------------------------------------------------------------- 1 | #include "simpleocv.h" 2 | #include 3 | 4 | int main(int argc, char **argv) { 5 | 6 | std::string img_f = argv[1]; 7 | 8 | cv::Mat a = cv::imread(img_f); 9 | 10 | auto a_size = a.size(); 11 | 12 | std::cout << a_size.width << "x" << a_size.height << std::endl; 13 | std::cout << a.cols << "x" << a.rows << std::endl; 14 | 15 | cv::putText(a, "28.9 C from SimpleOCV", cv::Point(20, 45), 1, 0.5, 16 | cv::Scalar(255, 0, 255)); 17 | cv::imwrite("a_gray.png", a); 18 | } -------------------------------------------------------------------------------- /include/simpleocv.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _SIMPLEOCV_H 3 | #define _SIMPLEOCV_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 
// NCNN_FORCEINLINE expands to the strongest "always inline" spelling the
// current compiler understands, falling back to plain `inline`.
//
// The guard tests the macro that is actually defined here. The previous guard
// tested NCNN_FORCE_INLINE (note the extra underscore), which nothing defines,
// so a definition already supplied by another header was always redefined.
#ifndef NCNN_FORCEINLINE
#if defined(_MSC_VER)
#define NCNN_FORCEINLINE __forceinline
#elif defined(__GNUC__) || defined(__clang__)
// clang also defines __GNUC__; both accept the GNU attribute spelling.
#define NCNN_FORCEINLINE inline __attribute__((__always_inline__))
#else
#define NCNN_FORCEINLINE inline
#endif
#endif
/// Four-component value (for example a colour).
///
/// In addition to the components in `v`, the struct exposes `width` and
/// `height`, which mirror v[0] and v[1] converted to int at construction
/// time. They are NOT kept in sync when `v` is modified afterwards.
///
/// Every constructor funnels into the four-argument one so that `v`, `width`
/// and `height` are always initialised. Previously `width`/`height` were
/// default-member-initialised from `v` before `v` had been assigned, which
/// left them indeterminate for the default and one-argument constructors.
template <typename _Tp> struct Scalar_ {
  /// All components zero.
  Scalar_() : Scalar_(_Tp(0), _Tp(0), _Tp(0), _Tp(0)) {}

  /// First component set, the rest zero.
  Scalar_(_Tp _v0) : Scalar_(_v0, _Tp(0), _Tp(0), _Tp(0)) {}

  /// First two components set, the rest zero.
  Scalar_(_Tp _v0, _Tp _v1) : Scalar_(_v0, _v1, _Tp(0), _Tp(0)) {}

  /// First three components set, the last zero.
  Scalar_(_Tp _v0, _Tp _v1, _Tp _v2) : Scalar_(_v0, _v1, _v2, _Tp(0)) {}

  /// All four components set explicitly.
  Scalar_(_Tp _v0, _Tp _v1, _Tp _v2, _Tp _v3)
      : width(static_cast<int>(_v0)), height(static_cast<int>(_v1)) {
    v[0] = _v0;
    v[1] = _v1;
    v[2] = _v2;
    v[3] = _v3;
  }

  /// Read access to component i (0..3, not range-checked).
  const _Tp operator[](const int i) const { return v[i]; }
  /// Write access to component i (0..3, not range-checked).
  _Tp &operator[](const int i) { return v[i]; }

  _Tp v[4];

  // Snapshot of v[0] / v[1] taken in the constructor.
  int width;
  int height;
};
height; 170 | _Tp channel; 171 | }; 172 | 173 | typedef Size_ Size; 174 | typedef Size_ Size2f; 175 | 176 | template struct Rect_ { 177 | Rect_() : x(0), y(0), width(0), height(0) {} 178 | Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h) : x(_x), y(_y), width(_w), height(_h) {} 179 | Rect_(Point_<_Tp> _p, Size_<_Tp> _size) 180 | : x(_p.x), y(_p.y), width(_size.width), height(_size.height) {} 181 | 182 | template operator Rect_<_Tp2>() const { 183 | return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), 184 | saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); 185 | } 186 | 187 | _Tp x; 188 | _Tp y; 189 | _Tp width; 190 | _Tp height; 191 | 192 | // area 193 | _Tp area() const { return width * height; } 194 | }; 195 | 196 | template 197 | static inline Rect_<_Tp> &operator&=(Rect_<_Tp> &a, const Rect_<_Tp> &b) { 198 | _Tp x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y); 199 | a.width = std::min(a.x + a.width, b.x + b.width) - x1; 200 | a.height = std::min(a.y + a.height, b.y + b.height) - y1; 201 | a.x = x1; 202 | a.y = y1; 203 | if (a.width <= 0 || a.height <= 0) 204 | a = Rect_<_Tp>(); 205 | return a; 206 | } 207 | 208 | template 209 | static inline Rect_<_Tp> &operator|=(Rect_<_Tp> &a, const Rect_<_Tp> &b) { 210 | _Tp x1 = std::min(a.x, b.x), y1 = std::min(a.y, b.y); 211 | a.width = std::max(a.x + a.width, b.x + b.width) - x1; 212 | a.height = std::max(a.y + a.height, b.y + b.height) - y1; 213 | a.x = x1; 214 | a.y = y1; 215 | return a; 216 | } 217 | 218 | template 219 | static inline Rect_<_Tp> operator&(const Rect_<_Tp> &a, const Rect_<_Tp> &b) { 220 | Rect_<_Tp> c = a; 221 | return c &= b; 222 | } 223 | 224 | template 225 | static inline Rect_<_Tp> operator|(const Rect_<_Tp> &a, const Rect_<_Tp> &b) { 226 | Rect_<_Tp> c = a; 227 | return c |= b; 228 | } 229 | 230 | typedef Rect_ Rect; 231 | typedef Rect_ Rect2f; 232 | 233 | #define CV_8UC1 1 234 | #define CV_8UC3 3 235 | #define CV_8UC4 4 236 | #define CV_32FC1 4 237 | #define CV_64F 5 238 | 239 | 
struct NCNN_EXPORT Mat { 240 | Mat() : data(0), refcount(0), rows(0), cols(0), c(0) {} 241 | 242 | Mat(int _rows, int _cols, int flags) : data(0), refcount(0) { 243 | create(_rows, _cols, flags); 244 | } 245 | 246 | // copy 247 | Mat(const Mat &m) : data(m.data), refcount(m.refcount) { 248 | if (refcount) 249 | NCNN_XADD(refcount, 1); 250 | 251 | rows = m.rows; 252 | cols = m.cols; 253 | c = m.c; 254 | } 255 | 256 | Mat(int _rows, int _cols, int flags, void *_data) 257 | : data((unsigned char *)_data), refcount(0) { 258 | rows = _rows; 259 | cols = _cols; 260 | c = flags; 261 | } 262 | 263 | ~Mat() { release(); } 264 | 265 | // assign 266 | Mat &operator=(const Mat &m) { 267 | if (this == &m) 268 | return *this; 269 | 270 | if (m.refcount) 271 | NCNN_XADD(m.refcount, 1); 272 | 273 | release(); 274 | 275 | data = m.data; 276 | refcount = m.refcount; 277 | 278 | rows = m.rows; 279 | cols = m.cols; 280 | c = m.c; 281 | 282 | return *this; 283 | } 284 | 285 | Mat &operator=(const Scalar &s) { 286 | if (total() > 0) { 287 | uchar *p = data; 288 | for (int i = 0; i < cols * rows; i++) { 289 | for (int j = 0; j < c; j++) { 290 | *p++ = s[j]; 291 | } 292 | } 293 | } 294 | 295 | return *this; 296 | } 297 | 298 | static inline cv::Mat zeros(const Mat &m, int type = CV_8UC3) { 299 | return Mat(m.rows, m.cols, m.c); 300 | } 301 | 302 | static inline cv::Mat zeros(const cv::Scalar &s, int type = CV_8UC3) { 303 | return Mat(s[0], s[1], s[2]); 304 | } 305 | 306 | void create(int _rows, int _cols, int flags) { 307 | release(); 308 | 309 | rows = _rows; 310 | cols = _cols; 311 | c = flags; 312 | 313 | if (total() > 0) { 314 | // refcount address must be aligned, so we expand totalsize here 315 | size_t totalsize = (total() + 3) >> 2 << 2; 316 | data = (uchar *)sim::fastMalloc(totalsize + (int)sizeof(*refcount)); 317 | refcount = (int *)(((uchar *)data) + totalsize); 318 | *refcount = 1; 319 | } 320 | } 321 | 322 | void release() { 323 | if (refcount && NCNN_XADD(refcount, -1) == 1) 
324 | sim::fastFree(data); 325 | 326 | data = 0; 327 | 328 | rows = 0; 329 | cols = 0; 330 | c = 0; 331 | 332 | refcount = 0; 333 | } 334 | 335 | Mat clone() const { 336 | if (empty()) 337 | return Mat(); 338 | 339 | Mat m(rows, cols, c); 340 | 341 | if (total() > 0) { 342 | memcpy(m.data, data, total()); 343 | } 344 | 345 | return m; 346 | } 347 | 348 | bool empty() const { return data == 0 || total() == 0; } 349 | 350 | int channels() const { return c; } 351 | cv::Size size() const { return cv::Size(cols, rows, c); } 352 | 353 | int type() const { return c; } 354 | 355 | size_t total() const { return cols * rows * c; } 356 | 357 | const uchar *ptr(int y) const { return data + y * cols * c; } 358 | 359 | uchar *ptr(int y) { return data + y * cols * c; } 360 | 361 | template const _Tp *ptr(int y) const { 362 | return (const _Tp *)data + y * cols * c; 363 | } 364 | 365 | template _Tp *ptr(int y) { return (_Tp *)data + y * cols * c; } 366 | 367 | // roi 368 | Mat operator()(const Rect &roi) const { 369 | if (empty()) 370 | return Mat(); 371 | 372 | Mat m(roi.height, roi.width, c); 373 | 374 | int sy = roi.y; 375 | for (int y = 0; y < roi.height; y++) { 376 | const uchar *sptr = ptr(sy) + roi.x * c; 377 | uchar *dptr = m.ptr(y); 378 | memcpy(dptr, sptr, roi.width * c); 379 | sy++; 380 | } 381 | 382 | return m; 383 | } 384 | 385 | uchar *data; 386 | 387 | // pointer to the reference counter; 388 | // when points to user-allocated data, the pointer is NULL 389 | int *refcount; 390 | 391 | int rows; 392 | int cols; 393 | 394 | int c; 395 | }; 396 | 397 | enum ImreadModes { 398 | IMREAD_UNCHANGED = -1, 399 | IMREAD_GRAYSCALE = 0, 400 | IMREAD_COLOR = 1 401 | }; 402 | 403 | NCNN_EXPORT Mat imread(const std::string &path, int flags = IMREAD_COLOR); 404 | 405 | enum ImwriteFlags { IMWRITE_JPEG_QUALITY = 1 }; 406 | 407 | NCNN_EXPORT bool imwrite(const std::string &path, const Mat &m, 408 | const std::vector ¶ms = std::vector()); 409 | 410 | NCNN_EXPORT void imshow(const 
std::string &name, const Mat &m); 411 | 412 | NCNN_EXPORT int waitKey(int delay = 0); 413 | 414 | NCNN_EXPORT void resize(const Mat &src, Mat &dst, const Size &size, 415 | float sw = 0.f, float sh = 0.f, int flags = 0); 416 | 417 | enum { FILLED = -1 }; 418 | 419 | NCNN_EXPORT void rectangle(Mat &img, Point pt1, Point pt2, const Scalar &color, 420 | int thickness = 1, int lineType = 8, int shift = 0); 421 | NCNN_EXPORT void rectangle(Mat &img, Rect rec, const Scalar &color, 422 | int thickness = 1); 423 | 424 | NCNN_EXPORT void circle(Mat &img, Point center, int radius, const Scalar &color, 425 | int thickness = 1); 426 | 427 | NCNN_EXPORT void line(Mat &img, Point p0, Point p1, const Scalar &color, 428 | int thickness = 1); 429 | 430 | enum { FONT_HERSHEY_SIMPLEX = 0 }; 431 | enum { LINE_AA = 8 }; 432 | enum { INTER_NEAREST = 0, INTER_LINEAR = 1, INTER_CUBIC = 2, INTER_AREA = 3 }; 433 | 434 | NCNN_EXPORT void putText(Mat &img, const std::string &text, Point org, 435 | int fontFace, double fontScale, Scalar color, 436 | int thickness = 1); 437 | 438 | NCNN_EXPORT Size getTextSize(const std::string &text, int fontFace, 439 | double fontScale, int thickness, int *baseLine); 440 | 441 | } // namespace cv 442 | 443 | #if defined(_MSC_VER) || defined(__GNUC__) 444 | #pragma pop_macro("min") 445 | #pragma pop_macro("max") 446 | #endif 447 | 448 | #endif // _SIMPLEOCV_H 449 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Simple OpenCV 2 | 3 | > 大部分情况下,你并不需要OpenCV。 4 | 5 | 该仓库主要是在一些移动场景下,替代OpenCV的功能,例如一些图片的读写、预处理、resize、gui等被替换,并且大部分情况下你可以把SimpleOCV直接集成到你的项目中,甚至可以直接编译成wasm在浏览器运行。 6 | 7 | SimpleOCV大部分实现均来自于ncnn,为了使得它更加模块化,单独拎出来作为一个独立库,方便任何项目进行集成。 8 | 9 | 理论上我们也可以使用opencv-mobile的版本,但这个东西是在原有的opencv源码上patch出来的,灵活性太差了,直接下载预先编译好的二进制也会出很多问题。 10 | 11 | SimpleOCV尽可能秉承以下几个原则: 12 | 13 | - 最小化:精简你仅需要的几个接口; 14 | - 跨平台:浏览器都能跑,还有什么不能跑; 15 | -
方便集成:任何C++项目都能集成 16 | 17 | 目的就是:对于不是很复杂的项目,彻底抛弃OpenCV,但是对于已有的项目,SimpleOCV的include接口,保持和OpenCV一模一样。 18 | 19 | ## 与其他库的对比 20 | 21 | 22 | | 平台 | OpenCV | FlyCV | opencv mobile | simpleocv | 23 | | :-------------- | ------------- | :----------------- | :-------------- | ------------------- | 24 | | Windows | ✔️ | ❌ | ✔️ | ✔️ | 25 | | Windows(MSVC) | ✔️ | ❌ | ✔️ | ✔️ | 26 | | macOS | ✔️ | ✔️ | ✔️ | ✔️ | 27 | | Linux | ✔️ | ✔️ | ✔️ | ✔️ | 28 | | Webassembly | - | - | - | ✔️ | 29 | | Speed | Fast | Faster | Fast | Faster | 30 | | Strength | Complete | Complete & Fast | Light | **Simple & Easy** | 31 | | Simplicity | Complicated | Normal | Complicated | **Easiest** | 32 | | Dependencies | Many | Many | Less | **Zero** | 33 | 34 | ## 交流群 35 | 36 | 新建几个simpleocv的QQ交流群,欢迎加入交流,群里不定期推送微信交流群二维码: 37 | 38 | - 点击链接加入群聊【奇异AI-轻量视觉交流群】:https://jq.qq.com/?_wv=1027&k=0MaclEYg 39 | 40 | ## 更新 41 | 42 | - *`2022.12.26`*: 我增加了windows下的编译支持,现在可以直接编译一个windows .lib 静态库了,MSVC没有问题,这样你在大部分软件里面都可以直接集成,比opencv编译容易很多; 43 | 44 | ## 用法 45 | 46 | **你不需要opencv,只需要这样:** 47 | 48 | ```c++ 49 | #include "simpleocv.h" 50 | 51 | int main(int argc, char **argv) { 52 | 53 | std::string img_f = argv[1]; 54 | 55 | cv::Mat a = cv::imread(img_f); 56 | cv::putText(a, "28.9 C from SimpleOCV", cv::Point(20, 45), 1, 0.5, 57 | cv::Scalar(255, 0, 255)); 58 | cv::imwrite("a_gray.png", a); 59 | } 60 | ``` 61 | 62 | 然后,你就有了一个和opencv一模一样的能力。 63 | 64 | 上面你就可以看到这样一个可视化的图: 65 | 66 | ![](https://raw.githubusercontent.com/jinfagang/public_images/master/20221221165207.png) 67 | 68 | 请注意!这里面没有用到任何opencv的代码。 69 | 70 | 这是一个实际调用simpleocv的上层应用的效果; 71 | 72 | ![图片.png](https://s2.loli.net/2022/12/22/sMY7iRP4mJGNQKC.png) 73 | 74 | ## 高端用法 75 | 76 | SimpleOCV 最有用的还是集成到你的项目里,你可以把simpleocv作为一个3rd依赖,也可以手动的把编译出来的`libsimpleocv.a` 拷贝到你的链接目录,然后带上一个单一的头文件 `simpleocv.h` 就行了。 77 | 78 | ## 支持平台 79 | 80 | `simpleocv` 其实目的就是做一个minimal替代opencv的东西,让你在任何终端都可以无痛使用opencv,也不需要更改你的应用c++代码,同时还能保持一定的opencv优化能力,例如图像resize等的速度。目前支持的平台: 81 | 82 | -
macOS测试没有问题,x86, arm库都能编译; 83 | - windows下MSVC可以编译(但是ncnn版本会有些许的问题); 84 | - wasm,webassembly端可以集成没有问题。 85 | 86 | ## 贡献 87 | 88 | 欢迎老铁们PR一些你用simpleocv实现的东西,例如画图,画box、画keypoints等等,然后写到 `examples/demo_xx.cc` 给我PR。感谢你。 89 | 90 | ## 编译 91 | 92 | ``` 93 | mkdir build 94 | cd build 95 | cmake .. 96 | make -j8 97 | 98 | ./examples/demo_color bus.jpg 99 | ``` 100 | 101 | ## 增设 102 | 103 | 相较于ncnn里面的版本,我做了些许的修改,记录如下: 104 | 105 | - 增加了`CV_PI, CV_64F` 等全局宏定义; 106 | - 增加了 `cv::Point` 对于 `-, ==, -=, !=`等操作符的复写; 107 | - 增加了 `cv::Mat, cv::Scalar` 等更贴近opencv的构造函数; 108 | - 将simpleocv的功能进行了约简,不依赖于ncnn,一个头文件就可以调用; 109 | - 增加了 `cv::Mat::zeros` 等初始化空白Mat函数; 110 | - 增加了 `cv::LINE_AA` 等宏; 111 | 112 | ## 计划 113 | 114 | 有些许的函数,其实可以添加进来的,这样可以让这个微型的替代版本更加鲁棒,感兴趣的可以PR: 115 | 116 | - [ ] `cv::copyMakeBorder` 函数引入; 117 | - [ ] `cv::polylines` 函数引入; 118 | - [ ] 将默认的resize bilinear切换到resize nearest; 119 | - [ ] 引入一些flycv的优化; 120 | 121 | ## 后续 122 | 123 | 本项目旨在让你不依赖OpenCV,拥有基础的opencv能力,后续我会持续精简相关代码。并且尝试编译到其他平台,例如web,iOS等,这些平台你可以用SimpleOCV来做预处理,但是已经不需要opencv的依赖了。 124 | 125 | ## Copyright 126 | 127 | lucasjin && ncnn reserved. 128 | -------------------------------------------------------------------------------- /src/allocator.cc: -------------------------------------------------------------------------------- 1 | #include "./allocator.h" 2 | 3 | namespace sim { 4 | 5 | using sim::fastFree; 6 | using sim::fastMalloc; 7 | 8 | Allocator::~Allocator() {} 9 | 10 | class PoolAllocatorPrivate { 11 | public: 12 | Mutex budgets_lock; 13 | Mutex payouts_lock; 14 | unsigned int size_compare_ratio; // 0~256 15 | size_t size_drop_threshold; 16 | std::list> budgets; 17 | std::list> payouts; 18 | }; 19 | 20 | PoolAllocator::PoolAllocator() : Allocator(), d(new PoolAllocatorPrivate) { 21 | d->size_compare_ratio = 0; 22 | d->size_drop_threshold = 10; 23 | } 24 | 25 | PoolAllocator::~PoolAllocator() { 26 | clear(); 27 | 28 | if (!d->payouts.empty()) { 29 | NCNN_LOGE("FATAL ERROR!
pool allocator destroyed too early"); 30 | #if NCNN_STDIO 31 | std::list>::iterator it = d->payouts.begin(); 32 | for (; it != d->payouts.end(); ++it) { 33 | void *ptr = it->second; 34 | NCNN_LOGE("%p still in use", ptr); 35 | } 36 | #endif 37 | } 38 | 39 | delete d; 40 | } 41 | 42 | PoolAllocator::PoolAllocator(const PoolAllocator &) : d(0) {} 43 | 44 | PoolAllocator &PoolAllocator::operator=(const PoolAllocator &) { return *this; } 45 | 46 | void PoolAllocator::clear() { 47 | d->budgets_lock.lock(); 48 | 49 | std::list>::iterator it = d->budgets.begin(); 50 | for (; it != d->budgets.end(); ++it) { 51 | void *ptr = it->second; 52 | fastFree(ptr); 53 | } 54 | d->budgets.clear(); 55 | 56 | d->budgets_lock.unlock(); 57 | } 58 | 59 | void PoolAllocator::set_size_compare_ratio(float scr) { 60 | if (scr < 0.f || scr > 1.f) { 61 | NCNN_LOGE("invalid size compare ratio %f", scr); 62 | return; 63 | } 64 | 65 | d->size_compare_ratio = (unsigned int)(scr * 256); 66 | } 67 | 68 | void PoolAllocator::set_size_drop_threshold(size_t threshold) { 69 | d->size_drop_threshold = threshold; 70 | } 71 | 72 | void *PoolAllocator::fastMalloc(size_t size) { 73 | d->budgets_lock.lock(); 74 | 75 | // find free budget 76 | std::list>::iterator it = d->budgets.begin(), 77 | it_max = d->budgets.begin(), 78 | it_min = d->budgets.begin(); 79 | for (; it != d->budgets.end(); ++it) { 80 | size_t bs = it->first; 81 | 82 | // size_compare_ratio ~ 100% 83 | if (bs >= size && ((bs * d->size_compare_ratio) >> 8) <= size) { 84 | void *ptr = it->second; 85 | 86 | d->budgets.erase(it); 87 | 88 | d->budgets_lock.unlock(); 89 | 90 | d->payouts_lock.lock(); 91 | 92 | d->payouts.push_back(std::make_pair(bs, ptr)); 93 | 94 | d->payouts_lock.unlock(); 95 | 96 | return ptr; 97 | } 98 | 99 | if (bs < it_min->first) { 100 | it_min = it; 101 | } 102 | if (bs > it_max->first) { 103 | it_max = it; 104 | } 105 | } 106 | 107 | if (d->budgets.size() >= d->size_drop_threshold) { 108 | // All chunks in pool are not 
chosen. Then try to drop some outdated 109 | // chunks and return them to OS. 110 | if (it_max->first < size) { 111 | // Current query is asking for a chunk larger than any cached chunks. 112 | // Then remove the smallest one. 113 | fastFree(it_min->second); 114 | d->budgets.erase(it_min); 115 | } else if (it_min->first > size) { 116 | // Current query is asking for a chunk smaller than any cached chunks. 117 | // Then remove the largest one. 118 | fastFree(it_max->second); 119 | d->budgets.erase(it_max); 120 | } 121 | } 122 | 123 | d->budgets_lock.unlock(); 124 | 125 | // new 126 | void *ptr = fastMalloc(size); 127 | 128 | d->payouts_lock.lock(); 129 | 130 | d->payouts.push_back(std::make_pair(size, ptr)); 131 | 132 | d->payouts_lock.unlock(); 133 | 134 | return ptr; 135 | } 136 | 137 | void PoolAllocator::fastFree(void *ptr) { 138 | d->payouts_lock.lock(); 139 | 140 | // return to budgets 141 | std::list>::iterator it = d->payouts.begin(); 142 | for (; it != d->payouts.end(); ++it) { 143 | if (it->second == ptr) { 144 | size_t size = it->first; 145 | 146 | d->payouts.erase(it); 147 | 148 | d->payouts_lock.unlock(); 149 | 150 | d->budgets_lock.lock(); 151 | 152 | d->budgets.push_back(std::make_pair(size, ptr)); 153 | 154 | d->budgets_lock.unlock(); 155 | 156 | return; 157 | } 158 | } 159 | 160 | d->payouts_lock.unlock(); 161 | 162 | NCNN_LOGE("FATAL ERROR! pool allocator get wild %p", ptr); 163 | fastFree(ptr); 164 | } 165 | 166 | class UnlockedPoolAllocatorPrivate { 167 | public: 168 | unsigned int size_compare_ratio; // 0~256 169 | size_t size_drop_threshold; 170 | std::list> budgets; 171 | std::list> payouts; 172 | }; 173 | 174 | UnlockedPoolAllocator::UnlockedPoolAllocator() 175 | : Allocator(), d(new UnlockedPoolAllocatorPrivate) { 176 | d->size_compare_ratio = 0; 177 | d->size_drop_threshold = 10; 178 | } 179 | 180 | UnlockedPoolAllocator::~UnlockedPoolAllocator() { 181 | clear(); 182 | 183 | if (!d->payouts.empty()) { 184 | NCNN_LOGE("FATAL ERROR! 
unlocked pool allocator destroyed too early"); 185 | #if NCNN_STDIO 186 | std::list>::iterator it = d->payouts.begin(); 187 | for (; it != d->payouts.end(); ++it) { 188 | void *ptr = it->second; 189 | NCNN_LOGE("%p still in use", ptr); 190 | } 191 | #endif 192 | } 193 | 194 | delete d; 195 | } 196 | 197 | UnlockedPoolAllocator::UnlockedPoolAllocator(const UnlockedPoolAllocator &) 198 | : d(0) {} 199 | 200 | UnlockedPoolAllocator & 201 | UnlockedPoolAllocator::operator=(const UnlockedPoolAllocator &) { 202 | return *this; 203 | } 204 | 205 | void UnlockedPoolAllocator::clear() { 206 | std::list>::iterator it = d->budgets.begin(); 207 | for (; it != d->budgets.end(); ++it) { 208 | void *ptr = it->second; 209 | fastFree(ptr); 210 | } 211 | d->budgets.clear(); 212 | } 213 | 214 | void UnlockedPoolAllocator::set_size_compare_ratio(float scr) { 215 | if (scr < 0.f || scr > 1.f) { 216 | NCNN_LOGE("invalid size compare ratio %f", scr); 217 | return; 218 | } 219 | 220 | d->size_compare_ratio = (unsigned int)(scr * 256); 221 | } 222 | 223 | void UnlockedPoolAllocator::set_size_drop_threshold(size_t threshold) { 224 | d->size_drop_threshold = threshold; 225 | } 226 | 227 | void *UnlockedPoolAllocator::fastMalloc(size_t size) { 228 | // find free budget 229 | std::list>::iterator it = d->budgets.begin(), 230 | it_max = d->budgets.begin(), 231 | it_min = d->budgets.begin(); 232 | for (; it != d->budgets.end(); ++it) { 233 | size_t bs = it->first; 234 | 235 | // size_compare_ratio ~ 100% 236 | if (bs >= size && ((bs * d->size_compare_ratio) >> 8) <= size) { 237 | void *ptr = it->second; 238 | 239 | d->budgets.erase(it); 240 | 241 | d->payouts.push_back(std::make_pair(bs, ptr)); 242 | 243 | return ptr; 244 | } 245 | 246 | if (bs > it_max->first) { 247 | it_max = it; 248 | } 249 | if (bs < it_min->first) { 250 | it_min = it; 251 | } 252 | } 253 | 254 | if (d->budgets.size() >= d->size_drop_threshold) { 255 | if (it_max->first < size) { 256 | fastFree(it_min->second); 257 | 
d->budgets.erase(it_min); 258 | } else if (it_min->first > size) { 259 | fastFree(it_max->second); 260 | d->budgets.erase(it_max); 261 | } 262 | } 263 | 264 | // new 265 | void *ptr = fastMalloc(size); 266 | 267 | d->payouts.push_back(std::make_pair(size, ptr)); 268 | 269 | return ptr; 270 | } 271 | 272 | void UnlockedPoolAllocator::fastFree(void *ptr) { 273 | // return to budgets 274 | std::list>::iterator it = d->payouts.begin(); 275 | for (; it != d->payouts.end(); ++it) { 276 | if (it->second == ptr) { 277 | size_t size = it->first; 278 | 279 | d->payouts.erase(it); 280 | 281 | d->budgets.push_back(std::make_pair(size, ptr)); 282 | 283 | return; 284 | } 285 | } 286 | 287 | NCNN_LOGE("FATAL ERROR! unlocked pool allocator get wild %p", ptr); 288 | fastFree(ptr); 289 | } 290 | 291 | } // namespace sim -------------------------------------------------------------------------------- /src/allocator.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLOCATOR_H_ 2 | #define ALLOCATOR_H_ 3 | 4 | #include "platform.h" 5 | #include 6 | 7 | namespace sim { 8 | 9 | // the alignment of all the allocated buffers 10 | #if NCNN_AVX512 11 | #define NCNN_MALLOC_ALIGN 64 12 | #elif NCNN_AVX 13 | #define NCNN_MALLOC_ALIGN 32 14 | #else 15 | #define NCNN_MALLOC_ALIGN 16 16 | #endif 17 | 18 | // we have some optimized kernels that may overread buffer a bit in loop 19 | // it is common to interleave next-loop data load with arithmetic instructions 20 | // allocating more bytes keeps us safe from SEGV_ACCERR failure 21 | #define NCNN_MALLOC_OVERREAD 64 22 | 23 | template 24 | static inline _Tp *alignPtr(_Tp *ptr, int n = (int)sizeof(_Tp)) { 25 | return (_Tp *)(((size_t)ptr + n - 1) & -n); 26 | } 27 | 28 | // Aligns a buffer size to the specified number of bytes 29 | // The function returns the minimum number that is greater or equal to sz and is 30 | // divisible by n sz Buffer size to align n Alignment size that must be a power 31 | // of 
// Rounds sz up to the next multiple of n.
// n must be a power of two: the mask `-n` only clears the low bits in that
// case.
static inline size_t alignSize(size_t sz, int n) { return (sz + n - 1) & -n; }

// Allocates `size` bytes aligned to NCNN_MALLOC_ALIGN and returns 0 on
// failure. The memory must be released with fastFree, which mirrors the
// branch structure below.
//
// Every branch except the MSVC one requests NCNN_MALLOC_OVERREAD extra bytes
// beyond `size`.
inline void *fastMalloc(size_t size) {
#if _MSC_VER
  return _aligned_malloc(size, NCNN_MALLOC_ALIGN);
#elif (defined(__unix__) || defined(__APPLE__)) &&                             \
        _POSIX_C_SOURCE >= 200112L ||                                          \
    (__ANDROID__ && __ANDROID_API__ >= 17)
  void *ptr = 0;
  // posix_memalign reports failure through a non-zero return value
  if (posix_memalign(&ptr, NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD))
    ptr = 0;
  return ptr;
#elif __ANDROID__ && __ANDROID_API__ < 17
  return memalign(NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD);
#else
  // Portable fallback: over-allocate with malloc, align by hand, and store
  // the pointer malloc returned in the slot just before the aligned address
  // so fastFree can recover it.
  unsigned char *udata = (unsigned char *)malloc(
      size + sizeof(void *) + NCNN_MALLOC_ALIGN + NCNN_MALLOC_OVERREAD);
  if (!udata)
    return 0;
  unsigned char **adata =
      alignPtr((unsigned char **)udata + 1, NCNN_MALLOC_ALIGN);
  adata[-1] = udata;
  return adata;
#endif
}

// Releases memory obtained from fastMalloc. A null pointer is ignored.
extern inline void fastFree(void *ptr) {
  if (ptr) {
#if _MSC_VER
    _aligned_free(ptr);
#elif (defined(__unix__) || defined(__APPLE__)) &&                             \
        _POSIX_C_SOURCE >= 200112L ||                                          \
    (__ANDROID__ && __ANDROID_API__ >= 17)
    free(ptr);
#elif __ANDROID__ && __ANDROID_API__ < 17
    free(ptr);
#else
    // Fallback layout: the pointer malloc returned sits just before the
    // aligned block.
    unsigned char *udata = ((unsigned char **)ptr)[-1];
    free(udata);
#endif
  }
}
// Abstract allocation interface: implementations supply a matching
// fastMalloc / fastFree pair.
class NCNN_EXPORT Allocator {
public:
  virtual ~Allocator();
  // Returns a block of at least `size` bytes.
  virtual void *fastMalloc(size_t size) = 0;
  // Gives back a block previously returned by fastMalloc.
  virtual void fastFree(void *ptr) = 0;
};

// Private implementation data, defined in the source file.
class PoolAllocatorPrivate;

// Allocator that caches released blocks ("budgets") for reuse. Its lists are
// guarded by mutexes in the implementation.
class NCNN_EXPORT PoolAllocator : public Allocator {
public:
  PoolAllocator();
  ~PoolAllocator();

  // ratio range 0 ~ 1
  // default cr = 0
  void set_size_compare_ratio(float scr);

  // budget drop threshold
  // default threshold = 10
  void set_size_drop_threshold(size_t);

  // release all budgets immediately
  void clear();

  virtual void *fastMalloc(size_t size);
  virtual void fastFree(void *ptr);

private:
  // Copying is not part of the public interface: both operations are private.
  PoolAllocator(const PoolAllocator &);
  PoolAllocator &operator=(const PoolAllocator &);

private:
  // Pointer to the private implementation data.
  PoolAllocatorPrivate *const d;
};

// Private implementation data, defined in the source file.
class UnlockedPoolAllocatorPrivate;

// Same caching scheme as PoolAllocator, but the implementation takes no
// locks.
class NCNN_EXPORT UnlockedPoolAllocator : public Allocator {
public:
  UnlockedPoolAllocator();
  ~UnlockedPoolAllocator();

  // ratio range 0 ~ 1
  // default cr = 0
  void set_size_compare_ratio(float scr);

  // budget drop threshold
  // default threshold = 10
  void set_size_drop_threshold(size_t);

  // release all budgets immediately
  void clear();

  virtual void *fastMalloc(size_t size);
  virtual void fastFree(void *ptr);

private:
  // Copying is not part of the public interface: both operations are private.
  UnlockedPoolAllocator(const UnlockedPoolAllocator &);
  UnlockedPoolAllocator &operator=(const UnlockedPoolAllocator &);

private:
  // Pointer to the private implementation data.
  UnlockedPoolAllocatorPrivate *const d;
};
namespace sim {

// Set of CPU indices, stored in whatever representation the platform uses
// for thread-affinity masks (see the conditional members below).
class NCNN_EXPORT CpuSet {
public:
  CpuSet();
  // Add / remove a single cpu index.
  void enable(int cpu);
  void disable(int cpu);
  // Remove every cpu from the set.
  void disable_all();
  // Query one cpu index / count the enabled ones.
  bool is_enabled(int cpu) const;
  int num_enabled() const;

public:
  // Exactly which member exists depends on the target platform.
#if (defined _WIN32 && !(defined __MINGW32__))
  ULONG_PTR mask;
#endif
#if defined __ANDROID__ || defined __linux__
  cpu_set_t cpu_set;
#endif
#if __APPLE__
  unsigned int policy;
#endif
};

// test optional cpu features
// NOTE(review): each query presumably returns non-zero when the feature is
// available on the running cpu; confirm in cpu.cpp.
// edsp = armv7 edsp
NCNN_EXPORT int cpu_support_arm_edsp();
// neon = armv7 neon or aarch64 asimd
NCNN_EXPORT int cpu_support_arm_neon();
// vfpv4 = armv7 fp16 + fma
NCNN_EXPORT int cpu_support_arm_vfpv4();
// asimdhp = aarch64 asimd half precision
NCNN_EXPORT int cpu_support_arm_asimdhp();
// asimddp = aarch64 asimd dot product
NCNN_EXPORT int cpu_support_arm_asimddp();
// asimdfhm = aarch64 asimd fhm
NCNN_EXPORT int cpu_support_arm_asimdfhm();
// bf16 = aarch64 bf16
NCNN_EXPORT int cpu_support_arm_bf16();
// i8mm = aarch64 i8mm
NCNN_EXPORT int cpu_support_arm_i8mm();
// sve = aarch64 sve
NCNN_EXPORT int cpu_support_arm_sve();
// sve2 = aarch64 sve2
NCNN_EXPORT int cpu_support_arm_sve2();
// svebf16 = aarch64 svebf16
NCNN_EXPORT int cpu_support_arm_svebf16();
// svei8mm = aarch64 svei8mm
NCNN_EXPORT int cpu_support_arm_svei8mm();
// svef32mm = aarch64 svef32mm
NCNN_EXPORT int cpu_support_arm_svef32mm();

// avx = x86 avx
NCNN_EXPORT int cpu_support_x86_avx();
// fma = x86 fma
NCNN_EXPORT int cpu_support_x86_fma();
// xop = x86 xop
NCNN_EXPORT int cpu_support_x86_xop();
// f16c = x86 f16c
NCNN_EXPORT int cpu_support_x86_f16c();
// avx2 = x86 avx2 + fma + f16c
NCNN_EXPORT int cpu_support_x86_avx2();
// avx_vnni = x86 avx vnni
NCNN_EXPORT int cpu_support_x86_avx_vnni();
// avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl
NCNN_EXPORT int cpu_support_x86_avx512();
// avx512_vnni = x86 avx512 vnni
NCNN_EXPORT int cpu_support_x86_avx512_vnni();
// avx512_bf16 = x86 avx512 bf16
NCNN_EXPORT int cpu_support_x86_avx512_bf16();
// avx512_fp16 = x86 avx512 fp16
NCNN_EXPORT int cpu_support_x86_avx512_fp16();

// lsx = loongarch lsx
NCNN_EXPORT int cpu_support_loongarch_lsx();
// lasx = loongarch lasx
NCNN_EXPORT int cpu_support_loongarch_lasx();

// msa = mips msa
NCNN_EXPORT int cpu_support_mips_msa();
// mmi = loongson mmi
NCNN_EXPORT int cpu_support_loongson_mmi();

// v = riscv vector
NCNN_EXPORT int cpu_support_riscv_v();
// zfh = riscv half-precision float
NCNN_EXPORT int cpu_support_riscv_zfh();
// vlenb = riscv vector length in bytes
NCNN_EXPORT int cpu_riscv_vlenb();

// cpu info: logical core counts, in total and per cluster
NCNN_EXPORT int get_cpu_count();
NCNN_EXPORT int get_little_cpu_count();
NCNN_EXPORT int get_big_cpu_count();

// cpu info: physical core counts, in total and per cluster
NCNN_EXPORT int get_physical_cpu_count();
NCNN_EXPORT int get_physical_little_cpu_count();
NCNN_EXPORT int get_physical_big_cpu_count();

// bind all threads on little clusters if powersave enabled
// affects HMP arch cpu like ARM big.LITTLE
// only implemented on android at the moment
// switching powersave is expensive and not thread-safe
// 0 = all cores enabled (default)
// 1 = only little clusters enabled
// 2 = only big clusters enabled
// the setter returns 0 on success
NCNN_EXPORT int get_cpu_powersave();
NCNN_EXPORT int set_cpu_powersave(int powersave);

// convenience wrapper: affinity mask for a given powersave mode
NCNN_EXPORT const CpuSet &get_cpu_thread_affinity_mask(int powersave);

// set explicit thread affinity
NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet &thread_affinity_mask);

// misc function wrappers for openmp routines
NCNN_EXPORT int get_omp_num_threads();
NCNN_EXPORT void set_omp_num_threads(int num_threads);

NCNN_EXPORT int get_omp_dynamic();
NCNN_EXPORT void set_omp_dynamic(int dynamic);

NCNN_EXPORT int get_omp_thread_num();

NCNN_EXPORT int get_kmp_blocktime();
NCNN_EXPORT void set_kmp_blocktime(int time_ms);

// need to flush denormals on Intel Chipset.
// Other architectures such as ARM can be added as needed.
// 0 = DAZ OFF, FTZ OFF
// 1 = DAZ ON , FTZ OFF
// 2 = DAZ OFF, FTZ ON
// 3 = DAZ ON, FTZ ON
NCNN_EXPORT int get_flush_denormals();
NCNN_EXPORT int set_flush_denormals(int flush_denormals);

} // namespace sim
You may obtain a copy of the 8 | // License at 9 | // 10 | // https://opensource.org/licenses/BSD-3-Clause 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | // License for the specific language governing permissions and limitations under 16 | // the License. 17 | 18 | #include "mat.h" 19 | 20 | #if __ARM_NEON 21 | #include 22 | #endif // __ARM_NEON 23 | #include "cpu.h" 24 | // #include "layer.h" 25 | // #include "layer_type.h" 26 | 27 | #include 28 | 29 | #if NCNN_VULKAN 30 | #if NCNN_PLATFORM_API 31 | #if __ANDROID_API__ >= 26 32 | #include 33 | #endif // __ANDROID_API__ >= 26 34 | #endif // NCNN_PLATFORM_API 35 | #endif // NCNN_VULKAN 36 | 37 | namespace sim { 38 | 39 | Mat Mat::clone(Allocator *_allocator) const { 40 | if (empty()) 41 | return Mat(); 42 | 43 | Mat m; 44 | if (dims == 1) 45 | m.create(w, elemsize, elempack, _allocator); 46 | else if (dims == 2) 47 | m.create(w, h, elemsize, elempack, _allocator); 48 | else if (dims == 3) 49 | m.create(w, h, c, elemsize, elempack, _allocator); 50 | else if (dims == 4) 51 | m.create(w, h, d, c, elemsize, elempack, _allocator); 52 | 53 | if (total() > 0) { 54 | if (cstep == m.cstep) 55 | memcpy(m.data, data, total() * elemsize); 56 | else { 57 | // copy by channel for differnet cstep 58 | size_t size = (size_t)w * h * d * elemsize; 59 | for (int i = 0; i < c; i++) { 60 | memcpy(m.channel(i), channel(i), size); 61 | } 62 | } 63 | } 64 | 65 | return m; 66 | } 67 | 68 | void Mat::clone_from(const sim::Mat &mat, Allocator *allocator) { 69 | *this = mat.clone(allocator); 70 | } 71 | 72 | Mat Mat::reshape(int _w, Allocator *_allocator) const { 73 | if (w * h * d * c != _w) 74 | return Mat(); 75 | 76 | if (dims >= 3 && cstep != (size_t)w * h * d) { 77 | Mat m; 78 | m.create(_w, elemsize, elempack, _allocator); 79 | 80 | // 
flatten 81 | for (int i = 0; i < c; i++) { 82 | const void *ptr = (unsigned char *)data + i * cstep * elemsize; 83 | void *mptr = (unsigned char *)m.data + (size_t)i * w * h * d * elemsize; 84 | memcpy(mptr, ptr, (size_t)w * h * d * elemsize); 85 | } 86 | 87 | return m; 88 | } 89 | 90 | Mat m = *this; 91 | 92 | m.dims = 1; 93 | m.w = _w; 94 | m.h = 1; 95 | m.d = 1; 96 | m.c = 1; 97 | 98 | m.cstep = _w; 99 | 100 | return m; 101 | } 102 | 103 | Mat Mat::reshape(int _w, int _h, Allocator *_allocator) const { 104 | if (w * h * d * c != _w * _h) 105 | return Mat(); 106 | 107 | if (dims >= 3 && cstep != (size_t)w * h * d) { 108 | Mat m; 109 | m.create(_w, _h, elemsize, elempack, _allocator); 110 | 111 | // flatten 112 | for (int i = 0; i < c; i++) { 113 | const void *ptr = (unsigned char *)data + i * cstep * elemsize; 114 | void *mptr = (unsigned char *)m.data + (size_t)i * w * h * d * elemsize; 115 | memcpy(mptr, ptr, (size_t)w * h * d * elemsize); 116 | } 117 | 118 | return m; 119 | } 120 | 121 | Mat m = *this; 122 | 123 | m.dims = 2; 124 | m.w = _w; 125 | m.h = _h; 126 | m.d = 1; 127 | m.c = 1; 128 | 129 | m.cstep = (size_t)_w * _h; 130 | 131 | return m; 132 | } 133 | 134 | Mat Mat::reshape(int _w, int _h, int _c, Allocator *_allocator) const { 135 | if (w * h * d * c != _w * _h * _c) 136 | return Mat(); 137 | 138 | if (dims < 3) { 139 | if ((size_t)_w * _h != 140 | alignSize((size_t)_w * _h * elemsize, 16) / elemsize) { 141 | Mat m; 142 | m.create(_w, _h, _c, elemsize, elempack, _allocator); 143 | 144 | // align channel 145 | for (int i = 0; i < _c; i++) { 146 | const void *ptr = 147 | (unsigned char *)data + (size_t)i * _w * _h * elemsize; 148 | void *mptr = (unsigned char *)m.data + i * m.cstep * m.elemsize; 149 | memcpy(mptr, ptr, (size_t)_w * _h * elemsize); 150 | } 151 | 152 | return m; 153 | } 154 | } else if (c != _c) { 155 | // flatten and then align 156 | Mat tmp = reshape(_w * _h * _c, _allocator); 157 | return tmp.reshape(_w, _h, _c, _allocator); 158 | } 
159 | 160 | Mat m = *this; 161 | 162 | m.dims = 3; 163 | m.w = _w; 164 | m.h = _h; 165 | m.d = 1; 166 | m.c = _c; 167 | 168 | m.cstep = alignSize((size_t)_w * _h * elemsize, 16) / elemsize; 169 | 170 | return m; 171 | } 172 | 173 | Mat Mat::reshape(int _w, int _h, int _d, int _c, Allocator *_allocator) const { 174 | if (w * h * d * c != _w * _h * _d * _c) 175 | return Mat(); 176 | 177 | if (dims < 3) { 178 | if ((size_t)_w * _h * _d != 179 | alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize) { 180 | Mat m; 181 | m.create(_w, _h, _d, _c, elemsize, elempack, _allocator); 182 | 183 | // align channel 184 | for (int i = 0; i < _c; i++) { 185 | const void *ptr = 186 | (unsigned char *)data + (size_t)i * _w * _h * _d * elemsize; 187 | void *mptr = (unsigned char *)m.data + i * m.cstep * m.elemsize; 188 | memcpy(mptr, ptr, (size_t)_w * _h * _d * elemsize); 189 | } 190 | 191 | return m; 192 | } 193 | } else if (c != _c) { 194 | // flatten and then align 195 | Mat tmp = reshape(_w * _h * _d * _c, _allocator); 196 | return tmp.reshape(_w, _h, _d, _c, _allocator); 197 | } 198 | 199 | Mat m = *this; 200 | 201 | m.dims = 4; 202 | m.w = _w; 203 | m.h = _h; 204 | m.d = _d; 205 | m.c = _c; 206 | 207 | m.cstep = alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize; 208 | 209 | return m; 210 | } 211 | 212 | void Mat::create(int _w, size_t _elemsize, Allocator *_allocator) { 213 | if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && 214 | allocator == _allocator) 215 | return; 216 | 217 | release(); 218 | 219 | elemsize = _elemsize; 220 | elempack = 1; 221 | allocator = _allocator; 222 | 223 | dims = 1; 224 | w = _w; 225 | h = 1; 226 | d = 1; 227 | c = 1; 228 | 229 | cstep = w; 230 | 231 | size_t totalsize = alignSize(total() * elemsize, 4); 232 | if (totalsize > 0) { 233 | if (allocator) 234 | data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); 235 | else 236 | data = fastMalloc(totalsize + (int)sizeof(*refcount)); 237 | } 238 | 239 | if 
(data) { 240 | refcount = (int *)(((unsigned char *)data) + totalsize); 241 | *refcount = 1; 242 | } 243 | } 244 | 245 | void Mat::create(int _w, int _h, size_t _elemsize, Allocator *_allocator) { 246 | if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && 247 | elempack == 1 && allocator == _allocator) 248 | return; 249 | 250 | release(); 251 | 252 | elemsize = _elemsize; 253 | elempack = 1; 254 | allocator = _allocator; 255 | 256 | dims = 2; 257 | w = _w; 258 | h = _h; 259 | d = 1; 260 | c = 1; 261 | 262 | cstep = (size_t)w * h; 263 | 264 | size_t totalsize = alignSize(total() * elemsize, 4); 265 | if (totalsize > 0) { 266 | if (allocator) 267 | data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); 268 | else 269 | data = fastMalloc(totalsize + (int)sizeof(*refcount)); 270 | } 271 | 272 | if (data) { 273 | refcount = (int *)(((unsigned char *)data) + totalsize); 274 | *refcount = 1; 275 | } 276 | } 277 | 278 | void Mat::create(int _w, int _h, int _c, size_t _elemsize, 279 | Allocator *_allocator) { 280 | if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && 281 | elempack == 1 && allocator == _allocator) 282 | return; 283 | 284 | release(); 285 | 286 | elemsize = _elemsize; 287 | elempack = 1; 288 | allocator = _allocator; 289 | 290 | dims = 3; 291 | w = _w; 292 | h = _h; 293 | d = 1; 294 | c = _c; 295 | 296 | cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; 297 | 298 | size_t totalsize = alignSize(total() * elemsize, 4); 299 | if (totalsize > 0) { 300 | if (allocator) 301 | data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); 302 | else 303 | data = fastMalloc(totalsize + (int)sizeof(*refcount)); 304 | } 305 | 306 | if (data) { 307 | refcount = (int *)(((unsigned char *)data) + totalsize); 308 | *refcount = 1; 309 | } 310 | } 311 | 312 | void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, 313 | Allocator *_allocator) { 314 | if (dims == 4 && w == _w && h == _h && d == _d && c == _c 
&& 315 | elemsize == _elemsize && elempack == 1 && allocator == _allocator) 316 | return; 317 | 318 | release(); 319 | 320 | elemsize = _elemsize; 321 | elempack = 1; 322 | allocator = _allocator; 323 | 324 | dims = 4; 325 | w = _w; 326 | h = _h; 327 | d = _d; 328 | c = _c; 329 | 330 | cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize; 331 | 332 | size_t totalsize = alignSize(total() * elemsize, 4); 333 | if (totalsize > 0) { 334 | if (allocator) 335 | data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); 336 | else 337 | data = fastMalloc(totalsize + (int)sizeof(*refcount)); 338 | } 339 | 340 | if (data) { 341 | refcount = (int *)(((unsigned char *)data) + totalsize); 342 | *refcount = 1; 343 | } 344 | } 345 | 346 | void Mat::create(int _w, size_t _elemsize, int _elempack, 347 | Allocator *_allocator) { 348 | if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && 349 | allocator == _allocator) 350 | return; 351 | 352 | release(); 353 | 354 | elemsize = _elemsize; 355 | elempack = _elempack; 356 | allocator = _allocator; 357 | 358 | dims = 1; 359 | w = _w; 360 | h = 1; 361 | d = 1; 362 | c = 1; 363 | 364 | cstep = w; 365 | 366 | size_t totalsize = alignSize(total() * elemsize, 4); 367 | if (totalsize > 0) { 368 | if (allocator) 369 | data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); 370 | else 371 | data = fastMalloc(totalsize + (int)sizeof(*refcount)); 372 | } 373 | 374 | if (data) { 375 | refcount = (int *)(((unsigned char *)data) + totalsize); 376 | *refcount = 1; 377 | } 378 | } 379 | 380 | void Mat::create(int _w, int _h, size_t _elemsize, int _elempack, 381 | Allocator *_allocator) { 382 | if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && 383 | elempack == _elempack && allocator == _allocator) 384 | return; 385 | 386 | release(); 387 | 388 | elemsize = _elemsize; 389 | elempack = _elempack; 390 | allocator = _allocator; 391 | 392 | dims = 2; 393 | w = _w; 394 | h = _h; 395 | d = 1; 
396 | c = 1; 397 | 398 | cstep = (size_t)w * h; 399 | 400 | size_t totalsize = alignSize(total() * elemsize, 4); 401 | if (totalsize > 0) { 402 | if (allocator) 403 | data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); 404 | else 405 | data = fastMalloc(totalsize + (int)sizeof(*refcount)); 406 | } 407 | 408 | if (data) { 409 | refcount = (int *)(((unsigned char *)data) + totalsize); 410 | *refcount = 1; 411 | } 412 | } 413 | 414 | void Mat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, 415 | Allocator *_allocator) { 416 | if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && 417 | elempack == _elempack && allocator == _allocator) 418 | return; 419 | 420 | release(); 421 | 422 | elemsize = _elemsize; 423 | elempack = _elempack; 424 | allocator = _allocator; 425 | 426 | dims = 3; 427 | w = _w; 428 | h = _h; 429 | d = 1; 430 | c = _c; 431 | 432 | cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; 433 | 434 | size_t totalsize = alignSize(total() * elemsize, 4); 435 | if (totalsize > 0) { 436 | if (allocator) 437 | data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); 438 | else 439 | data = fastMalloc(totalsize + (int)sizeof(*refcount)); 440 | } 441 | 442 | if (data) { 443 | refcount = (int *)(((unsigned char *)data) + totalsize); 444 | *refcount = 1; 445 | } 446 | } 447 | 448 | void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, 449 | int _elempack, Allocator *_allocator) { 450 | if (dims == 4 && w == _w && h == _h && d == _d && c == _c && 451 | elemsize == _elemsize && elempack == _elempack && allocator == _allocator) 452 | return; 453 | 454 | release(); 455 | 456 | elemsize = _elemsize; 457 | elempack = _elempack; 458 | allocator = _allocator; 459 | 460 | dims = 4; 461 | w = _w; 462 | h = _h; 463 | d = _d; 464 | c = _c; 465 | 466 | cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize; 467 | 468 | size_t totalsize = alignSize(total() * elemsize, 4); 469 | if (totalsize 
> 0) { 470 | if (allocator) 471 | data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount)); 472 | else 473 | data = fastMalloc(totalsize + (int)sizeof(*refcount)); 474 | } 475 | 476 | if (data) { 477 | refcount = (int *)(((unsigned char *)data) + totalsize); 478 | *refcount = 1; 479 | } 480 | } 481 | 482 | void Mat::create_like(const Mat &m, Allocator *_allocator) { 483 | int _dims = m.dims; 484 | if (_dims == 1) 485 | create(m.w, m.elemsize, m.elempack, _allocator); 486 | if (_dims == 2) 487 | create(m.w, m.h, m.elemsize, m.elempack, _allocator); 488 | if (_dims == 3) 489 | create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); 490 | if (_dims == 4) 491 | create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator); 492 | } 493 | 494 | #if NCNN_VULKAN 495 | void Mat::create_like(const VkMat &m, Allocator *_allocator) { 496 | int _dims = m.dims; 497 | if (_dims == 1) 498 | create(m.w, m.elemsize, m.elempack, _allocator); 499 | if (_dims == 2) 500 | create(m.w, m.h, m.elemsize, m.elempack, _allocator); 501 | if (_dims == 3) 502 | create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); 503 | if (_dims == 4) 504 | create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator); 505 | } 506 | 507 | void Mat::create_like(const VkImageMat &im, Allocator *_allocator) { 508 | int _dims = im.dims; 509 | if (_dims == 1) 510 | create(im.w, im.elemsize, im.elempack, _allocator); 511 | if (_dims == 2) 512 | create(im.w, im.h, im.elemsize, im.elempack, _allocator); 513 | if (_dims == 3) 514 | create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator); 515 | if (_dims == 4) 516 | create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator); 517 | } 518 | #endif // NCNN_VULKAN 519 | 520 | #if NCNN_VULKAN 521 | void VkMat::create(int _w, size_t _elemsize, VkAllocator *_allocator) { 522 | if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && 523 | allocator == _allocator) 524 | return; 525 | 526 | release(); 527 | 528 | elemsize = 
_elemsize; 529 | elempack = 1; 530 | allocator = _allocator; 531 | 532 | dims = 1; 533 | w = _w; 534 | h = 1; 535 | d = 1; 536 | c = 1; 537 | 538 | cstep = w; 539 | 540 | if (total() > 0) { 541 | size_t totalsize = alignSize(total() * elemsize, 4); 542 | 543 | data = allocator->fastMalloc(totalsize); 544 | } 545 | 546 | if (data) { 547 | refcount = 548 | (int *)((unsigned char *)data + offsetof(VkBufferMemory, refcount)); 549 | *refcount = 1; 550 | } 551 | } 552 | 553 | void VkMat::create(int _w, int _h, size_t _elemsize, VkAllocator *_allocator) { 554 | if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && 555 | elempack == 1 && allocator == _allocator) 556 | return; 557 | 558 | release(); 559 | 560 | elemsize = _elemsize; 561 | elempack = 1; 562 | allocator = _allocator; 563 | 564 | dims = 2; 565 | w = _w; 566 | h = _h; 567 | d = 1; 568 | c = 1; 569 | 570 | cstep = w * h; 571 | 572 | if (total() > 0) { 573 | size_t totalsize = alignSize(total() * elemsize, 4); 574 | 575 | data = allocator->fastMalloc(totalsize); 576 | } 577 | 578 | if (data) { 579 | refcount = 580 | (int *)((unsigned char *)data + offsetof(VkBufferMemory, refcount)); 581 | *refcount = 1; 582 | } 583 | } 584 | 585 | void VkMat::create(int _w, int _h, int _c, size_t _elemsize, 586 | VkAllocator *_allocator) { 587 | if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && 588 | elempack == 1 && allocator == _allocator) 589 | return; 590 | 591 | release(); 592 | 593 | elemsize = _elemsize; 594 | elempack = 1; 595 | allocator = _allocator; 596 | 597 | dims = 3; 598 | w = _w; 599 | h = _h; 600 | d = 1; 601 | c = _c; 602 | 603 | cstep = alignSize(w * h * elemsize, 16) / elemsize; 604 | 605 | if (total() > 0) { 606 | size_t totalsize = alignSize(total() * elemsize, 4); 607 | 608 | data = allocator->fastMalloc(totalsize); 609 | } 610 | 611 | if (data) { 612 | refcount = 613 | (int *)((unsigned char *)data + offsetof(VkBufferMemory, refcount)); 614 | *refcount = 1; 615 | } 616 | 
} 617 | 618 | void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, 619 | VkAllocator *_allocator) { 620 | if (dims == 4 && w == _w && h == _h && d == _d && c == _c && 621 | elemsize == _elemsize && elempack == 1 && allocator == _allocator) 622 | return; 623 | 624 | release(); 625 | 626 | elemsize = _elemsize; 627 | elempack = 1; 628 | allocator = _allocator; 629 | 630 | dims = 4; 631 | w = _w; 632 | h = _h; 633 | d = _d; 634 | c = _c; 635 | 636 | cstep = alignSize(w * h * d * elemsize, 16) / elemsize; 637 | 638 | if (total() > 0) { 639 | size_t totalsize = alignSize(total() * elemsize, 4); 640 | 641 | data = allocator->fastMalloc(totalsize); 642 | } 643 | 644 | if (data) { 645 | refcount = 646 | (int *)((unsigned char *)data + offsetof(VkBufferMemory, refcount)); 647 | *refcount = 1; 648 | } 649 | } 650 | 651 | void VkMat::create(int _w, size_t _elemsize, int _elempack, 652 | VkAllocator *_allocator) { 653 | if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && 654 | allocator == _allocator) 655 | return; 656 | 657 | release(); 658 | 659 | elemsize = _elemsize; 660 | elempack = _elempack; 661 | allocator = _allocator; 662 | 663 | dims = 1; 664 | w = _w; 665 | h = 1; 666 | d = 1; 667 | c = 1; 668 | 669 | cstep = w; 670 | 671 | if (total() > 0) { 672 | size_t totalsize = alignSize(total() * elemsize, 4); 673 | 674 | data = allocator->fastMalloc(totalsize); 675 | } 676 | 677 | if (data) { 678 | refcount = 679 | (int *)((unsigned char *)data + offsetof(VkBufferMemory, refcount)); 680 | *refcount = 1; 681 | } 682 | } 683 | 684 | void VkMat::create(int _w, int _h, size_t _elemsize, int _elempack, 685 | VkAllocator *_allocator) { 686 | if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && 687 | elempack == _elempack && allocator == _allocator) 688 | return; 689 | 690 | release(); 691 | 692 | elemsize = _elemsize; 693 | elempack = _elempack; 694 | allocator = _allocator; 695 | 696 | dims = 2; 697 | w = _w; 698 | h = _h; 699 
| d = 1; 700 | c = 1; 701 | 702 | cstep = w * h; 703 | 704 | if (total() > 0) { 705 | size_t totalsize = alignSize(total() * elemsize, 4); 706 | 707 | data = allocator->fastMalloc(totalsize); 708 | } 709 | 710 | if (data) { 711 | refcount = 712 | (int *)((unsigned char *)data + offsetof(VkBufferMemory, refcount)); 713 | *refcount = 1; 714 | } 715 | } 716 | 717 | void VkMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, 718 | VkAllocator *_allocator) { 719 | if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && 720 | elempack == _elempack && allocator == _allocator) 721 | return; 722 | 723 | release(); 724 | 725 | elemsize = _elemsize; 726 | elempack = _elempack; 727 | allocator = _allocator; 728 | 729 | dims = 3; 730 | w = _w; 731 | h = _h; 732 | d = 1; 733 | c = _c; 734 | 735 | cstep = alignSize(w * h * elemsize, 16) / elemsize; 736 | 737 | if (total() > 0) { 738 | size_t totalsize = alignSize(total() * elemsize, 4); 739 | 740 | data = allocator->fastMalloc(totalsize); 741 | } 742 | 743 | if (data) { 744 | refcount = 745 | (int *)((unsigned char *)data + offsetof(VkBufferMemory, refcount)); 746 | *refcount = 1; 747 | } 748 | } 749 | 750 | void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, 751 | int _elempack, VkAllocator *_allocator) { 752 | if (dims == 4 && w == _w && h == _h && d == _d && c == _c && 753 | elemsize == _elemsize && elempack == _elempack && allocator == _allocator) 754 | return; 755 | 756 | release(); 757 | 758 | elemsize = _elemsize; 759 | elempack = _elempack; 760 | allocator = _allocator; 761 | 762 | dims = 4; 763 | w = _w; 764 | h = _h; 765 | d = _d; 766 | c = _c; 767 | 768 | cstep = alignSize(w * h * d * elemsize, 16) / elemsize; 769 | 770 | if (total() > 0) { 771 | size_t totalsize = alignSize(total() * elemsize, 4); 772 | 773 | data = allocator->fastMalloc(totalsize); 774 | } 775 | 776 | if (data) { 777 | refcount = 778 | (int *)((unsigned char *)data + offsetof(VkBufferMemory, 
refcount)); 779 | *refcount = 1; 780 | } 781 | } 782 | 783 | void VkMat::create_like(const Mat &m, VkAllocator *_allocator) { 784 | int _dims = m.dims; 785 | if (_dims == 1) 786 | create(m.w, m.elemsize, m.elempack, _allocator); 787 | if (_dims == 2) 788 | create(m.w, m.h, m.elemsize, m.elempack, _allocator); 789 | if (_dims == 3) 790 | create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); 791 | if (_dims == 4) 792 | create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator); 793 | } 794 | 795 | void VkMat::create_like(const VkMat &m, VkAllocator *_allocator) { 796 | int _dims = m.dims; 797 | if (_dims == 1) 798 | create(m.w, m.elemsize, m.elempack, _allocator); 799 | if (_dims == 2) 800 | create(m.w, m.h, m.elemsize, m.elempack, _allocator); 801 | if (_dims == 3) 802 | create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); 803 | if (_dims == 4) 804 | create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator); 805 | } 806 | 807 | void VkMat::create_like(const VkImageMat &im, VkAllocator *_allocator) { 808 | int _dims = im.dims; 809 | if (_dims == 1) 810 | create(im.w, im.elemsize, im.elempack, _allocator); 811 | if (_dims == 2) 812 | create(im.w, im.h, im.elemsize, im.elempack, _allocator); 813 | if (_dims == 3) 814 | create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator); 815 | if (_dims == 4) 816 | create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator); 817 | } 818 | 819 | void VkImageMat::create(int _w, size_t _elemsize, VkAllocator *_allocator) { 820 | if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && 821 | allocator == _allocator) 822 | return; 823 | 824 | release(); 825 | 826 | elemsize = _elemsize; 827 | elempack = 1; 828 | allocator = _allocator; 829 | 830 | dims = 1; 831 | w = _w; 832 | h = 1; 833 | d = 1; 834 | c = 1; 835 | 836 | if (total() > 0) { 837 | data = allocator->fastMalloc(w, h, c, elemsize, elempack); 838 | } 839 | 840 | if (data) { 841 | refcount = 842 | (int *)((unsigned char *)data + 
offsetof(VkImageMemory, refcount)); 843 | *refcount = 1; 844 | } 845 | } 846 | 847 | void VkImageMat::create(int _w, int _h, size_t _elemsize, 848 | VkAllocator *_allocator) { 849 | if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && 850 | elempack == 1 && allocator == _allocator) 851 | return; 852 | 853 | release(); 854 | 855 | elemsize = _elemsize; 856 | elempack = 1; 857 | allocator = _allocator; 858 | 859 | dims = 2; 860 | w = _w; 861 | h = _h; 862 | d = 1; 863 | c = 1; 864 | 865 | if (total() > 0) { 866 | data = allocator->fastMalloc(w, h, c, elemsize, elempack); 867 | } 868 | 869 | if (data) { 870 | refcount = 871 | (int *)((unsigned char *)data + offsetof(VkImageMemory, refcount)); 872 | *refcount = 1; 873 | } 874 | } 875 | 876 | void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, 877 | VkAllocator *_allocator) { 878 | if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && 879 | elempack == 1 && allocator == _allocator) 880 | return; 881 | 882 | release(); 883 | 884 | elemsize = _elemsize; 885 | elempack = 1; 886 | allocator = _allocator; 887 | 888 | dims = 3; 889 | w = _w; 890 | h = _h; 891 | d = 1; 892 | c = _c; 893 | 894 | if (total() > 0) { 895 | data = allocator->fastMalloc(w, h, c, elemsize, elempack); 896 | } 897 | 898 | if (data) { 899 | refcount = 900 | (int *)((unsigned char *)data + offsetof(VkImageMemory, refcount)); 901 | *refcount = 1; 902 | } 903 | } 904 | 905 | void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, 906 | VkAllocator *_allocator) { 907 | if (dims == 4 && w == _w && h == _h && d == _d && c == _c && 908 | elemsize == _elemsize && elempack == 1 && allocator == _allocator) 909 | return; 910 | 911 | release(); 912 | 913 | elemsize = _elemsize; 914 | elempack = 1; 915 | allocator = _allocator; 916 | 917 | dims = 4; 918 | w = _w; 919 | h = _h; 920 | d = _d; 921 | c = _c; 922 | 923 | if (total() > 0) { 924 | // underlying image is 3d 925 | data = allocator->fastMalloc(w, 
h * d, c, elemsize, elempack); 926 | } 927 | 928 | if (data) { 929 | refcount = 930 | (int *)((unsigned char *)data + offsetof(VkImageMemory, refcount)); 931 | *refcount = 1; 932 | } 933 | } 934 | 935 | void VkImageMat::create(int _w, size_t _elemsize, int _elempack, 936 | VkAllocator *_allocator) { 937 | if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && 938 | allocator == _allocator) 939 | return; 940 | 941 | release(); 942 | 943 | elemsize = _elemsize; 944 | elempack = _elempack; 945 | allocator = _allocator; 946 | 947 | dims = 1; 948 | w = _w; 949 | h = 1; 950 | d = 1; 951 | c = 1; 952 | 953 | if (total() > 0) { 954 | data = allocator->fastMalloc(w, h, c, elemsize, elempack); 955 | } 956 | 957 | if (data) { 958 | refcount = 959 | (int *)((unsigned char *)data + offsetof(VkImageMemory, refcount)); 960 | *refcount = 1; 961 | } 962 | } 963 | 964 | void VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, 965 | VkAllocator *_allocator) { 966 | if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && 967 | elempack == _elempack && allocator == _allocator) 968 | return; 969 | 970 | release(); 971 | 972 | elemsize = _elemsize; 973 | elempack = _elempack; 974 | allocator = _allocator; 975 | 976 | dims = 2; 977 | w = _w; 978 | h = _h; 979 | d = 1; 980 | c = 1; 981 | 982 | if (total() > 0) { 983 | data = allocator->fastMalloc(w, h, c, elemsize, elempack); 984 | } 985 | 986 | if (data) { 987 | refcount = 988 | (int *)((unsigned char *)data + offsetof(VkImageMemory, refcount)); 989 | *refcount = 1; 990 | } 991 | } 992 | 993 | void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, 994 | VkAllocator *_allocator) { 995 | if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && 996 | elempack == _elempack && allocator == _allocator) 997 | return; 998 | 999 | release(); 1000 | 1001 | elemsize = _elemsize; 1002 | elempack = _elempack; 1003 | allocator = _allocator; 1004 | 1005 | dims = 
3; 1006 | w = _w; 1007 | h = _h; 1008 | d = 1; 1009 | c = _c; 1010 | 1011 | if (total() > 0) { 1012 | data = allocator->fastMalloc(w, h, c, elemsize, elempack); 1013 | } 1014 | 1015 | if (data) { 1016 | refcount = 1017 | (int *)((unsigned char *)data + offsetof(VkImageMemory, refcount)); 1018 | *refcount = 1; 1019 | } 1020 | } 1021 | 1022 | void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, 1023 | int _elempack, VkAllocator *_allocator) { 1024 | if (dims == 4 && w == _w && h == _h && d == _d && c == _c && 1025 | elemsize == _elemsize && elempack == _elempack && allocator == _allocator) 1026 | return; 1027 | 1028 | release(); 1029 | 1030 | elemsize = _elemsize; 1031 | elempack = _elempack; 1032 | allocator = _allocator; 1033 | 1034 | dims = 4; 1035 | w = _w; 1036 | h = _h; 1037 | d = _d; 1038 | c = _c; 1039 | 1040 | if (total() > 0) { 1041 | // underlying image is 3d 1042 | data = allocator->fastMalloc(w, h * d, c, elemsize, elempack); 1043 | } 1044 | 1045 | if (data) { 1046 | refcount = 1047 | (int *)((unsigned char *)data + offsetof(VkImageMemory, refcount)); 1048 | *refcount = 1; 1049 | } 1050 | } 1051 | 1052 | void VkImageMat::create_like(const Mat &m, VkAllocator *_allocator) { 1053 | int _dims = m.dims; 1054 | if (_dims == 1) 1055 | create(m.w, m.elemsize, m.elempack, _allocator); 1056 | if (_dims == 2) 1057 | create(m.w, m.h, m.elemsize, m.elempack, _allocator); 1058 | if (_dims == 3) 1059 | create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); 1060 | if (_dims == 4) 1061 | create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator); 1062 | } 1063 | 1064 | void VkImageMat::create_like(const VkMat &m, VkAllocator *_allocator) { 1065 | int _dims = m.dims; 1066 | if (_dims == 1) 1067 | create(m.w, m.elemsize, m.elempack, _allocator); 1068 | if (_dims == 2) 1069 | create(m.w, m.h, m.elemsize, m.elempack, _allocator); 1070 | if (_dims == 3) 1071 | create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator); 1072 | if (_dims == 4) 1073 
| create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator); 1074 | } 1075 | 1076 | void VkImageMat::create_like(const VkImageMat &im, VkAllocator *_allocator) { 1077 | int _dims = im.dims; 1078 | if (_dims == 1) 1079 | create(im.w, im.elemsize, im.elempack, _allocator); 1080 | if (_dims == 2) 1081 | create(im.w, im.h, im.elemsize, im.elempack, _allocator); 1082 | if (_dims == 3) 1083 | create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator); 1084 | if (_dims == 4) 1085 | create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator); 1086 | } 1087 | #endif // NCNN_VULKAN 1088 | 1089 | #ifdef NCNN_LAYER_FUNC 1090 | void Mat::substract_mean_normalize(const float *mean_vals, 1091 | const float *norm_vals) { 1092 | Layer *op; 1093 | 1094 | if (mean_vals && !norm_vals) { 1095 | // substract mean only 1096 | op = create_layer(LayerType::Bias); 1097 | 1098 | ParamDict pd; 1099 | pd.set(0, c); 1100 | 1101 | op->load_param(pd); 1102 | 1103 | Mat weights[1]; 1104 | weights[0] = Mat(c); 1105 | for (int q = 0; q < c; q++) { 1106 | weights[0][q] = -mean_vals[q]; 1107 | } 1108 | 1109 | op->load_model(ModelBinFromMatArray(weights)); 1110 | } else if (!mean_vals && norm_vals) { 1111 | // normalize only 1112 | op = create_layer(LayerType::Scale); 1113 | 1114 | ParamDict pd; 1115 | pd.set(0, c); 1116 | 1117 | op->load_param(pd); 1118 | 1119 | Mat weights[1]; 1120 | weights[0] = Mat(c); 1121 | for (int q = 0; q < c; q++) { 1122 | weights[0][q] = norm_vals[q]; 1123 | } 1124 | 1125 | op->load_model(ModelBinFromMatArray(weights)); 1126 | } else if (mean_vals && norm_vals) { 1127 | // substract mean and normalize 1128 | op = create_layer(LayerType::Scale); 1129 | 1130 | ParamDict pd; 1131 | pd.set(0, c); 1132 | pd.set(1, 1); 1133 | 1134 | op->load_param(pd); 1135 | 1136 | Mat weights[2]; 1137 | weights[0] = Mat(c); 1138 | weights[1] = Mat(c); 1139 | for (int q = 0; q < c; q++) { 1140 | weights[0][q] = norm_vals[q]; 1141 | weights[1][q] = -mean_vals[q] * norm_vals[q]; 
1142 | } 1143 | 1144 | op->load_model(ModelBinFromMatArray(weights)); 1145 | } else // if (!mean_vals && !norm_vals) 1146 | { 1147 | return; 1148 | } 1149 | 1150 | Option opt; 1151 | opt.num_threads = 1; // TODO 1152 | 1153 | op->create_pipeline(opt); 1154 | 1155 | op->forward_inplace(*this, opt); 1156 | 1157 | op->destroy_pipeline(opt); 1158 | 1159 | delete op; 1160 | } 1161 | #endif 1162 | 1163 | Mat Mat::from_float16(const unsigned short *data, int size) { 1164 | Mat m(size); 1165 | if (m.empty()) 1166 | return m; 1167 | 1168 | float *ptr = m; //.data; 1169 | 1170 | #if __ARM_NEON && (__ARM_FP & 2) 1171 | int nn = cpu_support_arm_vfpv4() ? size >> 2 : 0; 1172 | int remain = size - (nn << 2); 1173 | #else 1174 | int remain = size; 1175 | #endif // __ARM_NEON 1176 | 1177 | #if __ARM_NEON && (__ARM_FP & 2) 1178 | #if __aarch64__ 1179 | if (nn > 0) { 1180 | asm volatile("0: \n" 1181 | "ld1 {v0.4h}, [%1], #8 \n" 1182 | "fcvtl v1.4s, v0.4h \n" 1183 | "subs %w0, %w0, #1 \n" 1184 | "st1 {v1.4s}, [%2], #16 \n" 1185 | "bne 0b \n" 1186 | : "=r"(nn), // %0 1187 | "=r"(data), // %1 1188 | "=r"(ptr) // %2 1189 | : "0"(nn), "1"(data), "2"(ptr) 1190 | : "cc", "memory", "v0", "v1"); 1191 | } 1192 | #else 1193 | if (nn > 0) { 1194 | asm volatile("0: \n" 1195 | "pld [%1, #64] \n" 1196 | "vld1.s16 {d0}, [%1]! \n" 1197 | "vcvt.f32.f16 q1, d0 \n" 1198 | "subs %0, #1 \n" 1199 | "vst1.f32 {d2-d3}, [%2 :128]! 
// Converts an IEEE-754 binary32 value to binary16 bits (1:5:10 layout).
//
// - zero and fp32 denormals become a signed zero
// - infinity stays infinity; any NaN becomes a quiet NaN (0x200 payload)
// - magnitudes too large for fp16 become signed infinity
// - magnitudes too small for a *normal* fp16 are flushed to signed zero
// - otherwise the significand is truncated (not rounded) to 10 bits
unsigned short float32_to_float16(float value) {
  // 1 : 8 : 23
  union {
    unsigned int u;
    float f;
  } bits;

  bits.f = value;

  const unsigned int sign = (bits.u >> 31) & 0x1u;
  const unsigned int exponent = (bits.u >> 23) & 0xFFu;
  const unsigned int significand = bits.u & 0x7FFFFFu;

  // 1 : 5 : 10
  const unsigned int sign16 = sign << 15;

  if (exponent == 0) {
    // zero or denormal, always underflow
    return (unsigned short)sign16;
  }

  if (exponent == 0xFF) {
    // infinity or NaN
    return (unsigned short)(sign16 | 0x7C00u | (significand ? 0x200u : 0x0u));
  }

  // normalized: re-bias the exponent from 127 to 15
  const int newexp = (int)exponent - 127 + 15;

  if (newexp >= 31) {
    // overflow, return infinity
    return (unsigned short)(sign16 | 0x7C00u);
  }

  if (newexp <= 0) {
    // some normal fp32 cannot be expressed as normal fp16
    return (unsigned short)sign16;
  }

  // normal fp16
  return (unsigned short)(sign16 | ((unsigned int)newexp << 10) |
                          (significand >> 13));
}

// Converts IEEE-754 binary16 bits to a binary32 value. Every fp16 value,
// including denormals, infinities and NaNs, is representable in fp32.
//
// All bit fields are held as unsigned int: the original kept them in
// unsigned short, so `sign << 31` was evaluated on a promoted (signed) int
// and shifted into the sign bit.
float float16_to_float32(unsigned short value) {
  // 1 : 5 : 10
  const unsigned int sign = (value & 0x8000u) >> 15;
  const unsigned int exponent = (value & 0x7C00u) >> 10;
  unsigned int significand = value & 0x03FFu;

  // 1 : 8 : 23
  union {
    unsigned int u;
    float f;
  } bits;

  if (exponent == 0) {
    if (significand == 0) {
      // zero
      bits.u = sign << 31;
    } else {
      // denormal: shift until the leading 1 reaches bit 9, counting the
      // shifts, then drop that (now implicit) leading 1
      unsigned int shift = 0;
      while ((significand & 0x200u) == 0) {
        significand <<= 1;
        shift++;
      }
      significand = (significand << 1) & 0x3FFu;
      bits.u = (sign << 31) | ((112u - shift) << 23) | (significand << 13);
    }
  } else if (exponent == 0x1F) {
    // infinity or NaN
    bits.u = (sign << 31) | (0xFFu << 23) | (significand << 13);
  } else {
    // normalized: re-bias the exponent from 15 to 127 (127 - 15 == 112)
    bits.u = (sign << 31) | ((exponent + 112u) << 23) | (significand << 13);
  }

  return bits.f;
}
pd.set(2, left); 1326 | pd.set(3, right); 1327 | pd.set(4, type); 1328 | pd.set(5, v); 1329 | 1330 | padding->load_param(pd); 1331 | 1332 | padding->create_pipeline(opt); 1333 | 1334 | padding->forward(src, dst, opt); 1335 | 1336 | padding->destroy_pipeline(opt); 1337 | 1338 | delete padding; 1339 | } 1340 | 1341 | void copy_make_border_3d(const Mat &src, Mat &dst, int top, int bottom, 1342 | int left, int right, int front, int behind, int type, 1343 | float v, const Option &opt) { 1344 | Layer *padding = create_layer(LayerType::Padding); 1345 | 1346 | ParamDict pd; 1347 | pd.set(0, top); 1348 | pd.set(1, bottom); 1349 | pd.set(2, left); 1350 | pd.set(3, right); 1351 | pd.set(4, type); 1352 | pd.set(5, v); 1353 | pd.set(7, front); 1354 | pd.set(8, behind); 1355 | 1356 | padding->load_param(pd); 1357 | 1358 | padding->create_pipeline(opt); 1359 | 1360 | padding->forward(src, dst, opt); 1361 | 1362 | padding->destroy_pipeline(opt); 1363 | 1364 | delete padding; 1365 | } 1366 | 1367 | void copy_cut_border(const Mat &src, Mat &dst, int top, int bottom, int left, 1368 | int right, const Option &opt) { 1369 | if (left + right > src.w || top + bottom > src.h) { 1370 | NCNN_LOGE("copy_cut_border parameter error, top: %d, bottom: %d, left: %d, " 1371 | "right: %d, src.w: %d, src.h: %d", 1372 | top, bottom, left, right, src.w, src.h); 1373 | return; 1374 | } 1375 | Layer *crop = create_layer(LayerType::Crop); 1376 | 1377 | ParamDict pd; 1378 | pd.set(0, left); 1379 | pd.set(1, top); 1380 | pd.set(2, 0); 1381 | pd.set(3, src.w - left - right); 1382 | pd.set(4, src.h - top - bottom); 1383 | pd.set(5, -233); 1384 | 1385 | crop->load_param(pd); 1386 | 1387 | crop->create_pipeline(opt); 1388 | 1389 | crop->forward(src, dst, opt); 1390 | 1391 | crop->destroy_pipeline(opt); 1392 | 1393 | delete crop; 1394 | } 1395 | 1396 | void copy_cut_border_3d(const Mat &src, Mat &dst, int top, int bottom, int left, 1397 | int right, int front, int behind, const Option &opt) { 1398 | if (left + 
right > src.w || top + bottom > src.h || front + behind > src.d) { 1399 | NCNN_LOGE( 1400 | "copy_cut_border_3d parameter error, top: %d, bottom: %d, left: %d, " 1401 | "right: %d, front: %d, behind: %d, src.w: %d, src.h: %d, src.d: %d", 1402 | top, bottom, left, right, front, behind, src.w, src.h, src.d); 1403 | return; 1404 | } 1405 | Layer *crop = create_layer(LayerType::Crop); 1406 | 1407 | ParamDict pd; 1408 | pd.set(0, left); 1409 | pd.set(1, top); 1410 | pd.set(13, front); 1411 | pd.set(2, 0); 1412 | pd.set(3, src.w - left - right); 1413 | pd.set(4, src.h - top - bottom); 1414 | pd.set(14, src.d - front - behind); 1415 | pd.set(5, -233); 1416 | 1417 | crop->load_param(pd); 1418 | 1419 | crop->create_pipeline(opt); 1420 | 1421 | crop->forward(src, dst, opt); 1422 | 1423 | crop->destroy_pipeline(opt); 1424 | 1425 | delete crop; 1426 | } 1427 | 1428 | void resize_nearest(const Mat &src, Mat &dst, int w, int h, const Option &opt) { 1429 | Layer *interp = create_layer(LayerType::Interp); 1430 | 1431 | ParamDict pd; 1432 | pd.set(0, 1); 1433 | pd.set(3, h); 1434 | pd.set(4, w); 1435 | 1436 | interp->load_param(pd); 1437 | 1438 | interp->create_pipeline(opt); 1439 | 1440 | interp->forward(src, dst, opt); 1441 | 1442 | interp->destroy_pipeline(opt); 1443 | 1444 | delete interp; 1445 | } 1446 | 1447 | void resize_bilinear(const Mat &src, Mat &dst, int w, int h, 1448 | const Option &opt) { 1449 | Layer *interp = create_layer(LayerType::Interp); 1450 | 1451 | ParamDict pd; 1452 | pd.set(0, 2); 1453 | pd.set(3, h); 1454 | pd.set(4, w); 1455 | 1456 | interp->load_param(pd); 1457 | 1458 | interp->create_pipeline(opt); 1459 | 1460 | interp->forward(src, dst, opt); 1461 | 1462 | interp->destroy_pipeline(opt); 1463 | 1464 | delete interp; 1465 | } 1466 | 1467 | void resize_bicubic(const Mat &src, Mat &dst, int w, int h, const Option &opt) { 1468 | Layer *interp = create_layer(LayerType::Interp); 1469 | 1470 | ParamDict pd; 1471 | pd.set(0, 3); 1472 | pd.set(3, h); 1473 | 
pd.set(4, w); 1474 | 1475 | interp->load_param(pd); 1476 | 1477 | interp->create_pipeline(opt); 1478 | 1479 | interp->forward(src, dst, opt); 1480 | 1481 | interp->destroy_pipeline(opt); 1482 | 1483 | delete interp; 1484 | } 1485 | 1486 | void convert_packing(const Mat &src, Mat &dst, int _elempack, 1487 | const Option &opt) { 1488 | Layer *packing = create_layer(LayerType::Packing); 1489 | 1490 | ParamDict pd; 1491 | pd.set(0, _elempack); 1492 | 1493 | packing->load_param(pd); 1494 | 1495 | packing->create_pipeline(opt); 1496 | 1497 | packing->forward(src, dst, opt); 1498 | 1499 | packing->destroy_pipeline(opt); 1500 | 1501 | delete packing; 1502 | } 1503 | 1504 | void flatten(const Mat &src, Mat &dst, const Option &opt) { 1505 | Layer *flatten = create_layer(LayerType::Flatten); 1506 | 1507 | ParamDict pd; 1508 | 1509 | flatten->load_param(pd); 1510 | 1511 | flatten->create_pipeline(opt); 1512 | 1513 | flatten->forward(src, dst, opt); 1514 | 1515 | flatten->destroy_pipeline(opt); 1516 | 1517 | delete flatten; 1518 | } 1519 | 1520 | void cast_float32_to_float16(const Mat &src, Mat &dst, const Option &opt) { 1521 | Layer *cast = create_layer(LayerType::Cast); 1522 | 1523 | ParamDict pd; 1524 | pd.set(0, 1); 1525 | pd.set(1, 2); 1526 | 1527 | cast->load_param(pd); 1528 | 1529 | cast->create_pipeline(opt); 1530 | 1531 | cast->forward(src, dst, opt); 1532 | 1533 | cast->destroy_pipeline(opt); 1534 | 1535 | delete cast; 1536 | } 1537 | 1538 | void cast_float16_to_float32(const Mat &src, Mat &dst, const Option &opt) { 1539 | Layer *cast = create_layer(LayerType::Cast); 1540 | 1541 | ParamDict pd; 1542 | pd.set(0, 2); 1543 | pd.set(1, 1); 1544 | 1545 | cast->load_param(pd); 1546 | 1547 | cast->create_pipeline(opt); 1548 | 1549 | cast->forward(src, dst, opt); 1550 | 1551 | cast->destroy_pipeline(opt); 1552 | 1553 | delete cast; 1554 | } 1555 | 1556 | void cast_int8_to_float32(const Mat &src, Mat &dst, const Option &opt) { 1557 | Layer *cast = 
create_layer(LayerType::Cast); 1558 | 1559 | ParamDict pd; 1560 | pd.set(0, 3); 1561 | pd.set(1, 1); 1562 | 1563 | cast->load_param(pd); 1564 | 1565 | cast->create_pipeline(opt); 1566 | 1567 | cast->forward(src, dst, opt); 1568 | 1569 | cast->destroy_pipeline(opt); 1570 | 1571 | delete cast; 1572 | } 1573 | 1574 | void cast_float32_to_bfloat16(const Mat &src, Mat &dst, const Option &opt) { 1575 | Layer *cast = create_layer(LayerType::Cast); 1576 | 1577 | ParamDict pd; 1578 | pd.set(0, 1); 1579 | pd.set(1, 4); 1580 | 1581 | cast->load_param(pd); 1582 | 1583 | cast->create_pipeline(opt); 1584 | 1585 | cast->forward(src, dst, opt); 1586 | 1587 | cast->destroy_pipeline(opt); 1588 | 1589 | delete cast; 1590 | } 1591 | 1592 | void cast_bfloat16_to_float32(const Mat &src, Mat &dst, const Option &opt) { 1593 | Layer *cast = create_layer(LayerType::Cast); 1594 | 1595 | ParamDict pd; 1596 | pd.set(0, 4); 1597 | pd.set(1, 1); 1598 | 1599 | cast->load_param(pd); 1600 | 1601 | cast->create_pipeline(opt); 1602 | 1603 | cast->forward(src, dst, opt); 1604 | 1605 | cast->destroy_pipeline(opt); 1606 | 1607 | delete cast; 1608 | } 1609 | 1610 | void quantize_to_int8(const Mat &src, Mat &dst, const Mat &scale_data, 1611 | const Option &opt) { 1612 | Layer *quantize = create_layer(LayerType::Quantize); 1613 | 1614 | ParamDict pd; 1615 | pd.set(0, scale_data.w); 1616 | 1617 | quantize->load_param(pd); 1618 | 1619 | Mat weights[1]; 1620 | weights[0] = scale_data; 1621 | 1622 | quantize->load_model(ModelBinFromMatArray(weights)); 1623 | 1624 | quantize->create_pipeline(opt); 1625 | 1626 | quantize->forward(src, dst, opt); 1627 | 1628 | quantize->destroy_pipeline(opt); 1629 | 1630 | delete quantize; 1631 | } 1632 | 1633 | void dequantize_from_int32(const Mat &src, Mat &dst, const Mat &scale_data, 1634 | const Mat &bias_data, const Option &opt) { 1635 | Layer *dequantize = create_layer(LayerType::Dequantize); 1636 | 1637 | ParamDict pd; 1638 | pd.set(0, scale_data.w); 1639 | pd.set(1, 
bias_data.w); 1640 | 1641 | dequantize->load_param(pd); 1642 | 1643 | Mat weights[2]; 1644 | weights[0] = scale_data; 1645 | weights[1] = bias_data; 1646 | 1647 | dequantize->load_model(ModelBinFromMatArray(weights)); 1648 | 1649 | dequantize->create_pipeline(opt); 1650 | 1651 | dequantize->forward(src, dst, opt); 1652 | 1653 | dequantize->destroy_pipeline(opt); 1654 | 1655 | delete dequantize; 1656 | } 1657 | 1658 | void requantize_from_int32_to_int8(const Mat &src, Mat &dst, 1659 | const Mat &scale_in_data, 1660 | const Mat &scale_out_data, 1661 | const Mat &bias_data, int activation_type, 1662 | const Mat &activation_params, 1663 | const Option &opt) { 1664 | Layer *requantize = create_layer(LayerType::Requantize); 1665 | 1666 | ParamDict pd; 1667 | pd.set(0, scale_in_data.w); 1668 | pd.set(1, scale_out_data.w); 1669 | pd.set(2, bias_data.w); 1670 | pd.set(3, activation_type); 1671 | pd.set(4, activation_params); 1672 | 1673 | requantize->load_param(pd); 1674 | 1675 | Mat weights[3]; 1676 | weights[0] = scale_in_data; 1677 | weights[1] = scale_out_data; 1678 | weights[2] = bias_data; 1679 | 1680 | requantize->load_model(ModelBinFromMatArray(weights)); 1681 | 1682 | requantize->create_pipeline(opt); 1683 | 1684 | requantize->forward(src, dst, opt); 1685 | 1686 | requantize->destroy_pipeline(opt); 1687 | 1688 | delete requantize; 1689 | } 1690 | #endif 1691 | 1692 | } // namespace ncnn 1693 | -------------------------------------------------------------------------------- /src/mat_pixel_android.cpp: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. 
namespace sim {

#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 9
// All readers below follow the same steps:
//   1. query the bitmap info and map its format to a source pixel type
//      (A_8 -> PIXEL_GRAY, RGBA_8888 -> PIXEL_RGBA); other formats fail
//   2. reduce type_to to a plain target format and build the conversion type
//   3. lock the pixels, convert them into a Mat, unlock
// An empty Mat is returned when the info query or the pixel lock fails, or
// when the bitmap format is unsupported. The original ignored both return
// codes and would go on to read an uninitialised info / pixel pointer.

// Converts a whole Android bitmap into a Mat of pixel type type_to.
Mat Mat::from_android_bitmap(JNIEnv* env, jobject bitmap, int type_to, Allocator* allocator)
{
    AndroidBitmapInfo info;
    if (AndroidBitmap_getInfo(env, bitmap, &info) != ANDROID_BITMAP_RESULT_SUCCESS)
        return Mat();

    int type_from;
    if (info.format == ANDROID_BITMAP_FORMAT_A_8)
        type_from = PIXEL_GRAY;
    else if (info.format == ANDROID_BITMAP_FORMAT_RGBA_8888)
        type_from = PIXEL_RGBA;
    else
        return Mat(); // unsupported android bitmap format

    // let PIXEL_RGBA2XXX become PIXEL_XXX
    type_to = (type_to & PIXEL_CONVERT_MASK) ? (type_to >> PIXEL_CONVERT_SHIFT) : (type_to & PIXEL_FORMAT_MASK);

    const int type = type_to == type_from ? type_from : (type_from | (type_to << PIXEL_CONVERT_SHIFT));

    void* data = 0;
    if (AndroidBitmap_lockPixels(env, bitmap, &data) != ANDROID_BITMAP_RESULT_SUCCESS)
        return Mat();

    Mat m = Mat::from_pixels((const unsigned char*)data, type, info.width, info.height, info.stride, allocator);

    AndroidBitmap_unlockPixels(env, bitmap);

    return m;
}

// Like from_android_bitmap, but resizes to target_width x target_height.
Mat Mat::from_android_bitmap_resize(JNIEnv* env, jobject bitmap, int type_to, int target_width, int target_height, Allocator* allocator)
{
    AndroidBitmapInfo info;
    if (AndroidBitmap_getInfo(env, bitmap, &info) != ANDROID_BITMAP_RESULT_SUCCESS)
        return Mat();

    int type_from;
    if (info.format == ANDROID_BITMAP_FORMAT_A_8)
        type_from = PIXEL_GRAY;
    else if (info.format == ANDROID_BITMAP_FORMAT_RGBA_8888)
        type_from = PIXEL_RGBA;
    else
        return Mat(); // unsupported android bitmap format

    // let PIXEL_RGBA2XXX become PIXEL_XXX
    type_to = (type_to & PIXEL_CONVERT_MASK) ? (type_to >> PIXEL_CONVERT_SHIFT) : (type_to & PIXEL_FORMAT_MASK);

    const int type = type_to == type_from ? type_from : (type_from | (type_to << PIXEL_CONVERT_SHIFT));

    void* data = 0;
    if (AndroidBitmap_lockPixels(env, bitmap, &data) != ANDROID_BITMAP_RESULT_SUCCESS)
        return Mat();

    Mat m = Mat::from_pixels_resize((const unsigned char*)data, type, info.width, info.height, info.stride, target_width, target_height, allocator);

    AndroidBitmap_unlockPixels(env, bitmap);

    return m;
}

// Like from_android_bitmap, but converts only the region of interest
// (roix, roiy, roiw, roih).
Mat Mat::from_android_bitmap_roi(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, Allocator* allocator)
{
    AndroidBitmapInfo info;
    if (AndroidBitmap_getInfo(env, bitmap, &info) != ANDROID_BITMAP_RESULT_SUCCESS)
        return Mat();

    int type_from;
    if (info.format == ANDROID_BITMAP_FORMAT_A_8)
        type_from = PIXEL_GRAY;
    else if (info.format == ANDROID_BITMAP_FORMAT_RGBA_8888)
        type_from = PIXEL_RGBA;
    else
        return Mat(); // unsupported android bitmap format

    // let PIXEL_RGBA2XXX become PIXEL_XXX
    type_to = (type_to & PIXEL_CONVERT_MASK) ? (type_to >> PIXEL_CONVERT_SHIFT) : (type_to & PIXEL_FORMAT_MASK);

    const int type = type_to == type_from ? type_from : (type_from | (type_to << PIXEL_CONVERT_SHIFT));

    void* data = 0;
    if (AndroidBitmap_lockPixels(env, bitmap, &data) != ANDROID_BITMAP_RESULT_SUCCESS)
        return Mat();

    Mat m = Mat::from_pixels_roi((const unsigned char*)data, type, info.width, info.height, info.stride, roix, roiy, roiw, roih, allocator);

    AndroidBitmap_unlockPixels(env, bitmap);

    return m;
}

// Like from_android_bitmap_roi, but resizes the region of interest to
// target_width x target_height.
Mat Mat::from_android_bitmap_roi_resize(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator)
{
    AndroidBitmapInfo info;
    if (AndroidBitmap_getInfo(env, bitmap, &info) != ANDROID_BITMAP_RESULT_SUCCESS)
        return Mat();

    int type_from;
    if (info.format == ANDROID_BITMAP_FORMAT_A_8)
        type_from = PIXEL_GRAY;
    else if (info.format == ANDROID_BITMAP_FORMAT_RGBA_8888)
        type_from = PIXEL_RGBA;
    else
        return Mat(); // unsupported android bitmap format

    // let PIXEL_RGBA2XXX become PIXEL_XXX
    type_to = (type_to & PIXEL_CONVERT_MASK) ? (type_to >> PIXEL_CONVERT_SHIFT) : (type_to & PIXEL_FORMAT_MASK);

    const int type = type_to == type_from ? type_from : (type_from | (type_to << PIXEL_CONVERT_SHIFT));

    void* data = 0;
    if (AndroidBitmap_lockPixels(env, bitmap, &data) != ANDROID_BITMAP_RESULT_SUCCESS)
        return Mat();

    Mat m = Mat::from_pixels_roi_resize((const unsigned char*)data, type, info.width, info.height, info.stride, roix, roiy, roiw, roih, target_width, target_height, allocator);

    AndroidBitmap_unlockPixels(env, bitmap);

    return m;
}

// Writes this Mat, interpreted as pixel type type_from, into an Android
// bitmap, resizing to the bitmap's width and height. The target pixel type
// follows the bitmap format (A_8 -> PIXEL_GRAY, RGBA_8888 -> PIXEL_RGBA).
// The bitmap is left untouched when its info cannot be queried, its format
// is unsupported, or its pixels cannot be locked.
void Mat::to_android_bitmap(JNIEnv* env, jobject bitmap, int type_from) const
{
    AndroidBitmapInfo info;
    if (AndroidBitmap_getInfo(env, bitmap, &info) != ANDROID_BITMAP_RESULT_SUCCESS)
        return;

    int type_to;
    if (info.format == ANDROID_BITMAP_FORMAT_A_8)
        type_to = PIXEL_GRAY;
    else if (info.format == ANDROID_BITMAP_FORMAT_RGBA_8888)
        type_to = PIXEL_RGBA;
    else
        return; // unsupported android bitmap format

    // let PIXEL_XXX2RGBA become PIXEL_XXX
    type_from = (type_from & PIXEL_CONVERT_MASK) ? (type_from & PIXEL_FORMAT_MASK) : type_from;

    const int type = type_from == type_to ? type_to : (type_from | (type_to << PIXEL_CONVERT_SHIFT));

    void* _data = 0;
    if (AndroidBitmap_lockPixels(env, bitmap, &_data) != ANDROID_BITMAP_RESULT_SUCCESS)
        return;

    to_pixels_resize((unsigned char*)_data, type, info.width, info.height, info.stride);

    AndroidBitmap_unlockPixels(env, bitmap);
}
#endif // __ANDROID_API__ >= 9
#endif // NCNN_PLATFORM_API

} // namespace sim
// Fills the half-open pixel rectangle [x0, x1) x [y0, y1), clipped to the
// w x h image, writing the first N bytes of `pen` into every covered pixel.
// `stride` is the byte distance between consecutive rows.
// Clipping is done once up front, so off-image coordinates cost nothing.
template<int N>
static void fill_clipped_rect(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, const unsigned char* pen)
{
    if (x0 < 0) x0 = 0;
    if (y0 < 0) y0 = 0;
    if (x1 > w) x1 = w;
    if (y1 > h) y1 = h;

    if (x0 >= x1 || y0 >= y1)
        return;

    for (int y = y0; y < y1; y++)
    {
        unsigned char* p = pixels + stride * y + x0 * N;

        for (int x = x0; x < x1; x++)
        {
            for (int k = 0; k < N; k++)
                p[k] = pen[k];

            p += N;
        }
    }
}

// Shared body of draw_rectangle_c1/c2/c3 for an image with N bytes per pixel.
//
// thickness == -1 fills the rectangle [rx, rx + rw) x [ry, ry + rh).
// Any other thickness draws an outline made of four bars; with
// t0 = thickness / 2 and t1 = thickness - t0 each edge is widened by t0
// towards the top/left and by t1 towards the bottom/right.
// The pen is the first N bytes of `color` as stored in memory.
template<int N>
static void draw_rectangle_impl(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness)
{
    const unsigned char* pen_color = (const unsigned char*)&color;

    if (thickness == -1)
    {
        // filled
        fill_clipped_rect<N>(pixels, w, h, stride, rx, ry, rx + rw, ry + rh, pen_color);
        return;
    }

    const int t0 = thickness / 2;
    const int t1 = thickness - t0;

    // draw top
    fill_clipped_rect<N>(pixels, w, h, stride, rx - t0, ry - t0, rx + rw + t1, ry + t1, pen_color);

    // draw bottom
    fill_clipped_rect<N>(pixels, w, h, stride, rx - t0, ry + rh - t0, rx + rw + t1, ry + rh + t1, pen_color);

    // draw left
    fill_clipped_rect<N>(pixels, w, h, stride, rx - t0, ry + t1, rx + t1, ry + rh - t0, pen_color);

    // draw right
    fill_clipped_rect<N>(pixels, w, h, stride, rx + rw - t0, ry + t1, rx + rw + t1, ry + rh - t0, pen_color);
}

// Draws a rectangle on a 1-byte-per-pixel image with row pitch `stride`.
void draw_rectangle_c1(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness)
{
    draw_rectangle_impl<1>(pixels, w, h, stride, rx, ry, rw, rh, color, thickness);
}

// Draws a rectangle on a 2-bytes-per-pixel image with row pitch `stride`.
void draw_rectangle_c2(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness)
{
    draw_rectangle_impl<2>(pixels, w, h, stride, rx, ry, rw, rh, color, thickness);
}

// Draws a rectangle on a 3-bytes-per-pixel image with row pitch `stride`.
void draw_rectangle_c3(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness)
{
    draw_rectangle_impl<3>(pixels, w, h, stride, rx, ry, rw, rh, color, thickness);
}
int t0 = thickness / 2; 497 | const int t1 = thickness - t0; 498 | 499 | // draw top 500 | { 501 | for (int y = ry - t0; y < ry + t1; y++) 502 | { 503 | if (y < 0) 504 | continue; 505 | 506 | if (y >= h) 507 | break; 508 | 509 | unsigned char* p = pixels + stride * y; 510 | 511 | for (int x = rx - t0; x < rx + rw + t1; x++) 512 | { 513 | if (x < 0) 514 | continue; 515 | 516 | if (x >= w) 517 | break; 518 | 519 | p[x * 4 + 0] = pen_color[0]; 520 | p[x * 4 + 1] = pen_color[1]; 521 | p[x * 4 + 2] = pen_color[2]; 522 | p[x * 4 + 3] = pen_color[3]; 523 | } 524 | } 525 | } 526 | 527 | // draw bottom 528 | { 529 | for (int y = ry + rh - t0; y < ry + rh + t1; y++) 530 | { 531 | if (y < 0) 532 | continue; 533 | 534 | if (y >= h) 535 | break; 536 | 537 | unsigned char* p = pixels + stride * y; 538 | 539 | for (int x = rx - t0; x < rx + rw + t1; x++) 540 | { 541 | if (x < 0) 542 | continue; 543 | 544 | if (x >= w) 545 | break; 546 | 547 | p[x * 4 + 0] = pen_color[0]; 548 | p[x * 4 + 1] = pen_color[1]; 549 | p[x * 4 + 2] = pen_color[2]; 550 | p[x * 4 + 3] = pen_color[3]; 551 | } 552 | } 553 | } 554 | 555 | // draw left 556 | for (int x = rx - t0; x < rx + t1; x++) 557 | { 558 | if (x < 0) 559 | continue; 560 | 561 | if (x >= w) 562 | break; 563 | 564 | for (int y = ry + t1; y < ry + rh - t0; y++) 565 | { 566 | if (y < 0) 567 | continue; 568 | 569 | if (y >= h) 570 | break; 571 | 572 | unsigned char* p = pixels + stride * y; 573 | 574 | p[x * 4 + 0] = pen_color[0]; 575 | p[x * 4 + 1] = pen_color[1]; 576 | p[x * 4 + 2] = pen_color[2]; 577 | p[x * 4 + 3] = pen_color[3]; 578 | } 579 | } 580 | 581 | // draw right 582 | for (int x = rx + rw - t0; x < rx + rw + t1; x++) 583 | { 584 | if (x < 0) 585 | continue; 586 | 587 | if (x >= w) 588 | break; 589 | 590 | for (int y = ry + t1; y < ry + rh - t0; y++) 591 | { 592 | if (y < 0) 593 | continue; 594 | 595 | if (y >= h) 596 | break; 597 | 598 | unsigned char* p = pixels + stride * y; 599 | 600 | p[x * 4 + 0] = pen_color[0]; 601 | p[x * 4 
+ 1] = pen_color[1]; 602 | p[x * 4 + 2] = pen_color[2]; 603 | p[x * 4 + 3] = pen_color[3]; 604 | } 605 | } 606 | } 607 | 608 | void draw_rectangle_yuv420sp(unsigned char* yuv420sp, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness) 609 | { 610 | // assert w % 2 == 0 611 | // assert h % 2 == 0 612 | // assert rx % 2 == 0 613 | // assert ry % 2 == 0 614 | // assert rw % 2 == 0 615 | // assert rh % 2 == 0 616 | // assert thickness % 2 == 0 617 | 618 | const unsigned char* pen_color = (const unsigned char*)&color; 619 | 620 | unsigned int v_y; 621 | unsigned int v_uv; 622 | unsigned char* pen_color_y = (unsigned char*)&v_y; 623 | unsigned char* pen_color_uv = (unsigned char*)&v_uv; 624 | pen_color_y[0] = pen_color[0]; 625 | pen_color_uv[0] = pen_color[1]; 626 | pen_color_uv[1] = pen_color[2]; 627 | 628 | unsigned char* Y = yuv420sp; 629 | draw_rectangle_c1(Y, w, h, rx, ry, rw, rh, v_y, thickness); 630 | 631 | unsigned char* UV = yuv420sp + w * h; 632 | int thickness_uv = thickness == -1 ? 
// True when (x0, y0) lies within distance r of (x1, y1), boundary included.
// Works on squared distances, so no square root is needed.
static inline bool distance_lessequal(int x0, int y0, int x1, int y1, float r)
{
    int dx = x0 - x1;
    int dy = y0 - y1;
    int q = dx * dx + dy * dy;
    return q <= r * r;
}

// True when the distance d between (x0, y0) and (x1, y1) satisfies r0 <= d < r1
// (evaluated on squared values).
static inline bool distance_inrange(int x0, int y0, int x1, int y1, float r0, float r1)
{
    int dx = x0 - x1;
    int dy = y0 - y1;
    int q = dx * dx + dy * dy;
    return q >= r0 * r0 && q < r1 * r1;
}

// Shared circle rasteriser for interleaved images with `channels` bytes per pixel.
//
// thickness == -1 paints every pixel of the square
// [cx - radius + 1, cx + radius) x [cy - radius + 1, cy + radius) whose
// distance to the centre is <= radius.
// Any other thickness paints the ring radius - t/2 <= distance < radius + t/2.
//
// The scanned bounding box is clipped to the image up front, so off-image
// parts of the circle are skipped instead of being stepped through, and all
// channel counts now use the same scan bounds (the 2- and 3-channel versions
// previously started one row/column earlier; that extra row/column can never
// pass the ring test, so the painted pixels are unchanged).
static void draw_circle_scan(unsigned char* pixels, int w, int h, int stride, int channels, int cx, int cy, int radius, const unsigned char* pen, int thickness)
{
    if (thickness == -1)
    {
        // filled
        int y_begin = cy - (radius - 1);
        int y_end = cy + radius;
        int x_begin = cx - (radius - 1);
        int x_end = cx + radius;

        if (y_begin < 0) y_begin = 0;
        if (x_begin < 0) x_begin = 0;
        if (y_end > h) y_end = h;
        if (x_end > w) x_end = w;

        for (int y = y_begin; y < y_end; y++)
        {
            unsigned char* row = pixels + stride * y;

            for (int x = x_begin; x < x_end; x++)
            {
                // distance from cx cy
                if (!distance_lessequal(x, y, cx, cy, radius))
                    continue;

                unsigned char* p = row + x * channels;
                for (int c = 0; c < channels; c++)
                    p[c] = pen[c];
            }
        }

        return;
    }

    const float t0 = thickness / 2.f;
    const float t1 = thickness - t0;
    const float r_inner = radius - t0;
    const float r_outer = radius + t1;

    // the start is truncated to int exactly as the loop initialiser used to do
    int y_begin = (int)(cy - (radius - 1) - t0);
    int x_begin = (int)(cx - (radius - 1) - t0);
    if (y_begin < 0) y_begin = 0;
    if (x_begin < 0) x_begin = 0;

    const float y_limit = cy + radius + t1;
    const float x_limit = cx + radius + t1;

    for (int y = y_begin; y < h && y < y_limit; y++)
    {
        unsigned char* row = pixels + stride * y;

        for (int x = x_begin; x < w && x < x_limit; x++)
        {
            // distance from cx cy
            if (!distance_inrange(x, y, cx, cy, r_inner, r_outer))
                continue;

            unsigned char* p = row + x * channels;
            for (int c = 0; c < channels; c++)
                p[c] = pen[c];
        }
    }
}

// Draws a circle on a 1-channel image.
//
// pixels     first byte of the image
// w, h       image size in pixels
// stride     distance in bytes between the starts of consecutive rows
// cx, cy     circle centre
// radius     circle radius in pixels
// color      pen colour; byte 0 of its in-memory representation is written
// thickness  ring thickness in pixels, or -1 for a filled circle
void draw_circle_c1(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness)
{
    draw_circle_scan(pixels, w, h, stride, 1, cx, cy, radius, (const unsigned char*)&color, thickness);
}

// Draws a circle on a 2-channel interleaved image (bytes 0-1 of `color`).
void draw_circle_c2(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness)
{
    draw_circle_scan(pixels, w, h, stride, 2, cx, cy, radius, (const unsigned char*)&color, thickness);
}

// Draws a circle on a 3-channel interleaved image (bytes 0-2 of `color`).
void draw_circle_c3(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness)
{
    draw_circle_scan(pixels, w, h, stride, 3, cx, cy, radius, (const unsigned char*)&color, thickness);
}

// Draws a circle on a 4-channel interleaved image (all four bytes of `color`).
void draw_circle_c4(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness)
{
    draw_circle_scan(pixels, w, h, stride, 4, cx, cy, radius, (const unsigned char*)&color, thickness);
}

// Packed-row conveniences: rows are assumed to follow each other without
// padding, so the stride is w * channels bytes.
void draw_circle_c1(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness)
{
    draw_circle_c1(pixels, w, h, w, cx, cy, radius, color, thickness);
}

void draw_circle_c2(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness)
{
    draw_circle_c2(pixels, w, h, w * 2, cx, cy, radius, color, thickness);
}

void draw_circle_c3(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness)
{
    draw_circle_c3(pixels, w, h, w * 3, cx, cy, radius, color, thickness);
}

void draw_circle_c4(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness)
{
    draw_circle_c4(pixels, w, h, w * 4, cx, cy, radius, color, thickness);
}

// Draws a circle on a YUV420SP image: a w x h luma plane followed, at offset
// w * h, by an interleaved 2-byte chroma plane that is addressed here as a
// (w / 2) x (h / 2) two-channel image.
//
// Byte 0 of `color` is used for the luma plane, bytes 1 and 2 for the two
// chroma bytes. The chroma pass uses halved centre and radius and half the
// thickness (at least 1), or -1 when the circle is filled.
void draw_circle_yuv420sp(unsigned char* yuv420sp, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness)
{
    // assert w % 2 == 0
    // assert h % 2 == 0
    // assert cx % 2 == 0
    // assert cy % 2 == 0
    // assert radius % 2 == 0
    // assert thickness % 2 == 0

    const unsigned char* pen_color = (const unsigned char*)&color;

    // Zero-initialised: only some bytes are assigned below, and the values are
    // then passed by value, which would otherwise read indeterminate bytes.
    unsigned int v_y = 0;
    unsigned int v_uv = 0;
    unsigned char* pen_color_y = (unsigned char*)&v_y;
    unsigned char* pen_color_uv = (unsigned char*)&v_uv;
    pen_color_y[0] = pen_color[0];
    pen_color_uv[0] = pen_color[1];
    pen_color_uv[1] = pen_color[2];

    unsigned char* Y = yuv420sp;
    draw_circle_c1(Y, w, h, cx, cy, radius, v_y, thickness);

    unsigned char* UV = yuv420sp + w * h;
    int thickness_uv = thickness == -1 ? thickness : std::max(thickness / 2, 1);
    draw_circle_c2(UV, w / 2, h / 2, cx / 2, cy / 2, radius / 2, v_uv, thickness_uv);
}
// Decides whether pixel (x, y) belongs to the stroke of segment (x0, y0)-(x1, y1).
//
// The pixel is projected onto the segment; projections falling outside the
// segment are rejected, otherwise the squared distance to the projection is
// compared against t.
//
// A zero-length segment used to divide 0 by 0 here; the resulting NaN made
// every comparison false, so such a line drew nothing. It is now treated as a
// single point and tested with the same threshold.
//
// NOTE(review): the squared distance is compared with t itself, not t * t,
// unlike distance_lessequal / distance_inrange in this file. That makes thick
// lines thinner than requested; left unchanged here because fixing it alters
// rendered output -- confirm the intended behaviour.
static inline bool distance_lessthan(int x, int y, int x0, int y0, int x1, int y1, float t)
{
    int dx01 = x1 - x0;
    int dy01 = y1 - y0;
    int dx0 = x - x0;
    int dy0 = y - y0;

    const int len2 = dx01 * dx01 + dy01 * dy01;
    if (len2 == 0)
    {
        // degenerate segment: distance to the single end point
        return (float)(dx0 * dx0 + dy0 * dy0) < t;
    }

    float r = (float)(dx0 * dx01 + dy0 * dy01) / len2;

    if (r < 0 || r > 1)
        return false;

    float px = x0 + dx01 * r;
    float py = y0 + dy01 * r;
    float dx = x - px;
    float dy = y - py;
    float p = dx * dx + dy * dy;
    return p < t;
}

// Shared line rasteriser for interleaved images with `channels` bytes per pixel.
//
// Scans the bounding box of the segment, grown by half the thickness on each
// side, and paints the pixels accepted by distance_lessthan. The box is
// clipped to the image up front, so off-image parts of a line are skipped
// instead of being stepped through pixel by pixel.
static void draw_line_scan(unsigned char* pixels, int w, int h, int stride, int channels, int x0, int y0, int x1, int y1, const unsigned char* pen, int thickness)
{
    const float t0 = thickness / 2.f;
    const float t1 = thickness - t0;

    const int x_min = std::min(x0, x1);
    const int x_max = std::max(x0, x1);
    const int y_min = std::min(y0, y1);
    const int y_max = std::max(y0, y1);

    // the start is truncated to int exactly as the loop initialiser used to do
    int y_begin = (int)(y_min - t0);
    int x_begin = (int)(x_min - t0);
    if (y_begin < 0) y_begin = 0;
    if (x_begin < 0) x_begin = 0;

    const float y_limit = y_max + t1;
    const float x_limit = x_max + t1;

    for (int y = y_begin; y < h && y < y_limit; y++)
    {
        unsigned char* row = pixels + stride * y;

        for (int x = x_begin; x < w && x < x_limit; x++)
        {
            // distance from line
            if (!distance_lessthan(x, y, x0, y0, x1, y1, t1))
                continue;

            unsigned char* p = row + x * channels;
            for (int c = 0; c < channels; c++)
                p[c] = pen[c];
        }
    }
}

// Draws the segment (x0, y0)-(x1, y1) on a 1-channel image.
//
// pixels     first byte of the image
// w, h       image size in pixels
// stride     distance in bytes between the starts of consecutive rows
// color      pen colour; byte 0 of its in-memory representation is written
// thickness  stroke thickness in pixels
void draw_line_c1(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness)
{
    draw_line_scan(pixels, w, h, stride, 1, x0, y0, x1, y1, (const unsigned char*)&color, thickness);
}

// Draws a segment on a 2-channel interleaved image (bytes 0-1 of `color`).
void draw_line_c2(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness)
{
    draw_line_scan(pixels, w, h, stride, 2, x0, y0, x1, y1, (const unsigned char*)&color, thickness);
}

// Draws a segment on a 3-channel interleaved image (bytes 0-2 of `color`).
void draw_line_c3(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness)
{
    draw_line_scan(pixels, w, h, stride, 3, x0, y0, x1, y1, (const unsigned char*)&color, thickness);
}

// Draws a segment on a 4-channel interleaved image (all four bytes of `color`).
void draw_line_c4(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness)
{
    draw_line_scan(pixels, w, h, stride, 4, x0, y0, x1, y1, (const unsigned char*)&color, thickness);
}

// Packed-row conveniences: rows are assumed to follow each other without
// padding, so the stride is w * channels bytes.
void draw_line_c1(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness)
{
    draw_line_c1(pixels, w, h, w, x0, y0, x1, y1, color, thickness);
}

void draw_line_c2(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness)
{
    draw_line_c2(pixels, w, h, w * 2, x0, y0, x1, y1, color, thickness);
}

void draw_line_c3(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness)
{
    draw_line_c3(pixels, w, h, w * 3, x0, y0, x1, y1, color, thickness);
}

void draw_line_c4(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness)
{
    draw_line_c4(pixels, w, h, w * 4, x0, y0, x1, y1, color, thickness);
}

// Draws a segment on a YUV420SP image: a w x h luma plane followed, at offset
// w * h, by an interleaved 2-byte chroma plane that is addressed here as a
// (w / 2) x (h / 2) two-channel image.
//
// Byte 0 of `color` is used for the luma plane, bytes 1 and 2 for the two
// chroma bytes. The chroma pass uses halved end points and half the thickness
// (at least 1).
void draw_line_yuv420sp(unsigned char* yuv420sp, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness)
{
    // assert w % 2 == 0
    // assert h % 2 == 0
    // assert x0 % 2 == 0
    // assert y0 % 2 == 0
    // assert x1 % 2 == 0
    // assert y1 % 2 == 0
    // assert thickness % 2 == 0

    const unsigned char* pen_color = (const unsigned char*)&color;

    // Zero-initialised: only some bytes are assigned below, and the values are
    // then passed by value, which would otherwise read indeterminate bytes.
    unsigned int v_y = 0;
    unsigned int v_uv = 0;
    unsigned char* pen_color_y = (unsigned char*)&v_y;
    unsigned char* pen_color_uv = (unsigned char*)&v_uv;
    pen_color_y[0] = pen_color[0];
    pen_color_uv[0] = pen_color[1];
    pen_color_uv[1] = pen_color[2];

    unsigned char* Y = yuv420sp;
    draw_line_c1(Y, w, h, x0, y0, x1, y1, v_y, thickness);

    unsigned char* UV = yuv420sp + w * h;
    int thickness_uv = thickness == -1 ? thickness : std::max(thickness / 2, 1);
    draw_line_c2(UV, w / 2, h / 2, x0 / 2, y0 / 2, x1 / 2, y1 / 2, v_uv, thickness_uv);
}
thickness : std::max(thickness / 2, 1); 1202 | draw_line_c2(UV, w / 2, h / 2, x0 / 2, y0 / 2, x1 / 2, y1 / 2, v_uv, thickness_uv); 1203 | } 1204 | 1205 | void get_text_drawing_size(const char* text, int fontpixelsize, int* w, int* h) 1206 | { 1207 | *w = 0; 1208 | *h = 0; 1209 | 1210 | const int n = strlen(text); 1211 | 1212 | int line_w = 0; 1213 | for (int i = 0; i < n; i++) 1214 | { 1215 | char ch = text[i]; 1216 | 1217 | if (ch == '\n') 1218 | { 1219 | // newline 1220 | *w = std::max(*w, line_w); 1221 | *h += fontpixelsize * 2; 1222 | line_w = 0; 1223 | } 1224 | 1225 | if (isprint(ch) != 0) 1226 | { 1227 | line_w += fontpixelsize; 1228 | } 1229 | } 1230 | 1231 | *w = std::max(*w, line_w); 1232 | *h += fontpixelsize * 2; 1233 | } 1234 | 1235 | void draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color) 1236 | { 1237 | return draw_text_c1(pixels, w, h, w, text, x, y, fontpixelsize, color); 1238 | } 1239 | 1240 | void draw_text_c2(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color) 1241 | { 1242 | return draw_text_c2(pixels, w, h, w * 2, text, x, y, fontpixelsize, color); 1243 | } 1244 | 1245 | void draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color) 1246 | { 1247 | return draw_text_c3(pixels, w, h, w * 3, text, x, y, fontpixelsize, color); 1248 | } 1249 | 1250 | void draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color) 1251 | { 1252 | return draw_text_c4(pixels, w, h, w * 4, text, x, y, fontpixelsize, color); 1253 | } 1254 | 1255 | void draw_text_c1(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color) 1256 | { 1257 | const unsigned char* pen_color = (const unsigned char*)&color; 1258 | 1259 | unsigned char* resized_font_bitmap = new unsigned 
char[fontpixelsize * fontpixelsize * 2]; 1260 | 1261 | const int n = strlen(text); 1262 | 1263 | int cursor_x = x; 1264 | int cursor_y = y; 1265 | for (int i = 0; i < n; i++) 1266 | { 1267 | char ch = text[i]; 1268 | 1269 | if (ch == '\n') 1270 | { 1271 | // newline 1272 | cursor_x = x; 1273 | cursor_y += fontpixelsize * 2; 1274 | } 1275 | 1276 | if (isprint(ch) != 0) 1277 | { 1278 | const unsigned char* font_bitmap = mono_font_data[ch - ' ']; 1279 | 1280 | // draw resized character 1281 | resize_bilinear_c1(font_bitmap, 20, 40, resized_font_bitmap, fontpixelsize, fontpixelsize * 2); 1282 | 1283 | for (int j = cursor_y; j < cursor_y + fontpixelsize * 2; j++) 1284 | { 1285 | if (j < 0) 1286 | continue; 1287 | 1288 | if (j >= h) 1289 | break; 1290 | 1291 | const unsigned char* palpha = resized_font_bitmap + (j - cursor_y) * fontpixelsize; 1292 | unsigned char* p = pixels + stride * j; 1293 | 1294 | for (int k = cursor_x; k < cursor_x + fontpixelsize; k++) 1295 | { 1296 | if (k < 0) 1297 | continue; 1298 | 1299 | if (k >= w) 1300 | break; 1301 | 1302 | unsigned char alpha = palpha[k - cursor_x]; 1303 | 1304 | p[k] = (p[k] * (255 - alpha) + pen_color[0] * alpha) / 255; 1305 | } 1306 | } 1307 | 1308 | cursor_x += fontpixelsize; 1309 | } 1310 | } 1311 | 1312 | delete[] resized_font_bitmap; 1313 | } 1314 | 1315 | void draw_text_c2(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color) 1316 | { 1317 | const unsigned char* pen_color = (const unsigned char*)&color; 1318 | 1319 | unsigned char* resized_font_bitmap = new unsigned char[fontpixelsize * fontpixelsize * 2]; 1320 | 1321 | const int n = strlen(text); 1322 | 1323 | int cursor_x = x; 1324 | int cursor_y = y; 1325 | for (int i = 0; i < n; i++) 1326 | { 1327 | char ch = text[i]; 1328 | 1329 | if (ch == '\n') 1330 | { 1331 | // newline 1332 | cursor_x = x; 1333 | cursor_y += fontpixelsize * 2; 1334 | } 1335 | 1336 | if (isprint(ch) != 0) 1337 | { 1338 | int 
font_bitmap_index = ch - ' '; 1339 | const unsigned char* font_bitmap = mono_font_data[font_bitmap_index]; 1340 | 1341 | // draw resized character 1342 | resize_bilinear_c1(font_bitmap, 20, 40, resized_font_bitmap, fontpixelsize, fontpixelsize * 2); 1343 | 1344 | for (int j = cursor_y; j < cursor_y + fontpixelsize * 2; j++) 1345 | { 1346 | if (j < 0) 1347 | continue; 1348 | 1349 | if (j >= h) 1350 | break; 1351 | 1352 | const unsigned char* palpha = resized_font_bitmap + (j - cursor_y) * fontpixelsize; 1353 | unsigned char* p = pixels + stride * j; 1354 | 1355 | for (int k = cursor_x; k < cursor_x + fontpixelsize; k++) 1356 | { 1357 | if (k < 0) 1358 | continue; 1359 | 1360 | if (k >= w) 1361 | break; 1362 | 1363 | unsigned char alpha = palpha[k - cursor_x]; 1364 | 1365 | p[k * 2 + 0] = (p[k * 2 + 0] * (255 - alpha) + pen_color[0] * alpha) / 255; 1366 | p[k * 2 + 1] = (p[k * 2 + 1] * (255 - alpha) + pen_color[1] * alpha) / 255; 1367 | } 1368 | } 1369 | 1370 | cursor_x += fontpixelsize; 1371 | } 1372 | } 1373 | 1374 | delete[] resized_font_bitmap; 1375 | } 1376 | 1377 | void draw_text_c3(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color) 1378 | { 1379 | const unsigned char* pen_color = (const unsigned char*)&color; 1380 | 1381 | unsigned char* resized_font_bitmap = new unsigned char[fontpixelsize * fontpixelsize * 2]; 1382 | 1383 | const int n = strlen(text); 1384 | 1385 | int cursor_x = x; 1386 | int cursor_y = y; 1387 | for (int i = 0; i < n; i++) 1388 | { 1389 | char ch = text[i]; 1390 | 1391 | if (ch == '\n') 1392 | { 1393 | // newline 1394 | cursor_x = x; 1395 | cursor_y += fontpixelsize * 2; 1396 | } 1397 | 1398 | if (isprint(ch) != 0) 1399 | { 1400 | int font_bitmap_index = ch - ' '; 1401 | const unsigned char* font_bitmap = mono_font_data[font_bitmap_index]; 1402 | 1403 | // draw resized character 1404 | resize_bilinear_c1(font_bitmap, 20, 40, resized_font_bitmap, fontpixelsize, 
fontpixelsize * 2); 1405 | 1406 | for (int j = cursor_y; j < cursor_y + fontpixelsize * 2; j++) 1407 | { 1408 | if (j < 0) 1409 | continue; 1410 | 1411 | if (j >= h) 1412 | break; 1413 | 1414 | const unsigned char* palpha = resized_font_bitmap + (j - cursor_y) * fontpixelsize; 1415 | unsigned char* p = pixels + stride * j; 1416 | 1417 | for (int k = cursor_x; k < cursor_x + fontpixelsize; k++) 1418 | { 1419 | if (k < 0) 1420 | continue; 1421 | 1422 | if (k >= w) 1423 | break; 1424 | 1425 | unsigned char alpha = palpha[k - cursor_x]; 1426 | 1427 | p[k * 3 + 0] = (p[k * 3 + 0] * (255 - alpha) + pen_color[0] * alpha) / 255; 1428 | p[k * 3 + 1] = (p[k * 3 + 1] * (255 - alpha) + pen_color[1] * alpha) / 255; 1429 | p[k * 3 + 2] = (p[k * 3 + 2] * (255 - alpha) + pen_color[2] * alpha) / 255; 1430 | } 1431 | } 1432 | 1433 | cursor_x += fontpixelsize; 1434 | } 1435 | } 1436 | 1437 | delete[] resized_font_bitmap; 1438 | } 1439 | 1440 | void draw_text_c4(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color) 1441 | { 1442 | const unsigned char* pen_color = (const unsigned char*)&color; 1443 | 1444 | unsigned char* resized_font_bitmap = new unsigned char[fontpixelsize * fontpixelsize * 2]; 1445 | 1446 | const int n = strlen(text); 1447 | 1448 | int cursor_x = x; 1449 | int cursor_y = y; 1450 | for (int i = 0; i < n; i++) 1451 | { 1452 | char ch = text[i]; 1453 | 1454 | if (ch == '\n') 1455 | { 1456 | // newline 1457 | cursor_x = x; 1458 | cursor_y += fontpixelsize * 2; 1459 | } 1460 | 1461 | if (isprint(ch) != 0) 1462 | { 1463 | const unsigned char* font_bitmap = mono_font_data[ch - ' ']; 1464 | 1465 | // draw resized character 1466 | resize_bilinear_c1(font_bitmap, 20, 40, resized_font_bitmap, fontpixelsize, fontpixelsize * 2); 1467 | 1468 | for (int j = cursor_y; j < cursor_y + fontpixelsize * 2; j++) 1469 | { 1470 | if (j < 0) 1471 | continue; 1472 | 1473 | if (j >= h) 1474 | break; 1475 | 1476 | const unsigned 
char* palpha = resized_font_bitmap + (j - cursor_y) * fontpixelsize; 1477 | unsigned char* p = pixels + stride * j; 1478 | 1479 | for (int k = cursor_x; k < cursor_x + fontpixelsize; k++) 1480 | { 1481 | if (k < 0) 1482 | continue; 1483 | 1484 | if (k >= w) 1485 | break; 1486 | 1487 | unsigned char alpha = palpha[k - cursor_x]; 1488 | 1489 | p[k * 4 + 0] = (p[k * 4 + 0] * (255 - alpha) + pen_color[0] * alpha) / 255; 1490 | p[k * 4 + 1] = (p[k * 4 + 1] * (255 - alpha) + pen_color[1] * alpha) / 255; 1491 | p[k * 4 + 2] = (p[k * 4 + 2] * (255 - alpha) + pen_color[2] * alpha) / 255; 1492 | p[k * 4 + 3] = (p[k * 4 + 3] * (255 - alpha) + pen_color[3] * alpha) / 255; 1493 | } 1494 | } 1495 | 1496 | cursor_x += fontpixelsize; 1497 | } 1498 | } 1499 | 1500 | delete[] resized_font_bitmap; 1501 | } 1502 | 1503 | void draw_text_yuv420sp(unsigned char* yuv420sp, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color) 1504 | { 1505 | // assert w % 2 == 0 1506 | // assert h % 2 == 0 1507 | // assert x % 2 == 0 1508 | // assert y % 2 == 0 1509 | // assert fontpixelsize % 2 == 0 1510 | 1511 | const unsigned char* pen_color = (const unsigned char*)&color; 1512 | 1513 | unsigned int v_y; 1514 | unsigned int v_uv; 1515 | unsigned char* pen_color_y = (unsigned char*)&v_y; 1516 | unsigned char* pen_color_uv = (unsigned char*)&v_uv; 1517 | pen_color_y[0] = pen_color[0]; 1518 | pen_color_uv[0] = pen_color[1]; 1519 | pen_color_uv[1] = pen_color[2]; 1520 | 1521 | unsigned char* Y = yuv420sp; 1522 | draw_text_c1(Y, w, h, text, x, y, fontpixelsize, v_y); 1523 | 1524 | unsigned char* UV = yuv420sp + w * h; 1525 | draw_text_c2(UV, w / 2, h / 2, text, x / 2, y / 2, std::max(fontpixelsize / 2, 1), v_uv); 1526 | } 1527 | 1528 | #endif // NCNN_PIXEL_DRAWING 1529 | 1530 | } // namespace ncnn 1531 | -------------------------------------------------------------------------------- /src/mat_pixel_reisze_flycv.cpp: 
namespace sim {

namespace mcv {

// Portable bilinear resize for interleaved 8-bit images with `channels` bytes
// per pixel.
//
// The strided resize_bilinear_cN entry points below used to have empty
// bodies, so every call -- including the packed-row overloads that forward to
// them -- returned without writing a single byte of dst. This scalar version
// makes them functional. It is a plain reference implementation, not the
// optimised FlyCV-derived code this file is meant to host.
//
// Sampling uses pixel-centre alignment: destination index d maps to source
// coordinate (d + 0.5) * scale - 0.5, clamped to the source image, and the
// four neighbouring source pixels are blended linearly. Results are rounded
// to the nearest integer. Calls with a null pointer or a non-positive size
// are ignored.
static void resize_bilinear_generic(const unsigned char *src, int srcw, int srch,
                                    int srcstride, unsigned char *dst, int w, int h,
                                    int stride, int channels) {
  if (!src || !dst || srcw <= 0 || srch <= 0 || w <= 0 || h <= 0)
    return;

  const float scale_x = (float)srcw / w;
  const float scale_y = (float)srch / h;

  for (int dy = 0; dy < h; dy++) {
    float fy = (dy + 0.5f) * scale_y - 0.5f;
    if (fy < 0.f)
      fy = 0.f;
    if (fy > (float)(srch - 1))
      fy = (float)(srch - 1);

    // fy >= 0 here, so truncation is the same as floor
    const int sy0 = (int)fy;
    // the last row has no lower neighbour: reuse it
    const int sy1 = sy0 + 1 < srch ? sy0 + 1 : sy0;
    const float wy = fy - sy0;

    const unsigned char *S0 = src + srcstride * sy0;
    const unsigned char *S1 = src + srcstride * sy1;
    unsigned char *D = dst + stride * dy;

    for (int dx = 0; dx < w; dx++) {
      float fx = (dx + 0.5f) * scale_x - 0.5f;
      if (fx < 0.f)
        fx = 0.f;
      if (fx > (float)(srcw - 1))
        fx = (float)(srcw - 1);

      const int sx0 = (int)fx;
      // the last column has no right neighbour: reuse it
      const int sx1 = sx0 + 1 < srcw ? sx0 + 1 : sx0;
      const float wx = fx - sx0;

      for (int c = 0; c < channels; c++) {
        const float a = S0[sx0 * channels + c];
        const float b = S0[sx1 * channels + c];
        const float e = S1[sx0 * channels + c];
        const float f = S1[sx1 * channels + c];

        const float top = a + (b - a) * wx;
        const float bottom = e + (f - e) * wx;
        const float v = top + (bottom - top) * wy;

        D[dx * channels + c] = (unsigned char)(v + 0.5f);
      }
    }
  }
}

// Resize with explicit row strides (in bytes) for source and destination.
// src is srcw x srch pixels, dst is w x h pixels; cN is the channel count.
void resize_bilinear_c1(const unsigned char *src, int srcw, int srch,
                        int srcstride, unsigned char *dst, int w, int h,
                        int stride) {
  resize_bilinear_generic(src, srcw, srch, srcstride, dst, w, h, stride, 1);
}

void resize_bilinear_c2(const unsigned char *src, int srcw, int srch,
                        int srcstride, unsigned char *dst, int w, int h,
                        int stride) {
  resize_bilinear_generic(src, srcw, srch, srcstride, dst, w, h, stride, 2);
}

void resize_bilinear_c3(const unsigned char *src, int srcw, int srch,
                        int srcstride, unsigned char *dst, int w, int h,
                        int stride) {
  resize_bilinear_generic(src, srcw, srch, srcstride, dst, w, h, stride, 3);
}

void resize_bilinear_c4(const unsigned char *src, int srcw, int srch,
                        int srcstride, unsigned char *dst, int w, int h,
                        int stride) {
  resize_bilinear_generic(src, srcw, srch, srcstride, dst, w, h, stride, 4);
}

// Resize bilinear, packed rows: both images are assumed to have no padding
// between rows, so the strides are width * channels bytes.
void resize_bilinear_c1(const unsigned char *src, int srcw, int srch,
                        unsigned char *dst, int w, int h) {
  resize_bilinear_c1(src, srcw, srch, srcw, dst, w, h, w);
}

void resize_bilinear_c2(const unsigned char *src, int srcw, int srch,
                        unsigned char *dst, int w, int h) {
  resize_bilinear_c2(src, srcw, srch, srcw * 2, dst, w, h, w * 2);
}

void resize_bilinear_c3(const unsigned char *src, int srcw, int srch,
                        unsigned char *dst, int w, int h) {
  resize_bilinear_c3(src, srcw, srch, srcw * 3, dst, w, h, w * 3);
}

void resize_bilinear_c4(const unsigned char *src, int srcw, int srch,
                        unsigned char *dst, int w, int h) {
  resize_bilinear_c4(src, srcw, srch, srcw * 4, dst, w, h, w * 4);
}

// Resize nearest

} // namespace mcv

} // namespace sim
-------------------------------------------------------------------------------- /src/mat_pixel_resize.cpp: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn available. 2 | // 3 | // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 4 | // 5 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 | // in compliance with the License. You may obtain a copy of the License at 7 | // 8 | // https://opensource.org/licenses/BSD-3-Clause 9 | // 10 | // Unless required by applicable law or agreed to in writing, software distributed 11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 | // specific language governing permissions and limitations under the License. 14 | 15 | #include "mat.h" 16 | 17 | #include 18 | #include 19 | #if __ARM_NEON 20 | #include 21 | #endif // __ARM_NEON 22 | #include "platform.h" 23 | 24 | namespace sim { 25 | 26 | #if NCNN_PIXEL 27 | void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h) 28 | { 29 | return resize_bilinear_c1(src, srcw, srch, srcw, dst, w, h, w); 30 | } 31 | 32 | void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h) 33 | { 34 | return resize_bilinear_c2(src, srcw, srch, srcw * 2, dst, w, h, w * 2); 35 | } 36 | 37 | void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h) 38 | { 39 | return resize_bilinear_c3(src, srcw, srch, srcw * 3, dst, w, h, w * 3); 40 | } 41 | 42 | void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h) 43 | { 44 | return resize_bilinear_c4(src, srcw, srch, srcw * 4, dst, w, h, w * 4); 45 | } 46 | 47 | void resize_bilinear_c1(const 
unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride) 48 | { 49 | const int INTER_RESIZE_COEF_BITS = 11; 50 | const int INTER_RESIZE_COEF_SCALE = 1 << INTER_RESIZE_COEF_BITS; 51 | // const int ONE=INTER_RESIZE_COEF_SCALE; 52 | 53 | double scale_x = (double)srcw / w; 54 | double scale_y = (double)srch / h; 55 | 56 | int* buf = new int[w + h + w + h]; 57 | 58 | int* xofs = buf; //new int[w]; 59 | int* yofs = buf + w; //new int[h]; 60 | 61 | short* ialpha = (short*)(buf + w + h); //new short[w * 2]; 62 | short* ibeta = (short*)(buf + w + h + w); //new short[h * 2]; 63 | 64 | float fx; 65 | float fy; 66 | int sx; 67 | int sy; 68 | 69 | #define SATURATE_CAST_SHORT(X) (short)::std::min(::std::max((int)(X + (X >= 0.f ? 0.5f : -0.5f)), SHRT_MIN), SHRT_MAX); 70 | 71 | for (int dx = 0; dx < w; dx++) 72 | { 73 | fx = (float)((dx + 0.5) * scale_x - 0.5); 74 | sx = static_cast(floor(fx)); 75 | fx -= sx; 76 | 77 | if (sx < 0) 78 | { 79 | sx = 0; 80 | fx = 0.f; 81 | } 82 | if (sx >= srcw - 1) 83 | { 84 | sx = srcw - 2; 85 | fx = 1.f; 86 | } 87 | 88 | xofs[dx] = sx; 89 | 90 | float a0 = (1.f - fx) * INTER_RESIZE_COEF_SCALE; 91 | float a1 = fx * INTER_RESIZE_COEF_SCALE; 92 | 93 | ialpha[dx * 2] = SATURATE_CAST_SHORT(a0); 94 | ialpha[dx * 2 + 1] = SATURATE_CAST_SHORT(a1); 95 | } 96 | 97 | for (int dy = 0; dy < h; dy++) 98 | { 99 | fy = (float)((dy + 0.5) * scale_y - 0.5); 100 | sy = static_cast(floor(fy)); 101 | fy -= sy; 102 | 103 | if (sy < 0) 104 | { 105 | sy = 0; 106 | fy = 0.f; 107 | } 108 | if (sy >= srch - 1) 109 | { 110 | sy = srch - 2; 111 | fy = 1.f; 112 | } 113 | 114 | yofs[dy] = sy; 115 | 116 | float b0 = (1.f - fy) * INTER_RESIZE_COEF_SCALE; 117 | float b1 = fy * INTER_RESIZE_COEF_SCALE; 118 | 119 | ibeta[dy * 2] = SATURATE_CAST_SHORT(b0); 120 | ibeta[dy * 2 + 1] = SATURATE_CAST_SHORT(b1); 121 | } 122 | 123 | #undef SATURATE_CAST_SHORT 124 | 125 | // loop body 126 | Mat rowsbuf0(w, (size_t)2u); 127 | Mat rowsbuf1(w, 
(size_t)2u); 128 | short* rows0 = (short*)rowsbuf0.data; 129 | short* rows1 = (short*)rowsbuf1.data; 130 | 131 | int prev_sy1 = -2; 132 | 133 | for (int dy = 0; dy < h; dy++) 134 | { 135 | sy = yofs[dy]; 136 | 137 | if (sy == prev_sy1) 138 | { 139 | // reuse all rows 140 | } 141 | else if (sy == prev_sy1 + 1) 142 | { 143 | // hresize one row 144 | short* rows0_old = rows0; 145 | rows0 = rows1; 146 | rows1 = rows0_old; 147 | const unsigned char* S1 = src + srcstride * (sy + 1); 148 | 149 | const short* ialphap = ialpha; 150 | short* rows1p = rows1; 151 | for (int dx = 0; dx < w; dx++) 152 | { 153 | sx = xofs[dx]; 154 | short a0 = ialphap[0]; 155 | short a1 = ialphap[1]; 156 | 157 | const unsigned char* S1p = S1 + sx; 158 | rows1p[dx] = (S1p[0] * a0 + S1p[1] * a1) >> 4; 159 | 160 | ialphap += 2; 161 | } 162 | } 163 | else 164 | { 165 | // hresize two rows 166 | const unsigned char* S0 = src + srcstride * (sy); 167 | const unsigned char* S1 = src + srcstride * (sy + 1); 168 | 169 | const short* ialphap = ialpha; 170 | short* rows0p = rows0; 171 | short* rows1p = rows1; 172 | for (int dx = 0; dx < w; dx++) 173 | { 174 | sx = xofs[dx]; 175 | short a0 = ialphap[0]; 176 | short a1 = ialphap[1]; 177 | 178 | const unsigned char* S0p = S0 + sx; 179 | const unsigned char* S1p = S1 + sx; 180 | rows0p[dx] = (S0p[0] * a0 + S0p[1] * a1) >> 4; 181 | rows1p[dx] = (S1p[0] * a0 + S1p[1] * a1) >> 4; 182 | 183 | ialphap += 2; 184 | } 185 | } 186 | 187 | prev_sy1 = sy; 188 | 189 | // vresize 190 | short b0 = ibeta[0]; 191 | short b1 = ibeta[1]; 192 | 193 | short* rows0p = rows0; 194 | short* rows1p = rows1; 195 | unsigned char* Dp = dst + stride * (dy); 196 | 197 | #if __ARM_NEON 198 | int nn = w >> 3; 199 | #else 200 | int nn = 0; 201 | #endif 202 | int remain = w - (nn << 3); 203 | 204 | #if __ARM_NEON 205 | #if __aarch64__ 206 | int16x4_t _b0 = vdup_n_s16(b0); 207 | int16x4_t _b1 = vdup_n_s16(b1); 208 | int32x4_t _v2 = vdupq_n_s32(2); 209 | for (; nn > 0; nn--) 210 | { 211 | 
int16x4_t _rows0p_sr4 = vld1_s16(rows0p); 212 | int16x4_t _rows1p_sr4 = vld1_s16(rows1p); 213 | int16x4_t _rows0p_1_sr4 = vld1_s16(rows0p + 4); 214 | int16x4_t _rows1p_1_sr4 = vld1_s16(rows1p + 4); 215 | 216 | int32x4_t _rows0p_sr4_mb0 = vmull_s16(_rows0p_sr4, _b0); 217 | int32x4_t _rows1p_sr4_mb1 = vmull_s16(_rows1p_sr4, _b1); 218 | int32x4_t _rows0p_1_sr4_mb0 = vmull_s16(_rows0p_1_sr4, _b0); 219 | int32x4_t _rows1p_1_sr4_mb1 = vmull_s16(_rows1p_1_sr4, _b1); 220 | 221 | int32x4_t _acc = _v2; 222 | _acc = vsraq_n_s32(_acc, _rows0p_sr4_mb0, 16); 223 | _acc = vsraq_n_s32(_acc, _rows1p_sr4_mb1, 16); 224 | 225 | int32x4_t _acc_1 = _v2; 226 | _acc_1 = vsraq_n_s32(_acc_1, _rows0p_1_sr4_mb0, 16); 227 | _acc_1 = vsraq_n_s32(_acc_1, _rows1p_1_sr4_mb1, 16); 228 | 229 | int16x4_t _acc16 = vshrn_n_s32(_acc, 2); 230 | int16x4_t _acc16_1 = vshrn_n_s32(_acc_1, 2); 231 | 232 | uint8x8_t _D = vqmovun_s16(vcombine_s16(_acc16, _acc16_1)); 233 | 234 | vst1_u8(Dp, _D); 235 | 236 | Dp += 8; 237 | rows0p += 8; 238 | rows1p += 8; 239 | } 240 | #else 241 | if (nn > 0) 242 | { 243 | asm volatile( 244 | "vdup.s16 d16, %8 \n" 245 | "mov r4, #2 \n" 246 | "vdup.s16 d17, %9 \n" 247 | "vdup.s32 q12, r4 \n" 248 | "pld [%0, #128] \n" 249 | "vld1.s16 {d2-d3}, [%0 :128]!\n" 250 | "pld [%1, #128] \n" 251 | "vld1.s16 {d6-d7}, [%1 :128]!\n" 252 | "0: \n" 253 | "vmull.s16 q0, d2, d16 \n" 254 | "vmull.s16 q1, d3, d16 \n" 255 | "vorr.s32 q10, q12, q12 \n" 256 | "vorr.s32 q11, q12, q12 \n" 257 | "vmull.s16 q2, d6, d17 \n" 258 | "vmull.s16 q3, d7, d17 \n" 259 | "vsra.s32 q10, q0, #16 \n" 260 | "vsra.s32 q11, q1, #16 \n" 261 | "pld [%0, #128] \n" 262 | "vld1.s16 {d2-d3}, [%0 :128]!\n" 263 | "vsra.s32 q10, q2, #16 \n" 264 | "vsra.s32 q11, q3, #16 \n" 265 | "pld [%1, #128] \n" 266 | "vld1.s16 {d6-d7}, [%1 :128]!\n" 267 | "vshrn.s32 d20, q10, #2 \n" 268 | "vshrn.s32 d21, q11, #2 \n" 269 | "vqmovun.s16 d20, q10 \n" 270 | "vst1.8 {d20}, [%2]! 
\n" 271 | "subs %3, #1 \n" 272 | "bne 0b \n" 273 | "sub %0, #16 \n" 274 | "sub %1, #16 \n" 275 | : "=r"(rows0p), // %0 276 | "=r"(rows1p), // %1 277 | "=r"(Dp), // %2 278 | "=r"(nn) // %3 279 | : "0"(rows0p), 280 | "1"(rows1p), 281 | "2"(Dp), 282 | "3"(nn), 283 | "r"(b0), // %8 284 | "r"(b1) // %9 285 | : "cc", "memory", "r4", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12"); 286 | } 287 | #endif // __aarch64__ 288 | #endif // __ARM_NEON 289 | for (; remain; --remain) 290 | { 291 | // D[x] = (rows0[x]*b0 + rows1[x]*b1) >> INTER_RESIZE_COEF_BITS; 292 | *Dp++ = (unsigned char)(((short)((b0 * (short)(*rows0p++)) >> 16) + (short)((b1 * (short)(*rows1p++)) >> 16) + 2) >> 2); 293 | } 294 | 295 | ibeta += 2; 296 | } 297 | 298 | delete[] buf; 299 | } 300 | 301 | void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride) 302 | { 303 | const int INTER_RESIZE_COEF_BITS = 11; 304 | const int INTER_RESIZE_COEF_SCALE = 1 << INTER_RESIZE_COEF_BITS; 305 | // const int ONE=INTER_RESIZE_COEF_SCALE; 306 | 307 | double scale_x = (double)srcw / w; 308 | double scale_y = (double)srch / h; 309 | 310 | int* buf = new int[w + h + w + h]; 311 | 312 | int* xofs = buf; //new int[w]; 313 | int* yofs = buf + w; //new int[h]; 314 | 315 | short* ialpha = (short*)(buf + w + h); //new short[w * 2]; 316 | short* ibeta = (short*)(buf + w + h + w); //new short[h * 2]; 317 | 318 | float fx; 319 | float fy; 320 | int sx; 321 | int sy; 322 | 323 | #define SATURATE_CAST_SHORT(X) (short)::std::min(::std::max((int)(X + (X >= 0.f ? 
0.5f : -0.5f)), SHRT_MIN), SHRT_MAX); 324 | 325 | for (int dx = 0; dx < w; dx++) 326 | { 327 | fx = (float)((dx + 0.5) * scale_x - 0.5); 328 | sx = static_cast(floor(fx)); 329 | fx -= sx; 330 | 331 | if (sx < 0) 332 | { 333 | sx = 0; 334 | fx = 0.f; 335 | } 336 | if (sx >= srcw - 1) 337 | { 338 | sx = srcw - 2; 339 | fx = 1.f; 340 | } 341 | 342 | xofs[dx] = sx * 2; 343 | 344 | float a0 = (1.f - fx) * INTER_RESIZE_COEF_SCALE; 345 | float a1 = fx * INTER_RESIZE_COEF_SCALE; 346 | 347 | ialpha[dx * 2] = SATURATE_CAST_SHORT(a0); 348 | ialpha[dx * 2 + 1] = SATURATE_CAST_SHORT(a1); 349 | } 350 | 351 | for (int dy = 0; dy < h; dy++) 352 | { 353 | fy = (float)((dy + 0.5) * scale_y - 0.5); 354 | sy = static_cast(floor(fy)); 355 | fy -= sy; 356 | 357 | if (sy < 0) 358 | { 359 | sy = 0; 360 | fy = 0.f; 361 | } 362 | if (sy >= srch - 1) 363 | { 364 | sy = srch - 2; 365 | fy = 1.f; 366 | } 367 | 368 | yofs[dy] = sy; 369 | 370 | float b0 = (1.f - fy) * INTER_RESIZE_COEF_SCALE; 371 | float b1 = fy * INTER_RESIZE_COEF_SCALE; 372 | 373 | ibeta[dy * 2] = SATURATE_CAST_SHORT(b0); 374 | ibeta[dy * 2 + 1] = SATURATE_CAST_SHORT(b1); 375 | } 376 | 377 | #undef SATURATE_CAST_SHORT 378 | 379 | // loop body 380 | Mat rowsbuf0(w * 2 + 2, (size_t)2u); 381 | Mat rowsbuf1(w * 2 + 2, (size_t)2u); 382 | short* rows0 = (short*)rowsbuf0.data; 383 | short* rows1 = (short*)rowsbuf1.data; 384 | 385 | int prev_sy1 = -2; 386 | 387 | for (int dy = 0; dy < h; dy++) 388 | { 389 | sy = yofs[dy]; 390 | 391 | if (sy == prev_sy1) 392 | { 393 | // reuse all rows 394 | } 395 | else if (sy == prev_sy1 + 1) 396 | { 397 | // hresize one row 398 | short* rows0_old = rows0; 399 | rows0 = rows1; 400 | rows1 = rows0_old; 401 | const unsigned char* S1 = src + srcstride * (sy + 1); 402 | 403 | const short* ialphap = ialpha; 404 | short* rows1p = rows1; 405 | for (int dx = 0; dx < w; dx++) 406 | { 407 | sx = xofs[dx]; 408 | 409 | const unsigned char* S1p = S1 + sx; 410 | #if __ARM_NEON 411 | int16x4_t _a0a1XX = 
vld1_s16(ialphap); 412 | int16x4_t _a0a0a1a1 = vzip_s16(_a0a1XX, _a0a1XX).val[0]; 413 | uint8x8_t _S1 = uint8x8_t(); 414 | 415 | _S1 = vld1_lane_u8(S1p, _S1, 0); 416 | _S1 = vld1_lane_u8(S1p + 1, _S1, 1); 417 | _S1 = vld1_lane_u8(S1p + 2, _S1, 2); 418 | _S1 = vld1_lane_u8(S1p + 3, _S1, 3); 419 | 420 | int16x8_t _S116 = vreinterpretq_s16_u16(vmovl_u8(_S1)); 421 | int16x4_t _S1lowhigh = vget_low_s16(_S116); 422 | int32x4_t _S1ma0a1 = vmull_s16(_S1lowhigh, _a0a0a1a1); 423 | int32x2_t _rows1low = vadd_s32(vget_low_s32(_S1ma0a1), vget_high_s32(_S1ma0a1)); 424 | int32x4_t _rows1 = vcombine_s32(_rows1low, vget_high_s32(_S1ma0a1)); 425 | int16x4_t _rows1_sr4 = vshrn_n_s32(_rows1, 4); 426 | vst1_s16(rows1p, _rows1_sr4); 427 | #else 428 | short a0 = ialphap[0]; 429 | short a1 = ialphap[1]; 430 | 431 | rows1p[0] = (S1p[0] * a0 + S1p[2] * a1) >> 4; 432 | rows1p[1] = (S1p[1] * a0 + S1p[3] * a1) >> 4; 433 | #endif // __ARM_NEON 434 | 435 | ialphap += 2; 436 | rows1p += 2; 437 | } 438 | } 439 | else 440 | { 441 | // hresize two rows 442 | const unsigned char* S0 = src + srcstride * (sy); 443 | const unsigned char* S1 = src + srcstride * (sy + 1); 444 | 445 | const short* ialphap = ialpha; 446 | short* rows0p = rows0; 447 | short* rows1p = rows1; 448 | for (int dx = 0; dx < w; dx++) 449 | { 450 | sx = xofs[dx]; 451 | short a0 = ialphap[0]; 452 | short a1 = ialphap[1]; 453 | 454 | const unsigned char* S0p = S0 + sx; 455 | const unsigned char* S1p = S1 + sx; 456 | #if __ARM_NEON 457 | int16x4_t _a0 = vdup_n_s16(a0); 458 | int16x4_t _a1 = vdup_n_s16(a1); 459 | uint8x8_t _S0 = uint8x8_t(); 460 | uint8x8_t _S1 = uint8x8_t(); 461 | 462 | _S0 = vld1_lane_u8(S0p, _S0, 0); 463 | _S0 = vld1_lane_u8(S0p + 1, _S0, 1); 464 | _S0 = vld1_lane_u8(S0p + 2, _S0, 2); 465 | _S0 = vld1_lane_u8(S0p + 3, _S0, 3); 466 | 467 | _S1 = vld1_lane_u8(S1p, _S1, 0); 468 | _S1 = vld1_lane_u8(S1p + 1, _S1, 1); 469 | _S1 = vld1_lane_u8(S1p + 2, _S1, 2); 470 | _S1 = vld1_lane_u8(S1p + 3, _S1, 3); 471 | 472 | 
int16x8_t _S016 = vreinterpretq_s16_u16(vmovl_u8(_S0)); 473 | int16x8_t _S116 = vreinterpretq_s16_u16(vmovl_u8(_S1)); 474 | int16x4_t _S0lowhigh = vget_low_s16(_S016); 475 | int16x4_t _S1lowhigh = vget_low_s16(_S116); 476 | int32x2x2_t _S0S1low_S0S1high = vtrn_s32(vreinterpret_s32_s16(_S0lowhigh), vreinterpret_s32_s16(_S1lowhigh)); 477 | int32x4_t _rows01 = vmull_s16(vreinterpret_s16_s32(_S0S1low_S0S1high.val[0]), _a0); 478 | _rows01 = vmlal_s16(_rows01, vreinterpret_s16_s32(_S0S1low_S0S1high.val[1]), _a1); 479 | int16x4_t _rows01_sr4 = vshrn_n_s32(_rows01, 4); 480 | int16x4_t _rows1_sr4 = vext_s16(_rows01_sr4, _rows01_sr4, 2); 481 | vst1_s16(rows0p, _rows01_sr4); 482 | vst1_s16(rows1p, _rows1_sr4); 483 | #else 484 | rows0p[0] = (S0p[0] * a0 + S0p[2] * a1) >> 4; 485 | rows0p[1] = (S0p[1] * a0 + S0p[3] * a1) >> 4; 486 | rows1p[0] = (S1p[0] * a0 + S1p[2] * a1) >> 4; 487 | rows1p[1] = (S1p[1] * a0 + S1p[3] * a1) >> 4; 488 | #endif // __ARM_NEON 489 | 490 | ialphap += 2; 491 | rows0p += 2; 492 | rows1p += 2; 493 | } 494 | } 495 | 496 | prev_sy1 = sy; 497 | 498 | // vresize 499 | short b0 = ibeta[0]; 500 | short b1 = ibeta[1]; 501 | 502 | short* rows0p = rows0; 503 | short* rows1p = rows1; 504 | unsigned char* Dp = dst + stride * (dy); 505 | 506 | #if __ARM_NEON 507 | int nn = (w * 2) >> 3; 508 | #else 509 | int nn = 0; 510 | #endif 511 | int remain = (w * 2) - (nn << 3); 512 | 513 | #if __ARM_NEON 514 | #if __aarch64__ 515 | int16x4_t _b0 = vdup_n_s16(b0); 516 | int16x4_t _b1 = vdup_n_s16(b1); 517 | int32x4_t _v2 = vdupq_n_s32(2); 518 | for (; nn > 0; nn--) 519 | { 520 | int16x4_t _rows0p_sr4 = vld1_s16(rows0p); 521 | int16x4_t _rows1p_sr4 = vld1_s16(rows1p); 522 | int16x4_t _rows0p_1_sr4 = vld1_s16(rows0p + 4); 523 | int16x4_t _rows1p_1_sr4 = vld1_s16(rows1p + 4); 524 | 525 | int32x4_t _rows0p_sr4_mb0 = vmull_s16(_rows0p_sr4, _b0); 526 | int32x4_t _rows1p_sr4_mb1 = vmull_s16(_rows1p_sr4, _b1); 527 | int32x4_t _rows0p_1_sr4_mb0 = vmull_s16(_rows0p_1_sr4, _b0); 528 | 
int32x4_t _rows1p_1_sr4_mb1 = vmull_s16(_rows1p_1_sr4, _b1); 529 | 530 | int32x4_t _acc = _v2; 531 | _acc = vsraq_n_s32(_acc, _rows0p_sr4_mb0, 16); 532 | _acc = vsraq_n_s32(_acc, _rows1p_sr4_mb1, 16); 533 | 534 | int32x4_t _acc_1 = _v2; 535 | _acc_1 = vsraq_n_s32(_acc_1, _rows0p_1_sr4_mb0, 16); 536 | _acc_1 = vsraq_n_s32(_acc_1, _rows1p_1_sr4_mb1, 16); 537 | 538 | int16x4_t _acc16 = vshrn_n_s32(_acc, 2); 539 | int16x4_t _acc16_1 = vshrn_n_s32(_acc_1, 2); 540 | 541 | uint8x8_t _D = vqmovun_s16(vcombine_s16(_acc16, _acc16_1)); 542 | 543 | vst1_u8(Dp, _D); 544 | 545 | Dp += 8; 546 | rows0p += 8; 547 | rows1p += 8; 548 | } 549 | #else 550 | if (nn > 0) 551 | { 552 | asm volatile( 553 | "vdup.s16 d16, %8 \n" 554 | "mov r4, #2 \n" 555 | "vdup.s16 d17, %9 \n" 556 | "vdup.s32 q12, r4 \n" 557 | "pld [%0, #128] \n" 558 | "vld1.s16 {d2-d3}, [%0 :128]!\n" 559 | "pld [%1, #128] \n" 560 | "vld1.s16 {d6-d7}, [%1 :128]!\n" 561 | "0: \n" 562 | "vmull.s16 q0, d2, d16 \n" 563 | "vmull.s16 q1, d3, d16 \n" 564 | "vorr.s32 q10, q12, q12 \n" 565 | "vorr.s32 q11, q12, q12 \n" 566 | "vmull.s16 q2, d6, d17 \n" 567 | "vmull.s16 q3, d7, d17 \n" 568 | "vsra.s32 q10, q0, #16 \n" 569 | "vsra.s32 q11, q1, #16 \n" 570 | "pld [%0, #128] \n" 571 | "vld1.s16 {d2-d3}, [%0 :128]!\n" 572 | "vsra.s32 q10, q2, #16 \n" 573 | "vsra.s32 q11, q3, #16 \n" 574 | "pld [%1, #128] \n" 575 | "vld1.s16 {d6-d7}, [%1 :128]!\n" 576 | "vshrn.s32 d20, q10, #2 \n" 577 | "vshrn.s32 d21, q11, #2 \n" 578 | "vqmovun.s16 d20, q10 \n" 579 | "vst1.8 {d20}, [%2]! 
\n" 580 | "subs %3, #1 \n" 581 | "bne 0b \n" 582 | "sub %0, #16 \n" 583 | "sub %1, #16 \n" 584 | : "=r"(rows0p), // %0 585 | "=r"(rows1p), // %1 586 | "=r"(Dp), // %2 587 | "=r"(nn) // %3 588 | : "0"(rows0p), 589 | "1"(rows1p), 590 | "2"(Dp), 591 | "3"(nn), 592 | "r"(b0), // %8 593 | "r"(b1) // %9 594 | : "cc", "memory", "r4", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12"); 595 | } 596 | #endif // __aarch64__ 597 | #endif // __ARM_NEON 598 | for (; remain; --remain) 599 | { 600 | // D[x] = (rows0[x]*b0 + rows1[x]*b1) >> INTER_RESIZE_COEF_BITS; 601 | *Dp++ = (unsigned char)(((short)((b0 * (short)(*rows0p++)) >> 16) + (short)((b1 * (short)(*rows1p++)) >> 16) + 2) >> 2); 602 | } 603 | 604 | ibeta += 2; 605 | } 606 | 607 | delete[] buf; 608 | } 609 | 610 | void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride) 611 | { 612 | const int INTER_RESIZE_COEF_BITS = 11; 613 | const int INTER_RESIZE_COEF_SCALE = 1 << INTER_RESIZE_COEF_BITS; 614 | // const int ONE=INTER_RESIZE_COEF_SCALE; 615 | 616 | double scale_x = (double)srcw / w; 617 | double scale_y = (double)srch / h; 618 | 619 | int* buf = new int[w + h + w + h]; 620 | 621 | int* xofs = buf; //new int[w]; 622 | int* yofs = buf + w; //new int[h]; 623 | 624 | short* ialpha = (short*)(buf + w + h); //new short[w * 2]; 625 | short* ibeta = (short*)(buf + w + h + w); //new short[h * 2]; 626 | 627 | float fx; 628 | float fy; 629 | int sx; 630 | int sy; 631 | 632 | #define SATURATE_CAST_SHORT(X) (short)::std::min(::std::max((int)(X + (X >= 0.f ? 
0.5f : -0.5f)), SHRT_MIN), SHRT_MAX); 633 | 634 | for (int dx = 0; dx < w; dx++) 635 | { 636 | fx = (float)((dx + 0.5) * scale_x - 0.5); 637 | sx = static_cast(floor(fx)); 638 | fx -= sx; 639 | 640 | if (sx < 0) 641 | { 642 | sx = 0; 643 | fx = 0.f; 644 | } 645 | if (sx >= srcw - 1) 646 | { 647 | sx = srcw - 2; 648 | fx = 1.f; 649 | } 650 | 651 | xofs[dx] = sx * 3; 652 | 653 | float a0 = (1.f - fx) * INTER_RESIZE_COEF_SCALE; 654 | float a1 = fx * INTER_RESIZE_COEF_SCALE; 655 | 656 | ialpha[dx * 2] = SATURATE_CAST_SHORT(a0); 657 | ialpha[dx * 2 + 1] = SATURATE_CAST_SHORT(a1); 658 | } 659 | 660 | for (int dy = 0; dy < h; dy++) 661 | { 662 | fy = (float)((dy + 0.5) * scale_y - 0.5); 663 | sy = static_cast(floor(fy)); 664 | fy -= sy; 665 | 666 | if (sy < 0) 667 | { 668 | sy = 0; 669 | fy = 0.f; 670 | } 671 | if (sy >= srch - 1) 672 | { 673 | sy = srch - 2; 674 | fy = 1.f; 675 | } 676 | 677 | yofs[dy] = sy; 678 | 679 | float b0 = (1.f - fy) * INTER_RESIZE_COEF_SCALE; 680 | float b1 = fy * INTER_RESIZE_COEF_SCALE; 681 | 682 | ibeta[dy * 2] = SATURATE_CAST_SHORT(b0); 683 | ibeta[dy * 2 + 1] = SATURATE_CAST_SHORT(b1); 684 | } 685 | 686 | #undef SATURATE_CAST_SHORT 687 | 688 | // loop body 689 | Mat rowsbuf0(w * 3 + 1, (size_t)2u); 690 | Mat rowsbuf1(w * 3 + 1, (size_t)2u); 691 | short* rows0 = (short*)rowsbuf0.data; 692 | short* rows1 = (short*)rowsbuf1.data; 693 | 694 | int prev_sy1 = -2; 695 | 696 | for (int dy = 0; dy < h; dy++) 697 | { 698 | sy = yofs[dy]; 699 | 700 | if (sy == prev_sy1) 701 | { 702 | // reuse all rows 703 | } 704 | else if (sy == prev_sy1 + 1) 705 | { 706 | // hresize one row 707 | short* rows0_old = rows0; 708 | rows0 = rows1; 709 | rows1 = rows0_old; 710 | const unsigned char* S1 = src + srcstride * (sy + 1); 711 | 712 | const short* ialphap = ialpha; 713 | short* rows1p = rows1; 714 | for (int dx = 0; dx < w; dx++) 715 | { 716 | sx = xofs[dx]; 717 | short a0 = ialphap[0]; 718 | short a1 = ialphap[1]; 719 | 720 | const unsigned char* S1p = S1 + sx; 
721 | #if __ARM_NEON 722 | int16x4_t _a0 = vdup_n_s16(a0); 723 | int16x4_t _a1 = vdup_n_s16(a1); 724 | uint8x8_t _S1 = uint8x8_t(); 725 | 726 | _S1 = vld1_lane_u8(S1p, _S1, 0); 727 | _S1 = vld1_lane_u8(S1p + 1, _S1, 1); 728 | _S1 = vld1_lane_u8(S1p + 2, _S1, 2); 729 | _S1 = vld1_lane_u8(S1p + 3, _S1, 3); 730 | _S1 = vld1_lane_u8(S1p + 4, _S1, 4); 731 | _S1 = vld1_lane_u8(S1p + 5, _S1, 5); 732 | 733 | int16x8_t _S116 = vreinterpretq_s16_u16(vmovl_u8(_S1)); 734 | int16x4_t _S1low = vget_low_s16(_S116); 735 | int16x4_t _S1high = vext_s16(_S1low, vget_high_s16(_S116), 3); 736 | int32x4_t _rows1 = vmull_s16(_S1low, _a0); 737 | _rows1 = vmlal_s16(_rows1, _S1high, _a1); 738 | int16x4_t _rows1_sr4 = vshrn_n_s32(_rows1, 4); 739 | vst1_s16(rows1p, _rows1_sr4); 740 | #else 741 | rows1p[0] = (S1p[0] * a0 + S1p[3] * a1) >> 4; 742 | rows1p[1] = (S1p[1] * a0 + S1p[4] * a1) >> 4; 743 | rows1p[2] = (S1p[2] * a0 + S1p[5] * a1) >> 4; 744 | #endif // __ARM_NEON 745 | 746 | ialphap += 2; 747 | rows1p += 3; 748 | } 749 | } 750 | else 751 | { 752 | // hresize two rows 753 | const unsigned char* S0 = src + srcstride * (sy); 754 | const unsigned char* S1 = src + srcstride * (sy + 1); 755 | 756 | const short* ialphap = ialpha; 757 | short* rows0p = rows0; 758 | short* rows1p = rows1; 759 | for (int dx = 0; dx < w; dx++) 760 | { 761 | sx = xofs[dx]; 762 | short a0 = ialphap[0]; 763 | short a1 = ialphap[1]; 764 | 765 | const unsigned char* S0p = S0 + sx; 766 | const unsigned char* S1p = S1 + sx; 767 | #if __ARM_NEON 768 | int16x4_t _a0 = vdup_n_s16(a0); 769 | int16x4_t _a1 = vdup_n_s16(a1); 770 | uint8x8_t _S0 = uint8x8_t(); 771 | uint8x8_t _S1 = uint8x8_t(); 772 | 773 | _S0 = vld1_lane_u8(S0p, _S0, 0); 774 | _S0 = vld1_lane_u8(S0p + 1, _S0, 1); 775 | _S0 = vld1_lane_u8(S0p + 2, _S0, 2); 776 | _S0 = vld1_lane_u8(S0p + 3, _S0, 3); 777 | _S0 = vld1_lane_u8(S0p + 4, _S0, 4); 778 | _S0 = vld1_lane_u8(S0p + 5, _S0, 5); 779 | 780 | _S1 = vld1_lane_u8(S1p, _S1, 0); 781 | _S1 = vld1_lane_u8(S1p + 1, 
_S1, 1); 782 | _S1 = vld1_lane_u8(S1p + 2, _S1, 2); 783 | _S1 = vld1_lane_u8(S1p + 3, _S1, 3); 784 | _S1 = vld1_lane_u8(S1p + 4, _S1, 4); 785 | _S1 = vld1_lane_u8(S1p + 5, _S1, 5); 786 | 787 | int16x8_t _S016 = vreinterpretq_s16_u16(vmovl_u8(_S0)); 788 | int16x8_t _S116 = vreinterpretq_s16_u16(vmovl_u8(_S1)); 789 | int16x4_t _S0low = vget_low_s16(_S016); 790 | int16x4_t _S1low = vget_low_s16(_S116); 791 | int16x4_t _S0high = vext_s16(_S0low, vget_high_s16(_S016), 3); 792 | int16x4_t _S1high = vext_s16(_S1low, vget_high_s16(_S116), 3); 793 | int32x4_t _rows0 = vmull_s16(_S0low, _a0); 794 | int32x4_t _rows1 = vmull_s16(_S1low, _a0); 795 | _rows0 = vmlal_s16(_rows0, _S0high, _a1); 796 | _rows1 = vmlal_s16(_rows1, _S1high, _a1); 797 | int16x4_t _rows0_sr4 = vshrn_n_s32(_rows0, 4); 798 | int16x4_t _rows1_sr4 = vshrn_n_s32(_rows1, 4); 799 | vst1_s16(rows0p, _rows0_sr4); 800 | vst1_s16(rows1p, _rows1_sr4); 801 | #else 802 | rows0p[0] = (S0p[0] * a0 + S0p[3] * a1) >> 4; 803 | rows0p[1] = (S0p[1] * a0 + S0p[4] * a1) >> 4; 804 | rows0p[2] = (S0p[2] * a0 + S0p[5] * a1) >> 4; 805 | rows1p[0] = (S1p[0] * a0 + S1p[3] * a1) >> 4; 806 | rows1p[1] = (S1p[1] * a0 + S1p[4] * a1) >> 4; 807 | rows1p[2] = (S1p[2] * a0 + S1p[5] * a1) >> 4; 808 | #endif // __ARM_NEON 809 | 810 | ialphap += 2; 811 | rows0p += 3; 812 | rows1p += 3; 813 | } 814 | } 815 | 816 | prev_sy1 = sy; 817 | 818 | // vresize 819 | short b0 = ibeta[0]; 820 | short b1 = ibeta[1]; 821 | 822 | short* rows0p = rows0; 823 | short* rows1p = rows1; 824 | unsigned char* Dp = dst + stride * (dy); 825 | 826 | #if __ARM_NEON 827 | int nn = (w * 3) >> 3; 828 | #else 829 | int nn = 0; 830 | #endif 831 | int remain = (w * 3) - (nn << 3); 832 | 833 | #if __ARM_NEON 834 | #if __aarch64__ 835 | int16x4_t _b0 = vdup_n_s16(b0); 836 | int16x4_t _b1 = vdup_n_s16(b1); 837 | int32x4_t _v2 = vdupq_n_s32(2); 838 | for (; nn > 0; nn--) 839 | { 840 | int16x4_t _rows0p_sr4 = vld1_s16(rows0p); 841 | int16x4_t _rows1p_sr4 = vld1_s16(rows1p); 842 | 
int16x4_t _rows0p_1_sr4 = vld1_s16(rows0p + 4); 843 | int16x4_t _rows1p_1_sr4 = vld1_s16(rows1p + 4); 844 | 845 | int32x4_t _rows0p_sr4_mb0 = vmull_s16(_rows0p_sr4, _b0); 846 | int32x4_t _rows1p_sr4_mb1 = vmull_s16(_rows1p_sr4, _b1); 847 | int32x4_t _rows0p_1_sr4_mb0 = vmull_s16(_rows0p_1_sr4, _b0); 848 | int32x4_t _rows1p_1_sr4_mb1 = vmull_s16(_rows1p_1_sr4, _b1); 849 | 850 | int32x4_t _acc = _v2; 851 | _acc = vsraq_n_s32(_acc, _rows0p_sr4_mb0, 16); 852 | _acc = vsraq_n_s32(_acc, _rows1p_sr4_mb1, 16); 853 | 854 | int32x4_t _acc_1 = _v2; 855 | _acc_1 = vsraq_n_s32(_acc_1, _rows0p_1_sr4_mb0, 16); 856 | _acc_1 = vsraq_n_s32(_acc_1, _rows1p_1_sr4_mb1, 16); 857 | 858 | int16x4_t _acc16 = vshrn_n_s32(_acc, 2); 859 | int16x4_t _acc16_1 = vshrn_n_s32(_acc_1, 2); 860 | 861 | uint8x8_t _D = vqmovun_s16(vcombine_s16(_acc16, _acc16_1)); 862 | 863 | vst1_u8(Dp, _D); 864 | 865 | Dp += 8; 866 | rows0p += 8; 867 | rows1p += 8; 868 | } 869 | #else 870 | if (nn > 0) 871 | { 872 | asm volatile( 873 | "vdup.s16 d16, %8 \n" 874 | "mov r4, #2 \n" 875 | "vdup.s16 d17, %9 \n" 876 | "vdup.s32 q12, r4 \n" 877 | "pld [%0, #128] \n" 878 | "vld1.s16 {d2-d3}, [%0 :128]!\n" 879 | "pld [%1, #128] \n" 880 | "vld1.s16 {d6-d7}, [%1 :128]!\n" 881 | "0: \n" 882 | "vmull.s16 q0, d2, d16 \n" 883 | "vmull.s16 q1, d3, d16 \n" 884 | "vorr.s32 q10, q12, q12 \n" 885 | "vorr.s32 q11, q12, q12 \n" 886 | "vmull.s16 q2, d6, d17 \n" 887 | "vmull.s16 q3, d7, d17 \n" 888 | "vsra.s32 q10, q0, #16 \n" 889 | "vsra.s32 q11, q1, #16 \n" 890 | "pld [%0, #128] \n" 891 | "vld1.s16 {d2-d3}, [%0 :128]!\n" 892 | "vsra.s32 q10, q2, #16 \n" 893 | "vsra.s32 q11, q3, #16 \n" 894 | "pld [%1, #128] \n" 895 | "vld1.s16 {d6-d7}, [%1 :128]!\n" 896 | "vshrn.s32 d20, q10, #2 \n" 897 | "vshrn.s32 d21, q11, #2 \n" 898 | "vqmovun.s16 d20, q10 \n" 899 | "vst1.8 {d20}, [%2]! 
\n" 900 | "subs %3, #1 \n" 901 | "bne 0b \n" 902 | "sub %0, #16 \n" 903 | "sub %1, #16 \n" 904 | : "=r"(rows0p), // %0 905 | "=r"(rows1p), // %1 906 | "=r"(Dp), // %2 907 | "=r"(nn) // %3 908 | : "0"(rows0p), 909 | "1"(rows1p), 910 | "2"(Dp), 911 | "3"(nn), 912 | "r"(b0), // %8 913 | "r"(b1) // %9 914 | : "cc", "memory", "r4", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12"); 915 | } 916 | #endif // __aarch64__ 917 | #endif // __ARM_NEON 918 | for (; remain; --remain) 919 | { 920 | // D[x] = (rows0[x]*b0 + rows1[x]*b1) >> INTER_RESIZE_COEF_BITS; 921 | *Dp++ = (unsigned char)(((short)((b0 * (short)(*rows0p++)) >> 16) + (short)((b1 * (short)(*rows1p++)) >> 16) + 2) >> 2); 922 | } 923 | 924 | ibeta += 2; 925 | } 926 | 927 | delete[] buf; 928 | } 929 | 930 | void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride) 931 | { 932 | const int INTER_RESIZE_COEF_BITS = 11; 933 | const int INTER_RESIZE_COEF_SCALE = 1 << INTER_RESIZE_COEF_BITS; 934 | // const int ONE=INTER_RESIZE_COEF_SCALE; 935 | 936 | double scale_x = (double)srcw / w; 937 | double scale_y = (double)srch / h; 938 | 939 | int* buf = new int[w + h + w + h]; 940 | 941 | int* xofs = buf; //new int[w]; 942 | int* yofs = buf + w; //new int[h]; 943 | 944 | short* ialpha = (short*)(buf + w + h); //new short[w * 2]; 945 | short* ibeta = (short*)(buf + w + h + w); //new short[h * 2]; 946 | 947 | float fx; 948 | float fy; 949 | int sx; 950 | int sy; 951 | 952 | #define SATURATE_CAST_SHORT(X) (short)::std::min(::std::max((int)(X + (X >= 0.f ? 
0.5f : -0.5f)), SHRT_MIN), SHRT_MAX); 953 | 954 | for (int dx = 0; dx < w; dx++) 955 | { 956 | fx = (float)((dx + 0.5) * scale_x - 0.5); 957 | sx = static_cast(floor(fx)); 958 | fx -= sx; 959 | 960 | if (sx < 0) 961 | { 962 | sx = 0; 963 | fx = 0.f; 964 | } 965 | if (sx >= srcw - 1) 966 | { 967 | sx = srcw - 2; 968 | fx = 1.f; 969 | } 970 | 971 | xofs[dx] = sx * 4; 972 | 973 | float a0 = (1.f - fx) * INTER_RESIZE_COEF_SCALE; 974 | float a1 = fx * INTER_RESIZE_COEF_SCALE; 975 | 976 | ialpha[dx * 2] = SATURATE_CAST_SHORT(a0); 977 | ialpha[dx * 2 + 1] = SATURATE_CAST_SHORT(a1); 978 | } 979 | 980 | for (int dy = 0; dy < h; dy++) 981 | { 982 | fy = (float)((dy + 0.5) * scale_y - 0.5); 983 | sy = static_cast(floor(fy)); 984 | fy -= sy; 985 | 986 | if (sy < 0) 987 | { 988 | sy = 0; 989 | fy = 0.f; 990 | } 991 | if (sy >= srch - 1) 992 | { 993 | sy = srch - 2; 994 | fy = 1.f; 995 | } 996 | 997 | yofs[dy] = sy; 998 | 999 | float b0 = (1.f - fy) * INTER_RESIZE_COEF_SCALE; 1000 | float b1 = fy * INTER_RESIZE_COEF_SCALE; 1001 | 1002 | ibeta[dy * 2] = SATURATE_CAST_SHORT(b0); 1003 | ibeta[dy * 2 + 1] = SATURATE_CAST_SHORT(b1); 1004 | } 1005 | 1006 | #undef SATURATE_CAST_SHORT 1007 | 1008 | // loop body 1009 | Mat rowsbuf0(w * 4, (size_t)2u); 1010 | Mat rowsbuf1(w * 4, (size_t)2u); 1011 | short* rows0 = (short*)rowsbuf0.data; 1012 | short* rows1 = (short*)rowsbuf1.data; 1013 | 1014 | int prev_sy1 = -2; 1015 | 1016 | for (int dy = 0; dy < h; dy++) 1017 | { 1018 | sy = yofs[dy]; 1019 | 1020 | if (sy == prev_sy1) 1021 | { 1022 | // reuse all rows 1023 | } 1024 | else if (sy == prev_sy1 + 1) 1025 | { 1026 | // hresize one row 1027 | short* rows0_old = rows0; 1028 | rows0 = rows1; 1029 | rows1 = rows0_old; 1030 | const unsigned char* S1 = src + srcstride * (sy + 1); 1031 | 1032 | const short* ialphap = ialpha; 1033 | short* rows1p = rows1; 1034 | for (int dx = 0; dx < w; dx++) 1035 | { 1036 | sx = xofs[dx]; 1037 | short a0 = ialphap[0]; 1038 | short a1 = ialphap[1]; 1039 | 1040 | 
const unsigned char* S1p = S1 + sx; 1041 | #if __ARM_NEON 1042 | int16x4_t _a0 = vdup_n_s16(a0); 1043 | int16x4_t _a1 = vdup_n_s16(a1); 1044 | uint8x8_t _S1 = vld1_u8(S1p); 1045 | int16x8_t _S116 = vreinterpretq_s16_u16(vmovl_u8(_S1)); 1046 | int16x4_t _S1low = vget_low_s16(_S116); 1047 | int16x4_t _S1high = vget_high_s16(_S116); 1048 | int32x4_t _rows1 = vmull_s16(_S1low, _a0); 1049 | _rows1 = vmlal_s16(_rows1, _S1high, _a1); 1050 | int16x4_t _rows1_sr4 = vshrn_n_s32(_rows1, 4); 1051 | vst1_s16(rows1p, _rows1_sr4); 1052 | #else 1053 | rows1p[0] = (S1p[0] * a0 + S1p[4] * a1) >> 4; 1054 | rows1p[1] = (S1p[1] * a0 + S1p[5] * a1) >> 4; 1055 | rows1p[2] = (S1p[2] * a0 + S1p[6] * a1) >> 4; 1056 | rows1p[3] = (S1p[3] * a0 + S1p[7] * a1) >> 4; 1057 | #endif // __ARM_NEON 1058 | 1059 | ialphap += 2; 1060 | rows1p += 4; 1061 | } 1062 | } 1063 | else 1064 | { 1065 | // hresize two rows 1066 | const unsigned char* S0 = src + srcstride * (sy); 1067 | const unsigned char* S1 = src + srcstride * (sy + 1); 1068 | 1069 | const short* ialphap = ialpha; 1070 | short* rows0p = rows0; 1071 | short* rows1p = rows1; 1072 | for (int dx = 0; dx < w; dx++) 1073 | { 1074 | sx = xofs[dx]; 1075 | short a0 = ialphap[0]; 1076 | short a1 = ialphap[1]; 1077 | 1078 | const unsigned char* S0p = S0 + sx; 1079 | const unsigned char* S1p = S1 + sx; 1080 | #if __ARM_NEON 1081 | int16x4_t _a0 = vdup_n_s16(a0); 1082 | int16x4_t _a1 = vdup_n_s16(a1); 1083 | uint8x8_t _S0 = vld1_u8(S0p); 1084 | uint8x8_t _S1 = vld1_u8(S1p); 1085 | int16x8_t _S016 = vreinterpretq_s16_u16(vmovl_u8(_S0)); 1086 | int16x8_t _S116 = vreinterpretq_s16_u16(vmovl_u8(_S1)); 1087 | int16x4_t _S0low = vget_low_s16(_S016); 1088 | int16x4_t _S1low = vget_low_s16(_S116); 1089 | int16x4_t _S0high = vget_high_s16(_S016); 1090 | int16x4_t _S1high = vget_high_s16(_S116); 1091 | int32x4_t _rows0 = vmull_s16(_S0low, _a0); 1092 | int32x4_t _rows1 = vmull_s16(_S1low, _a0); 1093 | _rows0 = vmlal_s16(_rows0, _S0high, _a1); 1094 | _rows1 = 
vmlal_s16(_rows1, _S1high, _a1); 1095 | int16x4_t _rows0_sr4 = vshrn_n_s32(_rows0, 4); 1096 | int16x4_t _rows1_sr4 = vshrn_n_s32(_rows1, 4); 1097 | vst1_s16(rows0p, _rows0_sr4); 1098 | vst1_s16(rows1p, _rows1_sr4); 1099 | #else 1100 | rows0p[0] = (S0p[0] * a0 + S0p[4] * a1) >> 4; 1101 | rows0p[1] = (S0p[1] * a0 + S0p[5] * a1) >> 4; 1102 | rows0p[2] = (S0p[2] * a0 + S0p[6] * a1) >> 4; 1103 | rows0p[3] = (S0p[3] * a0 + S0p[7] * a1) >> 4; 1104 | rows1p[0] = (S1p[0] * a0 + S1p[4] * a1) >> 4; 1105 | rows1p[1] = (S1p[1] * a0 + S1p[5] * a1) >> 4; 1106 | rows1p[2] = (S1p[2] * a0 + S1p[6] * a1) >> 4; 1107 | rows1p[3] = (S1p[3] * a0 + S1p[7] * a1) >> 4; 1108 | #endif // __ARM_NEON 1109 | 1110 | ialphap += 2; 1111 | rows0p += 4; 1112 | rows1p += 4; 1113 | } 1114 | } 1115 | 1116 | prev_sy1 = sy; 1117 | 1118 | // vresize 1119 | short b0 = ibeta[0]; 1120 | short b1 = ibeta[1]; 1121 | 1122 | short* rows0p = rows0; 1123 | short* rows1p = rows1; 1124 | unsigned char* Dp = dst + stride * (dy); 1125 | 1126 | #if __ARM_NEON 1127 | int nn = (w * 4) >> 3; 1128 | #else 1129 | int nn = 0; 1130 | #endif 1131 | int remain = (w * 4) - (nn << 3); 1132 | 1133 | #if __ARM_NEON 1134 | #if __aarch64__ 1135 | int16x4_t _b0 = vdup_n_s16(b0); 1136 | int16x4_t _b1 = vdup_n_s16(b1); 1137 | int32x4_t _v2 = vdupq_n_s32(2); 1138 | for (; nn > 0; nn--) 1139 | { 1140 | int16x4_t _rows0p_sr4 = vld1_s16(rows0p); 1141 | int16x4_t _rows1p_sr4 = vld1_s16(rows1p); 1142 | int16x4_t _rows0p_1_sr4 = vld1_s16(rows0p + 4); 1143 | int16x4_t _rows1p_1_sr4 = vld1_s16(rows1p + 4); 1144 | 1145 | int32x4_t _rows0p_sr4_mb0 = vmull_s16(_rows0p_sr4, _b0); 1146 | int32x4_t _rows1p_sr4_mb1 = vmull_s16(_rows1p_sr4, _b1); 1147 | int32x4_t _rows0p_1_sr4_mb0 = vmull_s16(_rows0p_1_sr4, _b0); 1148 | int32x4_t _rows1p_1_sr4_mb1 = vmull_s16(_rows1p_1_sr4, _b1); 1149 | 1150 | int32x4_t _acc = _v2; 1151 | _acc = vsraq_n_s32(_acc, _rows0p_sr4_mb0, 16); 1152 | _acc = vsraq_n_s32(_acc, _rows1p_sr4_mb1, 16); 1153 | 1154 | int32x4_t _acc_1 = 
_v2; 1155 | _acc_1 = vsraq_n_s32(_acc_1, _rows0p_1_sr4_mb0, 16); 1156 | _acc_1 = vsraq_n_s32(_acc_1, _rows1p_1_sr4_mb1, 16); 1157 | 1158 | int16x4_t _acc16 = vshrn_n_s32(_acc, 2); 1159 | int16x4_t _acc16_1 = vshrn_n_s32(_acc_1, 2); 1160 | 1161 | uint8x8_t _D = vqmovun_s16(vcombine_s16(_acc16, _acc16_1)); 1162 | 1163 | vst1_u8(Dp, _D); 1164 | 1165 | Dp += 8; 1166 | rows0p += 8; 1167 | rows1p += 8; 1168 | } 1169 | #else 1170 | if (nn > 0) 1171 | { 1172 | asm volatile( 1173 | "vdup.s16 d16, %8 \n" 1174 | "mov r4, #2 \n" 1175 | "vdup.s16 d17, %9 \n" 1176 | "vdup.s32 q12, r4 \n" 1177 | "pld [%0, #128] \n" 1178 | "vld1.s16 {d2-d3}, [%0 :128]!\n" 1179 | "pld [%1, #128] \n" 1180 | "vld1.s16 {d6-d7}, [%1 :128]!\n" 1181 | "0: \n" 1182 | "vmull.s16 q0, d2, d16 \n" 1183 | "vmull.s16 q1, d3, d16 \n" 1184 | "vorr.s32 q10, q12, q12 \n" 1185 | "vorr.s32 q11, q12, q12 \n" 1186 | "vmull.s16 q2, d6, d17 \n" 1187 | "vmull.s16 q3, d7, d17 \n" 1188 | "vsra.s32 q10, q0, #16 \n" 1189 | "vsra.s32 q11, q1, #16 \n" 1190 | "pld [%0, #128] \n" 1191 | "vld1.s16 {d2-d3}, [%0 :128]!\n" 1192 | "vsra.s32 q10, q2, #16 \n" 1193 | "vsra.s32 q11, q3, #16 \n" 1194 | "pld [%1, #128] \n" 1195 | "vld1.s16 {d6-d7}, [%1 :128]!\n" 1196 | "vshrn.s32 d20, q10, #2 \n" 1197 | "vshrn.s32 d21, q11, #2 \n" 1198 | "vqmovun.s16 d20, q10 \n" 1199 | "vst1.8 {d20}, [%2]! 
\n" 1200 | "subs %3, #1 \n" 1201 | "bne 0b \n" 1202 | "sub %0, #16 \n" 1203 | "sub %1, #16 \n" 1204 | : "=r"(rows0p), // %0 1205 | "=r"(rows1p), // %1 1206 | "=r"(Dp), // %2 1207 | "=r"(nn) // %3 1208 | : "0"(rows0p), 1209 | "1"(rows1p), 1210 | "2"(Dp), 1211 | "3"(nn), 1212 | "r"(b0), // %8 1213 | "r"(b1) // %9 1214 | : "cc", "memory", "r4", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12"); 1215 | } 1216 | #endif // __aarch64__ 1217 | #endif // __ARM_NEON 1218 | for (; remain; --remain) 1219 | { 1220 | // D[x] = (rows0[x]*b0 + rows1[x]*b1) >> INTER_RESIZE_COEF_BITS; 1221 | *Dp++ = (unsigned char)(((short)((b0 * (short)(*rows0p++)) >> 16) + (short)((b1 * (short)(*rows1p++)) >> 16) + 2) >> 2); 1222 | } 1223 | 1224 | ibeta += 2; 1225 | } 1226 | 1227 | delete[] buf; 1228 | } 1229 | 1230 | void resize_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h) 1231 | { 1232 | // assert srcw % 2 == 0 1233 | // assert srch % 2 == 0 1234 | // assert w % 2 == 0 1235 | // assert h % 2 == 0 1236 | 1237 | const unsigned char* srcY = src; 1238 | unsigned char* dstY = dst; 1239 | resize_bilinear_c1(srcY, srcw, srch, dstY, w, h); 1240 | 1241 | const unsigned char* srcUV = src + srcw * srch; 1242 | unsigned char* dstUV = dst + w * h; 1243 | resize_bilinear_c2(srcUV, srcw / 2, srch / 2, dstUV, w / 2, h / 2); 1244 | } 1245 | #endif // NCNN_PIXEL 1246 | 1247 | } // namespace ncnn 1248 | -------------------------------------------------------------------------------- /src/platform.h: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn 2 | // available. 3 | // 4 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 5 | // 6 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this 7 | // file except in compliance with the License. 
You may obtain a copy of the 8 | // License at 9 | // 10 | // https://opensource.org/licenses/BSD-3-Clause 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | // License for the specific language governing permissions and limitations under 16 | // the License. 17 | 18 | #ifndef NCNN_PLATFORM_H 19 | #define NCNN_PLATFORM_H 20 | 21 | #define NCNN_STDIO 1 22 | #define NCNN_STRING 1 23 | #define NCNN_SIMPLEOCV 0 24 | #define NCNN_SIMPLEOMP 1 25 | #define NCNN_SIMPLESTL 0 26 | #define NCNN_THREADS 1 27 | #define NCNN_BENCHMARK 0 28 | #define NCNN_C_API 1 29 | #define NCNN_PLATFORM_API 1 30 | #define NCNN_PIXEL 1 31 | #define NCNN_PIXEL_ROTATE 1 32 | #define NCNN_PIXEL_AFFINE 1 33 | #define NCNN_PIXEL_DRAWING 1 34 | #define NCNN_VULKAN 0 35 | #define NCNN_SYSTEM_GLSLANG 0 36 | #define NCNN_RUNTIME_CPU 1 37 | #define NCNN_AVX 0 38 | #define NCNN_XOP 0 39 | #define NCNN_FMA 0 40 | #define NCNN_F16C 0 41 | #define NCNN_AVX2 0 42 | #define NCNN_AVXVNNI 0 43 | #define NCNN_AVX512 0 44 | #define NCNN_AVX512VNNI 0 45 | #define NCNN_AVX512BF16 0 46 | #define NCNN_AVX512FP16 0 47 | #define NCNN_VFPV4 1 48 | #if __aarch64__ 49 | #define NCNN_ARM82 1 50 | #define NCNN_ARM82DOT 1 51 | #define NCNN_ARM82FP16FML 1 52 | #define NCNN_ARM84BF16 1 53 | #define NCNN_ARM84I8MM 1 54 | #define NCNN_ARM86SVE 1 55 | #define NCNN_ARM86SVE2 1 56 | #define NCNN_ARM86SVEBF16 1 57 | #define NCNN_ARM86SVEI8MM 1 58 | #define NCNN_ARM86SVEF32MM 1 59 | #endif // __aarch64__ 60 | #define NCNN_MSA 0 61 | #define NCNN_LSX 0 62 | #define NCNN_MMI 0 63 | #define NCNN_RVV 0 64 | #define NCNN_INT8 1 65 | #define NCNN_BF16 1 66 | #define NCNN_FORCE_INLINE 1 67 | 68 | #define NCNN_VERSION_STRING "1.0.22.12.13" 69 | 70 | #ifdef NCNN_STATIC_DEFINE 71 | #define NCNN_EXPORT 72 | #define NCNN_NO_EXPORT 73 | #else 74 | 
#ifndef NCNN_EXPORT 75 | #ifdef ncnn_EXPORTS 76 | /* We are building this library */ 77 | #define NCNN_EXPORT 78 | #else 79 | /* We are using this library */ 80 | #define NCNN_EXPORT 81 | #endif 82 | #endif 83 | 84 | #ifndef NCNN_NO_EXPORT 85 | #define NCNN_NO_EXPORT 86 | #endif 87 | #endif 88 | 89 | #ifndef NCNN_DEPRECATED 90 | #define NCNN_DEPRECATED __attribute__((__deprecated__)) 91 | #endif 92 | 93 | #ifndef NCNN_DEPRECATED_EXPORT 94 | #define NCNN_DEPRECATED_EXPORT NCNN_EXPORT NCNN_DEPRECATED 95 | #endif 96 | 97 | #ifndef NCNN_DEPRECATED_NO_EXPORT 98 | #define NCNN_DEPRECATED_NO_EXPORT NCNN_NO_EXPORT NCNN_DEPRECATED 99 | #endif 100 | 101 | #if 0 /* DEFINE_NO_DEPRECATED */ 102 | #ifndef NCNN_NO_DEPRECATED 103 | #define NCNN_NO_DEPRECATED 104 | #endif 105 | #endif 106 | 107 | #ifdef __cplusplus 108 | 109 | #if NCNN_THREADS 110 | #if (defined _WIN32 && !(defined __MINGW32__)) 111 | #define WIN32_LEAN_AND_MEAN 112 | #include 113 | #include 114 | #else 115 | #include 116 | #endif 117 | #endif // NCNN_THREADS 118 | 119 | #if __ANDROID_API__ >= 26 120 | #define VK_USE_PLATFORM_ANDROID_KHR 121 | #endif // __ANDROID_API__ >= 26 122 | 123 | namespace sim { 124 | 125 | #if NCNN_THREADS 126 | #if (defined _WIN32 && !(defined __MINGW32__)) 127 | class NCNN_EXPORT Mutex { 128 | public: 129 | Mutex() { InitializeSRWLock(&srwlock); } 130 | ~Mutex() {} 131 | void lock() { AcquireSRWLockExclusive(&srwlock); } 132 | void unlock() { ReleaseSRWLockExclusive(&srwlock); } 133 | 134 | private: 135 | friend class ConditionVariable; 136 | // NOTE SRWLock is available from windows vista 137 | SRWLOCK srwlock; 138 | }; 139 | 140 | class NCNN_EXPORT ConditionVariable { 141 | public: 142 | ConditionVariable() { InitializeConditionVariable(&condvar); } 143 | ~ConditionVariable() {} 144 | void wait(Mutex &mutex) { 145 | SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); 146 | } 147 | void broadcast() { WakeAllConditionVariable(&condvar); } 148 | void signal() { 
WakeConditionVariable(&condvar); } 149 | 150 | private: 151 | CONDITION_VARIABLE condvar; 152 | }; 153 | 154 | static unsigned __stdcall start_wrapper(void *args); 155 | class NCNN_EXPORT Thread { 156 | public: 157 | Thread(void *(*start)(void *), void *args = 0) { 158 | _start = start; 159 | _args = args; 160 | handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); 161 | } 162 | ~Thread() {} 163 | void join() { 164 | WaitForSingleObject(handle, INFINITE); 165 | CloseHandle(handle); 166 | } 167 | 168 | private: 169 | friend unsigned __stdcall start_wrapper(void *args) { 170 | Thread *t = (Thread *)args; 171 | t->_start(t->_args); 172 | return 0; 173 | } 174 | HANDLE handle; 175 | void *(*_start)(void *); 176 | void *_args; 177 | }; 178 | 179 | class NCNN_EXPORT ThreadLocalStorage { 180 | public: 181 | ThreadLocalStorage() { key = TlsAlloc(); } 182 | ~ThreadLocalStorage() { TlsFree(key); } 183 | void set(void *value) { TlsSetValue(key, (LPVOID)value); } 184 | void *get() { return (void *)TlsGetValue(key); } 185 | 186 | private: 187 | DWORD key; 188 | }; 189 | #else // (defined _WIN32 && !(defined __MINGW32__)) 190 | class NCNN_EXPORT Mutex { 191 | public: 192 | Mutex() { pthread_mutex_init(&mutex, 0); } 193 | ~Mutex() { pthread_mutex_destroy(&mutex); } 194 | void lock() { pthread_mutex_lock(&mutex); } 195 | void unlock() { pthread_mutex_unlock(&mutex); } 196 | 197 | private: 198 | friend class ConditionVariable; 199 | pthread_mutex_t mutex; 200 | }; 201 | 202 | class NCNN_EXPORT ConditionVariable { 203 | public: 204 | ConditionVariable() { pthread_cond_init(&cond, 0); } 205 | ~ConditionVariable() { pthread_cond_destroy(&cond); } 206 | void wait(Mutex &mutex) { pthread_cond_wait(&cond, &mutex.mutex); } 207 | void broadcast() { pthread_cond_broadcast(&cond); } 208 | void signal() { pthread_cond_signal(&cond); } 209 | 210 | private: 211 | pthread_cond_t cond; 212 | }; 213 | 214 | class NCNN_EXPORT Thread { 215 | public: 216 | Thread(void *(*start)(void *), 
void *args = 0) { 217 | pthread_create(&t, 0, start, args); 218 | } 219 | ~Thread() {} 220 | void join() { pthread_join(t, 0); } 221 | 222 | private: 223 | pthread_t t; 224 | }; 225 | 226 | class NCNN_EXPORT ThreadLocalStorage { 227 | public: 228 | ThreadLocalStorage() { pthread_key_create(&key, 0); } 229 | ~ThreadLocalStorage() { pthread_key_delete(key); } 230 | void set(void *value) { pthread_setspecific(key, value); } 231 | void *get() { return pthread_getspecific(key); } 232 | 233 | private: 234 | pthread_key_t key; 235 | }; 236 | #endif // (defined _WIN32 && !(defined __MINGW32__)) 237 | #else // NCNN_THREADS 238 | class NCNN_EXPORT Mutex { 239 | public: 240 | Mutex() {} 241 | ~Mutex() {} 242 | void lock() {} 243 | void unlock() {} 244 | }; 245 | 246 | class NCNN_EXPORT ConditionVariable { 247 | public: 248 | ConditionVariable() {} 249 | ~ConditionVariable() {} 250 | void wait(Mutex & /*mutex*/) {} 251 | void broadcast() {} 252 | void signal() {} 253 | }; 254 | 255 | class NCNN_EXPORT Thread { 256 | public: 257 | Thread(void *(*/*start*/)(void *), void * /*args*/ = 0) {} 258 | ~Thread() {} 259 | void join() {} 260 | }; 261 | 262 | class NCNN_EXPORT ThreadLocalStorage { 263 | public: 264 | ThreadLocalStorage() { data = 0; } 265 | ~ThreadLocalStorage() {} 266 | void set(void *value) { data = value; } 267 | void *get() { return data; } 268 | 269 | private: 270 | void *data; 271 | }; 272 | #endif // NCNN_THREADS 273 | 274 | class NCNN_EXPORT MutexLockGuard { 275 | public: 276 | MutexLockGuard(Mutex &_mutex) : mutex(_mutex) { mutex.lock(); } 277 | ~MutexLockGuard() { mutex.unlock(); } 278 | 279 | private: 280 | Mutex &mutex; 281 | }; 282 | 283 | } // namespace ncnn 284 | 285 | #if NCNN_SIMPLESTL 286 | #include "simplestl.h" 287 | #else 288 | #include 289 | #include 290 | #include 291 | #include 292 | #endif 293 | 294 | #endif // __cplusplus 295 | 296 | #if NCNN_STDIO 297 | #if NCNN_PLATFORM_API && __ANDROID_API__ >= 8 298 | #include 299 | #define NCNN_LOGE(...) 
\ 300 | do { \ 301 | fprintf(stderr, ##__VA_ARGS__); \ 302 | fprintf(stderr, "\n"); \ 303 | __android_log_print(ANDROID_LOG_WARN, "ncnn", ##__VA_ARGS__); \ 304 | } while (0) 305 | #else // NCNN_PLATFORM_API && __ANDROID_API__ >= 8 306 | #include 307 | #define NCNN_LOGE(...) \ 308 | do { \ 309 | fprintf(stderr, ##__VA_ARGS__); \ 310 | fprintf(stderr, "\n"); \ 311 | } while (0) 312 | #endif // NCNN_PLATFORM_API && __ANDROID_API__ >= 8 313 | #else 314 | #define NCNN_LOGE(...) 315 | #endif 316 | 317 | #if NCNN_FORCE_INLINE 318 | #ifdef _MSC_VER 319 | #define NCNN_FORCEINLINE __forceinline 320 | #elif defined(__GNUC__) 321 | #define NCNN_FORCEINLINE inline __attribute__((__always_inline__)) 322 | #elif defined(__CLANG__) 323 | #if __has_attribute(__always_inline__) 324 | #define NCNN_FORCEINLINE inline __attribute__((__always_inline__)) 325 | #else 326 | #define NCNN_FORCEINLINE inline 327 | #endif 328 | #else 329 | #define NCNN_FORCEINLINE inline 330 | #endif 331 | #else 332 | #define NCNN_FORCEINLINE inline 333 | #endif 334 | 335 | #if defined(_MSC_VER) || defined(__GNUC__) 336 | #pragma push_macro("min") 337 | #pragma push_macro("max") 338 | #undef min 339 | #undef max 340 | #endif 341 | 342 | #endif // NCNN_PLATFORM_H 343 | -------------------------------------------------------------------------------- /src/simpleocv.cpp: -------------------------------------------------------------------------------- 1 | // Tencent is pleased to support the open source community by making ncnn 2 | // available. 3 | // 4 | // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 5 | // 6 | // Licensed under the BSD 3-Clause License (the "License"); you may not use this 7 | // file except in compliance with the License. 
You may obtain a copy of the 8 | // License at 9 | // 10 | // https://opensource.org/licenses/BSD-3-Clause 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | // License for the specific language governing permissions and limitations under 16 | // the License. 17 | 18 | #include "simpleocv.h" 19 | 20 | #include "mat.h" 21 | #include 22 | 23 | #define STB_IMAGE_IMPLEMENTATION 24 | #define STBI_NO_THREAD_LOCALS 25 | #define STBI_ONLY_JPEG 26 | #define STBI_ONLY_PNG 27 | #define STBI_ONLY_BMP 28 | #define STBI_ONLY_PNM 29 | #define STB_IMAGE_STATIC 30 | #include "stb_image.h" 31 | 32 | #define STB_IMAGE_WRITE_IMPLEMENTATION 33 | #define STB_IMAGE_WRITE_STATIC 34 | #include "stb_image_write.h" 35 | 36 | namespace cv { 37 | 38 | Mat imread(const std::string &path, int flags) { 39 | int desired_channels = 0; 40 | if (flags == IMREAD_UNCHANGED) { 41 | desired_channels = 0; 42 | } else if (flags == IMREAD_GRAYSCALE) { 43 | desired_channels = 1; 44 | } else if (flags == IMREAD_COLOR) { 45 | desired_channels = 3; 46 | } else { 47 | // unknown flags 48 | return Mat(); 49 | } 50 | 51 | int w; 52 | int h; 53 | int c; 54 | unsigned char *pixeldata = 55 | stbi_load(path.c_str(), &w, &h, &c, desired_channels); 56 | if (!pixeldata) { 57 | // load failed 58 | return Mat(); 59 | } 60 | 61 | if (desired_channels) { 62 | c = desired_channels; 63 | } 64 | 65 | // copy pixeldata to Mat 66 | Mat img; 67 | if (c == 1) { 68 | img.create(h, w, CV_8UC1); 69 | } else if (c == 3) { 70 | img.create(h, w, CV_8UC3); 71 | } else if (c == 4) { 72 | img.create(h, w, CV_8UC4); 73 | } else { 74 | // unexpected channels 75 | stbi_image_free(pixeldata); 76 | return Mat(); 77 | } 78 | 79 | memcpy(img.data, pixeldata, w * h * c); 80 | 81 | stbi_image_free(pixeldata); 82 | 83 | // // resolve exif orientation 84 
| // { 85 | // std::ifstream ifs; 86 | // ifs.open(filename.c_str(), std::ifstream::in); 87 | // 88 | // if (ifs.good()) 89 | // { 90 | // ExifReader exif_reader(ifs); 91 | // if (exif_reader.parse()) 92 | // { 93 | // ExifEntry_t e = exif_reader.getTag(ORIENTATION); 94 | // int orientation = e.field_u16; 95 | // if (orientation >= 1 && orientation <= 8) 96 | // rotate_by_orientation(img, img, orientation); 97 | // } 98 | // } 99 | // 100 | // ifs.close(); 101 | // } 102 | 103 | // rgb to bgr 104 | if (c == 3) { 105 | uchar *p = img.data; 106 | for (int i = 0; i < w * h; i++) { 107 | std::swap(p[0], p[2]); 108 | p += 3; 109 | } 110 | } 111 | if (c == 4) { 112 | uchar *p = img.data; 113 | for (int i = 0; i < w * h; i++) { 114 | std::swap(p[0], p[2]); 115 | p += 4; 116 | } 117 | } 118 | 119 | return img; 120 | } 121 | 122 | bool imwrite(const std::string &path, const Mat &m, 123 | const std::vector ¶ms) { 124 | const char *_ext = strrchr(path.c_str(), '.'); 125 | if (!_ext) { 126 | // missing extension 127 | return false; 128 | } 129 | 130 | std::string ext = _ext; 131 | Mat img = m.clone(); 132 | 133 | // bgr to rgb 134 | int c = 0; 135 | if (img.type() == CV_8UC1) { 136 | c = 1; 137 | } else if (img.type() == CV_8UC3) { 138 | c = 3; 139 | uchar *p = img.data; 140 | for (int i = 0; i < img.cols * img.rows; i++) { 141 | std::swap(p[0], p[2]); 142 | p += 3; 143 | } 144 | } else if (img.type() == CV_8UC4) { 145 | c = 4; 146 | uchar *p = img.data; 147 | for (int i = 0; i < img.cols * img.rows; i++) { 148 | std::swap(p[0], p[2]); 149 | p += 4; 150 | } 151 | } else { 152 | // unexpected image channels 153 | return false; 154 | } 155 | 156 | bool success = false; 157 | 158 | if (ext == ".jpg" || ext == ".jpeg" || ext == ".JPG" || ext == ".JPEG") { 159 | int quality = 95; 160 | for (size_t i = 0; i < params.size(); i += 2) { 161 | if (params[i] == IMWRITE_JPEG_QUALITY) { 162 | quality = params[i + 1]; 163 | break; 164 | } 165 | } 166 | success = 167 | 
stbi_write_jpg(path.c_str(), img.cols, img.rows, c, img.data, quality); 168 | } else if (ext == ".png" || ext == ".PNG") { 169 | success = stbi_write_png(path.c_str(), img.cols, img.rows, c, img.data, 0); 170 | } else if (ext == ".bmp" || ext == ".BMP") { 171 | success = stbi_write_bmp(path.c_str(), img.cols, img.rows, c, img.data); 172 | } else { 173 | // unknown extension type 174 | return false; 175 | } 176 | 177 | return success; 178 | } 179 | 180 | void imshow(const std::string &name, const Mat &m) { 181 | NCNN_LOGE("imshow save image to %s.png", name.c_str()); 182 | 183 | imwrite(name + ".png", m); 184 | } 185 | 186 | int waitKey(int delay) { 187 | NCNN_LOGE("waitKey stub"); 188 | return -1; 189 | } 190 | 191 | void resize(const Mat &src, Mat &dst, const Size &size, float sw, float sh, 192 | int flags) { 193 | (void)flags; 194 | 195 | int srcw = src.cols; 196 | int srch = src.rows; 197 | 198 | int w = size.width; 199 | int h = size.height; 200 | 201 | if (w == 0 || h == 0) { 202 | w = srcw * sw; 203 | h = srch * sh; 204 | } 205 | 206 | if (w == 0 || h == 0) 207 | return; 208 | 209 | if (w == srcw && h == srch) { 210 | dst = src.clone(); 211 | return; 212 | } 213 | 214 | cv::Mat tmp(h, w, src.c); 215 | if (tmp.empty()) 216 | return; 217 | 218 | if (src.c == 1) 219 | sim::resize_bilinear_c1(src.data, srcw, srch, tmp.data, w, h); 220 | else if (src.c == 3) 221 | sim::resize_bilinear_c3(src.data, srcw, srch, tmp.data, w, h); 222 | else if (src.c == 4) 223 | sim::resize_bilinear_c4(src.data, srcw, srch, tmp.data, w, h); 224 | 225 | dst = tmp; 226 | } 227 | 228 | void rectangle(Mat &img, Point pt1, Point pt2, const Scalar &color, 229 | int thickness, int lineType, int shift) { 230 | Rect rec; 231 | rec.x = std::min(pt1.x, pt2.x); 232 | rec.y = std::min(pt1.y, pt2.y); 233 | rec.width = std::max(pt1.x, pt2.x) - rec.x; 234 | rec.height = std::max(pt1.y, pt2.y) - rec.y; 235 | rectangle(img, rec, color, thickness); 236 | } 237 | 238 | void rectangle(Mat &img, Rect rec, 
const Scalar &_color, int thickness) { 239 | unsigned int color = 0; 240 | unsigned char *border_color = (unsigned char *)&color; 241 | 242 | if (img.c == 1) { 243 | border_color[0] = _color[0]; 244 | sim::draw_rectangle_c1(img.data, img.cols, img.rows, rec.x, rec.y, 245 | rec.width, rec.height, color, thickness); 246 | } else if (img.c == 3) { 247 | border_color[0] = _color[0]; 248 | border_color[1] = _color[1]; 249 | border_color[2] = _color[2]; 250 | sim::draw_rectangle_c3(img.data, img.cols, img.rows, rec.x, rec.y, 251 | rec.width, rec.height, color, thickness); 252 | } else if (img.c == 4) { 253 | border_color[0] = _color[0]; 254 | border_color[1] = _color[1]; 255 | border_color[2] = _color[2]; 256 | border_color[3] = _color[3]; 257 | sim::draw_rectangle_c4(img.data, img.cols, img.rows, rec.x, rec.y, 258 | rec.width, rec.height, color, thickness); 259 | } 260 | } 261 | 262 | void circle(Mat &img, Point center, int radius, const Scalar &_color, 263 | int thickness) { 264 | unsigned int color = 0; 265 | unsigned char *border_color = (unsigned char *)&color; 266 | 267 | if (img.c == 1) { 268 | border_color[0] = _color[0]; 269 | sim::draw_circle_c1(img.data, img.cols, img.rows, center.x, center.y, 270 | radius, color, thickness); 271 | } else if (img.c == 3) { 272 | border_color[0] = _color[0]; 273 | border_color[1] = _color[1]; 274 | border_color[2] = _color[2]; 275 | sim::draw_circle_c3(img.data, img.cols, img.rows, center.x, center.y, 276 | radius, color, thickness); 277 | } else if (img.c == 4) { 278 | border_color[0] = _color[0]; 279 | border_color[1] = _color[1]; 280 | border_color[2] = _color[2]; 281 | border_color[3] = _color[3]; 282 | sim::draw_circle_c4(img.data, img.cols, img.rows, center.x, center.y, 283 | radius, color, thickness); 284 | } 285 | } 286 | 287 | void line(Mat &img, Point p0, Point p1, const Scalar &_color, int thickness) { 288 | unsigned int color = 0; 289 | unsigned char *border_color = (unsigned char *)&color; 290 | 291 | if (img.c == 
1) { 292 | border_color[0] = _color[0]; 293 | sim::draw_line_c1(img.data, img.cols, img.rows, p0.x, p0.y, p1.x, p1.y, 294 | color, thickness); 295 | } else if (img.c == 3) { 296 | border_color[0] = _color[0]; 297 | border_color[1] = _color[1]; 298 | border_color[2] = _color[2]; 299 | sim::draw_line_c3(img.data, img.cols, img.rows, p0.x, p0.y, p1.x, p1.y, 300 | color, thickness); 301 | } else if (img.c == 4) { 302 | border_color[0] = _color[0]; 303 | border_color[1] = _color[1]; 304 | border_color[2] = _color[2]; 305 | border_color[3] = _color[3]; 306 | sim::draw_line_c4(img.data, img.cols, img.rows, p0.x, p0.y, p1.x, p1.y, 307 | color, thickness); 308 | } 309 | } 310 | 311 | void putText(Mat &img, const std::string &text, Point org, int fontFace, 312 | double fontScale, Scalar _color, int thickness) { 313 | const int fontpixelsize = 20 * fontScale; 314 | 315 | unsigned int color = 0; 316 | unsigned char *border_color = (unsigned char *)&color; 317 | 318 | if (img.c == 1) { 319 | border_color[0] = _color[0]; 320 | sim::draw_text_c1(img.data, img.cols, img.rows, text.c_str(), org.x, 321 | org.y - fontpixelsize * 2, fontpixelsize, color); 322 | } else if (img.c == 3) { 323 | border_color[0] = _color[0]; 324 | border_color[1] = _color[1]; 325 | border_color[2] = _color[2]; 326 | sim::draw_text_c3(img.data, img.cols, img.rows, text.c_str(), org.x, 327 | org.y - fontpixelsize * 2, fontpixelsize, color); 328 | } else if (img.c == 4) { 329 | border_color[0] = _color[0]; 330 | border_color[1] = _color[1]; 331 | border_color[2] = _color[2]; 332 | border_color[3] = _color[3]; 333 | sim::draw_text_c4(img.data, img.cols, img.rows, text.c_str(), org.x, 334 | org.y - fontpixelsize * 2, fontpixelsize, color); 335 | } 336 | } 337 | 338 | Size getTextSize(const std::string &text, int fontFace, double fontScale, 339 | int thickness, int *baseLine) { 340 | const int fontpixelsize = 20 * fontScale; 341 | 342 | int w; 343 | int h; 344 | sim::get_text_drawing_size(text.c_str(), 
fontpixelsize, &w, &h); 345 | 346 | *baseLine = 0; 347 | 348 | return Size(w, h); 349 | } 350 | 351 | } // namespace cv 352 | --------------------------------------------------------------------------------