├── .gitignore ├── data ├── zgr.jpg ├── zgr2.jpg └── zgrbig.jpg ├── pic ├── siftysifty.jpg ├── siftymatchsifty2.jpg └── siftysiftymatchopencv.jpg ├── include ├── boxfilter.h ├── filter.h ├── iirfilter.h ├── gaussfiler.h ├── linearfilter.h ├── siftysiftytest.h ├── structs.h ├── utils.h ├── siftysifty.h └── imageutils.h ├── main.cpp ├── CMakeLists.txt ├── README.md ├── LICENSE └── src ├── gaussfiler.cpp ├── boxfilter.cpp ├── siftysiftytest.cpp ├── iirfilter.cpp ├── siftysifty.cpp └── linearfilter.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea 2 | /cmake-build-debug 3 | -------------------------------------------------------------------------------- /data/zgr.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/SiftySifty/HEAD/data/zgr.jpg -------------------------------------------------------------------------------- /data/zgr2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/SiftySifty/HEAD/data/zgr2.jpg -------------------------------------------------------------------------------- /data/zgrbig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/SiftySifty/HEAD/data/zgrbig.jpg -------------------------------------------------------------------------------- /pic/siftysifty.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/SiftySifty/HEAD/pic/siftysifty.jpg -------------------------------------------------------------------------------- /pic/siftymatchsifty2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/SiftySifty/HEAD/pic/siftymatchsifty2.jpg 
-------------------------------------------------------------------------------- /pic/siftysiftymatchopencv.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/SiftySifty/HEAD/pic/siftysiftymatchopencv.jpg -------------------------------------------------------------------------------- /include/boxfilter.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/3/21. 3 | */ 4 | #ifndef SIFTYSIFTY_BOXFILTER_H 5 | #define SIFTYSIFTY_BOXFILTER_H 6 | 7 | namespace SiftySifty { 8 | /** 9 | * box blur 10 | */ 11 | void boxFilter(int16_t *src, int16_t *dst, int width, int height, int radius); 12 | 13 | } 14 | 15 | #endif //SIFTYSIFTY_BOXFILTER_H 16 | -------------------------------------------------------------------------------- /include/filter.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/3/23. 3 | */ 4 | #ifndef SIFTYSIFTY_FILTER_H 5 | #define SIFTYSIFTY_FILTER_H 6 | 7 | namespace SiftySifty { 8 | 9 | static const int FILTER_SHIFT = 16; 10 | static const int FILTER_SCALE = (1 << FILTER_SHIFT); 11 | static const int FILTER_DELTA = (1 << (FILTER_SHIFT - 1)); 12 | 13 | } 14 | 15 | #endif //SIFTYSIFTY_FILTER_H 16 | -------------------------------------------------------------------------------- /include/iirfilter.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/3/23. 
3 | */ 4 | 5 | #ifndef SIFTYSIFTY_IIRFILTER_H 6 | #define SIFTYSIFTY_IIRFILTER_H 7 | 8 | namespace SiftySifty { 9 | 10 | void IIRFilter(int16_t *src, int16_t *dst, int width, int height, float sigma); 11 | 12 | void IIRFilter(uint8_t *src, uint8_t *dst, int width, int height, float sigma); 13 | 14 | } 15 | 16 | #endif //SIFTYSIFTY_IIRFILTER_H 17 | -------------------------------------------------------------------------------- /include/gaussfiler.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/3/21. 3 | */ 4 | #ifndef SIFTYSIFTY_GAUSSFILER_H 5 | #define SIFTYSIFTY_GAUSSFILER_H 6 | 7 | namespace SiftySifty { 8 | 9 | /** 10 | * use 3 box filter to fitting gauss filter 11 | */ 12 | void gaussFilterBy3BoxFilter(short *src, short *dst, int width, int height, float sigma); 13 | 14 | /** 15 | * usr IIR filter to fitting gauss filter 16 | */ 17 | void gaussFilterByIIRFilter(short *src, short *dst, int width, int height, float sigma); 18 | 19 | } 20 | 21 | #endif //SIFTYSIFTY_GAUSSFILER_H 22 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "siftysiftytest.h" 4 | 5 | using namespace std; 6 | using namespace SiftySifty; 7 | 8 | #define IMAGE_PATH "../data/zgr.jpg" 9 | #define IMAGE_PATH2 "../data/zgr2.jpg" 10 | #define IMAGE_PATH_BIG "../data/zgrbig.jpg" 11 | 12 | int main() { 13 | if (false) { 14 | drawKeyPoint(IMAGE_PATH); 15 | } 16 | 17 | if (false) { 18 | drawKeyPointCmpToOpenCV(IMAGE_PATH); 19 | } 20 | 21 | if (false) { 22 | matchKeyPoint(IMAGE_PATH, IMAGE_PATH); 23 | } 24 | 25 | if (true) { 26 | matchKeyPoint(IMAGE_PATH, IMAGE_PATH2); 27 | } 28 | 29 | if (false) { 30 | matchKeyPointSiftySiftyWithOpenCV(IMAGE_PATH, IMAGE_PATH); 31 | } 32 | 33 | if (false) { 34 | testSpeedSiftySiftyAndOpenCV(IMAGE_PATH_BIG); 35 | } 36 | 37 | return 0; 38 | 
} 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /include/linearfilter.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/1/18. 3 | */ 4 | #ifndef SIFT_LINEARFILTER_H 5 | #define SIFT_LINEARFILTER_H 6 | 7 | namespace SiftySifty { 8 | /** 9 | * filter the rows 10 | */ 11 | void linearFilterHorizon(uint8_t *src, 12 | uint8_t *dst, 13 | int width, 14 | int height, 15 | int (*mult)[256], 16 | int delta, 17 | int shift, 18 | int size); 19 | 20 | /** 21 | * filter the cols 22 | */ 23 | void linearFilterVertical(uint8_t *src, 24 | uint8_t *dst, 25 | int width, 26 | int height, 27 | int (*mult)[256], 28 | int delta, 29 | int shift, 30 | int size); 31 | } 32 | #endif //SIFT_LINEARFILTER_H 33 | -------------------------------------------------------------------------------- /include/siftysiftytest.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/4/3. 
3 | */ 4 | #ifndef SIFTYSIFTY_SIFTYTEST_H 5 | #define SIFTYSIFTY_SIFTYTEST_H 6 | 7 | #include 8 | 9 | using namespace std; 10 | 11 | namespace SiftySifty { 12 | 13 | /** 14 | * use the opencv to draw the siftysifty keypoint 15 | * @param path 16 | */ 17 | void drawKeyPoint(string path); 18 | 19 | /** 20 | * draw the keypoint on the same pic by SiftySifty and OpenCV 21 | * @param path 22 | */ 23 | void drawKeyPointCmpToOpenCV(string path); 24 | 25 | /** 26 | * math the keypoint that extracted by SiftySifty 27 | * @param path 28 | */ 29 | void matchKeyPoint(string path1, string path2); 30 | 31 | /** 32 | * match the siftysifty keypoint with opencv keypoint 33 | * @param path 34 | */ 35 | void matchKeyPointSiftySiftyWithOpenCV(string path1, string path2); 36 | 37 | /** 38 | * test the speed of SiftySifty and OpenCV 39 | * @param path 40 | */ 41 | void testSpeedSiftySiftyAndOpenCV(string path); 42 | 43 | } 44 | 45 | #endif //SIFTYSIFTY_SIFTYTEST_H 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.7) 2 | project(SiftySifty) 3 | 4 | FIND_PACKAGE(OpenCV REQUIRED) 5 | 6 | # for speed test should add openmp support 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 8 | # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fopenmp -lstdc++ -O3") 9 | # set(CMAKE_CXX_COMPILER /usr/local/bin/gcc-7) 10 | 11 | FIND_PACKAGE(OpenCV REQUIRED) 12 | 13 | include_directories(include) 14 | 15 | set(SOURCE_FILES 16 | main.cpp 17 | include/utils.h 18 | include/structs.h 19 | src/linearfilter.cpp 20 | include/linearfilter.h 21 | include/boxfilter.h 22 | src/boxfilter.cpp 23 | src/gaussfiler.cpp 24 | include/gaussfiler.h 25 | include/siftysifty.h 26 | src/siftysifty.cpp 27 | src/iirfilter.cpp 28 | include/iirfilter.h 29 | include/filter.h 30 | src/siftysiftytest.cpp 31 | 
include/siftysiftytest.h 32 | include/imageutils.h) 33 | 34 | add_executable(SiftySifty ${SOURCE_FILES}) 35 | 36 | target_link_libraries(SiftySifty ${OpenCV_LIBS}) 37 | -------------------------------------------------------------------------------- /include/structs.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/1/16. 3 | */ 4 | 5 | #ifndef SIFT_STRUCTS_H_H 6 | #define SIFT_STRUCTS_H_H 7 | 8 | #include 9 | #include 10 | 11 | namespace SiftySifty { 12 | 13 | typedef struct KeyPoint { 14 | /**the coordinate of keyPoint*/ 15 | float x; 16 | float y; 17 | 18 | /**the coordinate in pyramid*/ 19 | int octaveX; 20 | int octaveY; 21 | 22 | /**nothing*/ 23 | float score; 24 | 25 | float response; 26 | 27 | int octave; 28 | 29 | int octaveLayer; 30 | 31 | float octaveLayersShift; 32 | 33 | /**the scale of the keyPoint*/ 34 | float size; 35 | 36 | /**the scale of the keyPoint, in the pyramid*/ 37 | float octaveSize; 38 | 39 | /**the direction of the keypoint*/ 40 | float angle; 41 | 42 | /**the descriptor of the keyPoint*/ 43 | float *descriptor; 44 | } KeyPoint; 45 | 46 | template 47 | struct Mat { 48 | T *data; 49 | 50 | int width; 51 | int height; 52 | }; 53 | 54 | } 55 | #endif //SIFT_STRUCTS_H_H 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SiftySifty 2 | SiftySifty is an open-source library that extracts SIFT keypoints from an image. It is written in pure C++ and doesn't need any other library. 3 | 4 | ## Include 5 | The library only extracts SIFT keypoints from an image; it doesn't contain matching or display. The SIFT algorithm reference: Lowe, D. Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision, 60, 2 (2004), pp.91--110. Website: http://www.cs.ubc.ca/~lowe/keypoints/ 6 | 7 | ## Demo 8 | Running the demo requires OpenCV. 
The demo displays the keypoints on an image and matches keypoints against OpenCV. 9 | 10 | ### Display SIFT keypoints by SiftySifty 11 | The keypoints drawn on the image are detected by SiftySifty and displayed by OpenCV. 12 | 13 | ![](pic/siftysifty.jpg) 14 |
15 |
16 | ### Match the images using SIFT keypoints by SiftySifty 17 | The keypoints drawn on the image are detected by SiftySifty; matching and display are done by OpenCV. 18 | ![](pic/siftymatchsifty2.jpg) 19 |
20 |
21 | ### Match the images using SIFT keypoints by SiftySifty and OpenCV 22 | Match keypoints between SiftySifty and OpenCV: the left image is SiftySifty, the right image is OpenCV. Displayed by OpenCV. 23 | ![](pic/siftysiftymatchopencv.jpg) 24 | 25 | 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, 惊奇漫画 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | -------------------------------------------------------------------------------- /src/gaussfiler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/3/21. 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "boxfilter.h" 10 | #include "iirfilter.h" 11 | #include "gaussfiler.h" 12 | 13 | namespace SiftySifty { 14 | 15 | /** 16 | * use 3 box filter to fitting gauss filter 17 | */ 18 | void gaussFilterBy3BoxFilter(int16_t *src, int16_t *dst, int width, int height, float sigma) { 19 | /** 20 | * get the radius for 3 box filter 21 | * ref:http://blog.ivank.net/fastest-gaussian-blur.html 22 | */ 23 | float wIdeal = sqrt(12.0 * sigma * sigma / 3 + 1.0); 24 | int wl = floor(wIdeal); 25 | 26 | if (0 == wl % 2) { 27 | wl--; 28 | } 29 | 30 | int wu = wl + 2; 31 | 32 | float mIdeal = (12.0 * sigma * sigma - 3 * wl * wl - 4 * 3 * wl - 3 * 3) / (-4 * wl - 4); 33 | int m = round(mIdeal); 34 | 35 | int radius[3]; 36 | for (int i = 0; i < 3; ++i) { 37 | radius[i] = (i < m ? wl : wu) / 2; 38 | } 39 | 40 | short *tmp = (short *) malloc(sizeof(short) * width * height); 41 | 42 | boxFilter(src, dst, width, height, radius[0]); 43 | boxFilter(dst, tmp, width, height, radius[1]); 44 | boxFilter(tmp, dst, width, height, radius[2]); 45 | 46 | free(tmp); 47 | } 48 | 49 | /** 50 | * use the IIR method to fit the gauss filter 51 | * @param src 52 | * @param dst 53 | * @param width 54 | * @param height 55 | * @param sigma 56 | */ 57 | void gaussFilterByIIRFilter(short *src, short *dst, int width, int height, float sigma) { 58 | IIRFilter(src, dst, width, height, sigma); 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /include/utils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/1/16. 
/**
 * Direction of the vector (x, y) in degrees, in the range [0, 360).
 *
 * The zero vector has no direction and yields 0. A result that rounds up to
 * exactly 360 (a tiny negative y with a positive x) is wrapped to 0, so the
 * returned value is always strictly below 360.
 *
 * @param y vertical component
 * @param x horizontal component
 * @return the angle in degrees, 0 <= angle < 360
 */
static float atan2f360(float y, float x) {
    /** same value as the PI constant of this header, kept local so the function has no mutable global input */
    const float pi = 3.1415926f;

    if (0 == x) {
        if (0 == y) {
            return 0.f;
        }

        return (y > 0) ? 90.f : 270.f;
    }

    if (0 == y) {
        return (x > 0) ? 0.f : 180.f;
    }

    /** angle of the vector mirrored into the first quadrant, in (0, 90) */
    const float angle = atan2f(fabsf(y), fabsf(x)) * 180.f / pi;

    float result;

    if (x > 0) {
        result = (y > 0) ? angle : (360.f - angle);
    } else {
        result = (y > 0) ? (180.f - angle) : (180.f + angle);
    }

    /** 360 - angle rounds to 360 when angle is below float resolution */
    return (result >= 360.f) ? 0.f : result;
}
region*/ 45 | static const int SIFT_DESCRIPTOR_HIST_BIN = 8; 46 | 47 | static const float SIFT_ORIENTATION_PEAK_RATIO = 0.8f; 48 | 49 | /**360 splited to 36*/ 50 | static const int SIFT_ORIENTATION_HIST_BINS = 36; 51 | 52 | /**adjust 5 time*/ 53 | static const int SIFT_MAX_ADJUST_STEP = 5; 54 | 55 | /**the border of the image*/ 56 | static const int SIFT_IMAGE_BORDER = 5; 57 | 58 | static const float SIFT_ORIENTATION_SIGMA_FCTER = 1.5f; 59 | 60 | /**the radius of hist SIFT_ORIENTATION_RADIUS * sigma*/ 61 | static const float SIFT_ORIENTATION_RADIUS = (3 * SIFT_ORIENTATION_SIGMA_FCTER); 62 | 63 | /**SIFT_DESCRPTOR_SCAE_FCTER * sigma*/ 64 | static const float SIFT_DESCRIPTOR_SCAE_FCTER = 3.0f; 65 | 66 | static const float SIFT_DESCRIPTOR_MAGNITUDE_THRESHOLD = 0.2f; 67 | 68 | static const float SIFT_DESCRIPTOR_FCTOR = 512.0f; 69 | 70 | void sift(uint8_t *image, 71 | int width, int height, 72 | vector &keyPoints, 73 | int octaveLayers, 74 | float sigma, 75 | float contrastThreshold, 76 | int edgeThreshold, 77 | bool doubleInitImage, 78 | int descriptorWidth, 79 | int descriptorHistBin); 80 | 81 | void sift(uint8_t *image, int width, int height, vector &keyPoints); 82 | 83 | void initSpeed(uint8_t *src, int width, int height); 84 | 85 | } 86 | 87 | #endif //SIFTYSIFTY_SIFTYSIFTY_H 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /src/boxfilter.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/3/21. 
3 | */ 4 | #include 5 | 6 | #include "utils.h" 7 | #include "filter.h" 8 | #include "boxfilter.h" 9 | 10 | namespace SiftySifty { 11 | 12 | /** 13 | * boxFilter in row 14 | * dst[i] = sum(src[i - radius, i + radius]) / (2 * radius + 1) 15 | * scale = (int) (1.0f / (2 * radius + 1) * (1 << shift)) 16 | * delta = (1 << (shift - 1)) 17 | * 18 | * dst[i] = (sum(src[i - radius, i + radius]) * scale + delta) >> shift 19 | * 20 | * the src have border with radius's cols in left and radius's cols in right 21 | * 22 | * T can be uint8_t/int8_t/uint16_t/int16_t 23 | */ 24 | template 25 | void boxFilterRow(T *src, T *dst, int width, int height, int radius, int scale, int delta, int shift) { 26 | int radius2 = radius + radius; 27 | int size = radius2 + 1; 28 | 29 | int maxThreadNum = getHardwareCPUNum(); 30 | int threadIndex = 0; 31 | 32 | int stride = max_value((int) (roundf(1.f * height / maxThreadNum)), 1); 33 | 34 | #pragma omp parallel for private(threadIndex) 35 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 36 | int start = threadIndex * stride; 37 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : min_value(start + stride, height); 38 | 39 | T *srcData = src + start * (width + radius2); 40 | T *dstData = dst + start * width; 41 | 42 | int64_t sum; 43 | 44 | for (int y = start; y < end; ++y) { 45 | sum = 0; 46 | 47 | for (int x = 0; x < size; ++x) { 48 | sum += srcData[x]; 49 | } 50 | 51 | dstData[0] = (T) ((sum * scale + delta) >> shift); 52 | 53 | for (int x = 1; x < width; ++x) { 54 | sum += srcData[x + radius2] - srcData[x - 1]; 55 | 56 | dstData[x] = (T) ((sum * scale + delta) >> shift); 57 | } 58 | 59 | srcData += (width + radius2); 60 | dstData += width; 61 | } 62 | } 63 | } 64 | 65 | /** 66 | * the src with radius rows int top and bottom 67 | */ 68 | template 69 | void boxFilterCol(T *src, T *dst, int width, int height, int radius, int scale, int delta, int shift) { 70 | int radius2 = radius + radius; 71 | int size = radius2 + 1; 72 | 73 | int maxThreadNum = getHardwareCPUNum(); 74 | int threadIndex = 0; 75 | 76 | /**split the width's cols to maxThreadNum's thread*/ 77 | int stride = max_value((int) (roundf(1.f * width / maxThreadNum)), 1); 78 | 79 | #pragma omp parallel for private(threadIndex) 80 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 81 | int start = threadIndex * stride; 82 | int end = (threadIndex == (maxThreadNum - 1)) ? 
width : min_value(width, start + stride); 83 | 84 | int range = end - start; 85 | 86 | int interval = radius2 * width; 87 | 88 | int64_t *sum = (int64_t *) malloc(sizeof(int64_t) * range); 89 | memset(sum, 0, sizeof(int64_t) * range); 90 | 91 | T *srcData = src + start; 92 | T *dstData = dst + start; 93 | 94 | for (int y = 0; y < radius2; ++y) { 95 | for (int x = 0; x < range; ++x) { 96 | sum[x] += srcData[x]; 97 | } 98 | 99 | srcData += width; 100 | } 101 | 102 | for (int y = 0; y < height; ++y) { 103 | for (int x = 0; x < range; ++x) { 104 | sum[x] += srcData[x]; 105 | 106 | dstData[x] = (T) ((sum[x] * scale + delta) >> shift); 107 | 108 | sum[x] -= srcData[x - interval]; 109 | } 110 | 111 | srcData += width; 112 | dstData += width; 113 | } 114 | 115 | free(sum); 116 | } 117 | } 118 | 119 | /** 120 | * box blur 121 | * first blur int row 122 | * than blur int cols 123 | */ 124 | template 125 | void boxFilter(T *src, T *dst, int width, int height, int radius) { 126 | if (0 >= radius) { 127 | memcpy(dst, src, sizeof(T) * width * height); 128 | 129 | return; 130 | } 131 | 132 | int radius2 = radius + radius; 133 | int size = radius2 + 1; 134 | 135 | int scale = (int) (1.0 / size * FILTER_SCALE); 136 | 137 | T *srcTemp = (T *) malloc(sizeof(T) * (width + radius2) * height); 138 | 139 | T *srcData = src; 140 | T *srcTempData = srcTemp; 141 | 142 | /**copy memory to srcMediate*/ 143 | for (int y = 0; y < height; ++y) { 144 | std::fill(srcTempData, srcTempData + radius, srcData[0]); 145 | 146 | memcpy(srcTempData + radius, srcData, sizeof(T) * width); 147 | 148 | std::fill(srcTempData + radius + width, 149 | srcTempData + radius2 + width, 150 | srcData[width - 1]); 151 | 152 | srcData += width; 153 | srcTempData += (width + radius2); 154 | } 155 | 156 | T *dstTemp = (T *) malloc(sizeof(T) * width * (height + radius2)); 157 | 158 | /**blur in row*/ 159 | boxFilterRow(srcTemp, dstTemp + (radius * width), 160 | width, height, radius, 161 | scale, FILTER_DELTA, 
FILTER_SHIFT); 162 | 163 | for (int y = 0; y < radius; ++y) { 164 | memcpy(dstTemp + y * width, dstTemp + radius * width, sizeof(T) * width); 165 | memcpy(dstTemp + (radius + height + y) * width, dstTemp + (radius + height - 1) * width, sizeof(T) * width); 166 | } 167 | 168 | boxFilterCol(dstTemp, dst, width, height, radius, scale, FILTER_DELTA, FILTER_SHIFT); 169 | 170 | free(srcTemp); 171 | free(dstTemp); 172 | } 173 | 174 | /** 175 | * box blur 176 | */ 177 | void boxFilter(int16_t *src, int16_t *dst, int width, int height, int radius) { 178 | boxFilter(src, dst, width, height, radius); 179 | } 180 | 181 | } 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /src/siftysiftytest.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/4/3. 3 | */ 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "utils.h" 13 | #include "siftysifty.h" 14 | #include "siftysiftytest.h" 15 | 16 | using namespace std; 17 | using namespace cv; 18 | 19 | namespace SiftySifty { 20 | /** 21 | * use the opencv to draw the siftysifty keypoint 22 | * @param path 23 | */ 24 | void drawKeyPoint(string path) 25 | { 26 | /**read the pic*/ 27 | cv::Mat originImage = imread(path); 28 | cv::Mat grayImage; 29 | cvtColor(originImage, grayImage, CV_RGB2GRAY); 30 | 31 | vector keyPoints; 32 | SiftySifty::sift(grayImage.data, grayImage.cols, grayImage.rows, keyPoints); 33 | 34 | vector opencvKeyPoints(keyPoints.size()); 35 | for (int i = 0; i < keyPoints.size(); ++i) { 36 | opencvKeyPoints[i].pt.x = keyPoints[i].x; 37 | opencvKeyPoints[i].pt.y = keyPoints[i].y; 38 | opencvKeyPoints[i].size = keyPoints[i].size; 39 | opencvKeyPoints[i].angle = keyPoints[i].angle; 40 | } 41 | 42 | /**draw it*/ 43 | cv::Mat output; 44 | 
drawKeypoints(grayImage, opencvKeyPoints, output, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS); 45 | imshow("drawKeyPoint", output); 46 | 47 | cvWaitKey(0); 48 | } 49 | 50 | /** 51 | * draw the keypoint on the same pic by SiftySifty and OpenCV 52 | * @param path 53 | */ 54 | void drawKeyPointCmpToOpenCV(string path) 55 | { 56 | /**read the pic*/ 57 | cv::Mat originImage = imread(path); 58 | cv::Mat grayImage; 59 | cvtColor(originImage, grayImage, CV_RGB2GRAY); 60 | 61 | vector keyPoints; 62 | SiftySifty::sift(grayImage.data, grayImage.cols, grayImage.rows, keyPoints); 63 | 64 | vector opencvKeyPoints1(keyPoints.size()); 65 | for (int i = 0; i < keyPoints.size(); ++i) 66 | { 67 | opencvKeyPoints1[i].pt.x = keyPoints[i].x; 68 | opencvKeyPoints1[i].pt.y = keyPoints[i].y; 69 | opencvKeyPoints1[i].size = keyPoints[i].size; 70 | opencvKeyPoints1[i].angle = keyPoints[i].angle; 71 | } 72 | 73 | cv::Mat output1; 74 | drawKeypoints(grayImage, opencvKeyPoints1, output1, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS); 75 | imshow("SiftySifty", output1); 76 | 77 | vector opencvKeyPoints2; 78 | Ptr f2d = xfeatures2d::SIFT::create(); 79 | f2d->detect(grayImage, opencvKeyPoints2); 80 | 81 | cv::Mat output2; 82 | drawKeypoints(grayImage, opencvKeyPoints2, output2, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS); 83 | imshow("OpenCV", output2); 84 | 85 | cvWaitKey(0); 86 | } 87 | 88 | /** 89 | * math the keypoint that extracted by SiftySifty 90 | * @param path 91 | */ 92 | void matchKeyPoint(string path1, string path2) 93 | { 94 | /**read the pic*/ 95 | cv::Mat originImage1 = imread(path1); 96 | cv::Mat grayImage1; 97 | cvtColor(originImage1, grayImage1, CV_RGB2GRAY); 98 | 99 | vector keyPoints1; 100 | SiftySifty::sift(grayImage1.data, grayImage1.cols, grayImage1.rows, keyPoints1); 101 | 102 | vector opencvKeyPoints1(keyPoints1.size()); 103 | cv::Mat ds1(keyPoints1.size(), 128, CV_32F); 104 | for (int i = 0; i < keyPoints1.size(); ++i) 105 | { 106 | 
opencvKeyPoints1[i].pt.x = keyPoints1[i].x; 107 | opencvKeyPoints1[i].pt.y = keyPoints1[i].y; 108 | opencvKeyPoints1[i].size = keyPoints1[i].size; 109 | opencvKeyPoints1[i].angle = keyPoints1[i].angle; 110 | 111 | memcpy((float *) ds1.data + i * 128, keyPoints1[i].descriptor, sizeof(float) * 128); 112 | } 113 | 114 | /**read the pic*/ 115 | cv::Mat originImage2 = imread(path2); 116 | cv::Mat grayImage2; 117 | cvtColor(originImage2, grayImage2, CV_RGB2GRAY); 118 | 119 | vector keyPoints2; 120 | SiftySifty::sift(grayImage2.data, grayImage2.cols, grayImage2.rows, keyPoints2); 121 | 122 | vector opencvKeyPoints2(keyPoints2.size()); 123 | cv::Mat ds2(keyPoints2.size(), 128, CV_32F); 124 | for (int i = 0; i < keyPoints2.size(); ++i) 125 | { 126 | opencvKeyPoints2[i].pt.x = keyPoints2[i].x; 127 | opencvKeyPoints2[i].pt.y = keyPoints2[i].y; 128 | opencvKeyPoints2[i].size = keyPoints2[i].size; 129 | opencvKeyPoints2[i].angle = keyPoints2[i].angle; 130 | 131 | memcpy((float *) ds2.data + i * 128, keyPoints2[i].descriptor, sizeof(float) * 128); 132 | } 133 | 134 | BFMatcher matcher; 135 | vector matches; 136 | matcher.match(ds1, ds2, matches); 137 | 138 | cv::Mat img_matches; 139 | drawMatches(grayImage1, opencvKeyPoints1, grayImage2, opencvKeyPoints2, matches, img_matches); 140 | imshow("SiftySifty match SiftySifty", img_matches); 141 | 142 | cvWaitKey(0); 143 | } 144 | 145 | /** 146 | * match the siftysifty keypoint with opencv keypoint 147 | * @param path 148 | */ 149 | void matchKeyPointSiftySiftyWithOpenCV(string path1, string path2) 150 | { 151 | /**read the pic*/ 152 | cv::Mat originImage1 = imread(path1); 153 | cv::Mat grayImage1; 154 | cvtColor(originImage1, grayImage1, CV_RGB2GRAY); 155 | 156 | vector keyPoints1; 157 | SiftySifty::sift(grayImage1.data, grayImage1.cols, grayImage1.rows, keyPoints1); 158 | 159 | vector opencvKeyPoints1(keyPoints1.size()); 160 | cv::Mat ds1(keyPoints1.size(), 128, CV_32F); 161 | for (int i = 0; i < keyPoints1.size(); ++i) 162 | { 163 | 
opencvKeyPoints1[i].pt.x = keyPoints1[i].x; 164 | opencvKeyPoints1[i].pt.y = keyPoints1[i].y; 165 | opencvKeyPoints1[i].size = keyPoints1[i].size; 166 | opencvKeyPoints1[i].angle = keyPoints1[i].angle; 167 | 168 | memcpy((float *) ds1.data + i * 128, keyPoints1[i].descriptor, sizeof(float) * 128); 169 | } 170 | 171 | /**read the pic*/ 172 | cv::Mat originImage2 = imread(path2); 173 | cv::Mat grayImage2; 174 | cvtColor(originImage2, grayImage2, CV_RGB2GRAY); 175 | vector opencvKeyPoints2; 176 | Ptr f2d = xfeatures2d::SIFT::create(); 177 | cv::Mat ds2; 178 | f2d->detectAndCompute(grayImage2, noArray(), opencvKeyPoints2, ds2); 179 | 180 | BFMatcher matcher; 181 | vector matches; 182 | matcher.match(ds1, ds2, matches); 183 | 184 | cv::Mat img_matches; 185 | drawMatches(grayImage1, opencvKeyPoints1, grayImage2, opencvKeyPoints2, matches, img_matches); 186 | imshow("SiftySifty match OpenCV", img_matches); 187 | 188 | cvWaitKey(0); 189 | } 190 | 191 | /** 192 | * test the speed of SiftySifty and OpenCV 193 | * @param path 194 | */ 195 | void testSpeedSiftySiftyAndOpenCV(string path) { 196 | /**read the pic*/ 197 | cv::Mat originImage = imread(path); 198 | cv::Mat grayImage; 199 | cvtColor(originImage, grayImage, CV_RGB2GRAY); 200 | 201 | long total = 0; 202 | for (int i = 0; i < 100; ++i) { 203 | long t1 = getCurrentTime(); 204 | 205 | vector keyPoints; 206 | sift(grayImage.data, grayImage.cols, grayImage.rows, keyPoints); 207 | 208 | long t2 = getCurrentTime(); 209 | 210 | long cur = t2 - t1; 211 | total += cur; 212 | 213 | cout << "SiftySifty, time:" << (i + 1) << ", cost:" << cur << "ms" << endl; 214 | } 215 | 216 | float siftysiftyCost = 1.0 * total / 100; 217 | 218 | total = 0; 219 | for (int i = 0; i < 100; ++i) { 220 | long t1 = getCurrentTime(); 221 | 222 | vector opencvKeyPoints; 223 | Ptr f2d = xfeatures2d::SIFT::create(); 224 | cv::Mat ds; 225 | f2d->detectAndCompute(grayImage, noArray(), opencvKeyPoints, ds); 226 | 227 | long t2 = getCurrentTime(); 228 | 229 | 
long cur = t2 - t1; 230 | total += cur; 231 | 232 | cout << "OpenCV, time:" << (i + 1) << ", cost:" << cur << "ms" << endl; 233 | } 234 | 235 | float opencvCost = 1.0 * total / 100; 236 | 237 | cout << "SiftySifty cost time(the average of 100 times):" << siftysiftyCost << "ms" << endl; 238 | cout << "OpenCV cost time(the average of 100 times):" << opencvCost << "ms" << endl; 239 | } 240 | 241 | } 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | -------------------------------------------------------------------------------- /src/iirfilter.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/3/23. 3 | */ 4 | 5 | #include "utils.h" 6 | #include "filter.h" 7 | #include "iirfilter.h" 8 | 9 | #ifdef _OPENMP 10 | 11 | #include 12 | 13 | #endif 14 | 15 | namespace SiftySifty { 16 | /** 17 | * use iir to filter the src 18 | * ref:"Recursive Implementation of the gaussian filter." 19 | * w[n] = (B * input[n] + b1 * w[n-1] + b2 * w[n-2] + b3 * w[n-3] + delta) >> shift 20 | */ 21 | template 22 | void IIRFilterRow(T *src, T *dst, 23 | const int width, const int height, 24 | const int32_t B, 25 | const int32_t b1, const int32_t b2, const int32_t b3, 26 | const int32_t delta, const int32_t shift) { 27 | int maxThreadNum = getHardwareCPUNum(); 28 | int threadIndex = 0; 29 | 30 | int stride = max_value((int) (roundf(1.f * height / maxThreadNum)), 1); 31 | 32 | #pragma omp parallel for private(threadIndex) 33 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 34 | int start = threadIndex * stride; 35 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : min_value(start + stride, height); 36 | 37 | T *srcData = src + start * width; 38 | T *dstData = dst + start * width; 39 | 40 | int size; 41 | 42 | T *w = (T *) malloc(sizeof(T) * (width + 3)); 43 | T tail; 44 | 45 | for (int y = start; y < end; ++y) { 46 | size = width - 1; 47 | 48 | w[0] = w[1] = w[2] = srcData[0]; 49 | 50 | for (int x = 0, n = 3; x <= size; ++x, ++n) { 51 | w[n] = (T) ((B * srcData[x] + b1 * w[n - 1] + b2 * w[n - 2] + b3 * w[n - 3] + delta) >> shift); 52 | } 53 | 54 | tail = w[size + 3]; 55 | 56 | dstData[size] = (T) ((B * w[size + 3] + b1 * tail + b2 * tail + b3 * tail + delta) >> shift); 57 | size--; 58 | dstData[size] = (T) ((B * w[size + 3] + b1 * dstData[size + 1] + b2 * tail + b3 * tail + delta) >> shift); 59 | size--; 60 | dstData[size] = (T) ((B * w[size + 3] + b1 * dstData[size + 1] + b2 * dstData[size + 2] + b3 * tail + delta) 61 | >> shift); 62 | size--; 63 | 64 | for (int x = size; x >= 0; --x) { 65 | dstData[x] = (T) ( 66 | (B * w[x + 3] + b1 * dstData[x + 1] + b2 * dstData[x + 2] + b3 * dstData[x + 3] + delta) 67 | >> shift); 68 | } 69 | 70 | srcData += width; 71 | dstData += width; 72 | } 73 | 74 | free(w); 75 | } 76 | } 77 | 78 | /** 79 | * filter on col same as the row 80 | * @tparam T 81 | * @param src 82 | * @param dst 83 | * @param width 84 | * @param height 85 | * @param delta 86 | * @param shift 87 | * @param B 88 | * @param b1 89 | * @param b2 90 | * @param b3 91 | */ 92 | template 93 | void IIRFilterCol(T *src, T *dst, 94 | const int width, const int height, 95 | const int32_t B, 96 | const int32_t b1, const int32_t b2, const int32_t b3, 97 | const int32_t delta, const int32_t shift) { 98 | int maxThreadNum = getHardwareCPUNum(); 99 | int threadIndex = 0; 100 | 101 | int stride = max_value((int) (roundf(1.f * width / maxThreadNum)), 1); 102 | 103 | #pragma omp parallel for private(threadIndex) 104 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 105 | int start = threadIndex * stride; 106 | 
int end = (threadIndex == (maxThreadNum - 1)) ? width : min_value(start + stride, width); 107 | int range = end - start; 108 | 109 | T *w = (T *) malloc(sizeof(T) * range * (height + 3)); 110 | 111 | T *srcData = src + start; 112 | T *dstData = dst + start; 113 | T *wData = w + 3 * range; 114 | 115 | T *srcOffsetData; 116 | T *dstOffsetData; 117 | T *wOffsetData; 118 | 119 | memcpy(w, srcData, sizeof(T) * range); 120 | memcpy(w + range, srcData, sizeof(T) * range); 121 | memcpy(w + 2 * range, srcData, sizeof(T) * range); 122 | 123 | int off1 = -range; 124 | int off2 = off1 - range; 125 | int off3 = off2 - range; 126 | 127 | int size = height - 1; 128 | 129 | /**forward pass*/ 130 | for (int y = 0; y <= size; ++y) { 131 | srcOffsetData = srcData; 132 | wOffsetData = wData; 133 | 134 | for (int x = 0; x < range; ++x) { 135 | wOffsetData[0] = (T) ((B * srcOffsetData[0] + b1 * wOffsetData[off1] + b2 * wOffsetData[off2] + 136 | b3 * wOffsetData[off3] + delta) >> shift); 137 | 138 | srcOffsetData++; 139 | wOffsetData++; 140 | } 141 | 142 | srcData += width; 143 | wData += range; 144 | } 145 | 146 | /**backward pass*/ 147 | T *tail = (T *) malloc(sizeof(T) * range); 148 | memcpy(tail, w + range * (size + 3), sizeof(T) * range); 149 | 150 | off1 = width; 151 | off2 = off1 + width; 152 | off3 = off2 + width; 153 | 154 | dstData = dst + start + size * width; 155 | wData = w + (size + 3) * range; 156 | 157 | dstOffsetData = dstData; 158 | wOffsetData = wData; 159 | 160 | for (int x = 0; x < range; ++x) { 161 | dstOffsetData[0] = (T) ((B * wOffsetData[0] + b1 * tail[x] + b2 * tail[x] + b3 * tail[x] + delta) >> shift); 162 | 163 | dstOffsetData++; 164 | wOffsetData++; 165 | } 166 | 167 | dstData -= width; 168 | wData -= range; 169 | 170 | dstOffsetData = dstData; 171 | wOffsetData = wData; 172 | 173 | for (int x = 0; x < range; ++x) { 174 | dstOffsetData[0] = (T) ((B * wOffsetData[0] + b1 * dstOffsetData[off1] + b2 * tail[x] + b3 * tail[x] 175 | + delta) >> shift); 176 | 177 | 
dstOffsetData++; 178 | wOffsetData++; 179 | } 180 | 181 | dstData -= width; 182 | wData -= range; 183 | 184 | dstOffsetData = dstData; 185 | wOffsetData = wData; 186 | 187 | for (int x = 0; x < range; ++x) { 188 | dstOffsetData[0] = (T) ( 189 | (B * wOffsetData[0] + b1 * dstOffsetData[off1] + b2 * dstOffsetData[off2] + b3 * tail[x] 190 | + delta) >> shift); 191 | 192 | dstOffsetData++; 193 | wOffsetData++; 194 | } 195 | 196 | dstData -= width; 197 | wData -= range; 198 | 199 | for (int y = size - 3; y >= 0; --y) { 200 | dstOffsetData = dstData; 201 | wOffsetData = wData; 202 | 203 | for (int x = 0; x < range; ++x) { 204 | dstOffsetData[0] = (T) ((B * wOffsetData[0] + b1 * dstOffsetData[off1] + b2 * dstOffsetData[off2] 205 | + b3 * dstOffsetData[off3] + delta) >> shift); 206 | 207 | dstOffsetData++; 208 | wOffsetData++; 209 | } 210 | 211 | dstData -= width; 212 | wData -= range; 213 | } 214 | 215 | free(tail); 216 | free(w); 217 | } 218 | } 219 |
    /**
     * Blur src into dst with the recursive (IIR) approximation of a gaussian described in
     * "Recursive implementation of the Gaussian filter" (Young & van Vliet).
     *
     * The floating point coefficients are derived from sigma, converted to fixed point by
     * multiplying with FILTER_SCALE, and then applied along the rows (src -> tmp) and along
     * the columns (tmp -> dst).
     *
     * Nothing is written to dst when src or dst is null, when width or height is below 3,
     * when sigma is negative, or when the intermediate plane cannot be allocated.
     *
     * NOTE(review): the template header reads just "template" in this copy of the file;
     * "typename T" is reconstructed from the use of T in the signature.
     *
     * @tparam T     pixel type
     * @param src    input plane, width * height elements
     * @param dst    output plane, width * height elements
     * @param width  plane width, at least 3
     * @param height plane height, at least 3
     * @param sigma  standard deviation of the gaussian, not negative
     */
    template<typename T>
    void IIRFilter(T *src, T *dst, const int width, const int height, const float sigma) {
        if (nullptr == src || nullptr == dst || 3 > width || 3 > height || 0 > sigma) {
            return;
        }

        /**
         * q as a function of sigma. The square-root fit is used for 0.5 <= sigma < 2.5;
         * below 0.5 q is pinned to a constant (roughly the value the fit yields at 0.5).
         */
        double_t q;

        if (sigma >= 2.5) {
            q = 0.98711 * sigma - 0.96330;
        } else if (sigma >= 0.5) {
            q = 3.97156 - 4.14554 * sqrt(1.0 - 0.26891 * sigma);
        } else {
            q = 0.1147705018520355224609375;
        }

        const double_t q2 = q * q;
        const double_t q3 = q * q2;

        const double_t db0 = 1.57825 + 2.44413 * q + 1.4281 * q2 + 0.422205 * q3;
        const double_t db1 = 2.44413 * q + 2.85619 * q2 + 1.26661 * q3;
        const double_t db2 = -(1.4281 * q2 + 1.26661 * q3);
        /** the cubic coefficient is the one already used in db0 (0.422205, not 0.4222205) */
        const double_t db3 = 0.422205 * q3;

        /** normalisation so that B + (db1 + db2 + db3) / db0 == 1 */
        const double_t dB = 1.0 - (db1 + db2 + db3) / db0;

        /** fixed-point versions of the normalised coefficients */
        const int32_t B = (int32_t) (dB * FILTER_SCALE);
        const int32_t b1 = (int32_t) (db1 / db0 * FILTER_SCALE);
        const int32_t b2 = (int32_t) (db2 / db0 * FILTER_SCALE);
        const int32_t b3 = (int32_t) (db3 / db0 * FILTER_SCALE);

        /** intermediate plane holding the row-filtered image */
        T *tmp = (T *) malloc(sizeof(T) * width * height);

        if (nullptr == tmp) {
            return;
        }

        IIRFilterRow<T>(src, tmp, width, height, B, b1, b2, b3, FILTER_DELTA, FILTER_SHIFT);
        IIRFilterCol<T>(tmp, dst, width, height, B, b1, b2, b3, FILTER_DELTA, FILTER_SHIFT);

        free(tmp);
    }
258 | 259 | void IIRFilter(int16_t *src, int16_t *dst, int width, int height, float sigma) { 260 | IIRFilter(src, dst, width, height, sigma); 261 | } 262 | 263 | void IIRFilter(uint8_t *src, uint8_t *dst, int width, int height, float sigma) { 264 | IIRFilter(src, dst, width, height, sigma); 265 | } 266 | 267 | } 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | -------------------------------------------------------------------------------- /include/imageutils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/4/8. 3 | */ 4 | 5 | #ifndef SIFTYSIFTY_IMAGEUTILS_H 6 | #define SIFTYSIFTY_IMAGEUTILS_H 7 | 8 | #include "structs.h" 9 | #include "utils.h" 10 | 11 | #ifdef _OPENMP 12 | #include 13 | #endif 14 | 15 | namespace SiftySifty { 16 | 17 | /** 18 | * half sample the src Mat 19 | * @tparam T type 20 | * @param src src mat 21 | * @param dst dst mat 22 | * @return true/false 23 | */ 24 | template 25 | void halfSampleMat(Mat *src, Mat *dst) { 26 | if (nullptr == src || nullptr == dst || nullptr == src->data || nullptr == dst->data) { 27 | return; 28 | } 29 | 30 | int srcWidth = src->width; 31 | int srcHeight = src->height; 32 | 33 | int dstWidth = dst->width; 34 | int dstHeight = dst->height; 35 | 36 | if ((srcWidth >> 1) != dstWidth || (srcHeight >> 1) != dstHeight) { 37 | return; 38 | } 39 | 40 | int maxThreadNum = getHardwareCPUNum(); 41 | int threadIndex = 0; 42 | 43 | int stride = max_value((int) (roundf(1.0f * dstHeight / maxThreadNum)), 1); 44 | 45 | #pragma omp parallel for private(threadIndex) 46 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 47 | int start = threadIndex * 
stride; 48 | int end = (threadIndex == (maxThreadNum - 1)) ? dstHeight : min_value(start + stride, dstHeight); 49 | 50 | T *srcData = src->data + (srcWidth * (start << 1)); 51 | T *dstData = dst->data + (dstWidth * start); 52 | 53 | for (int y = start; y < end; ++y) { 54 | for (int x = 0; x < dstWidth; ++x) { 55 | dstData[x] = srcData[(x << 1)]; 56 | } 57 | 58 | srcData += (srcWidth + srcWidth); 59 | dstData += dstWidth; 60 | } 61 | } 62 | } 63 | 64 | 65 | /** 66 | * resize src to dst 67 | * @tparam T 68 | * @param src 69 | * @param dst 70 | * @return 71 | */ 72 | template 73 | void resizeMat(Mat *src, Mat *dst) { 74 | if (nullptr == src || nullptr == dst || nullptr == src->data || nullptr == dst->data) { 75 | return; 76 | } 77 | 78 | int srcWidth = src->width; 79 | int srcHeight = src->height; 80 | 81 | int dstWidth = dst->width; 82 | int dstHeight = dst->height; 83 | 84 | T *srcData = src->data; 85 | T *dstData = dst->data; 86 | 87 | if (srcWidth == dstWidth && srcHeight == dstHeight) { 88 | memcpy(dstData, srcData, sizeof(T) * srcWidth * srcHeight); 89 | 90 | return; 91 | } 92 | 93 | int32_t shift = 22; 94 | int64_t scale = (1 << (shift >> 1)); 95 | int64_t delta = (1 << (shift - 1)); 96 | 97 | float xRatio = 1.f * (srcWidth - 1.f) / dstWidth; 98 | float yRatio = 1.f * (srcHeight - 1.f) / dstHeight; 99 | 100 | int maxThreadNum = getHardwareCPUNum(); 101 | int threadIndex = 0; 102 | 103 | int stride = max_value((int) (roundf(1.0f * dstHeight / maxThreadNum)), 1); 104 | 105 | #pragma omp parallel for private(threadIndex) 106 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 107 | int start = threadIndex * stride; 108 | int end = (threadIndex == (maxThreadNum - 1)) ? 
dstHeight : min_value(start + stride, dstHeight); 109 | 110 | T *dstOffsetData = dstData + start * dstWidth; 111 | 112 | for (int y = start; y < end; ++y) { 113 | float yOffset = (y + 0.5f) * yRatio; 114 | int yUp = (int) floorf(yOffset); 115 | 116 | yOffset -= yUp; 117 | 118 | int64_t multUp = (int64_t) (yOffset * scale); 119 | int64_t multDown = scale - multUp; 120 | 121 | for (int x = 0; x < dstWidth; ++x) { 122 | float xOffset = (x + 0.5f) * xRatio; 123 | int xLeft = (int) floorf(xOffset); 124 | 125 | xOffset -= xLeft; 126 | 127 | int64_t multLeft = (int64_t) (xOffset * scale); 128 | int64_t multRight = scale - multLeft; 129 | 130 | T *srcOffsetData = srcData + yUp * srcWidth + xLeft; 131 | 132 | dstOffsetData[x] = (T) ((srcOffsetData[0] * multRight * multDown 133 | + srcOffsetData[1] * multLeft * multDown 134 | + srcOffsetData[srcWidth] * multRight * multUp 135 | + srcOffsetData[srcWidth + 1] * multLeft * multUp 136 | + delta) >> shift); 137 | } 138 | 139 | dstOffsetData += dstWidth; 140 | } 141 | } 142 | } 143 | 144 | /** 145 | * resize the src to dst 146 | * @tparam T 147 | * @param src 148 | * @param srcWidth 149 | * @param srcHeight 150 | * @param dst 151 | * @param dstWidth 152 | * @param dstHeight 153 | * @return 154 | */ 155 | template 156 | void resizeMat2(Mat *srcMat, Mat *dstMat) { 157 | if (nullptr == srcMat || nullptr == dstMat) { 158 | return; 159 | } 160 | 161 | T *src = srcMat->data; 162 | T *dst = dstMat->data; 163 | 164 | int srcWidth = srcMat->width; 165 | int srcHeight = srcMat->height; 166 | int dstWidth = dstMat->width; 167 | int dstHeight = dstMat->height; 168 | 169 | if (srcWidth == dstWidth && srcHeight == dstHeight) { 170 | memcpy(dst, src, sizeof(T) * srcWidth * srcHeight); 171 | return; 172 | } 173 | 174 | int32_t shift = 22; 175 | int64_t scale = (1 << (shift >> 1)); 176 | int64_t delta = (1 << (shift - 1)); 177 | 178 | /** 179 | * src = (dst + 0.5) * srcWidth / dstWidth - 0.5 180 | */ 181 | float xRatio = 1.f * srcWidth / dstWidth; 
182 | float yRatio = 1.f * srcHeight / dstHeight; 183 | 184 | int *xTable = (int*) malloc(sizeof(int) * 2 * dstWidth); 185 | int64_t *xMult = (int64_t*) malloc(sizeof(int64_t) * 2 * dstWidth); 186 | 187 | for (int x = 0; x < dstWidth; ++x) { 188 | float xOffset = (x + 0.5f) * xRatio - 0.5f; 189 | int xLeft; 190 | 191 | int64_t multLeft, multRight; 192 | 193 | if (0 >= xOffset) { 194 | xLeft = 0; 195 | multLeft = 0; 196 | multRight = scale; 197 | } else if (xOffset >= (srcWidth - 1)) { 198 | xLeft = srcWidth - 2; 199 | multLeft = scale; 200 | multRight = 0; 201 | } else { 202 | xLeft = (int) floorf(xOffset); 203 | 204 | xOffset -= xLeft; 205 | 206 | multLeft = (int64_t) (xOffset * scale); 207 | multRight = scale - multLeft; 208 | } 209 | 210 | xTable[(x << 1)] = xLeft; 211 | xTable[(x << 1) + 1] = xLeft + 1; 212 | xMult[(x << 1)] = multLeft; 213 | xMult[(x << 1) + 1] = multRight; 214 | } 215 | 216 | int maxThreadNum = getHardwareCPUNum(); 217 | int threadIndex = 0; 218 | 219 | int stride = max_value((int) (roundf(1.0f * dstHeight / maxThreadNum)), 1); 220 | 221 | #pragma omp parallel for private(threadIndex) 222 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 223 | int start = threadIndex * stride; 224 | int end = (threadIndex == (maxThreadNum - 1)) ? 
dstHeight : min_value(start + stride, dstHeight); 225 | 226 | T *dstData = dst + start * dstWidth; 227 | 228 | for (int y = start; y < end; ++y) { 229 | float yOffset = (y + 0.5f) * yRatio - 0.5f; 230 | int yUp; 231 | int64_t multUp, multDown; 232 | 233 | if (0 >= yOffset) { 234 | yUp = 0; 235 | multUp = 0; 236 | multDown = scale; 237 | } else if (yOffset >= (srcHeight - 1)) { 238 | yUp = srcHeight - 2; 239 | multUp = scale; 240 | multDown = 0; 241 | } else { 242 | yUp = (int) floorf(yOffset); 243 | yOffset -= yUp; 244 | 245 | multUp = (int64_t) (yOffset * scale); 246 | multDown = scale - multUp; 247 | } 248 | 249 | T *upSrc = src + yUp * srcWidth; 250 | T *downSrc = upSrc + srcWidth; 251 | 252 | for (int x = 0; x < dstWidth; ++x) { 253 | int x2 = (x << 1); 254 | 255 | dstData[x] = (T)((((upSrc[xTable[x2]] * xMult[x2+1] 256 | + upSrc[xTable[x2+1]]*xMult[x2]) * multDown 257 | + (downSrc[xTable[x2]] * xMult[x2+1] 258 | + downSrc[xTable[x2+1]]*xMult[x2]) * multUp) 259 | + delta) >> shift); 260 | } 261 | 262 | dstData += dstWidth; 263 | } 264 | } 265 | 266 | free(xTable); 267 | free(xMult); 268 | } 269 | 270 | template 271 | void scaleMatByScale(T1 *src, T2 *dst, int width, int height, int scale) { 272 | int maxThreadNum = getHardwareCPUNum(); 273 | int threadIndex = 0; 274 | 275 | int stride = max_value((int) (roundf(1.0f * height / maxThreadNum)), 1); 276 | 277 | #pragma omp parallel for private(threadIndex) 278 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 279 | int start = threadIndex * stride; 280 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : min_value(start + stride, height); 281 | 282 | T1 *srcData = src + start * width; 283 | T2 *dstData = dst + start * width; 284 | 285 | int64_t length = (end - start) * width; 286 | int64_t limit = length - 3; 287 | 288 | int64_t i = 0; 289 | for (; i < limit; i += 4) { 290 | dstData[i] = (srcData[i] * scale); 291 | dstData[i + 1] = (srcData[i + 1] * scale); 292 | dstData[i + 2] = (srcData[i + 2] * scale); 293 | dstData[i + 3] = (srcData[i + 3] * scale); 294 | } 295 | 296 | for (; i < length; ++i) { 297 | dstData[i] = (srcData[i] * scale); 298 | } 299 | } 300 | } 301 | 302 | template 303 | void scaleMatByShift(T1 *src, T2 *dst, int width, int height, int shift) { 304 | int maxThreadNum = getHardwareCPUNum(); 305 | int threadIndex = 0; 306 | 307 | int stride = max_value((int) (roundf(1.0f * height / maxThreadNum)), 1); 308 | 309 | #pragma omp parallel for private(threadIndex) 310 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 311 | int start = threadIndex * stride; 312 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : min_value(start + stride, height); 313 | 314 | T1 *srcData = src + start * width; 315 | T2 *dstData = dst + start * width; 316 | 317 | int64_t length = (end - start) * width; 318 | int64_t limit = length - 3; 319 | 320 | int64_t i = 0; 321 | for (; i < limit; i += 4) { 322 | dstData[i] = (srcData[i] << shift); 323 | dstData[i + 1] = (srcData[i + 1] << shift); 324 | dstData[i + 2] = (srcData[i + 2] << shift); 325 | dstData[i + 3] = (srcData[i + 3] << shift); 326 | } 327 | 328 | for (; i < length; ++i) { 329 | dstData[i] = (srcData[i] << shift); 330 | } 331 | } 332 | } 333 | 334 | template 335 | void subMat(T *src1, T *src2, T *dst, int width, int height) { 336 | int maxThreadNum = getHardwareCPUNum(); 337 | int threadIndex = 0; 338 | 339 | int stride = max_value((int) (roundf(1.0f * height / maxThreadNum)), 1); 340 | 341 | #pragma omp parallel for private(threadIndex) 342 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 343 | int start = threadIndex * stride; 344 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : min_value(start + stride, height); 345 | 346 | T *src1Data = src1 + start * width; 347 | T *src2Data = src2 + start * width; 348 | T *dstData = dst + start * width; 349 | 350 | int64_t length = (end - start) * width; 351 | int64_t limit = length - 3; 352 | 353 | int64_t i = 0; 354 | for (; i < limit; i += 4) { 355 | dstData[i] = src1Data[i] - src2Data[i]; 356 | dstData[i + 1] = src1Data[i + 1] - src2Data[i + 1]; 357 | dstData[i + 2] = src1Data[i + 2] - src2Data[i + 2]; 358 | dstData[i + 3] = src1Data[i + 3] - src2Data[i + 3]; 359 | } 360 | 361 | for (; i < length; ++i) { 362 | dstData[i] = src1Data[i] - src2Data[i]; 363 | } 364 | } 365 | } 366 | 367 | } 368 | 369 | 370 | #endif //SIFTYSIFTY_IMAGEUTILS_H 371 | -------------------------------------------------------------------------------- /src/siftysifty.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/3/22. 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "utils.h" 12 | #include "gaussfiler.h" 13 | #include "imageutils.h" 14 | #include "iirfilter.h" 15 | #include "siftysifty.h" 16 | 17 | #ifdef _OPENMP 18 | #include 19 | #endif 20 | 21 | namespace SiftySifty { 22 | 23 | /** 24 | * gauss to Mat 25 | */ 26 | void gaussFilter(Mat *src, Mat *dst, float sigma) { 27 | if (nullptr == src || nullptr == dst || 0 >= sigma || src->width != dst->width || src->height != dst->height) { 28 | return; 29 | } 30 | 31 | gaussFilterByIIRFilter(src->data, dst->data, src->width, src->height, sigma); 32 | } 33 | 34 | /** 35 | * gauss blur on the mat 36 | */ 37 | void gaussFilter(Mat *mat, float sigma) { 38 | Mat *tmp = newMat(mat->width, mat->height); 39 | 40 | memcpy(tmp->data, mat->data, sizeof(int16_t) * mat->width * mat->height); 41 | 42 | gaussFilter(tmp, mat, sigma); 43 | 44 | deleteMat(tmp); 45 | } 46 | 47 | /** 48 | * create the base image of sift 49 | * @param src the gray image with 
uint8_t 50 | * @param width width 51 | * @param height height 52 | * @param doubleImage if double the image 53 | * @param sigma the init sigma 54 | * @param shift the base image will be src * (1 << shift) 55 | * @return 56 | */ 57 | Mat *initBaseImage(uint8_t *src, int width, int height, bool doubleImage, double sigma, int shift) { 58 | Mat *base = newMat(width, height); 59 | 60 | /**scale the src*/ 61 | scaleMatByShift(src, base->data, width, height, shift); 62 | 63 | if (doubleImage) { 64 | float diffSigma = (float) sqrt(sigma * sigma - 4.0 * SIFT_INIT_SIGMA * SIFT_INIT_SIGMA); 65 | 66 | Mat *doubleBase = newMat(base->width * 2, base->height * 2); 67 | 68 | resizeMat2(base, doubleBase); 69 | gaussFilter(doubleBase, diffSigma); 70 | 71 | deleteMat(base); 72 | 73 | return doubleBase; 74 | } else { 75 | float diffSigma = (float) sqrt(sigma * sigma - SIFT_INIT_SIGMA * SIFT_SIGMA); 76 | 77 | gaussFilter(base, diffSigma); 78 | 79 | return base; 80 | } 81 | } 82 | 83 | vector *> > buildGaussPyramid(SiftySifty::Mat *base, 84 | const int octave, 85 | const int octaveLayers, 86 | const float sigma) { 87 | double sig[octaveLayers + 3]; 88 | sig[0] = sigma; 89 | 90 | double k = pow(2.0, 1.0 / octaveLayers); 91 | for (int i = 1; i < octaveLayers + 3; i++) { 92 | double sigPrev = pow(k, (double) (i - 1)) * sigma; 93 | double sigTotal = sigPrev * k; 94 | 95 | sig[i] = sqrt(sigTotal * sigTotal - sigPrev * sigPrev); 96 | } 97 | 98 | vector *> > gaussPyramid(octave, 99 | vector *>(octaveLayers + 3)); 100 | 101 | for (int y = 0; y < octave; ++y) { 102 | for (int x = 0; x < (octaveLayers + 3); ++x) { 103 | if (0 == y && 0 == x) { 104 | gaussPyramid[y][x] = base; 105 | } else if (0 == x) { 106 | Mat *pre = gaussPyramid[y - 1][octaveLayers]; 107 | Mat *mat = newMat(pre->width >> 1, pre->height >> 1); 108 | 109 | halfSampleMat(pre, mat); 110 | 111 | gaussPyramid[y][x] = mat; 112 | } else { 113 | Mat *pre = gaussPyramid[y][x - 1]; 114 | Mat *mat = newMat(pre->width, pre->height); 115 | 
116 | gaussFilter(pre, mat, sig[x]); 117 | 118 | gaussPyramid[y][x] = mat; 119 | } 120 | } 121 | } 122 | 123 | return gaussPyramid; 124 | } 125 | 126 | /** 127 | * build dogPyramid 128 | */ 129 | vector *> > buildDoGPyramid(vector *> > &gaussPyramid, 130 | const int octave, 131 | const int octaveLayers) { 132 | vector *> > doGPyramid(octave, 133 | vector *>(octaveLayers + 2)); 134 | 135 | for (int y = 0; y < octave; ++y) { 136 | for (int x = 0; x < (octaveLayers + 2); ++x) { 137 | SiftySifty::Mat *src1 = gaussPyramid[y][x + 1]; 138 | SiftySifty::Mat *src2 = gaussPyramid[y][x]; 139 | 140 | SiftySifty::Mat *dst = newMat(src1->width, src1->height); 141 | 142 | subMat(src1->data, src2->data, dst->data, src1->width, src1->height); 143 | 144 | doGPyramid[y][x] = dst; 145 | } 146 | } 147 | 148 | return doGPyramid; 149 | } 150 | 151 | /** 152 | * adjust the real extrema point 153 | * @param doGPyramid dog pyramid 154 | * @param keyPoint the current keypoint 155 | * @param r the row of keypoint 156 | * @param c the col of keypoint 157 | * @param layer the layer of the keypoint 158 | * @param octaveLayers the total images of the layer 159 | * @param curOctave the current octave 160 | * @param contrastThreshold contrast threshold 161 | * @param edgeThreshold edge threshold 162 | * @param sigma 163 | * @param offset 164 | * @return true: is a keypint false:not 165 | */ 166 | bool adjustExtremaPoint(vector * > > &doGPyramid, 167 | KeyPoint &keyPoint, 168 | int &r, int &c, int &l, 169 | int octaveLayers, 170 | int curOctave, 171 | float contrastThreshold, 172 | float edgeThreshold, 173 | float sigma, 174 | int *offset) { 175 | /** 176 | * the origin sift paper use the float to store the image, 177 | * so the imageScale have to be scaled by the SIFT_IMAGE_SCALE 178 | */ 179 | const float imageScale = 1.0f / 255.0f * SIFT_IMAGE_SCALE; 180 | 181 | /**benn used to calcualte the first-order derivative*/ 182 | const float deriveScale = 0.5f * imageScale; 183 | 184 | /**second-order 
derivative*/ 185 | const float secondDeriveScale = imageScale; 186 | 187 | /**cross--order derivative*/ 188 | const float crossDeriveScale = 0.25f * imageScale; 189 | 190 | float xR = 0, xC = 0, xL = 0; 191 | 192 | bool confirm = false; 193 | 194 | for (int i = 0; i < SIFT_MAX_ADJUST_STEP; ++i) { 195 | int width = doGPyramid[curOctave][l]->width; 196 | int height = doGPyramid[curOctave][l]->height; 197 | 198 | int16_t *cur = doGPyramid[curOctave][l]->data + width * r + c; 199 | int16_t *pre = doGPyramid[curOctave][l - 1]->data + width * r + c; 200 | int16_t *nex = doGPyramid[curOctave][l + 1]->data + width * r + c; 201 | 202 | /** 203 | * calculate the derive of x, y, sigma 204 | * | 205 | * | 206 | * | 207 | * ---------------->x 208 | * | 209 | * | 210 | * | 211 | * ^ y 212 | */ 213 | float dx = (cur[offset[5]] - cur[offset[3]]) * deriveScale; 214 | float dy = (cur[offset[7]] - cur[offset[1]]) * deriveScale; 215 | float ds = (nex[0] - pre[0]) * deriveScale; 216 | 217 | /** 218 | * calculate dxx dxy dxs dyx dyy dys dsx dsy dss 219 | */ 220 | float value2 = 2.0f * cur[0]; 221 | float dxx = (cur[offset[5]] + cur[offset[3]] - value2) * secondDeriveScale; 222 | float dyy = (cur[offset[7]] + cur[offset[1]] - value2) * secondDeriveScale; 223 | float dss = (nex[0] + pre[0] - value2) * secondDeriveScale; 224 | 225 | float dxy = (cur[offset[8]] + cur[offset[0]] - cur[offset[2]] - cur[offset[6]]) * crossDeriveScale; 226 | float dxs = (nex[offset[5]] + pre[offset[3]] - nex[offset[3]] - pre[offset[5]]) * crossDeriveScale; 227 | float dys = (nex[offset[7]] + pre[offset[1]] - nex[offset[1]] - pre[offset[7]]) * crossDeriveScale; 228 | 229 | /** 230 | * X = -[dx, dy, ds] ^ T * ([Dxx, Dxy, Dxs]) ^ -1 231 | * ([Dyx, Dyy, Dys]) 232 | * ([Dsx, Dsy, Dss]) 233 | */ 234 | float detD = dxx * dyy * dss + dxy * dys * dxs + dxs * dxy * dys - 235 | dxx * dys * dys - dxy * dxy * dss - dxs * dyy * dxs; 236 | 237 | if (fabsf(detD) < 1e-6) { 238 | return false; 239 | } 240 | 241 | detD = 1.0 / 
detD; 242 | 243 | xC = dx * (dyy * dss - dys * dys) + dy * (-dxy * dss + dys * dxs) + ds * (dxy * dys - dxs * dyy); 244 | xR = dx * (dys * dxs - dss * dxy) + dy * (-dxs * dxs + dss * dxx) + ds * (dxs * dxy - dxx * dys); 245 | xL = dx * (dxy * dys - dxs * dyy) + dy * (-dxx * dys + dxs * dxy) + ds * (dxx * dyy - dxy * dxy); 246 | 247 | xC = -detD * xC; 248 | xR = -detD * xR; 249 | xL = -detD * xL; 250 | 251 | if (fabsf(xC) < 0.5f && fabsf(xR) < 0.5f && fabsf(xL) < 0.5f) { 252 | confirm = true; 253 | break; 254 | } 255 | 256 | if (std::abs(xC) > (float) (INT_MAX / 3) || 257 | std::abs(xR) > (float) (INT_MAX / 3) || 258 | std::abs(xL) > (float) (INT_MAX / 3)) { 259 | return false; 260 | } 261 | 262 | r += (int) (roundf(xR)); 263 | c += (int) (roundf(xC)); 264 | l += (int) (roundf(xL)); 265 | 266 | /**out of border*/ 267 | if (l < 1 || l > octaveLayers 268 | || c < SIFT_IMAGE_BORDER || c >= width - SIFT_IMAGE_BORDER 269 | || r < SIFT_IMAGE_BORDER || r >= height - SIFT_IMAGE_BORDER) { 270 | return false; 271 | } 272 | } 273 | 274 | if (!confirm) { 275 | return false; 276 | } 277 | 278 | int width = doGPyramid[curOctave][l]->width; 279 | 280 | int16_t *cur = doGPyramid[curOctave][l]->data + width * r + c; 281 | int16_t *pre = doGPyramid[curOctave][l - 1]->data + width * r + c; 282 | int16_t *nex = doGPyramid[curOctave][l + 1]->data + width * r + c; 283 | 284 | float dx = (cur[offset[5]] - cur[offset[3]]) * deriveScale; 285 | float dy = (cur[offset[7]] - cur[offset[1]]) * deriveScale; 286 | float ds = (nex[0] - pre[0]) * deriveScale; 287 | 288 | float response = cur[0] * imageScale + 0.5f * (dx * xC + dy * xR + ds * xL); 289 | 290 | if (fabsf(response) * octaveLayers < contrastThreshold) { 291 | return false; 292 | } 293 | 294 | float value2 = 2.0f * cur[0]; 295 | float dxx = (cur[offset[5]] + cur[offset[3]] - value2) * secondDeriveScale; 296 | float dyy = (cur[offset[7]] + cur[offset[1]] - value2) * secondDeriveScale; 297 | float dxy = (cur[offset[8]] + cur[offset[0]] - 
cur[offset[2]] - cur[offset[6]]) * crossDeriveScale; 298 | 299 | float tr = dxx + dyy; 300 | float det = dxx * dyy - dxy * dxy; 301 | 302 | if (0 > det || tr * tr * edgeThreshold >= (edgeThreshold + 1) * (edgeThreshold + 1) * det) { 303 | return false; 304 | } 305 | 306 | keyPoint.x = (c + xC) * (1 << curOctave); 307 | keyPoint.y = (r + xR) * (1 << curOctave); 308 | 309 | keyPoint.octaveX = c; 310 | keyPoint.octaveY = r; 311 | 312 | keyPoint.octave = curOctave; 313 | keyPoint.octaveLayer = l; 314 | 315 | keyPoint.octaveLayersShift = xL; 316 | 317 | keyPoint.size = sigma * powf(2.f, (l + xL) / octaveLayers) * (1 << curOctave); 318 | keyPoint.octaveSize = sigma * powf(2.f, (l + xL) / octaveLayers); 319 | 320 | /**[-1, 1]*/ 321 | keyPoint.response = response; 322 | 323 | return true; 324 | } 325 |
    /**
     * Build the smoothed gradient-orientation histogram of the window centred on (x, y).
     *
     * Every pixel of the (2 * radius + 1) x (2 * radius + 1) window that is not on the image
     * border adds exp(-(i * i + j * j) / (2 * sigma * sigma)) * gradient magnitude to the bin
     * of its gradient orientation. The raw histogram is then smoothed circularly with the
     * weights 1/16, 4/16, 6/16, 4/16, 1/16 and stored in hist.
     *
     * NOTE(review): the template argument of Mat and of static_cast is not visible in this
     * copy of the file; int16_t / int are inferred from the variables they are assigned to.
     *
     * @param image  image the gradients are read from
     * @param x      column of the window centre
     * @param y      row of the window centre
     * @param radius half size of the window
     * @param sigma  sigma of the gaussian weight
     * @param hist   output, n smoothed bins
     * @param n      number of bins
     * @return the largest value stored in hist; 0 when an argument is unusable or the
     *         scratch buffer cannot be allocated
     */
    float calculateHist(SiftySifty::Mat<int16_t> *image, int x, int y, int radius, float sigma, float *hist, int n) {
        if (nullptr == image || nullptr == image->data || nullptr == hist || 0 >= n || 0 > radius) {
            return 0.f;
        }

        const int16_t *imageData = image->data;
        const int width = image->width;
        const int height = image->height;

        const int length = (2 * radius + 1) * (2 * radius + 1);

        const float scale = -1.0f / (2.0f * sigma * sigma);

        /**
         * one scratch buffer, laid out as
         * weights | dx | dy | ori | mag | 2 pad | n raw bins | 2 pad
         * the pads receive copies of the bins from the other end so that the circular
         * smoothing below can index i - 2 .. i + 2 without any wrap test
         */
        float *buffer = (float *) malloc(sizeof(float) * (5 * length + n + 4));

        if (nullptr == buffer) {
            /**an all-zero histogram has no peak, so no orientation is derived from it*/
            memset(hist, 0, sizeof(float) * n);

            return 0.f;
        }

        float *weights = buffer;
        float *dx = weights + length;
        float *dy = dx + length;
        float *ori = dy + length;
        float *mag = ori + length;
        float *tmpHist = mag + length + 2;

        memset(tmpHist, 0, sizeof(float) * n);

        /**collect the gradients of all window pixels that have four neighbours*/
        int realLength = 0;
        for (int i = -radius; i <= radius; ++i) {
            const int r = y + i;
            if (0 >= r || r >= (height - 1)) {
                continue;
            }

            for (int j = -radius; j <= radius; ++j) {
                const int c = x + j;
                if (0 >= c || c >= (width - 1)) {
                    continue;
                }

                const int16_t *pixel = imageData + r * width + c;

                /**x grows to the right, y grows upwards: dy is (row above) - (row below)*/
                dx[realLength] = (float) (pixel[1] - pixel[-1]);
                dy[realLength] = (float) (pixel[-width] - pixel[width]);

                /**only the exponent is stored here, expf() is applied in the next loop*/
                weights[realLength] = (i * i + j * j) * scale;

                realLength++;
            }
        }

        for (int i = 0; i < realLength; ++i) {
            weights[i] = expf(weights[i]);
            mag[i] = sqrtf(dx[i] * dx[i] + dy[i] * dy[i]);
            ori[i] = atan2f360(dy[i], dx[i]);
        }

        for (int i = 0; i < realLength; ++i) {
            int index = static_cast<int>(roundf(ori[i] * n / 360.0f));

            /**
             * fold the bin back into [0, n). An orientation just below 360 rounds to n and
             * belongs to bin 0; the former test "index > n" left it in the pad slot
             * tmpHist[n], which is overwritten below, so the vote was lost
             */
            index %= n;

            if (0 > index) {
                index += n;
            }

            tmpHist[index] += (weights[i] * mag[i]);
        }

        /**fill the pads with the bins from the opposite end of the circle*/
        tmpHist[-1] = tmpHist[n - 1];
        tmpHist[-2] = tmpHist[n - 2];
        tmpHist[n] = tmpHist[0];
        tmpHist[n + 1] = tmpHist[1];

        for (int i = 0; i < n; ++i) {
            hist[i] = (tmpHist[i - 2] + tmpHist[i + 2]) * (1.f / 16.f) +
                      (tmpHist[i - 1] + tmpHist[i + 1]) * (4.f / 16.f) + tmpHist[i] * (6.f / 16.f);
        }

        float maxValue = hist[0];
        for (int i = 1; i < n; ++i) {
            maxValue = max_value(hist[i], maxValue);
        }

        free(buffer);

        return maxValue;
    }
416 | 417 | void findKeyPoints(vector &kpts, KeyPoint &keyPoint, float threshold, float *hist, int n) { 418 | for (int i = 0; i < n; ++i) { 419 | int left = (i > 0) ? (i - 1) : (n - 1); 420 | int right = (i < (n - 1)) ? (i + 1) : 0; 421 | 422 | if (hist[i] > hist[left] && hist[i] >= hist[right] && hist[i] >= threshold) { 423 | float bin = i + 0.5f * (hist[left] - hist[right]) / (hist[left] - 2 * hist[i] + hist[right]); 424 | 425 | bin = (bin < 0) ? (n + bin) : ((bin >= n) ? 
(bin - n) : bin); 426 | 427 | keyPoint.angle = 360.0f - (360.0f / n) * bin; 428 | 429 | if (fabsf(keyPoint.angle - 360.0f) < FLT_EPSILON) { 430 | keyPoint.angle = 0.f; 431 | } 432 | 433 | kpts.push_back(keyPoint); 434 | } 435 | } 436 | } 437 | 438 | /** 439 | * find the extrema point on one pic 440 | * @param gaussPyramid gauss pyramid 441 | * @param doGPyramid dog pyramid 442 | * @param keyPoints store the keyPoints 443 | * @param octave the size of pyramid 444 | * @param octaveLayers the image's number in one layer 445 | * @param contrastThreshold the contrast threshold int sift paper 446 | * @param edgeThreshold the edge threshold in sift paper 447 | * @param valueThreshold the image value threshold 448 | * @param curOctave the current octave 449 | * @param curLayer the curent layer 450 | * @param n 451 | * @param sigma 452 | * @param offset 453 | */ 454 | void findExtremaPointOne(vector * > > &gaussPyramid, 455 | vector * > > &doGPyramid, 456 | vector &keyPoints, 457 | int octave, 458 | int octaveLayers, 459 | float contrastThreshold, 460 | float edgeThreshold, 461 | int valueThreshold, 462 | int curOctave, 463 | int curLayer, 464 | int n, 465 | float sigma, 466 | int *offset) { 467 | /**get the pre current next image*/ 468 | Mat *pre = doGPyramid[curOctave][curLayer - 1]; 469 | Mat *cur = doGPyramid[curOctave][curLayer]; 470 | Mat *nex = doGPyramid[curOctave][curLayer + 1]; 471 | 472 | int width = cur->width; 473 | int height = cur->height; 474 | 475 | int maxThreadNum = getHardwareCPUNum(); 476 | int threadIndex = 0; 477 | 478 | /**remove the border*/ 479 | int stride = max((int) (roundf(1.0f * (height - SIFT_IMAGE_BORDER - SIFT_IMAGE_BORDER) / maxThreadNum)), 1); 480 | 481 | #ifdef _OPENMP 482 | omp_lock_t lock; 483 | omp_init_lock(&lock); 484 | #endif 485 | 486 | #pragma omp parallel for private(threadIndex) 487 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 488 | int start = threadIndex * stride + SIFT_IMAGE_BORDER; 489 | int end = 
(threadIndex == (maxThreadNum - 1)) ? 490 | (height - SIFT_IMAGE_BORDER) : min_value(start + stride, height - SIFT_IMAGE_BORDER); 491 | 492 | float hist[n]; 493 | 494 | KeyPoint keyPoint; 495 | vector kpts; 496 | 497 | int16_t *prePtr = pre->data + start * width + SIFT_IMAGE_BORDER; 498 | int16_t *curPtr = cur->data + start * width + SIFT_IMAGE_BORDER; 499 | int16_t *nexPtr = nex->data + start * width + SIFT_IMAGE_BORDER; 500 | 501 | for (int y = start; y < end; ++y) { 502 | int16_t *preData = prePtr - 1; 503 | int16_t *curData = curPtr - 1; 504 | int16_t *nexData = nexPtr - 1; 505 | 506 | for (int x = SIFT_IMAGE_BORDER; x < (width - SIFT_IMAGE_BORDER); ++x) { 507 | preData++; 508 | curData++; 509 | nexData++; 510 | 511 | int val = curData[0]; 512 | 513 | if (!(abs(val) > valueThreshold && 514 | ((val > 0 && val >= curData[offset[0]] && val >= curData[offset[1]] && 515 | val >= curData[offset[2]] && val >= curData[offset[3]] && val >= curData[offset[5]] && 516 | val >= curData[offset[6]] && val >= curData[offset[7]] && val >= curData[offset[8]] && 517 | val >= preData[offset[0]] && val >= preData[offset[1]] && val >= preData[offset[2]] && 518 | val >= preData[offset[3]] && val >= preData[offset[4]] && val >= preData[offset[5]] && 519 | val >= preData[offset[6]] && val >= preData[offset[7]] && val >= preData[offset[8]] && 520 | val >= nexData[offset[0]] && val >= nexData[offset[1]] && val >= nexData[offset[2]] && 521 | val >= nexData[offset[3]] && val >= nexData[offset[4]] && val >= nexData[offset[5]] && 522 | val >= nexData[offset[6]] && val >= nexData[offset[7]] && val >= nexData[offset[8]]) || 523 | (val < 0 && val <= curData[offset[0]] && val <= curData[offset[1]] && 524 | val <= curData[offset[2]] && val <= curData[offset[3]] && val <= curData[offset[5]] && 525 | val <= curData[offset[6]] && val <= curData[offset[7]] && val <= curData[offset[8]] && 526 | val <= preData[offset[0]] && val <= preData[offset[1]] && val <= preData[offset[2]] && 527 | val <= 
preData[offset[3]] && val <= preData[offset[4]] && val <= preData[offset[5]] && 528 | val <= preData[offset[6]] && val <= preData[offset[7]] && val <= preData[offset[8]] && 529 | val <= nexData[offset[0]] && val <= nexData[offset[1]] && val <= nexData[offset[2]] && 530 | val <= nexData[offset[3]] && val <= nexData[offset[4]] && val <= nexData[offset[5]] && 531 | val <= nexData[offset[6]] && val <= nexData[offset[7]] && val <= nexData[offset[8]])))) { 532 | continue; 533 | } 534 | 535 | int extremaR = y, extremaC = x, extremaL = curLayer; 536 | 537 | if (!adjustExtremaPoint(doGPyramid, 538 | keyPoint, 539 | extremaR, extremaC, extremaL, 540 | octaveLayers, 541 | curOctave, 542 | contrastThreshold, 543 | edgeThreshold, 544 | sigma, 545 | offset)) { 546 | continue; 547 | } 548 | 549 | float octaveSigma = keyPoint.octaveSize; 550 | 551 | float maxValue = calculateHist(gaussPyramid[curOctave][extremaL], 552 | extremaC, 553 | extremaR, 554 | (int) roundf(SIFT_ORIENTATION_SIGMA_FCTER * octaveSigma), 555 | SIFT_ORIENTATION_RADIUS * octaveSigma, 556 | hist, 557 | n); 558 | 559 | findKeyPoints(kpts, keyPoint, maxValue * SIFT_ORIENTATION_PEAK_RATIO, hist, n); 560 | } 561 | 562 | prePtr += width; 563 | curPtr += width; 564 | nexPtr += width; 565 | } 566 | 567 | if (!kpts.empty()) { 568 | #ifdef _OPENMP 569 | omp_set_lock(&lock); 570 | #endif 571 | for (auto iter = kpts.begin(); iter < kpts.end(); ++iter) { 572 | keyPoints.push_back(*iter); 573 | } 574 | 575 | #ifdef _OPENMP 576 | omp_unset_lock(&lock); 577 | #endif 578 | } 579 | } 580 | 581 | #ifdef _OPENMP 582 | omp_destroy_lock(&lock); 583 | #endif 584 | } 585 | 586 | /** 587 | * find the extrema point 588 | */ 589 | void findExtremaPoint(vector * >> &gaussPyramid, 590 | vector * >> &doGPyramid, 591 | vector &keyPoints, 592 | int octave, 593 | int octaveLayers, 594 | float contrastThreshold, 595 | float edgeThreshold, 596 | float sigma) { 597 | int n = SIFT_ORIENTATION_HIST_BINS; 598 | int threshold = (int) (0.5f * 
contrastThreshold / octaveLayers * 255 * SIFT_IMAGE_SCALE); 599 | 600 | int offset[9]; 601 | 602 | for (int o = 0; o < octave; ++o) { 603 | int width = doGPyramid[o][0]->width; 604 | 605 | offset[0] = -width - 1; 606 | offset[1] = -width; 607 | offset[2] = -width + 1; 608 | offset[3] = -1; 609 | offset[4] = 0; 610 | offset[5] = 1; 611 | offset[6] = width - 1; 612 | offset[7] = width; 613 | offset[8] = width + 1; 614 | 615 | for (int l = 1; l <= octaveLayers; ++l) { 616 | findExtremaPointOne(gaussPyramid, 617 | doGPyramid, 618 | keyPoints, 619 | octave, 620 | octaveLayers, 621 | contrastThreshold, 622 | edgeThreshold, 623 | threshold, 624 | o, 625 | l, 626 | n, 627 | sigma, 628 | offset); 629 | } 630 | } 631 | } 632 | 633 | /** 634 | * if the inited image is been doubled, than resize the size 635 | */ 636 | void resizeKeyPoints(vector &keyPoints) { 637 | auto iter = keyPoints.begin(); 638 | 639 | for (; iter < keyPoints.end(); ++iter) { 640 | (*iter).x /= 2.0f; 641 | (*iter).y /= 2.0f; 642 | (*iter).size /= 2.0f; 643 | } 644 | } 645 | 646 | /** 647 | * sort the keypoint 648 | */ 649 | struct KeyPointCMP { 650 | vector keyPoints; 651 | 652 | KeyPointCMP(const vector &keyPoints) { 653 | this->keyPoints = keyPoints; 654 | } 655 | 656 | bool operator()(int i, int j) const { 657 | SiftySifty::KeyPoint kp1 = keyPoints[i]; 658 | SiftySifty::KeyPoint kp2 = keyPoints[j]; 659 | 660 | if (kp1.x != kp2.x) { 661 | return kp1.x < kp2.x; 662 | } 663 | 664 | if (kp1.y != kp2.y) { 665 | return kp1.y < kp2.y; 666 | } 667 | 668 | if (kp1.size != kp2.size) { 669 | return kp1.size < kp2.size; 670 | } 671 | 672 | if (kp1.angle != kp2.angle) { 673 | return kp1.angle < kp2.angle; 674 | } 675 | 676 | return i < j; 677 | } 678 | }; 679 | 680 | /** 681 | * remove the doubled keypoint 682 | * @param keyPoints 683 | */ 684 | void removeDoubleKeyPoints(vector &keyPoints) { 685 | int i, j; 686 | int n = keyPoints.size(); 687 | 688 | /**sorted keypoint*/ 689 | vector sortedIndex(n); 690 | 691 | 
/**mark the keypoint*/ 692 | vector map(n, 1); 693 | 694 | for (int i = 0; i < sortedIndex.size(); ++i) { 695 | sortedIndex[i] = i; 696 | } 697 | 698 | /**sort the keypoint*/ 699 | std::sort(sortedIndex.begin(), sortedIndex.end(), KeyPointCMP(keyPoints)); 700 | 701 | for (i = 1, j = 0; i < n; ++i) { 702 | KeyPoint kp1 = keyPoints[sortedIndex[j]]; 703 | KeyPoint kp2 = keyPoints[sortedIndex[i]]; 704 | 705 | if (kp1.x != kp2.x 706 | || kp1.y != kp2.y 707 | || kp1.size != kp2.size 708 | || kp1.angle != kp2.angle) { 709 | j = i; 710 | } else { 711 | map[i] = 0; 712 | } 713 | } 714 | 715 | for (i = 0, j = 0; i < n; ++i) { 716 | if (1 == map[i]) { 717 | if (i != j) { 718 | keyPoints[j] = keyPoints[i]; 719 | } 720 | 721 | j++; 722 | } 723 | } 724 | 725 | keyPoints.resize(j); 726 | } 727 | 728 | /** 729 | * calculate the keypoint's descriptor 730 | * @param image 731 | * @param x 732 | * @param y 733 | * @param angle 734 | * @param scale 735 | * @param d 736 | * @param n 737 | * @param descriptor 738 | */ 739 | void calculateDescriptorOne(Mat *image, 740 | int x, int y, 741 | float angle, 742 | float scale, 743 | int d, 744 | int n, 745 | float *descriptor) { 746 | int16_t *data = image->data; 747 | int width = image->width; 748 | int height = image->height; 749 | 750 | float expScale = -1.f / (d * d * 0.5f); 751 | 752 | float sin = sinf(angle * PI / 180.0f); 753 | float cos = cosf(angle * PI / 180.0f); 754 | 755 | /**360 to n*/ 756 | float binPerRad = n / 360.f; 757 | 758 | float histWidth = SIFT_DESCRIPTOR_SCAE_FCTER * scale; 759 | 760 | int radius = (int) (roundf(histWidth * 1.4142135623730951f * (d + 1) * 0.5f)); 761 | 762 | radius = min_value(radius, (int) sqrt(width * width + height * height)); 763 | 764 | sin /= histWidth; 765 | cos /= histWidth; 766 | 767 | int len = (radius + radius + 1) * (radius + radius + 1); 768 | int histLen = (d + 2) * (d + 2) * (n + 2); 769 | 770 | float *buffer = (float *) malloc(sizeof(float) * (len * 7 + histLen)); 771 | float *dx = 
buffer, *dy = dx + len, *mag = dy + len, *ori = mag + len, *weight = ori + len; 772 | float *rBin = weight + len, *cBin = rBin + len, *hist = cBin + len; 773 | 774 | memset(hist, 0, sizeof(float) * histLen); 775 | 776 | int realLen = 0; 777 | for (int i = -radius; i <= radius; ++i) { 778 | for (int j = -radius; j <= radius; ++j) { 779 | float cRotate = cos * j - sin * i; 780 | float rRotate = sin * j + cos * i; 781 | 782 | float rBin0 = rRotate + d / 2.0f - 0.5f; 783 | float cBin0 = cRotate + d / 2.0f - 0.5f; 784 | 785 | int realX = x + j; 786 | int realY = y + i; 787 | 788 | if (rBin0 > -1 && rBin0 < d && cBin0 > -1 && cBin0 < d 789 | && realX > 0 && realX < width - 1 && realY > 0 && realY < height - 1) { 790 | dx[realLen] = (float) (data[realY * width + realX + 1] - data[realY * width + realX - 1]); 791 | dy[realLen] = (float) (data[(realY - 1) * width + realX] - data[(realY + 1) * width + realX]); 792 | 793 | rBin[realLen] = rBin0; 794 | cBin[realLen] = cBin0; 795 | 796 | weight[realLen] = (rRotate * rRotate + cRotate * cRotate) * expScale; 797 | 798 | realLen++; 799 | } 800 | } 801 | } 802 | 803 | for (int i = 0; i < realLen; ++i) { 804 | ori[i] = atan2f360(dy[i], dx[i]); 805 | mag[i] = sqrtf(dy[i] * dy[i] + dx[i] * dx[i]); 806 | weight[i] = exp(weight[i]); 807 | } 808 | 809 | for (int i = 0; i < realLen; ++i) { 810 | float rbin = rBin[i]; 811 | float cbin = cBin[i]; 812 | float obin = (ori[i] - angle) * binPerRad; 813 | 814 | float magnitude = mag[i] * weight[i]; 815 | 816 | int r0 = (int) floorf(rbin); 817 | int c0 = (int) floorf(cbin); 818 | int o0 = (int) floorf(obin); 819 | 820 | rbin -= r0; 821 | cbin -= c0; 822 | obin -= o0; 823 | 824 | if (o0 < 0) { 825 | o0 += n; 826 | } 827 | if (o0 >= n) { 828 | o0 -= n; 829 | } 830 | 831 | float v_r1 = magnitude * rbin, v_r0 = magnitude - v_r1; 832 | float v_rc11 = v_r1 * cbin, v_rc10 = v_r1 - v_rc11; 833 | float v_rc01 = v_r0 * cbin, v_rc00 = v_r0 - v_rc01; 834 | float v_rco111 = v_rc11 * obin, v_rco110 = v_rc11 - 
v_rco111; 835 | float v_rco101 = v_rc10 * obin, v_rco100 = v_rc10 - v_rco101; 836 | float v_rco011 = v_rc01 * obin, v_rco010 = v_rc01 - v_rco011; 837 | float v_rco001 = v_rc00 * obin, v_rco000 = v_rc00 - v_rco001; 838 | 839 | int idx = ((r0 + 1) * (d + 2) + c0 + 1) * (n + 2) + o0; 840 | 841 | hist[idx] += v_rco000; 842 | hist[idx + 1] += v_rco001; 843 | hist[idx + (n + 2)] += v_rco010; 844 | hist[idx + (n + 3)] += v_rco011; 845 | hist[idx + (d + 2) * (n + 2)] += v_rco100; 846 | hist[idx + (d + 2) * (n + 2) + 1] += v_rco101; 847 | hist[idx + (d + 3) * (n + 2)] += v_rco110; 848 | hist[idx + (d + 3) * (n + 2) + 1] += v_rco111; 849 | } 850 | 851 | for (int i = 0; i < d; ++i) { 852 | for (int j = 0; j < d; ++j) { 853 | int idx = ((i + 1) * (d + 2) + (j + 1)) * (n + 2); 854 | hist[idx] += hist[idx + n]; 855 | hist[idx + 1] += hist[idx + n + 1]; 856 | 857 | for (int k = 0; k < n; k++) { 858 | descriptor[(i * d + j) * n + k] = hist[idx + k]; 859 | } 860 | } 861 | } 862 | 863 | len = d * d * n; 864 | float norm = 0; 865 | for (int i = 0; i < len; ++i) { 866 | norm += descriptor[i] * descriptor[i]; 867 | } 868 | 869 | float threshold = sqrt(norm) * SIFT_DESCRIPTOR_MAGNITUDE_THRESHOLD; 870 | 871 | norm = 0; 872 | for (int i = 0; i < len; ++i) { 873 | descriptor[i] = min_value(threshold, descriptor[i]); 874 | norm += descriptor[i] * descriptor[i]; 875 | } 876 | 877 | norm = SIFT_DESCRIPTOR_FCTOR / max_value(sqrt(norm), FLT_EPSILON); 878 | for (int i = 0; i < len; ++i) { 879 | descriptor[i] = (uint8_t) (descriptor[i] * norm); 880 | } 881 | 882 | free(buffer); 883 | } 884 | 885 | void calculateDescriptor(vector *> > &gaussPyramid, vector &keyPoints, int d, int n) { 886 | for (auto iter = keyPoints.begin(); iter < keyPoints.end(); ++iter) { 887 | int octave = (*iter).octave; 888 | int octaveLayer = (*iter).octaveLayer; 889 | 890 | int x = (*iter).octaveX; 891 | int y = (*iter).octaveY; 892 | 893 | float angle = 360.f - (*iter).angle; 894 | float size = (*iter).octaveSize; 895 | 
896 | (*iter).descriptor = (float *) malloc(sizeof(float) * d * d * n); 897 | 898 | calculateDescriptorOne(gaussPyramid[octave][octaveLayer], 899 | x, y, 900 | angle, size, 901 | d, n, 902 | (*iter).descriptor); 903 | } 904 | } 905 | 906 | /** 907 | * calculate the descriptor 908 | * @param gaussPyramid 909 | * @param keyPoints 910 | * @param d 911 | * @param n 912 | */ 913 | void calculateDescriptor1(vector *> > &gaussPyramid, vector &keyPoints, int d, int n) { 914 | int size = keyPoints.size(); 915 | 916 | int maxThreadNum = getHardwareCPUNum(); 917 | int threadIndex = 0; 918 | 919 | /**remove the border*/ 920 | int stride = max((int) (roundf(1.0f * size / maxThreadNum)), 1); 921 | 922 | #pragma omp parallel for private(threadIndex) 923 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 924 | int start = threadIndex * stride; 925 | int end = (threadIndex == (maxThreadNum - 1)) ? size : min_value(start + stride, size); 926 | 927 | for (int i = start; i < end; ++i) { 928 | int octave = keyPoints[i].octave; 929 | int octaveLayer = keyPoints[i].octaveLayer; 930 | 931 | int x = keyPoints[i].octaveX; 932 | int y = keyPoints[i].octaveY; 933 | 934 | float angle = 360.f - keyPoints[i].angle; 935 | float size = keyPoints[i].octaveSize; 936 | 937 | keyPoints[i].descriptor = (float *) malloc(sizeof(float) * d * d * n); 938 | 939 | calculateDescriptorOne(gaussPyramid[octave][octaveLayer], 940 | x, y, 941 | angle, size, 942 | d, n, 943 | keyPoints[i].descriptor); 944 | } 945 | } 946 | } 947 | 948 | template 949 | void deleteMatPyramid(vector * >> &pyramid) { 950 | auto i1 = pyramid.begin(); 951 | 952 | for (; i1 < pyramid.end(); ++i1) { 953 | auto i2 = (*i1).begin(); 954 | 955 | for (; i2 < (*i1).end(); ++i2) { 956 | deleteMat(*i2); 957 | } 958 | 959 | (*i1).clear(); 960 | } 961 | 962 | pyramid.clear(); 963 | } 964 | 965 | void sift(uint8_t *image, int width, int height, 966 | vector &keyPoints, 967 | int octaveLayers, 968 | float sigma, 969 | float 
contrastThreshold, 970 | int edgeThreshold, 971 | bool doubleInitImage, 972 | int descriptorWidth, 973 | int descriptorHistBin) { 974 | 975 | Mat *base = initBaseImage(image, width, height, doubleInitImage, sigma, SIFT_IMAGE_SCALE_SHIFT); 976 | 977 | int octave = (int) (round(log(min(base->width, base->height)) / log(2) - 2)); 978 | 979 | vector *> > gaussPyramid = buildGaussPyramid(base, octave, octaveLayers, sigma); 980 | 981 | vector *> > dogPyramid = buildDoGPyramid(gaussPyramid, octave, octaveLayers); 982 | 983 | findExtremaPoint(gaussPyramid, 984 | dogPyramid, 985 | keyPoints, 986 | octave, 987 | octaveLayers, 988 | contrastThreshold, 989 | edgeThreshold, 990 | sigma); 991 | 992 | if (doubleInitImage) { 993 | resizeKeyPoints(keyPoints); 994 | } 995 | 996 | removeDoubleKeyPoints(keyPoints); 997 | 998 | /**calcaulate descip*/ 999 | calculateDescriptor1(gaussPyramid, keyPoints, descriptorWidth, descriptorHistBin); 1000 | 1001 | deleteMatPyramid(gaussPyramid); 1002 | deleteMatPyramid(dogPyramid); 1003 | } 1004 | 1005 | void sift(uint8_t *image, int width, int height, vector &keyPoints) { 1006 | sift(image, width, height, 1007 | keyPoints, 1008 | SIFT_OCTAVE_LAYERS, 1009 | SIFT_SIGMA, 1010 | SIFT_CONTRAST_THRESHOLD, 1011 | SIFT_EDGE_THESHOLD, 1012 | SIFT_DOUBLE_INITED_IMAGE, 1013 | SIFT_DESCRIPTOR_WIDTH, 1014 | SIFT_DESCRIPTOR_HIST_BIN); 1015 | } 1016 | 1017 | } 1018 | 1019 | 1020 | 1021 | 1022 | 1023 | 1024 | 1025 | 1026 | 1027 | 1028 | 1029 | 1030 | -------------------------------------------------------------------------------- /src/linearfilter.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by yanyuanchi on 2017/1/18. 
3 | */ 4 | 5 | #include 6 | 7 | #include "utils.h" 8 | #include "linearfilter.h" 9 | 10 | namespace SiftySifty { 11 | void linearFilterHorizonByKernel7(uint8_t *src, 12 | uint8_t *dst, 13 | int width, 14 | int height, 15 | int (*mult)[256], 16 | int delta, 17 | int shift) { 18 | 19 | int maxThreadNum = getHardwareCPUNum(); 20 | int threadIndex = 0; 21 | 22 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 23 | 24 | #pragma omp parallel for private(threadIndex) 25 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 26 | int start = threadIndex * stride; 27 | int end = (threadIndex == (maxThreadNum - 1)) ? height : std::min(start + stride, height); 28 | 29 | uint8_t *srcData = src + start * (width + 6); 30 | uint8_t *dstData = dst + start * width; 31 | 32 | uint8_t *realSrcData; 33 | 34 | int sum; 35 | 36 | for (int y = start; y < end; ++y) { 37 | for (int x = 0; x < width; ++x) { 38 | realSrcData = srcData + x; 39 | sum = delta; 40 | 41 | sum += mult[0][realSrcData[0]]; 42 | sum += mult[1][realSrcData[1]]; 43 | sum += mult[2][realSrcData[2]]; 44 | sum += mult[3][realSrcData[3]]; 45 | sum += mult[4][realSrcData[4]]; 46 | sum += mult[5][realSrcData[5]]; 47 | sum += mult[6][realSrcData[6]]; 48 | 49 | dstData[x] = static_cast((sum + delta) >> shift); 50 | } 51 | 52 | srcData += (width + 6); 53 | dstData += width; 54 | } 55 | } 56 | } 57 | 58 | void linearFilterHorizonByKernel9(uint8_t *src, 59 | uint8_t *dst, 60 | int width, 61 | int height, 62 | int (*mult)[256], 63 | int delta, 64 | int shift) { 65 | int maxThreadNum = getHardwareCPUNum(); 66 | int threadIndex = 0; 67 | 68 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 69 | 70 | #pragma omp parallel for private(threadIndex) 71 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 72 | int start = threadIndex * stride; 73 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 74 | 75 | uint8_t *srcData = src + start * (width + 8); 76 | uint8_t *dstData = dst + start * width; 77 | 78 | uint8_t *realSrcData; 79 | 80 | int sum; 81 | 82 | for (int y = start; y < end; ++y) { 83 | for (int x = 0; x < width; ++x) { 84 | realSrcData = srcData + x; 85 | sum = delta; 86 | 87 | sum += mult[0][realSrcData[0]]; 88 | sum += mult[1][realSrcData[1]]; 89 | sum += mult[2][realSrcData[2]]; 90 | sum += mult[3][realSrcData[3]]; 91 | sum += mult[4][realSrcData[4]]; 92 | sum += mult[5][realSrcData[5]]; 93 | sum += mult[6][realSrcData[6]]; 94 | sum += mult[7][realSrcData[7]]; 95 | sum += mult[8][realSrcData[8]]; 96 | 97 | dstData[x] = static_cast((sum + delta) >> shift); 98 | } 99 | 100 | srcData += (width + 8); 101 | dstData += width; 102 | } 103 | } 104 | } 105 | 106 | void linearFilterHorizonByKernel11(uint8_t *src, 107 | uint8_t *dst, 108 | int width, 109 | int height, 110 | int (*mult)[256], 111 | int delta, 112 | int shift) { 113 | int maxThreadNum = getHardwareCPUNum(); 114 | int threadIndex = 0; 115 | 116 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 117 | 118 | #pragma omp parallel for private(threadIndex) 119 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 120 | int start = threadIndex * stride; 121 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 122 | 123 | uint8_t *srcData = src + start * (width + 10); 124 | uint8_t *dstData = dst + start * width; 125 | 126 | uint8_t *realSrcData; 127 | 128 | int sum; 129 | 130 | for (int y = start; y < end; ++y) { 131 | for (int x = 0; x < width; ++x) { 132 | realSrcData = srcData + x; 133 | sum = delta; 134 | 135 | sum += mult[0][realSrcData[0]]; 136 | sum += mult[1][realSrcData[1]]; 137 | sum += mult[2][realSrcData[2]]; 138 | sum += mult[3][realSrcData[3]]; 139 | sum += mult[4][realSrcData[4]]; 140 | sum += mult[5][realSrcData[5]]; 141 | sum += mult[6][realSrcData[6]]; 142 | sum += mult[7][realSrcData[7]]; 143 | sum += mult[8][realSrcData[8]]; 144 | sum += mult[9][realSrcData[9]]; 145 | sum += mult[10][realSrcData[10]]; 146 | 147 | dstData[x] = static_cast((sum + delta) >> shift); 148 | } 149 | 150 | srcData += (width + 10); 151 | dstData += width; 152 | } 153 | } 154 | } 155 | 156 | 157 | void linearFilterHorizonByKernel13(uint8_t *src, 158 | uint8_t *dst, 159 | int width, 160 | int height, 161 | int (*mult)[256], 162 | int delta, 163 | int shift) { 164 | int maxThreadNum = getHardwareCPUNum(); 165 | int threadIndex = 0; 166 | 167 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 168 | 169 | #pragma omp parallel for private(threadIndex) 170 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 171 | int start = threadIndex * stride; 172 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 173 | 174 | uint8_t *srcData = src + start * (width + 12); 175 | uint8_t *dstData = dst + start * width; 176 | 177 | uint8_t *realSrcData; 178 | 179 | int sum; 180 | 181 | for (int y = start; y < end; ++y) { 182 | for (int x = 0; x < width; ++x) { 183 | realSrcData = srcData + x; 184 | sum = delta; 185 | 186 | sum += mult[0][realSrcData[0]]; 187 | sum += mult[1][realSrcData[1]]; 188 | sum += mult[2][realSrcData[2]]; 189 | sum += mult[3][realSrcData[3]]; 190 | sum += mult[4][realSrcData[4]]; 191 | sum += mult[5][realSrcData[5]]; 192 | sum += mult[6][realSrcData[6]]; 193 | sum += mult[7][realSrcData[7]]; 194 | sum += mult[8][realSrcData[8]]; 195 | sum += mult[9][realSrcData[9]]; 196 | sum += mult[10][realSrcData[10]]; 197 | sum += mult[11][realSrcData[11]]; 198 | sum += mult[12][realSrcData[12]]; 199 | 200 | dstData[x] = static_cast((sum + delta) >> shift); 201 | } 202 | 203 | srcData += (width + 12); 204 | dstData += width; 205 | } 206 | } 207 | } 208 | 209 | void linearFilterHorizonByKernel15(uint8_t *src, 210 | uint8_t *dst, 211 | int width, 212 | int height, 213 | int (*mult)[256], 214 | int delta, 215 | int shift) { 216 | int maxThreadNum = getHardwareCPUNum(); 217 | int threadIndex = 0; 218 | 219 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 220 | 221 | #pragma omp parallel for private(threadIndex) 222 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 223 | int start = threadIndex * stride; 224 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 225 | 226 | uint8_t *srcData = src + start * (width + 14); 227 | uint8_t *dstData = dst + start * width; 228 | 229 | uint8_t *realSrcData; 230 | 231 | int sum; 232 | 233 | for (int y = start; y < end; ++y) { 234 | for (int x = 0; x < width; ++x) { 235 | realSrcData = srcData + x; 236 | sum = delta; 237 | 238 | sum += mult[0][realSrcData[0]]; 239 | sum += mult[1][realSrcData[1]]; 240 | sum += mult[2][realSrcData[2]]; 241 | sum += mult[3][realSrcData[3]]; 242 | sum += mult[4][realSrcData[4]]; 243 | sum += mult[5][realSrcData[5]]; 244 | sum += mult[6][realSrcData[6]]; 245 | sum += mult[7][realSrcData[7]]; 246 | sum += mult[8][realSrcData[8]]; 247 | sum += mult[9][realSrcData[9]]; 248 | sum += mult[10][realSrcData[10]]; 249 | sum += mult[11][realSrcData[11]]; 250 | sum += mult[12][realSrcData[12]]; 251 | sum += mult[13][realSrcData[13]]; 252 | sum += mult[14][realSrcData[14]]; 253 | 254 | dstData[x] = static_cast((sum + delta) >> shift); 255 | } 256 | 257 | srcData += (width + 14); 258 | dstData += width; 259 | } 260 | } 261 | } 262 | 263 | void linearFilterHorizonByKernel17(uint8_t *src, 264 | uint8_t *dst, 265 | int width, 266 | int height, 267 | int (*mult)[256], 268 | int delta, 269 | int shift) { 270 | int maxThreadNum = getHardwareCPUNum(); 271 | int threadIndex = 0; 272 | 273 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 274 | 275 | #pragma omp parallel for private(threadIndex) 276 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 277 | int start = threadIndex * stride; 278 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 279 | 280 | uint8_t *srcData = src + start * (width + 16); 281 | uint8_t *dstData = dst + start * width; 282 | 283 | uint8_t *realSrcData; 284 | 285 | int sum; 286 | 287 | for (int y = start; y < end; ++y) { 288 | for (int x = 0; x < width; ++x) { 289 | realSrcData = srcData + x; 290 | sum = delta; 291 | 292 | sum += mult[0][realSrcData[0]]; 293 | sum += mult[1][realSrcData[1]]; 294 | sum += mult[2][realSrcData[2]]; 295 | sum += mult[3][realSrcData[3]]; 296 | sum += mult[4][realSrcData[4]]; 297 | sum += mult[5][realSrcData[5]]; 298 | sum += mult[6][realSrcData[6]]; 299 | sum += mult[7][realSrcData[7]]; 300 | sum += mult[8][realSrcData[8]]; 301 | sum += mult[9][realSrcData[9]]; 302 | sum += mult[10][realSrcData[10]]; 303 | sum += mult[11][realSrcData[11]]; 304 | sum += mult[12][realSrcData[12]]; 305 | sum += mult[13][realSrcData[13]]; 306 | sum += mult[14][realSrcData[14]]; 307 | sum += mult[15][realSrcData[15]]; 308 | sum += mult[16][realSrcData[16]]; 309 | 310 | dstData[x] = static_cast((sum + delta) >> shift); 311 | } 312 | 313 | srcData += (width + 16); 314 | dstData += width; 315 | } 316 | } 317 | } 318 | 319 | void linearFilterHorizonByKernel19(uint8_t *src, 320 | uint8_t *dst, 321 | int width, 322 | int height, 323 | int (*mult)[256], 324 | int delta, 325 | int shift) { 326 | int maxThreadNum = getHardwareCPUNum(); 327 | int threadIndex = 0; 328 | 329 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 330 | 331 | #pragma omp parallel for private(threadIndex) 332 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 333 | int start = threadIndex * stride; 334 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 335 | 336 | uint8_t *srcData = src + start * (width + 18); 337 | uint8_t *dstData = dst + start * width; 338 | 339 | uint8_t *realSrcData; 340 | 341 | int sum; 342 | 343 | for (int y = start; y < end; ++y) { 344 | for (int x = 0; x < width; ++x) { 345 | realSrcData = srcData + x; 346 | sum = delta; 347 | 348 | sum += mult[0][realSrcData[0]]; 349 | sum += mult[1][realSrcData[1]]; 350 | sum += mult[2][realSrcData[2]]; 351 | sum += mult[3][realSrcData[3]]; 352 | sum += mult[4][realSrcData[4]]; 353 | sum += mult[5][realSrcData[5]]; 354 | sum += mult[6][realSrcData[6]]; 355 | sum += mult[7][realSrcData[7]]; 356 | sum += mult[8][realSrcData[8]]; 357 | sum += mult[9][realSrcData[9]]; 358 | sum += mult[10][realSrcData[10]]; 359 | sum += mult[11][realSrcData[11]]; 360 | sum += mult[12][realSrcData[12]]; 361 | sum += mult[13][realSrcData[13]]; 362 | sum += mult[14][realSrcData[14]]; 363 | sum += mult[15][realSrcData[15]]; 364 | sum += mult[16][realSrcData[16]]; 365 | sum += mult[17][realSrcData[17]]; 366 | sum += mult[18][realSrcData[18]]; 367 | 368 | dstData[x] = static_cast((sum + delta) >> shift); 369 | } 370 | 371 | srcData += (width + 18); 372 | dstData += width; 373 | } 374 | } 375 | } 376 | 377 | void linearFilterHorizonByKernel(uint8_t *src, 378 | uint8_t *dst, 379 | int width, 380 | int height, 381 | int (*mult)[256], 382 | int delta, 383 | int shift, 384 | int size) { 385 | int radius = (size - 1) / 2; 386 | 387 | int maxThreadNum = getHardwareCPUNum(); 388 | int threadIndex = 0; 389 | 390 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 391 | 392 | #pragma omp parallel for private(threadIndex) 393 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 394 | int start = threadIndex * stride; 395 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 396 | 397 | uint8_t *srcData = src + start * (width + radius + radius); 398 | uint8_t *dstData = dst + start * width; 399 | 400 | uint8_t *realSrcData; 401 | 402 | int sum; 403 | 404 | for (int y = start; y < end; ++y) { 405 | for (int x = 0; x < width; ++x) { 406 | realSrcData = srcData + x; 407 | sum = delta; 408 | 409 | for (int k = 0; k < size; ++k) { 410 | sum += mult[k][realSrcData[k]]; 411 | } 412 | 413 | dstData[x] = static_cast((sum + delta) >> shift); 414 | } 415 | 416 | srcData += width + radius + radius; 417 | dstData += width; 418 | } 419 | } 420 | } 421 | 422 | void linearFilterHorizon(uint8_t *src, 423 | uint8_t *dst, 424 | int width, 425 | int height, 426 | int (*mult)[256], 427 | int delta, 428 | int shift, 429 | int size) { 430 | int radius = (size - 1) / 2; 431 | 432 | uint8_t *tmp = new uint8_t[(width + radius + radius) * height]; 433 | 434 | /** 435 | * -------------------------------- 436 | * | | | | 437 | * |radius | |radius | 438 | * | | | | 439 | * | | | | 440 | * | | | | 441 | * | | | | 442 | * -------------------------------- 443 | */ 444 | uint8_t *srcOffset = src; 445 | uint8_t *tmpOffset = tmp; 446 | 447 | for (int y = 0; y < height; ++y) { 448 | std::fill(tmpOffset, 449 | tmpOffset + radius, 450 | srcOffset[0]); 451 | 452 | memcpy(tmpOffset + radius, 453 | srcOffset, 454 | sizeof(uint8_t) * width); 455 | 456 | std::fill(tmpOffset + radius + width, 457 | tmpOffset + +radius + radius + width, 458 | srcOffset[width - 1]); 459 | 460 | srcOffset += width; 461 | tmpOffset += width + radius + radius; 462 | } 463 | 464 | if (7 == size) { 465 | linearFilterHorizonByKernel7(tmp, dst, width, height, mult, delta, shift); 466 | } else if (9 == size) { 467 | linearFilterHorizonByKernel9(tmp, dst, width, height, mult, delta, shift); 468 | } else if (11 == size) { 469 | linearFilterHorizonByKernel11(tmp, dst, width, height, mult, delta, shift); 470 | } else if (13 == size) { 471 | 
linearFilterHorizonByKernel13(tmp, dst, width, height, mult, delta, shift); 472 | } else if (15 == size) { 473 | linearFilterHorizonByKernel15(tmp, dst, width, height, mult, delta, shift); 474 | } else if (17 == size) { 475 | linearFilterHorizonByKernel17(tmp, dst, width, height, mult, delta, shift); 476 | } else if (19 == size) { 477 | linearFilterHorizonByKernel19(tmp, dst, width, height, mult, delta, shift); 478 | } else { 479 | linearFilterHorizonByKernel(tmp, dst, width, height, mult, delta, shift, size); 480 | } 481 | 482 | delete[] tmp; 483 | } 484 | 485 | 486 | void linearFilterVerticalByKernel7(uint8_t *src, 487 | uint8_t *dst, 488 | int width, 489 | int height, 490 | int (*mult)[256], 491 | int delta, 492 | int shift) { 493 | int maxThreadNum = getHardwareCPUNum(); 494 | int threadIndex = 0; 495 | 496 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 497 | 498 | #pragma omp parallel for private(threadIndex) 499 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 500 | int start = threadIndex * stride; 501 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 502 | 503 | uint8_t *srcData = src + start * width; 504 | uint8_t *dstData = dst + start * width; 505 | 506 | uint8_t *realSrcData; 507 | 508 | int sum; 509 | 510 | for (int y = start; y < end; ++y) { 511 | for (int x = 0; x < width; ++x) { 512 | realSrcData = srcData + x; 513 | 514 | sum = delta; 515 | 516 | sum += mult[0][realSrcData[0]]; 517 | 518 | realSrcData += width; 519 | sum += mult[1][realSrcData[0]]; 520 | 521 | realSrcData += width; 522 | sum += mult[2][realSrcData[0]]; 523 | 524 | realSrcData += width; 525 | sum += mult[3][realSrcData[0]]; 526 | 527 | realSrcData += width; 528 | sum += mult[4][realSrcData[0]]; 529 | 530 | realSrcData += width; 531 | sum += mult[5][realSrcData[0]]; 532 | 533 | realSrcData += width; 534 | sum += mult[6][realSrcData[0]]; 535 | 536 | dstData[x] = static_cast((sum + delta) >> shift); 537 | } 538 | 539 | srcData += width; 540 | dstData += width; 541 | } 542 | } 543 | } 544 | 545 | void linearFilterVerticalByKernel9(uint8_t *src, 546 | uint8_t *dst, 547 | int width, 548 | int height, 549 | int (*mult)[256], 550 | int delta, 551 | int shift) { 552 | int maxThreadNum = getHardwareCPUNum(); 553 | int threadIndex = 0; 554 | 555 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 556 | 557 | #pragma omp parallel for private(threadIndex) 558 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 559 | int start = threadIndex * stride; 560 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 561 | 562 | uint8_t *srcData = src + start * width; 563 | uint8_t *dstData = dst + start * width; 564 | 565 | uint8_t *realSrcData; 566 | 567 | int sum; 568 | 569 | for (int y = start; y < end; ++y) { 570 | for (int x = 0; x < width; ++x) { 571 | realSrcData = srcData + x; 572 | sum = delta; 573 | 574 | sum += mult[0][realSrcData[0]]; 575 | 576 | realSrcData += width; 577 | sum += mult[1][realSrcData[0]]; 578 | 579 | realSrcData += width; 580 | sum += mult[2][realSrcData[0]]; 581 | 582 | realSrcData += width; 583 | sum += mult[3][realSrcData[0]]; 584 | 585 | realSrcData += width; 586 | sum += mult[4][realSrcData[0]]; 587 | 588 | realSrcData += width; 589 | sum += mult[5][realSrcData[0]]; 590 | 591 | realSrcData += width; 592 | sum += mult[6][realSrcData[0]]; 593 | 594 | realSrcData += width; 595 | sum += mult[7][realSrcData[0]]; 596 | 597 | realSrcData += width; 598 | sum += mult[8][realSrcData[0]]; 599 | 600 | dstData[x] = static_cast((sum + delta) >> shift); 601 | } 602 | 603 | srcData += width; 604 | dstData += width; 605 | } 606 | } 607 | } 608 | 609 | void linearFilterVerticalByKernel11(uint8_t *src, 610 | uint8_t *dst, 611 | int width, 612 | int height, 613 | int (*mult)[256], 614 | int delta, 615 | int shift) { 616 | int maxThreadNum = getHardwareCPUNum(); 617 | int threadIndex = 0; 618 | 619 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 620 | 621 | #pragma omp parallel for private(threadIndex) 622 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 623 | int start = threadIndex * stride; 624 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 625 | 626 | uint8_t *srcData = src + start * width; 627 | uint8_t *dstData = dst + start * width; 628 | 629 | uint8_t *realSrcData; 630 | 631 | int sum; 632 | 633 | for (int y = start; y < end; ++y) { 634 | for (int x = 0; x < width; ++x) { 635 | realSrcData = srcData + x; 636 | sum = delta; 637 | 638 | sum += mult[0][realSrcData[0]]; 639 | 640 | realSrcData += width; 641 | sum += mult[1][realSrcData[0]]; 642 | 643 | realSrcData += width; 644 | sum += mult[2][realSrcData[0]]; 645 | 646 | realSrcData += width; 647 | sum += mult[3][realSrcData[0]]; 648 | 649 | realSrcData += width; 650 | sum += mult[4][realSrcData[0]]; 651 | 652 | realSrcData += width; 653 | sum += mult[5][realSrcData[0]]; 654 | 655 | realSrcData += width; 656 | sum += mult[6][realSrcData[0]]; 657 | 658 | realSrcData += width; 659 | sum += mult[7][realSrcData[0]]; 660 | 661 | realSrcData += width; 662 | sum += mult[8][realSrcData[0]]; 663 | 664 | realSrcData += width; 665 | sum += mult[9][realSrcData[0]]; 666 | 667 | realSrcData += width; 668 | sum += mult[10][realSrcData[0]]; 669 | 670 | dstData[x] = static_cast((sum + delta) >> shift); 671 | } 672 | 673 | srcData += width; 674 | dstData += width; 675 | } 676 | } 677 | } 678 | 679 | void linearFilterVerticalByKernel13(uint8_t *src, 680 | uint8_t *dst, 681 | int width, 682 | int height, 683 | int (*mult)[256], 684 | int delta, 685 | int shift) { 686 | int maxThreadNum = getHardwareCPUNum(); 687 | int threadIndex = 0; 688 | 689 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 690 | 691 | #pragma omp parallel for private(threadIndex) 692 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 693 | int start = threadIndex * stride; 694 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 695 | 696 | uint8_t *srcData = src + start * width; 697 | uint8_t *dstData = dst + start * width; 698 | 699 | uint8_t *realSrcData; 700 | 701 | int sum; 702 | 703 | for (int y = start; y < end; ++y) { 704 | for (int x = 0; x < width; ++x) { 705 | realSrcData = srcData + x; 706 | sum = delta; 707 | 708 | sum += mult[0][realSrcData[0]]; 709 | 710 | realSrcData += width; 711 | sum += mult[1][realSrcData[0]]; 712 | 713 | realSrcData += width; 714 | sum += mult[2][realSrcData[0]]; 715 | 716 | realSrcData += width; 717 | sum += mult[3][realSrcData[0]]; 718 | 719 | realSrcData += width; 720 | sum += mult[4][realSrcData[0]]; 721 | 722 | realSrcData += width; 723 | sum += mult[5][realSrcData[0]]; 724 | 725 | realSrcData += width; 726 | sum += mult[6][realSrcData[0]]; 727 | 728 | realSrcData += width; 729 | sum += mult[7][realSrcData[0]]; 730 | 731 | realSrcData += width; 732 | sum += mult[8][realSrcData[0]]; 733 | 734 | realSrcData += width; 735 | sum += mult[9][realSrcData[0]]; 736 | 737 | realSrcData += width; 738 | sum += mult[10][realSrcData[0]]; 739 | 740 | realSrcData += width; 741 | sum += mult[11][realSrcData[0]]; 742 | 743 | realSrcData += width; 744 | sum += mult[12][realSrcData[0]]; 745 | 746 | dstData[x] = static_cast((sum + delta) >> shift); 747 | } 748 | 749 | srcData += width; 750 | dstData += width; 751 | } 752 | } 753 | } 754 | 755 | void linearFilterVerticalByKernel15(uint8_t *src, 756 | uint8_t *dst, 757 | int width, 758 | int height, 759 | int (*mult)[256], 760 | int delta, 761 | int shift) { 762 | int maxThreadNum = getHardwareCPUNum(); 763 | int threadIndex = 0; 764 | 765 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 766 | 767 | #pragma omp parallel for private(threadIndex) 768 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 769 | int start = threadIndex * stride; 770 | int end = (threadIndex == (maxThreadNum - 1)) ? 
height : std::min(start + stride, height); 771 | 772 | uint8_t *srcData = src + start * width; 773 | uint8_t *dstData = dst + start * width; 774 | 775 | uint8_t *realSrcData; 776 | 777 | int sum; 778 | 779 | for (int y = start; y < end; ++y) { 780 | for (int x = 0; x < width; ++x) { 781 | realSrcData = srcData + x; 782 | sum = delta; 783 | 784 | sum += mult[0][realSrcData[0]]; 785 | 786 | realSrcData += width; 787 | sum += mult[1][realSrcData[0]]; 788 | 789 | realSrcData += width; 790 | sum += mult[2][realSrcData[0]]; 791 | 792 | realSrcData += width; 793 | sum += mult[3][realSrcData[0]]; 794 | 795 | realSrcData += width; 796 | sum += mult[4][realSrcData[0]]; 797 | 798 | realSrcData += width; 799 | sum += mult[5][realSrcData[0]]; 800 | 801 | realSrcData += width; 802 | sum += mult[6][realSrcData[0]]; 803 | 804 | realSrcData += width; 805 | sum += mult[7][realSrcData[0]]; 806 | 807 | realSrcData += width; 808 | sum += mult[8][realSrcData[0]]; 809 | 810 | realSrcData += width; 811 | sum += mult[9][realSrcData[0]]; 812 | 813 | realSrcData += width; 814 | sum += mult[10][realSrcData[0]]; 815 | 816 | realSrcData += width; 817 | sum += mult[11][realSrcData[0]]; 818 | 819 | realSrcData += width; 820 | sum += mult[12][realSrcData[0]]; 821 | 822 | realSrcData += width; 823 | sum += mult[13][realSrcData[0]]; 824 | 825 | realSrcData += width; 826 | sum += mult[14][realSrcData[0]]; 827 | 828 | dstData[x] = static_cast((sum + delta) >> shift); 829 | } 830 | 831 | srcData += width; 832 | dstData += width; 833 | } 834 | } 835 | } 836 | 837 | void linearFilterVerticalByKernel17(uint8_t *src, 838 | uint8_t *dst, 839 | int width, 840 | int height, 841 | int (*mult)[256], 842 | int delta, 843 | int shift) { 844 | int maxThreadNum = getHardwareCPUNum(); 845 | int threadIndex = 0; 846 | 847 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 848 | 849 | #pragma omp parallel for private(threadIndex) 850 | for (threadIndex = 0; threadIndex < maxThreadNum; 
++threadIndex) { 851 | int start = threadIndex * stride; 852 | int end = (threadIndex == (maxThreadNum - 1)) ? height : std::min(start + stride, height); 853 | 854 | uint8_t *srcData = src + start * width; 855 | uint8_t *dstData = dst + start * width; 856 | 857 | uint8_t *realSrcData; 858 | 859 | int sum; 860 | 861 | for (int y = start; y < end; ++y) { 862 | for (int x = 0; x < width; ++x) { 863 | realSrcData = srcData + x; 864 | sum = delta; 865 | 866 | sum += mult[0][realSrcData[0]]; 867 | 868 | realSrcData += width; 869 | sum += mult[1][realSrcData[0]]; 870 | 871 | realSrcData += width; 872 | sum += mult[2][realSrcData[0]]; 873 | 874 | realSrcData += width; 875 | sum += mult[3][realSrcData[0]]; 876 | 877 | realSrcData += width; 878 | sum += mult[4][realSrcData[0]]; 879 | 880 | realSrcData += width; 881 | sum += mult[5][realSrcData[0]]; 882 | 883 | realSrcData += width; 884 | sum += mult[6][realSrcData[0]]; 885 | 886 | realSrcData += width; 887 | sum += mult[7][realSrcData[0]]; 888 | 889 | realSrcData += width; 890 | sum += mult[8][realSrcData[0]]; 891 | 892 | realSrcData += width; 893 | sum += mult[9][realSrcData[0]]; 894 | 895 | realSrcData += width; 896 | sum += mult[10][realSrcData[0]]; 897 | 898 | realSrcData += width; 899 | sum += mult[11][realSrcData[0]]; 900 | 901 | realSrcData += width; 902 | sum += mult[12][realSrcData[0]]; 903 | 904 | realSrcData += width; 905 | sum += mult[13][realSrcData[0]]; 906 | 907 | realSrcData += width; 908 | sum += mult[14][realSrcData[0]]; 909 | 910 | realSrcData += width; 911 | sum += mult[15][realSrcData[0]]; 912 | 913 | realSrcData += width; 914 | sum += mult[16][realSrcData[0]]; 915 | 916 | dstData[x] = static_cast((sum + delta) >> shift); 917 | } 918 | 919 | srcData += width; 920 | dstData += width; 921 | } 922 | } 923 | } 924 | 925 | void linearFilterVerticalByKernel19(uint8_t *src, 926 | uint8_t *dst, 927 | int width, 928 | int height, 929 | int (*mult)[256], 930 | int delta, 931 | int shift) { 932 | int maxThreadNum = 
getHardwareCPUNum(); 933 | int threadIndex = 0; 934 | 935 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 936 | 937 | #pragma omp parallel for private(threadIndex) 938 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 939 | int start = threadIndex * stride; 940 | int end = (threadIndex == (maxThreadNum - 1)) ? height : std::min(start + stride, height); 941 | 942 | uint8_t *srcData = src + start * width; 943 | uint8_t *dstData = dst + start * width; 944 | 945 | uint8_t *realSrcData; 946 | 947 | int sum; 948 | 949 | for (int y = start; y < end; ++y) { 950 | for (int x = 0; x < width; ++x) { 951 | realSrcData = srcData + x; 952 | sum = delta; 953 | 954 | sum += mult[0][realSrcData[0]]; 955 | 956 | realSrcData += width; 957 | sum += mult[1][realSrcData[0]]; 958 | 959 | realSrcData += width; 960 | sum += mult[2][realSrcData[0]]; 961 | 962 | realSrcData += width; 963 | sum += mult[3][realSrcData[0]]; 964 | 965 | realSrcData += width; 966 | sum += mult[4][realSrcData[0]]; 967 | 968 | realSrcData += width; 969 | sum += mult[5][realSrcData[0]]; 970 | 971 | realSrcData += width; 972 | sum += mult[6][realSrcData[0]]; 973 | 974 | realSrcData += width; 975 | sum += mult[7][realSrcData[0]]; 976 | 977 | realSrcData += width; 978 | sum += mult[8][realSrcData[0]]; 979 | 980 | realSrcData += width; 981 | sum += mult[9][realSrcData[0]]; 982 | 983 | realSrcData += width; 984 | sum += mult[10][realSrcData[0]]; 985 | 986 | realSrcData += width; 987 | sum += mult[11][realSrcData[0]]; 988 | 989 | realSrcData += width; 990 | sum += mult[12][realSrcData[0]]; 991 | 992 | realSrcData += width; 993 | sum += mult[13][realSrcData[0]]; 994 | 995 | realSrcData += width; 996 | sum += mult[14][realSrcData[0]]; 997 | 998 | realSrcData += width; 999 | sum += mult[15][realSrcData[0]]; 1000 | 1001 | realSrcData += width; 1002 | sum += mult[16][realSrcData[0]]; 1003 | 1004 | realSrcData += width; 1005 | sum += mult[17][realSrcData[0]]; 1006 | 1007 | 
realSrcData += width; 1008 | sum += mult[18][realSrcData[0]]; 1009 | 1010 | dstData[x] = static_cast((sum + delta) >> shift); 1011 | } 1012 | 1013 | srcData += width; 1014 | dstData += width; 1015 | } 1016 | } 1017 | } 1018 | 1019 | void linearFilterVerticalByKernel(uint8_t *src, 1020 | uint8_t *dst, 1021 | int width, 1022 | int height, 1023 | int (*mult)[256], 1024 | int delta, 1025 | int shift, 1026 | int size) { 1027 | int maxThreadNum = getHardwareCPUNum(); 1028 | int threadIndex = 0; 1029 | 1030 | int stride = std::max(static_cast(roundf(1.f * height / maxThreadNum)), 1); 1031 | 1032 | #pragma omp parallel for private(threadIndex) 1033 | for (threadIndex = 0; threadIndex < maxThreadNum; ++threadIndex) { 1034 | int start = threadIndex * stride; 1035 | int end = (threadIndex == (maxThreadNum - 1)) ? height : std::min(start + stride, height); 1036 | 1037 | uint8_t *srcData = src + start * width; 1038 | uint8_t *dstData = dst + start * width; 1039 | 1040 | uint8_t *realSrcData; 1041 | 1042 | int *sum = (int *) malloc(sizeof(int) * width); 1043 | 1044 | for (int y = start; y < end; ++y) { 1045 | std::fill(sum, sum + width, delta); 1046 | 1047 | realSrcData = srcData; 1048 | 1049 | for (int k = 0; k < size; ++k) { 1050 | for (int x = 0; x < width; ++x) { 1051 | sum[x] += mult[k][realSrcData[x]]; 1052 | } 1053 | 1054 | realSrcData += width; 1055 | } 1056 | 1057 | for (int x = 0; x < width; ++x) { 1058 | dstData[x] = static_cast(((sum[x] + delta) >> shift)); 1059 | } 1060 | 1061 | srcData += width; 1062 | dstData += width; 1063 | } 1064 | 1065 | free(sum); 1066 | } 1067 | } 1068 | 1069 | void linearFilterVertical(uint8_t *src, 1070 | uint8_t *dst, 1071 | int width, 1072 | int height, 1073 | int (*mult)[256], 1074 | int delta, 1075 | int shift, 1076 | int size) { 1077 | int radius = (size - 1) / 2; 1078 | uint8_t *tmp = new uint8_t[width * (height + radius + radius)]; 1079 | 1080 | memcpy(tmp + radius * width, src, sizeof(uint8_t) * width * height); 1081 | 1082 | for 
(int i = 0; i < radius; ++i) { 1083 | memcpy(tmp + i * width, src, sizeof(uint8_t) * width); 1084 | memcpy(tmp + (radius + height + i) * width, src + (height - 1) * width, sizeof(uint8_t) * width); 1085 | } 1086 | 1087 | if (7 == size) { 1088 | linearFilterVerticalByKernel7(tmp, dst, width, height, mult, delta, shift); 1089 | } else if (9 == size) { 1090 | linearFilterVerticalByKernel9(tmp, dst, width, height, mult, delta, shift); 1091 | } else if (11 == size) { 1092 | linearFilterVerticalByKernel11(tmp, dst, width, height, mult, delta, shift); 1093 | } else if (13 == size) { 1094 | linearFilterVerticalByKernel13(tmp, dst, width, height, mult, delta, shift); 1095 | } else if (15 == size) { 1096 | linearFilterVerticalByKernel15(tmp, dst, width, height, mult, delta, shift); 1097 | } else if (17 == size) { 1098 | linearFilterVerticalByKernel17(tmp, dst, width, height, mult, delta, shift); 1099 | } else if (19 == size) { 1100 | linearFilterVerticalByKernel19(tmp, dst, width, height, mult, delta, shift); 1101 | } else { 1102 | linearFilterVerticalByKernel(tmp, dst, width, height, mult, delta, shift, size); 1103 | } 1104 | 1105 | delete[] tmp; 1106 | } 1107 | 1108 | } 1109 | 1110 | 1111 | 1112 | 1113 | 1114 | 1115 | 1116 | 1117 | 1118 | 1119 | 1120 | 1121 | 1122 | 1123 | 1124 | 1125 | 1126 | 1127 | 1128 | 1129 | 1130 | 1131 | 1132 | 1133 | 1134 | 1135 | 1136 | --------------------------------------------------------------------------------