├── .gitignore ├── CMakeLists.txt ├── FastTextConfig.cmake.in ├── LICENSE ├── README.md ├── cvBoostChar.xml ├── src ├── CMakeLists.txt ├── CharClassifier.cpp ├── CharClassifier.h ├── FASTex.cpp ├── FASTex.hpp ├── FTPyramid.cpp ├── FTPyramid.hpp ├── FT_common.cpp ├── FT_common.hpp ├── FastTextLine.cpp ├── FastTextLine.h ├── FastTextLineDetector.cpp ├── FastTextLineDetector.h ├── HoughTLDetector.cpp ├── HoughTLDetector.h ├── IOUtils.cpp ├── IOUtils.h ├── KeyPoints.cpp ├── KeyPoints.h ├── Python │ ├── CMakeLists.txt │ ├── FindNumPy.cmake │ ├── pyFastTextAPIG.c │ ├── pyFastTextG.cpp │ └── pyFastTextG.h ├── Segmenter.cpp ├── Segmenter.h ├── TimeUtils.cpp ├── TimeUtils.h ├── detectors.cpp ├── detectors.h ├── geometry.cpp ├── geometry.h ├── process_dir.cpp ├── segm │ ├── flood_fill.cpp │ ├── flood_fill.h │ ├── segmentation.cpp │ └── segmentation.h └── vis │ ├── componentsVis.cpp │ └── componentsVis.h └── tools ├── .gitignore ├── evaluateSegmentation.py ├── ft.py ├── icdarUtils.py ├── segmentation.py ├── testLines.py ├── trainCharFeatures.py ├── utils.py ├── utls.py └── vis.py /.gitignore: -------------------------------------------------------------------------------- 1 | /Release/ 2 | /CMakeLists.txt~ 3 | /perf.data 4 | /perf.data.old 5 | /Debug/ 6 | /.settings/ 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(FastText) 3 | 4 | SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) 5 | set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/bin") 6 | 7 | option (BUILD_WITH_DEBUG_INFO "Build with debugging information" ON) 8 | option(BUILD_SHARED_LIBS "Build shared libraries (.dll/.so) instead of static ones (.lib/.a)" OFF ) 9 | option(BUILD_PARALLEL "With OpenMP" On ) 10 | 11 | set(MODULES_DIR "${PROJECT_SOURCE_DIR}") 12 | 13 | if (MSVC) 14 | add_definitions( -DNOMINMAX ) 15 | add_definitions( 
-D_VARIADIC_MAX=10 )
endif()

#-------------------Extra libraries -----------------------------------
# Optional OpenMP support (originally added for flann). Skipped entirely on Windows.
if(NOT WIN32)
  if(BUILD_PARALLEL)
    find_package(OpenMP)
    if(OPENMP_FOUND)
      # Record the GNU OpenMP runtime and compile C and C++ with the OpenMP flags.
      set(EXTRA_PERF_LIBS gomp)
      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
      add_definitions(-DPARALLEL)
    endif()
  else()
    # This branch is taken when BUILD_PARALLEL is OFF; it is unrelated to Android.
    message(STATUS "OpenMP disabled (BUILD_PARALLEL is OFF)")
  endif()
endif()
#-------------------Extra libraries -----------------------------------

# Language standard and optimisation flags.
# The -std option is appended to CMAKE_CXX_FLAGS (not passed through add_definitions)
# so that it only reaches the C++ compiler and never the C compiler.
if(ANDROID)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++0x")
  add_definitions("-DANDROID_LOG")
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ftree-vectorize -ffast-math")
else()
  # Append rather than overwrite: CMAKE_CXX_FLAGS may already carry the OpenMP flags
  # set above as well as flags supplied by the user.
  if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
  endif()

  if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wno-long-long -Wno-reorder -ftree-vectorize")
  endif()
endif()

# ----------------------------------------------------------------------------
# Output directories
# ----------------------------------------------------------------------------
if(ANDROID)
  set(LIBRARY_OUTPUT_PATH "${CMAKE_BINARY_DIR}/lib/${ANDROID_NDK_ABI_NAME}")
  set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/bin")
else()
  set(LIBRARY_OUTPUT_PATH "${CMAKE_BINARY_DIR}/lib")
  set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}")
  set(3P_LIBRARY_OUTPUT_PATH "${CMAKE_BINARY_DIR}/3rdparty/lib${LIB_SUFFIX}")
endif()

# OpenCV is mandatory: every source file in src/ uses it, so fail at configure time
# instead of at the first compile error.
find_package(OpenCV REQUIRED)
if(OpenCV_DIR)
  message(STATUS "Using OpenCV from ${OpenCV_DIR}, include dirs: ${OpenCV_INCLUDE_DIRS} ${OpenCV_VERSION_MAJOR}")
  include_directories(${OpenCV_INCLUDE_DIRS})
endif()

# Select the OpenCV 2.4 code paths in the sources (they test the OPENCV_24 macro).
# The expansion is quoted so that an empty version string cannot break the if() syntax.
if("${OpenCV_VERSION_MAJOR}" EQUAL 2)
  add_definitions("-DOPENCV_24")
endif()

if(WITH_SKEW_DETECTION)
  find_package(skewDetection)
  add_definitions("-DWITH_SKEW_DETECTION")
  set(EXTRA_LIBS "skewDetection")
endif()

add_subdirectory(src)


if(ANDROID)
  include(cmake/OpenCVDetectAndroidSDK.cmake REQUIRED)

  if(NOT ANDROID_TOOLS_Pkg_Revision GREATER 13)
    message(WARNING "OpenCV requires Android SDK tools revision 14 or newer. Otherwise tests and samples will not be compiled.")
  endif()

  # Generate FastText.mk for ndk-build (Android build tool)
  include(cmake/FastTextGenAndroidMK.cmake REQUIRED)
endif()

# ---------------------------------------------------------------------------
# Export FastTextConfig.cmake for using in other projects
# ---------------------------------------------------------------------------
export(TARGETS FTreader FILE "${PROJECT_BINARY_DIR}/FastTextLibraryDepends.cmake")
configure_file(FastTextConfig.cmake.in
  "${PROJECT_BINARY_DIR}/FastTextConfig.cmake" @ONLY)


# ---------------------------------------------------------------------------
# Build the documentation
# ---------------------------------------------------------------------------
if(BUILD_DOC)
  message( STATUS "Creating documentation ...")
  find_package(Doxygen)
  if(DOXYGEN_FOUND)
    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_BINARY_DIR}/Doxyfile @ONLY)
    # "doc" target: runs Doxygen on the configured Doxyfile from the build directory.
    add_custom_target(
      doc ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
      COMMENT "Generating API documentation with Doxygen" VERBATIM
    )

endif(DOXYGEN_FOUND)
endif (BUILD_DOC)


--------------------------------------------------------------------------------
/FastTextConfig.cmake.in:
--------------------------------------------------------------------------------

# Package configuration template, expanded by configure_file(... @ONLY).
# PROJECT_* paths are used instead of CMAKE_* so that the generated file still points
# at the FastText tree when FastText is built as a subproject: the root CMakeLists.txt
# writes FastTextLibraryDepends.cmake into PROJECT_BINARY_DIR.
include_directories(@PROJECT_SOURCE_DIR@/src)
include(@PROJECT_BINARY_DIR@/FastTextLibraryDepends.cmake)

set(FastText_LIBS FTreader @OpenCV_LIBS@)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# FASText

### FASText: Efficient Unconstrained Scene Text Detector, Busta M., Neumann L., Matas J.: ICCV 2015.
- http://cmp.felk.cvut.cz/~neumalu1/neumann_iccv2015.pdf

To build a standalone library, run
```
mkdir Release
cd Release
cmake -D CMAKE_BUILD_TYPE=Release ..
make
```
Prerequisites:
- OpenCV
- Python + NumPy (optional)

After building the executables, you can use the toy examples in Python:
```
cd tools
python segmentation.py
```
- will process and draw FASText keypoints on the scale pyramid.

```
cd tools
python evaluateSegmentation.py
```
- will reproduce results on the ICDAR 2013 dataset (requires the Challenge 2 dataset & GT segmentations)

For text line clustering, you can run the simple demo:
```
cd tools
python testLines.py
```
The text line clustering is just simple voting in Hough space, where each region votes for each line going through the region centroid.

Please cite this paper if you use this data or code:
```
@InProceedings{Busta_2015_ICCV,
  author = {Busta, Michal and Neumann, Lukas and Matas, Jiri},
  title = {FASText: Efficient Unconstrained Scene Text Detector},
  booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
  month = {December},
  year = {2015}
}
```

--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------


# Build position-independent code so the static library can be linked into shared
# objects. -fPIC is a GCC/Clang option, so it is not passed to MSVC.
if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
endif()

include_directories ("${PROJECT_SOURCE_DIR}/src")

# Core FASText library.
add_library(FTreader STATIC
  "FTPyramid.cpp"
  "FT_common.cpp"
  "FASTex.cpp"
  "KeyPoints.cpp"
  "segm/segmentation.cpp"
  "segm/flood_fill.cpp"
  "IOUtils.cpp"
  "TimeUtils.cpp"
  "Segmenter.cpp"
  "CharClassifier.cpp"
  "detectors.cpp"
  "vis/componentsVis.cpp"
  "HoughTLDetector.cpp"
  "FastTextLine.cpp"
  "FastTextLineDetector.cpp"
  "geometry.cpp"
)

# Command-line tool built from process_dir.cpp.
add_executable(process_dir
  "process_dir.cpp"
)

target_link_libraries (process_dir
  FTreader
  ${EXTRA_LIBS}
  ${OpenCV_LIBS}
)

# The Python subdirectory is only added on non-Windows, non-Android builds.
if(NOT WIN32 AND NOT ANDROID)
  add_subdirectory(Python)
endif(NOT WIN32 AND NOT ANDROID)



--------------------------------------------------------------------------------
/src/CharClassifier.cpp:
-------------------------------------------------------------------------------- 1 | /* 2 | * CharClassifier.cpp 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. Rosten and T. 
Drummond, ECCV 2006 23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "CharClassifier.h" 30 | 31 | using namespace std; 32 | 33 | namespace cmp 34 | { 35 | 36 | CharClassifier::CharClassifier() : classificationTime(0) 37 | { 38 | // TODO Auto-generated constructor stub 39 | 40 | } 41 | 42 | CharClassifier::~CharClassifier() 43 | { 44 | // TODO Auto-generated destructor stub 45 | } 46 | 47 | bool CharClassifier::classifyLetter(LetterCandidate& letter, cv::Mat debugImage) 48 | { 49 | return true; 50 | } 51 | 52 | double CharClassifier::isWord(LetterCandidate& letter, cv::Mat debugImage) 53 | { 54 | return 0; 55 | } 56 | 57 | bool CharClassifier::extractLineFeatures(LetterCandidate& letter) 58 | { 59 | cv::Mat mask; 60 | if(letter.contours.size() == 0) 61 | { 62 | cv::copyMakeBorder(letter.mask, mask, 1, 1, 1, 1, cv::BORDER_CONSTANT); 63 | cv::findContours(mask, letter.contours, letter.hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE); 64 | if(letter.contours.size() == 0) 65 | return false; 66 | } 67 | 68 | double epsion = (mask.cols + mask.rows) / 2.0 * 0.022; 69 | 70 | letter.contoursAp.resize(letter.contours.size()); 71 | float outerContourArea = 0; 72 | for(size_t i = 0; i < letter.contours.size(); i++) 73 | { 74 | 75 | cv::approxPolyDP(letter.contours[i], letter.contoursAp[i], epsion, true); 76 | if(letter.hierarchy[i][3] < 0) //contour 77 | { 78 | float area = cv::contourArea(letter.contours[i]); 79 | letter.featuresArea += area; 80 | if(area >= outerContourArea) 81 | { 82 | outerContourArea = area; 83 | if( letter.cHullPoints.size() == 0 ) 84 | cv::convexHull(letter.contours[i], letter.cHullPoints); 85 | if( letter.cHullPoints.size() == 0 ) 86 | letter.cHullPoints = letter.contours[i]; 87 | letter.convexHullArea = cv::contourArea(letter.cHullPoints); 88 | } 89 | }else{ //hole 90 | float area = cv::contourArea(letter.contours[i]); 91 | letter.featuresArea -= area; 92 | } 93 | } 94 | letter.meanStrokeWidth = 2; 95 | 96 | return true; 97 
| } 98 | 99 | void extractFeatureVect(cv::Mat& maskO, std::vector& featureVector, LetterCandidate& letter) 100 | { 101 | featureVector.reserve(6); 102 | 103 | if(letter.contours.size() == 0) 104 | { 105 | cv::Mat mask; 106 | cv::copyMakeBorder(maskO, mask, 1, 1, 1, 1, cv::BORDER_CONSTANT); 107 | findContours(mask, letter.contours, letter.hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE); 108 | } 109 | float holesArea = 0; 110 | float outerContourArea = 0; 111 | float perimeter = 0; 112 | 113 | double al = cv::arcLength(letter.contours[0], true); 114 | double epsion = al * 0.022; 115 | letter.contoursAp.resize(letter.contours.size()); 116 | for(size_t i = 0; i < letter.contours.size(); i++) 117 | { 118 | 119 | cv::approxPolyDP(letter.contours[i], letter.contoursAp[i], epsion, true); 120 | if(letter.hierarchy[i][3] < 0) //contour 121 | { 122 | float area = cv::contourArea(letter.contours[i]); 123 | letter.featuresArea += area; 124 | if(area > outerContourArea) 125 | { 126 | outerContourArea = area; 127 | perimeter = cv::arcLength(letter.contours[i], false); 128 | if( letter.cHullPoints.size() == 0 ) 129 | cv::convexHull(letter.contours[i], letter.cHullPoints); 130 | letter.convexHullArea = cv::contourArea(letter.cHullPoints); 131 | } 132 | }else{ //hole 133 | float area = cv::contourArea(letter.contours[i]); 134 | holesArea += area; 135 | letter.featuresArea -= area; 136 | } 137 | } 138 | 139 | featureVector.push_back(letter.getStrokeAreaRatioP() / letter.area); 140 | if( perimeter == 0) 141 | featureVector.push_back(0); 142 | else 143 | featureVector.push_back(letter.featuresArea / (float) (perimeter * perimeter)); 144 | if(outerContourArea == 0) 145 | featureVector.push_back(0); 146 | else 147 | featureVector.push_back(letter.convexHullArea / (float) outerContourArea); 148 | featureVector.push_back((float) holesArea / (float) letter.area); 149 | //std::cout << outerContourArea << std::endl; 150 | if(letter.cHullPoints.size() == 0 || perimeter == 0) 151 | { 152 | 
featureVector.push_back(0); 153 | featureVector.push_back(MIN(maskO.rows, maskO.cols) / (float) MAX(maskO.rows, maskO.cols)); 154 | } 155 | else 156 | { 157 | featureVector.push_back(cv::arcLength(letter.cHullPoints, true) / perimeter); 158 | cv::RotatedRect rotatedRect = cv::minAreaRect(letter.cHullPoints); 159 | cv::Point2f vertices[4]; 160 | rotatedRect.points(vertices); 161 | 162 | float width = rotatedRect.size.width; 163 | float height = rotatedRect.size.height; 164 | featureVector.push_back(MIN(width, height) / MAX(width, height)); 165 | } 166 | } 167 | 168 | void extractFeatureVectNoSsp(cv::Mat& maskO, std::vector& featureVector) 169 | { 170 | std::vector > contours; 171 | std::vector hierarchy; 172 | if(contours.size() == 0) 173 | { 174 | cv::Mat mask; 175 | cv::copyMakeBorder(maskO, mask, 1, 1, 1, 1, cv::BORDER_CONSTANT); 176 | findContours(mask, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE); 177 | } 178 | float holesArea = 0; 179 | float outerContourArea = 0; 180 | float perimeter = 0; 181 | 182 | double al = cv::arcLength(contours[0], true); 183 | double epsion = al * 0.022; 184 | std::vector > contoursAp; 185 | contoursAp.resize(contours.size()); 186 | std::vector cHullPoints; 187 | double featuresArea = 0; 188 | double convexHullArea = 0; 189 | for(size_t i = 0; i < contours.size(); i++) 190 | { 191 | 192 | cv::approxPolyDP(contours[i], contoursAp[i], epsion, true); 193 | if(hierarchy[i][3] < 0) //contour 194 | { 195 | float area = cv::contourArea(contours[i]); 196 | featuresArea += area; 197 | if(area >= outerContourArea) 198 | { 199 | outerContourArea = area; 200 | perimeter = cv::arcLength(contours[i], false); 201 | if( cHullPoints.size() == 0 ) 202 | cv::convexHull(contours[i], cHullPoints); 203 | convexHullArea = cv::contourArea(cHullPoints); 204 | } 205 | }else{ //hole 206 | float area = cv::contourArea(contours[i]); 207 | holesArea += area; 208 | featuresArea -= area; 209 | } 210 | } 211 | 212 | if( perimeter == 0) 213 | 
featureVector.push_back(0); 214 | else 215 | featureVector.push_back(featuresArea / (float) (perimeter * perimeter)); 216 | if(outerContourArea == 0) 217 | featureVector.push_back(0); 218 | else 219 | featureVector.push_back(convexHullArea / (float) outerContourArea); 220 | featureVector.push_back((float) holesArea / (float) cv::countNonZero(maskO)); 221 | //std::cout << outerContourArea << std::endl; 222 | if(cHullPoints.size() == 0 || perimeter == 0) 223 | { 224 | featureVector.push_back(0); 225 | featureVector.push_back(MIN(maskO.rows, maskO.cols) / (float) MAX(maskO.rows, maskO.cols)); 226 | } 227 | else 228 | { 229 | featureVector.push_back(cv::arcLength(cHullPoints, true) / perimeter); 230 | cv::RotatedRect rotatedRect = cv::minAreaRect(cHullPoints); 231 | cv::Point2f vertices[4]; 232 | rotatedRect.points(vertices); 233 | 234 | float width = cv::norm(vertices[0] - vertices[1]); 235 | float height = cv::norm(vertices[1] - vertices[2]); 236 | featureVector.push_back(MIN(width, height) / MAX(width, height)); 237 | } 238 | } 239 | 240 | 241 | static void extractCharFeatures(cv::Mat& maskO, cv::Mat& featureVector, LetterCandidate& letter) 242 | { 243 | cv::Mat mask; 244 | cv::copyMakeBorder(maskO, mask, 1, 1, 1, 1, cv::BORDER_CONSTANT); 245 | featureVector = cv::Mat::zeros(1, 6, CV_32F); 246 | float *pFeatureVector = featureVector.ptr(0); 247 | 248 | if(letter.contours.size() == 0) 249 | { 250 | findContours(mask, letter.contours, letter.hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE); 251 | } 252 | float holesArea = 0; 253 | float outerContourArea = 0; 254 | float perimeter = 0; 255 | double al = cv::arcLength(letter.contours[0], true); 256 | double epsion = al * 0.022; 257 | letter.contoursAp.resize(1); 258 | for(size_t i = 0; i < letter.contours.size(); i++) 259 | { 260 | if( i == 0) 261 | cv::approxPolyDP(letter.contours[i], letter.contoursAp[i], epsion, true); 262 | if(letter.hierarchy[i][3] < 0) //contour 263 | { 264 | float area = 
cv::contourArea(letter.contours[i]); 265 | area = MAX(area, al); 266 | letter.featuresArea += area; 267 | if(area >= outerContourArea) 268 | { 269 | outerContourArea = area; 270 | perimeter = cv::arcLength(letter.contours[i], false); 271 | if( letter.cHullPoints.size() == 0 ) 272 | cv::convexHull(letter.contours[i], letter.cHullPoints); 273 | if(letter.cHullPoints.size() == 0) 274 | { 275 | letter.cHullPoints = letter.contours[i]; 276 | } 277 | letter.convexHullArea = cv::contourArea(letter.cHullPoints); 278 | } 279 | }else{ //hole 280 | float area = cv::contourArea(letter.contours[i]); 281 | holesArea += area; 282 | letter.featuresArea -= area; 283 | } 284 | } 285 | 286 | double cHullLength = cv::arcLength(letter.cHullPoints, true); 287 | letter.convexHullArea = MAX(letter.convexHullArea, cHullLength); 288 | 289 | *(pFeatureVector) = letter.getStrokeAreaRatioP() / letter.area; 290 | pFeatureVector++; 291 | if( perimeter == 0) 292 | *(pFeatureVector) = 0; 293 | else 294 | *(pFeatureVector) = letter.featuresArea / (float) (perimeter * perimeter); 295 | pFeatureVector++; 296 | if(outerContourArea == 0) 297 | *(pFeatureVector) = 0; 298 | else 299 | *(pFeatureVector) = letter.convexHullArea / (float) outerContourArea; 300 | pFeatureVector++; 301 | *(pFeatureVector) = (float) holesArea / (float) letter.area; 302 | pFeatureVector++; 303 | //std::cout << outerContourArea << std::endl; 304 | 305 | if(letter.cHullPoints.size() == 0 || perimeter == 0) 306 | { 307 | *(pFeatureVector) = 0; 308 | pFeatureVector++; 309 | *(pFeatureVector) = MIN(mask.rows, mask.cols) / (float) MAX(mask.rows, mask.cols); 310 | } 311 | else 312 | { 313 | *(pFeatureVector) = cHullLength / perimeter; 314 | pFeatureVector++; 315 | cv::RotatedRect rotatedRect = cv::minAreaRect(letter.cHullPoints); 316 | cv::Point2f vertices[4]; 317 | rotatedRect.points(vertices); 318 | 319 | float width = rotatedRect.size.width; 320 | float height = rotatedRect.size.height; 321 | *(pFeatureVector) = MIN(width, height) 
/ MAX(width, height); 322 | } 323 | } 324 | 325 | bool CvBoostCharClassifier::classifyLetter(LetterCandidate& letter, cv::Mat debugImag) 326 | { 327 | double probability; 328 | bool val = predictProbability(letter, probability, debugImag ); 329 | letter.quality = probability; 330 | return val || letter.quality > 0.2; 331 | } 332 | 333 | double CvBoostCharClassifier::isWord(LetterCandidate& letter, cv::Mat debugImage) 334 | { 335 | if( letter.featureVector.empty() ) 336 | extractCharFeatures(letter.mask, letter.featureVector, letter); 337 | 338 | cv::Mat featureVectorMulti; 339 | cv::Mat cols = cv::Mat::zeros(1, 1, CV_32F); 340 | cv::hconcat(letter.featureVector, cols, featureVectorMulti); 341 | 342 | featureVectorMulti.at(0, 6) = letter.keypointIds.size(); 343 | 344 | int64 startTime = cv::getTickCount(); 345 | #ifdef OPENCV_24 346 | float sum = classifier->predict(featureVectorMulti, cv::Mat(), cv::Range::all(), false, true); 347 | double probability = 1.0f / (1.0f + exp (-sum) ); 348 | #else 349 | 350 | float votes = classifier->predict( featureVectorMulti, cv::noArray(), cv::ml::DTrees::PREDICT_SUM | cv::ml::StatModel::RAW_OUTPUT); 351 | double probability = (double)1-(double)1/(1+exp(-2*votes)); 352 | #endif 353 | classificationTime += cv::getTickCount() - startTime; 354 | 355 | return probability; 356 | } 357 | 358 | bool CvBoostCharClassifier::predictProbability(LetterCandidate& letter, double& probability, cv::Mat debugImag ) 359 | { 360 | if( letter.featureVector.empty() ) 361 | extractCharFeatures(letter.mask, letter.featureVector, letter); 362 | int64 startTime = cv::getTickCount(); 363 | #ifdef OPENCV_24 364 | float sum = classifier->predict(letter.featureVector, cv::Mat(), cv::Range::all(), false, true); 365 | 366 | int cls_idx = sum >= 0; 367 | const int* cmap = classifier->get_data()->cat_map->data.i; 368 | const int* cofs = classifier->get_data()->cat_ofs->data.i; 369 | const int* vtype = classifier->get_data()->var_type->data.i; 370 | 371 | int val 
= (float) cmap[cofs[vtype[classifier->get_data()->var_count]] + cls_idx]; 372 | probability = 1.0f / (1.0f + exp (-sum) ); 373 | #else 374 | float votes = classifier->predict( letter.featureVector, cv::noArray(), cv::ml::DTrees::PREDICT_SUM | cv::ml::StatModel::RAW_OUTPUT); 375 | probability = (double)1-(double)1/(1+exp(-2*votes)); 376 | int val = probability > 0.5; 377 | #endif 378 | classificationTime += cv::getTickCount() - startTime; 379 | 380 | return val; 381 | } 382 | 383 | void CvBoostCharClassifier::load(std::string& modelFile){ 384 | std::cout << "Loading CharCls Model from: " << modelFile << std::endl; 385 | #ifdef OPENCV_24 386 | classifier = new CvBoost(); 387 | classifier->load(modelFile.c_str(), "classifier"); 388 | #else 389 | classifier = cv::ml::StatModel::load( modelFile.c_str()/*, "classifier" */); 390 | #endif 391 | } 392 | 393 | } /* namespace cmp */ 394 | -------------------------------------------------------------------------------- /src/CharClassifier.h: -------------------------------------------------------------------------------- 1 | /* 2 | * CharClassifier.h 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. 
Rosten and T. Drummond, ECCV 2006 23 | */ 24 | #ifndef CHARCLASSIFIER_H_ 25 | #define CHARCLASSIFIER_H_ 26 | 27 | #include 28 | 29 | #include "segm/segmentation.h" 30 | 31 | namespace cmp 32 | { 33 | 34 | /** 35 | * @class cmp::CharClassifier 36 | * 37 | * @brief The character classifier interface 38 | * 39 | */ 40 | class CharClassifier 41 | { 42 | public: 43 | CharClassifier(); 44 | virtual ~CharClassifier(); 45 | 46 | virtual bool classifyLetter(LetterCandidate& letter, cv::Mat debugImage = cv::Mat()); 47 | 48 | virtual double isWord(LetterCandidate& letter, cv::Mat debugImage = cv::Mat()); 49 | 50 | virtual bool predictProbability(LetterCandidate& letter, double& probability, cv::Mat debugImage = cv::Mat() ){ 51 | probability = 0.5; 52 | return classifyLetter(letter, debugImage); 53 | } 54 | 55 | static bool extractLineFeatures(LetterCandidate& letter); 56 | 57 | int64 classificationTime; 58 | }; 59 | 60 | void extractFeatureVect(cv::Mat& maskO, std::vector& featureVector, LetterCandidate& letter); 61 | void extractFeatureVectNoSsp(cv::Mat& maskO, std::vector& featureVector); 62 | 63 | /** 64 | * 65 | */ 66 | class CvBoostCharClassifier : public CharClassifier 67 | { 68 | public: 69 | 70 | CvBoostCharClassifier() : CharClassifier(){ 71 | 72 | }; 73 | 74 | CvBoostCharClassifier(const char* modelFile) : CharClassifier(){ 75 | std::string sname = modelFile; 76 | load(sname); 77 | }; 78 | 79 | virtual ~CvBoostCharClassifier(){ 80 | 81 | }; 82 | 83 | virtual bool classifyLetter(LetterCandidate& letter, cv::Mat debugImage = cv::Mat() ); 84 | 85 | virtual double isWord(LetterCandidate& letter, cv::Mat debugImage = cv::Mat()); 86 | 87 | virtual bool predictProbability(LetterCandidate& letter, double& probability, cv::Mat debugImag = cv::Mat() ); 88 | 89 | //loads model file 90 | void load(std::string& modelFile); 91 | 92 | private: 93 | // Trained AdaBoost classifier 94 | #ifdef OPENCV_24 95 | cv::Ptr classifier; 96 | #else 97 | cv::Ptr classifier; 98 | #endif 99 | }; 
100 | 101 | } /* namespace cmp */ 102 | 103 | #endif /* CHARCLASSIFIER_H_ */ 104 | -------------------------------------------------------------------------------- /src/FASTex.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * FASTex.hpp 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. Rosten and T. 
Drummond, ECCV 2006 23 | */ 24 | #ifndef CMP_FAST_HPP_ 25 | #define CMP_FAST_HPP_ 26 | 27 | #include "KeyPoints.h" 28 | #include 29 | #include 30 | 31 | namespace cmp{ 32 | 33 | inline long ColourDistance(const uchar* e1, const uchar* e2) 34 | { 35 | int ur1 = e1[2]; 36 | int ur2 = e2[2]; 37 | long rmean = ( ur1 + ur2 ) / 2; 38 | long r = ur1 - ur2; 39 | int ug1 = e1[1]; 40 | int ug2 = e2[1]; 41 | long g = ug1 - ug2; 42 | int ub1 = e1[0]; 43 | int ub2 = e2[0]; 44 | long b = ub1 - ub2; 45 | return (((512+rmean)*r*r)>>8) + 4*g*g + (((767-rmean)*b*b)>>8); 46 | } 47 | 48 | inline long ColourDistanceMAX(const uchar* e1, const uchar* e2, uchar& sign) 49 | { 50 | int d1 = e1[0] - (int) e2[0]; 51 | int ad1 = abs(d1); 52 | int d2 = e1[0] - (int) e2[0]; 53 | int ad2 = abs(d2); 54 | int d3 = e1[0] - (int) e2[0]; 55 | int ad3 = abs(d3); 56 | if(ad1 > ad2) 57 | { 58 | if(ad1 > ad3) 59 | { 60 | sign = d1 > 0; 61 | return ad1; 62 | } 63 | sign = d3 > 0; 64 | return ad3; 65 | }else 66 | { 67 | if(ad2 > ad3) 68 | { 69 | sign = d2 > 0; 70 | return ad2; 71 | } 72 | sign = d3 > 0; 73 | return ad3; 74 | } 75 | } 76 | 77 | inline long ColourDistanceVec(const cv::Vec3b& e1, const cv::Vec3b& e2) 78 | { 79 | int ur1 = e1[2]; 80 | int ur2 = e2[2]; 81 | long rmean = ( ur1 + ur2 ) / 2; 82 | long r = ur1 - ur2; 83 | int ug1 = e1[1]; 84 | int ug2 = e2[1]; 85 | long g = ug1 - ug2; 86 | int ub1 = e1[0]; 87 | int ub2 = e2[0]; 88 | long b = ub1 - ub2; 89 | return (((512+rmean)*r*r)>>8) + 4*g*g + (((767-rmean)*b*b)>>8); 90 | } 91 | 92 | inline long ColourDistanceGray(const uchar& e1, const uchar& e2) 93 | { 94 | return e2 - e1; 95 | } 96 | 97 | template 98 | inline long ColourDistanceRGB(const cv::Vec3b& e1, const cv::Vec3b& e2) 99 | { 100 | return e2[channel] - e1[channel]; 101 | } 102 | 103 | template 104 | inline long ColourDistanceRGBP(const uchar& e1, const uchar& e2) 105 | { 106 | return (&e2)[channel] - (&e1)[channel]; 107 | } 108 | 109 | template 110 | inline long ColourDistanceRGBIP(const 
uchar& e1, const uchar& e2) 111 | { 112 | return (&e1)[channel] - (&e2)[channel]; 113 | } 114 | 115 | inline long ColourDistanceGrayABS(const uchar* e1, const uchar* e2) 116 | { 117 | return abs(((int) *e2) - *e1); 118 | } 119 | 120 | inline long ColourDistanceGrayP(const uchar* e1, const uchar* e2) 121 | { 122 | return (*e2 - *e1); 123 | } 124 | 125 | inline long ColourDistanceGrayI(const uchar& e1, const uchar& e2) 126 | { 127 | return e1 - e2; 128 | } 129 | 130 | template 131 | inline long ColourDistanceRGBI(const cv::Vec3b& e1, const cv::Vec3b& e2) 132 | { 133 | return (e1)[channel] - (e2)[channel]; 134 | } 135 | 136 | inline long ColourDistanceGrayIP(const uchar* e1, const uchar* e2) 137 | { 138 | return ( *e1 - *e2); 139 | } 140 | 141 | inline long ColourDistanceGrayNorm(const uchar* e1, const uchar* e2) 142 | { 143 | int ur1 = e1[0]; 144 | int ur2 = e2[0]; 145 | long rmean = ( ur1 + ur2 ) / 2; 146 | long r = ur1 - ur2; 147 | return (((512+rmean)*r*r)>>8); 148 | } 149 | 150 | inline int getValueCorner12(const uchar * ptr, int* pixel, int* corners, const int& k, const int& ks, const uchar& (*dist)(const uchar&, const uchar&) ) 151 | { 152 | int x = ptr[pixel[k]]; 153 | 154 | if( k == 3 && ks != 2 && ks != 3 ){ 155 | x = dist(x, ptr[corners[0]]); 156 | }else if(k == 5 && ks != 4 && ks != 5){ 157 | x = dist(x, ptr[corners[1]]); 158 | }else if(k == 8 && ks != 7 && ks != 8){ 159 | x = dist(x, ptr[corners[2]]); 160 | }else if(k == 11 && ks != 11){ 161 | x = dist(x, ptr[corners[3]]); 162 | } 163 | 164 | return x; 165 | } 166 | 167 | inline void getCrossCorner12(const uchar * ptr, int* corners, int* cornersOut, const int& k, int& k1, int& k2, const uchar& (*dist)(const uchar&, const uchar&) ) 168 | { 169 | switch(k){ 170 | case 0: 171 | case 1: 172 | case 11: 173 | k1 = dist(ptr[cornersOut[1]], ptr[corners[1]]); 174 | k2 = dist(ptr[cornersOut[2]], ptr[corners[2]]); 175 | break; 176 | case 2: 177 | case 3: 178 | case 4: 179 | k1 = dist(ptr[cornersOut[2]], 
ptr[corners[2]]); 180 | k2 = dist(ptr[cornersOut[3]], ptr[corners[3]]); 181 | break; 182 | case 5: 183 | case 6: 184 | case 7: 185 | k1 = dist(ptr[cornersOut[0]], ptr[corners[0]]); 186 | k2 = dist(ptr[cornersOut[3]], ptr[corners[3]]); 187 | break; 188 | case 8: 189 | case 9: 190 | case 10: 191 | k1 = dist(ptr[cornersOut[0]], ptr[corners[0]]); 192 | k2 = dist(ptr[cornersOut[1]], ptr[corners[1]]); 193 | break; 194 | } 195 | } 196 | 197 | /** 198 | * Abstract base interface of the FASText keypoint detectors 199 | */ 200 | class CV_EXPORTS_W FASTextI 201 | { 202 | public: 203 | 204 | enum 205 | { 206 | KEY_POINTS_BLACK = 0, KEY_POINTS_WHITE = 1, KEY_POINTS_ALL = 3 207 | }; 208 | 209 | CV_WRAP FASTextI( long threshold = 10, bool nonmaxSuppression=true, int keypointsTypes = KEY_POINTS_ALL, int Kmin = 9, int Kmax = 11); 210 | 211 | virtual ~FASTextI(){ 212 | 213 | }; 214 | /* Clears keypoints, returns early on an empty image, checks the mask (empty, or CV_8UC1 of the image size) and forwards to detectImpl(). */ 215 | void detect( const cv::Mat& image, std::vector& keypoints, const cv::Mat& mask ) const 216 | { 217 | keypoints.clear(); 218 | 219 | if( image.empty() ) 220 | return; 221 | 222 | CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) ); 223 | detectImpl( image, keypoints, mask ); 224 | } 225 | /* Same checks as detect(), but forwards to segmentImpl(), which additionally receives keypointsPixels. */ 226 | void segment( const cv::Mat& image, std::vector& keypoints, std::unordered_multimap >& keypointsPixels, const cv::Mat& mask ) const 227 | { 228 | keypoints.clear(); 229 | 230 | if( image.empty() ) 231 | return; 232 | 233 | CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) ); 234 | segmentImpl( image, keypoints, keypointsPixels, mask ); 235 | } 236 | /* The base implementation reports a non-color detector. */ 237 | virtual bool isColorDetector(){ 238 | return false; 239 | } 240 | 241 | void setThreshold(long threshold){ 242 | this->threshold = threshold; 243 | } 244 | 245 | void setKeypointsTypes(int keypointsTypes){ 246 | this->keypointsTypes = keypointsTypes; 247 | } 248 | 249 | protected: 250 | /* Detection hook; pure virtual, so every concrete detector must provide it. */ 251 | virtual void detectImpl( const cv::Mat& image, std::vector& keypoints, const cv::Mat& mask=cv::Mat() ) const = 0; 252 | /* Default segmentation hook: only runs detectImpl() and leaves keypointsPixels untouched. */ 253 | virtual void 
segmentImpl( const cv::Mat& image, std::vector& keypoints, std::unordered_multimap >& keypointsPixels, const cv::Mat& mask=cv::Mat() ) const 254 | { 255 | detectImpl( image, keypoints, mask); 256 | } 257 | /* threshold and keypointsTypes can be changed later through setThreshold() / setKeypointsTypes(). */ 258 | long threshold; 259 | bool nonmaxSuppression; 260 | int Kmin; 261 | int Kmax; 262 | 263 | int keypointsTypes; 264 | 265 | std::vector > fastAngles; 266 | }; 267 | 268 | /** 269 | * Gray level FASText Feature detector 270 | */ 271 | class CV_EXPORTS_W FASTextGray : public FASTextI 272 | { 273 | public: 274 | 275 | CV_WRAP FASTextGray( long threshold=10, bool nonmaxSuppression=true, int keypointsTypes = KEY_POINTS_ALL, int Kmin = 9, int Kmax = 11); 276 | 277 | virtual ~FASTextGray(){ 278 | 279 | }; 280 | 281 | protected: 282 | /* Overrides the detection hook of FASTextI; the definition is not part of this header. */ 283 | virtual void detectImpl( const cv::Mat& image, std::vector& keypoints, const cv::Mat& mask=cv::Mat() ) const; 284 | }; 285 | 286 | }//namespace cmp; 287 | 288 | #endif /* FAST_HPP_ */ 289 | -------------------------------------------------------------------------------- /src/FTPyramid.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * FTPyramid.cpp 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 
22 | * Machine learning for high-speed corner detection, E. Rosten and T. Drummond, ECCV 2006 23 | */ 24 | #include "FTPyramid.hpp" 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #include 31 | 32 | #include "TimeUtils.h" 33 | #include "detectors.h" 34 | 35 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 36 | 37 | using namespace cv; 38 | 39 | #define ADJUST_FEATURES 1 40 | 41 | namespace cmp 42 | { 43 | //as in ORB 44 | static inline float getScale(int level, double scaleFactor) 45 | { 46 | return (float)std::pow(scaleFactor, (double)(level)); 47 | } 48 | 49 | /** 50 | * Constructor 51 | */ 52 | FTPyr::FTPyr(int nfeatures, float scaleFactor, int nlevels, int edgeThreshold, 53 | int keypointTypes, int Kmin, int Kmax, bool color, bool erodeImages, bool createKeypointSegmenter) : 54 | pyramidTime(0), fastKeypointTime(0), nfeatures(nfeatures), scaleFactor(scaleFactor), nlevels(nlevels), 55 | edgeThreshold(edgeThreshold), keypointTypes(keypointTypes), Kmin(Kmin), Kmax(Kmax), 56 | erodeImages(erodeImages) 57 | { 58 | fastext = cv::Ptr (new GridAdaptedFeatureDetector (cv::Ptr (new FASTextGray(edgeThreshold, true, keypointTypes, Kmin, Kmax)))); 59 | } 60 | /* Splits the feature budget over the pyramid entries (geometric series over the distinct scales) and runs the detector on every entry, starting from the last (smallest) one. Fills allKeypoints, offsets and keypointsPixels with one element per pyramid entry. */ 61 | void FTPyr::computeFASText(vector >& allKeypoints, 62 | vector& offsets, 63 | vector > >& keypointsPixels, 64 | int nfeatures, vector& thresholds, 65 | vector& keypointTypes) 66 | { 67 | int nlevels = (int)imagePyramid.size(); 68 | int levelsDecim = 1; 69 | for( size_t i = 1; i < scales.size(); i++) 70 | { 71 | if( scales[i - 1] != scales[i] ) 72 | levelsDecim++; 73 | } 74 | vector nfeaturesPerLevel(nlevels); 75 | if( nlevels == 0 ) { allKeypoints.clear(); keypointsPixels.clear(); offsets.clear(); return; } /* empty pyramid: nothing to detect, and nfeaturesPerLevel[nlevels-1] below would be out of range */ 76 | int totalFeatures = nfeatures; 77 | float factor = 1; /* first scale that differs from 1; stays 1 when every pyramid entry shares one scale (previously scales[1] / scales[3] were read without a bounds check) */ 78 | for( size_t i = 1; i < scales.size() && factor == 1; i++ ) 79 | factor = scales[i]; 80 | #ifdef ADJUST_FEATURES 81 | for(size_t i = 0; i < imagePyramid.size(); i++ ) 82 | { 83 | if( imagePyramid[i].cols > 1024 || imagePyramid[i].rows > 1024 ) 84 | { 85 | totalFeatures /= factor; 86 | }else 87 | 
break; 88 | } 89 | #endif 90 | 91 | // fill the extractors and descriptors for the corresponding scales (factor == 1 is the limit case of the geometric series: an even split) 92 | float ndesiredFeaturesPerScale = (factor == 1) ? (float) totalFeatures / levelsDecim : totalFeatures*(1 - factor)/(1 - (float) pow((double)factor, (double)levelsDecim)); 93 | //float ndesiredFeaturesPerScale2 = totalFeatures / levelsDecim * 3; 94 | 95 | int sumFeatures = 0; 96 | for( int level = 0; level < nlevels-1; level++ ) 97 | { 98 | nfeaturesPerLevel[level] = cvRound(ndesiredFeaturesPerScale); 99 | sumFeatures += nfeaturesPerLevel[level]; 100 | if( scales[level] != scales[level + 1]) 101 | { 102 | ndesiredFeaturesPerScale *= factor; 103 | } 104 | } 105 | nfeaturesPerLevel[nlevels-1] = ndesiredFeaturesPerScale; 106 | //nfeaturesPerLevel[nlevels-1] = std::max(nfeatures - sumFeatures, 0); 107 | 108 | 109 | allKeypoints.resize(nlevels); 110 | keypointsPixels.resize(nlevels); 111 | offsets.resize(nlevels); 112 | int keypointsSize = 0; 113 | double prevsf = -1; 114 | for (int level = (nlevels - 1); level >= 0; level--) 115 | { 116 | if(keypointsSize > totalFeatures ) 117 | break; 118 | 119 | float sf = 1 / scales[level]; 120 | 121 | int featuresNum = nfeaturesPerLevel[level]; 122 | allKeypoints[level].reserve(featuresNum*3); 123 | 124 | GridAdaptedFeatureDetector* gaDetector = dynamic_cast(&*fastext); 125 | if(gaDetector != NULL) 126 | { 127 | gaDetector->setMaxTotalKeypoints(2 * featuresNum); 128 | FASTextGray* grayDetector = dynamic_cast(&*gaDetector->getDetector()); 129 | if(grayDetector != NULL) 130 | { 131 | grayDetector->setKeypointsTypes(keypointTypes[level]); 132 | } 133 | } 134 | FASTextGray* grayDetector = dynamic_cast(&*fastext); 135 | if(grayDetector != NULL) 136 | { 137 | grayDetector->setKeypointsTypes(keypointTypes[level]); 138 | } 139 | 140 | vector & keypoints = allKeypoints[level]; 141 | fastext->setThreshold( thresholds[level] ); 142 | fastext->segment(imagePyramid[level], keypoints, keypointsPixels[level], maskPyramid[level]); 143 | offsets[level] = keypoints.size(); 144 | 145 | 
if(keypointsPixels[level].size() == 0) 146 | KeyPointsFilterC::retainBest(keypoints, keypointsPixels[level], featuresNum); 147 | if(prevsf != -1 && prevsf != sf ) 148 | keypointsSize += keypoints.size(); 149 | prevsf = sf; 150 | 151 | // Set the level of the coordinates 152 | for (vector::iterator keypoint = keypoints.begin(), 153 | keypointEnd = keypoints.end(); keypoint != keypointEnd; keypoint++) 154 | { 155 | keypoint->octave = level; 156 | keypoint->size = sf; 157 | keypoint->intensityOut += keypoint->pt; 158 | keypoint->intensityIn += keypoint->pt; 159 | 160 | } 161 | } 162 | } 163 | 164 | /* Builds the image (and optional mask) pyramid, runs computeFASText() on it and copies the keypoints of all entries to the output, rescaled by 1 / scales[level]. Also records pyramidTime and fastKeypointTime. */ 165 | void FTPyr::detectImpl( const Mat& image, vector& keypoints, std::unordered_multimap >& keypointsPixels, const Mat& mask) 166 | { 167 | if(image.empty() ) 168 | return; 169 | 170 | //ROI handling 171 | int border = 3; 172 | 173 | if( image.type() != CV_8UC1 && ! fastext->isColorDetector()) 174 | { Mat gray; cvtColor(image, gray, COLOR_BGR2GRAY); detectImpl(gray, keypoints, keypointsPixels, mask); return; } /* convert into a local Mat: the input is const, so it must not be used as the conversion output; the nested call sees CV_8UC1 and does not recurse again */ 175 | 176 | int levelsNum = this->nlevels; 177 | if( levelsNum == -1) //the automatic levels decision 178 | { 179 | levelsNum = 1; 180 | int cols = MAX(image.cols, image.rows); 181 | while(cols > 30) 182 | { 183 | levelsNum++; 184 | cols /= this->scaleFactor; 185 | } 186 | } 187 | 188 | // Pre-compute the scale pyramids 189 | long long start = TimeUtils::MiliseconsNow(); 190 | int levelsTotal = levelsNum; 191 | if( erodeImages ) 192 | levelsTotal += 2 * ( levelsNum ); 193 | if(imagePyramid.size() == 0 || imagePyramid.size() != (size_t) levelsTotal) 194 | { 195 | imagePyramid.resize(levelsTotal); 196 | maskPyramid.resize(levelsTotal); 197 | } 198 | scales.clear(); 199 | scalesRef.clear(); 200 | scaleKeypointTypes.clear(); 201 | thresholds.clear(); 202 | int inLevelIndex = 0; 203 | bool hasErosion = false; 204 | for (int level = 0; level < levelsNum; ++level) 205 | { 206 | float scale = 1/getScale(level, scaleFactor); 207 | scales.push_back(scale); 208 | scalesRef.push_back(level); 209 | thresholds.push_back(this->edgeThreshold); 210 | 
Size sz(cvRound(image.cols*scale), cvRound(image.rows*scale)); 211 | Size wholeSize(sz.width + border*2, sz.height + border*2); 212 | Mat temp; 213 | Mat tempErode; 214 | Mat tempDilate; 215 | Mat masktemp; 216 | if( !imagePyramid[inLevelIndex].empty() && imagePyramid[inLevelIndex].rows == wholeSize.height && imagePyramid[inLevelIndex].cols == wholeSize.width ) 217 | { 218 | temp = imagePyramid[inLevelIndex]; 219 | }else 220 | { 221 | temp = cv::Mat(wholeSize, image.type()); 222 | } 223 | imagePyramid[inLevelIndex] = temp(Rect(border, border, sz.width, sz.height)); 224 | 225 | if( !mask.empty() ) 226 | { 227 | masktemp = Mat(wholeSize, mask.type()); 228 | maskPyramid[inLevelIndex] = masktemp(Rect(border, border, sz.width, sz.height)); 229 | } 230 | 231 | // pyramid 232 | if( level != 0 ) 233 | { 234 | 235 | int step = 1; 236 | if(hasErosion) 237 | step = 3; 238 | resize(imagePyramid[inLevelIndex-step], imagePyramid[inLevelIndex], sz, 0, 0, INTER_LINEAR); 239 | scaleKeypointTypes.push_back(keypointTypes); 240 | copyMakeBorder(imagePyramid[inLevelIndex], temp, border, border, border, border, 241 | BORDER_REFLECT_101+BORDER_ISOLATED); 242 | if( erodeImages ) 243 | { 244 | if( !imagePyramid[inLevelIndex + 1].empty() && imagePyramid[inLevelIndex + 1].rows == wholeSize.height && imagePyramid[inLevelIndex + 1].cols == wholeSize.width ) 245 | { 246 | tempErode = imagePyramid[inLevelIndex + 1]; 247 | tempDilate = imagePyramid[inLevelIndex + 2]; 248 | }else 249 | { 250 | tempErode = cv::Mat(wholeSize, image.type()); 251 | tempDilate = cv::Mat(wholeSize, image.type()); 252 | } 253 | imagePyramid[inLevelIndex + 1] = tempErode(Rect(border, border, sz.width, sz.height)); 254 | Mat element = getStructuringElement( MORPH_CROSS, Size( 3, 3 ), Point( 1, 1 ) ); 255 | cv::erode( temp, tempErode, element ); 256 | scaleKeypointTypes.push_back(1); 257 | thresholds.push_back(this->edgeThreshold); 258 | scalesRef.push_back(level); 259 | imagePyramid[inLevelIndex + 1] = 
tempErode(Rect(border, border, sz.width, sz.height)); 260 | 261 | cv::dilate( temp, tempDilate, element ); 262 | scaleKeypointTypes.push_back(2); 263 | thresholds.push_back(this->edgeThreshold); 264 | scalesRef.push_back(level); 265 | imagePyramid[inLevelIndex + 2] = tempDilate(Rect(border, border, sz.width, sz.height)); 266 | scales.push_back(scale); 267 | scales.push_back(scale); 268 | hasErosion = true; 269 | } 270 | if (!mask.empty()) 271 | { 272 | resize(maskPyramid[inLevelIndex-step], maskPyramid[inLevelIndex], sz, 0, 0, INTER_LINEAR); /* same source entry as the image pyramid: the previous level's base mask */ 273 | threshold(maskPyramid[inLevelIndex], maskPyramid[inLevelIndex], 254, 0, THRESH_TOZERO); 274 | if( erodeImages ) 275 | { 276 | maskPyramid[inLevelIndex + 1] = maskPyramid[inLevelIndex]; 277 | maskPyramid[inLevelIndex + 2] = maskPyramid[inLevelIndex]; 278 | } 279 | } 280 | if (!mask.empty()) 281 | { 282 | copyMakeBorder(maskPyramid[inLevelIndex], masktemp, border, border, border, border, 283 | BORDER_CONSTANT+BORDER_ISOLATED); /* index by pyramid entry, not by level: the two differ once erode/dilate entries are interleaved */ 284 | } 285 | if( erodeImages ) 286 | { 287 | inLevelIndex += 2; 288 | } 289 | } 290 | else 291 | { 292 | copyMakeBorder(image, temp, border, border, border, border, 293 | BORDER_REFLECT_101); 294 | scaleKeypointTypes.push_back(keypointTypes); 295 | if( erodeImages ) 296 | { 297 | if( !imagePyramid[inLevelIndex + 1].empty() && imagePyramid[inLevelIndex + 1].rows == wholeSize.height && imagePyramid[inLevelIndex + 1].cols == wholeSize.width ) 298 | { 299 | tempErode = imagePyramid[inLevelIndex + 1]; 300 | tempDilate = imagePyramid[inLevelIndex + 2]; 301 | }else 302 | { 303 | tempErode = cv::Mat(wholeSize, image.type()); 304 | tempDilate = cv::Mat(wholeSize, image.type()); 305 | } 306 | imagePyramid[inLevelIndex + 1] = tempErode(Rect(border, border, sz.width, sz.height)); 307 | Mat element = getStructuringElement( MORPH_CROSS, Size( 3, 3 ), Point( 1, 1 ) ); 308 | cv::erode( imagePyramid[inLevelIndex], imagePyramid[inLevelIndex + 1], element ); 309 | scaleKeypointTypes.push_back(1); 310 | 
scalesRef.push_back(level); 311 | thresholds.push_back(this->edgeThreshold); 312 | copyMakeBorder(imagePyramid[inLevelIndex + 1], tempErode, border, border, border, border, BORDER_REFLECT_101+BORDER_ISOLATED); 313 | imagePyramid[inLevelIndex + 1] = tempErode(Rect(border, border, sz.width, sz.height)); 314 | imagePyramid[inLevelIndex + 2] = tempDilate(Rect(border, border, sz.width, sz.height)); 315 | cv::dilate( imagePyramid[inLevelIndex], imagePyramid[inLevelIndex + 2], element ); 316 | scaleKeypointTypes.push_back(2); 317 | scalesRef.push_back(level); 318 | thresholds.push_back(this->edgeThreshold); 319 | copyMakeBorder(imagePyramid[inLevelIndex + 2], tempDilate, border, border, border, border, BORDER_REFLECT_101+BORDER_ISOLATED); 320 | imagePyramid[inLevelIndex + 2] = tempDilate(Rect(border, border, sz.width, sz.height)); 321 | scales.push_back(scale); 322 | scales.push_back(scale); 323 | hasErosion = true; 324 | if( !mask.empty() ) { maskPyramid[inLevelIndex + 1] = maskPyramid[inLevelIndex]; maskPyramid[inLevelIndex + 2] = maskPyramid[inLevelIndex]; } /* the eroded and dilated level-0 images share the level-0 mask (these slots were left unset before); the headers share masktemp's buffer, which is filled below */ 325 | inLevelIndex += 2; 326 | } 327 | if( !mask.empty() ) 328 | copyMakeBorder(mask, masktemp, border, border, border, border, 329 | BORDER_CONSTANT+BORDER_ISOLATED); 330 | } 331 | inLevelIndex++; 332 | } 333 | pyramidTime = TimeUtils::MiliseconsNow() - start; 334 | 335 | 336 | // Pre-compute the keypoints (we keep the best over all scales, so this has to be done beforehand) 337 | vector < vector > allKeypoints; 338 | vector offsets; 339 | std::vector > > allKeypointsPixels; 340 | 341 | start = TimeUtils::MiliseconsNow(); 342 | 343 | computeFASText(allKeypoints, offsets, allKeypointsPixels, 344 | nfeatures, thresholds, scaleKeypointTypes); 345 | 346 | fastKeypointTime = TimeUtils::MiliseconsNow() - start; 347 | // make sure we have the right number of keypoints 348 | /*vector temp; 349 | 350 | for (int level = 0; level < n_levels; ++level) 351 | { 352 | vector& keypoints = all_keypoints[level]; 353 | temp.insert(temp.end(), keypoints.begin(), keypoints.end()); 354 | keypoints.clear(); 355 | } 356 | 357 | KeyPoint::retainBest(temp, n_features_); 
358 | 359 | for (vector::iterator keypoint = temp.begin(), 360 | keypoint_end = temp.end(); keypoint != keypoint_end; ++keypoint) 361 | all_keypoints[keypoint->octave].push_back(*keypoint);*/ 362 | 363 | Mat descriptors; 364 | vector pattern; 365 | 366 | keypoints.clear(); 367 | keypointsPixels.clear(); 368 | int offset = 0; 369 | for (size_t level = 0; level < allKeypoints.size(); ++level) 370 | { 371 | // Get the features and compute their orientation 372 | vector& kps = allKeypoints[level]; 373 | // Copy to the output data 374 | bool chekcKpId = allKeypointsPixels[level].size() > 0; 375 | if (level != 0) 376 | { 377 | float scale = 1 / scales[level]; 378 | int keypointNo = 0; 379 | for (vector::iterator keypoint = kps.begin(), 380 | keypointEnd = kps.end(); keypoint != keypointEnd; ++keypoint) 381 | { 382 | if( keypoint->class_id == keypointNo || !chekcKpId ) 383 | { 384 | keypoint->pt *= scale; 385 | keypoint->intensityOut *= scale; 386 | keypoint->intensityIn *= scale; 387 | keypoint->class_id += offset; 388 | keypoints.push_back(*keypoint); 389 | } 390 | keypointNo++; 391 | } 392 | 393 | }else{ 394 | int keypointNo = 0; 395 | for (vector::iterator keypoint = kps.begin(), 396 | keypointEnd = kps.end(); keypoint != keypointEnd; ++keypoint) 397 | { 398 | if( keypoint->class_id == keypointNo || !chekcKpId ) 399 | { 400 | keypoints.push_back(*keypoint); 401 | } 402 | keypointNo++; 403 | } 404 | } 405 | 406 | std::unordered_multimap >& keypointsPixelsSub = allKeypointsPixels[level]; 407 | for (std::unordered_multimap >::iterator itr = keypointsPixelsSub.begin(); itr != keypointsPixelsSub.end(); itr++) 408 | { 409 | assert(itr->second.first < image.cols); 410 | assert(itr->second.second < image.rows); 411 | keypointsPixels.insert( std::pair >( itr->first + offset, std::pair(itr->second.first, itr->second.second))); 412 | } 413 | offset += offsets[level]; 414 | } 415 | } 416 | 417 | }//namespace cmp 418 | 
-------------------------------------------------------------------------------- /src/FTPyramid.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * FTPyramid.hpp 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. Rosten and T. 
Drummond, ECCV 2006 23 | */ 24 | #ifndef FASTTEXT_SRC_FTPYRAMID_HPP_ 25 | #define FASTTEXT_SRC_FTPYRAMID_HPP_ 26 | 27 | #include 28 | 29 | #include 30 | 31 | #include "FASTex.hpp" 32 | #include "KeyPoints.h" 33 | 34 | using namespace std; 35 | 36 | namespace cmp{ 37 | 38 | /** 39 | * The FASText pyramid processing implementation 40 | */ 41 | class CV_EXPORTS_W FTPyr 42 | { 43 | public: 44 | CV_WRAP explicit FTPyr(int nfeatures = 500, float scaleFactor = 1.2f, int nlevels = 8, int edgeThreshold = 31, int keypointTypes = 2, 45 | int Kmin = 9, int Kmax = 11, bool color = false, bool erodeImages = false, bool createKeypointSegmenter = false); 46 | /* Clears keypoints, returns early on an empty image, checks the mask (empty, or CV_8UC1 of the image size) and forwards to detectImpl(). */ 47 | void detect( const cv::Mat& image, std::vector& keypoints, std::unordered_multimap >& keypointsPixels, const cv::Mat& mask = cv::Mat() ) 48 | { 49 | keypoints.clear(); 50 | 51 | if( image.empty() ) 52 | return; 53 | 54 | CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) ); 55 | 56 | detectImpl( image, keypoints, keypointsPixels, mask ); 57 | } 58 | 59 | /** 60 | * return the image pyramid 61 | */ 62 | vector& getImagePyramid() 63 | { 64 | return imagePyramid; 65 | } 66 | /* Timings written by detectImpl() as differences of TimeUtils::MiliseconsNow() (presumably milliseconds - TODO confirm). */ 67 | double pyramidTime; 68 | double fastKeypointTime; 69 | 70 | double getScaleFactor(){ 71 | return scaleFactor; 72 | } 73 | 74 | int getEdgeThreshold(){ 75 | return edgeThreshold; 76 | } 77 | /* No bounds check: level must be a valid index into scales. */ 78 | double getLevelScale(int level){ 79 | return scales[level]; 80 | } 81 | 82 | vector& getScales(){ 83 | return scales; 84 | } 85 | 86 | vector& getScalesRef(){ 87 | return scalesRef; 88 | } 89 | 90 | vector& getThresholds(){ 91 | return thresholds; 92 | } 93 | 94 | protected: 95 | /* Runs the detector on every pyramid entry; called by detectImpl() after the pyramid has been built. */ 96 | void computeFASText(vector >& allKeypoints, 97 | vector& offsets, 98 | vector > >& keypointsPixels, 99 | int nfeatures, vector& thresholds, 100 | vector& keypointTypes); 101 | 102 | void detectImpl( const cv::Mat& image, vector& keypoints, std::unordered_multimap >& keypointsPixels, const cv::Mat& mask=cv::Mat() ); 103 | 104 | CV_PROP_RW int 
nfeatures; 105 | CV_PROP_RW double scaleFactor; 106 | CV_PROP_RW int nlevels; 107 | CV_PROP_RW int edgeThreshold; 108 | 109 | int keypointTypes; 110 | int Kmin; 111 | int Kmax; 112 | /* Pyramid state filled by detectImpl(): scales, thresholds, scaleKeypointTypes and scalesRef receive one value per pyramid image. */ 113 | vector imagePyramid; 114 | vector maskPyramid; 115 | vector scales; 116 | vector thresholds; 117 | vector scaleKeypointTypes; 118 | vector scalesRef; 119 | 120 | cv::Ptr fastext; 121 | 122 | bool erodeImages; 123 | }; 124 | 125 | }//namespace cmp 126 | 127 | #endif /* FASTTEXT_SRC_FTPYRAMID_HPP_ */ 128 | -------------------------------------------------------------------------------- /src/FT_common.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * FT_common.cpp 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. Rosten and T. 
Drummond, ECCV 2006 23 | */ 24 | 25 | #include "FT_common.hpp" 26 | 27 | #define VERIFY_CORNERS 0 28 | 29 | namespace cmp { 30 | /* Precomputes buffer offsets (x + y * rowStride) for the sampling pattern selected by patternSize (8, 12 or 16; anything else trips the assertion), for the pattern's corner points and for the fixed 24- and 16-point check rings. */ 31 | void makeOffsets(int pixel[34], int* corners, int* cornersOut, int rowStride, int patternSize, int pixelIndex[34], int pixelcheck[24], int pixelcheck16[16]) 32 | { 33 | static const int offsets24[][2] = 34 | { 35 | {0, 4}, { 1, 4}, { 2, 4}, { 3, 3}, { 4, 2}, { 4, 1}, { 4, 0}, { 4, -1}, 36 | { 4, -2}, { 3, -3}, { 2, -4}, { 1, -4}, {0, -4}, {-1, -4}, {-2, -4}, {-3, -3}, 37 | {-4, -2}, {-4, -1}, {-4, 0}, {-4, 1}, {-4, 2}, {-3, 3}, {-2, 4}, {-1, 4} 38 | }; 39 | 40 | static const int offsets16[][2] = 41 | { 42 | {0, 3}, { 1, 3}, { 2, 2}, { 3, 1}, { 3, 0}, { 3, -1}, { 2, -2}, { 1, -3}, 43 | {0, -3}, {-1, -3}, {-2, -2}, {-3, -1}, {-3, 0}, {-3, 1}, {-2, 2}, {-1, 3} 44 | }; 45 | 46 | static const int corners16[][2] = 47 | { 48 | { 3, 2}, { 2, 3}, { 3, -2}, { 2, -3}, 49 | {-2, -3}, {-3, -2} , {-3, 2}, {-2, 3} 50 | }; 51 | 52 | static const int offsets12[][2] = 53 | { 54 | {0, 2}, { 1, 2}, { 2, 1}, { 2, 0}, { 2, -1}, { 1, -2}, 55 | {0, -2}, {-1, -2}, {-2, -1}, {-2, 0}, {-2, 1}, {-1, 2} 56 | }; 57 | 58 | static const int corners12[][2] = 59 | { 60 | { 2, 2}, { 2, -2}, 61 | {-2, -2}, {-2, 2} 62 | }; 63 | static const int cornersOut12[][2] = 64 | { 65 | { 3, 3}, { 3, -3}, 66 | {-3, -3}, {-3, 3} 67 | }; 68 | 69 | 70 | static const int offsets8[][2] = 71 | { 72 | {0, 1}, { 1, 1}, { 1, 0}, { 1, -1}, 73 | {0, -1}, {-1, -1}, {-1, 0}, {-1, 1} 74 | }; 75 | 76 | const int (*offsets)[2] = patternSize == 16 ? offsets16 : 77 | patternSize == 12 ? offsets12 : 78 | patternSize == 8 ? 
offsets8 : 0; 79 | 80 | CV_Assert(pixel && offsets); 81 | /* Fill one turn of the pattern, then repeat it cyclically up to 34 entries; pixelIndex maps each entry back to its position in the pattern. */ 82 | int k = 0; 83 | for( ; k < patternSize; k++ ) 84 | { 85 | pixel[k] = offsets[k][0] + offsets[k][1] * rowStride; 86 | pixelIndex[k] = k; 87 | } 88 | for( ; k < 34; k++ ) 89 | { 90 | pixel[k] = pixel[k - patternSize]; 91 | pixelIndex[k] = k - patternSize; 92 | } 93 | if(patternSize == 16) 94 | { 95 | for( k = 0; k < 8; k++ ) 96 | corners[k] = corners16[k][0] + corners16[k][1] * rowStride; 97 | }else{ 98 | for( k = 0; k < 4; k++ ) 99 | { 100 | corners[k] = corners12[k][0] + corners12[k][1] * rowStride; 101 | cornersOut[k] = cornersOut12[k][0] + cornersOut12[k][1] * rowStride; 102 | } 103 | } 104 | for( k = 0; k < 24; k++ ) 105 | pixelcheck[k] = offsets24[k][0] + offsets24[k][1] * rowStride; 106 | for( k = 0; k < 16; k++ ) 107 | pixelcheck16[k] = offsets16[k][0] + offsets16[k][1] * rowStride; 108 | } 109 | /* Variant of makeOffsets() in which every x offset is multiplied by 3 (presumably for 3-channel interleaved pixels - TODO confirm); it fills pixelCounter instead of pixelIndex and produces no cornersOut table. */ 110 | void makeOffsetsC(int pixel[34], int pixelCounter[34], int corners[8], int rowStride, int patternSize, int pixelcheck[24], int pixelcheck16[16]) 111 | { 112 | static const int offsets24[][2] = 113 | { 114 | {0, 4}, { 1, 4}, { 2, 4}, { 3, 3}, { 4, 2}, { 4, 1}, { 4, 0}, { 4, -1}, 115 | { 4, -2}, { 3, -3}, { 2, -4}, { 1, -4}, {0, -4}, {-1, -4}, {-2, -4}, {-3, -3}, 116 | {-4, -2}, {-4, -1}, {-4, 0}, {-4, 1}, {-4, 2}, {-3, 3}, {-2, 4}, {-1, 4} 117 | }; 118 | 119 | static const int offsets16[][2] = 120 | { 121 | {0, 3}, { 1, 3}, { 2, 2}, { 3, 1}, { 3, 0}, { 3, -1}, { 2, -2}, { 1, -3}, 122 | {0, -3}, {-1, -3}, {-2, -2}, {-3, -1}, {-3, 0}, {-3, 1}, {-2, 2}, {-1, 3} 123 | }; 124 | 125 | static const int corners16[][2] = 126 | { 127 | { 3, 2}, { 2, 3}, { 3, -2}, { 2, -3}, 128 | {-2, -3}, {-3, -2} , {-3, 2}, {-2, 3} 129 | }; 130 | 131 | static const int offsets12[][2] = 132 | { 133 | {0, 2}, { 1, 2}, { 2, 1}, { 2, 0}, { 2, -1}, { 1, -2}, 134 | {0, -2}, {-1, -2}, {-2, -1}, {-2, 0}, {-2, 1}, {-1, 2} 135 | }; 136 | 137 | static const int corners12[][2] = 138 | { 139 | { 2, 2}, { 2, -2}, 140 | {-2, -2}, {-2, 2} 
141 | }; 142 | 143 | static const int offsets8[][2] = 144 | { 145 | {0, 1}, { 1, 1}, { 1, 0}, { 1, -1}, 146 | {0, -1}, {-1, -1}, {-1, 0}, {-1, 1} 147 | }; 148 | 149 | const int (*offsets)[2] = patternSize == 16 ? offsets16 : 150 | patternSize == 12 ? offsets12 : 151 | patternSize == 8 ? offsets8 : 0; 152 | 153 | CV_Assert(pixel && offsets); 154 | 155 | int k = 0; 156 | for( ; k < patternSize; k++ ) 157 | { 158 | pixel[k] = 3 * offsets[k][0] + offsets[k][1] * rowStride; 159 | pixelCounter[k] = k; 160 | } 161 | for( ; k < 34; k++ ) 162 | { 163 | pixel[k] = pixel[k - patternSize]; 164 | pixelCounter[k] = k - patternSize; 165 | } 166 | 167 | if(patternSize == 16) 168 | { 169 | for( k = 0; k < 8; k++ ) 170 | corners[k] = 3 * corners16[k][0] + corners16[k][1] * rowStride; 171 | }else{ 172 | for( k = 0; k < 4; k++ ) 173 | corners[k] = 3 * corners12[k][0] + corners12[k][1] * rowStride; 174 | } 175 | for( k = 0; k < 24; k++ ) 176 | pixelcheck[k] = 3 * offsets24[k][0] + offsets24[k][1] * rowStride; 177 | for( k = 0; k < 16; k++ ) 178 | pixelcheck16[k] = 3 * offsets16[k][0] + offsets16[k][1] * rowStride; 179 | } 180 | 181 | } // namespace cmp 182 | -------------------------------------------------------------------------------- /src/FT_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * FT_common.hpp 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. Rosten and T. Drummond, ECCV 2006 23 | */ 24 | 25 | #ifndef __CMP_FT_COMMON_HPP__ 26 | #define __CMP_FT_COMMON_HPP__ 27 | 28 | #ifdef __cplusplus 29 | 30 | #include "FASTex.hpp" 31 | #include 32 | 33 | 34 | namespace cmp 35 | { 36 | /* Offset-table builders, defined in FT_common.cpp. */ 37 | void makeOffsets(int pixel[34], int* corners, int* cornersOut, int row_stride, int patternSize, int pixelIndex[34], int pixelcheck[24], int pixelcheck16[16]); 38 | void makeOffsetsC(int pixel[34], int pixelCounter[34], int corners[8], int rowStride, int patternSize, int pixelcheck[24], int pixelcheck16[16]); 39 | 40 | template 41 | int cornerScore(const uchar* ptr, const int pixel[], int threshold); 42 | /* Returns false when none of the one or two direct neighbours of ptr[cn] tested for pattern position mostSameIdx is within threshold of ptr[cn] according to distFunction. Indices above 11 are reduced by 12 once; an index with no matching case returns true. */ 43 | template 44 | static inline bool isMostSameAccessible12(const uchar* ptr, int img_step, int xstep, int cn, int mostSameIdx, int threshold, long (*distFunction)(const _Tp&, const _Tp&)) 45 | { 46 | if( mostSameIdx > 11 ) 47 | mostSameIdx -= 12; 48 | switch(mostSameIdx){ 49 | case 0: 50 | if( !( distFunction(ptr[cn], ptr[img_step + cn]) <= threshold ) ) 51 | return false; 52 | break; 53 | case 1: 54 | if( !( distFunction(ptr[cn], ptr[img_step + cn]) <= threshold 55 | || distFunction(ptr[cn], ptr[img_step + xstep + cn]) <= threshold ) ) 56 | return false; 57 | break; 58 | case 11: 59 | if( !(distFunction(ptr[cn], ptr[img_step + cn]) <= threshold 60 | || distFunction(ptr[cn], ptr[img_step -xstep + cn]) <= threshold) ) 61 | return false; 62 | break; 63 | case 2: 64 | if( !(distFunction(ptr[cn], ptr[1 * xstep + cn]) <= threshold 65 | || distFunction(ptr[cn], ptr[img_step + xstep + cn]) <= threshold ) ) 66 | return false; 67 | break; 68 | case 3: 69 | if( !(distFunction(ptr[cn], ptr[xstep + cn]) <= threshold) ) 70 | return false; 71 | break; 72 | case 4: 73 | if( 
!(distFunction(ptr[cn], ptr[1 * xstep + cn]) <= threshold 74 | || distFunction(ptr[cn], ptr[-img_step + xstep + cn]) <= threshold ) ) 75 | return false; 76 | break; 77 | case 5: 78 | if( !( distFunction(ptr[cn], ptr[-img_step + cn]) <= threshold 79 | || distFunction(ptr[cn], ptr[-img_step + xstep + cn]) <= threshold )) 80 | return false; 81 | break; 82 | case 6: 83 | if( !(distFunction(ptr[cn], ptr[-img_step + cn]) <= threshold ) ) 84 | return false; 85 | break; 86 | case 7: 87 | if( !(distFunction(ptr[cn], ptr[-img_step + cn]) <= threshold 88 | || distFunction(ptr[cn], ptr[-img_step - xstep + cn]) <= threshold) ) 89 | return false; 90 | break; 91 | case 8: 92 | if( !(distFunction(ptr[cn], ptr[-1*xstep + cn]) <= threshold 93 | || distFunction(ptr[cn], ptr[-1*xstep - img_step + cn]) <= threshold ) ) 94 | return false; 95 | break; 96 | case 9: 97 | if( !( distFunction(ptr[cn], ptr[-1*xstep + cn]) <= threshold ) ) 98 | return false; 99 | break; 100 | case 10: 101 | if( !(distFunction(ptr[cn], ptr[-1*xstep + cn]) <= threshold 102 | || distFunction(ptr[cn], ptr[-1*xstep + img_step + cn]) <= threshold) ) 103 | return false; 104 | break; 105 | } 106 | return true; 107 | } 108 | 109 | }//namespace cmp 110 | 111 | #endif 112 | #endif //__CMP_FT_COMMON_HPP__ 113 | -------------------------------------------------------------------------------- /src/FastTextLine.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * FTextLine.cpp 3 | * 4 | * Created on: Feb 27, 2015 5 | * Author: Michal Busta 6 | */ 7 | #include "FastTextLine.h" 8 | 9 | #include 10 | #include 11 | 12 | #ifdef ANDROID_LOG 13 | # include 14 | #endif 15 | 16 | #include "geometry.h" 17 | 18 | namespace cmp 19 | { 20 | /* Default constructor: starts with a zero-sized minRect, which getMinAreaRect() treats as "not computed yet". */ 21 | FTextLine::FTextLine() 22 | { 23 | // TODO Auto-generated constructor stub 24 | minRect.size.width = 0; 25 | minRect.size.height = 0; 26 | } 27 | 28 | FTextLine::FTextLine(double theta) : theta(theta) 29 | { 30 | // TODO Auto-generated constructor 
stub 31 | minRect.size.width = 0; 32 | minRect.size.height = 0; 33 | } 34 | 35 | FTextLine::~FTextLine() 36 | { 37 | // TODO Auto-generated destructor stub 38 | } 39 | /* Adds a letter candidate to the line once: extends bbox by the letter's box and counts the letter plus its duplicates. */ 40 | void FTextLine::addLetter(int letterId, std::vector& letterCandidates) 41 | { 42 | if( regionSet.find(letterId) != regionSet.end() ) 43 | return; 44 | LetterCandidate& refR = letterCandidates[letterId]; 45 | if( bbox.width == 0 ) 46 | bbox = refR.bbox; 47 | else 48 | bbox |= refR.bbox; 49 | regionSet.insert(letterId); 50 | duplicates += refR.duplicates.size() + 1; 51 | } 52 | /* Crops the image-clamped bounding box of the line's min-area rectangle (scaled by `scale`, enlarged by 1.2), warps it by the rectangle's angle into norm_line and stores the inverse transform in norm_mat. */ 53 | cv::Mat FTextLine::getNormalizedMask(const cv::Mat& image, std::vector& letterCandidates, double scale) 54 | { 55 | cv::RotatedRect rr = getMinAreaRect(letterCandidates); 56 | rr.center.x *= scale; 57 | rr.center.y *= scale; 58 | rr.size.width *= scale; 59 | rr.size.height *= scale; 60 | 61 | if(rr.size.height > rr.size.width) 62 | { 63 | std::swap(rr.size.height, rr.size.width); 64 | rr.angle += 90; 65 | } 66 | 67 | rext = rr; 68 | rext.size.width *= 1.2; 69 | rext.size.height *= 1.2; 70 | 71 | extbox = rext.boundingRect(); 72 | extbox.x = MAX(extbox.x, 0); 73 | extbox.y = MAX(extbox.y, 0); 74 | if( (extbox.x + extbox.width) >= image.cols ) 75 | extbox.width = image.cols - extbox.x; 76 | if( (extbox.y + extbox.height) >= image.rows ) 77 | extbox.height = image.rows - extbox.y; 78 | 79 | cv::Mat tmp = image(extbox); 80 | cv::Point center = cv::Point(extbox.width / 2, extbox.height / 2); 81 | cv::Mat rot_mat = getRotationMatrix2D( cv::Point(extbox.width / 2, extbox.height / 2), rr.angle, 1 ); 82 | 83 | rot_mat.at(0,2) += rext.size.width/2.0 - center.x; 84 | rot_mat.at(1,2) += rext.size.height/2.0 - center.y; 85 | //rot_matI.at(0,2) -= rext.size.width/2.0 - center.x; 86 | //rot_matI.at(1,2) -= rext.size.height/2.0 - center.y; 87 | 88 | /// Rotate the warped image 89 | cv::warpAffine( tmp, norm_line, rot_mat, rext.size ); 90 | cv::invertAffineTransform(rot_mat, norm_mat); 91 | return norm_line; 92 | } 93 | /* ORs the masks of the line's letters and of their duplicates into a zeroed single-channel image of the input size, inverts it and converts it to BGR when `color` is set. Letters touching the right/bottom border or whose mask size does not match their box are skipped. drawRect is not used. */ 94 | cv::Mat 
FTextLine::createDebugImage(const cv::Mat& image, std::vector& letterCandidates, bool color, bool drawRect) 95 | { 96 | 97 | cv::Mat tmp = cv::Mat::zeros(image.rows, image.cols, CV_8UC1); 98 | for( std::set::iterator it = regionSet.begin(); it != regionSet.end(); it++ ) 99 | { 100 | LetterCandidate& ref1 = letterCandidates[*it]; 101 | 102 | cv::Rect rootRect = cv::Rect(ref1.bbox.x, ref1.bbox.y, ref1.bbox.width, ref1.bbox.height); 103 | cv::Mat mask = ref1.mask; 104 | if( ref1.scaleFactor != 1) 105 | { 106 | cv::resize(mask, mask, cv::Size(ref1.bbox.width, ref1.bbox.height)); 107 | } 108 | if( (rootRect.x + rootRect.width) >= tmp.cols ) 109 | continue; 110 | if( (rootRect.y + rootRect.height) >= tmp.rows ) 111 | continue; 112 | if( rootRect.width != mask.cols || rootRect.height != mask.rows ) 113 | continue; 114 | cv::bitwise_or(tmp(rootRect), mask, tmp(rootRect)); 115 | for(auto itj : ref1.duplicates) 116 | { 117 | LetterCandidate& refd = letterCandidates[itj]; 118 | rootRect = cv::Rect(refd.bbox.x, refd.bbox.y, refd.bbox.width, refd.bbox.height); 119 | mask = refd.mask; 120 | if( refd.scaleFactor != 1) 121 | { 122 | cv::resize(mask, mask, cv::Size(refd.bbox.width, refd.bbox.height)); /* size the duplicate's mask to its own box; using the parent letter's box made the size check below reject it */ 123 | } 124 | if( (rootRect.x + rootRect.width) >= tmp.cols ) 125 | continue; 126 | if( (rootRect.y + rootRect.height) >= tmp.rows ) 127 | continue; 128 | if( rootRect.width != mask.cols || rootRect.height != mask.rows ) 129 | continue; 130 | cv::bitwise_or(tmp(rootRect), mask, tmp(rootRect)); 131 | } 132 | } 133 | 134 | tmp = ~tmp; 135 | if( color ) 136 | { 137 | cv::cvtColor(tmp, tmp, cv::COLOR_GRAY2BGR); 138 | } 139 | return tmp; 140 | } 141 | /* Returns the min-area rectangle of the convex-hull points of the line's valid letters. The result is cached in minRect (a non-zero width means "already computed"), normalised so that width >= height, and bbox is refreshed from it. */ 142 | cv::RotatedRect FTextLine::getMinAreaRect(std::vector& letterCandidates) 143 | { 144 | if( minRect.size.width != 0) 145 | return minRect; 146 | std::vector pointsAll; 147 | for( auto it = regionSet.begin(); it != regionSet.end(); it++ ) 148 | { 149 | LetterCandidate& ref1 = letterCandidates[*it]; 150 | if( !ref1.isValid ) 151 | 
continue; 152 | pointsAll.insert(pointsAll.end(), ref1.cHullPoints.begin(), ref1.cHullPoints.end()); 153 | } 154 | 155 | #ifdef VERBOSE 156 | cv::Mat tmp = img.clone(); 157 | if( tmp.channels() == 1) 158 | cv::cvtColor(tmp, tmp, cv::COLOR_GRAY2BGR); 159 | 160 | for( auto pt : pointsTop ) 161 | { 162 | cv::circle(tmp, pt, 2, cv::Scalar(255, 0, 0)); 163 | } 164 | for( auto pt : pointsBottom ) 165 | { 166 | cv::circle(tmp, pt, 2, cv::Scalar(0, 255, 0)); 167 | } 168 | for( auto pt : ref0.cHullPoints) 169 | cv::circle(tmp, pt, 2, cv::Scalar(0, 0, 255)); 170 | for( auto pt : ref1.cHullPoints) 171 | cv::circle(tmp, pt, 4, cv::Scalar(0, 255, 255)); 172 | cv::imshow("ts", tmp); 173 | cv::waitKey(0); 174 | #endif 175 | 176 | minRect = minAreaRect( cv::Mat(pointsAll) ); 177 | if(minRect.size.width < minRect.size.height){ 178 | /* swap the float extents directly: the former int temporary truncated the width */ 179 | std::swap(minRect.size.width, minRect.size.height); 180 | 181 | minRect.angle += 90; 182 | 183 | } 184 | bbox = minRect.boundingRect(); 185 | return minRect; 186 | } 187 | 188 | void FTextLine::splitHullLines(std::vector& letterCandidates) 189 | { 190 | cv::Point start(centerLine[2], centerLine[3] ); 191 | cv::Point end(centerLine[2] + 100 * centerLine[0], centerLine[3] + 100 * centerLine[1]); 192 | pointsTop.clear(); 193 | pointsBottom.clear(); 194 | for( auto& rid : this->regionSet ){ 195 | LetterCandidate& ref1 = letterCandidates[rid]; 196 | if( !ref1.pointsScaled ) 197 | ref1.scalePoints(); 198 | cv::Point top(-1, -1); 199 | double distTop = 0; 200 | cv::Point bottom(-1, -1); 201 | double distBottom = 0; 202 | for( size_t i = 0; i < ref1.cHullPoints.size(); i++ ){ 203 | int sign = 0; 204 | double d = distance_to_line(centerLine, ref1.cHullPoints[i], sign ); 205 | if( sign > 0){ 206 | if( d > distTop ){ 207 | top = ref1.cHullPoints[i]; 208 | distTop = d; 209 | } 210 | }else{ 211 | if( d > distBottom ){ 212 | bottom = ref1.cHullPoints[i]; 213 | distBottom = d; 214 | } 215 | } 216 | } 217 | if( top.x 
!= -1 ){ 218 | this->pointsTop.push_back(top); 219 | 220 | } 221 | if( bottom.x != -1 ){ 222 | this->pointsBottom.push_back(bottom); 223 | } 224 | if( top.x != -1 && bottom.x != -1 ){ 225 | this->validRegSet.insert(rid); 226 | } 227 | } 228 | this->regionSet = this->validRegSet; 229 | } 230 | 231 | } /* namespace cmp */ 232 | -------------------------------------------------------------------------------- /src/FastTextLine.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TextLine.h 3 | * 4 | * Created on: Feb 27, 2015 5 | * Author: Michal Busta 6 | */ 7 | #ifndef FASTTEXTLINE_H_ 8 | #define FASTTEXTLINE_H_ 9 | 10 | #include 11 | 12 | #include 13 | 14 | #include "Segmenter.h" 15 | 16 | namespace cmp 17 | { 18 | 19 | inline void expandRoi(const cv::Mat& image, cv::Rect& bbox, int canvas, int& xOffset, int& yOffset, int&xOver, int& yOver ) 20 | { 21 | xOffset = 0; 22 | xOver = bbox.width; 23 | if(bbox.x >= canvas) 24 | { 25 | xOffset = canvas; 26 | bbox.x -= xOffset; 27 | bbox.width += canvas; 28 | } 29 | bbox.width += canvas; 30 | yOffset = 0; 31 | yOver = bbox.height; 32 | if(bbox.y >= canvas) 33 | { 34 | yOffset = canvas; 35 | bbox.y -= canvas; 36 | bbox.height += canvas; 37 | } 38 | bbox.height += canvas; 39 | 40 | if( bbox.x + bbox.width > image.cols) 41 | { 42 | bbox.width = image.cols - bbox.x; 43 | } 44 | if( bbox.y + bbox.height > image.rows) 45 | { 46 | bbox.height = image.rows - bbox.y; 47 | } 48 | xOver = bbox.width - xOver; 49 | yOver = bbox.height - yOver; 50 | } 51 | 52 | /** 53 | * @class cmp::TextLine 54 | * 55 | * @brief TODO brief description 56 | * 57 | * TODO type description 58 | */ 59 | class FTextLine 60 | { 61 | public: 62 | FTextLine(); 63 | 64 | FTextLine(double theta); 65 | 66 | virtual ~FTextLine(); 67 | 68 | void addLetter(int letterId, std::vector& letterCandidates); 69 | 70 | cv::Mat createDebugImage(const cv::Mat& image, std::vector& letterCandidates, bool color, bool drawRect = false); 
71 | 72 | cv::Mat getNormalizedMask(const cv::Mat& image, std::vector& letterCandidates, double scale); 73 | 74 | cv::RotatedRect getMinAreaRect(std::vector& letterCandidates); 75 | 76 | void splitHullLines(std::vector& letterCandidates); 77 | 78 | cv::Rect bbox; 79 | 80 | int duplicates = 0; 81 | 82 | double angle = 0; 83 | 84 | std::set regionSet; 85 | std::set validRegSet; 86 | 87 | std::vector centers; 88 | std::vector pointsTop; 89 | std::vector pointsBottom; 90 | 91 | cv::Vec4f centerLine; 92 | cv::Vec4f topLine; 93 | cv::Vec4f bottomLine; 94 | 95 | bool isSegmentable = true; 96 | 97 | cv::RotatedRect minRect; 98 | 99 | double theta = 0; 100 | float quality = 0; 101 | 102 | vector > contours; 103 | 104 | double height = 0; 105 | 106 | std::string text; 107 | std::vector probs; 108 | std::vector pos_start; 109 | std::vector pos_end; 110 | 111 | cv::Mat norm_mat; 112 | float ocr_scale = 1.0f; 113 | cv::Mat norm_line; 114 | 115 | cv::RotatedRect rext; //rotated rectangle used for classification 116 | cv::Rect extbox; 117 | 118 | int type = 0; 119 | 120 | cv::Mat normImage; 121 | }; 122 | 123 | } /* namespace cmp */ 124 | 125 | #endif /* FASTTEXTLINE_H_ */ 126 | -------------------------------------------------------------------------------- /src/FastTextLineDetector.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TextLineDetector.cpp 3 | * 4 | * Created on: Dec 17, 2014 5 | * Author: Michal Busta 6 | */ 7 | #include "FastTextLineDetector.h" 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include "geometry.h" 17 | #include "HoughTLDetector.h" 18 | 19 | using namespace cv; 20 | 21 | //#define VERBOSE 1 22 | 23 | namespace cmp 24 | { 25 | 26 | FastTextLineDetector::FastTextLineDetector() 27 | { 28 | 29 | } 30 | 31 | FastTextLineDetector::~FastTextLineDetector() 32 | { 33 | // TODO Auto-generated destructor stub 34 | } 35 | 36 | void FastTextLineDetector::findTextLines(const 
cv::Mat& image, std::vector& letterCandidates, std::vector& scales, std::vector& textLines) 37 | { 38 | textLines.clear(); 39 | 40 | int index = -1; 41 | for (std::vector::iterator kv = letterCandidates.begin(); kv != letterCandidates.end(); kv++) 42 | { 43 | index += 1; 44 | if(kv->duplicate != -1) 45 | continue; 46 | if(kv->cHullPoints.size() == 0) 47 | { 48 | CharClassifier::extractLineFeatures(*kv); 49 | } 50 | kv->scalePoints(); 51 | } 52 | std::vector hLines; 53 | double letterHeight = MIN(image.rows, image.cols); 54 | do{ 55 | HoughTLDetector houghTlDetector; 56 | houghTlDetector.findTextLines(letterCandidates, image, letterHeight, hLines, 0); 57 | houghTlDetector.findTextLines(letterCandidates, image, letterHeight, hLines, 1); 58 | letterHeight /= 2; 59 | }while( letterHeight > 5 ); 60 | 61 | std::vector hLinesFinal; 62 | hLinesFinal = hLines; 63 | 64 | //hLinesFinal = hLines; 65 | std::vector initialTextLines; 66 | initialTextLines.reserve(hLinesFinal.size()); 67 | for( size_t i = 0; i < hLinesFinal.size(); i++ ) 68 | { 69 | LineGroup& group = hLinesFinal[i]; 70 | if( group.conflict) 71 | { 72 | continue; 73 | } 74 | initialTextLines.push_back(FTextLine(group.theta - M_PI / 2)); 75 | for( auto& rid : group.regionIds ){ 76 | initialTextLines.back().addLetter(rid, letterCandidates); 77 | } 78 | 79 | initialTextLines.back().getMinAreaRect(letterCandidates); 80 | } 81 | 82 | for( auto& tl : initialTextLines ) 83 | { 84 | if(!tl.isSegmentable) 85 | continue; 86 | std::vector centerLine; 87 | centerLine.reserve(tl.regionSet.size()); 88 | if( tl.regionSet.size() < 3 ) { 89 | tl.regionSet.clear(); 90 | tl.isSegmentable = false; 91 | continue; 92 | } 93 | for( auto regId : tl.regionSet ){ 94 | centerLine.push_back(letterCandidates[regId].getConvexCentroid()); 95 | } 96 | 97 | cv::fitLine(centerLine, tl.centerLine, CV_DIST_L2, 0,0.01,0.01); 98 | tl.splitHullLines(letterCandidates); 99 | if( tl.pointsTop.size() > 3 ) 100 | cv::fitLine(tl.pointsTop, tl.topLine, 
CV_DIST_L2, 0,0.01,0.01); 101 | if( tl.pointsBottom.size() > 3 ) 102 | cv::fitLine(tl.pointsBottom, tl.bottomLine, CV_DIST_L2, 0,0.01,0.01); 103 | double angle2 = atan2(tl.centerLine[1], tl.centerLine[0]); 104 | tl.angle = angle2; 105 | tl.minRect.size.width = 0; 106 | cv::RotatedRect rr = tl.getMinAreaRect(letterCandidates); 107 | tl.height = MIN(rr.size.width, rr.size.height); 108 | #ifdef VERBOSE 109 | cv::Mat tmp = image.clone(); 110 | cvtColor(tmp, tmp, CV_GRAY2BGR); 111 | cv::line(tmp, cv::Point(tl.centerLine.val[2], tl.centerLine.val[3]), cv::Point(tl.centerLine.val[2] + 100 * tl.centerLine.val[0], tl.centerLine.val[3] + 100 * tl.centerLine.val[1]), cv::Scalar(255, 255, 255) ); 112 | for(auto& center : centerLine ) 113 | cv::circle(tmp, center, 5, cv::Scalar(255, 255, 255), 2); 114 | for(auto& center : tl.pointsTop ) 115 | cv::circle(tmp, center, 5, cv::Scalar(0, 255, 0), 2); 116 | cv::imshow("tmp", tmp); 117 | cv::waitKey(0); 118 | #endif 119 | 120 | } 121 | 122 | for( size_t i = 0; i < initialTextLines.size(); i++) 123 | { 124 | if(!initialTextLines[i].isSegmentable) 125 | continue; 126 | textLines.push_back(initialTextLines[i]); 127 | } 128 | } 129 | 130 | 131 | } /* namespace cmp */ 132 | -------------------------------------------------------------------------------- /src/FastTextLineDetector.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TextLineDetector.h 3 | * 4 | * Created on: Dec 17, 2014 5 | * Author: Michal Busta 6 | */ 7 | #ifndef FASTTEXTLINEDETECTOR_H_ 8 | #define FASTTEXTLINEDETECTOR_H_ 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | #include "segm/segmentation.h" 18 | 19 | #include "CharClassifier.h" 20 | #include "FastTextLine.h" 21 | 22 | namespace cmp 23 | { 24 | 25 | /** 26 | * @class cmp::TextLineDetector 27 | * 28 | * @brief TODO brief description 29 | * 30 | * TODO type description 31 | */ 32 | class FastTextLineDetector 33 | { 34 | public: 35 | 
FastTextLineDetector(); 36 | virtual ~FastTextLineDetector(); 37 | 38 | void findTextLines(const cv::Mat& image, std::vector& letterCandidates, std::vector& scales, std::vector& textLines); 39 | 40 | int minHeight = 5; 41 | 42 | }; 43 | 44 | } /* namespace cmp */ 45 | 46 | #endif /* FASTTEXTLINEDETECTOR_H_ */ 47 | -------------------------------------------------------------------------------- /src/HoughTLDetector.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * HoughTLDetector.cpp 3 | * 4 | * Created on: Jun 17, 2015 5 | * Author: Michal Busta 6 | */ 7 | #include 8 | #ifdef OPENCV_2 9 | # include 10 | #endif 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include "HoughTLDetector.h" 17 | #include "geometry.h" 18 | 19 | //#define VERBOSE 1 20 | 21 | namespace cmp 22 | { 23 | 24 | #define NUM_ANGLE 16 25 | 26 | class LineAccumulator{ 27 | public: 28 | 29 | LineAccumulator(const cv::Mat& img, float rho, float theta) 30 | { 31 | int width = img.cols; 32 | int height = img.rows; 33 | theta_sampling_step = theta; 34 | numangle = roundf((M_PI) / theta); 35 | numangle_2 = numangle / 2; 36 | numrho = roundf(((width + height) * 2 + 1) / rho); 37 | this->rho = rho; 38 | float rho2 = 0.5 * rho; 39 | float irho = 1 / rho; 40 | float irho2 = 1 / (2 * rho2); 41 | 42 | sizes[0] = numangle + 2 * voting_offset; 43 | sizes[1] = numrho + 2 * voting_offset; 44 | idx2 = sizes[1]; 45 | acc = cv::Mat(2, sizes, CV_32SC1, cv::Scalar(0)); 46 | 47 | tabSin.resize(numangle + 2); 48 | tabCos.resize(numangle + 2); 49 | tabSin2.resize(numangle + 2); 50 | tabCos2.resize(numangle + 2); 51 | float ang = 0; 52 | for( int i = 0; i < numangle + 2; ang += theta, i++) 53 | { 54 | tabSin[i] = (float)(sin((double)ang) * irho); 55 | tabCos[i] = (float)(cos((double)ang) * irho); 56 | 57 | tabSin2[i] = (float)(sin((double)ang - M_PI_2) * irho2); 58 | tabCos2[i] = (float)(cos((double)ang - M_PI_2) * irho2); 59 | } 60 | } 61 | 62 | void 
addRegion(cv::Point& center, int regId, std::vector& letterCandidates) 63 | { 64 | LetterCandidate& ref = letterCandidates[regId]; 65 | 66 | //check for duplicate 67 | int r = cvRound( center.x * tabCos[0] + center.y * tabSin[0] ); 68 | r += (numrho - 1) / 2; 69 | int index = r; 70 | for(auto& rid : regions2d[index] ){ 71 | LetterCandidate& ref2 = letterCandidates[rid]; 72 | if( ref2.isWord != ref.isWord ) 73 | continue; 74 | cv::Rect int_box = ref.bbox & ref2.bbox; 75 | cv::Rect or_box = ref.bbox | ref2.bbox; 76 | if( int_box.area() / (float) or_box.area() > 0.7 ){ 77 | return; 78 | } 79 | } 80 | 81 | 82 | if( ref.isWord){ 83 | ref.rotatedRect.angle = fabs(ref.rotatedRect.angle); 84 | if( ref.rotatedRect.size.width < ref.rotatedRect.size.height ){ 85 | ref.rotatedRect.angle += 90; 86 | int swp = ref.rotatedRect.size.width; 87 | ref.rotatedRect.size.width = ref.rotatedRect.size.height; 88 | ref.rotatedRect.size.height = swp; 89 | } 90 | float theta = ref.rotatedRect.angle / 180.0 * M_PI - M_PI_2; 91 | while( theta < 0) 92 | theta += M_PI; 93 | while( theta > M_PI) 94 | theta -= M_PI; 95 | //theta = M_PI_2; 96 | assert(theta >= 0); 97 | assert(theta < M_PI); 98 | int n = theta / theta_sampling_step; 99 | assert(n >= 0); 100 | assert(n < numangle); 101 | center.x = ref.bbox.x + ref.bbox.width / 2; 102 | center.y = ref.bbox.y + ref.bbox.height / 2; 103 | int r = cvRound( center.x * tabCos[n] + center.y * tabSin[n] ); 104 | r += (numrho - 1) / 2; 105 | acc.at(n + voting_offset, r + voting_offset) += this->min_value; 106 | int index = n * idx2 + r; 107 | regions2d[index].insert(regId); 108 | 109 | /* 110 | acc.at(n + 1 + voting_offset, r + voting_offset) += this->min_value; 111 | index = (n + 1) * idx2 + r; 112 | regions2d[index].insert(regId); 113 | 114 | acc.at(n - 1 + voting_offset, r + voting_offset) += this->min_value; 115 | index = (n - 1) * idx2 + r; 116 | regions2d[index].insert(regId); 117 | */ 118 | return; 119 | } 120 | 121 | for( int n = 0; n < numangle; 
n++) 122 | { 123 | int r = cvRound( center.x * tabCos[n] + center.y * tabSin[n] ); 124 | r += (numrho - 1) / 2; 125 | int index = n * idx2 + r; 126 | if( ref.quality > 0.3) 127 | acc.at(n + voting_offset, r + voting_offset) += 1; 128 | regions2d[index].insert(regId); 129 | } 130 | } 131 | 132 | void findMaxima(std::vector& lines, std::vector& letterCandidates) 133 | { 134 | std::unordered_multimap reg_to_line; 135 | std::vector lineIx; 136 | for (int x = voting_offset; x < acc.cols - voting_offset; x++) 137 | { 138 | double min, maxVal; 139 | cv::minMaxLoc(acc(cv::Rect(x, 0, 1, acc.rows - 1)), &min, &maxVal); 140 | for (int n = voting_offset; n < acc.rows - voting_offset; n++) 141 | { 142 | int value = acc.at(n, x); 143 | if (value < min_value) 144 | { 145 | continue; 146 | } 147 | 148 | if(value < acc.at(n, x + 1) || value < acc.at(n, x - 1)){ 149 | continue; 150 | } 151 | 152 | int sumVal = value + acc.at(n, x - 1) + acc.at(n, x + 1); 153 | if( sumVal < maxVal){ 154 | continue; 155 | } 156 | 157 | int index = (n - voting_offset) * idx2 + (x - voting_offset); 158 | bool is_maxima = true; 159 | for( auto& rid: regions2d[index] ){ 160 | if(!is_maxima) 161 | break; 162 | LetterCandidate& ref = letterCandidates[rid]; 163 | cv::Point center = ref.getConvexCentroid(); 164 | 165 | for( int n2 = 0; n2 < numangle; n2++){ 166 | int r = cvRound( center.x * tabCos[n2] + center.y * tabSin[n2] ); 167 | r += (numrho - 1) / 2; 168 | if( acc.at(n2 + voting_offset , r + voting_offset) > sumVal){ 169 | is_maxima = false; 170 | break; 171 | } 172 | } 173 | } 174 | 175 | if(!is_maxima){ 176 | continue; 177 | } 178 | 179 | 180 | //if( (n - voting_offset) != numangle / 2) 181 | // continue; 182 | 183 | //double line_rho23 = ((x - 1) - (numrho - 1)*0.5f) * rho; 184 | std::multimap > line_rho2; 185 | for( auto& rid: regions2d[index] ){ 186 | LetterCandidate& ref = letterCandidates[rid]; 187 | cv::Point center = ref.bbox.tl(); 188 | float r201 = center.x * tabCos2[n - voting_offset] + 
center.y * tabSin2[n - voting_offset]; 189 | center = ref.bbox.br(); 190 | int r202 = center.x * tabCos2[n - voting_offset] + center.y * tabSin2[n - voting_offset]; 191 | line_rho2.insert( {MIN(r201, r202), std::pair(MAX(r201, r202), rid ) }); 192 | 193 | } 194 | if( line_rho2.size() == 1 ){ 195 | continue; 196 | } 197 | 198 | std::vector spacing; 199 | spacing.reserve(line_rho2.size()); 200 | std::map>::iterator itp = line_rho2.begin(); 201 | std::map>::iterator itn = itp; 202 | itn++; 203 | do{ 204 | spacing.push_back(MAX(0.0f, itn->first - itp->second.first)); 205 | if( itp->second.first > itn->second.first ){ 206 | std::map>::iterator itc = itn; 207 | while( itc->second.first < itp->second.first){ 208 | itc->second.first = itp->second.first; 209 | } 210 | 211 | } 212 | itp++; 213 | itn++; 214 | }while(itn != line_rho2.end()); 215 | 216 | if( spacing.size() == 0 ) 217 | continue; 218 | 219 | int r = (x - voting_offset); 220 | double line_rho = (r - (numrho - 1)*0.5f) * rho; 221 | int lineId = lines.size(); 222 | lines.push_back(cv::Vec4d(line_rho, (n - voting_offset) * theta_sampling_step, value, lineId)); 223 | itp = line_rho2.begin(); 224 | itn = itp; 225 | itn++; 226 | int s = 0; 227 | while( itn != line_rho2.end() ){ 228 | regionsMap[lineId].insert(itp->second.second); 229 | if( ((spacing[s] > 3.8f ) ) ) { 230 | if( regionsMap[lineId].size() >= 2 ) { 231 | double maxQuality = 0; 232 | for(auto& rid : regionsMap[lineId] ) 233 | maxQuality = MAX(maxQuality, letterCandidates[rid].quality); 234 | if( maxQuality < 0.5 ){ 235 | regionsMap[lineId].clear(); 236 | }else{ 237 | lines.back().val[2] = regionsMap[lineId].size(); 238 | lineId = lines.size(); 239 | lines.push_back(cv::Vec4d(line_rho, (n - voting_offset) * theta_sampling_step, value, lineId)); 240 | } 241 | }else{ 242 | regionsMap[lineId].clear(); 243 | } 244 | } 245 | itp++; 246 | itn++; 247 | s++; 248 | } 249 | if( regionsMap[lineId].size() > 0 ) { 250 | double maxQuality = 0; 251 | for(auto& rid : 
regionsMap[lineId] ) 252 | maxQuality = MAX(maxQuality, letterCandidates[rid].quality); 253 | if( maxQuality < 0.5 ){ 254 | regionsMap[lineId].clear(); 255 | } 256 | regionsMap[lineId].insert(itp->second.second); 257 | lines.back().val[2] = regionsMap[lineId].size(); 258 | } 259 | } 260 | } 261 | } 262 | 263 | cv::Mat acc; 264 | std::vector tabSin; 265 | std::vector tabCos; 266 | std::vector tabSin2; 267 | std::vector tabCos2; 268 | 269 | int numrho; 270 | float rho; 271 | float theta_sampling_step; 272 | int numangle; 273 | int numangle_2; 274 | 275 | int sizes[3]; 276 | int idx2; 277 | 278 | int min_value = 3; 279 | 280 | int voting_offset = 2; 281 | 282 | std::unordered_map > regionsMap; 283 | std::unordered_map > regions2d; 284 | }; 285 | 286 | 287 | inline void drawLine(cv::Mat& cdst, double rho, double theta, cv::Scalar color) 288 | { 289 | cv::Point pt1, pt2; 290 | double a = cos(theta), b = sin(theta); 291 | double x0 = a*rho, y0 = b*rho; 292 | pt1.x = cvRound(x0 + 2000 * (-b)); 293 | pt1.y = cvRound(y0 + 2000 * (a)); 294 | pt2.x = cvRound(x0 - 2000 * (-b)); 295 | pt2.y = cvRound(y0 - 2000 * (a)); 296 | line(cdst, pt1, pt2, color, 2, CV_AA); 297 | } 298 | 299 | void HoughTLDetector::findTextLines(std::vector& letterCandidates, const cv::Mat& originalImage, double letterHeight, std::vector& lineGroups, int type) { 300 | 301 | #ifdef VERBOSE 302 | double t_g = (double) cv::getTickCount(); 303 | double maxRho = round(sqrt(originalImage.rows * originalImage.rows + originalImage.cols * originalImage.cols)); 304 | #endif 305 | 306 | ///LineAccumulator acc(originalImage.rows, originalImage.cols, letterHeight / 2); 307 | LineAccumulator acc(originalImage, letterHeight / 2, M_PI / 16); 308 | 309 | #ifdef VERBOSE 310 | std::cout << "Letter Height: " << letterHeight << " - " << letterHeight / 2 << std::endl; 311 | cv::Mat cdst = originalImage.clone(); 312 | cv::cvtColor(cdst, cdst, cv::COLOR_GRAY2BGR); 313 | 314 | double mr = (int)maxRho; 315 | //int rhoIx = 
(int)round(rho / rhoSamplingStep) + _rhoOffset; 316 | for(double i = 0; i < mr; i += acc.rho){ 317 | drawLine(cdst, i, M_PI_2, cv::Scalar(128, 128, 128)); 318 | } 319 | 320 | for (size_t i = 0; i < letterCandidates.size(); i++) 321 | { 322 | if( letterCandidates[i].keyPoint.type != type || letterCandidates[i].duplicate != -1) 323 | continue; 324 | double size = letterCandidates[i].bbox.height; 325 | if( letterCandidates[i].isWord ) 326 | size = MIN(letterCandidates[i].bbox.height, letterCandidates[i].bbox.width); 327 | letterCandidates[i].angleScore = 0; 328 | double hr = MIN(size, letterHeight) / MAX(size, letterHeight); 329 | if( hr < 0.5){ 330 | continue; 331 | } 332 | if(letterCandidates[i].isWord) 333 | cv::rectangle(cdst, letterCandidates[i].bbox, cv::Scalar(0, 255, 0)); 334 | else 335 | cv::rectangle(cdst, letterCandidates[i].bbox, cv::Scalar(0, 0, 255)); 336 | } 337 | 338 | cv::imshow("voting step", cdst); 339 | cv::waitKey(0); 340 | 341 | #endif 342 | 343 | for (size_t i = 0; i < letterCandidates.size(); i++) 344 | { 345 | if( letterCandidates[i].keyPoint.type != type || letterCandidates[i].duplicate != -1){ 346 | continue; 347 | } 348 | double size = (letterCandidates[i].bbox.height); 349 | if( letterCandidates[i].isWord ) 350 | size = MIN(letterCandidates[i].bbox.height, letterCandidates[i].bbox.width); 351 | double hr = MIN(size, letterHeight) / MAX(size, letterHeight); 352 | if( hr < 0.5){ 353 | continue; 354 | } 355 | 356 | cv::Point center = letterCandidates[i].getConvexCentroid(); 357 | acc.addRegion(center, i, letterCandidates); 358 | } 359 | 360 | 361 | std::vector initialLines; 362 | acc.min_value = 6; 363 | acc.findMaxima(initialLines, letterCandidates); 364 | 365 | #ifdef VERBOSE 366 | std::cout << "Hough maxima in " << (cv::getTickCount() - t_g) / (cv::getTickFrequency()) * 1000 << ", " << initialLines.size() << "\n"; 367 | sort(initialLines.begin(), initialLines.end(), 368 | [&](const cv::Vec4d & a, const cv::Vec4d & b) -> bool { 369 | return 
a.val[2] > b.val[2]; 370 | }); 371 | #endif 372 | 373 | for (size_t i = 0; i < initialLines.size(); i++) 374 | { 375 | if( acc.regionsMap[initialLines[i].val[3]].size() < 1) 376 | continue; 377 | double rho = initialLines[i][0], theta = initialLines[i][1]; 378 | #ifdef VERBOSE 379 | 380 | 381 | 382 | cv::Mat cdst = originalImage.clone(); 383 | cv::cvtColor(cdst, cdst, cv::COLOR_GRAY2BGR); 384 | drawLine(cdst, rho, theta, cv::Scalar(128, 128, 128)); 385 | #endif 386 | lineGroups.push_back(LineGroup( initialLines[i].val[2], rho, theta, 1 )); 387 | lineGroups.back().regionIds = acc.regionsMap[initialLines[i].val[3]]; 388 | 389 | #ifdef VERBOSE 390 | for( auto rid : lineGroups.back().regionIds){ 391 | assert(rid < (int) letterCandidates.size()); 392 | if( letterCandidates[rid].isWord ){ 393 | cv::rectangle(cdst, letterCandidates[rid].bbox, cv::Scalar(0, 255, 0)); 394 | } 395 | else 396 | cv::rectangle(cdst, letterCandidates[rid].bbox, cv::Scalar(0, 0, 255)); 397 | } 398 | std::cout << "Group: " << initialLines[i].val[0] << '/' << initialLines[i].val[1] * 180 / M_PI << "/" << initialLines[i].val[2] << "/" << lineGroups.back().regionIds.size() << std::endl; 399 | cv::imshow("ts", cdst); 400 | cv::waitKey(0); 401 | #endif 402 | } 403 | //std::cout << "Hough To groups " << (cv::getTickCount() - t_g) / (cv::getTickFrequency()) * 1000 << "\n"; 404 | //std::cout << "Tuples groups " << (cv::getTickCount() - t_g) / (cv::getTickFrequency()) * 1000 << "\n"; 405 | } 406 | 407 | } /* namespace cmp */ 408 | 409 | -------------------------------------------------------------------------------- /src/HoughTLDetector.h: -------------------------------------------------------------------------------- 1 | /* 2 | * HoughTLDetector.h 3 | * 4 | * Created on: Jun 17, 2015 5 | * Author: Michal Busta 6 | */ 7 | #ifndef HOUGHTLDETECTOR_H_ 8 | #define HOUGHTLDETECTOR_H_ 9 | 10 | #include 11 | 12 | #include "segm/segmentation.h" 13 | 14 | namespace cmp 15 | { 16 | 17 | typedef cv::Vec Vec9i; 18 | 
19 | struct LineGroup{ 20 | 21 | LineGroup(double score, double rho, double theta, double scale) : score(score), rho(rho), theta(theta), scale(scale){ 22 | 23 | isVertical = !(theta * 180 / M_PI > 45 && theta * 180 / M_PI < 135); 24 | density = 0; 25 | } 26 | 27 | LineGroup(std::vector& pointsTop, std::vector pointsBottom, float density) : pointsTop(pointsTop), pointsBottom(pointsBottom), density(density), score(0), rho(0), theta(0), scale(0){ 28 | 29 | } 30 | 31 | float density; 32 | 33 | std::set groupIds; 34 | std::set regionIds; 35 | 36 | std::vector pointsTop; 37 | std::vector pointsBottom; 38 | 39 | double score; 40 | bool processed = false; 41 | double rho = 0; 42 | double theta = 0; 43 | double scale; 44 | /** the keypoints types */ 45 | int type = 0; 46 | bool conflict = false; 47 | 48 | cv::Rect bbox; 49 | 50 | void sortIds(std::vector& letterCandidates); 51 | 52 | bool isVertical = false; 53 | }; 54 | 55 | 56 | /** 57 | * @class cmp::HoughTLDetector 58 | * 59 | * @brief TODO brief description 60 | * 61 | * TODO type description 62 | */ 63 | class HoughTLDetector 64 | { 65 | public: 66 | 67 | void findTextLines(std::vector& letterCandidates, const cv::Mat& originalImage, double letterHeight, std::vector& lineGroups, int type); 68 | 69 | }; 70 | 71 | } /* namespace cmp */ 72 | 73 | #endif /* HOUGHTLDETECTOR_H_ */ 74 | -------------------------------------------------------------------------------- /src/IOUtils.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #include 5 | #include 6 | 7 | 8 | #ifdef _WIN32 9 | //# include "Shellapi.h" 10 | #include 11 | # include 12 | # include 13 | # define mkdir(a) _mkdir(a) 14 | # define GetCurrentDir _getcwd 15 | #else 16 | #if !defined(ANDROID) 17 | # include 18 | #else 19 | # include 20 | #endif 21 | # include 22 | # include 23 | //# include 24 | # include 25 | # include 26 | # include 27 | # define GetCurrentDir getcwd 28 | #endif 29 | 30 | #ifndef S_ISDIR 31 | 
#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) 32 | #endif 33 | 34 | #ifndef S_ISREG 35 | #define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) 36 | #endif 37 | 38 | #ifndef MAX_PATH 39 | # define MAX_PATH 256 40 | #endif 41 | 42 | #include 43 | #include 44 | #include "IOUtils.h" 45 | using namespace cv; 46 | using namespace std; 47 | 48 | namespace cmp 49 | { 50 | 51 | IOUtils::IOUtils(void) 52 | { 53 | } 54 | 55 | IOUtils::~IOUtils(void) 56 | { 57 | } 58 | 59 | 60 | void IOUtils::ShowImageInWindow(Mat img, int flags, const char* windowName) 61 | { 62 | namedWindow(windowName, flags); 63 | imshow(windowName, img); 64 | waitKey(); 65 | cv::destroyWindow(windowName); 66 | } 67 | 68 | string IOUtils::SaveTempImage(Mat img, string fileName, const bool forceWrite) 69 | { 70 | #ifdef _DEBUG 71 | const bool debug = true; 72 | #else 73 | const bool debug = false; 74 | #endif 75 | if(forceWrite || debug) 76 | { 77 | #ifdef _WIN32 78 | string tempPath = "C:\\Temp\\TextSpotter\\imageOutput\\" + fileName + ".png"; 79 | #else 80 | string tempPath = "/tmp/" + fileName + ".png"; 81 | #endif 82 | imwrite(tempPath, img); 83 | return tempPath; 84 | } 85 | return ""; 86 | } 87 | 88 | 89 | /** 90 | * 91 | * @param directory 92 | * @param searchPattern 93 | * @param returnFullPath if true, full file path is returned 94 | * @return files in directory according to search pattern 95 | */ 96 | vector IOUtils::GetFilesInDirectory(const string& directory, const string& searchPattern, bool returnFullPath) 97 | { 98 | string fullSearch = CombinePath( directory, searchPattern ); 99 | #if defined(_WIN32) 100 | vector files; 101 | 102 | WIN32_FIND_DATA ffd; 103 | 104 | 105 | HANDLE hFind = FindFirstFile(fullSearch.c_str(), &ffd); 106 | 107 | if (INVALID_HANDLE_VALUE == hFind) 108 | return files; 109 | 110 | 111 | 112 | do 113 | { 114 | if (!(ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) 115 | { 116 | string fileName (ffd.cFileName); 117 | if( returnFullPath ) 118 | 
files.push_back(CombinePath(directory, fileName)); 119 | else 120 | files.push_back(fileName); 121 | } 122 | } 123 | while (FindNextFile(hFind, &ffd) != 0); 124 | 125 | FindClose(hFind); 126 | 127 | return files; 128 | #elif not defined(ANDROID) 129 | vector files; 130 | 131 | glob_t p; 132 | glob(fullSearch.c_str(), GLOB_TILDE, NULL, &p); 133 | for (size_t i=0; i files; 145 | DIR *dir; 146 | struct dirent *drnt; 147 | dir = opendir(directory.c_str()); 148 | while ((drnt = readdir(dir)) != NULL) 149 | { 150 | string name(drnt->d_name); 151 | unsigned char type = drnt->d_type; 152 | if (name != directory && name.length() >= 4) 153 | { 154 | if (type == DT_DIR) { 155 | continue; 156 | } 157 | else if (name.find(".png") == (name.length() - 4)) { 158 | files.push_back( directory + "/" + name ); 159 | } 160 | else if (name.find(".jpg") == (name.length() - 4)) { 161 | files.push_back( directory + "/" + name ); 162 | } 163 | } 164 | } 165 | return files; 166 | #endif 167 | 168 | } 169 | 170 | 171 | vector IOUtils::GetDirectoriesInDirectory(const string& directory, const string& searchPattern, bool returnFullPath) 172 | { 173 | string fullSearch = CombinePath( directory, searchPattern ); 174 | 175 | #if defined(_WIN32) 176 | vector directories; 177 | 178 | WIN32_FIND_DATA ffd; 179 | 180 | 181 | HANDLE hFind = FindFirstFile(fullSearch.c_str(), &ffd); 182 | 183 | if (INVALID_HANDLE_VALUE == hFind) 184 | return directories; 185 | 186 | 187 | 188 | do 189 | { 190 | if ((ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) 191 | { 192 | string fileName (ffd.cFileName); 193 | 194 | if (fileName != "." 
&& fileName != "..") 195 | { 196 | if( returnFullPath ) 197 | directories.push_back(CombinePath(directory, fileName)); 198 | else 199 | directories.push_back(fileName); 200 | 201 | } 202 | } 203 | } 204 | while (FindNextFile(hFind, &ffd) != 0); 205 | 206 | FindClose(hFind); 207 | 208 | return directories; 209 | #elif !defined(ANDROID) 210 | 211 | vector files; 212 | 213 | glob_t p; 214 | glob(fullSearch.c_str(), GLOB_TILDE, NULL, &p); 215 | for (size_t i=0; i files; 226 | DIR *dir; 227 | struct dirent *drnt; 228 | dir = opendir(directory.c_str()); 229 | while ((drnt = readdir(dir)) != NULL) 230 | { 231 | string name(drnt->d_name); 232 | unsigned char type = drnt->d_type; 233 | if (name != directory && name.length() >= 4) 234 | { 235 | if (type == DT_DIR) { 236 | continue; 237 | } 238 | else if (name.find(".png") == (name.length() - 4)) { 239 | files.push_back( directory + "/" + name ); 240 | } 241 | else if (name.find(".jpg") == (name.length() - 4)) { 242 | files.push_back( directory + "/" + name ); 243 | } 244 | } 245 | } 246 | return files; 247 | #endif 248 | } 249 | 250 | bool IOUtils::IsDirectory(const string& path) 251 | { 252 | bool test = false; 253 | struct stat stats; 254 | if (!stat(path.c_str(), &stats)) { 255 | if (S_ISDIR(stats.st_mode)) { 256 | test = true; 257 | } 258 | } 259 | return test; 260 | } 261 | 262 | /** 263 | * @param path 264 | * @return true if path exits on file-system 265 | */ 266 | bool IOUtils::PathExist(const string& path) 267 | { 268 | #ifdef _WIN32 269 | return ::GetFileAttributes(path.c_str()) != INVALID_FILE_ATTRIBUTES; 270 | #else 271 | struct stat st; 272 | if(stat(path.c_str(),&st) == 0) 273 | return true; 274 | 275 | return false; 276 | #endif 277 | } 278 | 279 | std::string IOUtils::CombinePath(std::string directory, std::string file) 280 | { 281 | 282 | string result = directory; 283 | #ifdef _WIN32 284 | if (result[result.size() -1] != '\\') 285 | result += '\\'; 286 | #else 287 | if (result[result.size() -1] != '/') 288 
| result += '/'; 289 | #endif 290 | 291 | result += file; 292 | 293 | return result; 294 | } 295 | 296 | string IOUtils::Basename(string path) 297 | { 298 | #ifdef _WIN32 299 | string reversed; 300 | for(string::reverse_iterator c=path.rbegin(); c!=path.rend(); c++) 301 | { 302 | if(*c != '\\' && *c!='/' ) 303 | { 304 | reversed.push_back(*c); 305 | } 306 | else break; 307 | } 308 | std::reverse(reversed.begin(), reversed.end()); 309 | return reversed; 310 | #else 311 | char *str = new char[path.size()+1]; 312 | path.copy(str, path.size()); 313 | str[path.size()] = '\0'; 314 | string r=basename(str); 315 | delete[] str; 316 | return r; 317 | #endif 318 | } 319 | 320 | string IOUtils::RemoveExtension(string str) 321 | { 322 | return str.substr(0,str.find_last_of(".")); 323 | } 324 | 325 | string IOUtils::Dirname(string path) 326 | { 327 | #ifdef _WIN32 328 | cerr << "FIXME: Utils::dirname not implemented on WIN32." << endl; 329 | return ""; 330 | #else 331 | char *str = new char[path.size()+1]; 332 | path.copy(str, path.size()); 333 | str[path.size()] = '\0'; 334 | string r=dirname(str); 335 | delete[] str; 336 | return r; 337 | #endif 338 | } 339 | 340 | 341 | 342 | bool IOUtils::DeleteFile(const char* fileName) 343 | { 344 | #ifdef _WIN32 345 | return ::DeleteFile(fileName); 346 | #else 347 | return (unlink(fileName) == 0); 348 | #endif 349 | } 350 | 351 | /** 352 | * Creates new directory 353 | * 354 | * No sanity checks! 
355 | * @param dirName 356 | */ 357 | void IOUtils::CreateDir(const std::string& dirName) 358 | { 359 | #if defined(ANDROID) 360 | cvError(CV_StsError, "Utils::CreateDirectory", "Not implemented!", __FILE__, __LINE__); 361 | #else 362 | //TODO check results 363 | mkdir(dirName.c_str(), ALLPERMS); 364 | #endif 365 | } 366 | 367 | string IOUtils::GetCurrentDirectory() 368 | { 369 | char cCurrentPath[FILENAME_MAX]; 370 | if (!GetCurrentDir(cCurrentPath, sizeof(cCurrentPath) / sizeof(char))) 371 | { 372 | cv::error(cv::Exception(CV_StsError, "Utils::GetCurrentDirectory", "Unknown error!", __FILE__, __LINE__)); 373 | } 374 | 375 | string ret = cCurrentPath; 376 | return ret; 377 | } 378 | 379 | 380 | string IOUtils::GetFileNameWithoutExtension(string filePath) 381 | { 382 | int pos1 = filePath.find_last_of('\\'); 383 | int pos2 = filePath.find_last_of('/'); 384 | int pos = max(pos1, pos2); 385 | string fileNameWithoutExtension = filePath.substr(pos+1); 386 | fileNameWithoutExtension = fileNameWithoutExtension.substr(0, fileNameWithoutExtension.find_last_of('.')); 387 | 388 | return fileNameWithoutExtension; 389 | } 390 | 391 | 392 | int IOUtils::StartProcess(string executable, string commandLine) 393 | { 394 | #ifdef _WIN32 395 | 396 | 397 | 398 | PROCESS_INFORMATION processInformation = {0}; 399 | STARTUPINFO startupInfo = {0}; 400 | 401 | startupInfo.cb = sizeof(STARTUPINFO); 402 | 403 | string cmd = executable + " " + commandLine; 404 | CHAR szCommandLine[MAX_PATH]; 405 | memset(szCommandLine, 0, MAX_PATH); 406 | strcpy(szCommandLine, cmd.c_str()); 407 | 408 | // Create the process 409 | BOOL result = CreateProcess(NULL, szCommandLine, 410 | NULL, NULL, TRUE, 411 | NORMAL_PRIORITY_CLASS, 412 | NULL, NULL, &startupInfo, &processInformation); 413 | 414 | 415 | 416 | if (!result) 417 | return -1; 418 | else 419 | return 0; 420 | 421 | #else 422 | string cmd = executable + " " + commandLine; 423 | 424 | int ret = system(cmd.c_str()); 425 | if (WIFSIGNALED(ret) && 426 | 
(WTERMSIG(ret) == SIGINT || WTERMSIG(ret) == SIGQUIT)) 427 | return -1; 428 | return 0; 429 | #endif 430 | 431 | } 432 | 433 | int IOUtils::StartProcessAndWait(string executable, string commandLine, string stdOutputFile) 434 | { 435 | std::cout << "Running command: " << executable << " with parameters: " << commandLine << std::endl; 436 | #ifdef _WIN32 437 | 438 | 439 | PROCESS_INFORMATION processInformation = {0}; 440 | STARTUPINFO startupInfo = {0}; 441 | 442 | startupInfo.cb = sizeof(STARTUPINFO); 443 | 444 | HANDLE hOutputFile = INVALID_HANDLE_VALUE; 445 | if (!stdOutputFile.empty()) 446 | { 447 | SECURITY_ATTRIBUTES sec; 448 | sec.nLength = sizeof(SECURITY_ATTRIBUTES); 449 | sec.lpSecurityDescriptor = NULL; 450 | sec.bInheritHandle = TRUE; 451 | 452 | hOutputFile = CreateFile ( stdOutputFile.c_str(), 453 | GENERIC_WRITE, 454 | FILE_SHARE_READ | FILE_SHARE_WRITE, 455 | &sec, 456 | CREATE_ALWAYS, 457 | FILE_ATTRIBUTE_NORMAL, 458 | NULL); 459 | 460 | if (hOutputFile != INVALID_HANDLE_VALUE) 461 | { 462 | startupInfo.dwFlags = STARTF_USESHOWWINDOW | STARTF_USESTDHANDLES; 463 | startupInfo.wShowWindow = SW_HIDE; 464 | startupInfo.hStdOutput = hOutputFile; 465 | 466 | 467 | } 468 | } 469 | 470 | 471 | 472 | 473 | string cmd = executable + " " + commandLine; 474 | CHAR szCommandLine[MAX_PATH]; 475 | memset(szCommandLine, 0, MAX_PATH); 476 | strcpy(szCommandLine, cmd.c_str()); 477 | 478 | // Create the process 479 | BOOL result = CreateProcess(NULL, szCommandLine, 480 | NULL, NULL, TRUE, 481 | NORMAL_PRIORITY_CLASS, 482 | GetEnvironmentStrings(), NULL, &startupInfo, &processInformation); 483 | 484 | 485 | 486 | if (!result) 487 | return -1; 488 | 489 | // Successfully created the process. Wait for it to finish. 490 | WaitForSingleObject( processInformation.hProcess, INFINITE ); 491 | 492 | // Get the exit code. 493 | DWORD exitCode; 494 | result = GetExitCodeProcess(processInformation.hProcess, &exitCode); 495 | 496 | // Close the handles. 
497 | CloseHandle( processInformation.hProcess ); 498 | CloseHandle( processInformation.hThread ); 499 | 500 | if (hOutputFile != INVALID_HANDLE_VALUE) 501 | CloseHandle(hOutputFile); 502 | 503 | if (!result) 504 | { 505 | // Could not get exit code. 506 | return -2; 507 | } 508 | 509 | return (int)exitCode; 510 | 511 | 512 | #else 513 | string cmd = executable + " " + commandLine; 514 | if (!stdOutputFile.empty()) { 515 | cmd += " > " + stdOutputFile; 516 | } 517 | int ret = system(cmd.c_str()); 518 | if (WIFSIGNALED(ret) && 519 | (WTERMSIG(ret) == SIGINT || WTERMSIG(ret) == SIGQUIT)) 520 | return -1; 521 | return 0; 522 | #endif 523 | 524 | } 525 | 526 | std::string IOUtils::RemoveBasepath(string pathstr, int level) 527 | { 528 | #ifdef _WIN32 529 | char separator='\\'; 530 | #else 531 | char separator='/'; 532 | #endif 533 | int pos=0; 534 | for(string::iterator c=pathstr.begin(); c!=pathstr.end(); ++c) 535 | { 536 | if (level==0) break; 537 | 538 | if(*c==separator) 539 | { 540 | level--; 541 | } 542 | pos++; 543 | } 544 | return pathstr.substr(pos); 545 | } 546 | 547 | std::string IOUtils::GetTempPath(void) 548 | { 549 | #ifdef _WIN32 550 | 551 | TCHAR lpTempPathBuffer[MAX_PATH]; 552 | ::GetTempPath(MAX_PATH, lpTempPathBuffer); 553 | 554 | return lpTempPathBuffer; 555 | #else 556 | return (""); //TODO: Linux version 557 | #endif 558 | } 559 | 560 | void IOUtils::CpFile(const std::string& source, const std::string& dst) 561 | { 562 | std::ifstream src( source.c_str(), ios::binary ); 563 | ofstream dest( dst.c_str(), ios::binary); 564 | 565 | dest << src.rdbuf(); 566 | 567 | src.close(); 568 | dest.close(); 569 | } 570 | 571 | }//namespace cmp 572 | 573 | -------------------------------------------------------------------------------- /src/IOUtils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace cmp 9 | { 10 | /** 11 | 12 | @brief Input/Output 
utility methods. 13 | 14 | @author Lukas Neumann 15 | @date 3.9.2012 16 | 17 | */ 18 | class IOUtils 19 | { 20 | private: 21 | IOUtils(void); 22 | ~IOUtils(void); 23 | 24 | public: 25 | 26 | static std::vector GetFilesInDirectory( const std::string& directory, const std::string& searchPattern, bool returnFullPath = false ); 27 | static std::vector GetDirectoriesInDirectory(const std::string& directory, const std::string& searchPattern, bool returnFullPath = false); 28 | 29 | static std::string GetFileNameWithoutExtension(std::string filePath); 30 | static std::string RemoveBasepath(std::string str, int level=1); 31 | static std::string CombinePath(std::string directory, std::string file); 32 | static std::string Basename(std::string path); 33 | static std::string Dirname(std::string path); 34 | static std::string RemoveExtension(std::string str); 35 | 36 | static std::string GetTempPath(); 37 | 38 | static bool DeleteFile(const char* fileName); 39 | static void CreateDir(const std::string& dirName); 40 | 41 | static std::string GetCurrentDirectory(); 42 | static bool IsDirectory(const std::string& path); 43 | static bool PathExist(const std::string& path); 44 | 45 | static int StartProcessAndWait(std::string executable, std::string commandLine, std::string stdOutputFile); 46 | static int StartProcess(std::string executable, std::string commandLine); 47 | 48 | static void ShowImageInWindow(cv::Mat img, int flags = 1, const char* windowName = "Image"); 49 | static std::string SaveTempImage(cv::Mat img, std::string windowName, const bool forceWrite=false); 50 | 51 | static void CpFile( const std::string& source, const std::string& dst ); 52 | }; 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/KeyPoints.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * KeyPoints.cpp 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 
2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. Rosten and T. Drummond, ECCV 2006 23 | */ 24 | #include "KeyPoints.h" 25 | 26 | namespace cmp 27 | { 28 | 29 | KeyPointsFilterC::KeyPointsFilterC() 30 | { 31 | // TODO Auto-generated constructor stub 32 | 33 | } 34 | 35 | KeyPointsFilterC::~KeyPointsFilterC() 36 | { 37 | // TODO Auto-generated destructor stub 38 | } 39 | 40 | 41 | struct KeypointResponseGreaterThanThreshold 42 | { 43 | KeypointResponseGreaterThanThreshold(float _value) : 44 | value(_value) 45 | { 46 | } 47 | inline bool operator()(const FastKeyPoint& kpt) const 48 | { 49 | return kpt.response >= value; 50 | } 51 | float value; 52 | }; 53 | 54 | struct KeypointResponseGreater 55 | { 56 | inline bool operator()(const FastKeyPoint& kp1, const FastKeyPoint& kp2) const 57 | { 58 | return kp1.response > kp2.response; 59 | } 60 | }; 61 | 62 | // takes keypoints and culls them by the response 63 | void KeyPointsFilterC::retainBest(std::vector& keypoints, std::unordered_multimap >& keypointPixels, int n_points) 64 | { 65 | //this is only necessary if the keypoints size is greater than the number of desired points. 
66 | if( n_points >= 0 && keypoints.size() > (size_t)n_points ) 67 | { 68 | if (n_points==0) 69 | { 70 | keypoints.clear(); 71 | return; 72 | } 73 | if(keypointPixels.size() == 0) 74 | { 75 | std::sort(keypoints.begin(), keypoints.end(), KeypointResponseGreater()); 76 | 77 | //this is the boundary response, and in the case of FAST may be ambigous 78 | float ambiguous_response = keypoints[n_points - 1].response; 79 | //use std::partition to grab all of the keypoints with the boundary response. 80 | std::vector::iterator new_end = 81 | std::partition(keypoints.begin() + n_points - 1, keypoints.end(), 82 | KeypointResponseGreaterThanThreshold(ambiguous_response)); 83 | //resize the keypoints, given this new end point. nth_element and partition reordered the points inplace 84 | keypoints.erase(new_end, keypoints.end()); 85 | }else{ 86 | std::pair >::iterator, std::unordered_multimap>::iterator> ret, ret2; 87 | 88 | sort(keypoints.begin(), keypoints.end(), 89 | [&](const FastKeyPoint & a, const FastKeyPoint & b) 90 | { 91 | ret = keypointPixels.equal_range(a.class_id); 92 | int dist1 = abs(20 - std::distance(ret.first, ret.second)); 93 | ret2 = keypointPixels.equal_range(b.class_id); 94 | int dist2 = abs(20 - std::distance(ret2.first, ret2.second)); 95 | return dist1 < dist2; 96 | }); 97 | keypoints.resize( n_points ); 98 | } 99 | } 100 | } 101 | 102 | struct RoiPredicate 103 | { 104 | RoiPredicate( const cv::Rect& _r ) : r(_r) 105 | {} 106 | 107 | bool operator()( const FastKeyPoint& keyPt ) const 108 | { 109 | return !r.contains( keyPt.pt ); 110 | } 111 | 112 | cv::Rect r; 113 | }; 114 | 115 | void KeyPointsFilterC::runByImageBorder( std::vector& keypoints, cv::Size imageSize, int borderSize ) 116 | { 117 | if( borderSize > 0) 118 | { 119 | if (imageSize.height <= borderSize * 2 || imageSize.width <= borderSize * 2) 120 | keypoints.clear(); 121 | else 122 | keypoints.erase( std::remove_if(keypoints.begin(), keypoints.end(), 123 | 
RoiPredicate(cv::Rect(cv::Point(borderSize, borderSize), 124 | cv::Point(imageSize.width - borderSize, imageSize.height - borderSize)))), 125 | keypoints.end() ); 126 | } 127 | } 128 | 129 | class MaskPredicate 130 | { 131 | public: 132 | MaskPredicate( const cv::Mat& _mask ) : mask(_mask) {} 133 | bool operator() (const FastKeyPoint& key_pt) const 134 | { 135 | return mask.at( (int)(key_pt.pt.y + 0.5f), (int)(key_pt.pt.x + 0.5f) ) == 0; 136 | } 137 | 138 | private: 139 | const cv::Mat mask; 140 | MaskPredicate& operator=(const MaskPredicate&); 141 | }; 142 | 143 | void KeyPointsFilterC::runByPixelsMask( std::vector& keypoints, const cv::Mat& mask ) 144 | { 145 | if( mask.empty() ) 146 | return; 147 | 148 | keypoints.erase(std::remove_if(keypoints.begin(), keypoints.end(), MaskPredicate(mask)), keypoints.end()); 149 | } 150 | 151 | } /* namespace cmp */ 152 | 153 | -------------------------------------------------------------------------------- /src/KeyPoints.h: -------------------------------------------------------------------------------- 1 | /* 2 | * KeyPoints.h 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. Rosten and T. 
Drummond, ECCV 2006 23 | */ 24 | #ifndef KEYPOINTSFILTERC_H_ 25 | #define KEYPOINTSFILTERC_H_ 26 | 27 | #include 28 | #include 29 | 30 | namespace cmp 31 | { 32 | 33 | class CV_EXPORTS_W_SIMPLE FastKeyPoint : public cv::KeyPoint 34 | { 35 | public: 36 | 37 | CV_WRAP FastKeyPoint() : cv::KeyPoint(), count(0), isMerged(false) {} 38 | //! the full constructor 39 | CV_WRAP FastKeyPoint(cv::Point2f _pt, float _size, float _angle=-1, 40 | float _response=0, int _octave=0, int _class_id=-1, uchar count = 0, bool isMerged = false) : cv::KeyPoint(_pt, _size, _angle, _response, _octave, _class_id), count(count), isMerged(isMerged) {} 41 | 42 | CV_WRAP FastKeyPoint(float x, float y, float _size, float _angle=-1, 43 | float _response=0, int _octave=0, int _class_id=-1, uchar count = 0, bool isMerged = false): cv::KeyPoint(x, y, _size, _angle, _response, _octave, _class_id), count(count), isMerged(isMerged) {} 44 | 45 | cv::Point2f intensityIn; 46 | 47 | cv::Point2f intensityOut; 48 | 49 | uchar count; 50 | 51 | bool isMerged; 52 | 53 | uchar type = 0; 54 | 55 | uchar channel = 0; 56 | 57 | uchar maxima = 0; 58 | }; 59 | 60 | /** 61 | * @class cmp::KeyPointsFilterC 62 | * 63 | * @brief TODO brief description 64 | * 65 | * TODO type description 66 | */ 67 | class KeyPointsFilterC 68 | { 69 | public: 70 | KeyPointsFilterC(); 71 | virtual ~KeyPointsFilterC(); 72 | 73 | static void retainBest(std::vector& keypoints, std::unordered_multimap >& keypointPixels, int n_points); 74 | 75 | static void runByImageBorder( std::vector& keypoints, cv::Size imageSize, int borderSize ); 76 | 77 | static void runByPixelsMask( std::vector& keypoints, const cv::Mat& mask ); 78 | }; 79 | 80 | } /* namespace cmp */ 81 | 82 | #endif /* KEYPOINTSFILTERC_H_ */ 83 | -------------------------------------------------------------------------------- /src/Python/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set(CMAKE_MODULE_PATH 
${PROJECT_SOURCE_DIR}/src/Python) 3 | 4 | find_package(PythonLibs 2.7 REQUIRED) 5 | 6 | find_package(NumPy REQUIRED) 7 | 8 | include_directories(${PYTHON_INCLUDE_DIRS}) 9 | include_directories("/usr/include/python2.7") 10 | 11 | include_directories(${PYTHON_INCLUDE_DIRS}) 12 | include_directories(${NUMPY_INCLUDE_DIRS}) 13 | 14 | include_directories(${PROJECT_SOURCE_DIR}/src) 15 | 16 | add_library(fasttext_py 17 | "pyFastTextG.cpp" 18 | ) 19 | 20 | add_library(ftext SHARED 21 | "pyFastTextAPIG.c" 22 | ) 23 | 24 | target_link_libraries(ftext fasttext_py FTreader ${OpenCV_LIBS} ${PYTHON_LIBRARIES}) 25 | 26 | set_target_properties( ftext 27 | PROPERTIES 28 | ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" 29 | LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" 30 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" 31 | PREFIX "" 32 | ) 33 | -------------------------------------------------------------------------------- /src/Python/FindNumPy.cmake: -------------------------------------------------------------------------------- 1 | # - Find the NumPy libraries 2 | # This module finds if NumPy is installed, and sets the following variables 3 | # indicating where it is. 4 | # 5 | # TODO: Update to provide the libraries and paths for linking npymath lib. 6 | # 7 | # NUMPY_FOUND - was NumPy found 8 | # NUMPY_VERSION - the version of NumPy found as a string 9 | # NUMPY_VERSION_MAJOR - the major version number of NumPy 10 | # NUMPY_VERSION_MINOR - the minor version number of NumPy 11 | # NUMPY_VERSION_PATCH - the patch version number of NumPy 12 | # NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601 13 | # NUMPY_INCLUDE_DIRS - path to the NumPy include files 14 | 15 | #============================================================================ 16 | # Copyright 2012 Continuum Analytics, Inc. 
17 | # 18 | # MIT License 19 | # 20 | # Permission is hereby granted, free of charge, to any person obtaining 21 | # a copy of this software and associated documentation files 22 | # (the "Software"), to deal in the Software without restriction, including 23 | # without limitation the rights to use, copy, modify, merge, publish, 24 | # distribute, sublicense, and/or sell copies of the Software, and to permit 25 | # persons to whom the Software is furnished to do so, subject to 26 | # the following conditions: 27 | # 28 | # The above copyright notice and this permission notice shall be included 29 | # in all copies or substantial portions of the Software. 30 | # 31 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 32 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 33 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 34 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 35 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 36 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 37 | # OTHER DEALINGS IN THE SOFTWARE. 
38 | # 39 | #============================================================================ 40 | 41 | # Finding NumPy involves calling the Python interpreter 42 | if(NumPy_FIND_REQUIRED) 43 | find_package(PythonInterp REQUIRED) 44 | else() 45 | find_package(PythonInterp) 46 | endif() 47 | 48 | if(PYTHONINTERP_FOUND) 49 | execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" 50 | "import numpy as n; print(n.__version__); print(n.get_include());" 51 | RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS 52 | OUTPUT_VARIABLE _NUMPY_VALUES 53 | ERROR_VARIABLE _NUMPY_ERROR_VALUE 54 | OUTPUT_STRIP_TRAILING_WHITESPACE) 55 | 56 | if(_NUMPY_SEARCH_SUCCESS MATCHES 0) 57 | set(NUMPY_FOUND TRUE) 58 | 59 | # Convert the process output into a list 60 | string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES ${_NUMPY_VALUES}) 61 | string(REGEX REPLACE "\n" ";" _NUMPY_VALUES ${_NUMPY_VALUES}) 62 | list(GET _NUMPY_VALUES 0 NUMPY_VERSION) 63 | list(GET _NUMPY_VALUES 1 NUMPY_INCLUDE_DIRS) 64 | 65 | # Make sure all directory separators are '/' 66 | string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS ${NUMPY_INCLUDE_DIRS}) 67 | 68 | # Get the major and minor version numbers 69 | string(REGEX REPLACE "\\." 
";" _NUMPY_VERSION_LIST ${NUMPY_VERSION}) 70 | list(GET _NUMPY_VERSION_LIST 0 NUMPY_VERSION_MAJOR) 71 | list(GET _NUMPY_VERSION_LIST 1 NUMPY_VERSION_MINOR) 72 | list(GET _NUMPY_VERSION_LIST 2 NUMPY_VERSION_PATCH) 73 | math(EXPR NUMPY_VERSION_DECIMAL 74 | "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}") 75 | 76 | find_package_message(NUMPY 77 | "Found NumPy: version \"${NUMPY_VERSION}\" ${NUMPY_INCLUDE_DIRS}" 78 | "${NUMPY_INCLUDE_DIRS}${NUMPY_VERSION}") 79 | else() 80 | if(NumPy_FIND_REQUIRED) 81 | message(FATAL_ERROR 82 | "NumPy import failure:\n${_NUMPY_ERROR_VALUE}") 83 | endif() 84 | set(NUMPY_FOUND FALSE) 85 | endif() 86 | else() 87 | set(NUMPY_FOUND FALSE) 88 | endif() 89 | -------------------------------------------------------------------------------- /src/Python/pyFastTextAPIG.c: -------------------------------------------------------------------------------- 1 | /* 2 | * pyFastTextAPI.c 3 | * 4 | * Created on: Oct 17, 2014 5 | * Author: Michal Busta 6 | * 7 | * Copyright 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * Based on: 10 | * 11 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 12 | * Machine learning for high-speed corner detection, E. Rosten and T. 
Drummond, ECCV 2006 13 | */ 14 | 15 | #include "pyFastTextG.h" 16 | 17 | static PyObject *FastTextError; 18 | 19 | 20 | 21 | static PyObject* findKeyPoints_cfunc (PyObject *dummy, PyObject *args) 22 | { 23 | PyObject *arg1=NULL; 24 | PyArrayObject *out=NULL; 25 | PyArrayObject *arr1=NULL; 26 | PyArrayObject* img = NULL; 27 | npy_intp* img_dims = NULL; 28 | 29 | float scaleFactor = 2.0f; 30 | int nlevels = 3; 31 | int edgeThreshold = 12; 32 | int keypointTypes = 2; 33 | int kMin = 9; 34 | int kMax = 16; 35 | 36 | if (!PyArg_ParseTuple(args, "O|fiiiii", &arg1, &scaleFactor, &nlevels, &edgeThreshold, &keypointTypes, &kMin, &kMax)) 37 | return NULL; 38 | 39 | img = (PyArrayObject *) arg1; 40 | img_dims = PyArray_DIMS(img); 41 | int numOfDim = PyArray_NDIM(img); 42 | 43 | out = find_keypoints(img, numOfDim, img_dims, scaleFactor, nlevels, edgeThreshold, keypointTypes, kMin, kMax); 44 | 45 | return (PyObject *) out; 46 | } 47 | 48 | static PyObject* initialize_cfunc (PyObject *dummy, PyObject *args) 49 | { 50 | float scaleFactor = 2.0f; 51 | int nlevels = 3; 52 | int edgeThreshold = 12; 53 | int keypointTypes = 2; 54 | int kMin = 9; 55 | int kMax = 11; 56 | int segmenterType = 0; 57 | const char * charClsFile; 58 | const char * outputDir; 59 | int erode = 1; 60 | int segmentGrad = 0; 61 | int minCompSize = 0; 62 | float thresholdFactor = 1.0; 63 | float minTupleTopBottomAngle = 0; 64 | int segmDeltaInt = 0; 65 | int instance = -1; 66 | float maxSpaceHeightRatio = -1; 67 | int createKeypointSegmenter = 0; 68 | if (!PyArg_ParseTuple(args, "|fiiiiisiiiifi", &scaleFactor, &nlevels, &edgeThreshold, &keypointTypes, &kMin, &kMax, &charClsFile, 69 | &erode, &segmentGrad, &minCompSize, &instance, &thresholdFactor, &segmDeltaInt)) 70 | return NULL; 71 | 72 | instance = initialize(scaleFactor, nlevels, edgeThreshold, keypointTypes, kMin, kMax, charClsFile, erode, segmentGrad, minCompSize, instance, thresholdFactor, segmDeltaInt); 73 | 74 | return Py_BuildValue("i", instance); 75 | 
} 76 | 77 | static PyObject* get_char_segmentations_cfunc (PyObject *dummy, PyObject *args) 78 | { 79 | PyObject *arg1=NULL; 80 | PyArrayObject *out=NULL; 81 | PyArrayObject *arr1=NULL; 82 | PyArrayObject* img = NULL; 83 | npy_intp* img_dims = NULL; 84 | 85 | const char * imageName; 86 | const char * outputDir = NULL; 87 | int instance = 0; 88 | int minHeight = 0; 89 | if (!PyArg_ParseTuple(args, "O|ssii", &arg1, &outputDir, &imageName, &instance, &minHeight )) 90 | return NULL; 91 | 92 | img = (PyArrayObject *) arg1; 93 | img_dims = PyArray_DIMS(img); 94 | int numOfDim = PyArray_NDIM(img); 95 | 96 | out = get_char_segmentations(img, numOfDim, img_dims, outputDir, imageName, instance, minHeight); 97 | 98 | return (PyObject *) out; 99 | } 100 | 101 | static PyObject* getLastKeyPoints_cfunc (PyObject *dummy, PyObject *args) 102 | { 103 | PyObject *arg1=NULL; 104 | PyArrayObject *out=NULL; 105 | 106 | out = get_last_detection_keypoints(); 107 | 108 | return (PyObject *) out; 109 | } 110 | 111 | static PyObject* getKeypointStrokes_cfunc (PyObject *dummy, PyObject *args) 112 | { 113 | PyObject *arg1=NULL; 114 | PyArrayObject *out=NULL; 115 | int instance = 0; 116 | int keypointId = 0; 117 | if (!PyArg_ParseTuple(args, "i|i", &keypointId, &instance )) 118 | return NULL; 119 | 120 | out = get_keypoint_strokes(keypointId, instance); 121 | 122 | return (PyObject *) out; 123 | } 124 | 125 | static PyObject* getDetectionStat_cfunc (PyObject *dummy, PyObject *args) 126 | { 127 | PyObject *arg1=NULL; 128 | PyArrayObject *out=NULL; 129 | out = get_detection_stat(); 130 | return (PyObject *) out; 131 | } 132 | 133 | static PyObject* getImageAtScale_cfunc (PyObject *dummy, PyObject *args) 134 | { 135 | PyObject *arg1=NULL; 136 | PyArrayObject *out=NULL; 137 | int imageScale = 0; 138 | int instance = 0; 139 | if (!PyArg_ParseTuple(args, "i|i", &imageScale, &instance)) 140 | return NULL; 141 | out = get_image_at_scale(imageScale, instance); 142 | return (PyObject *) out; 143 | } 144 
| 145 | static PyObject* getSegmMask_cfunc (PyObject *dummy, PyObject *args) 146 | { 147 | PyObject *arg1=NULL; 148 | PyArrayObject *out=NULL; 149 | int maskId = 0; 150 | if (!PyArg_ParseTuple(args, "i", &maskId)) 151 | return NULL; 152 | out = get_segmentation_mask(maskId); 153 | return (PyObject *) out; 154 | } 155 | 156 | static PyObject* getImageScales_cfunc (PyObject *dummy, PyObject *args) 157 | { 158 | PyObject *arg1=NULL; 159 | PyArrayObject *out=NULL; 160 | int instance = 0; 161 | if (!PyArg_ParseTuple(args, "|i", &instance)) 162 | return NULL; 163 | out = get_image_scales(instance); 164 | return (PyObject *) out; 165 | } 166 | 167 | static PyObject* getLastOrbKeyPoints_cfunc (PyObject *dummy, PyObject *args) 168 | { 169 | PyObject *arg1=NULL; 170 | PyArrayObject *out=NULL; 171 | 172 | out = get_last_detection_orb_keypoints(); 173 | 174 | return (PyObject *) out; 175 | } 176 | 177 | static PyObject* find_text_lines_cfunc (PyObject *dummy, PyObject *args) 178 | { 179 | PyObject *arg1=NULL; 180 | PyArrayObject *out=NULL; 181 | PyArrayObject *arr1=NULL; 182 | 183 | const char * imageName; 184 | const char * outputDir = NULL; 185 | int instance = 0; 186 | int merge_inners = 0; 187 | if (!PyArg_ParseTuple(args, "|ssi", &outputDir, &imageName, &instance )) 188 | return NULL; 189 | 190 | out = find_text_lines(outputDir, imageName, instance); 191 | return (PyObject *) out; 192 | } 193 | 194 | static PyObject* getNormalizedLine_cfunc (PyObject *dummy, PyObject *args) 195 | { 196 | PyObject *arg1=NULL; 197 | PyArrayObject *out=NULL; 198 | int instance = 0; 199 | int line = 0; 200 | if (!PyArg_ParseTuple(args, "i|i", &line, &instance)) 201 | return NULL; 202 | 203 | out = get_normalized_line(line, instance); 204 | 205 | return (PyObject *) out; 206 | } 207 | 208 | static PyObject* acumulateCharFeatures(PyObject *dummy, PyObject *args) 209 | { 210 | PyObject *arg1=NULL; 211 | PyArrayObject *out=NULL; 212 | int classNo = 0; 213 | int detNo = 0; 214 | 215 | if 
(!PyArg_ParseTuple(args, "ii", &classNo, &detNo)) 216 | return NULL; 217 | accum_character_features(classNo, detNo); 218 | return Py_BuildValue(""); 219 | } 220 | 221 | static PyObject* trainCharFeatures(PyObject *dummy, PyObject *args) 222 | { 223 | train_character_features(); 224 | return Py_BuildValue(""); 225 | } 226 | 227 | static PyMethodDef FastTextMethods[] = { 228 | 229 | {"findKeyPoints", findKeyPoints_cfunc, METH_VARARGS, "Find Keipoints in the image"}, 230 | {"init", initialize_cfunc, METH_VARARGS, "Initializes FastText detector"}, 231 | {"getCharSegmentations", get_char_segmentations_cfunc, METH_VARARGS, "Returns the character segmentations"}, 232 | {"getLastDetectionKeypoints", getLastKeyPoints_cfunc, METH_VARARGS, "Returns the character segmentations"}, 233 | {"getKeypointStrokes", getKeypointStrokes_cfunc, METH_VARARGS, "Returns the strokes of given keypoint"}, 234 | {"getDetectionStat", getDetectionStat_cfunc, METH_NOARGS, "Returns the detection statistics"}, 235 | {"getImageAtScale", getImageAtScale_cfunc, METH_VARARGS, "Returns the detection image in given scale"}, 236 | {"getSegmentationMask", getSegmMask_cfunc, METH_VARARGS, "Returns the segmentation mask by ID"}, 237 | {"getImageScales", getImageScales_cfunc, METH_VARARGS, "Returns the image pyramid scales"}, 238 | {"getLastDetectionOrbKeypoints", getLastOrbKeyPoints_cfunc, METH_VARARGS, "Find ORB keypoints in the image"}, 239 | {"findTextLines", find_text_lines_cfunc, METH_VARARGS, "Finds and returns text lines in the image"}, 240 | {"getNormalizedLine", getNormalizedLine_cfunc, METH_VARARGS, "Returns the normalized line segmentation"}, 241 | {"acummulateCharFeatures", acumulateCharFeatures, METH_VARARGS, "todo"}, 242 | {"trainCharFeatures", trainCharFeatures, METH_NOARGS, "todo"}, 243 | {NULL, NULL, 0, NULL} /* Sentinel */ 244 | }; 245 | 246 | 247 | PyMODINIT_FUNC 248 | initftext(void) 249 | { 250 | PyObject *m; 251 | 252 | m = Py_InitModule("ftext", FastTextMethods); 253 | import_array(); 
254 | if (m == NULL) 255 | return; 256 | 257 | FastTextError = PyErr_NewException((char*) "ftext.error", NULL, NULL); 258 | Py_INCREF(FastTextError); 259 | PyModule_AddObject(m, "error", FastTextError); 260 | } 261 | 262 | -------------------------------------------------------------------------------- /src/Python/pyFastTextG.h: -------------------------------------------------------------------------------- 1 | /* 2 | * pyFastText.h 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * Based on: 10 | * 11 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 12 | * Machine learning for high-speed corner detection, E. Rosten and T. Drummond, ECCV 2006 13 | */ 14 | #ifndef PYFASTTEXT_H_ 15 | #define PYFASTTEXT_H_ 16 | 17 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 18 | 19 | #include 20 | 21 | #define PY_ARRAY_UNIQUE_SYMBOL cool_ARRAY_API 22 | #include 23 | 24 | #ifdef __cplusplus 25 | extern "C" { 26 | #endif 27 | 28 | 29 | PyArrayObject* find_keypoints(PyArrayObject* img, int numOfDims, npy_intp* img_dims, int scaleFactor, int nlevels, int edgeThreshold, int keypointTypes, int kMin, int kMax); 30 | 31 | 32 | int initialize(float scaleFactor, int nlevels, int edgeThreshold, int keypointTypes, int kMin, int kMax, 33 | const char* charClsFile, int erode, int segmentGrad, int minComponentSize, int instance, float thresholdFactor, int segmDeltaInt); 34 | 35 | PyArrayObject* get_char_segmentations(PyArrayObject* img, int numOfDims, npy_intp* img_dims, const char * outputDir, const char * imageName, int instance, int minHeight); 36 | 37 | PyArrayObject* find_text_lines(const char * outputDir, const char * imageName, int instance); 38 | 39 | PyArrayObject* get_normalized_line(int lineNo, int instance); 40 | 41 | PyArrayObject* get_keypoint_strokes(int keypointId, int instance); 42 | 43 | PyArrayObject* 
get_last_detection_keypoints(); 44 | 45 | PyArrayObject* get_last_detection_orb_keypoints(); 46 | 47 | PyArrayObject* get_detection_stat(); 48 | 49 | PyArrayObject* get_image_at_scale(int level, int instance); 50 | 51 | PyArrayObject* get_segmentation_mask(int maskId); 52 | 53 | PyArrayObject* get_image_scales(int instance); 54 | 55 | void accum_character_features(int classNo, int segmId); 56 | 57 | void train_character_features(void); 58 | 59 | #ifdef __cplusplus 60 | } 61 | #endif 62 | 63 | #endif /* PYFASTTEXT_H_ */ 64 | -------------------------------------------------------------------------------- /src/Segmenter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Segmenter.h 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * 8 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 9 | * 10 | * This program is free software; you can redistribute it and/or modify 11 | * it under the terms of the GNU General Public License as published by 12 | * the Free Software Foundation; either version 2 of the License, or 13 | * (at your option) any later version. 14 | * 15 | * This program is distributed in the hope that it will be useful, 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | * GNU General Public License for more details. 19 | * 20 | * Based on: 21 | * 22 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 23 | * Machine learning for high-speed corner detection, E. Rosten and T. 
Drummond, ECCV 2006 24 | */ 25 | #ifndef SEGMENTER_H_ 26 | #define SEGMENTER_H_ 27 | 28 | #include 29 | #include 30 | 31 | #include "KeyPoints.h" 32 | #include "segm/segmentation.h" 33 | #include "CharClassifier.h" 34 | 35 | #include 36 | #include "FTPyramid.hpp" 37 | 38 | namespace cmp 39 | { 40 | 41 | #define MIN_COMP_SIZE 12 42 | 43 | /** 44 | * @class cmp::Segmenter 45 | * 46 | * @brief The letter segmentation class 47 | * 48 | * Segments the letter components from detected keypoints 49 | */ 50 | class Segmenter 51 | { 52 | public: 53 | Segmenter(cv::Ptr charClassifier = cv::Ptr (new CvBoostCharClassifier()), int maxComponentSize = MAX_COMP_SIZE, int minCompSize = MIN_COMP_SIZE); 54 | virtual ~Segmenter(); 55 | 56 | virtual void getLetterCandidates(cv::Mat& img, std::vector& img1_keypoints, std::unordered_multimap >& keypointsPixels, std::vector& letters, cv::Mat debugImage = cv::Mat(), int minHeight = 5) = 0; 57 | 58 | 59 | virtual cv::Mat getSegmenationMap(){ 60 | return segmMap; 61 | } 62 | 63 | int64 getClassificationTime(){ 64 | return classificationTime; 65 | }; 66 | 67 | cv::Ptr getCharClassifier(){ 68 | return charClassifier; 69 | } 70 | 71 | std::vector& getLetterCandidates(){ 72 | return letterCandidates; 73 | } 74 | 75 | bool segmentGrad = false; 76 | 77 | int minCompSize; 78 | 79 | int minHeight = 5; 80 | 81 | int minSizeSegmCount = 0; 82 | 83 | int segmentKeyPoints = 3; 84 | 85 | double strokeAreaTime = 0; 86 | 87 | int componentsCount = 0; 88 | 89 | int64 strokesTime = 0; 90 | 91 | std::unordered_map > > > keypointStrokes; 92 | 93 | int maxStrokeLength = 50; 94 | 95 | protected: 96 | 97 | inline void classifyLetters(std::vector& img1_keypoints, std::unordered_multimap >& keypointsPixels, vector& scales, std::vector& letters, cv::Mat debugImg = cv::Mat()); 98 | 99 | int maxComponentSize; 100 | 101 | cv::Mat segmMap; 102 | cv::Mat idMap; 103 | 104 | std::vector buffer; 105 | std::vector queue; 106 | 107 | cv::Ptr charClassifier; 108 | 109 | cv::Ptr 
wordClassifier; 110 | 111 | int64 classificationTime; 112 | 113 | 114 | std::vector letterCandidates; 115 | 116 | bool dumpTrainingData = false; 117 | 118 | }; 119 | 120 | struct SegmentOption{ 121 | 122 | SegmentOption(int segmentationType, float scoreFactor): segmentationType(segmentationType), scoreFactor(scoreFactor){ 123 | 124 | } 125 | 126 | int segmentationType; 127 | float scoreFactor; 128 | }; 129 | 130 | class PyramidSegmenter : public Segmenter 131 | { 132 | public: 133 | PyramidSegmenter(cv::Ptr ftDetector, cv::Ptr charClassifier = cv::Ptr(), 134 | int maxComponentSize = 2 * MAX_COMP_SIZE, int minCompSize = MIN_COMP_SIZE, float threshodFactor = 1.0, 135 | int delataIntResegment = 0, int segmentLevelOffset = 0) : Segmenter(charClassifier, maxComponentSize, minCompSize), ftDetector(ftDetector), threshodFactor(threshodFactor), delataIntResegment(delataIntResegment), segmentLevelOffset(segmentLevelOffset) 136 | { 137 | segmentOptions.push_back(SegmentOption(0, 1.0)); 138 | //segmentOptions.push_back(SegmentOption(0, 0.4)); 139 | }; 140 | 141 | virtual void getLetterCandidates(cv::Mat& img, std::vector& img1_keypoints, std::unordered_multimap >& keypointsPixels, std::vector& letters, cv::Mat debugImage = cv::Mat(), int minHeight = 5); 142 | 143 | virtual void segmentStrokes(cv::Mat& img, std::vector& img1_keypoints, std::unordered_multimap >& keypointsPixels, std::vector& letters, cv::Mat debugImage = cv::Mat(), int minHeight = 5); 144 | 145 | virtual cv::Mat getSegmenationMap(){ 146 | return segmPyramid[0]; 147 | } 148 | 149 | static int getSegmIndex(cv::Mat& img, LetterCandidate& letter, int norm) 150 | { 151 | 152 | int index = ((letter.bbox.y + letter.bbox.height / 2) / norm) * img.cols + (letter.bbox.x + letter.bbox.width / 2) / norm; 153 | return index; 154 | } 155 | 156 | private: 157 | cv::Ptr ftDetector; 158 | 159 | std::vector segmPyramid; 160 | std::vector idPyramid; 161 | std::vector pixelsOffset; 162 | 163 | float threshodFactor; 164 | 165 | 
std::vector segmentOptions; 166 | 167 | int delataIntResegment; 168 | 169 | int segmentLevelOffset; 170 | 171 | }; 172 | 173 | } /* namespace cmp */ 174 | 175 | #endif /* SEGMENTER_H_ */ 176 | -------------------------------------------------------------------------------- /src/TimeUtils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * TimeUtils.cpp 3 | * 4 | * Created on: Nov 20, 2014 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | */ 8 | #include "TimeUtils.h" 9 | 10 | #include 11 | 12 | #ifdef _WIN32 13 | #include 14 | #include 15 | #endif 16 | 17 | namespace cmp 18 | { 19 | 20 | long long TimeUtils::MiliseconsNow() 21 | { 22 | #ifdef _WIN32 23 | static LARGE_INTEGER s_frequency; 24 | static BOOL s_use_qpc = QueryPerformanceFrequency(&s_frequency); 25 | if (s_use_qpc) { 26 | LARGE_INTEGER now; 27 | QueryPerformanceCounter(&now); 28 | return (1000LL * now.QuadPart) / s_frequency.QuadPart; 29 | } 30 | else { 31 | return GetTickCount(); 32 | } 33 | #else 34 | return cv::getTickCount() / (cv::getTickFrequency()) * 1000; 35 | #endif 36 | } 37 | 38 | #ifdef _WIN32 39 | #include 40 | double get_wall_time(){ 41 | LARGE_INTEGER time,freq; 42 | if (!QueryPerformanceFrequency(&freq)){ 43 | // Handle error 44 | return 0; 45 | } 46 | if (!QueryPerformanceCounter(&time)){ 47 | // Handle error 48 | return 0; 49 | } 50 | return (double)time.QuadPart / freq.QuadPart; 51 | } 52 | double get_cpu_time(){ 53 | FILETIME a,b,c,d; 54 | if (GetProcessTimes(GetCurrentProcess(),&a,&b,&c,&d) != 0){ 55 | // Returns total user time. 56 | // Can be tweaked to include kernel times as well. 
57 | return 58 | (double)(d.dwLowDateTime | 59 | ((unsigned long long)d.dwHighDateTime << 32)) * 0.0001; 60 | }else{ 61 | // Handle error 62 | return 0; 63 | } 64 | } 65 | 66 | // Posix/Linux 67 | #else 68 | #include 69 | double get_wall_time(){ 70 | struct timeval time; 71 | if (gettimeofday(&time,NULL)){ 72 | // Handle error 73 | return 0; 74 | } 75 | return (double)time.tv_sec + (double)time.tv_usec * .001; 76 | } 77 | double get_cpu_time(){ 78 | return (double)clock() / CLOCKS_PER_SEC; 79 | } 80 | #endif 81 | 82 | } /* namespace cmp */ 83 | -------------------------------------------------------------------------------- /src/TimeUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * TimeUtils.h 3 | * 4 | * Created on: Nov 20, 2014 5 | * Author: Michal.Busta at gmail.com 6 | */ 7 | #ifndef TIMEUTILS_H_ 8 | #define TIMEUTILS_H_ 9 | 10 | namespace cmp 11 | { 12 | 13 | /** 14 | * @class cmp::TimeUtils 15 | * 16 | * @brief TODO brief description 17 | * 18 | * TODO type description 19 | */ 20 | class TimeUtils 21 | { 22 | public: 23 | static long long MiliseconsNow(); 24 | }; 25 | 26 | double get_wall_time(); 27 | 28 | } /* namespace cmp */ 29 | 30 | #endif /* TIMEUTILS_H_ */ 31 | -------------------------------------------------------------------------------- /src/detectors.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * detectors.cpp 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 
13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. Rosten and T. Drummond, ECCV 2006 23 | */ 24 | #include "detectors.h" 25 | 26 | namespace cmp 27 | { 28 | 29 | class GridAdaptedFeatureDetectorInvoker : public cv::ParallelLoopBody 30 | { 31 | private: 32 | int gridRows_, gridCols_; 33 | int maxPerCell_; 34 | std::vector& keypoints_; 35 | std::unordered_multimap >& keypointsPixels; 36 | const cv::Mat& image_; 37 | const cv::Mat& mask_; 38 | const cv::Ptr& detector_; 39 | cv::Mutex* kptLock_; 40 | 41 | GridAdaptedFeatureDetectorInvoker& operator=(const GridAdaptedFeatureDetectorInvoker&); // to quiet MSVC 42 | 43 | public: 44 | 45 | GridAdaptedFeatureDetectorInvoker(const cv::Ptr& detector, const cv::Mat& image, const cv::Mat& mask, 46 | std::vector& keypoints, std::unordered_multimap >& keypointsPixels, 47 | int maxPerCell, int gridRows, int gridCols, 48 | cv::Mutex* kptLock) 49 | : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell), 50 | keypoints_(keypoints), keypointsPixels(keypointsPixels), image_(image), mask_(mask), detector_(detector), 51 | kptLock_(kptLock) 52 | { 53 | 54 | } 55 | 56 | void operator() (const cv::Range& range) const 57 | { 58 | for (int i = range.start; i < range.end; ++i) 59 | { 60 | int celly = i / gridCols_; 61 | int cellx = i - celly * gridCols_; 62 | 63 | cv::Range row_range((celly*image_.rows)/gridRows_, ((celly+1)*image_.rows)/gridRows_); 64 | cv::Range col_range((cellx*image_.cols)/gridCols_, ((cellx+1)*image_.cols)/gridCols_); 65 | if(row_range.end < image_.rows - 5) 66 | { 67 | row_range.end += 3; 68 | } 69 | 
if(col_range.end < image_.cols - 5) 70 | { 71 | col_range.end += 3; 72 | } 73 | 74 | cv::Mat sub_image = image_(row_range, col_range); 75 | cv::Mat sub_mask; 76 | if (!mask_.empty()) sub_mask = mask_(row_range, col_range); 77 | 78 | std::vector sub_keypoints; 79 | sub_keypoints.reserve(2 * maxPerCell_); 80 | std::unordered_multimap > keypointsPixelsSub; 81 | detector_->segment( sub_image, sub_keypoints, keypointsPixelsSub, sub_mask ); 82 | if( keypointsPixelsSub.size() == 0 ) 83 | KeyPointsFilterC::retainBest(sub_keypoints, keypointsPixelsSub, 2 * maxPerCell_); 84 | 85 | std::vector::iterator it = sub_keypoints.begin(), end = sub_keypoints.end(); 86 | for( ; it != end; ++it ) 87 | { 88 | it->pt.x += col_range.start; 89 | it->pt.y += row_range.start; 90 | } 91 | 92 | { 93 | cv::AutoLock join_keypoints(*kptLock_); 94 | int offset = keypoints_.size(); 95 | if( keypointsPixelsSub.size() > 0 ) 96 | { 97 | std::vector::iterator it = sub_keypoints.begin(), end = sub_keypoints.end(); 98 | for( ; it != end; ++it ) 99 | { 100 | it->class_id += offset; 101 | } 102 | } 103 | 104 | keypoints_.insert( keypoints_.end(), sub_keypoints.begin(), sub_keypoints.end() ); 105 | 106 | for (std::unordered_multimap >::iterator itr = keypointsPixelsSub.begin(); itr != keypointsPixelsSub.end(); itr++) { 107 | keypointsPixels.insert( std::pair >( itr->first + offset, std::pair(itr->second.first + col_range.start, itr->second.second + row_range.start))); 108 | } 109 | } 110 | } 111 | } 112 | }; 113 | 114 | GridAdaptedFeatureDetector::GridAdaptedFeatureDetector( const cv::Ptr& detector, int maxTotalKeypoints, int gridRows, int gridCols): detector(detector), maxTotalKeypoints(maxTotalKeypoints), gridRows(gridRows), gridCols(gridCols) 115 | { 116 | 117 | } 118 | 119 | void GridAdaptedFeatureDetector::detectImpl( const cv::Mat& image, std::vector& keypoints, const cv::Mat& mask ) const 120 | { 121 | if (image.empty() ) 122 | { 123 | keypoints.clear(); 124 | return; 125 | } 126 | 127 | 
if(MIN(image.cols, image.rows) < 128 ) 128 | { 129 | detector->detect( image, keypoints, mask ); 130 | }else 131 | { 132 | 133 | keypoints.reserve(2 * maxTotalKeypoints); 134 | int maxPerCell = (maxTotalKeypoints / (gridRows * gridCols)); 135 | 136 | cv::Mutex kptLock; 137 | std::unordered_multimap > keypointsPixels; 138 | GridAdaptedFeatureDetectorInvoker body(detector, image, mask, keypoints, keypointsPixels, maxPerCell, gridRows, gridCols, &kptLock); 139 | //body(cv::Range(0, gridRows * gridCols)); 140 | cv::parallel_for_(cv::Range(0, gridRows * gridCols), body); 141 | //KeyPointsFilterC::retainBest(keypoints, maxTotalKeypoints); 142 | } 143 | } 144 | 145 | void GridAdaptedFeatureDetector::segmentImpl( const cv::Mat& image, std::vector& keypoints, std::unordered_multimap >& keypointsPixels, const cv::Mat& mask) const 146 | { 147 | if (image.empty() ) 148 | { 149 | keypoints.clear(); 150 | return; 151 | } 152 | 153 | if(MIN(image.cols, image.rows) < 128 ) 154 | { 155 | detector->segment( image, keypoints, keypointsPixels, mask ); 156 | }else 157 | { 158 | 159 | keypoints.reserve(2 * maxTotalKeypoints); 160 | int maxPerCell = (maxTotalKeypoints / (gridRows * gridCols)); 161 | 162 | cv::Mutex kptLock; 163 | GridAdaptedFeatureDetectorInvoker body(detector, image, mask, keypoints, keypointsPixels, maxPerCell, gridRows, gridCols, &kptLock); 164 | //body(cv::Range(0, gridRows * gridCols)); 165 | cv::parallel_for_(cv::Range(0, gridRows * gridCols), body); 166 | //KeyPointsFilterC::retainBest(keypoints, maxTotalKeypoints); 167 | } 168 | } 169 | 170 | } /* namespace cmp */ 171 | -------------------------------------------------------------------------------- /src/detectors.h: -------------------------------------------------------------------------------- 1 | /* 2 | * detectors.h 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 
8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. Rosten and T. Drummond, ECCV 2006 23 | */ 24 | #ifndef DETECTORS_H_ 25 | #define DETECTORS_H_ 26 | 27 | #include "FASTex.hpp" 28 | 29 | namespace cmp 30 | { 31 | 32 | /* 33 | * Adapts a detector to partition the source image into a grid and detect 34 | * points in each cell. 35 | */ 36 | class CV_EXPORTS_W GridAdaptedFeatureDetector : public FASTextI 37 | { 38 | public: 39 | /* 40 | * detector Detector that will be adapted. 41 | * maxTotalKeypoints Maximum count of keypoints detected on the image. Only the strongest keypoints 42 | * will be keeped. 43 | * gridRows Grid rows count. 44 | * gridCols Grid column count. 
45 | */ 46 | CV_WRAP GridAdaptedFeatureDetector( const cv::Ptr& detector, 47 | int maxTotalKeypoints=1000, 48 | int gridRows=8, int gridCols=8 ); 49 | 50 | 51 | void setMaxTotalKeypoints(int maxTotalKeypoints){ 52 | this->maxTotalKeypoints = maxTotalKeypoints; 53 | } 54 | 55 | virtual bool isColorDetector(){ 56 | return detector->isColorDetector(); 57 | } 58 | 59 | cv::Ptr getDetector(){ 60 | return detector; 61 | } 62 | 63 | void setThreshold(long threshold){ 64 | detector->setThreshold(threshold); 65 | } 66 | 67 | protected: 68 | virtual void detectImpl( const cv::Mat& image, std::vector& keypoints, const cv::Mat& mask=cv::Mat() ) const; 69 | 70 | virtual void segmentImpl( const cv::Mat& image, std::vector& keypoints, std::unordered_multimap >& keypointsPixels, const cv::Mat& mask=cv::Mat() ) const; 71 | 72 | cv::Ptr detector; 73 | int maxTotalKeypoints; 74 | int gridRows; 75 | int gridCols; 76 | }; 77 | 78 | } /* namespace cmp */ 79 | 80 | #endif /* DETECTORS_H_ */ 81 | -------------------------------------------------------------------------------- /src/geometry.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * geometry.cpp 3 | * 4 | * Created on: Feb 11, 2015 5 | * Author: Michal Busta 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include "geometry.h" 12 | 13 | namespace cmp 14 | { 15 | 16 | double angleDiff(double a,double b){ 17 | double dif = fmod(b - a + M_PI , 2*M_PI); 18 | if (dif < 0) 19 | dif += 2*M_PI; 20 | return dif - M_PI; 21 | } 22 | 23 | double distance_to_line( cv::Point begin, cv::Point end, cv::Point x, int& sign ) 24 | { 25 | //translate the begin to the origin 26 | end -= begin; 27 | x -= begin; 28 | 29 | //¿do you see the triangle? 
30 | double area = x.cross(end); 31 | sign = (area > 0) - (area < 0); 32 | return fabs(area / cv::norm(end)); 33 | } 34 | 35 | double distance_to_line( const cv::Vec4f& line, cv::Point x, int& sign ) 36 | { 37 | //translate the begin to the origin 38 | cv::Point2f end = cv::Point2f(line.val[0], line.val[1]); 39 | cv::Point xf = x - cv::Point(line.val[2], line.val[3]); 40 | 41 | //¿do you see the triangle? 42 | double area = xf.cross(end); 43 | sign = (area > 0) - (area < 0); 44 | return fabs(area / cv::norm(end)); 45 | } 46 | 47 | 48 | double innerAngle(cv::Vec4i& line1, cv::Vec4i& line2, bool invert) 49 | { 50 | cv::Point p11(line1.val[0],line1.val[1]); 51 | cv::Point p12(line1.val[2], line1.val[3]); 52 | cv::Point p21(line2.val[0], line2.val[1]); 53 | cv::Point p22(line2.val[2], line2.val[3]); 54 | 55 | cv::Point v1 = p12 - p11; 56 | cv::Point v2 = p21 - p22; 57 | if(invert) 58 | { 59 | v1 = p11 - p12; 60 | v2 = p21 - p22; 61 | } 62 | 63 | double dot = v1.dot(v2); 64 | double cross = v1.cross(v2); 65 | double angle = atan2(cross, dot); 66 | return angle; 67 | } 68 | 69 | bool isLeft(cv::Point a, cv::Point b, cv::Point c) 70 | { 71 | return ((b.x - a.x)*(c.y - a.y) - (b.y - a.y)*(c.x - a.x)) > 0; 72 | } 73 | 74 | bool isBetween(cv::Vec4f bottomLine, cv::Vec4f line2, cv::Point point, double& mindist ) 75 | { 76 | 77 | cv::Point a(bottomLine[2], bottomLine[3]); 78 | cv::Point b(bottomLine[2] + 100 * bottomLine[0] , bottomLine[3] + 100 * bottomLine[1]); 79 | cv::Point aa(line2[2], line2[3]); 80 | cv::Point bb(line2[2] + 100 * bottomLine[0] , line2[3] + 100 * bottomLine[1]); 81 | int s1 = 0; 82 | double d1 = distance_to_line( a, b, aa, s1 ); 83 | int s2 = 0; 84 | double d2 = distance_to_line( a, b, point, s2 ); 85 | int s3 = 0; 86 | double d3 = distance_to_line( aa, bb, point, s3 ); 87 | mindist = MIN(d2, d3); 88 | if(d2 > d1) 89 | return false; 90 | if(d3 > d1) 91 | return false; 92 | return true; 93 | } 94 | 95 | double innerAngle(const cv::Point& line1, const 
cv::Point& line2) 96 | { 97 | double dot = line1.x*line2.x + line1.y*line2.y;//dot product 98 | double det = line1.x*line2.y - line1.y*line2.x; //determinant 99 | return M_PI - atan2(det, dot); // # atan2(y, x) or atan2(sin, cos) 100 | } 101 | 102 | int rotatedRectangleIntersection( const cv::RotatedRect& rect1, const cv::RotatedRect& rect2, cv::OutputArray intersectingRegion ) 103 | { 104 | const float samePointEps = 0.00001f; // used to test if two points are the same 105 | 106 | cv::Point2f vec1[4], vec2[4]; 107 | cv::Point2f pts1[4], pts2[4]; 108 | 109 | std::vector intersection; 110 | 111 | rect1.points(pts1); 112 | rect2.points(pts2); 113 | 114 | int ret = INTERSECT_FULL; 115 | 116 | // Specical case of rect1 == rect2 117 | { 118 | bool same = true; 119 | 120 | for( int i = 0; i < 4; i++ ) 121 | { 122 | if( fabs(pts1[i].x - pts2[i].x) > samePointEps || (fabs(pts1[i].y - pts2[i].y) > samePointEps) ) 123 | { 124 | same = false; 125 | break; 126 | } 127 | } 128 | 129 | if(same) 130 | { 131 | intersection.resize(4); 132 | 133 | for( int i = 0; i < 4; i++ ) 134 | { 135 | intersection[i] = pts1[i]; 136 | } 137 | 138 | cv::Mat(intersection).copyTo(intersectingRegion); 139 | 140 | return INTERSECT_FULL; 141 | } 142 | } 143 | 144 | // Line vector 145 | // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] 146 | for( int i = 0; i < 4; i++ ) 147 | { 148 | vec1[i].x = pts1[(i+1)%4].x - pts1[i].x; 149 | vec1[i].y = pts1[(i+1)%4].y - pts1[i].y; 150 | 151 | vec2[i].x = pts2[(i+1)%4].x - pts2[i].x; 152 | vec2[i].y = pts2[(i+1)%4].y - pts2[i].y; 153 | } 154 | 155 | // Line test - test all line combos for intersection 156 | for( int i = 0; i < 4; i++ ) 157 | { 158 | for( int j = 0; j < 4; j++ ) 159 | { 160 | // Solve for 2x2 Ax=b 161 | float x21 = pts2[j].x - pts1[i].x; 162 | float y21 = pts2[j].y - pts1[i].y; 163 | 164 | float vx1 = vec1[i].x; 165 | float vy1 = vec1[i].y; 166 | 167 | float vx2 = vec2[j].x; 168 | float vy2 = vec2[j].y; 169 | 170 | float det = vx2*vy1 - vx1*vy2; 
171 | 172 | float t1 = (vx2*y21 - vy2*x21) / det; 173 | float t2 = (vx1*y21 - vy1*x21) / det; 174 | 175 | // This takes care of parallel lines 176 | if( cvIsInf(t1) || cvIsInf(t2) || cvIsNaN(t1) || cvIsNaN(t2) ) 177 | { 178 | continue; 179 | } 180 | 181 | if( t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f ) 182 | { 183 | float xi = pts1[i].x + vec1[i].x*t1; 184 | float yi = pts1[i].y + vec1[i].y*t1; 185 | 186 | intersection.push_back(cv::Point2f(xi,yi)); 187 | } 188 | } 189 | } 190 | 191 | if( !intersection.empty() ) 192 | { 193 | ret = INTERSECT_PARTIAL; 194 | } 195 | 196 | // Check for vertices from rect1 inside recct2 197 | for( int i = 0; i < 4; i++ ) 198 | { 199 | // We do a sign test to see which side the point lies. 200 | // If the point all lie on the same sign for all 4 sides of the rect, 201 | // then there's an intersection 202 | int posSign = 0; 203 | int negSign = 0; 204 | 205 | float x = pts1[i].x; 206 | float y = pts1[i].y; 207 | 208 | for( int j = 0; j < 4; j++ ) 209 | { 210 | // line equation: Ax + By + C = 0 211 | // see which side of the line this point is at 212 | float A = -vec2[j].y; 213 | float B = vec2[j].x; 214 | float C = -(A*pts2[j].x + B*pts2[j].y); 215 | 216 | float s = A*x+ B*y+ C; 217 | 218 | if( s >= 0 ) 219 | { 220 | posSign++; 221 | } 222 | else 223 | { 224 | negSign++; 225 | } 226 | } 227 | 228 | if( posSign == 4 || negSign == 4 ) 229 | { 230 | intersection.push_back(pts1[i]); 231 | } 232 | } 233 | 234 | // Reverse the check - check for vertices from rect2 inside recct1 235 | for( int i = 0; i < 4; i++ ) 236 | { 237 | // We do a sign test to see which side the point lies. 
238 | // If the point all lie on the same sign for all 4 sides of the rect, 239 | // then there's an intersection 240 | int posSign = 0; 241 | int negSign = 0; 242 | 243 | float x = pts2[i].x; 244 | float y = pts2[i].y; 245 | 246 | for( int j = 0; j < 4; j++ ) 247 | { 248 | // line equation: Ax + By + C = 0 249 | // see which side of the line this point is at 250 | float A = -vec1[j].y; 251 | float B = vec1[j].x; 252 | float C = -(A*pts1[j].x + B*pts1[j].y); 253 | 254 | float s = A*x + B*y + C; 255 | 256 | if( s >= 0 ) 257 | { 258 | posSign++; 259 | } 260 | else 261 | { 262 | negSign++; 263 | } 264 | } 265 | 266 | if( posSign == 4 || negSign == 4 ) 267 | { 268 | intersection.push_back(pts2[i]); 269 | } 270 | } 271 | 272 | // Get rid of dupes 273 | for( int i = 0; i < (int)intersection.size()-1; i++ ) 274 | { 275 | for( size_t j = i+1; j < intersection.size(); j++ ) 276 | { 277 | float dx = intersection[i].x - intersection[j].x; 278 | float dy = intersection[i].y - intersection[j].y; 279 | double d2 = dx*dx + dy*dy; // can be a really small number, need double here 280 | 281 | if( d2 < samePointEps*samePointEps ) 282 | { 283 | // Found a dupe, remove it 284 | std::swap(intersection[j], intersection.back()); 285 | intersection.pop_back(); 286 | j--; // restart check 287 | } 288 | } 289 | } 290 | 291 | if( intersection.empty() ) 292 | { 293 | return INTERSECT_NONE ; 294 | } 295 | 296 | // If this check fails then it means we're getting dupes, increase samePointEps 297 | //CV_Assert( intersection.size() <= 8 ); 298 | 299 | cv::Mat(intersection).copyTo(intersectingRegion); 300 | 301 | return ret; 302 | } 303 | 304 | void getConvexHullLines(std::vector& cHullPoints1, std::vector& cHullPoints2, const cv::Mat& img, std::vector& convexLines, std::vector& chull, double& dist) 305 | { 306 | std::vector allHullPoins; 307 | 308 | allHullPoins.reserve(cHullPoints1.size() + cHullPoints2.size()); 309 | std::set index1; 310 | for(std::vector::iterator it = cHullPoints1.begin(); it 
< cHullPoints1.end(); it++ ) 311 | { 312 | allHullPoins.push_back( *it ); 313 | index1.insert( it->x + it->y * img.cols ); 314 | } 315 | std::set index2; 316 | for(std::vector::iterator it = cHullPoints2.begin(); it < cHullPoints2.end(); it++ ) 317 | { 318 | allHullPoins.push_back(*it ); 319 | index2.insert( it->x + it->y * img.cols ); 320 | } 321 | 322 | std::vector hull; 323 | convexHull(allHullPoins, hull, false, false); 324 | if(hull.size() <= 3) 325 | return; 326 | 327 | chull.resize(hull.size()); 328 | for(size_t i = 0; i < hull.size(); i++) 329 | chull[i] = allHullPoins[hull[i]]; 330 | 331 | 332 | std::vector defects; 333 | //convexityDefects(allHullPoins, hull, defects); 334 | 335 | for( size_t i = 1; i < chull.size() + 1; i++ ) 336 | { 337 | cv::Point p1 = chull[i - 1]; 338 | size_t index = i; 339 | if( index >= chull.size()) 340 | index = 0; 341 | cv::Point p2 = chull[index]; 342 | if( index1.find( p1.x + p1.y * img.cols ) != index1.end() && index1.find( p2.x + p2.y * img.cols ) != index1.end()) 343 | continue; 344 | if( index2.find( p1.x + p1.y * img.cols ) != index2.end() && index2.find( p2.x + p2.y * img.cols ) != index2.end()) 345 | continue; 346 | defects.push_back(cv::Vec4i(i - 1, index, 0, 0)); 347 | } 348 | 349 | std::vector defectsCross; 350 | 351 | for( size_t i = 0; i < defects.size(); i++ ) 352 | { 353 | cv::Point p1 = chull[defects[i].val[0]]; 354 | cv::Point p2 = chull[defects[i].val[1]]; 355 | if( index1.find( p1.x + p1.y * img.cols ) != index1.end() && index1.find( p2.x + p2.y * img.cols ) != index1.end()) 356 | continue; 357 | if( index2.find( p1.x + p1.y * img.cols ) != index2.end() && index2.find( p2.x + p2.y * img.cols ) != index2.end()) 358 | continue; 359 | defectsCross.push_back(defects[i]); 360 | if(index1.find( p1.x + p1.y * img.cols ) != index1.end()) 361 | { 362 | convexLines.push_back( cv::Vec4i(chull[defects[i].val[0]].x, chull[defects[i].val[0]].y, chull[defects[i].val[1]].x, chull[defects[i].val[1]].y) ); 363 | }else 364 | { 
365 | convexLines.push_back( cv::Vec4i(chull[defects[i].val[1]].x, chull[defects[i].val[1]].y, chull[defects[i].val[0]].x, chull[defects[i].val[0]].y) ); 366 | } 367 | } 368 | if(defectsCross.size() == 2) 369 | { 370 | dist = INT_MAX; 371 | for( size_t i = 0; i < cHullPoints1.size(); i++) 372 | { 373 | for( size_t j = 0; j < cHullPoints2.size(); j++) 374 | { 375 | cv::Point d = cHullPoints1[i] - cHullPoints2[j]; 376 | double distc = d.x * d.x + d.y * d.y; 377 | if( distc < dist ) 378 | dist = distc; 379 | } 380 | } 381 | } 382 | } 383 | 384 | } /* namespace cmp */ 385 | -------------------------------------------------------------------------------- /src/geometry.h: -------------------------------------------------------------------------------- 1 | /* 2 | * geometry.h 3 | * 4 | * Created on: Feb 11, 2015 5 | * Author: Michal Busta 6 | */ 7 | #ifndef GEOMETRY_H_ 8 | #define GEOMETRY_H_ 9 | 10 | #include 11 | #include 12 | 13 | #ifndef M_PI 14 | #define M_PI 3.14159265358979323846 15 | #endif 16 | 17 | namespace cmp 18 | { 19 | 20 | double angleDiff(double a, double b); 21 | 22 | double distance_to_line( cv::Point begin, cv::Point end, cv::Point x, int& sign ); 23 | 24 | double distance_to_line( const cv::Vec4f& line, cv::Point x, int& sign ); 25 | 26 | double innerAngle(cv::Vec4i& line1, cv::Vec4i& line2, bool invert = false); 27 | 28 | double innerAngle(const cv::Point& line1, const cv::Point& line2); 29 | 30 | bool isBetween(cv::Vec4f bottomLine, cv::Vec4f line2, cv::Point point, double& mindist ); 31 | 32 | enum RectanglesIntersectTypes { 33 | INTERSECT_NONE = 0, //!< No intersection 34 | INTERSECT_PARTIAL = 1, //!< There is a partial intersection 35 | INTERSECT_FULL = 2 //!< One of the rectangle is fully enclosed in the other 36 | }; 37 | 38 | int rotatedRectangleIntersection( const cv::RotatedRect& rect1, const cv::RotatedRect& rect2, cv::OutputArray intersectingRegion ); 39 | 40 | void getConvexHullLines(std::vector& cHullPoints1, std::vector& cHullPoints2, 
const cv::Mat& img, std::vector& convexLines, std::vector& chull, double& dist); 41 | 42 | /** 43 | * @param img 44 | * @return The bounding box of non-zero image pixels 45 | */ 46 | inline cv::Rect getNonZeroBBox(const cv::Mat& img, int thresh = 0) 47 | { 48 | int minX = std::numeric_limits::max(); 49 | int maxX = 0; 50 | int minY = std::numeric_limits::max(); 51 | int maxY = 0; 52 | 53 | for (int y=0; y thresh) 59 | { 60 | minX = MIN(minX, x); 61 | maxX = MAX(maxX, x); 62 | minY = MIN(minY, y); 63 | maxY = MAX(maxY, y); 64 | } 65 | 66 | } 67 | } 68 | if(minX == std::numeric_limits::max()) 69 | minX = 0; 70 | if(minY == std::numeric_limits::max()) 71 | minY = 0; 72 | return cv::Rect( minX, minY, maxX - minX + 1, maxY - minY + 1); 73 | } 74 | 75 | } /* namespace cmp */ 76 | 77 | #endif /* GEOMETRY_H_ */ 78 | -------------------------------------------------------------------------------- /src/process_dir.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * test_processing.cpp 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | */ 7 | 8 | #include 9 | #include 10 | #include "segm/segmentation.h" 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include "FTPyramid.hpp" 19 | 20 | #include "IOUtils.h" 21 | #include "TimeUtils.h" 22 | #include "CharClassifier.h" 23 | #include "Segmenter.h" 24 | 25 | #include "FastTextLineDetector.h" 26 | 27 | #define VERBOSE 1 28 | 29 | using namespace cmp; 30 | 31 | int main(int argc, char **argv) 32 | { 33 | 34 | 35 | //cv::GaussianBlur(gray, gray, cv::Size(3, 3), 0); 36 | float scaleFactor = 1.6f; 37 | int nlevels = -1; 38 | int edgeThreshold = 12; 39 | int keypointTypes = 3; 40 | int kMin = 9; 41 | int kMax = 11; 42 | bool color = false; 43 | 44 | cv::Ptr ftDetector = cv::Ptr (new cmp::FTPyr(3000, scaleFactor, nlevels, edgeThreshold, keypointTypes, kMin, kMax, color, false, false)); 45 | cv::Ptr charClassifier = cv::Ptr (new 
cmp::CvBoostCharClassifier("cvBoostChar.xml")); 46 | cv::Ptr segmenter = cv::Ptr (new cmp::PyramidSegmenter(ftDetector, charClassifier)); 47 | 48 | FastTextLineDetector textLineDetector; 49 | 50 | long long segmentationTime = 0; 51 | long long lettersTotal = 0; 52 | long long keypointsTime = 0; 53 | long long keypointsTotal = 0; 54 | long long clsTime = 0; 55 | 56 | std::vector files = cmp::IOUtils::GetFilesInDirectory( argv[1], "*.png", true ); 57 | std::vector files2 = cmp::IOUtils::GetFilesInDirectory( argv[1], "*.jpg", true ); 58 | files.insert(files.end(), files2.begin(), files2.end()); 59 | std::vector queue; 60 | std::string outDir = "/tmp/processDir/"; 61 | for(size_t x = 0; x < files.size(); x++) 62 | { 63 | std::cout << "Processing: " << files[x] << std::endl; 64 | cv::Mat img = cv::imread(files[x]); 65 | if(img.empty()) 66 | continue; 67 | 68 | cv::Mat gray; 69 | cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY); 70 | 71 | cv::Mat strokes; 72 | std::string imgName = outDir; 73 | imgName += cmp::IOUtils::Basename(files[x]) + ".png"; 74 | cv::imwrite(imgName, strokes); 75 | 76 | cv::Mat procImg = img; 77 | 78 | std::vector img1_keypoints; 79 | std::unordered_multimap > keypointsPixels; 80 | std::vector letters; 81 | if( color || true) 82 | { 83 | long long start = TimeUtils::MiliseconsNow(); 84 | if(color){ 85 | ftDetector->detect(img, img1_keypoints, keypointsPixels); 86 | }else{ 87 | ftDetector->detect(gray, img1_keypoints, keypointsPixels); 88 | } 89 | std::cout << "Detected keypoints: " << img1_keypoints.size() << std::endl; 90 | keypointsTime += TimeUtils::MiliseconsNow() - start; 91 | keypointsTotal += img1_keypoints.size(); 92 | 93 | start = TimeUtils::MiliseconsNow(); 94 | //cv::Mat imgOut; 95 | //cv::imshow("gray", gray); 96 | 97 | segmenter->getLetterCandidates( gray, img1_keypoints, keypointsPixels, letters ); 98 | std::cout << "Segmented: " << letters.size() << "/" << segmenter->getLetterCandidates().size() << std::endl; 99 | lettersTotal += 
letters.size(); 100 | segmentationTime += TimeUtils::MiliseconsNow() - start; 101 | clsTime += segmenter->getClassificationTime(); 102 | 103 | std::vector textLines; 104 | textLineDetector.findTextLines(gray, segmenter->getLetterCandidates(), ftDetector->getScales(), textLines); 105 | #ifdef VERBOSE 106 | cv::Mat lineImage = img.clone(); 107 | for(size_t i = 0; i < textLines.size(); i++){ 108 | FTextLine& line = textLines[i]; 109 | cv::RotatedRect rr = line.getMinAreaRect(segmenter->getLetterCandidates()); 110 | 111 | cv::Scalar c(255, 0, 0); 112 | cv::Point2f rect_points[4]; rr.points( rect_points ); 113 | cv::line(lineImage, rect_points[0], rect_points[1], c, 1); 114 | cv::line(lineImage, rect_points[1], rect_points[2], c, 1); 115 | cv::line(lineImage, rect_points[2], rect_points[3], c, 1); 116 | cv::line(lineImage, rect_points[3], rect_points[0], c, 1); 117 | 118 | } 119 | cv::imshow("textLines", lineImage); 120 | cv::waitKey(0); 121 | #endif 122 | 123 | } 124 | } 125 | std::cout << "Total keypoints time: " << keypointsTime << std::endl; 126 | std::cout << "Total segmentation time: " << segmentationTime << std::endl; 127 | std::cout << "Cls time: " << clsTime / (cv::getTickFrequency()) * 1000 << std::endl; 128 | 129 | std::cout << "Keypoints total: " << keypointsTotal << std::endl; 130 | std::cout << "Letters total: " << lettersTotal << std::endl; 131 | 132 | } 133 | -------------------------------------------------------------------------------- /src/segm/flood_fill.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * Software License Agreement (BSD License) 3 | * 4 | * Copyright (c) 2009, Willow Garage, Inc. 5 | * All rights reserved. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above 14 | * copyright notice, this list of conditions and the following 15 | * disclaimer in the documentation and/or other materials provided 16 | * with the distribution. 17 | * * Neither the name of the Willow Garage nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 | * POSSIBILITY OF SUCH DAMAGE. 
33 | *********************************************************************/ 34 | 35 | #ifndef SRC_SEGM_FLOOD_FILL_H_ 36 | #define SRC_SEGM_FLOOD_FILL_H_ 37 | 38 | #include 39 | 40 | #include 41 | 42 | namespace cmp{ 43 | /* Work item stored in the flood-fill buffer. NOTE(review): field names suggest one horizontal span (row y, columns l..r) together with the parent span prevl..prevr and the scan direction dir - confirm against flood_fill.cpp. */ 44 | typedef struct CvFFillSegment 45 | { 46 | ushort y; 47 | ushort l; 48 | ushort r; 49 | ushort prevl; 50 | ushort prevr; 51 | short dir; 52 | } 53 | CvFFillSegment; 54 | /* Flood-fill entry point; only the declaration is visible here. loDiff and upDiff default to an all-zero cv::Scalar. NOTE(review): the meaning of the remaining parameters and of the int result is not documented in this header - see the definition before relying on them. */ 55 | int floodFill( std::vector& buffer, cv::InputOutputArray _imageId, cv::InputOutputArray _image, cv::Point seedPoint, int channel, double scaleFactor, 56 | int& compCounter, long threshold, int maxSize, int minCompSize, cv::Mat& segmImg, cv::Mat& segmMap, cv::Rect& rect, int& area, std::unordered_map& keypointHash, std::vector& keypointIds, 57 | bool resegment, bool gradFill, int srcCols, 58 | cv::Scalar loDiff = cv::Scalar(), cv::Scalar upDiff = cv::Scalar()); 59 | 60 | }//namespace cmp 61 | 62 | #endif /* SRC_SEGM_FLOOD_FILL_H_ */ 63 | -------------------------------------------------------------------------------- /src/segm/segmentation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * segmentation.cpp 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 
22 | * Machine learning for high-speed corner detection, E. Rosten and T. Drummond, ECCV 2006 23 | */ 24 | #include "segmentation.h" 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include "FASTex.hpp" 35 | 36 | #define BORDER_SIZE 5 37 | 38 | using namespace std; 39 | 40 | namespace cmp{ 41 | /* Returns the centre of mass of the binary mask, scaled by scaleFactor and offset by the bbox origin. The value is cached in `centroid`; centroid.x == 0 doubles as the "not computed yet" marker, so a centroid whose x is 0 is recomputed on every call. */ 42 | cv::Point LetterCandidate::getCentroid() 43 | { 44 | if(centroid.x == 0) 45 | { 46 | cv::Moments m = cv::moments(this->mask, true); 47 | centroid = cv::Point((int) cvRound(bbox.x + ( m.m10 / m.m00 ) * this->scaleFactor ), (int) cvRound(bbox.y + (m.m01 / m.m00 ) * this->scaleFactor)); 48 | } 49 | return centroid; 50 | } 51 | /* Returns the centroid of the convex-hull polygon when it has more than 3 points, otherwise the centre of bbox. Cached in `convexCentroid` with x == 0 as the "not computed yet" marker. */ 52 | cv::Point LetterCandidate::getConvexCentroid() 53 | { 54 | if(convexCentroid.x == 0) 55 | { 56 | if(this->cHullPoints.size() > 3){ 57 | cv::Moments m = cv::moments(this->cHullPoints); 58 | convexCentroid = cv::Point((int) cvRound(m.m10 / m.m00), (int) cvRound(m.m01 / m.m00)); 59 | assert(convexCentroid.x > 0 && convexCentroid.y > 0); 60 | }else{ 61 | convexCentroid = cv::Point(bbox.x + bbox.width / 2, bbox.y + bbox.height / 2); 62 | } 63 | 64 | } 65 | return convexCentroid; 66 | } 67 | /* Stroke-based overload. Returns the cached strokeAreaRatio when it is not -1. Otherwise it rasterises, into a mask-sized image, every keypoint of this region that has the seed keypoint's type and lies within 2 octaves of it (a filled circle when kp.count == 5, the stored stroke lines otherwise), intersects that drawing with the mask and stores covered pixels / non-zero mask pixels. */ 68 | float LetterCandidate::getStrokeAreaRatio(std::vector& img1_keypoints, std::vector& scales, std::unordered_map > > >& keypointStrokes) 69 | { 70 | if( strokeAreaRatio != -1) 71 | return strokeAreaRatio; 72 | cv::Mat tmp = cv::Mat::zeros(mask.rows, mask.cols, CV_8UC1); 73 | for( auto kpid : keypointIds ) 74 | { 75 | cmp::FastKeyPoint& kp = img1_keypoints[kpid]; 76 | if( kp.type != this->keyPoint.type ) 77 | continue; 78 | if( abs(kp.octave - this->keyPoint.octave) > 2 ) 79 | continue; 80 | int radius = 2 / scales[kp.octave] / this->scaleFactor; 81 | double sf = 1.0 / scales[kp.octave]; 82 | cv::Scalar color( 255, 255, 255 ); 83 | if( kp.count == 5) 84 | { 85 | cv::circle(tmp, cv::Point((kp.pt.x - bbox.x) / this->scaleFactor, (kp.pt.y - bbox.y) / this->scaleFactor), radius, color, -1); 86 | } 87 | else 88 
| { 89 | std::vector > >& storkeDirections = keypointStrokes[kpid]; 90 | for( auto strokes : storkeDirections ) 91 | { 92 | int thickness = kp.count / scales[kp.octave] / this->scaleFactor; 93 | thickness = MAX(1, thickness); 94 | thickness = MIN(255, thickness); 95 | for( auto sd : strokes) 96 | { 97 | 98 | cv::line( tmp, 99 | cv::Point(roundf((sd->center.x * sf - bbox.x) / this->scaleFactor), roundf((sd->center.y * sf - bbox.y) / this->scaleFactor)) , 100 | cv::Point(roundf((sd->direction.x * sf - bbox.x) / this->scaleFactor), roundf((sd->direction.y * sf - bbox.y) / this->scaleFactor)), 101 | color, thickness ); 102 | } 103 | } 104 | } 105 | 106 | } 107 | 108 | cv::Mat strokeArea; 109 | cv::bitwise_and( tmp, mask, strokeArea); 110 | int pixels = countNonZero(strokeArea); 111 | strokeAreaRatio = pixels / (float) countNonZero(mask); 112 | /* 113 | cv::imshow("mask", mask); 114 | cv::imshow("tmp", tmp); 115 | cv::imshow("strokeArea", strokeArea); 116 | cv::waitKey(0); 117 | */ 118 | return strokeAreaRatio; 119 | 120 | } 121 | /* Pixel-count overload. Returns the cached strokeAreaRatio when it is not -1. Otherwise it resets strokeArea and adds, for every keypoint of this region in the seed keypoint's octave, the number of keypointsPixels entries stored under that keypoint's class_id; the ratio is strokeArea / area. */ 122 | float LetterCandidate::getStrokeAreaRatio(std::vector& img1_keypoints, std::unordered_multimap >& keypointsPixels) 123 | { 124 | if( strokeAreaRatio != -1) 125 | return strokeAreaRatio; 126 | std::pair >::iterator, std::unordered_multimap>::iterator> ret; 127 | strokeArea = 0; 128 | for( auto kpid : keypointIds ) 129 | { 130 | cmp::FastKeyPoint& kp = img1_keypoints[kpid]; 131 | if( kp.octave != this->keyPoint.octave) 132 | continue; 133 | ret = keypointsPixels.equal_range(kp.class_id); 134 | strokeArea += std::distance(ret.first, ret.second); 135 | } 136 | strokeAreaRatio = strokeArea / (float) this->area; 137 | return strokeAreaRatio; 138 | } 139 | 140 | /* True when both the top-left and the bottom-right corner of other.bbox fall inside this bbox. NOTE(review): cv::Rect::contains appears to treat the right/bottom edges as exclusive while br() returns x+width / y+height, so a box flush with this box's right or bottom edge (or an identical box) would not count as contained - confirm this is intended. */ 141 | bool LetterCandidate::contains(LetterCandidate& other ) 142 | { 143 | return bbox.contains(other.bbox.tl()) && bbox.contains(other.bbox.br()); 144 | } 145 | 146 | // helper function: 147 | // finds a cosine of angle between vectors 148 | // from pt0->pt1 and from pt0->pt2 149 | 
static double angleP( cv::Point pt1, cv::Point pt2, cv::Point pt0 ) 150 | { 151 | double dx1 = pt1.x - pt0.x; 152 | double dy1 = pt1.y - pt0.y; 153 | double dx2 = pt2.x - pt0.x; 154 | double dy2 = pt2.y - pt0.y; 155 | return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10); 156 | } 157 | /* True when the first approximated contour is convex. NOTE(review): indexes contoursAp[0] unconditionally, so callers presumably guarantee at least one approximated contour - confirm. */ 158 | bool LetterCandidate::isConvex() 159 | { 160 | return cv::isContourConvex(contoursAp[0]); 161 | } 162 | 163 | /* True when the first approximated contour has exactly 4 vertices and the corner angles inspected (at vertices 1, 2 and 3) all have |cosine| below 0.3. */ 164 | bool LetterCandidate::isRect() 165 | { 166 | if( contoursAp[0].size() == 4) 167 | { 168 | double maxCosine = 0; 169 | for( int j = 2; j < 5; j++ ) 170 | { 171 | // find the maximum cosine of the angle between joint edges 172 | double cosine = fabs(angleP(contoursAp[0][j%4], contoursAp[0][j-2], contoursAp[0][j-1])); 173 | maxCosine = MAX(maxCosine, cosine); 174 | } 175 | 176 | // if cosines of all inspected angles are small 177 | // (all angles are ~90 degree) then the contour 178 | // is reported as a rectangle 179 | if( maxCosine < 0.3 ) 180 | return true; 181 | } 182 | return false; 183 | } 184 | /* Debug visualisation of this candidate's children: draws each child's bounding box and ORs its mask, and the masks of its duplicates, into an image-sized canvas; the canvas is then inverted, converted to BGR, this->bbox is outlined in green and the result is halved when wider than 1024 px. Masks are resized to their own bbox when scaleFactor != 1; regions reaching the right/bottom image border or whose mask size differs from their bbox are skipped. */ 185 | cv::Mat LetterCandidate::createChildsImage(const cv::Mat& image, std::vector& letterCandidates) 186 | { 187 | cv::Mat tmp = cv::Mat::zeros(image.rows, image.cols, CV_8UC1); 188 | 189 | 190 | for( std::set::iterator it = childs.begin(); it != childs.end(); it++ ) 191 | { 192 | LetterCandidate& ref1 = letterCandidates[*it]; 193 | cv::Rect rootRect = cv::Rect(ref1.bbox.x, ref1.bbox.y, ref1.bbox.width, ref1.bbox.height); 194 | cv::rectangle(tmp, rootRect, cv::Scalar(255, 0, 0)); 195 | cv::Mat mask = ref1.mask; 196 | if( ref1.scaleFactor != 1) 197 | { 198 | cv::resize(mask, mask, cv::Size(ref1.bbox.width, ref1.bbox.height)); 199 | } 200 | if( (rootRect.x + rootRect.width) >= tmp.cols ) 201 | continue; 202 | if( (rootRect.y + rootRect.height) >= tmp.rows ) 203 | continue; 204 | if( rootRect.width != mask.cols || rootRect.height != mask.rows ) 205 | continue; 206 | cv::bitwise_or(tmp(rootRect), mask, tmp(rootRect)); 207 | for(auto itj : 
ref1.duplicates) 208 | { 209 | LetterCandidate& refd = letterCandidates[itj]; 210 | rootRect = cv::Rect(refd.bbox.x, refd.bbox.y, refd.bbox.width, refd.bbox.height); 211 | mask = refd.mask; 212 | if( refd.scaleFactor != 1) 213 | { 214 | cv::resize(mask, mask, cv::Size(refd.bbox.width, refd.bbox.height)); /* resize to the duplicate's own bbox (was ref1.bbox), otherwise the size check below rejects it */ 215 | } 216 | if( (rootRect.x + rootRect.width) >= tmp.cols ) 217 | continue; 218 | if( (rootRect.y + rootRect.height) >= tmp.rows ) 219 | continue; 220 | if( rootRect.width != mask.cols || rootRect.height != mask.rows ) 221 | continue; 222 | cv::bitwise_or(tmp(rootRect), mask, tmp(rootRect)); 223 | } 224 | } 225 | tmp = ~tmp; 226 | cv::cvtColor(tmp, tmp, cv::COLOR_GRAY2BGR); 227 | cv::rectangle(tmp, this->bbox, cv::Scalar(0, 255, 0)); 228 | if( tmp.cols > 1024) 229 | cv::resize(tmp, tmp, cv::Size(tmp.cols / 2, tmp.rows / 2)); 230 | return tmp; 231 | } 232 | /* Debug visualisation: returns a BGR copy of the mask with, for every keypoint of this region within 2 octaves of the seed keypoint, either a filled circle (kp.count == 5) or its stored stroke lines drawn with a thickness clamped to 1..255. */ 233 | cv::Mat LetterCandidate::generateStrokeWidthMap(std::vector& img1_keypoints, std::vector& scales, std::unordered_map > > >& keypointStrokes) 234 | { 235 | cv::Mat tmp = this->mask.clone(); 236 | cv::cvtColor(tmp, tmp, cv::COLOR_GRAY2BGR); 237 | 238 | for( auto kpid : keypointIds ) 239 | { 240 | cmp::FastKeyPoint& kp = img1_keypoints[kpid]; 241 | if( abs(kp.octave - this->keyPoint.octave) > 2 ) 242 | continue; 243 | int radius = 2 / scales[kp.octave] / this->scaleFactor; 244 | double sf = 1.0 / scales[kp.octave]; 245 | cv::Scalar color( 0, 0, 255 ); 246 | if( kp.count == 5) 247 | { 248 | color = cv::Scalar(255, 0, 0); 249 | cv::circle(tmp, cv::Point(roundf((kp.pt.x - bbox.x) / this->scaleFactor), roundf((kp.pt.y - bbox.y) / this->scaleFactor)), radius, color, -1); 250 | } 251 | else 252 | { 253 | std::vector > >& storkeDirections = keypointStrokes[kpid]; 254 | for( auto strokes : storkeDirections ) 255 | { 256 | int thickness = kp.count / scales[kp.octave] / this->scaleFactor; 257 | thickness = MAX(1, thickness); 258 | thickness = MIN(255, thickness); 259 | for( auto sd : strokes) 260 | { 261 | 262 | 
cv::line( tmp, cv::Point(roundf((sd->center.x * sf - bbox.x) / this->scaleFactor), roundf((sd->center.y * sf - bbox.y) / this->scaleFactor)) , 263 | cv::Point(roundf((sd->direction.x * sf - bbox.x) / this->scaleFactor), roundf((sd->direction.y * sf - bbox.y) / this->scaleFactor)), color, thickness ); 264 | } 265 | } 266 | //cv::circle(tmp, cv::Point((kp.pt.x - bbox.x) / this->scaleFactor, (kp.pt.y - bbox.y) / this->scaleFactor), 1, cv::Scalar(0, 255, 0), -1); 267 | //cv::circle(tmp, cv::Point((kp.intensityMin.x - bbox.x) / this->scaleFactor, (kp.intensityMin.y - bbox.y) / this->scaleFactor), 1, cv::Scalar(0, 255, 0), -1); 268 | } 269 | 270 | } 271 | return tmp; 272 | } 273 | /* Debug visualisation: returns a BGR copy of the mask with a small green circle at every pixel recorded in keypointsPixels for the keypoints of this region that share the seed keypoint's octave. */ 274 | cv::Mat LetterCandidate::generateKeypointImg(const cv::Mat& img, std::vector& img1_keypoints, std::unordered_multimap >& keypointsPixels) 275 | { 276 | cv::Mat tmp = this->mask.clone(); 277 | cv::cvtColor(tmp, tmp, cv::COLOR_GRAY2BGR); 278 | 279 | std::pair >::iterator, std::unordered_multimap>::iterator> ret; 280 | cv::Scalar color(0, 255, 0); 281 | for( auto kpid : keypointIds ) 282 | { 283 | cmp::FastKeyPoint& kp = img1_keypoints[kpid]; 284 | if( kp.octave != this->keyPoint.octave) 285 | continue; 286 | ret = keypointsPixels.equal_range(kp.class_id); 287 | for (std::unordered_multimap >::iterator it=ret.first; it!=ret.second; it++) 288 | { 289 | assert(it->second.first * this->scaleFactor < img.cols); 290 | assert(it->second.second * this->scaleFactor <= (img.rows + 5)); 291 | cv::circle(tmp, cv::Point((it->second.first - this->bbox.x / this->scaleFactor) , (it->second.second - bbox.y / this->scaleFactor)), 1, color); 292 | } 293 | } 294 | return tmp; 295 | } 296 | /* Returns the smallest absolute difference between two angles given in degrees, where an angle and the same angle shifted by 180 are treated as the same direction. When a shifted representation gives the smaller difference, angle1 and/or angle2 are overwritten with the shifted values so the caller can keep working in that representation. */ 297 | float getMinAnglesDiff(float& angle1, float& angle2) 298 | { 299 | float dif0 = fabs(angle1 - angle2); 300 | if( angle1 > 45 && angle2 < 45 ) 301 | { 302 | float angle1N = angle1 - 180; 303 | float dif1 = fabs(angle1N - angle2); /* compare the shifted angle1 (was angle1, which made dif1 == dif0 and the branch below unreachable) */ 304 | if( angle2 < -45 ) 305 | { 306 | float angle2N = angle2 + 180; 307 | float dif2 = 
fabs(angle1N - angle2N); 308 | if(dif2 < dif0 && dif2 < dif1) 309 | { 310 | angle2 = angle2N; 311 | angle1 = angle1N; 312 | return dif2; 313 | } 314 | } 315 | if(dif1 < dif0) 316 | { 317 | angle1 = angle1N; 318 | return dif1; 319 | } 320 | 321 | }else if( angle2 > 45 && angle1 < 45 ) 322 | { 323 | float angle2N = angle2 - 180; 324 | float dif1 = fabs(angle1 - angle2N); 325 | 326 | if( angle1 < -45 ) 327 | { 328 | float angle1N = angle1 + 180; 329 | float dif2 = fabs(angle1N - angle2N); 330 | if(dif2 < dif0 && dif2 < dif1) 331 | { 332 | angle2 = angle2N; 333 | angle1 = angle1N; 334 | return dif2; 335 | } 336 | } 337 | if(dif1 < dif0) 338 | { 339 | angle2 = angle2N; 340 | return dif1; 341 | } 342 | 343 | } 344 | return dif0; 345 | } 346 | /* Wraps an angle in degrees into the interval [-180, 180). */ 347 | double constrainAngle(double x) 348 | { 349 | x = fmod(x + 180,360); 350 | if (x < 0) 351 | x += 360; 352 | return x - 180; 353 | } 354 | /* Moves `input` from bbox-relative to maxRect-relative coordinates and applies affMat as a row-vector affine transform ([x y 1] * affMat); the float results are truncated when stored in the integer output point. */ 355 | static void transformPoint(const cv::Mat& affMat, const cv::Point& input, cv::Rect& bbox, cv::Rect& maxRect, cv::Point& output) 356 | { 357 | int x = input.x + bbox.x - maxRect.x; 358 | int y = input.y + bbox.y - maxRect.y; 359 | 360 | float ytr = x * affMat.at(0, 1) + y * affMat.at(1, 1) + affMat.at(2, 1); 361 | float xtr = x * affMat.at(0, 0) + y * affMat.at(1, 0) + affMat.at(2, 0); 362 | output.x = xtr; 363 | output.y = ytr; 364 | } 365 | 366 | }//namespace cmp 367 | 368 | -------------------------------------------------------------------------------- /src/segm/segmentation.h: -------------------------------------------------------------------------------- 1 | /* 2 | * segmentation.h 3 | * 4 | * Created on: Dec 15, 2015 5 | * Author: Michal.Busta at gmail.com 6 | * 7 | * Copyright (c) 2015, Michal Busta, Lukas Neumann, Jiri Matas. 
8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation; either version 2 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * Based on: 20 | * 21 | * FASText: Efficient Unconstrained Scene Text Detector,Busta M., Neumann L., Matas J.: ICCV 2015. 22 | * Machine learning for high-speed corner detection, E. Rosten and T. Drummond, ECCV 2006 23 | */ 24 | #ifndef FASTTEXT_SRC_SEGMENTATION_H_ 25 | #define FASTTEXT_SRC_SEGMENTATION_H_ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "KeyPoints.h" 33 | #include "flood_fill.h" 34 | 35 | #ifndef M_PI 36 | #define M_PI 3.14159265358979323846 37 | #endif 38 | 39 | 40 | #define MAX_COMP_SIZE 150 41 | 42 | namespace cmp{ 43 | /* Plain record for one stroke: its centre point, a second point (`direction`) that the visualisation code uses as the other end of the drawn line, a threshold and an index. */ 44 | class StrokeDir { 45 | public: 46 | cv::Point center; 47 | cv::Point direction; 48 | long threshold; 49 | int idx; 50 | 51 | StrokeDir(int idx, long threshold, cv::Point center = cv::Point(), cv::Point direction = cv::Point()) : idx(idx), center(center), direction(direction), threshold(threshold) 52 | { 53 | 54 | } 55 | }; 56 | /* One segmented region (letter candidate): binary mask, bounding box, seed keypoint and scale factor, plus integer links (parents, childs, neibours, duplicates) that index into the shared vector of candidates passed to the member functions. */ 57 | class LetterCandidate{ 58 | 59 | public: 60 | 61 | LetterCandidate(cv::Mat mask = cv::Mat(), cv::Rect bbox = cv::Rect(), cv::Scalar cornerPixel = cv::Scalar(), cv::Scalar meanInk = cv::Scalar(), 62 | int area = 0, cmp::FastKeyPoint keyPoint = FastKeyPoint(), int projection = 0, float scaleFactor = 1.0, 63 | cv::Point centroid = cv::Point(), float angle = 0, int hullPoints = 0, float quality = 0): 64 | mask(mask), bbox(bbox), area(area), angle(angle), hullPoints(hullPoints), keyPoint(keyPoint), scaleFactor(scaleFactor), 
quality(quality), 65 | duplicate(-1), outputOrder(-1), pointsScaled(false), projection(projection), centroid(centroid), cornerPixel(cornerPixel), meanInk(meanInk) { 66 | 67 | merged = false; 68 | isValid = true; 69 | } 70 | 71 | bool contains(LetterCandidate& other ); 72 | /* Marks this candidate as a duplicate of `other` (index refComp): stores refComp in `duplicate`, appends thisComp to other.duplicates and merges this candidate's parents, childs and neighbours into `other`. Asserts that refComp is valid and that this candidate was not already marked. */ 73 | void setDuplicate( LetterCandidate& other, int refComp, int thisComp ){ 74 | assert(refComp != -1); 75 | assert(this->duplicate == -1); 76 | this->duplicate = refComp; 77 | other.duplicates.push_back(thisComp); 78 | other.parents.insert(this->parents.begin(), this->parents.end()); 79 | other.childs.insert(this->childs.begin(), this->childs.end()); 80 | for(auto it = neibours.begin(); it != neibours.end(); it++) 81 | other.addNeibour(*it); 82 | } 83 | /* Records refComp as a neighbour; does nothing when it is already present. */ 84 | void addNeibour(int refComp){ 85 | if(neibours.find(refComp) == neibours.end()) 86 | { 87 | neibours.insert(refComp); 88 | } 89 | } 90 | /* Registers refComp as a child of this candidate and of every candidate currently listed in `parents`, and records refComp2 as a parent of refComp. NOTE(review): refComp2 is presumably this candidate's own index in letterCandidates - confirm with callers. */ 91 | void addChild(int refComp, std::vector& letterCandidates, int refComp2){ 92 | 93 | for( auto pid : parents) 94 | { 95 | letterCandidates[pid].childs.insert(refComp); 96 | //.addChild(refComp, letterCandidates, refComp2); 97 | } 98 | this->childs.insert(refComp); 99 | letterCandidates[refComp].parents.insert(refComp2); 100 | 101 | } 102 | 103 | cv::Mat createChildsImage(const cv::Mat& image, std::vector& letterCandidates); 104 | 105 | cv::Mat generateStrokeWidthMap(std::vector& img1_keypoints, std::vector& scales, std::unordered_map > > >& keypointStrokes); 106 | 107 | cv::Mat generateKeypointImg(const cv::Mat& img, std::vector& img1_keypoints, std::unordered_multimap >& keypointsPixels); 108 | 109 | bool isConvex(); 110 | bool isRect(); 111 | cv::Point getCentroid(); 112 | cv::Point getConvexCentroid(); 113 | /* Returns the stored stroke/area ratio without computing it; the member starts at -1. */ 114 | float getStrokeAreaRatioP(){ 115 | return strokeAreaRatio; 116 | } 117 | 118 | float getStrokeAreaRatio(std::vector& img1_keypoints, std::vector& scales, std::unordered_map > > >& keypointStrokes); 119 | 120 | float getStrokeAreaRatio(std::vector& img1_keypoints, std::unordered_multimap >& 
keypointsPixels); 121 | /* One-shot rescaling guarded by pointsScaled: multiplies every hull point by scaleFactor and offsets it by the bbox origin, and rescales area and the rotatedRect size accordingly. Later calls return immediately. */ 122 | inline void scalePoints(){ 123 | if(pointsScaled) 124 | return; 125 | for( size_t i = 0; i < cHullPoints.size(); i++) 126 | { 127 | cHullPoints[i].x = bbox.x + round(cHullPoints[i].x * scaleFactor); 128 | cHullPoints[i].y = bbox.y + round(cHullPoints[i].y * scaleFactor); 129 | } 130 | area *= scaleFactor * scaleFactor; 131 | rotatedRect.size.width *= scaleFactor; 132 | rotatedRect.size.height *= scaleFactor; 133 | pointsScaled = true; 134 | } 135 | 136 | cv::Mat mask; 137 | cv::Rect bbox; 138 | cv::RotatedRect rotatedRect; 139 | float angle; 140 | int hullPoints; 141 | int area; 142 | int strokeArea = 1; 143 | float convexHullArea = 0; 144 | float featuresArea = 0; 145 | float quality; 146 | bool isWord = false; 147 | float scaleFactor; 148 | bool merged; 149 | int groupAssigned = -1; 150 | cmp::FastKeyPoint keyPoint; 151 | std::vector > contours; 152 | std::vector > contoursAp; 153 | std::vector hierarchy; 154 | std::vector cHullPoints; 155 | std::set parents; 156 | std::set childs; 157 | std::set neibours; 158 | std::vector duplicates; 159 | int duplicate; 160 | 161 | std::vector leftGroups; 162 | std::vector rightGroups; 163 | 164 | cv::Scalar intensityOut; 165 | cv::Scalar intensityInt; 166 | 167 | std::vector keypointIds; 168 | 169 | //std::set groups; 170 | int outputOrder; 171 | bool pointsScaled; 172 | int projection; 173 | bool isValid; 174 | 175 | cv::Scalar cornerPixel; 176 | cv::Scalar meanInk; 177 | 178 | int meanStrokeWidth = 0; 179 | 180 | std::vector textHypotheses; 181 | std::vector textHypothesesConfidences; 182 | 183 | float strokeAreaRatio = -1; 184 | 185 | int mergedKeypoints = 0; 186 | 187 | bool anglesFiltered = false; 188 | 189 | int gid = -1; 190 | 191 | cv::Mat featureVector; 192 | 193 | private: 194 | /* Lazily filled caches behind getCentroid() / getConvexCentroid(); those accessors recompute while x == 0. */ 195 | cv::Point centroid; 196 | cv::Point convexCentroid; 197 | 198 | }; 199 | 200 | }//namespace cmp 201 | 202 | #endif /* FASTTEXT_SRC_SEGMENTATION_H_ */ 203 | 
-------------------------------------------------------------------------------- /src/vis/componentsVis.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * componentsVis.cpp 3 | * 4 | * Created on: Dec 4, 2015 5 | * Author: Michal Busta at gmail.com 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "componentsVis.h" 13 | 14 | using namespace cv; 15 | 16 | namespace cmp{ 17 | 18 | static void drawCharacter(const LetterCandidate& region, Mat& output, CvMemStorage* storage, Mat& green, Mat& red, cv::Mat& blue ) 19 | { 20 | if (region.bbox.br().x >= output.cols || region.bbox.br().y >= output.rows) 21 | return; 22 | 23 | 24 | Mat maskImage = region.mask; 25 | if( region.scaleFactor != 1.0) 26 | { 27 | cv::Mat scaledMask; 28 | cv::resize(maskImage, scaledMask, cv::Size(roundf(maskImage.cols * region.scaleFactor), roundf(maskImage.rows * region.scaleFactor))); 29 | maskImage = scaledMask; 30 | }else{ 31 | maskImage = region.mask.clone(); 32 | } 33 | 34 | IplImage iplContours = maskImage; 35 | CvSeq *contour; 36 | cvFindContours( &iplContours, storage, &contour, sizeof(CvContour), 37 | CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, cvPoint(0, 0) ); 38 | 39 | cv::Rect roid = region.bbox; 40 | roid.width += 1; 41 | roid.height += 1; 42 | Mat roi = output(roid); 43 | try{ 44 | cv::Mat iplMask = output(region.bbox); 45 | if( !region.isWord ) 46 | { 47 | if( region.quality > 0.5 ) 48 | cv::add((1 - region.quality) * iplMask, region.quality * 0.3 * green(region.bbox), iplMask, maskImage); 49 | else 50 | cv::add((region.quality) * iplMask, (1 - region.quality) * 0.3 * blue(region.bbox), iplMask, maskImage); 51 | }else{ 52 | cv::add((1 - region.quality) * iplMask, region.quality * 0.3 * red(region.bbox), iplMask, maskImage); 53 | } 54 | }catch(...){ 55 | std::cout << "Roi: " << region.bbox << ", cols: " << maskImage.cols << ", rows: " << maskImage.rows << std::endl; 56 | } 57 | //cvDrawContours( &iplMask, contour, color, 
CvScalar(), 1); 58 | } 59 | 60 | 61 | Mat createCSERImage(std::vector& regions, const std::vector& keypoints, std::unordered_multimap >& keypointsPixels, const Mat& sourceImage) 62 | { 63 | Mat greyImage; 64 | if(sourceImage.channels() == 3) 65 | { 66 | cvtColor(sourceImage, greyImage, CV_RGB2GRAY); 67 | }else{ 68 | greyImage = sourceImage; 69 | } 70 | sort(regions.begin(), regions.end(), 71 | [](const LetterCandidate * a, const LetterCandidate * b) -> bool 72 | { 73 | if( a->isWord != b->isWord ) 74 | { 75 | if( !a->isWord ) 76 | return false; 77 | else 78 | return true; 79 | } 80 | return a->quality < b->quality; 81 | }); 82 | 83 | 84 | 85 | Mat output; 86 | cvtColor(greyImage, output, CV_GRAY2RGB); 87 | 88 | RNG rng(12345); 89 | for (vector::const_iterator j = regions.begin(); j < regions.end(); j++) 90 | { 91 | Scalar color = Scalar(rng.uniform(0,255), rng.uniform(0, 255), rng.uniform(0, 255)); 92 | for(auto kpid : (*j)->keypointIds) 93 | { 94 | const cmp::FastKeyPoint& kp = keypoints[kpid]; 95 | if(kp.octave != (*j)->keyPoint.octave) 96 | continue; 97 | std::pair >::iterator, std::unordered_multimap>::iterator> ret; 98 | ret = keypointsPixels.equal_range(kp.class_id); 99 | //if( keypointsPixels.size() > 0) 100 | // assert( std::distance(ret.first, ret.second) > 0 ); 101 | for (std::unordered_multimap >::iterator it=ret.first; it!=ret.second; ++it) 102 | { 103 | cv::circle(output, cv::Point(it->second.first * (*j)->scaleFactor, it->second.second * (*j)->scaleFactor), 1 * (*j)->scaleFactor, color); 104 | } 105 | } 106 | } 107 | 108 | CvMemStorage* storage = cvCreateMemStorage(); 109 | Mat green = Mat(sourceImage.size(), CV_8UC3, CV_RGB(0, 255, 0)); 110 | Mat red = Mat(sourceImage.size(), CV_8UC3, CV_RGB(0, 0, 255)); 111 | Mat blue = Mat(sourceImage.size(), CV_8UC3, CV_RGB(255, 0, 0)); 112 | for (vector::const_iterator j = regions.begin(); j < regions.end(); j++) 113 | { 114 | if( (*j)->quality < 0.1 ) 115 | continue; 116 | 117 | if( (*j)->duplicate != -1 ) 118 | 
class FASTex(object):
    '''
    Object-style facade over the ``ftext`` extension module.

    Every method forwards directly to the ``ftext`` function of the matching
    purpose; the only state kept on the instance is ``edgeThreshold``.
    '''

    def __init__(self, charClsModelFile = defCharClsModel, scaleFactor = 1.6, nlevels = -1, edgeThreshold = 15, keypointTypes = 3, kMin = 9, kMax = 11, erode = 0, segmentGrad = 0,
                 minCompSize = 0, segmDeltaInt = 1):
        '''
        Initialise the ``ftext`` module and remember the edge threshold.

        The arguments are handed to ``ftext.init`` positionally, in the order
        that function expects. The two literals ``0`` and ``1.0`` sitting
        between ``minCompSize`` and ``segmDeltaInt`` are fixed here.
        NOTE(review): their meaning is defined by ``ftext.init`` and is not
        visible from this file.
        '''
        initArguments = (
            scaleFactor, nlevels, edgeThreshold, keypointTypes, kMin, kMax,
            charClsModelFile, erode, segmentGrad, minCompSize,
            0, 1.0,
            segmDeltaInt,
        )
        ftext.init(*initArguments)

        self.edgeThreshold = edgeThreshold

    def findKeypoints(self, img, outputDir, baseName):
        '''Forward to ``ftext.findKeyPoints`` and return its result.'''
        result = ftext.findKeyPoints(img, outputDir, baseName)
        return result

    def getCharSegmentations(self, img, outputDir='', baseName=''):
        '''
        @param img - the source image (numpy array)
        @param outputDir - the debug directory for visualizations
        @param baseName

        returns the np array where row is: [bbox.x, bbox.y, bbox.width, bbox.height, keyPoint.pt.x, keyPoint.pt.y, octave, ?, duplicate, quality, [keypointsIds]]
        '''
        result = ftext.getCharSegmentations(img, outputDir, baseName)
        return result

    def findTextLines(self, outputDir='', baseName=''):
        '''
        @param outputDir - the debug directory for visualizations
        @param baseName

        returns the np array where row is: [bbox.x, bbox.y, bbox.width, bbox.height, rotated rectangle points (pt1.x, pt1.y, ... pt3.y) ]
        '''
        result = ftext.findTextLines(outputDir, baseName)
        return result

    def getNormalizedLine(self, lineNo):
        '''
        @param lineNo - the id of line - row in np array from findTextLines

        returns the line image normalized against the rotation
        '''
        result = ftext.getNormalizedLine(lineNo)
        return result

    def getLastDetectionKeypoints(self):
        '''Forward to ``ftext.getLastDetectionKeypoints``.'''
        result = ftext.getLastDetectionKeypoints()
        return result

    def getImageAtScale(self, scale):
        '''Forward to ``ftext.getImageAtScale`` for the given ``scale``.'''
        result = ftext.getImageAtScale(scale)
        return result

    def getImageScales(self):
        '''Forward to ``ftext.getImageScales``.'''
        result = ftext.getImageScales()
        return result

    def getDetectionStat(self):
        '''Forward to ``ftext.getDetectionStat``.'''
        result = ftext.getDetectionStat()
        return result

    def getLastDetectionOrbKeypoints(self):
        '''Forward to ``ftext.getLastDetectionOrbKeypoints``.'''
        result = ftext.getLastDetectionOrbKeypoints()
        return result

    def getSegmentationMask(self, maskNo):
        '''Forward to ``ftext.getSegmentationMask`` for mask ``maskNo``.'''
        result = ftext.getSegmentationMask(maskNo)
        return result

    def saveKeypints(self, keypoints, outFile):
        '''
        Store ``keypoints`` and the strokes of every keypoint in ``outFile``.

        ``ftext.getKeypointStrokes`` is queried once per row of ``keypoints``
        (in row order); the results are collected in a dict keyed by row index
        and written, together with ``keypoints``, via ``np.savez``.
        '''
        keypointSegments = dict(
            (rowIndex, ftext.getKeypointStrokes(rowIndex))
            for rowIndex in range(keypoints.shape[0])
        )

        np.savez(outFile, keypoints=keypoints, keypointSegments = keypointSegments)
ft.getCharSegmentations(img, outputDir, 'base') 31 | print segmentations 32 | for i in range(segmentations.shape[0]): 33 | rectn = segmentations[i, :] 34 | rectn[2] += rectn[0] 35 | rectn[3] += rectn[1] 36 | 37 | mask = ft.getSegmentationMask(i) 38 | 39 | ''' 40 | for i in range(lines.shape[0]): 41 | line = lines[i] 42 | if line[25] == 0: 43 | continue 44 | lineSegm = ft.getNormalizedLine(i) 45 | cv2.imshow("ts", lineSegm) 46 | cv2.waitKey(0) 47 | ''' 48 | 49 | keypoints = ft.getLastDetectionKeypoints() 50 | draw_keypoints(imgc, keypoints, edgeThreshold, inter = True, color = 0) 51 | 52 | 53 | while imgc.shape[1] > 1024: 54 | shape = img.shape 55 | shapet = ( shape[0] / 2, shape[1] / 2) 56 | dst = np.zeros(shapet, dtype=np.uint8) 57 | dst = cv2.resize(imgc, (0,0), fx=0.5, fy=0.5) 58 | imgc = dst 59 | 60 | 61 | pass 62 | -------------------------------------------------------------------------------- /tools/testLines.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Jan 8, 2015 3 | 4 | @author: busta 5 | ''' 6 | 7 | import numpy as np 8 | import cv2 9 | import sys 10 | from ft import FASTex 11 | from vis import draw_keypoints 12 | 13 | if __name__ == '__main__': 14 | 15 | outputDir = '/tmp' 16 | edgeThreshold = 13 17 | 18 | ft = FASTex(edgeThreshold= edgeThreshold, nlevels=-1, minCompSize = 4) 19 | 20 | imgName = '/datagrid/personal/TextSpotter/evaluation-sets/bornDigital/img_100.png' 21 | 22 | if len(sys.argv) > 1: 23 | if sys.argv[1].endswith(".png") or sys.argv[1].endswith(".jpg"): 24 | imgName = sys.argv[1] 25 | 26 | img = cv2.imread(imgName, 0) 27 | imgc = cv2.imread(imgName) 28 | 29 | #print(out) 30 | segmentations = ft.getCharSegmentations(img, outputDir, 'base') 31 | print segmentations 32 | for i in range(segmentations.shape[0]): 33 | rectn = segmentations[i, :] 34 | rectn[2] += rectn[0] 35 | rectn[3] += rectn[1] 36 | 37 | lines = ft.findTextLines(outputDir, 'base') 38 | 39 | 40 | for i in 
range(lines.shape[0]): 41 | line = lines[i] 42 | lineSegm = ft.getNormalizedLine(i) 43 | cv2.imshow("textLine", lineSegm) 44 | cv2.waitKey(0) 45 | -------------------------------------------------------------------------------- /tools/trainCharFeatures.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Dec 3, 2014 3 | 4 | @author: busta 5 | ''' 6 | import matplotlib.pyplot as plt 7 | 8 | import sys, os 9 | import numpy as np 10 | import cv2 11 | import utls 12 | import utils 13 | 14 | 15 | baseDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 16 | print baseDir 17 | #sys.path.append(os.path.join(baseDir, "Release/bin" )) 18 | sys.path.append(os.path.join(baseDir, "Release" )) 19 | 20 | 21 | import ftext 22 | import glob 23 | import pylab 24 | import datetime 25 | from collections import defaultdict 26 | from icdarUtils import computeWordOvelap 27 | 28 | from ft import FASTex 29 | 30 | 31 | MIN_SEGM_OVRLAP = 0.6 32 | evalPunctuation = False 33 | 34 | def init_ftext(minCompSize = 5): 35 | 36 | scaleFactor = 1.6 37 | nleves = -1 38 | edgeThreshold = 13 39 | keypointTypes = 3 40 | 41 | #charClsModelFile = '/tmp/cvBoostChar.xml' 42 | 43 | edgeThreshold = 14 44 | fastex = FASTex(edgeThreshold = edgeThreshold) 45 | 46 | def run_evaluation(inputDir, outputDir, invert = False, isFp = False): 47 | 48 | if not os.path.exists(outputDir): 49 | os.mkdir(outputDir) 50 | 51 | images = glob.glob('{0}/*.jpg'.format(inputDir)) 52 | images.extend(glob.glob('{0}/*.JPG'.format(inputDir))) 53 | images.extend(glob.glob('{0}/*.png'.format(inputDir))) 54 | segmDir = '{0}/segmentations'.format(inputDir) 55 | 56 | for image in images: 57 | print('Processing {0}'.format(image)) 58 | 59 | img = cv2.imread(image, 0) 60 | imgc = cv2.imread(image) 61 | imgproc = img 62 | 63 | imgKp = np.copy(img) 64 | imgKp.fill(0) 65 | 66 | baseName = os.path.basename(image) 67 | baseName = baseName[:-4] 68 | workPoint = 0.3 69 | 
segmentations = ftext.getCharSegmentations(imgproc) #, outputDir, baseName) 70 | segmentations = segmentations[:, 0:10] 71 | segmentations = np.column_stack( [ segmentations , np.zeros( (segmentations.shape[0], 2), dtype = np.float ) ] ) 72 | maskDuplicates = segmentations[:, 8] == -1 73 | segmentationsDuplicates = segmentations[maskDuplicates, :] 74 | maskNoNei = segmentationsDuplicates[:, 9] > workPoint 75 | segmentationsNoNei = segmentationsDuplicates[maskNoNei, :] 76 | keypoints = ftext.getLastDetectionKeypoints() 77 | imgKp[keypoints[:, 1].astype(int), keypoints[:, 0].astype(int)] = 255 78 | scales = ftext.getImageScales() 79 | statc = ftext.getDetectionStat() 80 | words = ftext.findTextLines() 81 | segmLine = segmentations[segmentations[:, 7] == 1.0, :] 82 | segmentations[:, 2] += segmentations[:, 0] 83 | segmentations[:, 3] += segmentations[:, 1] 84 | 85 | 86 | if isFp: 87 | for detId in range(0, segmentations.shape[0]): 88 | ftext.acummulateCharFeatures(0, detId) 89 | 90 | continue 91 | 92 | lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName) 93 | if not os.path.exists(lineGt): 94 | lineGt = '{0}/{1}.txt'.format(inputDir, baseName) 95 | 96 | lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName) 97 | if os.path.exists(lineGt): 98 | try: 99 | word_gt = utls.read_icdar2013_txt_gt(lineGt) 100 | except ValueError: 101 | try: 102 | word_gt = utls.read_icdar2013_txt_gt(lineGt, separator = ',') 103 | except ValueError: 104 | word_gt = utls.read_icdar2015_txt_gt(lineGt, separator = ',') 105 | else: 106 | lineGt = '{0}/{1}.txt'.format(inputDir, baseName) 107 | word_gt = utls.read_mrrc_txt_gt(lineGt, separator = ',') 108 | 109 | 110 | 111 | 112 | rWcurrent = 0.0 113 | for gt_box in word_gt: 114 | if len(gt_box[4]) == 1: 115 | continue 116 | best_match = 0 117 | cv2.rectangle(imgc, (gt_box[0], gt_box[1]), (gt_box[2], gt_box[3]), (0, 255, 0)) 118 | for det_word in words: 119 | rect_int = utils.intersect( det_word, gt_box ) 120 | int_area = utils.area(rect_int) 121 | 
union_area = utils.area(utils.union(det_word, gt_box)) 122 | 123 | if union_area == 0: 124 | continue 125 | 126 | ratio = int_area / float(union_area) 127 | det_word[11] = max(det_word[11], ratio) 128 | 129 | if ratio > best_match: 130 | best_match = ratio 131 | rWcurrent += best_match 132 | 133 | best_match = 0 134 | for detId in range(segmentations.shape[0]): 135 | rectn = segmentations[detId, :] 136 | rect_int = utils.intersect( rectn, gt_box ) 137 | int_area = utils.area(rect_int) 138 | union_area = utils.area(utils.union(rectn, gt_box)) 139 | 140 | ratio = int_area / float(union_area) 141 | rectn[11] = max(ratio, rectn[11]) 142 | if ratio > best_match: 143 | best_match = ratio 144 | if ratio > 0.7: 145 | 146 | #print( "Word Match!" ) 147 | #tmp = ftext.getSegmentationMask(detId) 148 | #cv2.imshow("ts", tmp) 149 | #cv2.waitKey(0) 150 | 151 | ftext.acummulateCharFeatures(2, detId) 152 | 153 | 154 | segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName) 155 | if not os.path.exists(segmImg): 156 | segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName) 157 | if not os.path.exists(segmImg): 158 | segmImg = '{0}/{1}.png'.format(segmDir, baseName) 159 | segmImg = cv2.imread(segmImg, 0) 160 | if invert and segmImg is not None: 161 | segmImg = ~segmImg 162 | 163 | gt_rects = [] 164 | miss_rects = [] 165 | segmGt = '{0}/{1}_GT.txt'.format(segmDir, baseName) 166 | if os.path.exists(segmGt) and False: 167 | (gt_rects, groups) = utls.read_icdar2013_segm_gt(segmGt) 168 | segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName) 169 | if not os.path.exists(segmImg): 170 | segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName) 171 | segmImg = cv2.imread(segmImg) 172 | else: 173 | contours = cv2.findContours(np.copy(segmImg), mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE)[1] 174 | for cont in contours: 175 | rect = cv2.boundingRect( cont ) 176 | rect = [rect[0], rect[1], rect[0] + rect[2], rect[1] + rect[3], '?', 0, 0] 177 | gt_rects.append(rect) 178 | 179 | 180 | 181 | for detId 
in range(segmentations.shape[0]): 182 | rectn = segmentations[detId, :] 183 | 184 | for k in range(len(gt_rects)): 185 | gt_rect = gt_rects[k] 186 | best_match = 0 187 | best_match_line = 0 188 | if (gt_rect[4] == ',' or gt_rect[4] == '.' or gt_rect[4] == '\'' or gt_rect[4] == ':' or gt_rect[4] == '-') and not evalPunctuation: 189 | continue 190 | 191 | minSingleOverlap = MIN_SEGM_OVRLAP 192 | if gt_rect[4] == 'i' or gt_rect[4] == '!': 193 | minSingleOverlap = 0.5 194 | 195 | 196 | rect_int = utils.intersect( rectn, gt_rect ) 197 | int_area = utils.area(rect_int) 198 | union_area = utils.area(utils.union(rectn, gt_rect)) 199 | ratio = int_area / float(union_area) 200 | rectn[10] = max(ratio, rectn[10]) 201 | 202 | if rectn[9] > workPoint: 203 | gt_rect[6] = max(ratio, gt_rect[6]) 204 | 205 | if ratio > best_match: 206 | best_match = ratio 207 | 208 | if ratio > best_match_line and rectn[7] == 1.0 : 209 | best_match_line = ratio 210 | if ratio > minSingleOverlap: 211 | ftext.acummulateCharFeatures(1, detId) 212 | 213 | 214 | if ratio < minSingleOverlap: 215 | if k < len(gt_rects) - 1: 216 | gt_rect2 = gt_rects[k + 1] 217 | chars2Rect = utils.union(gt_rect2, gt_rect) 218 | rect_int = utils.intersect( rectn, chars2Rect ) 219 | int_area = utils.area(rect_int) 220 | union_area = utils.area(utils.union(rectn, chars2Rect)) 221 | ratio = int_area / float(union_area) 222 | rectn[10] = max(ratio, rectn[10]) 223 | 224 | if ratio > 0.8: 225 | best_match2 = ratio 226 | gt_rect[5] = ratio 227 | gt_rect2[5] = ratio 228 | ftext.acummulateCharFeatures(2, detId) 229 | 230 | 231 | thickness = 1 232 | color = (255, 0, 255) 233 | if best_match >= minSingleOverlap: 234 | color = (0, 255, 0) 235 | if best_match > 0.7: 236 | thickness = 2 237 | cv2.rectangle(imgc, (gt_rect[0], gt_rect[1]), (gt_rect[2], gt_rect[3]), color, thickness) 238 | 239 | if rectn[10] == 0 and rectn[11] == 0: 240 | ftext.acummulateCharFeatures(0, detId) 241 | 242 | 243 | ''' 244 | if len(miss_rects) > 0: 245 | 
cv2.imshow("ts", imgc) 246 | cv2.imshow("kp", imgKp) 247 | cv2.waitKey(0) 248 | ''' 249 | 250 | def run_words(inputDir, outputDir, invert = False): 251 | 252 | if not os.path.exists(outputDir): 253 | os.mkdir(outputDir) 254 | 255 | #images = glob.glob('{0}/*.png'.format('/datagrid/personal/TextSpotter/evaluation-sets/MS-text_database')) 256 | #images = glob.glob('{0}/*.jpg'.format('/datagrid/personal/TextSpotter/evaluation-sets/neocr_dataset')) 257 | images = glob.glob('{0}/*.jpg'.format(inputDir)) 258 | images.extend(glob.glob('{0}/*.JPG'.format(inputDir))) 259 | images.extend(glob.glob('{0}/*.png'.format(inputDir))) 260 | 261 | matched_words = 0 262 | word_count = 0 263 | 264 | for image in sorted(images): 265 | print('Processing {0}'.format(image)) 266 | 267 | img = cv2.imread(image, 0) 268 | imgc = cv2.imread(image) 269 | imgproc = img 270 | 271 | imgKp = np.copy(img) 272 | imgKp.fill(0) 273 | 274 | baseName = os.path.basename(image) 275 | baseName = baseName[:-4] 276 | workPoint = 0.3 277 | segmentations = ftext.getCharSegmentations(imgproc) #, outputDir, baseName) 278 | segmentations = segmentations[:, 0:10] 279 | segmentations = np.column_stack( [ segmentations , np.zeros( (segmentations.shape[0], 2), dtype = np.float ) ] ) 280 | maskDuplicates = segmentations[:, 8] == -1 281 | segmentationsDuplicates = segmentations[maskDuplicates, :] 282 | maskNoNei = segmentationsDuplicates[:, 9] > workPoint 283 | keypoints = ftext.getLastDetectionKeypoints() 284 | imgKp[keypoints[:, 1].astype(int), keypoints[:, 0].astype(int)] = 255 285 | scales = ftext.getImageScales() 286 | statc = ftext.getDetectionStat() 287 | words = ftext.findTextLines() 288 | segmentations[:, 2] += segmentations[:, 0] 289 | segmentations[:, 3] += segmentations[:, 1] 290 | 291 | 292 | lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName) 293 | if not os.path.exists(lineGt): 294 | lineGt = '{0}/{1}.txt'.format(inputDir, baseName) 295 | 296 | lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName) 297 | 
if os.path.exists(lineGt): 298 | try: 299 | word_gt = utls.read_icdar2013_txt_gt(lineGt) 300 | except ValueError: 301 | try: 302 | word_gt = utls.read_icdar2013_txt_gt(lineGt, separator = ',') 303 | except ValueError: 304 | word_gt = utls.read_icdar2015_txt_gt(lineGt, separator = ',') 305 | else: 306 | lineGt = '{0}/{1}.txt'.format(inputDir, baseName) 307 | word_gt = utls.read_mrrc_txt_gt(lineGt, separator = ',') 308 | 309 | cw = 0 310 | for detId in range(segmentations.shape[0]): 311 | best_match = 0 312 | 313 | for gt_box in word_gt: 314 | if len(gt_box[4]) == 1: 315 | continue 316 | if gt_box[4][0] == "#": 317 | continue 318 | cw += 1 319 | 320 | rectn = segmentations[detId, :] 321 | rect_int = utils.intersect( rectn, gt_box ) 322 | int_area = utils.area(rect_int) 323 | union_area = utils.area(utils.union(rectn, gt_box)) 324 | 325 | ratio = int_area / float(union_area) 326 | rectn[11] = max(ratio, rectn[11]) 327 | if ratio > best_match: 328 | best_match = ratio 329 | if ratio > 0.7: 330 | 331 | #print( "Word Match!" 
) 332 | #cv2.rectangle(imgc, (rectn[0], rectn[1]), (rectn[2], rectn[3]), (0, 255, 0)) 333 | #cv2.imshow("ts", imgc) 334 | #cv2.waitKey(0) 335 | ftext.acummulateCharFeatures(2, detId) 336 | if gt_box[5] != -1: 337 | matched_words += 1 338 | gt_box[5] = -1 339 | 340 | if best_match == 0: 341 | ftext.acummulateCharFeatures(0, detId) 342 | 343 | word_count += cw 344 | print("word recall: {0}".format(matched_words / float(word_count))) 345 | 346 | if __name__ == '__main__': 347 | 348 | init_ftext() 349 | 350 | 351 | inputDir = '/home/busta/data/icdar2013-Train' 352 | outputBase = '/mnt/textspotter/FastTextEval/BDT' 353 | outputDir = '{0}/{1}'.format(outputBase, datetime.date.today().strftime('%Y-%m-%d')) 354 | run_evaluation(inputDir, outputDir, True) 355 | 356 | #run_words('/home/busta/data/icdar2013-Test', '/tmp/ch4') 357 | run_words('/home/busta/data/icdar2015-Ch4-Train', '/tmp/ch4') 358 | ftext.trainCharFeatures() 359 | 360 | 361 | -------------------------------------------------------------------------------- /tools/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Nov 25, 2014 3 | 4 | @author: busta 5 | ''' 6 | 7 | def intersect(a, b): 8 | '''Determine the intersection of two rectangles''' 9 | rect = (0,0,0,0) 10 | r0 = max(a[0],b[0]) 11 | c0 = max(a[1],b[1]) 12 | r1 = min(a[2],b[2]) 13 | c1 = min(a[3],b[3]) 14 | # Do we have a valid intersection? 
'''
Created on Oct 2, 2013

@author: busta
'''

import math
import numpy as np

from xml.dom import minidom
from csv import reader


def _read_lines(gt_file):
    ''' Reads all lines of a text file and closes the file afterwards '''
    with open(gt_file, "r") as f:
        return f.readlines()


def _split_gt_line(line, separator):
    '''
    Splits one ground-truth line into fields, honouring double-quoted fields.

    The stripped line is parsed with csv.reader; if the reader produces no row
    the plain ``line.split(separator)`` result is returned instead.
    '''
    fallback = line.split(separator)
    rows = reader([line.strip()], skipinitialspace=True, quotechar='"', delimiter=separator)
    return next(rows, fallback)


def intersect(a, b):
    '''
    Returns the intersection of two lists.

    The result is a tuple (inter, indices): ``inter`` is the set of values
    present in both ``a`` and ``b``; ``indices`` holds, for each of them, its
    index in ``a`` (the last index when the value is repeated in ``a``).
    '''

    ind_dict = dict((k, i) for i, k in enumerate(a))
    inter = set(ind_dict.keys()).intersection(b)

    indices = [ind_dict[x] for x in inter]

    return (inter, indices)


def compupte_area(geo):
    ''' Computes area (pi * geo[3] * geo[4]) from the feature geometry '''
    a = geo[3]
    b = geo[4]
    # Calculating the Area of the Visual Words detected in the image
    area = math.pi * a * b
    return area


def get_area_scale_index(area):
    '''
    The Scale of feature is defined as sqrt(area) and its values ranges from
    8 to 67. All values < 8 are taken as 8 and > 67 are taken as 67.

    Returns the zero-based index ``scale - 8`` (0 .. 59).
    '''

    # builtin int instead of np.int: the alias was removed from NumPy
    scale = np.around(np.sqrt(area), 0).astype(int)
    scale = np.clip(scale, 8, 67)
    scale = scale - 8
    return scale


def get_y_relative_index(featurey, yline, line_height):
    '''
    Returns the relative position of feature against the line.

    The offset is expressed in tenths of ``line_height``, clipped to
    [-20, 20] and shifted by 20 so that the result lies in 0 .. 40.
    '''
    # builtin int instead of np.int: the alias was removed from NumPy
    rel_position = np.round(10 * (featurey - yline) / line_height).astype(int)
    rel_position = np.clip(rel_position, -20, 20)
    rel_position = rel_position + 20
    return rel_position


def y_index_2relative(index, height):
    ''' Converts an index from get_y_relative_index back to an offset for a line of the given height '''
    index = index - 20
    return index * height / 10.0


def reproject_feature(y0, height, relposition):
    '''
    Returns the rounded (lower, upper) positions obtained by shifting
    ``y0`` and ``y0 + height`` by the offset encoded in ``relposition``.
    '''

    rel_pos = y_index_2relative(relposition, height)
    pos_low = np.round(y0 - rel_pos)
    pos_up = np.round(y0 + height - rel_pos)

    return (pos_low, pos_up)


def get_inbox_mask(bbox, geo, epsilon = 0):
    '''
    Return the mask of features inside the rotated rectangle.

    ``geo[0, :]`` / ``geo[1, :]`` are read as the x / y coordinates of the
    features; ``bbox[0]``, ``bbox[2]`` bound x and ``bbox[7]``, ``bbox[9]``
    bound the slope-corrected y.
    NOTE(review): the slope divides by ``bbox[6] - bbox[4]``; presumably these
    are the x coordinates of two distinct rectangle corners - confirm with the
    caller that they can never coincide.
    '''
    slope = (bbox[7] - bbox[5]) / float(bbox[6] - bbox[4])
    geoyT = geo[1, :] + (bbox[6] - geo[0, :]) * slope

    maskX = np.logical_and(geo[0, :] >= (bbox[0] - epsilon), geo[0, :] <= (bbox[2] + epsilon))
    maskY = np.logical_and(geoyT >= (bbox[7] - epsilon), geoyT <= (bbox[9] + epsilon))

    return np.logical_and(maskX, maskY)


def textContent(node):
    ''' Returns text content of xml node '''

    if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
        return node.nodeValue
    else:
        return ''.join(textContent(n) for n in node.childNodes)


def read_icdar2011_gt(gt_file):
    '''
    Reads an xml ground truth with <image>/<imageName>/<taggedRectangle> nodes.

    Returns (gtimages, noOfGtImages, noGt): a dict mapping the image name to a
    list of [x0, y0, x1, y1] rectangles, the number of images and the total
    number of rectangles.
    '''

    doc = minidom.parse(gt_file)
    gtimages = {}
    noOfGtImages = 0
    noGt = 0
    for node in doc.getElementsByTagName('image'):

        image_name = textContent(node.getElementsByTagName('imageName')[0])
        gtimages[image_name] = []
        noOfGtImages += 1

        for rect in node.getElementsByTagName('taggedRectangle'):

            x = float(rect.getAttributeNode('x').nodeValue)
            y = float(rect.getAttributeNode('y').nodeValue)
            width = float(rect.getAttributeNode('width').nodeValue)
            height = float(rect.getAttributeNode('height').nodeValue)

            gtimages[image_name].append([x, y, x + width, y + height])
            noGt += 1

    return (gtimages, noOfGtImages, noGt)


def read_icdar2013_segm_gt(gt_file, separator = ' '):
    '''
    Reads a character-segmentation ground truth text file.

    Lines starting with '#' are ignored. A line with fewer than five fields
    closes the current group. In every other line fields 5..8 are the corner
    coordinates and field 9 is the character.

    Returns (gt_rectangles, groups): rectangles are
    [minX, minY, maxX, maxY, char, 0, 0]; each group is the list of rectangle
    indices found between two short lines.
    '''

    gt_rectangles = []
    groups = []
    group = []
    objId = 0
    for line in _read_lines(gt_file):
        if line[0] == '#':
            continue
        if len(line.split(separator)) < 5:
            if len(group) > 0:
                groups.append(group)
            group = []
            continue

        splitLine = _split_gt_line(line, separator)

        xs = (int(float(splitLine[5])), int(float(splitLine[7])))
        ys = (int(float(splitLine[6])), int(float(splitLine[8])))

        gt_rectangles.append([min(xs), min(ys), max(xs), max(ys), splitLine[9], 0, 0])
        group.append(objId)
        objId += 1

    # the file need not end with a separator line - keep the last open group
    if len(group) > 0:
        groups.append(group)

    return (gt_rectangles, groups)


def read_icdar2013_txt_gt(gt_file, separator = ' '):
    '''
    Reads a word-level ground truth text file.

    Lines starting with '#' and lines with fewer than five fields are ignored.
    Fields 0..3 are the corner coordinates, field 4 is the transcription.

    Returns the list of (minX, minY, maxX, maxY, text) tuples. ValueError is
    raised when a coordinate field is not a number.
    '''

    gt_rectangles = []
    for line in _read_lines(gt_file):
        if line[0] == '#':
            continue
        if len(line.split(separator)) < 5:
            continue

        splitLine = _split_gt_line(line, separator)

        xs = (int(float(splitLine[0].strip())), int(float(splitLine[2].strip())))
        ys = (int(float(splitLine[1].strip())), int(float(splitLine[3].strip())))

        gt_rectangles.append((min(xs), min(ys), max(xs), max(ys), splitLine[4]))

    return gt_rectangles


def read_mock_segm_gt(gt_file, separator = ' '):
    '''
    Reads a ground truth file with a 'RECTANGLES:' and a 'LINES:' section.

    Lines starting with '#' are ignored, as is everything before the first
    section header. In the rectangle section fields 1..4 are x, y, width and
    height and field 6 (when present) is the label; in the line section every
    field is an integer rectangle index.

    Returns (gt_rectangles, groups): rectangles are (x0, y0, x1, y1, label)
    tuples (label " " when missing), groups are lists of int indices.
    '''

    gt_rectangles = []
    groups = []
    stage = 0
    for line in _read_lines(gt_file):
        if line[0] == '#':
            continue

        if line.startswith('RECTANGLES:'):
            stage = 1
        elif line.startswith('LINES:'):
            stage = 2
        elif stage == 1:
            splitLine = _split_gt_line(line, separator)

            x = float(splitLine[1])
            y = float(splitLine[2])
            width = float(splitLine[3])
            height = float(splitLine[4])

            label = splitLine[6] if len(splitLine) > 6 else " "
            gt_rectangles.append((x, y, x + width, y + height, label))
        elif stage == 2:
            splitLine = _split_gt_line(line, separator)
            groups.append([int(field) for field in splitLine])

    return (gt_rectangles, groups)