├── .gitignore
├── CMakeLists.txt
├── README.md
├── boost_train
│   ├── boost_char_train.cpp
│   ├── boost_groups_train.cpp
│   ├── build.sh
│   ├── build_chars_cvs.py
│   ├── build_groups_cvs.py
│   ├── extract_char_features.cpp
│   ├── extract_group_features.cpp
│   ├── trained_boost_char.xml
│   └── trained_boost_groups.xml
├── fast_clustering.cpp
├── group_classifier.cpp
├── group_classifier.h
├── main.cpp
├── max_meaningful_clustering.cpp
├── max_meaningful_clustering.h
├── min_bounding_box.cpp
├── min_bounding_box.h
├── mser.cpp
├── mser.h
├── nfa.cpp
├── region.cpp
├── region.h
├── region_classifier.cpp
├── region_classifier.h
├── sample_images
│   ├── T050.JPG
│   ├── T051.JPG
│   └── T072.JPG
├── text_extract.h
└── utils.h

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | .*swp
 3 | .*swo
 4 | out.png
 5 | CMakeCache.txt
 6 | CMakeFiles
 7 | Makefile
 8 | cmake_install.cmake
 9 | text_extraction
10 | 
11 | 
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8)
 2 | project(opencv_sandbox)
 3 | 
 4 | # Select a default build configuration if none was chosen
 5 | IF(NOT CMAKE_BUILD_TYPE)
 6 |   SET(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: None (CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel." FORCE)
 7 | ENDIF()
 8 | 
 9 | find_package(OpenCV REQUIRED)
10 | 
11 | ADD_EXECUTABLE(text_extraction fast_clustering.cpp group_classifier.cpp main.cpp max_meaningful_clustering.cpp min_bounding_box.cpp mser.cpp nfa.cpp region_classifier.cpp region.cpp)
12 | 
13 | IF(OpenCV_FOUND)
14 |   TARGET_LINK_LIBRARIES(text_extraction ${OpenCV_LIBS})
15 | ENDIF()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | text_extraction
 2 | ===============
 3 | 
 4 | This code is the implementation of the method proposed in the paper “Multi-script text extraction from natural scenes” (Gomez & Karatzas), International Conference on Document Analysis and Recognition, ICDAR 2013.
 5 | 
 6 | This code should reproduce the quantitative results published in the paper for the KAIST dataset (for the task of text segmentation at the pixel level). If you plan to compare this method with yours on other datasets, please drop us a line ({lgomez,dimos}@cvc.uab.es). Thanks!
 7 | 
 8 | 
 9 | Includes the following third-party code:
10 | 
11 | - fast_clustering.cpp Copyright (c) 2011 Daniel Müllner, under the BSD license. http://math.stanford.edu/~muellner/fastcluster.html
12 | - mser.cpp Copyright (c) 2011 Idiap Research Institute, under the GPL license. http://www.idiap.ch/~cdubout/
13 | - The binomial coefficient approximations are due to Rafael Grompone von Gioi. http://www.ipol.im/pub/art/2012/gjmr-lsd/
--------------------------------------------------------------------------------
/boost_train/boost_char_train.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "opencv/cv.h"
 3 | #include "opencv/ml.h"
 4 | #include <fstream>
 5 | #include <vector>
 6 | 
 7 | using namespace std;
 8 | using namespace cv;
 9 | 
10 | int main(int argc, char** argv) {
11 | 
12 |   /* STEP 2. Opening the file */
13 |   //1. Declare a structure to keep the data
14 |   CvMLData cvml;
15 | 
16 |   //2. Read the file
17 |   cvml.read_csv("char_dataset.csv");
18 |   //cvml.read_csv("strokes_dataset_noresized.csv");
19 | 
20 |   //3. Indicate which column is the response
21 |   cvml.set_response_idx(0);
22 | 
23 | 
24 |   /* STEP 3. Splitting the samples */
25 |   //1. Select 90% of the samples for training (an integer count is also allowed here)
26 |   CvTrainTestSplit cvtts(0.9f, true);
27 |   //2. Assign the division to the data
28 |   cvml.set_train_test_split(&cvtts);
29 | 
30 |   /* STEP 4. The training */
31 |   //1. Declare the classifier
32 |   CvBoost boost;
33 | 
34 |   ifstream ifile("./trained_boost_char.xml");
35 |   if (ifile)
36 |   {
37 |     // The file exists, so we don't need to train
38 |     boost.load("./trained_boost_char.xml", "boost");
39 |   } else {
40 |     //2. Train it with 200 weak classifiers
41 |     printf("Training ... \n");
42 |     boost.train(&cvml, CvBoostParams(CvBoost::REAL, 200, 0, 1, false, 0), false);
43 |   }
44 | 
45 |   /* STEP 5. Calculating the testing and training error */
46 |   // 1. Declare a couple of vectors to save the predictions of each sample
47 |   std::vector<float> train_responses, test_responses;
48 |   // 2. Calculate the training error
49 |   float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses);
50 |   // 3. Calculate the test error
51 |   float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses);
52 |   printf("Error train %f \n", fl1);
53 |   printf("Error test %f \n", fl2);
54 | 
55 | 
56 |   //Try a char
57 |   static const float arr[] = {0,1.659899,0.684169,0.412175,150.000000,81.000000,0.540000,0.358025,0.151203,0.000000,0.000000};
58 | 
59 |   vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
60 |   float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false );
61 |   float votes = boost.predict( Mat(sample), Mat(), Range::all(), false, true );
62 | 
63 |   printf("\n The sample (360) is predicted as: %f (with number of votes = %f)\n", prediction,votes);
64 | 
65 |   //Try a NONchar
66 |   static const float arr2[] = {0,1.250000,0.433013,0.346410,9.000000,8.000000,0.888889,0.833333,0.375000,0.000000,0.000000};
67 | 
68 |   vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) );
69 |   prediction = boost.predict( Mat(sample2), Mat(), Range::all(), false, false );
70 |   votes = boost.predict( Mat(sample2), Mat(), Range::all(), false, true );
71 | 
72 |   printf("\n The sample (367) is predicted as: %f (with number of votes = %f)\n", prediction,votes);
73 | 
74 |   /* STEP 6. Save your classifier */
75 |   // Save the trained classifier
76 |   boost.save("./trained_boost_char.xml", "boost");
77 | 
78 |   return EXIT_SUCCESS;
79 | }
--------------------------------------------------------------------------------
/boost_train/boost_groups_train.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "opencv/cv.h"
 3 | #include "opencv/ml.h"
 4 | #include <fstream>
 5 | #include <vector>
 6 | 
 7 | using namespace std;
 8 | using namespace cv;
 9 | 
10 | int main(int argc, char** argv) {
11 | 
12 |   /* STEP 2. Opening the file */
13 |   //1. Declare a structure to keep the data
14 |   CvMLData cvml;
15 | 
16 |   //2. Read the file
17 |   cvml.read_csv("groups_dataset.csv");
18 |   //cvml.read_csv("strokes_dataset_noresized.csv");
19 | 
20 |   //3. Indicate which column is the response
21 |   cvml.set_response_idx(0);
22 | 
23 | 
24 |   /* STEP 3. Splitting the samples */
25 |   //1. Select 90% of the samples for training (an integer count is also allowed here)
26 |   CvTrainTestSplit cvtts(0.9f, true);
27 |   //2. Assign the division to the data
28 |   cvml.set_train_test_split(&cvtts);
29 | 
30 |   /* STEP 4. The training */
31 |   //1. Declare the classifier
32 |   CvBoost boost;
33 | 
34 |   ifstream ifile("./trained_boost_groups.xml");
35 |   if (ifile)
36 |   {
37 |     // The file exists, so we don't need to train
38 |     boost.load("./trained_boost_groups.xml", "boost");
39 |   } else {
40 |     //2. Train it with 500 weak classifiers
41 |     printf("Training ... \n");
42 |     boost.train(&cvml, CvBoostParams(CvBoost::REAL, 500, 0, 1, false, 0), false);
43 |   }
44 | 
45 |   /* STEP 5. Calculating the testing and training error */
46 |   // 1. Declare a couple of vectors to save the predictions of each sample
47 |   std::vector<float> train_responses, test_responses;
48 |   // 2. Calculate the training error
49 |   float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses);
50 |   // 3. Calculate the test error
51 |   float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses);
52 |   printf("Error train %f \n", fl1);
53 |   printf("Error test %f \n", fl2);
54 | 
55 |   static const float arr[] = {0,-1.980394,1.249858,-0.631116,2.819193,0.305448,0.108346,0.801116,0.104873,0.130908,0.559806,0.255053,0.455610,0.294118,0.455645,1.549193,0.087770,0.144896,1.650866};
56 |   vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
57 |   float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false );
58 |   float votes = boost.predict( Mat(sample), Mat(), Range::all(), false, true );
59 | 
60 |   printf("\n The group sample is predicted as: %f (with number of votes = %f)\n", prediction,votes);
61 | 
62 |   //static const float arr2[] = {0,0.911369,1.052156,1.154478,3.321924,0.829768,0.249785,0.616930,0.246637,0.399782,0.337159,0.103893,0.308142,0.666667,0.745356,1.118034,0.009747,0.011016,1.130162};
63 |   static const float arr2[] = {0,1.14335,3.00412,2.62747,3.26428,2.32749,0.713018,0.47244,0.289846,0.613508,0.40514,0.216716,0.53305,0.878788,3.21698,3.6607,0.0422318,0.114392,2.70868};
64 |   vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) );
65 |   float prediction2 = boost.predict( Mat(sample2), Mat(), Range::all(), false, false );
66 |   float votes2 = boost.predict( Mat(sample2), Mat(), Range::all(), false, true );
67 | 
68 |   printf("\n The group sample is predicted as: %f (with number of votes = %f)\n", prediction2,votes2);
69 | 
70 |   /* STEP 6. 
Save your classifier */ 71 | // Save the trained classifier 72 | boost.save("./trained_boost_groups.xml", "boost"); 73 | 74 | return EXIT_SUCCESS; 75 | } 76 | -------------------------------------------------------------------------------- /boost_train/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | g++ -O3 -march='core2' `pkg-config opencv --cflags` -c extract_char_features.cpp -o extract_char_features.o 4 | libtool --tag=CXX --mode=link g++ -O3 -march='core2' -o extract_char_features extract_char_features.o `pkg-config opencv --libs` 5 | 6 | g++ -O3 -march='core2' `pkg-config opencv --cflags` -c boost_char_train.cpp -o boost_char_train.o 7 | libtool --tag=CXX --mode=link g++ -O3 -march='core2' -o boost_char_train boost_char_train.o `pkg-config opencv --libs` 8 | 9 | g++ -O3 -march='core2' `pkg-config opencv --cflags` -c extract_group_features.cpp -o extract_group_features.o 10 | libtool --tag=CXX --mode=link g++ -O3 -march='core2' -o extract_group_features extract_group_features.o `pkg-config opencv --libs` 11 | 12 | g++ -O3 -march='core2' `pkg-config opencv --cflags` -c boost_groups_train.cpp -o boost_groups_train.o 13 | libtool --tag=CXX --mode=link g++ -O3 -march='core2' -o boost_groups_train boost_groups_train.o `pkg-config opencv --libs` 14 | -------------------------------------------------------------------------------- /boost_train/build_chars_cvs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import random 4 | import string 5 | 6 | 7 | command = "rm char_dataset.csv" 8 | process = subprocess.Popen(command, shell=True) 9 | process.wait() 10 | 11 | #generate some synthetic characters 12 | fonts = ["verdana.ttf","arial.ttf","times.ttf","comic.ttf"] 13 | for i in range(0,7500): 14 | gen_command = "convert -background black -fill white -font /usr/share/fonts/truetype/msttcorefonts/"+fonts[i%4]+" -pointsize "+str(random.randrange(18, 98))+" label:"+random.choice(string.ascii_letters)+" -rotate "+str(random.randrange(0,360))+" -page +0+0 synth.tiff" 15 | process = subprocess.Popen(gen_command, shell=True) 16 | process.wait() 17 | print gen_command 18 | command = "./extract_char_features synth.tiff C >> char_dataset.csv" 19 | gen_process = subprocess.Popen(command, shell=True) 20 | gen_process.wait() 21 | 22 | 23 | # labeled boundaries 24 | for dirname, dirnames, filenames in os.walk('../../Escriptori/text_extraction/data/train/characters/CHARS'): 25 | for filename in filenames: 26 | if ('jpg' in filename): 27 | image_filename = os.path.join(dirname, filename) 28 | print image_filename 29 | command = "./extract_char_features "+image_filename+" C >> char_dataset.csv" 30 | process = subprocess.Popen(command, shell=True) 31 | process.wait() 32 | 33 | for dirname, dirnames, filenames in os.walk('../../Escriptori/text_extraction/data/train/characters/NO_CHARS'): 34 | for filename in filenames: 35 | if ('jpg' in filename): 36 | image_filename = os.path.join(dirname, filename) 37 | print image_filename 38 | command = "./extract_char_features "+image_filename+" N >> char_dataset.csv" 39 | process = subprocess.Popen(command, shell=True) 40 | process.wait() 41 | -------------------------------------------------------------------------------- /boost_train/build_groups_cvs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import random 4 | import string 5 | 6 | command = "rm 
groups_dataset.csv" 7 | process = subprocess.Popen(command, shell=True) 8 | process.wait() 9 | 10 | #generate some synthetic texts 11 | msfontdir = "/usr/share/fonts/truetype/msttcorefonts/" 12 | #dictionery with words examples 13 | words = ["llnear","comblnatlon","any","system","vectors","wlth","all","zero","coefflclents","zero","vector","only","way","express","zero","vector","llnear","comblnatlon","these","vectors","are","llnearly","lndependent","Glven","set","vectors","that","span","space,","any","vector","llnear","comblnatlon","other","vectors","set","not","llnearly","lndependent","then","span","would","remaln","the","same","remove","from","the","set.","Thus,","set","llnearly","dependent","vectors","redundant","the","sense","that","llnearly","lndependent","subset","wlll","span","same","subspace.","Therefore,","are","mostly","lnterested","llnearly","lndependent","set","vectors","that","spans","vector","space","whlch","call","basls","","Any","set","vectors","that","spans","contalns","basls,","and","any","llnearly","lndependent","set","vectors","can","extended","basls","turns","out","that","accept","axlom","cholce,","every","vector","space","has","basls","nevertheless,","thls","basls","may","unnatural,","and","lndeed","may","not","even","constructable.","For","lnstance","there","exlsts","basls","for","real","numbers","consldered","vector","space","over","the","ratlonals,","but","expllclt","basls","has","been","constructed"] 14 | 15 | #generate some synthetic characters from MS core fonts 16 | fonts = ["Andale_Mono.ttf","andalemo.ttf","arialbd.ttf","arialbi.ttf","Arial_Black.ttf","Arial_Bold_Italic.ttf","Arial_Bold.ttf","Arial_Italic.ttf","ariali.ttf","arial.ttf","Arial.ttf","ariblk.ttf","comicbd.ttf","Comic_Sans_MS_Bold.ttf","Comic_Sans_MS.ttf","comic.ttf","courbd.ttf","courbi.ttf","Courier_New_Bold_Italic.ttf","Courier_New_Bold.ttf","Courier_New_Italic.ttf","Courier_New.ttf","couri.ttf","cour.ttf","Georgia_Bold_Italic.ttf","Georgia_Bold.ttf","georgiab.ttf","Georgia_Italic.ttf","georgiai.ttf","georgia.ttf","Georgia.ttf","georgiaz.ttf","impact.ttf","Impact.ttf","timesbd.ttf","timesbi.ttf","timesi.ttf","Times_New_Roman_Bold_Italic.ttf","Times_New_Roman_Bold.ttf","Times_New_Roman_Italic.ttf","Times_New_Roman.ttf","times.ttf","trebucbd.ttf","trebucbi.ttf","Trebuchet_MS_Bold_Italic.ttf","Trebuchet_MS_Bold.ttf","Trebuchet_MS_Italic.ttf","Trebuchet_MS.ttf","trebucit.ttf","trebuc.ttf","Verdana_Bold_Italic.ttf","Verdana_Bold.ttf","verdanab.ttf","Verdana_Italic.ttf","verdanai.ttf","verdana.ttf","Verdana.ttf","verdanaz.ttf"]; 17 | for i in range(0,731): 18 | num_words = random.randrange(1,5) 19 | text = "" 20 | for j in range(0,num_words): 21 | separator = " " 22 | if (random.randrange(0,10) > 5): 23 | separator = "\\n"; 24 | if (random.randrange(0,10) > 5): 25 | text = text + words[random.randrange(0,len(words))] + separator 26 | else: 27 | text = text + words[random.randrange(0,len(words))].upper() + separator 28 | 29 | 30 | gen_command = "convert -background white -fill black -font "+msfontdir+fonts[i%len(fonts)]+" -pointsize "+str(random.randrange(100, 198))+" label:'"+text+"' -rotate "+str(random.randrange(0,360))+" -page +0+0 synth.tiff" 31 | #convert -background white -fill black -font Cursi -pointsize 24 -gravity center label:'ImageMagick\n' label_centered.gif 32 | process = subprocess.Popen(gen_command, shell=True) 33 | process.wait() 34 | print gen_command 35 | command = "./extract_group_features synth.tiff C >> groups_dataset.csv" 36 | gen_process = subprocess.Popen(command, shell=True) 
37 |     gen_process.wait()
38 | 
39 | # labeled boundaries
40 | for dirname, dirnames, filenames in os.walk('../../Escriptori/text_extraction/data/train/groups/TEXT/'):
41 |     for filename in filenames:
42 |         if ('jpg' in filename):
43 |             image_filename = os.path.join(dirname, filename)
44 |             print image_filename
45 |             command = "./extract_group_features "+image_filename+" C >> groups_dataset.csv"
46 |             process = subprocess.Popen(command, shell=True)
47 |             process.wait()
48 | 
49 | for dirname, dirnames, filenames in os.walk('../../Escriptori/text_extraction/data/train/groups/NO_TEXT/'):
50 |     for filename in filenames:
51 |         if ('jpg' in filename):
52 |             image_filename = os.path.join(dirname, filename)
53 |             print image_filename
54 |             command = "./extract_group_features "+image_filename+" N >> groups_dataset.csv"
55 |             process = subprocess.Popen(command, shell=True)
56 |             process.wait()
57 | 
--------------------------------------------------------------------------------
/boost_train/extract_char_features.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdio.h>
 3 | #include <math.h>
 4 | 
 5 | #include <opencv/cv.h>
 6 | #include <opencv/highgui.h>
 7 | 
 8 | using namespace cv;
 9 | using namespace std;
10 | 
11 | 
12 | 
13 | 
14 | int main( int argc, char** argv )
15 | {
16 | 
17 |   RotatedRect bbox;
18 |   int area = 0;
19 |   int perimeter = 0;
20 |   int num_holes = 0;
21 |   int holes_area = 0;
22 |   float stroke_mean = 0;
23 |   float stroke_std = 0;
24 | 
25 |   Mat bw = imread(argv[1], 0);
26 |   threshold( bw, bw, 128, 255, THRESH_BINARY );
27 | 
28 |   Mat tmp;
29 |   distanceTransform(bw, tmp, CV_DIST_L1,3); //L1 gives distance in round integers while L2 floats
30 | 
31 |   Scalar mean,std;
32 |   meanStdDev(tmp,mean,std,bw);
33 |   stroke_mean = mean[0];
34 |   stroke_std = std[0];
35 | 
36 |   vector<vector<Point> > contours0;
37 |   vector<Vec4i> hierarchy;
38 |   findContours( bw, contours0, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE);
39 | 
40 |   area = contourArea(Mat(contours0.at(0)));
41 | 
42 | 
43 |   for (int k=0; k<contours0.size(); k++)
44 |   {
45 |     if ((hierarchy[k][3]!=-1)&&(((float)contourArea(Mat(contours0.at(k)))/area>0.01)||(contourArea(Mat(contours0.at(k)))>31)))
46 |     {
47 |       num_holes++;
48 |       holes_area += (int)contourArea(Mat(contours0.at(k)));
49 |     }
50 |   }
51 | 
52 |   perimeter = (int)contours0.at(0).size();
53 | 
54 |   bbox = minAreaRect(contours0.at(0));
55 | 
56 |   fprintf(stdout,"%s,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f\n", argv[2], stroke_mean, stroke_std, stroke_std/stroke_mean, (float)area, (float)perimeter, (float)perimeter/area, (float)min(bbox.size.width, bbox.size.height)/max(bbox.size.width, bbox.size.height), sqrt(area)/perimeter, (float)num_holes, (float)holes_area/area);
57 | 
58 |   return(0);
59 | }
60 | 
--------------------------------------------------------------------------------
/boost_train/extract_group_features.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdio.h>
 3 | #include <math.h>
 4 | #include <fstream>
 5 | 
 6 | #include <opencv/cv.h>
 7 | #include <opencv/ml.h>
 8 | #include <opencv/highgui.h>
 9 | 
10 | using namespace cv;
11 | using namespace std;
12 | 
13 | 
14 | 
15 | // Boosted tree classifier for single characters
16 | CvBoost character_classifier;
17 | 
18 | float classifyRegion( Mat& region, float &_stroke_mean, float &_aspect_ratio, float &_compactness, float &_num_holes, float &_holearea_area_ratio )
19 | {
20 | 
21 |   RotatedRect bbox;
22 |   int area = 0;
23 |   int perimeter = 0;
24 |   int holes_area = 0;
25 |   float stroke_mean = 0;
26 |   float stroke_std = 0;
27 |   Mat bw, tmp;
28 |   region.copyTo(bw);
29 |   distanceTransform(bw, tmp, CV_DIST_L1,3); //L1 gives distance in round integers while L2 floats
30 | 
31 |   Scalar mean,std;
32 |   meanStdDev(tmp,mean,std,bw);
33 |   stroke_mean = mean[0];
34 |   stroke_std = std[0];
35 | 
36 |   vector<vector<Point> > contours0;
37 |   vector<Vec4i> hierarchy;
38 |   findContours( bw, contours0, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE);
39 | 
40 |   area = contourArea(Mat(contours0.at(0)));
41 | 
42 |   _num_holes = 0;
43 | 
44 |   for (int k=0; k<contours0.size(); k++)
45 |   {
46 | 
47 |     if ((hierarchy[k][3]!=-1)&&(((float)contourArea(Mat(contours0.at(k)))/area>0.01)||(contourArea(Mat(contours0.at(k)))>31)))
48 |     {
49 |       _num_holes++;
50 |       holes_area += (int)contourArea(Mat(contours0.at(k)));
51 |     }
52 |   }
53 | 
54 |   perimeter = (int)contours0.at(0).size();
55 | 
56 |   bbox = minAreaRect(contours0.at(0));
57 | 
58 |   //fprintf(stdout,"X %f %f %f %f %f\n", stroke_std/stroke_mean, (float)min(bbox.size.width, bbox.size.height)/max(bbox.size.width, bbox.size.height), sqrt(area)/perimeter, (float)_num_holes, (float)holes_area/area);
59 | 
60 |   _stroke_mean = stroke_mean;
61 |   _aspect_ratio = (float)min(bbox.size.width, bbox.size.height)/max(bbox.size.width, bbox.size.height);
62 |   _compactness = sqrt(area)/perimeter;
63 |   _holearea_area_ratio = (float)holes_area/area;
64 | 
65 | 
66 |   float arr[] = {0, stroke_mean, stroke_std, stroke_std/stroke_mean, (float)area, (float)perimeter, (float)perimeter/area, _aspect_ratio, _compactness, _num_holes, _holearea_area_ratio};
67 |   vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
68 | 
69 |   float votes = character_classifier.predict( Mat(sample), Mat(), Range::all(), false, true );
70 |   return votes;
71 | }
72 | 
73 | 
74 | 
75 | int main( int argc, char** argv )
76 | {
77 | 
78 | 
79 |   ifstream ifile("./trained_boost_char.xml");
80 |   if (ifile)
81 |   {
82 |     character_classifier.load("./trained_boost_char.xml", "boost");
83 |   } else {
84 |     fprintf(stderr,"File ./trained_boost_char.xml not found! \n");
85 |     exit(-1);
86 |   }
87 | 
88 |   Mat bw = imread(argv[1], 0);
89 | 
90 |   copyMakeBorder(bw, bw, 1, 1, 1, 1, BORDER_CONSTANT, Scalar(255));
91 |   threshold( bw, bw, 128, 255, THRESH_BINARY_INV ); //group samples are black over white
92 | 
93 |   vector<vector<Point> > contours;
94 |   vector<Vec4i> hierarchy;
95 | 
96 | 
97 |   Mat bw2;
98 |   bw.copyTo(bw2);
99 | 
100 |   findContours( bw2, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE);
101 | 
102 | 
103 |   int num_regions = 0;
104 | 
105 |   for( int i = 0; i < contours.size(); i++ )
106 |     if ((hierarchy[i][3]==-1))
107 |       num_regions++;
108 | 
109 |   Mat votes ( num_regions, 1, CV_32F, 1 );
110 |   Mat stroke_means ( num_regions, 1, CV_32F, 1 );
111 |   Mat aspect_ratios ( num_regions, 1, CV_32F, 1 );
112 |   Mat compactnesses ( num_regions, 1, CV_32F, 1 );
113 |   Mat nums_holes ( num_regions, 1, CV_32F, 1 );
114 |   Mat holeareas_area ( num_regions, 1, CV_32F, 1 );
115 | 
116 |   int idx = 0;
117 |   for( int i = 0; i < contours.size(); i++ )
118 |   {
119 |     if ((hierarchy[i][3]==-1))
120 |     {
121 |       Rect bbox = boundingRect(contours.at(i));
122 | 
123 |       Mat canvas = Mat::zeros(cvSize(bw.cols, bw.rows),CV_8UC1);
124 |       drawContours( canvas, contours, i, Scalar(255), CV_FILLED, 8, hierarchy );
125 | 
126 |       Mat region = Mat::zeros(cvSize(bbox.width+20, bbox.height+20),CV_8UC1);
127 | 
128 |       canvas(bbox).copyTo( region(Rect(10, 10, bbox.width, bbox.height)) );
129 | 
130 | 
131 |       float stroke_mean, aspect_ratio, compactness, num_holes, holearea_area_ratio;
132 |       votes.at<float>(idx,0) = classifyRegion(region, stroke_mean, aspect_ratio, compactness, num_holes, holearea_area_ratio);
133 |       stroke_means.at<float>(idx,0) = stroke_mean;
134 |       aspect_ratios.at<float>(idx,0) = aspect_ratio;
135 |       compactnesses.at<float>(idx,0) = compactness;
136 |       nums_holes.at<float>(idx,0) = num_holes;
137 |       holeareas_area.at<float>(idx,0) = holearea_area_ratio;
138 |       idx++;
139 |     }
140 | 
141 |   }
142 | 
143 |   Scalar 
mean,std; 144 | meanStdDev( votes, mean, std ); 145 | fprintf( stdout, "%s,%f,%f,%f", argv[2], mean[0], std[0], std[0]/mean[0] ); 146 | meanStdDev( stroke_means, mean, std ); 147 | fprintf( stdout, ",%f,%f,%f", mean[0], std[0], std[0]/mean[0] ); 148 | meanStdDev( aspect_ratios, mean, std ); 149 | fprintf( stdout, ",%f,%f,%f", mean[0], std[0], std[0]/mean[0] ); 150 | meanStdDev( compactnesses, mean, std ); 151 | fprintf( stdout, ",%f,%f,%f", mean[0], std[0], std[0]/mean[0] ); 152 | meanStdDev( nums_holes, mean, std ); 153 | fprintf( stdout, ",%f,%f,%f", mean[0], std[0], std[0]/mean[0] ); 154 | meanStdDev( holeareas_area, mean, std ); 155 | fprintf( stdout, ",%f,%f,%f\n", mean[0], std[0], std[0]/mean[0] ); 156 | 157 | return 0; 158 | } 159 | 160 | 161 | -------------------------------------------------------------------------------- /fast_clustering.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | fastcluster: Fast hierarchical clustering routines for R and Python 3 | 4 | Copyright © 2011 Daniel Müllner 5 | 6 | 7 | This library implements various fast algorithms for hierarchical, agglomerative 8 | ` clustering methods: 9 | 10 | (1) Algorithms for the "stored matrix approach": the input is the array of 11 | pairwise dissimilarities. 12 | 13 | MST_linkage_core: single linkage clustering with the "minimum spanning tree 14 | algorithm (Rohlfs) 15 | 16 | NN_chain_core: nearest-neighbor-chain algorithm, suitable for single, 17 | complete, average, weighted and Ward linkage (Murtagh) 18 | 19 | generic_linkage: generic algorithm, suitable for all distance update formulas 20 | (Müllner) 21 | 22 | (2) Algorithms for the "stored data approach": the input are points in a vector 23 | space. 24 | 25 | MST_linkage_core_vector: single linkage clustering for vector data 26 | 27 | generic_linkage_vector: generic algorithm for vector data, suitable for 28 | the Ward, centroid and median methods. 29 | 30 | generic_linkage_vector_alternative: alternative scheme for updating the 31 | nearest neighbors. This method seems faster than "generic_linkage_vector" 32 | for the centroid and median methods but slower for the Ward method. 33 | */ 34 | 35 | //#define __STDC_LIMIT_MACROS 36 | //#include 37 | 38 | #include // for infinity() 39 | 40 | #include 41 | #ifndef DBL_MANT_DIG 42 | #error The constant DBL_MANT_DIG could not be defined. 43 | #endif 44 | 45 | //#include 46 | #include 47 | 48 | #ifndef LONG_MAX 49 | #include 50 | #endif 51 | #ifndef LONG_MAX 52 | #error The constant LONG_MAX could not be defined. 53 | #endif 54 | #ifndef INT_MAX 55 | #error The constant INT_MAX could not be defined. 56 | #endif 57 | 58 | #ifndef INT32_MAX 59 | #define __STDC_LIMIT_MACROS 60 | #include 61 | #endif 62 | 63 | #include 64 | 65 | typedef int_fast32_t t_index; 66 | #ifndef INT32_MAX 67 | #define MAX_INDEX 0x7fffffffL 68 | #else 69 | #define MAX_INDEX INT32_MAX 70 | #endif 71 | #if (LONG_MAX < MAX_INDEX) 72 | #error The integer format "t_index" must not have a greater range than "long int". 73 | #endif 74 | #if (INT_MAX > MAX_INDEX) 75 | #error The integer format "int" must not have a greater range than "t_index". 
76 | #endif 77 | typedef double t_float; 78 | #define T_FLOAT_MANT_DIG DBL_MANT_DIG 79 | 80 | enum method_codes { 81 | // non-Euclidean methods 82 | METHOD_METR_SINGLE = 0, 83 | METHOD_METR_COMPLETE = 1, 84 | METHOD_METR_AVERAGE = 2, 85 | METHOD_METR_WEIGHTED = 3, 86 | METHOD_METR_WARD = 4, 87 | METHOD_METR_CENTROID = 5, 88 | METHOD_METR_MEDIAN = 6 89 | }; 90 | 91 | enum { 92 | // Euclidean methods 93 | METHOD_VECTOR_SINGLE = 0, 94 | METHOD_VECTOR_WARD = 1, 95 | METHOD_VECTOR_CENTROID = 2, 96 | METHOD_VECTOR_MEDIAN = 3 97 | }; 98 | 99 | enum { 100 | // Return values 101 | RET_SUCCESS = 0, 102 | RET_MEMORY_ERROR = 1, 103 | RET_STL_ERROR = 2, 104 | RET_UNKNOWN_ERROR = 3 105 | }; 106 | 107 | // self-destructing array pointer 108 | template 109 | class auto_array_ptr{ 110 | private: 111 | type * ptr; 112 | public: 113 | auto_array_ptr() { ptr = NULL; } 114 | template 115 | auto_array_ptr(index const size) { init(size); } 116 | template 117 | auto_array_ptr(index const size, value const val) { init(size, val); } 118 | ~auto_array_ptr() { 119 | delete [] ptr; } 120 | void free() { 121 | delete [] ptr; 122 | ptr = NULL; 123 | } 124 | template 125 | void init(index const size) { 126 | ptr = new type [size]; 127 | } 128 | template 129 | void init(index const size, value const val) { 130 | init(size); 131 | for (index i=0; idist < a.dist; 143 | } 144 | */ 145 | 146 | inline friend bool operator< (const node a, const node b) { 147 | // Numbers are always smaller than NaNs. 148 | return a.dist < b.dist || (a.dist==a.dist && b.dist!=b.dist); 149 | } 150 | }; 151 | 152 | class cluster_result { 153 | private: 154 | auto_array_ptr Z; 155 | t_index pos; 156 | 157 | public: 158 | cluster_result(const t_index size) 159 | : Z(size) 160 | { 161 | pos = 0; 162 | } 163 | 164 | void append(const t_index node1, const t_index node2, const t_float dist) { 165 | Z[pos].node1 = node1; 166 | Z[pos].node2 = node2; 167 | Z[pos].dist = dist; 168 | pos++; 169 | } 170 | 171 | node * operator[] (const t_index idx) const { return Z + idx; } 172 | 173 | /* Define several methods to postprocess the distances. All these functions 174 | are monotone, so they do not change the sorted order of distances. */ 175 | 176 | void sqrt() const { 177 | for (t_index i=0; i succ; 231 | 232 | private: 233 | auto_array_ptr pred; 234 | // Not necessarily private, we just do not need it in this instance. 235 | 236 | public: 237 | doubly_linked_list(const t_index size) 238 | // Initialize to the given size. 239 | : succ(size+1), pred(size+1) 240 | { 241 | for (t_index i=0; i(2*N-3-(r_))*(r_)>>1)+(c_)-1] ) 271 | // Z is an ((N-1)x4)-array 272 | #define Z_(_r, _c) (Z[(_r)*4 + (_c)]) 273 | 274 | /* 275 | Lookup function for a union-find data structure. 276 | 277 | The function finds the root of idx by going iteratively through all 278 | parent elements until a root is found. An element i is a root if 279 | nodes[i] is zero. To make subsequent searches faster, the entry for 280 | idx and all its parents is updated with the root element. 
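   Illustrative trace (added sketch, indices hypothetical): with size N = 3,
   init(3) allocates parent[0..4] = 0 (every index is its own root) and sets
   nextparent = 3. Union(0, 1) assigns parent[0] = parent[1] = 3, so Find(0)
   and Find(1) both return 3. After a further Union(3, 2), Find(0) walks
   0 -> 3 -> 4, returns 4, and path compression rewrites parent[0] = 4 so the
   next lookup is a single step.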
281 | */ 282 | class union_find { 283 | private: 284 | auto_array_ptr parent; 285 | t_index nextparent; 286 | 287 | public: 288 | void init(const t_index size) { 289 | parent.init(2*size-1, 0); 290 | nextparent = size; 291 | } 292 | 293 | t_index Find (t_index idx) const { 294 | if (parent[idx] !=0 ) { // a → b 295 | t_index p = idx; 296 | idx = parent[idx]; 297 | if (parent[idx] !=0 ) { // a → b → c 298 | do { 299 | idx = parent[idx]; 300 | } while (parent[idx] != 0); 301 | do { 302 | t_index tmp = parent[p]; 303 | parent[p] = idx; 304 | p = tmp; 305 | } while (parent[p] != idx); 306 | } 307 | } 308 | return idx; 309 | } 310 | 311 | void Union (const t_index node1, const t_index node2) { 312 | parent[node1] = parent[node2] = nextparent++; 313 | } 314 | }; 315 | 316 | static void MST_linkage_core(const t_index N, const t_float * const D, 317 | cluster_result & Z2) { 318 | /* 319 | N: integer, number of data points 320 | D: condensed distance matrix N*(N-1)/2 321 | Z2: output data structure 322 | 323 | The basis of this algorithm is an algorithm by Rohlf: 324 | 325 | F. James Rohlf, Hierarchical clustering using the minimum spanning tree, 326 | The Computer Journal, vol. 16, 1973, p. 93–95. 327 | 328 | This implementation should handle Inf values correctly (designed to 329 | do so but not tested). 330 | 331 | This implementation avoids NaN if possible. It treats NaN as if it was 332 | greater than +Infinity, ie. whenever we find a non-NaN value, this is 333 | preferred in all the minimum-distance searches. 334 | */ 335 | t_index i; 336 | t_index idx2; 337 | doubly_linked_list active_nodes(N); 338 | auto_array_ptr d(N); 339 | 340 | t_index prev_node; 341 | t_float min; 342 | 343 | // first iteration 344 | idx2 = 1; 345 | min = d[1] = D[0]; 346 | for (i=2; min!=min && i D_(i, prev_node)) 371 | d[i] = D_(i, prev_node); 372 | if (d[i] < min) { 373 | min = d[i]; 374 | idx2 = i; 375 | } 376 | } 377 | for (; min!=min && i D_(prev_node, i)) 383 | d[i] = D_(prev_node, i); 384 | if (d[i] < min) { 385 | min = d[i]; 386 | idx2 = i; 387 | } 388 | } 389 | Z2.append(prev_node, idx2, min); 390 | } 391 | } 392 | 393 | /* Functions for the update of the dissimilarity array */ 394 | 395 | inline static void f_single( t_float * const b, const t_float a ) { 396 | if (*b > a) *b = a; 397 | } 398 | inline static void f_complete( t_float * const b, const t_float a ) { 399 | if (*b < a) *b = a; 400 | } 401 | inline static void f_average( t_float * const b, const t_float a, const t_float s, const t_float t) { 402 | *b = s*a + t*(*b); 403 | } 404 | inline static void f_weighted( t_float * const b, const t_float a) { 405 | *b = (a+*b)/2; 406 | } 407 | inline static void f_ward( t_float * const b, const t_float a, const t_float c, const t_float s, const t_float t, const t_float v) { 408 | *b = ( (v+s)*a - v*c + (v+t)*(*b) ) / (s+t+v); 409 | //*b = a+(*b)-(t*a+s*(*b)+v*c)/(s+t+v); 410 | } 411 | inline static void f_centroid( t_float * const b, const t_float a, const t_float stc, const t_float s, const t_float t) { 412 | *b = s*a + t*(*b) - stc; 413 | } 414 | inline static void f_median( t_float * const b, const t_float a, const t_float c_4) { 415 | *b = (a+(*b))/2 - c_4; 416 | } 417 | 418 | template 419 | static void NN_chain_core(const t_index N, t_float * const D, t_members * const members, cluster_result & Z2) { 420 | /* 421 | N: integer 422 | D: condensed distance matrix N*(N-1)/2 423 | Z2: output data structure 424 | 425 | This is the NN-chain algorithm, described on page 86 in the following book: 426 | 427 |  
Fionn Murtagh, Multidimensional Clustering Algorithms, 428 | Vienna, Würzburg: Physica-Verlag, 1985. 429 | 430 | This implementation does not give defined results when NaN or Inf values 431 | are present in the array D. 432 | */ 433 | t_index i; 434 | 435 | auto_array_ptr NN_chain(N); 436 | t_index NN_chain_tip = 0; 437 | 438 | t_index idx1, idx2; 439 | 440 | t_float size1, size2; 441 | doubly_linked_list active_nodes(N); 442 | 443 | t_float min; 444 | 445 | for (t_index j=0; jidx2) { 490 | t_index tmp = idx1; 491 | idx1 = idx2; 492 | idx2 = tmp; 493 | } 494 | 495 | if (method==METHOD_METR_AVERAGE || 496 | method==METHOD_METR_WARD) { 497 | size1 = static_cast(members[idx1]); 498 | size2 = static_cast(members[idx2]); 499 | members[idx2] += members[idx1]; 500 | } 501 | 502 | // Remove the smaller index from the valid indices (active_nodes). 503 | active_nodes.remove(idx1); 504 | 505 | switch (method) { 506 | case METHOD_METR_SINGLE: 507 | /* 508 | Single linkage. 509 | 510 | Characteristic: new distances are never longer than the old distances. 511 | */ 512 | // Update the distance matrix in the range [start, idx1). 513 | for (i=active_nodes.start; i(members[i]); 586 | for (i=active_nodes.start; i(members[i]) ); 589 | // Update the distance matrix in the range (idx1, idx2). 590 | for (; i(members[i]) ); 593 | // Update the distance matrix in the range (idx2, N). 594 | for (i=active_nodes.succ[idx2]; i(members[i]) ); 597 | break; 598 | } 599 | } 600 | } 601 | 602 | class binary_min_heap { 603 | /* 604 | Class for a binary min-heap. The data resides in an array A. The elements of A 605 | are not changed but two lists I and R of indices are generated which point to 606 | elements of A and backwards. 607 | 608 | The heap tree structure is 609 | 610 | H[2*i+1] H[2*i+2] 611 | \ / 612 | \ / 613 | ≤ ≤ 614 | \ / 615 | \ / 616 | H[i] 617 | 618 | where the children must be less or equal than their parent. Thus, H[0] contains 619 | the minimum. The lists I and R are made such that H[i] = A[I[i]] and R[I[i]] = i. 620 | 621 | This implementation avoids NaN if possible. It treats NaN as if it was 622 | greater than +Infinity, ie. whenever we find a non-NaN value, this is 623 | preferred in all comparisons. 624 | */ 625 | private: 626 | t_float * A; 627 | t_index size; 628 | auto_array_ptr I; 629 | auto_array_ptr R; 630 | 631 | public: 632 | binary_min_heap(const t_index size) 633 | : I(size), R(size) 634 | { // Allocate memory and initialize the lists I and R to the identity. This does 635 | // not make it a heap. Call heapify afterwards! 636 | this->size = size; 637 | for (t_index i=0; isize = size1; 646 | for (t_index i=0; iA = A; 661 | for (idx=(size>>1); idx>0; ) { 662 | idx--; 663 | update_geq_(idx); 664 | } 665 | } 666 | 667 | inline t_index argmin() const { 668 | // Return the minimal element. 669 | return I[0]; 670 | } 671 | 672 | void heap_pop() { 673 | // Remove the minimal element from the heap. 674 | size--; 675 | I[0] = I[size]; 676 | R[I[0]] = 0; 677 | update_geq_(0); 678 | } 679 | 680 | void remove(t_index idx) { 681 | // Remove an element from the heap. 682 | size--; 683 | R[I[size]] = R[idx]; 684 | I[R[idx]] = I[size]; 685 | if ( H(size)<=A[idx] || A[idx]!=A[idx] ) { 686 | update_leq_(R[idx]); 687 | } 688 | else { 689 | update_geq_(R[idx]); 690 | } 691 | } 692 | 693 | void replace ( const t_index idxold, const t_index idxnew, const t_float val) { 694 | R[idxnew] = R[idxold]; 695 | I[R[idxnew]] = idxnew; 696 | if (val<=A[idxold] || A[idxold]!=A[idxold]) // avoid NaN! ???????????????????? 
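// Note: A[idxold]!=A[idxold] is true exactly when A[idxold] is NaN, so a NaN
// entry compares as "greater than everything" and is overwritten via the
// update_leq branch, matching the "NaN is treated as greater than +Infinity"
// convention stated in the class comment above.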
697 | update_leq(idxnew, val); 698 | else 699 | update_geq(idxnew, val); 700 | } 701 | 702 | void update ( const t_index idx, const t_float val ) const { 703 | // Update the element A[i] with val and re-arrange the indices the preserve the 704 | // heap condition. 705 | if (val<=A[idx] || A[idx]!=A[idx]) // avoid NaN! ???????????????????? 706 | update_leq(idx, val); 707 | else 708 | update_geq(idx, val); 709 | } 710 | 711 | void update_leq ( const t_index idx, const t_float val ) const { 712 | // Use this when the new value is not more than the old value. 713 | A[idx] = val; 714 | update_leq_(R[idx]); 715 | } 716 | 717 | void update_geq ( const t_index idx, const t_float val ) const { 718 | // Use this when the new value is not less than the old value. 719 | A[idx] = val; 720 | update_geq_(R[idx]); 721 | } 722 | 723 | private: 724 | void update_leq_ (t_index i) const { 725 | t_index j; 726 | for ( ; (i>0) && ( H(i)>1) || H(j)!=H(j) ); i=j) 727 | // avoid NaN! 728 | heap_swap(i,j); 729 | } 730 | 731 | void update_geq_ (t_index i) const { 732 | t_index j; 733 | for ( ; (j=2*i+1)=H(i) || H(j)!=H(j) ) { // avoid Nan! 735 | j++; 736 | if ( j>=size || H(j)>=H(i) || H(j)!=H(j) ) break; // avoid NaN! 737 | } 738 | else if ( j+1 759 | static void generic_linkage(const t_index N, t_float * const D, t_members * const members, cluster_result & Z2) { 760 | /* 761 | N: integer, number of data points 762 | D: condensed distance matrix N*(N-1)/2 763 | Z2: output data structure 764 | 765 | This implementation does not give defined results when NaN or Inf values 766 | are present in the array D. 767 | */ 768 | 769 | const t_index N_1 = N-1; 770 | t_index i, j; // loop variables 771 | t_index idx1, idx2; // row and column indices 772 | 773 | auto_array_ptr n_nghbr(N_1); // array of nearest neighbors 774 | auto_array_ptr mindist(N_1); // distances to the nearest neighbors 775 | auto_array_ptr row_repr(N); // row_repr[i]: node number that the i-th row 776 | // represents 777 | doubly_linked_list active_nodes(N); 778 | binary_min_heap nn_distances(N_1); // minimum heap structure for the distance 779 | // to the nearest neighbor of each point 780 | 781 | t_index node1, node2; // node numbers in the output 782 | t_float size1, size2; // and their cardinalities 783 | 784 | t_float min; // minimum and row index for nearest-neighbor search 785 | t_index idx; 786 | 787 | for (i=0; ii} D(i,j) for i in range(N-1) 795 | t_float * DD = D; 796 | for (i=0; ii} D(i,j) 824 | 825 | Normally, we have equality. However, this minimum may become invalid due to 826 | the updates in the distance matrix. The rules are: 827 | 828 | 1) If mindist[i] is equal to D(i, n_nghbr[i]), this is the correct minimum 829 | and n_nghbr[i] is a nearest neighbor. 830 | 831 | 2) If mindist[i] is smaller than D(i, n_nghbr[i]), this might not be the 832 | correct minimum. The minimum needs to be recomputed. 833 | 834 | 3) mindist[i] is never bigger than the true minimum. Hence, we never miss the 835 | true minimum if we take the smallest mindist entry, re-compute the value if 836 | necessary (thus maybe increasing it) and looking for the now smallest 837 | mindist entry until a valid minimal entry is found. This step is done in the 838 | lines below. 839 | 840 | The update process for D below takes care that these rules are fulfilled. This 841 | makes sure that the minima in the rows D(i,i+1:)of D are re-calculated when 842 | necessary but re-calculation is avoided whenever possible. 
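   (Added illustration with hypothetical numbers: suppose a merge updates row 3
   of D while mindist[3] = 0.7 is stale and the true minimum of D(3, j) over
   j > 3 is now 0.9. By rule 2 the entry is recomputed when index 3 reaches the
   top of the heap, and by rule 3 nothing is lost, because a recomputed value
   can only increase: mindist[3] never overestimates the true minimum, so the
   smallest valid candidate is always found.)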
843 | 844 | The re-calculation of the minima makes the worst-case runtime of this algorithm 845 | cubic in N. We avoid this whenever possible, and in most cases the runtime 846 | appears to be quadratic. 847 | */ 848 | idx1 = nn_distances.argmin(); 849 | if (method != METHOD_METR_SINGLE) { 850 | while ( D_(idx1, n_nghbr[idx1]) > mindist[idx1] ) { 851 | // Recompute the minimum mindist[idx1] and n_nghbr[idx1]. 852 | n_nghbr[idx1] = j = active_nodes.succ[idx1]; // exists, maximally N-1 853 | min = D_(idx1,j); 854 | for (j=active_nodes.succ[j]; j(members[idx1]); 878 | size2 = static_cast(members[idx2]); 879 | members[idx2] += members[idx1]; 880 | } 881 | Z2.append(node1, node2, mindist[idx1]); 882 | 883 | // Remove idx1 from the list of active indices (active_nodes). 884 | active_nodes.remove(idx1); 885 | // Index idx2 now represents the new (merged) node with label N+i. 886 | row_repr[idx2] = N+i; 887 | 888 | // Update the distance matrix 889 | switch (method) { 890 | case METHOD_METR_SINGLE: 891 | /* 892 | Single linkage. 893 | 894 | Characteristic: new distances are never longer than the old distances. 895 | */ 896 | // Update the distance matrix in the range [start, idx1). 897 | for (j=active_nodes.start; j(members[j]) ); 1033 | if (n_nghbr[j] == idx1) 1034 | n_nghbr[j] = idx2; 1035 | } 1036 | // Update the distance matrix in the range (idx1, idx2). 1037 | for (; j(members[j]) ); 1040 | if (D_(j, idx2)(members[j]) ); 1050 | min = D_(idx2,j); 1051 | for (j=active_nodes.succ[j]; j(members[j]) ); 1054 | if (D_(idx2,j) 1158 | static void MST_linkage_core_vector(const t_index N, 1159 | t_dissimilarity & dist, 1160 | cluster_result & Z2) { 1161 | /* 1162 | N: integer, number of data points 1163 | dist: function pointer to the metric 1164 | Z2: output data structure 1165 | 1166 | The basis of this algorithm is an algorithm by Rohlf: 1167 | 1168 | F. James Rohlf, Hierarchical clustering using the minimum spanning tree, 1169 | The Computer Journal, vol. 16, 1973, p. 93–95. 1170 | 1171 | This implementation should handle Inf values correctly (designed to 1172 | do so but not tested). 1173 | 1174 | This implementation avoids NaN if possible. It treats NaN as if it was 1175 | greater than +Infinity, ie. whenever we find a non-NaN value, this is 1176 | preferred in all the minimum-distance searches. 1177 | */ 1178 | t_index i; 1179 | t_index idx2; 1180 | doubly_linked_list active_nodes(N); 1181 | auto_array_ptr d(N); 1182 | 1183 | t_index prev_node; 1184 | t_float min; 1185 | 1186 | // first iteration 1187 | idx2 = 1; 1188 | min = d[1] = dist(0,1); 1189 | for (i=2; min!=min && i tmp) 1219 | d[i] = tmp; 1220 | if (d[i] < min) { 1221 | min = d[i]; 1222 | idx2 = i; 1223 | } 1224 | } 1225 | Z2.append(prev_node, idx2, min); 1226 | } 1227 | } 1228 | 1229 | template 1230 | static void generic_linkage_vector(const t_index N, 1231 | t_dissimilarity & dist, 1232 | cluster_result & Z2) { 1233 | /* 1234 | N: integer, number of data points 1235 | dist: function pointer to the metric 1236 | Z2: output data structure 1237 | 1238 | This algorithm is valid for the distance update methods 1239 | "Ward", "centroid" and "median" only! 1240 | 1241 | This implementation does not give defined results when NaN or Inf values 1242 | are returned by the distance function. 
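   Call sketch (added; it mirrors how linkage_vector() at the end of this file
   dispatches to this routine for the Ward update formula):

     auto_array_ptr<t_index> members;
     members.init(2*N-1, 1);          // every point starts as its own cluster
     dissimilarity dist(X, N, dim, members, METHOD_METR_WARD,
                        METRIC_EUCLIDEAN, false);
     cluster_result Z2(N-1);
     generic_linkage_vector<METHOD_METR_WARD>(N, dist, Z2);
     dist.postprocess(Z2);            // undo the squared-distance computation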
1243 | */ 1244 | const t_index N_1 = N-1; 1245 | t_index i, j; // loop variables 1246 | t_index idx1, idx2; // row and column indices 1247 | 1248 | auto_array_ptr n_nghbr(N_1); // array of nearest neighbors 1249 | auto_array_ptr mindist(N_1); // distances to the nearest neighbors 1250 | auto_array_ptr row_repr(N); // row_repr[i]: node number that the i-th 1251 | // row represents 1252 | doubly_linked_list active_nodes(N); 1253 | binary_min_heap nn_distances(N_1); // minimum heap structure for the distance 1254 | // to the nearest neighbor of each point 1255 | 1256 | t_index node1, node2; // node numbers in the output 1257 | t_float min; // minimum and row index for nearest-neighbor search 1258 | 1259 | for (i=0; ii} D(i,j) for i in range(N-1) 1267 | for (i=0; i 1447 | static void generic_linkage_vector_alternative(const t_index N, 1448 | t_dissimilarity & dist, 1449 | cluster_result & Z2) { 1450 | /* 1451 | N: integer, number of data points 1452 | dist: function pointer to the metric 1453 | Z2: output data structure 1454 | 1455 | This algorithm is valid for the distance update methods 1456 | "Ward", "centroid" and "median" only! 1457 | 1458 | This implementation does not give defined results when NaN or Inf values 1459 | are returned by the distance function. 1460 | */ 1461 | const t_index N_1 = N-1; 1462 | t_index i, j=0; // loop variables 1463 | t_index idx1, idx2; // row and column indices 1464 | 1465 | auto_array_ptr n_nghbr(2*N-2); // array of nearest neighbors 1466 | auto_array_ptr mindist(2*N-2); // distances to the nearest neighbors 1467 | 1468 | doubly_linked_list active_nodes(N+N_1); 1469 | binary_min_heap nn_distances(N_1, 2*N-2, 1); // minimum heap structure for the 1470 | // distance to the nearest neighbor of each point 1471 | 1472 | t_float min; // minimum for nearest-neighbor searches 1473 | 1474 | // Initialize the minimal distances: 1475 | // Find the nearest neighbor of each point. 1476 | // n_nghbr[i] = argmin_{jZ = Z; 1650 | pos = 0; 1651 | } 1652 | 1653 | void append(const t_index node1, const t_index node2, const t_float dist, const t_float size) { 1654 | if (node1(node1); 1656 | Z[pos++] = static_cast(node2); 1657 | } 1658 | else { 1659 | Z[pos++] = static_cast(node2); 1660 | Z[pos++] = static_cast(node1); 1661 | } 1662 | Z[pos++] = dist; 1663 | Z[pos++] = size; 1664 | } 1665 | }; 1666 | 1667 | /* 1668 | Generate the specific output format for a dendrogram from the 1669 | clustering output. 1670 | 1671 | The list of merging steps can be sorted or unsorted. 1672 | */ 1673 | 1674 | // The size of a node is either 1 (a single point) or is looked up from 1675 | // one of the clusters. 1676 | #define size_(r_) ( ((r_ 1679 | static void generate_dendrogram(t_float * const Z, cluster_result & Z2, const t_index N) { 1680 | //fprintf(stderr, " entering generate_dendrogram\n"); 1681 | 1682 | // The array "nodes" is a union-find data structure for the cluster 1683 | // identites (only needed for unsorted cluster_result input). 1684 | union_find nodes; 1685 | if (!sorted) { 1686 | std::stable_sort(Z2[0], Z2[N-1]); 1687 | nodes.init(N); 1688 | } 1689 | 1690 | linkage_output output(Z); 1691 | t_index node1, node2; 1692 | 1693 | for (t_index i=0; inode1; 1697 | node2 = Z2[i]->node2; 1698 | } 1699 | else { 1700 | // Find the cluster identifiers for these points. 1701 | node1 = nodes.Find(Z2[i]->node1); 1702 | node2 = nodes.Find(Z2[i]->node2); 1703 | // Merge the nodes in the union-find data structure by making them 1704 | // children of a new node. 
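// (Labeling convention: original observations keep labels 0..N-1; each merge
// created by Union() receives the next free label N, N+1, ..., 2N-2, which is
// what the size_() macro above relies on when looking up cluster sizes.)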
1705 | nodes.Union(node1, node2); 1706 | } 1707 | //fprintf(stderr, " node1 = %d , node2 = %d , Z2[i]->dist = %f , size_(node1)+size_(node2) = %f", node1, node2, Z2[i]->dist, size_(node1)+size_(node2)); 1708 | output.append(node1, node2, Z2[i]->dist, size_(node1)+size_(node2)); 1709 | } 1710 | } 1711 | 1712 | /* 1713 | Clustering on vector data 1714 | */ 1715 | 1716 | enum { 1717 | // metrics 1718 | METRIC_EUCLIDEAN = 0, 1719 | METRIC_MINKOWSKI = 1, 1720 | METRIC_CITYBLOCK = 2, 1721 | METRIC_SEUCLIDEAN = 3, 1722 | METRIC_SQEUCLIDEAN = 4, 1723 | METRIC_COSINE = 5, 1724 | METRIC_HAMMING = 6, 1725 | METRIC_JACCARD = 7, 1726 | METRIC_CHEBYCHEV = 8, 1727 | METRIC_CANBERRA = 9, 1728 | METRIC_BRAYCURTIS = 10, 1729 | METRIC_MAHALANOBIS = 11, 1730 | METRIC_YULE = 12, 1731 | METRIC_MATCHING = 13, 1732 | METRIC_DICE = 14, 1733 | METRIC_ROGERSTANIMOTO = 15, 1734 | METRIC_RUSSELLRAO = 16, 1735 | METRIC_SOKALSNEATH = 17, 1736 | METRIC_KULSINSKI = 18, 1737 | METRIC_USER = 19, 1738 | METRIC_INVALID = 20, // sentinel 1739 | METRIC_JACCARD_BOOL = 21 // separate function for Jaccard metric on Boolean 1740 | }; // input data 1741 | 1742 | /* 1743 | This class handles all the information about the dissimilarity 1744 | computation. 1745 | */ 1746 | 1747 | class dissimilarity { 1748 | private: 1749 | t_float * Xa; 1750 | auto_array_ptr Xnew; 1751 | std::ptrdiff_t dim; // size_t saves many statis_cast<> in products 1752 | t_index N; 1753 | t_index * members; 1754 | void (cluster_result::*postprocessfn) (const t_float) const; 1755 | t_float postprocessarg; 1756 | 1757 | t_float (dissimilarity::*distfn) (const t_index, const t_index) const; 1758 | 1759 | auto_array_ptr precomputed; 1760 | t_float * precomputed2; 1761 | 1762 | t_float * V; 1763 | const t_float * V_data; 1764 | 1765 | public: 1766 | dissimilarity (t_float * const Xa, int N, int dim, 1767 | t_index * const members, 1768 | const unsigned char method, 1769 | const unsigned char metric, 1770 | bool temp_point_array) 1771 | : Xa(Xa), 1772 | dim(dim), 1773 | N(N), 1774 | members(members), 1775 | postprocessfn(NULL), 1776 | V(NULL) 1777 | { 1778 | //fprintf(stderr, " constructing dissimilarity\n"); 1779 | //for (int i=0; i<8; i++) 1780 | //fprintf(stderr, " my vector %f \n", Xa[i]); 1781 | switch (method) { 1782 | case METHOD_METR_SINGLE: 1783 | postprocessfn = NULL; // default 1784 | switch (metric) { 1785 | case METRIC_EUCLIDEAN: 1786 | set_euclidean(); 1787 | break; 1788 | case METRIC_SEUCLIDEAN: 1789 | /*if (extraarg==NULL) { 1790 | PyErr_SetString(PyExc_TypeError, 1791 | "The 'seuclidean' metric needs a variance parameter."); 1792 | throw pythonerror(); 1793 | } 1794 | V = reinterpret_cast(PyArray_FromAny(extraarg, 1795 | PyArray_DescrFromType(NPY_DOUBLE), 1796 | 1, 1, 1797 | NPY_ARRAY_CARRAY_RO, 1798 | NULL)); 1799 | if (PyErr_Occurred()) { 1800 | throw pythonerror(); 1801 | } 1802 | if (PyArray_DIM(V, 0)!=dim) { 1803 | PyErr_SetString(PyExc_ValueError, 1804 | "The variance vector must have the same dimensionality as the data."); 1805 | throw pythonerror(); 1806 | } 1807 | V_data = reinterpret_cast(PyArray_DATA(V)); 1808 | distfn = &dissimilarity::seuclidean; 1809 | postprocessfn = &cluster_result::sqrt; 1810 | break;*/ 1811 | case METRIC_SQEUCLIDEAN: 1812 | distfn = &dissimilarity::sqeuclidean; 1813 | break; 1814 | case METRIC_CITYBLOCK: 1815 | set_cityblock(); 1816 | break; 1817 | case METRIC_CHEBYCHEV: 1818 | set_chebychev(); 1819 | break; 1820 | case METRIC_MINKOWSKI: 1821 | //set_minkowski(extraarg); 1822 | break; 1823 | case METRIC_COSINE: 1824 
| distfn = &dissimilarity::cosine; 1825 | postprocessfn = &cluster_result::plusone; 1826 | // precompute norms 1827 | precomputed.init(N); 1828 | for (t_index i=0; i(dim); 1840 | break; 1841 | case METRIC_JACCARD: 1842 | distfn = &dissimilarity::jaccard; 1843 | break; 1844 | case METRIC_CANBERRA: 1845 | distfn = &dissimilarity::canberra; 1846 | break; 1847 | case METRIC_BRAYCURTIS: 1848 | distfn = &dissimilarity::braycurtis; 1849 | break; 1850 | case METRIC_MAHALANOBIS: 1851 | /*if (extraarg==NULL) { 1852 | PyErr_SetString(PyExc_TypeError, 1853 | "The 'mahalanobis' metric needs a parameter for the inverse covariance."); 1854 | throw pythonerror(); 1855 | } 1856 | V = reinterpret_cast(PyArray_FromAny(extraarg, 1857 | PyArray_DescrFromType(NPY_DOUBLE), 1858 | 2, 2, 1859 | NPY_ARRAY_CARRAY_RO, 1860 | NULL)); 1861 | if (PyErr_Occurred()) { 1862 | throw pythonerror(); 1863 | } 1864 | if (PyArray_DIM(V, 0)!=N || PyArray_DIM(V, 1)!=dim) { 1865 | PyErr_SetString(PyExc_ValueError, 1866 | "The inverse covariance matrix has the wrong size."); 1867 | throw pythonerror(); 1868 | } 1869 | V_data = reinterpret_cast(PyArray_DATA(V)); 1870 | distfn = &dissimilarity::mahalanobis; 1871 | postprocessfn = &cluster_result::sqrt; 1872 | break;*/ 1873 | case METRIC_YULE: 1874 | distfn = &dissimilarity::yule; 1875 | break; 1876 | case METRIC_MATCHING: 1877 | distfn = &dissimilarity::matching; 1878 | postprocessfn = &cluster_result::divide; 1879 | postprocessarg = static_cast(dim); 1880 | break; 1881 | case METRIC_DICE: 1882 | distfn = &dissimilarity::dice; 1883 | break; 1884 | case METRIC_ROGERSTANIMOTO: 1885 | distfn = &dissimilarity::rogerstanimoto; 1886 | break; 1887 | case METRIC_RUSSELLRAO: 1888 | distfn = &dissimilarity::russellrao; 1889 | postprocessfn = &cluster_result::divide; 1890 | postprocessarg = static_cast(dim); 1891 | break; 1892 | case METRIC_SOKALSNEATH: 1893 | distfn = &dissimilarity::sokalsneath; 1894 | break; 1895 | case METRIC_KULSINSKI: 1896 | distfn = &dissimilarity::kulsinski; 1897 | postprocessfn = &cluster_result::plusone; 1898 | precomputed.init(N); 1899 | for (t_index i=0; i(sum); 1905 | } 1906 | break; 1907 | default: // case METRIC_JACCARD_BOOL: 1908 | distfn = &dissimilarity::jaccard_bool; 1909 | } 1910 | break; 1911 | 1912 | case METHOD_METR_WARD: 1913 | postprocessfn = &cluster_result::sqrtdouble; 1914 | break; 1915 | 1916 | default: 1917 | postprocessfn = &cluster_result::sqrt; 1918 | } 1919 | 1920 | if (temp_point_array) { 1921 | Xnew.init((N-1)*dim); 1922 | } 1923 | //fprintf(stderr, " first distance %f \n", (this->*distfn)(0,1)); 1924 | } 1925 | 1926 | ~dissimilarity() { 1927 | free(V); 1928 | } 1929 | 1930 | inline t_float operator () (const t_index i, const t_index j) const { 1931 | return (this->*distfn)(i,j); 1932 | } 1933 | 1934 | inline t_float X (const t_index i, const t_index j) const { 1935 | return Xa[i*dim+j]; 1936 | } 1937 | 1938 | inline bool Xb (const t_index i, const t_index j) const { 1939 | return reinterpret_cast(Xa)[i*dim+j]; 1940 | } 1941 | 1942 | inline t_float * Xptr(const t_index i, const t_index j) const { 1943 | return Xa+i*dim+j; 1944 | } 1945 | 1946 | void merge(const t_index i, const t_index j, const t_index newnode) const { 1947 | t_float const * const Pi = i(members[i]) + 1951 | Pj[k]*static_cast(members[j])) / 1952 | static_cast(members[i]+members[j]); 1953 | } 1954 | members[newnode] = members[i]+members[j]; 1955 | } 1956 | 1957 | void merge_weighted(const t_index i, const t_index j, const t_index newnode) const { 1958 | t_float const * const Pi 
= i(members[i]) + 1970 | Pj[k]*static_cast(members[j])) / 1971 | static_cast(members[i]+members[j]); 1972 | } 1973 | members[j] += members[i]; 1974 | } 1975 | 1976 | void merge_inplace_weighted(const t_index i, const t_index j) const { 1977 | t_float const * const Pi = Xa+i*dim; 1978 | t_float * const Pj = Xa+j*dim; 1979 | for(t_index k=0; k(members[i]); 1992 | t_float mj = static_cast(members[j]); 1993 | return sqeuclidean(i,j)*mi*mj/(mi+mj); 1994 | } 1995 | 1996 | inline t_float ward_initial(const t_index i, const t_index j) const { 1997 | // alias for sqeuclidean 1998 | // Factor 2!!! 1999 | return sqeuclidean(i,j); 2000 | } 2001 | 2002 | inline static t_float ward_initial_conversion(const t_float min) { 2003 | return min*.5; 2004 | } 2005 | 2006 | inline t_float ward_extended(const t_index i, const t_index j) const { 2007 | t_float mi = static_cast(members[i]); 2008 | t_float mj = static_cast(members[j]); 2009 | return sqeuclidean_extended(i,j)*mi*mj/(mi+mj); 2010 | } 2011 | 2012 | t_float sqeuclidean(const t_index i, const t_index j) const { 2013 | t_float sum = 0; 2014 | //fprintf(stderr, " entering sqeuclidean\n"); 2015 | /* 2016 | for (t_index k=0; kmax) { 2091 | max = diff; 2092 | } 2093 | } 2094 | return max; 2095 | } 2096 | 2097 | t_float cosine(const t_index i, const t_index j) const { 2098 | t_float sum = 0; 2099 | for (t_index k=0; k(sum1) / static_cast(sum2); 2123 | } 2124 | 2125 | t_float canberra(const t_index i, const t_index j) const { 2126 | t_float sum = 0; 2127 | for (t_index k=0; k(2*NTFFT) / static_cast(NTFFT + NFFTT); 2200 | } 2201 | 2202 | // Prevent a zero denominator for equal vectors. 2203 | t_float dice(const t_index i, const t_index j) const { 2204 | nbool_correspond(i, j); 2205 | return (NXO==0) ? 0 : 2206 | static_cast(NXO) / static_cast(NXO+2*NTT); 2207 | } 2208 | 2209 | t_float rogerstanimoto(const t_index i, const t_index j) const { 2210 | nbool_correspond_xo(i, j); 2211 | return static_cast(2*NXO) / static_cast(NXO+dim); 2212 | } 2213 | 2214 | t_float russellrao(const t_index i, const t_index j) const { 2215 | nbool_correspond_tt(i, j); 2216 | return static_cast(dim-NTT); 2217 | } 2218 | 2219 | // Prevent a zero denominator for equal vectors. 2220 | t_float sokalsneath(const t_index i, const t_index j) const { 2221 | nbool_correspond(i, j); 2222 | return (NXO==0) ? 0 : 2223 | static_cast(2*NXO) / static_cast(NTT+2*NXO); 2224 | } 2225 | 2226 | t_float kulsinski(const t_index i, const t_index j) const { 2227 | nbool_correspond_tt(i, j); 2228 | return static_cast(NTT) * (precomputed[i] + precomputed[j]); 2229 | } 2230 | 2231 | // 'matching' distance = Hamming distance 2232 | t_float matching(const t_index i, const t_index j) const { 2233 | nbool_correspond_xo(i, j); 2234 | return static_cast(NXO); 2235 | } 2236 | 2237 | // Prevent a zero denominator for equal vectors. 2238 | t_float jaccard_bool(const t_index i, const t_index j) const { 2239 | nbool_correspond(i, j); 2240 | return (NXO==0) ? 0 : 2241 | static_cast(NXO) / static_cast(NXO+NTT); 2242 | } 2243 | }; 2244 | 2245 | 2246 | /*Clustering for the "stored matrix approach": the input is the array of pairwise dissimilarities*/ 2247 | static int linkage(t_float *D, int N, t_float * Z, unsigned char method) 2248 | { 2249 | 2250 | try{ 2251 | 2252 | if (N < 1 ) { 2253 | // N must be at least 1. 2254 | //fprintf(stderr,"At least one element is needed for clustering."); 2255 | return -1; 2256 | } 2257 | 2258 | // (1) 2259 | // The biggest index used below is 4*(N-2)+3, as an index to Z. 
This must fit 2260 | // into the data type used for indices. 2261 | // (2) 2262 | // The largest representable integer, without loss of precision, by a floating 2263 | // point number of type t_float is 2^T_FLOAT_MANT_DIG. Here, we make sure that 2264 | // all cluster labels from 0 to 2N-2 in the output can be accurately represented 2265 | // by a floating point number. 2266 | if (N > MAX_INDEX/4 || (N-1)>>(T_FLOAT_MANT_DIG-1) > 0) { 2267 | //fprintf(stderr,"Data is too big, index overflow."); 2268 | return -1; 2269 | } 2270 | 2271 | if (method>METHOD_METR_MEDIAN) { 2272 | //fprintf(stderr,"Invalid method index."); 2273 | return -1; 2274 | } 2275 | 2276 | 2277 | cluster_result Z2(N-1); 2278 | auto_array_ptr members; 2279 | // For these methods, the distance update formula needs the number of 2280 | // data points in a cluster. 2281 | if (method==METHOD_METR_AVERAGE || 2282 | method==METHOD_METR_WARD || 2283 | method==METHOD_METR_CENTROID) { 2284 | members.init(N, 1); 2285 | } 2286 | // Operate on squared distances for these methods. 2287 | if (method==METHOD_METR_WARD || 2288 | method==METHOD_METR_CENTROID || 2289 | method==METHOD_METR_MEDIAN) { 2290 | for (std::ptrdiff_t i=0; i < static_cast(N)*(N-1)/2; i++) 2291 | D[i] *= D[i]; 2292 | } 2293 | 2294 | switch (method) { 2295 | case METHOD_METR_SINGLE: 2296 | MST_linkage_core(N, D, Z2); 2297 | break; 2298 | case METHOD_METR_COMPLETE: 2299 | NN_chain_core(N, D, NULL, Z2); 2300 | break; 2301 | case METHOD_METR_AVERAGE: 2302 | NN_chain_core(N, D, members, Z2); 2303 | break; 2304 | case METHOD_METR_WEIGHTED: 2305 | NN_chain_core(N, D, NULL, Z2); 2306 | break; 2307 | case METHOD_METR_WARD: 2308 | NN_chain_core(N, D, members, Z2); 2309 | break; 2310 | case METHOD_METR_CENTROID: 2311 | generic_linkage(N, D, members, Z2); 2312 | break; 2313 | default: // case METHOD_METR_MEDIAN 2314 | generic_linkage(N, D, NULL, Z2); 2315 | } 2316 | 2317 | if (method==METHOD_METR_WARD || 2318 | method==METHOD_METR_CENTROID || 2319 | method==METHOD_METR_MEDIAN) { 2320 | Z2.sqrt(); 2321 | } 2322 | 2323 | if (method==METHOD_METR_CENTROID ||method==METHOD_METR_MEDIAN) { 2324 | generate_dendrogram(Z, Z2, N); 2325 | } 2326 | else { 2327 | generate_dendrogram(Z, Z2, N); 2328 | } 2329 | 2330 | } // try 2331 | catch (const std::bad_alloc&) { 2332 | //fprintf(stderr, "Not enough Memory"); 2333 | return -1; 2334 | } 2335 | catch(const std::exception& e){ 2336 | //fprintf(stderr, "Uncaught exception"); 2337 | return -1; 2338 | } 2339 | catch(...){ 2340 | //fprintf(stderr, "C++ exception (unknown reason). Please send a bug report."); 2341 | return -1; 2342 | } 2343 | return 0; 2344 | 2345 | } 2346 | /*Clustering for the "stored data approach": the input are points in a vector space.*/ 2347 | static int linkage_vector(t_float *X, int N, int dim, t_float * Z, unsigned char method, unsigned char metric) { 2348 | 2349 | //fprintf(stderr, "entering linkage_vector\n"); 2350 | //for (int ii=0; ii<8; ii++) 2351 | //fprintf(stderr, " my vector %f \n", X[ii]); 2352 | 2353 | try{ 2354 | 2355 | if (N < 1 ) { 2356 | // N must be at least 1. 2357 | //fprintf(stderr,"At least one element is needed for clustering."); 2358 | return -1; 2359 | } 2360 | 2361 | if (dim < 1 ) { 2362 | //fprintf(stderr,"Invalid dimension of the data set."); 2363 | return -1; 2364 | } 2365 | 2366 | // (1) 2367 | // The biggest index used below is 4*(N-2)+3, as an index to Z. This must fit 2368 | // into the data type used for indices. 
2369 | // (2) 2370 | // The largest representable integer, without loss of precision, by a floating 2371 | // point number of type t_float is 2^T_FLOAT_MANT_DIG. Here, we make sure that 2372 | // all cluster labels from 0 to 2N-2 in the output can be accurately represented 2373 | // by a floating point number. 2374 | if (N > MAX_INDEX/4 || (N-1)>>(T_FLOAT_MANT_DIG-1) > 0) { 2375 | //fprintf(stderr,"Data is too big, index overflow."); 2376 | return -1; 2377 | } 2378 | 2379 | cluster_result Z2(N-1); 2380 | 2381 | auto_array_ptr members; 2382 | if (method==METHOD_METR_WARD || method==METHOD_METR_CENTROID) { 2383 | members.init(2*N-1, 1); 2384 | } 2385 | 2386 | if ((method!=METHOD_METR_SINGLE && metric!=METRIC_EUCLIDEAN) || 2387 | metric>=METRIC_INVALID) { 2388 | //fprintf(stderr, "Invalid metric index."); 2389 | return -1; 2390 | } 2391 | 2392 | /*if (PyArray_ISBOOL(X)) { 2393 | if (metric==METRIC_HAMMING) { 2394 | metric = METRIC_MATCHING; // Alias 2395 | } 2396 | if (metric==METRIC_JACCARD) { 2397 | metric = METRIC_JACCARD_BOOL; 2398 | } 2399 | }*/ 2400 | 2401 | /* temp_point_array must be true if the alternative algorithm 2402 | is used below (currently for the centroid and median methods). */ 2403 | bool temp_point_array = (method==METHOD_METR_CENTROID || 2404 | method==METHOD_METR_MEDIAN); 2405 | 2406 | dissimilarity dist(X, N, dim, members, method, metric, temp_point_array); 2407 | 2408 | // TODO lluis: just convert the dist into a sparse matrix like you do with D (for the co_occurrence mat) DistMatrix, and then you can call : 2409 | // NN_chain_core(N, DistMatrix, NULL, Z2); (or whatever) 2410 | 2411 | if (method!=METHOD_METR_SINGLE && 2412 | method!=METHOD_METR_WARD && 2413 | method!=METHOD_METR_CENTROID && 2414 | method!=METHOD_METR_MEDIAN) { 2415 | //fprintf(stderr, "Invalid method index."); 2416 | return -1; 2417 | } 2418 | 2419 | switch (method) { 2420 | case METHOD_METR_SINGLE: 2421 | //fprintf(stderr, " calling MST_linkage_core_vector %d \n", N); 2422 | MST_linkage_core_vector(N, dist, Z2); 2423 | break; 2424 | case METHOD_METR_WARD: 2425 | generic_linkage_vector(N, dist, Z2); 2426 | break; 2427 | case METHOD_METR_CENTROID: 2428 | generic_linkage_vector_alternative(N, dist, Z2); 2429 | break; 2430 | default: // case METHOD_METR_MEDIAN: 2431 | generic_linkage_vector_alternative(N, dist, Z2); 2432 | } 2433 | 2434 | if (method==METHOD_METR_WARD || 2435 | method==METHOD_METR_CENTROID) { 2436 | members.free(); 2437 | } 2438 | 2439 | dist.postprocess(Z2); 2440 | 2441 | //fprintf(stderr, " generating dendogram\n"); 2442 | if (method!=METHOD_METR_SINGLE) { 2443 | generate_dendrogram(Z, Z2, N); 2444 | } 2445 | else { 2446 | generate_dendrogram(Z, Z2, N); 2447 | } 2448 | 2449 | } // try 2450 | catch (const std::bad_alloc&) { 2451 | //fprintf(stderr, "Not enough Memory"); 2452 | return -1; 2453 | } 2454 | catch(const std::exception& e){ 2455 | //fprintf(stderr, "Uncaught exception"); 2456 | return -1; 2457 | } 2458 | catch(...){ 2459 | //fprintf(stderr, "C++ exception (unknown reason). Please send a bug report."); 2460 | return -1; 2461 | } 2462 | return 0; 2463 | } 2464 | 2465 | 2466 | 2467 | // Just a main test function to test fastcluter lib. 
it compiles with: 2468 | // g++ -O3 -Wall -pedantic -ansi -Wconversion -Wsign-conversion -Wextra clustering.cpp -o clustering 2469 | /* 2470 | int main() 2471 | { 2472 | unsigned int N = 1113; 2473 | t_float *X = (t_float*)malloc(2*N * sizeof(t_float)); 2474 | 2475 | for (unsigned int i=0; i *group, vector *regions) 21 | { 22 | assert(group != NULL); 23 | assert(group->size() > 1); 24 | assert(regions != NULL); 25 | 26 | Mat votes ( group->size(), 1, CV_32F, 1 ); 27 | Mat strokes ( group->size(), 1, CV_32F, 1 ); 28 | Mat aspect_ratios ( group->size(), 1, CV_32F, 1 ); 29 | Mat compactnesses ( group->size(), 1, CV_32F, 1 ); 30 | Mat nums_holes ( group->size(), 1, CV_32F, 1 ); 31 | Mat holeareas_area ( group->size(), 1, CV_32F, 1 ); 32 | 33 | for (int i=group->size()-1; i>=0; i--) 34 | { 35 | // TODO check first if regions->at(group->at(i)).votes_ has already been calculated !!! 36 | regions->at(group->at(i)).classifier_votes_ = character_classifier_->get_votes(®ions->at(group->at(i))); 37 | 38 | votes.at(i,0) = regions->at(group->at(i)).classifier_votes_; 39 | strokes.at(i,0) = (float)regions->at(group->at(i)).stroke_mean_; 40 | aspect_ratios.at(i,0) = (float)min( regions->at(group->at(i)).rect_.size.width, regions->at(group->at(i)).rect_.size.height)/max( regions->at(group->at(i)).rect_.size.width, regions->at(group->at(i)).rect_.size.height); 41 | compactnesses.at(i,0) = sqrt(regions->at(group->at(i)).area_)/regions->at(group->at(i)).perimeter_; 42 | nums_holes.at(i,0) = (float)regions->at(group->at(i)).num_holes_; 43 | holeareas_area.at(i,0) = (float)regions->at(group->at(i)).holes_area_/regions->at(group->at(i)).area_; 44 | } 45 | 46 | vector sample; 47 | sample.push_back(0); 48 | 49 | Scalar mean,std; 50 | meanStdDev( votes, mean, std ); 51 | sample.push_back( mean[0]); 52 | sample.push_back( std[0]); 53 | sample.push_back( std[0]/mean[0] ); 54 | meanStdDev( strokes, mean, std ); 55 | sample.push_back( mean[0]); 56 | sample.push_back( std[0]); 57 | sample.push_back( std[0]/mean[0] ); 58 | meanStdDev( aspect_ratios, mean, std ); 59 | sample.push_back( mean[0]); 60 | sample.push_back( std[0]); 61 | sample.push_back( std[0]/mean[0] ); 62 | meanStdDev( compactnesses, mean, std ); 63 | sample.push_back( mean[0]); 64 | sample.push_back( std[0]); 65 | sample.push_back( std[0]/mean[0] ); 66 | meanStdDev( nums_holes, mean, std ); 67 | sample.push_back( mean[0]); 68 | sample.push_back( std[0]); 69 | sample.push_back( std[0]/mean[0] ); 70 | meanStdDev( holeareas_area, mean, std ); 71 | sample.push_back(mean[0]); 72 | sample.push_back( std[0]); 73 | sample.push_back( std[0]/mean[0] ); 74 | 75 | 76 | float votes_group = boost_.predict( Mat(sample), Mat(), Range::all(), false, true ); 77 | 78 | return (double)1-(double)1/(1+exp(-2*votes_group)); 79 | } 80 | -------------------------------------------------------------------------------- /group_classifier.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef GROUP_CLASSIFIER_H 3 | #define GROUP_CLASSIFIER_H 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "region.h" 13 | #include "region_classifier.h" 14 | 15 | using namespace cv; 16 | using namespace std; 17 | 18 | class GroupClassifier 19 | { 20 | public: 21 | 22 | /// Constructor. 
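/*
  [editor's note] The group score returned by operator() in
  group_classifier.cpp is 1 - 1/(1 + exp(-2*votes)), a logistic squashing of
  the boosted-tree vote sum into (0,1); main.cpp then keeps groups whose score
  reaches DECISION_THRESHOLD_EA or DECISION_THRESHOLD_SF. A hedged standalone
  sketch of just that mapping (illustrative helper, not part of the class):

      #include <cmath>
      // Map a raw boosted-classifier vote sum to a pseudo-probability in (0,1).
      inline double votes_to_probability(double votes) {
          return 1.0 - 1.0 / (1.0 + std::exp(-2.0 * votes));
      }
      // votes_to_probability(0.0)  == 0.5    (undecided)
      // votes_to_probability(-3.0) ~= 0.998  (confidently "text" under this sign convention)
*/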
23 | /// @param[in] trained_boost_filename 24 | /// @param[in] character_classifier a pointer to CvBoost for character classification 25 | GroupClassifier(char *trained_boost_filename, RegionClassifier *character_classifier); 26 | 27 | /// Classify a region. Returns true iif a group of regions is classified as a text group 28 | /// @param[in] regions A pointer to the group of regions indexes to be classified. 29 | /// @param[in] regions A pointer to the whole regions vector. 30 | double operator()(vector *group, vector *regions); 31 | 32 | 33 | private: 34 | 35 | // Boosted tree classifier 36 | CvBoost boost_; 37 | 38 | // Boosted tree classifier for single characters 39 | RegionClassifier *character_classifier_; 40 | 41 | // Classification parameter 42 | float decision_threshold_; 43 | }; 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #define _MAIN 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "region.h" 10 | #include "mser.h" 11 | #include "max_meaningful_clustering.h" 12 | #include "region_classifier.h" 13 | #include "group_classifier.h" 14 | 15 | #define NUM_FEATURES 11 16 | 17 | #define DECISION_THRESHOLD_EA 0.5 18 | #define DECISION_THRESHOLD_SF 0.999999999 19 | 20 | using namespace std; 21 | using namespace cv; 22 | 23 | #include "utils.h" 24 | 25 | int main( int argc, char** argv ) 26 | { 27 | 28 | 29 | Mat img, grey, lab_img, gradient_magnitude, segmentation, all_segmentations; 30 | 31 | vector regions; 32 | ::MSER mser8(false,25,0.000008,0.03,1,0.7); 33 | 34 | RegionClassifier region_boost("boost_train/trained_boost_char.xml", 0); 35 | GroupClassifier group_boost("boost_train/trained_boost_groups.xml", ®ion_boost); 36 | 37 | img = imread(argv[1]); 38 | cvtColor(img, grey, CV_BGR2GRAY); 39 | cvtColor(img, lab_img, CV_BGR2Lab); 40 | gradient_magnitude = Mat_(img.size()); 41 | get_gradient_magnitude( grey, gradient_magnitude); 42 | 43 | segmentation = Mat::zeros(img.size(),CV_8UC3); 44 | all_segmentations = Mat::zeros(240,320*11,CV_8UC3); 45 | 46 | for (int step =1; step<3; step++) 47 | { 48 | 49 | 50 | if (step == 2) 51 | grey = 255-grey; 52 | 53 | //double t_tot = (double)cvGetTickCount(); 54 | 55 | //double t = (double)cvGetTickCount(); 56 | mser8((uchar*)grey.data, grey.cols, grey.rows, regions); 57 | 58 | 59 | //t = cvGetTickCount() - t; 60 | //cout << "Detected " << regions.size() << " regions" << " in " << t/((double)cvGetTickFrequency()*1000.) << " ms." << endl; 61 | //t = (double)cvGetTickCount(); 62 | 63 | for (int i=0; i=0; i--) 73 | { 74 | regions[i].extract_features(lab_img, grey, gradient_magnitude); 75 | if ( (regions.at(i).stroke_std_/regions.at(i).stroke_mean_ > 0.8) || (regions.at(i).num_holes_>2) || (regions.at(i).bbox_.width <=3) || (regions.at(i).bbox_.height <=3) ) 76 | regions.erase(regions.begin()+i); 77 | else 78 | max_stroke = max(max_stroke, regions[i].stroke_mean_); 79 | } 80 | 81 | //t = cvGetTickCount() - t; 82 | //cout << "Features extracted in " << t/((double)cvGetTickFrequency()*1000.) << " ms." 
<< endl; 83 | //t = (double)cvGetTickCount(); 84 | 85 | MaxMeaningfulClustering mm_clustering(METHOD_METR_SINGLE, METRIC_SEUCLIDEAN); 86 | 87 | vector< vector > meaningful_clusters; 88 | vector< vector > final_clusters; 89 | Mat co_occurrence_matrix = Mat::zeros((int)regions.size(), (int)regions.size(), CV_64F); 90 | 91 | int dims[NUM_FEATURES] = {3,3,3,3,3,3,3,3,3,5,5}; 92 | 93 | for (int f=0; f= DECISION_THRESHOLD_SF) ) 155 | { 156 | final_clusters.push_back(meaningful_clusters.at(k)); 157 | } 158 | } 159 | 160 | Mat tmp_segmentation = Mat::zeros(img.size(),CV_8UC3); 161 | Mat tmp_all_segmentations = Mat::zeros(240,320*11,CV_8UC3); 162 | drawClusters(tmp_segmentation, ®ions, &meaningful_clusters); 163 | Mat tmp = Mat::zeros(240,320,CV_8UC3); 164 | resize(tmp_segmentation,tmp,tmp.size()); 165 | tmp.copyTo(tmp_all_segmentations(Rect(320*f,0,320,240))); 166 | all_segmentations = all_segmentations + tmp_all_segmentations; 167 | 168 | free(data); 169 | meaningful_clusters.clear(); 170 | } 171 | //t = cvGetTickCount() - t; 172 | //cout << "Clusterings (" << NUM_FEATURES << ") done in " << t/((double)cvGetTickFrequency()*1000.) << " ms." << endl; 173 | //t = (double)cvGetTickCount(); 174 | 175 | /**/ 176 | double minVal; 177 | double maxVal; 178 | minMaxLoc(co_occurrence_matrix, &minVal, &maxVal); 179 | 180 | maxVal = NUM_FEATURES - 1; //TODO this is true only if you are using "grow == 1" in accumulate_evidence function 181 | minVal=0; 182 | 183 | co_occurrence_matrix = maxVal - co_occurrence_matrix; 184 | co_occurrence_matrix = co_occurrence_matrix / maxVal; 185 | 186 | //we want a sparse matrix 187 | 188 | t_float *D = (t_float*)malloc((regions.size()*regions.size()) * sizeof(t_float)); 189 | int pos = 0; 190 | for (int i = 0; i(i, j); 195 | pos++; 196 | } 197 | } 198 | 199 | // fast clustering from the co-occurrence matrix 200 | mm_clustering(D, regions.size(), METHOD_METR_AVERAGE, &meaningful_clusters); // TODO try with METHOD_METR_COMPLETE 201 | free(D); 202 | 203 | //t = cvGetTickCount() - t; 204 | //cout << "Evidence Accumulation Clustering done in " << t/((double)cvGetTickFrequency()*1000.) << " ms. Got " << meaningful_clusters.size() << " clusters." << endl; 205 | //t = (double)cvGetTickCount(); 206 | 207 | 208 | for (int i=meaningful_clusters.size()-1; i>=0; i--) 209 | { 210 | //if ( (! 
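/*
  [editor's note] Two details of the evidence-accumulation step above are easy
  to miss: co-occurrence counts are inverted into dissimilarities,
  D = (maxVal - count) / maxVal, so region pairs grouped in many feature
  channels get distances near 0; and D is filled in condensed order (strict
  upper triangle, row by row), the layout linkage() in fast_clustering.cpp
  expects in its first N*(N-1)/2 entries. A sketch with hypothetical helper
  names:

      // Dissimilarity from a co-occurrence count in [0, maxVal].
      inline double eac_distance(double count, double maxVal) {
          return (maxVal - count) / maxVal;  // 0 = always together, 1 = never
      }
      // Condensed index of pair (i, j), i < j, matching the fill loop above.
      inline int condensed_index(int N, int i, int j) {
          return i*N - i*(i+1)/2 + (j - i - 1);
      }
*/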
group_boost(&meaningful_clusters.at(i), &regions)) || (meaningful_clusters.at(i).size()<3) ) 211 | if ( (group_boost(&meaningful_clusters.at(i), &regions) >= DECISION_THRESHOLD_EA) ) 212 | { 213 | final_clusters.push_back(meaningful_clusters.at(i)); 214 | } 215 | } 216 | 217 | drawClusters(segmentation, &regions, &final_clusters); 218 | 219 | if (step == 2) 220 | { 221 | cvtColor(segmentation, grey, CV_BGR2GRAY); 222 | threshold(grey,grey,1,255,CV_THRESH_BINARY); 223 | imwrite("out.png", grey); 224 | 225 | if (argc > 2) 226 | { 227 | Mat gt; 228 | gt = imread(argv[2]); 229 | cvtColor(gt, gt, CV_RGB2GRAY); 230 | threshold(gt, gt, 1, 255, CV_THRESH_BINARY_INV); // <- for KAIST gt 231 | //threshold(gt, gt, 254, 255, CV_THRESH_BINARY); // <- for ICDAR gt 232 | Mat tmp_mask = (255-gt) & (grey); 233 | cout << "Pixel level recall = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl; 234 | cout << "Pixel level precision = " << (float)countNonZero(tmp_mask) / countNonZero(grey) << endl; 235 | } 236 | else 237 | { 238 | imshow("Original", img); 239 | imshow("Text extraction", segmentation); 240 | waitKey(0); 241 | } 242 | 243 | } 244 | 245 | 246 | regions.clear(); 247 | //t_tot = cvGetTickCount() - t_tot; 248 | //cout << " Total processing for one frame " << t_tot/((double)cvGetTickFrequency()*1000.) << " ms." << endl; 249 | 250 | } 251 | 252 | } 253 | -------------------------------------------------------------------------------- /max_meaningful_clustering.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "max_meaningful_clustering.h" 3 | 4 | MaxMeaningfulClustering::MaxMeaningfulClustering(unsigned char method, unsigned char metric) 5 | { 6 | 7 | } 8 | 9 | void MaxMeaningfulClustering::operator()(t_float *data, unsigned int num, int dim, unsigned char method, unsigned char metric, vector< vector<int> > *meaningful_clusters) 10 | { 11 | 12 | t_float *Z = (t_float*)malloc(((num-1)*4) * sizeof(t_float)); // we need 4 floats for each merge. 13 | linkage_vector(data, (int)num, dim, Z, method, metric); 14 | 15 | vector<HCluster> merge_info; 16 | build_merge_info(Z, data, (int)num, dim, false, &merge_info, meaningful_clusters); 17 | 18 | free(Z); 19 | merge_info.clear(); 20 | } 21 | 22 | void MaxMeaningfulClustering::operator()(t_float *data, unsigned int num, unsigned char method, vector< vector<int> > *meaningful_clusters) 23 | { 24 | 25 | t_float *Z = (t_float*)malloc(((num-1)*4) * sizeof(t_float)); // we need 4 floats for each merge.
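/*
  [editor's note] Z is laid out like a SciPy/fastcluster linkage matrix: four
  values per merge, N-1 merges in total. build_merge_info() reads them back as
  node1 = Z[4k], node2 = Z[4k+1], dist = Z[4k+2], num_elem = Z[4k+3], where
  ids below N are original samples and ids >= N refer to earlier merges
  (id - N). A hedged reading sketch (assumes <cstdio>):

      void print_dendrogram(const t_float *Z, int N) {
          for (int k = 0; k < N - 1; k++) {
              int node1 = (int)Z[4*k];         // first child (sample if < N)
              int node2 = (int)Z[4*k + 1];     // second child
              double d  = (double)Z[4*k + 2];  // merge distance
              int size  = (int)Z[4*k + 3];     // elements in the merged cluster
              printf("merge %d: %d + %d at %g (size %d)\n", k, node1, node2, d, size);
          }
      }
*/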
26 | linkage(data, (int)num, Z, method); //TODO think if complete linkage is the correct 27 | 28 | vector merge_info; 29 | build_merge_info(Z, (int)num, &merge_info, meaningful_clusters); 30 | 31 | free(Z); 32 | merge_info.clear(); 33 | } 34 | 35 | void MaxMeaningfulClustering::build_merge_info(t_float *Z, t_float *X, int N, int dim, bool use_full_merge_rule, vector *merge_info, vector< vector > *meaningful_clusters) 36 | { 37 | 38 | // walk the whole dendogram 39 | for (int i=0; i<(N-1)*4; i=i+4) 40 | { 41 | HCluster cluster; 42 | cluster.num_elem = Z[i+3]; //number of elements 43 | 44 | int node1 = Z[i]; 45 | int node2 = Z[i+1]; 46 | float dist = Z[i+2]; 47 | 48 | if (node1 point; 51 | for (int n=0; nat(node1-N).points.size(); i++) 59 | { 60 | cluster.points.push_back(merge_info->at(node1-N).points[i]); 61 | cluster.elements.push_back(merge_info->at(node1-N).elements[i]); 62 | } 63 | //update the extended volume of node1 using the dist where this cluster merge with another 64 | merge_info->at(node1-N).dist_ext = dist; 65 | } 66 | if (node2 point; 69 | for (int n=0; nat(node2-N).points.size(); i++) 77 | { 78 | cluster.points.push_back(merge_info->at(node2-N).points[i]); 79 | cluster.elements.push_back(merge_info->at(node2-N).elements[i]); 80 | } 81 | 82 | //update the extended volume of node2 using the dist where this cluster merge with another 83 | merge_info->at(node2-N).dist_ext = dist; 84 | } 85 | 86 | Minibox mb; 87 | for (int i=0; i= 1) 95 | cluster.volume = 0.999999; 96 | if (cluster.volume == 0) 97 | cluster.volume = 0.001; //TODO is this the minimum we can get? 98 | 99 | cluster.volume_ext=1; 100 | 101 | if (node1>=N) 102 | { 103 | merge_info->at(node1-N).volume_ext = cluster.volume; 104 | } 105 | if (node2>=N) 106 | { 107 | merge_info->at(node2-N).volume_ext = cluster.volume; 108 | } 109 | 110 | cluster.node1 = node1; 111 | cluster.node2 = node2; 112 | 113 | merge_info->push_back(cluster); 114 | 115 | } 116 | 117 | for (int i=0; isize(); i++) 118 | { 119 | 120 | merge_info->at(i).nfa = nfa(merge_info->at(i).volume, merge_info->at(i).volume_ext, merge_info->at(i).num_elem, N); 121 | int node1 = merge_info->at(i).node1; 122 | int node2 = merge_info->at(i).node2; 123 | 124 | { 125 | if ((node1at(i).max_meaningful = true; 129 | merge_info->at(i).max_in_branch.push_back(i); 130 | merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; 131 | } else { 132 | if ((node1>=N)&&(node2>=N)) 133 | { 134 | //els dos nodes son "sets" per tant hem d'avaluar el merging condition 135 | if ( ( (use_full_merge_rule) && ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + merge_info->at(node2-N).nfa) && (merge_info->at(i).nfaat(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch))) ) || ( (!use_full_merge_rule) && ((merge_info->at(i).nfaat(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch))) ) ) 136 | { 137 | merge_info->at(i).max_meaningful = true; 138 | merge_info->at(i).max_in_branch.push_back(i); 139 | merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; 140 | for (int k =0; kat(node1-N).max_in_branch.size(); k++) 141 | merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; 142 | for (int k =0; kat(node2-N).max_in_branch.size(); k++) 143 | merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; 144 | } else { 145 | merge_info->at(i).max_meaningful = false; 146 | 
merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node1-N).max_in_branch.begin(),merge_info->at(node1-N).max_in_branch.end()); 147 | merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node2-N).max_in_branch.begin(),merge_info->at(node2-N).max_in_branch.end()); 148 | if (merge_info->at(i).nfaat(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch)) 149 | merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; 150 | else 151 | merge_info->at(i).min_nfa_in_branch = min(merge_info->at(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch); 152 | } 153 | } else { 154 | 155 | //un dels nodes es un "set" i l'altre es un single sample, s'avalua el merging condition pero amb compte 156 | if (node1>=N) 157 | { 158 | if ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + 1) && (merge_info->at(i).nfaat(node1-N).min_nfa_in_branch)) 159 | { 160 | merge_info->at(i).max_meaningful = true; 161 | merge_info->at(i).max_in_branch.push_back(i); 162 | merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; 163 | for (int k =0; kat(node1-N).max_in_branch.size(); k++) 164 | merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; 165 | } else { 166 | merge_info->at(i).max_meaningful = false; 167 | merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node1-N).max_in_branch.begin(),merge_info->at(node1-N).max_in_branch.end()); 168 | merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa,merge_info->at(node1-N).min_nfa_in_branch); 169 | } 170 | } else { 171 | if ((merge_info->at(i).nfa < merge_info->at(node2-N).nfa + 1) && (merge_info->at(i).nfaat(node2-N).min_nfa_in_branch)) 172 | { 173 | merge_info->at(i).max_meaningful = true; 174 | merge_info->at(i).max_in_branch.push_back(i); 175 | merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; 176 | for (int k =0; kat(node2-N).max_in_branch.size(); k++) 177 | merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; 178 | } else { 179 | merge_info->at(i).max_meaningful = false; 180 | merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node2-N).max_in_branch.begin(),merge_info->at(node2-N).max_in_branch.end()); 181 | merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa,merge_info->at(node2-N).min_nfa_in_branch); 182 | } 183 | } 184 | } 185 | } 186 | 187 | 188 | } 189 | 190 | } 191 | 192 | for (int i=0; isize(); i++) 193 | { 194 | if (merge_info->at(i).max_meaningful) 195 | { 196 | vector cluster; 197 | for (int k=0; kat(i).elements.size();k++) 198 | cluster.push_back(merge_info->at(i).elements.at(k)); 199 | meaningful_clusters->push_back(cluster); 200 | } 201 | } 202 | 203 | } 204 | 205 | void MaxMeaningfulClustering::build_merge_info(t_float *Z, int N, vector *merge_info, vector< vector > *meaningful_clusters) 206 | { 207 | 208 | // walk the whole dendogram 209 | for (int i=0; i<(N-1)*4; i=i+4) 210 | { 211 | HCluster cluster; 212 | cluster.num_elem = Z[i+3]; //number of elements 213 | 214 | int node1 = Z[i]; 215 | int node2 = Z[i+1]; 216 | float dist = Z[i+2]; 217 | if (dist != dist) //this is to avoid NaN values 218 | dist=0; 219 | 220 | //fprintf(stderr," merging %d %d\n",node1,node2); 221 | 222 | if (node1at(node1-N).elements.size(); i++) 229 | { 230 | cluster.elements.push_back(merge_info->at(node1-N).elements[i]); 231 | } 232 | } 233 | if (node2at(node2-N).elements.size(); i++) 240 | 
{ 241 | cluster.elements.push_back(merge_info->at(node2-N).elements[i]); 242 | } 243 | } 244 | 245 | cluster.dist = dist; 246 | if (cluster.dist >= 1) 247 | cluster.dist = 0.999999; 248 | if (cluster.dist == 0) 249 | cluster.dist = 1.e-25; //TODO is this the minimum we can get? 250 | 251 | cluster.dist_ext = 1; 252 | 253 | if (node1>=N) 254 | { 255 | merge_info->at(node1-N).dist_ext = cluster.dist; 256 | } 257 | if (node2>=N) 258 | { 259 | merge_info->at(node2-N).dist_ext = cluster.dist; 260 | } 261 | 262 | cluster.node1 = node1; 263 | cluster.node2 = node2; 264 | 265 | 266 | merge_info->push_back(cluster); 267 | 268 | } 269 | 270 | //print all merge info 271 | //cout << "---------------------------------------------------------" << endl; 272 | //cout << "-- MERGE INFO ---- Evidence Accumulation " << endl; 273 | //cout << "---------------------------------------------------------" << endl; 274 | 275 | for (int i=0; isize(); i++) 276 | { 277 | 278 | merge_info->at(i).nfa = nfa(merge_info->at(i).dist, merge_info->at(i).dist_ext, merge_info->at(i).num_elem, N); 279 | int node1 = merge_info->at(i).node1; 280 | int node2 = merge_info->at(i).node2; 281 | 282 | { 283 | 284 | if ((node1at(i).max_meaningful = true; 288 | merge_info->at(i).max_in_branch.push_back(i); 289 | merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; 290 | //fprintf(stderr,"%d = (%d,%d) els dos nodes son single samples (nfa=1) per tant aquest merge_info->at(i) es maxim min_nfa_in_branch = %d \n",i,node1-N,node2-N,merge_info->at(i).min_nfa_in_branch); 291 | } else { 292 | if ((node1>=N)&&(node2>=N)) 293 | { 294 | //els dos nodes son "sets" per tant hem d'avaluar el merging condition 295 | if ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + merge_info->at(node2-N).nfa) && (merge_info->at(i).nfaat(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch))) 296 | { 297 | //fprintf(stderr,"%d = (%d,%d) MAX because merging condition 1 (%d < %d + %d ) && (%dat(i).nfa,merge_info->at(node1-N).nfa, merge_info->at(node2-N).nfa, merge_info->at(i).nfa, merge_info->at(node1-N).nfa,merge_info->at(node2-N).nfa); 298 | merge_info->at(i).max_meaningful = true; 299 | merge_info->at(i).max_in_branch.push_back(i); 300 | merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; 301 | for (int k =0; kat(node1-N).max_in_branch.size(); k++) 302 | merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; 303 | for (int k =0; kat(node2-N).max_in_branch.size(); k++) 304 | merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; 305 | //fprintf(stderr," min_nfa_in_branch = %d \n",merge_info->at(i).min_nfa_in_branch); 306 | } else { 307 | merge_info->at(i).max_meaningful = false; 308 | merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node1-N).max_in_branch.begin(),merge_info->at(node1-N).max_in_branch.end()); 309 | merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node2-N).max_in_branch.begin(),merge_info->at(node2-N).max_in_branch.end()); 310 | if (merge_info->at(i).nfaat(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch)) 311 | merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; 312 | else 313 | merge_info->at(i).min_nfa_in_branch = min(merge_info->at(node1-N).min_nfa_in_branch,merge_info->at(node2-N).min_nfa_in_branch); 314 | //fprintf(stderr,"%d = (%d,%d) NONmax min_nfa_in_branch = %d \n",i,node1-N,node2-N,merge_info->at(i).min_nfa_in_branch); 315 | } 316 | 
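/*
  [editor's note] The Catalan comments in these branches translate as: "both
  nodes are sets, so we evaluate the merging condition" (the branch above) and
  "one of the nodes is a set and the other is a single sample, so the merging
  condition is evaluated, but with care" (the branch below). The rule itself,
  in sketch form (hypothetical names; a lone sample counts as NFA 1):

      // Keep this merge as maximal meaningful only if it beats both the sum
      // of its children's NFAs and the best NFA anywhere below it...
      bool keep = (nfa < nfa_child1 + nfa_child2)
               && (nfa < min(min_branch_child1, min_branch_child2));
      // ...otherwise the children's maxima are kept and the best NFA in the
      // branch is propagated upwards.
*/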
} else { 317 | 318 | //un dels nodes es un "set" i l'altre es un single sample, s'avalua el merging condition pero amb compte 319 | if (node1>=N) 320 | { 321 | if ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + 1) && (merge_info->at(i).nfaat(node1-N).min_nfa_in_branch)) 322 | { 323 | //fprintf(stderr,"%d = (%d,%d) MAX because merging condition 2 (%d < %d + 1 ) && (%d<%d) \n",i,node1-N,node2-N,merge_info->at(i).nfa,merge_info->at(node1-N).nfa, merge_info->at(i).nfa, merge_info->at(node1-N).min_nfa_in_branch); 324 | merge_info->at(i).max_meaningful = true; 325 | merge_info->at(i).max_in_branch.push_back(i); 326 | merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; 327 | for (int k =0; kat(node1-N).max_in_branch.size(); k++) 328 | merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; 329 | } else { 330 | merge_info->at(i).max_meaningful = false; 331 | merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node1-N).max_in_branch.begin(),merge_info->at(node1-N).max_in_branch.end()); 332 | merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa,merge_info->at(node1-N).min_nfa_in_branch); 333 | //fprintf(stderr,"%d = (%d,%d) NONmax2 min_nfa_in_branch = %d \n",i,node1-N,node2-N,merge_info->at(i).min_nfa_in_branch); 334 | } 335 | } else { 336 | if ((merge_info->at(i).nfa < merge_info->at(node2-N).nfa + 1) && (merge_info->at(i).nfaat(node2-N).min_nfa_in_branch)) 337 | { 338 | //fprintf(stderr,"%d = (%d,%d) MAX because merging condition 3 (%d < %d + 1 ) && (%d<%d) \n ",i,node1-N,node2-N,merge_info->at(i).nfa,merge_info->at(node2-N).nfa, merge_info->at(i).nfa, merge_info->at(node2-N).min_nfa_in_branch); 339 | merge_info->at(i).max_meaningful = true; 340 | merge_info->at(i).max_in_branch.push_back(i); 341 | merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; 342 | for (int k =0; kat(node2-N).max_in_branch.size(); k++) 343 | merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; 344 | } else { 345 | merge_info->at(i).max_meaningful = false; 346 | merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(),merge_info->at(node2-N).max_in_branch.begin(),merge_info->at(node2-N).max_in_branch.end()); 347 | merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa,merge_info->at(node2-N).min_nfa_in_branch); 348 | //fprintf(stderr,"%d = (%d,%d) NONmax3 min_nfa_in_branch = %d \n",i,node1-N,node2-N,merge_info->at(i).min_nfa_in_branch); 349 | } 350 | } 351 | } 352 | } 353 | 354 | 355 | } 356 | } 357 | 358 | for (int i=0; isize(); i++) 359 | { 360 | if (merge_info->at(i).max_meaningful) 361 | { 362 | vector cluster; 363 | for (int k=0; kat(i).elements.size();k++) 364 | cluster.push_back(merge_info->at(i).elements.at(k)); 365 | meaningful_clusters->push_back(cluster); 366 | } 367 | } 368 | 369 | } 370 | 371 | int MaxMeaningfulClustering::nfa(float sigma, float sigma2, int k, int N) 372 | { 373 | return -1*(int)NFA( N, k, (double) sigma, 0); //this uses an approximation for the nfa calculations (faster) 374 | } 375 | -------------------------------------------------------------------------------- /max_meaningful_clustering.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef MAX_MEANINGFUL_CLUSTERING_H 3 | #define MAX_MEANINGFUL_CLUSTERING_H 4 | 5 | #include 6 | 7 | #include "fast_clustering.cpp" 8 | #include "nfa.cpp" 9 | #include "min_bounding_box.h" 10 | 11 | using namespace std; 12 | 13 | typedef struct { 14 | int 
num_elem; // number of elements 15 | vector<int> elements; // elements (contour ID) 16 | int nfa; // the number of false alarms for this merge (we are using only the nfa exponent so this is an int) 17 | float dist; // distance of the merge 18 | float dist_ext; // distance where this merge will merge with another 19 | long double volume; // volume of the bounding sphere (or bounding box) 20 | long double volume_ext; // volume of the sphere (or box) plus the surrounding empty space 21 | vector< vector<float> > points; // nD points in this cluster 22 | bool max_meaningful; // is this merge max meaningful? 23 | vector<int> max_in_branch; // otherwise, which merges are the max_meaningful in this branch 24 | int min_nfa_in_branch; // here we store the min nfa detected within the childhood of this merge and this one (we are using only the nfa exponent) 25 | int node1; 26 | int node2; 27 | } HCluster; 28 | 29 | class MaxMeaningfulClustering 30 | { 31 | public: 32 | 33 | /// Constructor. 34 | MaxMeaningfulClustering(unsigned char method, unsigned char metric); 35 | 36 | /// Does hierarchical clustering and detects the Max Meaningful Clusters 37 | /// @param[in] data The data feature vectors to be analyzed. 38 | /// @param[in] num Number of data samples. 39 | /// @param[in] dim Dimension of the feature vectors. 40 | /// @param[in] method Clustering method. 41 | /// @param[in] metric Similarity metric for clustering. 42 | /// @param[out] meaningful_clusters Detected Max Meaningful Clusters. 43 | void operator()(t_float *data, unsigned int num, int dim, unsigned char method, unsigned char metric, vector< vector<int> > *meaningful_clusters); 44 | void operator()(t_float *data, unsigned int num, unsigned char method, vector< vector<int> > *meaningful_clusters); 45 | 46 | private: 47 | /// Helper functions 48 | void build_merge_info(t_float *dendrogram, t_float *data, int num, int dim, bool use_full_merge_rule, vector<HCluster> *merge_info, vector< vector<int> > *meaningful_clusters); 49 | void build_merge_info(t_float *dendrogram, int num, vector<HCluster> *merge_info, vector< vector<int> > *meaningful_clusters); 50 | 51 | /// Number of False Alarms 52 | int nfa(float sigma, float sigma2, int k, int N); 53 | 54 | }; 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /min_bounding_box.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "min_bounding_box.h" 3 | 4 | Minibox::Minibox() 5 | { 6 | initialized = false; 7 | } 8 | 9 | void Minibox::check_in (vector<float> *p) 10 | { 11 | if(!initialized) for (int i=0; i<p->size(); i++) 12 | { 13 | edge_begin.push_back(p->at(i)); 14 | edge_end.push_back(p->at(i)+0.00000000000000001); 15 | initialized = true; 16 | } 17 | else for (int i=0; i<p->size(); i++) 18 | { 19 | edge_begin.at(i) = min(p->at(i),edge_begin.at(i)); 20 | edge_end.at(i) = max(p->at(i),edge_end.at(i)); 21 | //fprintf(stderr," edge_begin[%d] = %e\n",i,edge_begin[i]); 22 | //fprintf(stderr," edge_end[%d] = %e\n",i,edge_end[i]); 23 | } 24 | } 25 | 26 | long double Minibox::volume () 27 | { 28 | long double volume = 1; 29 | for (int i=0; i<edge_begin.size(); i++) 30 | volume = volume * (edge_end.at(i)-edge_begin.at(i)); 31 | 32 | return volume; 33 | } 34 | -------------------------------------------------------------------------------- /min_bounding_box.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef MIN_BOUNDING_BOX_H 3 | #define MIN_BOUNDING_BOX_H 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | #include <vector> 13 | 14 | using namespace std; 15 | 16 | class Minibox { 17 | private: 18 | vector<float> edge_begin; 19 | vector<float> edge_end; 20 | bool initialized; 21 | 22 | public: 23 | // creates an empty box 24 | Minibox(); 25 | 26 | // copies p to the internal point set 27 | void check_in (vector<float> *p); 28 | 29 | // returns the volume of the box 30 | long double volume(); 31 | }; 32 | 33 | #endif 34 | --------------------------------------------------------------------------------
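[editor's note] Minibox above is the axis-aligned bounding box that MaxMeaningfulClustering::build_merge_info() uses to estimate a cluster's volume for the NFA test. A hedged usage sketch (assumes the vector<float> signature reconstructed above; requires C++11 list-initialization):

    #include <cstdio>
    #include <vector>
    #include "min_bounding_box.h"

    int main() {
        Minibox mb;
        std::vector<float> p1 = {0.0f, 0.0f};
        std::vector<float> p2 = {0.5f, 0.25f};
        mb.check_in(&p1);                    // first point initializes the box
        mb.check_in(&p2);                    // later points only stretch the edges
        std::printf("%Lf\n", mb.volume());   // 0.5 * 0.25 = 0.125
        return 0;
    }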
/mser.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------------------- 2 | // Linear time Maximally Stable Extremal Regions implementation as described in D. Nistér and 3 | // H. Stewénius. Linear Time Maximally Stable Extremal Regions. Proceedings of the European 4 | // Conference on Computer Vision (ECCV), 2008. 5 | // 6 | // Copyright (c) 2011 Idiap Research Institute, http://www.idiap.ch/. 7 | // Written by Charles Dubout /. 8 | // 9 | // MSER is free software: you can redistribute it and/or modify it under the terms of the GNU 10 | // General Public License version 3 as published by the Free Software Foundation. 11 | // 12 | // MSER is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even 13 | // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 14 | // Public License for more details. 15 | // 16 | // You should have received a copy of the GNU General Public License along with MSER. If not, see 17 | // . 18 | //-------------------------------------------------------------------------------------------------- 19 | 20 | #include "mser.h" 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | using namespace std; 27 | 28 | MSER::MSER(bool eight, int delta, double minArea, double maxArea, double maxVariation, 29 | double minDiversity) : eight_(eight), delta_(delta), minArea_(minArea), 30 | maxArea_(maxArea), maxVariation_(maxVariation), minDiversity_(minDiversity), pool_(256), 31 | poolIndex_(0) 32 | { 33 | // Parameter check 34 | assert(delta > 0); 35 | assert(minArea >= 0.0); 36 | assert(maxArea <= 1.0); 37 | assert(minArea < maxArea); 38 | assert(maxVariation > 0.0); 39 | assert(minDiversity >= 0.0); 40 | } 41 | 42 | void MSER::operator()(const uint8_t * bits, int width, int height, vector & regions) 43 | { 44 | // 1. Clear the accessible pixel mask, the heap of boundary pixels and the component stack. Push 45 | // a dummy-component onto the stack, with grey-level higher than any allowed in the image. 46 | vector accessible(width * height); 47 | vector boundaryPixels[256]; 48 | int priority = 256; 49 | vector regionStack; 50 | 51 | regionStack.push_back(new (&pool_[poolIndex_++]) Region); 52 | 53 | // 2. Make the source pixel (with its first edge) the current pixel, mark it as accessible and 54 | // store the grey-level of it in the variable current level. 55 | int curPixel = 0; 56 | int curEdge = 0; 57 | int curLevel = bits[0]; 58 | accessible[0] = true; 59 | 60 | // 3. Push an empty component with current level onto the component stack. 61 | step_3: 62 | regionStack.push_back(new (&pool_[poolIndex_++]) Region(curLevel, curPixel)); 63 | 64 | if (poolIndex_ == pool_.size()) 65 | doublePool(regionStack); 66 | 67 | // 4. Explore the remaining edges to the neighbors of the current pixel, in order, as follows: 68 | // For each neighbor, check if the neighbor is already accessible. If it is not, mark it as 69 | // accessible and retrieve its grey-level. If the grey-level is not lower than the current one, 70 | // push it onto the heap of boundary pixels. If on the other hand the grey-level is lower than 71 | // the current one, enter the current pixel back into the queue of boundary pixels for later 72 | // processing (with the next edge number), consider the new pixel and its grey-level and go to 3. 
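/*
  [editor's note] The "heap of boundary pixels" used below is an array of 256
  vectors, one per grey level, with priority tracking the lowest non-empty
  level. Each entry packs a pixel index and the next edge to explore into a
  single int, (pixel << 4) | edge, later unpacked with >> 4 and & 15. A sketch
  of that encoding (hypothetical helpers):

      inline int pack_entry(int pixel, int edge) { return (pixel << 4) | edge; }
      inline int entry_pixel(int entry)          { return entry >> 4; }
      inline int entry_edge(int entry)           { return entry & 15; }

  Four bits suffice because at most 8 edges (plus the "resume at edge + 1"
  increment) ever need to be stored.
*/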
73 | for (;;) { 74 | const int x = curPixel % width; 75 | const int y = curPixel / width; 76 | 77 | for (; curEdge < (eight_ ? 8 : 4); ++curEdge) { 78 | int neighborPixel = curPixel; 79 | 80 | if (eight_) { 81 | switch (curEdge) { 82 | case 0: if (x < width - 1) neighborPixel = curPixel + 1; break; 83 | case 1: if ((x < width - 1) && (y > 0)) neighborPixel = curPixel - width + 1; break; 84 | case 2: if (y > 0) neighborPixel = curPixel - width; break; 85 | case 3: if ((x > 0) && (y > 0)) neighborPixel = curPixel - width - 1; break; 86 | case 4: if (x > 0) neighborPixel = curPixel - 1; break; 87 | case 5: if ((x > 0) && (y < height - 1)) neighborPixel = curPixel + width - 1; break; 88 | case 6: if (y < height - 1) neighborPixel = curPixel + width; break; 89 | default: if ((x < width - 1) && (y < height - 1)) neighborPixel = curPixel + width + 1; break; 90 | } 91 | } 92 | else { 93 | switch (curEdge) { 94 | case 0: if (x < width - 1) neighborPixel = curPixel + 1; break; 95 | case 1: if (y < height - 1) neighborPixel = curPixel + width; break; 96 | case 2: if (x > 0) neighborPixel = curPixel - 1; break; 97 | default: if (y > 0) neighborPixel = curPixel - width; break; 98 | } 99 | } 100 | 101 | if (neighborPixel != curPixel && !accessible[neighborPixel]) { 102 | const int neighborLevel = bits[neighborPixel]; 103 | accessible[neighborPixel] = true; 104 | 105 | if (neighborLevel >= curLevel) { 106 | boundaryPixels[neighborLevel].push_back(neighborPixel << 4); 107 | 108 | if (neighborLevel < priority) 109 | priority = neighborLevel; 110 | } 111 | else { 112 | boundaryPixels[curLevel].push_back((curPixel << 4) | (curEdge + 1)); 113 | 114 | if (curLevel < priority) 115 | priority = curLevel; 116 | 117 | curPixel = neighborPixel; 118 | curEdge = 0; 119 | curLevel = neighborLevel; 120 | 121 | goto step_3; 122 | } 123 | } 124 | } 125 | 126 | // 5. Accumulate the current pixel to the component at the top of the stack (water 127 | // saturates the current pixel). 128 | regionStack.back()->accumulate(x, y); 129 | 130 | // 6. Pop the heap of boundary pixels. If the heap is empty, we are done. If the returned 131 | // pixel is at the same grey-level as the previous, go to 4. 132 | if (priority == 256) { 133 | regionStack.back()->detect(delta_, minArea_ * width * height, 134 | maxArea_ * width * height, maxVariation_, minDiversity_, 135 | regions); 136 | poolIndex_ = 0; 137 | return; 138 | } 139 | 140 | curPixel = boundaryPixels[priority].back() >> 4; 141 | curEdge = boundaryPixels[priority].back() & 15; 142 | 143 | boundaryPixels[priority].pop_back(); 144 | 145 | while (boundaryPixels[priority].empty() && (priority < 256)) 146 | ++priority; 147 | 148 | const int newPixelGreyLevel = bits[curPixel]; 149 | 150 | if (newPixelGreyLevel != curLevel) { 151 | curLevel = newPixelGreyLevel; 152 | 153 | // 7. The returned pixel is at a higher grey-level, so we must now process 154 | // all components on the component stack until we reach the higher 155 | // grey-level. This is done with the processStack sub-routine, see below. 156 | // Then go to 4. 157 | processStack(newPixelGreyLevel, curPixel, regionStack); 158 | } 159 | } 160 | } 161 | 162 | void MSER::processStack(int newPixelGreyLevel, int pixel, vector & regionStack) 163 | { 164 | // 1. Process component on the top of the stack. The next grey-level is the minimum of 165 | // newPixelGreyLevel and the grey-level for the second component on the stack. 166 | do { 167 | Region * top = regionStack.back(); 168 | 169 | regionStack.pop_back(); 170 | 171 | // 2. 
If newPixelGreyLevel is smaller than the grey-level on the second component on the 172 | // stack, set the top of stack grey-level to newPixelGreyLevel and return from sub-routine 173 | // (This occurs when the new pixel is at a grey-level for which there is not yet a component 174 | // instantiated, so we let the top of stack be that level by just changing its grey-level. 175 | if (newPixelGreyLevel < regionStack.back()->level_) { 176 | regionStack.push_back(new (&pool_[poolIndex_++]) Region(newPixelGreyLevel, pixel)); 177 | 178 | if (poolIndex_ == pool_.size()) 179 | top = reinterpret_cast(reinterpret_cast(top) + 180 | doublePool(regionStack)); 181 | 182 | regionStack.back()->merge(top); 183 | 184 | return; 185 | } 186 | 187 | // 3. Remove the top of stack and merge it into the second component on stack as follows: 188 | // Add the first and second moment accumulators together and/or join the pixel lists. 189 | // Either merge the histories of the components, or take the history from the winner. Note 190 | // here that the top of stack should be considered one ’time-step’ back, so its current 191 | // size is part of the history. Therefore the top of stack would be the winner if its 192 | // current size is larger than the previous size of second on stack. 193 | regionStack.back()->merge(top); 194 | } 195 | // 4. If(newPixelGreyLevel>top of stack grey-level) go to 1. 196 | while (newPixelGreyLevel > regionStack.back()->level_); 197 | } 198 | 199 | ptrdiff_t MSER::doublePool(vector & regionStack) 200 | { 201 | assert(!pool_.empty()); // Cannot double the size of an empty pool 202 | 203 | vector newPool(pool_.size() * 2); 204 | copy(pool_.begin(), pool_.end(), newPool.begin()); 205 | 206 | // Cast to char in case the two pointers do not share the same alignment 207 | const ptrdiff_t offset = reinterpret_cast(&newPool[0]) - 208 | reinterpret_cast(&pool_[0]); 209 | 210 | for (size_t i = 0; i < pool_.size(); ++i) { 211 | if (newPool[i].parent_) 212 | newPool[i].parent_ = 213 | reinterpret_cast(reinterpret_cast(newPool[i].parent_) + offset); 214 | 215 | if (newPool[i].child_) 216 | newPool[i].child_ = 217 | reinterpret_cast(reinterpret_cast(newPool[i].child_) + offset); 218 | 219 | if (newPool[i].next_) 220 | newPool[i].next_ = 221 | reinterpret_cast(reinterpret_cast(newPool[i].next_) + offset); 222 | } 223 | 224 | for (size_t i = 0; i < regionStack.size(); ++i) 225 | regionStack[i] = 226 | reinterpret_cast(reinterpret_cast(regionStack[i]) + offset); 227 | 228 | pool_.swap(newPool); 229 | 230 | return offset; 231 | } 232 | -------------------------------------------------------------------------------- /mser.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------------------- 2 | // Linear time Maximally Stable Extremal Regions implementation as described in D. Nistér and 3 | // H. Stewénius. Linear Time Maximally Stable Extremal Regions. Proceedings of the European 4 | // Conference on Computer Vision (ECCV), 2008. 5 | // 6 | // Copyright (c) 2011 Idiap Research Institute, http://www.idiap.ch/. 7 | // Written by Charles Dubout /. 8 | // 9 | // MSER is free software: you can redistribute it and/or modify it under the terms of the GNU 10 | // General Public License version 3 as published by the Free Software Foundation. 
11 | // 12 | // MSER is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even 13 | // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 14 | // Public License for more details. 15 | // 16 | // You should have received a copy of the GNU General Public License along with MSER. If not, see 17 | // . 18 | //-------------------------------------------------------------------------------------------------- 19 | 20 | #ifndef MSER_H 21 | #define MSER_H 22 | 23 | #include 24 | #include 25 | 26 | #include "region.h" 27 | 28 | /// The MSER class extracts maximally stable extremal regions from a grayscale (8 bits) image. 29 | /// @note The MSER class is not reentrant, so if you want to extract regions in parallel, each 30 | /// thread needs to have its own MSER class instance. 31 | class MSER 32 | { 33 | public: 34 | 35 | /// Constructor. 36 | /// @param[in] eight Use 8-connected pixels instead of 4-connected. 37 | /// @param[in] delta DELTA parameter of the MSER algorithm. Roughly speaking, the stability of a 38 | /// region is the relative variation of the region area when the intensity is changed by delta. 39 | /// @param[in] minArea Minimum area of any stable region relative to the image domain area. 40 | /// @param[in] maxArea Maximum area of any stable region relative to the image domain area. 41 | /// @param[in] maxVariation Maximum variation (absolute stability score) of the regions. 42 | /// @param[in] minDiversity Minimum diversity of the regions. When the relative area of two 43 | /// nested regions is below this threshold, then only the most stable one is selected. 44 | MSER(bool eight = false, int delta = 2, double minArea = 0.0001, double maxArea = 0.5, 45 | double maxVariation = 0.5, double minDiversity = 0.33); 46 | 47 | /// Extracts maximally stable extremal regions from a grayscale (8 bits) image. 48 | /// @param[in] bits Pointer to the first scanline of the image. 49 | /// @param[in] width Width of the image. 50 | /// @param[in] height Height of the image. 51 | /// @param[out] regions Detected MSER. 52 | void operator()(const uint8_t * bits, int width, int height, std::vector & regions); 53 | 54 | private: 55 | // Helper method 56 | void processStack(int newPixelGreyLevel, int pixel, std::vector & regionStack); 57 | 58 | // Double the size of the memory pool 59 | std::ptrdiff_t doublePool(std::vector & regionStack); 60 | 61 | // Parameters 62 | bool eight_; 63 | int delta_; 64 | double minArea_; 65 | double maxArea_; 66 | double maxVariation_; 67 | double minDiversity_; 68 | 69 | // Memory pool of regions for faster allocation 70 | std::vector pool_; 71 | std::size_t poolIndex_; 72 | }; 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /nfa.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /*----------------------------------------------------------------------------*/ 5 | /** Doubles relative error factor 6 | */ 7 | #define RELATIVE_ERROR_FACTOR 100.0 8 | 9 | 10 | /*----------------------------------------------------------------------------*/ 11 | /** Compare doubles by relative error. 12 | 13 | The resulting rounding error after floating point computations 14 | depend on the specific operations done. The same number computed by 15 | different algorithms could present different rounding errors. 
For a 16 | useful comparison, an estimation of the relative rounding error 17 | should be considered and compared to a factor times EPS. The factor 18 | should be related to the cumulated rounding error in the chain of 19 | computation. Here, as a simplification, a fixed factor is used. 20 | */ 21 | static int double_equal(double a, double b) 22 | { 23 | double abs_diff,aa,bb,abs_max; 24 | 25 | /* trivial case */ 26 | if( a == b ) return true; 27 | 28 | abs_diff = fabs(a-b); 29 | aa = fabs(a); 30 | bb = fabs(b); 31 | abs_max = aa > bb ? aa : bb; 32 | 33 | /* DBL_MIN is the smallest normalized number, thus, the smallest 34 | number whose relative error is bounded by DBL_EPSILON. For 35 | smaller numbers, the same quantization steps as for DBL_MIN 36 | are used. Then, for smaller numbers, a meaningful "relative" 37 | error should be computed by dividing the difference by DBL_MIN. */ 38 | if( abs_max < DBL_MIN ) abs_max = DBL_MIN; 39 | 40 | /* equal if relative error <= factor x eps */ 41 | return (abs_diff / abs_max) <= (RELATIVE_ERROR_FACTOR * DBL_EPSILON); 42 | } 43 | 44 | 45 | /*----------------------------------------------------------------------------*/ 46 | /*----------------------------- NFA computation ------------------------------*/ 47 | /*----------------------------------------------------------------------------*/ 48 | 49 | /*----------------------------------------------------------------------------*/ 50 | /** Computes the natural logarithm of the absolute value of 51 | the gamma function of x using the Lanczos approximation. 52 | See http://www.rskey.org/gamma.htm 53 | 54 | The formula used is 55 | @f[ 56 | \Gamma(x) = \frac{ \sum_{n=0}^{N} q_n x^n }{ \Pi_{n=0}^{N} (x+n) } 57 | (x+5.5)^{x+0.5} e^{-(x+5.5)} 58 | @f] 59 | so 60 | @f[ 61 | \log\Gamma(x) = \log\left( \sum_{n=0}^{N} q_n x^n \right) 62 | + (x+0.5) \log(x+5.5) - (x+5.5) - \sum_{n=0}^{N} \log(x+n) 63 | @f] 64 | and 65 | q0 = 75122.6331530, 66 | q1 = 80916.6278952, 67 | q2 = 36308.2951477, 68 | q3 = 8687.24529705, 69 | q4 = 1168.92649479, 70 | q5 = 83.8676043424, 71 | q6 = 2.50662827511. 72 | */ 73 | static double log_gamma_lanczos(double x) 74 | { 75 | static double q[7] = { 75122.6331530, 80916.6278952, 36308.2951477, 76 | 8687.24529705, 1168.92649479, 83.8676043424, 77 | 2.50662827511 }; 78 | double a = (x+0.5) * log(x+5.5) - (x+5.5); 79 | double b = 0.0; 80 | int n; 81 | 82 | for(n=0;n<7;n++) 83 | { 84 | a -= log( x + (double) n ); 85 | b += q[n] * pow( x, (double) n ); 86 | } 87 | return a + log(b); 88 | } 89 | 90 | /*----------------------------------------------------------------------------*/ 91 | /** Computes the natural logarithm of the absolute value of 92 | the gamma function of x using Windschitl method. 93 | See http://www.rskey.org/gamma.htm 94 | 95 | The formula used is 96 | @f[ 97 | \Gamma(x) = \sqrt{\frac{2\pi}{x}} \left( \frac{x}{e} 98 | \sqrt{ x\sinh(1/x) + \frac{1}{810x^6} } \right)^x 99 | @f] 100 | so 101 | @f[ 102 | \log\Gamma(x) = 0.5\log(2\pi) + (x-0.5)\log(x) - x 103 | + 0.5x\log\left( x\sinh(1/x) + \frac{1}{810x^6} \right). 104 | @f] 105 | This formula is a good approximation when x > 15. 106 | */ 107 | static double log_gamma_windschitl(double x) 108 | { 109 | return 0.918938533204673 + (x-0.5)*log(x) - x 110 | + 0.5*x*log( x*sinh(1/x) + 1/(810.0*pow(x,6.0)) ); 111 | } 112 | 113 | /*----------------------------------------------------------------------------*/ 114 | /** Computes the natural logarithm of the absolute value of 115 | the gamma function of x. 
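  [editor's note] Both approximations feed the log_gamma macro defined just
  below, which the NFA code uses to evaluate log binomial coefficients without
  overflow:
      log C(n,k) = log_gamma(n+1) - log_gamma(k+1) - log_gamma(n-k+1).
  A hedged sketch of that use, mirroring the log1term computation further
  down (n, k are the binomial-tail parameters):

      double log_bincoef = log_gamma((double) n + 1.0)
                         - log_gamma((double) k + 1.0)
                         - log_gamma((double) (n-k) + 1.0);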
When x>15 use log_gamma_windschitl(), 116 | otherwise use log_gamma_lanczos(). 117 | */ 118 | #define log_gamma(x) ((x)>15.0?log_gamma_windschitl(x):log_gamma_lanczos(x)) 119 | 120 | 121 | /*----------------------------------------------------------------------------*/ 122 | /** Size of the table to store already computed inverse values. 123 | */ 124 | #define TABSIZE 100000 125 | 126 | /*----------------------------------------------------------------------------*/ 127 | /** Computes -log10(NFA). 128 | 129 | NFA stands for Number of False Alarms: 130 | @f[ 131 | \mathrm{NFA} = NT \cdot B(n,k,p) 132 | @f] 133 | 134 | - NT - number of tests 135 | - B(n,k,p) - tail of binomial distribution with parameters n,k and p: 136 | @f[ 137 | B(n,k,p) = \sum_{j=k}^n 138 | \left(\begin{array}{c}n\\j\end{array}\right) 139 | p^{j} (1-p)^{n-j} 140 | @f] 141 | 142 | The value -log10(NFA) is equivalent but more intuitive than NFA: 143 | - -1 corresponds to 10 mean false alarms 144 | - 0 corresponds to 1 mean false alarm 145 | - 1 corresponds to 0.1 mean false alarms 146 | - 2 corresponds to 0.01 mean false alarms 147 | - ... 148 | 149 | Used this way, the bigger the value, better the detection, 150 | and a logarithmic scale is used. 151 | 152 | @param n,k,p binomial parameters. 153 | @param logNT logarithm of Number of Tests 154 | 155 | The computation is based in the gamma function by the following 156 | relation: 157 | @f[ 158 | \left(\begin{array}{c}n\\k\end{array}\right) 159 | = \frac{ \Gamma(n+1) }{ \Gamma(k+1) \cdot \Gamma(n-k+1) }. 160 | @f] 161 | We use efficient algorithms to compute the logarithm of 162 | the gamma function. 163 | 164 | To make the computation faster, not all the sum is computed, part 165 | of the terms are neglected based on a bound to the error obtained 166 | (an error of 10% in the result is accepted). 167 | */ 168 | static double NFA(int n, int k, double p, double logNT) 169 | { 170 | static double inv[TABSIZE]; /* table to keep computed inverse values */ 171 | double tolerance = 0.1; /* an error of 10% in the result is accepted */ 172 | double log1term,term,bin_term,mult_term,bin_tail,err,p_term; 173 | int i; 174 | 175 | if (p<=0) 176 | p=0.000000000000000000000000000001; 177 | if (p>=1) 178 | p=0.999999999999999999999999999999; 179 | 180 | /* check parameters */ 181 | if( n<0 || k<0 || k>n || p<=0.0 || p>=1.0 ) { 182 | //fprintf(stderr,"nfa: wrong n, k or p values. (%d , %d , %f)",n,k,p); 183 | //exit(-1); 184 | } 185 | 186 | /* trivial cases */ 187 | if( n==0 || k==0 ) return -logNT; 188 | if( n==k ) return -logNT - (double) n * log10(p); 189 | 190 | /* probability term */ 191 | p_term = p / (1.0-p); 192 | 193 | /* compute the first term of the series */ 194 | /* 195 | binomial_tail(n,k,p) = sum_{i=k}^n bincoef(n,i) * p^i * (1-p)^{n-i} 196 | where bincoef(n,i) are the binomial coefficients. 197 | But 198 | bincoef(n,k) = gamma(n+1) / ( gamma(k+1) * gamma(n-k+1) ). 199 | We use this to compute the first term. Actually the log of it. 200 | */ 201 | log1term = log_gamma( (double) n + 1.0 ) - log_gamma( (double) k + 1.0 ) 202 | - log_gamma( (double) (n-k) + 1.0 ) 203 | + (double) k * log(p) + (double) (n-k) * log(1.0-p); 204 | term = exp(log1term); 205 | 206 | /* in some cases no more computations are needed */ 207 | if( double_equal(term,0.0) ) /* the first term is almost zero */ 208 | { 209 | if( (double) k > (double) n * p ) /* at begin or end of the tail? 
*/ 210 | return -log1term / M_LN10 - logNT; /* end: use just the first term */ 211 | else 212 | return -logNT; /* begin: the tail is roughly 1 */ 213 | } 214 | 215 | /* compute more terms if needed */ 216 | bin_tail = term; 217 | for(i=k+1;i<=n;i++) 218 | { 219 | /* 220 | As 221 | term_i = bincoef(n,i) * p^i * (1-p)^(n-i) 222 | and 223 | bincoef(n,i)/bincoef(n,i-1) = n-1+1 / i, 224 | then, 225 | term_i / term_i-1 = (n-i+1)/i * p/(1-p) 226 | and 227 | term_i = term_i-1 * (n-i+1)/i * p/(1-p). 228 | 1/i is stored in a table as they are computed, 229 | because divisions are expensive. 230 | p/(1-p) is computed only once and stored in 'p_term'. 231 | */ 232 | bin_term = (double) (n-i+1) * ( ii. 242 | Then, the error on the binomial tail when truncated at 243 | the i term can be bounded by a geometric series of form 244 | term_i * sum mult_term_i^j. */ 245 | err = term * ( ( 1.0 - pow( mult_term, (double) (n-i+1) ) ) / 246 | (1.0-mult_term) - 1.0 ); 247 | 248 | /* One wants an error at most of tolerance*final_result, or: 249 | tolerance * abs(-log10(bin_tail)-logNT). 250 | Now, the error that can be accepted on bin_tail is 251 | given by tolerance*final_result divided by the derivative 252 | of -log10(x) when x=bin_tail. that is: 253 | tolerance * abs(-log10(bin_tail)-logNT) / (1/bin_tail) 254 | Finally, we truncate the tail if the error is less than: 255 | tolerance * abs(-log10(bin_tail)-logNT) * bin_tail */ 256 | if( err < tolerance * fabs(-log10(bin_tail)-logNT) * bin_tail ) break; 257 | } 258 | } 259 | return -log10(bin_tail) - logNT; 260 | } 261 | -------------------------------------------------------------------------------- /region.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "region.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | using namespace cv; 11 | 12 | Region::Region(int level, int pixel) : level_(level), pixel_(pixel), area_(0), 13 | variation_(numeric_limits::infinity()), stable_(false), parent_(0), child_(0), next_(0), bbox_x1_(10000), bbox_y1_(10000), bbox_x2_(0), bbox_y2_(0) 14 | { 15 | fill_n(moments_, 5, 0.0); 16 | } 17 | 18 | //inline void Region::accumulate(int x, int y) 19 | void Region::accumulate(int x, int y) 20 | { 21 | ++area_; 22 | moments_[0] += x; 23 | moments_[1] += y; 24 | moments_[2] += x * x; 25 | moments_[3] += x * y; 26 | moments_[4] += y * y; 27 | 28 | bbox_x1_ = min(bbox_x1_, x); 29 | bbox_y1_ = min(bbox_y1_, y); 30 | bbox_x2_ = max(bbox_x2_, x); 31 | bbox_y2_ = max(bbox_y2_, y); 32 | } 33 | 34 | void Region::merge(Region * child) 35 | { 36 | assert(!child->parent_); 37 | assert(!child->next_); 38 | 39 | // Add the moments together 40 | area_ += child->area_; 41 | moments_[0] += child->moments_[0]; 42 | moments_[1] += child->moments_[1]; 43 | moments_[2] += child->moments_[2]; 44 | moments_[3] += child->moments_[3]; 45 | moments_[4] += child->moments_[4]; 46 | 47 | // Rebuild bounding box 48 | bbox_x1_ = min(bbox_x1_, child->bbox_x1_); 49 | bbox_y1_ = min(bbox_y1_, child->bbox_y1_); 50 | bbox_x2_ = max(bbox_x2_, child->bbox_x2_); 51 | bbox_y2_ = max(bbox_y2_, child->bbox_y2_); 52 | 53 | child->next_ = child_; 54 | child_ = child; 55 | child->parent_ = this; 56 | } 57 | 58 | void Region::process(int delta, int minArea, int maxArea, double maxVariation) 59 | { 60 | // Find the last parent with level not higher than level + delta 61 | const Region * parent = this; 62 | 63 | while (parent->parent_ && (parent->parent_->level_ <= (level_ + delta))) 64 | parent = 
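/*
  [editor's note] This loop climbs to the last ancestor whose grey level is
  still within level_ + delta; the statement just below then scores stability
  as variation_ = (ancestor area - own area) / own area, i.e. the relative
  area growth over a delta-step of intensity (the MSER stability criterion
  cited at the top of mser.cpp). Worked example: a region of area 100 whose
  delta-ancestor has area 130 gets variation_ = (130 - 100) / 100 = 0.3, so
  it fails any maxVariation threshold below 0.3.
*/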
--------------------------------------------------------------------------------
/region.cpp:
--------------------------------------------------------------------------------
1 |
2 |
3 | #include "region.h"
4 |
5 | #include <algorithm>
6 | #include <cassert>
7 | #include <limits>
8 |
9 | using namespace std;
10 | using namespace cv;
11 |
12 | Region::Region(int level, int pixel) : level_(level), pixel_(pixel), area_(0),
13 | variation_(numeric_limits<double>::infinity()), stable_(false), parent_(0), child_(0), next_(0), bbox_x1_(10000), bbox_y1_(10000), bbox_x2_(0), bbox_y2_(0)
14 | {
15 |   fill_n(moments_, 5, 0.0);
16 | }
17 |
18 | //inline void Region::accumulate(int x, int y)
19 | void Region::accumulate(int x, int y)
20 | {
21 |   ++area_;
22 |   moments_[0] += x;
23 |   moments_[1] += y;
24 |   moments_[2] += x * x;
25 |   moments_[3] += x * y;
26 |   moments_[4] += y * y;
27 |
28 |   bbox_x1_ = min(bbox_x1_, x);
29 |   bbox_y1_ = min(bbox_y1_, y);
30 |   bbox_x2_ = max(bbox_x2_, x);
31 |   bbox_y2_ = max(bbox_y2_, y);
32 | }
33 |
34 | void Region::merge(Region * child)
35 | {
36 |   assert(!child->parent_);
37 |   assert(!child->next_);
38 |
39 |   // Add the moments together
40 |   area_ += child->area_;
41 |   moments_[0] += child->moments_[0];
42 |   moments_[1] += child->moments_[1];
43 |   moments_[2] += child->moments_[2];
44 |   moments_[3] += child->moments_[3];
45 |   moments_[4] += child->moments_[4];
46 |
47 |   // Rebuild the bounding box as the union of both boxes
48 |   bbox_x1_ = min(bbox_x1_, child->bbox_x1_);
49 |   bbox_y1_ = min(bbox_y1_, child->bbox_y1_);
50 |   bbox_x2_ = max(bbox_x2_, child->bbox_x2_);
51 |   bbox_y2_ = max(bbox_y2_, child->bbox_y2_);
52 |
53 |   child->next_ = child_;
54 |   child_ = child;
55 |   child->parent_ = this;
56 | }
57 |
58 | void Region::process(int delta, int minArea, int maxArea, double maxVariation)
59 | {
60 |   // Find the last parent with level not higher than level + delta
61 |   const Region * parent = this;
62 |
63 |   while (parent->parent_ && (parent->parent_->level_ <= (level_ + delta)))
64 |     parent = parent->parent_;
65 |
66 |   // Calculate variation
67 |   variation_ = static_cast<double>(parent->area_ - area_) / area_;
68 |
69 |   // Whether or not the region *could* be stable
70 |   const bool stable = (!parent_ || (variation_ <= parent_->variation_)) &&
71 |     (area_ >= minArea) && (area_ <= maxArea) && (variation_ <= maxVariation);
72 |
73 |   // Process all the children
74 |   for (Region * child = child_; child; child = child->next_) {
75 |     child->process(delta, minArea, maxArea, maxVariation);
76 |
77 |     if (stable && (variation_ < child->variation_))
78 |       stable_ = true;
79 |   }
80 |
81 |   // The region can be stable even without any children
82 |   if (!child_ && stable)
83 |     stable_ = true;
84 | }
85 |
86 | bool Region::check(double variation, int area) const
87 | {
88 |   if (area_ <= area)
89 |     return true;
90 |
91 |   if (stable_ && (variation_ < variation))
92 |     return false;
93 |
94 |   for (Region * child = child_; child; child = child->next_)
95 |     if (!child->check(variation, area))
96 |       return false;
97 |
98 |   return true;
99 | }
100 |
101 | void Region::save(double minDiversity, vector<Region> & regions)
102 | {
103 |   if (stable_) {
104 |     const int minParentArea = area_ / (1.0 - minDiversity) + 0.5;
105 |
106 |     const Region * parent = this;
107 |
108 |     while (parent->parent_ && (parent->parent_->area_ < minParentArea)) {
109 |       parent = parent->parent_;
110 |
111 |       if (parent->stable_ && (parent->variation_ <= variation_)) {
112 |         stable_ = false;
113 |         break;
114 |       }
115 |     }
116 |
117 |     if (stable_) {
118 |       const int maxChildArea = area_ * (1.0 - minDiversity) + 0.5;
119 |
120 |       if (!check(variation_, maxChildArea))
121 |         stable_ = false;
122 |     }
123 |
124 |     if (stable_) {
125 |       regions.push_back(*this);
126 |       regions.back().parent_ = 0;
127 |       regions.back().child_ = 0;
128 |       regions.back().next_ = 0;
129 |     }
130 |   }
131 |
132 |   for (Region * child = child_; child; child = child->next_)
133 |     child->save(minDiversity, regions);
134 | }
135 |
136 | void Region::detect(int delta, int minArea, int maxArea, double maxVariation,
137 |   double minDiversity, vector<Region> & regions)
138 | {
139 |   process(delta, minArea, maxArea, maxVariation);
140 |   save(minDiversity, regions);
141 | }
142 |
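/* A worked example of the stability test in process() (editor's
   illustration, not original code): with delta = 2, suppose this region has
   area_ = 1000 and its ancestor two grey levels up has area 1200. Then
       variation_ = (1200 - 1000) / 1000 = 0.2,
   and the region is a stability candidate only if 0.2 <= maxVariation,
   minArea <= 1000 <= maxArea, and no direct parent has lower variation. */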
143 | /* function: er_fill is borrowed from vlfeat-0.9.14/toolbox/mser/vl_erfill.c
144 | ** description: Extremal Regions filling
145 | ** author: Andrea Vedaldi
146 | **/
147 |
148 | /*
149 | Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
150 | All rights reserved.
151 |
152 | The function is part of the VLFeat library and is made available under
153 | the terms of the BSD license (see the COPYING file).
154 | */
155 | void Region::er_fill(Mat& _grey_img)
156 | {
157 |   const uint8_t *src = (uint8_t*)_grey_img.data;
158 |
159 |
160 |   double er = pixel_;
161 |   int ndims = 2;
162 |   int dims [2];
163 |   dims[0] = _grey_img.cols;
164 |   dims[1] = _grey_img.rows;
165 |   int last = 0 ;
166 |   int last_expanded = 0 ;
167 |   uint8_t value = 0 ;
168 |
169 |   double const * er_pt ;
170 |
171 |   int* subs_pt ;        /* N-dimensional subscript */
172 |   int* nsubs_pt ;       /* diff-subscript to point to neigh. */
173 |   int* strides_pt ;     /* strides to move in image array */
174 |   uint8_t* visited_pt ; /* flag */
175 |   int* members_pt ;     /* region members */
176 |   bool invert = false;
177 |
178 |   /* get dimensions */
179 |   int nel = dims[0]*dims[1];
180 |   uint8_t *I_pt = (uint8_t *)src;
181 |
182 |   /* allocate stuff */
183 |   subs_pt    = (int*) malloc( sizeof(int) * ndims ) ;
184 |   nsubs_pt   = (int*) malloc( sizeof(int) * ndims ) ;
185 |   strides_pt = (int*) malloc( sizeof(int) * ndims ) ;
186 |   visited_pt = (uint8_t*)malloc( sizeof(uint8_t) * nel ) ;
187 |   members_pt = (int*) malloc( sizeof(int) * nel ) ;
188 |
189 |   er_pt = &er;
190 |
191 |   /* compute strides to move into the N-dimensional image array */
192 |   strides_pt [0] = 1 ;
193 |   int k;
194 |   for(k = 1 ; k < ndims ; ++k) {
195 |     strides_pt [k] = strides_pt [k-1] * dims [k-1] ;
196 |   }
197 |
198 |   //fprintf(stderr,"strides_pt %d %d \n",strides_pt [0],strides_pt [1]);
199 |
200 |   /* load first pixel */
201 |   memset(visited_pt, 0, sizeof(uint8_t) * nel) ;
202 |   {
203 |     int idx = (int) *er_pt ;
204 |     if (idx < 0) {
205 |       idx = -idx;
206 |       invert = true ;
207 |     }
208 |     if( idx < 0 || idx > nel+1 ) {
209 |       fprintf(stderr,"ER=%d out of range [1,%d]",idx,nel) ;
210 |       return;
211 |     }
212 |     members_pt [last++] = idx ;
213 |   }
214 |   value = I_pt[ members_pt[0] ] ;
215 |
216 |   /* -----------------------------------------------------------------
217 |   * Fill region
218 |   * -------------------------------------------------------------- */
219 |   while(last_expanded < last) {
220 |
221 |     /* pop next node xi */
222 |     int index = members_pt[last_expanded++] ;
223 |
224 |     /* convert index into a subscript sub; also initialize nsubs
225 |     to (-1,-1,...,-1) */
226 |     {
227 |       int temp = index ;
228 |       for(k = ndims-1 ; k >=0 ; --k) {
229 |         nsubs_pt [k] = -1 ;
230 |         subs_pt [k] = temp / strides_pt [k] ;
231 |         temp = temp % strides_pt [k] ;
232 |       }
233 |     }
234 |
235 |     /* process neighbors of xi */
236 |     while(true) {
237 |       int good = true ;
238 |       int nindex = 0 ;
239 |
240 |       /* compute NSUBS+SUB, the corresponding neighbor index NINDEX
241 |       and check that the pixel is within image boundaries. */
242 |       for(k = 0 ; k < ndims && good ; ++k) {
243 |         int temp = nsubs_pt [k] + subs_pt [k] ;
244 |         good &= 0 <= temp && temp < (signed) dims[k] ;
245 |         nindex += temp * strides_pt [k] ;
246 |       }
247 |
248 |       /* process the neighbor only if
249 |       1 - the pixel is within image boundaries;
250 |       2 - the pixel is indeed different from the current node
251 |       (this happens when nsub=(0,0,...,0));
252 |       3 - the pixel has value not greater than val,
253 |       i.e. it is a pixel older than xi;
254 |       4 - the pixel has not been visited yet
255 |       */
256 |       if(good
257 |         && nindex != index
258 |         && ((!invert && I_pt [nindex] <= value) ||
259 |             ( invert && I_pt [nindex] >= value))
260 |         && ! visited_pt [nindex] ) {
261 |
262 |         //fprintf(stderr,"nvalue %d value %d",(int)(I_pt [nindex]),(int)(I_pt [index]));
263 |         //fprintf(stderr," index %d\n",index);
264 |         //fprintf(stderr,"neighbour index %d\n",nindex);
265 |
266 |         /* mark as visited */
267 |         visited_pt [nindex] = 1 ;
268 |
269 |         /* add to list */
270 |         members_pt [last++] = nindex ;
271 |       }
272 |
273 |       /* move to next neighbor */
274 |       k = 0 ;
275 |       while(++ nsubs_pt [k] > 1) {
276 |         nsubs_pt [k++] = -1 ;
277 |         if(k == ndims) goto done_all_neighbors ;
278 |       }
279 |     } /* next neighbor */
280 |     done_all_neighbors : ;
281 |   } /* goto pop next member */
282 |
283 |   /*
284 |   * Save results
285 |   */
286 |   {
287 |     for (int i = 0 ; i < last ; ++i) {
288 |       pixels_.push_back(members_pt[i]);
289 |       //fprintf(stderr," pixel inserted %d: %d\n",i,members_pt[i]);
290 |     }
291 |   }
292 |
293 |
294 |   free( members_pt ) ;
295 |   free( visited_pt ) ;
296 |   free( strides_pt ) ;
297 |   free( nsubs_pt ) ;
298 |   free( subs_pt ) ;
299 |
300 |   return;
301 | }
302 |
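/* Worked example of the flat indexing used by er_fill() (editor's
   illustration, with made-up numbers): in a 4-column image the strides are
   {1, 4}, so the seed index 5 decodes as y = 5 / 4 = 1 and x = 5 % 4 = 1;
   conversely pixel_ = y * width + x = 5. The eight neighbors of a pixel are
   enumerated by sweeping nsubs over {-1,0,1}^2, skipping (0,0). */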
303 | void Region::extract_features(Mat& _lab_img, Mat& _grey_img, Mat& _gradient_magnitude)
304 | {
305 |
306 |   bbox_x2_++;
307 |   bbox_y2_++;
308 |
309 |   center_.x = bbox_x1_ + (bbox_x2_-bbox_x1_) / 2; // center of the bounding box
310 |   center_.y = bbox_y1_ + (bbox_y2_-bbox_y1_) / 2;
311 |
312 |   bbox_ = cvRect(bbox_x1_,bbox_y1_,bbox_x2_-bbox_x1_,bbox_y2_-bbox_y1_);
313 |
314 |   Mat canvas = Mat::zeros(_lab_img.size(),CV_8UC1);
315 |   uchar* rsptr = (uchar*)canvas.data;
316 |   for (int p=0; p<pixels_.size(); p++)
317 |     rsptr[pixels_.at(p)] = 255; // paint the region mask on the canvas
[lines 318-378 are missing from this copy of the file: they compute the intensity, color, boundary, stroke and gradient statistics declared in region.h, and build the binary mask 'bw' used below]
379 |   vector<vector<Point> > contours0;
380 |   vector<Vec4i> hierarchy;
381 |   findContours( bw, contours0, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE);
382 |   for (int k=0; k<contours0.size(); k++)
383 |   {
384 |     // count only significant holes: children of the outer contour that
385 |     if ((hierarchy[k][3]==0)&&((contourArea(Mat(contours0.at(k)))/area_>0.01)||(contourArea(Mat(contours0.at(k)))>31))) // cover >1% of the area or >31 px
386 |     {
387 |       num_holes_++;
388 |       holes_area_ += (int)contourArea(Mat(contours0.at(k)));
389 |     }
390 |   }
391 |   perimeter_ = (int)contours0.at(0).size();
392 |   rect_ = minAreaRect(contours0.at(0));
393 | }
394 |
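Before region.h, it is worth spelling out the preprocessing contract that extract_features() assumes. A minimal sketch under stated assumptions (the driver function and variable names are illustrative; main.cpp owns the real pipeline, and the CV_64F gradient map would be filled by get_gradient_magnitude() from utils.h):

#include <opencv/cv.h>
#include "region.h"

void describe_region(Region &r, cv::Mat &bgr)
{
    // extract_features() expects an L*a*b* image, a grey image and a
    // CV_64F gradient-magnitude map of the same size.
    cv::Mat lab, grey;
    cv::cvtColor(bgr, lab, CV_BGR2Lab);
    cv::cvtColor(bgr, grey, CV_BGR2GRAY);
    cv::Mat grad = cv::Mat::zeros(bgr.size(), CV_64F); // fill via utils.h

    r.er_fill(grey);                      // grow pixels_ from the seed pixel
    r.extract_features(lab, grey, grad);  // then fill the descriptor fields
}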
--------------------------------------------------------------------------------
/region.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef REGION_H
3 | #define REGION_H
4 |
5 | #include <vector>
6 | #include <algorithm>
7 |
8 | #include <opencv/cv.h>
9 | #include <opencv/highgui.h>
10 |
11 | /// A Maximally Stable Extremal Region.
12 | class Region
13 | {
14 | public:
15 |   int level_; ///< Level at which the region is processed.
16 |   int pixel_; ///< Index of the initial pixel (y * width + x).
17 |   int area_;  ///< Area of the region (moment zero).
18 |   double moments_[5]; ///< First and second moments of the region (x, y, x^2, xy, y^2).
19 |   double variation_;  ///< MSER variation.
20 |
21 |   /// Axis aligned bounding box of the region
22 |   int bbox_x1_;
23 |   int bbox_y1_;
24 |   int bbox_x2_;
25 |   int bbox_y2_;
26 |
27 |   /// Constructor.
28 |   /// @param[in] level Level at which the region is processed.
29 |   /// @param[in] pixel Index of the initial pixel (y * width + x).
30 |   Region(int level = 256, int pixel = 0);
31 |
32 |   /// Fills an Extremal Region (ER) by region growing from the initial pixel (pixel_).
33 |   /// @param[in] grey_img Grey level image
34 |   void er_fill(cv::Mat& _grey_img);
35 |
36 |   std::vector<int> pixels_; ///< List of all pixel indexes (y * width + x) of the region
37 |
38 |   /// Extract features.
39 |   /// @param[in] lab_img L*a*b* color image to extract color information
40 |   /// @param[in] grey_img Grey level version of the original image
41 |   /// @param[in] gradient_magnitude Gradient magnitude of the original image
42 |   void extract_features(cv::Mat& _lab_img, cv::Mat& _grey_img, cv::Mat& _gradient_magnitude);
43 |
44 |   cv::Point center_;     ///< Center coordinates of the region
45 |   cv::Rect bbox_;        ///< Axis aligned bounding box
46 |   cv::RotatedRect rect_; ///< Minimum-area (rotated) bounding rectangle
47 |   int perimeter_;        ///< Perimeter of the region
48 |   int num_holes_;        ///< Number of holes of the region
49 |   int holes_area_;       ///< Total area filled by all holes of this region
50 |   float intensity_mean_; ///< Mean intensity of the whole region
51 |   float intensity_std_;  ///< Intensity standard deviation of the whole region
52 |   std::vector<float> color_mean_; ///< Mean color (L*a*b*) of the whole region
53 |   std::vector<float> color_std_;  ///< Color (L*a*b*) standard deviation of the whole region
54 |   float boundary_intensity_mean_; ///< Mean intensity of the boundary of the region
55 |   float boundary_intensity_std_;  ///< Intensity standard deviation of the boundary of the region
56 |   std::vector<float> boundary_color_mean_; ///< Mean color (L*a*b*) of the boundary of the region
57 |   std::vector<float> boundary_color_std_;  ///< Color (L*a*b*) standard deviation of the boundary of the region
58 |   double stroke_mean_;   ///< Mean stroke width of the whole region
59 |   double stroke_std_;    ///< Stroke width standard deviation of the whole region
60 |   double gradient_mean_; ///< Mean gradient magnitude of the whole region
61 |   double gradient_std_;  ///< Gradient magnitude standard deviation of the whole region
62 |
63 |   float classifier_votes_; ///< Votes of the Region_Classifier for this region
64 |
65 | private:
66 |   bool stable_;     // Flag indicating if the region is stable
67 |   Region * parent_; // Pointer to the parent region
68 |   Region * child_;  // Pointer to the first child
69 |   Region * next_;   // Pointer to the next (sister) region
70 |
71 |   void accumulate(int x, int y);
72 |   void merge(Region * child);
73 |   void detect(int delta, int minArea, int maxArea, double maxVariation, double minDiversity,
74 |     std::vector<Region> & regions);
75 |   void process(int delta, int minArea, int maxArea, double maxVariation);
76 |   bool check(double variation, int area) const;
77 |   void save(double minDiversity, std::vector<Region> & regions);
78 |
79 |   friend class MSER;
80 | };
81 |
82 | #endif
83 |
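For orientation, a minimal walk over the public fields declared above (the helper function and its output format are illustrative assumptions, not part of the repo):

#include <cstdio>
#include "region.h"

// Print a one-line summary of a region's geometry and shape statistics.
void print_region_summary(const Region &r)
{
    std::printf("area=%d perim=%d holes=%d (%.1f%% of area) bbox=%dx%d stroke_mean=%.2f stroke_std=%.2f\n",
                r.area_, r.perimeter_, r.num_holes_,
                r.area_ ? 100.0 * r.holes_area_ / r.area_ : 0.0,
                r.bbox_.width, r.bbox_.height,
                r.stroke_mean_, r.stroke_std_);
}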
\n"); 15 | exit(-1); 16 | } 17 | } 18 | 19 | bool RegionClassifier::operator()(Region *region) 20 | { 21 | assert(region != NULL); 22 | 23 | float sample_arr[] = {0, region->stroke_mean_, region->stroke_std_, region->stroke_std_/region->stroke_mean_, (float)region->area_, (float)region->perimeter_, (float)region->perimeter_/region->area_, (float)min( region->rect_.size.width, region->rect_.size.height)/max( region->rect_.size.width, region->rect_.size.height), sqrt(region->area_)/region->perimeter_, (float)region->num_holes_, (float)region->holes_area_/region->area_}; 24 | vector sample (sample_arr, sample_arr + sizeof(sample_arr) / sizeof(sample_arr[0]) ); 25 | 26 | float votes = boost_.predict( Mat(sample), Mat(), Range::all(), false, true ); 27 | 28 | if (votes <= decision_threshold_) 29 | return true; 30 | 31 | return false; 32 | } 33 | 34 | float RegionClassifier::get_votes(Region *region) 35 | { 36 | assert(region != NULL); 37 | 38 | float sample_arr[] = {0, region->stroke_mean_, region->stroke_std_, region->stroke_std_/region->stroke_mean_, (float)region->area_, (float)region->perimeter_, (float)region->perimeter_/region->area_, (float)min( region->rect_.size.width, region->rect_.size.height)/max( region->rect_.size.width, region->rect_.size.height), sqrt(region->area_)/region->perimeter_, (float)region->num_holes_, (float)region->holes_area_/region->area_}; 39 | vector sample (sample_arr, sample_arr + sizeof(sample_arr) / sizeof(sample_arr[0]) ); 40 | 41 | return boost_.predict( Mat(sample), Mat(), Range::all(), false, true ); 42 | } 43 | -------------------------------------------------------------------------------- /region_classifier.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef REGION_CLASSIFIER_H 3 | #define REGION_CLASSIFIER_H 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "region.h" 13 | 14 | using namespace cv; 15 | using namespace std; 16 | 17 | class RegionClassifier 18 | { 19 | public: 20 | 21 | /// Constructor. 22 | /// @param[in] trained_boost_filename 23 | /// @param[in] decision_threshold 24 | RegionClassifier(char *trained_boost_filename, float prediction_threshold=0.); 25 | 26 | /// Classify a region. Returns true iif region is classified as a text character 27 | /// @param[in] regions A pointer to the region to be classified. 28 | bool operator()(Region *region); 29 | 30 | /// Classify a region. Returns the average classification votes 31 | /// @param[in] regions A pointer to the region to be classified. 
--------------------------------------------------------------------------------
/region_classifier.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef REGION_CLASSIFIER_H
3 | #define REGION_CLASSIFIER_H
4 |
5 | #include <fstream>
6 | #include <cstdlib>
7 |
8 | #include <opencv/cv.h>
9 | #include <opencv/ml.h>
10 | #include <opencv/highgui.h>
11 |
12 | #include "region.h"
13 |
14 | using namespace cv;
15 | using namespace std;
16 |
17 | class RegionClassifier
18 | {
19 | public:
20 |
21 |   /// Constructor.
22 |   /// @param[in] trained_boost_filename
23 |   /// @param[in] decision_threshold
24 |   RegionClassifier(char *trained_boost_filename, float decision_threshold=0.);
25 |
26 |   /// Classify a region. Returns true iff the region is classified as a text character.
27 |   /// @param[in] region A pointer to the region to be classified.
28 |   bool operator()(Region *region);
29 |
30 |   /// Classify a region. Returns the average classification votes.
31 |   /// @param[in] region A pointer to the region to be classified.
32 |   float get_votes(Region *region);
33 |
34 | private:
35 |
36 |   // Boosted tree classifier
37 |   CvBoost boost_;
38 |
39 |   // Classification parameter
40 |   float decision_threshold_;
41 | };
42 |
43 | #endif
44 |
--------------------------------------------------------------------------------
/sample_images/T050.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lluisgomez/text_extraction/092d3f198a23c99ae16f670ad95c89ac41525991/sample_images/T050.JPG
--------------------------------------------------------------------------------
/sample_images/T051.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lluisgomez/text_extraction/092d3f198a23c99ae16f670ad95c89ac41525991/sample_images/T051.JPG
--------------------------------------------------------------------------------
/sample_images/T072.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lluisgomez/text_extraction/092d3f198a23c99ae16f670ad95c89ac41525991/sample_images/T072.JPG
--------------------------------------------------------------------------------
/text_extract.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lluisgomez/text_extraction/092d3f198a23c99ae16f670ad95c89ac41525991/text_extract.h
--------------------------------------------------------------------------------
/utils.h:
--------------------------------------------------------------------------------
1 |
2 | void accumulate_evidence(vector<int> *meaningful_cluster, int grow, Mat *co_occurrence)
3 | {
4 |   //for (int k=0; k<meaningful_cluster->size(); k++)
5 |   for (int i=0; i<meaningful_cluster->size(); i++)
6 |     for (int j=i; j<meaningful_cluster->size(); j++)
7 |       if (meaningful_cluster->at(i) != meaningful_cluster->at(j))
8 |       {
9 |         // the co-occurrence matrix is assumed to be CV_64F
10 |         co_occurrence->at<double>(meaningful_cluster->at(i), meaningful_cluster->at(j)) += grow;
11 |         co_occurrence->at<double>(meaningful_cluster->at(j), meaningful_cluster->at(i)) += grow;
12 |       }
13 | }
14 |
15 | void get_gradient_magnitude(Mat& _grey_img, Mat& _gradient_magnitude)
16 | {
17 |   cv::Mat C = cv::Mat_<double>(_grey_img);
18 |
19 |   cv::Mat kernel = (cv::Mat_<double>(1,3) << -1,0,1);
20 |   cv::Mat grad_x;
21 |   filter2D(C, grad_x, -1, kernel, cv::Point(-1,-1), 0, cv::BORDER_DEFAULT);
22 |
23 |   cv::Mat kernel2 = (cv::Mat_<double>(3,1) << -1,0,1);
24 |   cv::Mat grad_y;
25 |   filter2D(C, grad_y, -1, kernel2, cv::Point(-1,-1), 0, cv::BORDER_DEFAULT);
26 |
27 |   for(int i=0; i<grad_x.rows; i++)
28 |     for(int j=0; j<grad_x.cols; j++)
29 |       _gradient_magnitude.at<double>(i,j) = sqrt(pow(grad_x.at<double>(i,j),2)+pow(grad_y.at<double>(i,j),2));
30 |
31 | }
32 |
33 | static uchar bcolors[][3] =
34 | {
35 |   {0,0,255},
36 |   {0,128,255},
37 |   {0,255,255},
38 |   {0,255,0},
39 |   {255,128,0},
40 |   {255,255,0},
41 |   {255,0,0},
42 |   {255,0,255},
43 |   {255,255,255}
44 | };
45 |
46 | void drawClusters(Mat& img, vector<Region> *regions, vector< vector<int> > *meaningful_clusters)
47 | {
48 |   //img = img*0;
49 |   uchar* rsptr = (uchar*)img.data;
50 |   for (int i=0; i<meaningful_clusters->size(); i++)
51 |   {
52 |
53 |     for (int c=0; c<meaningful_clusters->at(i).size(); c++)
54 |     {
55 |
56 |       for (int p=0; p<regions->at(meaningful_clusters->at(i).at(c)).pixels_.size(); p++)
57 |       {
58 |         rsptr[regions->at(meaningful_clusters->at(i).at(c)).pixels_.at(p)*3]   = bcolors[i%9][2];
59 |         rsptr[regions->at(meaningful_clusters->at(i).at(c)).pixels_.at(p)*3+1] = bcolors[i%9][1];
60 |         rsptr[regions->at(meaningful_clusters->at(i).at(c)).pixels_.at(p)*3+2] = bcolors[i%9][0];
61 |       }
62 |     }
63 |   }
64 | }
--------------------------------------------------------------------------------
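The utils.h helpers are header-defined and expect preallocated buffers. A minimal sketch of their contracts (the matrix sizes, the region count and the using-directives are the assumptions the code above relies on; main.cpp owns the real setup):

#include <opencv/cv.h>
#include <opencv/highgui.h>
using namespace std;
using namespace cv;
#include "region.h"
#include "utils.h"   // header-defined, needs the using-directives above

int main()
{
    cv::Mat grey = cv::imread("sample_images/T050.JPG", 0); // grayscale

    // get_gradient_magnitude() writes with at<double>, so the output
    // must be preallocated as CV_64F of the same size.
    cv::Mat grad(grey.size(), CV_64F);
    get_gradient_magnitude(grey, grad);

    // accumulate_evidence() indexes the matrix by region indices, so the
    // co-occurrence matrix must be NxN (N = number of regions), CV_64F.
    int N = 100; // illustrative region count
    cv::Mat co_occurrence = cv::Mat::zeros(N, N, CV_64F);
    std::vector<int> cluster;
    cluster.push_back(3); cluster.push_back(7);
    accumulate_evidence(&cluster, 1, &co_occurrence);

    return 0;
}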