├── .gitignore ├── CMakeLists.txt ├── IndexerLauncher.cmake ├── NearestSearch.cmake ├── SearcherTester.cmake ├── build_debug.sh ├── build_release.sh ├── data_util.cpp ├── data_util.h ├── docs ├── doxygen.cfg ├── how_to_install.dox ├── index.dox ├── main_page.dox ├── pictures │ └── mult.jpg └── search.dox ├── indexer.h ├── indexer_launcher.cpp ├── launch_indexer_double.sh ├── make_project.bat ├── multitable.hpp ├── ordered_lists_merger.h ├── perfomance_util.cpp ├── perfomance_util.h ├── run_indexer.py ├── run_indexer_vlad.py ├── run_searcher.py ├── run_searcher_vlad.py ├── searcher.h ├── searcher_tester.cpp ├── sift1M_double_4096.dat ├── sift1M_double_4096_8.dat └── test_searcher_double.sh /.gitignore: -------------------------------------------------------------------------------- 1 | html/ 2 | build 3 | run_indexer_vlad.py 4 | run_searcher_vlad.py 5 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ########################################################################################## 2 | # CMake build script for NearestSearch. 3 | # 4 | ########################################################################################## 5 | 6 | #let all libraries be static, not shared 7 | OPTION(BUILD_SHARED_LIBS "Build shared libraries (DLLs)." OFF) 8 | 9 | ########################################################################################## 10 | # lets start describing our project. 
# BUGFIX: cmake_minimum_required must run before project() so that policy
# defaults are established first; the original called project() first.
cmake_minimum_required(VERSION 2.6)
project(NearestSearch CXX C)

find_package(Boost COMPONENTS program_options serialization system filesystem thread REQUIRED)

IF (UNIX)
  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -fopenmp -DMKL_ILP64 -m64")
  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -fopenmp -DMKL_ILP64 -m64")
ENDIF (UNIX)

# CMAKE_CURRENT_LIST_DIR only exists on CMake >= 2.8.3; fall back for older.
IF (NOT DEFINED CMAKE_CURRENT_LIST_DIR)
  SET(CMAKE_CURRENT_LIST_DIR ${CMAKE_CURRENT_SOURCE_DIR})
ENDIF (NOT DEFINED CMAKE_CURRENT_LIST_DIR)

MESSAGE("current dir: ${CMAKE_CURRENT_LIST_DIR}")
SET(Source_Path ${CMAKE_CURRENT_LIST_DIR})
MESSAGE("PROJECT_BINARY_DIR " ${PROJECT_BINARY_DIR})

# Boost locations (hardcoded per original project layout).
IF (UNIX)
  SET(BOOST_DIR /usr/include/boost)
  SET(BOOST_LIB /usr/local/lib)
ENDIF (UNIX)
IF (WIN32)
  SET(BOOST_DIR C:/Users/arbabenko/Soft/boost_1_47_0)
  SET(BOOST_LIB C:/Users/arbabenko/Soft/boost_1_47_0/lib/x64/lib)
ENDIF (WIN32)

# Intel MKL locations (hardcoded per original project layout).
IF (UNIX)
  SET(BLAS_DIR /opt/intel/composer_xe_2013.2.146/mkl/include)
  SET(BLAS_LIB /opt/intel/composer_xe_2013.2.146/mkl/lib/intel64
      /opt/intel/composer_xe_2013.2.146/compiler/lib/intel64)
ENDIF (UNIX)
IF (WIN32)
  SET(BLAS_DIR "C:/Program Files (x86)/Intel/Composer XE 2011 SP1/mkl/include")
  SET(BLAS_LIB "C:/Program Files (x86)/Intel/Composer XE 2011 SP1/mkl/lib/intel64"
      "C:/Program Files (x86)/Intel/Composer XE 2011 SP1/compiler/lib/intel64")
ENDIF (WIN32)

# MKL link line, shared by both executables (was duplicated verbatim twice).
IF (UNIX)
  SET(MKL_LIBS libmkl_intel_ilp64.a libmkl_gnu_thread.a libmkl_core.a dl pthread m)
ENDIF (UNIX)
IF (WIN32)
  SET(MKL_LIBS mkl_intel_lp64.lib mkl_intel_thread.lib mkl_core.lib libiomp5md.lib)
ENDIF (WIN32)

# Include/link paths were repeated before every target; declaring them once
# at directory scope has the identical effect.
INCLUDE_DIRECTORIES(${BOOST_DIR} ${BLAS_DIR} ${Source_Path})
LINK_DIRECTORIES(${BOOST_LIB} ${BLAS_LIB})

##########################################################################################
# Static core library with all nearest-search sources.
INCLUDE(${Source_Path}/NearestSearch.cmake NO_POLICY_SCOPE)
ADD_LIBRARY(nearest_search_lib STATIC ${NEAREST_ALL_CC})

##########################################################################################
# Indexer launcher executable.
INCLUDE(${Source_Path}/IndexerLauncher.cmake NO_POLICY_SCOPE)
ADD_EXECUTABLE(indexer_launcher ${IndexerLauncher})
TARGET_LINK_LIBRARIES(indexer_launcher nearest_search_lib ${Boost_LIBRARIES} ${MKL_LIBS})

##########################################################################################
# Searcher tester executable.
INCLUDE(${Source_Path}/SearcherTester.cmake NO_POLICY_SCOPE)
ADD_EXECUTABLE(searcher_tester ${SearchTester})
TARGET_LINK_LIBRARIES(searcher_tester nearest_search_lib ${Boost_LIBRARIES} ${MKL_LIBS})
--------------------------------------------------------------------------------
/IndexerLauncher.cmake:
--------------------------------------------------------------------------------
# Source list for the indexer_launcher executable.
set(IndexerLauncher ${Source_Path}/indexer_launcher.cpp)
--------------------------------------------------------------------------------
/NearestSearch.cmake:
--------------------------------------------------------------------------------
# Divide binaries into groups for comfortable IDE navigation.
SOURCE_GROUP(util FILES ${Source_Path}/data_util.h
                        ${Source_Path}/data_util.cpp
                        ${Source_Path}/multitable.hpp
                        ${Source_Path}/perfomance_util.h
                        ${Source_Path}/perfomance_util.cpp)

SET(UTIL ${Source_Path}/data_util.h
         ${Source_Path}/data_util.cpp
         ${Source_Path}/multitable.hpp
         ${Source_Path}/perfomance_util.h
         ${Source_Path}/perfomance_util.cpp)

SOURCE_GROUP(indexer FILES ${Source_Path}/indexer.h)
SET(INDEXER ${Source_Path}/indexer.h)

SOURCE_GROUP(searcher FILES ${Source_Path}/searcher.h
                            ${Source_Path}/ordered_lists_merger.h)
SET(SEARCHER ${Source_Path}/searcher.h
             ${Source_Path}/ordered_lists_merger.h)

# All source files of the nearest-search library.
SET(NEAREST_ALL_CC ${UTIL} ${INDEXER} ${SEARCHER})
--------------------------------------------------------------------------------
/SearcherTester.cmake:
--------------------------------------------------------------------------------
# Source list for the searcher_tester executable.
set(SearchTester ${Source_Path}/searcher_tester.cpp)
--------------------------------------------------------------------------------
/build_debug.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Configure and build a Debug tree in ./build_dup.
mkdir -p build_dup
# BUGFIX: abort if cd fails instead of running cmake/make in the wrong dir.
cd build_dup || exit 1
# BUGFIX: -f keeps rm silent when no stale cache exists.
rm -f ./CMakeCache.txt
cmake -DCMAKE_BUILD_TYPE=Debug ..
make
--------------------------------------------------------------------------------
/build_release.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Configure and build a Release tree in ./build_master.
mkdir -p build_master
# BUGFIX: abort if cd fails instead of running cmake/make in the wrong dir.
cd build_master || exit 1
# BUGFIX: -f keeps rm silent when no stale cache exists.
rm -f ./CMakeCache.txt
cmake -DCMAKE_BUILD_TYPE=Release ..
5 | make 6 | -------------------------------------------------------------------------------- /data_util.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2012 Yandex Artem Babenko 2 | 3 | #include "data_util.h" 4 | 5 | Distance Eucldistance(const Point& x, const Point& y) { 6 | Distance result = 0; 7 | Distance current_coord_diff; 8 | for(Dimensions d = 0; d < x.size(); ++d){ 9 | current_coord_diff = x[d] - y[d]; 10 | result += current_coord_diff * current_coord_diff; 11 | } 12 | return result; 13 | } 14 | 15 | Distance Eucldistance(const Point& x, const Point& y, 16 | const Dimensions start, const Dimensions finish) { 17 | Distance result = 0; 18 | Distance current_coord_diff; 19 | for(Dimensions d = start; d < finish; ++d){ 20 | current_coord_diff = x[d] - y[d - start]; 21 | result += current_coord_diff * current_coord_diff; 22 | } 23 | return result; 24 | } 25 | 26 | void GetSubpoints(const Points& points, 27 | const Dimensions start_dim, 28 | const Dimensions final_dim, 29 | Points* subpoints) { 30 | if(final_dim < start_dim) { 31 | throw std::logic_error("Final dim < Start dim"); 32 | } 33 | subpoints->resize(points.size()); 34 | for(PointId pid = 0; pid < points.size(); ++pid) { 35 | subpoints->at(pid).resize(final_dim - start_dim); 36 | for(Dimensions dim = start_dim; dim < final_dim; ++dim) { 37 | subpoints->at(pid)[dim] = points[pid][start_dim + dim]; 38 | } 39 | } 40 | } 41 | 42 | ClusterId GetNearestClusterId(const Point& point, 43 | const Centroids& centroids, 44 | const Dimensions start_dim, 45 | const Dimensions final_dim) { 46 | if(final_dim < start_dim) { 47 | throw std::logic_error("Final dim < Start dim"); 48 | } 49 | ClusterId nearest = 0; 50 | Distance min_distance = Eucldistance(point, centroids[0], start_dim, final_dim); 51 | for(PointId pid = 1; pid < centroids.size(); ++pid) { 52 | Distance current_distance = 0; 53 | current_distance = Eucldistance(point, centroids[pid], start_dim, 
final_dim); 54 | if(current_distance < min_distance) { 55 | min_distance = current_distance; 56 | nearest = pid; 57 | } 58 | } 59 | return nearest; 60 | } 61 | 62 | void GetResidual(const Point& point, const CoarseQuantization& coarse_quantizations, 63 | const vector& centroids, Point* residual) { 64 | residual->resize(point.size()); 65 | Dimensions subvector_dimension = point.size() / centroids.size(); 66 | cblas_saxpy(point.size(), 1, &(point[0]), 1, &(residual->at(0)), 1); 67 | for(int subvector_index = 0; subvector_index < centroids.size(); ++subvector_index) { 68 | Dimensions start_dim = subvector_index * subvector_dimension; 69 | const Point& current_coarse_centroid = centroids[subvector_index][coarse_quantizations[subvector_index]]; 70 | cblas_saxpy(subvector_dimension, -1, &(current_coarse_centroid[0]), 1, &(residual->at(start_dim)), 1); 71 | } 72 | } 73 | 74 | void GetResidual(const Point& point, const CoarseQuantization& coarse_quantizations, 75 | const vector& centroids, Coord* residual) { 76 | Dimensions subvector_dimension = point.size() / centroids.size(); 77 | cblas_scopy(point.size(), &(point[0]), 1, residual, 1); 78 | for(int subvector_index = 0; subvector_index < centroids.size(); ++subvector_index) { 79 | Dimensions start_dim = subvector_index * subvector_dimension; 80 | const Point& current_coarse_centroid = centroids[subvector_index][coarse_quantizations[subvector_index]]; 81 | cblas_saxpy(subvector_dimension, -1, &(current_coarse_centroid[0]), 1, &(residual[start_dim]), 1); 82 | } 83 | } 84 | 85 | void GetNearestClusterIdsForPointSubset(const Points& points, const Centroids& centroids, 86 | const PointId start_pid, const PointId final_pid, 87 | vector* nearest) { 88 | if(final_pid < start_pid) { 89 | throw std::logic_error("Final pid < Start pid"); 90 | } 91 | cout << start_pid << " point processing started\n"; 92 | for(PointId pid = start_pid; pid < final_pid; ++pid) { 93 | if(pid % 10000 == 0) { 94 | cout << pid << endl; 95 | } 96 | 
nearest->at(pid) = GetNearestClusterId(points[pid], centroids, 0, points[0].size()); 97 | } 98 | cout << final_pid << " point processing finished\n"; 99 | } 100 | 101 | void GetNearestClusterIdsForSubpoints(const Points& points, const Centroids& centroids, 102 | const Dimensions start_dim, const Dimensions final_dim, 103 | int threads_count, vector* nearest) { 104 | if(final_dim < start_dim) { 105 | throw std::logic_error("Final dim < Start dim"); 106 | } 107 | cout << "Start getting nearest Cluster Ids..." << endl; 108 | Points subpoints; 109 | GetSubpoints(points, start_dim, final_dim, &subpoints); 110 | boost::thread_group threads; 111 | int subpoints_count = points.size() / threads_count; 112 | for(int thread_id = 0; thread_id < threads_count; ++thread_id) { 113 | PointId start_pid = subpoints_count * thread_id; 114 | PointId final_pid = start_pid + subpoints_count; 115 | threads.create_thread(boost::bind(&GetNearestClusterIdsForPointSubset, subpoints, centroids, 116 | start_pid, final_pid, nearest)); 117 | } 118 | threads.join_all(); 119 | cout << "Finish getting nearest Cluster Ids..." 
<< endl; 120 | } 121 | 122 | void GetPointsCoarseQuaintizations(const Points& points, 123 | const vector& centroids, 124 | const int threads_count, 125 | vector* coarse_quantizations) { 126 | int number_of_subvectors = centroids.size(); 127 | coarse_quantizations->resize(points.size(), CoarseQuantization(number_of_subvectors)); 128 | Dimensions subvector_dimension = points[0].size() / number_of_subvectors; 129 | for(int centroids_index = 0; centroids_index < number_of_subvectors; ++centroids_index) { 130 | vector cluster_labels; 131 | cluster_labels.resize(points.size()); 132 | Dimensions start_dim = centroids_index * subvector_dimension; 133 | Dimensions final_dim = std::min((Dimensions)points[0].size(), start_dim + subvector_dimension); 134 | GetNearestClusterIdsForSubpoints(points, centroids[centroids_index], 135 | start_dim, final_dim, threads_count, &cluster_labels); 136 | for(PointId pid = 0; pid < points.size(); ++pid) { 137 | coarse_quantizations->at(pid)[centroids_index] = cluster_labels[pid]; 138 | } 139 | } 140 | } -------------------------------------------------------------------------------- /data_util.h: -------------------------------------------------------------------------------- 1 | /** @file */ 2 | 3 | // Copyright 2012 Yandex Artem Babenko 4 | #pragma once 5 | 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include "mkl_cblas.h" 19 | 20 | #include "multitable.hpp" 21 | 22 | using std::bitset; 23 | using std::cout; 24 | using std::ifstream; 25 | using std::ios; 26 | using std::endl; 27 | using std::multimap; 28 | using std::pair; 29 | using std::set; 30 | using std::string; 31 | using std::vector; 32 | 33 | /** 34 | * \typedef 35 | * Data type for coordinate (bool, char, int, float, etc.) 
36 | */ 37 | typedef float Coord; 38 | /** 39 | * \typedef 40 | * Data type for distance in multidimensional space 41 | */ 42 | typedef float Distance; 43 | /** 44 | * \typedef 45 | * Dimensionality of space = number of point coordinates 46 | */ 47 | typedef int Dimensions; 48 | /** 49 | * \typedef 50 | * Data type for point identifier 51 | */ 52 | typedef int PointId; 53 | /** 54 | * \typedef 55 | * Data type for cluster identifier 56 | */ 57 | typedef int ClusterId; 58 | /** 59 | * \typedef 60 | * Just vector of coordinates 61 | */ 62 | typedef vector Point; 63 | /** 64 | * \typedef 65 | * Class for a number of points 66 | */ 67 | typedef vector Points; 68 | /** 69 | * \typedef 70 | * Class for a number of point identifiers 71 | */ 72 | typedef vector PointIds; 73 | /** 74 | * \typedef 75 | * Class for a set of points 76 | */ 77 | typedef set SetPoints; 78 | /** 79 | * \typedef 80 | * Class for a number of cluster identifiers 81 | */ 82 | typedef vector ClusterIds; 83 | /** 84 | * \typedef 85 | * Class for representation of point coarse quantization, 86 | * ids of nearest centroids for each group of dimensions 87 | */ 88 | typedef vector CoarseQuantization; 89 | /** 90 | * \typedef 91 | * Data type for fine cluster identifier 92 | */ 93 | typedef unsigned char FineClusterId; 94 | /** 95 | * \typedef 96 | * Class for representation of point fine quantization 97 | */ 98 | typedef vector FineQuantization; 99 | /** 100 | * \typedef 101 | * Class for clusters representation 102 | * ClusterId -> (PointId, PointId, PointId, .... 
) 103 | */ 104 | typedef vector ClustersToPoints; 105 | /** 106 | * \typedef 107 | * Class for belonging to clusters representation 108 | * PointId -> ClusterId 109 | */ 110 | typedef std::vector PointsToClusters; 111 | /** 112 | * \typedef 113 | * Centroids of the clustered points set 114 | */ 115 | typedef std::vector Centroids; 116 | 117 | /** 118 | * \enum This enumeration presents different types of input point 119 | * coordinate can be float or uint8 120 | */ 121 | enum PointType { 122 | FVEC, 123 | BVEC 124 | }; 125 | 126 | /** 127 | * \enum This enumeration presents different ways to get rerank info. 128 | * Algorithm can quantize residuals after coarse quantization or init points without 129 | * centroids subtraction 130 | */ 131 | enum RerankMode { 132 | USE_RESIDUALS, 133 | USE_INIT_POINTS 134 | }; 135 | 136 | /** 137 | * \struct MultiIndex incorporates all data structures we need to make search 138 | */ 139 | template 140 | struct MultiIndex { 141 | vector multiindex; 142 | Multitable cell_edges; ///< Table with index cell edges in array 143 | }; 144 | 145 | /** 146 | * Function calculates squared euclidian distance between two points (points must have the same dimensionality) 147 | * @param x first point 148 | * @param y second point 149 | */ 150 | Distance Eucldistance(const Point& x, const Point& y); 151 | /** 152 | * Function calculates squared euclidian distance point with small dimensionality and 153 | * subpoint of point with bigger dimensionality. 
154 | * @param x first point 155 | * @param y second point 156 | * @param start first dimension of subpoint 157 | * @param finish dimension after the last dimension of subpoint 158 | */ 159 | Distance Eucldistance(const Point& x, const Point& y, Dimensions start, Dimensions finish); 160 | 161 | /** 162 | * This simple function casts number of type T to the nearest number of type U 163 | */ 164 | template 165 | inline U Round(T number) { 166 | return (U)(number); 167 | } 168 | 169 | /** 170 | * Function reads point written in .fvecs or .bvecs format. 171 | * Input points have coordinates of type T. 172 | * Result points have coordinates of type U 173 | * @param filename .fvecs or .bvecs file name 174 | * @param points_count how many points to read 175 | * @param points result list of read points 176 | */ 177 | template 178 | void ReadPoints(const string& filename, 179 | vector >* points, 180 | int count) { 181 | ifstream input; 182 | input.open(filename.c_str(), ios::binary); 183 | if(!input.good()) { 184 | throw std::logic_error("Invalid filename"); 185 | } 186 | points->resize(count); 187 | int dimension; 188 | for(PointId pid = 0; pid < count; ++pid) { 189 | input.read((char*)&dimension, sizeof(dimension)); 190 | if(dimension <= 0) { 191 | throw std::logic_error("Bad file content: non-positive dimension"); 192 | } 193 | points->at(pid).resize(dimension); 194 | for(Dimensions d = 0; d < dimension; ++d) { 195 | T buffer; 196 | input.read((char*)&(buffer), sizeof(T)); 197 | points->at(pid)[d] = Round(buffer); 198 | } 199 | } 200 | } 201 | 202 | /** 203 | * Function reads one vector of coordinates of type T. 204 | * Function assumes that the first int32-number in input stream is 205 | * vector dimensionality. Result vector will have coordinates of type U. 
206 | * @param input input stream 207 | * @param v result vector 208 | */ 209 | template 210 | void ReadVector(ifstream& input, vector* v) { 211 | if(!input.good()) { 212 | throw std::logic_error("Bad input stream"); 213 | } 214 | int dimension; 215 | input.read((char*)&dimension, sizeof(dimension)); 216 | if(dimension <= 0) { 217 | throw std::logic_error("Bad file content: non-positive dimension"); 218 | } 219 | v->resize(dimension); 220 | for(Dimensions d = 0; d < dimension; ++d) { 221 | T buffer; 222 | input.read((char*)&buffer, sizeof(buffer)); 223 | v->at(d) = Round(buffer); 224 | } 225 | } 226 | 227 | /** 228 | * Function reads vocabulary of centroids produced by matlab script. 229 | * @param input input stream 230 | * @param dimension one centroid dimensionality 231 | * @param vocabulary_size centroids count 232 | * @param Centroids* result centroids 233 | */ 234 | template 235 | void ReadVocabulary(ifstream& input, 236 | Dimensions dimension, 237 | int vocabulary_size, 238 | Centroids* centroids) { 239 | if(!input.good()) { 240 | throw std::logic_error("Bad input stream"); 241 | } 242 | centroids->resize(vocabulary_size); 243 | for(ClusterId centroid_index = 0; centroid_index < centroids->size(); ++centroid_index) { 244 | centroids->at(centroid_index).resize(dimension); 245 | for(Dimensions dimension_index = 0; dimension_index < dimension; ++dimension_index) { 246 | T buffer; 247 | input.read((char*)&buffer, sizeof(buffer)); 248 | centroids->at(centroid_index)[dimension_index] = Round(buffer); 249 | } 250 | } 251 | } 252 | 253 | /** 254 | * Function reads vocabularies of centroids produced by matlab script. 
255 | * Function assumes that the first int32 in input is dimensionality of centroids and 256 | * the second is the number of centroids in each vocabulary 257 | * @param input input stream 258 | * @param dimension one centroid dimensionality 259 | * @param vocabulary_size centroids count 260 | * @param Centroids* result centroids 261 | */ 262 | template 263 | void ReadVocabularies(const string& filename, 264 | Dimensions space_dimension, 265 | vector* centroids) { 266 | ifstream vocabulary; 267 | vocabulary.open(filename.c_str(), ios::binary); 268 | if(!vocabulary.good()) { 269 | throw std::logic_error("Bad vocabulary file"); 270 | } 271 | int dimension; 272 | vocabulary.read((char*)&dimension, sizeof(dimension)); 273 | if(dimension <= 0) { 274 | throw std::logic_error("Bad file content: non-positive dimension"); 275 | } 276 | int vocabs_count = space_dimension / dimension; 277 | if(space_dimension < dimension) { 278 | throw std::logic_error("Space dimension is less than vocabulary dimension"); 279 | } 280 | centroids->resize(vocabs_count); 281 | int vocabulary_size; 282 | vocabulary.read((char*)&vocabulary_size, sizeof(vocabulary_size)); 283 | for(int vocab_item = 0; vocab_item < vocabs_count; ++vocab_item) { 284 | ReadVocabulary(vocabulary, dimension, vocabulary_size, &(centroids->at(vocab_item))); 285 | } 286 | } 287 | 288 | /** 289 | * This function reads fine vocabs of centroids 290 | * @param fine_vocabs_filename file with vocabularies 291 | * @param fine_vocabs fine centroids lists 292 | */ 293 | template 294 | void ReadFineVocabs(const string& fine_vocabs_filename, vector* fine_vocabs) { 295 | ifstream fine_vocabs_stream; 296 | fine_vocabs_stream.open(fine_vocabs_filename.c_str(), ios::binary); 297 | if(!fine_vocabs_stream.good()) { 298 | throw std::logic_error("Bad fine vocabulary file"); 299 | } 300 | int vocabs_count, centroids_count, vocabs_dim; 301 | fine_vocabs_stream.read((char*)&vocabs_count, sizeof(vocabs_count)); 302 | if(vocabs_count < 1) { 303 | 
throw std::logic_error("Bad fine vocabulary file content: number of vocabularies < 1"); 304 | } 305 | fine_vocabs_stream.read((char*)¢roids_count, sizeof(centroids_count)); 306 | if(centroids_count < 1) { 307 | throw std::logic_error("Bad fine vocabulary file content: vocabulary capacity < 1"); 308 | } 309 | fine_vocabs_stream.read((char*)&vocabs_dim, sizeof(vocabs_dim)); 310 | if(vocabs_dim < 1) { 311 | throw std::logic_error("Bad fine vocabulary file content: vocabulary dimension < 1"); 312 | } 313 | fine_vocabs->resize(vocabs_count); 314 | for(int voc_index = 0; voc_index < vocabs_count; ++voc_index) { 315 | ReadVocabulary(fine_vocabs_stream, vocabs_dim, centroids_count, &(fine_vocabs->at(voc_index))); 316 | } 317 | } 318 | 319 | /** 320 | * This function returns subpoints limited by start_dim and final_dim 321 | * for every point in points 322 | * @param points all points 323 | * @param start_dim first dimension of subpoint 324 | * @param final_dim dimension after the last dimension of subpoint 325 | * @param subpoints result subpoints 326 | */ 327 | void GetSubpoints(const Points& points, 328 | const Dimensions start_dim, 329 | const Dimensions final_dim, 330 | Points* subpoints); 331 | 332 | /** 333 | * This function returns identifier of clusters which centroid is the nearest to 334 | * subpoint limited by start_dim and final_dim 335 | * @param point full point 336 | * @param Centroids all centroids (function finds the nearest one) 337 | * @param start_dim first dimension of subpoint 338 | * @param final_dim dimension after the last dimension of subpoint 339 | */ 340 | ClusterId GetNearestClusterId(const Point& point, const Centroids& centroids, 341 | const Dimensions start_dim, const Dimensions final_dim); 342 | 343 | /** 344 | * This function calculates quantization residual. 
345 | * @param point initial point 346 | * @param coarse_quantizations point coarse quantization 347 | * @param centroids lists of centroids 348 | * @param residual result residual 349 | */ 350 | void GetResidual(const Point& point, const CoarseQuantization& coarse_quantizations, 351 | const vector& centroids, Point* residual); 352 | /** 353 | * This function calculates quantization residual. 354 | * @param point initial point 355 | * @param coarse_quantizations point coarse quantization 356 | * @param centroids lists of centroids 357 | * @param residual pointer to start of residual 358 | */ 359 | void GetResidual(const Point& point, const CoarseQuantization& coarse_quantizations, 360 | const vector& centroids, Coord* residual); 361 | 362 | /** 363 | * This function finds nearest cluster identifiers for points from start_pid to final_pid. 364 | * We need this function for multi-threading 365 | * @param points all points 366 | * @param centroids centroids of clusters 367 | * @param start_pid first point function finds nearest cluster 368 | * @param final_pid point after the last point function finds nearest cluster 369 | */ 370 | void GetNearestClusterIdsForPointSubset(const Points& points, const Centroids& centroids, 371 | const PointId start_pid, const PointId final_pid, 372 | vector* nearest); 373 | 374 | /** 375 | * This function finds cluster identifiers nearest to subpoints for a number of points. 
376 | * Subpoints are limited by start_dim and finish_dim 377 | * @param points all points 378 | * @param centroids centroids of clusters 379 | * @param start_dim first dimesion of subpoint 380 | * @param final_dim dimesion after the last dimension of subpoint 381 | * @param threads_count number of threads 382 | * @param nearest result 383 | */ 384 | void GetNearestClusterIdsForSubpoints(const Points& points, const Centroids& centroids, 385 | const Dimensions start_dim, const Dimensions final_dim, 386 | int threads_count, vector* nearest); 387 | 388 | /** 389 | * This function calculates points coarse product quantizations 390 | * @param points all points 391 | * @param centroids centroids of clusters 392 | * @param threads_count number of threads 393 | * @param coarse_quantizations result quantizations 394 | */ 395 | void GetPointsCoarseQuaintizations(const Points& points, const vector& centroids, 396 | const int threads_count, 397 | vector* coarse_quantizations); 398 | 399 | 400 | /** 401 | * \struct All indexation parameters 402 | */ 403 | struct IndexConfig { 404 | RerankMode rerank_mode; 405 | vector fine_vocabs; 406 | }; 407 | 408 | /** 409 | * \struct Type of record in multiindex, contains 410 | * id of point and 8 bytes for ADC reranking 411 | */ 412 | struct RerankADC8 { 413 | PointId pid; 414 | FineClusterId quantizations[8]; 415 | template 416 | void serialize(Archive& arc, unsigned int version) { 417 | arc & pid; 418 | arc & quantizations; 419 | } 420 | }; 421 | 422 | /** 423 | * \struct Type of record in multiindex, contains 424 | * id of point and 16 bytes for ADC reranking 425 | */ 426 | struct RerankADC16 { 427 | PointId pid; 428 | FineClusterId quantizations[16]; 429 | template 430 | void serialize(Archive& arc, unsigned int version) { 431 | arc & pid; 432 | arc & quantizations; 433 | } 434 | }; 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 
-------------------------------------------------------------------------------- /docs/doxygen.cfg: -------------------------------------------------------------------------------- 1 | # Doxyfile 1.7.6 2 | 3 | # This file describes the settings to be used by the documentation system 4 | # doxygen (www.doxygen.org) for a project. 5 | # 6 | # All text after a hash (#) is considered a comment and will be ignored. 7 | # The format is: 8 | # TAG = value [value, ...] 9 | # For lists items can also be appended using: 10 | # TAG += value [value, ...] 11 | # Values that contain spaces should be placed between quotes (" "). 12 | 13 | #--------------------------------------------------------------------------- 14 | # Project related configuration options 15 | #--------------------------------------------------------------------------- 16 | 17 | # This tag specifies the encoding used for all characters in the config file 18 | # that follow. The default is UTF-8 which is also the encoding used for all 19 | # text before the first occurrence of this tag. Doxygen uses libiconv (or the 20 | # iconv built into libc) for the transcoding. See 21 | # http://www.gnu.org/software/libiconv for the list of possible encodings. 22 | 23 | DOXYFILE_ENCODING = UTF-8 24 | 25 | # The PROJECT_NAME tag is a single word (or sequence of words) that should 26 | # identify the project. Note that if you do not use Doxywizard you need 27 | # to put quotes around the project name if it contains spaces. 28 | 29 | PROJECT_NAME = "MultiIndex" 30 | 31 | # The PROJECT_NUMBER tag can be used to enter a project or revision number. 32 | # This could be handy for archiving the generated documentation or 33 | # if some version control system is used. 34 | 35 | PROJECT_NUMBER = 36 | 37 | # Using the PROJECT_BRIEF tag one can provide an optional one line description 38 | # for a project that appears at the top of each page and should give viewer 39 | # a quick idea about the purpose of the project. 
Keep the description short. 40 | 41 | PROJECT_BRIEF = 42 | 43 | # With the PROJECT_LOGO tag one can specify an logo or icon that is 44 | # included in the documentation. The maximum height of the logo should not 45 | # exceed 55 pixels and the maximum width should not exceed 200 pixels. 46 | # Doxygen will copy the logo to the output directory. 47 | 48 | PROJECT_LOGO = 49 | 50 | # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 51 | # base path where the generated documentation will be put. 52 | # If a relative path is entered, it will be relative to the location 53 | # where doxygen was started. If left blank the current directory will be used. 54 | 55 | OUTPUT_DIRECTORY = "." 56 | 57 | # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 58 | # 4096 sub-directories (in 2 levels) under the output directory of each output 59 | # format and will distribute the generated files over these directories. 60 | # Enabling this option can be useful when feeding doxygen a huge amount of 61 | # source files, where putting all generated files in the same directory would 62 | # otherwise cause performance problems for the file system. 63 | 64 | CREATE_SUBDIRS = NO 65 | 66 | # The OUTPUT_LANGUAGE tag is used to specify the language in which all 67 | # documentation generated by doxygen is written. Doxygen will use this 68 | # information to generate all constant output in the proper language. 69 | # The default language is English, other supported languages are: 70 | # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, 71 | # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, 72 | # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English 73 | # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, 74 | # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, 75 | # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. 
76 | 77 | OUTPUT_LANGUAGE = English 78 | 79 | # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 80 | # include brief member descriptions after the members that are listed in 81 | # the file and class documentation (similar to JavaDoc). 82 | # Set to NO to disable this. 83 | 84 | BRIEF_MEMBER_DESC = YES 85 | 86 | # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 87 | # the brief description of a member or function before the detailed description. 88 | # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 89 | # brief descriptions will be completely suppressed. 90 | 91 | REPEAT_BRIEF = YES 92 | 93 | # This tag implements a quasi-intelligent brief description abbreviator 94 | # that is used to form the text in various listings. Each string 95 | # in this list, if found as the leading text of the brief description, will be 96 | # stripped from the text and the result after processing the whole list, is 97 | # used as the annotated text. Otherwise, the brief description is used as-is. 98 | # If left blank, the following values are used ("$name" is automatically 99 | # replaced with the name of the entity): "The $name class" "The $name widget" 100 | # "The $name file" "is" "provides" "specifies" "contains" 101 | # "represents" "a" "an" "the" 102 | 103 | ABBREVIATE_BRIEF = 104 | 105 | # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 106 | # Doxygen will generate a detailed section even if there is only a brief 107 | # description. 108 | 109 | ALWAYS_DETAILED_SEC = NO 110 | 111 | # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all 112 | # inherited members of a class in the documentation of that class as if those 113 | # members were ordinary class members. Constructors, destructors and assignment 114 | # operators of the base classes will not be shown. 
115 | 116 | INLINE_INHERITED_MEMB = NO 117 | 118 | # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full 119 | # path before files name in the file list and in the header files. If set 120 | # to NO the shortest path that makes the file name unique will be used. 121 | 122 | FULL_PATH_NAMES = YES 123 | 124 | # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 125 | # can be used to strip a user-defined part of the path. Stripping is 126 | # only done if one of the specified strings matches the left-hand part of 127 | # the path. The tag can be used to show relative paths in the file list. 128 | # If left blank the directory from which doxygen is run is used as the 129 | # path to strip. 130 | 131 | STRIP_FROM_PATH = 132 | 133 | # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of 134 | # the path mentioned in the documentation of a class, which tells 135 | # the reader which header file to include in order to use a class. 136 | # If left blank only the name of the header file containing the class 137 | # definition is used. Otherwise one should specify the include paths that 138 | # are normally passed to the compiler using the -I flag. 139 | 140 | STRIP_FROM_INC_PATH = 141 | 142 | # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter 143 | # (but less readable) file names. This can be useful if your file system 144 | # doesn't support long names like on DOS, Mac, or CD-ROM. 145 | 146 | SHORT_NAMES = NO 147 | 148 | # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen 149 | # will interpret the first line (until the first dot) of a JavaDoc-style 150 | # comment as the brief description. If set to NO, the JavaDoc 151 | # comments will behave just like regular Qt-style comments 152 | # (thus requiring an explicit @brief command for a brief description.) 
153 | 154 | JAVADOC_AUTOBRIEF = NO 155 | 156 | # If the QT_AUTOBRIEF tag is set to YES then Doxygen will 157 | # interpret the first line (until the first dot) of a Qt-style 158 | # comment as the brief description. If set to NO, the comments 159 | # will behave just like regular Qt-style comments (thus requiring 160 | # an explicit \brief command for a brief description.) 161 | 162 | QT_AUTOBRIEF = NO 163 | 164 | # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen 165 | # treat a multi-line C++ special comment block (i.e. a block of //! or /// 166 | # comments) as a brief description. This used to be the default behaviour. 167 | # The new default is to treat a multi-line C++ comment block as a detailed 168 | # description. Set this tag to YES if you prefer the old behaviour instead. 169 | 170 | MULTILINE_CPP_IS_BRIEF = NO 171 | 172 | # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented 173 | # member inherits the documentation from any documented member that it 174 | # re-implements. 175 | 176 | INHERIT_DOCS = YES 177 | 178 | # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce 179 | # a new page for each member. If set to NO, the documentation of a member will 180 | # be part of the file/class/namespace that contains it. 181 | 182 | SEPARATE_MEMBER_PAGES = NO 183 | 184 | # The TAB_SIZE tag can be used to set the number of spaces in a tab. 185 | # Doxygen uses this value to replace tabs by spaces in code fragments. 186 | 187 | TAB_SIZE = 8 188 | 189 | # This tag can be used to specify a number of aliases that acts 190 | # as commands in the documentation. An alias has the form "name=value". 191 | # For example adding "sideeffect=\par Side Effects:\n" will allow you to 192 | # put the command \sideeffect (or @sideeffect) in the documentation, which 193 | # will result in a user-defined paragraph with heading "Side Effects:". 194 | # You can put \n's in the value part of an alias to insert newlines. 
195 | 196 | ALIASES = 197 | 198 | # This tag can be used to specify a number of word-keyword mappings (TCL only). 199 | # A mapping has the form "name=value". For example adding 200 | # "class=itcl::class" will allow you to use the command class in the 201 | # itcl::class meaning. 202 | 203 | TCL_SUBST = 204 | 205 | # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C 206 | # sources only. Doxygen will then generate output that is more tailored for C. 207 | # For instance, some of the names that are used will be different. The list 208 | # of all members will be omitted, etc. 209 | 210 | OPTIMIZE_OUTPUT_FOR_C = NO 211 | 212 | # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java 213 | # sources only. Doxygen will then generate output that is more tailored for 214 | # Java. For instance, namespaces will be presented as packages, qualified 215 | # scopes will look different, etc. 216 | 217 | OPTIMIZE_OUTPUT_JAVA = NO 218 | 219 | # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran 220 | # sources only. Doxygen will then generate output that is more tailored for 221 | # Fortran. 222 | 223 | OPTIMIZE_FOR_FORTRAN = NO 224 | 225 | # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL 226 | # sources. Doxygen will then generate output that is tailored for 227 | # VHDL. 228 | 229 | OPTIMIZE_OUTPUT_VHDL = NO 230 | 231 | # Doxygen selects the parser to use depending on the extension of the files it 232 | # parses. With this tag you can assign which parser to use for a given extension. 233 | # Doxygen has a built-in mapping, but you can override or extend it using this 234 | # tag. The format is ext=language, where ext is a file extension, and language 235 | # is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, 236 | # C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. 
For instance to make 237 | # doxygen treat .inc files as Fortran files (default is PHP), and .f files as C 238 | # (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions 239 | # you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. 240 | 241 | EXTENSION_MAPPING = 242 | 243 | # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want 244 | # to include (a tag file for) the STL sources as input, then you should 245 | # set this tag to YES in order to let doxygen match functions declarations and 246 | # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. 247 | # func(std::string) {}). This also makes the inheritance and collaboration 248 | # diagrams that involve STL classes more complete and accurate. 249 | 250 | BUILTIN_STL_SUPPORT = YES 251 | 252 | # If you use Microsoft's C++/CLI language, you should set this option to YES to 253 | # enable parsing support. 254 | 255 | CPP_CLI_SUPPORT = NO 256 | 257 | # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. 258 | # Doxygen will parse them like normal C++ but will assume all classes use public 259 | # instead of private inheritance when no explicit protection keyword is present. 260 | 261 | SIP_SUPPORT = NO 262 | 263 | # For Microsoft's IDL there are propget and propput attributes to indicate getter 264 | # and setter methods for a property. Setting this option to YES (the default) 265 | # will make doxygen replace the get and set methods by a property in the 266 | # documentation. This will only work if the methods are indeed getting or 267 | # setting a simple type. If this is not the case, or you want to show the 268 | # methods anyway, you should set this option to NO. 
269 | 270 | IDL_PROPERTY_SUPPORT = YES 271 | 272 | # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC 273 | # tag is set to YES, then doxygen will reuse the documentation of the first 274 | # member in the group (if any) for the other members of the group. By default 275 | # all members of a group must be documented explicitly. 276 | 277 | DISTRIBUTE_GROUP_DOC = NO 278 | 279 | # Set the SUBGROUPING tag to YES (the default) to allow class member groups of 280 | # the same type (for instance a group of public functions) to be put as a 281 | # subgroup of that type (e.g. under the Public Functions section). Set it to 282 | # NO to prevent subgrouping. Alternatively, this can be done per class using 283 | # the \nosubgrouping command. 284 | 285 | SUBGROUPING = YES 286 | 287 | # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and 288 | # unions are shown inside the group in which they are included (e.g. using 289 | # @ingroup) instead of on a separate page (for HTML and Man pages) or 290 | # section (for LaTeX and RTF). 291 | 292 | INLINE_GROUPED_CLASSES = NO 293 | 294 | # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and 295 | # unions with only public data fields will be shown inline in the documentation 296 | # of the scope in which they are defined (i.e. file, namespace, or group 297 | # documentation), provided this scope is documented. If set to NO (the default), 298 | # structs, classes, and unions are shown on a separate page (for HTML and Man 299 | # pages) or section (for LaTeX and RTF). 300 | 301 | INLINE_SIMPLE_STRUCTS = NO 302 | 303 | # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum 304 | # is documented as struct, union, or enum with the name of the typedef. So 305 | # typedef struct TypeS {} TypeT, will appear in the documentation as a struct 306 | # with name TypeT. When disabled the typedef will appear as a member of a file, 307 | # namespace, or class. 
And the struct will be named TypeS. This can typically 308 | # be useful for C code in case the coding convention dictates that all compound 309 | # types are typedef'ed and only the typedef is referenced, never the tag name. 310 | 311 | TYPEDEF_HIDES_STRUCT = NO 312 | 313 | # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to 314 | # determine which symbols to keep in memory and which to flush to disk. 315 | # When the cache is full, less often used symbols will be written to disk. 316 | # For small to medium size projects (<1000 input files) the default value is 317 | # probably good enough. For larger projects a too small cache size can cause 318 | # doxygen to be busy swapping symbols to and from disk most of the time 319 | # causing a significant performance penalty. 320 | # If the system has enough physical memory increasing the cache will improve the 321 | # performance by keeping more symbols in memory. Note that the value works on 322 | # a logarithmic scale so increasing the size by one will roughly double the 323 | # memory usage. The cache size is given by this formula: 324 | # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, 325 | # corresponding to a cache size of 2^16 = 65536 symbols 326 | 327 | SYMBOL_CACHE_SIZE = 0 328 | 329 | #--------------------------------------------------------------------------- 330 | # Build related configuration options 331 | #--------------------------------------------------------------------------- 332 | 333 | # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in 334 | # documentation are documented, even if no documentation was available. 335 | # Private class members and static file members will be hidden unless 336 | # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES 337 | 338 | EXTRACT_ALL = YES 339 | 340 | # If the EXTRACT_PRIVATE tag is set to YES all private members of a class 341 | # will be included in the documentation. 
342 | 343 | EXTRACT_PRIVATE = YES 344 | 345 | # If the EXTRACT_STATIC tag is set to YES all static members of a file 346 | # will be included in the documentation. 347 | 348 | EXTRACT_STATIC = YES 349 | 350 | # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) 351 | # defined locally in source files will be included in the documentation. 352 | # If set to NO only classes defined in header files are included. 353 | 354 | EXTRACT_LOCAL_CLASSES = YES 355 | 356 | # This flag is only useful for Objective-C code. When set to YES local 357 | # methods, which are defined in the implementation section but not in 358 | # the interface are included in the documentation. 359 | # If set to NO (the default) only methods in the interface are included. 360 | 361 | EXTRACT_LOCAL_METHODS = NO 362 | 363 | # If this flag is set to YES, the members of anonymous namespaces will be 364 | # extracted and appear in the documentation as a namespace called 365 | # 'anonymous_namespace{file}', where file will be replaced with the base 366 | # name of the file that contains the anonymous namespace. By default 367 | # anonymous namespaces are hidden. 368 | 369 | EXTRACT_ANON_NSPACES = NO 370 | 371 | # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all 372 | # undocumented members of documented classes, files or namespaces. 373 | # If set to NO (the default) these members will be included in the 374 | # various overviews, but no documentation section is generated. 375 | # This option has no effect if EXTRACT_ALL is enabled. 376 | 377 | HIDE_UNDOC_MEMBERS = NO 378 | 379 | # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all 380 | # undocumented classes that are normally visible in the class hierarchy. 381 | # If set to NO (the default) these classes will be included in the various 382 | # overviews. This option has no effect if EXTRACT_ALL is enabled. 
383 | 384 | HIDE_UNDOC_CLASSES = NO 385 | 386 | # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all 387 | # friend (class|struct|union) declarations. 388 | # If set to NO (the default) these declarations will be included in the 389 | # documentation. 390 | 391 | HIDE_FRIEND_COMPOUNDS = NO 392 | 393 | # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any 394 | # documentation blocks found inside the body of a function. 395 | # If set to NO (the default) these blocks will be appended to the 396 | # function's detailed documentation block. 397 | 398 | HIDE_IN_BODY_DOCS = NO 399 | 400 | # The INTERNAL_DOCS tag determines if documentation 401 | # that is typed after a \internal command is included. If the tag is set 402 | # to NO (the default) then the documentation will be excluded. 403 | # Set it to YES to include the internal documentation. 404 | 405 | INTERNAL_DOCS = NO 406 | 407 | # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate 408 | # file names in lower-case letters. If set to YES upper-case letters are also 409 | # allowed. This is useful if you have classes or files whose names only differ 410 | # in case and if your file system supports case sensitive file names. Windows 411 | # and Mac users are advised to set this option to NO. 412 | 413 | CASE_SENSE_NAMES = NO 414 | 415 | # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen 416 | # will show members with their full class and namespace scopes in the 417 | # documentation. If set to YES the scope will be hidden. 418 | 419 | HIDE_SCOPE_NAMES = NO 420 | 421 | # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen 422 | # will put a list of the files that are included by a file in the documentation 423 | # of that file. 
424 | 425 | SHOW_INCLUDE_FILES = YES 426 | 427 | # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen 428 | # will list include files with double quotes in the documentation 429 | # rather than with sharp brackets. 430 | 431 | FORCE_LOCAL_INCLUDES = NO 432 | 433 | # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] 434 | # is inserted in the documentation for inline members. 435 | 436 | INLINE_INFO = YES 437 | 438 | # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen 439 | # will sort the (detailed) documentation of file and class members 440 | # alphabetically by member name. If set to NO the members will appear in 441 | # declaration order. 442 | 443 | SORT_MEMBER_DOCS = YES 444 | 445 | # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the 446 | # brief documentation of file, namespace and class members alphabetically 447 | # by member name. If set to NO (the default) the members will appear in 448 | # declaration order. 449 | 450 | SORT_BRIEF_DOCS = NO 451 | 452 | # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen 453 | # will sort the (brief and detailed) documentation of class members so that 454 | # constructors and destructors are listed first. If set to NO (the default) 455 | # the constructors will appear in the respective orders defined by 456 | # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. 457 | # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO 458 | # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. 459 | 460 | SORT_MEMBERS_CTORS_1ST = NO 461 | 462 | # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the 463 | # hierarchy of group names into alphabetical order. If set to NO (the default) 464 | # the group names will appear in their defined order. 465 | 466 | SORT_GROUP_NAMES = NO 467 | 468 | # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be 469 | # sorted by fully-qualified names, including namespaces. 
If set to 470 | # NO (the default), the class list will be sorted only by class name, 471 | # not including the namespace part. 472 | # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. 473 | # Note: This option applies only to the class list, not to the 474 | # alphabetical list. 475 | 476 | SORT_BY_SCOPE_NAME = NO 477 | 478 | # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to 479 | # do proper type resolution of all parameters of a function it will reject a 480 | # match between the prototype and the implementation of a member function even 481 | # if there is only one candidate or it is obvious which candidate to choose 482 | # by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen 483 | # will still accept a match between prototype and implementation in such cases. 484 | 485 | STRICT_PROTO_MATCHING = NO 486 | 487 | # The GENERATE_TODOLIST tag can be used to enable (YES) or 488 | # disable (NO) the todo list. This list is created by putting \todo 489 | # commands in the documentation. 490 | 491 | GENERATE_TODOLIST = YES 492 | 493 | # The GENERATE_TESTLIST tag can be used to enable (YES) or 494 | # disable (NO) the test list. This list is created by putting \test 495 | # commands in the documentation. 496 | 497 | GENERATE_TESTLIST = YES 498 | 499 | # The GENERATE_BUGLIST tag can be used to enable (YES) or 500 | # disable (NO) the bug list. This list is created by putting \bug 501 | # commands in the documentation. 502 | 503 | GENERATE_BUGLIST = YES 504 | 505 | # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or 506 | # disable (NO) the deprecated list. This list is created by putting 507 | # \deprecated commands in the documentation. 508 | 509 | GENERATE_DEPRECATEDLIST= YES 510 | 511 | # The ENABLED_SECTIONS tag can be used to enable conditional 512 | # documentation sections, marked by \if sectionname ... \endif. 
513 | 514 | ENABLED_SECTIONS = 515 | 516 | # The MAX_INITIALIZER_LINES tag determines the maximum number of lines 517 | # the initial value of a variable or macro consists of for it to appear in 518 | # the documentation. If the initializer consists of more lines than specified 519 | # here it will be hidden. Use a value of 0 to hide initializers completely. 520 | # The appearance of the initializer of individual variables and macros in the 521 | # documentation can be controlled using \showinitializer or \hideinitializer 522 | # command in the documentation regardless of this setting. 523 | 524 | MAX_INITIALIZER_LINES = 30 525 | 526 | # Set the SHOW_USED_FILES tag to NO to disable the list of files generated 527 | # at the bottom of the documentation of classes and structs. If set to YES the 528 | # list will mention the files that were used to generate the documentation. 529 | 530 | SHOW_USED_FILES = YES 531 | 532 | # If the sources in your project are distributed over multiple directories 533 | # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy 534 | # in the documentation. The default is NO. 535 | 536 | SHOW_DIRECTORIES = NO 537 | 538 | # Set the SHOW_FILES tag to NO to disable the generation of the Files page. 539 | # This will remove the Files entry from the Quick Index and from the 540 | # Folder Tree View (if specified). The default is YES. 541 | 542 | SHOW_FILES = YES 543 | 544 | # Set the SHOW_NAMESPACES tag to NO to disable the generation of the 545 | # Namespaces page. 546 | # This will remove the Namespaces entry from the Quick Index 547 | # and from the Folder Tree View (if specified). The default is YES. 548 | 549 | SHOW_NAMESPACES = YES 550 | 551 | # The FILE_VERSION_FILTER tag can be used to specify a program or script that 552 | # doxygen should invoke to get the current version for each file (typically from 553 | # the version control system). 
Doxygen will invoke the program by executing (via 554 | # popen()) the command <command> <input-file>, where <command> is the value of 555 | # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file 556 | # provided by doxygen. Whatever the program writes to standard output 557 | # is used as the file version. See the manual for examples. 558 | 559 | FILE_VERSION_FILTER = 560 | 561 | # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed 562 | # by doxygen. The layout file controls the global structure of the generated 563 | # output files in an output format independent way. To create the layout file 564 | # that represents doxygen's defaults, run doxygen with the -l option. 565 | # You can optionally specify a file name after the option, if omitted 566 | # DoxygenLayout.xml will be used as the name of the layout file. 567 | 568 | LAYOUT_FILE = 569 | 570 | # The CITE_BIB_FILES tag can be used to specify one or more bib files 571 | # containing the references data. This must be a list of .bib files. The 572 | # .bib extension is automatically appended if omitted. Using this command 573 | # requires the bibtex tool to be installed. See also 574 | # http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style 575 | # of the bibliography can be controlled using LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the search path. 576 | 577 | CITE_BIB_FILES = 578 | 579 | #--------------------------------------------------------------------------- 580 | # configuration options related to warning and progress messages 581 | #--------------------------------------------------------------------------- 582 | 583 | # The QUIET tag can be used to turn on/off the messages that are generated 584 | # by doxygen. Possible values are YES and NO. If left blank NO is used. 585 | 586 | QUIET = NO 587 | 588 | # The WARNINGS tag can be used to turn on/off the warning messages that are 589 | # generated by doxygen. Possible values are YES and NO. 
If left blank 590 | # NO is used. 591 | 592 | WARNINGS = YES 593 | 594 | # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings 595 | # for undocumented members. If EXTRACT_ALL is set to YES then this flag will 596 | # automatically be disabled. 597 | 598 | WARN_IF_UNDOCUMENTED = YES 599 | 600 | # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for 601 | # potential errors in the documentation, such as not documenting some 602 | # parameters in a documented function, or documenting parameters that 603 | # don't exist or using markup commands wrongly. 604 | 605 | WARN_IF_DOC_ERROR = YES 606 | 607 | # The WARN_NO_PARAMDOC option can be enabled to get warnings for 608 | # functions that are documented, but have no documentation for their parameters 609 | # or return value. If set to NO (the default) doxygen will only warn about 610 | # wrong or incomplete parameter documentation, but not about the absence of 611 | # documentation. 612 | 613 | WARN_NO_PARAMDOC = NO 614 | 615 | # The WARN_FORMAT tag determines the format of the warning messages that 616 | # doxygen can produce. The string should contain the $file, $line, and $text 617 | # tags, which will be replaced by the file and line number from which the 618 | # warning originated and the warning text. Optionally the format may contain 619 | # $version, which will be replaced by the version of the file (if it could 620 | # be obtained via FILE_VERSION_FILTER) 621 | 622 | WARN_FORMAT = "$file:$line: $text" 623 | 624 | # The WARN_LOGFILE tag can be used to specify a file to which warning 625 | # and error messages should be written. If left blank the output is written 626 | # to stderr. 
627 | 628 | WARN_LOGFILE = 629 | 630 | #--------------------------------------------------------------------------- 631 | # configuration options related to the input files 632 | #--------------------------------------------------------------------------- 633 | 634 | # The INPUT tag can be used to specify the files and/or directories that contain 635 | # documented source files. You may enter file names like "myfile.cpp" or 636 | # directories like "/usr/src/myproject". Separate the files or directories 637 | # with spaces. 638 | 639 | INPUT = 640 | 641 | # This tag can be used to specify the character encoding of the source files 642 | # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is 643 | # also the default input encoding. Doxygen uses libiconv (or the iconv built 644 | # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for 645 | # the list of possible encodings. 646 | 647 | INPUT_ENCODING = UTF-8 648 | 649 | # If the value of the INPUT tag contains directories, you can use the 650 | # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 651 | # and *.h) to filter out the source-files in the directories. If left 652 | # blank the following patterns are tested: 653 | # *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh 654 | # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py 655 | # *.f90 *.f *.for *.vhd *.vhdl 656 | 657 | FILE_PATTERNS = 658 | 659 | # The RECURSIVE tag can be used to specify whether or not subdirectories 660 | # should be searched for input files as well. Possible values are YES and NO. 661 | # If left blank NO is used. 662 | 663 | RECURSIVE = YES 664 | 665 | # The EXCLUDE tag can be used to specify files and/or directories that should be 666 | # excluded from the INPUT source files. This way you can easily exclude a 667 | # subdirectory from a directory tree whose root is specified with the INPUT tag. 
668 | # Note that relative paths are relative to the directory from which doxygen is 669 | # run. 670 | 671 | EXCLUDE = "doc/examples" 672 | 673 | # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or 674 | # directories that are symbolic links (a Unix file system feature) are excluded 675 | # from the input. 676 | 677 | EXCLUDE_SYMLINKS = NO 678 | 679 | # If the value of the INPUT tag contains directories, you can use the 680 | # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude 681 | # certain files from those directories. Note that the wildcards are matched 682 | # against the file with absolute path, so to exclude all test directories 683 | # for example use the pattern */test/* 684 | 685 | EXCLUDE_PATTERNS = 686 | 687 | # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names 688 | # (namespaces, classes, functions, etc.) that should be excluded from the 689 | # output. The symbol name can be a fully qualified name, a word, or if the 690 | # wildcard * is used, a substring. Examples: ANamespace, AClass, 691 | # AClass::ANamespace, ANamespace::*Test 692 | 693 | EXCLUDE_SYMBOLS = 694 | 695 | # The EXAMPLE_PATH tag can be used to specify one or more files or 696 | # directories that contain example code fragments that are included (see 697 | # the \include command). 698 | 699 | EXAMPLE_PATH = "doc/examples" 700 | 701 | # If the value of the EXAMPLE_PATH tag contains directories, you can use the 702 | # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 703 | # and *.h) to filter out the source-files in the directories. If left 704 | # blank all files are included. 705 | 706 | EXAMPLE_PATTERNS = 707 | 708 | # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be 709 | # searched for input files to be used with the \include or \dontinclude 710 | # commands irrespective of the value of the RECURSIVE tag. 711 | # Possible values are YES and NO. If left blank NO is used. 
712 | 713 | EXAMPLE_RECURSIVE = YES 714 | 715 | # The IMAGE_PATH tag can be used to specify one or more files or 716 | # directories that contain images that are included in the documentation (see 717 | # the \image command). 718 | 719 | IMAGE_PATH = "docs/pictures" 720 | 721 | # The INPUT_FILTER tag can be used to specify a program that doxygen should 722 | # invoke to filter for each input file. Doxygen will invoke the filter program 723 | # by executing (via popen()) the command <filter> <input-file>, where 724 | # <filter> is the value of the INPUT_FILTER tag, and <input-file> is the name of an 725 | # input file. Doxygen will then use the output that the filter program writes 726 | # to standard output. 727 | # If FILTER_PATTERNS is specified, this tag will be 728 | # ignored. 729 | 730 | INPUT_FILTER = 731 | 732 | # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern 733 | # basis. 734 | # Doxygen will compare the file name with each pattern and apply the 735 | # filter if there is a match. 736 | # The filters are a list of the form: 737 | # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further 738 | # info on how filters are used. If FILTER_PATTERNS is empty or if 739 | # none of the patterns match the file name, INPUT_FILTER is applied. 740 | 741 | FILTER_PATTERNS = 742 | 743 | # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using 744 | # INPUT_FILTER) will be used to filter the input files when producing source 745 | # files to browse (i.e. when SOURCE_BROWSER is set to YES). 746 | 747 | FILTER_SOURCE_FILES = NO 748 | 749 | # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file 750 | # pattern. A pattern will override the setting for FILTER_PATTERN (if any) 751 | # and it is also possible to disable source filtering for a specific pattern 752 | # using *.ext= (so without naming a filter). This option only has effect when 753 | # FILTER_SOURCE_FILES is enabled. 
754 | 755 | FILTER_SOURCE_PATTERNS = 756 | 757 | #--------------------------------------------------------------------------- 758 | # configuration options related to source browsing 759 | #--------------------------------------------------------------------------- 760 | 761 | # If the SOURCE_BROWSER tag is set to YES then a list of source files will 762 | # be generated. Documented entities will be cross-referenced with these sources. 763 | # Note: To get rid of all source code in the generated output, make sure also 764 | # VERBATIM_HEADERS is set to NO. 765 | 766 | SOURCE_BROWSER = NO 767 | 768 | # Setting the INLINE_SOURCES tag to YES will include the body 769 | # of functions and classes directly in the documentation. 770 | 771 | INLINE_SOURCES = NO 772 | 773 | # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct 774 | # doxygen to hide any special comment blocks from generated source code 775 | # fragments. Normal C and C++ comments will always remain visible. 776 | 777 | STRIP_CODE_COMMENTS = YES 778 | 779 | # If the REFERENCED_BY_RELATION tag is set to YES 780 | # then for each documented function all documented 781 | # functions referencing it will be listed. 782 | 783 | REFERENCED_BY_RELATION = NO 784 | 785 | # If the REFERENCES_RELATION tag is set to YES 786 | # then for each documented function all documented entities 787 | # called/used by that function will be listed. 788 | 789 | REFERENCES_RELATION = NO 790 | 791 | # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) 792 | # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from 793 | # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will 794 | # link to the source code. 795 | # Otherwise they will link to the documentation. 
796 | 797 | REFERENCES_LINK_SOURCE = YES 798 | 799 | # If the USE_HTAGS tag is set to YES then the references to source code 800 | # will point to the HTML generated by the htags(1) tool instead of doxygen 801 | # built-in source browser. The htags tool is part of GNU's global source 802 | # tagging system (see http://www.gnu.org/software/global/global.html). You 803 | # will need version 4.8.6 or higher. 804 | 805 | USE_HTAGS = NO 806 | 807 | # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen 808 | # will generate a verbatim copy of the header file for each class for 809 | # which an include is specified. Set to NO to disable this. 810 | 811 | VERBATIM_HEADERS = YES 812 | 813 | #--------------------------------------------------------------------------- 814 | # configuration options related to the alphabetical class index 815 | #--------------------------------------------------------------------------- 816 | 817 | # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index 818 | # of all compounds will be generated. Enable this if the project 819 | # contains a lot of classes, structs, unions or interfaces. 820 | 821 | ALPHABETICAL_INDEX = YES 822 | 823 | # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then 824 | # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns 825 | # in which this list will be split (can be a number in the range [1..20]) 826 | 827 | COLS_IN_ALPHA_INDEX = 5 828 | 829 | # In case all classes in a project start with a common prefix, all 830 | # classes will be put under the same header in the alphabetical index. 831 | # The IGNORE_PREFIX tag can be used to specify one or more prefixes that 832 | # should be ignored while generating the index headers. 
833 | 834 | IGNORE_PREFIX = 835 | 836 | #--------------------------------------------------------------------------- 837 | # configuration options related to the HTML output 838 | #--------------------------------------------------------------------------- 839 | 840 | # If the GENERATE_HTML tag is set to YES (the default) Doxygen will 841 | # generate HTML output. 842 | 843 | GENERATE_HTML = YES 844 | 845 | # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. 846 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 847 | # put in front of it. If left blank `html' will be used as the default path. 848 | 849 | HTML_OUTPUT = html 850 | 851 | # The HTML_FILE_EXTENSION tag can be used to specify the file extension for 852 | # each generated HTML page (for example: .htm,.php,.asp). If it is left blank 853 | # doxygen will generate files with .html extension. 854 | 855 | HTML_FILE_EXTENSION = .html 856 | 857 | # The HTML_HEADER tag can be used to specify a personal HTML header for 858 | # each generated HTML page. If it is left blank doxygen will generate a 859 | # standard header. Note that when using a custom header you are responsible 860 | # for the proper inclusion of any scripts and style sheets that doxygen 861 | # needs, which is dependent on the configuration options used. 862 | # It is advised to generate a default header using "doxygen -w html 863 | # header.html footer.html stylesheet.css YourConfigFile" and then modify 864 | # that header. Note that the header is subject to change so you typically 865 | # have to redo this when upgrading to a newer version of doxygen or when 866 | # changing the value of configuration settings such as GENERATE_TREEVIEW! 867 | 868 | HTML_HEADER = 869 | 870 | # The HTML_FOOTER tag can be used to specify a personal HTML footer for 871 | # each generated HTML page. If it is left blank doxygen will generate a 872 | # standard footer. 
873 | 874 | HTML_FOOTER = 875 | 876 | # The HTML_STYLESHEET tag can be used to specify a user-defined cascading 877 | # style sheet that is used by each HTML page. It can be used to 878 | # fine-tune the look of the HTML output. If the tag is left blank doxygen 879 | # will generate a default style sheet. Note that doxygen will try to copy 880 | # the style sheet file to the HTML output directory, so don't put your own 881 | # style sheet in the HTML output directory as well, or it will be erased! 882 | 883 | HTML_STYLESHEET = 884 | 885 | # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or 886 | # other source files which should be copied to the HTML output directory. Note 887 | # that these files will be copied to the base HTML output directory. Use the 888 | # $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these 889 | # files. In the HTML_STYLESHEET file, use the file name only. Also note that 890 | # the files will be copied as-is; there are no commands or markers available. 891 | 892 | HTML_EXTRA_FILES = 893 | 894 | # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. 895 | # Doxygen will adjust the colors in the style sheet and background images 896 | # according to this color. Hue is specified as an angle on a colorwheel, 897 | # see http://en.wikipedia.org/wiki/Hue for more information. 898 | # For instance the value 0 represents red, 60 is yellow, 120 is green, 899 | # 180 is cyan, 240 is blue, 300 purple, and 360 is red again. 900 | # The allowed range is 0 to 359. 901 | 902 | HTML_COLORSTYLE_HUE = 220 903 | 904 | # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of 905 | # the colors in the HTML output. For a value of 0 the output will use 906 | # grayscales only. A value of 255 will produce the most vivid colors. 
907 | 908 | HTML_COLORSTYLE_SAT = 100 909 | 910 | # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to 911 | # the luminance component of the colors in the HTML output. Values below 912 | # 100 gradually make the output lighter, whereas values above 100 make 913 | # the output darker. The value divided by 100 is the actual gamma applied, 914 | # so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, 915 | # and 100 does not change the gamma. 916 | 917 | HTML_COLORSTYLE_GAMMA = 80 918 | 919 | # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML 920 | # page will contain the date and time when the page was generated. Setting 921 | # this to NO can help when comparing the output of multiple runs. 922 | 923 | HTML_TIMESTAMP = YES 924 | 925 | # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, 926 | # files or namespaces will be aligned in HTML using tables. If set to 927 | # NO a bullet list will be used. 928 | 929 | HTML_ALIGN_MEMBERS = YES 930 | 931 | # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML 932 | # documentation will contain sections that can be hidden and shown after the 933 | # page has loaded. For this to work a browser that supports 934 | # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox 935 | # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). 936 | 937 | HTML_DYNAMIC_SECTIONS = NO 938 | 939 | # If the GENERATE_DOCSET tag is set to YES, additional index files 940 | # will be generated that can be used as input for Apple's Xcode 3 941 | # integrated development environment, introduced with OSX 10.5 (Leopard). 942 | # To create a documentation set, doxygen will generate a Makefile in the 943 | # HTML output directory. 
Running make will produce the docset in that 944 | # directory and running "make install" will install the docset in 945 | # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find 946 | # it at startup. 947 | # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html 948 | # for more information. 949 | 950 | GENERATE_DOCSET = NO 951 | 952 | # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the 953 | # feed. A documentation feed provides an umbrella under which multiple 954 | # documentation sets from a single provider (such as a company or product suite) 955 | # can be grouped. 956 | 957 | DOCSET_FEEDNAME = "Doxygen generated docs" 958 | 959 | # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that 960 | # should uniquely identify the documentation set bundle. This should be a 961 | # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen 962 | # will append .docset to the name. 963 | 964 | DOCSET_BUNDLE_ID = org.doxygen.Project 965 | 966 | # When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify 967 | # the documentation publisher. This should be a reverse domain-name style 968 | # string, e.g. com.mycompany.MyDocSet.documentation. 969 | 970 | DOCSET_PUBLISHER_ID = org.doxygen.Publisher 971 | 972 | # The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher. 973 | 974 | DOCSET_PUBLISHER_NAME = Publisher 975 | 976 | # If the GENERATE_HTMLHELP tag is set to YES, additional index files 977 | # will be generated that can be used as input for tools like the 978 | # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) 979 | # of the generated HTML documentation. 980 | 981 | GENERATE_HTMLHELP = NO 982 | 983 | # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can 984 | # be used to specify the file name of the resulting .chm file. 
You 985 | # can add a path in front of the file if the result should not be 986 | # written to the html output directory. 987 | 988 | CHM_FILE = 989 | 990 | # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can 991 | # be used to specify the location (absolute path including file name) of 992 | # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run 993 | # the HTML help compiler on the generated index.hhp. 994 | 995 | HHC_LOCATION = 996 | 997 | # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag 998 | # controls if a separate .chi index file is generated (YES) or that 999 | # it should be included in the master .chm file (NO). 1000 | 1001 | GENERATE_CHI = NO 1002 | 1003 | # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING 1004 | # is used to encode HtmlHelp index (hhk), content (hhc) and project file 1005 | # content. 1006 | 1007 | CHM_INDEX_ENCODING = 1008 | 1009 | # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag 1010 | # controls whether a binary table of contents is generated (YES) or a 1011 | # normal table of contents (NO) in the .chm file. 1012 | 1013 | BINARY_TOC = NO 1014 | 1015 | # The TOC_EXPAND flag can be set to YES to add extra items for group members 1016 | # to the contents of the HTML help documentation and to the tree view. 1017 | 1018 | TOC_EXPAND = NO 1019 | 1020 | # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and 1021 | # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated 1022 | # that can be used as input for Qt's qhelpgenerator to generate a 1023 | # Qt Compressed Help (.qch) of the generated HTML documentation. 1024 | 1025 | GENERATE_QHP = NO 1026 | 1027 | # If the QHG_LOCATION tag is specified, the QCH_FILE tag can 1028 | # be used to specify the file name of the resulting .qch file. 1029 | # The path specified is relative to the HTML output folder. 
1030 | 1031 | QCH_FILE = 1032 | 1033 | # The QHP_NAMESPACE tag specifies the namespace to use when generating 1034 | # Qt Help Project output. For more information please see 1035 | # http://doc.trolltech.com/qthelpproject.html#namespace 1036 | 1037 | QHP_NAMESPACE = org.doxygen.Project 1038 | 1039 | # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating 1040 | # Qt Help Project output. For more information please see 1041 | # http://doc.trolltech.com/qthelpproject.html#virtual-folders 1042 | 1043 | QHP_VIRTUAL_FOLDER = doc 1044 | 1045 | # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to 1046 | # add. For more information please see 1047 | # http://doc.trolltech.com/qthelpproject.html#custom-filters 1048 | 1049 | QHP_CUST_FILTER_NAME = 1050 | 1051 | # The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the 1052 | # custom filter to add. For more information please see 1053 | # 1054 | # Qt Help Project / Custom Filters. 1055 | 1056 | QHP_CUST_FILTER_ATTRS = 1057 | 1058 | # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this 1059 | # project's 1060 | # filter section matches. 1061 | # 1062 | # Qt Help Project / Filter Attributes. 1063 | 1064 | QHP_SECT_FILTER_ATTRS = 1065 | 1066 | # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can 1067 | # be used to specify the location of Qt's qhelpgenerator. 1068 | # If non-empty doxygen will try to run qhelpgenerator on the generated 1069 | # .qhp file. 1070 | 1071 | QHG_LOCATION = 1072 | 1073 | # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files 1074 | # will be generated, which together with the HTML files, form an Eclipse help 1075 | # plugin. To install this plugin and make it available under the help contents 1076 | # menu in Eclipse, the contents of the directory containing the HTML and XML 1077 | # files needs to be copied into the plugins directory of eclipse. 
The name of 1078 | # the directory within the plugins directory should be the same as 1079 | # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before 1080 | # the help appears. 1081 | 1082 | GENERATE_ECLIPSEHELP = NO 1083 | 1084 | # A unique identifier for the eclipse help plugin. When installing the plugin 1085 | # the directory name containing the HTML and XML files should also have 1086 | # this name. 1087 | 1088 | ECLIPSE_DOC_ID = org.doxygen.Project 1089 | 1090 | # The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) 1091 | # at top of each HTML page. The value NO (the default) enables the index and 1092 | # the value YES disables it. Since the tabs have the same information as the 1093 | # navigation tree you can set this option to NO if you already set 1094 | # GENERATE_TREEVIEW to YES. 1095 | 1096 | DISABLE_INDEX = NO 1097 | 1098 | # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index 1099 | # structure should be generated to display hierarchical information. 1100 | # If the tag value is set to YES, a side panel will be generated 1101 | # containing a tree-like index structure (just like the one that 1102 | # is generated for HTML Help). For this to work a browser that supports 1103 | # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). 1104 | # Windows users are probably better off using the HTML help feature. 1105 | # Since the tree basically has the same information as the tab index you 1106 | # could consider to set DISABLE_INDEX to NO when enabling this option. 1107 | 1108 | GENERATE_TREEVIEW = NO 1109 | 1110 | # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values 1111 | # (range [0,1..20]) that doxygen will group on one line in the generated HTML 1112 | # documentation. Note that a value of 0 will completely suppress the enum 1113 | # values from appearing in the overview section. 
1114 | 1115 | ENUM_VALUES_PER_LINE = 4 1116 | 1117 | # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, 1118 | # and Class Hierarchy pages using a tree view instead of an ordered list. 1119 | 1120 | USE_INLINE_TREES = NO 1121 | 1122 | # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be 1123 | # used to set the initial width (in pixels) of the frame in which the tree 1124 | # is shown. 1125 | 1126 | TREEVIEW_WIDTH = 250 1127 | 1128 | # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open 1129 | # links to external symbols imported via tag files in a separate window. 1130 | 1131 | EXT_LINKS_IN_WINDOW = NO 1132 | 1133 | # Use this tag to change the font size of Latex formulas included 1134 | # as images in the HTML documentation. The default is 10. Note that 1135 | # when you change the font size after a successful doxygen run you need 1136 | # to manually remove any form_*.png images from the HTML output directory 1137 | # to force them to be regenerated. 1138 | 1139 | FORMULA_FONTSIZE = 10 1140 | 1141 | # Use the FORMULA_TRANSPARENT tag to determine whether or not the images 1142 | # generated for formulas are transparent PNGs. Transparent PNGs are 1143 | # not supported properly for IE 6.0, but are supported on all modern browsers. 1144 | # Note that when changing this option you need to delete any form_*.png files 1145 | # in the HTML output before the changes have effect. 1146 | 1147 | FORMULA_TRANSPARENT = YES 1148 | 1149 | # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax 1150 | # (see http://www.mathjax.org) which uses client side Javascript for the 1151 | # rendering instead of using prerendered bitmaps. Use this if you do not 1152 | # have LaTeX installed or if you want the formulas to look prettier in the HTML 1153 | # output. When enabled you also need to install MathJax separately and 1154 | # configure the path to it using the MATHJAX_RELPATH option. 
1155 | 1156 | USE_MATHJAX = NO 1157 | 1158 | # When MathJax is enabled you need to specify the location relative to the 1159 | # HTML output directory using the MATHJAX_RELPATH option. The destination 1160 | # directory should contain the MathJax.js script. For instance, if the mathjax 1161 | # directory is located at the same level as the HTML output directory, then 1162 | # MATHJAX_RELPATH should be ../mathjax. The default value points to the 1163 | # mathjax.org site, so you can quickly see the result without installing 1164 | # MathJax, but it is strongly recommended to install a local copy of MathJax 1165 | # before deployment. 1166 | 1167 | MATHJAX_RELPATH = http://www.mathjax.org/mathjax 1168 | 1169 | # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension 1170 | # names that should be enabled during MathJax rendering. 1171 | 1172 | MATHJAX_EXTENSIONS = 1173 | 1174 | # When the SEARCHENGINE tag is enabled doxygen will generate a search box 1175 | # for the HTML output. The underlying search engine uses javascript 1176 | # and DHTML and should work on any modern browser. Note that when using 1177 | # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets 1178 | # (GENERATE_DOCSET) there is already a search function so this one should 1179 | # typically be disabled. For large projects the javascript based search engine 1180 | # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. 1181 | 1182 | SEARCHENGINE = YES 1183 | 1184 | # When the SERVER_BASED_SEARCH tag is enabled the search engine will be 1185 | # implemented using a PHP enabled web server instead of at the web client 1186 | # using Javascript. Doxygen will generate the search PHP script and index 1187 | # file to put on the web server. The advantage of the server 1188 | # based approach is that it scales better to large projects and allows 1189 | # full text search. 
The disadvantages are that it is more difficult to setup 1190 | # and does not have live searching capabilities. 1191 | 1192 | SERVER_BASED_SEARCH = NO 1193 | 1194 | #--------------------------------------------------------------------------- 1195 | # configuration options related to the LaTeX output 1196 | #--------------------------------------------------------------------------- 1197 | 1198 | # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will 1199 | # generate Latex output. 1200 | 1201 | GENERATE_LATEX = NO 1202 | 1203 | # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 1204 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1205 | # put in front of it. If left blank `latex' will be used as the default path. 1206 | 1207 | LATEX_OUTPUT = latex 1208 | 1209 | # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be 1210 | # invoked. If left blank `latex' will be used as the default command name. 1211 | # Note that when enabling USE_PDFLATEX this option is only used for 1212 | # generating bitmaps for formulas in the HTML output, but not in the 1213 | # Makefile that is written to the output directory. 1214 | 1215 | LATEX_CMD_NAME = latex 1216 | 1217 | # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to 1218 | # generate index for LaTeX. If left blank `makeindex' will be used as the 1219 | # default command name. 1220 | 1221 | MAKEINDEX_CMD_NAME = makeindex 1222 | 1223 | # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact 1224 | # LaTeX documents. This may be useful for small projects and may help to 1225 | # save some trees in general. 1226 | 1227 | COMPACT_LATEX = NO 1228 | 1229 | # The PAPER_TYPE tag can be used to set the paper type that is used 1230 | # by the printer. Possible values are: a4, letter, legal and 1231 | # executive. If left blank a4wide will be used. 
1232 | 1233 | PAPER_TYPE = a4 1234 | 1235 | # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX 1236 | # packages that should be included in the LaTeX output. 1237 | 1238 | EXTRA_PACKAGES = 1239 | 1240 | # The LATEX_HEADER tag can be used to specify a personal LaTeX header for 1241 | # the generated latex document. The header should contain everything until 1242 | # the first chapter. If it is left blank doxygen will generate a 1243 | # standard header. Notice: only use this tag if you know what you are doing! 1244 | 1245 | LATEX_HEADER = 1246 | 1247 | # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for 1248 | # the generated latex document. The footer should contain everything after 1249 | # the last chapter. If it is left blank doxygen will generate a 1250 | # standard footer. Notice: only use this tag if you know what you are doing! 1251 | 1252 | LATEX_FOOTER = 1253 | 1254 | # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated 1255 | # is prepared for conversion to pdf (using ps2pdf). The pdf file will 1256 | # contain links (just like the HTML output) instead of page references 1257 | # This makes the output suitable for online browsing using a pdf viewer. 1258 | 1259 | PDF_HYPERLINKS = YES 1260 | 1261 | # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of 1262 | # plain latex in the generated Makefile. Set this option to YES to get a 1263 | # higher quality PDF documentation. 1264 | 1265 | USE_PDFLATEX = YES 1266 | 1267 | # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. 1268 | # command to the generated LaTeX files. This will instruct LaTeX to keep 1269 | # running if errors occur, instead of asking the user for help. 1270 | # This option is also used when generating formulas in HTML. 
1271 | 1272 | LATEX_BATCHMODE = NO 1273 | 1274 | # If LATEX_HIDE_INDICES is set to YES then doxygen will not 1275 | # include the index chapters (such as File Index, Compound Index, etc.) 1276 | # in the output. 1277 | 1278 | LATEX_HIDE_INDICES = NO 1279 | 1280 | # If LATEX_SOURCE_CODE is set to YES then doxygen will include 1281 | # source code with syntax highlighting in the LaTeX output. 1282 | # Note that which sources are shown also depends on other settings 1283 | # such as SOURCE_BROWSER. 1284 | 1285 | LATEX_SOURCE_CODE = NO 1286 | 1287 | # The LATEX_BIB_STYLE tag can be used to specify the style to use for the 1288 | # bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See 1289 | # http://en.wikipedia.org/wiki/BibTeX for more info. 1290 | 1291 | LATEX_BIB_STYLE = plain 1292 | 1293 | #--------------------------------------------------------------------------- 1294 | # configuration options related to the RTF output 1295 | #--------------------------------------------------------------------------- 1296 | 1297 | # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output 1298 | # The RTF output is optimized for Word 97 and may not look very pretty with 1299 | # other RTF readers or editors. 1300 | 1301 | GENERATE_RTF = NO 1302 | 1303 | # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. 1304 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1305 | # put in front of it. If left blank `rtf' will be used as the default path. 1306 | 1307 | RTF_OUTPUT = rtf 1308 | 1309 | # If the COMPACT_RTF tag is set to YES Doxygen generates more compact 1310 | # RTF documents. This may be useful for small projects and may help to 1311 | # save some trees in general. 1312 | 1313 | COMPACT_RTF = NO 1314 | 1315 | # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated 1316 | # will contain hyperlink fields. 
The RTF file will 1317 | # contain links (just like the HTML output) instead of page references. 1318 | # This makes the output suitable for online browsing using WORD or other 1319 | # programs which support those fields. 1320 | # Note: wordpad (write) and others do not support links. 1321 | 1322 | RTF_HYPERLINKS = NO 1323 | 1324 | # Load style sheet definitions from file. Syntax is similar to doxygen's 1325 | # config file, i.e. a series of assignments. You only have to provide 1326 | # replacements, missing definitions are set to their default value. 1327 | 1328 | RTF_STYLESHEET_FILE = 1329 | 1330 | # Set optional variables used in the generation of an rtf document. 1331 | # Syntax is similar to doxygen's config file. 1332 | 1333 | RTF_EXTENSIONS_FILE = 1334 | 1335 | #--------------------------------------------------------------------------- 1336 | # configuration options related to the man page output 1337 | #--------------------------------------------------------------------------- 1338 | 1339 | # If the GENERATE_MAN tag is set to YES (the default) Doxygen will 1340 | # generate man pages 1341 | 1342 | GENERATE_MAN = NO 1343 | 1344 | # The MAN_OUTPUT tag is used to specify where the man pages will be put. 1345 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1346 | # put in front of it. If left blank `man' will be used as the default path. 1347 | 1348 | MAN_OUTPUT = man 1349 | 1350 | # The MAN_EXTENSION tag determines the extension that is added to 1351 | # the generated man pages (default is the subroutine's section .3) 1352 | 1353 | MAN_EXTENSION = .3 1354 | 1355 | # If the MAN_LINKS tag is set to YES and Doxygen generates man output, 1356 | # then it will generate one additional man file for each entity 1357 | # documented in the real man page(s). These additional files 1358 | # only source the real man page, but without them the man command 1359 | # would be unable to find the correct page. The default is NO. 
1360 | 1361 | MAN_LINKS = NO 1362 | 1363 | #--------------------------------------------------------------------------- 1364 | # configuration options related to the XML output 1365 | #--------------------------------------------------------------------------- 1366 | 1367 | # If the GENERATE_XML tag is set to YES Doxygen will 1368 | # generate an XML file that captures the structure of 1369 | # the code including all documentation. 1370 | 1371 | GENERATE_XML = NO 1372 | 1373 | # The XML_OUTPUT tag is used to specify where the XML pages will be put. 1374 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1375 | # put in front of it. If left blank `xml' will be used as the default path. 1376 | 1377 | XML_OUTPUT = xml 1378 | 1379 | # The XML_SCHEMA tag can be used to specify an XML schema, 1380 | # which can be used by a validating XML parser to check the 1381 | # syntax of the XML files. 1382 | 1383 | XML_SCHEMA = 1384 | 1385 | # The XML_DTD tag can be used to specify an XML DTD, 1386 | # which can be used by a validating XML parser to check the 1387 | # syntax of the XML files. 1388 | 1389 | XML_DTD = 1390 | 1391 | # If the XML_PROGRAMLISTING tag is set to YES Doxygen will 1392 | # dump the program listings (including syntax highlighting 1393 | # and cross-referencing information) to the XML output. Note that 1394 | # enabling this will significantly increase the size of the XML output. 1395 | 1396 | XML_PROGRAMLISTING = YES 1397 | 1398 | #--------------------------------------------------------------------------- 1399 | # configuration options for the AutoGen Definitions output 1400 | #--------------------------------------------------------------------------- 1401 | 1402 | # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will 1403 | # generate an AutoGen Definitions (see autogen.sf.net) file 1404 | # that captures the structure of the code including all 1405 | # documentation. 
Note that this feature is still experimental 1406 | # and incomplete at the moment. 1407 | 1408 | GENERATE_AUTOGEN_DEF = NO 1409 | 1410 | #--------------------------------------------------------------------------- 1411 | # configuration options related to the Perl module output 1412 | #--------------------------------------------------------------------------- 1413 | 1414 | # If the GENERATE_PERLMOD tag is set to YES Doxygen will 1415 | # generate a Perl module file that captures the structure of 1416 | # the code including all documentation. Note that this 1417 | # feature is still experimental and incomplete at the 1418 | # moment. 1419 | 1420 | GENERATE_PERLMOD = NO 1421 | 1422 | # If the PERLMOD_LATEX tag is set to YES Doxygen will generate 1423 | # the necessary Makefile rules, Perl scripts and LaTeX code to be able 1424 | # to generate PDF and DVI output from the Perl module output. 1425 | 1426 | PERLMOD_LATEX = NO 1427 | 1428 | # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be 1429 | # nicely formatted so it can be parsed by a human reader. 1430 | # This is useful 1431 | # if you want to understand what is going on. 1432 | # On the other hand, if this 1433 | # tag is set to NO the size of the Perl module output will be much smaller 1434 | # and Perl will parse it just the same. 1435 | 1436 | PERLMOD_PRETTY = YES 1437 | 1438 | # The names of the make variables in the generated doxyrules.make file 1439 | # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. 1440 | # This is useful so different doxyrules.make files included by the same 1441 | # Makefile don't overwrite each other's variables. 
1442 | 1443 | PERLMOD_MAKEVAR_PREFIX = 1444 | 1445 | #--------------------------------------------------------------------------- 1446 | # Configuration options related to the preprocessor 1447 | #--------------------------------------------------------------------------- 1448 | 1449 | # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will 1450 | # evaluate all C-preprocessor directives found in the sources and include 1451 | # files. 1452 | 1453 | ENABLE_PREPROCESSING = YES 1454 | 1455 | # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro 1456 | # names in the source code. If set to NO (the default) only conditional 1457 | # compilation will be performed. Macro expansion can be done in a controlled 1458 | # way by setting EXPAND_ONLY_PREDEF to YES. 1459 | 1460 | MACRO_EXPANSION = NO 1461 | 1462 | # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES 1463 | # then the macro expansion is limited to the macros specified with the 1464 | # PREDEFINED and EXPAND_AS_DEFINED tags. 1465 | 1466 | EXPAND_ONLY_PREDEF = NO 1467 | 1468 | # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files 1469 | # pointed to by INCLUDE_PATH will be searched when a #include is found. 1470 | 1471 | SEARCH_INCLUDES = YES 1472 | 1473 | # The INCLUDE_PATH tag can be used to specify one or more directories that 1474 | # contain include files that are not input files but should be processed by 1475 | # the preprocessor. 1476 | 1477 | INCLUDE_PATH = 1478 | 1479 | # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard 1480 | # patterns (like *.h and *.hpp) to filter out the header-files in the 1481 | # directories. If left blank, the patterns specified with FILE_PATTERNS will 1482 | # be used. 
1483 | 1484 | INCLUDE_FILE_PATTERNS = 1485 | 1486 | # The PREDEFINED tag can be used to specify one or more macro names that 1487 | # are defined before the preprocessor is started (similar to the -D option of 1488 | # gcc). The argument of the tag is a list of macros of the form: name 1489 | # or name=definition (no spaces). If the definition and the = are 1490 | # omitted =1 is assumed. To prevent a macro definition from being 1491 | # undefined via #undef or recursively expanded use the := operator 1492 | # instead of the = operator. 1493 | 1494 | PREDEFINED = 1495 | 1496 | # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then 1497 | # this tag can be used to specify a list of macro names that should be expanded. 1498 | # The macro definition that is found in the sources will be used. 1499 | # Use the PREDEFINED tag if you want to use a different macro definition that 1500 | # overrules the definition found in the source code. 1501 | 1502 | EXPAND_AS_DEFINED = 1503 | 1504 | # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then 1505 | # doxygen's preprocessor will remove all references to function-like macros 1506 | # that are alone on a line, have an all uppercase name, and do not end with a 1507 | # semicolon, because these will confuse the parser if not removed. 1508 | 1509 | SKIP_FUNCTION_MACROS = YES 1510 | 1511 | #--------------------------------------------------------------------------- 1512 | # Configuration::additions related to external references 1513 | #--------------------------------------------------------------------------- 1514 | 1515 | # The TAGFILES option can be used to specify one or more tagfiles. 1516 | # Optionally an initial location of the external documentation 1517 | # can be added for each tagfile. The format of a tag file without 1518 | # this location is as follows: 1519 | # 1520 | # TAGFILES = file1 file2 ... 
1521 | # Adding location for the tag files is done as follows: 1522 | # 1523 | # TAGFILES = file1=loc1 "file2 = loc2" ... 1524 | # where "loc1" and "loc2" can be relative or absolute paths or 1525 | # URLs. If a location is present for each tag, the installdox tool 1526 | # does not have to be run to correct the links. 1527 | # Note that each tag file must have a unique name 1528 | # (where the name does NOT include the path) 1529 | # If a tag file is not located in the directory in which doxygen 1530 | # is run, you must also specify the path to the tagfile here. 1531 | 1532 | TAGFILES = 1533 | 1534 | # When a file name is specified after GENERATE_TAGFILE, doxygen will create 1535 | # a tag file that is based on the input files it reads. 1536 | 1537 | GENERATE_TAGFILE = 1538 | 1539 | # If the ALLEXTERNALS tag is set to YES all external classes will be listed 1540 | # in the class index. If set to NO only the inherited external classes 1541 | # will be listed. 1542 | 1543 | ALLEXTERNALS = NO 1544 | 1545 | # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed 1546 | # in the modules index. If set to NO, only the current project's groups will 1547 | # be listed. 1548 | 1549 | EXTERNAL_GROUPS = YES 1550 | 1551 | # The PERL_PATH should be the absolute path and name of the perl script 1552 | # interpreter (i.e. the result of `which perl'). 1553 | 1554 | PERL_PATH = /usr/bin/perl 1555 | 1556 | #--------------------------------------------------------------------------- 1557 | # Configuration options related to the dot tool 1558 | #--------------------------------------------------------------------------- 1559 | 1560 | # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will 1561 | # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base 1562 | # or super classes. Setting the tag to NO turns the diagrams off. 
Note that 1563 | # this option also works with HAVE_DOT disabled, but it is recommended to 1564 | # install and use dot, since it yields more powerful graphs. 1565 | 1566 | CLASS_DIAGRAMS = YES 1567 | 1568 | # You can define message sequence charts within doxygen comments using the \msc 1569 | # command. Doxygen will then run the mscgen tool (see 1570 | # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the 1571 | # documentation. The MSCGEN_PATH tag allows you to specify the directory where 1572 | # the mscgen tool resides. If left empty the tool is assumed to be found in the 1573 | # default search path. 1574 | 1575 | MSCGEN_PATH = 1576 | 1577 | # If set to YES, the inheritance and collaboration graphs will hide 1578 | # inheritance and usage relations if the target is undocumented 1579 | # or is not a class. 1580 | 1581 | HIDE_UNDOC_RELATIONS = YES 1582 | 1583 | # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is 1584 | # available from the path. This tool is part of Graphviz, a graph visualization 1585 | # toolkit from AT&T and Lucent Bell Labs. The other options in this section 1586 | # have no effect if this option is set to NO (the default) 1587 | 1588 | HAVE_DOT = YES 1589 | 1590 | # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is 1591 | # allowed to run in parallel. When set to 0 (the default) doxygen will 1592 | # base this on the number of processors available in the system. You can set it 1593 | # explicitly to a value larger than 0 to get control over the balance 1594 | # between CPU load and processing speed. 1595 | 1596 | DOT_NUM_THREADS = 0 1597 | 1598 | # By default doxygen will use the Helvetica font for all dot files that 1599 | # doxygen generates. When you want a differently looking font you can specify 1600 | # the font name using DOT_FONTNAME. 
You need to make sure dot is able to find 1601 | # the font, which can be done by putting it in a standard location or by setting 1602 | # the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the 1603 | # directory containing the font. 1604 | 1605 | DOT_FONTNAME = Helvetica 1606 | 1607 | # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. 1608 | # The default size is 10pt. 1609 | 1610 | DOT_FONTSIZE = 10 1611 | 1612 | # By default doxygen will tell dot to use the Helvetica font. 1613 | # If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to 1614 | # set the path where dot can find it. 1615 | 1616 | DOT_FONTPATH = 1617 | 1618 | # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen 1619 | # will generate a graph for each documented class showing the direct and 1620 | # indirect inheritance relations. Setting this tag to YES will force the 1621 | # CLASS_DIAGRAMS tag to NO. 1622 | 1623 | CLASS_GRAPH = YES 1624 | 1625 | # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen 1626 | # will generate a graph for each documented class showing the direct and 1627 | # indirect implementation dependencies (inheritance, containment, and 1628 | # class references variables) of the class with other documented classes. 1629 | 1630 | COLLABORATION_GRAPH = YES 1631 | 1632 | # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen 1633 | # will generate a graph for groups, showing the direct groups dependencies 1634 | 1635 | GROUP_GRAPHS = YES 1636 | 1637 | # If the UML_LOOK tag is set to YES doxygen will generate inheritance and 1638 | # collaboration diagrams in a style similar to the OMG's Unified Modeling 1639 | # Language. 1640 | 1641 | UML_LOOK = NO 1642 | 1643 | # If set to YES, the inheritance and collaboration graphs will show the 1644 | # relations between templates and their instances. 
1645 | 1646 | TEMPLATE_RELATIONS = NO 1647 | 1648 | # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT 1649 | # tags are set to YES then doxygen will generate a graph for each documented 1650 | # file showing the direct and indirect include dependencies of the file with 1651 | # other documented files. 1652 | 1653 | INCLUDE_GRAPH = YES 1654 | 1655 | # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and 1656 | # HAVE_DOT tags are set to YES then doxygen will generate a graph for each 1657 | # documented header file showing the documented files that directly or 1658 | # indirectly include this file. 1659 | 1660 | INCLUDED_BY_GRAPH = YES 1661 | 1662 | # If the CALL_GRAPH and HAVE_DOT options are set to YES then 1663 | # doxygen will generate a call dependency graph for every global function 1664 | # or class method. Note that enabling this option will significantly increase 1665 | # the time of a run. So in most cases it will be better to enable call graphs 1666 | # for selected functions only using the \callgraph command. 1667 | 1668 | CALL_GRAPH = NO 1669 | 1670 | # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then 1671 | # doxygen will generate a caller dependency graph for every global function 1672 | # or class method. Note that enabling this option will significantly increase 1673 | # the time of a run. So in most cases it will be better to enable caller 1674 | # graphs for selected functions only using the \callergraph command. 1675 | 1676 | CALLER_GRAPH = NO 1677 | 1678 | # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen 1679 | # will generate a graphical hierarchy of all classes instead of a textual one. 1680 | 1681 | GRAPHICAL_HIERARCHY = YES 1682 | 1683 | # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES 1684 | # then doxygen will show the dependencies a directory has on other directories 1685 | # in a graphical way. 
The dependency relations are determined by the #include 1686 | # relations between the files in the directories. 1687 | 1688 | DIRECTORY_GRAPH = YES 1689 | 1690 | # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images 1691 | # generated by dot. Possible values are svg, png, jpg, or gif. 1692 | # If left blank png will be used. If you choose svg you need to set 1693 | # HTML_FILE_EXTENSION to xhtml in order to make the SVG files 1694 | # visible in IE 9+ (other browsers do not have this requirement). 1695 | 1696 | DOT_IMAGE_FORMAT = png 1697 | 1698 | # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to 1699 | # enable generation of interactive SVG images that allow zooming and panning. 1700 | # Note that this requires a modern browser other than Internet Explorer. 1701 | # Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you 1702 | # need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files 1703 | # visible. Older versions of IE do not have SVG support. 1704 | 1705 | INTERACTIVE_SVG = NO 1706 | 1707 | # The tag DOT_PATH can be used to specify the path where the dot tool can be 1708 | # found. If left blank, it is assumed the dot tool can be found in the path. 1709 | 1710 | DOT_PATH = 1711 | 1712 | # The DOTFILE_DIRS tag can be used to specify one or more directories that 1713 | # contain dot files that are included in the documentation (see the 1714 | # \dotfile command). 1715 | 1716 | DOTFILE_DIRS = 1717 | 1718 | # The MSCFILE_DIRS tag can be used to specify one or more directories that 1719 | # contain msc files that are included in the documentation (see the 1720 | # \mscfile command). 1721 | 1722 | MSCFILE_DIRS = 1723 | 1724 | # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of 1725 | # nodes that will be shown in the graph. 
If the number of nodes in a graph 1726 | # becomes larger than this value, doxygen will truncate the graph, which is 1727 | # visualized by representing a node as a red box. Note that doxygen if the 1728 | # number of direct children of the root node in a graph is already larger than 1729 | # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note 1730 | # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. 1731 | 1732 | DOT_GRAPH_MAX_NODES = 50 1733 | 1734 | # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the 1735 | # graphs generated by dot. A depth value of 3 means that only nodes reachable 1736 | # from the root by following a path via at most 3 edges will be shown. Nodes 1737 | # that lay further from the root node will be omitted. Note that setting this 1738 | # option to 1 or 2 may greatly reduce the computation time needed for large 1739 | # code bases. Also note that the size of a graph can be further restricted by 1740 | # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. 1741 | 1742 | MAX_DOT_GRAPH_DEPTH = 0 1743 | 1744 | # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent 1745 | # background. This is disabled by default, because dot on Windows does not 1746 | # seem to support this out of the box. Warning: Depending on the platform used, 1747 | # enabling this option may lead to badly anti-aliased labels on the edges of 1748 | # a graph (i.e. they become hard to read). 1749 | 1750 | DOT_TRANSPARENT = NO 1751 | 1752 | # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output 1753 | # files in one run (i.e. multiple -o and -T options on the command line). This 1754 | # makes dot run faster, but since only newer versions of dot (>1.8.10) 1755 | # support this, this feature is disabled by default. 
1756 | 1757 | DOT_MULTI_TARGETS = NO 1758 | 1759 | # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will 1760 | # generate a legend page explaining the meaning of the various boxes and 1761 | # arrows in the dot generated graphs. 1762 | 1763 | GENERATE_LEGEND = YES 1764 | 1765 | # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will 1766 | # remove the intermediate dot files that are used to generate 1767 | # the various graphs. 1768 | 1769 | DOT_CLEANUP = YES 1770 | -------------------------------------------------------------------------------- /docs/how_to_install.dox: -------------------------------------------------------------------------------- 1 | /** \page HowToInstall How to install 2 | 3 | \tableofcontents 4 | 5 | \section SOFT Install third-part software 6 | 7 | To use our code you should install some third-part software 8 | 9 | - CMake www.cmake.org/cmake/resources/software.html 10 | - Boost www.boost.org 11 | - IntelMKL software.intel.com/en-us/articles/intel-mkl/ - implementation of BLAS 12 | 13 | \section CMAKE Correct CMakeLists.txt file 14 | 15 | You should insert path to Boost and BLAS (MKL) sources and libraries in file CMakeLists.txt. 16 | 17 | \code 18 | ########################################################################################## 19 | # CMake build script for MultiIndex 20 | # 21 | ########################################################################################## 22 | 23 | #let all libraries be static, not shared 24 | OPTION(BUILD_SHARED_LIBS "Build shared libraries (DLLs)." OFF) 25 | 26 | ########################################################################################## 27 | # lets start describing our project. 
28 | project (NearestSearch CXX C) 29 | cmake_minimum_required(VERSION 2.6) 30 | 31 | IF (UNIX) 32 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") 33 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") 34 | ENDIF (UNIX) 35 | 36 | IF (NOT DEFINED CMAKE_CURRENT_LIST_DIR) 37 | SET(CMAKE_CURRENT_LIST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) 38 | ENDIF (NOT DEFINED CMAKE_CURRENT_LIST_DIR) 39 | 40 | MESSAGE("current dir: ${CMAKE_CURRENT_LIST_DIR}") 41 | SET (Source_Path ${CMAKE_CURRENT_LIST_DIR}) 42 | 43 | MESSAGE ("PROJECT_BINARY_DIR " ${PROJECT_BINARY_DIR}) 44 | 45 | # ADD BOOSTDIR 46 | SET(BOOST_DIR ) 47 | SET(BOOST_LIB ) 48 | 49 | # ADD BLAS 50 | SET(BLAS_DIR ) 51 | SET(BLAS_LIB ) 52 | 53 | SET(REQUIRED_BLAS_LIBS mkl_intel_lp64.lib mkl_intel_thread.lib mkl_core.lib libiomp5md.lib) 54 | 55 | ########################################################################################## 56 | INCLUDE (${Source_Path}/NearestSearch.cmake NO_POLICY_SCOPE) 57 | INCLUDE_DIRECTORIES(${BOOST_DIR}) 58 | INCLUDE_DIRECTORIES(${BLAS_DIR}) 59 | LINK_DIRECTORIES(${BOOST_LIB}) 60 | LINK_DIRECTORIES(${BLAS_LIB}) 61 | INCLUDE_DIRECTORIES(${Source_Path}) 62 | ADD_LIBRARY (nearest_search_lib STATIC ${NEAREST_ALL_CC}) 63 | 64 | ########################################################################################## 65 | INCLUDE (${Source_Path}/IndexerLauncher.cmake NO_POLICY_SCOPE) 66 | INCLUDE_DIRECTORIES(${BOOST_DIR}) 67 | INCLUDE_DIRECTORIES(${BLAS_DIR}) 68 | LINK_DIRECTORIES(${BOOST_LIB}) 69 | LINK_DIRECTORIES(${BLAS_LIB}) 70 | INCLUDE_DIRECTORIES(${Source_Path}) 71 | ADD_EXECUTABLE (indexer_launcher ${IndexerLauncher}) 72 | TARGET_LINK_LIBRARIES (indexer_launcher nearest_search_lib) 73 | TARGET_LINK_LIBRARIES (indexer_launcher ${REQUIRED_BLAS_LIBS}) 74 | 75 | ########################################################################################## 76 | INCLUDE (${Source_Path}/SearcherTester.cmake NO_POLICY_SCOPE) 77 | INCLUDE_DIRECTORIES(${BOOST_DIR}) 78 | INCLUDE_DIRECTORIES(${BLAS_DIR}) 79 | 
LINK_DIRECTORIES(${BOOST_LIB}) 80 | LINK_DIRECTORIES(${BLAS_LIB}) 81 | INCLUDE_DIRECTORIES(${Source_Path}) 82 | ADD_EXECUTABLE (searcher_tester ${SearchTester}) 83 | TARGET_LINK_LIBRARIES (searcher_tester nearest_search_lib) 84 | TARGET_LINK_LIBRARIES (searcher_tester ${REQUIRED_BLAS_LIBS}) 85 | \endcode 86 | 87 | The list of libraries to link (${REQUIRED_BLAS_LIBS}) may differ from the above. 88 | You can get the list for your architecture here. 89 | 90 | \section BUILD Build project 91 | 92 | - create a folder "build" in the directory with the source files 93 | - (for Windows users) run "make_project.bat" 94 | - (for Unix users) do the next steps 95 | \code 96 | cd build 97 | cmake .. 98 | \endcode 99 | 100 | These operations create a solution for Visual Studio (Windows) or a makefile (Unix). Then you can build the project. 101 | 102 | **/ -------------------------------------------------------------------------------- /docs/index.dox: -------------------------------------------------------------------------------- 1 | /** \page Indexation How to create a multi-index 2 | 3 | \tableofcontents 4 | 5 | \section ALGO Algorithm 6 | 7 | The process of the multi-index construction is described in our paper. Here we provide the details of the implementation.\n 8 | \n 9 | After the vocabularies are trained (see below), the index construction progresses in two stages: assigning points to multi-index entries ("coarse quantization") and calculating information for reranking. Because one can use different reranking approaches for the same coarse quantization, the first stage of the algorithm saves the coarse quantizations for all points in the database to the hard drive. These coarse quantizations are just the entry identifiers (e.g. codeword pairs).
So if the file with coarse quantizations has already been produced there is no need to calculate them again (in this case, remove the flag --build_coarse from the command line parameters).\n 10 | \n 11 | In the CPU, a multi-index consists of a long one-dimensional array containing the compressed points aligned by entries (i.e. a group of points belonging to the same entry is stored contiguously) and a table containing the starting index in the array for every entry of the multi-index. 12 | The class MultiIndexer is thus a C++ template by the type of the record in this array. In this way, you can easily implement your own reranking approach by defining a new structure NewRecordType and implementing the function GetRecord for your structure.\n 13 | \n 14 | For index construction you should provide coarse vocabularies for building the multi-index structure and fine vocabularies for calculating the reranking information (assuming that you are using the provided reranking procedure). We assume that these files are prepared outside this code (C++ is not the simplest way to create vocabularies, just for your reference we provide a MATLAB script to create them below).\n 15 | 16 | \section FORMATS File formats 17 | 18 | Our code uses the .bvecs and .fvecs file formats developed by INRIA LEAR and TEXMEX groups. 
19 | 20 | - Coarse vocabularies\n 21 | Our code assumes that coarse vocabularies are in the following format:\n 22 | 4 bytes(one int32) - number of items in each vocabulary (N)\n 23 | 4 bytes(one int32) - dimension of item (d)\n 24 | 4*N*d*M bytes(N*d*M floats) - vocabulary items one after another (M is the multiplicity of algorithm)\n 25 | \n 26 | Matlab script to build coarse vocabularies 27 | \code 28 | clear all; 29 | 30 | all_data = bvecs_read('sift1M.bvecs'); 31 | 32 | all_data = single(all_data); 33 | vocabSize = 4096; 34 | % add implementation of K-means 35 | vocab1 = your_kmeans(single(all_data(1:end/2,:)),vocabSize); 36 | vocab2 = your_kmeans(single(all_data(end/2+1:end,:)),vocabSize); 37 | 38 | file = fopen(['sift1M_double_4096_' num2str(vocabSize) '.dat'], 'w'); 39 | dim = size(vocab1, 1); 40 | sz = size(vocab1, 2); 41 | fwrite(file, dim, 'int32'); 42 | fwrite(file, sz, 'int32'); 43 | fwrite(file, vocab1, 'float'); 44 | fwrite(file, vocab2, 'float'); 45 | fclose(file); 46 | save(['sift1M_double_4096_' num2str(vocabSize) '.mat'], 'vocab1', 'vocab2'); 47 | \endcode 48 | - Fine vocabularies\n 49 | Our code assumes that fine vocabularies are in the following format:\n 50 | 4 bytes(one int32) - number of vocabularies (m)\n 51 | 4 bytes(one int32) - number of items in each vocabulary (N)\n 52 | 4 bytes(one int32) - dimension of item (d)\n 53 | 4*N*d*m bytes(N*d*m floats) - vocabulary items one after another\n 54 | 55 | Matlab script to build fine vocabularies (used VlFeat library) 56 | \code 57 | clear all; 58 | all_data = fvecs_read('sift1M.fvecs'); 59 | 60 | vocabSize = 4096; 61 | load(['sift1M_double_' num2str(vocabSize) '.mat'], 'vocab1', 'vocab2'); 62 | 63 | vocab1 = int32(vocab1); 64 | vocab2 = int32(vocab2); 65 | i1 = vl_ikmeanspush(all_data(1:end/2,:), vocab1); 66 | i2 = vl_ikmeanspush(all_data(end/2+1:end,:), vocab2); 67 | residual = single(all_data)- single([vocab1(:,i1); vocab2(:,i2)]); 68 | bytes_per_point = 8; 69 | 70 | D = size(residual,1) / 
bytes_per_point; 71 | residual_vocab = cell(bytes_per_point,1); 72 | dist = cell(bytes_per_point,1); 73 | for m = 1:bytes_per_point 74 | chunk = residual(D*m-D+1:D*m,:); 75 | % add implementation of K-means 76 | residual_vocab{m} = your_kmeans(chunk,256); 77 | dist{m} = vl_alldist2(residual_vocab{m}); 78 | end 79 | 80 | save(['sift1M_double_4096_8.mat'],'residual_vocab','dist'); 81 | 82 | file = fopen(['sift1M_double_4096_8.dat'], 'w'); 83 | vocabs_count = size(residual_vocab, 1); 84 | each_vocab_count = size(residual_vocab{1}, 2); 85 | each_vocab_dim = size(residual_vocab{1}, 1); 86 | fwrite(file, vocabs_count, 'int32'); 87 | fwrite(file, each_vocab_count, 'int32'); 88 | fwrite(file, each_vocab_dim, 'int32'); 89 | for i = 1:vocabs_count 90 | for j = 1:each_vocab_count 91 | a = residual_vocab{i}(:,j); 92 | fwrite(file, a, 'float'); 93 | end 94 | end 95 | fclose(file); 96 | 97 | \endcode 98 | 99 | \section EXAMPLE Indexing sample 100 | 101 | To build an invertered index for a set of points you should run "indexer_launcher" application with some command line parameters. 102 | 103 | \code 104 | --threads_count - the number of threads to use for the multi-threaded index construction 105 | --multiplicity - the number of groups of dimensions the vectors will be split into. Equals 2 or 4 for the experiments in the paper. 106 | --points_file - the path to the file with the vector database (should be in .bvecs or .fvecs format) 107 | --coarse_vocabs_file - the path to the file with the coarse vocabularies (see the format description above) 108 | --fine_vocabs_file - the path to the file with fine vocabularies for reranking (see the format description above) 109 | --input_point_type - "BVEC" or "FVEC" 110 | --points_count - the number of points to index 111 | --space_dim - the space dimensionality (e.g. 
128 for SIFTs) 112 | --files_prefix - the common prefix for storing the multi-index files (used to control runs with different parameters) 113 | --coarse_quantization_file - the path to the file with coarse quantizations 114 | --metainfo_file - the path to the file with metainformation (deprecated, just write "fake.txt") 115 | --use_residuals - the reranking method flag. Specify it if you want to use residuals for reranking (Multi-D-ADC) and omit it if you want to use initial points (Multi-ADC) 116 | --build_coarse - specify this flag if you want to recompute coarse quantizations (otherwise, will use the previously computed, if available) 117 | \endcode 118 | 119 | Windows users can try launch_indexer.bat script. It launches indexing of the ANN_SIFT1M dataset using the provided vocabularies. 120 | Unix users should just write a similar launch_indexer.sh script. 121 | **/ -------------------------------------------------------------------------------- /docs/main_page.dox: -------------------------------------------------------------------------------- 1 | /** @mainpage MultiIndex 2 | 3 | This is a brief documentation for the source code of the inverted multiindex algorithm 4 | for fast and memory-efficient indexing and approximate nearest-neighbor search in high-dimensional spaces. One can download source-code from here. 5 | 6 | - \ref HowToInstall 7 | 8 | - \ref Indexation 9 | 10 | - \ref Search 11 | 12 | \image html mult.jpg 13 | 14 | Copyright @ Yandex 2012. \n 15 | Author: Artem Babenko \n 16 | Contact: arbabenko@yandex-team.ru \n 17 | Link to sources: https://github.com/arbabenko/MultiIndex \n 18 | \n 19 | THIS SOFTWARE IS LICENSED UNDER THE BSD LICENSE. YOU CAN USE, MODIFY AND/ OR REDISTRIBUTE THE SOFTWARE UNDER THE 20 | TERMS OF THE BSD LICENSE. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHOR OR YANDEX BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | **/ 23 | -------------------------------------------------------------------------------- /docs/pictures/mult.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbabenko/MultiIndex/bb0578821093f19d9c44a3ed7f50b8930e1d0199/docs/pictures/mult.jpg -------------------------------------------------------------------------------- /docs/search.dox: -------------------------------------------------------------------------------- 1 | /** \page Search How to search in a multi-index 2 | 3 | \tableofcontents 4 | 5 | \section ALGO The search algorithm 6 | 7 | Given a query point, the search algorithm traverses the inverted multi-index entries in the order of increasing distance from the entry centroid to the query as descibed in the paper. \n 8 | It accumulates the points from the traversed entries and stops when their count reaches the value requested by the user.\n 9 | If rerank mode is on (flag --do_rerank is set) the code also estimates the distance to query for every traversed point using the extra information stored for reranking. In this case, the traversed points are sorted by the increasing distance estimate.\n 10 | 11 | As for the index construction, you should provide the coarse vocabularies and the fine vocabularies (these should be the same vocabularies as used for the index construction). 
To measure the accuracy of the system, you should provide a file with a list of query points and the ground truth nearest neighbors.\n 12 | 13 | \section FORMATS File formats 14 | 15 | - Queries \n 16 | Our code assumes that queries are in the .bvecs or the .fvecs format.\n 17 | - Groundtruth \n 18 | Our code assumes that the ground truth nearest neighbors are in the .ivecs format.\n 19 | - Coarse vocabularies\n 20 | Our code assumes that coarse vocabularies are in the following format:\n 21 | 4 bytes(one int32) - number of items in each vocabulary (N)\n 22 | 4 bytes(one int32) - dimension of item (d)\n 23 | 4*N*d*M bytes(N*d*M floats) - vocabulary items one after another (M is the multiplicity of algorithm)\n 24 | - Fine vocabularies\n 25 | Our code assumes that fine vocabularies are in the following format:\n 26 | 4 bytes(one int32) - number of vocabularies (m)\n 27 | 4 bytes(one int32) - number of items in each vocabulary (N)\n 28 | 4 bytes(one int32) - dimension of item (d)\n 29 | 4*N*d*m bytes(N*d*m floats) - vocabulary items (each centroid is stored contiguously)\n 30 | 31 | \section EXAMPLE Search sample 32 | 33 | To launch the search for all queries and to estimate the accuracy of the search algorithm run the "searcher_tester" application. The following command-line options control the execution: 34 | \code 35 | --coarse_vocabs_file - the path to the file with coarse vocabs (see the format description above) 36 | --fine_vocabs_file - the path to the file with fine vocabs for reranking(see the format description above) 37 | --query_point_type - "BVEC" or "FVEC" 38 | --use_residuals - the reranking method flag. Specify it if you are using the residuals for reranking (Multi-D-ADC) and omit it if you are using the initial vector (Multi-ADC) 39 | --space_dim - space dimensionality (e.g. 
128 for SIFTs) 40 | --subspaces_centroids_count - the number of nearest vocabulary items to consider (L in the paper) 41 | --index_files_prefix - the common prefix of the multi-index files (to control runs with different parameters) 42 | --queries_file - the path to the file with queries (should be in .bvecs or .fvecs format) 43 | --groundtruth_file - the path to the file with groundtruth (should be in .ivecs format) 44 | --queries_count - the number of queries to search 45 | --neighbours_count - the number of neighbours involved in reranking 46 | --report_file - the path to the file to store the search quality report 47 | --do_rerank - this flag indicates whether the search algorithm should rerank points based on the estimated distance to the query 48 | \endcode 49 | 50 | Windows users can try test_searcher.bat script. It launches search in the index builded by launch_indexer.bat script. 51 | Unix users should just write a similar test_searcher.sh script. 52 | 53 | **/ -------------------------------------------------------------------------------- /indexer.h: -------------------------------------------------------------------------------- 1 | /** @file */ 2 | 3 | // Copyright 2012 Yandex Artem Babenko 4 | #ifndef INDEXER_H_ 5 | #define INDEXER_H_ 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include "data_util.h" 23 | #include "multitable.hpp" 24 | 25 | 26 | using std::ifstream; 27 | using std::map; 28 | using std::multimap; 29 | using std::ofstream; 30 | using std::string; 31 | 32 | using boost::lexical_cast; 33 | using boost::split; 34 | 35 | extern int THREADS_COUNT; 36 | 37 | extern Dimensions SPACE_DIMENSION; 38 | 39 | extern enum PointType point_type; 40 | 41 | IndexConfig gConfig; 42 | 43 | /** 44 | * This is the main class for creating multiindex for a set of points 45 | * in a multidimensional space. 
Clusterization and vocabs learning happen 46 | * outside of this class, multiindexer receives prepared vocabs in input 47 | */ 48 | template 49 | class MultiIndexer { 50 | public: 51 | /** 52 | * This is the simple MultiIndexer constructor 53 | * @param multiplicity how many parts input points will be divide on 54 | */ 55 | MultiIndexer(const int multiplicity = 2); 56 | /** 57 | * This is the main function of MultiIndexer 58 | * @param points_filename file with points in .fvecs or .bvecs format 59 | * @param points_count how many points should we index 60 | * @param coarse_vocabs vocabularies for coarse quantization 61 | * @param fine_vocabs vocabularies for fine quantization for reranking 62 | * @param mode determines the way of rerank info calculating 63 | * @param build_coarse_quantization should we get coarse quantization or not 64 | * @param files_prefix all index filenames will have this prefix 65 | * @param coarse_quantization_filename file with coarse quantization (if exists) 66 | */ 67 | void BuildMultiIndex(const string& points_filename, 68 | const string& metainfo_filename, 69 | const int points_count, 70 | const vector& coarse_vocabs, 71 | const vector& fine_vocabs, 72 | const RerankMode& mode, 73 | const bool build_coarse_quantization, 74 | const string& files_prefix, 75 | const string& coarse_quantization_filename = ""); 76 | private: 77 | /** 78 | * This function prepares for each point its coarse quantization 79 | * @param points_filename file with points in .fvecs or .bvecs format 80 | * @param points_count how many points should we handle 81 | * @param coarse_vocabs vocabularies for coarse quantization 82 | */ 83 | void PrepareCoarseQuantization(const string& points_filename, 84 | const int points_count, 85 | const vector& coarse_vocabs); 86 | /** 87 | * This function prepares for each point in subset its coarse quantization 88 | * @param points_filename file with points in .fvecs or .bvecs format 89 | * @param start_pid identifier of the first 
point in subset 90 | * @param subset_size points count in subset 91 | * @param coarse_vocabs vocabularies for coarse quantization 92 | * @param transposed_coarse_quantizations result 93 | */ 94 | void GetCoarseQuantizationsForSubset(const string& points_filename, 95 | const int start_pid, 96 | const int subset_size, 97 | const vector& coarse_vocabs, 98 | vector >* 99 | transposed_coarse_quantizations); 100 | /** 101 | * This function serializes prepared coarse quantizations to file 102 | * @param transposed_coarse_quantizations quantizations to serialize. 103 | * They are transposed because of effective memory usage 104 | * @param filename file we should serialize to 105 | */ 106 | void SerializeCoarseQuantizations(const vector >& 107 | transposed_coarse_quantizations, 108 | const string& filename); 109 | /** 110 | * This function saves index to files. 111 | * All filenames start form the common files prefix 112 | */ 113 | void SerializeMultiIndexFiles(); 114 | /** 115 | * This function converts counts of points in cells to cell edges 116 | */ 117 | void ConvertPointsInCellsCountToCellEdges(); 118 | 119 | /** 120 | * This function fills multiindex data structures. 121 | * @param points_filename file with points in .fvecs or .bvecs format 122 | * @param points_count how many points should we index 123 | * @param coarse_vocabs vocabularies for coarse quantization 124 | * @param fine_vocabs vocabularies for fine quantization for reranking 125 | * @param mode determines the way of rerank info calculating 126 | */ 127 | void FillMultiIndex(const string& points_filename, 128 | const int points_count, 129 | const vector& coarse_vocabs, 130 | const vector& fine_vocabs, 131 | const RerankMode& mode); 132 | /** 133 | * This function fills multiindex data structures. 
134 | * @param points_filename file with points in .fvecs or .bvecs format 135 | * @param start_pid identifier of the first point in subset 136 | * @param subset_size points count in subset 137 | * @param coarse_vocabs vocabularies for coarse quantization 138 | * @param fine_vocabs vocabularies for fine quantization for reranking 139 | * @param mode determines the way of rerank info calculating 140 | * @param points_written_in_index auxillary structure for correct index filling 141 | */ 142 | void FillMultiIndexForSubset(const string& points_filename, 143 | const PointId start_pid, 144 | const int points_count, 145 | const vector& coarse_vocabs, 146 | const vector& fine_vocabs, 147 | const RerankMode& mode, 148 | Multitable* points_written_in_index); 149 | 150 | /** 151 | * This function reads point coarse quantization from file 152 | * @param pid identifier of target point 153 | * @param filename file with coarse quantizations 154 | * @param coarse_quantization result 155 | */ 156 | void GetPointCoarseQuantization(const PointId pid, 157 | const string& filename, 158 | vector* coarse_quantization); 159 | /** 160 | * This function calculates rerank info for point 161 | * @param point target point 162 | * @param pid identifier of target point 163 | * @param fine_vocabs vocabularies for rerank info calculation 164 | */ 165 | void FillPointRerankInfo(const Point& point, 166 | const PointId pid, 167 | const vector& fine_vocabs); 168 | /** 169 | * This function restores counts of points from coarse quantizations 170 | * @param points_filename file with points in .fvecs or .bvecs format 171 | * @param points_count how many points should we index 172 | * @param coarse_vocabs vocabularies for coarse quantization 173 | * We need them to init counts table correctly 174 | */ 175 | void RestorePointsInCellsCountFromCourseQuantization(const string& points_filename, 176 | const int points_count, 177 | const vector& coarse_vocabs); 178 | /** 179 | * This simple function returns 
size of one coordinate of input point 180 | */ 181 | int GetInputCoordSizeof(); 182 | /** 183 | * This simple function reads one point from input stream 184 | * @param input input stream 185 | * @param point result point 186 | */ 187 | void ReadPoint(ifstream& input, Point* point); 188 | /** 189 | * Initialize all structures for BLAS operations 190 | * @param coarse_vocabs coarse vocabularies 191 | */ 192 | void InitBlasStructures(const vector& coarse_vocabs); 193 | /** 194 | * All index filenames will start from this prefix 195 | */ 196 | string files_prefix_; 197 | /** 198 | * Filename of file with coarse quantizations 199 | */ 200 | string coarse_quantization_filename_; 201 | /** 202 | * Multiplicity (how many parts point space is divided on) 203 | */ 204 | int multiplicity_; 205 | /** 206 | * Table with number of points in each cell 207 | */ 208 | Multitable point_in_cells_count_; 209 | /** 210 | * Multiindex 211 | */ 212 | MultiIndex multiindex_; 213 | /** 214 | * Mutex for critical section in filling index stage 215 | */ 216 | boost::mutex cell_counts_mutex_; 217 | /** 218 | * Struct for BLAS 219 | */ 220 | vector coarse_vocabs_matrices_; 221 | /** 222 | * Struct for BLAS 223 | */ 224 | vector > coarse_centroids_norms_; 225 | }; 226 | 227 | template 228 | inline void GetRecord(const Point& point, const PointId pid, 229 | const vector coarse_quantization, 230 | const vector& coarse_vocabs, 231 | Record* result) { 232 | } 233 | 234 | template 235 | void InitParameters(const vector& fine_vocabs, 236 | const RerankMode& mode, 237 | const string& metainfo_filename) { 238 | gConfig.fine_vocabs = fine_vocabs; 239 | gConfig.rerank_mode = mode; 240 | } 241 | 242 | 243 | //////////////////// IMPLEMENTATION ////////////////////// 244 | template 245 | MultiIndexer::MultiIndexer(const int multiplicity) { 246 | if(multiplicity < 0) { 247 | throw std::logic_error("Multiplicity < 0"); 248 | } 249 | multiplicity_ = multiplicity; 250 | } 251 | 252 | template 253 | int 
MultiIndexer::GetInputCoordSizeof() { 254 | if(point_type == FVEC) { 255 | return (int)sizeof(float); 256 | } else if(point_type == BVEC) { 257 | return (int)sizeof(unsigned char); 258 | } 259 | } 260 | 261 | template 262 | void MultiIndexer::ReadPoint(ifstream& input, Point* point) { 263 | if(!input.good()) { 264 | throw std::logic_error("Bad input stream"); 265 | } 266 | if(point_type == FVEC) { 267 | ReadVector(input, point); 268 | } else if(point_type == BVEC) { 269 | ReadVector(input, point); 270 | } 271 | } 272 | 273 | template 274 | void MultiIndexer::SerializeCoarseQuantizations(const vector >& 275 | transposed_coarse_quantizations, 276 | const string& filename) { 277 | ofstream quantizations_stream; 278 | quantizations_stream.open(filename.c_str(), ios::binary); 279 | if(!quantizations_stream.good()) { 280 | throw std::logic_error("Bad input stream"); 281 | } 282 | cout << "Writing coarse quantizations started" << endl; 283 | for(PointId pid = 0; pid < transposed_coarse_quantizations[0].size(); ++pid) { 284 | for(int subspace_index = 0; subspace_index < multiplicity_; ++subspace_index) { 285 | ClusterId quantization = transposed_coarse_quantizations[subspace_index][pid]; 286 | quantizations_stream.write((char*)&quantization, sizeof(quantization)); 287 | } 288 | } 289 | quantizations_stream.close(); 290 | cout << "Writing coarse quantizations started" << endl; 291 | } 292 | 293 | template 294 | void MultiIndexer::SerializeMultiIndexFiles() { 295 | cout << "Start multiindex serializing....\n"; 296 | ofstream cell_edges(string(files_prefix_ + "_cell_edges.bin").c_str(), ios::binary); 297 | boost::archive::binary_oarchive arc_cell_edges(cell_edges); 298 | arc_cell_edges << multiindex_.cell_edges; 299 | ofstream multi_array(string(files_prefix_ + "_multi_array.bin").c_str(), ios::binary); 300 | boost::archive::binary_oarchive arc_multi_array(multi_array); 301 | arc_multi_array << multiindex_.multiindex; 302 | cout << "Finish multiindex serializing....\n"; 303 | 
} 304 | 305 | template 306 | void MultiIndexer::GetCoarseQuantizationsForSubset(const string& points_filename, 307 | const int start_pid, 308 | const int subset_size, 309 | const vector& coarse_vocabs, 310 | vector >* 311 | transposed_coarse_quantizations) { 312 | ifstream point_stream; 313 | point_stream.open(points_filename.c_str(), ios::binary); 314 | if(!point_stream.good()) { 315 | throw std::logic_error("Bad input points stream"); 316 | } 317 | // we assume points are stored in .fvecs or .bvecs format 318 | point_stream.seekg(start_pid * (GetInputCoordSizeof() * SPACE_DIMENSION + sizeof(Dimensions)), ios::beg); 319 | vector coarse_quantization(multiplicity_); 320 | for(int point_number = 0; point_number < subset_size; ++point_number) { 321 | if(point_number % 10000 == 0) { 322 | cout << "Getting coarse quantization, point # " << start_pid + point_number << endl; 323 | } 324 | Point current_point; 325 | ReadPoint(point_stream, ¤t_point); 326 | int subpoints_dimension = SPACE_DIMENSION / multiplicity_; 327 | for(int coarse_index = 0; coarse_index < multiplicity_; ++coarse_index) { 328 | Dimensions start_dim = coarse_index * subpoints_dimension; 329 | Dimensions final_dim = start_dim + subpoints_dimension; 330 | ClusterId nearest = GetNearestClusterId(current_point, coarse_vocabs.at(coarse_index), 331 | start_dim, final_dim); 332 | transposed_coarse_quantizations->at(coarse_index)[start_pid + point_number] = nearest; 333 | coarse_quantization[coarse_index] = nearest; 334 | cblas_saxpy(subpoints_dimension, -1, &(coarse_vocabs.at(coarse_index)[nearest][0]), 1, &(current_point[start_dim]), 1); 335 | } 336 | 337 | int global_index = point_in_cells_count_.GetCellGlobalIndex(coarse_quantization); 338 | cell_counts_mutex_.lock(); 339 | ++(point_in_cells_count_.table[global_index]); 340 | cell_counts_mutex_.unlock(); 341 | } 342 | } 343 | 344 | template 345 | void MultiIndexer::PrepareCoarseQuantization(const string& points_filename, 346 | const int points_count, 347 | 
const vector& coarse_vocabs) { 348 | // we use transposed quantizations for efficient memory usage 349 | vector > transposed_coarse_quantizations; 350 | transposed_coarse_quantizations.resize(multiplicity_); 351 | vector multiindex_table_dimensions; 352 | for(int i = 0; i < multiplicity_; ++i) { 353 | transposed_coarse_quantizations[i].resize(points_count); 354 | multiindex_table_dimensions.push_back(coarse_vocabs[i].size()); 355 | } 356 | point_in_cells_count_.Resize(multiindex_table_dimensions); 357 | cout << "Memory for coarse quantizations allocated" << endl; 358 | boost::thread_group index_threads; 359 | int thread_points_count = points_count / THREADS_COUNT; 360 | for(int thread_id = 0; thread_id < THREADS_COUNT; ++thread_id) { 361 | PointId start_pid = thread_points_count * thread_id; 362 | index_threads.create_thread(boost::bind(&MultiIndexer::GetCoarseQuantizationsForSubset, 363 | this, points_filename, start_pid, thread_points_count, 364 | boost::cref(coarse_vocabs), &transposed_coarse_quantizations)); 365 | } 366 | index_threads.join_all(); 367 | if(coarse_quantization_filename_.empty()) { 368 | coarse_quantization_filename_ = files_prefix_ + "_coarse_quantizations.bin"; 369 | } 370 | cout << "Coarse quantizations are calculated" << endl; 371 | SerializeCoarseQuantizations(transposed_coarse_quantizations, coarse_quantization_filename_); 372 | cout << "Coarse quantizations are serialized" << endl; 373 | } 374 | 375 | template 376 | void MultiIndexer::ConvertPointsInCellsCountToCellEdges() { 377 | cout << "Converting points in cells to cell edges...\n"; 378 | multiindex_.cell_edges = point_in_cells_count_; 379 | multiindex_.cell_edges.table[0] = 0; 380 | for(int global_index = 1; 381 | global_index < point_in_cells_count_.table.size(); 382 | ++global_index) { 383 | multiindex_.cell_edges.table[global_index] = multiindex_.cell_edges.table[global_index - 1] + 384 | point_in_cells_count_.table[global_index - 1]; 385 | } 386 | // we do not need this table more 
387 | point_in_cells_count_.table.clear(); 388 | cout << "Finish converting points in cells to cell edges...\n"; 389 | } 390 | 391 | template 392 | void MultiIndexer::GetPointCoarseQuantization(const PointId pid, 393 | const string& filename, 394 | vector* coarse_quantization) { 395 | ifstream coarse_quantization_stream; 396 | coarse_quantization_stream.open(filename.c_str(), ios::binary); 397 | if(!coarse_quantization_stream.good()) { 398 | throw std::logic_error("Bad input coarse quantizations stream"); 399 | } 400 | coarse_quantization_stream.seekg((long long)pid * sizeof(ClusterId) * multiplicity_, ios::beg); 401 | for(int coarse_index = 0; coarse_index < multiplicity_; ++coarse_index) { 402 | coarse_quantization_stream.read((char*)&(coarse_quantization->at(coarse_index)), 403 | sizeof(coarse_quantization->at(coarse_index))); 404 | } 405 | } 406 | 407 | template 408 | void MultiIndexer::FillMultiIndexForSubset(const string& points_filename, 409 | const PointId start_pid, 410 | const int points_count, 411 | const vector& coarse_vocabs, 412 | const vector& fine_vocabs, 413 | const RerankMode& mode, 414 | Multitable* points_written_in_index) { 415 | ifstream point_stream; 416 | point_stream.open(points_filename.c_str(), ios::binary); 417 | if(!point_stream.good()) { 418 | throw std::logic_error("Bad input points stream"); 419 | } 420 | point_stream.seekg((long long)start_pid * (GetInputCoordSizeof() * SPACE_DIMENSION + sizeof(Dimensions)), ios::beg); 421 | for(int point_number = 0; point_number < points_count; ++point_number) { 422 | if(point_number % 10000 == 0) { 423 | cout << "Filling multiindex, point # " << start_pid + point_number << endl; 424 | } 425 | Point current_point; 426 | ReadPoint(point_stream, ¤t_point); 427 | vector coarse_quantization(multiplicity_); 428 | GetPointCoarseQuantization(start_pid + point_number, 429 | coarse_quantization_filename_, 430 | &coarse_quantization); 431 | int current_written_count = 
points_written_in_index->GetValue(coarse_quantization); 432 | int pid_multiindex = multiindex_.cell_edges.GetValue(coarse_quantization) + current_written_count; 433 | GetRecord(current_point, start_pid + point_number, 434 | coarse_quantization, coarse_vocabs, &(multiindex_.multiindex[pid_multiindex])); 435 | cell_counts_mutex_.lock(); 436 | points_written_in_index->SetValue(current_written_count + 1, coarse_quantization); 437 | cell_counts_mutex_.unlock(); 438 | } 439 | } 440 | 441 | template 442 | void MultiIndexer::FillMultiIndex(const string& points_filename, 443 | const int points_count, 444 | const vector& coarse_vocabs, 445 | const vector& fine_vocabs, 446 | const RerankMode& mode) { 447 | ConvertPointsInCellsCountToCellEdges(); 448 | multiindex_.multiindex.resize(points_count); 449 | cout << "Indexing started..." << endl; 450 | 451 | Multitable points_written_in_index(multiindex_.cell_edges.dimensions); 452 | int thread_points_count = points_count / THREADS_COUNT; 453 | boost::thread_group threads; 454 | for(int thread_id = 0; thread_id < THREADS_COUNT; ++thread_id) { 455 | PointId start_pid = thread_points_count * thread_id; 456 | threads.create_thread(boost::bind(&MultiIndexer::FillMultiIndexForSubset, this, points_filename, start_pid, 457 | thread_points_count, boost::cref(coarse_vocabs), 458 | boost::cref(fine_vocabs), mode, &points_written_in_index)); 459 | } 460 | threads.join_all(); 461 | cout << "Indexing finished..." 
<< endl; 462 | } 463 | 464 | template 465 | void MultiIndexer::RestorePointsInCellsCountFromCourseQuantization(const string& points_filename, 466 | const int points_count, 467 | const vector& coarse_vocabs) { 468 | vector dimensions; 469 | for(int i = 0; i < multiplicity_; ++i) { 470 | dimensions.push_back(coarse_vocabs[i].size()); 471 | } 472 | point_in_cells_count_.Resize(dimensions); 473 | ifstream coarse_quantization_stream; 474 | coarse_quantization_stream.open(coarse_quantization_filename_.c_str(), ios::binary); 475 | if(!coarse_quantization_stream.good()) { 476 | throw std::logic_error("Bad input coarse quantizations stream"); 477 | } 478 | CoarseQuantization quantization(multiplicity_); 479 | for(PointId pid = 0; pid < points_count; ++pid) { 480 | if(pid % 100000 == 0) { 481 | cout << pid << endl; 482 | } 483 | for(int subspace_index = 0; subspace_index < multiplicity_; ++subspace_index) { 484 | coarse_quantization_stream.read((char*)&(quantization[subspace_index]), 485 | sizeof(ClusterId)); 486 | } 487 | int cell_global_index = point_in_cells_count_.GetCellGlobalIndex(quantization); 488 | point_in_cells_count_.table[cell_global_index] += 1; 489 | } 490 | } 491 | 492 | template 493 | void MultiIndexer::BuildMultiIndex(const string& points_filename, 494 | const string& metainfo_filename, 495 | const int points_count, 496 | const vector& coarse_vocabs, 497 | const vector& fine_vocabs, 498 | const RerankMode& mode, 499 | const bool build_coarse_quantization, 500 | const string& files_prefix, 501 | const string& coarse_quantization_filename) { 502 | InitParameters(fine_vocabs, mode, metainfo_filename); 503 | InitBlasStructures(coarse_vocabs); 504 | files_prefix_ = files_prefix; 505 | coarse_quantization_filename_ = coarse_quantization_filename; 506 | if(build_coarse_quantization) { 507 | PrepareCoarseQuantization(points_filename, points_count, coarse_vocabs); 508 | } else { 509 | RestorePointsInCellsCountFromCourseQuantization(points_filename, 510 | 
points_count, 511 | coarse_vocabs); 512 | } 513 | FillMultiIndex(points_filename, points_count, coarse_vocabs, fine_vocabs, mode); 514 | cout << "Multiindex created" << endl; 515 | SerializeMultiIndexFiles(); 516 | cout << "Multiindex serialized" << endl; 517 | } 518 | 519 | template 520 | void MultiIndexer::InitBlasStructures(const vector& coarse_vocabs) { 521 | coarse_vocabs_matrices_.resize(coarse_vocabs.size()); 522 | coarse_centroids_norms_.resize(coarse_vocabs.size(), vector(coarse_vocabs[0].size())); 523 | for(int coarse_id = 0; coarse_id < coarse_vocabs_matrices_.size(); ++coarse_id) { 524 | coarse_vocabs_matrices_[coarse_id] = new float[coarse_vocabs[0].size() * coarse_vocabs[0][0].size()]; 525 | for(int i = 0; i < coarse_vocabs[0].size(); ++i) { 526 | Coord norm = 0; 527 | for(int j = 0; j < coarse_vocabs[0][0].size(); ++j) { 528 | coarse_vocabs_matrices_[coarse_id][coarse_vocabs[0][0].size() * i + j] = coarse_vocabs[coarse_id][i][j]; 529 | norm += coarse_vocabs[coarse_id][i][j] * coarse_vocabs[coarse_id][i][j]; 530 | } 531 | coarse_centroids_norms_[coarse_id][i] = norm; 532 | } 533 | } 534 | } 535 | 536 | template<> 537 | inline void GetRecord (const Point& point, const PointId pid, 538 | const vector coarse_quantization, 539 | const vector& coarse_vocabs, 540 | PointId* result) { 541 | *result = pid; 542 | } 543 | 544 | inline void FillAdcInfo(const Point& point, const PointId pid, 545 | const vector& fine_vocabs, 546 | char* result) { 547 | int subvectors_count = fine_vocabs.size(); 548 | int subvector_dim = point.size() / subvectors_count; 549 | for(int subvector_index = 0; subvector_index < subvectors_count; ++subvector_index) { 550 | Dimensions start_dim = subvector_index * subvector_dim; 551 | Dimensions final_dim = start_dim + subvector_dim; 552 | *((FineClusterId*)result) = (FineClusterId)GetNearestClusterId(point, fine_vocabs[subvector_index], 553 | start_dim, final_dim); 554 | result += sizeof(FineClusterId); 555 | } 556 | } 557 | 558 | 
template<> 559 | inline void GetRecord (const Point& point, const PointId pid, 560 | const vector coarse_quantization, 561 | const vector& coarse_vocabs, 562 | RerankADC8* result) { 563 | result->pid = pid; 564 | char* rerank_info_ptr = (char*)result + sizeof(pid); 565 | if(gConfig.rerank_mode == USE_RESIDUALS) { 566 | Point residual; 567 | GetResidual(point, coarse_quantization, coarse_vocabs, &residual); 568 | FillAdcInfo(residual, pid, gConfig.fine_vocabs, rerank_info_ptr); 569 | } else if (gConfig.rerank_mode == USE_INIT_POINTS) { 570 | FillAdcInfo(point, pid, gConfig.fine_vocabs, rerank_info_ptr); 571 | } 572 | } 573 | 574 | template<> 575 | inline void GetRecord (const Point& point, const PointId pid, 576 | const vector coarse_quantization, 577 | const vector& coarse_vocabs, 578 | RerankADC16* result) { 579 | result->pid = pid; 580 | char* rerank_info_ptr = (char*)result + sizeof(pid); 581 | if(gConfig.rerank_mode == USE_RESIDUALS) { 582 | Point residual; 583 | GetResidual(point, coarse_quantization, coarse_vocabs, &residual); 584 | FillAdcInfo(residual, pid, gConfig.fine_vocabs, rerank_info_ptr); 585 | } else if (gConfig.rerank_mode == USE_INIT_POINTS) { 586 | FillAdcInfo(point, pid, gConfig.fine_vocabs, rerank_info_ptr); 587 | } 588 | } 589 | 590 | #endif 591 | 592 | 593 | 594 | 595 | -------------------------------------------------------------------------------- /indexer_launcher.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2012 Yandex Artem Babenko 2 | #include 3 | 4 | #include 5 | 6 | #include "indexer.h" 7 | 8 | using namespace boost::program_options; 9 | 10 | /** 11 | * Number of threads for indexing 12 | */ 13 | int THREADS_COUNT; 14 | /** 15 | * Type, should be BVEC or FVEC 16 | */ 17 | PointType point_type; 18 | /** 19 | * Number of coordinates in a point 20 | */ 21 | Dimensions SPACE_DIMENSION; 22 | /** 23 | * File with vocabularies for multiindex structure 24 | */ 25 | string 
coarse_vocabs_file; 26 | /** 27 | * File with vocabularies for reranking 28 | */ 29 | string fine_vocabs_file; 30 | /** 31 | * File with points to index 32 | */ 33 | string points_file; 34 | /** 35 | * File with points metainfo (imageId, etc.) 36 | */ 37 | string metainfo_file; 38 | /** 39 | * Reranking approach, should be USE_RESIDUALS or USE_INIT_POINTS 40 | */ 41 | RerankMode mode; 42 | /** 43 | * Common prefix of all multiindex files 44 | */ 45 | string files_prefix; 46 | /** 47 | * Should we calculate coarse quantizations (they can be precomputed) 48 | */ 49 | bool build_coarse_quantizations; 50 | /** 51 | * File with points coarse quantizations 52 | */ 53 | string coarse_quantizations_file; 54 | /** 55 | * How many points should we index 56 | */ 57 | int points_count; 58 | /** 59 | * Multiplicity of multiindex 60 | */ 61 | int multiplicity; 62 | 63 | int SetOptions(int argc, char** argv) { 64 | options_description description("Options"); 65 | description.add_options() 66 | ("threads_count,t", value()) 67 | ("multiplicity,m", value()) 68 | ("points_file,p", value()) 69 | ("metainfo_file,z", value()) 70 | ("coarse_vocabs_file,c", value()) 71 | ("fine_vocabs_file,f", value()) 72 | ("input_point_type,i", value()) 73 | ("build_coarse,b", bool_switch(), "Flag B") 74 | ("use_residuals,r", bool_switch(), "Flag R") 75 | ("points_count,p", value()) 76 | ("coarse_quantization_file,q", value()) 77 | ("space_dim,d", value()) 78 | ("files_prefix,_", value()); 79 | variables_map name_to_value; 80 | try { 81 | store(command_line_parser(argc, argv).options(description).run(), name_to_value); 82 | } catch (const invalid_command_line_syntax& inv_syntax) { 83 | switch (inv_syntax.kind()) { 84 | case invalid_syntax::missing_parameter : 85 | cout << "Missing argument for option '" << inv_syntax.tokens() << "'.\n"; 86 | break; 87 | default: 88 | cout << "Syntax error, kind " << int(inv_syntax.kind()) << "\n"; 89 | break; 90 | }; 91 | return 1; 92 | } catch (const unknown_option& 
unkn_option) { 93 | cout << "Unknown option '" << unkn_option.get_option_name() << "'\n"; 94 | return 1; 95 | } 96 | if (name_to_value.count("help")) { 97 | cout << description << "\n"; 98 | return 1; 99 | } 100 | 101 | THREADS_COUNT = name_to_value["threads_count"].as(); 102 | multiplicity = name_to_value["multiplicity"].as(); 103 | points_file = name_to_value["points_file"].as(); 104 | metainfo_file = name_to_value["metainfo_file"].as(); 105 | coarse_vocabs_file = name_to_value["coarse_vocabs_file"].as(); 106 | fine_vocabs_file = name_to_value["fine_vocabs_file"].as(); 107 | SPACE_DIMENSION = name_to_value["space_dim"].as(); 108 | files_prefix = name_to_value["files_prefix"].as(); 109 | points_count = name_to_value["points_count"].as(); 110 | 111 | build_coarse_quantizations = (name_to_value["build_coarse"].as() == true) ? true : false; 112 | mode = name_to_value["use_residuals"].as() == true ? USE_RESIDUALS : USE_INIT_POINTS; 113 | 114 | if (name_to_value.find("coarse_quantization_file") != name_to_value.end()) { 115 | coarse_quantizations_file = name_to_value["coarse_quantization_file"].as(); 116 | } 117 | if (name_to_value["input_point_type"].as() == "FVEC") { 118 | point_type = FVEC; 119 | } else if(name_to_value["input_point_type"].as() == "BVEC") { 120 | point_type = BVEC; 121 | } 122 | return 0; 123 | } 124 | 125 | int main(int argc, char** argv) { 126 | SetOptions(argc, argv); 127 | cout << "Options are set ...\n"; 128 | vector coarse_vocabs; 129 | vector fine_vocabs; 130 | ReadVocabularies(coarse_vocabs_file, SPACE_DIMENSION, &coarse_vocabs); 131 | ReadFineVocabs(fine_vocabs_file, &fine_vocabs); 132 | cout << "Vocs are read ...\n"; 133 | if(fine_vocabs.size() == 8) { 134 | MultiIndexer indexer(multiplicity); 135 | indexer.BuildMultiIndex(points_file, metainfo_file, points_count, coarse_vocabs, 136 | fine_vocabs, mode, build_coarse_quantizations, 137 | files_prefix, coarse_quantizations_file); 138 | } else if(fine_vocabs.size() == 16) { 139 | MultiIndexer 
indexer(multiplicity); 140 | indexer.BuildMultiIndex(points_file, metainfo_file, points_count, coarse_vocabs, 141 | fine_vocabs, mode, build_coarse_quantizations, 142 | files_prefix, coarse_quantizations_file); 143 | } 144 | return 0; 145 | } -------------------------------------------------------------------------------- /launch_indexer_double.sh: -------------------------------------------------------------------------------- 1 | cd build_master 2 | ./indexer_launcher \ 3 | --threads_count=32 \ 4 | --multiplicity=2 \ 5 | --points_file="/sata/ResearchData/BigAnn/bases/sift1M.bvecs" \ 6 | --coarse_vocabs_file="../sift1M_double_4096.dat" \ 7 | --fine_vocabs_file="../sift1M_double_4096_8.dat" \ 8 | --input_point_type="BVEC" \ 9 | --points_count=1000000 \ 10 | --space_dim=128 \ 11 | --files_prefix="/sata/ResearchData/BigAnn/indices/sift1M_double_4096_8" \ 12 | --coarse_quantization_file="/sata/ResearchData/BigAnn/cq/sift1M_double_4096_coarse_quantizations.bin" \ 13 | --metainfo_file="fake.txt" \ 14 | --use_residuals \ 15 | --build_coarse 16 | 17 | -------------------------------------------------------------------------------- /make_project.bat: -------------------------------------------------------------------------------- 1 | cd build 2 | del CMakeCache.txt 3 | cmake -DMAKE_ONLY=BUILD_ALL -G "Visual Studio 10 Win64" .. 4 | pause 5 | -------------------------------------------------------------------------------- /multitable.hpp: -------------------------------------------------------------------------------- 1 | /** @file */ 2 | // Copyright 2012 Yandex Artem Babenko 3 | #pragma once 4 | 5 | #include 6 | 7 | using std::vector; 8 | 9 | /** 10 | * This class implements interface of multidimensional array with 11 | * fast write/read operations. In fact data is stored in a long array. 12 | * Global index of particular item in this array is calculated from item coordinates. 
13 | */ 14 | template 15 | struct Multitable { 16 | /** 17 | * This constructor gets width of table for each dimension 18 | * @param dimensions array of sizes of table along each dimension 19 | */ 20 | Multitable(const vector& dimensions = vector()); 21 | /** 22 | * This function resize the table to new dimensions 23 | * @param dimensions array of sizes of table along each dimension 24 | */ 25 | void Resize(const vector& dimensions, T value = T()); 26 | /** 27 | * This function sets value in one cell 28 | * @param value value to set 29 | * @param cell_indices coordinates of cell in the table 30 | */ 31 | void SetValue(T value, const vector& cell_indices); 32 | /** 33 | * This function gets value of one cell 34 | * @param cell_indices coordinates of cell in the table 35 | */ 36 | T GetValue(const vector& cell_indices); 37 | /** 38 | * Actual data as one-dimensional array 39 | */ 40 | vector table; 41 | /** 42 | * Dimensions of table 43 | */ 44 | vector dimensions; 45 | /** 46 | * Function for Boost.Serialization 47 | */ 48 | template 49 | void serialize(Archive& arc, unsigned int version) { 50 | arc & table; 51 | arc & dimensions; 52 | } 53 | /** 54 | * Function converts cell coordinates to global index in a long array 55 | * @param cell_indices coordinates of cell in the table 56 | */ 57 | int GetCellGlobalIndex(const vector& cell_indices) const; 58 | }; 59 | 60 | template 61 | int Multitable::GetCellGlobalIndex(const vector& indices) const { 62 | if(indices.empty()) { 63 | throw std::logic_error("Empty indices array!"); 64 | } 65 | int global_index = 0; 66 | int subtable_capacity = table.size(); 67 | for(int dimension_index = 0; dimension_index < dimensions.size(); ++dimension_index) { 68 | subtable_capacity = subtable_capacity / dimensions[dimension_index]; 69 | global_index += subtable_capacity * indices[dimension_index]; 70 | } 71 | return global_index; 72 | } 73 | 74 | template 75 | void Multitable::Resize(const vector& new_dimensions, T value) { 76 | int 
table_size = 1; 77 | dimensions = new_dimensions; 78 | for(int dimension_index = 0; dimension_index < new_dimensions.size(); ++dimension_index) { 79 | table_size *= new_dimensions[dimension_index]; 80 | } 81 | table.resize(table_size, value); 82 | } 83 | 84 | template 85 | Multitable::Multitable(const vector& dimensions) { 86 | Resize(dimensions); 87 | } 88 | 89 | template 90 | void Multitable::SetValue(T value, const vector& indices) { 91 | int global_index = GetCellGlobalIndex(indices); 92 | table.at(global_index) = value; 93 | } 94 | 95 | template 96 | T Multitable::GetValue(const vector& indices) { 97 | int global_index = GetCellGlobalIndex(indices); 98 | return table.at(global_index); 99 | } -------------------------------------------------------------------------------- /ordered_lists_merger.h: -------------------------------------------------------------------------------- 1 | /** @file */ 2 | // Copyright 2012 Yandex Artem Babenko 3 | #pragma once 4 | 5 | #include "data_util.h" 6 | #include "multitable.hpp" 7 | 8 | /** 9 | * \typedef 10 | * Typedef for indices of merged list element 11 | */ 12 | typedef vector MergedItemIndices; 13 | 14 | /** 15 | * This class merges any number of ordered lists and yields 16 | * elements of merged list in Order-increasing order. 17 | * Initial lists are ordered by Order. 18 | * This class is used in multilist algorithm to get coordinates of cells in multiindex. 19 | * Class does not return the whole result list, it just yields by one item. 20 | * Class also assumes that input lists are the same length 21 | */ 22 | template 23 | class OrderedListsMerger { 24 | public: 25 | /** 26 | * This constructor inits merger for input lists 27 | */ 28 | OrderedListsMerger(); 29 | /** 30 | * This function sets lists to merge 31 | * @param lists input lists to merge (must be ordered) 32 | */ 33 | void setLists(const vector > >& lists); 34 | /** 35 | * This function yields indices if next item of merged list. 
36 | * It returns "false" if all items have been already yielded and "true" otherwise 37 | * @param merged_item_indices result indices 38 | */ 39 | inline bool GetNextMergedItemIndices(MergedItemIndices* merged_item_indices); 40 | /** 41 | * Pointer to input lists 42 | */ 43 | const vector > >* lists_ptr; 44 | /** 45 | * Getter 46 | */ 47 | Multitable& GetYieldedItems() { 48 | return yielded_items_indices_; 49 | } 50 | private: 51 | /** 52 | * This function pushes new item into priority queue 53 | * @param merged_item_indices indices of item to add 54 | */ 55 | void InsertMergedItemIndicesInHeap(const MergedItemIndices& merged_item_indices); 56 | /** 57 | * This function tries to update priority queue after yielding 58 | * @param merged_item_indices new indices we should try to push in priority queue 59 | */ 60 | void UpdatePrioirityQueue(MergedItemIndices& merged_item_indices); 61 | /** 62 | * Proirity queue for multilist algorithm 63 | */ 64 | multimap heap_; 65 | /** 66 | * Table with "1"-value for yielded items and "0"-value otherwise 67 | */ 68 | Multitable yielded_items_indices_; 69 | }; 70 | 71 | //////////////////// IMPLEMENTATION ////////////////////////////////////////// 72 | 73 | template 74 | OrderedListsMerger::OrderedListsMerger() { 75 | } 76 | 77 | template 78 | void OrderedListsMerger::InsertMergedItemIndicesInHeap(const MergedItemIndices& merged_item_indices) { 79 | OrderType sum = 0; 80 | for(int list_index = 0; list_index < lists_ptr->size(); ++list_index) { 81 | sum += lists_ptr->at(list_index)[merged_item_indices[list_index]].first; 82 | } 83 | heap_.insert(std::make_pair(sum, merged_item_indices)); 84 | } 85 | 86 | template 87 | void OrderedListsMerger::setLists(const vector > >& lists) { 88 | lists_ptr = &lists; 89 | heap_.clear(); 90 | MergedItemIndices first_item_indices(lists.size()); 91 | for(int list_index = 0; list_index < lists.size(); ++list_index) { 92 | first_item_indices[list_index] = 0; 93 | } 94 | 
memset(&(yielded_items_indices_.table[0]), 0, yielded_items_indices_.table.size()); 95 | InsertMergedItemIndicesInHeap(first_item_indices); 96 | } 97 | 98 | template 99 | void OrderedListsMerger::UpdatePrioirityQueue(MergedItemIndices& merged_item_indices) { 100 | for(int list_index = 0; list_index < lists_ptr->size(); ++list_index) { 101 | if(merged_item_indices[list_index] >= lists_ptr->at(list_index).size()) { 102 | return; 103 | } 104 | int current_index = merged_item_indices[list_index]; 105 | merged_item_indices[list_index] -= 1; 106 | if(current_index > 0 && !yielded_items_indices_.GetValue(merged_item_indices)) { 107 | merged_item_indices[list_index] += 1; 108 | return; 109 | } else { 110 | merged_item_indices[list_index] += 1; 111 | } 112 | } 113 | InsertMergedItemIndicesInHeap(merged_item_indices); 114 | } 115 | 116 | template 117 | inline bool OrderedListsMerger::GetNextMergedItemIndices(MergedItemIndices* next_merged_item_indices) { 118 | if(heap_.empty()) { 119 | return false; 120 | } 121 | *next_merged_item_indices = heap_.begin()->second; 122 | yielded_items_indices_.SetValue(1, *next_merged_item_indices); 123 | for(int list_index = 0; list_index < lists_ptr->size(); ++list_index) { 124 | next_merged_item_indices->at(list_index) += 1; 125 | UpdatePrioirityQueue(*next_merged_item_indices); 126 | next_merged_item_indices->at(list_index) -= 1; 127 | } 128 | heap_.erase(heap_.begin()); 129 | return true; 130 | } 131 | 132 | template class OrderedListsMerger; 133 | template class OrderedListsMerger >; -------------------------------------------------------------------------------- /perfomance_util.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2012 Yandex Artem Babenko 2 | #include "perfomance_util.h" 3 | 4 | extern string report_file; 5 | 6 | PerfTester::PerfTester() { 7 | report_file_ = report_file; 8 | current_points_count = 0; 9 | handled_queries_count = 0; 10 | cells_traversed = 0; 11 | 
nearest_subcentroids_time = 0; 12 | cache_init_time = 0; 13 | merger_init_time = 0; 14 | full_traversal_time = 0; 15 | cell_coordinates_time = 0; 16 | cell_edges_time = 0; 17 | residual_time = 0; 18 | refining_time = 0; 19 | full_search_time = 0; 20 | 21 | for(int i = 0; i < 21; ++i) { 22 | list_length_thresholds_.push_back(std::pow(2.0, i)); 23 | } 24 | current_threshold_index_ = 0; 25 | list_length_times_.resize(list_length_thresholds_.size(), 0.0); 26 | } 27 | 28 | void PerfTester::ResetQuerywiseStatistic() { 29 | current_threshold_index_ = 0; 30 | current_points_count = 0; 31 | } 32 | 33 | void PerfTester::NextNeighbour() { 34 | ++current_points_count; 35 | if(current_points_count >= list_length_thresholds_[current_threshold_index_]) { 36 | clock_t current_time = clock(); 37 | list_length_times_[current_threshold_index_] += current_time - search_start; 38 | ++current_threshold_index_; 39 | } 40 | } 41 | 42 | void PerfTester::DoReport(std::ofstream& out) { 43 | out << "Queries count: " 44 | << handled_queries_count << endl; 45 | out << "Average cells count: " 46 | << (double)cells_traversed / handled_queries_count << endl; 47 | out << "Average nearest subcentroids getting time: " 48 | << (double)nearest_subcentroids_time / handled_queries_count << endl; 49 | out << "Average cache init time: " 50 | << (double)cache_init_time / handled_queries_count << endl; 51 | out << "Average merger init time: " 52 | << (double)merger_init_time / handled_queries_count << endl; 53 | out << "Average full traversal time: " 54 | << (double)full_traversal_time / handled_queries_count << endl; 55 | out << "Average cells coordinates getting time: " 56 | << (double)cell_coordinates_time / handled_queries_count << endl; 57 | out << "Average cell edges getting time: " 58 | << (double)cell_edges_time/ handled_queries_count << endl; 59 | out << "Average residual time: " 60 | << (double)residual_time / handled_queries_count << endl; 61 | out << "Average refining time: " 62 | 
<<(double)refining_time / handled_queries_count << endl; 63 | out << "Average full search time: " 64 | << (double)full_search_time / handled_queries_count << endl; 65 | } 66 | 67 | void PerfTester::DoReport() { 68 | std::ofstream out(report_file_.c_str()); 69 | DoReport(out); 70 | } 71 | 72 | int GetRecallAt(const int length, const vector& groundtruth, 73 | const vector& result) { 74 | if(groundtruth.empty()) { 75 | cout << "Groundtruth is empty!" << endl; 76 | return 0; 77 | } 78 | for(int index = 0; index < length && index < result.size(); ++index) { 79 | if(result[index].second == groundtruth[0]) { 80 | return 1; 81 | } 82 | } 83 | return 0; 84 | } 85 | 86 | double GetPresicionAt(const int length, const set& groundtruth, 87 | const vector& result) { 88 | int found = 0; 89 | for(int index = 0; index < length && index < result.size() ; ++index) { 90 | if(groundtruth.find(result[index].second) != groundtruth.end()) { 91 | found += 1; 92 | } 93 | } 94 | return (double)found / length; 95 | } 96 | 97 | double GetRecall(const vector& groundtruth, 98 | const vector& result) { 99 | if(groundtruth.empty()) { 100 | cout << "Groundtruth is empty!" 
<< endl; 101 | return 0; 102 | } 103 | std::set returned_points; 104 | for(int i = 0; i < result.size(); ++i) { 105 | returned_points.insert(result[i].second); 106 | } 107 | double found = 0.0; 108 | for(int index = 0; index < groundtruth.size(); ++index) { 109 | if(returned_points.find(groundtruth[index]) != returned_points.end()) { 110 | found += 1; 111 | } 112 | } 113 | return found / groundtruth.size(); 114 | } -------------------------------------------------------------------------------- /perfomance_util.h: -------------------------------------------------------------------------------- 1 | /** @file */ 2 | // Copyright 2012 Yandex Artem Babenko 3 | #include 4 | #include 5 | #include 6 | 7 | #include "data_util.h" 8 | 9 | using std::cout; 10 | using std::endl; 11 | using std::ofstream; 12 | using std::pair; 13 | using std::set; 14 | using std::vector; 15 | 16 | /** 17 | * \typedef 18 | * Typedef for point identifier and distance from query 19 | */ 20 | typedef pair DistanceToPoint; 21 | 22 | /** 23 | * This simple class stores timing of search working process 24 | */ 25 | class PerfTester { 26 | public: 27 | PerfTester(); 28 | /** 29 | * Number of neighbours already found 30 | */ 31 | int current_points_count; 32 | /** 33 | * Pretty report of timing 34 | */ 35 | void DoReport(); 36 | /** 37 | * Reset all prevoius statistic before 38 | * new query handling 39 | */ 40 | void ResetQuerywiseStatistic(); 41 | /** 42 | * Signal about next point 43 | */ 44 | void NextNeighbour(); 45 | /** 46 | * Number of handled queries 47 | */ 48 | int handled_queries_count; 49 | /** 50 | * Number of traversed items of multiindex 51 | */ 52 | int cells_traversed; 53 | unsigned long long nearest_subcentroids_time; 54 | unsigned long long cache_init_time; 55 | unsigned long long merger_init_time; 56 | unsigned long long full_traversal_time; 57 | unsigned long long cell_coordinates_time; 58 | unsigned long long cell_edges_time; 59 | unsigned long long residual_time; 60 | unsigned 
long long refining_time; 61 | unsigned long long full_search_time; 62 | unsigned long long search_start; 63 | private: 64 | string report_file_; 65 | void DoReport(ofstream& out); 66 | vector list_length_thresholds_; 67 | int current_threshold_index_; 68 | vector list_length_times_; 69 | }; 70 | 71 | /** 72 | * This function returns recall at specified length 73 | * @param length specified size of search results 74 | * @param groundtruth groundtruth 75 | * @param result search results 76 | */ 77 | int GetRecallAt(const int length, const vector& groundtruth, 78 | const vector& result); 79 | /** 80 | * This function returns precision at specified length 81 | * @param length specified size of search results 82 | * @param groundtruth groundtruth 83 | * @param result search results 84 | */ 85 | double GetPresicionAt(const int length, const set& groundtruth, 86 | const vector& result); 87 | 88 | /** 89 | * This function returns recall at full length 90 | */ 91 | double GetRecall(const vector& groundtruth, 92 | const vector& result); -------------------------------------------------------------------------------- /run_indexer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import datetime 4 | import sys 5 | 6 | ############# launch configuration ################# 7 | 8 | # folder to keep built binaries in 9 | build_folder = './build_master' 10 | 11 | # number of threads to use (max = 32) 12 | threads_count = 32 13 | 14 | # Multi-1 or Multi-2 or Multi-4 15 | multiplicity = 2 16 | 17 | # Folder with BigAnn base 18 | bigann_root = '/sata/ResearchData/BigAnn' 19 | 20 | # input point type (BVEC or FVEC) 21 | input_type = 'BVEC' 22 | 23 | # file with input point (.bvecs or .fvecs) 24 | points_file = 'sift1M.bvecs' 25 | 26 | # prefix of all vocabs, coarse quantizations, etc. 
27 | prefix = 'sift1M' 28 | 29 | # input points count 30 | points_count = 1000000 31 | 32 | # dimension of input space 33 | space_dim = 128 34 | 35 | # coarse vocabs size 36 | coarse_vocabs_size = 16384 37 | 38 | # fine vocabs count 39 | fine_vocabs_count = 8 40 | 41 | # should we use residuals? 42 | use_residuals = 1 43 | 44 | # should we calculate coarse quantizations? 45 | build_coarse = 1 46 | 47 | # postfix added by users to all multiindex files 48 | user_added_postfix = '' 49 | 50 | ################################################## 51 | 52 | multiplicity_extension = '' 53 | if multiplicity == 1: 54 | multiplicity_extension = 'single' 55 | if multiplicity == 2: 56 | multiplicity_extension = 'double' 57 | if multiplicity == 4: 58 | multiplicity_extension = 'quad' 59 | 60 | coarse_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '.dat' 61 | fine_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '.dat' 62 | filename_prefix = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + user_added_postfix 63 | coarse_quantization_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + user_added_postfix + '_coarse_quantizations.bin' 64 | 65 | launch_time = datetime.datetime.now().strftime("%I_%M%p_%B_%d_%Y") 66 | os.system('mkdir -p ' + build_folder + '/' + launch_time) 67 | os.system('cp ' + build_folder + '/indexer_launcher ' + build_folder + '/' + launch_time) 68 | os.system('cp run_indexer.py ' + build_folder + '/' + launch_time) 69 | 70 | launch_line = build_folder + '/' + launch_time + '/indexer_launcher ' 71 | launch_line = launch_line + '--threads_count=' + str(threads_count) + ' ' 72 | launch_line = launch_line + '--multiplicity=' + str(multiplicity) + ' ' 73 | launch_line = launch_line + '--points_file=' + bigann_root + '/bases/' + points_file + ' ' 74 | launch_line = 
launch_line + '--coarse_vocabs_file=' + bigann_root + '/coarse_vocabs/' + coarse_vocabs_filename + ' ' 75 | launch_line = launch_line + '--fine_vocabs_file=' + bigann_root + '/fine_vocabs/' + fine_vocabs_filename + ' ' 76 | launch_line = launch_line + '--input_point_type=' + input_type + ' ' 77 | launch_line = launch_line + '--points_count=' + str(points_count) + ' ' 78 | launch_line = launch_line + '--space_dim=' + str(space_dim) + ' ' 79 | launch_line = launch_line + '--files_prefix=' + bigann_root + '/indices/' + filename_prefix + ' ' 80 | launch_line = launch_line + '--coarse_quantization_file=' + bigann_root + '/cq/' + coarse_quantization_filename + ' ' 81 | launch_line = launch_line + '--metainfo_file=fake.txt' + ' ' 82 | if use_residuals: 83 | launch_line = launch_line + '--use_residuals' + ' ' 84 | if build_coarse: 85 | launch_line = launch_line + '--build_coarse' + ' ' 86 | 87 | f = open(build_folder + '/' + launch_time + '/launch.sh', 'w') 88 | f.write(launch_line) 89 | f.close() 90 | os.system('nohup ' + launch_line + ' > ' + build_folder + '/' + launch_time + '/log.txt' + ' &') 91 | print 'Log file: ' + build_folder + '/' + launch_time + '/log.txt' 92 | -------------------------------------------------------------------------------- /run_indexer_vlad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import datetime 4 | import sys 5 | 6 | ############# launch configuration ################# 7 | 8 | # folder to keep built binaries in 9 | build_folder = './build_master' 10 | 11 | # number of threads to use (max = 32) 12 | threads_count = 32 13 | 14 | # Multi-1 or Multi-2 or Multi-4 15 | multiplicity = 2 16 | 17 | # Folder with BigAnn base 18 | bigann_root = '/sata/ResearchData/BigAnn' 19 | 20 | # input point type (BVEC or FVEC) 21 | input_type = 'FVEC' 22 | 23 | # file with input point (.bvecs or .fvecs) 24 | points_file = 'vlad500K_base.fvecs' 25 | 26 | # prefix of all vocabs, 
coarse quantizations, etc. 27 | prefix = 'vlad500K' 28 | 29 | # input points count 30 | points_count = 500000 31 | 32 | # dimension of input space 33 | space_dim = 128 34 | 35 | # coarse vocabs size 36 | coarse_vocabs_size = 4096 37 | 38 | # fine vocabs count 39 | fine_vocabs_count = 8 40 | 41 | # should we use residuals? 42 | use_residuals = 1 43 | 44 | # should we calculate coarse quantizations? 45 | build_coarse = 1 46 | 47 | # postfix added by users to all multiindex files 48 | user_added_postfix = '' 49 | 50 | ################################################## 51 | 52 | multiplicity_extension = '' 53 | if multiplicity == 1: 54 | multiplicity_extension = 'single' 55 | if multiplicity == 2: 56 | multiplicity_extension = 'double' 57 | if multiplicity == 4: 58 | multiplicity_extension = 'quad' 59 | 60 | coarse_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '.dat' 61 | fine_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '.dat' 62 | filename_prefix = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + user_added_postfix 63 | coarse_quantization_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + user_added_postfix + '_coarse_quantizations.bin' 64 | 65 | launch_time = datetime.datetime.now().strftime("%I_%M%p_%B_%d_%Y") 66 | os.system('mkdir -p ' + build_folder + '/' + launch_time) 67 | os.system('cp ' + build_folder + '/indexer_launcher ' + build_folder + '/' + launch_time) 68 | os.system('cp run_indexer_vlad.py ' + build_folder + '/' + launch_time) 69 | 70 | launch_line = build_folder + '/' + launch_time + '/indexer_launcher ' 71 | launch_line = launch_line + '--threads_count=' + str(threads_count) + ' ' 72 | launch_line = launch_line + '--multiplicity=' + str(multiplicity) + ' ' 73 | launch_line = launch_line + '--points_file=' + bigann_root + '/bases/' + points_file + 
' ' 74 | launch_line = launch_line + '--coarse_vocabs_file=' + bigann_root + '/coarse_vocabs/' + coarse_vocabs_filename + ' ' 75 | launch_line = launch_line + '--fine_vocabs_file=' + bigann_root + '/fine_vocabs/' + fine_vocabs_filename + ' ' 76 | launch_line = launch_line + '--input_point_type=' + input_type + ' ' 77 | launch_line = launch_line + '--points_count=' + str(points_count) + ' ' 78 | launch_line = launch_line + '--space_dim=' + str(space_dim) + ' ' 79 | launch_line = launch_line + '--files_prefix=' + bigann_root + '/indices/' + filename_prefix + ' ' 80 | launch_line = launch_line + '--coarse_quantization_file=' + bigann_root + '/cq/' + coarse_quantization_filename + ' ' 81 | launch_line = launch_line + '--metainfo_file=fake.txt' + ' ' 82 | if use_residuals: 83 | launch_line = launch_line + '--use_residuals' + ' ' 84 | if build_coarse: 85 | launch_line = launch_line + '--build_coarse' + ' ' 86 | 87 | f = open(build_folder + '/' + launch_time + '/launch.sh', 'w') 88 | f.write(launch_line) 89 | f.close() 90 | os.system('nohup ' + launch_line + ' > ' + build_folder + '/' + launch_time + '/log.txt' + ' &') 91 | print 'Log file: ' + build_folder + '/' + launch_time + '/log.txt' 92 | -------------------------------------------------------------------------------- /run_searcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import datetime 4 | 5 | ############# launch configuration ################# 6 | 7 | # folder to keep built binaries in 8 | build_folder = './build_master' 9 | 10 | 11 | # Folder with BigAnn base 12 | bigann_root = '/sata/ResearchData/BigAnn' 13 | 14 | # input query point type (BVEC or FVEC) 15 | query_input_type = 'BVEC' 16 | 17 | # Multi-1 or Multi-2 or Multi-4 18 | multiplicity = 2 19 | 20 | # prefix of all vocabs, coarse quantizations, etc. 
21 | prefix = 'sift1M' 22 | 23 | # dimension of input space 24 | space_dim = 128 25 | 26 | # coarse vocabs size 27 | coarse_vocabs_size = 4096 28 | 29 | # fine vocabs count 30 | fine_vocabs_count = 8 31 | 32 | # should we use residuals? 33 | use_residuals = 1 34 | 35 | # number of centroids handled in each subdimension 36 | subspace_centroids_count = 1024 37 | 38 | # queries file 39 | queries_file = 'sift1B_queries.bvecs' 40 | 41 | # groundtruth file 42 | gnd_file = 'sift1M_groundtruth.ivecs' 43 | 44 | # number of queries 45 | queries_count = 1000 46 | 47 | # number of neighbors to seek 48 | neighbors_count = 10000 49 | 50 | # should we rerank ? 51 | do_rerank = 1 52 | 53 | # postfix added by users to all multiindex files 54 | user_added_postfix = '' 55 | 56 | ################################################## 57 | 58 | multiplicity_extension = '' 59 | if multiplicity == 1: 60 | multiplicity_extension = 'single' 61 | if multiplicity == 2: 62 | multiplicity_extension = 'double' 63 | if multiplicity == 4: 64 | multiplicity_extension = 'quad' 65 | 66 | coarse_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '.dat' 67 | fine_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '.dat' 68 | filename_prefix = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + user_added_postfix 69 | 70 | launch_time = datetime.datetime.now().strftime("%I_%M_%S%p_%B_%d_%Y") 71 | os.system('mkdir -p ' + build_folder + '/' + launch_time) 72 | os.system('cp ' + build_folder + '/searcher_tester ' + build_folder + '/' + launch_time) 73 | os.system('cp run_searcher.py ' + build_folder + '/' + launch_time) 74 | report_filename = build_folder + '/' + launch_time + '/report' 75 | 76 | launch_line = build_folder + '/' + launch_time + '/searcher_tester ' 77 | launch_line = launch_line + '--queries_file=' + bigann_root + '/bases/' + 
queries_file + ' ' 78 | launch_line = launch_line + '--groundtruth_file=' + bigann_root + '/gnd/' + gnd_file + ' ' 79 | launch_line = launch_line + '--coarse_vocabs_file=' + bigann_root + '/coarse_vocabs/' + coarse_vocabs_filename + ' ' 80 | launch_line = launch_line + '--fine_vocabs_file=' + bigann_root + '/fine_vocabs/' + fine_vocabs_filename + ' ' 81 | launch_line = launch_line + '--query_point_type=' + query_input_type + ' ' 82 | launch_line = launch_line + '--queries_count=' + str(queries_count) + ' ' 83 | launch_line = launch_line + '--neighbours_count=' + str(neighbors_count) + ' ' 84 | launch_line = launch_line + '--subspaces_centroids_count=' + str(subspace_centroids_count) + ' ' 85 | launch_line = launch_line + '--space_dim=' + str(space_dim) + ' ' 86 | launch_line = launch_line + '--index_files_prefix=' + bigann_root + '/indices/' + filename_prefix + ' ' 87 | launch_line = launch_line + '--report_file=' + report_filename + ' ' 88 | if use_residuals: 89 | launch_line = launch_line + '--use_residuals' + ' ' 90 | if do_rerank: 91 | launch_line = launch_line + '--do_rerank' + ' ' 92 | 93 | f = open(build_folder + '/' + launch_time + '/launch.sh', 'w') 94 | f.write(launch_line) 95 | f.close() 96 | log_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '_' + str(neighbors_count) + '.txt' 97 | os.system('nohup ' + launch_line + ' > ' + build_folder + '/' + launch_time + '/' + log_filename + ' &') 98 | print 'Log file: ' + build_folder + '/' + launch_time + '/' + log_filename 99 | -------------------------------------------------------------------------------- /run_searcher_vlad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import datetime 4 | 5 | ############# launch configuration ################# 6 | 7 | # folder to keep built binaries in 8 | build_folder = './build_master' 9 | 10 | 11 | # Folder with BigAnn base 12 | 
bigann_root = '/sata/ResearchData/BigAnn' 13 | 14 | # input query point type (BVEC or FVEC) 15 | query_input_type = 'FVEC' 16 | 17 | # Multi-1 or Multi-2 or Multi-4 18 | multiplicity = 2 19 | 20 | # prefix of all vocabs, coarse quantizations, etc. 21 | prefix = 'vlad500K' 22 | 23 | # dimension of input space 24 | space_dim = 128 25 | 26 | # coarse vocabs size 27 | coarse_vocabs_size = 4096 28 | 29 | # fine vocabs count 30 | fine_vocabs_count = 8 31 | 32 | # should we use residuals? 33 | use_residuals = 1 34 | 35 | # number of centroids handled in each subdimension 36 | subspace_centroids_count = 4096 37 | 38 | # queries file 39 | queries_file = 'vlad1M_queries.fvecs' 40 | 41 | # groundtruth file 42 | gnd_file = 'vlad500K_groundtruth.ivecs' 43 | 44 | # number of queries 45 | queries_count = 1000 46 | 47 | # number of neighbors to seek 48 | neighbors_count = 10000 49 | 50 | # should we rerank ? 51 | do_rerank = 0 52 | 53 | # postfix added by users to all multiindex files 54 | user_added_postfix = '' 55 | 56 | ################################################## 57 | 58 | multiplicity_extension = '' 59 | if multiplicity == 1: 60 | multiplicity_extension = 'single' 61 | if multiplicity == 2: 62 | multiplicity_extension = 'double' 63 | if multiplicity == 4: 64 | multiplicity_extension = 'quad' 65 | 66 | coarse_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '.dat' 67 | fine_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '.dat' 68 | filename_prefix = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + user_added_postfix 69 | 70 | launch_time = datetime.datetime.now().strftime("%I_%M_%S%p_%B_%d_%Y") 71 | os.system('mkdir -p ' + build_folder + '/' + launch_time) 72 | os.system('cp ' + build_folder + '/searcher_tester ' + build_folder + '/' + launch_time) 73 | os.system('cp run_searcher_vlad.py ' + build_folder 
+ '/' + launch_time) 74 | report_filename = build_folder + '/' + launch_time + '/report' 75 | 76 | launch_line = build_folder + '/' + launch_time + '/searcher_tester ' 77 | launch_line = launch_line + '--queries_file=' + bigann_root + '/bases/' + queries_file + ' ' 78 | launch_line = launch_line + '--groundtruth_file=' + bigann_root + '/gnd/' + gnd_file + ' ' 79 | launch_line = launch_line + '--coarse_vocabs_file=' + bigann_root + '/coarse_vocabs/' + coarse_vocabs_filename + ' ' 80 | launch_line = launch_line + '--fine_vocabs_file=' + bigann_root + '/fine_vocabs/' + fine_vocabs_filename + ' ' 81 | launch_line = launch_line + '--query_point_type=' + query_input_type + ' ' 82 | launch_line = launch_line + '--queries_count=' + str(queries_count) + ' ' 83 | launch_line = launch_line + '--neighbours_count=' + str(neighbors_count) + ' ' 84 | launch_line = launch_line + '--subspaces_centroids_count=' + str(subspace_centroids_count) + ' ' 85 | launch_line = launch_line + '--space_dim=' + str(space_dim) + ' ' 86 | launch_line = launch_line + '--index_files_prefix=' + bigann_root + '/indices/' + filename_prefix + ' ' 87 | launch_line = launch_line + '--report_file=' + report_filename + ' ' 88 | if use_residuals: 89 | launch_line = launch_line + '--use_residuals' + ' ' 90 | if do_rerank: 91 | launch_line = launch_line + '--do_rerank' + ' ' 92 | 93 | f = open(build_folder + '/' + launch_time + '/launch.sh', 'w') 94 | f.write(launch_line) 95 | f.close() 96 | log_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '_' + str(neighbors_count) + '.txt' 97 | os.system('nohup ' + launch_line + ' > ' + build_folder + '/' + launch_time + '/' + log_filename + ' &') 98 | print 'Log file: ' + build_folder + '/' + launch_time + '/' + log_filename 99 | -------------------------------------------------------------------------------- /searcher.h: -------------------------------------------------------------------------------- 1 | 
/** @file */ 2 | // Copyright 2012 Yandex Artem Babenko 3 | #ifndef SEARCHER_H_ 4 | #define SEARCHER_H_ 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include "data_util.h" 19 | #include "ordered_lists_merger.h" 20 | #include "perfomance_util.h" 21 | 22 | extern int THREADS_COUNT; 23 | 24 | extern Dimensions SPACE_DIMENSION; 25 | 26 | extern enum PointType point_type; 27 | 28 | /** 29 | * \typedef This typedef is used in the first stage of search when 30 | * we get nearest centroids for each coarse subpace 31 | */ 32 | typedef vector > NearestSubspaceCentroids; 33 | 34 | /** 35 | * This is the main class for nearest neighbour search using multiindex 36 | */ 37 | template 38 | class MultiSearcher { 39 | public: 40 | /** 41 | * Default constructor 42 | */ 43 | MultiSearcher(); 44 | /** 45 | * Initiation function 46 | * @param index_files_prefix prefix of multiindex files providing the search 47 | * @param coarse_vocabs_filename file with coarse vocabs 48 | * @param fine_vocabs_filename file with fine vocabs for reranking 49 | * @param mode reranking approach 50 | * @param do_rerank should algorithm rerank short list or not 51 | */ 52 | void Init(const string& index_files_prefix, 53 | const string& coarse_vocabs_filename, 54 | const string& fine_vocabs_filename, 55 | const RerankMode& mode, 56 | const int subspace_centroids_to_consider, 57 | bool do_rerank); 58 | /** 59 | * Main interface function 60 | * @param point query point 61 | * @param k number of neighbours to get 62 | * @param subpace_centroids_to_consider it defines the size of working index table 63 | * @param neighbours result - vector of point identifiers ordered by increasing of distance to query 64 | */ 65 | void GetNearestNeighbours(const Point& point, int k, 66 | vector >* neighbours) const; 67 | /** 68 | * Returns searcher perfomance tester 69 | */ 70 | PerfTester& GetPerfTester(); 71 | private: 72 | /** 73 | * 
This functions deserializes all structures for search 74 | * @param index_files_prefix prefix of multiindex files providing the search 75 | * @param coarse_vocabs_filename file with coarse vocabs 76 | * @param fine_vocabs_filename file with fine vocabs for reranking 77 | */ 78 | void DeserializeData(const string& index_files_prefix, 79 | const string& coarse_vocabs_filename, 80 | const string& fine_vocabs_filename); 81 | /** 82 | * Function gets some nearest centroids for each coarse subspace 83 | * @param point query point 84 | * @param subspace_centroins_count how many nearest subcentroids to get 85 | * @param subspaces_short_lists result 86 | */ 87 | void GetNearestSubspacesCentroids(const Point& point, 88 | const int subspace_centroins_count, 89 | vector* subspaces_short_lists) const; 90 | 91 | /** 92 | * This fuctions traverses another cell of multiindex table 93 | * @param point query point 94 | * @param nearest_subpoints vector algorithm adds nearest neighbours in 95 | */ 96 | bool TraverseNextMultiIndexCell(const Point& point, 97 | vector >* nearest_subpoints) const; 98 | /** 99 | * This fuctions converts cells coordinates to appropriate range in array 100 | * @param cell_coordinates coordinates of the cell 101 | * @param cell_start first index of range 102 | * @param cell_finish last index of range 103 | */ 104 | inline void GetCellEdgesInMultiIndexArray(const vector& cell_coordinates, 105 | int* cell_start, int* cell_finish) const; 106 | /** 107 | * This fuctions converts complex objects to arrays and 108 | * pointers for usage in BLAS 109 | */ 110 | void InitBlasStructures(); 111 | /** 112 | * Lists of coarse centroids 113 | */ 114 | vector coarse_vocabs_; 115 | /** 116 | * Lists of fine centroids 117 | */ 118 | vector fine_vocabs_; 119 | /** 120 | * Merger for ordered merging subspaces centroids lists 121 | */ 122 | mutable OrderedListsMerger merger_; 123 | /** 124 | * Should algorithm use reranking or not 125 | */ 126 | bool do_rerank_; 127 | /** 128 | 
* Searcher perfomance tester 129 | */ 130 | mutable PerfTester perf_tester_; 131 | /** 132 | * Common prefix of every index files 133 | */ 134 | string index_files_prefix_; 135 | /** 136 | * Multiindex data structures 137 | */ 138 | MultiIndex multiindex_; 139 | /** 140 | * Reranking approach 141 | */ 142 | RerankMode rerank_mode_; 143 | /** 144 | * Struct for BLAS 145 | */ 146 | vector coarse_vocabs_matrices_; 147 | /** 148 | * Struct for BLAS 149 | */ 150 | vector > coarse_centroids_norms_; 151 | /** 152 | * Struct for BLAS 153 | */ 154 | mutable Coord* products_; 155 | /** 156 | * Struct for BLAS 157 | */ 158 | mutable vector query_norms_; 159 | /** 160 | * Struct for BLAS 161 | */ 162 | mutable float* residual_; 163 | /** 164 | * Number of nearest to query centroids 165 | * to consider for each dimension 166 | */ 167 | int subspace_centroids_to_consider_; 168 | /** 169 | * Number of neighbours found to this moment 170 | */ 171 | mutable int found_neghbours_count_; 172 | }; 173 | 174 | template 175 | inline void RecordToMetainfoAndDistance(const Coord* point, 176 | const Record& record, 177 | pair* result, 178 | const vector& cell_coordinates, 179 | const vector& fine_vocabs) { 180 | } 181 | 182 | /////////////// IMPLEMENTATION ///////////////////// 183 | 184 | template 185 | MultiSearcher::MultiSearcher() { 186 | } 187 | 188 | template 189 | void MultiSearcher::DeserializeData(const string& index_files_prefix, 190 | const string& coarse_vocabs_filename, 191 | const string& fine_vocabs_filename) { 192 | cout << "Data deserializing started...\n"; 193 | ifstream cell_edges(string(index_files_prefix + "_cell_edges.bin").c_str(), ios::binary); 194 | if(!cell_edges.good()) { 195 | throw std::logic_error("Bad input cell edges stream"); 196 | } 197 | boost::archive::binary_iarchive arc_cell_edges(cell_edges); 198 | arc_cell_edges >> multiindex_.cell_edges; 199 | cout << "Cell edges deserialized...\n"; 200 | ifstream multi_array(string(index_files_prefix + 
"_multi_array.bin").c_str(), ios::binary); 201 | if(!multi_array.good()) { 202 | throw std::logic_error("Bad input cell edges stream"); 203 | } 204 | boost::archive::binary_iarchive arc_multi_array(multi_array); 205 | arc_multi_array >> multiindex_.multiindex; 206 | cout << "Multiindex deserialized...\n"; 207 | ReadVocabularies(coarse_vocabs_filename, SPACE_DIMENSION, &coarse_vocabs_); 208 | cout << "Coarse vocabs deserialized...\n"; 209 | ReadFineVocabs(fine_vocabs_filename, &fine_vocabs_); 210 | cout << "Fine vocabs deserialized...\n"; 211 | } 212 | 213 | template 214 | void MultiSearcher::Init(const string& index_files_prefix, 215 | const string& coarse_vocabs_filename, 216 | const string& fine_vocabs_filename, 217 | const RerankMode& mode, 218 | const int subspace_centroids_to_consider, 219 | const bool do_rerank) { 220 | do_rerank_ = do_rerank; 221 | index_files_prefix_ = index_files_prefix; 222 | subspace_centroids_to_consider_ = subspace_centroids_to_consider; 223 | DeserializeData(index_files_prefix, coarse_vocabs_filename, fine_vocabs_filename); 224 | rerank_mode_ = mode; 225 | merger_.GetYieldedItems().table.resize(std::pow((float)subspace_centroids_to_consider, 226 | (int)coarse_vocabs_.size())); 227 | for(int i = 0; i < coarse_vocabs_.size(); ++i) { 228 | merger_.GetYieldedItems().dimensions.push_back(subspace_centroids_to_consider); 229 | } 230 | InitBlasStructures(); 231 | } 232 | 233 | template 234 | void MultiSearcher::InitBlasStructures(){ 235 | coarse_vocabs_matrices_.resize(coarse_vocabs_.size()); 236 | coarse_centroids_norms_.resize(coarse_vocabs_.size(), vector(coarse_vocabs_[0].size())); 237 | for(int coarse_id = 0; coarse_id < coarse_vocabs_matrices_.size(); ++coarse_id) { 238 | coarse_vocabs_matrices_[coarse_id] = new float[coarse_vocabs_[0].size() * coarse_vocabs_[0][0].size()]; 239 | for(int i = 0; i < coarse_vocabs_[0].size(); ++i) { 240 | Coord norm = 0; 241 | for(int j = 0; j < coarse_vocabs_[0][0].size(); ++j) { 242 | 
coarse_vocabs_matrices_[coarse_id][coarse_vocabs_[0][0].size() * i + j] = coarse_vocabs_[coarse_id][i][j]; 243 | norm += coarse_vocabs_[coarse_id][i][j] * coarse_vocabs_[coarse_id][i][j]; 244 | } 245 | coarse_centroids_norms_[coarse_id][i] = norm; 246 | } 247 | } 248 | products_ = new Coord[coarse_vocabs_[0].size()]; 249 | query_norms_.resize(coarse_vocabs_[0].size()); 250 | residual_ = new Coord[coarse_vocabs_[0][0].size() * coarse_vocabs_.size()]; 251 | } 252 | 253 | template 254 | PerfTester& MultiSearcher::GetPerfTester() { 255 | return perf_tester_; 256 | } 257 | 258 | template 259 | void MultiSearcher::GetNearestSubspacesCentroids(const Point& point, 260 | const int subspace_centroins_count, 261 | vector* 262 | subspaces_short_lists) const { 263 | std::stringstream aa; 264 | subspaces_short_lists->resize(coarse_vocabs_.size()); 265 | Dimensions subspace_dimension = point.size() / coarse_vocabs_.size(); 266 | for(int subspace_index = 0; subspace_index < coarse_vocabs_.size(); ++subspace_index) { 267 | Dimensions start_dim = subspace_index * subspace_dimension; 268 | Dimensions final_dim = std::min((Dimensions)point.size(), start_dim + subspace_dimension); 269 | Coord query_norm = cblas_sdot(final_dim - start_dim, &(point[start_dim]), 1, &(point[start_dim]), 1); 270 | std::fill(query_norms_.begin(), query_norms_.end(), query_norm); 271 | cblas_saxpy(coarse_vocabs_[0].size(), 1, &(coarse_centroids_norms_[subspace_index][0]), 1, &(query_norms_[0]), 1); 272 | cblas_sgemv(CblasRowMajor, CblasNoTrans, coarse_vocabs_[0].size(), subspace_dimension, -2.0, 273 | coarse_vocabs_matrices_[subspace_index], subspace_dimension, &(point[start_dim]), 1, 1, &(query_norms_[0]), 1); 274 | subspaces_short_lists->at(subspace_index).resize(query_norms_.size()); 275 | for(int i = 0; i < query_norms_.size(); ++i) { 276 | subspaces_short_lists->at(subspace_index)[i] = std::make_pair(query_norms_[i], i); 277 | } 278 | std::nth_element(subspaces_short_lists->at(subspace_index).begin(), 
279 | subspaces_short_lists->at(subspace_index).begin() + subspace_centroins_count, 280 | subspaces_short_lists->at(subspace_index).end()); 281 | subspaces_short_lists->at(subspace_index).resize(subspace_centroins_count); 282 | std::sort(subspaces_short_lists->at(subspace_index).begin(), 283 | subspaces_short_lists->at(subspace_index).end()); 284 | } 285 | } 286 | 287 | template 288 | void MultiSearcher::GetCellEdgesInMultiIndexArray(const vector& cell_coordinates, 289 | int* cell_start, int* cell_finish) const { 290 | int global_index = multiindex_.cell_edges.GetCellGlobalIndex(cell_coordinates); 291 | *cell_start = multiindex_.cell_edges.table[global_index]; 292 | if(global_index + 1 == multiindex_.cell_edges.table.size()) { 293 | *cell_finish = multiindex_.multiindex.size(); 294 | } else { 295 | *cell_finish = multiindex_.cell_edges.table[global_index + 1]; 296 | } 297 | } 298 | 299 | template 300 | bool MultiSearcher::TraverseNextMultiIndexCell(const Point& point, 301 | vector >* 302 | nearest_subpoints) const { 303 | MergedItemIndices cell_inner_indices; 304 | clock_t before = clock(); 305 | if(!merger_.GetNextMergedItemIndices(&cell_inner_indices)) { 306 | return false; 307 | } 308 | clock_t after = clock(); 309 | perf_tester_.cell_coordinates_time += after - before; 310 | vector cell_coordinates(cell_inner_indices.size()); 311 | for(int list_index = 0; list_index < merger_.lists_ptr->size(); ++list_index) { 312 | cell_coordinates[list_index] = merger_.lists_ptr->at(list_index)[cell_inner_indices[list_index]].second; 313 | } 314 | int cell_start, cell_finish; 315 | before = clock(); 316 | GetCellEdgesInMultiIndexArray(cell_coordinates, &cell_start, &cell_finish); 317 | after = clock(); 318 | perf_tester_.cell_edges_time += after - before; 319 | if(cell_start >= cell_finish) { 320 | return true; 321 | } 322 | typename vector::const_iterator it = multiindex_.multiindex.begin() + cell_start; 323 | GetResidual(point, cell_coordinates, coarse_vocabs_, residual_); 
324 | cell_finish = std::min((int)cell_finish, cell_start + (int)nearest_subpoints->size() - found_neghbours_count_); 325 | for(int array_index = cell_start; array_index < cell_finish; ++array_index) { 326 | if(rerank_mode_ == USE_RESIDUALS) { 327 | RecordToMetainfoAndDistance(residual_, *it, 328 | &(nearest_subpoints->at(found_neghbours_count_)), 329 | cell_coordinates, fine_vocabs_); 330 | } else if(rerank_mode_ == USE_INIT_POINTS) { 331 | RecordToMetainfoAndDistance(&(point[0]), *it, 332 | &(nearest_subpoints->at(found_neghbours_count_)), 333 | cell_coordinates, fine_vocabs_); 334 | } 335 | perf_tester_.NextNeighbour(); 336 | ++found_neghbours_count_; 337 | ++it; 338 | } 339 | return true; 340 | } 341 | 342 | 343 | template 344 | void MultiSearcher::GetNearestNeighbours(const Point& point, int k, 345 | vector >* neighbours) const { 346 | assert(k > 0); 347 | perf_tester_.handled_queries_count += 1; 348 | neighbours->resize(k); 349 | perf_tester_.ResetQuerywiseStatistic(); 350 | clock_t start = clock(); 351 | perf_tester_.search_start = start; 352 | clock_t before = clock(); 353 | vector subspaces_short_lists; 354 | assert(subspace_centroids_to_consider_ > 0); 355 | GetNearestSubspacesCentroids(point, subspace_centroids_to_consider_, &subspaces_short_lists); 356 | clock_t after = clock(); 357 | perf_tester_.nearest_subcentroids_time += after - before; 358 | clock_t before_merger = clock(); 359 | merger_.setLists(subspaces_short_lists); 360 | clock_t after_merger = clock(); 361 | perf_tester_.merger_init_time += after_merger - before_merger; 362 | clock_t before_traversal = clock(); 363 | found_neghbours_count_ = 0; 364 | bool traverse_next_cell = true; 365 | int cells_visited = 0; 366 | while(found_neghbours_count_ < k && traverse_next_cell) { 367 | perf_tester_.cells_traversed += 1; 368 | traverse_next_cell = TraverseNextMultiIndexCell(point, neighbours); 369 | cells_visited += 1; 370 | } 371 | clock_t after_traversal = clock(); 372 | 
perf_tester_.full_traversal_time += after_traversal - before_traversal; 373 | if(do_rerank_) { 374 | std::sort(neighbours->begin(), neighbours->end()); 375 | } 376 | clock_t finish = clock(); 377 | perf_tester_.full_search_time += finish - start; 378 | } 379 | 380 | template<> 381 | inline void RecordToMetainfoAndDistance(const Coord* point, const RerankADC8& record, 382 | pair* result, 383 | const vector& cell_coordinates, 384 | const vector& fine_vocabs) { 385 | result->second = record.pid; 386 | int coarse_clusters_count = cell_coordinates.size(); 387 | int fine_clusters_count = fine_vocabs.size(); 388 | int coarse_to_fine_ratio = fine_clusters_count / coarse_clusters_count; 389 | int subvectors_dim = SPACE_DIMENSION / fine_clusters_count; 390 | char* rerank_info_ptr = (char*)&record + sizeof(record.pid); 391 | for(int centroid_index = 0; centroid_index < fine_clusters_count; ++centroid_index) { 392 | int start_dim = centroid_index * subvectors_dim; 393 | int final_dim = start_dim + subvectors_dim; 394 | FineClusterId pid_nearest_centroid = *((FineClusterId*)rerank_info_ptr); 395 | rerank_info_ptr += sizeof(FineClusterId); 396 | int current_coarse_index = centroid_index / coarse_to_fine_ratio; 397 | Distance subvector_distance = 0; 398 | for(int i = start_dim; i < final_dim; ++i) { 399 | Coord diff = fine_vocabs[centroid_index][pid_nearest_centroid][i - start_dim] - point[i]; 400 | subvector_distance += diff * diff; 401 | } 402 | result->first += subvector_distance; 403 | } 404 | } 405 | 406 | template<> 407 | inline void RecordToMetainfoAndDistance(const Coord* point, const RerankADC16& record, 408 | pair* result, 409 | const vector& cell_coordinates, 410 | const vector& fine_vocabs) { 411 | result->second = record.pid; 412 | int coarse_clusters_count = cell_coordinates.size(); 413 | int fine_clusters_count = fine_vocabs.size(); 414 | int coarse_to_fine_ratio = fine_clusters_count / coarse_clusters_count; 415 | int subvectors_dim = SPACE_DIMENSION / 
fine_clusters_count; 416 | char* rerank_info_ptr = (char*)&record + sizeof(record.pid); 417 | for(int centroid_index = 0; centroid_index < fine_clusters_count; ++centroid_index) { 418 | int start_dim = centroid_index * subvectors_dim; 419 | int final_dim = start_dim + subvectors_dim; 420 | FineClusterId pid_nearest_centroid = *((FineClusterId*)rerank_info_ptr); 421 | rerank_info_ptr += sizeof(FineClusterId); 422 | int current_coarse_index = centroid_index / coarse_to_fine_ratio; 423 | Distance subvector_distance = 0; 424 | for(int i = start_dim; i < final_dim; ++i) { 425 | Coord diff = fine_vocabs[centroid_index][pid_nearest_centroid][i - start_dim] - point[i]; 426 | subvector_distance += diff * diff; 427 | } 428 | result->first += subvector_distance; 429 | } 430 | } 431 | 432 | template class MultiSearcher; 433 | template class MultiSearcher; 434 | template class MultiSearcher; 435 | 436 | #endif 437 | 438 | -------------------------------------------------------------------------------- /searcher_tester.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2012 Yandex Artem Babenko 2 | #include 3 | 4 | #include 5 | 6 | #include 7 | 8 | #include "searcher.h" 9 | #include "indexer.h" 10 | 11 | using namespace boost::program_options; 12 | 13 | /** 14 | * Number of threads for indexing 15 | */ 16 | Dimensions SPACE_DIMENSION; 17 | /** 18 | * File with vocabularies for multiindex structure 19 | */ 20 | string coarse_vocabs_file; 21 | /** 22 | * File with vocabularies for reranking 23 | */ 24 | string fine_vocabs_file; 25 | /** 26 | * Reranking approach, should be USE_RESIDUALS or USE_INIT_POINTS 27 | */ 28 | RerankMode mode; 29 | /** 30 | * Common prefix of all multiindex files 31 | */ 32 | string index_files_prefix; 33 | /** 34 | * File with queries (.bvec or .fvec) 35 | */ 36 | string queries_file; 37 | /** 38 | * Type, should be BVEC or FVEC 39 | */ 40 | PointType query_point_type; 41 | /** 42 | * File with 
groundtruth (.ivec) 43 | */ 44 | string groundtruth_file; 45 | /** 46 | * Number of queries to search 47 | */ 48 | int queries_count; 49 | /** 50 | * Should we rerank? 51 | */ 52 | bool do_rerank; 53 | /** 54 | * Number of neighbours to look over 55 | */ 56 | int neighbours_count; 57 | /** 58 | * File to write report in 59 | */ 60 | string report_file; 61 | /** 62 | * Number of nearest centroids for each group of dimensions to handle 63 | */ 64 | int subspaces_centroids_count; 65 | 66 | 67 | 68 | int SetOptions(int argc, char** argv) { 69 | options_description description("Options"); 70 | description.add_options() 71 | ("index_files_prefix,i", value()) 72 | ("queries_file,q", value()) 73 | ("queries_count,n", value()) 74 | ("neighbours_count,k", value()) 75 | ("groundtruth_file,g", value()) 76 | ("coarse_vocabs_file,c", value()) 77 | ("fine_vocabs_file,f", value()) 78 | ("query_point_type,t", value()) 79 | ("do_rerank,l", bool_switch(), "Flag B") 80 | ("use_residuals,r", bool_switch(), "Flag R") 81 | ("points_count,p", value()) 82 | ("report_file,o", value()) 83 | ("space_dim,d", value()) 84 | ("subspaces_centroids_count,s", value()); 85 | variables_map name_to_value; 86 | try { 87 | store(command_line_parser(argc, argv).options(description).run(), name_to_value); 88 | } catch (const invalid_command_line_syntax &inv_syntax) { 89 | switch (inv_syntax.kind()) { 90 | case invalid_syntax::missing_parameter : 91 | cout << "Missing argument for option '" << inv_syntax.tokens() << "'.\n"; 92 | break; 93 | default: 94 | cout << "Syntax error, kind " << int(inv_syntax.kind()) << "\n"; 95 | break; 96 | }; 97 | return 1; 98 | } catch (const unknown_option &unkn_opt) { 99 | cout << "Unknown option '" << unkn_opt.get_option_name() << "'\n"; 100 | return 1; 101 | } 102 | if (name_to_value.count("help")) { 103 | cout << description << "\n"; 104 | return 1; 105 | } 106 | 107 | coarse_vocabs_file = name_to_value["coarse_vocabs_file"].as(); 108 | fine_vocabs_file = 
name_to_value["fine_vocabs_file"].as(); 109 | SPACE_DIMENSION = name_to_value["space_dim"].as(); 110 | index_files_prefix = name_to_value["index_files_prefix"].as(); 111 | queries_file = name_to_value["queries_file"].as(); 112 | report_file = name_to_value["report_file"].as(); 113 | groundtruth_file = name_to_value["groundtruth_file"].as(); 114 | queries_count = name_to_value["queries_count"].as(); 115 | neighbours_count = name_to_value["neighbours_count"].as(); 116 | subspaces_centroids_count = name_to_value["subspaces_centroids_count"].as(); 117 | 118 | do_rerank = (name_to_value["do_rerank"].as() == true) ? true : false; 119 | mode = (name_to_value["use_residuals"].as() == true) ? USE_RESIDUALS : USE_INIT_POINTS; 120 | if (name_to_value["query_point_type"].as() == "FVEC") { 121 | query_point_type = FVEC; 122 | } else if(name_to_value["query_point_type"].as() == "BVEC") { 123 | query_point_type = BVEC; 124 | } 125 | return 0; 126 | } 127 | 128 | template 129 | void TestSearcher(TSearcher& searcher, 130 | const Points& queries, 131 | const vector >& groundtruth) { 132 | searcher.Init(index_files_prefix, coarse_vocabs_file, 133 | fine_vocabs_file, mode, 134 | subspaces_centroids_count, 135 | do_rerank); 136 | cout << "Searcher inited ...\n"; 137 | vector result; 138 | float recall = 0.0; 139 | vector recalls(5, 0.0); 140 | clock_t start = clock(); 141 | for(int i = 0; i < queries_count; ++i) { 142 | std::cout << i << std::endl; 143 | neighbours_count = 10000; 144 | result.clear(); 145 | searcher.GetNearestNeighbours(queries[i], neighbours_count, &result); 146 | recalls[0] += GetRecallAt(1, groundtruth[i], result); 147 | recalls[1] += GetRecallAt(10, groundtruth[i], result); 148 | recalls[2] += GetRecallAt(100, groundtruth[i], result); 149 | recalls[3] += GetRecallAt(1000, groundtruth[i], result); 150 | recalls[4] += GetRecallAt(10000, groundtruth[i], result); 151 | } 152 | cout << "R@1 " << recalls[0] / queries_count << "\n" << 153 | "R@10 " << recalls[1] / 
queries_count << "\n" << 154 | "R@100 " << recalls[2] / queries_count << "\n" << 155 | "R@1000 " << recalls[3] / queries_count << "\n" << 156 | "R@10000 " << recalls[4] / queries_count << endl; 157 | searcher.GetPerfTester().DoReport(); 158 | clock_t finish = clock(); 159 | std::cout << "Average search time(ms): "<<(double)(finish - start) / queries.size() << std::endl; 160 | } 161 | 162 | int main(int argc, char** argv) { 163 | SetOptions(argc, argv); 164 | cout << "Options are set ...\n"; 165 | Points queries; 166 | if(query_point_type == BVEC) { 167 | ReadPoints(queries_file, &queries, queries_count); 168 | } else if (query_point_type == FVEC) { 169 | ReadPoints(queries_file, &queries, queries_count); 170 | } 171 | cout << "Queries are read ...\n"; 172 | vector > groundtruth; 173 | ReadPoints(groundtruth_file, &groundtruth, queries_count); 174 | MKL_Set_Num_Threads(1); 175 | cout << "Groundtruth is read ...\n"; 176 | vector fine_vocabs; 177 | ReadFineVocabs(fine_vocabs_file, &fine_vocabs); 178 | if(fine_vocabs.size() == 8) { 179 | MultiSearcher searcher; 180 | TestSearcher > (searcher, queries, groundtruth); 181 | } else if(fine_vocabs.size() == 16) { 182 | MultiSearcher searcher; 183 | TestSearcher > (searcher, queries, groundtruth); 184 | } 185 | return 0; 186 | } 187 | -------------------------------------------------------------------------------- /sift1M_double_4096.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbabenko/MultiIndex/bb0578821093f19d9c44a3ed7f50b8930e1d0199/sift1M_double_4096.dat -------------------------------------------------------------------------------- /sift1M_double_4096_8.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbabenko/MultiIndex/bb0578821093f19d9c44a3ed7f50b8930e1d0199/sift1M_double_4096_8.dat -------------------------------------------------------------------------------- 
/test_searcher_double.sh: -------------------------------------------------------------------------------- 1 | cd build_master 2 | ./searcher_tester \ 3 | --coarse_vocabs_file="../sift1M_double_4096.dat" \ 4 | --fine_vocabs_file="../sift1M_double_4096_8.dat" \ 5 | --query_point_type="BVEC" \ 6 | --use_residuals \ 7 | --space_dim=128 \ 8 | --subspaces_centroids_count=1024 \ 9 | --index_files_prefix="/sata/ResearchData/BigAnn/indices/sift1M_double_4096_8" \ 10 | --queries_file="/sata/ResearchData/BigAnn/bases/sift1B_queries.bvecs" \ 11 | --groundtruth_file="/sata/ResearchData/BigAnn/gnd/sift1M_groundtruth.ivecs" \ 12 | --queries_count=500 \ 13 | --neighbours_count=10000 \ 14 | --report_file="sift1M_4096_8_report.txt" \ 15 | --do_rerank 16 | --------------------------------------------------------------------------------