├── .gitignore ├── CMakeLists.txt ├── IndexerLauncher.cmake ├── NearestSearch.cmake ├── SearcherTester.cmake ├── build_debug.sh ├── build_release.sh ├── data_util.cpp ├── data_util.h ├── docs ├── doxygen.cfg ├── how_to_install.dox ├── index.dox ├── main_page.dox ├── pictures │ └── mult.jpg └── search.dox ├── indexer.h ├── indexer_launcher.cpp ├── launch_indexer_double.sh ├── make_project.bat ├── multitable.hpp ├── ordered_lists_merger.h ├── perfomance_util.cpp ├── perfomance_util.h ├── run_indexer.py ├── run_indexer_vlad.py ├── run_searcher.py ├── run_searcher_vlad.py ├── searcher.h ├── searcher_tester.cpp ├── sift1M_double_4096.dat ├── sift1M_double_4096_8.dat └── test_searcher_double.sh /.gitignore: -------------------------------------------------------------------------------- 1 | html/ 2 | build 3 | run_indexer_vlad.py 4 | run_searcher_vlad.py 5 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ########################################################################################## 2 | # CMake build script for NearestSearch. 3 | # 4 | ########################################################################################## 5 | 6 | #let all libraries be static, not shared 7 | OPTION(BUILD_SHARED_LIBS "Build shared libraries (DLLs)." OFF) 8 | 9 | ########################################################################################## 10 | # lets start describing our project. 
# BUGFIX: cmake_minimum_required must run before project() so that policy
# defaults are established first; the original called project() first.
cmake_minimum_required(VERSION 2.6)
project(NearestSearch CXX C)

find_package(Boost COMPONENTS program_options serialization system filesystem thread REQUIRED)

IF (UNIX)
  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -fopenmp -DMKL_ILP64 -m64")
  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -fopenmp -DMKL_ILP64 -m64")
ENDIF (UNIX)

# CMAKE_CURRENT_LIST_DIR only exists on CMake >= 2.8.3; fall back for older.
IF (NOT DEFINED CMAKE_CURRENT_LIST_DIR)
  SET(CMAKE_CURRENT_LIST_DIR ${CMAKE_CURRENT_SOURCE_DIR})
ENDIF (NOT DEFINED CMAKE_CURRENT_LIST_DIR)

MESSAGE("current dir: ${CMAKE_CURRENT_LIST_DIR}")
SET(Source_Path ${CMAKE_CURRENT_LIST_DIR})
MESSAGE("PROJECT_BINARY_DIR " ${PROJECT_BINARY_DIR})

# Boost locations (hardcoded per original project layout).
IF (UNIX)
  SET(BOOST_DIR /usr/include/boost)
  SET(BOOST_LIB /usr/local/lib)
ENDIF (UNIX)
IF (WIN32)
  SET(BOOST_DIR C:/Users/arbabenko/Soft/boost_1_47_0)
  SET(BOOST_LIB C:/Users/arbabenko/Soft/boost_1_47_0/lib/x64/lib)
ENDIF (WIN32)

# Intel MKL locations (hardcoded per original project layout).
IF (UNIX)
  SET(BLAS_DIR /opt/intel/composer_xe_2013.2.146/mkl/include)
  SET(BLAS_LIB /opt/intel/composer_xe_2013.2.146/mkl/lib/intel64
      /opt/intel/composer_xe_2013.2.146/compiler/lib/intel64)
ENDIF (UNIX)
IF (WIN32)
  SET(BLAS_DIR "C:/Program Files (x86)/Intel/Composer XE 2011 SP1/mkl/include")
  SET(BLAS_LIB "C:/Program Files (x86)/Intel/Composer XE 2011 SP1/mkl/lib/intel64"
      "C:/Program Files (x86)/Intel/Composer XE 2011 SP1/compiler/lib/intel64")
ENDIF (WIN32)

# MKL link line, shared by both executables (was duplicated verbatim twice).
IF (UNIX)
  SET(MKL_LIBS libmkl_intel_ilp64.a libmkl_gnu_thread.a libmkl_core.a dl pthread m)
ENDIF (UNIX)
IF (WIN32)
  SET(MKL_LIBS mkl_intel_lp64.lib mkl_intel_thread.lib mkl_core.lib libiomp5md.lib)
ENDIF (WIN32)

# Include/link paths were repeated before every target; declaring them once
# at directory scope has the identical effect.
INCLUDE_DIRECTORIES(${BOOST_DIR} ${BLAS_DIR} ${Source_Path})
LINK_DIRECTORIES(${BOOST_LIB} ${BLAS_LIB})

##########################################################################################
# Static core library with all nearest-search sources.
INCLUDE(${Source_Path}/NearestSearch.cmake NO_POLICY_SCOPE)
ADD_LIBRARY(nearest_search_lib STATIC ${NEAREST_ALL_CC})

##########################################################################################
# Indexer launcher executable.
INCLUDE(${Source_Path}/IndexerLauncher.cmake NO_POLICY_SCOPE)
ADD_EXECUTABLE(indexer_launcher ${IndexerLauncher})
TARGET_LINK_LIBRARIES(indexer_launcher nearest_search_lib ${Boost_LIBRARIES} ${MKL_LIBS})

##########################################################################################
# Searcher tester executable.
INCLUDE(${Source_Path}/SearcherTester.cmake NO_POLICY_SCOPE)
ADD_EXECUTABLE(searcher_tester ${SearchTester})
TARGET_LINK_LIBRARIES(searcher_tester nearest_search_lib ${Boost_LIBRARIES} ${MKL_LIBS})
--------------------------------------------------------------------------------
/IndexerLauncher.cmake:
--------------------------------------------------------------------------------
# Source list for the indexer_launcher executable.
set(IndexerLauncher ${Source_Path}/indexer_launcher.cpp)
--------------------------------------------------------------------------------
/NearestSearch.cmake:
--------------------------------------------------------------------------------
# Divide binaries into groups for comfortable IDE navigation.
SOURCE_GROUP(util FILES ${Source_Path}/data_util.h
                        ${Source_Path}/data_util.cpp
                        ${Source_Path}/multitable.hpp
                        ${Source_Path}/perfomance_util.h
                        ${Source_Path}/perfomance_util.cpp)

SET(UTIL ${Source_Path}/data_util.h
         ${Source_Path}/data_util.cpp
         ${Source_Path}/multitable.hpp
         ${Source_Path}/perfomance_util.h
         ${Source_Path}/perfomance_util.cpp)

SOURCE_GROUP(indexer FILES ${Source_Path}/indexer.h)
SET(INDEXER ${Source_Path}/indexer.h)

SOURCE_GROUP(searcher FILES ${Source_Path}/searcher.h
                            ${Source_Path}/ordered_lists_merger.h)
SET(SEARCHER ${Source_Path}/searcher.h
             ${Source_Path}/ordered_lists_merger.h)

# All source files of the nearest-search library.
SET(NEAREST_ALL_CC ${UTIL} ${INDEXER} ${SEARCHER})
--------------------------------------------------------------------------------
/SearcherTester.cmake:
--------------------------------------------------------------------------------
# Source list for the searcher_tester executable.
set(SearchTester ${Source_Path}/searcher_tester.cpp)
--------------------------------------------------------------------------------
/build_debug.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Configure and build a Debug tree in ./build_dup.
mkdir -p build_dup
# BUGFIX: abort if cd fails instead of running cmake/make in the wrong dir.
cd build_dup || exit 1
# BUGFIX: -f keeps rm silent when no stale cache exists.
rm -f ./CMakeCache.txt
cmake -DCMAKE_BUILD_TYPE=Debug ..
make
--------------------------------------------------------------------------------
/build_release.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Configure and build a Release tree in ./build_master.
mkdir -p build_master
# BUGFIX: abort if cd fails instead of running cmake/make in the wrong dir.
cd build_master || exit 1
# BUGFIX: -f keeps rm silent when no stale cache exists.
rm -f ./CMakeCache.txt
cmake -DCMAKE_BUILD_TYPE=Release ..
5 | make 6 | -------------------------------------------------------------------------------- /data_util.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2012 Yandex Artem Babenko 2 | 3 | #include "data_util.h" 4 | 5 | Distance Eucldistance(const Point& x, const Point& y) { 6 | Distance result = 0; 7 | Distance current_coord_diff; 8 | for(Dimensions d = 0; d < x.size(); ++d){ 9 | current_coord_diff = x[d] - y[d]; 10 | result += current_coord_diff * current_coord_diff; 11 | } 12 | return result; 13 | } 14 | 15 | Distance Eucldistance(const Point& x, const Point& y, 16 | const Dimensions start, const Dimensions finish) { 17 | Distance result = 0; 18 | Distance current_coord_diff; 19 | for(Dimensions d = start; d < finish; ++d){ 20 | current_coord_diff = x[d] - y[d - start]; 21 | result += current_coord_diff * current_coord_diff; 22 | } 23 | return result; 24 | } 25 | 26 | void GetSubpoints(const Points& points, 27 | const Dimensions start_dim, 28 | const Dimensions final_dim, 29 | Points* subpoints) { 30 | if(final_dim < start_dim) { 31 | throw std::logic_error("Final dim < Start dim"); 32 | } 33 | subpoints->resize(points.size()); 34 | for(PointId pid = 0; pid < points.size(); ++pid) { 35 | subpoints->at(pid).resize(final_dim - start_dim); 36 | for(Dimensions dim = start_dim; dim < final_dim; ++dim) { 37 | subpoints->at(pid)[dim] = points[pid][start_dim + dim]; 38 | } 39 | } 40 | } 41 | 42 | ClusterId GetNearestClusterId(const Point& point, 43 | const Centroids& centroids, 44 | const Dimensions start_dim, 45 | const Dimensions final_dim) { 46 | if(final_dim < start_dim) { 47 | throw std::logic_error("Final dim < Start dim"); 48 | } 49 | ClusterId nearest = 0; 50 | Distance min_distance = Eucldistance(point, centroids[0], start_dim, final_dim); 51 | for(PointId pid = 1; pid < centroids.size(); ++pid) { 52 | Distance current_distance = 0; 53 | current_distance = Eucldistance(point, centroids[pid], start_dim, 
final_dim); 54 | if(current_distance < min_distance) { 55 | min_distance = current_distance; 56 | nearest = pid; 57 | } 58 | } 59 | return nearest; 60 | } 61 | 62 | void GetResidual(const Point& point, const CoarseQuantization& coarse_quantizations, 63 | const vector& centroids, Point* residual) { 64 | residual->resize(point.size()); 65 | Dimensions subvector_dimension = point.size() / centroids.size(); 66 | cblas_saxpy(point.size(), 1, &(point[0]), 1, &(residual->at(0)), 1); 67 | for(int subvector_index = 0; subvector_index < centroids.size(); ++subvector_index) { 68 | Dimensions start_dim = subvector_index * subvector_dimension; 69 | const Point& current_coarse_centroid = centroids[subvector_index][coarse_quantizations[subvector_index]]; 70 | cblas_saxpy(subvector_dimension, -1, &(current_coarse_centroid[0]), 1, &(residual->at(start_dim)), 1); 71 | } 72 | } 73 | 74 | void GetResidual(const Point& point, const CoarseQuantization& coarse_quantizations, 75 | const vector& centroids, Coord* residual) { 76 | Dimensions subvector_dimension = point.size() / centroids.size(); 77 | cblas_scopy(point.size(), &(point[0]), 1, residual, 1); 78 | for(int subvector_index = 0; subvector_index < centroids.size(); ++subvector_index) { 79 | Dimensions start_dim = subvector_index * subvector_dimension; 80 | const Point& current_coarse_centroid = centroids[subvector_index][coarse_quantizations[subvector_index]]; 81 | cblas_saxpy(subvector_dimension, -1, &(current_coarse_centroid[0]), 1, &(residual[start_dim]), 1); 82 | } 83 | } 84 | 85 | void GetNearestClusterIdsForPointSubset(const Points& points, const Centroids& centroids, 86 | const PointId start_pid, const PointId final_pid, 87 | vector* nearest) { 88 | if(final_pid < start_pid) { 89 | throw std::logic_error("Final pid < Start pid"); 90 | } 91 | cout << start_pid << " point processing started\n"; 92 | for(PointId pid = start_pid; pid < final_pid; ++pid) { 93 | if(pid % 10000 == 0) { 94 | cout << pid << endl; 95 | } 96 | 
nearest->at(pid) = GetNearestClusterId(points[pid], centroids, 0, points[0].size()); 97 | } 98 | cout << final_pid << " point processing finished\n"; 99 | } 100 | 101 | void GetNearestClusterIdsForSubpoints(const Points& points, const Centroids& centroids, 102 | const Dimensions start_dim, const Dimensions final_dim, 103 | int threads_count, vector* nearest) { 104 | if(final_dim < start_dim) { 105 | throw std::logic_error("Final dim < Start dim"); 106 | } 107 | cout << "Start getting nearest Cluster Ids..." << endl; 108 | Points subpoints; 109 | GetSubpoints(points, start_dim, final_dim, &subpoints); 110 | boost::thread_group threads; 111 | int subpoints_count = points.size() / threads_count; 112 | for(int thread_id = 0; thread_id < threads_count; ++thread_id) { 113 | PointId start_pid = subpoints_count * thread_id; 114 | PointId final_pid = start_pid + subpoints_count; 115 | threads.create_thread(boost::bind(&GetNearestClusterIdsForPointSubset, subpoints, centroids, 116 | start_pid, final_pid, nearest)); 117 | } 118 | threads.join_all(); 119 | cout << "Finish getting nearest Cluster Ids..." 
<< endl; 120 | } 121 | 122 | void GetPointsCoarseQuaintizations(const Points& points, 123 | const vector& centroids, 124 | const int threads_count, 125 | vector* coarse_quantizations) { 126 | int number_of_subvectors = centroids.size(); 127 | coarse_quantizations->resize(points.size(), CoarseQuantization(number_of_subvectors)); 128 | Dimensions subvector_dimension = points[0].size() / number_of_subvectors; 129 | for(int centroids_index = 0; centroids_index < number_of_subvectors; ++centroids_index) { 130 | vector cluster_labels; 131 | cluster_labels.resize(points.size()); 132 | Dimensions start_dim = centroids_index * subvector_dimension; 133 | Dimensions final_dim = std::min((Dimensions)points[0].size(), start_dim + subvector_dimension); 134 | GetNearestClusterIdsForSubpoints(points, centroids[centroids_index], 135 | start_dim, final_dim, threads_count, &cluster_labels); 136 | for(PointId pid = 0; pid < points.size(); ++pid) { 137 | coarse_quantizations->at(pid)[centroids_index] = cluster_labels[pid]; 138 | } 139 | } 140 | } -------------------------------------------------------------------------------- /data_util.h: -------------------------------------------------------------------------------- 1 | /** @file */ 2 | 3 | // Copyright 2012 Yandex Artem Babenko 4 | #pragma once 5 | 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include "mkl_cblas.h" 19 | 20 | #include "multitable.hpp" 21 | 22 | using std::bitset; 23 | using std::cout; 24 | using std::ifstream; 25 | using std::ios; 26 | using std::endl; 27 | using std::multimap; 28 | using std::pair; 29 | using std::set; 30 | using std::string; 31 | using std::vector; 32 | 33 | /** 34 | * \typedef 35 | * Data type for coordinate (bool, char, int, float, etc.) 
36 | */ 37 | typedef float Coord; 38 | /** 39 | * \typedef 40 | * Data type for distance in multidimensional space 41 | */ 42 | typedef float Distance; 43 | /** 44 | * \typedef 45 | * Dimensionality of space = number of point coordinates 46 | */ 47 | typedef int Dimensions; 48 | /** 49 | * \typedef 50 | * Data type for point identifier 51 | */ 52 | typedef int PointId; 53 | /** 54 | * \typedef 55 | * Data type for cluster identifier 56 | */ 57 | typedef int ClusterId; 58 | /** 59 | * \typedef 60 | * Just vector of coordinates 61 | */ 62 | typedef vector Point; 63 | /** 64 | * \typedef 65 | * Class for a number of points 66 | */ 67 | typedef vector Points; 68 | /** 69 | * \typedef 70 | * Class for a number of point identifiers 71 | */ 72 | typedef vector PointIds; 73 | /** 74 | * \typedef 75 | * Class for a set of points 76 | */ 77 | typedef set SetPoints; 78 | /** 79 | * \typedef 80 | * Class for a number of cluster identifiers 81 | */ 82 | typedef vector ClusterIds; 83 | /** 84 | * \typedef 85 | * Class for representation of point coarse quantization, 86 | * ids of nearest centroids for each group of dimensions 87 | */ 88 | typedef vector CoarseQuantization; 89 | /** 90 | * \typedef 91 | * Data type for fine cluster identifier 92 | */ 93 | typedef unsigned char FineClusterId; 94 | /** 95 | * \typedef 96 | * Class for representation of point fine quantization 97 | */ 98 | typedef vector FineQuantization; 99 | /** 100 | * \typedef 101 | * Class for clusters representation 102 | * ClusterId -> (PointId, PointId, PointId, .... 
) 103 | */ 104 | typedef vector ClustersToPoints; 105 | /** 106 | * \typedef 107 | * Class for belonging to clusters representation 108 | * PointId -> ClusterId 109 | */ 110 | typedef std::vector PointsToClusters; 111 | /** 112 | * \typedef 113 | * Centroids of the clustered points set 114 | */ 115 | typedef std::vector Centroids; 116 | 117 | /** 118 | * \enum This enumeration presents different types of input point 119 | * coordinate can be float or uint8 120 | */ 121 | enum PointType { 122 | FVEC, 123 | BVEC 124 | }; 125 | 126 | /** 127 | * \enum This enumeration presents different ways to get rerank info. 128 | * Algorithm can quantize residuals after coarse quantization or init points without 129 | * centroids subtraction 130 | */ 131 | enum RerankMode { 132 | USE_RESIDUALS, 133 | USE_INIT_POINTS 134 | }; 135 | 136 | /** 137 | * \struct MultiIndex incorporates all data structures we need to make search 138 | */ 139 | template 140 | struct MultiIndex { 141 | vector multiindex; 142 | Multitable cell_edges; ///< Table with index cell edges in array 143 | }; 144 | 145 | /** 146 | * Function calculates squared euclidian distance between two points (points must have the same dimensionality) 147 | * @param x first point 148 | * @param y second point 149 | */ 150 | Distance Eucldistance(const Point& x, const Point& y); 151 | /** 152 | * Function calculates squared euclidian distance point with small dimensionality and 153 | * subpoint of point with bigger dimensionality. 
154 | * @param x first point 155 | * @param y second point 156 | * @param start first dimension of subpoint 157 | * @param finish dimension after the last dimension of subpoint 158 | */ 159 | Distance Eucldistance(const Point& x, const Point& y, Dimensions start, Dimensions finish); 160 | 161 | /** 162 | * This simple function casts number of type T to the nearest number of type U 163 | */ 164 | template 165 | inline U Round(T number) { 166 | return (U)(number); 167 | } 168 | 169 | /** 170 | * Function reads point written in .fvecs or .bvecs format. 171 | * Input points have coordinates of type T. 172 | * Result points have coordinates of type U 173 | * @param filename .fvecs or .bvecs file name 174 | * @param points_count how many points to read 175 | * @param points result list of read points 176 | */ 177 | template 178 | void ReadPoints(const string& filename, 179 | vector >* points, 180 | int count) { 181 | ifstream input; 182 | input.open(filename.c_str(), ios::binary); 183 | if(!input.good()) { 184 | throw std::logic_error("Invalid filename"); 185 | } 186 | points->resize(count); 187 | int dimension; 188 | for(PointId pid = 0; pid < count; ++pid) { 189 | input.read((char*)&dimension, sizeof(dimension)); 190 | if(dimension <= 0) { 191 | throw std::logic_error("Bad file content: non-positive dimension"); 192 | } 193 | points->at(pid).resize(dimension); 194 | for(Dimensions d = 0; d < dimension; ++d) { 195 | T buffer; 196 | input.read((char*)&(buffer), sizeof(T)); 197 | points->at(pid)[d] = Round(buffer); 198 | } 199 | } 200 | } 201 | 202 | /** 203 | * Function reads one vector of coordinates of type T. 204 | * Function assumes that the first int32-number in input stream is 205 | * vector dimensionality. Result vector will have coordinates of type U. 
206 | * @param input input stream 207 | * @param v result vector 208 | */ 209 | template 210 | void ReadVector(ifstream& input, vector* v) { 211 | if(!input.good()) { 212 | throw std::logic_error("Bad input stream"); 213 | } 214 | int dimension; 215 | input.read((char*)&dimension, sizeof(dimension)); 216 | if(dimension <= 0) { 217 | throw std::logic_error("Bad file content: non-positive dimension"); 218 | } 219 | v->resize(dimension); 220 | for(Dimensions d = 0; d < dimension; ++d) { 221 | T buffer; 222 | input.read((char*)&buffer, sizeof(buffer)); 223 | v->at(d) = Round(buffer); 224 | } 225 | } 226 | 227 | /** 228 | * Function reads vocabulary of centroids produced by matlab script. 229 | * @param input input stream 230 | * @param dimension one centroid dimensionality 231 | * @param vocabulary_size centroids count 232 | * @param Centroids* result centroids 233 | */ 234 | template 235 | void ReadVocabulary(ifstream& input, 236 | Dimensions dimension, 237 | int vocabulary_size, 238 | Centroids* centroids) { 239 | if(!input.good()) { 240 | throw std::logic_error("Bad input stream"); 241 | } 242 | centroids->resize(vocabulary_size); 243 | for(ClusterId centroid_index = 0; centroid_index < centroids->size(); ++centroid_index) { 244 | centroids->at(centroid_index).resize(dimension); 245 | for(Dimensions dimension_index = 0; dimension_index < dimension; ++dimension_index) { 246 | T buffer; 247 | input.read((char*)&buffer, sizeof(buffer)); 248 | centroids->at(centroid_index)[dimension_index] = Round(buffer); 249 | } 250 | } 251 | } 252 | 253 | /** 254 | * Function reads vocabularies of centroids produced by matlab script. 
255 | * Function assumes that the first int32 in input is dimensionality of centroids and 256 | * the second is the number of centroids in each vocabulary 257 | * @param input input stream 258 | * @param dimension one centroid dimensionality 259 | * @param vocabulary_size centroids count 260 | * @param Centroids* result centroids 261 | */ 262 | template 263 | void ReadVocabularies(const string& filename, 264 | Dimensions space_dimension, 265 | vector* centroids) { 266 | ifstream vocabulary; 267 | vocabulary.open(filename.c_str(), ios::binary); 268 | if(!vocabulary.good()) { 269 | throw std::logic_error("Bad vocabulary file"); 270 | } 271 | int dimension; 272 | vocabulary.read((char*)&dimension, sizeof(dimension)); 273 | if(dimension <= 0) { 274 | throw std::logic_error("Bad file content: non-positive dimension"); 275 | } 276 | int vocabs_count = space_dimension / dimension; 277 | if(space_dimension < dimension) { 278 | throw std::logic_error("Space dimension is less than vocabulary dimension"); 279 | } 280 | centroids->resize(vocabs_count); 281 | int vocabulary_size; 282 | vocabulary.read((char*)&vocabulary_size, sizeof(vocabulary_size)); 283 | for(int vocab_item = 0; vocab_item < vocabs_count; ++vocab_item) { 284 | ReadVocabulary(vocabulary, dimension, vocabulary_size, &(centroids->at(vocab_item))); 285 | } 286 | } 287 | 288 | /** 289 | * This function reads fine vocabs of centroids 290 | * @param fine_vocabs_filename file with vocabularies 291 | * @param fine_vocabs fine centroids lists 292 | */ 293 | template 294 | void ReadFineVocabs(const string& fine_vocabs_filename, vector* fine_vocabs) { 295 | ifstream fine_vocabs_stream; 296 | fine_vocabs_stream.open(fine_vocabs_filename.c_str(), ios::binary); 297 | if(!fine_vocabs_stream.good()) { 298 | throw std::logic_error("Bad fine vocabulary file"); 299 | } 300 | int vocabs_count, centroids_count, vocabs_dim; 301 | fine_vocabs_stream.read((char*)&vocabs_count, sizeof(vocabs_count)); 302 | if(vocabs_count < 1) { 303 | 
throw std::logic_error("Bad fine vocabulary file content: number of vocabularies < 1"); 304 | } 305 | fine_vocabs_stream.read((char*)¢roids_count, sizeof(centroids_count)); 306 | if(centroids_count < 1) { 307 | throw std::logic_error("Bad fine vocabulary file content: vocabulary capacity < 1"); 308 | } 309 | fine_vocabs_stream.read((char*)&vocabs_dim, sizeof(vocabs_dim)); 310 | if(vocabs_dim < 1) { 311 | throw std::logic_error("Bad fine vocabulary file content: vocabulary dimension < 1"); 312 | } 313 | fine_vocabs->resize(vocabs_count); 314 | for(int voc_index = 0; voc_index < vocabs_count; ++voc_index) { 315 | ReadVocabulary(fine_vocabs_stream, vocabs_dim, centroids_count, &(fine_vocabs->at(voc_index))); 316 | } 317 | } 318 | 319 | /** 320 | * This function returns subpoints limited by start_dim and final_dim 321 | * for every point in points 322 | * @param points all points 323 | * @param start_dim first dimension of subpoint 324 | * @param final_dim dimension after the last dimension of subpoint 325 | * @param subpoints result subpoints 326 | */ 327 | void GetSubpoints(const Points& points, 328 | const Dimensions start_dim, 329 | const Dimensions final_dim, 330 | Points* subpoints); 331 | 332 | /** 333 | * This function returns identifier of clusters which centroid is the nearest to 334 | * subpoint limited by start_dim and final_dim 335 | * @param point full point 336 | * @param Centroids all centroids (function finds the nearest one) 337 | * @param start_dim first dimension of subpoint 338 | * @param final_dim dimension after the last dimension of subpoint 339 | */ 340 | ClusterId GetNearestClusterId(const Point& point, const Centroids& centroids, 341 | const Dimensions start_dim, const Dimensions final_dim); 342 | 343 | /** 344 | * This function calculates quantization residual. 
345 | * @param point initial point 346 | * @param coarse_quantizations point coarse quantization 347 | * @param centroids lists of centroids 348 | * @param residual result residual 349 | */ 350 | void GetResidual(const Point& point, const CoarseQuantization& coarse_quantizations, 351 | const vector& centroids, Point* residual); 352 | /** 353 | * This function calculates quantization residual. 354 | * @param point initial point 355 | * @param coarse_quantizations point coarse quantization 356 | * @param centroids lists of centroids 357 | * @param residual pointer to start of residual 358 | */ 359 | void GetResidual(const Point& point, const CoarseQuantization& coarse_quantizations, 360 | const vector& centroids, Coord* residual); 361 | 362 | /** 363 | * This function finds nearest cluster identifiers for points from start_pid to final_pid. 364 | * We need this function for multi-threading 365 | * @param points all points 366 | * @param centroids centroids of clusters 367 | * @param start_pid first point function finds nearest cluster 368 | * @param final_pid point after the last point function finds nearest cluster 369 | */ 370 | void GetNearestClusterIdsForPointSubset(const Points& points, const Centroids& centroids, 371 | const PointId start_pid, const PointId final_pid, 372 | vector* nearest); 373 | 374 | /** 375 | * This function finds cluster identifiers nearest to subpoints for a number of points. 
376 | * Subpoints are limited by start_dim and finish_dim 377 | * @param points all points 378 | * @param centroids centroids of clusters 379 | * @param start_dim first dimesion of subpoint 380 | * @param final_dim dimesion after the last dimension of subpoint 381 | * @param threads_count number of threads 382 | * @param nearest result 383 | */ 384 | void GetNearestClusterIdsForSubpoints(const Points& points, const Centroids& centroids, 385 | const Dimensions start_dim, const Dimensions final_dim, 386 | int threads_count, vector* nearest); 387 | 388 | /** 389 | * This function calculates points coarse product quantizations 390 | * @param points all points 391 | * @param centroids centroids of clusters 392 | * @param threads_count number of threads 393 | * @param coarse_quantizations result quantizations 394 | */ 395 | void GetPointsCoarseQuaintizations(const Points& points, const vector& centroids, 396 | const int threads_count, 397 | vector* coarse_quantizations); 398 | 399 | 400 | /** 401 | * \struct All indexation parameters 402 | */ 403 | struct IndexConfig { 404 | RerankMode rerank_mode; 405 | vector fine_vocabs; 406 | }; 407 | 408 | /** 409 | * \struct Type of record in multiindex, contains 410 | * id of point and 8 bytes for ADC reranking 411 | */ 412 | struct RerankADC8 { 413 | PointId pid; 414 | FineClusterId quantizations[8]; 415 | template 416 | void serialize(Archive& arc, unsigned int version) { 417 | arc & pid; 418 | arc & quantizations; 419 | } 420 | }; 421 | 422 | /** 423 | * \struct Type of record in multiindex, contains 424 | * id of point and 16 bytes for ADC reranking 425 | */ 426 | struct RerankADC16 { 427 | PointId pid; 428 | FineClusterId quantizations[16]; 429 | template 430 | void serialize(Archive& arc, unsigned int version) { 431 | arc & pid; 432 | arc & quantizations; 433 | } 434 | }; 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 
-------------------------------------------------------------------------------- /docs/doxygen.cfg: -------------------------------------------------------------------------------- 1 | # Doxyfile 1.7.6 2 | 3 | # This file describes the settings to be used by the documentation system 4 | # doxygen (www.doxygen.org) for a project. 5 | # 6 | # All text after a hash (#) is considered a comment and will be ignored. 7 | # The format is: 8 | # TAG = value [value, ...] 9 | # For lists items can also be appended using: 10 | # TAG += value [value, ...] 11 | # Values that contain spaces should be placed between quotes (" "). 12 | 13 | #--------------------------------------------------------------------------- 14 | # Project related configuration options 15 | #--------------------------------------------------------------------------- 16 | 17 | # This tag specifies the encoding used for all characters in the config file 18 | # that follow. The default is UTF-8 which is also the encoding used for all 19 | # text before the first occurrence of this tag. Doxygen uses libiconv (or the 20 | # iconv built into libc) for the transcoding. See 21 | # http://www.gnu.org/software/libiconv for the list of possible encodings. 22 | 23 | DOXYFILE_ENCODING = UTF-8 24 | 25 | # The PROJECT_NAME tag is a single word (or sequence of words) that should 26 | # identify the project. Note that if you do not use Doxywizard you need 27 | # to put quotes around the project name if it contains spaces. 28 | 29 | PROJECT_NAME = "MultiIndex" 30 | 31 | # The PROJECT_NUMBER tag can be used to enter a project or revision number. 32 | # This could be handy for archiving the generated documentation or 33 | # if some version control system is used. 34 | 35 | PROJECT_NUMBER = 36 | 37 | # Using the PROJECT_BRIEF tag one can provide an optional one line description 38 | # for a project that appears at the top of each page and should give viewer 39 | # a quick idea about the purpose of the project. 
Keep the description short. 40 | 41 | PROJECT_BRIEF = 42 | 43 | # With the PROJECT_LOGO tag one can specify an logo or icon that is 44 | # included in the documentation. The maximum height of the logo should not 45 | # exceed 55 pixels and the maximum width should not exceed 200 pixels. 46 | # Doxygen will copy the logo to the output directory. 47 | 48 | PROJECT_LOGO = 49 | 50 | # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 51 | # base path where the generated documentation will be put. 52 | # If a relative path is entered, it will be relative to the location 53 | # where doxygen was started. If left blank the current directory will be used. 54 | 55 | OUTPUT_DIRECTORY = "." 56 | 57 | # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 58 | # 4096 sub-directories (in 2 levels) under the output directory of each output 59 | # format and will distribute the generated files over these directories. 60 | # Enabling this option can be useful when feeding doxygen a huge amount of 61 | # source files, where putting all generated files in the same directory would 62 | # otherwise cause performance problems for the file system. 63 | 64 | CREATE_SUBDIRS = NO 65 | 66 | # The OUTPUT_LANGUAGE tag is used to specify the language in which all 67 | # documentation generated by doxygen is written. Doxygen will use this 68 | # information to generate all constant output in the proper language. 69 | # The default language is English, other supported languages are: 70 | # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, 71 | # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, 72 | # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English 73 | # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, 74 | # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, 75 | # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. 
76 | 77 | OUTPUT_LANGUAGE = English 78 | 79 | # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 80 | # include brief member descriptions after the members that are listed in 81 | # the file and class documentation (similar to JavaDoc). 82 | # Set to NO to disable this. 83 | 84 | BRIEF_MEMBER_DESC = YES 85 | 86 | # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 87 | # the brief description of a member or function before the detailed description. 88 | # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 89 | # brief descriptions will be completely suppressed. 90 | 91 | REPEAT_BRIEF = YES 92 | 93 | # This tag implements a quasi-intelligent brief description abbreviator 94 | # that is used to form the text in various listings. Each string 95 | # in this list, if found as the leading text of the brief description, will be 96 | # stripped from the text and the result after processing the whole list, is 97 | # used as the annotated text. Otherwise, the brief description is used as-is. 98 | # If left blank, the following values are used ("$name" is automatically 99 | # replaced with the name of the entity): "The $name class" "The $name widget" 100 | # "The $name file" "is" "provides" "specifies" "contains" 101 | # "represents" "a" "an" "the" 102 | 103 | ABBREVIATE_BRIEF = 104 | 105 | # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 106 | # Doxygen will generate a detailed section even if there is only a brief 107 | # description. 108 | 109 | ALWAYS_DETAILED_SEC = NO 110 | 111 | # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all 112 | # inherited members of a class in the documentation of that class as if those 113 | # members were ordinary class members. Constructors, destructors and assignment 114 | # operators of the base classes will not be shown. 
115 | 116 | INLINE_INHERITED_MEMB = NO 117 | 118 | # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full 119 | # path before files name in the file list and in the header files. If set 120 | # to NO the shortest path that makes the file name unique will be used. 121 | 122 | FULL_PATH_NAMES = YES 123 | 124 | # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 125 | # can be used to strip a user-defined part of the path. Stripping is 126 | # only done if one of the specified strings matches the left-hand part of 127 | # the path. The tag can be used to show relative paths in the file list. 128 | # If left blank the directory from which doxygen is run is used as the 129 | # path to strip. 130 | 131 | STRIP_FROM_PATH = 132 | 133 | # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of 134 | # the path mentioned in the documentation of a class, which tells 135 | # the reader which header file to include in order to use a class. 136 | # If left blank only the name of the header file containing the class 137 | # definition is used. Otherwise one should specify the include paths that 138 | # are normally passed to the compiler using the -I flag. 139 | 140 | STRIP_FROM_INC_PATH = 141 | 142 | # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter 143 | # (but less readable) file names. This can be useful if your file system 144 | # doesn't support long names like on DOS, Mac, or CD-ROM. 145 | 146 | SHORT_NAMES = NO 147 | 148 | # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen 149 | # will interpret the first line (until the first dot) of a JavaDoc-style 150 | # comment as the brief description. If set to NO, the JavaDoc 151 | # comments will behave just like regular Qt-style comments 152 | # (thus requiring an explicit @brief command for a brief description.) 
153 | 154 | JAVADOC_AUTOBRIEF = NO 155 | 156 | # If the QT_AUTOBRIEF tag is set to YES then Doxygen will 157 | # interpret the first line (until the first dot) of a Qt-style 158 | # comment as the brief description. If set to NO, the comments 159 | # will behave just like regular Qt-style comments (thus requiring 160 | # an explicit \brief command for a brief description.) 161 | 162 | QT_AUTOBRIEF = NO 163 | 164 | # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen 165 | # treat a multi-line C++ special comment block (i.e. a block of //! or /// 166 | # comments) as a brief description. This used to be the default behaviour. 167 | # The new default is to treat a multi-line C++ comment block as a detailed 168 | # description. Set this tag to YES if you prefer the old behaviour instead. 169 | 170 | MULTILINE_CPP_IS_BRIEF = NO 171 | 172 | # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented 173 | # member inherits the documentation from any documented member that it 174 | # re-implements. 175 | 176 | INHERIT_DOCS = YES 177 | 178 | # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce 179 | # a new page for each member. If set to NO, the documentation of a member will 180 | # be part of the file/class/namespace that contains it. 181 | 182 | SEPARATE_MEMBER_PAGES = NO 183 | 184 | # The TAB_SIZE tag can be used to set the number of spaces in a tab. 185 | # Doxygen uses this value to replace tabs by spaces in code fragments. 186 | 187 | TAB_SIZE = 8 188 | 189 | # This tag can be used to specify a number of aliases that acts 190 | # as commands in the documentation. An alias has the form "name=value". 191 | # For example adding "sideeffect=\par Side Effects:\n" will allow you to 192 | # put the command \sideeffect (or @sideeffect) in the documentation, which 193 | # will result in a user-defined paragraph with heading "Side Effects:". 194 | # You can put \n's in the value part of an alias to insert newlines. 
195 | 196 | ALIASES = 197 | 198 | # This tag can be used to specify a number of word-keyword mappings (TCL only). 199 | # A mapping has the form "name=value". For example adding 200 | # "class=itcl::class" will allow you to use the command class in the 201 | # itcl::class meaning. 202 | 203 | TCL_SUBST = 204 | 205 | # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C 206 | # sources only. Doxygen will then generate output that is more tailored for C. 207 | # For instance, some of the names that are used will be different. The list 208 | # of all members will be omitted, etc. 209 | 210 | OPTIMIZE_OUTPUT_FOR_C = NO 211 | 212 | # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java 213 | # sources only. Doxygen will then generate output that is more tailored for 214 | # Java. For instance, namespaces will be presented as packages, qualified 215 | # scopes will look different, etc. 216 | 217 | OPTIMIZE_OUTPUT_JAVA = NO 218 | 219 | # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran 220 | # sources only. Doxygen will then generate output that is more tailored for 221 | # Fortran. 222 | 223 | OPTIMIZE_FOR_FORTRAN = NO 224 | 225 | # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL 226 | # sources. Doxygen will then generate output that is tailored for 227 | # VHDL. 228 | 229 | OPTIMIZE_OUTPUT_VHDL = NO 230 | 231 | # Doxygen selects the parser to use depending on the extension of the files it 232 | # parses. With this tag you can assign which parser to use for a given extension. 233 | # Doxygen has a built-in mapping, but you can override or extend it using this 234 | # tag. The format is ext=language, where ext is a file extension, and language 235 | # is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, 236 | # C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. 
For instance to make 237 | # doxygen treat .inc files as Fortran files (default is PHP), and .f files as C 238 | # (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions 239 | # you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. 240 | 241 | EXTENSION_MAPPING = 242 | 243 | # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want 244 | # to include (a tag file for) the STL sources as input, then you should 245 | # set this tag to YES in order to let doxygen match functions declarations and 246 | # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. 247 | # func(std::string) {}). This also makes the inheritance and collaboration 248 | # diagrams that involve STL classes more complete and accurate. 249 | 250 | BUILTIN_STL_SUPPORT = YES 251 | 252 | # If you use Microsoft's C++/CLI language, you should set this option to YES to 253 | # enable parsing support. 254 | 255 | CPP_CLI_SUPPORT = NO 256 | 257 | # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. 258 | # Doxygen will parse them like normal C++ but will assume all classes use public 259 | # instead of private inheritance when no explicit protection keyword is present. 260 | 261 | SIP_SUPPORT = NO 262 | 263 | # For Microsoft's IDL there are propget and propput attributes to indicate getter 264 | # and setter methods for a property. Setting this option to YES (the default) 265 | # will make doxygen replace the get and set methods by a property in the 266 | # documentation. This will only work if the methods are indeed getting or 267 | # setting a simple type. If this is not the case, or you want to show the 268 | # methods anyway, you should set this option to NO. 
269 | 270 | IDL_PROPERTY_SUPPORT = YES 271 | 272 | # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC 273 | # tag is set to YES, then doxygen will reuse the documentation of the first 274 | # member in the group (if any) for the other members of the group. By default 275 | # all members of a group must be documented explicitly. 276 | 277 | DISTRIBUTE_GROUP_DOC = NO 278 | 279 | # Set the SUBGROUPING tag to YES (the default) to allow class member groups of 280 | # the same type (for instance a group of public functions) to be put as a 281 | # subgroup of that type (e.g. under the Public Functions section). Set it to 282 | # NO to prevent subgrouping. Alternatively, this can be done per class using 283 | # the \nosubgrouping command. 284 | 285 | SUBGROUPING = YES 286 | 287 | # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and 288 | # unions are shown inside the group in which they are included (e.g. using 289 | # @ingroup) instead of on a separate page (for HTML and Man pages) or 290 | # section (for LaTeX and RTF). 291 | 292 | INLINE_GROUPED_CLASSES = NO 293 | 294 | # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and 295 | # unions with only public data fields will be shown inline in the documentation 296 | # of the scope in which they are defined (i.e. file, namespace, or group 297 | # documentation), provided this scope is documented. If set to NO (the default), 298 | # structs, classes, and unions are shown on a separate page (for HTML and Man 299 | # pages) or section (for LaTeX and RTF). 300 | 301 | INLINE_SIMPLE_STRUCTS = NO 302 | 303 | # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum 304 | # is documented as struct, union, or enum with the name of the typedef. So 305 | # typedef struct TypeS {} TypeT, will appear in the documentation as a struct 306 | # with name TypeT. When disabled the typedef will appear as a member of a file, 307 | # namespace, or class. 
And the struct will be named TypeS. This can typically 308 | # be useful for C code in case the coding convention dictates that all compound 309 | # types are typedef'ed and only the typedef is referenced, never the tag name. 310 | 311 | TYPEDEF_HIDES_STRUCT = NO 312 | 313 | # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to 314 | # determine which symbols to keep in memory and which to flush to disk. 315 | # When the cache is full, less often used symbols will be written to disk. 316 | # For small to medium size projects (<1000 input files) the default value is 317 | # probably good enough. For larger projects a too small cache size can cause 318 | # doxygen to be busy swapping symbols to and from disk most of the time 319 | # causing a significant performance penalty. 320 | # If the system has enough physical memory increasing the cache will improve the 321 | # performance by keeping more symbols in memory. Note that the value works on 322 | # a logarithmic scale so increasing the size by one will roughly double the 323 | # memory usage. The cache size is given by this formula: 324 | # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, 325 | # corresponding to a cache size of 2^16 = 65536 symbols 326 | 327 | SYMBOL_CACHE_SIZE = 0 328 | 329 | #--------------------------------------------------------------------------- 330 | # Build related configuration options 331 | #--------------------------------------------------------------------------- 332 | 333 | # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in 334 | # documentation are documented, even if no documentation was available. 335 | # Private class members and static file members will be hidden unless 336 | # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES 337 | 338 | EXTRACT_ALL = YES 339 | 340 | # If the EXTRACT_PRIVATE tag is set to YES all private members of a class 341 | # will be included in the documentation. 
342 | 343 | EXTRACT_PRIVATE = YES 344 | 345 | # If the EXTRACT_STATIC tag is set to YES all static members of a file 346 | # will be included in the documentation. 347 | 348 | EXTRACT_STATIC = YES 349 | 350 | # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) 351 | # defined locally in source files will be included in the documentation. 352 | # If set to NO only classes defined in header files are included. 353 | 354 | EXTRACT_LOCAL_CLASSES = YES 355 | 356 | # This flag is only useful for Objective-C code. When set to YES local 357 | # methods, which are defined in the implementation section but not in 358 | # the interface are included in the documentation. 359 | # If set to NO (the default) only methods in the interface are included. 360 | 361 | EXTRACT_LOCAL_METHODS = NO 362 | 363 | # If this flag is set to YES, the members of anonymous namespaces will be 364 | # extracted and appear in the documentation as a namespace called 365 | # 'anonymous_namespace{file}', where file will be replaced with the base 366 | # name of the file that contains the anonymous namespace. By default 367 | # anonymous namespaces are hidden. 368 | 369 | EXTRACT_ANON_NSPACES = NO 370 | 371 | # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all 372 | # undocumented members of documented classes, files or namespaces. 373 | # If set to NO (the default) these members will be included in the 374 | # various overviews, but no documentation section is generated. 375 | # This option has no effect if EXTRACT_ALL is enabled. 376 | 377 | HIDE_UNDOC_MEMBERS = NO 378 | 379 | # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all 380 | # undocumented classes that are normally visible in the class hierarchy. 381 | # If set to NO (the default) these classes will be included in the various 382 | # overviews. This option has no effect if EXTRACT_ALL is enabled. 
383 | 384 | HIDE_UNDOC_CLASSES = NO 385 | 386 | # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all 387 | # friend (class|struct|union) declarations. 388 | # If set to NO (the default) these declarations will be included in the 389 | # documentation. 390 | 391 | HIDE_FRIEND_COMPOUNDS = NO 392 | 393 | # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any 394 | # documentation blocks found inside the body of a function. 395 | # If set to NO (the default) these blocks will be appended to the 396 | # function's detailed documentation block. 397 | 398 | HIDE_IN_BODY_DOCS = NO 399 | 400 | # The INTERNAL_DOCS tag determines if documentation 401 | # that is typed after a \internal command is included. If the tag is set 402 | # to NO (the default) then the documentation will be excluded. 403 | # Set it to YES to include the internal documentation. 404 | 405 | INTERNAL_DOCS = NO 406 | 407 | # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate 408 | # file names in lower-case letters. If set to YES upper-case letters are also 409 | # allowed. This is useful if you have classes or files whose names only differ 410 | # in case and if your file system supports case sensitive file names. Windows 411 | # and Mac users are advised to set this option to NO. 412 | 413 | CASE_SENSE_NAMES = NO 414 | 415 | # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen 416 | # will show members with their full class and namespace scopes in the 417 | # documentation. If set to YES the scope will be hidden. 418 | 419 | HIDE_SCOPE_NAMES = NO 420 | 421 | # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen 422 | # will put a list of the files that are included by a file in the documentation 423 | # of that file. 
424 | 425 | SHOW_INCLUDE_FILES = YES 426 | 427 | # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen 428 | # will list include files with double quotes in the documentation 429 | # rather than with sharp brackets. 430 | 431 | FORCE_LOCAL_INCLUDES = NO 432 | 433 | # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] 434 | # is inserted in the documentation for inline members. 435 | 436 | INLINE_INFO = YES 437 | 438 | # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen 439 | # will sort the (detailed) documentation of file and class members 440 | # alphabetically by member name. If set to NO the members will appear in 441 | # declaration order. 442 | 443 | SORT_MEMBER_DOCS = YES 444 | 445 | # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the 446 | # brief documentation of file, namespace and class members alphabetically 447 | # by member name. If set to NO (the default) the members will appear in 448 | # declaration order. 449 | 450 | SORT_BRIEF_DOCS = NO 451 | 452 | # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen 453 | # will sort the (brief and detailed) documentation of class members so that 454 | # constructors and destructors are listed first. If set to NO (the default) 455 | # the constructors will appear in the respective orders defined by 456 | # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. 457 | # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO 458 | # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. 459 | 460 | SORT_MEMBERS_CTORS_1ST = NO 461 | 462 | # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the 463 | # hierarchy of group names into alphabetical order. If set to NO (the default) 464 | # the group names will appear in their defined order. 465 | 466 | SORT_GROUP_NAMES = NO 467 | 468 | # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be 469 | # sorted by fully-qualified names, including namespaces. 
If set to 470 | # NO (the default), the class list will be sorted only by class name, 471 | # not including the namespace part. 472 | # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. 473 | # Note: This option applies only to the class list, not to the 474 | # alphabetical list. 475 | 476 | SORT_BY_SCOPE_NAME = NO 477 | 478 | # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to 479 | # do proper type resolution of all parameters of a function it will reject a 480 | # match between the prototype and the implementation of a member function even 481 | # if there is only one candidate or it is obvious which candidate to choose 482 | # by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen 483 | # will still accept a match between prototype and implementation in such cases. 484 | 485 | STRICT_PROTO_MATCHING = NO 486 | 487 | # The GENERATE_TODOLIST tag can be used to enable (YES) or 488 | # disable (NO) the todo list. This list is created by putting \todo 489 | # commands in the documentation. 490 | 491 | GENERATE_TODOLIST = YES 492 | 493 | # The GENERATE_TESTLIST tag can be used to enable (YES) or 494 | # disable (NO) the test list. This list is created by putting \test 495 | # commands in the documentation. 496 | 497 | GENERATE_TESTLIST = YES 498 | 499 | # The GENERATE_BUGLIST tag can be used to enable (YES) or 500 | # disable (NO) the bug list. This list is created by putting \bug 501 | # commands in the documentation. 502 | 503 | GENERATE_BUGLIST = YES 504 | 505 | # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or 506 | # disable (NO) the deprecated list. This list is created by putting 507 | # \deprecated commands in the documentation. 508 | 509 | GENERATE_DEPRECATEDLIST= YES 510 | 511 | # The ENABLED_SECTIONS tag can be used to enable conditional 512 | # documentation sections, marked by \if sectionname ... \endif. 
513 | 514 | ENABLED_SECTIONS = 515 | 516 | # The MAX_INITIALIZER_LINES tag determines the maximum number of lines 517 | # the initial value of a variable or macro consists of for it to appear in 518 | # the documentation. If the initializer consists of more lines than specified 519 | # here it will be hidden. Use a value of 0 to hide initializers completely. 520 | # The appearance of the initializer of individual variables and macros in the 521 | # documentation can be controlled using \showinitializer or \hideinitializer 522 | # command in the documentation regardless of this setting. 523 | 524 | MAX_INITIALIZER_LINES = 30 525 | 526 | # Set the SHOW_USED_FILES tag to NO to disable the list of files generated 527 | # at the bottom of the documentation of classes and structs. If set to YES the 528 | # list will mention the files that were used to generate the documentation. 529 | 530 | SHOW_USED_FILES = YES 531 | 532 | # If the sources in your project are distributed over multiple directories 533 | # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy 534 | # in the documentation. The default is NO. 535 | 536 | SHOW_DIRECTORIES = NO 537 | 538 | # Set the SHOW_FILES tag to NO to disable the generation of the Files page. 539 | # This will remove the Files entry from the Quick Index and from the 540 | # Folder Tree View (if specified). The default is YES. 541 | 542 | SHOW_FILES = YES 543 | 544 | # Set the SHOW_NAMESPACES tag to NO to disable the generation of the 545 | # Namespaces page. 546 | # This will remove the Namespaces entry from the Quick Index 547 | # and from the Folder Tree View (if specified). The default is YES. 548 | 549 | SHOW_NAMESPACES = YES 550 | 551 | # The FILE_VERSION_FILTER tag can be used to specify a program or script that 552 | # doxygen should invoke to get the current version for each file (typically from 553 | # the version control system). 
Doxygen will invoke the program by executing (via 554 | # popen()) the command <command> <input-file>, where <command> is the value of 555 | # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file 556 | # provided by doxygen. Whatever the program writes to standard output 557 | # is used as the file version. See the manual for examples. 558 | 559 | FILE_VERSION_FILTER = 560 | 561 | # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed 562 | # by doxygen. The layout file controls the global structure of the generated 563 | # output files in an output format independent way. To create the layout file 564 | # that represents doxygen's defaults, run doxygen with the -l option. 565 | # You can optionally specify a file name after the option, if omitted 566 | # DoxygenLayout.xml will be used as the name of the layout file. 567 | 568 | LAYOUT_FILE = 569 | 570 | # The CITE_BIB_FILES tag can be used to specify one or more bib files 571 | # containing the references data. This must be a list of .bib files. The 572 | # .bib extension is automatically appended if omitted. Using this command 573 | # requires the bibtex tool to be installed. See also 574 | # http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style 575 | # of the bibliography can be controlled using LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the search path. 576 | 577 | CITE_BIB_FILES = 578 | 579 | #--------------------------------------------------------------------------- 580 | # configuration options related to warning and progress messages 581 | #--------------------------------------------------------------------------- 582 | 583 | # The QUIET tag can be used to turn on/off the messages that are generated 584 | # by doxygen. Possible values are YES and NO. If left blank NO is used. 585 | 586 | QUIET = NO 587 | 588 | # The WARNINGS tag can be used to turn on/off the warning messages that are 589 | # generated by doxygen. Possible values are YES and NO. 
If left blank 590 | # NO is used. 591 | 592 | WARNINGS = YES 593 | 594 | # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings 595 | # for undocumented members. If EXTRACT_ALL is set to YES then this flag will 596 | # automatically be disabled. 597 | 598 | WARN_IF_UNDOCUMENTED = YES 599 | 600 | # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for 601 | # potential errors in the documentation, such as not documenting some 602 | # parameters in a documented function, or documenting parameters that 603 | # don't exist or using markup commands wrongly. 604 | 605 | WARN_IF_DOC_ERROR = YES 606 | 607 | # The WARN_NO_PARAMDOC option can be enabled to get warnings for 608 | # functions that are documented, but have no documentation for their parameters 609 | # or return value. If set to NO (the default) doxygen will only warn about 610 | # wrong or incomplete parameter documentation, but not about the absence of 611 | # documentation. 612 | 613 | WARN_NO_PARAMDOC = NO 614 | 615 | # The WARN_FORMAT tag determines the format of the warning messages that 616 | # doxygen can produce. The string should contain the $file, $line, and $text 617 | # tags, which will be replaced by the file and line number from which the 618 | # warning originated and the warning text. Optionally the format may contain 619 | # $version, which will be replaced by the version of the file (if it could 620 | # be obtained via FILE_VERSION_FILTER) 621 | 622 | WARN_FORMAT = "$file:$line: $text" 623 | 624 | # The WARN_LOGFILE tag can be used to specify a file to which warning 625 | # and error messages should be written. If left blank the output is written 626 | # to stderr. 
627 | 628 | WARN_LOGFILE = 629 | 630 | #--------------------------------------------------------------------------- 631 | # configuration options related to the input files 632 | #--------------------------------------------------------------------------- 633 | 634 | # The INPUT tag can be used to specify the files and/or directories that contain 635 | # documented source files. You may enter file names like "myfile.cpp" or 636 | # directories like "/usr/src/myproject". Separate the files or directories 637 | # with spaces. 638 | 639 | INPUT = 640 | 641 | # This tag can be used to specify the character encoding of the source files 642 | # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is 643 | # also the default input encoding. Doxygen uses libiconv (or the iconv built 644 | # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for 645 | # the list of possible encodings. 646 | 647 | INPUT_ENCODING = UTF-8 648 | 649 | # If the value of the INPUT tag contains directories, you can use the 650 | # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 651 | # and *.h) to filter out the source-files in the directories. If left 652 | # blank the following patterns are tested: 653 | # *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh 654 | # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py 655 | # *.f90 *.f *.for *.vhd *.vhdl 656 | 657 | FILE_PATTERNS = 658 | 659 | # The RECURSIVE tag can be used to specify whether or not subdirectories 660 | # should be searched for input files as well. Possible values are YES and NO. 661 | # If left blank NO is used. 662 | 663 | RECURSIVE = YES 664 | 665 | # The EXCLUDE tag can be used to specify files and/or directories that should be 666 | # excluded from the INPUT source files. This way you can easily exclude a 667 | # subdirectory from a directory tree whose root is specified with the INPUT tag. 
668 | # Note that relative paths are relative to the directory from which doxygen is 669 | # run. 670 | 671 | EXCLUDE = "doc/examples" 672 | 673 | # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or 674 | # directories that are symbolic links (a Unix file system feature) are excluded 675 | # from the input. 676 | 677 | EXCLUDE_SYMLINKS = NO 678 | 679 | # If the value of the INPUT tag contains directories, you can use the 680 | # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude 681 | # certain files from those directories. Note that the wildcards are matched 682 | # against the file with absolute path, so to exclude all test directories 683 | # for example use the pattern */test/* 684 | 685 | EXCLUDE_PATTERNS = 686 | 687 | # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names 688 | # (namespaces, classes, functions, etc.) that should be excluded from the 689 | # output. The symbol name can be a fully qualified name, a word, or if the 690 | # wildcard * is used, a substring. Examples: ANamespace, AClass, 691 | # AClass::ANamespace, ANamespace::*Test 692 | 693 | EXCLUDE_SYMBOLS = 694 | 695 | # The EXAMPLE_PATH tag can be used to specify one or more files or 696 | # directories that contain example code fragments that are included (see 697 | # the \include command). 698 | 699 | EXAMPLE_PATH = "doc/examples" 700 | 701 | # If the value of the EXAMPLE_PATH tag contains directories, you can use the 702 | # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 703 | # and *.h) to filter out the source-files in the directories. If left 704 | # blank all files are included. 705 | 706 | EXAMPLE_PATTERNS = 707 | 708 | # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be 709 | # searched for input files to be used with the \include or \dontinclude 710 | # commands irrespective of the value of the RECURSIVE tag. 711 | # Possible values are YES and NO. If left blank NO is used. 
712 | 713 | EXAMPLE_RECURSIVE = YES 714 | 715 | # The IMAGE_PATH tag can be used to specify one or more files or 716 | # directories that contain images that are included in the documentation (see 717 | # the \image command). 718 | 719 | IMAGE_PATH = "docs/pictures" 720 | 721 | # The INPUT_FILTER tag can be used to specify a program that doxygen should 722 | # invoke to filter for each input file. Doxygen will invoke the filter program 723 | # by executing (via popen()) the command <filter> <input-file>, where 724 | # <filter> is the value of the INPUT_FILTER tag, and <input-file> is the name of an 725 | # input file. Doxygen will then use the output that the filter program writes 726 | # to standard output. 727 | # If FILTER_PATTERNS is specified, this tag will be 728 | # ignored. 729 | 730 | INPUT_FILTER = 731 | 732 | # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern 733 | # basis. 734 | # Doxygen will compare the file name with each pattern and apply the 735 | # filter if there is a match. 736 | # The filters are a list of the form: 737 | # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further 738 | # info on how filters are used. If FILTER_PATTERNS is empty or if 739 | # none of the patterns match the file name, INPUT_FILTER is applied. 740 | 741 | FILTER_PATTERNS = 742 | 743 | # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using 744 | # INPUT_FILTER) will be used to filter the input files when producing source 745 | # files to browse (i.e. when SOURCE_BROWSER is set to YES). 746 | 747 | FILTER_SOURCE_FILES = NO 748 | 749 | # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file 750 | # pattern. A pattern will override the setting for FILTER_PATTERN (if any) 751 | # and it is also possible to disable source filtering for a specific pattern 752 | # using *.ext= (so without naming a filter). This option only has effect when 753 | # FILTER_SOURCE_FILES is enabled. 
754 | 755 | FILTER_SOURCE_PATTERNS = 756 | 757 | #--------------------------------------------------------------------------- 758 | # configuration options related to source browsing 759 | #--------------------------------------------------------------------------- 760 | 761 | # If the SOURCE_BROWSER tag is set to YES then a list of source files will 762 | # be generated. Documented entities will be cross-referenced with these sources. 763 | # Note: To get rid of all source code in the generated output, make sure also 764 | # VERBATIM_HEADERS is set to NO. 765 | 766 | SOURCE_BROWSER = NO 767 | 768 | # Setting the INLINE_SOURCES tag to YES will include the body 769 | # of functions and classes directly in the documentation. 770 | 771 | INLINE_SOURCES = NO 772 | 773 | # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct 774 | # doxygen to hide any special comment blocks from generated source code 775 | # fragments. Normal C and C++ comments will always remain visible. 776 | 777 | STRIP_CODE_COMMENTS = YES 778 | 779 | # If the REFERENCED_BY_RELATION tag is set to YES 780 | # then for each documented function all documented 781 | # functions referencing it will be listed. 782 | 783 | REFERENCED_BY_RELATION = NO 784 | 785 | # If the REFERENCES_RELATION tag is set to YES 786 | # then for each documented function all documented entities 787 | # called/used by that function will be listed. 788 | 789 | REFERENCES_RELATION = NO 790 | 791 | # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) 792 | # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from 793 | # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will 794 | # link to the source code. 795 | # Otherwise they will link to the documentation. 
796 | 797 | REFERENCES_LINK_SOURCE = YES 798 | 799 | # If the USE_HTAGS tag is set to YES then the references to source code 800 | # will point to the HTML generated by the htags(1) tool instead of doxygen 801 | # built-in source browser. The htags tool is part of GNU's global source 802 | # tagging system (see http://www.gnu.org/software/global/global.html). You 803 | # will need version 4.8.6 or higher. 804 | 805 | USE_HTAGS = NO 806 | 807 | # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen 808 | # will generate a verbatim copy of the header file for each class for 809 | # which an include is specified. Set to NO to disable this. 810 | 811 | VERBATIM_HEADERS = YES 812 | 813 | #--------------------------------------------------------------------------- 814 | # configuration options related to the alphabetical class index 815 | #--------------------------------------------------------------------------- 816 | 817 | # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index 818 | # of all compounds will be generated. Enable this if the project 819 | # contains a lot of classes, structs, unions or interfaces. 820 | 821 | ALPHABETICAL_INDEX = YES 822 | 823 | # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then 824 | # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns 825 | # in which this list will be split (can be a number in the range [1..20]) 826 | 827 | COLS_IN_ALPHA_INDEX = 5 828 | 829 | # In case all classes in a project start with a common prefix, all 830 | # classes will be put under the same header in the alphabetical index. 831 | # The IGNORE_PREFIX tag can be used to specify one or more prefixes that 832 | # should be ignored while generating the index headers. 
833 | 834 | IGNORE_PREFIX = 835 | 836 | #--------------------------------------------------------------------------- 837 | # configuration options related to the HTML output 838 | #--------------------------------------------------------------------------- 839 | 840 | # If the GENERATE_HTML tag is set to YES (the default) Doxygen will 841 | # generate HTML output. 842 | 843 | GENERATE_HTML = YES 844 | 845 | # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. 846 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 847 | # put in front of it. If left blank `html' will be used as the default path. 848 | 849 | HTML_OUTPUT = html 850 | 851 | # The HTML_FILE_EXTENSION tag can be used to specify the file extension for 852 | # each generated HTML page (for example: .htm,.php,.asp). If it is left blank 853 | # doxygen will generate files with .html extension. 854 | 855 | HTML_FILE_EXTENSION = .html 856 | 857 | # The HTML_HEADER tag can be used to specify a personal HTML header for 858 | # each generated HTML page. If it is left blank doxygen will generate a 859 | # standard header. Note that when using a custom header you are responsible 860 | # for the proper inclusion of any scripts and style sheets that doxygen 861 | # needs, which is dependent on the configuration options used. 862 | # It is advised to generate a default header using "doxygen -w html 863 | # header.html footer.html stylesheet.css YourConfigFile" and then modify 864 | # that header. Note that the header is subject to change so you typically 865 | # have to redo this when upgrading to a newer version of doxygen or when 866 | # changing the value of configuration settings such as GENERATE_TREEVIEW! 867 | 868 | HTML_HEADER = 869 | 870 | # The HTML_FOOTER tag can be used to specify a personal HTML footer for 871 | # each generated HTML page. If it is left blank doxygen will generate a 872 | # standard footer. 
873 | 874 | HTML_FOOTER = 875 | 876 | # The HTML_STYLESHEET tag can be used to specify a user-defined cascading 877 | # style sheet that is used by each HTML page. It can be used to 878 | # fine-tune the look of the HTML output. If the tag is left blank doxygen 879 | # will generate a default style sheet. Note that doxygen will try to copy 880 | # the style sheet file to the HTML output directory, so don't put your own 881 | # style sheet in the HTML output directory as well, or it will be erased! 882 | 883 | HTML_STYLESHEET = 884 | 885 | # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or 886 | # other source files which should be copied to the HTML output directory. Note 887 | # that these files will be copied to the base HTML output directory. Use the 888 | # $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these 889 | # files. In the HTML_STYLESHEET file, use the file name only. Also note that 890 | # the files will be copied as-is; there are no commands or markers available. 891 | 892 | HTML_EXTRA_FILES = 893 | 894 | # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. 895 | # Doxygen will adjust the colors in the style sheet and background images 896 | # according to this color. Hue is specified as an angle on a colorwheel, 897 | # see http://en.wikipedia.org/wiki/Hue for more information. 898 | # For instance the value 0 represents red, 60 is yellow, 120 is green, 899 | # 180 is cyan, 240 is blue, 300 purple, and 360 is red again. 900 | # The allowed range is 0 to 359. 901 | 902 | HTML_COLORSTYLE_HUE = 220 903 | 904 | # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of 905 | # the colors in the HTML output. For a value of 0 the output will use 906 | # grayscales only. A value of 255 will produce the most vivid colors. 
907 | 908 | HTML_COLORSTYLE_SAT = 100 909 | 910 | # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to 911 | # the luminance component of the colors in the HTML output. Values below 912 | # 100 gradually make the output lighter, whereas values above 100 make 913 | # the output darker. The value divided by 100 is the actual gamma applied, 914 | # so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, 915 | # and 100 does not change the gamma. 916 | 917 | HTML_COLORSTYLE_GAMMA = 80 918 | 919 | # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML 920 | # page will contain the date and time when the page was generated. Setting 921 | # this to NO can help when comparing the output of multiple runs. 922 | 923 | HTML_TIMESTAMP = YES 924 | 925 | # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, 926 | # files or namespaces will be aligned in HTML using tables. If set to 927 | # NO a bullet list will be used. 928 | 929 | HTML_ALIGN_MEMBERS = YES 930 | 931 | # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML 932 | # documentation will contain sections that can be hidden and shown after the 933 | # page has loaded. For this to work a browser that supports 934 | # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox 935 | # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). 936 | 937 | HTML_DYNAMIC_SECTIONS = NO 938 | 939 | # If the GENERATE_DOCSET tag is set to YES, additional index files 940 | # will be generated that can be used as input for Apple's Xcode 3 941 | # integrated development environment, introduced with OSX 10.5 (Leopard). 942 | # To create a documentation set, doxygen will generate a Makefile in the 943 | # HTML output directory. 
Running make will produce the docset in that 944 | # directory and running "make install" will install the docset in 945 | # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find 946 | # it at startup. 947 | # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html 948 | # for more information. 949 | 950 | GENERATE_DOCSET = NO 951 | 952 | # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the 953 | # feed. A documentation feed provides an umbrella under which multiple 954 | # documentation sets from a single provider (such as a company or product suite) 955 | # can be grouped. 956 | 957 | DOCSET_FEEDNAME = "Doxygen generated docs" 958 | 959 | # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that 960 | # should uniquely identify the documentation set bundle. This should be a 961 | # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen 962 | # will append .docset to the name. 963 | 964 | DOCSET_BUNDLE_ID = org.doxygen.Project 965 | 966 | # When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify 967 | # the documentation publisher. This should be a reverse domain-name style 968 | # string, e.g. com.mycompany.MyDocSet.documentation. 969 | 970 | DOCSET_PUBLISHER_ID = org.doxygen.Publisher 971 | 972 | # The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher. 973 | 974 | DOCSET_PUBLISHER_NAME = Publisher 975 | 976 | # If the GENERATE_HTMLHELP tag is set to YES, additional index files 977 | # will be generated that can be used as input for tools like the 978 | # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) 979 | # of the generated HTML documentation. 980 | 981 | GENERATE_HTMLHELP = NO 982 | 983 | # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can 984 | # be used to specify the file name of the resulting .chm file. 
You 985 | # can add a path in front of the file if the result should not be 986 | # written to the html output directory. 987 | 988 | CHM_FILE = 989 | 990 | # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can 991 | # be used to specify the location (absolute path including file name) of 992 | # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run 993 | # the HTML help compiler on the generated index.hhp. 994 | 995 | HHC_LOCATION = 996 | 997 | # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag 998 | # controls if a separate .chi index file is generated (YES) or that 999 | # it should be included in the master .chm file (NO). 1000 | 1001 | GENERATE_CHI = NO 1002 | 1003 | # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING 1004 | # is used to encode HtmlHelp index (hhk), content (hhc) and project file 1005 | # content. 1006 | 1007 | CHM_INDEX_ENCODING = 1008 | 1009 | # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag 1010 | # controls whether a binary table of contents is generated (YES) or a 1011 | # normal table of contents (NO) in the .chm file. 1012 | 1013 | BINARY_TOC = NO 1014 | 1015 | # The TOC_EXPAND flag can be set to YES to add extra items for group members 1016 | # to the contents of the HTML help documentation and to the tree view. 1017 | 1018 | TOC_EXPAND = NO 1019 | 1020 | # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and 1021 | # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated 1022 | # that can be used as input for Qt's qhelpgenerator to generate a 1023 | # Qt Compressed Help (.qch) of the generated HTML documentation. 1024 | 1025 | GENERATE_QHP = NO 1026 | 1027 | # If the QHG_LOCATION tag is specified, the QCH_FILE tag can 1028 | # be used to specify the file name of the resulting .qch file. 1029 | # The path specified is relative to the HTML output folder. 
1030 | 1031 | QCH_FILE = 1032 | 1033 | # The QHP_NAMESPACE tag specifies the namespace to use when generating 1034 | # Qt Help Project output. For more information please see 1035 | # http://doc.trolltech.com/qthelpproject.html#namespace 1036 | 1037 | QHP_NAMESPACE = org.doxygen.Project 1038 | 1039 | # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating 1040 | # Qt Help Project output. For more information please see 1041 | # http://doc.trolltech.com/qthelpproject.html#virtual-folders 1042 | 1043 | QHP_VIRTUAL_FOLDER = doc 1044 | 1045 | # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to 1046 | # add. For more information please see 1047 | # http://doc.trolltech.com/qthelpproject.html#custom-filters 1048 | 1049 | QHP_CUST_FILTER_NAME = 1050 | 1051 | # The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the 1052 | # custom filter to add. For more information please see 1053 | # 1054 | # Qt Help Project / Custom Filters. 1055 | 1056 | QHP_CUST_FILTER_ATTRS = 1057 | 1058 | # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this 1059 | # project's 1060 | # filter section matches. 1061 | # 1062 | # Qt Help Project / Filter Attributes. 1063 | 1064 | QHP_SECT_FILTER_ATTRS = 1065 | 1066 | # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can 1067 | # be used to specify the location of Qt's qhelpgenerator. 1068 | # If non-empty doxygen will try to run qhelpgenerator on the generated 1069 | # .qhp file. 1070 | 1071 | QHG_LOCATION = 1072 | 1073 | # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files 1074 | # will be generated, which together with the HTML files, form an Eclipse help 1075 | # plugin. To install this plugin and make it available under the help contents 1076 | # menu in Eclipse, the contents of the directory containing the HTML and XML 1077 | # files needs to be copied into the plugins directory of eclipse. 
The name of 1078 | # the directory within the plugins directory should be the same as 1079 | # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before 1080 | # the help appears. 1081 | 1082 | GENERATE_ECLIPSEHELP = NO 1083 | 1084 | # A unique identifier for the eclipse help plugin. When installing the plugin 1085 | # the directory name containing the HTML and XML files should also have 1086 | # this name. 1087 | 1088 | ECLIPSE_DOC_ID = org.doxygen.Project 1089 | 1090 | # The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) 1091 | # at top of each HTML page. The value NO (the default) enables the index and 1092 | # the value YES disables it. Since the tabs have the same information as the 1093 | # navigation tree you can set this option to NO if you already set 1094 | # GENERATE_TREEVIEW to YES. 1095 | 1096 | DISABLE_INDEX = NO 1097 | 1098 | # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index 1099 | # structure should be generated to display hierarchical information. 1100 | # If the tag value is set to YES, a side panel will be generated 1101 | # containing a tree-like index structure (just like the one that 1102 | # is generated for HTML Help). For this to work a browser that supports 1103 | # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). 1104 | # Windows users are probably better off using the HTML help feature. 1105 | # Since the tree basically has the same information as the tab index you 1106 | # could consider to set DISABLE_INDEX to NO when enabling this option. 1107 | 1108 | GENERATE_TREEVIEW = NO 1109 | 1110 | # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values 1111 | # (range [0,1..20]) that doxygen will group on one line in the generated HTML 1112 | # documentation. Note that a value of 0 will completely suppress the enum 1113 | # values from appearing in the overview section. 
1114 | 1115 | ENUM_VALUES_PER_LINE = 4 1116 | 1117 | # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, 1118 | # and Class Hierarchy pages using a tree view instead of an ordered list. 1119 | 1120 | USE_INLINE_TREES = NO 1121 | 1122 | # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be 1123 | # used to set the initial width (in pixels) of the frame in which the tree 1124 | # is shown. 1125 | 1126 | TREEVIEW_WIDTH = 250 1127 | 1128 | # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open 1129 | # links to external symbols imported via tag files in a separate window. 1130 | 1131 | EXT_LINKS_IN_WINDOW = NO 1132 | 1133 | # Use this tag to change the font size of Latex formulas included 1134 | # as images in the HTML documentation. The default is 10. Note that 1135 | # when you change the font size after a successful doxygen run you need 1136 | # to manually remove any form_*.png images from the HTML output directory 1137 | # to force them to be regenerated. 1138 | 1139 | FORMULA_FONTSIZE = 10 1140 | 1141 | # Use the FORMULA_TRANSPARENT tag to determine whether or not the images 1142 | # generated for formulas are transparent PNGs. Transparent PNGs are 1143 | # not supported properly for IE 6.0, but are supported on all modern browsers. 1144 | # Note that when changing this option you need to delete any form_*.png files 1145 | # in the HTML output before the changes have effect. 1146 | 1147 | FORMULA_TRANSPARENT = YES 1148 | 1149 | # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax 1150 | # (see http://www.mathjax.org) which uses client side Javascript for the 1151 | # rendering instead of using prerendered bitmaps. Use this if you do not 1152 | # have LaTeX installed or if you want the formulas to look prettier in the HTML 1153 | # output. When enabled you also need to install MathJax separately and 1154 | # configure the path to it using the MATHJAX_RELPATH option. 
1155 | 1156 | USE_MATHJAX = NO 1157 | 1158 | # When MathJax is enabled you need to specify the location relative to the 1159 | # HTML output directory using the MATHJAX_RELPATH option. The destination 1160 | # directory should contain the MathJax.js script. For instance, if the mathjax 1161 | # directory is located at the same level as the HTML output directory, then 1162 | # MATHJAX_RELPATH should be ../mathjax. The default value points to the 1163 | # mathjax.org site, so you can quickly see the result without installing 1164 | # MathJax, but it is strongly recommended to install a local copy of MathJax 1165 | # before deployment. 1166 | 1167 | MATHJAX_RELPATH = http://www.mathjax.org/mathjax 1168 | 1169 | # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension 1170 | # names that should be enabled during MathJax rendering. 1171 | 1172 | MATHJAX_EXTENSIONS = 1173 | 1174 | # When the SEARCHENGINE tag is enabled doxygen will generate a search box 1175 | # for the HTML output. The underlying search engine uses javascript 1176 | # and DHTML and should work on any modern browser. Note that when using 1177 | # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets 1178 | # (GENERATE_DOCSET) there is already a search function so this one should 1179 | # typically be disabled. For large projects the javascript based search engine 1180 | # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. 1181 | 1182 | SEARCHENGINE = YES 1183 | 1184 | # When the SERVER_BASED_SEARCH tag is enabled the search engine will be 1185 | # implemented using a PHP enabled web server instead of at the web client 1186 | # using Javascript. Doxygen will generate the search PHP script and index 1187 | # file to put on the web server. The advantage of the server 1188 | # based approach is that it scales better to large projects and allows 1189 | # full text search. 
The disadvantages are that it is more difficult to setup 1190 | # and does not have live searching capabilities. 1191 | 1192 | SERVER_BASED_SEARCH = NO 1193 | 1194 | #--------------------------------------------------------------------------- 1195 | # configuration options related to the LaTeX output 1196 | #--------------------------------------------------------------------------- 1197 | 1198 | # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will 1199 | # generate Latex output. 1200 | 1201 | GENERATE_LATEX = NO 1202 | 1203 | # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 1204 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1205 | # put in front of it. If left blank `latex' will be used as the default path. 1206 | 1207 | LATEX_OUTPUT = latex 1208 | 1209 | # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be 1210 | # invoked. If left blank `latex' will be used as the default command name. 1211 | # Note that when enabling USE_PDFLATEX this option is only used for 1212 | # generating bitmaps for formulas in the HTML output, but not in the 1213 | # Makefile that is written to the output directory. 1214 | 1215 | LATEX_CMD_NAME = latex 1216 | 1217 | # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to 1218 | # generate index for LaTeX. If left blank `makeindex' will be used as the 1219 | # default command name. 1220 | 1221 | MAKEINDEX_CMD_NAME = makeindex 1222 | 1223 | # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact 1224 | # LaTeX documents. This may be useful for small projects and may help to 1225 | # save some trees in general. 1226 | 1227 | COMPACT_LATEX = NO 1228 | 1229 | # The PAPER_TYPE tag can be used to set the paper type that is used 1230 | # by the printer. Possible values are: a4, letter, legal and 1231 | # executive. If left blank a4wide will be used. 
1232 | 1233 | PAPER_TYPE = a4 1234 | 1235 | # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX 1236 | # packages that should be included in the LaTeX output. 1237 | 1238 | EXTRA_PACKAGES = 1239 | 1240 | # The LATEX_HEADER tag can be used to specify a personal LaTeX header for 1241 | # the generated latex document. The header should contain everything until 1242 | # the first chapter. If it is left blank doxygen will generate a 1243 | # standard header. Notice: only use this tag if you know what you are doing! 1244 | 1245 | LATEX_HEADER = 1246 | 1247 | # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for 1248 | # the generated latex document. The footer should contain everything after 1249 | # the last chapter. If it is left blank doxygen will generate a 1250 | # standard footer. Notice: only use this tag if you know what you are doing! 1251 | 1252 | LATEX_FOOTER = 1253 | 1254 | # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated 1255 | # is prepared for conversion to pdf (using ps2pdf). The pdf file will 1256 | # contain links (just like the HTML output) instead of page references 1257 | # This makes the output suitable for online browsing using a pdf viewer. 1258 | 1259 | PDF_HYPERLINKS = YES 1260 | 1261 | # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of 1262 | # plain latex in the generated Makefile. Set this option to YES to get a 1263 | # higher quality PDF documentation. 1264 | 1265 | USE_PDFLATEX = YES 1266 | 1267 | # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. 1268 | # command to the generated LaTeX files. This will instruct LaTeX to keep 1269 | # running if errors occur, instead of asking the user for help. 1270 | # This option is also used when generating formulas in HTML. 
1271 | 1272 | LATEX_BATCHMODE = NO 1273 | 1274 | # If LATEX_HIDE_INDICES is set to YES then doxygen will not 1275 | # include the index chapters (such as File Index, Compound Index, etc.) 1276 | # in the output. 1277 | 1278 | LATEX_HIDE_INDICES = NO 1279 | 1280 | # If LATEX_SOURCE_CODE is set to YES then doxygen will include 1281 | # source code with syntax highlighting in the LaTeX output. 1282 | # Note that which sources are shown also depends on other settings 1283 | # such as SOURCE_BROWSER. 1284 | 1285 | LATEX_SOURCE_CODE = NO 1286 | 1287 | # The LATEX_BIB_STYLE tag can be used to specify the style to use for the 1288 | # bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See 1289 | # http://en.wikipedia.org/wiki/BibTeX for more info. 1290 | 1291 | LATEX_BIB_STYLE = plain 1292 | 1293 | #--------------------------------------------------------------------------- 1294 | # configuration options related to the RTF output 1295 | #--------------------------------------------------------------------------- 1296 | 1297 | # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output 1298 | # The RTF output is optimized for Word 97 and may not look very pretty with 1299 | # other RTF readers or editors. 1300 | 1301 | GENERATE_RTF = NO 1302 | 1303 | # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. 1304 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1305 | # put in front of it. If left blank `rtf' will be used as the default path. 1306 | 1307 | RTF_OUTPUT = rtf 1308 | 1309 | # If the COMPACT_RTF tag is set to YES Doxygen generates more compact 1310 | # RTF documents. This may be useful for small projects and may help to 1311 | # save some trees in general. 1312 | 1313 | COMPACT_RTF = NO 1314 | 1315 | # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated 1316 | # will contain hyperlink fields. 
The RTF file will 1317 | # contain links (just like the HTML output) instead of page references. 1318 | # This makes the output suitable for online browsing using WORD or other 1319 | # programs which support those fields. 1320 | # Note: wordpad (write) and others do not support links. 1321 | 1322 | RTF_HYPERLINKS = NO 1323 | 1324 | # Load style sheet definitions from file. Syntax is similar to doxygen's 1325 | # config file, i.e. a series of assignments. You only have to provide 1326 | # replacements, missing definitions are set to their default value. 1327 | 1328 | RTF_STYLESHEET_FILE = 1329 | 1330 | # Set optional variables used in the generation of an rtf document. 1331 | # Syntax is similar to doxygen's config file. 1332 | 1333 | RTF_EXTENSIONS_FILE = 1334 | 1335 | #--------------------------------------------------------------------------- 1336 | # configuration options related to the man page output 1337 | #--------------------------------------------------------------------------- 1338 | 1339 | # If the GENERATE_MAN tag is set to YES (the default) Doxygen will 1340 | # generate man pages 1341 | 1342 | GENERATE_MAN = NO 1343 | 1344 | # The MAN_OUTPUT tag is used to specify where the man pages will be put. 1345 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1346 | # put in front of it. If left blank `man' will be used as the default path. 1347 | 1348 | MAN_OUTPUT = man 1349 | 1350 | # The MAN_EXTENSION tag determines the extension that is added to 1351 | # the generated man pages (default is the subroutine's section .3) 1352 | 1353 | MAN_EXTENSION = .3 1354 | 1355 | # If the MAN_LINKS tag is set to YES and Doxygen generates man output, 1356 | # then it will generate one additional man file for each entity 1357 | # documented in the real man page(s). These additional files 1358 | # only source the real man page, but without them the man command 1359 | # would be unable to find the correct page. The default is NO. 
1360 | 1361 | MAN_LINKS = NO 1362 | 1363 | #--------------------------------------------------------------------------- 1364 | # configuration options related to the XML output 1365 | #--------------------------------------------------------------------------- 1366 | 1367 | # If the GENERATE_XML tag is set to YES Doxygen will 1368 | # generate an XML file that captures the structure of 1369 | # the code including all documentation. 1370 | 1371 | GENERATE_XML = NO 1372 | 1373 | # The XML_OUTPUT tag is used to specify where the XML pages will be put. 1374 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be 1375 | # put in front of it. If left blank `xml' will be used as the default path. 1376 | 1377 | XML_OUTPUT = xml 1378 | 1379 | # The XML_SCHEMA tag can be used to specify an XML schema, 1380 | # which can be used by a validating XML parser to check the 1381 | # syntax of the XML files. 1382 | 1383 | XML_SCHEMA = 1384 | 1385 | # The XML_DTD tag can be used to specify an XML DTD, 1386 | # which can be used by a validating XML parser to check the 1387 | # syntax of the XML files. 1388 | 1389 | XML_DTD = 1390 | 1391 | # If the XML_PROGRAMLISTING tag is set to YES Doxygen will 1392 | # dump the program listings (including syntax highlighting 1393 | # and cross-referencing information) to the XML output. Note that 1394 | # enabling this will significantly increase the size of the XML output. 1395 | 1396 | XML_PROGRAMLISTING = YES 1397 | 1398 | #--------------------------------------------------------------------------- 1399 | # configuration options for the AutoGen Definitions output 1400 | #--------------------------------------------------------------------------- 1401 | 1402 | # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will 1403 | # generate an AutoGen Definitions (see autogen.sf.net) file 1404 | # that captures the structure of the code including all 1405 | # documentation. 
Note that this feature is still experimental 1406 | # and incomplete at the moment. 1407 | 1408 | GENERATE_AUTOGEN_DEF = NO 1409 | 1410 | #--------------------------------------------------------------------------- 1411 | # configuration options related to the Perl module output 1412 | #--------------------------------------------------------------------------- 1413 | 1414 | # If the GENERATE_PERLMOD tag is set to YES Doxygen will 1415 | # generate a Perl module file that captures the structure of 1416 | # the code including all documentation. Note that this 1417 | # feature is still experimental and incomplete at the 1418 | # moment. 1419 | 1420 | GENERATE_PERLMOD = NO 1421 | 1422 | # If the PERLMOD_LATEX tag is set to YES Doxygen will generate 1423 | # the necessary Makefile rules, Perl scripts and LaTeX code to be able 1424 | # to generate PDF and DVI output from the Perl module output. 1425 | 1426 | PERLMOD_LATEX = NO 1427 | 1428 | # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be 1429 | # nicely formatted so it can be parsed by a human reader. 1430 | # This is useful 1431 | # if you want to understand what is going on. 1432 | # On the other hand, if this 1433 | # tag is set to NO the size of the Perl module output will be much smaller 1434 | # and Perl will parse it just the same. 1435 | 1436 | PERLMOD_PRETTY = YES 1437 | 1438 | # The names of the make variables in the generated doxyrules.make file 1439 | # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. 1440 | # This is useful so different doxyrules.make files included by the same 1441 | # Makefile don't overwrite each other's variables. 
1442 | 1443 | PERLMOD_MAKEVAR_PREFIX = 1444 | 1445 | #--------------------------------------------------------------------------- 1446 | # Configuration options related to the preprocessor 1447 | #--------------------------------------------------------------------------- 1448 | 1449 | # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will 1450 | # evaluate all C-preprocessor directives found in the sources and include 1451 | # files. 1452 | 1453 | ENABLE_PREPROCESSING = YES 1454 | 1455 | # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro 1456 | # names in the source code. If set to NO (the default) only conditional 1457 | # compilation will be performed. Macro expansion can be done in a controlled 1458 | # way by setting EXPAND_ONLY_PREDEF to YES. 1459 | 1460 | MACRO_EXPANSION = NO 1461 | 1462 | # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES 1463 | # then the macro expansion is limited to the macros specified with the 1464 | # PREDEFINED and EXPAND_AS_DEFINED tags. 1465 | 1466 | EXPAND_ONLY_PREDEF = NO 1467 | 1468 | # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files 1469 | # pointed to by INCLUDE_PATH will be searched when a #include is found. 1470 | 1471 | SEARCH_INCLUDES = YES 1472 | 1473 | # The INCLUDE_PATH tag can be used to specify one or more directories that 1474 | # contain include files that are not input files but should be processed by 1475 | # the preprocessor. 1476 | 1477 | INCLUDE_PATH = 1478 | 1479 | # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard 1480 | # patterns (like *.h and *.hpp) to filter out the header-files in the 1481 | # directories. If left blank, the patterns specified with FILE_PATTERNS will 1482 | # be used. 
1483 | 1484 | INCLUDE_FILE_PATTERNS = 1485 | 1486 | # The PREDEFINED tag can be used to specify one or more macro names that 1487 | # are defined before the preprocessor is started (similar to the -D option of 1488 | # gcc). The argument of the tag is a list of macros of the form: name 1489 | # or name=definition (no spaces). If the definition and the = are 1490 | # omitted =1 is assumed. To prevent a macro definition from being 1491 | # undefined via #undef or recursively expanded use the := operator 1492 | # instead of the = operator. 1493 | 1494 | PREDEFINED = 1495 | 1496 | # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then 1497 | # this tag can be used to specify a list of macro names that should be expanded. 1498 | # The macro definition that is found in the sources will be used. 1499 | # Use the PREDEFINED tag if you want to use a different macro definition that 1500 | # overrules the definition found in the source code. 1501 | 1502 | EXPAND_AS_DEFINED = 1503 | 1504 | # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then 1505 | # doxygen's preprocessor will remove all references to function-like macros 1506 | # that are alone on a line, have an all uppercase name, and do not end with a 1507 | # semicolon, because these will confuse the parser if not removed. 1508 | 1509 | SKIP_FUNCTION_MACROS = YES 1510 | 1511 | #--------------------------------------------------------------------------- 1512 | # Configuration::additions related to external references 1513 | #--------------------------------------------------------------------------- 1514 | 1515 | # The TAGFILES option can be used to specify one or more tagfiles. 1516 | # Optionally an initial location of the external documentation 1517 | # can be added for each tagfile. The format of a tag file without 1518 | # this location is as follows: 1519 | # 1520 | # TAGFILES = file1 file2 ... 
1521 | # Adding location for the tag files is done as follows: 1522 | # 1523 | # TAGFILES = file1=loc1 "file2 = loc2" ... 1524 | # where "loc1" and "loc2" can be relative or absolute paths or 1525 | # URLs. If a location is present for each tag, the installdox tool 1526 | # does not have to be run to correct the links. 1527 | # Note that each tag file must have a unique name 1528 | # (where the name does NOT include the path) 1529 | # If a tag file is not located in the directory in which doxygen 1530 | # is run, you must also specify the path to the tagfile here. 1531 | 1532 | TAGFILES = 1533 | 1534 | # When a file name is specified after GENERATE_TAGFILE, doxygen will create 1535 | # a tag file that is based on the input files it reads. 1536 | 1537 | GENERATE_TAGFILE = 1538 | 1539 | # If the ALLEXTERNALS tag is set to YES all external classes will be listed 1540 | # in the class index. If set to NO only the inherited external classes 1541 | # will be listed. 1542 | 1543 | ALLEXTERNALS = NO 1544 | 1545 | # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed 1546 | # in the modules index. If set to NO, only the current project's groups will 1547 | # be listed. 1548 | 1549 | EXTERNAL_GROUPS = YES 1550 | 1551 | # The PERL_PATH should be the absolute path and name of the perl script 1552 | # interpreter (i.e. the result of `which perl'). 1553 | 1554 | PERL_PATH = /usr/bin/perl 1555 | 1556 | #--------------------------------------------------------------------------- 1557 | # Configuration options related to the dot tool 1558 | #--------------------------------------------------------------------------- 1559 | 1560 | # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will 1561 | # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base 1562 | # or super classes. Setting the tag to NO turns the diagrams off. 
Note that 1563 | # this option also works with HAVE_DOT disabled, but it is recommended to 1564 | # install and use dot, since it yields more powerful graphs. 1565 | 1566 | CLASS_DIAGRAMS = YES 1567 | 1568 | # You can define message sequence charts within doxygen comments using the \msc 1569 | # command. Doxygen will then run the mscgen tool (see 1570 | # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the 1571 | # documentation. The MSCGEN_PATH tag allows you to specify the directory where 1572 | # the mscgen tool resides. If left empty the tool is assumed to be found in the 1573 | # default search path. 1574 | 1575 | MSCGEN_PATH = 1576 | 1577 | # If set to YES, the inheritance and collaboration graphs will hide 1578 | # inheritance and usage relations if the target is undocumented 1579 | # or is not a class. 1580 | 1581 | HIDE_UNDOC_RELATIONS = YES 1582 | 1583 | # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is 1584 | # available from the path. This tool is part of Graphviz, a graph visualization 1585 | # toolkit from AT&T and Lucent Bell Labs. The other options in this section 1586 | # have no effect if this option is set to NO (the default) 1587 | 1588 | HAVE_DOT = YES 1589 | 1590 | # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is 1591 | # allowed to run in parallel. When set to 0 (the default) doxygen will 1592 | # base this on the number of processors available in the system. You can set it 1593 | # explicitly to a value larger than 0 to get control over the balance 1594 | # between CPU load and processing speed. 1595 | 1596 | DOT_NUM_THREADS = 0 1597 | 1598 | # By default doxygen will use the Helvetica font for all dot files that 1599 | # doxygen generates. When you want a differently looking font you can specify 1600 | # the font name using DOT_FONTNAME. 
You need to make sure dot is able to find 1601 | # the font, which can be done by putting it in a standard location or by setting 1602 | # the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the 1603 | # directory containing the font. 1604 | 1605 | DOT_FONTNAME = Helvetica 1606 | 1607 | # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. 1608 | # The default size is 10pt. 1609 | 1610 | DOT_FONTSIZE = 10 1611 | 1612 | # By default doxygen will tell dot to use the Helvetica font. 1613 | # If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to 1614 | # set the path where dot can find it. 1615 | 1616 | DOT_FONTPATH = 1617 | 1618 | # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen 1619 | # will generate a graph for each documented class showing the direct and 1620 | # indirect inheritance relations. Setting this tag to YES will force the 1621 | # CLASS_DIAGRAMS tag to NO. 1622 | 1623 | CLASS_GRAPH = YES 1624 | 1625 | # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen 1626 | # will generate a graph for each documented class showing the direct and 1627 | # indirect implementation dependencies (inheritance, containment, and 1628 | # class references variables) of the class with other documented classes. 1629 | 1630 | COLLABORATION_GRAPH = YES 1631 | 1632 | # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen 1633 | # will generate a graph for groups, showing the direct groups dependencies 1634 | 1635 | GROUP_GRAPHS = YES 1636 | 1637 | # If the UML_LOOK tag is set to YES doxygen will generate inheritance and 1638 | # collaboration diagrams in a style similar to the OMG's Unified Modeling 1639 | # Language. 1640 | 1641 | UML_LOOK = NO 1642 | 1643 | # If set to YES, the inheritance and collaboration graphs will show the 1644 | # relations between templates and their instances. 
1645 | 1646 | TEMPLATE_RELATIONS = NO 1647 | 1648 | # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT 1649 | # tags are set to YES then doxygen will generate a graph for each documented 1650 | # file showing the direct and indirect include dependencies of the file with 1651 | # other documented files. 1652 | 1653 | INCLUDE_GRAPH = YES 1654 | 1655 | # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and 1656 | # HAVE_DOT tags are set to YES then doxygen will generate a graph for each 1657 | # documented header file showing the documented files that directly or 1658 | # indirectly include this file. 1659 | 1660 | INCLUDED_BY_GRAPH = YES 1661 | 1662 | # If the CALL_GRAPH and HAVE_DOT options are set to YES then 1663 | # doxygen will generate a call dependency graph for every global function 1664 | # or class method. Note that enabling this option will significantly increase 1665 | # the time of a run. So in most cases it will be better to enable call graphs 1666 | # for selected functions only using the \callgraph command. 1667 | 1668 | CALL_GRAPH = NO 1669 | 1670 | # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then 1671 | # doxygen will generate a caller dependency graph for every global function 1672 | # or class method. Note that enabling this option will significantly increase 1673 | # the time of a run. So in most cases it will be better to enable caller 1674 | # graphs for selected functions only using the \callergraph command. 1675 | 1676 | CALLER_GRAPH = NO 1677 | 1678 | # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen 1679 | # will generate a graphical hierarchy of all classes instead of a textual one. 1680 | 1681 | GRAPHICAL_HIERARCHY = YES 1682 | 1683 | # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES 1684 | # then doxygen will show the dependencies a directory has on other directories 1685 | # in a graphical way. 
The dependency relations are determined by the #include 1686 | # relations between the files in the directories. 1687 | 1688 | DIRECTORY_GRAPH = YES 1689 | 1690 | # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images 1691 | # generated by dot. Possible values are svg, png, jpg, or gif. 1692 | # If left blank png will be used. If you choose svg you need to set 1693 | # HTML_FILE_EXTENSION to xhtml in order to make the SVG files 1694 | # visible in IE 9+ (other browsers do not have this requirement). 1695 | 1696 | DOT_IMAGE_FORMAT = png 1697 | 1698 | # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to 1699 | # enable generation of interactive SVG images that allow zooming and panning. 1700 | # Note that this requires a modern browser other than Internet Explorer. 1701 | # Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you 1702 | # need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files 1703 | # visible. Older versions of IE do not have SVG support. 1704 | 1705 | INTERACTIVE_SVG = NO 1706 | 1707 | # The tag DOT_PATH can be used to specify the path where the dot tool can be 1708 | # found. If left blank, it is assumed the dot tool can be found in the path. 1709 | 1710 | DOT_PATH = 1711 | 1712 | # The DOTFILE_DIRS tag can be used to specify one or more directories that 1713 | # contain dot files that are included in the documentation (see the 1714 | # \dotfile command). 1715 | 1716 | DOTFILE_DIRS = 1717 | 1718 | # The MSCFILE_DIRS tag can be used to specify one or more directories that 1719 | # contain msc files that are included in the documentation (see the 1720 | # \mscfile command). 1721 | 1722 | MSCFILE_DIRS = 1723 | 1724 | # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of 1725 | # nodes that will be shown in the graph. 
If the number of nodes in a graph 1726 | # becomes larger than this value, doxygen will truncate the graph, which is 1727 | # visualized by representing a node as a red box. Note that doxygen if the 1728 | # number of direct children of the root node in a graph is already larger than 1729 | # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note 1730 | # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. 1731 | 1732 | DOT_GRAPH_MAX_NODES = 50 1733 | 1734 | # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the 1735 | # graphs generated by dot. A depth value of 3 means that only nodes reachable 1736 | # from the root by following a path via at most 3 edges will be shown. Nodes 1737 | # that lay further from the root node will be omitted. Note that setting this 1738 | # option to 1 or 2 may greatly reduce the computation time needed for large 1739 | # code bases. Also note that the size of a graph can be further restricted by 1740 | # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. 1741 | 1742 | MAX_DOT_GRAPH_DEPTH = 0 1743 | 1744 | # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent 1745 | # background. This is disabled by default, because dot on Windows does not 1746 | # seem to support this out of the box. Warning: Depending on the platform used, 1747 | # enabling this option may lead to badly anti-aliased labels on the edges of 1748 | # a graph (i.e. they become hard to read). 1749 | 1750 | DOT_TRANSPARENT = NO 1751 | 1752 | # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output 1753 | # files in one run (i.e. multiple -o and -T options on the command line). This 1754 | # makes dot run faster, but since only newer versions of dot (>1.8.10) 1755 | # support this, this feature is disabled by default. 
1756 | 1757 | DOT_MULTI_TARGETS = NO 1758 | 1759 | # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will 1760 | # generate a legend page explaining the meaning of the various boxes and 1761 | # arrows in the dot generated graphs. 1762 | 1763 | GENERATE_LEGEND = YES 1764 | 1765 | # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will 1766 | # remove the intermediate dot files that are used to generate 1767 | # the various graphs. 1768 | 1769 | DOT_CLEANUP = YES 1770 | -------------------------------------------------------------------------------- /docs/how_to_install.dox: -------------------------------------------------------------------------------- 1 | /** \page HowToInstall How to install 2 | 3 | \tableofcontents 4 | 5 | \section SOFT Install third-part software 6 | 7 | To use our code you should install some third-part software 8 | 9 | - CMake www.cmake.org/cmake/resources/software.html 10 | - Boost www.boost.org 11 | - IntelMKL software.intel.com/en-us/articles/intel-mkl/ - implementation of BLAS 12 | 13 | \section CMAKE Correct CMakeLists.txt file 14 | 15 | You should insert path to Boost and BLAS (MKL) sources and libraries in file CMakeLists.txt. 16 | 17 | \code 18 | ########################################################################################## 19 | # CMake build script for MultiIndex 20 | # 21 | ########################################################################################## 22 | 23 | #let all libraries be static, not shared 24 | OPTION(BUILD_SHARED_LIBS "Build shared libraries (DLLs)." OFF) 25 | 26 | ########################################################################################## 27 | # lets start describing our project. 
28 | project (NearestSearch CXX C) 29 | cmake_minimum_required(VERSION 2.6) 30 | 31 | IF (UNIX) 32 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") 33 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") 34 | ENDIF (UNIX) 35 | 36 | IF (NOT DEFINED CMAKE_CURRENT_LIST_DIR) 37 | SET(CMAKE_CURRENT_LIST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) 38 | ENDIF (NOT DEFINED CMAKE_CURRENT_LIST_DIR) 39 | 40 | MESSAGE("current dir: ${CMAKE_CURRENT_LIST_DIR}") 41 | SET (Source_Path ${CMAKE_CURRENT_LIST_DIR}) 42 | 43 | MESSAGE ("PROJECT_BINARY_DIR " ${PROJECT_BINARY_DIR}) 44 | 45 | # ADD BOOSTDIR 46 | SET(BOOST_DIR ) 47 | SET(BOOST_LIB ) 48 | 49 | # ADD BLAS 50 | SET(BLAS_DIR ) 51 | SET(BLAS_LIB ) 52 | 53 | SET(REQUIRED_BLAS_LIBS mkl_intel_lp64.lib mkl_intel_thread.lib mkl_core.lib libiomp5md.lib) 54 | 55 | ########################################################################################## 56 | INCLUDE (${Source_Path}/NearestSearch.cmake NO_POLICY_SCOPE) 57 | INCLUDE_DIRECTORIES(${BOOST_DIR}) 58 | INCLUDE_DIRECTORIES(${BLAS_DIR}) 59 | LINK_DIRECTORIES(${BOOST_LIB}) 60 | LINK_DIRECTORIES(${BLAS_LIB}) 61 | INCLUDE_DIRECTORIES(${Source_Path}) 62 | ADD_LIBRARY (nearest_search_lib STATIC ${NEAREST_ALL_CC}) 63 | 64 | ########################################################################################## 65 | INCLUDE (${Source_Path}/IndexerLauncher.cmake NO_POLICY_SCOPE) 66 | INCLUDE_DIRECTORIES(${BOOST_DIR}) 67 | INCLUDE_DIRECTORIES(${BLAS_DIR}) 68 | LINK_DIRECTORIES(${BOOST_LIB}) 69 | LINK_DIRECTORIES(${BLAS_LIB}) 70 | INCLUDE_DIRECTORIES(${Source_Path}) 71 | ADD_EXECUTABLE (indexer_launcher ${IndexerLauncher}) 72 | TARGET_LINK_LIBRARIES (indexer_launcher nearest_search_lib) 73 | TARGET_LINK_LIBRARIES (indexer_launcher ${REQUIRED_BLAS_LIBS}) 74 | 75 | ########################################################################################## 76 | INCLUDE (${Source_Path}/SearcherTester.cmake NO_POLICY_SCOPE) 77 | INCLUDE_DIRECTORIES(${BOOST_DIR}) 78 | INCLUDE_DIRECTORIES(${BLAS_DIR}) 79 | 
LINK_DIRECTORIES(${BOOST_LIB}) 80 | LINK_DIRECTORIES(${BLAS_LIB}) 81 | INCLUDE_DIRECTORIES(${Source_Path}) 82 | ADD_EXECUTABLE (searcher_tester ${SearchTester}) 83 | TARGET_LINK_LIBRARIES (searcher_tester nearest_search_lib) 84 | TARGET_LINK_LIBRARIES (searcher_tester ${REQUIRED_BLAS_LIBS}) 85 | \endcode 86 | 87 | The list of libraries to link (${REQUIRED_BLAS_LIBS}) may differ from the above. 88 | You can get the list for your architecture here. 89 | 90 | \section BUILD Build project 91 | 92 | - create a folder "build" in the directory with the source files 93 | - (for Windows users) run "make_project.bat" 94 | - (for Unix users) do the next steps 95 | \code 96 | cd build 97 | cmake .. 98 | \endcode 99 | 100 | These operations create a solution for Visual Studio (Windows) or a makefile (Unix). Then you can build the project. 101 | 102 | **/ -------------------------------------------------------------------------------- /docs/index.dox: -------------------------------------------------------------------------------- 1 | /** \page Indexation How to create a multi-index 2 | 3 | \tableofcontents 4 | 5 | \section ALGO Algorithm 6 | 7 | The process of the multi-index construction is described in our paper. Here we provide the details of the implementation.\n 8 | \n 9 | After the vocabularies are trained (see below), the index construction progresses in two stages: assigning points to multi-index entries ("coarse quantization") and calculating information for reranking. Because one can use different reranking approaches for the same coarse quantization, the first stage of the algorithm saves the coarse quantizations for all points in the database to the hard drive. These coarse quantizations are just the entry identifiers (e.g. codeword pairs).
So if the file with coarse quantizations has already been produced there is no need to calculate them again (in this case, remove the flag --build_coarse from the command line parameters).\n 10 | \n 11 | In the CPU, a multi-index consists of a long one-dimensional array containing the compressed points aligned by entries (i.e. a group of points belonging to the same entry is stored contiguously) and a table containing the starting index in the array for every entry of the multi-index. 12 | The class MultiIndexer is thus a C++ template by the type of the record in this array. In this way, you can easily implement your own reranking approach by defining a new structure NewRecordType and implementing the function GetRecord for your structure.\n 13 | \n 14 | For index construction you should provide coarse vocabularies for building the multi-index structure and fine vocabularies for calculating the reranking information (assuming that you are using the provided reranking procedure). We assume that these files are prepared outside this code (C++ is not the simplest way to create vocabularies, just for your reference we provide a MATLAB script to create them below).\n 15 | 16 | \section FORMATS File formats 17 | 18 | Our code uses the .bvecs and .fvecs file formats developed by INRIA LEAR and TEXMEX groups. 
19 | 20 | - Coarse vocabularies\n 21 | Our code assumes that coarse vocabularies are in the following format:\n 22 | 4 bytes(one int32) - number of items in each vocabulary (N)\n 23 | 4 bytes(one int32) - dimension of item (d)\n 24 | 4*N*d*M bytes(N*d*M floats) - vocabulary items one after another (M is the multiplicity of algorithm)\n 25 | \n 26 | Matlab script to build coarse vocabularies 27 | \code 28 | clear all; 29 | 30 | all_data = bvecs_read('sift1M.bvecs'); 31 | 32 | all_data = single(all_data); 33 | vocabSize = 4096; 34 | % add implementation of K-means 35 | vocab1 = your_kmeans(single(all_data(1:end/2,:)),vocabSize); 36 | vocab2 = your_kmeans(single(all_data(end/2+1:end,:)),vocabSize); 37 | 38 | file = fopen(['sift1M_double_4096_' num2str(vocabSize) '.dat'], 'w'); 39 | dim = size(vocab1, 1); 40 | sz = size(vocab1, 2); 41 | fwrite(file, dim, 'int32'); 42 | fwrite(file, sz, 'int32'); 43 | fwrite(file, vocab1, 'float'); 44 | fwrite(file, vocab2, 'float'); 45 | fclose(file); 46 | save(['sift1M_double_4096_' num2str(vocabSize) '.mat'], 'vocab1', 'vocab2'); 47 | \endcode 48 | - Fine vocabularies\n 49 | Our code assumes that fine vocabularies are in the following format:\n 50 | 4 bytes(one int32) - number of vocabularies (m)\n 51 | 4 bytes(one int32) - number of items in each vocabulary (N)\n 52 | 4 bytes(one int32) - dimension of item (d)\n 53 | 4*N*d*m bytes(N*d*m floats) - vocabulary items one after another\n 54 | 55 | Matlab script to build fine vocabularies (used VlFeat library) 56 | \code 57 | clear all; 58 | all_data = fvecs_read('sift1M.fvecs'); 59 | 60 | vocabSize = 4096; 61 | load(['sift1M_double_' num2str(vocabSize) '.mat'], 'vocab1', 'vocab2'); 62 | 63 | vocab1 = int32(vocab1); 64 | vocab2 = int32(vocab2); 65 | i1 = vl_ikmeanspush(all_data(1:end/2,:), vocab1); 66 | i2 = vl_ikmeanspush(all_data(end/2+1:end,:), vocab2); 67 | residual = single(all_data)- single([vocab1(:,i1); vocab2(:,i2)]); 68 | bytes_per_point = 8; 69 | 70 | D = size(residual,1) / 
bytes_per_point; 71 | residual_vocab = cell(bytes_per_point,1); 72 | dist = cell(bytes_per_point,1); 73 | for m = 1:bytes_per_point 74 | chunk = residual(D*m-D+1:D*m,:); 75 | % add implementation of K-means 76 | residual_vocab{m} = your_kmeans(chunk,256); 77 | dist{m} = vl_alldist2(residual_vocab{m}); 78 | end 79 | 80 | save(['sift1M_double_4096_8.mat'],'residual_vocab','dist'); 81 | 82 | file = fopen(['sift1M_double_4096_8.dat'], 'w'); 83 | vocabs_count = size(residual_vocab, 1); 84 | each_vocab_count = size(residual_vocab{1}, 2); 85 | each_vocab_dim = size(residual_vocab{1}, 1); 86 | fwrite(file, vocabs_count, 'int32'); 87 | fwrite(file, each_vocab_count, 'int32'); 88 | fwrite(file, each_vocab_dim, 'int32'); 89 | for i = 1:vocabs_count 90 | for j = 1:each_vocab_count 91 | a = residual_vocab{i}(:,j); 92 | fwrite(file, a, 'float'); 93 | end 94 | end 95 | fclose(file); 96 | 97 | \endcode 98 | 99 | \section EXAMPLE Indexing sample 100 | 101 | To build an invertered index for a set of points you should run "indexer_launcher" application with some command line parameters. 102 | 103 | \code 104 | --threads_count - the number of threads to use for the multi-threaded index construction 105 | --multiplicity - the number of groups of dimensions the vectors will be split into. Equals 2 or 4 for the experiments in the paper. 106 | --points_file - the path to the file with the vector database (should be in .bvecs or .fvecs format) 107 | --coarse_vocabs_file - the path to the file with the coarse vocabularies (see the format description above) 108 | --fine_vocabs_file - the path to the file with fine vocabularies for reranking (see the format description above) 109 | --input_point_type - "BVEC" or "FVEC" 110 | --points_count - the number of points to index 111 | --space_dim - the space dimensionality (e.g. 
128 for SIFTs) 112 | --files_prefix - the common prefix for storing the multi-index files (used to control runs with different parameters) 113 | --coarse_quantization_file - the path to the file with coarse quantizations 114 | --metainfo_file - the path to the file with metainformation (deprecated, just write "fake.txt") 115 | --use_residuals - the reranking method flag. Specify it if you want to use residuals for reranking (Multi-D-ADC) and omit it if you want to use initial points (Multi-ADC) 116 | --build_coarse - specify this flag if you want to recompute coarse quantizations (otherwise, will use the previously computed, if available) 117 | \endcode 118 | 119 | Windows users can try launch_indexer.bat script. It launches indexing of the ANN_SIFT1M dataset using the provided vocabularies. 120 | Unix users should just write a similar launch_indexer.sh script. 121 | **/ -------------------------------------------------------------------------------- /docs/main_page.dox: -------------------------------------------------------------------------------- 1 | /** @mainpage MultiIndex 2 | 3 | This is a brief documentation for the source code of the inverted multiindex algorithm 4 | for fast and memory-efficient indexing and approximate nearest-neighbor search in high-dimensional spaces. One can download source-code from here. 5 | 6 | - \ref HowToInstall 7 | 8 | - \ref Indexation 9 | 10 | - \ref Search 11 | 12 | \image html mult.jpg 13 | 14 | Copyright @ Yandex 2012. \n 15 | Author: Artem Babenko \n 16 | Contact: arbabenko@yandex-team.ru \n 17 | Link to sources: https://github.com/arbabenko/MultiIndex \n 18 | \n 19 | THIS SOFTWARE IS LICENSED UNDER THE BSD LICENSE. YOU CAN USE, MODIFY AND/ OR REDISTRIBUTE THE SOFTWARE UNDER THE 20 | TERMS OF THE BSD LICENSE. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHOR OR YANDEX BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | **/ 23 | -------------------------------------------------------------------------------- /docs/pictures/mult.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbabenko/MultiIndex/bb0578821093f19d9c44a3ed7f50b8930e1d0199/docs/pictures/mult.jpg -------------------------------------------------------------------------------- /docs/search.dox: -------------------------------------------------------------------------------- 1 | /** \page Search How to search in a multi-index 2 | 3 | \tableofcontents 4 | 5 | \section ALGO The search algorithm 6 | 7 | Given a query point, the search algorithm traverses the inverted multi-index entries in the order of increasing distance from the entry centroid to the query as descibed in the paper. \n 8 | It accumulates the points from the traversed entries and stops when their count reaches the value requested by the user.\n 9 | If rerank mode is on (flag --do_rerank is set) the code also estimates the distance to query for every traversed point using the extra information stored for reranking. In this case, the traversed points are sorted by the increasing distance estimate.\n 10 | 11 | As for the index construction, you should provide the coarse vocabularies and the fine vocabularies (these should be the same vocabularies as used for the index construction). 
To measure the accuracy of the system, you should provide a file with a list of query points and the ground truth nearest neighbors.\n 12 | 13 | \section FORMATS File formats 14 | 15 | - Queries \n 16 | Our code assumes that queries are in the .bvecs or the .fvecs format.\n 17 | - Groundtruth \n 18 | Our code assumes that the ground truth nearest neighbors are in the .ivecs format.\n 19 | - Coarse vocabularies\n 20 | Our code assumes that coarse vocabularies are in the following format:\n 21 | 4 bytes(one int32) - number of items in each vocabulary (N)\n 22 | 4 bytes(one int32) - dimension of item (d)\n 23 | 4*N*d*M bytes(N*d*M floats) - vocabulary items one after another (M is the multiplicity of algorithm)\n 24 | - Fine vocabularies\n 25 | Our code assumes that fine vocabularies are in the following format:\n 26 | 4 bytes(one int32) - number of vocabularies (m)\n 27 | 4 bytes(one int32) - number of items in each vocabulary (N)\n 28 | 4 bytes(one int32) - dimension of item (d)\n 29 | 4*N*d*m bytes(N*d*m floats) - vocabulary items (each centroid is stored contiguously)\n 30 | 31 | \section EXAMPLE Search sample 32 | 33 | To launch the search for all queries and to estimate the accuracy of the search algorithm run the "searcher_tester" application. The following command-line options control the execution: 34 | \code 35 | --coarse_vocabs_file - the path to the file with coarse vocabs (see the format description above) 36 | --fine_vocabs_file - the path to the file with fine vocabs for reranking(see the format description above) 37 | --query_point_type - "BVEC" or "FVEC" 38 | --use_residuals - the reranking method flag. Specify it if you are using the residuals for reranking (Multi-D-ADC) and omit it if you are using the initial vector (Multi-ADC) 39 | --space_dim - space dimensionality (e.g. 
128 for SIFTs) 40 | --subspaces_centroids_count - the number of nearest vocabulary items to consider (L in the paper) 41 | --index_files_prefix - the common prefix of the multi-index files (to control runs with different parameters) 42 | --queries_file - the path to the file with queries (should be in .bvecs or .fvecs format) 43 | --groundtruth_file - the path to the file with groundtruth (should be in .ivecs format) 44 | --queries_count - the number of queries to search 45 | --neighbours_count - the number of neighbours involved in reranking 46 | --report_file - the path to the file to store the search quality report 47 | --do_rerank - this flag indicates whether the search algorithm should rerank points based on the estimated distance to the query 48 | \endcode 49 | 50 | Windows users can try test_searcher.bat script. It launches search in the index builded by launch_indexer.bat script. 51 | Unix users should just write a similar test_searcher.sh script. 52 | 53 | **/ -------------------------------------------------------------------------------- /indexer.h: -------------------------------------------------------------------------------- 1 | /** @file */ 2 | 3 | // Copyright 2012 Yandex Artem Babenko 4 | #ifndef INDEXER_H_ 5 | #define INDEXER_H_ 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include "data_util.h" 23 | #include "multitable.hpp" 24 | 25 | 26 | using std::ifstream; 27 | using std::map; 28 | using std::multimap; 29 | using std::ofstream; 30 | using std::string; 31 | 32 | using boost::lexical_cast; 33 | using boost::split; 34 | 35 | extern int THREADS_COUNT; 36 | 37 | extern Dimensions SPACE_DIMENSION; 38 | 39 | extern enum PointType point_type; 40 | 41 | IndexConfig gConfig; 42 | 43 | /** 44 | * This is the main class for creating multiindex for a set of points 45 | * in a multidimensional space. 
Clusterization and vocabs learning happen 46 | * outside of this class, multiindexer receives prepared vocabs in input 47 | */ 48 | template 49 | class MultiIndexer { 50 | public: 51 | /** 52 | * This is the simple MultiIndexer constructor 53 | * @param multiplicity how many parts input points will be divide on 54 | */ 55 | MultiIndexer(const int multiplicity = 2); 56 | /** 57 | * This is the main function of MultiIndexer 58 | * @param points_filename file with points in .fvecs or .bvecs format 59 | * @param points_count how many points should we index 60 | * @param coarse_vocabs vocabularies for coarse quantization 61 | * @param fine_vocabs vocabularies for fine quantization for reranking 62 | * @param mode determines the way of rerank info calculating 63 | * @param build_coarse_quantization should we get coarse quantization or not 64 | * @param files_prefix all index filenames will have this prefix 65 | * @param coarse_quantization_filename file with coarse quantization (if exists) 66 | */ 67 | void BuildMultiIndex(const string& points_filename, 68 | const string& metainfo_filename, 69 | const int points_count, 70 | const vector& coarse_vocabs, 71 | const vector& fine_vocabs, 72 | const RerankMode& mode, 73 | const bool build_coarse_quantization, 74 | const string& files_prefix, 75 | const string& coarse_quantization_filename = ""); 76 | private: 77 | /** 78 | * This function prepares for each point its coarse quantization 79 | * @param points_filename file with points in .fvecs or .bvecs format 80 | * @param points_count how many points should we handle 81 | * @param coarse_vocabs vocabularies for coarse quantization 82 | */ 83 | void PrepareCoarseQuantization(const string& points_filename, 84 | const int points_count, 85 | const vector& coarse_vocabs); 86 | /** 87 | * This function prepares for each point in subset its coarse quantization 88 | * @param points_filename file with points in .fvecs or .bvecs format 89 | * @param start_pid identifier of the first 
point in subset 90 | * @param subset_size points count in subset 91 | * @param coarse_vocabs vocabularies for coarse quantization 92 | * @param transposed_coarse_quantizations result 93 | */ 94 | void GetCoarseQuantizationsForSubset(const string& points_filename, 95 | const int start_pid, 96 | const int subset_size, 97 | const vector& coarse_vocabs, 98 | vector >* 99 | transposed_coarse_quantizations); 100 | /** 101 | * This function serializes prepared coarse quantizations to file 102 | * @param transposed_coarse_quantizations quantizations to serialize. 103 | * They are transposed because of effective memory usage 104 | * @param filename file we should serialize to 105 | */ 106 | void SerializeCoarseQuantizations(const vector >& 107 | transposed_coarse_quantizations, 108 | const string& filename); 109 | /** 110 | * This function saves index to files. 111 | * All filenames start form the common files prefix 112 | */ 113 | void SerializeMultiIndexFiles(); 114 | /** 115 | * This function converts counts of points in cells to cell edges 116 | */ 117 | void ConvertPointsInCellsCountToCellEdges(); 118 | 119 | /** 120 | * This function fills multiindex data structures. 121 | * @param points_filename file with points in .fvecs or .bvecs format 122 | * @param points_count how many points should we index 123 | * @param coarse_vocabs vocabularies for coarse quantization 124 | * @param fine_vocabs vocabularies for fine quantization for reranking 125 | * @param mode determines the way of rerank info calculating 126 | */ 127 | void FillMultiIndex(const string& points_filename, 128 | const int points_count, 129 | const vector& coarse_vocabs, 130 | const vector& fine_vocabs, 131 | const RerankMode& mode); 132 | /** 133 | * This function fills multiindex data structures. 
134 | * @param points_filename file with points in .fvecs or .bvecs format 135 | * @param start_pid identifier of the first point in subset 136 | * @param subset_size points count in subset 137 | * @param coarse_vocabs vocabularies for coarse quantization 138 | * @param fine_vocabs vocabularies for fine quantization for reranking 139 | * @param mode determines the way of rerank info calculating 140 | * @param points_written_in_index auxillary structure for correct index filling 141 | */ 142 | void FillMultiIndexForSubset(const string& points_filename, 143 | const PointId start_pid, 144 | const int points_count, 145 | const vector& coarse_vocabs, 146 | const vector& fine_vocabs, 147 | const RerankMode& mode, 148 | Multitable* points_written_in_index); 149 | 150 | /** 151 | * This function reads point coarse quantization from file 152 | * @param pid identifier of target point 153 | * @param filename file with coarse quantizations 154 | * @param coarse_quantization result 155 | */ 156 | void GetPointCoarseQuantization(const PointId pid, 157 | const string& filename, 158 | vector* coarse_quantization); 159 | /** 160 | * This function calculates rerank info for point 161 | * @param point target point 162 | * @param pid identifier of target point 163 | * @param fine_vocabs vocabularies for rerank info calculation 164 | */ 165 | void FillPointRerankInfo(const Point& point, 166 | const PointId pid, 167 | const vector& fine_vocabs); 168 | /** 169 | * This function restores counts of points from coarse quantizations 170 | * @param points_filename file with points in .fvecs or .bvecs format 171 | * @param points_count how many points should we index 172 | * @param coarse_vocabs vocabularies for coarse quantization 173 | * We need them to init counts table correctly 174 | */ 175 | void RestorePointsInCellsCountFromCourseQuantization(const string& points_filename, 176 | const int points_count, 177 | const vector& coarse_vocabs); 178 | /** 179 | * This simple function returns 
size of one coordinate of input point 180 | */ 181 | int GetInputCoordSizeof(); 182 | /** 183 | * This simple function reads one point from input stream 184 | * @param input input stream 185 | * @param point result point 186 | */ 187 | void ReadPoint(ifstream& input, Point* point); 188 | /** 189 | * Initialize all structures for BLAS operations 190 | * @param coarse_vocabs coarse vocabularies 191 | */ 192 | void InitBlasStructures(const vector& coarse_vocabs); 193 | /** 194 | * All index filenames will start from this prefix 195 | */ 196 | string files_prefix_; 197 | /** 198 | * Filename of file with coarse quantizations 199 | */ 200 | string coarse_quantization_filename_; 201 | /** 202 | * Multiplicity (how many parts point space is divided on) 203 | */ 204 | int multiplicity_; 205 | /** 206 | * Table with number of points in each cell 207 | */ 208 | Multitable point_in_cells_count_; 209 | /** 210 | * Multiindex 211 | */ 212 | MultiIndex multiindex_; 213 | /** 214 | * Mutex for critical section in filling index stage 215 | */ 216 | boost::mutex cell_counts_mutex_; 217 | /** 218 | * Struct for BLAS 219 | */ 220 | vector coarse_vocabs_matrices_; 221 | /** 222 | * Struct for BLAS 223 | */ 224 | vector > coarse_centroids_norms_; 225 | }; 226 | 227 | template 228 | inline void GetRecord(const Point& point, const PointId pid, 229 | const vector coarse_quantization, 230 | const vector& coarse_vocabs, 231 | Record* result) { 232 | } 233 | 234 | template 235 | void InitParameters(const vector& fine_vocabs, 236 | const RerankMode& mode, 237 | const string& metainfo_filename) { 238 | gConfig.fine_vocabs = fine_vocabs; 239 | gConfig.rerank_mode = mode; 240 | } 241 | 242 | 243 | //////////////////// IMPLEMENTATION ////////////////////// 244 | template 245 | MultiIndexer::MultiIndexer(const int multiplicity) { 246 | if(multiplicity < 0) { 247 | throw std::logic_error("Multiplicity < 0"); 248 | } 249 | multiplicity_ = multiplicity; 250 | } 251 | 252 | template 253 | int 
MultiIndexer::GetInputCoordSizeof() { 254 | if(point_type == FVEC) { 255 | return (int)sizeof(float); 256 | } else if(point_type == BVEC) { 257 | return (int)sizeof(unsigned char); 258 | } 259 | } 260 | 261 | template 262 | void MultiIndexer::ReadPoint(ifstream& input, Point* point) { 263 | if(!input.good()) { 264 | throw std::logic_error("Bad input stream"); 265 | } 266 | if(point_type == FVEC) { 267 | ReadVector(input, point); 268 | } else if(point_type == BVEC) { 269 | ReadVector(input, point); 270 | } 271 | } 272 | 273 | template 274 | void MultiIndexer::SerializeCoarseQuantizations(const vector >& 275 | transposed_coarse_quantizations, 276 | const string& filename) { 277 | ofstream quantizations_stream; 278 | quantizations_stream.open(filename.c_str(), ios::binary); 279 | if(!quantizations_stream.good()) { 280 | throw std::logic_error("Bad input stream"); 281 | } 282 | cout << "Writing coarse quantizations started" << endl; 283 | for(PointId pid = 0; pid < transposed_coarse_quantizations[0].size(); ++pid) { 284 | for(int subspace_index = 0; subspace_index < multiplicity_; ++subspace_index) { 285 | ClusterId quantization = transposed_coarse_quantizations[subspace_index][pid]; 286 | quantizations_stream.write((char*)&quantization, sizeof(quantization)); 287 | } 288 | } 289 | quantizations_stream.close(); 290 | cout << "Writing coarse quantizations started" << endl; 291 | } 292 | 293 | template 294 | void MultiIndexer::SerializeMultiIndexFiles() { 295 | cout << "Start multiindex serializing....\n"; 296 | ofstream cell_edges(string(files_prefix_ + "_cell_edges.bin").c_str(), ios::binary); 297 | boost::archive::binary_oarchive arc_cell_edges(cell_edges); 298 | arc_cell_edges << multiindex_.cell_edges; 299 | ofstream multi_array(string(files_prefix_ + "_multi_array.bin").c_str(), ios::binary); 300 | boost::archive::binary_oarchive arc_multi_array(multi_array); 301 | arc_multi_array << multiindex_.multiindex; 302 | cout << "Finish multiindex serializing....\n"; 303 | 
} 304 | 305 | template 306 | void MultiIndexer::GetCoarseQuantizationsForSubset(const string& points_filename, 307 | const int start_pid, 308 | const int subset_size, 309 | const vector& coarse_vocabs, 310 | vector >* 311 | transposed_coarse_quantizations) { 312 | ifstream point_stream; 313 | point_stream.open(points_filename.c_str(), ios::binary); 314 | if(!point_stream.good()) { 315 | throw std::logic_error("Bad input points stream"); 316 | } 317 | // we assume points are stored in .fvecs or .bvecs format 318 | point_stream.seekg(start_pid * (GetInputCoordSizeof() * SPACE_DIMENSION + sizeof(Dimensions)), ios::beg); 319 | vector coarse_quantization(multiplicity_); 320 | for(int point_number = 0; point_number < subset_size; ++point_number) { 321 | if(point_number % 10000 == 0) { 322 | cout << "Getting coarse quantization, point # " << start_pid + point_number << endl; 323 | } 324 | Point current_point; 325 | ReadPoint(point_stream, ¤t_point); 326 | int subpoints_dimension = SPACE_DIMENSION / multiplicity_; 327 | for(int coarse_index = 0; coarse_index < multiplicity_; ++coarse_index) { 328 | Dimensions start_dim = coarse_index * subpoints_dimension; 329 | Dimensions final_dim = start_dim + subpoints_dimension; 330 | ClusterId nearest = GetNearestClusterId(current_point, coarse_vocabs.at(coarse_index), 331 | start_dim, final_dim); 332 | transposed_coarse_quantizations->at(coarse_index)[start_pid + point_number] = nearest; 333 | coarse_quantization[coarse_index] = nearest; 334 | cblas_saxpy(subpoints_dimension, -1, &(coarse_vocabs.at(coarse_index)[nearest][0]), 1, &(current_point[start_dim]), 1); 335 | } 336 | 337 | int global_index = point_in_cells_count_.GetCellGlobalIndex(coarse_quantization); 338 | cell_counts_mutex_.lock(); 339 | ++(point_in_cells_count_.table[global_index]); 340 | cell_counts_mutex_.unlock(); 341 | } 342 | } 343 | 344 | template 345 | void MultiIndexer::PrepareCoarseQuantization(const string& points_filename, 346 | const int points_count, 347 | 
const vector& coarse_vocabs) { 348 | // we use transposed quantizations for efficient memory usage 349 | vector > transposed_coarse_quantizations; 350 | transposed_coarse_quantizations.resize(multiplicity_); 351 | vector multiindex_table_dimensions; 352 | for(int i = 0; i < multiplicity_; ++i) { 353 | transposed_coarse_quantizations[i].resize(points_count); 354 | multiindex_table_dimensions.push_back(coarse_vocabs[i].size()); 355 | } 356 | point_in_cells_count_.Resize(multiindex_table_dimensions); 357 | cout << "Memory for coarse quantizations allocated" << endl; 358 | boost::thread_group index_threads; 359 | int thread_points_count = points_count / THREADS_COUNT; 360 | for(int thread_id = 0; thread_id < THREADS_COUNT; ++thread_id) { 361 | PointId start_pid = thread_points_count * thread_id; 362 | index_threads.create_thread(boost::bind(&MultiIndexer::GetCoarseQuantizationsForSubset, 363 | this, points_filename, start_pid, thread_points_count, 364 | boost::cref(coarse_vocabs), &transposed_coarse_quantizations)); 365 | } 366 | index_threads.join_all(); 367 | if(coarse_quantization_filename_.empty()) { 368 | coarse_quantization_filename_ = files_prefix_ + "_coarse_quantizations.bin"; 369 | } 370 | cout << "Coarse quantizations are calculated" << endl; 371 | SerializeCoarseQuantizations(transposed_coarse_quantizations, coarse_quantization_filename_); 372 | cout << "Coarse quantizations are serialized" << endl; 373 | } 374 | 375 | template 376 | void MultiIndexer::ConvertPointsInCellsCountToCellEdges() { 377 | cout << "Converting points in cells to cell edges...\n"; 378 | multiindex_.cell_edges = point_in_cells_count_; 379 | multiindex_.cell_edges.table[0] = 0; 380 | for(int global_index = 1; 381 | global_index < point_in_cells_count_.table.size(); 382 | ++global_index) { 383 | multiindex_.cell_edges.table[global_index] = multiindex_.cell_edges.table[global_index - 1] + 384 | point_in_cells_count_.table[global_index - 1]; 385 | } 386 | // we do not need this table more 
387 | point_in_cells_count_.table.clear(); 388 | cout << "Finish converting points in cells to cell edges...\n"; 389 | } 390 | 391 | template 392 | void MultiIndexer::GetPointCoarseQuantization(const PointId pid, 393 | const string& filename, 394 | vector* coarse_quantization) { 395 | ifstream coarse_quantization_stream; 396 | coarse_quantization_stream.open(filename.c_str(), ios::binary); 397 | if(!coarse_quantization_stream.good()) { 398 | throw std::logic_error("Bad input coarse quantizations stream"); 399 | } 400 | coarse_quantization_stream.seekg((long long)pid * sizeof(ClusterId) * multiplicity_, ios::beg); 401 | for(int coarse_index = 0; coarse_index < multiplicity_; ++coarse_index) { 402 | coarse_quantization_stream.read((char*)&(coarse_quantization->at(coarse_index)), 403 | sizeof(coarse_quantization->at(coarse_index))); 404 | } 405 | } 406 | 407 | template 408 | void MultiIndexer::FillMultiIndexForSubset(const string& points_filename, 409 | const PointId start_pid, 410 | const int points_count, 411 | const vector& coarse_vocabs, 412 | const vector& fine_vocabs, 413 | const RerankMode& mode, 414 | Multitable* points_written_in_index) { 415 | ifstream point_stream; 416 | point_stream.open(points_filename.c_str(), ios::binary); 417 | if(!point_stream.good()) { 418 | throw std::logic_error("Bad input points stream"); 419 | } 420 | point_stream.seekg((long long)start_pid * (GetInputCoordSizeof() * SPACE_DIMENSION + sizeof(Dimensions)), ios::beg); 421 | for(int point_number = 0; point_number < points_count; ++point_number) { 422 | if(point_number % 10000 == 0) { 423 | cout << "Filling multiindex, point # " << start_pid + point_number << endl; 424 | } 425 | Point current_point; 426 | ReadPoint(point_stream, ¤t_point); 427 | vector coarse_quantization(multiplicity_); 428 | GetPointCoarseQuantization(start_pid + point_number, 429 | coarse_quantization_filename_, 430 | &coarse_quantization); 431 | int current_written_count = 
points_written_in_index->GetValue(coarse_quantization); 432 | int pid_multiindex = multiindex_.cell_edges.GetValue(coarse_quantization) + current_written_count; 433 | GetRecord(current_point, start_pid + point_number, 434 | coarse_quantization, coarse_vocabs, &(multiindex_.multiindex[pid_multiindex])); 435 | cell_counts_mutex_.lock(); 436 | points_written_in_index->SetValue(current_written_count + 1, coarse_quantization); 437 | cell_counts_mutex_.unlock(); 438 | } 439 | } 440 | 441 | template 442 | void MultiIndexer::FillMultiIndex(const string& points_filename, 443 | const int points_count, 444 | const vector& coarse_vocabs, 445 | const vector& fine_vocabs, 446 | const RerankMode& mode) { 447 | ConvertPointsInCellsCountToCellEdges(); 448 | multiindex_.multiindex.resize(points_count); 449 | cout << "Indexing started..." << endl; 450 | 451 | Multitable points_written_in_index(multiindex_.cell_edges.dimensions); 452 | int thread_points_count = points_count / THREADS_COUNT; 453 | boost::thread_group threads; 454 | for(int thread_id = 0; thread_id < THREADS_COUNT; ++thread_id) { 455 | PointId start_pid = thread_points_count * thread_id; 456 | threads.create_thread(boost::bind(&MultiIndexer::FillMultiIndexForSubset, this, points_filename, start_pid, 457 | thread_points_count, boost::cref(coarse_vocabs), 458 | boost::cref(fine_vocabs), mode, &points_written_in_index)); 459 | } 460 | threads.join_all(); 461 | cout << "Indexing finished..." 
<< endl; 462 | } 463 | 464 | template 465 | void MultiIndexer::RestorePointsInCellsCountFromCourseQuantization(const string& points_filename, 466 | const int points_count, 467 | const vector& coarse_vocabs) { 468 | vector dimensions; 469 | for(int i = 0; i < multiplicity_; ++i) { 470 | dimensions.push_back(coarse_vocabs[i].size()); 471 | } 472 | point_in_cells_count_.Resize(dimensions); 473 | ifstream coarse_quantization_stream; 474 | coarse_quantization_stream.open(coarse_quantization_filename_.c_str(), ios::binary); 475 | if(!coarse_quantization_stream.good()) { 476 | throw std::logic_error("Bad input coarse quantizations stream"); 477 | } 478 | CoarseQuantization quantization(multiplicity_); 479 | for(PointId pid = 0; pid < points_count; ++pid) { 480 | if(pid % 100000 == 0) { 481 | cout << pid << endl; 482 | } 483 | for(int subspace_index = 0; subspace_index < multiplicity_; ++subspace_index) { 484 | coarse_quantization_stream.read((char*)&(quantization[subspace_index]), 485 | sizeof(ClusterId)); 486 | } 487 | int cell_global_index = point_in_cells_count_.GetCellGlobalIndex(quantization); 488 | point_in_cells_count_.table[cell_global_index] += 1; 489 | } 490 | } 491 | 492 | template 493 | void MultiIndexer::BuildMultiIndex(const string& points_filename, 494 | const string& metainfo_filename, 495 | const int points_count, 496 | const vector& coarse_vocabs, 497 | const vector& fine_vocabs, 498 | const RerankMode& mode, 499 | const bool build_coarse_quantization, 500 | const string& files_prefix, 501 | const string& coarse_quantization_filename) { 502 | InitParameters(fine_vocabs, mode, metainfo_filename); 503 | InitBlasStructures(coarse_vocabs); 504 | files_prefix_ = files_prefix; 505 | coarse_quantization_filename_ = coarse_quantization_filename; 506 | if(build_coarse_quantization) { 507 | PrepareCoarseQuantization(points_filename, points_count, coarse_vocabs); 508 | } else { 509 | RestorePointsInCellsCountFromCourseQuantization(points_filename, 510 | 
points_count, 511 | coarse_vocabs); 512 | } 513 | FillMultiIndex(points_filename, points_count, coarse_vocabs, fine_vocabs, mode); 514 | cout << "Multiindex created" << endl; 515 | SerializeMultiIndexFiles(); 516 | cout << "Multiindex serialized" << endl; 517 | } 518 | 519 | template 520 | void MultiIndexer::InitBlasStructures(const vector& coarse_vocabs) { 521 | coarse_vocabs_matrices_.resize(coarse_vocabs.size()); 522 | coarse_centroids_norms_.resize(coarse_vocabs.size(), vector(coarse_vocabs[0].size())); 523 | for(int coarse_id = 0; coarse_id < coarse_vocabs_matrices_.size(); ++coarse_id) { 524 | coarse_vocabs_matrices_[coarse_id] = new float[coarse_vocabs[0].size() * coarse_vocabs[0][0].size()]; 525 | for(int i = 0; i < coarse_vocabs[0].size(); ++i) { 526 | Coord norm = 0; 527 | for(int j = 0; j < coarse_vocabs[0][0].size(); ++j) { 528 | coarse_vocabs_matrices_[coarse_id][coarse_vocabs[0][0].size() * i + j] = coarse_vocabs[coarse_id][i][j]; 529 | norm += coarse_vocabs[coarse_id][i][j] * coarse_vocabs[coarse_id][i][j]; 530 | } 531 | coarse_centroids_norms_[coarse_id][i] = norm; 532 | } 533 | } 534 | } 535 | 536 | template<> 537 | inline void GetRecord (const Point& point, const PointId pid, 538 | const vector coarse_quantization, 539 | const vector& coarse_vocabs, 540 | PointId* result) { 541 | *result = pid; 542 | } 543 | 544 | inline void FillAdcInfo(const Point& point, const PointId pid, 545 | const vector& fine_vocabs, 546 | char* result) { 547 | int subvectors_count = fine_vocabs.size(); 548 | int subvector_dim = point.size() / subvectors_count; 549 | for(int subvector_index = 0; subvector_index < subvectors_count; ++subvector_index) { 550 | Dimensions start_dim = subvector_index * subvector_dim; 551 | Dimensions final_dim = start_dim + subvector_dim; 552 | *((FineClusterId*)result) = (FineClusterId)GetNearestClusterId(point, fine_vocabs[subvector_index], 553 | start_dim, final_dim); 554 | result += sizeof(FineClusterId); 555 | } 556 | } 557 | 558 | 
template<> 559 | inline void GetRecord (const Point& point, const PointId pid, 560 | const vector coarse_quantization, 561 | const vector& coarse_vocabs, 562 | RerankADC8* result) { 563 | result->pid = pid; 564 | char* rerank_info_ptr = (char*)result + sizeof(pid); 565 | if(gConfig.rerank_mode == USE_RESIDUALS) { 566 | Point residual; 567 | GetResidual(point, coarse_quantization, coarse_vocabs, &residual); 568 | FillAdcInfo(residual, pid, gConfig.fine_vocabs, rerank_info_ptr); 569 | } else if (gConfig.rerank_mode == USE_INIT_POINTS) { 570 | FillAdcInfo(point, pid, gConfig.fine_vocabs, rerank_info_ptr); 571 | } 572 | } 573 | 574 | template<> 575 | inline void GetRecord (const Point& point, const PointId pid, 576 | const vector coarse_quantization, 577 | const vector& coarse_vocabs, 578 | RerankADC16* result) { 579 | result->pid = pid; 580 | char* rerank_info_ptr = (char*)result + sizeof(pid); 581 | if(gConfig.rerank_mode == USE_RESIDUALS) { 582 | Point residual; 583 | GetResidual(point, coarse_quantization, coarse_vocabs, &residual); 584 | FillAdcInfo(residual, pid, gConfig.fine_vocabs, rerank_info_ptr); 585 | } else if (gConfig.rerank_mode == USE_INIT_POINTS) { 586 | FillAdcInfo(point, pid, gConfig.fine_vocabs, rerank_info_ptr); 587 | } 588 | } 589 | 590 | #endif 591 | 592 | 593 | 594 | 595 | -------------------------------------------------------------------------------- /indexer_launcher.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2012 Yandex Artem Babenko 2 | #include 3 | 4 | #include 5 | 6 | #include "indexer.h" 7 | 8 | using namespace boost::program_options; 9 | 10 | /** 11 | * Number of threads for indexing 12 | */ 13 | int THREADS_COUNT; 14 | /** 15 | * Type, should be BVEC or FVEC 16 | */ 17 | PointType point_type; 18 | /** 19 | * Number of coordinates in a point 20 | */ 21 | Dimensions SPACE_DIMENSION; 22 | /** 23 | * File with vocabularies for multiindex structure 24 | */ 25 | string 
coarse_vocabs_file; 26 | /** 27 | * File with vocabularies for reranking 28 | */ 29 | string fine_vocabs_file; 30 | /** 31 | * File with points to index 32 | */ 33 | string points_file; 34 | /** 35 | * File with points metainfo (imageId, etc.) 36 | */ 37 | string metainfo_file; 38 | /** 39 | * Reranking approach, should be USE_RESIDUALS or USE_INIT_POINTS 40 | */ 41 | RerankMode mode; 42 | /** 43 | * Common prefix of all multiindex files 44 | */ 45 | string files_prefix; 46 | /** 47 | * Should we calculate coarse quantizations (they can be precomputed) 48 | */ 49 | bool build_coarse_quantizations; 50 | /** 51 | * File with points coarse quantizations 52 | */ 53 | string coarse_quantizations_file; 54 | /** 55 | * How many points should we index 56 | */ 57 | int points_count; 58 | /** 59 | * Multiplicity of multiindex 60 | */ 61 | int multiplicity; 62 | 63 | int SetOptions(int argc, char** argv) { 64 | options_description description("Options"); 65 | description.add_options() 66 | ("threads_count,t", value()) 67 | ("multiplicity,m", value()) 68 | ("points_file,p", value()) 69 | ("metainfo_file,z", value()) 70 | ("coarse_vocabs_file,c", value()) 71 | ("fine_vocabs_file,f", value()) 72 | ("input_point_type,i", value()) 73 | ("build_coarse,b", bool_switch(), "Flag B") 74 | ("use_residuals,r", bool_switch(), "Flag R") 75 | ("points_count,p", value()) 76 | ("coarse_quantization_file,q", value()) 77 | ("space_dim,d", value()) 78 | ("files_prefix,_", value()); 79 | variables_map name_to_value; 80 | try { 81 | store(command_line_parser(argc, argv).options(description).run(), name_to_value); 82 | } catch (const invalid_command_line_syntax& inv_syntax) { 83 | switch (inv_syntax.kind()) { 84 | case invalid_syntax::missing_parameter : 85 | cout << "Missing argument for option '" << inv_syntax.tokens() << "'.\n"; 86 | break; 87 | default: 88 | cout << "Syntax error, kind " << int(inv_syntax.kind()) << "\n"; 89 | break; 90 | }; 91 | return 1; 92 | } catch (const unknown_option& 
unkn_option) { 93 | cout << "Unknown option '" << unkn_option.get_option_name() << "'\n"; 94 | return 1; 95 | } 96 | if (name_to_value.count("help")) { 97 | cout << description << "\n"; 98 | return 1; 99 | } 100 | 101 | THREADS_COUNT = name_to_value["threads_count"].as(); 102 | multiplicity = name_to_value["multiplicity"].as(); 103 | points_file = name_to_value["points_file"].as(); 104 | metainfo_file = name_to_value["metainfo_file"].as(); 105 | coarse_vocabs_file = name_to_value["coarse_vocabs_file"].as(); 106 | fine_vocabs_file = name_to_value["fine_vocabs_file"].as(); 107 | SPACE_DIMENSION = name_to_value["space_dim"].as(); 108 | files_prefix = name_to_value["files_prefix"].as(); 109 | points_count = name_to_value["points_count"].as(); 110 | 111 | build_coarse_quantizations = (name_to_value["build_coarse"].as() == true) ? true : false; 112 | mode = name_to_value["use_residuals"].as() == true ? USE_RESIDUALS : USE_INIT_POINTS; 113 | 114 | if (name_to_value.find("coarse_quantization_file") != name_to_value.end()) { 115 | coarse_quantizations_file = name_to_value["coarse_quantization_file"].as(); 116 | } 117 | if (name_to_value["input_point_type"].as() == "FVEC") { 118 | point_type = FVEC; 119 | } else if(name_to_value["input_point_type"].as() == "BVEC") { 120 | point_type = BVEC; 121 | } 122 | return 0; 123 | } 124 | 125 | int main(int argc, char** argv) { 126 | SetOptions(argc, argv); 127 | cout << "Options are set ...\n"; 128 | vector coarse_vocabs; 129 | vector fine_vocabs; 130 | ReadVocabularies(coarse_vocabs_file, SPACE_DIMENSION, &coarse_vocabs); 131 | ReadFineVocabs(fine_vocabs_file, &fine_vocabs); 132 | cout << "Vocs are read ...\n"; 133 | if(fine_vocabs.size() == 8) { 134 | MultiIndexer indexer(multiplicity); 135 | indexer.BuildMultiIndex(points_file, metainfo_file, points_count, coarse_vocabs, 136 | fine_vocabs, mode, build_coarse_quantizations, 137 | files_prefix, coarse_quantizations_file); 138 | } else if(fine_vocabs.size() == 16) { 139 | MultiIndexer 
indexer(multiplicity); 140 | indexer.BuildMultiIndex(points_file, metainfo_file, points_count, coarse_vocabs, 141 | fine_vocabs, mode, build_coarse_quantizations, 142 | files_prefix, coarse_quantizations_file); 143 | } 144 | return 0; 145 | } -------------------------------------------------------------------------------- /launch_indexer_double.sh: -------------------------------------------------------------------------------- 1 | cd build_master 2 | ./indexer_launcher \ 3 | --threads_count=32 \ 4 | --multiplicity=2 \ 5 | --points_file="/sata/ResearchData/BigAnn/bases/sift1M.bvecs" \ 6 | --coarse_vocabs_file="../sift1M_double_4096.dat" \ 7 | --fine_vocabs_file="../sift1M_double_4096_8.dat" \ 8 | --input_point_type="BVEC" \ 9 | --points_count=1000000 \ 10 | --space_dim=128 \ 11 | --files_prefix="/sata/ResearchData/BigAnn/indices/sift1M_double_4096_8" \ 12 | --coarse_quantization_file="/sata/ResearchData/BigAnn/cq/sift1M_double_4096_coarse_quantizations.bin" \ 13 | --metainfo_file="fake.txt" \ 14 | --use_residuals \ 15 | --build_coarse 16 | 17 | -------------------------------------------------------------------------------- /make_project.bat: -------------------------------------------------------------------------------- 1 | cd build 2 | del CMakeCache.txt 3 | cmake -DMAKE_ONLY=BUILD_ALL -G "Visual Studio 10 Win64" .. 4 | pause 5 | -------------------------------------------------------------------------------- /multitable.hpp: -------------------------------------------------------------------------------- 1 | /** @file */ 2 | // Copyright 2012 Yandex Artem Babenko 3 | #pragma once 4 | 5 | #include 6 | 7 | using std::vector; 8 | 9 | /** 10 | * This class implements interface of multidimensional array with 11 | * fast write/read operations. In fact data is stored in a long array. 12 | * Global index of particular item in this array is calculated from item coordinates. 
13 | */ 14 | template 15 | struct Multitable { 16 | /** 17 | * This constructor gets width of table for each dimension 18 | * @param dimensions array of sizes of table along each dimension 19 | */ 20 | Multitable(const vector& dimensions = vector()); 21 | /** 22 | * This function resize the table to new dimensions 23 | * @param dimensions array of sizes of table along each dimension 24 | */ 25 | void Resize(const vector& dimensions, T value = T()); 26 | /** 27 | * This function sets value in one cell 28 | * @param value value to set 29 | * @param cell_indices coordinates of cell in the table 30 | */ 31 | void SetValue(T value, const vector& cell_indices); 32 | /** 33 | * This function gets value of one cell 34 | * @param cell_indices coordinates of cell in the table 35 | */ 36 | T GetValue(const vector& cell_indices); 37 | /** 38 | * Actual data as one-dimensional array 39 | */ 40 | vector table; 41 | /** 42 | * Dimensions of table 43 | */ 44 | vector dimensions; 45 | /** 46 | * Function for Boost.Serialization 47 | */ 48 | template 49 | void serialize(Archive& arc, unsigned int version) { 50 | arc & table; 51 | arc & dimensions; 52 | } 53 | /** 54 | * Function converts cell coordinates to global index in a long array 55 | * @param cell_indices coordinates of cell in the table 56 | */ 57 | int GetCellGlobalIndex(const vector& cell_indices) const; 58 | }; 59 | 60 | template 61 | int Multitable::GetCellGlobalIndex(const vector& indices) const { 62 | if(indices.empty()) { 63 | throw std::logic_error("Empty indices array!"); 64 | } 65 | int global_index = 0; 66 | int subtable_capacity = table.size(); 67 | for(int dimension_index = 0; dimension_index < dimensions.size(); ++dimension_index) { 68 | subtable_capacity = subtable_capacity / dimensions[dimension_index]; 69 | global_index += subtable_capacity * indices[dimension_index]; 70 | } 71 | return global_index; 72 | } 73 | 74 | template 75 | void Multitable::Resize(const vector& new_dimensions, T value) { 76 | int 
table_size = 1; 77 | dimensions = new_dimensions; 78 | for(int dimension_index = 0; dimension_index < new_dimensions.size(); ++dimension_index) { 79 | table_size *= new_dimensions[dimension_index]; 80 | } 81 | table.resize(table_size, value); 82 | } 83 | 84 | template 85 | Multitable::Multitable(const vector& dimensions) { 86 | Resize(dimensions); 87 | } 88 | 89 | template 90 | void Multitable::SetValue(T value, const vector& indices) { 91 | int global_index = GetCellGlobalIndex(indices); 92 | table.at(global_index) = value; 93 | } 94 | 95 | template 96 | T Multitable::GetValue(const vector& indices) { 97 | int global_index = GetCellGlobalIndex(indices); 98 | return table.at(global_index); 99 | } -------------------------------------------------------------------------------- /ordered_lists_merger.h: -------------------------------------------------------------------------------- 1 | /** @file */ 2 | // Copyright 2012 Yandex Artem Babenko 3 | #pragma once 4 | 5 | #include "data_util.h" 6 | #include "multitable.hpp" 7 | 8 | /** 9 | * \typedef 10 | * Typedef for indices of merged list element 11 | */ 12 | typedef vector MergedItemIndices; 13 | 14 | /** 15 | * This class merges any number of ordered lists and yields 16 | * elements of merged list in Order-increasing order. 17 | * Initial lists are ordered by Order. 18 | * This class is used in multilist algorithm to get coordinates of cells in multiindex. 19 | * Class does not return the whole result list, it just yields by one item. 20 | * Class also assumes that input lists are the same length 21 | */ 22 | template 23 | class OrderedListsMerger { 24 | public: 25 | /** 26 | * This constructor inits merger for input lists 27 | */ 28 | OrderedListsMerger(); 29 | /** 30 | * This function sets lists to merge 31 | * @param lists input lists to merge (must be ordered) 32 | */ 33 | void setLists(const vector > >& lists); 34 | /** 35 | * This function yields indices if next item of merged list. 
36 | * It returns "false" if all items have been already yielded and "true" otherwise 37 | * @param merged_item_indices result indices 38 | */ 39 | inline bool GetNextMergedItemIndices(MergedItemIndices* merged_item_indices); 40 | /** 41 | * Pointer to input lists 42 | */ 43 | const vector > >* lists_ptr; 44 | /** 45 | * Getter 46 | */ 47 | Multitable& GetYieldedItems() { 48 | return yielded_items_indices_; 49 | } 50 | private: 51 | /** 52 | * This function pushes new item into priority queue 53 | * @param merged_item_indices indices of item to add 54 | */ 55 | void InsertMergedItemIndicesInHeap(const MergedItemIndices& merged_item_indices); 56 | /** 57 | * This function tries to update priority queue after yielding 58 | * @param merged_item_indices new indices we should try to push in priority queue 59 | */ 60 | void UpdatePrioirityQueue(MergedItemIndices& merged_item_indices); 61 | /** 62 | * Proirity queue for multilist algorithm 63 | */ 64 | multimap heap_; 65 | /** 66 | * Table with "1"-value for yielded items and "0"-value otherwise 67 | */ 68 | Multitable yielded_items_indices_; 69 | }; 70 | 71 | //////////////////// IMPLEMENTATION ////////////////////////////////////////// 72 | 73 | template 74 | OrderedListsMerger::OrderedListsMerger() { 75 | } 76 | 77 | template 78 | void OrderedListsMerger::InsertMergedItemIndicesInHeap(const MergedItemIndices& merged_item_indices) { 79 | OrderType sum = 0; 80 | for(int list_index = 0; list_index < lists_ptr->size(); ++list_index) { 81 | sum += lists_ptr->at(list_index)[merged_item_indices[list_index]].first; 82 | } 83 | heap_.insert(std::make_pair(sum, merged_item_indices)); 84 | } 85 | 86 | template 87 | void OrderedListsMerger::setLists(const vector > >& lists) { 88 | lists_ptr = &lists; 89 | heap_.clear(); 90 | MergedItemIndices first_item_indices(lists.size()); 91 | for(int list_index = 0; list_index < lists.size(); ++list_index) { 92 | first_item_indices[list_index] = 0; 93 | } 94 | 
memset(&(yielded_items_indices_.table[0]), 0, yielded_items_indices_.table.size()); 95 | InsertMergedItemIndicesInHeap(first_item_indices); 96 | } 97 | 98 | template 99 | void OrderedListsMerger::UpdatePrioirityQueue(MergedItemIndices& merged_item_indices) { 100 | for(int list_index = 0; list_index < lists_ptr->size(); ++list_index) { 101 | if(merged_item_indices[list_index] >= lists_ptr->at(list_index).size()) { 102 | return; 103 | } 104 | int current_index = merged_item_indices[list_index]; 105 | merged_item_indices[list_index] -= 1; 106 | if(current_index > 0 && !yielded_items_indices_.GetValue(merged_item_indices)) { 107 | merged_item_indices[list_index] += 1; 108 | return; 109 | } else { 110 | merged_item_indices[list_index] += 1; 111 | } 112 | } 113 | InsertMergedItemIndicesInHeap(merged_item_indices); 114 | } 115 | 116 | template 117 | inline bool OrderedListsMerger::GetNextMergedItemIndices(MergedItemIndices* next_merged_item_indices) { 118 | if(heap_.empty()) { 119 | return false; 120 | } 121 | *next_merged_item_indices = heap_.begin()->second; 122 | yielded_items_indices_.SetValue(1, *next_merged_item_indices); 123 | for(int list_index = 0; list_index < lists_ptr->size(); ++list_index) { 124 | next_merged_item_indices->at(list_index) += 1; 125 | UpdatePrioirityQueue(*next_merged_item_indices); 126 | next_merged_item_indices->at(list_index) -= 1; 127 | } 128 | heap_.erase(heap_.begin()); 129 | return true; 130 | } 131 | 132 | template class OrderedListsMerger; 133 | template class OrderedListsMerger >; -------------------------------------------------------------------------------- /perfomance_util.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2012 Yandex Artem Babenko 2 | #include "perfomance_util.h" 3 | 4 | extern string report_file; 5 | 6 | PerfTester::PerfTester() { 7 | report_file_ = report_file; 8 | current_points_count = 0; 9 | handled_queries_count = 0; 10 | cells_traversed = 0; 11 | 
nearest_subcentroids_time = 0; 12 | cache_init_time = 0; 13 | merger_init_time = 0; 14 | full_traversal_time = 0; 15 | cell_coordinates_time = 0; 16 | cell_edges_time = 0; 17 | residual_time = 0; 18 | refining_time = 0; 19 | full_search_time = 0; 20 | 21 | for(int i = 0; i < 21; ++i) { 22 | list_length_thresholds_.push_back(std::pow(2.0, i)); 23 | } 24 | current_threshold_index_ = 0; 25 | list_length_times_.resize(list_length_thresholds_.size(), 0.0); 26 | } 27 | 28 | void PerfTester::ResetQuerywiseStatistic() { 29 | current_threshold_index_ = 0; 30 | current_points_count = 0; 31 | } 32 | 33 | void PerfTester::NextNeighbour() { 34 | ++current_points_count; 35 | if(current_points_count >= list_length_thresholds_[current_threshold_index_]) { 36 | clock_t current_time = clock(); 37 | list_length_times_[current_threshold_index_] += current_time - search_start; 38 | ++current_threshold_index_; 39 | } 40 | } 41 | 42 | void PerfTester::DoReport(std::ofstream& out) { 43 | out << "Queries count: " 44 | << handled_queries_count << endl; 45 | out << "Average cells count: " 46 | << (double)cells_traversed / handled_queries_count << endl; 47 | out << "Average nearest subcentroids getting time: " 48 | << (double)nearest_subcentroids_time / handled_queries_count << endl; 49 | out << "Average cache init time: " 50 | << (double)cache_init_time / handled_queries_count << endl; 51 | out << "Average merger init time: " 52 | << (double)merger_init_time / handled_queries_count << endl; 53 | out << "Average full traversal time: " 54 | << (double)full_traversal_time / handled_queries_count << endl; 55 | out << "Average cells coordinates getting time: " 56 | << (double)cell_coordinates_time / handled_queries_count << endl; 57 | out << "Average cell edges getting time: " 58 | << (double)cell_edges_time/ handled_queries_count << endl; 59 | out << "Average residual time: " 60 | << (double)residual_time / handled_queries_count << endl; 61 | out << "Average refining time: " 62 | 
<<(double)refining_time / handled_queries_count << endl; 63 | out << "Average full search time: " 64 | << (double)full_search_time / handled_queries_count << endl; 65 | } 66 | 67 | void PerfTester::DoReport() { 68 | std::ofstream out(report_file_.c_str()); 69 | DoReport(out); 70 | } 71 | 72 | int GetRecallAt(const int length, const vector& groundtruth, 73 | const vector& result) { 74 | if(groundtruth.empty()) { 75 | cout << "Groundtruth is empty!" << endl; 76 | return 0; 77 | } 78 | for(int index = 0; index < length && index < result.size(); ++index) { 79 | if(result[index].second == groundtruth[0]) { 80 | return 1; 81 | } 82 | } 83 | return 0; 84 | } 85 | 86 | double GetPresicionAt(const int length, const set& groundtruth, 87 | const vector& result) { 88 | int found = 0; 89 | for(int index = 0; index < length && index < result.size() ; ++index) { 90 | if(groundtruth.find(result[index].second) != groundtruth.end()) { 91 | found += 1; 92 | } 93 | } 94 | return (double)found / length; 95 | } 96 | 97 | double GetRecall(const vector& groundtruth, 98 | const vector& result) { 99 | if(groundtruth.empty()) { 100 | cout << "Groundtruth is empty!" 
<< endl; 101 | return 0; 102 | } 103 | std::set returned_points; 104 | for(int i = 0; i < result.size(); ++i) { 105 | returned_points.insert(result[i].second); 106 | } 107 | double found = 0.0; 108 | for(int index = 0; index < groundtruth.size(); ++index) { 109 | if(returned_points.find(groundtruth[index]) != returned_points.end()) { 110 | found += 1; 111 | } 112 | } 113 | return found / groundtruth.size(); 114 | } -------------------------------------------------------------------------------- /perfomance_util.h: -------------------------------------------------------------------------------- 1 | /** @file */ 2 | // Copyright 2012 Yandex Artem Babenko 3 | #include 4 | #include 5 | #include 6 | 7 | #include "data_util.h" 8 | 9 | using std::cout; 10 | using std::endl; 11 | using std::ofstream; 12 | using std::pair; 13 | using std::set; 14 | using std::vector; 15 | 16 | /** 17 | * \typedef 18 | * Typedef for point identifier and distance from query 19 | */ 20 | typedef pair DistanceToPoint; 21 | 22 | /** 23 | * This simple class stores timing of search working process 24 | */ 25 | class PerfTester { 26 | public: 27 | PerfTester(); 28 | /** 29 | * Number of neighbours already found 30 | */ 31 | int current_points_count; 32 | /** 33 | * Pretty report of timing 34 | */ 35 | void DoReport(); 36 | /** 37 | * Reset all prevoius statistic before 38 | * new query handling 39 | */ 40 | void ResetQuerywiseStatistic(); 41 | /** 42 | * Signal about next point 43 | */ 44 | void NextNeighbour(); 45 | /** 46 | * Number of handled queries 47 | */ 48 | int handled_queries_count; 49 | /** 50 | * Number of traversed items of multiindex 51 | */ 52 | int cells_traversed; 53 | unsigned long long nearest_subcentroids_time; 54 | unsigned long long cache_init_time; 55 | unsigned long long merger_init_time; 56 | unsigned long long full_traversal_time; 57 | unsigned long long cell_coordinates_time; 58 | unsigned long long cell_edges_time; 59 | unsigned long long residual_time; 60 | unsigned 
long long refining_time; 61 | unsigned long long full_search_time; 62 | unsigned long long search_start; 63 | private: 64 | string report_file_; 65 | void DoReport(ofstream& out); 66 | vector list_length_thresholds_; 67 | int current_threshold_index_; 68 | vector list_length_times_; 69 | }; 70 | 71 | /** 72 | * This function returns recall at specified length 73 | * @param length specified size of search results 74 | * @param groundtruth groundtruth 75 | * @param result search results 76 | */ 77 | int GetRecallAt(const int length, const vector& groundtruth, 78 | const vector& result); 79 | /** 80 | * This function returns precision at specified length 81 | * @param length specified size of search results 82 | * @param groundtruth groundtruth 83 | * @param result search results 84 | */ 85 | double GetPresicionAt(const int length, const set& groundtruth, 86 | const vector& result); 87 | 88 | /** 89 | * This function returns recall at full length 90 | */ 91 | double GetRecall(const vector& groundtruth, 92 | const vector& result); -------------------------------------------------------------------------------- /run_indexer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import datetime 4 | import sys 5 | 6 | ############# launch configuration ################# 7 | 8 | # folder to keep built binaries in 9 | build_folder = './build_master' 10 | 11 | # number of threads to use (max = 32) 12 | threads_count = 32 13 | 14 | # Multi-1 or Multi-2 or Multi-4 15 | multiplicity = 2 16 | 17 | # Folder with BigAnn base 18 | bigann_root = '/sata/ResearchData/BigAnn' 19 | 20 | # input point type (BVEC or FVEC) 21 | input_type = 'BVEC' 22 | 23 | # file with input point (.bvecs or .fvecs) 24 | points_file = 'sift1M.bvecs' 25 | 26 | # prefix of all vocabs, coarse quantizations, etc. 
27 | prefix = 'sift1M' 28 | 29 | # input points count 30 | points_count = 1000000 31 | 32 | # dimension of input space 33 | space_dim = 128 34 | 35 | # coarse vocabs size 36 | coarse_vocabs_size = 16384 37 | 38 | # fine vocabs count 39 | fine_vocabs_count = 8 40 | 41 | # should we use residuals? 42 | use_residuals = 1 43 | 44 | # should we calculate coarse quantizations? 45 | build_coarse = 1 46 | 47 | # postfix added by users to all multiindex files 48 | user_added_postfix = '' 49 | 50 | ################################################## 51 | 52 | multiplicity_extension = '' 53 | if multiplicity == 1: 54 | multiplicity_extension = 'single' 55 | if multiplicity == 2: 56 | multiplicity_extension = 'double' 57 | if multiplicity == 4: 58 | multiplicity_extension = 'quad' 59 | 60 | coarse_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '.dat' 61 | fine_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '.dat' 62 | filename_prefix = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + user_added_postfix 63 | coarse_quantization_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + user_added_postfix + '_coarse_quantizations.bin' 64 | 65 | launch_time = datetime.datetime.now().strftime("%I_%M%p_%B_%d_%Y") 66 | os.system('mkdir -p ' + build_folder + '/' + launch_time) 67 | os.system('cp ' + build_folder + '/indexer_launcher ' + build_folder + '/' + launch_time) 68 | os.system('cp run_indexer.py ' + build_folder + '/' + launch_time) 69 | 70 | launch_line = build_folder + '/' + launch_time + '/indexer_launcher ' 71 | launch_line = launch_line + '--threads_count=' + str(threads_count) + ' ' 72 | launch_line = launch_line + '--multiplicity=' + str(multiplicity) + ' ' 73 | launch_line = launch_line + '--points_file=' + bigann_root + '/bases/' + points_file + ' ' 74 | launch_line = 
launch_line + '--coarse_vocabs_file=' + bigann_root + '/coarse_vocabs/' + coarse_vocabs_filename + ' ' 75 | launch_line = launch_line + '--fine_vocabs_file=' + bigann_root + '/fine_vocabs/' + fine_vocabs_filename + ' ' 76 | launch_line = launch_line + '--input_point_type=' + input_type + ' ' 77 | launch_line = launch_line + '--points_count=' + str(points_count) + ' ' 78 | launch_line = launch_line + '--space_dim=' + str(space_dim) + ' ' 79 | launch_line = launch_line + '--files_prefix=' + bigann_root + '/indices/' + filename_prefix + ' ' 80 | launch_line = launch_line + '--coarse_quantization_file=' + bigann_root + '/cq/' + coarse_quantization_filename + ' ' 81 | launch_line = launch_line + '--metainfo_file=fake.txt' + ' ' 82 | if use_residuals: 83 | launch_line = launch_line + '--use_residuals' + ' ' 84 | if build_coarse: 85 | launch_line = launch_line + '--build_coarse' + ' ' 86 | 87 | f = open(build_folder + '/' + launch_time + '/launch.sh', 'w') 88 | f.write(launch_line) 89 | f.close() 90 | os.system('nohup ' + launch_line + ' > ' + build_folder + '/' + launch_time + '/log.txt' + ' &') 91 | print 'Log file: ' + build_folder + '/' + launch_time + '/log.txt' 92 | -------------------------------------------------------------------------------- /run_indexer_vlad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import datetime 4 | import sys 5 | 6 | ############# launch configuration ################# 7 | 8 | # folder to keep built binaries in 9 | build_folder = './build_master' 10 | 11 | # number of threads to use (max = 32) 12 | threads_count = 32 13 | 14 | # Multi-1 or Multi-2 or Multi-4 15 | multiplicity = 2 16 | 17 | # Folder with BigAnn base 18 | bigann_root = '/sata/ResearchData/BigAnn' 19 | 20 | # input point type (BVEC or FVEC) 21 | input_type = 'FVEC' 22 | 23 | # file with input point (.bvecs or .fvecs) 24 | points_file = 'vlad500K_base.fvecs' 25 | 26 | # prefix of all vocabs, 
coarse quantizations, etc. 27 | prefix = 'vlad500K' 28 | 29 | # input points count 30 | points_count = 500000 31 | 32 | # dimension of input space 33 | space_dim = 128 34 | 35 | # coarse vocabs size 36 | coarse_vocabs_size = 4096 37 | 38 | # fine vocabs count 39 | fine_vocabs_count = 8 40 | 41 | # should we use residuals? 42 | use_residuals = 1 43 | 44 | # should we calculate coarse quantizations? 45 | build_coarse = 1 46 | 47 | # postfix added by users to all multiindex files 48 | user_added_postfix = '' 49 | 50 | ################################################## 51 | 52 | multiplicity_extension = '' 53 | if multiplicity == 1: 54 | multiplicity_extension = 'single' 55 | if multiplicity == 2: 56 | multiplicity_extension = 'double' 57 | if multiplicity == 4: 58 | multiplicity_extension = 'quad' 59 | 60 | coarse_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '.dat' 61 | fine_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '.dat' 62 | filename_prefix = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + user_added_postfix 63 | coarse_quantization_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + user_added_postfix + '_coarse_quantizations.bin' 64 | 65 | launch_time = datetime.datetime.now().strftime("%I_%M%p_%B_%d_%Y") 66 | os.system('mkdir -p ' + build_folder + '/' + launch_time) 67 | os.system('cp ' + build_folder + '/indexer_launcher ' + build_folder + '/' + launch_time) 68 | os.system('cp run_indexer_vlad.py ' + build_folder + '/' + launch_time) 69 | 70 | launch_line = build_folder + '/' + launch_time + '/indexer_launcher ' 71 | launch_line = launch_line + '--threads_count=' + str(threads_count) + ' ' 72 | launch_line = launch_line + '--multiplicity=' + str(multiplicity) + ' ' 73 | launch_line = launch_line + '--points_file=' + bigann_root + '/bases/' + points_file + 
' ' 74 | launch_line = launch_line + '--coarse_vocabs_file=' + bigann_root + '/coarse_vocabs/' + coarse_vocabs_filename + ' ' 75 | launch_line = launch_line + '--fine_vocabs_file=' + bigann_root + '/fine_vocabs/' + fine_vocabs_filename + ' ' 76 | launch_line = launch_line + '--input_point_type=' + input_type + ' ' 77 | launch_line = launch_line + '--points_count=' + str(points_count) + ' ' 78 | launch_line = launch_line + '--space_dim=' + str(space_dim) + ' ' 79 | launch_line = launch_line + '--files_prefix=' + bigann_root + '/indices/' + filename_prefix + ' ' 80 | launch_line = launch_line + '--coarse_quantization_file=' + bigann_root + '/cq/' + coarse_quantization_filename + ' ' 81 | launch_line = launch_line + '--metainfo_file=fake.txt' + ' ' 82 | if use_residuals: 83 | launch_line = launch_line + '--use_residuals' + ' ' 84 | if build_coarse: 85 | launch_line = launch_line + '--build_coarse' + ' ' 86 | 87 | f = open(build_folder + '/' + launch_time + '/launch.sh', 'w') 88 | f.write(launch_line) 89 | f.close() 90 | os.system('nohup ' + launch_line + ' > ' + build_folder + '/' + launch_time + '/log.txt' + ' &') 91 | print 'Log file: ' + build_folder + '/' + launch_time + '/log.txt' 92 | -------------------------------------------------------------------------------- /run_searcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import datetime 4 | 5 | ############# launch configuration ################# 6 | 7 | # folder to keep built binaries in 8 | build_folder = './build_master' 9 | 10 | 11 | # Folder with BigAnn base 12 | bigann_root = '/sata/ResearchData/BigAnn' 13 | 14 | # input query point type (BVEC or FVEC) 15 | query_input_type = 'BVEC' 16 | 17 | # Multi-1 or Multi-2 or Multi-4 18 | multiplicity = 2 19 | 20 | # prefix of all vocabs, coarse quantizations, etc. 
21 | prefix = 'sift1M' 22 | 23 | # dimension of input space 24 | space_dim = 128 25 | 26 | # coarse vocabs size 27 | coarse_vocabs_size = 4096 28 | 29 | # fine vocabs count 30 | fine_vocabs_count = 8 31 | 32 | # should we use residuals? 33 | use_residuals = 1 34 | 35 | # number of centroids handled in each subdimension 36 | subspace_centroids_count = 1024 37 | 38 | # queries file 39 | queries_file = 'sift1B_queries.bvecs' 40 | 41 | # groundtruth file 42 | gnd_file = 'sift1M_groundtruth.ivecs' 43 | 44 | # number of queries 45 | queries_count = 1000 46 | 47 | # number of neighbors to seek 48 | neighbors_count = 10000 49 | 50 | # should we rerank ? 51 | do_rerank = 1 52 | 53 | # postfix added by users to all multiindex files 54 | user_added_postfix = '' 55 | 56 | ################################################## 57 | 58 | multiplicity_extension = '' 59 | if multiplicity == 1: 60 | multiplicity_extension = 'single' 61 | if multiplicity == 2: 62 | multiplicity_extension = 'double' 63 | if multiplicity == 4: 64 | multiplicity_extension = 'quad' 65 | 66 | coarse_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '.dat' 67 | fine_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '.dat' 68 | filename_prefix = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + user_added_postfix 69 | 70 | launch_time = datetime.datetime.now().strftime("%I_%M_%S%p_%B_%d_%Y") 71 | os.system('mkdir -p ' + build_folder + '/' + launch_time) 72 | os.system('cp ' + build_folder + '/searcher_tester ' + build_folder + '/' + launch_time) 73 | os.system('cp run_searcher.py ' + build_folder + '/' + launch_time) 74 | report_filename = build_folder + '/' + launch_time + '/report' 75 | 76 | launch_line = build_folder + '/' + launch_time + '/searcher_tester ' 77 | launch_line = launch_line + '--queries_file=' + bigann_root + '/bases/' + 
queries_file + ' ' 78 | launch_line = launch_line + '--groundtruth_file=' + bigann_root + '/gnd/' + gnd_file + ' ' 79 | launch_line = launch_line + '--coarse_vocabs_file=' + bigann_root + '/coarse_vocabs/' + coarse_vocabs_filename + ' ' 80 | launch_line = launch_line + '--fine_vocabs_file=' + bigann_root + '/fine_vocabs/' + fine_vocabs_filename + ' ' 81 | launch_line = launch_line + '--query_point_type=' + query_input_type + ' ' 82 | launch_line = launch_line + '--queries_count=' + str(queries_count) + ' ' 83 | launch_line = launch_line + '--neighbours_count=' + str(neighbors_count) + ' ' 84 | launch_line = launch_line + '--subspaces_centroids_count=' + str(subspace_centroids_count) + ' ' 85 | launch_line = launch_line + '--space_dim=' + str(space_dim) + ' ' 86 | launch_line = launch_line + '--index_files_prefix=' + bigann_root + '/indices/' + filename_prefix + ' ' 87 | launch_line = launch_line + '--report_file=' + report_filename + ' ' 88 | if use_residuals: 89 | launch_line = launch_line + '--use_residuals' + ' ' 90 | if do_rerank: 91 | launch_line = launch_line + '--do_rerank' + ' ' 92 | 93 | f = open(build_folder + '/' + launch_time + '/launch.sh', 'w') 94 | f.write(launch_line) 95 | f.close() 96 | log_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '_' + str(neighbors_count) + '.txt' 97 | os.system('nohup ' + launch_line + ' > ' + build_folder + '/' + launch_time + '/' + log_filename + ' &') 98 | print 'Log file: ' + build_folder + '/' + launch_time + '/' + log_filename 99 | -------------------------------------------------------------------------------- /run_searcher_vlad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import datetime 4 | 5 | ############# launch configuration ################# 6 | 7 | # folder to keep built binaries in 8 | build_folder = './build_master' 9 | 10 | 11 | # Folder with BigAnn base 12 | 
bigann_root = '/sata/ResearchData/BigAnn' 13 | 14 | # input query point type (BVEC or FVEC) 15 | query_input_type = 'FVEC' 16 | 17 | # Multi-1 or Multi-2 or Multi-4 18 | multiplicity = 2 19 | 20 | # prefix of all vocabs, coarse quantizations, etc. 21 | prefix = 'vlad500K' 22 | 23 | # dimension of input space 24 | space_dim = 128 25 | 26 | # coarse vocabs size 27 | coarse_vocabs_size = 4096 28 | 29 | # fine vocabs count 30 | fine_vocabs_count = 8 31 | 32 | # should we use residuals? 33 | use_residuals = 1 34 | 35 | # number of centroids handled in each subdimension 36 | subspace_centroids_count = 4096 37 | 38 | # queries file 39 | queries_file = 'vlad1M_queries.fvecs' 40 | 41 | # groundtruth file 42 | gnd_file = 'vlad500K_groundtruth.ivecs' 43 | 44 | # number of queries 45 | queries_count = 1000 46 | 47 | # number of neighbors to seek 48 | neighbors_count = 10000 49 | 50 | # should we rerank ? 51 | do_rerank = 0 52 | 53 | # postfix added by users to all multiindex files 54 | user_added_postfix = '' 55 | 56 | ################################################## 57 | 58 | multiplicity_extension = '' 59 | if multiplicity == 1: 60 | multiplicity_extension = 'single' 61 | if multiplicity == 2: 62 | multiplicity_extension = 'double' 63 | if multiplicity == 4: 64 | multiplicity_extension = 'quad' 65 | 66 | coarse_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '.dat' 67 | fine_vocabs_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '.dat' 68 | filename_prefix = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + user_added_postfix 69 | 70 | launch_time = datetime.datetime.now().strftime("%I_%M_%S%p_%B_%d_%Y") 71 | os.system('mkdir -p ' + build_folder + '/' + launch_time) 72 | os.system('cp ' + build_folder + '/searcher_tester ' + build_folder + '/' + launch_time) 73 | os.system('cp run_searcher_vlad.py ' + build_folder 
+ '/' + launch_time) 74 | report_filename = build_folder + '/' + launch_time + '/report' 75 | 76 | launch_line = build_folder + '/' + launch_time + '/searcher_tester ' 77 | launch_line = launch_line + '--queries_file=' + bigann_root + '/bases/' + queries_file + ' ' 78 | launch_line = launch_line + '--groundtruth_file=' + bigann_root + '/gnd/' + gnd_file + ' ' 79 | launch_line = launch_line + '--coarse_vocabs_file=' + bigann_root + '/coarse_vocabs/' + coarse_vocabs_filename + ' ' 80 | launch_line = launch_line + '--fine_vocabs_file=' + bigann_root + '/fine_vocabs/' + fine_vocabs_filename + ' ' 81 | launch_line = launch_line + '--query_point_type=' + query_input_type + ' ' 82 | launch_line = launch_line + '--queries_count=' + str(queries_count) + ' ' 83 | launch_line = launch_line + '--neighbours_count=' + str(neighbors_count) + ' ' 84 | launch_line = launch_line + '--subspaces_centroids_count=' + str(subspace_centroids_count) + ' ' 85 | launch_line = launch_line + '--space_dim=' + str(space_dim) + ' ' 86 | launch_line = launch_line + '--index_files_prefix=' + bigann_root + '/indices/' + filename_prefix + ' ' 87 | launch_line = launch_line + '--report_file=' + report_filename + ' ' 88 | if use_residuals: 89 | launch_line = launch_line + '--use_residuals' + ' ' 90 | if do_rerank: 91 | launch_line = launch_line + '--do_rerank' + ' ' 92 | 93 | f = open(build_folder + '/' + launch_time + '/launch.sh', 'w') 94 | f.write(launch_line) 95 | f.close() 96 | log_filename = prefix + '_' + multiplicity_extension + '_' + str(coarse_vocabs_size) + '_' + str(fine_vocabs_count) + '_' + str(neighbors_count) + '.txt' 97 | os.system('nohup ' + launch_line + ' > ' + build_folder + '/' + launch_time + '/' + log_filename + ' &') 98 | print 'Log file: ' + build_folder + '/' + launch_time + '/' + log_filename 99 | -------------------------------------------------------------------------------- /searcher.h: -------------------------------------------------------------------------------- 1 | 
/** @file */ 2 | // Copyright 2012 Yandex Artem Babenko 3 | #ifndef SEARCHER_H_ 4 | #define SEARCHER_H_ 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include "data_util.h" 19 | #include "ordered_lists_merger.h" 20 | #include "perfomance_util.h" 21 | 22 | extern int THREADS_COUNT; 23 | 24 | extern Dimensions SPACE_DIMENSION; 25 | 26 | extern enum PointType point_type; 27 | 28 | /** 29 | * \typedef This typedef is used in the first stage of search when 30 | * we get nearest centroids for each coarse subpace 31 | */ 32 | typedef vector > NearestSubspaceCentroids; 33 | 34 | /** 35 | * This is the main class for nearest neighbour search using multiindex 36 | */ 37 | template 38 | class MultiSearcher { 39 | public: 40 | /** 41 | * Default constructor 42 | */ 43 | MultiSearcher(); 44 | /** 45 | * Initiation function 46 | * @param index_files_prefix prefix of multiindex files providing the search 47 | * @param coarse_vocabs_filename file with coarse vocabs 48 | * @param fine_vocabs_filename file with fine vocabs for reranking 49 | * @param mode reranking approach 50 | * @param do_rerank should algorithm rerank short list or not 51 | */ 52 | void Init(const string& index_files_prefix, 53 | const string& coarse_vocabs_filename, 54 | const string& fine_vocabs_filename, 55 | const RerankMode& mode, 56 | const int subspace_centroids_to_consider, 57 | bool do_rerank); 58 | /** 59 | * Main interface function 60 | * @param point query point 61 | * @param k number of neighbours to get 62 | * @param subpace_centroids_to_consider it defines the size of working index table 63 | * @param neighbours result - vector of point identifiers ordered by increasing of distance to query 64 | */ 65 | void GetNearestNeighbours(const Point& point, int k, 66 | vector >* neighbours) const; 67 | /** 68 | * Returns searcher perfomance tester 69 | */ 70 | PerfTester& GetPerfTester(); 71 | private: 72 | /** 73 | * 
This functions deserializes all structures for search 74 | * @param index_files_prefix prefix of multiindex files providing the search 75 | * @param coarse_vocabs_filename file with coarse vocabs 76 | * @param fine_vocabs_filename file with fine vocabs for reranking 77 | */ 78 | void DeserializeData(const string& index_files_prefix, 79 | const string& coarse_vocabs_filename, 80 | const string& fine_vocabs_filename); 81 | /** 82 | * Function gets some nearest centroids for each coarse subspace 83 | * @param point query point 84 | * @param subspace_centroins_count how many nearest subcentroids to get 85 | * @param subspaces_short_lists result 86 | */ 87 | void GetNearestSubspacesCentroids(const Point& point, 88 | const int subspace_centroins_count, 89 | vector* subspaces_short_lists) const; 90 | 91 | /** 92 | * This fuctions traverses another cell of multiindex table 93 | * @param point query point 94 | * @param nearest_subpoints vector algorithm adds nearest neighbours in 95 | */ 96 | bool TraverseNextMultiIndexCell(const Point& point, 97 | vector >* nearest_subpoints) const; 98 | /** 99 | * This fuctions converts cells coordinates to appropriate range in array 100 | * @param cell_coordinates coordinates of the cell 101 | * @param cell_start first index of range 102 | * @param cell_finish last index of range 103 | */ 104 | inline void GetCellEdgesInMultiIndexArray(const vector& cell_coordinates, 105 | int* cell_start, int* cell_finish) const; 106 | /** 107 | * This fuctions converts complex objects to arrays and 108 | * pointers for usage in BLAS 109 | */ 110 | void InitBlasStructures(); 111 | /** 112 | * Lists of coarse centroids 113 | */ 114 | vector coarse_vocabs_; 115 | /** 116 | * Lists of fine centroids 117 | */ 118 | vector fine_vocabs_; 119 | /** 120 | * Merger for ordered merging subspaces centroids lists 121 | */ 122 | mutable OrderedListsMerger merger_; 123 | /** 124 | * Should algorithm use reranking or not 125 | */ 126 | bool do_rerank_; 127 | /** 128 | 
* Searcher perfomance tester 129 | */ 130 | mutable PerfTester perf_tester_; 131 | /** 132 | * Common prefix of every index files 133 | */ 134 | string index_files_prefix_; 135 | /** 136 | * Multiindex data structures 137 | */ 138 | MultiIndex multiindex_; 139 | /** 140 | * Reranking approach 141 | */ 142 | RerankMode rerank_mode_; 143 | /** 144 | * Struct for BLAS 145 | */ 146 | vector coarse_vocabs_matrices_; 147 | /** 148 | * Struct for BLAS 149 | */ 150 | vector > coarse_centroids_norms_; 151 | /** 152 | * Struct for BLAS 153 | */ 154 | mutable Coord* products_; 155 | /** 156 | * Struct for BLAS 157 | */ 158 | mutable vector query_norms_; 159 | /** 160 | * Struct for BLAS 161 | */ 162 | mutable float* residual_; 163 | /** 164 | * Number of nearest to query centroids 165 | * to consider for each dimension 166 | */ 167 | int subspace_centroids_to_consider_; 168 | /** 169 | * Number of neighbours found to this moment 170 | */ 171 | mutable int found_neghbours_count_; 172 | }; 173 | 174 | template 175 | inline void RecordToMetainfoAndDistance(const Coord* point, 176 | const Record& record, 177 | pair* result, 178 | const vector& cell_coordinates, 179 | const vector& fine_vocabs) { 180 | } 181 | 182 | /////////////// IMPLEMENTATION ///////////////////// 183 | 184 | template 185 | MultiSearcher::MultiSearcher() { 186 | } 187 | 188 | template 189 | void MultiSearcher::DeserializeData(const string& index_files_prefix, 190 | const string& coarse_vocabs_filename, 191 | const string& fine_vocabs_filename) { 192 | cout << "Data deserializing started...\n"; 193 | ifstream cell_edges(string(index_files_prefix + "_cell_edges.bin").c_str(), ios::binary); 194 | if(!cell_edges.good()) { 195 | throw std::logic_error("Bad input cell edges stream"); 196 | } 197 | boost::archive::binary_iarchive arc_cell_edges(cell_edges); 198 | arc_cell_edges >> multiindex_.cell_edges; 199 | cout << "Cell edges deserialized...\n"; 200 | ifstream multi_array(string(index_files_prefix + 
"_multi_array.bin").c_str(), ios::binary); 201 | if(!multi_array.good()) { 202 | throw std::logic_error("Bad input cell edges stream"); 203 | } 204 | boost::archive::binary_iarchive arc_multi_array(multi_array); 205 | arc_multi_array >> multiindex_.multiindex; 206 | cout << "Multiindex deserialized...\n"; 207 | ReadVocabularies(coarse_vocabs_filename, SPACE_DIMENSION, &coarse_vocabs_); 208 | cout << "Coarse vocabs deserialized...\n"; 209 | ReadFineVocabs(fine_vocabs_filename, &fine_vocabs_); 210 | cout << "Fine vocabs deserialized...\n"; 211 | } 212 | 213 | template 214 | void MultiSearcher::Init(const string& index_files_prefix, 215 | const string& coarse_vocabs_filename, 216 | const string& fine_vocabs_filename, 217 | const RerankMode& mode, 218 | const int subspace_centroids_to_consider, 219 | const bool do_rerank) { 220 | do_rerank_ = do_rerank; 221 | index_files_prefix_ = index_files_prefix; 222 | subspace_centroids_to_consider_ = subspace_centroids_to_consider; 223 | DeserializeData(index_files_prefix, coarse_vocabs_filename, fine_vocabs_filename); 224 | rerank_mode_ = mode; 225 | merger_.GetYieldedItems().table.resize(std::pow((float)subspace_centroids_to_consider, 226 | (int)coarse_vocabs_.size())); 227 | for(int i = 0; i < coarse_vocabs_.size(); ++i) { 228 | merger_.GetYieldedItems().dimensions.push_back(subspace_centroids_to_consider); 229 | } 230 | InitBlasStructures(); 231 | } 232 | 233 | template 234 | void MultiSearcher::InitBlasStructures(){ 235 | coarse_vocabs_matrices_.resize(coarse_vocabs_.size()); 236 | coarse_centroids_norms_.resize(coarse_vocabs_.size(), vector(coarse_vocabs_[0].size())); 237 | for(int coarse_id = 0; coarse_id < coarse_vocabs_matrices_.size(); ++coarse_id) { 238 | coarse_vocabs_matrices_[coarse_id] = new float[coarse_vocabs_[0].size() * coarse_vocabs_[0][0].size()]; 239 | for(int i = 0; i < coarse_vocabs_[0].size(); ++i) { 240 | Coord norm = 0; 241 | for(int j = 0; j < coarse_vocabs_[0][0].size(); ++j) { 242 | 
coarse_vocabs_matrices_[coarse_id][coarse_vocabs_[0][0].size() * i + j] = coarse_vocabs_[coarse_id][i][j]; 243 | norm += coarse_vocabs_[coarse_id][i][j] * coarse_vocabs_[coarse_id][i][j]; 244 | } 245 | coarse_centroids_norms_[coarse_id][i] = norm; 246 | } 247 | } 248 | products_ = new Coord[coarse_vocabs_[0].size()]; 249 | query_norms_.resize(coarse_vocabs_[0].size()); 250 | residual_ = new Coord[coarse_vocabs_[0][0].size() * coarse_vocabs_.size()]; 251 | } 252 | 253 | template 254 | PerfTester& MultiSearcher::GetPerfTester() { 255 | return perf_tester_; 256 | } 257 | 258 | template 259 | void MultiSearcher::GetNearestSubspacesCentroids(const Point& point, 260 | const int subspace_centroins_count, 261 | vector* 262 | subspaces_short_lists) const { 263 | std::stringstream aa; 264 | subspaces_short_lists->resize(coarse_vocabs_.size()); 265 | Dimensions subspace_dimension = point.size() / coarse_vocabs_.size(); 266 | for(int subspace_index = 0; subspace_index < coarse_vocabs_.size(); ++subspace_index) { 267 | Dimensions start_dim = subspace_index * subspace_dimension; 268 | Dimensions final_dim = std::min((Dimensions)point.size(), start_dim + subspace_dimension); 269 | Coord query_norm = cblas_sdot(final_dim - start_dim, &(point[start_dim]), 1, &(point[start_dim]), 1); 270 | std::fill(query_norms_.begin(), query_norms_.end(), query_norm); 271 | cblas_saxpy(coarse_vocabs_[0].size(), 1, &(coarse_centroids_norms_[subspace_index][0]), 1, &(query_norms_[0]), 1); 272 | cblas_sgemv(CblasRowMajor, CblasNoTrans, coarse_vocabs_[0].size(), subspace_dimension, -2.0, 273 | coarse_vocabs_matrices_[subspace_index], subspace_dimension, &(point[start_dim]), 1, 1, &(query_norms_[0]), 1); 274 | subspaces_short_lists->at(subspace_index).resize(query_norms_.size()); 275 | for(int i = 0; i < query_norms_.size(); ++i) { 276 | subspaces_short_lists->at(subspace_index)[i] = std::make_pair(query_norms_[i], i); 277 | } 278 | std::nth_element(subspaces_short_lists->at(subspace_index).begin(), 
279 | subspaces_short_lists->at(subspace_index).begin() + subspace_centroins_count, 280 | subspaces_short_lists->at(subspace_index).end()); 281 | subspaces_short_lists->at(subspace_index).resize(subspace_centroins_count); 282 | std::sort(subspaces_short_lists->at(subspace_index).begin(), 283 | subspaces_short_lists->at(subspace_index).end()); 284 | } 285 | } 286 | 287 | template 288 | void MultiSearcher::GetCellEdgesInMultiIndexArray(const vector& cell_coordinates, 289 | int* cell_start, int* cell_finish) const { 290 | int global_index = multiindex_.cell_edges.GetCellGlobalIndex(cell_coordinates); 291 | *cell_start = multiindex_.cell_edges.table[global_index]; 292 | if(global_index + 1 == multiindex_.cell_edges.table.size()) { 293 | *cell_finish = multiindex_.multiindex.size(); 294 | } else { 295 | *cell_finish = multiindex_.cell_edges.table[global_index + 1]; 296 | } 297 | } 298 | 299 | template 300 | bool MultiSearcher::TraverseNextMultiIndexCell(const Point& point, 301 | vector >* 302 | nearest_subpoints) const { 303 | MergedItemIndices cell_inner_indices; 304 | clock_t before = clock(); 305 | if(!merger_.GetNextMergedItemIndices(&cell_inner_indices)) { 306 | return false; 307 | } 308 | clock_t after = clock(); 309 | perf_tester_.cell_coordinates_time += after - before; 310 | vector cell_coordinates(cell_inner_indices.size()); 311 | for(int list_index = 0; list_index < merger_.lists_ptr->size(); ++list_index) { 312 | cell_coordinates[list_index] = merger_.lists_ptr->at(list_index)[cell_inner_indices[list_index]].second; 313 | } 314 | int cell_start, cell_finish; 315 | before = clock(); 316 | GetCellEdgesInMultiIndexArray(cell_coordinates, &cell_start, &cell_finish); 317 | after = clock(); 318 | perf_tester_.cell_edges_time += after - before; 319 | if(cell_start >= cell_finish) { 320 | return true; 321 | } 322 | typename vector::const_iterator it = multiindex_.multiindex.begin() + cell_start; 323 | GetResidual(point, cell_coordinates, coarse_vocabs_, residual_); 
324 | cell_finish = std::min((int)cell_finish, cell_start + (int)nearest_subpoints->size() - found_neghbours_count_); 325 | for(int array_index = cell_start; array_index < cell_finish; ++array_index) { 326 | if(rerank_mode_ == USE_RESIDUALS) { 327 | RecordToMetainfoAndDistance(residual_, *it, 328 | &(nearest_subpoints->at(found_neghbours_count_)), 329 | cell_coordinates, fine_vocabs_); 330 | } else if(rerank_mode_ == USE_INIT_POINTS) { 331 | RecordToMetainfoAndDistance(&(point[0]), *it, 332 | &(nearest_subpoints->at(found_neghbours_count_)), 333 | cell_coordinates, fine_vocabs_); 334 | } 335 | perf_tester_.NextNeighbour(); 336 | ++found_neghbours_count_; 337 | ++it; 338 | } 339 | return true; 340 | } 341 | 342 | 343 | template 344 | void MultiSearcher::GetNearestNeighbours(const Point& point, int k, 345 | vector >* neighbours) const { 346 | assert(k > 0); 347 | perf_tester_.handled_queries_count += 1; 348 | neighbours->resize(k); 349 | perf_tester_.ResetQuerywiseStatistic(); 350 | clock_t start = clock(); 351 | perf_tester_.search_start = start; 352 | clock_t before = clock(); 353 | vector subspaces_short_lists; 354 | assert(subspace_centroids_to_consider_ > 0); 355 | GetNearestSubspacesCentroids(point, subspace_centroids_to_consider_, &subspaces_short_lists); 356 | clock_t after = clock(); 357 | perf_tester_.nearest_subcentroids_time += after - before; 358 | clock_t before_merger = clock(); 359 | merger_.setLists(subspaces_short_lists); 360 | clock_t after_merger = clock(); 361 | perf_tester_.merger_init_time += after_merger - before_merger; 362 | clock_t before_traversal = clock(); 363 | found_neghbours_count_ = 0; 364 | bool traverse_next_cell = true; 365 | int cells_visited = 0; 366 | while(found_neghbours_count_ < k && traverse_next_cell) { 367 | perf_tester_.cells_traversed += 1; 368 | traverse_next_cell = TraverseNextMultiIndexCell(point, neighbours); 369 | cells_visited += 1; 370 | } 371 | clock_t after_traversal = clock(); 372 | 
perf_tester_.full_traversal_time += after_traversal - before_traversal; 373 | if(do_rerank_) { 374 | std::sort(neighbours->begin(), neighbours->end()); 375 | } 376 | clock_t finish = clock(); 377 | perf_tester_.full_search_time += finish - start; 378 | } 379 | 380 | template<> 381 | inline void RecordToMetainfoAndDistance(const Coord* point, const RerankADC8& record, 382 | pair* result, 383 | const vector& cell_coordinates, 384 | const vector& fine_vocabs) { 385 | result->second = record.pid; 386 | int coarse_clusters_count = cell_coordinates.size(); 387 | int fine_clusters_count = fine_vocabs.size(); 388 | int coarse_to_fine_ratio = fine_clusters_count / coarse_clusters_count; 389 | int subvectors_dim = SPACE_DIMENSION / fine_clusters_count; 390 | char* rerank_info_ptr = (char*)&record + sizeof(record.pid); 391 | for(int centroid_index = 0; centroid_index < fine_clusters_count; ++centroid_index) { 392 | int start_dim = centroid_index * subvectors_dim; 393 | int final_dim = start_dim + subvectors_dim; 394 | FineClusterId pid_nearest_centroid = *((FineClusterId*)rerank_info_ptr); 395 | rerank_info_ptr += sizeof(FineClusterId); 396 | int current_coarse_index = centroid_index / coarse_to_fine_ratio; 397 | Distance subvector_distance = 0; 398 | for(int i = start_dim; i < final_dim; ++i) { 399 | Coord diff = fine_vocabs[centroid_index][pid_nearest_centroid][i - start_dim] - point[i]; 400 | subvector_distance += diff * diff; 401 | } 402 | result->first += subvector_distance; 403 | } 404 | } 405 | 406 | template<> 407 | inline void RecordToMetainfoAndDistance(const Coord* point, const RerankADC16& record, 408 | pair* result, 409 | const vector& cell_coordinates, 410 | const vector& fine_vocabs) { 411 | result->second = record.pid; 412 | int coarse_clusters_count = cell_coordinates.size(); 413 | int fine_clusters_count = fine_vocabs.size(); 414 | int coarse_to_fine_ratio = fine_clusters_count / coarse_clusters_count; 415 | int subvectors_dim = SPACE_DIMENSION / 
fine_clusters_count; 416 | char* rerank_info_ptr = (char*)&record + sizeof(record.pid); 417 | for(int centroid_index = 0; centroid_index < fine_clusters_count; ++centroid_index) { 418 | int start_dim = centroid_index * subvectors_dim; 419 | int final_dim = start_dim + subvectors_dim; 420 | FineClusterId pid_nearest_centroid = *((FineClusterId*)rerank_info_ptr); 421 | rerank_info_ptr += sizeof(FineClusterId); 422 | int current_coarse_index = centroid_index / coarse_to_fine_ratio; 423 | Distance subvector_distance = 0; 424 | for(int i = start_dim; i < final_dim; ++i) { 425 | Coord diff = fine_vocabs[centroid_index][pid_nearest_centroid][i - start_dim] - point[i]; 426 | subvector_distance += diff * diff; 427 | } 428 | result->first += subvector_distance; 429 | } 430 | } 431 | 432 | template class MultiSearcher; 433 | template class MultiSearcher; 434 | template class MultiSearcher; 435 | 436 | #endif 437 | 438 | -------------------------------------------------------------------------------- /searcher_tester.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2012 Yandex Artem Babenko 2 | #include 3 | 4 | #include 5 | 6 | #include 7 | 8 | #include "searcher.h" 9 | #include "indexer.h" 10 | 11 | using namespace boost::program_options; 12 | 13 | /** 14 | * Number of threads for indexing 15 | */ 16 | Dimensions SPACE_DIMENSION; 17 | /** 18 | * File with vocabularies for multiindex structure 19 | */ 20 | string coarse_vocabs_file; 21 | /** 22 | * File with vocabularies for reranking 23 | */ 24 | string fine_vocabs_file; 25 | /** 26 | * Reranking approach, should be USE_RESIDUALS or USE_INIT_POINTS 27 | */ 28 | RerankMode mode; 29 | /** 30 | * Common prefix of all multiindex files 31 | */ 32 | string index_files_prefix; 33 | /** 34 | * File with queries (.bvec or .fvec) 35 | */ 36 | string queries_file; 37 | /** 38 | * Type, should be BVEC or FVEC 39 | */ 40 | PointType query_point_type; 41 | /** 42 | * File with 
groundtruth (.ivec) 43 | */ 44 | string groundtruth_file; 45 | /** 46 | * Number of queries to search 47 | */ 48 | int queries_count; 49 | /** 50 | * Should we rerank? 51 | */ 52 | bool do_rerank; 53 | /** 54 | * Number of neighbours to look over 55 | */ 56 | int neighbours_count; 57 | /** 58 | * File to write report in 59 | */ 60 | string report_file; 61 | /** 62 | * Number of nearest centroids for each group of dimensions to handle 63 | */ 64 | int subspaces_centroids_count; 65 | 66 | 67 | 68 | int SetOptions(int argc, char** argv) { 69 | options_description description("Options"); 70 | description.add_options() 71 | ("index_files_prefix,i", value()) 72 | ("queries_file,q", value()) 73 | ("queries_count,n", value()) 74 | ("neighbours_count,k", value()) 75 | ("groundtruth_file,g", value()) 76 | ("coarse_vocabs_file,c", value()) 77 | ("fine_vocabs_file,f", value()) 78 | ("query_point_type,t", value()) 79 | ("do_rerank,l", bool_switch(), "Flag B") 80 | ("use_residuals,r", bool_switch(), "Flag R") 81 | ("points_count,p", value()) 82 | ("report_file,o", value()) 83 | ("space_dim,d", value()) 84 | ("subspaces_centroids_count,s", value()); 85 | variables_map name_to_value; 86 | try { 87 | store(command_line_parser(argc, argv).options(description).run(), name_to_value); 88 | } catch (const invalid_command_line_syntax &inv_syntax) { 89 | switch (inv_syntax.kind()) { 90 | case invalid_syntax::missing_parameter : 91 | cout << "Missing argument for option '" << inv_syntax.tokens() << "'.\n"; 92 | break; 93 | default: 94 | cout << "Syntax error, kind " << int(inv_syntax.kind()) << "\n"; 95 | break; 96 | }; 97 | return 1; 98 | } catch (const unknown_option &unkn_opt) { 99 | cout << "Unknown option '" << unkn_opt.get_option_name() << "'\n"; 100 | return 1; 101 | } 102 | if (name_to_value.count("help")) { 103 | cout << description << "\n"; 104 | return 1; 105 | } 106 | 107 | coarse_vocabs_file = name_to_value["coarse_vocabs_file"].as(); 108 | fine_vocabs_file = 
name_to_value["fine_vocabs_file"].as(); 109 | SPACE_DIMENSION = name_to_value["space_dim"].as(); 110 | index_files_prefix = name_to_value["index_files_prefix"].as(); 111 | queries_file = name_to_value["queries_file"].as(); 112 | report_file = name_to_value["report_file"].as(); 113 | groundtruth_file = name_to_value["groundtruth_file"].as(); 114 | queries_count = name_to_value["queries_count"].as(); 115 | neighbours_count = name_to_value["neighbours_count"].as(); 116 | subspaces_centroids_count = name_to_value["subspaces_centroids_count"].as(); 117 | 118 | do_rerank = (name_to_value["do_rerank"].as() == true) ? true : false; 119 | mode = (name_to_value["use_residuals"].as() == true) ? USE_RESIDUALS : USE_INIT_POINTS; 120 | if (name_to_value["query_point_type"].as() == "FVEC") { 121 | query_point_type = FVEC; 122 | } else if(name_to_value["query_point_type"].as() == "BVEC") { 123 | query_point_type = BVEC; 124 | } 125 | return 0; 126 | } 127 | 128 | template 129 | void TestSearcher(TSearcher& searcher, 130 | const Points& queries, 131 | const vector >& groundtruth) { 132 | searcher.Init(index_files_prefix, coarse_vocabs_file, 133 | fine_vocabs_file, mode, 134 | subspaces_centroids_count, 135 | do_rerank); 136 | cout << "Searcher inited ...\n"; 137 | vector result; 138 | float recall = 0.0; 139 | vector recalls(5, 0.0); 140 | clock_t start = clock(); 141 | for(int i = 0; i < queries_count; ++i) { 142 | std::cout << i << std::endl; 143 | neighbours_count = 10000; 144 | result.clear(); 145 | searcher.GetNearestNeighbours(queries[i], neighbours_count, &result); 146 | recalls[0] += GetRecallAt(1, groundtruth[i], result); 147 | recalls[1] += GetRecallAt(10, groundtruth[i], result); 148 | recalls[2] += GetRecallAt(100, groundtruth[i], result); 149 | recalls[3] += GetRecallAt(1000, groundtruth[i], result); 150 | recalls[4] += GetRecallAt(10000, groundtruth[i], result); 151 | } 152 | cout << "R@1 " << recalls[0] / queries_count << "\n" << 153 | "R@10 " << recalls[1] / 
queries_count << "\n" << 154 | "R@100 " << recalls[2] / queries_count << "\n" << 155 | "R@1000 " << recalls[3] / queries_count << "\n" << 156 | "R@10000 " << recalls[4] / queries_count << endl; 157 | searcher.GetPerfTester().DoReport(); 158 | clock_t finish = clock(); 159 | std::cout << "Average search time(ms): "<<(double)(finish - start) / queries.size() << std::endl; 160 | } 161 | 162 | int main(int argc, char** argv) { 163 | SetOptions(argc, argv); 164 | cout << "Options are set ...\n"; 165 | Points queries; 166 | if(query_point_type == BVEC) { 167 | ReadPoints(queries_file, &queries, queries_count); 168 | } else if (query_point_type == FVEC) { 169 | ReadPoints(queries_file, &queries, queries_count); 170 | } 171 | cout << "Queries are read ...\n"; 172 | vector > groundtruth; 173 | ReadPoints(groundtruth_file, &groundtruth, queries_count); 174 | MKL_Set_Num_Threads(1); 175 | cout << "Groundtruth is read ...\n"; 176 | vector fine_vocabs; 177 | ReadFineVocabs(fine_vocabs_file, &fine_vocabs); 178 | if(fine_vocabs.size() == 8) { 179 | MultiSearcher searcher; 180 | TestSearcher > (searcher, queries, groundtruth); 181 | } else if(fine_vocabs.size() == 16) { 182 | MultiSearcher searcher; 183 | TestSearcher > (searcher, queries, groundtruth); 184 | } 185 | return 0; 186 | } 187 | -------------------------------------------------------------------------------- /sift1M_double_4096.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbabenko/MultiIndex/bb0578821093f19d9c44a3ed7f50b8930e1d0199/sift1M_double_4096.dat -------------------------------------------------------------------------------- /sift1M_double_4096_8.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbabenko/MultiIndex/bb0578821093f19d9c44a3ed7f50b8930e1d0199/sift1M_double_4096_8.dat -------------------------------------------------------------------------------- 
/test_searcher_double.sh: -------------------------------------------------------------------------------- 1 | cd build_master 2 | ./searcher_tester \ 3 | --coarse_vocabs_file="../sift1M_double_4096.dat" \ 4 | --fine_vocabs_file="../sift1M_double_4096_8.dat" \ 5 | --query_point_type="BVEC" \ 6 | --use_residuals \ 7 | --space_dim=128 \ 8 | --subspaces_centroids_count=1024 \ 9 | --index_files_prefix="/sata/ResearchData/BigAnn/indices/sift1M_double_4096_8" \ 10 | --queries_file="/sata/ResearchData/BigAnn/bases/sift1B_queries.bvecs" \ 11 | --groundtruth_file="/sata/ResearchData/BigAnn/gnd/sift1M_groundtruth.ivecs" \ 12 | --queries_count=500 \ 13 | --neighbours_count=10000 \ 14 | --report_file="sift1M_4096_8_report.txt" \ 15 | --do_rerank 16 | --------------------------------------------------------------------------------