├── .gitignore ├── Makefile ├── maketgz ├── CHANGES ├── LICENSE ├── README.md ├── kdtree.hpp ├── demo_kdtree.cpp └── kdtree.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | demo_kdtree 3 | 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # GNU makefile fuer kd-tree demo program 3 | #============================================ 4 | 5 | 6 | # compiler options 7 | #-------------------------------------------- 8 | CC = g++ 9 | #CFLAGS = -Wall -g 10 | CFLAGS = -Wall -O2 11 | LDFLAGS = -lstdc++ 12 | 13 | # project files 14 | #-------------------------------------------- 15 | PROGRAM = demo_kdtree 16 | OBJECTS = demo_kdtree.o kdtree.o 17 | 18 | # rules 19 | #-------------------------------------------- 20 | all: $(PROGRAM) 21 | 22 | $(PROGRAM): $(OBJECTS) 23 | $(CC) -o $@ $+ $(LDFLAGS) 24 | 25 | # generic rule for compiling *.cpp -> *.o 26 | %.o: %.cpp kdtree.hpp 27 | $(CC) $(CFLAGS) $(CPPFLAGS) -c $*.cpp 28 | 29 | clean: 30 | rm -f $(PROGRAM) $(OBJECTS) 31 | 32 | -------------------------------------------------------------------------------- /maketgz: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # maketgz - creates TGZ archive of source code 5 | # 6 | 7 | VERSION="" 8 | 9 | 10 | # 11 | # parse command line arguments 12 | # 13 | USAGEMSG="USAGE: `basename $0` -v version" 14 | while [ $# -gt 0 ] 15 | do 16 | case "$1" in 17 | -v) VERSION="$2"; shift;; 18 | *) echo "$USAGEMSG" 1>&2; exit 1;; 19 | esac 20 | shift 21 | done 22 | 23 | if [ -z "$VERSION" ] 24 | then 25 | echo "$USAGEMSG" 1>&2 26 | exit 1 27 | fi 28 | 29 | # 30 | # build archive 31 | # 32 | DIR=kdtree-$VERSION 33 | 34 | ln -s . $DIR 35 | 36 | tar czvf $DIR.tgz \ 37 | $DIR/README.md $DIR/CHANGES $DIR/LICENSE \ 38 | $DIR/*.cpp $DIR/*.hpp $DIR/Makefile $DIR/maketgz 39 | 40 | # clean up 41 | rm $DIR 42 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | Change log of kd-tree library 2 | ============================= 3 | 4 | Version 1.3 from 2024-01-08 5 | --------------------------- 6 | 7 | - error in searches with maximum distance (disacne_type == 0) fixed 8 | fixes https://github.com/cdalitz/kdtree-cpp/issues/6 9 | (thanks to Juliane Arning for implementing the fix) 10 | 11 | 12 | Version 1.2 from 2022-10-17 13 | --------------------------- 14 | 15 | - search functions k_nearest_neighbors and range_nearest_neighbors are 16 | now thread safe, i.e., they can be called serveral times in parallel 17 | 18 | - new property KdNode::index for optionally storing the point index 19 | 20 | - exception thrown if vector nodes empty in constructor of KdTree 21 | 22 | 23 | Version 1.1 from 2020-12-02 24 | --------------------------- 25 | 26 | - removed "unused argument" warning of some compilers 27 | 28 | 29 | Version 1.0 from 2019-02-11 30 | --------------------------- 31 | 32 | - first release under BSD license 33 | 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright: 2 | * 2018 Christoph Dalitz and Jens Wilberg 3 | Niederrhein University of Applied Sciences, 4 | Institute for Pattern Recognition, 5 | Reinarzstr. 49, 47805 Krefeld, Germany 6 | 7 | 8 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | C++ Kd-Tree Library 2 | =================== 3 | 4 | This is a standalone C++ version of the kd-tree implementation in the 5 | Gamera framework. It has been extended by a range search and relicensed 6 | by the original author under a BSD style license. The library is 7 | written in plain C++98 and does not depend on any third party libraries. 8 | 9 | A kd-tree is a data structure that allows for nearest neighbor queries 10 | in expected O(log(n)) time. The creation time of a kd-tree is O(n log(n)). 11 | This library offers four additional features not commonly found in kd-tree 12 | implementations: 13 | 14 | - kNN search with optional additional search condition (search predicate) 15 | - support for Euclidean, Manhattan, and Maximum distance and 16 | their weighted extensions (Euclidean is the default distance) 17 | - range query 18 | - nodes can store a pointer to arbitrary data as void* 19 | 20 | Details about the design and usage of the library can be found in 21 | 22 | > C. Dalitz: *Kd-Trees for Document Layout Analysis.* 23 | > In C. Dalitz (Ed.): "Document Image Analysis with the Gamera Framework." 24 | > Schriftenreihe des Fachbereichs Elektrotechnik und Informatik, 25 | > Hochschule Niederrhein, vol. 8, pp. 39-52, Shaker Verlag (2009) 26 | 27 | Please cite this article when using this library in a scientific project. 28 | 29 | 30 | Usage of the library 31 | -------------------- 32 | 33 | The library consists of two files: 34 | 35 | - `kdtree.hpp` contains the class declarations 36 | - `kdtree.cpp` contains the implementation and must be compiled 37 | 38 | The sample program `demo_kdtree.cpp` shows how the following operations 39 | are done: 40 | 41 | - creation of a kd-tree 42 | - k nearest neighbor search 43 | - k nearest neighbor search with additional search predicate 44 | - range query 45 | 46 | *** 47 | **Note:** 48 | The present implementation does *not* use the C++ template mechanism 49 | for storing data points of only a specific dimension defined at compile time. 50 | The dimension is instead determined at *run time* and the data points are 51 | stored as nested vectors. This can slow down memory management considerably 52 | when compiling in debug mode. It is thus advisable, to turn off debug flags 53 | when compiling. 54 | 55 | *** 56 | 57 | Authors & Copyright 58 | ------------------- 59 | 60 | - Christoph Dalitz, 2018-2022, 61 | - Jens Wilberg, 2018 62 | 63 | For licensing information, see the file LICENSE for details. 64 | -------------------------------------------------------------------------------- /kdtree.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __kdtree_HPP 2 | #define __kdtree_HPP 3 | 4 | // 5 | // Kd-Tree implementation. 6 | // 7 | // Copyright: Christoph Dalitz, 2018-2023 8 | // Jens Wilberg, 2018 9 | // Version: 1.3 10 | // License: BSD style license 11 | // (see the file LICENSE for details) 12 | // 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | namespace Kdtree { 19 | 20 | typedef std::vector CoordPoint; 21 | typedef std::vector DoubleVector; 22 | 23 | // for passing points to the constructor of kdtree 24 | struct KdNode { 25 | CoordPoint point; 26 | void* data; 27 | int index; 28 | KdNode(const CoordPoint& p, void* d = NULL, int i = -1) { 29 | point = p; 30 | data = d; 31 | index = i; 32 | } 33 | KdNode() { data = NULL; } 34 | }; 35 | typedef std::vector KdNodeVector; 36 | 37 | // base function object for search predicate in knn search 38 | // returns true when the given KdNode is an admissible neighbor 39 | // To define an own search predicate, derive from this class 40 | // and overwrite the call operator operator() 41 | struct KdNodePredicate { 42 | virtual ~KdNodePredicate() {} 43 | virtual bool operator()(const KdNode&) const { return true; } 44 | }; 45 | 46 | //-------------------------------------------------------- 47 | // private helper classes used internally by KdTree 48 | // 49 | // the internal node structure used by kdtree 50 | class kdtree_node; 51 | // base class for different distance computations 52 | class DistanceMeasure; 53 | // helper class for priority queue in k nearest neighbor search 54 | class nn4heap { 55 | public: 56 | size_t dataindex; // index of actual kdnode in *allnodes* 57 | double distance; // distance of this neighbor from *point* 58 | nn4heap(size_t i, double d) { 59 | dataindex = i; 60 | distance = d; 61 | } 62 | }; 63 | class compare_nn4heap { 64 | public: 65 | bool operator()(const nn4heap& n, const nn4heap& m) { 66 | return (n.distance < m.distance); 67 | } 68 | }; 69 | typedef std::priority_queue, compare_nn4heap> SearchQueue; 70 | //-------------------------------------------------------- 71 | 72 | // kdtree class 73 | class KdTree { 74 | private: 75 | // recursive build of tree 76 | kdtree_node* build_tree(size_t depth, size_t a, size_t b); 77 | // helper variable for keeping track of subtree bounding box 78 | CoordPoint lobound, upbound; 79 | // helper variable to check the distance method 80 | int distance_type; 81 | bool neighbor_search(const CoordPoint& point, kdtree_node* node, size_t k, SearchQueue* neighborheap); 82 | void range_search(const CoordPoint& point, kdtree_node* node, double r, std::vector* range_result); 83 | bool bounds_overlap_ball(const CoordPoint& point, double dist, 84 | kdtree_node* node); 85 | bool ball_within_bounds(const CoordPoint& point, double dist, 86 | kdtree_node* node); 87 | // class implementing the distance computation 88 | DistanceMeasure* distance; 89 | // search predicate in knn searches 90 | KdNodePredicate* searchpredicate; 91 | 92 | public: 93 | KdNodeVector allnodes; 94 | size_t dimension; 95 | kdtree_node* root; 96 | // distance_type can be 0 (max), 1 (city block), or 2 (euklid [squared]) 97 | KdTree(const KdNodeVector* nodes, int distance_type = 2); 98 | ~KdTree(); 99 | void set_distance(int distance_type, const DoubleVector* weights = NULL); 100 | void k_nearest_neighbors(const CoordPoint& point, size_t k, 101 | KdNodeVector* result, KdNodePredicate* pred = NULL); 102 | void range_nearest_neighbors(const CoordPoint& point, double r, 103 | KdNodeVector* result); 104 | }; 105 | 106 | } // end namespace Kdtree 107 | 108 | #endif 109 | -------------------------------------------------------------------------------- /demo_kdtree.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Demonstration of typical use cases of the kd-tree library 3 | // Author: Christoph Dalitz, 2024-01-08 4 | // License: BSD style license (see the file LICENSE) 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "kdtree.hpp" 12 | 13 | using namespace std; 14 | 15 | // 16 | // helper function for printing points 17 | // 18 | void print_nodes(const Kdtree::KdNodeVector &nodes) { 19 | size_t i,j; 20 | for (i = 0; i < nodes.size(); ++i) { 21 | if (i > 0) 22 | cout << " "; 23 | cout << "("; 24 | for (j = 0; j < nodes[i].point.size(); j++) { 25 | if (j > 0) 26 | cout << ","; 27 | cout << nodes[i].point[j]; 28 | } 29 | cout << ")"; 30 | } 31 | cout << endl; 32 | } 33 | 34 | // 35 | // main program demonstrating typical use cases 36 | // 37 | int main(int argc, char** argv) { 38 | 39 | // 40 | // functionality tests 41 | // 42 | cout << "Functionality tests" << endl; 43 | cout << "-------------------" << endl; 44 | 45 | // 1.1) construction of kd-tree 46 | Kdtree::KdNodeVector nodes; 47 | double points[10][2] = {{1, 1}, {2, 1}, {1, 2}, {2, 4}, {3, 4}, 48 | {7, 2}, {8, 3}, {8, 5}, {7, 3}, {7, 3}}; 49 | for (int i = 0; i < 10; ++i) { 50 | std::vector point(2); 51 | point[0] = points[i][0]; 52 | point[1] = points[i][1]; 53 | nodes.push_back(Kdtree::KdNode(point)); 54 | } 55 | Kdtree::KdTree tree(&nodes); 56 | cout << "Points in kd-tree:\n "; 57 | print_nodes(tree.allnodes); 58 | 59 | // 1.2) kNN search 60 | Kdtree::KdNodeVector result; 61 | std::vector test_point(2); 62 | test_point[0] = 8; 63 | test_point[1] = 3; 64 | tree.k_nearest_neighbors(test_point, 3, &result); 65 | cout << "3NNs of (" << test_point[0] << "," << test_point[1] << "):\n "; 66 | print_nodes(result); 67 | 68 | // 1.3) kNN search with predicate 69 | struct Predicate : Kdtree::KdNodePredicate { 70 | std::vector point; 71 | Predicate(std::vector p) { 72 | this->point = p; 73 | } 74 | // only search for points with smaller y-coordinate 75 | bool operator()(const Kdtree::KdNode& kn) const { 76 | return this->point[1] > kn.point[1]; 77 | } 78 | }; 79 | Predicate pred(test_point); 80 | tree.k_nearest_neighbors(test_point, 3, &result, &pred); 81 | cout << "3NNs of (" << test_point[0] << "," << test_point[1] << ") with smaller y-coordinate:\n "; 82 | print_nodes(result); 83 | 84 | // 1.4) range query 85 | test_point[0] = 8; 86 | test_point[1] = 2; 87 | tree.range_nearest_neighbors(test_point, 1.1, &result); 88 | cout << "Neighbors of (" << test_point[0] << "," << test_point[1] << ") with distance <= 1.1:\n "; 89 | print_nodes(result); 90 | // same with maximum distance (distance_type=0) 91 | Kdtree::KdTree tree0(&nodes, 0); 92 | tree0.range_nearest_neighbors(test_point, 1.1, &result); 93 | cout << "Neighbors of (" << test_point[0] << "," << test_point[1] << ") with maximum distance <= 1.1:\n "; 94 | print_nodes(result); 95 | 96 | // 97 | // speed tests 98 | // 99 | cout << "\nSpeed tests" << endl; 100 | cout << "-----------" << endl; 101 | 102 | double diff; 103 | size_t N = 500000; 104 | nodes.clear(); 105 | for (size_t i = 0; i < N; ++i) { 106 | std::vector point(2); 107 | point[0] = (double)rand() / RAND_MAX; 108 | point[1] = (double)rand() / RAND_MAX; 109 | nodes.push_back(Kdtree::KdNode(point)); 110 | } 111 | 112 | // 2.1) build time 113 | clock_t begin = clock(); 114 | Kdtree::KdTree tree2(&nodes); 115 | clock_t end = clock(); 116 | diff = double(end - begin) / CLOCKS_PER_SEC; 117 | cout << "Creation time for " << N << " random points:\n " << diff << "s" << endl; 118 | 119 | // 2.2) one kNN query 120 | test_point[0] = (double)rand() / RAND_MAX; 121 | test_point[1] = (double)rand() / RAND_MAX; 122 | begin = clock(); 123 | tree.k_nearest_neighbors(test_point, 3, &result); 124 | end = clock(); 125 | diff = double(end - begin) / CLOCKS_PER_SEC; 126 | cout << "3NN search time for one random point:\n " << diff << "s" << endl; 127 | 128 | // 2.3) all NN query 129 | begin = clock(); 130 | for (size_t i = 0; i < N; ++i) { 131 | std::vector point(2); 132 | tree.k_nearest_neighbors(nodes[i].point, 2, &result); 133 | } 134 | end = clock(); 135 | diff = double(end - begin) / CLOCKS_PER_SEC; 136 | cout << "Time for all nearest neighbor search:\n " << diff << "s" << endl; 137 | 138 | // 2.4) range query 139 | double r = 0.3; 140 | begin = clock(); 141 | tree.range_nearest_neighbors(test_point, r, &result); 142 | end = clock(); 143 | diff = double(end - begin) / CLOCKS_PER_SEC; 144 | cout << "Range search time for one random point (r=" << r << "):\n " << diff << endl; 145 | 146 | // 2.5) for comparison: loop version 147 | std::vector res; 148 | begin = clock(); 149 | for (size_t i = 0; i < N; ++i) { 150 | if ((nodes[i].point[0] - test_point[0]) * 151 | (nodes[i].point[0] - test_point[0]) + 152 | (nodes[i].point[1] - test_point[1]) * 153 | (nodes[i].point[1] - test_point[1]) < r * r) 154 | res.push_back(nodes[i].point); 155 | } 156 | end = clock(); 157 | diff = double(end - begin) / CLOCKS_PER_SEC; 158 | cout << "Loop version of a single range search for comparison:\n " << diff << "s" << endl; 159 | 160 | return 0; 161 | } 162 | -------------------------------------------------------------------------------- /kdtree.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Kd-Tree implementation. 3 | // 4 | // Copyright: Christoph Dalitz, 2018-2023 5 | // Jens Wilberg, 2018 6 | // Version: 1.3 7 | // License: BSD style license 8 | // (see the file LICENSE for details) 9 | // 10 | 11 | #include "kdtree.hpp" 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace Kdtree { 18 | 19 | //-------------------------------------------------------------- 20 | // function object for comparing only dimension d of two vecotrs 21 | //-------------------------------------------------------------- 22 | class compare_dimension { 23 | public: 24 | compare_dimension(size_t dim) { d = dim; } 25 | bool operator()(const KdNode& p, const KdNode& q) { 26 | return (p.point[d] < q.point[d]); 27 | } 28 | size_t d; 29 | }; 30 | 31 | //-------------------------------------------------------------- 32 | // internal node structure used by kdtree 33 | //-------------------------------------------------------------- 34 | class kdtree_node { 35 | public: 36 | kdtree_node() { 37 | dataindex = cutdim = 0; 38 | loson = hison = (kdtree_node*)NULL; 39 | } 40 | ~kdtree_node() { 41 | if (loson) delete loson; 42 | if (hison) delete hison; 43 | } 44 | // index of node data in kdtree array "allnodes" 45 | size_t dataindex; 46 | // cutting dimension 47 | size_t cutdim; 48 | // value of point 49 | // double cutval; // == point[cutdim] 50 | CoordPoint point; 51 | // roots of the two subtrees 52 | kdtree_node *loson, *hison; 53 | // bounding rectangle of this node's subtree 54 | CoordPoint lobound, upbound; 55 | }; 56 | 57 | //-------------------------------------------------------------- 58 | // different distance metrics 59 | //-------------------------------------------------------------- 60 | class DistanceMeasure { 61 | public: 62 | DistanceMeasure() {} 63 | virtual ~DistanceMeasure() {} 64 | virtual double distance(const CoordPoint& p, const CoordPoint& q) = 0; 65 | virtual double coordinate_distance(double x, double y, size_t dim) = 0; 66 | }; 67 | // Maximum distance (Linfinite norm) 68 | class DistanceL0 : virtual public DistanceMeasure { 69 | DoubleVector* w; 70 | 71 | public: 72 | DistanceL0(const DoubleVector* weights = NULL) { 73 | if (weights) 74 | w = new DoubleVector(*weights); 75 | else 76 | w = (DoubleVector*)NULL; 77 | } 78 | ~DistanceL0() { 79 | if (w) delete w; 80 | } 81 | double distance(const CoordPoint& p, const CoordPoint& q) { 82 | size_t i; 83 | double dist, test; 84 | if (w) { 85 | dist = (*w)[0] * fabs(p[0] - q[0]); 86 | for (i = 1; i < p.size(); i++) { 87 | test = (*w)[i] * fabs(p[i] - q[i]); 88 | if (test > dist) dist = test; 89 | } 90 | } else { 91 | dist = fabs(p[0] - q[0]); 92 | for (i = 1; i < p.size(); i++) { 93 | test = fabs(p[i] - q[i]); 94 | if (test > dist) dist = test; 95 | } 96 | } 97 | return dist; 98 | } 99 | double coordinate_distance(double x, double y, size_t dim) { 100 | if (w) 101 | return (*w)[dim] * fabs(x - y); 102 | else 103 | return fabs(x - y); 104 | } 105 | }; 106 | // Manhatten distance (L1 norm) 107 | class DistanceL1 : virtual public DistanceMeasure { 108 | DoubleVector* w; 109 | 110 | public: 111 | DistanceL1(const DoubleVector* weights = NULL) { 112 | if (weights) 113 | w = new DoubleVector(*weights); 114 | else 115 | w = (DoubleVector*)NULL; 116 | } 117 | ~DistanceL1() { 118 | if (w) delete w; 119 | } 120 | double distance(const CoordPoint& p, const CoordPoint& q) { 121 | size_t i; 122 | double dist = 0.0; 123 | if (w) { 124 | for (i = 0; i < p.size(); i++) dist += (*w)[i] * fabs(p[i] - q[i]); 125 | } else { 126 | for (i = 0; i < p.size(); i++) dist += fabs(p[i] - q[i]); 127 | } 128 | return dist; 129 | } 130 | double coordinate_distance(double x, double y, size_t dim) { 131 | if (w) 132 | return (*w)[dim] * fabs(x - y); 133 | else 134 | return fabs(x - y); 135 | } 136 | }; 137 | // Euklidean distance (L2 norm) (squared) 138 | class DistanceL2 : virtual public DistanceMeasure { 139 | DoubleVector* w; 140 | 141 | public: 142 | DistanceL2(const DoubleVector* weights = NULL) { 143 | if (weights) 144 | w = new DoubleVector(*weights); 145 | else 146 | w = (DoubleVector*)NULL; 147 | } 148 | ~DistanceL2() { 149 | if (w) delete w; 150 | } 151 | double distance(const CoordPoint& p, const CoordPoint& q) { 152 | size_t i; 153 | double dist = 0.0; 154 | if (w) { 155 | for (i = 0; i < p.size(); i++) 156 | dist += (*w)[i] * (p[i] - q[i]) * (p[i] - q[i]); 157 | } else { 158 | for (i = 0; i < p.size(); i++) dist += (p[i] - q[i]) * (p[i] - q[i]); 159 | } 160 | return dist; 161 | } 162 | double coordinate_distance(double x, double y, size_t dim) { 163 | if (w) 164 | return (*w)[dim] * (x - y) * (x - y); 165 | else 166 | return (x - y) * (x - y); 167 | } 168 | }; 169 | 170 | //-------------------------------------------------------------- 171 | // destructor and constructor of kdtree 172 | //-------------------------------------------------------------- 173 | KdTree::~KdTree() { 174 | if (root) delete root; 175 | delete distance; 176 | } 177 | // distance_type can be 0 (Maximum), 1 (Manhatten), or 2 (Euklidean [squared]) 178 | KdTree::KdTree(const KdNodeVector* nodes, int distance_type /*=2*/) { 179 | size_t i, j; 180 | double val; 181 | // copy over input data 182 | if (!nodes || nodes->empty()) 183 | throw std::invalid_argument( 184 | "kdtree::KdTree(): argument nodes must not be empty"); 185 | dimension = nodes->begin()->point.size(); 186 | allnodes = *nodes; 187 | // initialize distance values 188 | distance = NULL; 189 | this->distance_type = -1; 190 | set_distance(distance_type); 191 | // compute global bounding box 192 | lobound = nodes->begin()->point; 193 | upbound = nodes->begin()->point; 194 | for (i = 1; i < nodes->size(); i++) { 195 | for (j = 0; j < dimension; j++) { 196 | val = allnodes[i].point[j]; 197 | if (lobound[j] > val) lobound[j] = val; 198 | if (upbound[j] < val) upbound[j] = val; 199 | } 200 | } 201 | // build tree recursively 202 | root = build_tree(0, 0, allnodes.size()); 203 | } 204 | 205 | // distance_type can be 0 (Maximum), 1 (Manhatten), or 2 (Euklidean [squared]) 206 | void KdTree::set_distance(int distance_type, 207 | const DoubleVector* weights /*=NULL*/) { 208 | if (distance) delete distance; 209 | this->distance_type = distance_type; 210 | if (distance_type == 0) { 211 | distance = (DistanceMeasure*)new DistanceL0(weights); 212 | } else if (distance_type == 1) { 213 | distance = (DistanceMeasure*)new DistanceL1(weights); 214 | } else { 215 | distance = (DistanceMeasure*)new DistanceL2(weights); 216 | } 217 | } 218 | 219 | //-------------------------------------------------------------- 220 | // recursive build of tree 221 | // "a" and "b"-1 are the lower and upper indices 222 | // from "allnodes" from which the subtree is to be built 223 | //-------------------------------------------------------------- 224 | kdtree_node* KdTree::build_tree(size_t depth, size_t a, size_t b) { 225 | size_t m; 226 | double temp, cutval; 227 | kdtree_node* node = new kdtree_node(); 228 | node->lobound = lobound; 229 | node->upbound = upbound; 230 | node->cutdim = depth % dimension; 231 | if (b - a <= 1) { 232 | node->dataindex = a; 233 | node->point = allnodes[a].point; 234 | } else { 235 | m = (a + b) / 2; 236 | std::nth_element(allnodes.begin() + a, allnodes.begin() + m, 237 | allnodes.begin() + b, compare_dimension(node->cutdim)); 238 | node->point = allnodes[m].point; 239 | cutval = allnodes[m].point[node->cutdim]; 240 | node->dataindex = m; 241 | if (m - a > 0) { 242 | temp = upbound[node->cutdim]; 243 | upbound[node->cutdim] = cutval; 244 | node->loson = build_tree(depth + 1, a, m); 245 | upbound[node->cutdim] = temp; 246 | } 247 | if (b - m > 1) { 248 | temp = lobound[node->cutdim]; 249 | lobound[node->cutdim] = cutval; 250 | node->hison = build_tree(depth + 1, m + 1, b); 251 | lobound[node->cutdim] = temp; 252 | } 253 | } 254 | return node; 255 | } 256 | 257 | //-------------------------------------------------------------- 258 | // k nearest neighbor search 259 | // returns the *k* nearest neighbors of *point* in O(log(n)) 260 | // time. The result is returned in *result* and is sorted by 261 | // distance from *point*. 262 | // The optional search predicate is a callable class (aka "functor") 263 | // derived from KdNodePredicate. When Null (default, no search 264 | // predicate is applied). 265 | //-------------------------------------------------------------- 266 | void KdTree::k_nearest_neighbors(const CoordPoint& point, size_t k, 267 | KdNodeVector* result, 268 | KdNodePredicate* pred /*=NULL*/) { 269 | size_t i; 270 | KdNode temp; 271 | searchpredicate = pred; 272 | 273 | result->clear(); 274 | if (k < 1) return; 275 | if (point.size() != dimension) 276 | throw std::invalid_argument( 277 | "kdtree::k_nearest_neighbors(): point must be of same dimension as " 278 | "kdtree"); 279 | 280 | // collect result of k values in neighborheap 281 | //std::priority_queue, compare_nn4heap>* 282 | //neighborheap = new std::priority_queue, compare_nn4heap>(); 283 | SearchQueue* neighborheap = new SearchQueue(); 284 | if (k > allnodes.size()) { 285 | // when more neighbors asked than nodes in tree, return everything 286 | k = allnodes.size(); 287 | for (i = 0; i < k; i++) { 288 | if (!(searchpredicate && !(*searchpredicate)(allnodes[i]))) 289 | neighborheap->push( 290 | nn4heap(i, distance->distance(allnodes[i].point, point))); 291 | } 292 | } else { 293 | neighbor_search(point, root, k, neighborheap); 294 | } 295 | 296 | // copy over result sorted by distance 297 | // (we must revert the vector for ascending order) 298 | while (!neighborheap->empty()) { 299 | i = neighborheap->top().dataindex; 300 | neighborheap->pop(); 301 | result->push_back(allnodes[i]); 302 | } 303 | // beware that less than k results might have been returned 304 | k = result->size(); 305 | for (i = 0; i < k / 2; i++) { 306 | temp = (*result)[i]; 307 | (*result)[i] = (*result)[k - 1 - i]; 308 | (*result)[k - 1 - i] = temp; 309 | } 310 | delete neighborheap; 311 | } 312 | 313 | //-------------------------------------------------------------- 314 | // range nearest neighbor search 315 | // returns the nearest neighbors of *point* in the given range 316 | // *r*. The result is returned in *result* and is sorted by 317 | // distance from *point*. 318 | //-------------------------------------------------------------- 319 | void KdTree::range_nearest_neighbors(const CoordPoint& point, double r, 320 | KdNodeVector* result) { 321 | KdNode temp; 322 | 323 | result->clear(); 324 | if (point.size() != dimension) 325 | throw std::invalid_argument( 326 | "kdtree::k_nearest_neighbors(): point must be of same dimension as " 327 | "kdtree"); 328 | if (this->distance_type == 2) { 329 | // if euclidien distance is used the range must be squared because we 330 | // get squared distances from this implementation 331 | r *= r; 332 | } 333 | 334 | // collect result in range_result 335 | std::vector range_result; 336 | range_search(point, root, r, &range_result); 337 | 338 | // copy over result 339 | for (std::vector::iterator i = range_result.begin(); 340 | i != range_result.end(); ++i) { 341 | result->push_back(allnodes[*i]); 342 | } 343 | 344 | // clear vector 345 | range_result.clear(); 346 | } 347 | 348 | //-------------------------------------------------------------- 349 | // recursive function for nearest neighbor search in subtree 350 | // under *node*. Stores result in *neighborheap*. 351 | // returns "true" when no nearer neighbor elsewhere possible 352 | //-------------------------------------------------------------- 353 | bool KdTree::neighbor_search(const CoordPoint& point, kdtree_node* node, 354 | size_t k, SearchQueue* neighborheap) { 355 | double curdist, dist; 356 | 357 | curdist = distance->distance(point, node->point); 358 | if (!(searchpredicate && !(*searchpredicate)(allnodes[node->dataindex]))) { 359 | if (neighborheap->size() < k) { 360 | neighborheap->push(nn4heap(node->dataindex, curdist)); 361 | } else if (curdist < neighborheap->top().distance) { 362 | neighborheap->pop(); 363 | neighborheap->push(nn4heap(node->dataindex, curdist)); 364 | } 365 | } 366 | // first search on side closer to point 367 | if (point[node->cutdim] < node->point[node->cutdim]) { 368 | if (node->loson) 369 | if (neighbor_search(point, node->loson, k, neighborheap)) return true; 370 | } else { 371 | if (node->hison) 372 | if (neighbor_search(point, node->hison, k, neighborheap)) return true; 373 | } 374 | // second search on farther side, if necessary 375 | if (neighborheap->size() < k) { 376 | dist = std::numeric_limits::max(); 377 | } else { 378 | dist = neighborheap->top().distance; 379 | } 380 | if (point[node->cutdim] < node->point[node->cutdim]) { 381 | if (node->hison && bounds_overlap_ball(point, dist, node->hison)) 382 | if (neighbor_search(point, node->hison, k, neighborheap)) return true; 383 | } else { 384 | if (node->loson && bounds_overlap_ball(point, dist, node->loson)) 385 | if (neighbor_search(point, node->loson, k, neighborheap)) return true; 386 | } 387 | 388 | if (neighborheap->size() == k) dist = neighborheap->top().distance; 389 | return ball_within_bounds(point, dist, node); 390 | } 391 | 392 | //-------------------------------------------------------------- 393 | // recursive function for range search in subtree under *node*. 394 | // Stores result in *range_result*. 395 | //-------------------------------------------------------------- 396 | void KdTree::range_search(const CoordPoint& point, kdtree_node* node, 397 | double r, std::vector* range_result) { 398 | double curdist = distance->distance(point, node->point); 399 | if (curdist <= r) { 400 | range_result->push_back(node->dataindex); 401 | } 402 | if (node->loson != NULL && this->bounds_overlap_ball(point, r, node->loson)) { 403 | range_search(point, node->loson, r, range_result); 404 | } 405 | if (node->hison != NULL && this->bounds_overlap_ball(point, r, node->hison)) { 406 | range_search(point, node->hison, r, range_result); 407 | } 408 | } 409 | 410 | // returns true when the bounds of *node* overlap with the 411 | // ball with radius *dist* around *point* 412 | bool KdTree::bounds_overlap_ball(const CoordPoint& point, double dist, 413 | kdtree_node* node) { 414 | if (distance_type != 0) { 415 | double distsum = 0.0; 416 | size_t i; 417 | for (i = 0; i < dimension; i++) { 418 | if (point[i] < node->lobound[i]) { // lower than low boundary 419 | distsum += distance->coordinate_distance(point[i], node->lobound[i], i); 420 | if (distsum > dist) return false; 421 | } else if (point[i] > node->upbound[i]) { // higher than high boundary 422 | distsum += distance->coordinate_distance(point[i], node->upbound[i], i); 423 | if (distsum > dist) return false; 424 | } 425 | } 426 | return true; 427 | } else { // maximum distance needs different treatment 428 | double max_dist = 0.0; 429 | double curr_dist = 0.0; 430 | size_t i; 431 | for (i = 0; i < dimension; i++) { 432 | if (point[i] < node->lobound[i]) { // lower than low boundary 433 | curr_dist = distance->coordinate_distance(point[i], node->lobound[i], i); 434 | } else if (point[i] > node->upbound[i]) { // higher than high boundary 435 | curr_dist = distance->coordinate_distance(point[i], node->upbound[i], i); 436 | } 437 | if(curr_dist > max_dist) { 438 | max_dist = curr_dist; 439 | } 440 | if (max_dist > dist) return false; 441 | } 442 | return true; 443 | } 444 | } 445 | 446 | // returns true when the bounds of *node* completely contain the 447 | // ball with radius *dist* around *point* 448 | bool KdTree::ball_within_bounds(const CoordPoint& point, double dist, 449 | kdtree_node* node) { 450 | size_t i; 451 | for (i = 0; i < dimension; i++) 452 | if (distance->coordinate_distance(point[i], node->lobound[i], i) <= dist || 453 | distance->coordinate_distance(point[i], node->upbound[i], i) <= dist) 454 | return false; 455 | return true; 456 | } 457 | 458 | } // namespace Kdtree 459 | --------------------------------------------------------------------------------