├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── pom.xml └── src ├── assembly └── benchmark.xml ├── benchmark └── java │ └── com │ └── eatthepath │ └── jvptree │ ├── CartesianDistanceFunction.java │ ├── CartesianPoint.java │ ├── ThresholdSelectionBenchmark.java │ ├── VPTreeConstructionBenchmark.java │ └── VPTreeQueryBenchmark.java ├── main └── java │ ├── com │ └── eatthepath │ │ └── jvptree │ │ ├── DistanceComparator.java │ │ ├── DistanceFunction.java │ │ ├── MetaIterator.java │ │ ├── NearestNeighborCollector.java │ │ ├── PartitionException.java │ │ ├── PointFilter.java │ │ ├── SpatialIndex.java │ │ ├── ThresholdSelectionStrategy.java │ │ ├── VPTree.java │ │ ├── VPTreeNode.java │ │ ├── package-info.java │ │ └── util │ │ ├── MedianDistanceThresholdSelectionStrategy.java │ │ ├── SamplingMedianDistanceThresholdSelectionStrategy.java │ │ └── package-info.java │ └── overview.html └── test └── java └── com └── eatthepath └── jvptree ├── IntegerDistanceFunction.java ├── MetaIteratorTest.java ├── NearestNeighborCollectorTest.java ├── VPTreeNodeTest.java ├── VPTreeTest.java ├── example ├── CartesianDistanceFunction.java ├── CartesianPoint.java ├── ExampleApp.java └── SpaceInvader.java └── util ├── MedianDistanceThresholdSelectionStrategyTest.java └── SamplingMedianDistanceThresholdSelectionStrategyTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # Package Files # 7 | *.jar 8 | *.war 9 | *.ear 10 | 11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 12 | hs_err_pid* 13 | /target 14 | 15 | # Eclipse project files 16 | .classpath 17 | .project 18 | .settings/ 19 | 20 | # IntelliJ project files 21 | .idea/ 22 | *.iml 23 | 24 | # Generated output 25 | doc/ 26 | 27 | # OS detritus 28 | .DS_Store 29 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: java 2 | jdk: 3 | - openjdk8 4 | - openjdk10 5 | - openjdk11 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Jon Chambers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/jchambers/jvptree.svg?branch=master)](https://travis-ci.org/jchambers/jvptree) 2 | 3 | # jvptree 4 | 5 | Jvptree is a generic [vantage-point tree](https://en.wikipedia.org/wiki/Vantage-point_tree) implementation written in Java that allows for quick (*O(log(n))*) searches for the nearest neighbors to a given point. Vantage-point trees are binary space partitioning trees that partition points according to their distance from each node's "vantage point." Points that are closer than a chosen threshold go into one child node, while points that are farther away go into the other. Vantage-point trees operate on any [metric space](https://en.wikipedia.org/wiki/Metric_space). 6 | 7 | Steve Hanov has written a great and accessible [introduction to vp-trees](http://stevehanov.ca/blog/index.php?id=130). 8 | 9 | ## Getting jvptree 10 | 11 | If you use [Maven](http://maven.apache.org/), you can add jvptree to your project by adding the following dependency declaration to your POM: 12 | 13 | ```xml 14 | <dependency> 15 | <groupId>com.eatthepath</groupId> 16 | <artifactId>jvptree</artifactId> 17 | <version>0.2</version> 18 | </dependency> 19 | ``` 20 | 21 | If you don't use Maven, you can download jvptree as a `.jar` file and add it to your project directly. Jvptree has no external dependencies, and works with Java 1.7 and newer. 22 | 23 | ## Major concepts 24 | 25 | The main thing vantage-point trees do is partition points into groups that are closer or farther than a given distance threshold. To do that, a vp-tree needs to be able to figure out how far apart any two points are and also decide what to use as a distance threshold. At a minimum, you'll need to provide a distance function that can calculate the distance between points. You may optionally specify a threshold selection strategy; if you don't, a reasonable default will be used. 
26 | 27 | ### Distance functions 28 | 29 | You must always specify a [distance function](http://jchambers.github.io/jvptree/apidocs/0.2/com/eatthepath/jvptree/DistanceFunction.html) when creating a vp-tree. Distance functions take two points as arguments and must satisfy the requirements of a metric space, namely: 30 | 31 | - d(x, y) >= 0 32 | - d(x, y) = 0 if and only if x = y 33 | - d(x, y) = d(y, x) 34 | - d(x, z) <= d(x, y) + d(y, z) 35 | 36 | ### Threshold selection strategies 37 | 38 | You may optionally specify a [strategy for choosing a distance threshold](http://jchambers.github.io/jvptree/apidocs/0.2/com/eatthepath/jvptree/ThresholdSelectionStrategy.html) for partitioning. By default, jvptree will use a [sampling median strategy](http://jchambers.github.io/jvptree/apidocs/0.2/com/eatthepath/jvptree/util/SamplingMedianDistanceThresholdSelectionStrategy.html), where it will take the median distance from a small subset of the points to partition. Jvptree also includes a [threshold selection strategy that takes the median of *all* points](http://jchambers.github.io/jvptree/apidocs/0.2/com/eatthepath/jvptree/util/MedianDistanceThresholdSelectionStrategy.html) to be partitioned; this is slower, but may result in a more balanced tree. Most users will not need to specify a threshold selection strategy. 39 | 40 | ### Node capacity 41 | 42 | Additionally, you may specify a desired capacity for the tree's leaf nodes. It's worth mentioning early that you almost certainly do not need to worry about this; a reasonable default (32 points) will be used, and most users won't realize significant performance gains by tuning it. 43 | 44 | Still, for those in need, you may choose a desired capacity for leaf nodes in a vp-tree. At one extreme, leaf nodes may contain only a single point. This means that searches will have to traverse more nodes, but once a leaf node is reached, fewer points will need to be searched to find nearest neighbors. 
45 | 46 | Using a larger node capacity will result in a "flatter" tree, and fewer nodes will need to be traversed when searching, but more points will need to be tested once a search reaches a leaf node. Larger node capacities also lead to less memory overhead because there are fewer nodes in the tree. 47 | 48 | As a general rule of thumb, node capacities should be on the same order of magnitude as your typical search result size. The idea is that if a search reaches a leaf node, most of the points in the node will wind up in the collection of nearest neighbors (i.e. they all would have had to be checked anyhow) and few other nodes will have to be visited to gather any remaining neighbors. 49 | 50 | ## Using jvptree 51 | 52 | As discussed above, you must provide a distance function when creating a vp-tree and may optionally specify a distance threshold selection strategy and leaf node capacity. As a simple example, let's say you're writing a version of [Space Invaders](https://en.wikipedia.org/wiki/Space_Invaders), and you know you'll need to find the closest enemies to the player's position. To start, everything on the playing field will exist at a specific point: 53 | 54 | ```java 55 | public interface CartesianPoint { 56 | double getX(); 57 | double getY(); 58 | } 59 | ``` 60 | 61 | To create a vp-tree, you must provide a distance function that will return the distance between any two given points. 
In this example, you might create a `CartesianDistanceFunction` class: 62 | 63 | ```java 64 | public class CartesianDistanceFunction implements DistanceFunction<CartesianPoint> { 65 | 66 | public double getDistance(final CartesianPoint firstPoint, final CartesianPoint secondPoint) { 67 | final double deltaX = firstPoint.getX() - secondPoint.getX(); 68 | final double deltaY = firstPoint.getY() - secondPoint.getY(); 69 | 70 | return Math.sqrt((deltaX * deltaX) + (deltaY * deltaY)); 71 | } 72 | } 73 | ``` 74 | 75 | Once you have your distance function, you can create a vp-tree that stores the locations of all of the space invaders on the playing field: 76 | 77 | ```java 78 | final VPTree<CartesianPoint, SpaceInvader> vpTree = 79 | new VPTree<>(new CartesianDistanceFunction(), enemies); 80 | ``` 81 | 82 | In this case, we provide all of our points at construction time, but you may also create an empty tree and add points later. The `VPTree` class implements Java's [`Collection`](http://docs.oracle.com/javase/7/docs/api/java/util/Collection.html) interface and supports all optional operations. 83 | 84 | Note that a `VPTree` has two generic types: a general "base" point type and a more specific type for the elements actually stored in the tree. You can query the tree using any instance of the base type, but still know that you'll be receiving a list of the more specific type as a result of the query. In our example, this is helpful because the player's location is a Cartesian point, but the player is not a space invader. It wouldn't make much sense to create a new space invader at the player's location just to query the vp-tree, and so this construct allows us to query the tree with the player's location instead. 85 | 86 | With your tree created, you can find the closest enemies to the player's position. 
For example, to find (up to) the ten closest space invaders: 87 | 88 | ```java 89 | final List nearestEnemies = 90 | vpTree.getNearestNeighbors(playerPosition, 10); 91 | ``` 92 | 93 | You could also find all of the enemies that are within firing range of the player: 94 | 95 | ```java 96 | final List enemiesWithinFiringRange = 97 | vpTree.getAllWithinDistance(playerPosition, 4.5); 98 | ``` 99 | 100 | ## License 101 | 102 | Jvptree is available to the public under the [MIT License](http://opensource.org/licenses/MIT). 103 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | com.eatthepath 6 | jvptree 7 | jar 8 | 0.4.0-SNAPSHOT 9 | jvptree 10 | A generic vp-tree implementation in Java 11 | 12 | 13 | 14 | The MIT License (MIT) 15 | http://opensource.org/licenses/MIT 16 | repo 17 | 18 | 19 | 20 | 21 | org.sonatype.oss 22 | oss-parent 23 | 7 24 | 25 | 26 | 27 | UTF-8 28 | 29 | 30 | 31 | 32 | org.junit.jupiter 33 | junit-jupiter-engine 34 | 5.7.1 35 | test 36 | 37 | 38 | 39 | org.junit.jupiter 40 | junit-jupiter-params 41 | 5.7.1 42 | test 43 | 44 | 45 | 46 | org.openjdk.jmh 47 | jmh-core 48 | 1.21 49 | test 50 | 51 | 52 | 53 | org.openjdk.jmh 54 | jmh-generator-annprocess 55 | 1.21 56 | test 57 | 58 | 59 | 60 | 61 | 62 | 63 | org.apache.maven.plugins 64 | maven-surefire-plugin 65 | 3.0.0-M4 66 | 67 | 68 | 69 | org.apache.maven.plugins 70 | maven-jar-plugin 71 | 3.0.2 72 | 73 | 74 | **/.gitignore 75 | 76 | 77 | 78 | 79 | 80 | 81 | org.codehaus.mojo 82 | build-helper-maven-plugin 83 | 3.0.0 84 | 85 | 86 | add-test-source 87 | generate-test-sources 88 | 89 | add-test-source 90 | 91 | 92 | 93 | src/benchmark/java 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | org.apache.maven.plugins 102 | maven-compiler-plugin 103 | 3.8.1 104 | 105 | 106 | 1.8 107 | 1.8 108 | 109 | 110 | 111 | 112 | 113 | testCompile 114 | 115 | 116 | 117 | 118 | 119 
| org.openjdk.jmh 120 | jmh-generator-annprocess 121 | 1.21 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | org.apache.maven.plugins 131 | maven-source-plugin 132 | 2.2.1 133 | 134 | 135 | 136 | attach-sources 137 | 138 | jar 139 | 140 | 141 | 142 | 143 | 144 | 145 | org.apache.maven.plugins 146 | maven-assembly-plugin 147 | 3.1.1 148 | 149 | 150 | 151 | src/assembly/benchmark.xml 152 | 153 | 154 | 155 | 156 | 157 | make-assembly 158 | package 159 | 160 | single 161 | 162 | 163 | true 164 | 165 | 166 | org.openjdk.jmh.Main 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | release-sign-artifacts 179 | 180 | 181 | performRelease 182 | true 183 | 184 | 185 | 186 | 187 | 188 | org.apache.maven.plugins 189 | maven-javadoc-plugin 190 | 2.9.1 191 | 192 | 193 | attach-javadocs 194 | 195 | jar 196 | 197 | 198 | 199 | 200 | 201 | org.apache.maven.plugins 202 | maven-source-plugin 203 | 2.2.1 204 | 205 | 206 | attach-sources 207 | 208 | jar 209 | 210 | 211 | 212 | 213 | 214 | org.apache.maven.plugins 215 | maven-gpg-plugin 216 | 1.1 217 | 218 | 219 | sign-artifacts 220 | verify 221 | 222 | sign 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | jon 235 | Jon Chambers 236 | jon.chambers@gmail.com 237 | https://github.com/jchambers 238 | 239 | developer 240 | 241 | -5 242 | 243 | 244 | 2015 245 | https://github.com/jchambers/jvptree 246 | 247 | scm:git:https://github.com/jchambers/jvptree.git 248 | scm:git:git@github.com:jchambers/jvptree.git 249 | https://github.com/jchambers/jvptree 250 | 251 | 252 | -------------------------------------------------------------------------------- /src/assembly/benchmark.xml: -------------------------------------------------------------------------------- 1 | 3 | benchmark 4 | 5 | jar 6 | 7 | false 8 | 9 | 10 | / 11 | true 12 | true 13 | test 14 | 15 | 16 | 17 | 18 | ${project.build.directory}/test-classes 19 | / 20 | 21 | **/* 22 | 23 | true 24 | 25 | 26 | 27 | 
-------------------------------------------------------------------------------- /src/benchmark/java/com/eatthepath/jvptree/CartesianDistanceFunction.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import com.eatthepath.jvptree.DistanceFunction; 4 | 5 | public class CartesianDistanceFunction implements DistanceFunction { 6 | 7 | @Override 8 | public double getDistance(final CartesianPoint firstPoint, final CartesianPoint secondPoint) { 9 | final double deltaX = firstPoint.getX() - secondPoint.getX(); 10 | final double deltaY = firstPoint.getY() - secondPoint.getY(); 11 | 12 | return Math.sqrt((deltaX * deltaX) + (deltaY * deltaY)); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/benchmark/java/com/eatthepath/jvptree/CartesianPoint.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | public class CartesianPoint { 4 | private final double x; 5 | private final double y; 6 | 7 | public CartesianPoint(final double x, final double y) { 8 | this.x = x; 9 | this.y = y; 10 | } 11 | 12 | public double getX() { 13 | return this.x; 14 | } 15 | 16 | public double getY() { 17 | return this.y; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/benchmark/java/com/eatthepath/jvptree/ThresholdSelectionBenchmark.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Oracle America, Inc. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * * Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 
10 | * 11 | * * Redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * * Neither the name of Oracle nor the names of its contributors may be used 16 | * to endorse or promote products derived from this software without 17 | * specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 23 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 29 | * THE POSSIBILITY OF SUCH DAMAGE. 
30 | */ 31 | 32 | package com.eatthepath.jvptree; 33 | 34 | import java.util.ArrayList; 35 | import java.util.Collections; 36 | import java.util.List; 37 | import java.util.Random; 38 | 39 | import org.openjdk.jmh.annotations.Benchmark; 40 | import org.openjdk.jmh.annotations.Param; 41 | import org.openjdk.jmh.annotations.Scope; 42 | import org.openjdk.jmh.annotations.Setup; 43 | import org.openjdk.jmh.annotations.State; 44 | 45 | import com.eatthepath.jvptree.util.MedianDistanceThresholdSelectionStrategy; 46 | import com.eatthepath.jvptree.util.SamplingMedianDistanceThresholdSelectionStrategy; 47 | 48 | @State(Scope.Thread) 49 | public class ThresholdSelectionBenchmark { 50 | 51 | @Param({"100000"}) 52 | public int pointCount; 53 | 54 | private List points; 55 | 56 | private final Random random = new Random(); 57 | private final CartesianDistanceFunction distanceFunction = new CartesianDistanceFunction(); 58 | 59 | private final MedianDistanceThresholdSelectionStrategy medianSelectionStrategy = 60 | new MedianDistanceThresholdSelectionStrategy<>(); 61 | 62 | private final SamplingMedianDistanceThresholdSelectionStrategy samplingMedianSelectionStrategy = 63 | new SamplingMedianDistanceThresholdSelectionStrategy<>(100); 64 | 65 | @Setup 66 | public void setUp() { 67 | this.points = new ArrayList<>(this.pointCount); 68 | 69 | for (int i = 0; i < this.pointCount; i++) { 70 | this.points.add(this.createRandomPoint()); 71 | } 72 | } 73 | 74 | @Benchmark 75 | public double benchmarkRandomThresholdSelection() { 76 | final CartesianPoint origin = this.createRandomPoint(); 77 | 78 | return this.distanceFunction.getDistance(origin, this.points.get(this.random.nextInt(this.pointCount))); 79 | } 80 | 81 | @Benchmark 82 | public double benchmarkMedianThresholdSelection() { 83 | final CartesianPoint origin = this.createRandomPoint(); 84 | 85 | return this.medianSelectionStrategy.selectThreshold(this.points, origin, this.distanceFunction); 86 | } 87 | 88 | @Benchmark 89 | public 
double benchmarkSamplingMedianThresholdSelection() { 90 | final CartesianPoint origin = this.createRandomPoint(); 91 | 92 | return this.samplingMedianSelectionStrategy.selectThreshold(this.points, origin, this.distanceFunction); 93 | } 94 | 95 | @Benchmark 96 | public double benchmarkNaiveMedianThresholdSelection() { 97 | final CartesianPoint origin = this.createRandomPoint(); 98 | 99 | Collections.sort(this.points, new DistanceComparator<>(origin, this.distanceFunction)); 100 | 101 | return this.distanceFunction.getDistance(origin, this.points.get(this.points.size() / 2)); 102 | } 103 | 104 | private CartesianPoint createRandomPoint() { 105 | return new CartesianPoint(this.random.nextDouble(), this.random.nextDouble()); 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/benchmark/java/com/eatthepath/jvptree/VPTreeConstructionBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Random; 6 | 7 | import org.openjdk.jmh.annotations.Benchmark; 8 | import org.openjdk.jmh.annotations.Param; 9 | import org.openjdk.jmh.annotations.Scope; 10 | import org.openjdk.jmh.annotations.Setup; 11 | import org.openjdk.jmh.annotations.State; 12 | 13 | @State(Scope.Thread) 14 | public class VPTreeConstructionBenchmark { 15 | 16 | @Param({"100000"}) 17 | public int pointCount; 18 | 19 | private List points; 20 | 21 | private final Random random = new Random(); 22 | private final CartesianDistanceFunction distanceFunction = new CartesianDistanceFunction(); 23 | 24 | @Setup 25 | public void setUp() { 26 | this.points = new ArrayList<>(this.pointCount); 27 | 28 | for (int i = 0; i < this.pointCount; i++) { 29 | this.points.add(this.createRandomPoint()); 30 | } 31 | } 32 | 33 | @Benchmark 34 | public VPTree benchmarkConstructTreeWithPoints() { 35 | return new 
VPTree<>(this.distanceFunction, this.points); 36 | } 37 | 38 | @Benchmark 39 | public VPTree benchmarkConstructAndAddPoints() { 40 | final VPTree vptree = new VPTree<>(this.distanceFunction); 41 | vptree.addAll(this.points); 42 | 43 | return vptree; 44 | } 45 | 46 | private CartesianPoint createRandomPoint() { 47 | return new CartesianPoint(this.random.nextDouble(), this.random.nextDouble()); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/benchmark/java/com/eatthepath/jvptree/VPTreeQueryBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.List; 6 | import java.util.Random; 7 | 8 | import org.openjdk.jmh.annotations.Benchmark; 9 | import org.openjdk.jmh.annotations.Param; 10 | import org.openjdk.jmh.annotations.Scope; 11 | import org.openjdk.jmh.annotations.Setup; 12 | import org.openjdk.jmh.annotations.State; 13 | 14 | import com.eatthepath.jvptree.util.SamplingMedianDistanceThresholdSelectionStrategy; 15 | 16 | @State(Scope.Thread) 17 | public class VPTreeQueryBenchmark { 18 | 19 | @Param({"100000"}) 20 | public int pointCount; 21 | 22 | @Param({"2", "16", "128"}) 23 | public int nodeSize; 24 | 25 | @Param({"2", "16", "128"}) 26 | public int resultSetSize; 27 | 28 | private List points; 29 | private VPTree vptree; 30 | 31 | private final Random random = new Random(); 32 | private final CartesianDistanceFunction distanceFunction = new CartesianDistanceFunction(); 33 | 34 | @Setup 35 | public void setUp() { 36 | this.points = new ArrayList<>(this.pointCount); 37 | 38 | for (int i = 0; i < this.pointCount; i++) { 39 | this.points.add(this.createRandomPoint()); 40 | } 41 | 42 | this.vptree = new VPTree<>(this.distanceFunction, 43 | new SamplingMedianDistanceThresholdSelectionStrategy(32), 44 | this.nodeSize, this.points); 45 | } 46 | 47 | 
@Benchmark 48 | public List benchmarkNaiveSearch() { 49 | Collections.sort(this.points, new DistanceComparator<>(this.createRandomPoint(), this.distanceFunction)); 50 | return this.points.subList(0, this.resultSetSize); 51 | } 52 | 53 | @Benchmark 54 | public List benchmarkQueryTree() { 55 | return this.vptree.getNearestNeighbors(this.createRandomPoint(), this.resultSetSize); 56 | } 57 | 58 | private CartesianPoint createRandomPoint() { 59 | return new CartesianPoint(this.random.nextDouble(), this.random.nextDouble()); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/DistanceComparator.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import java.util.Comparator; 4 | 5 | /** 6 | * A {@code Comparator} that orders points by their distance (as determined by a given distance function) from a given 7 | * origin point. 8 | * 9 | * @author Jon Chambers 10 | */ 11 | public class DistanceComparator implements Comparator { 12 | private final T origin; 13 | private final DistanceFunction distanceFunction; 14 | 15 | /** 16 | * Constructs a new distance comparator with the given origin point and distance function. 17 | * 18 | * @param origin the point from which distances to other points will be calculated 19 | * @param distanceFunction the function that calculates the distance between the origin and the given points 20 | */ 21 | public DistanceComparator(final T origin, final DistanceFunction distanceFunction) { 22 | this.origin = origin; 23 | this.distanceFunction = distanceFunction; 24 | } 25 | 26 | /** 27 | * Compares two points by their distance from this distance comparator's origin point. 
28 | * 29 | * @param o1 the first point to be compared 30 | * @param o2 the second point to be compared 31 | * 32 | * @return a negative integer if o1 is closer to the origin than o2, a positive integer if o2 is closer to the 33 | * origin than o1, or zero if o1 and o2 are equidistant from the origin 34 | */ 35 | public int compare(final T o1, final T o2) { 36 | return Double.compare( 37 | this.distanceFunction.getDistance(this.origin, o1), 38 | this.distanceFunction.getDistance(this.origin, o2)); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/DistanceFunction.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | /** 4 | *

<p>A function that calculates the distance between two points. For the purposes of vp-trees, distance functions must 5 | * conform to the rules of a metric space, namely:</p> 6 | * 7 | * <ol> 8 | * <li>d(x, y) ≥ 0</li> 9 | * <li>d(x, y) = 0 if and only if x = y</li> 10 | * <li>d(x, y) = d(y, x)</li> 11 | * <li>d(x, z) ≤ d(x, y) + d(y, z)</li> 12 | * </ol>
13 | * 14 | * @author Jon Chambers 15 | */ 16 | public interface DistanceFunction { 17 | 18 | /** 19 | * Returns the distance between two points. 20 | * 21 | * @param firstPoint the first point 22 | * @param secondPoint the second point 23 | * 24 | * @return the distance between the two points 25 | */ 26 | double getDistance(T firstPoint, T secondPoint); 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/MetaIterator.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import java.util.ArrayDeque; 4 | import java.util.Collection; 5 | import java.util.Deque; 6 | import java.util.Iterator; 7 | import java.util.NoSuchElementException; 8 | 9 | /** 10 | * An iterator that concatenates a number of sub-iterators. 11 | * 12 | * @author Jon Chambers 13 | */ 14 | class MetaIterator implements Iterator { 15 | 16 | private final Deque> iterators; 17 | 18 | /** 19 | * Constructs an iterator that concatenates the contents of the given collection of iterators. 
20 | * 21 | * @param iterators the iterators to concatenate 22 | */ 23 | public MetaIterator(final Collection> iterators) { 24 | this.iterators = new ArrayDeque<>(iterators); 25 | } 26 | 27 | /* 28 | * (non-Javadoc) 29 | * @see java.util.Iterator#hasNext() 30 | */ 31 | public boolean hasNext() { 32 | while (!this.iterators.isEmpty()) { 33 | if (this.iterators.peek().hasNext()) { 34 | return true; 35 | } 36 | 37 | this.iterators.pop(); 38 | } 39 | 40 | return false; 41 | } 42 | 43 | /* 44 | * (non-Javadoc) 45 | * @see java.util.Iterator#next() 46 | */ 47 | public E next() { 48 | if (!this.hasNext()) { 49 | throw new NoSuchElementException(); 50 | } 51 | 52 | return this.iterators.peek().next(); 53 | } 54 | 55 | /* 56 | * (non-Javadoc) 57 | * @see java.util.Iterator#remove() 58 | */ 59 | public void remove() { 60 | throw new UnsupportedOperationException(); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/NearestNeighborCollector.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.PriorityQueue; 6 | 7 | /** 8 | * A utility class that uses a priority queue to efficiently collect results for a k-nearest-neighbors query in a 9 | * vp-tree. 10 | * 11 | * @author Jon Chambers 12 | */ 13 | class NearestNeighborCollector { 14 | private final P queryPoint; 15 | private final int capacity; 16 | 17 | private final DistanceFunction

distanceFunction; 18 | private final DistanceComparator

distanceComparator; 19 | private final PriorityQueue priorityQueue; 20 | 21 | private double distanceToFarthestPoint; 22 | 23 | /** 24 | * Constructs a new nearest neighbor collector that selectively accepts points that are close to the given query 25 | * point as determined by the given distance function. Up to the given number of nearest neighbors are collected, 26 | * and if neighbors are found that are closer than points in the current set, the most distant previously collected 27 | * point is replaced with the closer candidate. 28 | * 29 | * @param queryPoint the point for which nearest neighbors are to be collected 30 | * @param distanceFunction the distance function to be used to determine the distance between the query point and 31 | * potential neighbors 32 | * @param capacity the maximum number of nearest neighbors to collect 33 | */ 34 | public NearestNeighborCollector(final P queryPoint, final DistanceFunction

distanceFunction, final int capacity) { 35 | if (capacity < 1) { 36 | throw new IllegalArgumentException("Capacity must be positive."); 37 | } 38 | 39 | this.queryPoint = queryPoint; 40 | this.distanceFunction = distanceFunction; 41 | this.capacity = capacity; 42 | 43 | this.distanceComparator = new DistanceComparator<>(queryPoint, distanceFunction); 44 | 45 | this.priorityQueue = 46 | new PriorityQueue<>(this.capacity, java.util.Collections.reverseOrder(this.distanceComparator)); 47 | } 48 | 49 | /** 50 | * Returns the query point for this collector. 51 | * 52 | * @return the query point for this collector 53 | */ 54 | public P getQueryPoint() { 55 | return this.queryPoint; 56 | } 57 | 58 | /** 59 | * Offers a point to this collector. The point may or may not be added to the collection; points will only be added 60 | * if the collector is not already full, or if the collector is full, but the offered point is closer to the query 61 | * point than the most distant point already in the collection. 62 | * 63 | * @param point the point to offer to this collector 64 | */ 65 | public void offerPoint(final E point) { 66 | final boolean pointAdded; 67 | 68 | if (this.priorityQueue.size() < this.capacity) { 69 | this.priorityQueue.add(point); 70 | pointAdded = true; 71 | } else { 72 | assert this.priorityQueue.size() > 0; 73 | 74 | final double distanceToNewPoint = this.distanceFunction.getDistance(this.queryPoint, point); 75 | 76 | if (distanceToNewPoint < this.distanceToFarthestPoint) { 77 | this.priorityQueue.poll(); 78 | this.priorityQueue.add(point); 79 | pointAdded = true; 80 | } else { 81 | pointAdded = false; 82 | } 83 | } 84 | 85 | if (pointAdded) { 86 | this.distanceToFarthestPoint = this.distanceFunction.getDistance(this.queryPoint, this.priorityQueue.peek()); 87 | } 88 | } 89 | 90 | /** 91 | * Returns the point retained by this collector that is the farthest from the query point. 
92 | * 93 | * @return the point retained by this collector that is the farthest from the query point 94 | */ 95 | public E getFarthestPoint() { 96 | return this.priorityQueue.peek(); 97 | } 98 | 99 | /** 100 | * Returns a list of points retained by this collector, sorted by distance from the query point. 101 | * 102 | * @return a list of points retained by this collector, sorted by distance from the query point 103 | */ 104 | public List toSortedList() { 105 | final ArrayList sortedList = new ArrayList<>(this.priorityQueue); 106 | java.util.Collections.sort(sortedList, this.distanceComparator); 107 | 108 | return sortedList; 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/PartitionException.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | /** 4 | * Indicates that a list of points could not be partitioned by distance because either all points are on one side of 5 | * the distance threshold or all points are of equal distance from the pivot point. 6 | * 7 | * @author Jon Chambers 8 | */ 9 | class PartitionException extends Exception { 10 | private static final long serialVersionUID = 1L; 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/PointFilter.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | /** 4 | * A stateless filter that can determine whether points should be included in a spatial index's result set when 5 | * searching for nearby neighbors. 
6 | * 7 | * @param the type of point to which this filter applies 8 | * 9 | * @see SpatialIndex#getNearestNeighbors(Object, int, PointFilter) 10 | * @see SpatialIndex#getAllWithinDistance(Object, double, PointFilter) 11 | */ 12 | public interface PointFilter { 13 | 14 | /** 15 | * Tests whether a point should be included in a spatial index's result set when searching for nearby neighbors. 16 | * 17 | * @param point the point to test 18 | * 19 | * @return {@code true} if the point may be included in the result set or {@code false} if it should be excluded 20 | */ 21 | boolean allowPoint(T point); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/SpatialIndex.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import java.util.Collection; 4 | import java.util.List; 5 | 6 | /** 7 | * A collection of points that can be searched efficiently to find points near a given query point. A spatial index 8 | * takes two generic types. The first, {@code P}, is the base type of point for which distances can be measured. The 9 | * second, {@code E}, is the specific type of point contained within the index. The two ideas are separated because 10 | * callers may want to use an instance of {@code E} when querying the index. For example, an index that is used to 11 | * search for local businesses might have a base type of {@code GeospatialPoint}, but a specific type of 12 | * {@code HardwareStore}, which implements {@code GeospatialPoint} but has a number of additional required properties. 13 | * By separating the types, callers may realize the benefits of using a specific type when working with elements in the 14 | * index without the need to construct a new {@code HardwareStore} instance when querying points. 
Instead, they might 15 | * call {@link SpatialIndex#getNearestNeighbors(Object, int)} with a {@code new GeospatialPoint} instead of a much 16 | * heavier {@code HardwareStore}. 17 | * 18 | * @author Jon Chambers 19 | * 20 | * @param

the base type of points between which distances can be measured 21 | * @param the specific type of point contained in this vantage point tree 22 | */ 23 | public interface SpatialIndex extends Collection { 24 | /** 25 | *

Returns a list of the nearest neighbors to a given query point. The returned list is sorted by increasing 26 | * distance from the query point.

27 | * 28 | *

This returned list will contain at most {@code maxResults} elements (and may contain fewer if 29 | * {@code maxResults} is larger than the number of points in the index). If multiple points have the same distance 30 | * from the query point, the order in which they appear in the returned list is undefined. By extension, if multiple 31 | * points have the same distance from the query point and those points would "straddle" the end of the 32 | * returned list, which points are included in the list and which are cut off is not prescribed.

33 | * 34 | * @param queryPoint the point for which to find neighbors 35 | * @param maxResults the maximum length of the returned list 36 | * 37 | * @return a list of the nearest neighbors to the given query point sorted by increasing distance from the query 38 | * point 39 | */ 40 | List getNearestNeighbors(P queryPoint, int maxResults); 41 | 42 | /** 43 | *

Returns a list of the nearest neighbors accepted by the given filter to a given query point. The returned list 44 | * is sorted by increasing distance from the query point.

45 | * 46 | *

This returned list will contain at most {@code maxResults} elements (and may contain fewer if 47 | * {@code maxResults} is larger than the number of points in the index). If multiple points have the same distance 48 | * from the query point, the order in which they appear in the returned list is undefined. By extension, if multiple 49 | * points have the same distance from the query point and those points would "straddle" the end of the 50 | * returned list, which points are included in the list and which are cut off is not prescribed.

51 | * 52 | * @param queryPoint the point for which to find neighbors 53 | * @param maxResults the maximum length of the returned list 54 | * @param filter a filter to apply to each element to determine if it should be included in the list of neighbors 55 | * 56 | * @return a list of the nearest neighbors to the given query point sorted by increasing distance from the query 57 | * point 58 | */ 59 | List getNearestNeighbors(P queryPoint, int maxResults, PointFilter filter); 60 | 61 | /** 62 | * Returns a list of all points within a given distance to a query point. 63 | * 64 | * @param queryPoint the point for which to find neighbors 65 | * @param maxDistance the maximum allowable distance from the query point; points farther away than 66 | * {@code maxDistance} will not be included in the returned list 67 | * 68 | * @return a list of all points within the given distance to the query point; the returned list is sorted in order 69 | * of increasing distance from the query point 70 | */ 71 | List getAllWithinDistance(P queryPoint, double maxDistance); 72 | 73 | /** 74 | * Returns a list of all points within a given distance to a query point that match the given filter. 
75 | * 76 | * @param queryPoint the point for which to find neighbors 77 | * @param maxDistance the maximum allowable distance from the query point; points farther away than 78 | * {@code maxDistance} will not be included in the returned list 79 | * @param filter a filter to apply to each element to determine if it should be included in the list of neighbors 80 | * 81 | * @return a list of all points within the given distance to the query point; the returned list is sorted in order 82 | * of increasing distance from the query point 83 | */ 84 | List getAllWithinDistance(P queryPoint, double maxDistance, PointFilter filter); 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/ThresholdSelectionStrategy.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import java.util.List; 4 | 5 | /** 6 | * A strategy for choosing a distance threshold for vp-tree nodes. The main feature of vp-trees is that they partition 7 | * collections of points into collections of points that are closer to a given point (the vantage point) than a certain 8 | * threshold or farther away from the vantage point than the threshold. Given a list of points, a 9 | * {@code ThresholdSelectionStrategy} chooses the distance that will be used by a vp-tree node to partition its points. 10 | * 11 | * @author Jon Chambers 12 | */ 13 | public interface ThresholdSelectionStrategy { 14 | 15 | /** 16 | * Chooses a partitioning distance threshold appropriate for the given list of points. Implementations are allowed to 17 | * reorder the list of points, but must not add or remove points from the list. 
18 | * 19 | * @param points the points for which to choose a partitioning distance threshold 20 | * @param origin the point from which the threshold distances should be calculated 21 | * @param distanceFunction the function to be used to calculate distances between points 22 | * 23 | * @return a partitioning threshold distance appropriate for the given list of points; ideally, some points should 24 | * be closer to the origin than the returned threshold, and some should be farther 25 | */ 26 | double selectThreshold(List points, P origin, DistanceFunction

distanceFunction); 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/VPTree.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.Collections; 6 | import java.util.Iterator; 7 | import java.util.List; 8 | 9 | import com.eatthepath.jvptree.util.SamplingMedianDistanceThresholdSelectionStrategy; 10 | 11 | /** 12 | *

A vantage-point tree (or vp-tree) is a binary space partitioning collection of points in a metric space. The main 13 | * feature of vantage point trees is that they allow for k-nearest-neighbor searches in any metric space in 14 | * O(log(n)) time.

15 | * 16 | *

Vantage point trees recursively partition points by choosing a "vantage point" and a distance threshold; 17 | * points are then partitioned into one collection that contains all of the points closer to the vantage point than the 18 | * chosen threshold and one collection that contains all of the points farther away than the chosen threshold.

19 | * 20 | *

A {@linkplain DistanceFunction distance function} that satisfies the properties of a metric space must be provided 21 | * when constructing a vantage point tree. Callers may also specify a threshold selection strategy (a sampling median 22 | * strategy is used by default) and a node size to tune the ratio of nodes searched to points inspected per node. 23 | * Vantage point trees may be constructed with or without an initial collection of points, though specifying a 24 | * collection of points at construction time is the most efficient approach.

25 | * 26 | * @author Jon Chambers 27 | * 28 | * @param

the base type of points between which distances can be measured 29 | * @param the specific type of point contained in this vantage point tree 30 | */ 31 | public class VPTree implements SpatialIndex { 32 | 33 | private final DistanceFunction

distanceFunction; 34 | private final ThresholdSelectionStrategy thresholdSelectionStrategy; 35 | private final int nodeCapacity; 36 | 37 | private VPTreeNode rootNode; 38 | 39 | public static final int DEFAULT_NODE_CAPACITY = 32; 40 | 41 | private static final PointFilter NO_OP_POINT_FILTER = new PointFilter() { 42 | 43 | @Override 44 | public boolean allowPoint(final Object point) { 45 | return true; 46 | } 47 | }; 48 | 49 | /** 50 | * Constructs a new vp-tree that uses the given distance function and is initially empty. The constructed tree will 51 | * use a default {@link SamplingMedianDistanceThresholdSelectionStrategy} and node capacity 52 | * ({@value VPTree#DEFAULT_NODE_CAPACITY} points). 53 | * 54 | * @param distanceFunction the distance function to use to calculate the distance between points 55 | */ 56 | public VPTree(final DistanceFunction

distanceFunction) { 57 | this(distanceFunction, (Collection) null); 58 | } 59 | 60 | /** 61 | * Constructs a new vp-tree that uses the given distance function and is initially populated with the given 62 | * collection of points. The constructed tree will use a default 63 | * {@link SamplingMedianDistanceThresholdSelectionStrategy} and node capacity 64 | * ({@value VPTree#DEFAULT_NODE_CAPACITY} points). 65 | * 66 | * @param distanceFunction the distance function to use to calculate the distance between points 67 | * @param points the points with which this tree should be initially populated; may be {@code null} 68 | */ 69 | public VPTree(final DistanceFunction

distanceFunction, final Collection points) { 70 | this(distanceFunction, new SamplingMedianDistanceThresholdSelectionStrategy( 71 | SamplingMedianDistanceThresholdSelectionStrategy.DEFAULT_NUMBER_OF_SAMPLES), 72 | VPTree.DEFAULT_NODE_CAPACITY, points); 73 | } 74 | 75 | /** 76 | * Constructs a new vp-tree that uses the given distance function and threshold selection strategy to partition 77 | * points. The tree will be initially empty and will have a default node capacity 78 | * ({@value VPTree#DEFAULT_NODE_CAPACITY} points). 79 | * 80 | * @param distanceFunction the distance function to use to calculate the distance between points 81 | * @param thresholdSelectionStrategy the function to use to choose distance thresholds when partitioning nodes 82 | */ 83 | public VPTree(final DistanceFunction

distanceFunction, final ThresholdSelectionStrategy thresholdSelectionStrategy) { 84 | this(distanceFunction, thresholdSelectionStrategy, VPTree.DEFAULT_NODE_CAPACITY, null); 85 | } 86 | 87 | /** 88 | * Constructs a new vp-tree that uses the given distance function and threshold selection strategy to partition 89 | * points. The tree will be initially populated with the given collection of points and will have a default node 90 | * capacity ({@value VPTree#DEFAULT_NODE_CAPACITY} points). 91 | * 92 | * @param distanceFunction the distance function to use to calculate the distance between points 93 | * @param thresholdSelectionStrategy the function to use to choose distance thresholds when partitioning nodes 94 | * @param points the points with which this tree should be initially populated; may be {@code null} 95 | */ 96 | public VPTree(final DistanceFunction

distanceFunction, final ThresholdSelectionStrategy thresholdSelectionStrategy, final Collection points) { 97 | this(distanceFunction, thresholdSelectionStrategy, VPTree.DEFAULT_NODE_CAPACITY, points); 98 | } 99 | 100 | /** 101 | * Constructs a new vp-tree that uses the given distance function and threshold selection strategy to partition 102 | * points and is initially empty. The tree will attempt to partition nodes that contain more than 103 | * {@code nodeCapacity} points. 104 | * 105 | * @param distanceFunction the distance function to use to calculate the distance between points 106 | * @param thresholdSelectionStrategy the function to use to choose distance thresholds when partitioning nodes 107 | * @param nodeCapacity the largest capacity a node may have before it should be partitioned 108 | */ 109 | public VPTree(final DistanceFunction

distanceFunction, final ThresholdSelectionStrategy thresholdSelectionStrategy, final int nodeCapacity) { 110 | this(distanceFunction, thresholdSelectionStrategy, nodeCapacity, null); 111 | } 112 | 113 | /** 114 | * Constructs a new vp-tree that uses the given distance function and threshold selection strategy to partition 115 | * points. The tree will attempt to partition nodes that contain more than {@code nodeCapacity} points, and will 116 | * be initially populated with the given collection of points. 117 | * 118 | * @param distanceFunction the distance function to use to calculate the distance between points 119 | * @param thresholdSelectionStrategy the function to use to choose distance thresholds when partitioning nodes 120 | * @param nodeCapacity the largest capacity a node may have before it should be partitioned 121 | * @param points the points with which this tree should be initially populated; may be {@code null} 122 | */ 123 | public VPTree(final DistanceFunction

distanceFunction, final ThresholdSelectionStrategy thresholdSelectionStrategy, final int nodeCapacity, final Collection points) { 124 | this.distanceFunction = distanceFunction; 125 | this.thresholdSelectionStrategy = thresholdSelectionStrategy; 126 | this.nodeCapacity = nodeCapacity; 127 | 128 | if (points != null && !points.isEmpty()) { 129 | this.rootNode = new VPTreeNode<>( 130 | points, 131 | this.distanceFunction, 132 | this.thresholdSelectionStrategy, 133 | this.nodeCapacity); 134 | } 135 | } 136 | 137 | /* 138 | * (non-Javadoc) 139 | * @see com.eatthepath.jvptree.SpatialIndex#getNearestNeighbors(java.lang.Object, int) 140 | */ 141 | @Override 142 | public List getNearestNeighbors(final P queryPoint, final int maxResults) { 143 | return this.getNearestNeighbors(queryPoint, maxResults, NO_OP_POINT_FILTER); 144 | } 145 | 146 | @Override 147 | public List getNearestNeighbors(final P queryPoint, final int maxResults, final PointFilter filter) { 148 | final List nearestNeighbors; 149 | // An empty tree has no neighbors; return an empty list rather than null 150 | if (this.rootNode == null) { 151 | nearestNeighbors = new ArrayList<>(); 152 | } else { 153 | final NearestNeighborCollector collector = 154 | new NearestNeighborCollector<>(queryPoint, this.distanceFunction, maxResults); 155 | 156 | this.rootNode.collectNearestNeighbors(collector, filter); 157 | 158 | nearestNeighbors = collector.toSortedList(); 159 | } 160 | 161 | return nearestNeighbors; 162 | } 163 | 164 | /* 165 | * (non-Javadoc) 166 | * @see com.eatthepath.jvptree.SpatialIndex#getAllWithinDistance(java.lang.Object, double) 167 | */ 168 | @Override 169 | public List getAllWithinDistance(final P queryPoint, final double maxDistance) { 170 | return this.getAllWithinDistance(queryPoint, maxDistance, NO_OP_POINT_FILTER); 171 | } 172 | 173 | @Override 174 | public List getAllWithinDistance(final P queryPoint, final double maxDistance, final PointFilter filter) { 175 | final List pointsWithinRange; 176 | // An empty tree yields an empty list (not null); results are sorted by increasing distance as SpatialIndex documents 177 | if (this.rootNode == null) { 178 | pointsWithinRange = new ArrayList<>(); 179 | } else { 180 | 
pointsWithinRange = new ArrayList<>(); 181 | this.rootNode.collectAllWithinDistance(queryPoint, maxDistance, pointsWithinRange, filter); 182 | } 183 | Collections.sort(pointsWithinRange, new DistanceComparator<>(queryPoint, this.distanceFunction)); 184 | return pointsWithinRange; 185 | } 186 | 187 | /* 188 | * (non-Javadoc) 189 | * @see java.util.Collection#size() 190 | */ 191 | @Override 192 | public int size() { 193 | return this.rootNode == null ? 0 : this.rootNode.size(); 194 | } 195 | 196 | /* 197 | * (non-Javadoc) 198 | * @see java.util.Collection#isEmpty() 199 | */ 200 | @Override 201 | public boolean isEmpty() { 202 | return this.size() == 0; 203 | } 204 | 205 | /* 206 | * (non-Javadoc) 207 | * @see java.util.Collection#contains(java.lang.Object) 208 | */ 209 | @Override 210 | @SuppressWarnings("unchecked") 211 | public boolean contains(final Object o) { 212 | try { 213 | return this.rootNode == null ? false : this.rootNode.contains((E) o); 214 | } catch (final ClassCastException e) { 215 | return false; 216 | } 217 | } 218 | 219 | /* 220 | * (non-Javadoc) 221 | * @see java.util.Collection#containsAll(java.util.Collection) 222 | */ 223 | @Override 224 | public boolean containsAll(final Collection points) { 225 | for (final Object point : points) { 226 | if (!this.contains(point)) { return false; } 227 | } 228 | 229 | return true; 230 | } 231 | 232 | /* 233 | * (non-Javadoc) 234 | * @see java.util.Collection#iterator() 235 | */ 236 | @Override 237 | public Iterator iterator() { 238 | final ArrayList> iterators = new ArrayList<>(); 239 | 240 | if (this.rootNode != null) { 241 | this.rootNode.collectIterators(iterators); 242 | } 243 | 244 | return new MetaIterator<>(iterators); 245 | } 246 | 247 | /* 248 | * (non-Javadoc) 249 | * @see java.util.Collection#toArray() 250 | */ 251 | @Override 252 | public Object[] toArray() { 253 | final Object[] array = new Object[this.size()]; 254 | 255 | if (this.rootNode != null) { 256 | this.rootNode.addPointsToArray(array, 0); 257 | } 258 | 259 | return array; 260 | } 261 | 262 | /* 263 | * (non-Javadoc) 264 | * @see 
java.util.Collection#toArray(java.lang.Object[]) 265 | */ 266 | @Override 267 | @SuppressWarnings("unchecked") 268 | public T[] toArray(final T[] array) { 269 | final T[] arrayToPopulate; 270 | 271 | if (array.length < this.size()) { 272 | arrayToPopulate = (T[])java.lang.reflect.Array.newInstance(array.getClass().getComponentType(), this.size()); 273 | } else { 274 | arrayToPopulate = array; 275 | } 276 | 277 | if (this.rootNode != null) { 278 | this.rootNode.addPointsToArray(arrayToPopulate, 0); 279 | } 280 | 281 | return arrayToPopulate; 282 | } 283 | 284 | /* 285 | * (non-Javadoc) 286 | * @see java.util.Collection#add(java.lang.Object) 287 | */ 288 | @Override 289 | public boolean add(final E point) { 290 | return this.addAll(Collections.singletonList(point)); 291 | } 292 | 293 | /* 294 | * (non-Javadoc) 295 | * @see java.util.Collection#addAll(java.util.Collection) 296 | */ 297 | @SuppressWarnings("unchecked") 298 | public boolean addAll(final Collection points) { 299 | // Adding points always modifies a VPTree; an empty collection is a no-op (a VPTreeNode cannot be built from zero points) 300 | final boolean modified = !points.isEmpty(); 301 | if (!modified) { return false; } 302 | if (this.rootNode == null) { 303 | // We don't need to anneal here because annealing happens automatically as part of node construction 304 | this.rootNode = new VPTreeNode<>( 305 | (Collection) points, 306 | this.distanceFunction, 307 | this.thresholdSelectionStrategy, 308 | this.nodeCapacity); 309 | } else { 310 | for (final E point : points) { 311 | this.rootNode.add(point); 312 | } 313 | 314 | if (modified) { 315 | this.rootNode.anneal(); 316 | } 317 | } 318 | 319 | return modified; 320 | } 321 | 322 | /* 323 | * (non-Javadoc) 324 | * @see java.util.Collection#remove(java.lang.Object) 325 | */ 326 | public boolean remove(final Object point) { 327 | return this.removeAll(Collections.singletonList(point)); 328 | } 329 | 330 | /* 331 | * (non-Javadoc) 332 | * @see java.util.Collection#removeAll(java.util.Collection) 333 | */ 334 | @SuppressWarnings("unchecked") 335 | public boolean 
removeAll(final Collection points) { 336 | boolean pointRemoved = false; 337 | 338 | if (this.rootNode == null) { 339 | pointRemoved = false; 340 | } else { 341 | for (final Object point : points) { 342 | try { 343 | pointRemoved = this.rootNode.remove((E) point) || pointRemoved; 344 | } catch (final ClassCastException ignored) { 345 | // Ignored; no change to `pointRemoved` 346 | } 347 | } 348 | } 349 | 350 | if (pointRemoved) { 351 | this.rootNode.anneal(); 352 | } 353 | 354 | return pointRemoved; 355 | } 356 | 357 | /* 358 | * (non-Javadoc) 359 | * @see java.util.Collection#retainAll(java.util.Collection) 360 | */ 361 | @Override 362 | public boolean retainAll(final Collection points) { 363 | final boolean modified = this.rootNode == null ? false : this.rootNode.retainAll(points); 364 | 365 | if (modified) { 366 | this.rootNode.anneal(); 367 | } 368 | 369 | return modified; 370 | } 371 | 372 | /* 373 | * (non-Javadoc) 374 | * @see java.util.Collection#clear() 375 | */ 376 | @Override 377 | public void clear() { 378 | this.rootNode = null; 379 | } 380 | } 381 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/VPTreeNode.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.Collections; 6 | import java.util.Iterator; 7 | import java.util.List; 8 | import java.util.Random; 9 | 10 | /** 11 | * A single node of a vantage-point tree. Nodes may either be leaf nodes that contain points directly or branch nodes 12 | * that have a "closer than threshold" and "farther than threshold" child node. 13 | * 14 | * @author Jon Chambers 15 | */ 16 | class VPTreeNode { 17 | 18 | private final int capacity; 19 | private final DistanceFunction

distanceFunction; 20 | private final ThresholdSelectionStrategy thresholdSelectionStrategy; 21 | 22 | private ArrayList points; 23 | 24 | private final E vantagePoint; 25 | 26 | private double threshold; 27 | 28 | private VPTreeNode closer; 29 | private VPTreeNode farther; 30 | 31 | /** 32 | * Constructs a new node that contains the given collection of points. If the given collection of points is larger 33 | * than the given maximum capacity, the new node will attempts to partition the collection of points into child 34 | * nodes using the given distance function and threshold selection strategy. 35 | * 36 | * @param points the collection of points to store in or below this node 37 | * @param distanceFunction the distance function to use when partitioning points 38 | * @param thresholdSelectionStrategy the threshold selection strategy to use when selecting points 39 | * @param capacity the desired maximum capacity of this node; this node may contain more points than the given 40 | * capacity if the given collection of points cannot be partitioned (for example, because all of the points are an 41 | * equal distance away from the vantage point) 42 | */ 43 | public VPTreeNode(final Collection points, final DistanceFunction

distanceFunction, 44 | final ThresholdSelectionStrategy thresholdSelectionStrategy, final int capacity) { 45 | 46 | if (capacity < 1) { 47 | throw new IllegalArgumentException("Capacity must be positive."); 48 | } 49 | 50 | if (points.isEmpty()) { 51 | throw new IllegalArgumentException("Cannot create a VPTreeNode with an empty list of points."); 52 | } 53 | 54 | this.capacity = capacity; 55 | this.distanceFunction = distanceFunction; 56 | this.thresholdSelectionStrategy = thresholdSelectionStrategy; 57 | this.points = new ArrayList<>(points); 58 | 59 | // All nodes must have a vantage point; choose one at random from the available points 60 | this.vantagePoint = this.points.get(new Random().nextInt(points.size())); 61 | 62 | this.anneal(); 63 | } 64 | 65 | protected void anneal() { 66 | if (this.points == null) { 67 | final int closerSize = this.closer.size(); 68 | final int fartherSize = this.farther.size(); 69 | 70 | if (closerSize == 0 || fartherSize == 0) { 71 | // One of the child nodes has become empty, and needs to be pruned. 72 | this.points = new ArrayList<>(closerSize + fartherSize); 73 | this.addAllPointsToCollection(this.points); 74 | 75 | this.closer = null; 76 | this.farther = null; 77 | 78 | this.anneal(); 79 | } else { 80 | this.closer.anneal(); 81 | this.farther.anneal(); 82 | } 83 | } else { 84 | if (this.points.size() > this.capacity) { 85 | // Partially sort the list such that all points closer than or equal to the threshold distance from the 86 | // vantage point come before the threshold point in the list and all points farther away come after the 87 | // threshold point. 
88 | this.threshold = this.thresholdSelectionStrategy.selectThreshold(this.points, this.vantagePoint, this.distanceFunction); 89 | 90 | try { 91 | final int firstIndexPastThreshold = 92 | VPTreeNode.partitionPoints(this.points, this.vantagePoint, this.threshold, this.distanceFunction); 93 | 94 | this.closer = new VPTreeNode<>(this.points.subList(0, firstIndexPastThreshold), this.distanceFunction, this.thresholdSelectionStrategy, this.capacity); 95 | this.farther = new VPTreeNode<>(this.points.subList(firstIndexPastThreshold, this.points.size()), this.distanceFunction, this.thresholdSelectionStrategy, this.capacity); 96 | 97 | this.points = null; 98 | } catch (final PartitionException e) { 99 | // We couldn't partition the list, so just store all of the points in this node 100 | this.closer = null; 101 | this.farther = null; 102 | } 103 | } 104 | } 105 | } 106 | 107 | /** 108 | * Returns the number of points stored in this node and its children. 109 | * 110 | * @return the number of points stored in this node and its children 111 | */ 112 | public int size() { 113 | if (this.points == null) { 114 | return this.closer.size() + this.farther.size(); 115 | } else { 116 | return this.points.size(); 117 | } 118 | } 119 | 120 | /** 121 | * Adds a point to this node or one of its children. If this node is a leaf node and the addition of the new point 122 | * increases the size of the node beyond its desired capacity, the node will attempt to partition its points into 123 | * two child nodes. 124 | * 125 | * @param point the point to add to this node 126 | */ 127 | public void add(final E point) { 128 | if (this.points == null) { 129 | // This is not a leaf node; pass this point on to the appropriate child 130 | this.getChildNodeForPoint(point).add(point); 131 | } else { 132 | this.points.add(point); 133 | } 134 | } 135 | 136 | /** 137 | * Removes a point from this node (if it is a leaf node) or one of its children. 
If the removal of the point would 138 | * result in an empty node, the empty node's parent will absorb and re-partition all points from all child nodes the next time {@code anneal()} is called. 139 | * 140 | * @param point the point to remove from this node or one of its children 141 | * @return {@code true} if a point was removed or {@code false} otherwise 142 | */ 143 | public boolean remove(final E point) { 144 | final boolean modified; 145 | 146 | if (this.points == null) { 147 | // This is not a leaf node; try to remove the point from an appropriate child node 148 | modified = this.getChildNodeForPoint(point).remove(point); 149 | } else { 150 | modified = this.points.remove(point); 151 | } 152 | 153 | return modified; 154 | } 155 | 156 | /** 157 | * Removes all points from this node and its children that are not in the given collection of points. If the removal of a 158 | * point would result in an empty node, the empty node's parent will absorb and re-partition all points from all 159 | * child nodes the next time {@code anneal()} is called. 160 | * 161 | * @param points the collection of points to retain 162 | * 163 | * @return {@code true} if any points were removed from this node or one of its children as a result of this 164 | * operation or {@code false} otherwise 165 | */ 166 | public boolean retainAll(final Collection points) { 167 | final boolean modified; 168 | 169 | if (this.points == null) { 170 | final boolean modifiedCloser = this.closer.retainAll(points); 171 | final boolean modifiedFarther = this.farther.retainAll(points); 172 | 173 | modified = modifiedCloser || modifiedFarther; 174 | } else { 175 | modified = this.points.retainAll(points); 176 | } 177 | 178 | return modified; 179 | } 180 | 181 | /** 182 | * Tests whether this node or one of its children contains the given point. 
183 | * 184 | * @param point the point to check 185 | * 186 | * @return {@code true} if this node or one of its children contains the given point or {@code false} otherwise 187 | */ 188 | public boolean contains(final E point) { 189 | return this.points == null ? this.getChildNodeForPoint(point).contains(point) : this.points.contains(point); 190 | } 191 | 192 | public void collectNearestNeighbors(final NearestNeighborCollector collector, final PointFilter filter) { 193 | if (this.points == null) { 194 | final VPTreeNode firstNodeSearched = this.getChildNodeForPoint(collector.getQueryPoint()); 195 | firstNodeSearched.collectNearestNeighbors(collector, filter); 196 | 197 | final double distanceFromVantagePointToQueryPoint = 198 | this.distanceFunction.getDistance(this.vantagePoint, collector.getQueryPoint()); 199 | // If nothing has been collected yet (e.g. the filter rejected every point so far), treat the farthest neighbor as infinitely distant so that the other child is always searched 200 | final double distanceFromQueryPointToFarthestPoint = collector.getFarthestPoint() == null ? Double.POSITIVE_INFINITY : 201 | this.distanceFunction.getDistance(collector.getQueryPoint(), collector.getFarthestPoint()); 202 | 203 | if (firstNodeSearched == this.closer) { 204 | // We've already searched the node that contains points within this node's threshold. We also want to 205 | // search the farther node if the distance from the query point to the most distant point in the 206 | // neighbor collector is greater than the distance from the query point to this node's threshold, since 207 | // there could be a point outside of this node that's closer than the most distant neighbor we've found 208 | // so far. 209 | 210 | final double distanceFromQueryPointToThreshold = this.threshold - distanceFromVantagePointToQueryPoint; 211 | 212 | if (distanceFromQueryPointToFarthestPoint > distanceFromQueryPointToThreshold) { 213 | this.farther.collectNearestNeighbors(collector, filter); 214 | } 215 | } else { 216 | // We've already searched the node that contains points beyond this node's threshold. 
We want to search 217 | // the within-threshold node if it's "easier" to get from the query point to this node's region than it 218 | // is to get from the query point to the most distant match, since there could be a point within this 219 | // node's threshold that's closer than the most distant match. 220 | final double distanceFromQueryPointToThreshold = distanceFromVantagePointToQueryPoint - this.threshold; 221 | 222 | if(distanceFromQueryPointToThreshold <= distanceFromQueryPointToFarthestPoint) { 223 | this.closer.collectNearestNeighbors(collector, filter); 224 | } 225 | } 226 | } else { 227 | for (final E point : this.points) { 228 | if (filter.allowPoint(point)) { 229 | collector.offerPoint(point); 230 | } 231 | } 232 | } 233 | } 234 | 235 | /** 236 | * Gathers all points within a given maximum distance of the given query point into the given collection. 237 | * 238 | * @param queryPoint the point from which to measure distance to other points 239 | * @param maxDistance the distance within which to collect points 240 | * @param collection the collection to which points within the maximum distance should be added 241 | */ 242 | public void collectAllWithinDistance(final P queryPoint, final double maxDistance, final Collection collection, final PointFilter filter) { 243 | if (this.points == null) { 244 | final double distanceFromVantagePointToQueryPoint = 245 | this.distanceFunction.getDistance(this.vantagePoint, queryPoint); 246 | 247 | // We want to search any of this node's children that intersect with the query region 248 | if (distanceFromVantagePointToQueryPoint <= this.threshold + maxDistance) { 249 | this.closer.collectAllWithinDistance(queryPoint, maxDistance, collection, filter); 250 | } 251 | 252 | if (distanceFromVantagePointToQueryPoint + maxDistance > this.threshold) { 253 | this.farther.collectAllWithinDistance(queryPoint, maxDistance, collection, filter); 254 | } 255 | } else { 256 | for (final E point : this.points) { 257 | if 
(this.distanceFunction.getDistance(queryPoint, point) <= maxDistance) { 258 | if (filter.allowPoint(point)) { 259 | collection.add(point); 260 | } 261 | } 262 | } 263 | } 264 | } 265 | 266 | /** 267 | * Returns the child node (either the closer node or farther node) that would contain the given point given its 268 | * distance from this node's vantage point. 269 | * 270 | * @param point the point for which to choose an appropriate child node; the point need not actually exist within 271 | * either child node 272 | * 273 | * @return this node's "closer" child node if the given point is within this node's distance threshold of the 274 | * vantage point or the "farther" node otherwise 275 | */ 276 | private VPTreeNode getChildNodeForPoint(final P point) { 277 | return this.distanceFunction.getDistance(this.vantagePoint, point) <= this.threshold ? this.closer : this.farther; 278 | } 279 | 280 | /** 281 | * Adds all points contained by this node and its children to the given collection. 282 | * 283 | * @param collection the collection to which points should be added. 284 | */ 285 | private void addAllPointsToCollection(final Collection collection) { 286 | if (this.points == null) { 287 | this.closer.addAllPointsToCollection(collection); 288 | this.farther.addAllPointsToCollection(collection); 289 | } else { 290 | collection.addAll(this.points); 291 | } 292 | } 293 | 294 | /** 295 | * Adds all points contained by this node and its children to the given array. 
296 | * 297 | * @param array the array to which points should be added 298 | * @param offset the starting index at which to add points to the array 299 | * 300 | * @return the number of points added to the array 301 | */ 302 | public int addPointsToArray(final Object[] array, final int offset) { 303 | final int pointsAdded; 304 | 305 | if (this.points == null) { 306 | final int pointsAddedFromCloserNode = this.closer.addPointsToArray(array, offset); 307 | final int pointsAddedFromFartherNode = this.farther.addPointsToArray(array, offset + pointsAddedFromCloserNode); 308 | 309 | pointsAdded = pointsAddedFromCloserNode + pointsAddedFromFartherNode; 310 | } else { 311 | System.arraycopy(this.points.toArray(), 0, array, offset, this.points.size()); 312 | pointsAdded = this.points.size(); 313 | } 314 | 315 | return pointsAdded; 316 | } 317 | 318 | /** 319 | * Recursively gathers iterators that span the points contained in this node and its children into the given 320 | * collection. 321 | * 322 | * @param collection the collection to which iterators should be added 323 | */ 324 | public void collectIterators(final Collection> collection) { 325 | if (this.points == null) { 326 | this.closer.collectIterators(collection); 327 | this.farther.collectIterators(collection); 328 | } else { 329 | collection.add(this.points.iterator()); 330 | } 331 | } 332 | 333 | /** 334 | * Partitions the points in the given list such that all points that fall within the given distance threshold of the 335 | * given vantage point are on one "side" of the list and all points beyond the threshold are on the other. 
336 | * 337 | * @param points the list of points to partition 338 | * @param vantagePoint the point from which to measure distances 339 | * @param threshold the distance threshold to be used for partitioning 340 | * @param distanceFunction the function to use to calculate distances from the vantage point 341 | * @return the index of the first point in the list that falls beyond the distance threshold 342 | * 343 | * @throws PartitionException if the list of points could not be partitioned (i.e. because they are all the same 344 | * distance from the vantage point). 345 | */ 346 | private static int partitionPoints(final List points, final E vantagePoint, final double threshold, final DistanceFunction distanceFunction) throws PartitionException { 347 | int i = 0; 348 | int j = points.size() - 1; 349 | 350 | // This is, essentially, a single swapping quicksort iteration 351 | for (; i <= j; i++) { 352 | if (distanceFunction.getDistance(vantagePoint, points.get(i)) > threshold) { 353 | for (; j >= i; j--) { 354 | if (distanceFunction.getDistance(vantagePoint, points.get(j)) <= threshold) { 355 | Collections.swap(points, i, j--); 356 | break; 357 | } 358 | } 359 | } 360 | } 361 | 362 | final int firstIndexPastThreshold = distanceFunction.getDistance(vantagePoint, points.get(i - 1)) > threshold ? i - 1 : i; 363 | 364 | if (distanceFunction.getDistance(vantagePoint, points.get(0)) <= threshold && 365 | distanceFunction.getDistance(vantagePoint, points.get(points.size() - 1)) > threshold) { 366 | 367 | return firstIndexPastThreshold; 368 | } else { 369 | throw new PartitionException(); 370 | } 371 | } 372 | } 373 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Contains core classes and interfaces for working with vantage-point trees. 
3 | * 4 | * @author Jon Chambers 5 | */ 6 | package com.eatthepath.jvptree; 7 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/util/MedianDistanceThresholdSelectionStrategy.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree.util; 2 | 3 | import java.util.List; 4 | import java.util.Random; 5 | 6 | import com.eatthepath.jvptree.DistanceFunction; 7 | import com.eatthepath.jvptree.ThresholdSelectionStrategy; 8 | 9 | /** 10 | * A threshold distance selection strategy that uses the median distance from the origin as the threshold. 11 | * 12 | * @author Jon Chambers 13 | */ 14 | public class MedianDistanceThresholdSelectionStrategy implements ThresholdSelectionStrategy { 15 | 16 | /** 17 | * Returns the median distance of the given points from the given origin. This method will partially sort the list 18 | * of points in the process. 19 | * 20 | * @param points the list of points from which a median distance will be chosen 21 | * @param origin the point from which distances to other points will be calculated 22 | * @param distanceFunction the function to be used to calculate the distance between the origin and other points 23 | * 24 | * @return the median distance from the origin to the given list of points 25 | * 26 | * @throws IllegalArgumentException if the given list of points is empty 27 | */ 28 | public double selectThreshold(final List points, final P origin, final DistanceFunction

distanceFunction) { 29 | if (points.isEmpty()) { 30 | throw new IllegalArgumentException("Point list must not be empty."); 31 | } 32 | 33 | int left = 0; 34 | int right = points.size() - 1; 35 | 36 | final int medianIndex = points.size() / 2; 37 | final Random random = new Random(); 38 | 39 | // The strategy here is to use quickselect (https://en.wikipedia.org/wiki/Quickselect) to recursively partition 40 | // the parts of a list on one side of a pivot, working our way toward the center of the list. 41 | while (left != right) { 42 | final int pivotIndex = left + (right - left == 0 ? 0 : random.nextInt(right - left)); 43 | final double pivotDistance = distanceFunction.getDistance(origin, points.get(pivotIndex)); 44 | 45 | // Temporarily move the pivot point all the way out to the end of this section of the list 46 | java.util.Collections.swap(points, pivotIndex, right); 47 | 48 | int storeIndex = left; 49 | 50 | for (int i = left; i < right; i++) { 51 | if (distanceFunction.getDistance(origin, points.get(i)) < pivotDistance) { 52 | java.util.Collections.swap(points, storeIndex++, i); 53 | } 54 | } 55 | 56 | // ...and now bring that original pivot point back to its rightful place. 
57 | java.util.Collections.swap(points, right, storeIndex); 58 | 59 | if (storeIndex == medianIndex) { 60 | // Mission accomplished; we've placed the point that should rightfully be at the median index 61 | break; 62 | } else if (storeIndex < medianIndex) { 63 | // We need to work on the section of the list to the right of the pivot 64 | left = storeIndex + 1; 65 | } else { 66 | // We need to work on the section of the list to the left of the pivot 67 | right = storeIndex - 1; 68 | } 69 | } 70 | 71 | return distanceFunction.getDistance(origin, points.get(medianIndex)); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/util/SamplingMedianDistanceThresholdSelectionStrategy.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree.util; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import com.eatthepath.jvptree.DistanceFunction; 7 | import com.eatthepath.jvptree.ThresholdSelectionStrategy; 8 | 9 | /** 10 | * A threshold distance selection strategy that uses the median distance from the origin to a subset of the given list 11 | * of points as the threshold. 12 | * 13 | * @author Jon Chambers 14 | */ 15 | public class SamplingMedianDistanceThresholdSelectionStrategy extends MedianDistanceThresholdSelectionStrategy implements ThresholdSelectionStrategy { 16 | 17 | private final int numberOfSamples; 18 | 19 | public static final int DEFAULT_NUMBER_OF_SAMPLES = 32; 20 | 21 | /** 22 | * Constructs a threshold selector that uses up to a default ({@value DEFAULT_NUMBER_OF_SAMPLES}) number of samples 23 | * from a list of points to choose a median distance. 
24 | */ 25 | public SamplingMedianDistanceThresholdSelectionStrategy() { 26 | this(DEFAULT_NUMBER_OF_SAMPLES); 27 | } 28 | 29 | /** 30 | * Constructs a threshold selector that uses up to the given number of samples from a list of points to choose a 31 | * median distance. 32 | * 33 | * @param numberOfSamples the maximum number of samples to use when choosing a median distance 34 | */ 35 | public SamplingMedianDistanceThresholdSelectionStrategy(final int numberOfSamples) { 36 | this.numberOfSamples = numberOfSamples; 37 | } 38 | 39 | /** 40 | * Returns the median distance of a subset of the given points from the given origin. The given list of points may 41 | * be partially sorted in the process. 42 | * 43 | * @param points the list of points from which a median distance will be chosen 44 | * @param origin the point from which distances to other points will be calculated 45 | * @param distanceFunction the function to be used to calculate the distance between the origin and other points 46 | * 47 | * @return the median distance from the origin to the given list of points 48 | */ 49 | @Override 50 | public double selectThreshold(final List points, final P origin, final DistanceFunction

distanceFunction) { 51 | return super.selectThreshold(this.getSampledPoints(points), origin, distanceFunction); 52 | } 53 | 54 | /** 55 | * Chooses a subset of points from which to calculate a median by sampling the given list. 56 | * 57 | * @param points the points from which to choose a subset of points 58 | * 59 | * @return a list containing at most the number of points chosen at construction time 60 | */ 61 | List getSampledPoints(final List points) { 62 | final List sampledPoints; 63 | 64 | if (points.size() > this.numberOfSamples) { 65 | sampledPoints = new ArrayList<>(this.numberOfSamples); 66 | final int step = points.size() / this.numberOfSamples; 67 | 68 | for (int i = 0; i < this.numberOfSamples; i++) { 69 | sampledPoints.add(points.get(i * step)); 70 | } 71 | } else { 72 | sampledPoints = points; 73 | } 74 | 75 | return sampledPoints; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/com/eatthepath/jvptree/util/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Contains utility classes and concrete implementations of threshold selection strategies. 3 | * 4 | * @author Jon Chambers 5 | */ 6 | package com.eatthepath.jvptree.util; 7 | -------------------------------------------------------------------------------- /src/main/java/overview.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Jvptree overview 6 | 7 | 8 | 9 |

com.eatthepath.jvptree

10 | 11 |

Jvptree is a generic vantage-point tree implementation that allows for quick (O(log(n))) searches for the nearest neighbors to a given point. Vantage-point trees are binary space partitioning trees that partition points according to their distance from each node's "vantage point." Points within a chosen threshold distance of the vantage point go into one child node, while points that are farther away go into the other. Vantage-point trees can operate on any metric space.

12 | 13 |

Major concepts

14 | 15 |

The main thing vantage-point trees do is partition points into groups that are closer or farther than a given distance threshold. To do that, a vp-tree needs to be able to figure out how far apart any two points are and also decide what to use as a distance threshold. At a minimum, you'll need to provide a distance function that can calculate the distance between points. You may optionally specify a threshold selection strategy; if you don't, a reasonable default will be used.

16 | 17 |

Distance functions

18 | 19 |

You must always specify a distance function when creating a vp-tree. Distance functions take two points as arguments and must satisfy the requirements of a metric space, namely:

20 | 21 |
    22 |
  • d(x, y) >= 0
  • 23 |
  • d(x, y) = 0 if and only if x == y
  • 24 |
  • d(x, y) == d(y, x)
  • 25 |
  • d(x, z) <= d(x, y) + d(y, z)
  • 26 |
27 | 28 |

Threshold selection strategies

29 | 30 |

You may optionally specify a strategy for choosing a distance threshold for partitioning. By default, Jvptree uses a sampling median strategy, which takes the median distance from a small subset of the points to partition. Jvptree also includes a threshold selection strategy that takes the median of all points to be partitioned; this is slower, but may result in a more balanced tree. Most users will not need to specify a threshold selection strategy.

31 | 32 |

Node capacity

33 | 34 |

Additionally, you may specify a desired capacity for the tree's leaf nodes. It's worth mentioning early that you almost certainly do not need to worry about this; a reasonable default (32 points) will be used, and most users won't realize significant performance gains by tuning it.

35 | 36 |

Still, for those in need, you may choose a desired capacity for leaf nodes in a vp-tree. At one extreme, leaf nodes may contain only a single point. This means that searches will have to traverse more nodes, but once a leaf node is reached, fewer points will need to be searched to find nearest neighbors.

37 | 38 |

Using a larger node capacity will result in a "flatter" tree, and fewer nodes will need to be traversed when searching, but more points will need to be tested once a search reaches a leaf node. Larger node capacities also lead to less memory overhead because there are fewer nodes in the tree.

39 | 40 |

As a general rule of thumb, node capacities should be on the same order of magnitude as your typical search result size. The idea is that if a search reaches a leaf node, most of the points in the node will wind up in the collection of nearest neighbors (i.e. they would all have had to be checked anyhow) and few other nodes will have to be visited to gather any remaining neighbors.

41 | 42 |

License

43 | 44 |

Jvptree is available to the public under the MIT License.

45 | 46 | 47 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/IntegerDistanceFunction.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | public class IntegerDistanceFunction implements DistanceFunction { 4 | 5 | public double getDistance(final Number firstPoint, final Number secondPoint) { 6 | return Math.abs(firstPoint.intValue() - secondPoint.intValue()); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/MetaIteratorTest.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import java.util.*; 6 | 7 | import static org.junit.jupiter.api.Assertions.*; 8 | 9 | public class MetaIteratorTest { 10 | 11 | @Test 12 | void testHasNextAndNext() { 13 | { 14 | final MetaIterator emptyIterator = new MetaIterator<>(Collections.emptyList()); 15 | 16 | assertFalse(emptyIterator.hasNext(), "Empty iterators should not have a next element."); 17 | 18 | assertThrows(NoSuchElementException.class, emptyIterator::next, 19 | "Empty iterators should throw NoSuchElementException for next element"); 20 | } 21 | 22 | { 23 | final List integers = Arrays.asList(1, 2, 3); 24 | 25 | final MetaIterator singleIterator = 26 | new MetaIterator<>(Collections.singletonList(integers.iterator())); 27 | 28 | final ArrayList integersFromIterator = new ArrayList<>(); 29 | 30 | while (singleIterator.hasNext()) { 31 | integersFromIterator.add(singleIterator.next()); 32 | } 33 | 34 | assertEquals(integers, integersFromIterator, "Elements from iterator should match initial elements."); 35 | } 36 | 37 | { 38 | final List firstIntegers = Arrays.asList(1, 2, 3); 39 | final List emptyList = Collections.emptyList(); 40 | final List secondIntegers = 
Arrays.asList(4, 5, 6); 41 | 42 | @SuppressWarnings("RedundantOperationOnEmptyContainer") final List> iterators = 43 | Arrays.asList(firstIntegers.iterator(), emptyList.iterator(), secondIntegers.iterator()); 44 | 45 | final MetaIterator multipleIterator = new MetaIterator<>(iterators); 46 | 47 | final List integersFromIterator = new ArrayList<>(); 48 | 49 | while (multipleIterator.hasNext()) { 50 | integersFromIterator.add(multipleIterator.next()); 51 | } 52 | 53 | final List combinedList = new ArrayList<>(); 54 | combinedList.addAll(firstIntegers); 55 | combinedList.addAll(emptyList); 56 | combinedList.addAll(secondIntegers); 57 | 58 | assertEquals(combinedList, integersFromIterator, "Elements from iterator should match initial elements."); 59 | } 60 | } 61 | 62 | @Test 63 | void testRemove() { 64 | assertThrows(UnsupportedOperationException.class, () -> new MetaIterator<>(Collections.emptyList()).remove()); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/NearestNeighborCollectorTest.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import java.util.ArrayList; 4 | 5 | import org.junit.jupiter.api.BeforeEach; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import static org.junit.jupiter.api.Assertions.*; 9 | 10 | public class NearestNeighborCollectorTest { 11 | 12 | private NearestNeighborCollector collector; 13 | 14 | private static final int CAPACITY = 5; 15 | 16 | @BeforeEach 17 | void setup() { 18 | this.collector = new NearestNeighborCollector<>(0, new IntegerDistanceFunction(), CAPACITY); 19 | } 20 | 21 | @Test 22 | void testOfferPointAndGetFarthestPoint() { 23 | assertNull(this.collector.getFarthestPoint(), "Farthest point in an empty collector should be null."); 24 | 25 | this.collector.offerPoint(17); 26 | assertEquals(17, this.collector.getFarthestPoint(), 27 | "Farthest point in 
collector with single element should be the single element"); 28 | 29 | this.collector.offerPoint(2); 30 | assertEquals(17, this.collector.getFarthestPoint(), 31 | "Farthest point after adding a closer point should still be the farther point"); 32 | 33 | this.collector.offerPoint(19); 34 | assertEquals(19, this.collector.getFarthestPoint(), 35 | "Farthest point after adding a new farther point should be the new point"); 36 | 37 | for (int i = 0; i < CAPACITY; i++) { 38 | this.collector.offerPoint(3); 39 | } 40 | 41 | assertEquals(3, this.collector.getFarthestPoint(), 42 | "Farthest point after flushing with identical closer points should be closer point"); 43 | 44 | for (int i = 0; i < CAPACITY; i++) { 45 | this.collector.offerPoint(20); 46 | } 47 | 48 | assertEquals(3, this.collector.getFarthestPoint(), 49 | "Farthest point after flushing with identical farther points should still be closer point"); 50 | 51 | } 52 | 53 | @Test 54 | public void testToSortedList() { 55 | assertTrue(this.collector.toSortedList().isEmpty(), "Sorted list from empty collector should be empty."); 56 | 57 | this.collector.offerPoint(19); 58 | this.collector.offerPoint(77); 59 | this.collector.offerPoint(4); 60 | this.collector.offerPoint(1); 61 | this.collector.offerPoint(2); 62 | this.collector.offerPoint(62); 63 | this.collector.offerPoint(8375); 64 | this.collector.offerPoint(3); 65 | this.collector.offerPoint(5); 66 | this.collector.offerPoint(5); 67 | 68 | final ArrayList expectedList = new ArrayList<>(); 69 | java.util.Collections.addAll(expectedList, 1, 2, 3, 4, 5); 70 | 71 | assertEquals(CAPACITY, expectedList.size()); 72 | assertEquals(expectedList, this.collector.toSortedList()); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/VPTreeNodeTest.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import 
java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.Iterator; 6 | 7 | import com.eatthepath.jvptree.util.MedianDistanceThresholdSelectionStrategy; 8 | import org.junit.jupiter.api.Test; 9 | 10 | import static org.junit.jupiter.api.Assertions.*; 11 | 12 | public class VPTreeNodeTest { 13 | 14 | private static final int TEST_NODE_SIZE = 32; 15 | 16 | private static final PointFilter NO_OP_POINT_FILTER = point -> true; 17 | 18 | @Test 19 | void testVPNodeNoPoints() { 20 | assertThrows(IllegalArgumentException.class, 21 | () -> new VPTreeNode<>(new ArrayList(), new IntegerDistanceFunction(), 22 | new MedianDistanceThresholdSelectionStrategy<>(), VPTree.DEFAULT_NODE_CAPACITY)); 23 | } 24 | 25 | @Test 26 | void testVPNodeZeroCapacity() { 27 | assertThrows(IllegalArgumentException.class, 28 | () -> new VPTreeNode<>(java.util.Collections.singletonList(7), new IntegerDistanceFunction(), 29 | new MedianDistanceThresholdSelectionStrategy<>(), 0)); 30 | } 31 | 32 | @Test 33 | void testSize() { 34 | for (final VPTreeNode testNode : this.createTestNodes(TEST_NODE_SIZE)) { 35 | assertEquals(TEST_NODE_SIZE, testNode.size()); 36 | } 37 | } 38 | 39 | @Test 40 | void testAdd() { 41 | final Integer testPoint = TEST_NODE_SIZE * 2; 42 | 43 | for (final VPTreeNode testNode : this.createTestNodes(TEST_NODE_SIZE)) { 44 | assertFalse(testNode.contains(testPoint)); 45 | 46 | testNode.add(testPoint); 47 | 48 | assertEquals(TEST_NODE_SIZE + 1, testNode.size()); 49 | assertTrue(testNode.contains(testPoint)); 50 | } 51 | } 52 | 53 | @Test 54 | void testRemove() { 55 | final Integer pointNotInNode = TEST_NODE_SIZE * 2; 56 | final Integer pointInNode = TEST_NODE_SIZE / 2; 57 | 58 | for (final VPTreeNode testNode : this.createTestNodes(TEST_NODE_SIZE)) { 59 | assertFalse(testNode.remove(pointNotInNode)); 60 | assertTrue(testNode.remove(pointInNode)); 61 | 62 | assertEquals(TEST_NODE_SIZE - 1, testNode.size()); 63 | assertFalse(testNode.contains(pointInNode)); 64 | 65 | for 
(int i = 0; i < TEST_NODE_SIZE; i++) { 66 | testNode.remove(i); 67 | } 68 | 69 | assertEquals(0, testNode.size()); 70 | } 71 | } 72 | 73 | @Test 74 | void testContains() { 75 | final Integer pointNotInNode = TEST_NODE_SIZE * 2; 76 | 77 | for (final VPTreeNode testNode : this.createTestNodes(TEST_NODE_SIZE)) { 78 | for (int i = 0; i < TEST_NODE_SIZE; i++) { 79 | assertTrue(testNode.contains(i)); 80 | } 81 | 82 | assertFalse(testNode.contains(pointNotInNode)); 83 | } 84 | } 85 | 86 | @Test 87 | void testRetainAll() { 88 | final ArrayList pointsToRetain = new ArrayList<>(); 89 | 90 | for (int i = 0; i < TEST_NODE_SIZE / 8; i++) { 91 | pointsToRetain.add(i); 92 | } 93 | 94 | for (final VPTreeNode testNode : this.createTestNodes(TEST_NODE_SIZE)) { 95 | assertTrue(testNode.retainAll(pointsToRetain)); 96 | assertEquals(pointsToRetain.size(), testNode.size()); 97 | 98 | for (final int point : pointsToRetain) { 99 | assertTrue(testNode.contains(point)); 100 | } 101 | 102 | assertFalse(testNode.retainAll(pointsToRetain)); 103 | } 104 | } 105 | 106 | @Test 107 | void testCollectNearestNeighbors() { 108 | final Integer queryPoint = TEST_NODE_SIZE / 2; 109 | final int numberOfNeighbors = 3; 110 | 111 | for (final VPTreeNode testNode : this.createTestNodes(TEST_NODE_SIZE)) { 112 | final NearestNeighborCollector collector = 113 | new NearestNeighborCollector<>(queryPoint, new IntegerDistanceFunction(), numberOfNeighbors); 114 | 115 | testNode.collectNearestNeighbors(collector, NO_OP_POINT_FILTER); 116 | 117 | assertEquals(numberOfNeighbors, collector.toSortedList().size()); 118 | assertEquals(queryPoint, collector.toSortedList().get(0)); 119 | assertTrue(collector.toSortedList().containsAll( 120 | java.util.Arrays.asList(queryPoint - 1, queryPoint, queryPoint + 1))); 121 | } 122 | } 123 | 124 | @Test 125 | void testCollectAllWithinRange() { 126 | final int queryPoint = TEST_NODE_SIZE / 2; 127 | final int maxRange = TEST_NODE_SIZE / 8; 128 | 129 | for (final VPTreeNode testNode : 
this.createTestNodes(TEST_NODE_SIZE)) { 130 | final ArrayList collectedPoints = new ArrayList<>(); 131 | 132 | testNode.collectAllWithinDistance(queryPoint, maxRange, collectedPoints, NO_OP_POINT_FILTER); 133 | 134 | assertEquals((2 * maxRange) + 1, collectedPoints.size()); 135 | 136 | for (int i = queryPoint - maxRange; i <= queryPoint + maxRange; i++) { 137 | assertTrue(collectedPoints.contains(i)); 138 | } 139 | } 140 | } 141 | 142 | @Test 143 | void testAddPointsToArray() { 144 | for (final VPTreeNode testNode : this.createTestNodes(TEST_NODE_SIZE)) { 145 | final Integer[] array = new Integer[TEST_NODE_SIZE]; 146 | testNode.addPointsToArray(array, 0); 147 | 148 | assertFalse(testNode.retainAll(java.util.Arrays.asList(array))); 149 | } 150 | } 151 | 152 | @Test 153 | void testCollectIterators() { 154 | for (final VPTreeNode testNode : this.createTestNodes(TEST_NODE_SIZE)) { 155 | final ArrayList> iterators = new ArrayList<>(); 156 | testNode.collectIterators(iterators); 157 | 158 | final ArrayList pointsFromIterators = new ArrayList<>(); 159 | 160 | for (final Iterator iterator : iterators) { 161 | while (iterator.hasNext()) { 162 | pointsFromIterators.add(iterator.next()); 163 | } 164 | } 165 | 166 | assertEquals(testNode.size(), pointsFromIterators.size()); 167 | assertFalse(testNode.retainAll(pointsFromIterators)); 168 | } 169 | } 170 | 171 | private Collection> createTestNodes(@SuppressWarnings("SameParameterValue") final int nodeSize) { 172 | final ArrayList points = new ArrayList<>(nodeSize); 173 | 174 | for (int i = 0; i < nodeSize; i++) { 175 | points.add(i); 176 | } 177 | 178 | final ArrayList> testNodes = new ArrayList<>(3); 179 | 180 | testNodes.add(new VPTreeNode<>(points, new IntegerDistanceFunction(), 181 | new MedianDistanceThresholdSelectionStrategy<>(), points.size() * 2)); 182 | 183 | testNodes.add(new VPTreeNode<>(points, new IntegerDistanceFunction(), 184 | new MedianDistanceThresholdSelectionStrategy<>(), points.size())); 185 | 186 | 
testNodes.add(new VPTreeNode<>(points, new IntegerDistanceFunction(), 187 | new MedianDistanceThresholdSelectionStrategy<>(), points.size() / 8)); 188 | 189 | return testNodes; 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/VPTreeTest.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import java.util.ArrayList; 6 | import java.util.Arrays; 7 | import java.util.Iterator; 8 | import java.util.List; 9 | 10 | import static org.junit.jupiter.api.Assertions.*; 11 | 12 | public class VPTreeTest { 13 | 14 | private static final int TEST_TREE_SIZE = 256; 15 | 16 | @Test 17 | public void testGetNearestNeighbors() { 18 | final VPTree vpTree = this.createTestTree(TEST_TREE_SIZE); 19 | 20 | final Integer queryPoint = TEST_TREE_SIZE / 2; 21 | final int numberOfNeighbors = 3; 22 | 23 | final List nearestNeighbors = vpTree.getNearestNeighbors(queryPoint, numberOfNeighbors); 24 | 25 | assertEquals(numberOfNeighbors, nearestNeighbors.size()); 26 | assertEquals(queryPoint, nearestNeighbors.get(0)); 27 | assertTrue(nearestNeighbors.containsAll(Arrays.asList(queryPoint - 1, queryPoint, queryPoint + 1))); 28 | } 29 | 30 | @Test 31 | public void testGetNearestNeighborsWithFilter() { 32 | final VPTree vpTree = this.createTestTree(TEST_TREE_SIZE); 33 | 34 | final Integer queryPoint = TEST_TREE_SIZE / 2; 35 | final int numberOfNeighbors = 3; 36 | 37 | final PointFilter evenNumberFilter = point -> point % 2 == 0; 38 | 39 | final List nearestNeighbors = vpTree.getNearestNeighbors(queryPoint, numberOfNeighbors, evenNumberFilter); 40 | 41 | assertEquals(numberOfNeighbors, nearestNeighbors.size()); 42 | assertEquals(queryPoint, nearestNeighbors.get(0)); 43 | assertTrue(nearestNeighbors.containsAll( 44 | java.util.Arrays.asList(queryPoint - 2, queryPoint, queryPoint + 2))); 45 | } 
46 | 47 | @Test 48 | public void testGetAllWithinRange() { 49 | final VPTree vpTree = this.createTestTree(TEST_TREE_SIZE); 50 | 51 | final int queryPoint = TEST_TREE_SIZE / 2; 52 | final int maxDistance = TEST_TREE_SIZE / 8; 53 | 54 | final PointFilter evenNumberFilter = point -> point % 2 == 0; 55 | 56 | final List pointsWithinRange = vpTree.getAllWithinDistance(queryPoint, maxDistance, evenNumberFilter); 57 | 58 | assertEquals(maxDistance + 1, pointsWithinRange.size()); 59 | 60 | for (int i = queryPoint - maxDistance; i <= queryPoint + maxDistance; i += 2) { 61 | assertTrue(pointsWithinRange.contains(i)); 62 | } 63 | } 64 | 65 | @Test 66 | public void testGetAllWithinRangeWithFilter() { 67 | final VPTree vpTree = this.createTestTree(TEST_TREE_SIZE); 68 | 69 | final int queryPoint = TEST_TREE_SIZE / 2; 70 | final int maxDistance = TEST_TREE_SIZE / 8; 71 | 72 | final List pointsWithinRange = vpTree.getAllWithinDistance(queryPoint, maxDistance); 73 | 74 | assertEquals((2 * maxDistance) + 1, pointsWithinRange.size()); 75 | 76 | for (int i = queryPoint - maxDistance; i <= queryPoint + maxDistance; i++) { 77 | assertTrue(pointsWithinRange.contains(i)); 78 | } 79 | } 80 | 81 | @Test 82 | public void testSize() { 83 | final ArrayList points = new ArrayList<>(); 84 | 85 | for (int i = 0; i < TEST_TREE_SIZE; i++) { 86 | points.add(i); 87 | } 88 | 89 | { 90 | final VPTree initiallyEmptyTree = new VPTree<>(new IntegerDistanceFunction()); 91 | assertEquals(0, initiallyEmptyTree.size()); 92 | 93 | initiallyEmptyTree.addAll(points); 94 | 95 | assertEquals(points.size(), initiallyEmptyTree.size()); 96 | 97 | initiallyEmptyTree.removeAll(points); 98 | 99 | assertEquals(0, initiallyEmptyTree.size()); 100 | } 101 | 102 | { 103 | final VPTree initiallyPopulatedTree = new VPTree<>(new IntegerDistanceFunction(), points); 104 | assertEquals(points.size(), initiallyPopulatedTree.size()); 105 | } 106 | } 107 | 108 | @Test 109 | public void testIsEmpty() { 110 | final VPTree vpTree = 
this.createTestTree(0); 111 | final Integer testPoint = 12; 112 | 113 | assertTrue(vpTree.isEmpty()); 114 | 115 | vpTree.add(testPoint); 116 | assertFalse(vpTree.isEmpty()); 117 | 118 | vpTree.remove(testPoint); 119 | assertTrue(vpTree.isEmpty()); 120 | } 121 | 122 | /** Checks that add() returns true for a new point and that contains() reports the point only after it has been added. */ @Test 123 | public void testAdd() { 124 | final VPTree vpTree = this.createTestTree(0); 125 | final Integer testPoint = 12; 126 | 127 | assertFalse(vpTree.contains(testPoint)); 128 | 129 | assertTrue(vpTree.add(testPoint)); 130 | assertTrue(vpTree.contains(testPoint)); 131 | } 132 | 133 | /** Adds the integers 0 to 255 to an empty tree in one addAll() call and checks the return value, the resulting size, and that every point is contained. */ @Test 134 | public void testAddAll() { 135 | final VPTree vpTree = this.createTestTree(0); 136 | 137 | final int numberOfPoints = 256; 138 | final ArrayList points = new ArrayList<>(numberOfPoints); 139 | 140 | for (int i = 0; i < numberOfPoints; i++) { 141 | points.add(i); 142 | } 143 | 144 | assertTrue(vpTree.addAll(points)); 145 | assertEquals(points.size(), vpTree.size()); 146 | assertTrue(vpTree.containsAll(points)); 147 | } 148 | 149 | /** Checks that remove() returns false for a point that was never added, true once the point is present, and leaves the tree empty afterwards. */ @Test 150 | public void testRemove() { 151 | final VPTree vpTree = this.createTestTree(0); 152 | final Integer testPoint = 12; 153 | 154 | assertFalse(vpTree.remove(testPoint)); 155 | 156 | vpTree.add(testPoint); 157 | assertTrue(vpTree.remove(testPoint)); 158 | assertTrue(vpTree.isEmpty()); 159 | } 160 | 161 | /** Removes all even values from a full test tree and checks the return value, the remaining size, that none of the removed points is still contained, and that a second identical removeAll() reports no change. */ @Test 162 | public void testRemoveAll() { 163 | final ArrayList pointsToRemove = new ArrayList<>(); 164 | 165 | for (int i = 0; i < TEST_TREE_SIZE; i += 2) { 166 | pointsToRemove.add(i); 167 | } 168 | 169 | final VPTree vpTree = this.createTestTree(TEST_TREE_SIZE); 170 | 171 | assertTrue(vpTree.removeAll(pointsToRemove)); 172 | assertEquals(TEST_TREE_SIZE - pointsToRemove.size(), vpTree.size()); 173 | 174 | for (final Integer point : pointsToRemove) { 175 | assertFalse(vpTree.contains(point)); 176 | } 177 | 178 | assertFalse(vpTree.removeAll(pointsToRemove)); 179 | } 180 | 181 | /** Retains only the even values of a full test tree and checks the return value, the resulting size, that every retained point is still contained, and that a second identical retainAll() reports no change. */ @Test 182 | public void testRetainAll() { 183 | final ArrayList pointsToRetain = new 
ArrayList<>(); 184 | 185 | /* Collect the even values 0, 2, ..., TEST_TREE_SIZE - 2 as the set to keep. */ for (int i = 0; i < TEST_TREE_SIZE; i += 2) { 186 | pointsToRetain.add(i); 187 | } 188 | 189 | final VPTree vpTree = this.createTestTree(TEST_TREE_SIZE); 190 | 191 | assertTrue(vpTree.retainAll(pointsToRetain)); 192 | assertEquals(pointsToRetain.size(), vpTree.size()); 193 | 194 | for (final Integer point : pointsToRetain) { 195 | assertTrue(vpTree.contains(point)); 196 | } 197 | 198 | assertFalse(vpTree.retainAll(pointsToRetain)); 199 | } 200 | 201 | /** Checks that a populated tree is not empty and becomes empty after clear(). */ @Test 202 | public void testClear() { 203 | final VPTree vpTree = this.createTestTree(TEST_TREE_SIZE); 204 | 205 | assertFalse(vpTree.isEmpty()); 206 | 207 | vpTree.clear(); 208 | //noinspection ConstantConditions 209 | assertTrue(vpTree.isEmpty()); 210 | } 211 | 212 | /** Checks contains() before adding a point, after adding it (while a different point stays absent), and after removing it again. */ @Test 213 | public void testContains() { 214 | final VPTree vpTree = this.createTestTree(0); 215 | 216 | final Integer pointAdded = 12; 217 | final Integer pointNotAdded = 7; 218 | 219 | assertFalse(vpTree.contains(pointAdded)); 220 | 221 | vpTree.add(pointAdded); 222 | assertTrue(vpTree.contains(pointAdded)); 223 | assertFalse(vpTree.contains(pointNotAdded)); 224 | 225 | vpTree.remove(pointAdded); 226 | assertFalse(vpTree.contains(pointAdded)); 227 | } 228 | 229 | /** Checks that containsAll() is true for exactly the points the tree was built from and false once the list also holds a value (257) that is not in the tree. */ @Test 230 | public void testContainsAll() { 231 | final int numberOfPoints = 256; 232 | final ArrayList points = new ArrayList<>(numberOfPoints); 233 | 234 | for (int i = 0; i < numberOfPoints; i++) { 235 | points.add(i); 236 | } 237 | 238 | final VPTree vpTree = new VPTree<>(new IntegerDistanceFunction(), points); 239 | 240 | assertTrue(vpTree.containsAll(points)); 241 | 242 | points.add(numberOfPoints + 1); 243 | assertFalse(vpTree.containsAll(points)); 244 | } 245 | 246 | /** Drains the tree's iterator by hand and checks that it yields as many elements as were inserted and that all inserted points are among them. */ @Test 247 | public void testIterator() { 248 | final int numberOfPoints = 256; 249 | final ArrayList points = new ArrayList<>(numberOfPoints); 250 | 251 | for (int i = 0; i < numberOfPoints; i++) { 252 | points.add(i); 253 | } 254 | 255 | final VPTree vpTree = new VPTree<>(new 
IntegerDistanceFunction(), points); 256 | 257 | final ArrayList pointsFromIterator = new ArrayList<>(); 258 | final Iterator iterator = vpTree.iterator(); 259 | 260 | //noinspection WhileLoopReplaceableByForEach 261 | while (iterator.hasNext()) { 262 | //noinspection UseBulkOperation 263 | pointsFromIterator.add(iterator.next()); 264 | } 265 | 266 | assertEquals(points.size(), pointsFromIterator.size()); 267 | assertTrue(pointsFromIterator.containsAll(points)); 268 | } 269 | 270 | /** Checks that the no-argument toArray() returns one entry per point in the tree and that every entry is contained in the tree. */ @Test 271 | public void testToArray() { 272 | final int numberOfPoints = 256; 273 | final ArrayList points = new ArrayList<>(numberOfPoints); 274 | 275 | for (int i = 0; i < numberOfPoints; i++) { 276 | points.add(i); 277 | } 278 | 279 | final VPTree vpTree = new VPTree<>(new IntegerDistanceFunction(), points); 280 | final Object[] array = vpTree.toArray(); 281 | 282 | assertEquals(vpTree.size(), array.length); 283 | 284 | for (final Object point : array) { 285 | //noinspection SuspiciousMethodCalls 286 | assertTrue(vpTree.contains(point)); 287 | } 288 | } 289 | 290 | /** Checks toArray(T[]) for the three sizes of the supplied array: too small (length 0), exactly fitting (length size()), and one slot too large (the extra slot must be null). */ @Test 291 | public void testToArrayTArray() { 292 | final VPTree vpTree = this.createTestTree(TEST_TREE_SIZE); 293 | 294 | /* Case 1: supplied array is too small. */ { 295 | final Integer[] array = vpTree.toArray(new Integer[0]); 296 | 297 | assertEquals(vpTree.size(), array.length); 298 | 299 | for (final Integer point : array) { 300 | assertTrue(vpTree.contains(point)); 301 | } 302 | } 303 | 304 | /* Case 2: supplied array fits exactly; deliberately pre-sized so this block does not repeat case 1. */ { 305 | final Integer[] array = vpTree.toArray(new Integer[vpTree.size()]); 306 | 307 | assertEquals(vpTree.size(), array.length); 308 | 309 | for (final Integer point : array) { 310 | assertTrue(vpTree.contains(point)); 311 | } 312 | } 313 | 314 | /* Case 3: supplied array has one spare slot, which must be set to null after the last element. */ { 315 | final Integer[] array = vpTree.toArray(new Integer[vpTree.size() + 1]); 316 | 317 | assertEquals(vpTree.size() + 1, array.length); 318 | 319 | for (int i = 0; i < vpTree.size(); i++) { 320 | assertTrue(vpTree.contains(array[i])); 321 | } 322 | 323 | assertNull(array[vpTree.size()]); 324 | } 325 | } 326 | 327 | /** Builds a VPTree over the integers 0 to numberOfPoints - 1 using IntegerDistanceFunction; for 0 points a null list is handed to the constructor. */ private VPTree 
createTestTree(final int numberOfPoints) { 328 | /* Test fixture: the tree holds the integers 0 to numberOfPoints - 1. */ final List points; 329 | 330 | if (numberOfPoints == 0) { 331 | /* An empty tree is requested by passing a null point list to the VPTree constructor. */ points = null; 332 | } else { 333 | points = new ArrayList<>(numberOfPoints); 334 | 335 | for (int i = 0; i < numberOfPoints; i++) { 336 | points.add(i); 337 | } 338 | } 339 | 340 | return new VPTree<>(new IntegerDistanceFunction(), points); 341 | } 342 | } 343 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/example/CartesianDistanceFunction.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree.example; 2 | 3 | import com.eatthepath.jvptree.DistanceFunction; 4 | 5 | /** Distance function that measures the straight-line (Euclidean) distance between two CartesianPoints in the x/y plane. */ public class CartesianDistanceFunction implements DistanceFunction { 6 | 7 | /** Returns sqrt(dx^2 + dy^2) for the coordinate differences of the two given points. */ public double getDistance(final CartesianPoint firstPoint, final CartesianPoint secondPoint) { 8 | final double deltaX = firstPoint.getX() - secondPoint.getX(); 9 | final double deltaY = firstPoint.getY() - secondPoint.getY(); 10 | 11 | return Math.sqrt((deltaX * deltaX) + (deltaY * deltaY)); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/example/CartesianPoint.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree.example; 2 | 3 | /** A point that exposes its x and y coordinates as doubles. */ public interface CartesianPoint { 4 | double getX(); 5 | double getY(); 6 | } 7 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/example/ExampleApp.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree.example; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import com.eatthepath.jvptree.VPTree; 7 | 8 | /** Usage example: builds a VPTree of CartesianPoints and runs a nearest-neighbor query and a within-distance query around a fixed player position. */ public class ExampleApp { 9 | 10 | /* The query results are never read in this example, hence the "unused" suppression. */ @SuppressWarnings("unused") 11 | public static void main(final String[] args) { 12 | final 
CartesianPoint playerPosition = new CartesianPoint() { 13 | /* Fixed query position (20, 10). */ public double getX() { 14 | return 20; 15 | } 16 | 17 | public double getY() { 18 | return 10; 19 | } 20 | }; 21 | 22 | /* The list is left empty here; nothing is added before the tree is built. */ final List enemies = new ArrayList<>(); 23 | 24 | final VPTree vpTree = 25 | new VPTree<>(new CartesianDistanceFunction(), enemies); 26 | 27 | /* Ask for up to 10 nearest neighbors of the player position. */ final List nearestEnemies = 28 | vpTree.getNearestNeighbors(playerPosition, 10); 29 | 30 | /* Ask for everything within a distance of 4.5 of the player position. */ final List enemiesWithinFiringRange = 31 | vpTree.getAllWithinDistance(playerPosition, 4.5); 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/example/SpaceInvader.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree.example; 2 | 3 | /** Immutable example CartesianPoint that additionally carries a color string. */ public class SpaceInvader implements CartesianPoint { 4 | 5 | private final double x; 6 | private final double y; 7 | 8 | private final String color; 9 | 10 | /** Stores the given coordinates and color unchanged. */ public SpaceInvader(final double x, final double y, final String color) { 11 | this.x = x; 12 | this.y = y; 13 | 14 | this.color = color; 15 | } 16 | 17 | /** Returns the x coordinate passed to the constructor. */ public double getX() { 18 | return this.x; 19 | } 20 | 21 | /** Returns the y coordinate passed to the constructor. */ public double getY() { 22 | return this.y; 23 | } 24 | 25 | /** Returns the color passed to the constructor. */ public String getColor() { 26 | return this.color; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/util/MedianDistanceThresholdSelectionStrategyTest.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree.util; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import com.eatthepath.jvptree.IntegerDistanceFunction; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import static org.junit.jupiter.api.Assertions.assertEquals; 10 | import static org.junit.jupiter.api.Assertions.assertThrows; 11 | 12 | /** Tests for MedianDistanceThresholdSelectionStrategy.selectThreshold. */ public class MedianDistanceThresholdSelectionStrategyTest { 13 | 14 | /** Checks the selected threshold, relative to origin 0, for a one-element list and for a nine-element list. */ @Test 15 
| void testSelectThreshold() { 16 | final MedianDistanceThresholdSelectionStrategy strategy = 17 | new MedianDistanceThresholdSelectionStrategy<>(); 18 | 19 | /* Single point 7, origin 0: the expected threshold is 7. */ { 20 | final List singleIntegerList = new ArrayList<>(); 21 | singleIntegerList.add(7); 22 | 23 | assertEquals(7, (int)strategy.selectThreshold(singleIntegerList, 0, new IntegerDistanceFunction())); 24 | } 25 | 26 | /* The values 1 to 9 in unsorted order, origin 0: the expected threshold is 5. NOTE(review): presumably IntegerDistanceFunction is the absolute difference, making 5 the median distance - confirm against its source. */ { 27 | final List multipleIntegerList = new ArrayList<>(); 28 | multipleIntegerList.add(2); 29 | multipleIntegerList.add(9); 30 | multipleIntegerList.add(3); 31 | multipleIntegerList.add(1); 32 | multipleIntegerList.add(6); 33 | multipleIntegerList.add(4); 34 | multipleIntegerList.add(8); 35 | multipleIntegerList.add(5); 36 | multipleIntegerList.add(7); 37 | 38 | assertEquals(5, (int)strategy.selectThreshold(multipleIntegerList, 0, new IntegerDistanceFunction())); 39 | } 40 | } 41 | 42 | /** Checks that selecting a threshold from an empty list throws IllegalArgumentException. */ @Test 43 | void testSelectThresholdEmptyList() { 44 | assertThrows(IllegalArgumentException.class, 45 | () -> new MedianDistanceThresholdSelectionStrategy().selectThreshold( 46 | new ArrayList<>(), 0, new IntegerDistanceFunction())); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/test/java/com/eatthepath/jvptree/util/SamplingMedianDistanceThresholdSelectionStrategyTest.java: -------------------------------------------------------------------------------- 1 | package com.eatthepath.jvptree.util; 2 | 3 | import com.eatthepath.jvptree.DistanceFunction; 4 | import com.eatthepath.jvptree.IntegerDistanceFunction; 5 | import org.junit.jupiter.api.Test; 6 | 7 | import java.util.AbstractList; 8 | import java.util.Arrays; 9 | import java.util.List; 10 | import java.util.Random; 11 | 12 | import static org.junit.jupiter.api.Assertions.*; 13 | 14 | /** Tests for SamplingMedianDistanceThresholdSelectionStrategy. */ class SamplingMedianDistanceThresholdSelectionStrategyTest { 15 | 16 | /** Read-only list that reports 305574400 elements without storing any: the element at each valid index is the index itself. */ private static class FakeGiganticList extends AbstractList { 17 | 18 | @Override 19 | public Integer get(final int index) { 20 | if (index >= size() || 
index < 0) { 21 | throw new IndexOutOfBoundsException(); 22 | } 23 | 24 | /* The element at a valid index is the index itself, so no backing storage is needed. */ return index; 25 | } 26 | 27 | @Override 28 | public int size() { 29 | return 305574400; 30 | } 31 | } 32 | 33 | /** With a strategy constructed with the argument 5, sampling the list 1 to 10 is expected to return every other element: 1, 3, 5, 7, 9. */ @Test 34 | void getSampledPoints() { 35 | final SamplingMedianDistanceThresholdSelectionStrategy strategy = 36 | new SamplingMedianDistanceThresholdSelectionStrategy<>(5); 37 | 38 | assertEquals(Arrays.asList(1, 3, 5, 7, 9), 39 | strategy.getSampledPoints(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); 40 | } 41 | 42 | /** Runs selectThreshold over the 305574400-element fake list with an absolute-difference distance and origin 17, asserting only that no exception is thrown. NOTE(review): the test name suggests it guards against integer overflow in the strategy's index arithmetic - confirm against the strategy implementation. */ @Test 43 | void selectThresholdOverflow() { 44 | final SamplingMedianDistanceThresholdSelectionStrategy strategy = 45 | new SamplingMedianDistanceThresholdSelectionStrategy<>(); 46 | 47 | final List points = new FakeGiganticList(); 48 | 49 | assertDoesNotThrow(() -> strategy.selectThreshold(points, 17, (a, b) -> Math.abs(a - b))); 50 | } 51 | } 52 | --------------------------------------------------------------------------------