├── LICENSE ├── Makefile ├── README.md ├── doc └── Yianilos_vpt_10.1.1.41.4193.pdf ├── dune-project ├── plot.gpl ├── src ├── dune ├── myArray.ml ├── test.ml ├── vp_tree.ml └── vp_tree.mli └── vpt.opam /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Francois BERENGER 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | all: 4 | dune build @install 5 | 6 | test: 7 | dune build src/test.exe 8 | _build/default/src/test.exe 9 | 10 | install: all 11 | dune install 12 | 13 | uninstall: 14 | ocamlfind -remove vpt 15 | 16 | clean: 17 | rm -rf _build 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vp-tree 2 | A vantage point tree implementation in OCaml. 3 | 4 | Cf. http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.41.4193&rep=rep1&type=pdf 5 | for details. 6 | 7 | A vantage point tree allows to do fast but exact nearest neighbor searches 8 | in any space provided that you have a distance function 9 | to measure the distance between any two points in that space. 10 | 11 | This implementation might need some tweaks in case it is used to index a very 12 | large number of points (especially the select_vp function in the code). 13 | -------------------------------------------------------------------------------- /doc/Yianilos_vpt_10.1.1.41.4193.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnixJunkie/vp-tree/65b10e18f099d6076f3cae0ce5f3b73deb1d9e75/doc/Yianilos_vpt_10.1.1.41.4193.pdf -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.8) 2 | (name vpt) 3 | -------------------------------------------------------------------------------- /plot.gpl: -------------------------------------------------------------------------------- 1 | # plot 'toplot' u 1:2 w l t 'bct', \ 2 | # '' u 1:3 w l t 'gct', \ 3 | # '' u 1:4 w l t 'rct' 4 | 5 | plot 'toplot' u 1:5 w l t 'bq', \ 6 | '' u 1:6 w l t 'gq', \ 7 | '' u 1:7 w l t 'rq', \ 8 | '' u 1:8 w l t 'brute' 9 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name vpt) 3 | (public_name vpt) 4 | (modules vp_tree myArray)) 5 | 6 | (executable 7 | (name test) 8 | (modules test) 9 | (libraries unix vpt)) 10 | -------------------------------------------------------------------------------- /src/myArray.ml: -------------------------------------------------------------------------------- 1 | (* extend the Array module *) 2 | 3 | include Array 4 | 5 | (* smaller array, without elt at index 'i' *) 6 | let remove i a = 7 | let n = length a in 8 | assert(i >= 0 && i < n); 9 | let res = make (n - 1) (unsafe_get a 0) in 10 | let j = ref 0 in 11 | for i' = 0 to n - 1 do 12 | if i' <> i then 13 | (unsafe_set res !j (unsafe_get a i'); 14 | incr j) 15 | done; 16 | res 17 | 18 | (* <=> List.partition *) 19 | let partition p a = 20 | let ok, ko = 21 | fold_right (fun x (ok_acc, ko_acc) -> 22 | if p x then (x :: ok_acc, ko_acc) 23 | else (ok_acc, x :: ko_acc) 24 | ) a ([], []) 25 | in 26 | (of_list ok, of_list ko) 27 | 28 | (* <=> List.split *) 29 | let split a = 30 | let n = length a in 31 | if n = 0 then ([||], [||]) 32 | else 33 | let l, r = unsafe_get a 0 in 34 | let left = make n l in 35 | let right = make n r in 36 | for i = 1 to n - 1 do 37 | let l, r = unsafe_get a i in 38 | unsafe_set left i l; 39 | unsafe_set right i r 40 | done; 41 | (left, right) 42 | 43 | (* <=> BatArray.min_max with default value in case of empty array *) 44 | let min_max_def a def = 45 | let n = length a in 46 | if n = 0 then def 47 | else 48 | let mini = ref (unsafe_get a 0) in 49 | let maxi = ref (unsafe_get a 0) in 50 | for i = 1 to n - 1 do 51 | let x = unsafe_get a i in 52 | if x < !mini then mini := x; 53 | if x > !maxi then maxi := x 54 | done; 55 | (!mini, !maxi) 56 | 57 | (* get one bootstrap sample of 'size' using sampling with replacement *) 58 | let bootstrap_sample rng size a = 59 | let n = length a in 60 | assert(n > 0); 61 | assert(size < n); 62 | let res = make size (unsafe_get a 0) in 63 | for i = 0 to size - 1 do 64 | let rand = Random.State.int rng n in 65 | unsafe_set res i (unsafe_get a rand) 66 | done; 67 | res 68 | -------------------------------------------------------------------------------- /src/test.ml: -------------------------------------------------------------------------------- 1 | 2 | module L = List 3 | 4 | open Printf 5 | 6 | let one_rand_point_2D () = 7 | (Random.float 1.0, Random.float 1.0) 8 | 9 | let square x = 10 | x *. x 11 | 12 | let dist_2D (x0, y0) (x1, y1) = 13 | sqrt (square (x0 -. x1) +. square (y0 -. y1)) 14 | 15 | let fabs x = 16 | if x > 0.0 then x 17 | else -.x 18 | 19 | module Point_2D = 20 | struct 21 | type t = float * float 22 | let dist = dist_2D 23 | end 24 | 25 | module VPT = Vpt.Vp_tree.Make(Point_2D) 26 | 27 | let to_string_2D (x, y) = 28 | sprintf "%.3f %.3f" x y 29 | 30 | let n_times n f = 31 | let res = ref [] in 32 | for _i = 1 to n do 33 | res := f() :: !res 34 | done; 35 | !res 36 | 37 | let brute_force_nearest_find dist query points = 38 | let rec loop ((curr_d, _curr_p) as acc) = function 39 | | [] -> acc 40 | | x :: xs -> 41 | let d = dist x query in 42 | let acc' = if d < curr_d then (d, x) else acc in 43 | loop acc' xs 44 | in 45 | match points with 46 | | [] -> assert(false) 47 | | p :: ps -> loop (dist query p, p) ps 48 | 49 | let time_it f = 50 | let start = Unix.gettimeofday () in 51 | let res = f () in 52 | let stop = Unix.gettimeofday () in 53 | (stop -. start, res) 54 | 55 | let query_several_times n vpt = 56 | let dt, _ = 57 | time_it (fun () -> 58 | for _i = 1 to n do 59 | let q = one_rand_point_2D () in 60 | ignore(VPT.nearest_neighbor q vpt) 61 | done 62 | ) in 63 | let q = one_rand_point_2D () in 64 | let res = VPT.nearest_neighbor q vpt in 65 | (dt /. (float n), res, q) 66 | 67 | let main () = 68 | (* test all neighbors within tolerance query *) 69 | let points = n_times 1000 one_rand_point_2D in 70 | let t1 = VPT.create VPT.Optimal points in 71 | let t2 = VPT.create (VPT.Good 50) points in 72 | let t3 = VPT.create VPT.Random points in 73 | assert(VPT.check t1); 74 | assert(VPT.check t2); 75 | assert(VPT.check t3); 76 | (* test all points are in the tree *) 77 | assert(L.sort compare points = L.sort compare (VPT.to_list t1)); 78 | assert(L.sort compare points = L.sort compare (VPT.to_list t2)); 79 | assert(L.sort compare points = L.sort compare (VPT.to_list t3)); 80 | let query = one_rand_point_2D () in 81 | let tol = Random.float 0.01 in 82 | let vpt_t, nearby_curr = time_it (fun () -> VPT.neighbors query tol t1) in 83 | let nearby_curr' = VPT.neighbors query tol t2 in 84 | let nearby_curr'' = VPT.neighbors query tol t3 in 85 | assert(L.for_all (fun p -> dist_2D query p <= tol) nearby_curr); 86 | let brute_t, nearby_ref = time_it (fun () -> 87 | L.filter (fun p -> dist_2D query p <= tol) points 88 | ) in 89 | assert(L.sort compare nearby_curr = L.sort compare nearby_ref); 90 | assert(L.sort compare nearby_curr' = L.sort compare nearby_ref); 91 | assert(L.sort compare nearby_curr'' = L.sort compare nearby_ref); 92 | (* test all points can be found in the tree *) 93 | assert(L.for_all (fun p -> VPT.find p t1 = p) points); 94 | assert(L.for_all (fun p -> VPT.find p t2 = p) points); 95 | assert(L.for_all (fun p -> VPT.find p t3 = p) points); 96 | printf "#vpt_neighbors(%d): %f brute: %f accel: %.3f\n%!" 97 | (L.length nearby_curr) 98 | vpt_t brute_t (brute_t /. vpt_t); 99 | (* test nearest_neighbor queries *) 100 | let sizes = [1;2;4;8;16;32;64;128;256;512;1024;2048;4096;8192] in 101 | let ntimes = 100 in 102 | Printf.printf "#size b_c g_c r_c b_q g_q r_q brute\n"; 103 | L.iter (fun size -> 104 | let points = n_times size one_rand_point_2D in 105 | (* create all VPTs *) 106 | let b_t, bvpt = time_it (fun () -> VPT.create VPT.Optimal points) in 107 | let g_t, gvpt = time_it (fun () -> VPT.create (VPT.Good 50) points) in 108 | let r_t, rvpt = time_it (fun () -> VPT.create VPT.Random points) in 109 | (* query all VPTs *) 110 | (* FBR: we should query always with the same query points, whatever 111 | * the flavor of the tree in order to compare query speed *) 112 | let bq_t, b_curr, q = query_several_times ntimes bvpt in 113 | let reff = brute_force_nearest_find dist_2D q points in 114 | assert(b_curr = reff); 115 | let gq_t, g_curr, q = query_several_times ntimes gvpt in 116 | let reff = brute_force_nearest_find dist_2D q points in 117 | assert(g_curr = reff); 118 | let rq_t, r_curr, q = query_several_times ntimes rvpt in 119 | let brute_t, reff = 120 | time_it (fun () -> brute_force_nearest_find dist_2D q points) in 121 | assert(r_curr = reff); 122 | Printf.printf "%d %f %f %f %f %f %f %f\n%!" 123 | size b_t g_t r_t bq_t gq_t rq_t brute_t 124 | ) sizes 125 | 126 | let () = main () 127 | -------------------------------------------------------------------------------- /src/vp_tree.ml: -------------------------------------------------------------------------------- 1 | 2 | module A = MyArray 3 | 4 | (* Vantage-point tree implementation 5 | Cf. "Data structures and algorithms for nearest neighbor search 6 | in general metric spaces" by Peter N. Yianilos for details. 7 | http://citeseerx.ist.psu.edu/viewdoc/\ 8 | download?doi=10.1.1.41.4193&rep=rep1&type=pdf *) 9 | 10 | (* Functorial interface *) 11 | 12 | module type Point = 13 | sig 14 | type t 15 | (* dist _must_ be a metric *) 16 | val dist: t -> t -> float 17 | end 18 | 19 | module Make = functor (P: Point) -> 20 | struct 21 | 22 | type quality = Optimal (* if you have thousands of points *) 23 | | Good of int (* if you have tens to hundreds 24 | of thousands of points *) 25 | | Random (* if you have millions of points *) 26 | 27 | type node = { vp: P.t; 28 | lb_low: float; 29 | lb_high: float; 30 | middle: float; 31 | rb_low: float; 32 | rb_high: float; 33 | left: t; 34 | right: t } 35 | and t = Empty 36 | | Node of node 37 | 38 | let new_node vp lb_low lb_high middle rb_low rb_high left right = 39 | Node { vp; lb_low; lb_high; middle; rb_low; rb_high; left; right } 40 | 41 | type open_itv = { lbound: float; rbound: float } 42 | 43 | let new_open_itv lbound rbound = 44 | assert(lbound <= rbound); 45 | { lbound; rbound } 46 | 47 | let in_open_itv x { lbound ; rbound } = 48 | (x > lbound) && (x < rbound) 49 | 50 | let itv_dont_overlap left right = 51 | let a = left.lbound in 52 | let b = left.rbound in 53 | let c = right.lbound in 54 | let d = right.rbound in 55 | (* [a..b] [c..d] OR [c..d] [a..b] *) 56 | (b < c) || (d < a) 57 | 58 | let itv_overlap left right = 59 | not (itv_dont_overlap left right) 60 | 61 | let square (x: float): float = 62 | x *. x 63 | 64 | let float_compare (x: float) (y: float): int = 65 | if x < y then -1 66 | else if x > y then 1 67 | else 0 (* x = y *) 68 | 69 | let median (xs: float array): float = 70 | A.sort float_compare xs; 71 | let n = A.length xs in 72 | if n mod 2 = 1 then xs.(n / 2) 73 | else 0.5 *. (xs.(n / 2) +. xs.(n / 2 - 1)) 74 | 75 | let variance (mu: float) (xs: float array): float = 76 | A.fold_left (fun acc x -> 77 | acc +. (square (x -. mu)) 78 | ) 0.0 xs 79 | 80 | (* compute distance of point at index 'q_i' to all other points *) 81 | let distances (q_i: int) (points: P.t array): float array = 82 | let n = A.length points in 83 | assert(n > 1); 84 | let res = A.make (n - 1) 0.0 in 85 | let j = ref 0 in 86 | let q = points.(q_i) in 87 | for i = 0 to n - 1 do 88 | if i <> q_i then 89 | (res.(!j) <- P.dist q points.(i); 90 | incr j) 91 | done; 92 | res 93 | 94 | (* this is optimal (slowest tree construction; O(n^2)); 95 | but fastest query time *) 96 | let select_best_vp (points: P.t array) = 97 | let n = A.length points in 98 | if n = 0 then assert(false) 99 | else if n = 1 then (points.(0), 0.0, [||]) 100 | else 101 | let curr_vp = ref 0 in 102 | let curr_mu = ref 0.0 in 103 | let curr_spread = ref 0.0 in 104 | for i = 0 to n - 1 do 105 | (* could be faster using a distance cache? *) 106 | let dists = distances !curr_vp points in 107 | let mu = median dists in 108 | let spread = variance mu dists in 109 | if spread > !curr_spread then 110 | (curr_vp := i; 111 | curr_mu := mu; 112 | curr_spread := spread) 113 | done; 114 | (points.(!curr_vp), !curr_mu, A.remove !curr_vp points) 115 | 116 | (* to replace select_best_vp when working with too many points *) 117 | let select_good_vp rng (sample_size: int) (points: P.t array) = 118 | let n = A.length points in 119 | if sample_size * sample_size >= n then 120 | select_best_vp points 121 | else 122 | let candidates = A.bootstrap_sample rng sample_size points in 123 | let curr_vp = ref 0 in 124 | let curr_mu = ref 0.0 in 125 | let curr_spread = ref 0.0 in 126 | A.iteri (fun i p_i -> 127 | let sample = A.bootstrap_sample rng sample_size points in 128 | let dists = A.map (P.dist p_i) sample in 129 | let mu = median dists in 130 | let spread = variance mu dists in 131 | if spread > !curr_spread then 132 | (curr_vp := i; 133 | curr_mu := mu; 134 | curr_spread := spread) 135 | ) candidates; 136 | (* we need the true mu to balance the tree; 137 | not the one gotten from the sample! *) 138 | let dists = distances !curr_vp points in 139 | let mu = median dists in 140 | (points.(!curr_vp), mu, A.remove !curr_vp points) 141 | 142 | (* to replace select_good_vp when working with way too many points, 143 | or if you really need the fastest possible tree construction *) 144 | let select_rand_vp rng (points: P.t array) = 145 | let n = A.length points in 146 | assert(n > 0); 147 | let vp = Random.State.int rng n in 148 | let dists = distances vp points in 149 | let mu = median dists in 150 | (points.(vp), mu, A.remove vp points) 151 | 152 | let rec create' select_vp points = 153 | let n = A.length points in 154 | if n = 0 then Empty 155 | else if n = 1 then new_node points.(0) 0. 0. 0. 0. 0. Empty Empty 156 | else 157 | let vp, mu, others = select_vp points in 158 | let dists = A.map (fun p -> (P.dist vp p, p)) others in 159 | let lefties, righties = A.partition (fun (d, _p) -> d < mu) dists in 160 | let ldists, lpoints = A.split lefties in 161 | let rdists, rpoints = A.split righties in 162 | let lb_low, lb_high = A.min_max_def ldists (0., 0.) in 163 | let rb_low, rb_high = A.min_max_def rdists (0., 0.) in 164 | let middle = (lb_high +. rb_low) *. 0.5 in 165 | new_node vp lb_low lb_high middle rb_low rb_high 166 | (create' select_vp lpoints) (create' select_vp rpoints) 167 | 168 | let create quality points = 169 | let select_vp = match quality with 170 | | Optimal -> select_best_vp 171 | | Good ssize -> select_good_vp (Random.State.make_self_init ()) ssize 172 | | Random -> select_rand_vp (Random.State.make_self_init ()) in 173 | create' select_vp (A.of_list points) 174 | 175 | let rec find_nearest acc query tree = 176 | match tree with 177 | | Empty -> acc 178 | | Node { vp; lb_low; lb_high; middle; rb_low; rb_high; left; right } -> 179 | let x = P.dist vp query in 180 | if x = 0.0 then Some (x, vp) (* can't get nearer than that *) 181 | else 182 | let tau, acc' = 183 | match acc with 184 | | None -> (x, Some (x, vp)) 185 | | Some (tau, best) -> 186 | if x < tau then (x, Some (x, vp)) 187 | else (tau, Some (tau, best)) in 188 | let il = new_open_itv (lb_low -. tau) (lb_high +. tau) in 189 | let ir = new_open_itv (rb_low -. tau) (rb_high +. tau) in 190 | let in_il = in_open_itv x il in 191 | let in_ir = in_open_itv x ir in 192 | if x < middle then 193 | match in_il, in_ir with 194 | | false, false -> acc' 195 | | true, false -> find_nearest acc' query left 196 | | false, true -> find_nearest acc' query right 197 | | true, true -> 198 | (match find_nearest acc' query left with 199 | | None -> find_nearest acc' query right 200 | | Some (tau, best) -> 201 | match find_nearest acc' query right with 202 | | None -> Some (tau, best) 203 | | Some (tau', best') -> 204 | if tau' < tau then Some (tau', best') 205 | else Some (tau, best)) 206 | else (* x >= middle *) 207 | match in_ir, in_il with 208 | | false, false -> acc' 209 | | true, false -> find_nearest acc' query right 210 | | false, true -> find_nearest acc' query left 211 | | true, true -> 212 | (match find_nearest acc' query right with 213 | | None -> find_nearest acc' query left 214 | | Some (tau, best) -> 215 | match find_nearest acc' query left with 216 | | None -> Some (tau, best) 217 | | Some (tau', best') -> 218 | if tau' < tau then Some (tau', best') 219 | else Some (tau, best)) 220 | 221 | let nearest_neighbor query tree = 222 | match find_nearest None query tree with 223 | | Some x -> x 224 | | None -> raise Not_found 225 | 226 | let rec to_list_loop acc = function 227 | | Empty -> acc 228 | | Node n -> 229 | let acc' = to_list_loop acc n.right in 230 | to_list_loop (n.vp :: acc') n.left 231 | 232 | let to_list tree = 233 | to_list_loop [] tree 234 | 235 | let neighbors query tol tree = 236 | let rec loop acc = function 237 | | Empty -> acc 238 | | Node { vp; lb_low; lb_high; rb_low; rb_high; left; right; _ } -> 239 | (* should we include vp? *) 240 | let d = P.dist vp query in 241 | let acc' = if d <= tol then vp :: acc else acc in 242 | let lbound = max 0.0 (d -. tol) in 243 | let rbound = d +. tol in 244 | let itv = new_open_itv lbound rbound in 245 | (* should we inspect the left? *) 246 | let lmatches = 247 | let itv_left = new_open_itv lb_low lb_high in 248 | if itv_overlap itv itv_left then 249 | (* further calls to P.dist needed? *) 250 | if d +. lb_high <= tol then 251 | (* all descendants are included *) 252 | to_list_loop acc' left 253 | else 254 | loop acc' left 255 | else acc' in 256 | (* should we inspect the right? *) 257 | let itv_right = new_open_itv rb_low rb_high in 258 | if itv_overlap itv itv_right then 259 | (* further calls to P.dist needed? *) 260 | if d +. rb_high <= tol then 261 | to_list_loop lmatches right 262 | else 263 | loop lmatches right 264 | else lmatches in 265 | loop [] tree 266 | 267 | let is_empty = function 268 | | Empty -> true 269 | | Node _ -> false 270 | 271 | let root = function 272 | | Empty -> raise Not_found 273 | | Node { vp; _ } -> vp 274 | 275 | (* test if the tree invariant holds. 276 | If it doesn't, then we are in trouble... *) 277 | let rec check = function 278 | | Empty -> true 279 | | Node { vp; lb_low; lb_high; middle; rb_low; rb_high; left; right } -> 280 | let bounds_OK = (0.0 <= lb_low) && 281 | (lb_low <= lb_high) && 282 | ((lb_high < middle) || (0.0 = middle)) && 283 | (middle <= rb_low) && 284 | (rb_low <= rb_high) in 285 | (bounds_OK && 286 | List.for_all (fun p -> P.dist vp p < middle) (to_list left) && 287 | List.for_all (fun p -> P.dist vp p >= middle) (to_list right) && 288 | check left && check right) 289 | 290 | exception Found of P.t 291 | 292 | let find query tree = 293 | let rec loop = function 294 | | Empty -> () 295 | | Node { vp; middle; left; right; _ } -> 296 | let d = P.dist vp query in 297 | if d = 0.0 then raise (Found vp) 298 | else if d < middle then loop left 299 | else loop right in 300 | try (loop tree; raise Not_found) 301 | with Found p -> p 302 | 303 | let mem query tree = 304 | try let _ = find query tree in true 305 | with Not_found -> false 306 | 307 | end 308 | -------------------------------------------------------------------------------- /src/vp_tree.mli: -------------------------------------------------------------------------------- 1 | 2 | (** Functorial interface. *) 3 | 4 | module type Point = 5 | sig 6 | (** A point. *) 7 | type t 8 | 9 | (** [dist] should be a distance function: symmetric, zero and 10 | the diagonal and verifying the triangular inequality. 11 | Be _very_ careful with the implementation of your metric 12 | (dist x x = 0.0, NaN is not a proper distance, etc). *) 13 | val dist: t -> t -> float 14 | end 15 | 16 | module Make: functor (P: Point) -> 17 | sig 18 | (** A vantage point tree. *) 19 | type t 20 | 21 | (** Quality of the constructed tree. 22 | Tree construction takes more time with higher quality. 23 | Tree query time takes less time with higher tree quality. 24 | If you have 100k or more points, use a Good or Random tree. *) 25 | type quality = Optimal 26 | | Good of int (* sample size *) 27 | | Random 28 | 29 | (** [create quality points] 30 | create a vantage point tree of given quality containing all points. *) 31 | val create: quality -> P.t list -> t 32 | 33 | (** [nearest_neighbor p vpt] return the distance along with the nearest 34 | neighbor to query point [p] in [vpt]. Warning: there may be several 35 | points at this distance from [p] in [vpt], 36 | but a single (arbitrary) one is returned. 37 | If you are not happy with that, use a point type that is 38 | deduplicated (i.e. a point that holds the info for all points with 39 | the same coordinates). *) 40 | val nearest_neighbor: P.t -> t -> float * P.t 41 | 42 | (** [neighbors p tol vpt] return all points in [vpt] within 43 | [tol] distance from query point [p]. 44 | I.e. all points returned are within [(d <= tol)] 45 | distance from [p]. *) 46 | val neighbors: P.t -> float -> t -> P.t list 47 | 48 | (** [to_list vpt] return the list of points inside the [vpt], 49 | in an unspecified order. *) 50 | val to_list: t -> P.t list 51 | 52 | (** [is_empty vpt] test if [vpt] is empty. *) 53 | val is_empty: t -> bool 54 | 55 | (** [find query tree] return the first point with distance to [query] = 0.0. 56 | @raise [Not_found] if no such element exists. 57 | Warning: there may be several 58 | points at this distance from [p] in [vpt], 59 | but a single (arbitrary) one is returned. *) 60 | val find: P.t -> t -> P.t 61 | 62 | (** [mem query tree] return true if [query] can be found in [tree]; 63 | false otherwise. *) 64 | val mem: P.t -> t -> bool 65 | 66 | (** [root tree] return the root point of the tree. 67 | @raise [Not_found] if [tree] is empty. *) 68 | val root: t -> P.t 69 | 70 | (** [check tree] test the tree invariant. 71 | Should always be true. 72 | If invariant doesn't hold, then this library has a bug 73 | (or your distance function is not a proper metric). *) 74 | val check: t -> bool 75 | end 76 | -------------------------------------------------------------------------------- /vpt.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "unixjunkie@sdf.org" 3 | authors: ["Francois BERENGER"] 4 | homepage: "https://github.com/UnixJunkie/vp-tree" 5 | bug-reports: "https://github.com/UnixJunkie/vp-tree/issues" 6 | dev-repo: "git+https://github.com/UnixJunkie/vp-tree.git" 7 | license: "BSD-3-Clause" 8 | build: [ 9 | ["dune" "build" "-p" name "-j" jobs] 10 | ["dune" "runtest" "-p" name "-j" jobs] {with-test} 11 | ] 12 | depends: [ 13 | "dune" {>= "2.8"} 14 | "ocaml" 15 | ] 16 | synopsis: "Vantage point tree implementation in OCaml" 17 | description: """ 18 | A vantage point tree allows to do fast but exact nearest neighbor searches 19 | in any space provided that you have a metric (function) to measure the 20 | distance between any two points in that space. 21 | 22 | Bibliography 23 | ============ 24 | Yianilos, P. N. (1993, January). 25 | Data structures and algorithms for nearest neighbor search 26 | in general metric spaces. 27 | In Soda (Vol. 93, No. 194, pp. 311-21). 28 | http://dx.doi.org/10.1145/313559.313789 29 | """ 30 | url { 31 | src: "https://github.com/UnixJunkie/vp-tree/archive/v4.0.1.tar.gz" 32 | checksum: "md5=d65973d0c20d32deb57bbc91f2af5a6d" 33 | } 34 | --------------------------------------------------------------------------------