├── LICENSE ├── Makefile ├── README.md ├── bst.opam ├── bunny.sh ├── data ├── bunny.txt └── stanford_bunny.png ├── doc └── Brin_1995_NNsearchLargeMetricSpace.pdf ├── dune-project └── src ├── bisec_tree.ml ├── bisec_tree.mli ├── dump.ml ├── dune ├── simplify.ml └── test.ml /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, Francois BERENGER 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test simplify install uninstall reinstall doc clean 2 | 3 | all: 4 | dune build @install 5 | 6 | test: 7 | dune build src/test.exe 8 | _build/default/src/test.exe 9 | 10 | simplify: 11 | dune build src/simplify.exe 12 | _build/default/src/simplify.exe -k 50 data/bunny.txt \ 13 | 2> data/bunny_simple.txt 14 | wc -l data/bunny.txt data/bunny_simple.txt 15 | 16 | install: all 17 | dune install 18 | 19 | uninstall: 20 | dune uninstall 21 | 22 | reinstall: install reinstall 23 | 24 | doc: 25 | mkdir -p doc 26 | ocamldoc -html -d doc bisec_tree.mli 27 | 28 | clean: 29 | rm -rf _build test_*.txt 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bisec-tree 2 | 3 | Bisector tree implementation in OCaml. 4 | 5 | A bisector tree allows to do fast and exact nearest neighbor searches 6 | in any space provided that you have a metric (function) to measure the 7 | distance between any two points in that space. 8 | 9 | Cf. this article for details: 10 | "A Data Structure and an Algorithm for the Nearest Point Problem"; 11 | Iraj Kalaranti and Gerard McDonald. 12 | ieeexplore.ieee.org/iel5/32/35936/01703102.pdf 13 | 14 | ![Bunny](data/stanford_bunny.png?raw=true) 15 | 16 | Figure: the Stanford bunny, consisting of 35947 3D points, guillotined 17 | by the first layer of a bisector tree. 18 | -------------------------------------------------------------------------------- /bst.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "unixjunkie@sdf.org" 3 | authors: ["Francois Berenger"] 4 | homepage: "https://github.com/UnixJunkie/bisec-tree" 5 | bug-reports: "https://github.com/UnixJunkie/bisec-tree/issues" 6 | dev-repo: "git+https://github.com/UnixJunkie/bisec-tree.git" 7 | license: "BSD-3-Clause" 8 | build: ["dune" "build" "-p" name "-j" jobs] 9 | depends: [ 10 | "ocaml" 11 | "batteries" 12 | "dune" {>= "1.11"} 13 | "base-unix" {with-test} 14 | "dolog" {with-test} 15 | "minicli" {with-test} 16 | ] 17 | synopsis: "Bisector tree implementation in OCaml" 18 | description: """ 19 | A bisector tree allows to do fast but exact nearest neighbor searches 20 | in any space provided that you can measure the 21 | distance between any two points in that space. 22 | A bisector tree also allows fast neighbor searches (range queries/ 23 | finding all points within a given tolerance from your query point). 24 | Cf. this article for details: 25 | 'A Data Structure and an Algorithm for the Nearest Point Problem'; 26 | Iraj Kalaranti and Gerard McDonald. 27 | ieeexplore.ieee.org/iel5/32/35936/01703102.pdf 28 | """ 29 | # url { 30 | # src: "https://github.com/UnixJunkie/bisec-tree/archive/v6.0.0.tar.gz" 31 | # checksum: "md5=e3bafda0a2b705c5cd24e0710dee9c37" 32 | # } 33 | -------------------------------------------------------------------------------- /bunny.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | jbuilder build dump.exe 4 | 5 | ./_build/default/dump.exe -i data/bunny.txt -d 1 > data/bunny_bst.txt 6 | 7 | egrep ^0' ' data/bunny_bst.txt > data/bst_left_01.txt 8 | egrep ^1' ' data/bunny_bst.txt > data/bst_right_01.txt 9 | 10 | gnuplot -persist < t -> float 14 | end 15 | 16 | (* Apparently, bisector trees where first describbed in 17 | "A Data Structure and an Algorithm for the Nearest Point Problem"; 18 | Iraj Kalaranti and Gerard McDonald. 19 | ieeexplore.ieee.org/iel5/32/35936/01703102.pdf *) 20 | 21 | (* Heuristic to find good vantage points. *) 22 | type vp_heuristic = One_band | Two_bands 23 | 24 | type direction = Left | Right 25 | 26 | type step = L of float (* dist to l_vp *) 27 | | R of float (* dist to r_vp *) 28 | 29 | let string_of_addr addr = 30 | let n = L.length addr in 31 | let buff = Bytes.create n in 32 | L.iteri (fun i s -> 33 | Bytes.unsafe_set buff i (match s with 34 | | L _ -> '0' 35 | | R _ -> '1' 36 | ) 37 | ) addr; 38 | Bytes.to_string buff 39 | 40 | let string_of_path path = 41 | let char_of_dir = function 42 | | Left -> '0' 43 | | Right -> '1' in 44 | let buff = Buffer.create 80 in 45 | L.iter (fun d -> 46 | Buffer.add_char buff (char_of_dir d) 47 | ) path; 48 | Buffer.contents buff 49 | 50 | module Make = functor (P: Point) -> struct 51 | 52 | type bucket = { vp: P.t; (* vantage point *) 53 | sup: float; (* max dist to vp among bucket points *) 54 | points: P.t array } (* remaining points (vp excluded) *) 55 | 56 | type node = 57 | { (* left half-space *) 58 | l_vp: P.t; (* left vantage point *) 59 | l_sup: float; (* max dist to l_vp among points in same half-space *) 60 | (* right half-space *) 61 | r_vp: P.t; (* right vantage point *) 62 | r_sup: float; (* max dist to r_vp among points in same half-space *) 63 | (* sub-trees *) 64 | left: t; 65 | right: t } 66 | and t = Empty 67 | | Node of node 68 | | Bucket of bucket 69 | 70 | let rng = Random.State.make_self_init () 71 | 72 | (* points of the bucket as a list; bucket's vp included *) 73 | let points_of_bucket (b: bucket): P.t list = 74 | b.vp :: (A.to_list b.points) 75 | 76 | (* n must be > 0 *) 77 | let rand_int n = 78 | Random.State.int rng n 79 | 80 | let fcmp (x: float) (y: float): int = 81 | if x < y then -1 82 | else if x > y then 1 83 | else 0 (* x = y *) 84 | 85 | let fmax (x: float) (y: float): float = 86 | if x > y then x else y 87 | 88 | (* point indexed by one vantage point *) 89 | type point1 = { p: P.t; 90 | d1: float } 91 | let point1_cmp (x: point1) (y: point1): int = 92 | fcmp x.d1 y.d1 93 | (* enrich p by distance to vp *) 94 | let enr (vp: P.t) (p: P.t): point1 = 95 | { p; d1 = P.dist vp p } 96 | (* point indexed by two vantage points *) 97 | type point2 = { p: P.t; 98 | d1: float; 99 | d2: float } 100 | let enr2 (vp: P.t) (p: point1): point2 = 101 | { p = p.p; d1 = p.d1; d2 = P.dist vp p.p } 102 | let strip2 (points: point2 array): P.t array = 103 | A.map (fun x -> x.p) points 104 | (* return max dist to vp1 *) 105 | let max1 (points: point2 array): float = 106 | let maxi = ref 0.0 in (* a distance is always >= 0.0 *) 107 | A.iter (fun x -> 108 | maxi := fmax !maxi x.d1 109 | ) points; 110 | !maxi 111 | (* return max dist to vp2 *) 112 | let max2 (points: point2 array): float = 113 | let maxi = ref 0.0 in (* a distance is always >= 0.0 *) 114 | A.iter (fun x -> 115 | maxi := fmax !maxi x.d2 116 | ) points; 117 | !maxi 118 | 119 | (* stuff that will be promoted to bucket *) 120 | type pre_bucket = { vp: P.t; 121 | points: point2 array } 122 | (* stuff that will be promoted to node *) 123 | type pre_node = { l_vp: P.t; 124 | points: point2 array; 125 | r_vp: P.t } 126 | type pre = Pre_bucket of pre_bucket 127 | | Pre_node of pre_node 128 | | Pre_empty 129 | 130 | (* counting already indexed points *) 131 | let pre_bucket_length (b: pre_bucket): int = 132 | 1 + A.length b.points 133 | let bucket_length (b: bucket): int = 134 | 1 + A.length b.points 135 | 136 | (* select first vp randomly, then enrich points by their distance to it; 137 | output is ordered by incr. dist. to this rand vp *) 138 | let rand_vp (points: P.t array): point1 array = 139 | let n = A.length points in 140 | assert(n > 0); 141 | if n = 1 then [|{ p = points.(0); d1 = 0.0 }|] 142 | else 143 | let i = rand_int n in 144 | let vp = points.(i) in 145 | let enr_points = A.map (enr vp) points in 146 | A.sort point1_cmp enr_points; 147 | enr_points 148 | 149 | (* heuristics for choosing a good pair of vp points 150 | are inspired by section 4.2 'Selecting Split Points' in 151 | "Near Neighbor Search in Large Metric Spaces", Sergey Brin, VLDB 1995. *) 152 | 153 | (* choose one vp randomly, the furthest point from it is the other vp *) 154 | let one_band (k: int) (points: P.t array) = 155 | let n = A.length points in 156 | if n = 0 then Pre_empty 157 | else if n = 1 then Pre_bucket { vp = points.(0); points = [||] } 158 | else (* n >= 2 *) 159 | let enr_points = rand_vp points in 160 | let vp1 = enr_points.(0).p in 161 | let vp2 = enr_points.(n - 1).p in 162 | let lr_gap = enr_points.(n - 1).d1 in 163 | (* we bucketize because there are not enough points left, or because 164 | * it is not possible to bisect space further *) 165 | if n = 2 || n <= k || lr_gap = 0.0 then 166 | (* we use vp2 to index the bucket: vp2 is supposed to be good 167 | while vp1 is random *) 168 | let enr_rem = A.sub enr_points 0 (n - 1) in 169 | let rem = A.map (enr2 vp2) enr_rem in 170 | Pre_bucket { vp = vp2; points = rem } 171 | else 172 | (* remove selected vps from point array 173 | and enrich points by their dist to vp2 *) 174 | let enr_rem = A.sub enr_points 1 (n - 2) in 175 | let rem = A.map (enr2 vp2) enr_rem in 176 | Pre_node { l_vp = vp1; points = rem; r_vp = vp2 } 177 | 178 | let two_bands (k: int) (points: P.t array) = 179 | let n = A.length points in 180 | if n = 0 then Pre_empty 181 | else if n = 1 then Pre_bucket { vp = points.(0); points = [||] } 182 | else (* n >= 2 *) 183 | let enr_points = rand_vp points in 184 | (* furthest from random vp *) 185 | let vp = enr_points.(n - 1).p in 186 | let enr_points1 = A.map (enr vp) points in 187 | A.sort point1_cmp enr_points1; 188 | let vp1 = enr_points1.(0).p in 189 | let vp2 = enr_points1.(n - 1).p in 190 | let lr_gap = enr_points1.(n - 1).d1 in 191 | (* we bucketize because there are not enough points left, or because 192 | * it is not possible to bisect space further *) 193 | if n = 2 || n <= k || lr_gap = 0.0 then 194 | (* we use vp2 to index the bucket *) 195 | let enr_rem = A.sub enr_points1 0 (n - 1) in 196 | let rem = A.map (enr2 vp2) enr_rem in 197 | Pre_bucket { vp = vp2; points = rem } 198 | else 199 | (* remove selected vps from points array 200 | and enrich points by their distance to vp2 *) 201 | let enr_rem = A.sub enr_points1 1 (n - 2) in 202 | let rem = A.map (enr2 vp2) enr_rem in 203 | Pre_node { l_vp = vp1; points = rem; r_vp = vp2 } 204 | 205 | let sample_distances (sample_size: int) (points: P.t array): float array = 206 | let n = A.length points in 207 | assert(n > 0); 208 | let distances = 209 | A.init sample_size (fun _ -> 210 | P.dist points.(rand_int n) points.(rand_int n) 211 | ) in 212 | A.sort fcmp distances; 213 | distances 214 | 215 | let create ?(progress_callback = fun _x _y -> ()) 216 | (k: int) (h: vp_heuristic) (points': P.t array): t = 217 | let nb_points = A.length points' in 218 | let indexed = ref 0 in 219 | let heuristic = match h with 220 | | One_band -> one_band 221 | | Two_bands -> two_bands in 222 | let rec loop points = match heuristic k points with 223 | | Pre_empty -> Empty 224 | | Pre_bucket b -> 225 | begin 226 | indexed := !indexed + (pre_bucket_length b); 227 | progress_callback !indexed nb_points; 228 | Bucket { vp = b.vp; sup = max2 b.points; points = strip2 b.points } 229 | end 230 | | Pre_node pn -> 231 | (* points to the left are strictly closer to l_vp 232 | than points to the right *) 233 | let lpoints, rpoints = A.partition (fun p -> p.d1 < p.d2) pn.points in 234 | indexed := !indexed + 2; 235 | progress_callback !indexed nb_points; 236 | Node { l_vp = pn.l_vp; 237 | l_sup = max1 lpoints; 238 | r_vp = pn.r_vp; 239 | r_sup = max2 rpoints; 240 | left = loop (strip2 lpoints); 241 | right = loop (strip2 rpoints) } in 242 | loop points' 243 | 244 | (* to_list with an acc *) 245 | let rec to_list_loop acc = function 246 | | Empty -> acc 247 | | Node n -> 248 | let acc' = to_list_loop acc n.right in 249 | to_list_loop (n.l_vp :: n.r_vp :: acc') n.left 250 | | Bucket b -> 251 | A.fold_left (fun acc' x -> 252 | x :: acc' 253 | ) (b.vp :: acc) b.points 254 | 255 | let to_list t = 256 | to_list_loop [] t 257 | 258 | let length t = 259 | let rec loop acc = function 260 | | Empty -> acc 261 | | Node n -> 262 | let acc' = loop acc n.right in 263 | (* two vantage points --> +2 *) 264 | loop (acc' + 2) n.left 265 | | Bucket b -> 266 | (* one vantage point --> +1 *) 267 | 1 + acc + (A.length b.points) in 268 | loop 0 t 269 | 270 | (* dive in the tree until [max_depth] is reached 271 | (or you cannot go further down) then dump all points 272 | along with the descent path that was followed to reach them *) 273 | let dump max_depth t = 274 | let rec loop acc path curr_depth = function 275 | | Empty -> acc 276 | | Bucket b -> 277 | let points = to_list (Bucket b) in 278 | (L.rev path, points) :: acc 279 | | Node n -> 280 | if curr_depth = max_depth then 281 | let l_points = n.l_vp :: to_list n.left in 282 | let l_path = Left :: path in 283 | let r_points = n.r_vp :: to_list n.right in 284 | let r_path = Right :: path in 285 | (L.rev l_path, l_points) :: 286 | (L.rev r_path, r_points) :: acc 287 | else 288 | let depth' = curr_depth + 1 in 289 | let l_path = Left :: path in 290 | let r_path = Right :: path in 291 | let acc' = (L.rev l_path, [n.l_vp]) :: acc in 292 | let acc'' = loop acc' l_path depth' n.left in 293 | let acc''' = (L.rev r_path, [n.r_vp]) :: acc'' in 294 | loop acc''' r_path depth' n.right in 295 | loop [] [] 1 t 296 | 297 | let is_empty = function 298 | | Empty -> true 299 | | _ -> false 300 | 301 | (* the root is the first point in the vp that we find 302 | (either a bucket's vp or a node's left vp); 303 | not sure it is very useful, but at least it allows 304 | to get one point from the tree if it is not empty *) 305 | let root = function 306 | | Empty -> raise Not_found 307 | | Node n -> n.l_vp 308 | | Bucket b -> b.vp 309 | 310 | (* nearest point to query point *) 311 | let nearest_neighbor query tree = 312 | let rec loop ((_x, d) as acc) = function 313 | | Empty -> acc 314 | | Bucket b -> 315 | let b_d = P.dist query b.vp in 316 | let x', d' = if b_d < d then (b.vp, b_d) else acc in 317 | (* should we inspect bucket points? *) 318 | if b_d -. b.sup >= d' then (x', d') (* no *) 319 | else (* yes *) 320 | A.fold_left (fun (nearest_p, nearest_d) y -> 321 | let y_d = P.dist query y in 322 | if y_d < nearest_d then (y, y_d) else (nearest_p, nearest_d) 323 | ) (x', d') b.points 324 | | Node n -> 325 | let l_d = P.dist query n.l_vp in 326 | let x', d' = if l_d < d then (n.l_vp, l_d) else acc in 327 | (* should we dive left? *) 328 | let x'', d'' = 329 | if l_d -. n.l_sup >= d' then (x', d') (* no *) 330 | else loop (x', d') n.left (* yes *) in 331 | (* should we dive right? *) 332 | let r_d = P.dist query n.r_vp in 333 | let x''', d''' = if r_d < d'' then (n.r_vp, r_d) else (x'', d'') in 334 | if r_d -. n.r_sup >= d''' then (x''', d''') (* no *) 335 | else loop (x''', d''') n.right (* yes *) in 336 | match tree with 337 | | Empty -> raise Not_found 338 | | not_empty -> 339 | let x = root not_empty in 340 | loop (x, P.dist query x) not_empty 341 | 342 | (* all points [x] such that [P.dist query x <= tol] *) 343 | let neighbors query tol tree = 344 | let rec loop acc = function 345 | | Empty -> acc 346 | | Bucket b -> 347 | let b_d = P.dist query b.vp in 348 | (* should we inspect bucket points? *) 349 | if b_d -. b.sup > tol then acc (* no *) 350 | (* are all remaining points included? *) 351 | else if b_d +. b.sup <= tol then (* yes *) 352 | to_list_loop acc (Bucket b) 353 | else (* we need to inspect the bucket *) 354 | let acc' = if b_d <= tol then b.vp :: acc else acc in 355 | A.fold_left (fun acc'' y -> 356 | let y_d = P.dist query y in 357 | if y_d <= tol then y :: acc'' else acc'' 358 | ) acc' b.points 359 | | Node n -> 360 | let l_d = P.dist query n.l_vp in 361 | (* should we dive left? *) 362 | let acc'' = 363 | if l_d -. n.l_sup > tol then acc (* no *) 364 | else if l_d +. n.l_sup <= tol then 365 | (* all remaining points are included *) 366 | to_list_loop (n.l_vp :: acc) n.left 367 | else 368 | (* need to inspect further *) 369 | let acc' = if l_d <= tol then n.l_vp :: acc else acc in 370 | loop acc' n.left in 371 | (* should we dive right? *) 372 | let r_d = P.dist query n.r_vp in 373 | if r_d -. n.r_sup > tol then acc'' (* no *) 374 | else if r_d +. n.r_sup <= tol then 375 | (* all remaining points are included *) 376 | to_list_loop (n.r_vp :: acc'') n.right 377 | else 378 | (* need to inspect further *) 379 | let acc''' = if r_d <= tol then (n.r_vp :: acc'') else acc'' in 380 | loop acc''' n.right in 381 | loop [] tree 382 | 383 | exception Found 384 | 385 | (* lazy, early exit range query. 386 | We don't care about who the neighbors are, 387 | we just want to know if there are any. *) 388 | let any_neighbor query tol tree = 389 | let rec loop = function 390 | | Empty -> () 391 | | Bucket b -> 392 | let b_d = P.dist query b.vp in 393 | (* should we inspect bucket points? *) 394 | if b_d -. b.sup > tol then () (* no *) 395 | (* are all remaining points included? *) 396 | else if b_d +. b.sup <= tol then (* yes *) 397 | raise Found 398 | else (* we need to inspect the bucket *) 399 | if b_d <= tol then 400 | raise Found 401 | else 402 | A.iter (fun y -> 403 | if P.dist query y <= tol then raise Found 404 | ) b.points 405 | | Node n -> 406 | let l_d = P.dist query n.l_vp in 407 | (* should we dive left? *) 408 | let () = 409 | if l_d -. n.l_sup > tol then () (* no *) 410 | else if l_d +. n.l_sup <= tol then 411 | (* all remaining points are included *) 412 | raise Found 413 | else (* need to inspect further *) 414 | if l_d <= tol then 415 | raise Found 416 | else 417 | loop n.left in 418 | (* should we dive right? *) 419 | let r_d = P.dist query n.r_vp in 420 | if r_d -. n.r_sup > tol then () (* no *) 421 | else if r_d +. n.r_sup <= tol then 422 | (* all remaining points are included *) 423 | raise Found 424 | else (* need to inspect further *) 425 | if r_d <= tol then 426 | raise Found 427 | else 428 | loop n.right in 429 | try (loop tree; false) 430 | with Found -> true 431 | 432 | (* all points [x] such that [P.dist query x <= tol] and 433 | all points [y] such that [P.dist query y > tol] *) 434 | let partition query tol tree = 435 | let rec loop included excluded = function 436 | | Empty -> (included, excluded) 437 | | Bucket b -> 438 | let b_d = P.dist query b.vp in 439 | (* should we inspect bucket points? *) 440 | if b_d -. b.sup > tol then (* no *) 441 | (included, to_list_loop excluded (Bucket b)) 442 | (* are all remaining points included? *) 443 | else if b_d +. b.sup <= tol then (* yes *) 444 | (to_list_loop included (Bucket b), excluded) 445 | else (* we need to inspect the bucket *) 446 | let included', excluded' = 447 | if b_d <= tol then 448 | (b.vp :: included, excluded) 449 | else 450 | (included, b.vp :: excluded) in 451 | A.fold_left (fun (included'', excluded'') y -> 452 | let y_d = P.dist query y in 453 | if y_d <= tol then 454 | (y :: included'', excluded'') 455 | else 456 | (included'', y :: excluded'') 457 | ) (included', excluded') b.points 458 | | Node n -> 459 | let l_d = P.dist query n.l_vp in 460 | (* should we dive left? *) 461 | let included', excluded' = 462 | if l_d -. n.l_sup > tol then (* no *) 463 | (included, to_list_loop (n.l_vp :: excluded) n.left) 464 | else if l_d +. n.l_sup <= tol then 465 | (* all remaining points are included *) 466 | (to_list_loop (n.l_vp :: included) n.left, excluded) 467 | else 468 | (* need to inspect further *) 469 | let included'', excluded'' = 470 | if l_d <= tol then 471 | (n.l_vp :: included, excluded) 472 | else 473 | (included, n.l_vp :: excluded) in 474 | loop included'' excluded'' n.left in 475 | (* should we dive right? *) 476 | let r_d = P.dist query n.r_vp in 477 | if r_d -. n.r_sup > tol then (* no *) 478 | (included', to_list_loop (n.r_vp :: excluded') n.right) 479 | else if r_d +. n.r_sup <= tol then 480 | (* all remaining points are included *) 481 | (to_list_loop (n.r_vp :: included') n.right, excluded') 482 | else 483 | (* need to inspect further *) 484 | let included'', excluded'' = 485 | if r_d <= tol then 486 | (n.r_vp :: included', excluded') 487 | else 488 | (included', n.r_vp :: excluded') in 489 | loop included'' excluded'' n.right in 490 | loop [] [] tree 491 | 492 | (* test if the tree invariant holds. 493 | If it doesn't, we are in trouble... *) 494 | let rec check = function 495 | | Empty -> true 496 | | Bucket b -> (* check bounds *) 497 | A.for_all (fun x -> 498 | let d = P.dist b.vp x in 499 | d <= b.sup 500 | ) b.points 501 | | Node n -> (* check bounds and partitioning rules *) 502 | L.for_all (fun x -> (* lbounds *) 503 | let l_d = P.dist n.l_vp x in 504 | let r_d = P.dist n.r_vp x in 505 | (l_d <= n.l_sup && l_d < r_d) 506 | ) (to_list n.left) && 507 | L.for_all (fun x -> (* rbounds *) 508 | let r_d = P.dist n.r_vp x in 509 | let l_d = P.dist n.l_vp x in 510 | (r_d <= n.r_sup && r_d <= l_d) 511 | ) (to_list n.right) && 512 | (* check left then right *) 513 | check n.left && check n.right 514 | 515 | (* extract vp points from the tree *) 516 | let inspect tree = 517 | let rec loop acc = function 518 | | Empty -> acc 519 | | Bucket b -> b.vp :: acc 520 | | Node n -> 521 | let acc' = n.l_vp :: n.r_vp :: acc in 522 | let acc'' = loop acc' n.left in 523 | loop acc'' n.right in 524 | loop [] tree 525 | 526 | let vantage_points = 527 | inspect 528 | 529 | (* extract vantage points which are not attached to a bucket *) 530 | let orfan_vantage_points tree = 531 | let rec loop acc = function 532 | | Empty | Bucket _ -> acc 533 | | Node n -> 534 | loop (loop (n.l_vp :: n.r_vp :: acc) n.left) n.right in 535 | loop [] tree 536 | 537 | (* collect all buckets *) 538 | let buckets (tree: t): bucket list = 539 | let rec loop acc = function 540 | | Empty -> acc 541 | | Bucket b -> b :: acc 542 | | Node n -> 543 | loop (loop acc n.left) n.right in 544 | loop [] tree 545 | 546 | let find query tree = 547 | let nearest_p, nearest_d = nearest_neighbor query tree in 548 | (* Log.warn "nearest_d: %f" nearest_d; *) 549 | if nearest_d = 0.0 then nearest_p else raise Not_found 550 | 551 | let mem query tree = 552 | try let _ = find query tree in true 553 | with Not_found -> false 554 | 555 | (* find where 'query' would belong in 'tree' *) 556 | let get_addr query tree = 557 | let rec loop acc = function 558 | | Empty | Bucket _ -> L.rev acc 559 | | Node n -> 560 | let l_d = P.dist query n.l_vp in 561 | let r_d = P.dist query n.r_vp in 562 | if l_d < r_d then 563 | loop (L l_d :: acc) n.left 564 | else 565 | loop (R r_d :: acc) n.right in 566 | loop [] tree 567 | 568 | (* add 'query' at 'addr' in 'tree' if possible, or crash if not. 569 | Return a new tree where bounds on the path to the new point 570 | have been updated. *) 571 | let add query address tree = 572 | let rec loop addr = function 573 | | Empty -> 574 | begin match addr with 575 | | [] -> Bucket { vp = query; sup = 0.0; points = [||] } 576 | | _ -> assert(false) (* cannot go deeper in tree *) 577 | end 578 | | Bucket b -> 579 | begin match addr with 580 | | [] -> 581 | let d = P.dist query b.vp in 582 | let points = A.append b.points [|query|] in 583 | Bucket { vp = b.vp; sup = max b.sup d; points } 584 | | _ -> assert(false) (* cannot go deeper in tree *) 585 | end 586 | | Node n -> 587 | begin match addr with 588 | | [] -> assert(false) (* should go deeper *) 589 | | L l_d :: rest -> 590 | Node { l_vp = n.l_vp; 591 | l_sup = max n.l_sup l_d; 592 | r_vp = n.r_vp; 593 | r_sup = n.r_sup; 594 | left = loop rest n.left; 595 | right = n.right } 596 | | R r_d :: rest -> 597 | Node { l_vp = n.l_vp; 598 | l_sup = n.l_sup; 599 | r_vp = n.r_vp; 600 | r_sup = max n.r_sup r_d; 601 | left = n.left; 602 | right = loop rest n.right } 603 | end 604 | in 605 | loop address tree 606 | 607 | let to_string t = 608 | let rec loop path acc = function 609 | | Empty -> 610 | let str = sprintf "%s 0" (string_of_path (L.rev path)) in 611 | str :: acc 612 | | Bucket b -> 613 | let str = 614 | sprintf "%s %d" 615 | (string_of_path (L.rev path)) 616 | (bucket_length b) in 617 | str :: acc 618 | | Node n -> 619 | let acc' = loop (Left :: path) acc n.left in 620 | loop (Right :: path) acc' n.right 621 | in 622 | let unsorted = loop [] [] t in 623 | let sorted = List.sort compare unsorted in 624 | String.concat "\n" sorted 625 | 626 | (* let create_sample sample_size nprocs points = 627 | * let n = A.length points in 628 | * if n <= sample_size then 629 | * create 1 Two_bands points 630 | * else 631 | * begin 632 | * (\* randomize points *\) 633 | * BatArray.shuffle ~state:rng points; 634 | * let to_index = A.sub points 0 sample_size in 635 | * let rest_size = n - sample_size in 636 | * let rest = A.sub points sample_size rest_size in 637 | * (\* create standard bst for the sample *\) 638 | * let bst = create 1 Two_bands to_index in 639 | * let addr_indexes = 640 | * Parmap.array_parmapi ~ncores:nprocs ~chunksize:1 641 | * (fun i x -> (i, get_addr x bst)) rest in 642 | * (\* add remaining points to previously built bst *\) 643 | * A.fold_left (fun acc (i, addr) -> 644 | * let point = rest.(i) in 645 | * add point addr acc 646 | * ) bst addr_indexes 647 | * end *) 648 | 649 | (* Hierarchical simplification from: 650 | Pauly, M., Gross, M., & Kobbelt, L. P. (2002, October). 651 | Efficient simplification of point-sampled surfaces. 652 | In Proceedings of the conference on Visualization'02 (pp. 163-170). 653 | IEEE Computer Society. *) 654 | let simplify t = 655 | (* get orfan vps *) 656 | let orfan_vps = orfan_vantage_points t in 657 | (* get all buckets and their addresses *) 658 | let all_buckets = buckets t in 659 | (* maximum number of distinct adresses *) 660 | let n = (L.length orfan_vps) + (L.length all_buckets) in 661 | let addr_to_points = Ht.create n in 662 | L.iter (fun (b: bucket) -> 663 | let addr = get_addr b.vp t in 664 | let points = points_of_bucket b in 665 | try 666 | let prev_points = Ht.find addr_to_points addr in 667 | let curr_points = L.rev_append points prev_points in 668 | Ht.replace addr_to_points addr curr_points 669 | with Not_found -> 670 | Ht.add addr_to_points addr points 671 | ) all_buckets; 672 | (* address each orfan vp and add it to points at the same address, 673 | if any *) 674 | L.iter (fun vp -> 675 | let addr = get_addr vp t in 676 | try 677 | let prev_points = Ht.find addr_to_points addr in 678 | Ht.replace addr_to_points addr (vp :: prev_points) 679 | with Not_found -> 680 | (* under a vantage point can be an empty tree: 681 | in that case, there are no bucket points with the same address 682 | already *) 683 | Ht.add addr_to_points addr [vp] 684 | ) orfan_vps; 685 | (* return the list of points associated to each address *) 686 | Ht.fold (fun _k v acc -> v :: acc) addr_to_points [] 687 | 688 | end 689 | -------------------------------------------------------------------------------- /src/bisec_tree.mli: -------------------------------------------------------------------------------- 1 | 2 | (** Functorial interface. *) 3 | 4 | module type Point = 5 | sig 6 | (** A point. *) 7 | type t 8 | 9 | (** [dist] must be a distance. 10 | Be careful with the implementation of your distance 11 | (dist x x = 0.0, triangular inequality must hold, 12 | NaN is not a proper distance, etc). *) 13 | val dist: t -> t -> float 14 | end 15 | 16 | (** Heuristics to find good vantage (reference) points. 17 | One_band is faster (at tree construction time), 18 | but the resulting tree should be less efficient for queries. 19 | Two_bands (recommended) is more expensive (at tree construction time) 20 | but queries should be faster. 21 | Heuristics inspired by 22 | "Near Neighbor Search in Large Metric Spaces"; 23 | Sergey Brin; November 20, 1995. *) 24 | type vp_heuristic = One_band | Two_bands 25 | 26 | (** To reach a point in the bst, you need to follow a path. 27 | A path is a list of directions. *) 28 | type direction = Left | Right 29 | 30 | (** Part of a point's address in the bst. *) 31 | type step = L of float (* dist to l_vp *) 32 | | R of float (* dist to r_vp *) 33 | 34 | val string_of_addr: step list -> string 35 | 36 | module Make: functor (P: Point) -> 37 | sig 38 | (** A Bisector Tree (BST). *) 39 | type t 40 | 41 | (** [create k h points] create the BST containing all [points], 42 | using bucket size [k] and heuristic [h]. 43 | You can provide an optional [progress_callback] 44 | function to give some feedback to the user 45 | when indexing many points. If provided, [progress_callback] 46 | will be called upon progression of the indexing as 47 | [progress_callback x y]. 48 | [x] is the current number of points that have been indexed 49 | and [y] is the total number of points to index. 50 | The default [progress_callback] function does nothing. *) 51 | val create: ?progress_callback:(int -> int -> unit) -> 52 | int -> vp_heuristic -> P.t array -> t 53 | 54 | (* (\** [create_sample sample_size nprocs points] 55 | * create a bst using only a subset of [sample_size] from [points]. 56 | * Remaining points are adressed in parallel using [nprocs], 57 | * then sequentially added to the previously created bst. *\) 58 | * val create_sample: int -> int -> P.t array -> t *) 59 | 60 | (** [sample_distances n points] get distances found in [n] pairs 61 | of randomly-chosen points. The result is sorted. *) 62 | val sample_distances: int -> P.t array -> float array 63 | 64 | (** [nearest_neighbor q bst] return the distance along with the nearest 65 | neighbor to query point [q] in [bst]. Warning: there may be several 66 | points at this distance from [q] in [bst], 67 | but a single (arbitrary) one is returned. 68 | If you are not happy with this, use a point type that is 69 | deduplicated (i.e. a point that holds the info for all points with 70 | the same coordinates). *) 71 | val nearest_neighbor: P.t -> t -> P.t * float 72 | 73 | (** [neighbors q tol bst] return all points in [bst] within 74 | [tol] distance from query point [q]. 75 | I.e. all points returned are within [(d <= tol)] 76 | distance from [q]. *) 77 | val neighbors: P.t -> float -> t -> P.t list 78 | 79 | (** [any_neighbor q tol bst] tell if the range query 80 | [neighbors q tol bst] would return some neighbors, 81 | but much faster than doing the actual range query. *) 82 | val any_neighbor: P.t -> float -> t -> bool 83 | 84 | (** [partition q tol bst] is like [neighbors], but a pair 85 | [(xs, ys)] is returned, such that [(d <= tol)] 86 | for any [x] and [(d > tol)] for any [y]. *) 87 | val partition: P.t -> float -> t -> P.t list * P.t list 88 | 89 | (** [to_list bst] return the list of points inside [bst], 90 | in an unspecified order. *) 91 | val to_list: t -> P.t list 92 | 93 | (** [length bst] return the number of elements inside [bst]. 94 | I.e. how many points are indexed by this [bst]. *) 95 | val length: t -> int 96 | 97 | (** [is_empty bst] test if [bst] is empty. *) 98 | val is_empty: t -> bool 99 | 100 | (** [find q bst] return the first point with distance to [q] = 0.0. 101 | @raise Not_found if no such element exists. 102 | Warning: there may be several 103 | points at this distance from [q] in [bst], 104 | but a single (arbitrary) one is returned. *) 105 | val find: P.t -> t -> P.t 106 | 107 | (** [mem q bst] return true if [q] can be found in [bst], 108 | false otherwise. *) 109 | val mem: P.t -> t -> bool 110 | 111 | (** [root bst] return the first point found in [bst] 112 | (either a bucket's vantage point or a node's left vantage point). 113 | @raise Not_found if [is_empty bst]. *) 114 | val root: t -> P.t 115 | 116 | (** [check bst] test the invariant of [bst]. 117 | Should always be true. 118 | If invariant doesn't hold, then this library has a bug 119 | or your [P.dist] function is not a proper metric. *) 120 | val check: t -> bool 121 | 122 | (** [inspect bst] extract the vantage points of [bst] 123 | in an unspecified order. *) 124 | val inspect: t -> P.t list 125 | 126 | (** alias for [inspect] *) 127 | val vantage_points: t -> P.t list 128 | 129 | (** [dump max_depth bst] list points and paths to reach 130 | them in the [bst], going down up to [max_depth]. *) 131 | val dump: int -> t -> (direction list * P.t list) list 132 | 133 | (** [get_addr q bst] find the address of [q] in [bst]. *) 134 | val get_addr: P.t -> t -> step list 135 | 136 | (** [add p addr bst] add point [p] to [bst] at given address [addr]. 137 | [addr] _must_ be a valid address in [bst]. Call 138 | [get_addr p bst] to get a valid address for [p] in [bst]. *) 139 | val add: P.t -> step list -> t -> t 140 | 141 | (** [to_string bst] create a string representation/summary for [bst] *) 142 | val to_string: t -> string 143 | 144 | (** [simplify bst] compute the hierarchical simplification of 145 | the point set contained in [bst]. If [bst] was not constructed 146 | with [k > 1], it is stupid to call [simplify]. 147 | For example, if you want to reduce the size of your point set by at 148 | least 10. First, construct a bst with k=10. 149 | Then, call [simplify] on it. 150 | The result is a list of points lists. 151 | You should average each points list in order to get the 152 | simplified point set. Note that if your points carry a payload, 153 | during averaging the payload might also need to be weighted or averaged 154 | in some way (depending on your application).*) 155 | val simplify: t -> (P.t list) list 156 | 157 | end 158 | -------------------------------------------------------------------------------- /src/dump.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | module A = BatArray 4 | module L = BatList 5 | 6 | let square x = 7 | x *. x 8 | 9 | let rng = Random.State.make_self_init () 10 | 11 | module P3D = struct 12 | type t = float * float * float 13 | let dist (x, y, z) (x', y', z') = 14 | sqrt (square (x -. x') +. square (y -. y') +. square (z -. z')) 15 | let rand () = 16 | (Random.State.float rng 1.0, 17 | Random.State.float rng 1.0, 18 | Random.State.float rng 1.0) 19 | let of_string s = 20 | Scanf.sscanf s "%f %f %f" (fun x y z -> x, y, z) 21 | let to_string (x, y, z) = 22 | sprintf "%f %f %f" x y z 23 | end 24 | 25 | module BST = Bst.Bisec_tree.Make (P3D) 26 | 27 | let with_in_file fn f = 28 | let input = open_in_bin fn in 29 | let res = f input in 30 | close_in input; 31 | res 32 | 33 | let lines_of_file fn = 34 | with_in_file fn (fun input -> 35 | let res, exn = L.unfold_exc (fun () -> input_line input) in 36 | if exn <> End_of_file then 37 | raise exn 38 | else res 39 | ) 40 | 41 | let char_of_dir = function 42 | | Bst.Bisec_tree.Left -> '0' 43 | | Bst.Bisec_tree.Right -> '1' 44 | 45 | let string_of_path p = 46 | let buff = Buffer.create 80 in 47 | L.iter (fun dir -> 48 | Buffer.add_char buff (char_of_dir dir) 49 | ) p; 50 | Buffer.contents buff 51 | 52 | let main () = 53 | let _argc, args = CLI.init () in 54 | let input_fn = CLI.get_string ["-i"] args in 55 | let depth = CLI.get_int ["-d"] args in 56 | let point_lines = lines_of_file input_fn in 57 | let points = L.map P3D.of_string point_lines in 58 | let tree = BST.(create 10 Bst.Bisec_tree.Two_bands (A.of_list points)) in 59 | let dump = BST.dump depth tree in 60 | L.iter (fun (path, points) -> 61 | let p_str = string_of_path path in 62 | L.iter (fun p -> 63 | printf "%s %s\n" p_str (P3D.to_string p) 64 | ) points 65 | ) dump 66 | 67 | let () = main () 68 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name bst) 3 | (public_name bst) 4 | (modules bisec_tree) 5 | (libraries batteries)) 6 | 7 | (executables 8 | (names test dump simplify) 9 | (modules test dump simplify) 10 | (libraries unix dolog batteries minicli bst)) 11 | -------------------------------------------------------------------------------- /src/simplify.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | module CLI = Minicli.CLI 4 | module L = BatList 5 | 6 | let square x = 7 | x *. x 8 | 9 | module P = struct 10 | 11 | type t = float * float * float 12 | 13 | let dist (x, y, z) (x', y', z') = 14 | sqrt (square (x -. x') +. square (y -. y') +. square (z -. z')) 15 | 16 | (* avg. list of points to a single point *) 17 | let average points = 18 | let n = float (L.length points) in 19 | let (u, v, w) = 20 | L.fold_left (fun (x, y, z) (x', y', z') -> 21 | (x +. x', y +. y', z +. z') 22 | ) (0., 0., 0.) points in 23 | (u /. n, v /. n, w /. n) 24 | end 25 | 26 | module BST = Bst.Bisec_tree.Make (P) 27 | 28 | type filename = string 29 | 30 | let with_in_file (fn: filename) (f: in_channel -> 'a): 'a = 31 | let input = open_in_bin fn in 32 | let res = f input in 33 | close_in input; 34 | res 35 | 36 | let with_out_file (fn: filename) (f: out_channel -> 'a): 'a = 37 | let output = open_out_bin fn in 38 | let res = f output in 39 | close_out output; 40 | res 41 | 42 | let map_on_lines_of_file (fn: filename) (f: string -> 'a): 'a list = 43 | with_in_file fn (fun input -> 44 | let res, exn = L.unfold_exc (fun () -> f (input_line input)) in 45 | if exn = End_of_file then res 46 | else raise exn 47 | ) 48 | 49 | let main () = 50 | Log.color_on (); 51 | Log.set_log_level Log.INFO; 52 | let _argc, args = CLI.init () in 53 | let k = CLI.get_int ["-k"] args in 54 | let points = 55 | let all_points = 56 | map_on_lines_of_file "data/bunny.txt" (fun line -> 57 | Scanf.sscanf line "%f %f %f" (fun x y z -> (x, y, z)) 58 | ) in 59 | Array.of_list all_points in 60 | let tree = BST.(create k Two_bands) points in 61 | let summarized = BST.simplify tree in 62 | L.iteri (fun i group -> 63 | let fn = sprintf "test_%05d.txt" i in 64 | with_out_file fn (fun out -> 65 | L.iter (fun (x, y, z) -> 66 | fprintf out "%f %f %f\n" x y z 67 | ) group 68 | ); 69 | printf "%s\n" fn 70 | ) summarized; 71 | let averaged = L.map P.average summarized in 72 | L.iter (fun (x, y, z) -> 73 | eprintf "%f %f %f\n" x y z 74 | ) averaged 75 | 76 | let () = main () 77 | -------------------------------------------------------------------------------- /src/test.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | module A = BatArray 4 | module L = List 5 | module Log = Dolog.Log 6 | 7 | let square x = 8 | x *. x 9 | 10 | let rng = Random.State.make_self_init () 11 | 12 | module P = struct 13 | type t = float * float 14 | let dist (x, y) (x', y') = 15 | sqrt (square (x -. x') +. square (y -. y')) 16 | let rand () = 17 | (Random.State.float rng 1.0, Random.State.float rng 1.0) 18 | end 19 | 20 | module BST = Bst.Bisec_tree.Make (P) 21 | 22 | let with_out_file fn f = 23 | let output = open_out_bin fn in 24 | let res = f output in 25 | close_out output; 26 | res 27 | 28 | let list_to_file fn to_string l = 29 | with_out_file fn (fun out -> 30 | L.iter (fun x -> fprintf out "%s\n" (to_string x)) l 31 | ) 32 | 33 | let array_to_file fn to_string a = 34 | with_out_file fn (fun out -> 35 | A.iter (fun x -> fprintf out "%s\n" (to_string x)) a 36 | ) 37 | 38 | (* nearest neighbor brute force search *) 39 | let nearest_brute_force query points = 40 | A.fold_left (fun (p, d) p' -> 41 | let d' = P.dist query p' in 42 | if d' < d then 43 | (p', d') 44 | else 45 | (p, d) 46 | ) (query, max_float) points 47 | 48 | (* all neighbors within [tol] of [query] using brute force *) 49 | let neighbors_brute_force query tol points = 50 | A.fold_left (fun acc p -> 51 | if P.dist query p <= tol then 52 | p :: acc 53 | else 54 | acc 55 | ) [] points 56 | 57 | let any_neighbor_brute_force query tol points = 58 | match neighbors_brute_force query tol points with 59 | | [] -> false (* not any neighbor *) 60 | | _ -> true (* some *) 61 | 62 | let partition_brute_force query tol points = 63 | A.fold_left (fun (ok, ko) p -> 64 | if P.dist query p <= tol then 65 | (p :: ok, ko) 66 | else 67 | (ok, p :: ko) 68 | ) ([], []) points 69 | 70 | (* measure time spent in f in seconds *) 71 | let wall_clock_time f = 72 | let start = Unix.gettimeofday () in 73 | let res = f () in 74 | let stop = Unix.gettimeofday () in 75 | let delta_t = stop -. start in 76 | (delta_t, res) 77 | 78 | let progress_callback curr total = 79 | Log.info "done: %.3f" (float curr /. float total) 80 | 81 | let main () = 82 | Log.color_on (); 83 | Log.set_log_level Log.INFO; 84 | (* on-line algorithm *) 85 | (* let points_100k = A.init 100_000 (fun _ -> P.rand ()) in *) 86 | (* let bst_100k = BST.create_sample 1000 16 points_100k in 87 | * assert(BST.length bst_100k = 100_000); 88 | * (\* Log.info "bst_100k:\n%s" (BST.to_string bst_100k); *\) 89 | * assert(BST.check bst_100k); *) 90 | (* N rand points *) 91 | let nb_points = 1000 in 92 | let points = A.init nb_points (fun _ -> P.rand ()) in 93 | let tree_k1 = BST.(create 1 Two_bands) points in 94 | let tree_k50 = BST.(create 50 Two_bands points) in 95 | (* check tree lengths *) 96 | assert(BST.length tree_k1 = nb_points); 97 | assert(BST.length tree_k50 = nb_points); 98 | (* check tree invariant *) 99 | assert(BST.check tree_k1); 100 | assert(BST.check tree_k50); 101 | (* addr and add test *) 102 | let points_1000 = A.init 1000 (fun _ -> P.rand ()) in 103 | let points_500 = A.sub points_1000 0 500 in 104 | let tree_500 = BST.(create 1 Two_bands points_500) in 105 | let tree_1000 = ref tree_500 in 106 | (* Log.info "tree_500:\n%s" (BST.to_string tree_500); *) 107 | for i = 500 to 999 do 108 | let p = points_1000.(i) in 109 | let addr = BST.get_addr p tree_500 in 110 | (* Log.info "addr: %s" (Bst.Bisec_tree.string_of_addr addr); *) 111 | tree_1000 := BST.add p addr !tree_1000; 112 | assert(BST.check !tree_1000) 113 | done; 114 | assert(BST.length tree_500 = 500); 115 | assert(BST.length !tree_1000 = 1000); 116 | (* Log.info "tree_1000:\n%s" (BST.to_string !tree_1000); *) 117 | (* check all points are in the tree *) 118 | let n = L.length (BST.to_list tree_k1) in 119 | assert(n = nb_points); 120 | let m = L.length (BST.to_list tree_k50) in 121 | assert(n = m); 122 | Log.info "testing if all points can be found..."; 123 | assert( 124 | A.for_all 125 | (fun ((x, y) as p) -> 126 | let found = BST.mem p tree_k1 in 127 | if not found then 128 | Log.error "not found: %f %f" x y; 129 | found 130 | ) points 131 | ); 132 | Log.info "testing neighbor queries..."; 133 | assert( 134 | A.for_all 135 | (fun p -> 136 | let tol = Random.State.float rng 1.0 in 137 | let brute_points = neighbors_brute_force p tol points in 138 | let smart_points = BST.neighbors p tol tree_k1 in 139 | Log.debug "tol: %f card: %d" tol (L.length smart_points); 140 | L.sort compare brute_points = L.sort compare smart_points 141 | ) points 142 | ); 143 | Log.info "testing any_neighbor queries..."; 144 | assert( 145 | A.for_all 146 | (fun p -> 147 | let tol = Random.State.float rng 1.0 in 148 | any_neighbor_brute_force p tol points = BST.any_neighbor p tol tree_k1 149 | ) points 150 | ); 151 | Log.info "testing partition..."; 152 | assert( 153 | A.for_all 154 | (fun p -> 155 | let tol = Random.State.float rng 1.0 in 156 | let ok_brute, ko_brute = partition_brute_force p tol points in 157 | let ok_smart = BST.neighbors p tol tree_k1 in 158 | let ok, ko = BST.partition p tol tree_k1 in 159 | let nok, nko = L.(length ok, length ko) in 160 | Log.debug "tol: %f card: %d" tol nko; 161 | assert(nok + nko = nb_points); 162 | (L.sort compare ok_brute = L.sort compare ok) && 163 | (L.sort compare ok = L.sort compare ok_smart) && 164 | (L.sort compare ko_brute = L.sort compare ko) 165 | ) points 166 | ); 167 | Log.info "testing NN queries..."; 168 | assert( 169 | A.for_all 170 | (fun p -> 171 | let brute_points = nearest_brute_force p points in 172 | let smart_points = BST.nearest_neighbor p tree_k1 in 173 | brute_points = smart_points 174 | ) points 175 | ); 176 | (* time construction of tree for many points Vs heuristic *) 177 | let many_points = A.init 1_000_000 (fun _ -> P.rand ()) in 178 | (* inspect distribution of distances *) 179 | let dists = BST.sample_distances 1000 many_points in 180 | let dists_fn = "dists_1000.txt" in 181 | array_to_file dists_fn string_of_float dists; 182 | let dt1, big_tree = 183 | wall_clock_time (fun () -> BST.(create 10 Two_bands many_points)) in 184 | Log.info "dt1: %f" dt1; 185 | assert(BST.check big_tree); 186 | Log.info "NN query times"; 187 | for _ = 1 to 10 do 188 | let q = P.rand () in 189 | let dt, res = 190 | wall_clock_time (fun () -> nearest_brute_force q many_points) in 191 | let dt', res' = 192 | wall_clock_time (fun () -> BST.nearest_neighbor q big_tree) in 193 | assert(res = res'); 194 | Log.info "dt: %f dt': %f accel: %.1f" dt dt' (dt /. dt') 195 | done; 196 | Log.info "neighbors query times"; 197 | for _ = 1 to 10 do 198 | let q = P.rand () in 199 | let tol = 0.1 in 200 | let dt, brute_neighbors = 201 | wall_clock_time (fun () -> neighbors_brute_force q tol many_points) in 202 | let dt', smart_neighbors = 203 | wall_clock_time (fun () -> BST.neighbors q tol big_tree) in 204 | let reference = L.sort compare brute_neighbors in 205 | assert(reference = L.sort compare smart_neighbors); 206 | Log.info "dt: %f dt': %f accel: %.1f" dt dt' (dt /. dt'); 207 | done 208 | 209 | let () = main () 210 | --------------------------------------------------------------------------------