├── .travis.yml ├── Setup.hs ├── .gitignore ├── README.md ├── app-src ├── Data │ └── Point2d.hs ├── Tests │ ├── DynamicTest.hs │ └── StaticTest.hs └── Benchmarks │ └── KDTBenchmark.hs ├── LICENSE ├── changelog.md ├── kdt.cabal └── lib-src └── Data ├── KdTree ├── Dynamic.hs └── Static.hs └── KdMap ├── Dynamic.hs └── Static.hs /.travis.yml: -------------------------------------------------------------------------------- 1 | language: haskell 2 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | cabal-dev 3 | *.o 4 | *.hi 5 | *.chi 6 | *.chs.h 7 | .virthualenv 8 | *_flymake.hs 9 | .cabal-sandbox 10 | cabal.sandbox.config 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | kdt [![Build Status](https://travis-ci.org/giogadi/kdt.svg?branch=master)](https://travis-ci.org/giogadi/kdt) 2 | === 3 | 4 | Fast and flexible k-d trees for the following point queries: 5 | 6 | * Nearest neighbor 7 | * all neighbors within given radius 8 | * k nearest neighbors 9 | * all neighbors within given range 10 | 11 | Check out the documentation [on Hackage](http://hackage.haskell.org/package/kdt). 12 | 13 | Benchmarks can be found [on the wiki](https://github.com/giogadi/kdt/wiki/Benchmarks). 14 | -------------------------------------------------------------------------------- /app-src/Data/Point2d.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_HADDOCK hide #-} 2 | 3 | {-# LANGUAGE DeriveGeneric #-} 4 | 5 | module Data.Point2d where 6 | 7 | import Control.DeepSeq 8 | import Control.DeepSeq.Generics (genericRnf) 9 | import GHC.Generics 10 | import Test.QuickCheck 11 | 12 | data Point2d = Point2d Double Double deriving (Show, Eq, Ord, Generic) 13 | instance NFData Point2d where rnf = genericRnf 14 | 15 | pointAsList2d :: Point2d -> [Double] 16 | pointAsList2d (Point2d x y) = [x, y] 17 | 18 | distSqr2d :: Point2d -> Point2d -> Double 19 | distSqr2d (Point2d x1 y1) (Point2d x2 y2) = let dx = x2 - x1 20 | dy = y2 - y1 21 | in dx*dx + dy*dy 22 | 23 | instance Arbitrary Point2d where 24 | arbitrary = do 25 | x <- arbitrary 26 | y <- arbitrary 27 | return (Point2d x y) 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 Luis G. Torres 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # 0.2.6 2 | * Fixed some failing tests (thanks to @nh2) 3 | 4 | # 0.2.5 5 | * Fixed a minor compile error under GHC 9 caused by ambiguous use of "singleton". 6 | 7 | # 0.2.4 8 | * Now exporting KdMap.Static.TreeNode to facilitate advanced usage of the library. 9 | 10 | # 0.2.3 11 | * For internal priority queue implementation, use the heap library instead of pqueue library in order to build on ghc 7.10. 12 | 13 | # 0.2.2 14 | * Relax lower version bound on QuickCheck to 2.5. 15 | 16 | # 0.2.1 17 | * Relax upper version constraint for MonadRandom (benchmarking code) 18 | * Add Data.Point2d as dependency of executables so tests and benchmarks can be built from the archive downloaded on Hackage. 19 | 20 | # 0.2.0 21 | * Lots and lots of renaming all throughout to more closely match terminology used in `containers`. 22 | * Removed kdt library dependency on QuickCheck (if not building testing code). 23 | * Removed testing module Point2d from public API 24 | * All structures now have Show instance 25 | * Static variants now have functions for dynamically inserting new points into existing structure, with caveat that these functions do not maintain balanced tree structure. 26 | -------------------------------------------------------------------------------- /kdt.cabal: -------------------------------------------------------------------------------- 1 | -- Initial kdt.cabal generated by cabal init. For further documentation, 2 | -- see http://haskell.org/cabal/users-guide/ 3 | 4 | name: kdt 5 | version: 0.2.6 6 | synopsis: Fast and flexible k-d trees for various types of point queries. 7 | description: This package includes static and dynamic versions of k-d trees, 8 | as well as \"Map\" variants that store data at each point in the 9 | k-d tree structure. Supports nearest neighbor, 10 | k nearest neighbors, points within a given radius, and points 11 | within a given range. 12 | To learn to use this package, start with the documentation for 13 | the "Data.KdTree.Static" module. 14 | homepage: https://github.com/giogadi/kdt 15 | license: MIT 16 | license-file: LICENSE 17 | author: Luis G. Torres 18 | maintainer: lgtorres42@gmail.com 19 | copyright: Luis G. Torres, 2014 20 | category: Data 21 | build-type: Simple 22 | extra-source-files: changelog.md 23 | cabal-version: >=1.10 24 | source-repository head 25 | type: git 26 | location: https://github.com/giogadi/kdt.git 27 | branch: master 28 | 29 | library 30 | exposed-modules: Data.KdMap.Static, 31 | Data.KdTree.Static, 32 | Data.KdMap.Dynamic, 33 | Data.KdTree.Dynamic 34 | -- other-modules: 35 | other-extensions: DeriveGeneric, TemplateHaskell 36 | ghc-options: -Wall -O3 37 | -- ghc-prof-options: -Wall -O3 -fprof-auto 38 | build-depends: base >=4.6 && <5, 39 | deepseq >=1.3, 40 | heap >=1.0.0, 41 | deepseq-generics >=0.2.0.0 42 | hs-source-dirs: lib-src 43 | default-language: Haskell2010 44 | 45 | Test-Suite StaticTest 46 | type: exitcode-stdio-1.0 47 | main-is: Tests/StaticTest.hs 48 | other-modules: Data.Point2d 49 | hs-source-dirs: app-src 50 | ghc-options: -Wall -O3 51 | build-depends: base >=4.6 && <5, 52 | kdt -any, 53 | containers >= 0.6.0.1, 54 | QuickCheck >=2.5, 55 | deepseq >=1.3, 56 | deepseq-generics >=0.2.0.0 57 | default-language: Haskell2010 58 | 59 | Test-Suite DynamicTest 60 | type: exitcode-stdio-1.0 61 | main-is: Tests/DynamicTest.hs 62 | other-modules: Data.Point2d 63 | hs-source-dirs: app-src 64 | ghc-options: -Wall -O3 65 | build-depends: base >=4.6 && <5, 66 | kdt -any, 67 | containers >= 0.6.0.1, 68 | QuickCheck >=2.5, 69 | deepseq >=1.3, 70 | deepseq-generics >=0.2.0.0 71 | default-language: Haskell2010 72 | 73 | benchmark KDTBenchmark 74 | type: exitcode-stdio-1.0 75 | main-is: Benchmarks/KDTBenchmark.hs 76 | other-modules: Data.Point2d 77 | hs-source-dirs: app-src 78 | ghc-options: -Wall -O3 79 | -- ghc-prof-options: -Wall -O3 -fprof-auto 80 | -- "-with-rtsopts=-p" 81 | build-depends: base >=4.6 && <5, 82 | kdt -any, 83 | MonadRandom >= 0.1.12, 84 | mersenne-random-pure64 >=0.2.0.4, 85 | criterion >=1.0.0.0, 86 | QuickCheck >=2.5, 87 | heap >=1.0.0, 88 | deepseq >=1.3, 89 | deepseq-generics >=0.2.0.0 90 | default-language: Haskell2010 91 | -------------------------------------------------------------------------------- /app-src/Tests/DynamicTest.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell, CPP #-} 2 | 3 | import qualified Data.KdMap.Static as KDM 4 | import Data.KdMap.Dynamic 5 | 6 | import Control.Monad (unless) 7 | import Data.Bits 8 | import Data.List 9 | import qualified Data.Set as Set 10 | import Data.Set (isSubsetOf) 11 | import Data.Point2d 12 | import System.Exit (exitFailure) 13 | import Test.QuickCheck 14 | 15 | #if MIN_VERSION_QuickCheck(2,7,0) 16 | #else 17 | import Test.QuickCheck.All 18 | #endif 19 | 20 | import Tests.TestHelpers (nearestsLinear, withinDistanceOfKthNearest) 21 | 22 | testElements :: [p] -> [(p, Int)] 23 | testElements ps = zip ps [1 ..] 24 | 25 | checkLogNTrees :: Real a => PointAsListFn a p -> SquaredDistanceFn a p -> [p] -> Bool 26 | checkLogNTrees p2l d2 ps = 27 | let lengthIsLogN kdm = length (subtreeSizes kdm) == popCount (size kdm) 28 | in all lengthIsLogN $ scanl insertPair (emptyWithDist p2l d2) $ testElements ps 29 | 30 | prop_logNTrees :: [Point2d] -> Bool 31 | prop_logNTrees = checkLogNTrees pointAsList2d distSqr2d 32 | 33 | checkTreeSizesPowerOf2 :: Real a => PointAsListFn a p -> 34 | SquaredDistanceFn a p -> 35 | [p] -> 36 | Bool 37 | checkTreeSizesPowerOf2 p2l d2 ps = 38 | let sizesPowerOf2 = all ((== 1) . popCount) . subtreeSizes 39 | in all sizesPowerOf2 $ scanl insertPair (emptyWithDist p2l d2) $ testElements ps 40 | 41 | prop_treeSizesPowerOf2 :: [Point2d] -> Bool 42 | prop_treeSizesPowerOf2 = checkTreeSizesPowerOf2 pointAsList2d distSqr2d 43 | 44 | checkNumElements :: Real a => PointAsListFn a p -> SquaredDistanceFn a p -> [p] -> Bool 45 | checkNumElements p2l d2 ps = 46 | let numsMatch (num, kdm) = size kdm == num && num == sum (subtreeSizes kdm) 47 | in all numsMatch $ zip [0..] $ scanl insertPair (emptyWithDist p2l d2) $ testElements ps 48 | 49 | prop_validNumElements :: [Point2d] -> Bool 50 | prop_validNumElements = checkNumElements pointAsList2d distSqr2d 51 | 52 | checkNearestConsistentWithLinear :: (Eq p, Real a) => PointAsListFn a p -> 53 | SquaredDistanceFn a p -> 54 | ([p], p) -> 55 | Bool 56 | checkNearestConsistentWithLinear p2l d2 (ps, query) = 57 | let dkdt = batchInsert (emptyWithDist p2l d2) $ testElements ps 58 | dkdtAnswer = nearest dkdt query 59 | in dkdtAnswer `elem` nearestsLinear p2l (testElements ps) query 60 | 61 | prop_nearestConsistentWithLinear :: Point2d -> Property 62 | prop_nearestConsistentWithLinear query = 63 | forAll (listOf1 arbitrary) $ \xs -> 64 | checkNearestConsistentWithLinear pointAsList2d distSqr2d (xs, query) 65 | 66 | checkKNearestConsistentWithLinear :: (Ord p, Real a) => PointAsListFn a p -> 67 | SquaredDistanceFn a p -> 68 | ([p], Int, p) -> 69 | Bool 70 | checkKNearestConsistentWithLinear p2l d2 (ps, k, query) = 71 | let dkdt = batchInsert (emptyWithDist p2l d2) $ testElements ps 72 | dkdtAnswer = kNearest dkdt k query 73 | possibleNearest = withinDistanceOfKthNearest p2l (testElements ps) query k 74 | in Set.fromList dkdtAnswer `isSubsetOf` Set.fromList possibleNearest 75 | 76 | prop_kNearestConsistentWithLinear :: Point2d -> Property 77 | prop_kNearestConsistentWithLinear query = 78 | forAll (listOf1 arbitrary) $ \xs -> 79 | forAll (choose (1, length xs)) $ \k -> 80 | checkKNearestConsistentWithLinear pointAsList2d distSqr2d (xs, k, query) 81 | 82 | checkInRadiusEqualToBatch :: (Ord p, Real a) => PointAsListFn a p -> 83 | SquaredDistanceFn a p -> 84 | ([p], a, p) -> 85 | Bool 86 | checkInRadiusEqualToBatch p2l d2 (ps, radius, query) = 87 | let kdt = KDM.buildWithDist p2l d2 $ testElements ps 88 | kdtAnswer = KDM.inRadius kdt radius query 89 | dkdt = batchInsert (emptyWithDist p2l d2) $ testElements ps 90 | dkdtAnswer = inRadius dkdt radius query 91 | in sort dkdtAnswer == sort kdtAnswer 92 | 93 | prop_checkInRadiusEqualToBatch :: Point2d -> Property 94 | prop_checkInRadiusEqualToBatch query = 95 | forAll (listOf1 arbitrary) $ \xs -> 96 | forAll (choose (0.0, 1000.0)) $ \radius -> 97 | checkInRadiusEqualToBatch pointAsList2d distSqr2d (xs, radius, query) 98 | 99 | prop_checkInRangeEqualToBatch :: ([Point2d], Point2d, Point2d) -> Bool 100 | prop_checkInRangeEqualToBatch ([], _, _) = True 101 | prop_checkInRangeEqualToBatch (xs, lowers, uppers) 102 | | and $ zipWith (<) (pointAsList2d lowers) (pointAsList2d uppers) = 103 | let kdt = KDM.buildWithDist pointAsList2d distSqr2d $ testElements xs 104 | kdtAnswer = KDM.inRange kdt lowers uppers 105 | dkdt = batchInsert (emptyWithDist pointAsList2d distSqr2d) $ testElements xs 106 | dkdtAnswer = inRange dkdt lowers uppers 107 | in sort dkdtAnswer == sort kdtAnswer 108 | | otherwise = True 109 | 110 | 111 | -- Run all tests 112 | return [] 113 | runTests :: IO Bool 114 | runTests = $(forAllProperties) $ 115 | -- Vastly increase success counts; finds more bugs and our properties are cheap. 116 | quickCheckWithResult stdArgs{ maxSuccess = 2000 } 117 | 118 | main :: IO () 119 | main = do 120 | success <- runTests 121 | unless success exitFailure 122 | -------------------------------------------------------------------------------- /lib-src/Data/KdTree/Dynamic.hs: -------------------------------------------------------------------------------- 1 | module Data.KdTree.Dynamic 2 | ( -- * Usage 3 | 4 | -- $usage 5 | 6 | -- * Reference 7 | 8 | -- ** Types 9 | PointAsListFn 10 | , SquaredDistanceFn 11 | , KdTree 12 | -- ** Dynamic /k/-d tree construction 13 | , empty 14 | , singleton 15 | , emptyWithDist 16 | , singletonWithDist 17 | -- ** Insertion 18 | , insert 19 | -- ** Query 20 | , nearest 21 | , inRadius 22 | , kNearest 23 | , inRange 24 | , toList 25 | , null 26 | , size 27 | -- ** Utilities 28 | , defaultSqrDist 29 | ) where 30 | 31 | import Prelude hiding (null) 32 | 33 | import qualified Data.Foldable as F 34 | 35 | import qualified Data.KdMap.Dynamic as DKDM 36 | import Data.KdMap.Dynamic (PointAsListFn, SquaredDistanceFn, defaultSqrDist) 37 | 38 | -- $usage 39 | -- 40 | -- The 'KdTree' is a dynamic variant of 41 | -- @Data.KdTree.Static.@'Data.KdTree.Static.KdTree' that allows for 42 | -- insertion of new points into an existing 'KdTree'. This algorithm 43 | -- was implemented using a 44 | -- . 45 | -- 46 | -- Here's an example of interleaving 3D point insertions and point 47 | -- queries using 'KdTree': 48 | -- 49 | -- @ 50 | -- >>> let dkdt = 'singleton' point3dAsList (Point3D 0.0 0.0 0.0) 51 | -- 52 | -- >>> let dkdt' = 'insert' dkdt (Point3D 1.0 1.0 1.0) 53 | -- 54 | -- >>> 'nearest' dkdt' (Point3D 0.4 0.4 0.4) 55 | -- Point3D {x = 0.0, y = 0.0, z = 0.0} 56 | -- 57 | -- >>> let dkdt'' = 'insert' dkdt' (Point3D 0.5 0.5 0.5) 58 | -- 59 | -- >>> 'nearest' dkdt'' (Point3D 0.4 0.4 0.4) 60 | -- Point3D {x = 0.5, y = 0.5, z = 0.5} 61 | -- @ 62 | -- 63 | -- Check out @Data.KdMap.Dynamic.@'Data.KdMap.Dynamic.KdMap' if you 64 | -- want to associate a value with each point in your tree structure. 65 | 66 | -- | A dynamic /k/-d tree structure that stores points of type @p@ 67 | -- with axis values of type @a@. 68 | newtype KdTree a p = KdTree (DKDM.KdMap a p ()) 69 | 70 | instance F.Foldable (KdTree a) where 71 | foldr f z (KdTree dkdMap) = DKDM.foldrWithKey (f . fst) z dkdMap 72 | 73 | instance (Show a, Show p) => Show (KdTree a p) where 74 | show (KdTree kdm) = "KdTree " ++ show kdm 75 | 76 | -- | Generates an empty 'KdTree' with a user-specified distance function. 77 | emptyWithDist :: Real a => PointAsListFn a p -> SquaredDistanceFn a p -> KdTree a p 78 | emptyWithDist p2l d2 = KdTree $ DKDM.emptyWithDist p2l d2 79 | 80 | -- | Generates an empty 'KdTree' with the default distance function. 81 | empty :: Real a => PointAsListFn a p -> KdTree a p 82 | empty p2l = emptyWithDist p2l $ defaultSqrDist p2l 83 | 84 | -- | Returns whether the 'KdTree' is empty. 85 | null :: KdTree a p -> Bool 86 | null (KdTree dkdMap) = DKDM.null dkdMap 87 | 88 | -- | Generates a 'KdTree' with a single point using a 89 | -- user-specified distance function. 90 | singletonWithDist :: Real a => PointAsListFn a p 91 | -> SquaredDistanceFn a p 92 | -> p 93 | -> KdTree a p 94 | singletonWithDist p2l d2 p = KdTree $ DKDM.singletonWithDist p2l d2 (p, ()) 95 | 96 | -- | Generates a 'KdTree' with a single point using the default 97 | -- distance function. 98 | singleton :: Real a => PointAsListFn a p -> p -> KdTree a p 99 | singleton p2l = singletonWithDist p2l $ defaultSqrDist p2l 100 | 101 | -- | Adds a given point to a 'KdTree'. 102 | -- 103 | -- Average time complexity per insert for /n/ inserts: /O(log^2(n))/. 104 | insert :: Real a => KdTree a p -> p -> KdTree a p 105 | insert (KdTree dkdMap) p = KdTree $ DKDM.insert dkdMap p () 106 | 107 | -- | Given a 'KdTree' and a query point, returns the nearest point 108 | -- in the 'KdTree' to the query point. 109 | -- 110 | -- Average time complexity: /O(log^2(n))/. 111 | nearest :: Real a => KdTree a p -> p -> p 112 | nearest (KdTree dkdMap) = fst . DKDM.nearest dkdMap 113 | 114 | -- | Given a 'KdTree', a query point, and a number @k@, returns the 115 | -- @k@ nearest points in the 'KdTree' to the query point. 116 | -- 117 | -- Neighbors are returned in order of increasing distance from query 118 | -- point. 119 | -- 120 | -- Average time complexity: /log(k) * log^2(n)/ for /k/ nearest 121 | -- neighbors on a structure with /n/ data points. 122 | -- 123 | -- Worst case time complexity: /n * log(k)/ for /k/ nearest neighbors 124 | -- on a structure with /n/ data points. 125 | kNearest :: Real a => KdTree a p -> Int -> p -> [p] 126 | kNearest (KdTree dkdMap) k query = 127 | map fst $ DKDM.kNearest dkdMap k query 128 | 129 | -- | Given a 'KdTree', a query point, and a radius, returns all 130 | -- points in the 'KdTree' that are within the given radius of the 131 | -- query points. 132 | -- 133 | -- Points are not returned in any particular order. 134 | -- 135 | -- Worst case time complexity: /O(n)/ for /n/ data points. 136 | inRadius :: Real a => KdTree a p -> a -> p -> [p] 137 | inRadius (KdTree dkdMap) radius query = 138 | map fst $ DKDM.inRadius dkdMap radius query 139 | 140 | -- | Finds all points in a 'KdTree' with points within a given range, 141 | -- where the range is specified as a set of lower and upper bounds. 142 | -- 143 | -- Points are not returned in any particular order. 144 | -- 145 | -- Worst case time complexity: /O(n)/ for n data points and a range 146 | -- that spans all the points. 147 | inRange :: Real a => KdTree a p 148 | -> p -- ^ lower bounds of range 149 | -> p -- ^ upper bounds of range 150 | -> [p] -- ^ all points within given range 151 | inRange (KdTree dkdMap) lowers uppers = 152 | map fst $ DKDM.inRange dkdMap lowers uppers 153 | 154 | -- | Returns the number of elements in the 'KdTree'. 155 | -- 156 | -- Time complexity: /O(1)/ 157 | size :: KdTree a p -> Int 158 | size (KdTree dkdMap) = DKDM.size dkdMap 159 | 160 | -- | Returns a list of all the points in the 'KdTree'. 161 | -- 162 | -- Time complexity: /O(n)/ 163 | toList :: KdTree a p -> [p] 164 | toList (KdTree dkdMap) = DKDM.keys dkdMap 165 | -------------------------------------------------------------------------------- /app-src/Tests/StaticTest.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell, CPP #-} 2 | 3 | import Data.KdMap.Static as KDM 4 | 5 | import Control.Monad (unless) 6 | import Data.List (null, sort, sortBy) 7 | import qualified Data.Set as Set 8 | import Data.Set (isSubsetOf) 9 | import Data.Point2d 10 | import System.Exit (exitFailure) 11 | import Test.QuickCheck (Property, arbitrary, forAllProperties, forAll, listOf1, choose, Args(..), stdArgs, quickCheckWithResult) 12 | 13 | #if MIN_VERSION_QuickCheck(2,7,0) 14 | #else 15 | import Test.QuickCheck.All 16 | #endif 17 | 18 | import Tests.TestHelpers (nearestsLinear, withinDistanceOfKthNearest) 19 | 20 | testElements :: [p] -> [(p, Int)] 21 | testElements ps = zip ps [0 ..] 22 | 23 | prop_validTree :: Property 24 | prop_validTree = 25 | forAll (listOf1 arbitrary) $ isValid . build pointAsList2d . testElements 26 | 27 | checkElements :: (Ord p, Real a) => PointAsListFn a p -> [p] -> Bool 28 | checkElements pointAsList ps = 29 | let kdt = build pointAsList $ testElements ps 30 | in sort (assocs kdt) == sort (testElements ps) 31 | 32 | prop_sameElements :: Property 33 | prop_sameElements = forAll (listOf1 arbitrary) $ checkElements pointAsList2d 34 | 35 | checkNumElements :: Real a => PointAsListFn a p -> [p] -> Bool 36 | checkNumElements pointAsList ps = 37 | let kdm = build pointAsList $ testElements ps 38 | in size kdm == length ps 39 | 40 | prop_validNumElements :: Property 41 | prop_validNumElements = forAll (listOf1 arbitrary) $ checkNumElements pointAsList2d 42 | 43 | checkNearestConsistentWithLinear :: (Eq p, Real a, Show p) => KDM.PointAsListFn a p -> ([p], p) -> Bool 44 | checkNearestConsistentWithLinear pointAsList (ps, query) = 45 | let kdt = build pointAsList $ testElements ps 46 | in nearest kdt query `elem` nearestsLinear pointAsList (testElements ps) query 47 | 48 | prop_nearestConsistentWithLinear :: Point2d -> Property 49 | prop_nearestConsistentWithLinear query = 50 | forAll (listOf1 arbitrary) $ \xs -> 51 | checkNearestConsistentWithLinear pointAsList2d (xs, query) 52 | 53 | inRadiusLinear :: Real a => KDM.PointAsListFn a p -> [(p, v)] -> p -> a -> [(p, v)] 54 | inRadiusLinear pointAsList xs query radius = 55 | filter ((<= radius * radius) . defaultSqrDist pointAsList query . fst) xs 56 | 57 | checkInRadiusEqualToLinear :: (Ord p, Real a) => KDM.PointAsListFn a p -> a -> ([p], p) -> Bool 58 | checkInRadiusEqualToLinear pointAsList radius (ps, query) = 59 | let kdt = build pointAsList $ testElements ps 60 | kdtNear = inRadius kdt radius query 61 | linearNear = inRadiusLinear pointAsList (testElements ps) query radius 62 | in sort kdtNear == sort linearNear 63 | 64 | prop_inRadiusEqualToLinear :: Point2d -> Property 65 | prop_inRadiusEqualToLinear query = 66 | forAll (listOf1 arbitrary) $ \xs -> 67 | forAll (choose (0.0, 1000.0)) $ \radius -> 68 | checkInRadiusEqualToLinear pointAsList2d radius (xs, query) 69 | 70 | checkKNearestConsistentWithLinear :: (Ord p, Real a) => KDM.PointAsListFn a p -> Int -> ([p], p) -> Bool 71 | checkKNearestConsistentWithLinear pointAsList k (xs, query) = 72 | let kdt = build pointAsList $ testElements xs 73 | kdtKNear = kNearest kdt k query 74 | possibleNearest = withinDistanceOfKthNearest pointAsList (testElements xs) query k 75 | in Set.fromList kdtKNear `isSubsetOf` Set.fromList possibleNearest 76 | 77 | prop_kNearestConsistentWithLinear :: Point2d -> Property 78 | prop_kNearestConsistentWithLinear query = 79 | forAll (listOf1 arbitrary) $ \xs -> 80 | forAll (choose (1, length xs)) $ \k -> 81 | checkKNearestConsistentWithLinear pointAsList2d k (xs, query) 82 | 83 | checkKNearestSorted :: (Eq p, Real a) => KDM.PointAsListFn a p -> ([p], p) -> Bool 84 | checkKNearestSorted _ ([], _) = True 85 | checkKNearestSorted pointAsList (ps, query) = 86 | let kdt = build pointAsList $ testElements ps 87 | kNearestDists = 88 | map (defaultSqrDist pointAsList query . fst) $ kNearest kdt (length ps) query 89 | in kNearestDists == sort kNearestDists 90 | 91 | prop_kNearestSorted :: Point2d -> Property 92 | prop_kNearestSorted query = 93 | forAll (listOf1 arbitrary) $ \xs -> 94 | checkKNearestSorted pointAsList2d (xs, query) 95 | 96 | rangeLinear :: Real a => KDM.PointAsListFn a p -> [(p, v)] -> p -> p -> [(p, v)] 97 | rangeLinear pointAsList xs lowers uppers = 98 | let valInRange a lower upper = lower <= a && a <= upper 99 | lowersAsList = pointAsList lowers 100 | uppersAsList = pointAsList uppers 101 | pointInRange (p, _) = 102 | and $ zipWith3 valInRange (pointAsList p) lowersAsList uppersAsList 103 | in filter pointInRange xs 104 | 105 | prop_rangeEqualToLinear :: ([Point2d], Point2d, Point2d) -> Bool 106 | prop_rangeEqualToLinear (xs, lowers, uppers) 107 | | Data.List.null xs = True 108 | | and $ zipWith (<) (pointAsList2d lowers) (pointAsList2d uppers) = 109 | let linear = rangeLinear pointAsList2d (testElements xs) lowers uppers 110 | kdt = build pointAsList2d $ testElements xs 111 | kdtPoints = inRange kdt lowers uppers 112 | in sort linear == sort kdtPoints 113 | | otherwise = True 114 | 115 | prop_equalAxisValueSameElems :: Property 116 | prop_equalAxisValueSameElems = 117 | forAll (listOf1 arbitrary) $ \xs@(Point2d x y : _) -> 118 | checkElements pointAsList2d $ Point2d x (y + 1) : xs 119 | 120 | prop_equalAxisValueEqualToLinear :: Point2d -> Property 121 | prop_equalAxisValueEqualToLinear query = 122 | forAll (listOf1 arbitrary) $ \xs@(Point2d x y : _) -> 123 | checkNearestConsistentWithLinear pointAsList2d (Point2d x (y + 1) : xs, query) 124 | 125 | prop_unbalancedInsertValid :: Property 126 | prop_unbalancedInsertValid = 127 | forAll (listOf1 arbitrary) $ 128 | isValid . batchInsertUnbalanced (empty pointAsList2d) . testElements 129 | 130 | prop_unbalancedInsertNNConsistentWithLinear :: Point2d -> Property 131 | prop_unbalancedInsertNNConsistentWithLinear query = 132 | forAll (listOf1 arbitrary) $ \xs -> 133 | let kdm = batchInsertUnbalanced (empty pointAsList2d) $ testElements xs 134 | in nearest kdm query `elem` nearestsLinear pointAsList2d (testElements xs) query 135 | 136 | -- Run all tests 137 | return [] 138 | runTests :: IO Bool 139 | runTests = $(forAllProperties) $ 140 | -- Vastly increase success counts; finds more bugs and our properties are cheap. 141 | quickCheckWithResult stdArgs{ maxSuccess = 2000 } 142 | 143 | main :: IO () 144 | main = do 145 | success <- runTests 146 | unless success exitFailure 147 | -------------------------------------------------------------------------------- /app-src/Benchmarks/KDTBenchmark.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | 3 | import Data.Point2d 4 | import Data.KdTree.Static as KDT 5 | import Data.KdTree.Dynamic as DKDT 6 | 7 | import Control.DeepSeq 8 | import Control.Monad 9 | import qualified Control.Monad.Random as CMR 10 | import Criterion.Main 11 | import Data.List 12 | import Data.Maybe 13 | import qualified Data.Heap as Q 14 | import System.Random.Mersenne.Pure64 15 | 16 | zeroOnePointSampler :: CMR.Rand PureMT Point2d 17 | zeroOnePointSampler = 18 | liftM2 Point2d 19 | (CMR.getRandomR (0.0, 1.0)) 20 | (CMR.getRandomR (0.0, 1.0)) 21 | 22 | -- Input: List of pairs of points, where first of each pair is the 23 | -- point to add to the dynamic KdTree, and the second is the point to 24 | -- query for nearest neighbor 25 | interleaveBuildQuery :: [(Point2d, Point2d)] -> [Point2d] 26 | interleaveBuildQuery = 27 | let f :: (DKDT.KdTree Double Point2d, [Point2d]) -> 28 | (Point2d, Point2d) -> 29 | (DKDT.KdTree Double Point2d, [Point2d]) 30 | f (kdt, accList) (treePt, queryPt) = 31 | let newKdt = DKDT.insert kdt treePt 32 | near = DKDT.nearest newKdt queryPt 33 | in (newKdt, near : accList) 34 | start = (DKDT.emptyWithDist pointAsList2d distSqr2d, []) 35 | in snd . foldl' f start 36 | 37 | -- nn implemented with optimized linear scan 38 | nearestLinear :: [Point2d] -> Point2d -> Point2d 39 | nearestLinear [] _ = error "nearestLinear called on an empty list!" 40 | nearestLinear (ph : pt) query = fst $ foldl' f (ph, distSqr2d query ph) pt 41 | where {-# INLINE f #-} 42 | f b@(_, dBest) x 43 | | d < dBest = (x, d) 44 | | otherwise = b 45 | where d = distSqr2d query x 46 | 47 | pointsInRadiusLinear :: [Point2d] -> Double -> Point2d -> [Point2d] 48 | pointsInRadiusLinear ps radius query = 49 | filter ((<= radius * radius) . distSqr2d query) ps 50 | 51 | -- knn implemented with priority queue 52 | kNearestNeighborsLinear :: [Point2d] -> Int -> Point2d -> [Point2d] 53 | kNearestNeighborsLinear ps k query = 54 | reverse $ map snd $ Q.toAscList $ foldl' f (Q.empty :: Q.MaxPrioHeap Double Point2d) ps 55 | where f q p = let insertBounded queue dist x 56 | | Q.size queue < k = Q.insert (dist, x) queue 57 | | otherwise = 58 | let ((farthestDist, _), rest) = fromJust $ Q.view queue 59 | in if dist < farthestDist 60 | then Q.insert (dist, x) rest 61 | else queue 62 | in insertBounded q (distSqr2d query p) p 63 | 64 | rangeLinear :: Point2d -> Point2d -> [Point2d] -> [Point2d] 65 | rangeLinear lowers uppers xs = 66 | let lowersAsList = pointAsList2d lowers 67 | uppersAsList = pointAsList2d uppers 68 | valInRange l x u = l <= x && x <= u 69 | pointInRange p = 70 | and $ zipWith3 valInRange 71 | lowersAsList (pointAsList2d p) uppersAsList 72 | in filter pointInRange xs 73 | 74 | pointToBounds :: Point2d -> Double -> (Point2d, Point2d) 75 | pointToBounds (Point2d x y) w = 76 | (Point2d (x - w) (y - w), Point2d (x + w) (y + w)) 77 | 78 | rangeOfPointLinear :: [Point2d] -> Double -> Point2d -> [Point2d] 79 | rangeOfPointLinear xs w q = 80 | let (lowers, uppers) = pointToBounds q w 81 | in rangeLinear lowers uppers xs 82 | 83 | rangeOfPointKdt :: KDT.KdTree Double Point2d -> Double -> Point2d -> [Point2d] 84 | rangeOfPointKdt kdt w q = 85 | let (lowers, uppers) = pointToBounds q w 86 | in KDT.inRange kdt lowers uppers 87 | 88 | linearInterleaveBuildQuery :: [(Point2d, Point2d)] -> [Point2d] 89 | linearInterleaveBuildQuery = 90 | let f :: ([Point2d], [Point2d]) -> (Point2d, Point2d) -> ([Point2d], [Point2d]) 91 | f (ps, accList) (structPt, queryPt) = 92 | let ps' = structPt : ps 93 | near = nearestLinear ps' queryPt 94 | in (ps', near : accList) 95 | in snd . foldl' f ([], []) 96 | 97 | main :: IO () 98 | main = 99 | let seed = 1 100 | treePoints = CMR.evalRand (sequence $ repeat zeroOnePointSampler) $ pureMT seed 101 | kdtN n = KDT.buildWithDist pointAsList2d distSqr2d $ take n treePoints 102 | queryPoints = CMR.evalRand (sequence $ repeat zeroOnePointSampler) $ pureMT (seed + 1) 103 | buildKdtBench n = bench (show n) $ nf kdtN n 104 | nnKdtBench nq np = 105 | bench ("np-" ++ show np ++ "-nq-" ++ show nq) $ 106 | nf (map (KDT.nearest (kdtN np))) (take nq queryPoints) 107 | inRadKdtBench nq r np = 108 | bench ("np-" ++ show np ++ "-nq-" ++ show nq ++ "-r-" ++ show r) $ 109 | nf (map (KDT.inRadius (kdtN np) r)) (take nq queryPoints) 110 | knnKdtBench nq k np = 111 | bench ("np-" ++ show np ++ "-nq-" ++ show nq ++ "-k-" ++ show k) $ 112 | nf (map (KDT.kNearest (kdtN np) k)) (take nq queryPoints) 113 | rangeKdtBench nq w np = 114 | bench ("np-" ++ show np ++ "-nq-" ++ show nq ++ "-w-" ++ show w) $ 115 | nf (map $ rangeOfPointKdt (kdtN np) w) (take nq queryPoints) 116 | nnLinearBench nq np = 117 | bench ("np-" ++ show np ++ "-nq-" ++ show nq) $ 118 | nf (map (nearestLinear (take np treePoints))) (take nq queryPoints) 119 | inRadLinearBench nq r np = 120 | bench ("np-" ++ show np ++ "-nq-" ++ show nq ++ "-r-" ++ show r) $ 121 | nf (map $ pointsInRadiusLinear (take np treePoints) r) (take nq queryPoints) 122 | rangeLinearBench nq w np = 123 | bench ("np-" ++ show np ++ "-nq-" ++ show nq ++ "-w-" ++ show w) $ 124 | nf (map $ rangeOfPointLinear (take np treePoints) w) (take nq queryPoints) 125 | knnLinearBench nq k np = 126 | bench ("np-" ++ show np ++ "-nq-" ++ show nq ++ "-k-" ++ show k) $ 127 | nf (map $ kNearestNeighborsLinear (take np treePoints) k) (take nq queryPoints) 128 | nniDkdtBench n = 129 | bench ("n-" ++ show n) $ 130 | nf interleaveBuildQuery (zip (take n treePoints) (take n queryPoints)) 131 | numQueries = 100 132 | pointSetSizes = [100, 1000, 10000, 100000] 133 | radius = 0.05 134 | numNeighbors = 10 135 | rangeWidth = 0.05 136 | in defaultMain [ 137 | bgroup "linear-nn" $ map (nnLinearBench numQueries) pointSetSizes, 138 | bgroup "linear-rad" $ map (inRadLinearBench numQueries radius) pointSetSizes, 139 | bgroup "linear-knn" $ map (knnLinearBench numQueries numNeighbors) pointSetSizes, 140 | bgroup "linear-range" $ map (rangeLinearBench numQueries rangeWidth) pointSetSizes, 141 | bgroup "kdt-build" $ map buildKdtBench pointSetSizes, 142 | bgroup "kdt-nn" $ map (nnKdtBench numQueries) pointSetSizes, 143 | bgroup "kdt-rad" $ map (inRadKdtBench numQueries radius) pointSetSizes, 144 | bgroup "kdt-knn" $ map (knnKdtBench numQueries numNeighbors) pointSetSizes, 145 | bgroup "kdt-range" $ map (rangeKdtBench numQueries rangeWidth) pointSetSizes, 146 | bgroup "dkdt-nn" $ map nniDkdtBench pointSetSizes 147 | ] 148 | -------------------------------------------------------------------------------- /lib-src/Data/KdMap/Dynamic.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric, CPP #-} 2 | 3 | module Data.KdMap.Dynamic 4 | ( -- * Usage 5 | 6 | -- $usage 7 | 8 | -- * Reference 9 | 10 | -- ** Types 11 | PointAsListFn 12 | , SquaredDistanceFn 13 | , KdMap 14 | -- ** Dynamic /k/-d map construction 15 | , empty 16 | , singleton 17 | , emptyWithDist 18 | , singletonWithDist 19 | -- ** Insertion 20 | , insert 21 | , insertPair 22 | , batchInsert 23 | -- ** Query 24 | , nearest 25 | , inRadius 26 | , kNearest 27 | , inRange 28 | , assocs 29 | , keys 30 | , elems 31 | , null 32 | , size 33 | -- ** Folds 34 | , foldrWithKey 35 | -- ** Utilities 36 | , defaultSqrDist 37 | -- ** Internal (for testing) 38 | , subtreeSizes 39 | ) where 40 | 41 | import Prelude hiding (null) 42 | 43 | #if MIN_VERSION_base(4,8,0) 44 | #else 45 | import Control.Applicative hiding (empty) 46 | import Data.Foldable 47 | import Data.Traversable 48 | #endif 49 | 50 | import Data.Bits 51 | import Data.Function 52 | import qualified Data.List as L 53 | 54 | import Control.DeepSeq 55 | import Control.DeepSeq.Generics (genericRnf) 56 | import GHC.Generics 57 | 58 | import qualified Data.KdMap.Static as KDM 59 | import Data.KdMap.Static (PointAsListFn, SquaredDistanceFn, defaultSqrDist) 60 | 61 | -- $usage 62 | -- 63 | -- The 'KdMap' is a variant of 64 | -- @Data.KdTree.Dynamic.@'Data.KdTree.Dynamic.KdTree' where each point 65 | -- in the tree is associated with some data. It is the dynamic variant 66 | -- of @Data.KdMap.Static.@'Data.KdMap.Static.KdMap'. 67 | -- 68 | -- Here's an example of interleaving point-value insertions and point 69 | -- queries using 'KdMap', where points are 3D points and values are 70 | -- 'String's: 71 | -- 72 | -- @ 73 | -- >>> let dkdm = 'singleton' point3dAsList ((Point3D 0.0 0.0 0.0), \"First\") 74 | -- 75 | -- >>> let dkdm' = 'insert' dkdm ((Point3D 1.0 1.0 1.0), \"Second\") 76 | -- 77 | -- >>> 'nearest' dkdm' (Point3D 0.4 0.4 0.4) 78 | -- (Point3D {x = 0.0, y = 0.0, z = 0.0}, \"First\") 79 | -- 80 | -- >>> let dkdm'' = 'insert' dkdm' ((Point3D 0.5 0.5 0.5), \"Third\") 81 | -- 82 | -- >>> 'nearest' dkdm'' (Point3D 0.4 0.4 0.4) 83 | -- (Point3D {x = 0.5, y = 0.5, z = 0.5}, \"Third\") 84 | -- @ 85 | 86 | -- | A dynamic /k/-d tree structure that stores points of type @p@ 87 | -- with axis values of type @a@. Additionally, each point is 88 | -- associated with a value of type @v@. 89 | data KdMap a p v = KdMap 90 | { _trees :: [KDM.KdMap a p v] 91 | , _pointAsList :: PointAsListFn a p 92 | , _distSqr :: SquaredDistanceFn a p 93 | , _numNodes :: Int 94 | } deriving Generic 95 | instance (NFData a, NFData p, NFData v) => NFData (KdMap a p v) where rnf = genericRnf 96 | 97 | instance (Show a, Show p, Show v) => Show (KdMap a p v) where 98 | show kdm = "KdMap " ++ show (_trees kdm) 99 | 100 | instance Functor (KdMap a p) where 101 | fmap f dkdMap = dkdMap { _trees = map (fmap f) $ _trees dkdMap } 102 | 103 | -- | Performs a foldr over each point-value pair in the 'KdMap'. 104 | foldrWithKey :: ((p, v) -> b -> b) -> b -> KdMap a p v -> b 105 | foldrWithKey f z dkdMap = L.foldr (flip $ KDM.foldrWithKey f) z $ _trees dkdMap 106 | 107 | instance Foldable (KdMap a p) where 108 | foldr f = foldrWithKey (f . snd) 109 | 110 | instance Traversable (KdMap a p) where 111 | traverse f (KdMap t p d n) = 112 | KdMap <$> traverse (traverse f) t <*> pure p <*> pure d <*> pure n 113 | 114 | -- | Generates an empty 'KdMap' with a user-specified distance function. 115 | emptyWithDist :: PointAsListFn a p -> SquaredDistanceFn a p -> KdMap a p v 116 | emptyWithDist p2l d2 = KdMap [] p2l d2 0 117 | 118 | -- | Returns whether the 'KdMap' is empty. 119 | null :: KdMap a p v -> Bool 120 | null (KdMap [] _ _ _) = True 121 | null _ = False 122 | 123 | -- | Generates a 'KdMap' with a single point-value pair using a 124 | -- user-specified distance function. 125 | singletonWithDist :: Real a => PointAsListFn a p 126 | -> SquaredDistanceFn a p 127 | -> (p, v) 128 | -> KdMap a p v 129 | singletonWithDist p2l d2 (k, v) = 130 | KdMap [KDM.buildWithDist p2l d2 [(k, v)]] p2l d2 1 131 | 132 | -- | Generates an empty 'KdMap' with the default distance function. 133 | empty :: Real a => PointAsListFn a p -> KdMap a p v 134 | empty p2l = emptyWithDist p2l $ defaultSqrDist p2l 135 | 136 | -- | Generates a 'KdMap' with a single point-value pair using the 137 | -- default distance function. 138 | singleton :: Real a => PointAsListFn a p -> (p, v) -> KdMap a p v 139 | singleton p2l = singletonWithDist p2l $ defaultSqrDist p2l 140 | 141 | -- | Adds a given point-value pair to a 'KdMap'. 142 | -- 143 | -- Average time complexity per insert for /n/ inserts: /O(log^2(n))/. 144 | insert :: Real a => KdMap a p v -> p -> v -> KdMap a p v 145 | insert (KdMap trees p2l d2 n) k v = 146 | let bitList = map ((1 .&.) . (n `shiftR`)) [0..] 147 | (onesPairs, theRestPairs) = span ((== 1) . fst) $ zip bitList trees 148 | ((_, ones), (_, theRest)) = (unzip onesPairs, unzip theRestPairs) 149 | newTree = KDM.buildWithDist p2l d2 $ (k, v) : L.concatMap KDM.assocs ones 150 | in KdMap (newTree : theRest) p2l d2 $ n + 1 151 | 152 | -- | Same as 'insert', but takes point and value as a pair. 153 | insertPair :: Real a => KdMap a p v -> (p, v) -> KdMap a p v 154 | insertPair t = uncurry (insert t) 155 | 156 | -- | Given a 'KdMap' and a query point, returns the point-value pair in 157 | -- the 'KdMap' with the point nearest to the query. 158 | -- 159 | -- Average time complexity: /O(log^2(n))/. 160 | nearest :: Real a => KdMap a p v -> p -> (p, v) 161 | nearest (KdMap ts _ d2 _) query = 162 | let nearests = map (`KDM.nearest` query) ts 163 | --in if Data.List.null nearests 164 | in if L.null nearests 165 | then error "Called nearest on empty KdMap." 166 | else L.minimumBy (compare `on` (d2 query . fst)) nearests 167 | 168 | -- | Given a 'KdMap', a query point, and a number @k@, returns the 169 | -- @k@ point-value pairs with the nearest points to the query. 170 | -- 171 | -- Neighbors are returned in order of increasing distance from query 172 | -- point. 173 | -- 174 | -- Average time complexity: /log(k) * log^2(n)/ for /k/ nearest 175 | -- neighbors on a structure with /n/ data points. 176 | -- 177 | -- Worst case time complexity: /n * log(k)/ for /k/ nearest neighbors 178 | -- on a structure with /n/ data points. 179 | kNearest :: Real a => KdMap a p v -> Int -> p -> [(p, v)] 180 | kNearest (KdMap trees _ d2 _) k query = 181 | let neighborSets = map (\t -> KDM.kNearest t k query) trees 182 | in take k $ L.foldr merge [] neighborSets 183 | where merge [] ys = ys 184 | merge xs [] = xs 185 | merge xs@(x:xt) ys@(y:yt) 186 | | distX <= distY = x : merge xt ys 187 | | otherwise = y : merge xs yt 188 | where distX = d2 query $ fst x 189 | distY = d2 query $ fst y 190 | 191 | -- | Given a 'KdMap', a query point, and a radius, returns all 192 | -- point-value pairs in the 'KdTree' with points within the given 193 | -- radius of the query point. 194 | -- 195 | -- Points are not returned in any particular order. 196 | -- 197 | -- Worst case time complexity: /O(n)/ for /n/ data points. 198 | inRadius :: Real a => KdMap a p v -> a -> p -> [(p, v)] 199 | inRadius (KdMap trees _ _ _) radius query = 200 | L.concatMap (\t -> KDM.inRadius t radius query) trees 201 | 202 | -- | Finds all point-value pairs in a 'KdMap' with points within a 203 | -- given range, where the range is specified as a set of lower and 204 | -- upper bounds. 205 | -- 206 | -- Points are not returned in any particular order. 207 | -- 208 | -- Worst case time complexity: /O(n)/ for n data points and a range 209 | -- that spans all the points. 210 | inRange :: Real a => KdMap a p v 211 | -> p -- ^ lower bounds of range 212 | -> p -- ^ upper bounds of range 213 | -> [(p, v)] -- ^ point-value pairs within given 214 | -- range 215 | inRange (KdMap trees _ _ _) lowers uppers = 216 | L.concatMap (\t -> KDM.inRange t lowers uppers) trees 217 | 218 | -- | Returns the number of elements in the 'KdMap'. 219 | -- 220 | -- Time complexity: /O(1)/ 221 | size :: KdMap a p v -> Int 222 | size (KdMap _ _ _ n) = n 223 | 224 | -- | Returns a list of all the point-value pairs in the 'KdMap'. 225 | -- 226 | -- Time complexity: /O(n)/ for /n/ data points. 227 | assocs :: KdMap a p v -> [(p, v)] 228 | assocs (KdMap trees _ _ _) = L.concatMap KDM.assocs trees 229 | 230 | -- | Returns all points in the 'KdMap'. 231 | -- 232 | -- Time complexity: /O(n)/ for /n/ data points. 233 | keys :: KdMap a p v -> [p] 234 | keys = map fst . assocs 235 | 236 | -- | Returns all values in the 'KdMap'. 237 | -- 238 | -- Time complexity: /O(n)/ for /n/ data points. 239 | elems :: KdMap a p v -> [v] 240 | elems = map snd . assocs 241 | 242 | -- | Inserts a list of point-value pairs into the 'KdMap'. 243 | -- 244 | -- TODO: This will be made far more efficient than simply repeatedly 245 | -- inserting. 246 | batchInsert :: Real a => KdMap a p v -> [(p, v)] -> KdMap a p v 247 | batchInsert = L.foldl' insertPair 248 | 249 | -- | Returns size of each internal /k/-d tree that makes up the 250 | -- dynamic structure. For internal testing use. 251 | subtreeSizes :: KdMap a p v -> [Int] 252 | subtreeSizes (KdMap trees _ _ _) = map KDM.size trees 253 | -------------------------------------------------------------------------------- /lib-src/Data/KdTree/Static.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | 3 | module Data.KdTree.Static 4 | ( -- * Introduction 5 | 6 | -- $intro 7 | 8 | -- * Usage 9 | 10 | -- $usage 11 | 12 | -- * Variants 13 | 14 | -- ** Dynamic /k/-d trees 15 | 16 | -- $dkdtrees 17 | 18 | -- ** /k/-d maps 19 | 20 | -- $kdmaps 21 | 22 | -- * Advanced 23 | 24 | -- ** Custom distance functions 25 | 26 | -- $customdistancefunctions 27 | 28 | -- ** Axis value types 29 | 30 | -- $axisvaluetypes 31 | 32 | -- * Reference 33 | 34 | -- ** Types 35 | PointAsListFn 36 | , SquaredDistanceFn 37 | , KdTree 38 | -- ** /k/-d tree construction 39 | , empty 40 | , emptyWithDist 41 | , singleton 42 | , singletonWithDist 43 | , build 44 | , buildWithDist 45 | , insertUnbalanced 46 | , batchInsertUnbalanced 47 | -- ** Query 48 | , nearest 49 | , inRadius 50 | , kNearest 51 | , inRange 52 | , toList 53 | , null 54 | , size 55 | -- ** Utilities 56 | , defaultSqrDist 57 | ) where 58 | 59 | import Control.DeepSeq 60 | import Control.DeepSeq.Generics (genericRnf) 61 | import GHC.Generics 62 | 63 | import qualified Data.Foldable as F 64 | import Prelude hiding (null) 65 | 66 | import qualified Data.KdMap.Static as KDM 67 | import Data.KdMap.Static (PointAsListFn, SquaredDistanceFn, defaultSqrDist) 68 | 69 | -- $intro 70 | -- 71 | -- Let's say you have a large set of 3D points called /data points/, 72 | -- and you'd like to be able to quickly perform /point queries/ on the 73 | -- data points. One example of a point query is the /nearest neighbor/ 74 | -- query: given a set of data points @points@ and a query point @p@, 75 | -- which point in @points@ is closest to @p@? 76 | -- 77 | -- We can efficiently solve the nearest neighbor query (along with 78 | -- many other types of point queries) if we appropriately organize the 79 | -- data points. One such method of organization is called the /k/-d 80 | -- tree algorithm, which is implemented in this module. 81 | 82 | -- $usage 83 | -- 84 | -- Let's say you have a list of 3D data points, and each point is of 85 | -- type @Point3d@: 86 | -- 87 | -- @ 88 | -- data Point3d = Point3d { x :: Double 89 | -- , y :: Double 90 | -- , z :: Double 91 | -- } deriving Show 92 | -- @ 93 | -- 94 | -- We call a point's individual values /axis values/ (i.e., @x@, @y@, 95 | -- and @z@ in the case of @Point3d@). 96 | -- 97 | -- In order to generate a /k/-d tree of @Point3d@'s, we need to define 98 | -- a 'PointAsListFn' that expresses the point's axis values as a list: 99 | -- 100 | -- @ 101 | -- point3dAsList :: Point3d -> [Double] 102 | -- point3dAsList (Point3d x y z) = [x, y, z] 103 | -- @ 104 | -- 105 | -- Now we can build a 'KdTree' structure from a list of data points 106 | -- and perform a nearest neighbor query as follows: 107 | -- 108 | -- @ 109 | -- >>> let dataPoints = [(Point3d 0.0 0.0 0.0), (Point3d 1.0 1.0 1.0)] 110 | -- 111 | -- >>> let kdt = 'build' point3dAsList dataPoints 112 | -- 113 | -- >>> let queryPoint = Point3d 0.1 0.1 0.1 114 | -- 115 | -- >>> 'nearest' kdt queryPoint 116 | -- Point3d {x = 0.0, y = 0.0, z = 0.0} 117 | -- @ 118 | 119 | -- $dkdtrees 120 | -- 121 | -- The 'KdTree' structure is meant for static sets of data points. If 122 | -- you need to insert points into an existing /k/-d tree, check out 123 | -- @Data.KdTree.Dynamic.@'Data.KdTree.Dynamic.KdTree'. 124 | 125 | -- $kdmaps 126 | -- 127 | -- If you need to associate additional data with each point in the 128 | -- tree (i.e., points are /keys/ associated with /values/), check out 129 | -- @Data.KdMap.Static.@'Data.KdMap.Static.KdMap' and 130 | -- @Data.KdMap.Dynamic.@'Data.KdMap.Dynamic.KdMap' for static and dynamic 131 | -- variants of this functionality. Please /do not/ try to fake this 132 | -- functionality with a 'KdTree' by augmenting your point type with 133 | -- the extra data; you're gonna have a bad time. 134 | 135 | -- $customdistancefunctions 136 | -- 137 | -- You may have noticed in the previous use case that we never 138 | -- specified what "nearest" means for our points. By default, 139 | -- 'build' uses a Euclidean distance function that is sufficient 140 | -- in most cases. However, point queries are typically faster on a 141 | -- 'KdTree' built with a user-specified custom distance 142 | -- function. Let's generate a 'KdTree' using a custom distance 143 | -- function. 144 | -- 145 | -- One idiosyncrasy about 'KdTree' is that custom distance functions 146 | -- are actually specified as /squared distance/ functions 147 | -- ('SquaredDistanceFn'). This means that your custom distance 148 | -- function must return the /square/ of the actual distance between 149 | -- two points. This is for efficiency: regular distance functions 150 | -- often require expensive square root computations, whereas in our 151 | -- case, the squared distance works fine and doesn't require computing 152 | -- any square roots. Here's an example of a squared distance function 153 | -- for @Point3d@: 154 | -- 155 | -- @ 156 | -- point3dSquaredDistance :: Point3d -> Point3d -> Double 157 | -- point3dSquaredDistance (Point3d x1 y1 z1) (Point3d x2 y2 z2) = 158 | -- let dx = x1 - x2 159 | -- dy = y1 - y2 160 | -- dz = z1 - z2 161 | -- in dx * dx + dy * dy + dz * dz 162 | -- @ 163 | -- 164 | -- We can build a 'KdTree' using our custom distance function as follows: 165 | -- 166 | -- @ 167 | -- >>> let kdt = 'buildWithDist' point3dAsList point3dSquaredDistance points 168 | -- @ 169 | 170 | -- $axisvaluetypes 171 | -- 172 | -- In the above examples, we used a point type with axis values of 173 | -- type 'Double'. We can in fact use axis values of any type that is 174 | -- an instance of the 'Real' typeclass. This means you can use points 175 | -- that are composed of 'Double's, 'Int's, 'Float's, and so on: 176 | -- 177 | -- @ 178 | -- data Point2i = Point2i Int Int 179 | -- 180 | -- point2iAsList :: Point2i -> [Int] 181 | -- point2iAsList (Point2i x y) = [x, y] 182 | -- 183 | -- kdt :: [Point2i] -> KdTree Int Point2i 184 | -- kdt dataPoints = 'build' point2iAsList dataPoints 185 | -- @ 186 | 187 | -- | A /k/-d tree structure that stores points of type @p@ with axis 188 | -- values of type @a@. 189 | newtype KdTree a p = KdTree (KDM.KdMap a p ()) deriving Generic 190 | instance (NFData a, NFData p) => NFData (KdTree a p) where rnf = genericRnf 191 | 192 | instance (Show a, Show p) => Show (KdTree a p) where 193 | show (KdTree kdm) = "KdTree " ++ show kdm 194 | 195 | instance F.Foldable (KdTree a) where 196 | foldr f z (KdTree kdMap) = KDM.foldrWithKey (f . fst) z kdMap 197 | 198 | -- | Builds an empty 'KdTree'. 199 | empty :: Real a => PointAsListFn a p -> KdTree a p 200 | empty = KdTree . KDM.empty 201 | 202 | -- | Builds an empty 'KdTree' using a user-specified squared distance 203 | -- function. 204 | emptyWithDist :: Real a => PointAsListFn a p 205 | -> SquaredDistanceFn a p 206 | -> KdTree a p 207 | emptyWithDist p2l d2 = KdTree $ KDM.emptyWithDist p2l d2 208 | 209 | -- | Builds a 'KdTree' with a single point. 210 | singleton :: Real a => PointAsListFn a p -> p -> KdTree a p 211 | singleton p2l p = KdTree $ KDM.singleton p2l (p, ()) 212 | 213 | -- | Builds a 'KdTree' with a single point using a user-specified 214 | -- squared distance function. 215 | singletonWithDist :: Real a => PointAsListFn a p 216 | -> SquaredDistanceFn a p 217 | -> p 218 | -> KdTree a p 219 | singletonWithDist p2l d2 p = KdTree $ KDM.singletonWithDist p2l d2 (p, ()) 220 | 221 | null :: KdTree a p -> Bool 222 | null (KdTree kdm) = KDM.null kdm 223 | 224 | -- | Builds a 'KdTree' from a list of data points using a default 225 | -- squared distance function 'defaultSqrDist'. 226 | -- 227 | -- Average complexity: /O(n * log(n))/ for /n/ data points. 228 | -- 229 | -- Worst case time complexity: /O(n^2)/ for /n/ data points. 230 | -- 231 | -- Worst case space complexity: /O(n)/ for /n/ data points. 232 | build :: Real a => PointAsListFn a p 233 | -> [p] -- ^ non-empty list of data points to be stored in the /k/-d tree 234 | -> KdTree a p 235 | build pointAsList ps = 236 | KdTree $ KDM.build pointAsList $ zip ps $ repeat () 237 | 238 | -- | Builds a 'KdTree' from a list of data points using a 239 | -- user-specified squared distance function. 240 | -- 241 | -- Average time complexity: /O(n * log(n))/ for /n/ data points. 242 | -- 243 | -- Worst case time complexity: /O(n^2)/ for /n/ data points. 244 | -- 245 | -- Worst case space complexity: /O(n)/ for /n/ data points. 246 | buildWithDist :: Real a => PointAsListFn a p 247 | -> SquaredDistanceFn a p 248 | -> [p] 249 | -> KdTree a p 250 | buildWithDist pointAsList distSqr ps = 251 | KdTree $ KDM.buildWithDist pointAsList distSqr $ zip ps $ repeat () 252 | 253 | -- | Inserts a point into a 'KdTree'. This can potentially 254 | -- cause the internal tree structure to become unbalanced. If the tree 255 | -- becomes too unbalanced, point queries will be very inefficient. If 256 | -- you need to perform lots of point insertions on an already existing 257 | -- /k/-d tree, check out 258 | -- @Data.KdTree.Dynamic.@'Data.KdTree.Dynamic.KdTree'. 259 | -- 260 | -- Average complexity: /O(log(n))/ for /n/ data points. 261 | -- 262 | -- Worse case time complexity: /O(n)/ for /n/ data points. 263 | insertUnbalanced :: Real a => KdTree a p -> p -> KdTree a p 264 | insertUnbalanced (KdTree kdm) p = KdTree $ KDM.insertUnbalanced kdm p () 265 | 266 | -- | Inserts a list of points into a 'KdTree'. This can potentially 267 | -- cause the internal tree structure to become unbalanced, which leads 268 | -- to inefficient point queries. 269 | -- 270 | -- Average complexity: /O(n * log(n))/ for /n/ data points. 271 | -- 272 | -- Worst case time complexity: /O(n^2)/ for /n/ data points. 273 | batchInsertUnbalanced :: Real a => KdTree a p -> [p] -> KdTree a p 274 | batchInsertUnbalanced (KdTree kdm) ps = 275 | KdTree $ KDM.batchInsertUnbalanced kdm $ zip ps $ repeat () 276 | 277 | -- | Given a 'KdTree' and a query point, returns the nearest point 278 | -- in the 'KdTree' to the query point. 279 | -- 280 | -- Average time complexity: /O(log(n))/ for /n/ data points. 281 | -- 282 | -- Worst case time complexity: /O(n)/ for /n/ data points. 283 | -- 284 | -- Throws an error if called on an empty 'KdTree'. 285 | nearest :: Real a => KdTree a p -> p -> p 286 | nearest (KdTree t) query 287 | | KDM.null t = error "Attempted to call nearest on an empty KdTree." 288 | | otherwise = fst $ KDM.nearest t query 289 | 290 | -- | Given a 'KdTree', a query point, and a radius, returns all 291 | -- points in the 'KdTree' that are within the given radius of the 292 | -- query point. 293 | -- 294 | -- Points are not returned in any particular order. 295 | -- 296 | -- Worst case time complexity: /O(n)/ for /n/ data points and 297 | -- a radius that subsumes all points in the structure. 298 | inRadius :: Real a => KdTree a p 299 | -> a -- ^ radius 300 | -> p -- ^ query point 301 | -> [p] -- ^ list of points in tree with given 302 | -- radius of query point 303 | inRadius (KdTree t) radius query = map fst $ KDM.inRadius t radius query 304 | 305 | -- | Given a 'KdTree', a query point, and a number @k@, returns the 306 | -- @k@ nearest points in the 'KdTree' to the query point. 307 | -- 308 | -- Neighbors are returned in order of increasing distance from query 309 | -- point. 310 | -- 311 | -- Average time complexity: /log(k) * log(n)/ for /k/ nearest 312 | -- neighbors on a structure with /n/ data points. 313 | -- 314 | -- Worst case time complexity: /n * log(k)/ for /k/ nearest 315 | -- neighbors on a structure with /n/ data points. 316 | kNearest :: Real a => KdTree a p -> Int -> p -> [p] 317 | kNearest (KdTree t) k query = map fst $ KDM.kNearest t k query 318 | 319 | -- | Finds all points in a 'KdTree' with points within a given range, 320 | -- where the range is specified as a set of lower and upper bounds. 321 | -- 322 | -- Points are not returned in any particular order. 323 | -- 324 | -- Worst case time complexity: /O(n)/ for n data points and a range 325 | -- that spans all the points. 326 | inRange :: Real a => KdTree a p 327 | -> p -- ^ lower bounds of range 328 | -> p -- ^ upper bounds of range 329 | -> [p] -- ^ all points within given range 330 | inRange (KdTree t) lower upper = map fst $ KDM.inRange t lower upper 331 | 332 | -- | Returns a list of all the points in the 'KdTree'. 333 | -- 334 | -- Time complexity: /O(n)/ for /n/ data points. 335 | toList :: KdTree a p -> [p] 336 | toList (KdTree t) = KDM.keys t 337 | 338 | -- | Returns the number of elements in the 'KdTree'. 339 | -- 340 | -- Time complexity: /O(1)/ 341 | size :: KdTree a p -> Int 342 | size (KdTree t) = KDM.size t 343 | -------------------------------------------------------------------------------- /lib-src/Data/KdMap/Static.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric, CPP, FlexibleContexts #-} 2 | 3 | module Data.KdMap.Static 4 | ( -- * Usage 5 | 6 | -- $usage 7 | 8 | -- * Reference 9 | 10 | -- ** Types 11 | PointAsListFn 12 | , SquaredDistanceFn 13 | , KdMap 14 | -- ** /k/-d map construction 15 | , empty 16 | , emptyWithDist 17 | , singleton 18 | , singletonWithDist 19 | , build 20 | , buildWithDist 21 | , insertUnbalanced 22 | , batchInsertUnbalanced 23 | -- ** Query 24 | , nearest 25 | , inRadius 26 | , kNearest 27 | , inRange 28 | , assocs 29 | , keys 30 | , elems 31 | , null 32 | , size 33 | -- ** Folds 34 | , foldrWithKey 35 | -- ** Utilities 36 | , defaultSqrDist 37 | -- ** Advanced 38 | , TreeNode(..) 39 | -- ** Internal (for testing) 40 | , isValid 41 | ) where 42 | 43 | import Control.DeepSeq 44 | import Control.DeepSeq.Generics (genericRnf) 45 | import GHC.Generics 46 | 47 | import Control.Applicative hiding (empty) 48 | 49 | #if MIN_VERSION_base(4,8,0) 50 | import Data.Foldable hiding (null) 51 | #else 52 | import Data.Foldable 53 | import Data.Traversable 54 | #endif 55 | 56 | import Prelude hiding (null) 57 | import qualified Data.List as L 58 | import Data.Maybe 59 | import Data.Ord 60 | import qualified Data.Heap as Q 61 | 62 | -- $usage 63 | -- 64 | -- The 'KdMap' is a variant of 'Data.KdTree.Static.KdTree' where each point in 65 | -- the tree is associated with some data. When talking about 'KdMap's, 66 | -- we'll refer to the points and their associated data as the /points/ 67 | -- and /values/ of the 'KdMap', respectively. It might help to think 68 | -- of 'Data.KdTree.Static.KdTree' and 'KdMap' as being analogous to 69 | -- 'Set' and 'Map'. 70 | -- 71 | -- Suppose you wanted to perform point queries on a set of 3D points, 72 | -- where each point is associated with a 'String'. Here's how to build 73 | -- a 'KdMap' of the data and perform a nearest neighbor query (if this 74 | -- doesn't make sense, start with the documentation for 75 | -- 'Data.KdTree.Static.KdTree'): 76 | -- 77 | -- @ 78 | -- >>> let points = [(Point3d 0.0 0.0 0.0), (Point3d 1.0 1.0 1.0)] 79 | -- 80 | -- >>> let valueStrings = [\"First\", \"Second\"] 81 | -- 82 | -- >>> let pointValuePairs = 'zip' points valueStrings 83 | -- 84 | -- >>> let kdm = 'build' point3dAsList pointValuePairs 85 | -- 86 | -- >>> 'nearest' kdm (Point3d 0.1 0.1 0.1) 87 | -- [Point3d {x = 0.0, y = 0.0, z = 0.0}, \"First\"] 88 | -- @ 89 | 90 | -- | A node of a /k/-d tree structure that stores a point of type @p@ 91 | -- with axis values of type @a@. Additionally, each point is 92 | -- associated with a value of type @v@. Note: users typically will not 93 | -- need to use this type, but we export it just in case. 94 | data TreeNode a p v = TreeNode { _treeLeft :: TreeNode a p v 95 | , _treePoint :: (p, v) 96 | , _axisValue :: a 97 | , _treeRight :: TreeNode a p v 98 | } | 99 | Empty 100 | deriving (Generic, Show, Read) 101 | instance (NFData a, NFData p, NFData v) => NFData (TreeNode a p v) where rnf = genericRnf 102 | 103 | mapTreeNode :: (v1 -> v2) -> TreeNode a p v1 -> TreeNode a p v2 104 | mapTreeNode _ Empty = Empty 105 | mapTreeNode f (TreeNode left (k, v) axisValue right) = 106 | TreeNode (mapTreeNode f left) (k, f v) axisValue (mapTreeNode f right) 107 | 108 | -- | Converts a point of type @p@ with axis values of type 109 | -- @a@ into a list of axis values [a]. 110 | type PointAsListFn a p = p -> [a] 111 | 112 | -- | Returns the squared distance between two points of type 113 | -- @p@ with axis values of type @a@. 114 | type SquaredDistanceFn a p = p -> p -> a 115 | 116 | -- | A /k/-d tree structure that stores points of type @p@ with axis 117 | -- values of type @a@. Additionally, each point is associated with a 118 | -- value of type @v@. 119 | data KdMap a p v = KdMap { _pointAsList :: PointAsListFn a p 120 | , _distSqr :: SquaredDistanceFn a p 121 | , _rootNode :: TreeNode a p v 122 | , _size :: Int 123 | } deriving Generic 124 | instance (NFData a, NFData p, NFData v) => NFData (KdMap a p v) where rnf = genericRnf 125 | 126 | instance (Show a, Show p, Show v) => Show (KdMap a p v) where 127 | show (KdMap _ _ rootNode _) = "KdMap " ++ show rootNode 128 | 129 | instance Functor (KdMap a p) where 130 | fmap f kdMap = kdMap { _rootNode = mapTreeNode f (_rootNode kdMap) } 131 | 132 | foldrTreeNode :: ((p, v) -> b -> b) -> b -> TreeNode a p v -> b 133 | foldrTreeNode _ z Empty = z 134 | foldrTreeNode f z (TreeNode left p _ right) = 135 | foldrTreeNode f (f p (foldrTreeNode f z right)) left 136 | 137 | -- | Performs a foldr over each point-value pair in the 'KdMap'. 138 | foldrWithKey :: ((p, v) -> b -> b) -> b -> KdMap a p v -> b 139 | foldrWithKey f z (KdMap _ _ r _) = foldrTreeNode f z r 140 | 141 | instance Foldable (KdMap a p) where 142 | foldr f = foldrWithKey (f . snd) 143 | 144 | traverseTreeNode :: Applicative f => (b -> f c) -> TreeNode a p b -> f (TreeNode a p c) 145 | traverseTreeNode _ Empty = pure Empty 146 | traverseTreeNode f (TreeNode l (p, v) axisValue r) = 147 | TreeNode <$> 148 | traverseTreeNode f l <*> 149 | ((,) p <$> f v) <*> -- would simply be traverse f (p, v), but 150 | -- base-4.6.* doesn't have a Traversable 151 | -- instance for tuples. 152 | pure axisValue <*> 153 | traverseTreeNode f r 154 | 155 | instance Traversable (KdMap a p) where 156 | traverse f (KdMap p d r n) = 157 | KdMap <$> pure p <*> pure d <*> traverseTreeNode f r <*> pure n 158 | 159 | -- | Builds an empty 'KdMap'. 160 | empty :: Real a => PointAsListFn a p -> KdMap a p v 161 | empty p2l = emptyWithDist p2l (defaultSqrDist p2l) 162 | 163 | -- | Builds an empty 'KdMap' using a user-specified squared distance 164 | -- function. 165 | emptyWithDist :: Real a => PointAsListFn a p 166 | -> SquaredDistanceFn a p 167 | -> KdMap a p v 168 | emptyWithDist p2l d2 = KdMap p2l d2 Empty 0 169 | 170 | -- | Returns 'True' if the given 'KdMap' is empty. 171 | null :: KdMap a p v -> Bool 172 | null kdm = _size kdm == 0 173 | 174 | -- | Builds a 'KdMap' with a single point-value pair and a 175 | -- user-specified squared distance function. 176 | singletonWithDist :: Real a => PointAsListFn a p 177 | -> SquaredDistanceFn a p 178 | -> (p, v) 179 | -> KdMap a p v 180 | singletonWithDist p2l d2 (p, v) = 181 | let singletonTreeNode = TreeNode Empty (p, v) (head $ p2l p) Empty 182 | in KdMap p2l d2 singletonTreeNode 1 183 | 184 | -- | Builds a 'KdMap' with a single point-value pair. 185 | singleton :: Real a => PointAsListFn a p -> (p, v) -> KdMap a p v 186 | singleton p2l (p, v) = singletonWithDist p2l (defaultSqrDist p2l) (p, v) 187 | 188 | quickselect :: (b -> b -> Ordering) -> Int -> [b] -> b 189 | quickselect cmp = go 190 | where go _ [] = error "quickselect must be called on a non-empty list." 191 | go k (x:xs) | k < l = go k ys 192 | | k > l = go (k - l - 1) zs 193 | | otherwise = x 194 | where (ys, zs) = L.partition ((== LT) . (`cmp` x)) xs 195 | l = length ys 196 | 197 | -- | Builds a 'KdMap' from a list of pairs of points (of type p) and 198 | -- values (of type v), using a user-specified squared distance 199 | -- function. 200 | -- 201 | -- Average time complexity: /O(n * log(n))/ for /n/ data points. 202 | -- 203 | -- Worst case time complexity: /O(n^2)/ for /n/ data points. 204 | -- 205 | -- Worst case space complexity: /O(n)/ for /n/ data points. 206 | buildWithDist :: Real a => PointAsListFn a p 207 | -> SquaredDistanceFn a p 208 | -> [(p, v)] 209 | -> KdMap a p v 210 | buildWithDist p2l d2 [] = emptyWithDist p2l d2 211 | buildWithDist pointAsList distSqr dataPoints = 212 | let axisValsPointsPairs = zip (map (cycle . pointAsList . fst) dataPoints) dataPoints 213 | in KdMap { _pointAsList = pointAsList 214 | , _distSqr = distSqr 215 | , _rootNode = buildTreeInternal axisValsPointsPairs 216 | , _size = length dataPoints 217 | } 218 | where buildTreeInternal [] = Empty 219 | buildTreeInternal ps = 220 | let n = length ps 221 | (medianAxisVal : _, _) = 222 | quickselect (comparing (head . fst)) (n `div` 2) ps 223 | f ([], _) _ = error "buildKdMap.f: no empty lists allowed!" 224 | f (v : vt, p) (lt, maybeMedian, gt) 225 | | v < medianAxisVal = ((vt, p) : lt, maybeMedian, gt) 226 | | v > medianAxisVal = (lt, maybeMedian, (vt, p) : gt) 227 | | otherwise = 228 | case maybeMedian of 229 | Nothing -> (lt, Just p, gt) 230 | Just _ -> ((vt, p) : lt, maybeMedian, gt) 231 | (leftPoints, maybeMedianPt, rightPoints) = L.foldr f ([], Nothing, []) ps 232 | in TreeNode 233 | { _treeLeft = buildTreeInternal leftPoints 234 | , _treePoint = fromJust maybeMedianPt 235 | , _axisValue = medianAxisVal 236 | , _treeRight = buildTreeInternal rightPoints 237 | } 238 | 239 | -- | A default implementation of squared distance given two points and 240 | -- a 'PointAsListFn'. 241 | defaultSqrDist :: Num a => PointAsListFn a p -> SquaredDistanceFn a p 242 | defaultSqrDist pointAsList k1 k2 = 243 | L.sum $ map (^ (2 :: Int)) $ zipWith (-) (pointAsList k1) (pointAsList k2) 244 | 245 | -- | Builds a 'KdTree' from a list of pairs of points (of type p) and 246 | -- values (of type v) using a default squared distance function 247 | -- 'defaultSqrDist'. 248 | -- 249 | -- Average complexity: /O(n * log(n))/ for /n/ data points. 250 | -- 251 | -- Worst case time complexity: /O(n^2)/ for /n/ data points. 252 | -- 253 | -- Worst case space complexity: /O(n)/ for /n/ data points. 254 | build :: Real a => PointAsListFn a p -> [(p, v)] -> KdMap a p v 255 | build pointAsList = 256 | buildWithDist pointAsList $ defaultSqrDist pointAsList 257 | 258 | -- | Inserts a point-value pair into a 'KdMap'. This can potentially 259 | -- cause the internal tree structure to become unbalanced. If the tree 260 | -- becomes too unbalanced, point queries will be very inefficient. If 261 | -- you need to perform lots of point insertions on an already existing 262 | -- /k/-d map, check out 263 | -- @Data.KdMap.Dynamic.@'Data.KdMap.Dynamic.KdMap'. 264 | -- 265 | -- Average complexity: /O(log(n))/ for /n/ data points. 266 | -- 267 | -- Worst case time complexity: /O(n)/ for /n/ data points. 268 | insertUnbalanced :: Real a => KdMap a p v -> p -> v -> KdMap a p v 269 | insertUnbalanced kdm@(KdMap pointAsList _ rootNode n) p' v' = 270 | kdm { _rootNode = go rootNode (cycle $ pointAsList p'), _size = n + 1 } 271 | where 272 | go _ [] = error "insertUnbalanced.go: no empty lists allowed!" 273 | go Empty (axisValue' : _) = TreeNode Empty (p', v') axisValue' Empty 274 | go t@(TreeNode left _ nodeAxisValue right) (axisValue' : nextAxisValues) 275 | | axisValue' <= nodeAxisValue = t { _treeLeft = go left nextAxisValues } 276 | | otherwise = t { _treeRight = go right nextAxisValues } 277 | 278 | -- | Inserts a list of point-value pairs into a 'KdMap'. This can 279 | -- potentially cause the internal tree structure to become unbalanced, 280 | -- which leads to inefficient point queries. 281 | -- 282 | -- Average complexity: /O(n * log(n))/ for /n/ data points. 283 | -- 284 | -- Worst case time complexity: /O(n^2)/ for /n/ data points. 285 | batchInsertUnbalanced :: Real a => KdMap a p v -> [(p, v)] -> KdMap a p v 286 | batchInsertUnbalanced = foldl' $ \kdm (p, v) -> insertUnbalanced kdm p v 287 | 288 | assocsInternal :: TreeNode a p v -> [(p, v)] 289 | assocsInternal t = go t [] 290 | where go Empty = id 291 | go (TreeNode l p _ r) = go l . (p :) . go r 292 | 293 | -- | Returns a list of all the point-value pairs in the 'KdMap'. 294 | -- 295 | -- Time complexity: /O(n)/ for /n/ data points. 296 | assocs :: KdMap a p v -> [(p, v)] 297 | assocs (KdMap _ _ t _) = assocsInternal t 298 | 299 | -- | Returns all points in the 'KdMap'. 300 | -- 301 | -- Time complexity: /O(n)/ for /n/ data points. 302 | keys :: KdMap a p v -> [p] 303 | keys = map fst . assocs 304 | 305 | -- | Returns all values in the 'KdMap'. 306 | -- 307 | -- Time complexity: /O(n)/ for /n/ data points. 308 | elems :: KdMap a p v -> [v] 309 | elems = map snd . assocs 310 | 311 | -- | Given a 'KdMap' and a query point, returns the point-value pair 312 | -- in the 'KdMap' with the point nearest to the query. 313 | -- 314 | -- Average time complexity: /O(log(n))/ for /n/ data points. 315 | -- 316 | -- Worst case time complexity: /O(n)/ for /n/ data points. 317 | -- 318 | -- Throws error if called on an empty 'KdMap'. 319 | nearest :: Real a => KdMap a p v -> p -> (p, v) 320 | nearest (KdMap _ _ Empty _) _ = 321 | error "Attempted to call nearest on an empty KdMap." 322 | nearest (KdMap pointAsList distSqr t@(TreeNode _ root _ _) _) query = 323 | -- This is an ugly way to kickstart the function but it's faster 324 | -- than using a Maybe. 325 | fst $ go (root, distSqr (fst root) query) (cycle $ pointAsList query) t 326 | where 327 | go _ [] _ = error "nearest.go: no empty lists allowed!" 328 | go bestSoFar _ Empty = bestSoFar 329 | go bestSoFar 330 | (queryAxisValue : qvs) 331 | (TreeNode left (nodeK, nodeV) nodeAxisVal right) = 332 | let better x1@(_, dist1) x2@(_, dist2) = if dist1 < dist2 333 | then x1 334 | else x2 335 | currDist = distSqr query nodeK 336 | bestAfterNode = better ((nodeK, nodeV), currDist) bestSoFar 337 | nearestInTree onsideSubtree offsideSubtree = 338 | let bestAfterOnside = go bestAfterNode qvs onsideSubtree 339 | checkOffsideSubtree = 340 | (queryAxisValue - nodeAxisVal)^(2 :: Int) < snd bestAfterOnside 341 | in if checkOffsideSubtree 342 | then go bestAfterOnside qvs offsideSubtree 343 | else bestAfterOnside 344 | in if queryAxisValue <= nodeAxisVal 345 | then nearestInTree left right 346 | else nearestInTree right left 347 | 348 | -- | Given a 'KdMap', a query point, and a radius, returns all 349 | -- point-value pairs in the 'KdMap' with points within the given 350 | -- radius of the query point. 351 | -- 352 | -- Points are not returned in any particular order. 353 | -- 354 | -- Worst case time complexity: /O(n)/ for /n/ data points and a radius 355 | -- that spans all points in the structure. 356 | inRadius :: Real a => KdMap a p v 357 | -> a -- ^ radius 358 | -> p -- ^ query point 359 | -> [(p, v)] -- ^ list of point-value pairs with 360 | -- points within given radius of query 361 | inRadius (KdMap pointAsList distSqr t _) radius query = 362 | go (cycle $ pointAsList query) t [] 363 | where 364 | go [] _ _ = error "inRadius.go: no empty lists allowed!" 365 | go _ Empty acc = acc 366 | go (queryAxisValue : qvs) (TreeNode left (k, v) nodeAxisVal right) acc = 367 | let onTheLeft = queryAxisValue <= nodeAxisVal 368 | accAfterOnside = if onTheLeft 369 | then go qvs left acc 370 | else go qvs right acc 371 | accAfterOffside = if abs (queryAxisValue - nodeAxisVal) < radius 372 | then if onTheLeft 373 | then go qvs right accAfterOnside 374 | else go qvs left accAfterOnside 375 | else accAfterOnside 376 | accAfterCurrent = if distSqr k query <= radius * radius 377 | then (k, v) : accAfterOffside 378 | else accAfterOffside 379 | in accAfterCurrent 380 | 381 | -- | Given a 'KdMap', a query point, and a number @k@, returns the @k@ 382 | -- point-value pairs with the nearest points to the query. 383 | -- 384 | -- Neighbors are returned in order of increasing distance from query 385 | -- point. 386 | -- 387 | -- Average time complexity: /log(k) * log(n)/ for /k/ nearest 388 | -- neighbors on a structure with /n/ data points. 389 | -- 390 | -- Worst case time complexity: /n * log(k)/ for /k/ nearest 391 | -- neighbors on a structure with /n/ data points. 392 | kNearest :: Real a => KdMap a p v -> Int -> p -> [(p, v)] 393 | kNearest (KdMap pointAsList distSqr t _) numNeighbors query = 394 | reverse $ map snd $ Q.toAscList $ go (cycle $ pointAsList query) 395 | (Q.empty :: Q.MaxPrioHeap a (p,v)) t 396 | where 397 | -- go :: [a] -> Q.MaxPrioHeap a (p, v) -> TreeNode a p v -> Q.MaxPrioHeap a (p, v) 398 | go [] _ _ = error "kNearest.go: no empty lists allowed!" 399 | go _ q Empty = q 400 | go (queryAxisValue : qvs) q (TreeNode left (k, v) nodeAxisVal right) = 401 | let insertBounded queue dist x 402 | | Q.size queue < numNeighbors = Q.insert (dist, x) queue 403 | | otherwise = let ((farthestDist, _), rest) = fromJust $ Q.view queue 404 | in if dist < farthestDist 405 | then Q.insert (dist, x) rest 406 | else queue 407 | q' = insertBounded q (distSqr k query) (k, v) 408 | kNear queue onsideSubtree offsideSubtree = 409 | let queue' = go qvs queue onsideSubtree 410 | checkOffsideTree = 411 | Q.size queue' < numNeighbors || 412 | (queryAxisValue - nodeAxisVal)^(2 :: Int) < 413 | (fst . fst) (fromJust $ Q.view queue') 414 | in if checkOffsideTree 415 | then go qvs queue' offsideSubtree 416 | else queue' 417 | in if queryAxisValue <= nodeAxisVal 418 | then kNear q' left right 419 | else kNear q' right left 420 | 421 | -- | Finds all point-value pairs in a 'KdMap' with points within a 422 | -- given range, where the range is specified as a set of lower and 423 | -- upper bounds. 424 | -- 425 | -- Points are not returned in any particular order. 426 | -- 427 | -- Worst case time complexity: /O(n)/ for n data points and a range 428 | -- that spans all the points. 429 | -- 430 | -- TODO: Maybe use known bounds on entire tree structure to be able to 431 | -- automatically count whole portions of tree as being within given 432 | -- range. 433 | inRange :: Real a => KdMap a p v 434 | -> p -- ^ lower bounds of range 435 | -> p -- ^ upper bounds of range 436 | -> [(p, v)] -- ^ point-value pairs within given 437 | -- range 438 | inRange (KdMap pointAsList _ t _) lowers uppers = 439 | go (cycle (pointAsList lowers) `zip` cycle (pointAsList uppers)) t [] 440 | where 441 | go [] _ _ = error "inRange.go: no empty lists allowed!" 442 | go _ Empty acc = acc 443 | go ((lower, upper) : nextBounds) (TreeNode left p nodeAxisVal right) acc = 444 | let accAfterLeft = if lower <= nodeAxisVal 445 | then go nextBounds left acc 446 | else acc 447 | accAfterRight = if upper > nodeAxisVal 448 | then go nextBounds right accAfterLeft 449 | else accAfterLeft 450 | valInRange l x u = l <= x && x <= u 451 | -- maybe "cache" lowers and uppers as lists sooner as hint 452 | -- to ghc. Also, maybe only need to check previously 453 | -- unchecked axes? 454 | currentInRange = 455 | L.and $ zipWith3 valInRange 456 | (pointAsList lowers) (pointAsList $ fst p) (pointAsList uppers) 457 | accAfterCurrent = if currentInRange 458 | then p : accAfterRight 459 | else accAfterRight 460 | in accAfterCurrent 461 | 462 | -- | Returns the number of point-value pairs in the 'KdMap'. 463 | -- 464 | -- Time complexity: /O(1)/ 465 | size :: KdMap a p v -> Int 466 | size (KdMap _ _ _ n) = n 467 | 468 | isTreeNodeValid :: Real a => PointAsListFn a p -> Int -> TreeNode a p v -> Bool 469 | isTreeNodeValid _ _ Empty = True 470 | isTreeNodeValid pointAsList axis (TreeNode l (k, _) nodeAxisVal r) = 471 | let childrenAxisValues = map ((!! axis) . pointAsList . fst) . assocsInternal 472 | leftSubtreeLess = L.all (<= nodeAxisVal) $ childrenAxisValues l 473 | rightSubtreeGreater = L.all (> nodeAxisVal) $ childrenAxisValues r 474 | nextAxis = (axis + 1) `mod` length (pointAsList k) 475 | in leftSubtreeLess && rightSubtreeGreater && 476 | isTreeNodeValid pointAsList nextAxis l && isTreeNodeValid pointAsList nextAxis r 477 | 478 | -- | Returns 'True' if tree structure adheres to k-d tree 479 | -- properties. For internal testing use. 480 | isValid :: Real a => KdMap a p v -> Bool 481 | isValid (KdMap pointAsList _ r _) = isTreeNodeValid pointAsList 0 r 482 | --------------------------------------------------------------------------------