├── .hgignore ├── .hgtags ├── Data └── SuffixTree.hs ├── LICENSE ├── Setup.lhs ├── examples ├── Makefile ├── Tiny.hs └── UniqueMatch.hs ├── suffixtree.cabal └── tests ├── Makefile └── SuffixCheck.hs /.hgignore: -------------------------------------------------------------------------------- 1 | .*\.(?:aux|h[ip]|o|orig|out|pdf|prof|ps|rej)$ 2 | ^(?:dist|\.DS_Store)$ 3 | 4 | syntax: glob 5 | cabal-dev 6 | *~ 7 | .*.swp 8 | .\#* 9 | \#* 10 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | 5b1107427dcad057b076e18bc016c519607a740f 0_2_2 2 | 36517f098c5146ae53d641a9cfec15565beb1301 0.2.2.1 3 | -------------------------------------------------------------------------------- /Data/SuffixTree.hs: -------------------------------------------------------------------------------- 1 | {- Fastest when compiled as follows: ghc -O2 -optc-O3 -funbox-strict-fields -} 2 | 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Data.SuffixTree 6 | -- Copyright : (c) Bryan O'Sullivan 2007 7 | -- License : BSD-style 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- A lazy, efficient suffix tree implementation. 13 | -- 14 | -- Since many function names (but not the type name) clash with 15 | -- "Prelude" names, this module is usually imported @qualified@, e.g. 16 | -- 17 | -- > import Data.SuffixTree (STree) 18 | -- > import qualified Data.SuffixTree as T 19 | -- 20 | -- The implementation is based on the first of those described in the 21 | -- following paper: 22 | -- 23 | -- * Robert Giegerich and Stefan Kurtz, \"/A comparison of 24 | -- imperative and purely functional suffix tree constructions/\", 25 | -- Science of Computer Programming 25(2-3):187-218, 1995, 26 | -- 27 | -- 28 | -- This implementation constructs the suffix tree lazily, so subtrees 29 | -- are not created until they are traversed. Two construction 30 | -- functions are provided, 'constructWith' for sequences composed of 31 | -- small alphabets, and 'construct' for larger alphabets. 32 | -- 33 | -- Estimates are given for performance. The value /k/ is a constant; 34 | -- /n/ is the length of a query string; and /t/ is the number of 35 | -- elements (nodes and leaves) in a suffix tree. 36 | 37 | module Data.SuffixTree 38 | ( 39 | -- * Types 40 | Alphabet 41 | , Edge 42 | , Prefix 43 | , STree(..) 44 | 45 | -- * Construction 46 | , constructWith 47 | , construct 48 | 49 | -- * Querying 50 | , elem 51 | , findEdge 52 | , findTree 53 | , findPath 54 | , countLeaves 55 | , countRepeats 56 | 57 | -- * Traversal 58 | , foldr 59 | , foldl 60 | , fold 61 | 62 | -- * Other useful functions 63 | , mkPrefix 64 | , prefix 65 | , suffixes 66 | ) where 67 | 68 | import Prelude hiding (elem, foldl, foldr) 69 | import qualified Data.Map as M 70 | import Control.Arrow (second) 71 | import qualified Data.ByteString as SB 72 | import qualified Data.ByteString.Lazy as LB 73 | import qualified Data.List as L 74 | import Data.Maybe (listToMaybe, mapMaybe) 75 | 76 | -- | The length of a prefix list. This type is formulated to do cheap 77 | -- work eagerly (to avoid constructing a pile of deferred thunks), 78 | -- while deferring potentially expensive work (computing the length of 79 | -- a list). 80 | data Length a = Exactly {-# UNPACK #-} !Int 81 | | Sum {-# UNPACK #-} !Int [a] 82 | deriving (Show) 83 | 84 | -- | The list of symbols that 'constructWith' can possibly see in its 85 | -- input. 86 | type Alphabet a = [a] 87 | 88 | -- | The prefix string associated with an 'Edge'. Use 'mkPrefix' to 89 | -- create a value of this type, and 'prefix' to deconstruct one. 90 | newtype Prefix a = Prefix ([a], Length a) 91 | 92 | instance (Eq a) => Eq (Prefix a) where 93 | a == b = prefix a == prefix b 94 | 95 | instance (Ord a) => Ord (Prefix a) where 96 | compare a b = compare (prefix a) (prefix b) 97 | 98 | instance (Show a) => Show (Prefix a) where 99 | show a = "mkPrefix " ++ show (prefix a) 100 | 101 | type EdgeFunction a = [[a]] -> (Length a, [[a]]) 102 | 103 | -- | An edge in the suffix tree. 104 | type Edge a = (Prefix a, STree a) 105 | 106 | -- | /O(1)/. Construct a 'Prefix' value. 107 | mkPrefix :: [a] -> Prefix a 108 | mkPrefix xs = Prefix (xs, Sum 0 xs) 109 | 110 | pmap :: (a -> b) -> Prefix a -> Prefix b 111 | pmap f = mkPrefix . map f . prefix 112 | 113 | instance Functor Prefix where 114 | fmap = pmap 115 | 116 | -- | The suffix tree type. The implementation is exposed to ease the 117 | -- development of custom traversal functions. Note that @('Prefix' a, 118 | -- 'STree' a)@ pairs are not stored in any order. 119 | data STree a = Node [Edge a] 120 | | Leaf 121 | deriving (Show) 122 | 123 | smap :: (a -> b) -> STree a -> STree b 124 | smap _ Leaf = Leaf 125 | smap f (Node es) = Node (map (\(p, t) -> (fmap f p, smap f t)) es) 126 | 127 | instance Functor STree where 128 | fmap = smap 129 | 130 | -- | /O(n)/. Obtain the list stored in a 'Prefix'. 131 | prefix :: Prefix a -> [a] 132 | prefix (Prefix (ys, Exactly n)) = take n ys 133 | prefix (Prefix (ys, Sum n xs)) = tk n ys 134 | where tk 0 ys = zipWith (const id) xs ys 135 | tk n (y:ys) = y : tk (n-1) ys 136 | 137 | -- | /O(t)/. Folds the edges in a tree, using post-order traversal. 138 | -- Suitable for lazy use. 139 | foldr :: (Prefix a -> b -> b) -> b -> STree a -> b 140 | foldr _ z Leaf = z 141 | foldr f z (Node es) = L.foldr (\(p,t) v -> f p (foldr f v t)) z es 142 | 143 | -- | /O(t)/. Folds the edges in a tree, using pre-order traversal. The 144 | -- step function is evaluated strictly. 145 | foldl :: (a -> Prefix b -> a) -- ^ step function (evaluated strictly) 146 | -> a -- ^ initial state 147 | -> STree b 148 | -> a 149 | foldl _ z Leaf = z 150 | foldl f z (Node es) = L.foldl' (\v (p,t) -> f (foldl f v t) p) z es 151 | 152 | -- | /O(t)/. Generic fold over a tree. 153 | -- 154 | -- A few simple examples. 155 | -- 156 | -- >countLeaves == fold id id (const const) (1+) 0 157 | -- 158 | -- >countEdges = fold id id (\_ a _ -> a+1) id 0 159 | -- 160 | -- This more complicated example generates a tree of the same shape, 161 | -- but new type, with annotated leaves. 162 | -- 163 | -- @ 164 | --data GenTree a b = GenNode [('Prefix' a, GenTree a b)] 165 | -- | GenLeaf b 166 | -- deriving ('Show') 167 | -- @ 168 | -- 169 | -- @ 170 | --gentree :: 'STree' a -> GenTree a Int 171 | --gentree = 'fold' reset id fprefix reset leaf 172 | -- where leaf = GenLeaf 1 173 | -- reset = 'const' leaf 174 | -- fprefix p t (GenLeaf _) = GenNode [(p, t)] 175 | -- fprefix p t (GenNode es) = GenNode ((p, t):es) 176 | -- @ 177 | fold :: (a -> a) -- ^ downwards state transformer 178 | -> (a -> a) -- ^ upwards state transformer 179 | -> (Prefix b -> a -> a -> a) -- ^ edge state transformer 180 | -> (a -> a) -- ^ leaf state transformer 181 | -> a -- ^ initial state 182 | -> STree b -- ^ tree 183 | -> a 184 | fold fdown fup fprefix fleaf = go 185 | where go v Leaf = fleaf v 186 | go v (Node es) = fup (L.foldr edge v es) 187 | edge (p, t) v = fprefix p (go (fdown v) t) v 188 | 189 | -- | Increments the length of a prefix. 190 | inc :: Length a -> Length a 191 | inc (Exactly n) = Exactly (n+1) 192 | inc (Sum n xs) = Sum (n+1) xs 193 | 194 | lazyTreeWith :: (Eq a) => EdgeFunction a -> Alphabet a -> [a] -> STree a 195 | lazyTreeWith edge alphabet = suf . suffixes 196 | where suf [[]] = Leaf 197 | suf ss = Node [(Prefix (a:sa, inc cpl), suf ssr) 198 | | a <- alphabet, 199 | n@(sa:_) <- [ss `clusterBy` a], 200 | (cpl,ssr) <- [edge n]] 201 | clusterBy ss a = [cs | c:cs <- ss, c == a] 202 | 203 | -- | /O(n)/. Returns all non-empty suffixes of the argument, longest 204 | -- first. Behaves as follows: 205 | -- 206 | -- >suffixes xs == init (tails xs) 207 | suffixes :: [a] -> [[a]] 208 | suffixes xs@(_:xs') = xs : suffixes xs' 209 | suffixes _ = [] 210 | 211 | lazyTree :: (Ord a) => EdgeFunction a -> [a] -> STree a 212 | lazyTree edge = suf . suffixes 213 | where suf [[]] = Leaf 214 | suf ss = Node [(Prefix (a:sa, inc cpl), suf ssr) 215 | | (a, n@(sa:_)) <- suffixMap ss, 216 | (cpl,ssr) <- [edge n]] 217 | 218 | suffixMap :: Ord a => [[a]] -> [(a, [[a]])] 219 | suffixMap = map (second reverse) . M.toList . L.foldl' step M.empty 220 | where step m (x:xs) = M.alter (f xs) x m 221 | step m _ = m 222 | f x Nothing = Just [x] 223 | f x (Just xs) = Just (x:xs) 224 | 225 | cst :: Eq a => EdgeFunction a 226 | cst [s] = (Sum 0 s, [[]]) 227 | cst awss@((a:w):ss) 228 | | null [c | c:_ <- ss, a /= c] = let cpl' = inc cpl 229 | in seq cpl' (cpl', rss) 230 | | otherwise = (Exactly 0, awss) 231 | where (cpl, rss) = cst (w:[u | _:u <- ss]) 232 | 233 | pst :: Eq a => EdgeFunction a 234 | pst = g . dropNested 235 | where g [s] = (Sum 0 s, [[]]) 236 | g ss = (Exactly 0, ss) 237 | dropNested ss@[_] = ss 238 | dropNested awss@((a:w):ss) 239 | | null [c | c:_ <- ss, a /= c] = [a:s | s <- rss] 240 | | otherwise = awss 241 | where rss = dropNested (w:[u | _:u <- ss]) 242 | 243 | {-# SPECIALISE constructWith :: [Char] -> [Char] -> STree Char #-} 244 | {-# SPECIALISE constructWith :: [[Char]] -> [[Char]] -> STree [Char] #-} 245 | {-# SPECIALISE constructWith :: [SB.ByteString] -> [SB.ByteString] 246 | -> STree SB.ByteString #-} 247 | {-# SPECIALISE constructWith :: [LB.ByteString] -> [LB.ByteString] 248 | -> STree LB.ByteString #-} 249 | {-# SPECIALISE constructWith :: (Eq a) => [[a]] -> [[a]] -> STree [a] #-} 250 | 251 | -- | /O(k n log n)/. Constructs a suffix tree using the given 252 | -- alphabet. The performance of this function is linear in the size 253 | -- /k/ of the alphabet. That makes this function suitable for small 254 | -- alphabets, such as DNA nucleotides. For an alphabet containing 255 | -- more than a few symbols, 'construct' is usually several orders of 256 | -- magnitude faster. 257 | constructWith :: (Eq a) => Alphabet a -> [a] -> STree a 258 | constructWith = lazyTreeWith cst 259 | 260 | {-# SPECIALISE construct :: [Char] -> STree Char #-} 261 | {-# SPECIALISE construct :: [[Char]] -> STree [Char] #-} 262 | {-# SPECIALISE construct :: [SB.ByteString] -> STree SB.ByteString #-} 263 | {-# SPECIALISE construct :: [LB.ByteString] -> STree LB.ByteString #-} 264 | {-# SPECIALISE construct :: (Ord a) => [[a]] -> STree [a] #-} 265 | 266 | -- | /O(n log n)/. Constructs a suffix tree. 267 | construct :: (Ord a) => [a] -> STree a 268 | construct = lazyTree cst 269 | 270 | suffix :: (Eq a) => [a] -> [a] -> Maybe [a] 271 | suffix (p:ps) (x:xs) | p == x = suffix ps xs 272 | | otherwise = Nothing 273 | suffix _ xs = Just xs 274 | 275 | {-# SPECIALISE elem :: [Char] -> STree Char -> Bool #-} 276 | {-# SPECIALISE elem :: [[Char]] -> STree [Char] -> Bool #-} 277 | {-# SPECIALISE elem :: [SB.ByteString] -> STree SB.ByteString -> Bool #-} 278 | {-# SPECIALISE elem :: [LB.ByteString] -> STree LB.ByteString -> Bool #-} 279 | {-# SPECIALISE elem :: (Eq a) => [[a]] -> STree [a] -> Bool #-} 280 | 281 | -- | /O(n)/. Indicates whether the suffix tree contains the given 282 | -- sublist. Performance is linear in the length /n/ of the 283 | -- sublist. 284 | elem :: (Eq a) => [a] -> STree a -> Bool 285 | elem [] _ = True 286 | elem _ Leaf = False 287 | elem xs (Node es) = any pfx es 288 | where pfx (e, t) = maybe False (`elem` t) (suffix (prefix e) xs) 289 | 290 | {-# SPECIALISE findEdge :: [Char] -> STree Char 291 | -> Maybe (Edge Char, Int) #-} 292 | {-# SPECIALISE findEdge :: [String] -> STree String 293 | -> Maybe (Edge String, Int) #-} 294 | {-# SPECIALISE findEdge :: [SB.ByteString] -> STree SB.ByteString 295 | -> Maybe (Edge SB.ByteString, Int) #-} 296 | {-# SPECIALISE findEdge :: [ LB.ByteString] -> STree LB.ByteString 297 | -> Maybe (Edge LB.ByteString, Int) #-} 298 | {-# SPECIALISE findEdge :: (Eq a) => [[a]] -> STree [a] 299 | -> Maybe (Edge [a], Int) #-} 300 | 301 | -- | /O(n)/. Finds the given subsequence in the suffix tree. On 302 | -- failure, returns 'Nothing'. On success, returns the 'Edge' in the 303 | -- suffix tree at which the subsequence ends, along with the number of 304 | -- elements to drop from the prefix of the 'Edge' to get the \"real\" 305 | -- remaining prefix. 306 | -- 307 | -- Here is an example: 308 | -- 309 | -- >> find "ssip" (construct "mississippi") 310 | -- >Just ((mkPrefix "ppi",Leaf),1) 311 | -- 312 | -- This indicates that the edge @('mkPrefix' \"ppi\",'Leaf')@ matches, 313 | -- and that we must strip 1 character from the string @\"ppi\"@ to get 314 | -- the remaining prefix string @\"pi\"@. 315 | -- 316 | -- Performance is linear in the length /n/ of the query list. 317 | findEdge :: (Eq a) => [a] -> STree a -> Maybe (Edge a, Int) 318 | findEdge _ Leaf = Nothing 319 | findEdge xs (Node es) = listToMaybe (mapMaybe pfx es) 320 | where pfx e@(p, t) = let p' = prefix p 321 | in suffix p' xs >>= \suf -> 322 | case suf of 323 | [] -> return (e, length (zipWith const xs p')) 324 | s -> findEdge s t 325 | 326 | -- | /O(n)/. Finds the subtree rooted at the end of the given query 327 | -- sequence. On failure, returns 'Nothing'. 328 | -- 329 | -- Performance is linear in the length /n/ of the query list. 330 | findTree :: (Eq a) => [a] -> STree a -> Maybe (STree a) 331 | findTree s t = (snd . fst) `fmap` findEdge s t 332 | 333 | -- | /O(n)/. Returns the path from the 'Edge' in the suffix tree at 334 | -- which the given subsequence ends, up to the root of the tree. If 335 | -- the subsequence is not found, returns the empty list. 336 | -- 337 | -- Performance is linear in the length of the query list. 338 | findPath :: (Eq a) => [a] -> STree a -> [Edge a] 339 | findPath = go [] 340 | where go _ _ Leaf = [] 341 | go me xs (Node es) = pfx me es 342 | where pfx _ [] = [] 343 | pfx me (e@(p, t):es) = 344 | case suffix (prefix p) xs of 345 | Nothing -> pfx me es 346 | Just [] -> e:me 347 | Just s -> go (e:me) s t 348 | 349 | -- | /O(t)/. Count the number of leaves in a tree. 350 | -- 351 | -- Performance is linear in the number /t/ of elements in the tree. 352 | countLeaves :: STree a -> Int 353 | countLeaves Leaf = 1 354 | countLeaves (Node es) = L.foldl' (\v (_, t) -> countLeaves t + v) 0 es 355 | 356 | -- | /O(n + r)/. Count the number of times a sequence is repeated 357 | -- in the input sequence that was used to construct the suffix tree. 358 | -- 359 | -- Performance is linear in the length /n/ of the input sequence, plus 360 | -- the number of times /r/ the sequence is repeated. 361 | countRepeats :: (Eq a) => [a] -> STree a -> Int 362 | countRepeats s t = maybe 0 countLeaves (findTree s t) 363 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Bryan O'Sullivan 2007. 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 3. Neither the name of the author nor the names of his contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /Setup.lhs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env runhaskell 2 | > import Distribution.Simple 3 | > main = defaultMain 4 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | example-objs := \ 2 | tiny \ 3 | UniqueMatch.o 4 | 5 | ghc := ghc 6 | 7 | examples: inplace $(example-objs) 8 | 9 | inplace: 10 | $(MAKE) -C .. inplace 11 | 12 | tiny: Tiny.hs 13 | $(ghc) --make -o $@ $< 14 | 15 | %.o: %.hs 16 | $(ghc) -c -o $@ $< 17 | 18 | clean: 19 | -rm -f $(example-objs) *.hi *.o 20 | -------------------------------------------------------------------------------- /examples/Tiny.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import qualified Data.SuffixTree as S 4 | import System.Environment (getArgs) 5 | 6 | main = do 7 | [fileName, cons] <- getArgs 8 | let ctor = case cons of 9 | "1" -> S.constructWith [minBound..maxBound] 10 | "2" -> S.construct 11 | tree <- ctor `fmap` readFile fileName 12 | putStrLn (show (S.fold id id (\_ a _ -> a+1) id 0 tree) ++ " edges") 13 | (lines `fmap` getContents) >>= mapM_ (print . (`S.elem` tree)) 14 | -------------------------------------------------------------------------------- /examples/UniqueMatch.hs: -------------------------------------------------------------------------------- 1 | -- This module solves, more or less, the maximal unique match (MUM) 2 | -- problem for two input lists, using a generalised suffix tree. 3 | -- 4 | -- Unfortunately, we can't check for left maximality because we're 5 | -- using lists instead of indices into arrays. It's easy to look one 6 | -- element to the left in an array, but you can't look one element 7 | -- left of the head of a list. 8 | 9 | module UniqueMatch (Sym(..), mkGenTree, maxUniqueMatches) where 10 | 11 | import Data.SuffixTree (STree(..), construct, prefix) 12 | 13 | -- We construct a generalised suffix tree, with elements annotated to 14 | -- tell us whether they come from the left or right list. Each list 15 | -- is terminated with a stop symbol. 16 | data Sym a = L a 17 | | Lx 18 | | R a 19 | | Rx 20 | deriving (Show) 21 | 22 | isLeft (L _:_) = True 23 | isLeft (Lx:_) = True 24 | isLeft _ = False 25 | 26 | isRight (R _:_) = True 27 | isRight (Rx:_) = True 28 | isRight _ = False 29 | 30 | fromSyms (L a:ss) = a : fromSyms ss 31 | fromSyms (R a:ss) = a : fromSyms ss 32 | fromSyms (Lx:_) = [] 33 | fromSyms (Rx:_) = [] 34 | fromSyms _ = [] 35 | 36 | instance (Eq a) => Eq (Sym a) where 37 | L a == L b = a == b 38 | R a == R b = a == b 39 | L a == R b = a == b 40 | R a == L b = a == b 41 | Lx == Lx = True 42 | Rx == Rx = True 43 | _ == _ = False 44 | 45 | instance (Ord a) => Ord (Sym a) where 46 | L a <= L b = a <= b 47 | R a <= R b = a <= b 48 | L a <= R b = a <= b 49 | R a <= L b = a <= b 50 | L _ <= Lx = True 51 | L _ <= Rx = True 52 | R _ <= Lx = True 53 | R _ <= Rx = True 54 | Lx <= Lx = True 55 | Rx <= Rx = True 56 | Lx <= Rx = True 57 | _ <= _ = False 58 | 59 | mkGenTree :: (Ord a) => [a] -> [a] -> STree (Sym a) 60 | mkGenTree a b = construct (map L a ++ Lx : map R b ++ [Rx]) 61 | 62 | maxUniqueMatches :: (Ord a) => STree (Sym a) -> [[a]] 63 | maxUniqueMatches t = map (fromSyms . concatMap prefix . reverse) 64 | (recurse [] t) 65 | where recurse _ Leaf = [] 66 | recurse path (Node es) = loop path es 67 | 68 | loop path ((p, t):es) = matches ++ loop path es 69 | where matches | rightMaximal t = [p:path] 70 | | otherwise = recurse (p:path) t 71 | loop _ _ = [] 72 | 73 | rightMaximal (Node [(pa,Leaf), (pb,Leaf)]) = 74 | (isLeft a && isRight b) || (isRight a && isLeft b) 75 | where a = prefix pa 76 | b = prefix pb 77 | rightMaximal _ = False 78 | -------------------------------------------------------------------------------- /suffixtree.cabal: -------------------------------------------------------------------------------- 1 | Name: suffixtree 2 | Version: 0.2.2.1 3 | Synopsis: Efficient, lazy suffix tree implementation 4 | Description: An efficient, lazy suffix tree implementation. 5 | Category: Data Structures, Data 6 | License: BSD3 7 | License-File: LICENSE 8 | Author: Bryan O'Sullivan 9 | Maintainer: bos@serpentine.com 10 | Homepage: https://github.com/bos/suffixtree 11 | Bug-Reports: https://github.com/bos/suffixtree/issues 12 | Build-Type: Simple 13 | Cabal-Version: >= 1.6 14 | Extra-Source-Files: examples/Makefile 15 | examples/Tiny.hs 16 | examples/UniqueMatch.hs 17 | 18 | Library 19 | Build-Depends: base >= 3 && < 5, 20 | bytestring, 21 | containers 22 | Exposed-Modules: Data.SuffixTree 23 | GHC-Options: -Wall -funbox-strict-fields -fno-warn-incomplete-patterns 24 | 25 | source-repository head 26 | type: git 27 | location: https://github.com/bos/suffixtree 28 | 29 | source-repository head 30 | type: mercurial 31 | location: https://bitbucket.org/bos/suffixtree 32 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | ghc := ghc 2 | ghcflags := $(shell awk -F: '/ghc-options/{print $$2}' ../suffixtree.cabal) 3 | 4 | tests: 5 | runhaskell SuffixCheck 6 | 7 | inplace: 8 | cd .. && $(ghc) $(ghcflags) --make Data/*.hs 9 | -------------------------------------------------------------------------------- /tests/SuffixCheck.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | import qualified Data.SuffixTree as T 3 | import Data.Char (ord) 4 | import Data.List (inits) 5 | import Data.Maybe (isJust) 6 | import Test.QuickCheck 7 | 8 | instance Arbitrary Char where 9 | arbitrary = choose ('a', 'z') 10 | coarbitrary c = variant (ord c `rem` 4) 11 | 12 | deepCheck p = check (defaultConfig {configMaxTest = 1000}) p 13 | 14 | prop_allsuffixes :: [Char] -> Bool 15 | prop_allsuffixes s = let t = T.construct s 16 | in all (`T.elem` t) (T.suffixes s) 17 | 18 | prefixes = tail . inits 19 | 20 | prop_allinfixes :: [Char] -> Bool 21 | prop_allinfixes s = let t = T.construct s 22 | in all (\suf -> all (`T.elem` t) (prefixes suf)) (T.suffixes s) 23 | 24 | prop_findEdge_eqv_elem :: [Char] -> Bool 25 | prop_findEdge_eqv_elem s = all check (T.suffixes s) 26 | where t = T.construct s 27 | check s = isJust (T.findEdge s t) == T.elem s t 28 | 29 | prop_findEdge_eqv_findTree :: [Char] -> Bool 30 | prop_findEdge_eqv_findTree s = all check (T.suffixes s) 31 | where t = T.construct s 32 | check s = isJust (T.findEdge s t) == isJust (T.findTree s t) 33 | 34 | prop_findEdge_eqv_findPath :: [Char] -> Bool 35 | prop_findEdge_eqv_findPath s = all check (T.suffixes s) 36 | where t = T.construct s 37 | check s = isJust (T.findEdge s t) == (not . null) (T.findPath s t) 38 | 39 | main = do 40 | deepCheck prop_allsuffixes 41 | quickCheck prop_allinfixes 42 | deepCheck prop_findEdge_eqv_elem 43 | deepCheck prop_findEdge_eqv_findTree 44 | deepCheck prop_findEdge_eqv_findPath 45 | --------------------------------------------------------------------------------