├── .hgignore
├── .hgtags
├── Data
    └── SuffixTree.hs
├── LICENSE
├── Setup.lhs
├── examples
    ├── Makefile
    ├── Tiny.hs
    └── UniqueMatch.hs
├── suffixtree.cabal
└── tests
    ├── Makefile
    └── SuffixCheck.hs


/.hgignore:
--------------------------------------------------------------------------------
 1 | .*\.(?:aux|h[ip]|o|orig|out|pdf|prof|ps|rej)$
 2 | ^(?:dist|\.DS_Store)$
 3 | 
 4 | syntax: glob
 5 | cabal-dev
 6 | *~
 7 | .*.swp
 8 | .\#*
 9 | \#*
10 | 


--------------------------------------------------------------------------------
/.hgtags:
--------------------------------------------------------------------------------
1 | 5b1107427dcad057b076e18bc016c519607a740f 0_2_2
2 | 36517f098c5146ae53d641a9cfec15565beb1301 0.2.2.1
3 | 


--------------------------------------------------------------------------------
/Data/SuffixTree.hs:
--------------------------------------------------------------------------------
  1 | {- Fastest when compiled as follows: ghc -O2 -optc-O3 -funbox-strict-fields -}
  2 | 
  3 | -----------------------------------------------------------------------------
  4 | -- |
  5 | -- Module      :  Data.SuffixTree
  6 | -- Copyright   :  (c) Bryan O'Sullivan 2007
  7 | -- License     :  BSD-style
  8 | -- Maintainer  :  bos@serpentine.com
  9 | -- Stability   :  experimental
 10 | -- Portability :  portable
 11 | --
 12 | -- A lazy, efficient suffix tree implementation.
 13 | --
 14 | -- Since many function names (but not the type name) clash with
 15 | -- "Prelude" names, this module is usually imported @qualified@, e.g.
 16 | --
 17 | -- >  import Data.SuffixTree (STree)
 18 | -- >  import qualified Data.SuffixTree as T
 19 | --
 20 | -- The implementation is based on the first of those described in the
 21 | -- following paper:
 22 | -- 
 23 | --   * Robert Giegerich and Stefan Kurtz, \"/A comparison of
 24 | --     imperative and purely functional suffix tree constructions/\",
 25 | --     Science of Computer Programming 25(2-3):187-218, 1995,
 26 | --     <http://citeseer.ist.psu.edu/giegerich95comparison.html>
 27 | -- 
 28 | -- This implementation constructs the suffix tree lazily, so subtrees
 29 | -- are not created until they are traversed.  Two construction
 30 | -- functions are provided, 'constructWith' for sequences composed of
 31 | -- small alphabets, and 'construct' for larger alphabets.
 32 | --
 33 | -- Estimates are given for performance.  The value /k/ is a constant;
 34 | -- /n/ is the length of a query string; and /t/ is the number of
 35 | -- elements (nodes and leaves) in a suffix tree.
 36 | 
 37 | module Data.SuffixTree
 38 |     (
 39 |     -- * Types
 40 |       Alphabet
 41 |     , Edge
 42 |     , Prefix
 43 |     , STree(..)
 44 | 
 45 |     -- * Construction
 46 |     , constructWith
 47 |     , construct
 48 | 
 49 |     -- * Querying
 50 |     , elem
 51 |     , findEdge
 52 |     , findTree
 53 |     , findPath
 54 |     , countLeaves
 55 |     , countRepeats
 56 | 
 57 |     -- * Traversal
 58 |     , foldr
 59 |     , foldl
 60 |     , fold
 61 | 
 62 |     -- * Other useful functions
 63 |     , mkPrefix
 64 |     , prefix
 65 |     , suffixes
 66 |     ) where
 67 |                    
 68 | import Prelude hiding (elem, foldl, foldr)
 69 | import qualified Data.Map as M
 70 | import Control.Arrow (second)
 71 | import qualified Data.ByteString as SB
 72 | import qualified Data.ByteString.Lazy as LB
 73 | import qualified Data.List as L
 74 | import Data.Maybe (listToMaybe, mapMaybe)
 75 | 
 76 | -- | The length of a prefix list.  This type is formulated to do cheap
 77 | -- work eagerly (to avoid constructing a pile of deferred thunks),
 78 | -- while deferring potentially expensive work (computing the length of
 79 | -- a list).
 80 | data Length a = Exactly {-# UNPACK #-} !Int
 81 |               | Sum {-# UNPACK #-} !Int [a]
 82 |               deriving (Show)
 83 | 
 84 | -- | The list of symbols that 'constructWith' can possibly see in its
 85 | -- input.
 86 | type Alphabet a = [a]
 87 | 
 88 | -- | The prefix string associated with an 'Edge'.  Use 'mkPrefix' to
 89 | -- create a value of this type, and 'prefix' to deconstruct one.
 90 | newtype Prefix a = Prefix ([a], Length a)
 91 | 
 92 | instance (Eq a) => Eq (Prefix a) where
 93 |     a == b = prefix a == prefix b
 94 | 
 95 | instance (Ord a) => Ord (Prefix a) where
 96 |     compare a b = compare (prefix a) (prefix b)
 97 | 
 98 | instance (Show a) => Show (Prefix a) where
 99 |     show a = "mkPrefix " ++ show (prefix a)
100 | 
101 | type EdgeFunction a = [[a]] -> (Length a, [[a]])
102 | 
103 | -- | An edge in the suffix tree.
104 | type Edge a = (Prefix a, STree a)
105 | 
106 | -- | /O(1)/. Construct a 'Prefix' value.
107 | mkPrefix :: [a] -> Prefix a
108 | mkPrefix xs = Prefix (xs, Sum 0 xs)
109 | 
110 | pmap :: (a -> b) -> Prefix a -> Prefix b
111 | pmap f = mkPrefix . map f . prefix
112 | 
113 | instance Functor Prefix where
114 |     fmap = pmap
115 | 
116 | -- | The suffix tree type.  The implementation is exposed to ease the
117 | -- development of custom traversal functions.  Note that @('Prefix' a,
118 | -- 'STree' a)@ pairs are not stored in any order.
119 | data STree a = Node [Edge a]
120 |              | Leaf
121 |                deriving (Show)
122 | 
123 | smap :: (a -> b) -> STree a -> STree b
124 | smap _ Leaf = Leaf
125 | smap f (Node es) = Node (map (\(p, t) -> (fmap f p, smap f t)) es)
126 | 
127 | instance Functor STree where
128 |     fmap = smap
129 | 
130 | -- | /O(n)/. Obtain the list stored in a 'Prefix'.
131 | prefix :: Prefix a -> [a]
132 | prefix (Prefix (ys, Exactly n)) = take n ys
133 | prefix (Prefix (ys, Sum n xs)) = tk n ys
134 |     where tk 0 ys = zipWith (const id) xs ys
135 |           tk n (y:ys) = y : tk (n-1) ys
136 | 
137 | -- | /O(t)/. Folds the edges in a tree, using post-order traversal.
138 | -- Suitable for lazy use.
139 | foldr :: (Prefix a -> b -> b) -> b -> STree a -> b
140 | foldr _ z Leaf = z
141 | foldr f z (Node es) = L.foldr (\(p,t) v -> f p (foldr f v t)) z es
142 | 
143 | -- | /O(t)/. Folds the edges in a tree, using pre-order traversal.  The
144 | -- step function is evaluated strictly.
145 | foldl :: (a -> Prefix b -> a)   -- ^ step function (evaluated strictly)
146 |       -> a                      -- ^ initial state
147 |       -> STree b
148 |       -> a
149 | foldl _ z Leaf = z
150 | foldl f z (Node es) = L.foldl' (\v (p,t) -> f (foldl f v t) p) z es
151 | 
152 | -- | /O(t)/. Generic fold over a tree.
153 | --
154 | -- A few simple examples.
155 | --
156 | -- >countLeaves == fold id id (const const) (1+) 0
157 | --
158 | -- >countEdges = fold id id (\_ a _ -> a+1) id 0
159 | --
160 | -- This more complicated example generates a tree of the same shape,
161 | -- but new type, with annotated leaves.
162 | --
163 | -- @
164 | --data GenTree a b = GenNode [('Prefix' a, GenTree a b)]
165 | --                 | GenLeaf b
166 | --                   deriving ('Show')
167 | -- @
168 | -- 
169 | -- @
170 | --gentree :: 'STree' a -> GenTree a Int
171 | --gentree = 'fold' reset id fprefix reset leaf
172 | --    where leaf = GenLeaf 1
173 | --          reset = 'const' leaf
174 | --          fprefix p t (GenLeaf _) = GenNode [(p, t)]
175 | --          fprefix p t (GenNode es) = GenNode ((p, t):es)
176 | -- @
177 | fold :: (a -> a)                -- ^ downwards state transformer
178 |      -> (a -> a)                -- ^ upwards state transformer
179 |      -> (Prefix b -> a -> a -> a) -- ^ edge state transformer
180 |      -> (a -> a)                -- ^ leaf state transformer
181 |      -> a                       -- ^ initial state
182 |      -> STree b                 -- ^ tree
183 |      -> a
184 | fold fdown fup fprefix fleaf = go
185 |     where go v Leaf = fleaf v
186 |           go v (Node es) = fup (L.foldr edge v es)
187 |           edge (p, t) v = fprefix p (go (fdown v) t) v
188 | 
189 | -- | Increments the length of a prefix.
190 | inc :: Length a -> Length a
191 | inc (Exactly n) = Exactly (n+1)
192 | inc (Sum n xs)  = Sum (n+1) xs
193 | 
194 | lazyTreeWith :: (Eq a) => EdgeFunction a -> Alphabet a -> [a] -> STree a
195 | lazyTreeWith edge alphabet = suf . suffixes
196 |     where suf [[]] = Leaf
197 |           suf ss = Node [(Prefix (a:sa, inc cpl), suf ssr)
198 |                          | a <- alphabet,
199 |                            n@(sa:_) <- [ss `clusterBy` a],
200 |                            (cpl,ssr) <- [edge n]]
201 |           clusterBy ss a = [cs | c:cs <- ss, c == a]
202 | 
203 | -- | /O(n)/. Returns all non-empty suffixes of the argument, longest
204 | -- first.  Behaves as follows:
205 | --
206 | -- >suffixes xs == init (tails xs)
207 | suffixes :: [a] -> [[a]]
208 | suffixes xs@(_:xs') = xs : suffixes xs'
209 | suffixes _ = []
210 | 
211 | lazyTree :: (Ord a) => EdgeFunction a -> [a] -> STree a
212 | lazyTree edge = suf . suffixes
213 |     where suf [[]] = Leaf
214 |           suf ss = Node [(Prefix (a:sa, inc cpl), suf ssr)
215 |                          | (a, n@(sa:_)) <- suffixMap ss,
216 |                            (cpl,ssr) <- [edge n]]
217 | 
218 | suffixMap :: Ord a => [[a]] -> [(a, [[a]])]
219 | suffixMap = map (second reverse) . M.toList . L.foldl' step M.empty
220 |     where step m (x:xs) = M.alter (f xs) x m
221 |           step m _ = m
222 |           f x Nothing = Just [x]
223 |           f x (Just xs) = Just (x:xs)
224 | 
225 | cst :: Eq a => EdgeFunction a
226 | cst [s] = (Sum 0 s, [[]])
227 | cst awss@((a:w):ss)
228 |     | null [c | c:_ <- ss, a /= c] = let cpl' = inc cpl
229 |                                      in seq cpl' (cpl', rss)
230 |     | otherwise = (Exactly 0, awss)
231 |     where (cpl, rss) = cst (w:[u | _:u <- ss])
232 | 
233 | pst :: Eq a => EdgeFunction a
234 | pst = g . dropNested
235 |     where g [s] = (Sum 0 s, [[]])
236 |           g ss  = (Exactly 0, ss)
237 |           dropNested ss@[_] = ss
238 |           dropNested awss@((a:w):ss)
239 |               | null [c | c:_ <- ss, a /= c] = [a:s | s <- rss]
240 |               | otherwise = awss
241 |               where rss = dropNested (w:[u | _:u <- ss])
242 | 
243 | {-# SPECIALISE constructWith :: [Char] -> [Char] -> STree Char #-}
244 | {-# SPECIALISE constructWith :: [[Char]] -> [[Char]] -> STree [Char] #-}
245 | {-# SPECIALISE constructWith :: [SB.ByteString] -> [SB.ByteString]
246 |                              -> STree SB.ByteString #-}
247 | {-# SPECIALISE constructWith :: [LB.ByteString] -> [LB.ByteString]
248 |                              -> STree LB.ByteString #-}
249 | {-# SPECIALISE constructWith :: (Eq a) => [[a]] -> [[a]] -> STree [a] #-}
250 | 
251 | -- | /O(k n log n)/.  Constructs a suffix tree using the given
252 | -- alphabet.  The performance of this function is linear in the size
253 | -- /k/ of the alphabet.  That makes this function suitable for small
254 | -- alphabets, such as DNA nucleotides.  For an alphabet containing
255 | -- more than a few symbols, 'construct' is usually several orders of
256 | -- magnitude faster.
257 | constructWith :: (Eq a) => Alphabet a -> [a] -> STree a
258 | constructWith = lazyTreeWith cst
259 | 
260 | {-# SPECIALISE construct :: [Char] -> STree Char #-}
261 | {-# SPECIALISE construct :: [[Char]] -> STree [Char] #-}
262 | {-# SPECIALISE construct :: [SB.ByteString] -> STree SB.ByteString #-}
263 | {-# SPECIALISE construct :: [LB.ByteString] -> STree LB.ByteString #-}
264 | {-# SPECIALISE construct :: (Ord a) => [[a]] -> STree [a] #-}
265 | 
266 | -- | /O(n log n)/.  Constructs a suffix tree.
267 | construct :: (Ord a) => [a] -> STree a
268 | construct = lazyTree cst
269 | 
270 | suffix :: (Eq a) => [a] -> [a] -> Maybe [a]
271 | suffix (p:ps) (x:xs) | p == x = suffix ps xs
272 |                      | otherwise = Nothing
273 | suffix _ xs = Just xs
274 | 
275 | {-# SPECIALISE elem :: [Char] -> STree Char -> Bool #-}
276 | {-# SPECIALISE elem :: [[Char]] -> STree [Char] -> Bool #-}
277 | {-# SPECIALISE elem :: [SB.ByteString] -> STree SB.ByteString -> Bool #-}
278 | {-# SPECIALISE elem :: [LB.ByteString] -> STree LB.ByteString -> Bool #-}
279 | {-# SPECIALISE elem :: (Eq a) => [[a]] -> STree [a] -> Bool #-}
280 | 
281 | -- | /O(n)/.  Indicates whether the suffix tree contains the given
282 | -- sublist.  Performance is linear in the length /n/ of the
283 | -- sublist.
284 | elem :: (Eq a) => [a] -> STree a -> Bool
285 | elem [] _ = True
286 | elem _ Leaf = False
287 | elem xs (Node es) = any pfx es
288 |     where pfx (e, t) = maybe False (`elem` t) (suffix (prefix e) xs)
289 | 
290 | {-# SPECIALISE findEdge :: [Char] -> STree Char
291 |                         -> Maybe (Edge Char, Int) #-}
292 | {-# SPECIALISE findEdge :: [String] -> STree String
293 |                         -> Maybe (Edge String, Int) #-}
294 | {-# SPECIALISE findEdge :: [SB.ByteString] -> STree SB.ByteString
295 |                         -> Maybe (Edge SB.ByteString, Int) #-}
296 | {-# SPECIALISE findEdge :: [ LB.ByteString] -> STree LB.ByteString
297 |                         -> Maybe (Edge LB.ByteString, Int) #-}
298 | {-# SPECIALISE findEdge :: (Eq a) => [[a]] -> STree [a]
299 |                         -> Maybe (Edge [a], Int) #-}
300 | 
301 | -- | /O(n)/.  Finds the given subsequence in the suffix tree.  On
302 | -- failure, returns 'Nothing'.  On success, returns the 'Edge' in the
303 | -- suffix tree at which the subsequence ends, along with the number of
304 | -- elements to drop from the prefix of the 'Edge' to get the \"real\"
305 | -- remaining prefix.
306 | --
307 | -- Here is an example:
308 | --
309 | -- >> find "ssip" (construct "mississippi")
310 | -- >Just ((mkPrefix "ppi",Leaf),1)
311 | --
312 | -- This indicates that the edge @('mkPrefix' \"ppi\",'Leaf')@ matches,
313 | -- and that we must strip 1 character from the string @\"ppi\"@ to get
314 | -- the remaining prefix string @\"pi\"@.
315 | --
316 | -- Performance is linear in the length /n/ of the query list.
317 | findEdge :: (Eq a) => [a] -> STree a -> Maybe (Edge a, Int)
318 | findEdge _ Leaf = Nothing
319 | findEdge xs (Node es) = listToMaybe (mapMaybe pfx es)
320 |     where pfx e@(p, t) = let p' = prefix p
321 |                          in suffix p' xs >>= \suf ->
322 |             case suf of
323 |               [] -> return (e, length (zipWith const xs p'))
324 |               s -> findEdge s t
325 | 
326 | -- | /O(n)/. Finds the subtree rooted at the end of the given query
327 | -- sequence.  On failure, returns 'Nothing'.
328 | --
329 | -- Performance is linear in the length /n/ of the query list.
330 | findTree :: (Eq a) => [a] -> STree a -> Maybe (STree a)
331 | findTree s t = (snd . fst) `fmap` findEdge s t
332 | 
333 | -- | /O(n)/. Returns the path from the 'Edge' in the suffix tree at
334 | -- which the given subsequence ends, up to the root of the tree.  If
335 | -- the subsequence is not found, returns the empty list.
336 | --
337 | -- Performance is linear in the length of the query list.
338 | findPath :: (Eq a) => [a] -> STree a -> [Edge a]
339 | findPath = go []
340 |     where go _ _ Leaf = []
341 |           go me xs (Node es) = pfx me es
342 |               where pfx _ [] = []
343 |                     pfx me (e@(p, t):es) =
344 |                         case suffix (prefix p) xs of
345 |                           Nothing -> pfx me es
346 |                           Just [] -> e:me
347 |                           Just s -> go (e:me) s t
348 | 
349 | -- | /O(t)/. Count the number of leaves in a tree.
350 | --
351 | -- Performance is linear in the number /t/ of elements in the tree.
352 | countLeaves :: STree a -> Int
353 | countLeaves Leaf = 1
354 | countLeaves (Node es) = L.foldl' (\v (_, t) -> countLeaves t + v) 0 es
355 | 
356 | -- | /O(n + r)/. Count the number of times a sequence is repeated
357 | -- in the input sequence that was used to construct the suffix tree.
358 | --
359 | -- Performance is linear in the length /n/ of the input sequence, plus
360 | -- the number of times /r/ the sequence is repeated.
361 | countRepeats :: (Eq a) => [a] -> STree a -> Int
362 | countRepeats s t = maybe 0 countLeaves (findTree s t)
363 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) Bryan O'Sullivan 2007.
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions
 7 | are met:
 8 | 1. Redistributions of source code must retain the above copyright
 9 |    notice, this list of conditions and the following disclaimer.
10 | 2. Redistributions in binary form must reproduce the above copyright
11 |    notice, this list of conditions and the following disclaimer in the
12 |    documentation and/or other materials provided with the distribution.
13 | 3. Neither the name of the author nor the names of his contributors
14 |    may be used to endorse or promote products derived from this software
15 |    without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 | ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 | SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/Setup.lhs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env runhaskell
2 | > import Distribution.Simple
3 | > main = defaultMain
4 | 


--------------------------------------------------------------------------------
/examples/Makefile:
--------------------------------------------------------------------------------
 1 | example-objs := \
 2 | 	tiny \
 3 | 	UniqueMatch.o
 4 | 
 5 | ghc := ghc
 6 | 
 7 | examples: inplace $(example-objs)
 8 | 
 9 | inplace:
10 | 	$(MAKE) -C .. inplace
11 | 
12 | tiny: Tiny.hs
13 | 	$(ghc) --make -o $@ $<
14 | 
15 | %.o: %.hs
16 | 	$(ghc) -c -o $@ $<
17 | 
18 | clean:
19 | 	-rm -f $(example-objs) *.hi *.o
20 | 


--------------------------------------------------------------------------------
/examples/Tiny.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import qualified Data.SuffixTree as S
 4 | import System.Environment (getArgs)
 5 | 
 6 | main = do
 7 |   [fileName, cons] <- getArgs
 8 |   let ctor = case cons of
 9 |                "1" -> S.constructWith [minBound..maxBound]
10 |                "2" -> S.construct
11 |   tree <- ctor `fmap` readFile fileName
12 |   putStrLn (show (S.fold id id (\_ a _ -> a+1) id 0 tree) ++ " edges")
13 |   (lines `fmap` getContents) >>= mapM_ (print . (`S.elem` tree))
14 | 


--------------------------------------------------------------------------------
/examples/UniqueMatch.hs:
--------------------------------------------------------------------------------
 1 | -- This module solves, more or less, the maximal unique match (MUM)
 2 | -- problem for two input lists, using a generalised suffix tree.
 3 | --
 4 | -- Unfortunately, we can't check for left maximality because we're
 5 | -- using lists instead of indices into arrays.  It's easy to look one
 6 | -- element to the left in an array, but you can't look one element
 7 | -- left of the head of a list.
 8 | 
 9 | module UniqueMatch (Sym(..), mkGenTree, maxUniqueMatches) where
10 | 
11 | import Data.SuffixTree (STree(..), construct, prefix)
12 | 
13 | -- We construct a generalised suffix tree, with elements annotated to
14 | -- tell us whether they come from the left or right list.  Each list
15 | -- is terminated with a stop symbol.
16 | data Sym a = L a
17 |            | Lx
18 |            | R a
19 |            | Rx
20 |              deriving (Show)
21 | 
22 | isLeft (L _:_) = True
23 | isLeft (Lx:_) = True
24 | isLeft _ = False
25 | 
26 | isRight (R _:_) = True
27 | isRight (Rx:_) = True
28 | isRight _ = False
29 | 
30 | fromSyms (L a:ss) = a : fromSyms ss
31 | fromSyms (R a:ss) = a : fromSyms ss
32 | fromSyms (Lx:_) = []
33 | fromSyms (Rx:_) = []
34 | fromSyms _ = []
35 | 
36 | instance (Eq a) => Eq (Sym a) where
37 |     L a == L b = a == b
38 |     R a == R b = a == b
39 |     L a == R b = a == b
40 |     R a == L b = a == b
41 |     Lx == Lx = True
42 |     Rx == Rx = True
43 |     _ == _ = False
44 | 
45 | instance (Ord a) => Ord (Sym a) where
46 |     L a <= L b = a <= b
47 |     R a <= R b = a <= b
48 |     L a <= R b = a <= b
49 |     R a <= L b = a <= b
50 |     L _ <= Lx = True
51 |     L _ <= Rx = True
52 |     R _ <= Lx = True
53 |     R _ <= Rx = True
54 |     Lx <= Lx = True
55 |     Rx <= Rx = True
56 |     Lx <= Rx = True
57 |     _ <= _ = False
58 | 
59 | mkGenTree :: (Ord a) => [a] -> [a] -> STree (Sym a)
60 | mkGenTree a b = construct (map L a ++ Lx : map R b ++ [Rx])
61 | 
62 | maxUniqueMatches :: (Ord a) => STree (Sym a) -> [[a]]
63 | maxUniqueMatches t = map (fromSyms . concatMap prefix . reverse)
64 |                      (recurse [] t)
65 |     where recurse _ Leaf = []
66 |           recurse path (Node es) = loop path es
67 | 
68 |           loop path ((p, t):es) = matches ++ loop path es
69 |               where matches | rightMaximal t = [p:path]
70 |                             | otherwise = recurse (p:path) t
71 |           loop _ _ = []
72 | 
73 |           rightMaximal (Node [(pa,Leaf), (pb,Leaf)]) =
74 |                 (isLeft a && isRight b) || (isRight a && isLeft b)
75 |               where a = prefix pa
76 |                     b = prefix pb
77 |           rightMaximal _ = False
78 | 


--------------------------------------------------------------------------------
/suffixtree.cabal:
--------------------------------------------------------------------------------
 1 | Name:                suffixtree
 2 | Version:             0.2.2.1
 3 | Synopsis:            Efficient, lazy suffix tree implementation
 4 | Description:         An efficient, lazy suffix tree implementation.
 5 | Category:            Data Structures, Data
 6 | License:             BSD3
 7 | License-File:        LICENSE
 8 | Author:              Bryan O'Sullivan <bos@serpentine.com>
 9 | Maintainer:          bos@serpentine.com
10 | Homepage:            https://github.com/bos/suffixtree
11 | Bug-Reports:         https://github.com/bos/suffixtree/issues
12 | Build-Type:          Simple
13 | Cabal-Version:       >= 1.6
14 | Extra-Source-Files:  examples/Makefile
15 |                      examples/Tiny.hs
16 |                      examples/UniqueMatch.hs
17 | 
18 | Library
19 |   Build-Depends:     base >= 3 && < 5,
20 |                      bytestring,
21 |                      containers
22 |   Exposed-Modules:   Data.SuffixTree
23 |   GHC-Options:       -Wall -funbox-strict-fields -fno-warn-incomplete-patterns
24 | 
25 | source-repository head
26 |   type:     git
27 |   location: https://github.com/bos/suffixtree
28 | 
29 | source-repository head
30 |   type:     mercurial
31 |   location: https://bitbucket.org/bos/suffixtree
32 | 


--------------------------------------------------------------------------------
/tests/Makefile:
--------------------------------------------------------------------------------
1 | ghc := ghc
2 | ghcflags := $(shell awk -F: '/ghc-options/{print $$2}' ../suffixtree.cabal)
3 | 
4 | tests:
5 | 	runhaskell SuffixCheck
6 | 
7 | inplace:
8 | 	cd .. && $(ghc) $(ghcflags) --make Data/*.hs
9 | 


--------------------------------------------------------------------------------
/tests/SuffixCheck.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | import qualified Data.SuffixTree as T
 3 | import Data.Char (ord)
 4 | import Data.List (inits)
 5 | import Data.Maybe (isJust)
 6 | import Test.QuickCheck
 7 | 
 8 | instance Arbitrary Char where
 9 |   arbitrary     = choose ('a', 'z')
10 |   coarbitrary c = variant (ord c `rem` 4)
11 | 
12 | deepCheck p = check (defaultConfig {configMaxTest = 1000}) p
13 | 
14 | prop_allsuffixes :: [Char] -> Bool
15 | prop_allsuffixes s = let t = T.construct s
16 |                      in all (`T.elem` t) (T.suffixes s)
17 | 
18 | prefixes = tail . inits
19 | 
20 | prop_allinfixes :: [Char] -> Bool
21 | prop_allinfixes s = let t = T.construct s
22 |                     in all (\suf -> all (`T.elem` t) (prefixes suf)) (T.suffixes s)
23 | 
24 | prop_findEdge_eqv_elem :: [Char] -> Bool
25 | prop_findEdge_eqv_elem s = all check (T.suffixes s)
26 |     where t = T.construct s
27 |           check s = isJust (T.findEdge s t) == T.elem s t
28 | 
29 | prop_findEdge_eqv_findTree :: [Char] -> Bool
30 | prop_findEdge_eqv_findTree s = all check (T.suffixes s)
31 |     where t = T.construct s
32 |           check s = isJust (T.findEdge s t) == isJust (T.findTree s t)
33 | 
34 | prop_findEdge_eqv_findPath :: [Char] -> Bool
35 | prop_findEdge_eqv_findPath s = all check (T.suffixes s)
36 |     where t = T.construct s
37 |           check s = isJust (T.findEdge s t) == (not . null) (T.findPath s t)
38 | 
39 | main = do
40 |   deepCheck prop_allsuffixes
41 |   quickCheck prop_allinfixes
42 |   deepCheck prop_findEdge_eqv_elem
43 |   deepCheck prop_findEdge_eqv_findTree
44 |   deepCheck prop_findEdge_eqv_findPath
45 | 


--------------------------------------------------------------------------------