├── slides.pdf ├── screenshots ├── 1.png └── 2.png ├── README.md ├── suggest.cabal ├── Wildcard.hs ├── TST.hs ├── Suggest.hs └── resources └── search.html /slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bitonic/suggest/HEAD/slides.pdf -------------------------------------------------------------------------------- /screenshots/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bitonic/suggest/HEAD/screenshots/1.png -------------------------------------------------------------------------------- /screenshots/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bitonic/suggest/HEAD/screenshots/2.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Simple suggestion and correction server, implemented with warp and using 2 | ternary search tries to complete and correct words. 3 | 4 | To find out how it works, check out the 5 | [slides](https://raw.github.com/bitonic/suggest/master/slides.pdf). The slides 6 | are slightly outdated, since when I wrote them warp still used enumerator, while 7 | now it uses conduits, but the concepts still apply. 8 | 9 | I did this the guys at RabbitMQ asked me to give a presentation on a project, 10 | so I coded this in less than 10 hrs the day before (I got the job!). 11 | 12 | It's very fast, over 20k req/s. 13 | 14 | To build and run: 15 | 16 | ``` 17 | cabal configure ; cabal build 18 | cp dist/build/suggest/suggest ./ 19 | ./suggest 20 | ``` 21 | 22 | Screenshots: 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /suggest.cabal: -------------------------------------------------------------------------------- 1 | name: suggest 2 | version: 0.0 3 | homepage: https://github.com/rostayob/suggest 4 | synopsis: Auto completion and spell corrector server 5 | category: Web 6 | license: GPL-2 7 | author: Francesco Mazzoli 8 | maintainer: Francesco Mazzoli 9 | cabal-version: >= 1.6 10 | build-type: Simple 11 | 12 | source-repository head 13 | type: git 14 | location: git://github.com/bitonic/suggest 15 | branch: master 16 | 17 | executable suggest 18 | main-is: Suggest.hs 19 | 20 | build-depends: base >= 4 && < 5 21 | , warp 22 | , wai 23 | , conduit 24 | , blaze-builder 25 | , http-types 26 | , utf8-string 27 | , bytestring 28 | , mtl 29 | 30 | GHC-options: -Wall -O2 -fno-warn-unused-do-bind -threaded 31 | -------------------------------------------------------------------------------- /Wildcard.hs: -------------------------------------------------------------------------------- 1 | module Wildcard where 2 | 3 | data Wildcard a = Wildcard 4 | | El a 5 | deriving (Eq, Ord) 6 | 7 | 8 | type WildList a = [Wildcard a] 9 | 10 | instance Show a => Show (Wildcard a) where 11 | show Wildcard = "*" 12 | show (El c) = show c 13 | 14 | wildList :: [a] -> WildList a 15 | wildList = map El 16 | 17 | wildString :: String -> WildList Char 18 | wildString = map (\c -> if c == '*' then Wildcard else El c) 19 | 20 | matches :: Eq a => WildList a -> [a] -> Bool 21 | matches [] [] = True 22 | matches [] _ = False 23 | matches _ [] = False 24 | matches (Wildcard : w) (_ : s) = matches w s 25 | matches (El c1 : w) (c2 : s) | c1 == c2 = matches w s 26 | | otherwise = False 27 | 28 | edits :: WildList a -> [WildList a] 29 | edits s' = concatMap init [ deleted s' 30 | , transposes s' 31 | , replaces s' 32 | , inserts s' 33 | ] 34 | where 35 | deleted [] = [[]] 36 | deleted (c : s) = s : map (c :) (deleted s) 37 | 38 | transposes [] = [[]] 39 | transposes [x] = [[x]] 40 | transposes (x : y : s) = (y : x : s) : map (x :) (transposes (y : s)) 41 | 42 | replaces [] = [[]] 43 | replaces (c : s) = (Wildcard : s) : map (c :) (replaces s) 44 | 45 | inserts [] = [[]] 46 | inserts (c : s) = (Wildcard : c : s) : map (c :) (inserts s) 47 | 48 | -------------------------------------------------------------------------------- /TST.hs: -------------------------------------------------------------------------------- 1 | module TST 2 | ( TST 3 | , empty 4 | , singleton 5 | , toList 6 | , fromList 7 | , insert 8 | , prefix 9 | , matchWL 10 | , lookup 11 | ) where 12 | 13 | import Control.Arrow (first) 14 | 15 | import Wildcard 16 | 17 | import Prelude hiding (lookup) 18 | 19 | data TST c a = Branch c (TST c a) (TST c a) (TST c a) 20 | | End a (TST c a) 21 | | Null 22 | 23 | instance (Ord c, Show c, Show a) => Show (TST c a) where 24 | show = ("fromList " ++) . show . toList 25 | 26 | instance (Ord c, Eq c, Eq a) => Eq (TST c a) where 27 | t1 == t2 = toList t1 == toList t2 28 | 29 | empty :: TST l a 30 | empty = Null 31 | 32 | singleton :: [c] -> a -> TST c a 33 | singleton [] v = End v Null 34 | singleton (c : s) v = Branch c Null (singleton s v) Null 35 | 36 | toList :: Ord c => TST c a -> [([c], a)] 37 | toList = prefix [] 38 | 39 | fromList :: Ord c => [([c], a)] -> TST c a 40 | fromList = foldr (uncurry insert) empty 41 | 42 | insert :: Ord c => [c] -> a -> TST c a -> TST c a 43 | insert [] v Null = End v Null 44 | insert [] v (End _ t) = End v t 45 | insert [] v (Branch c l m r) = Branch c (insert [] v l) m r 46 | insert s v Null = singleton s v 47 | insert s v1 (End v2 t) = End v2 (insert s v1 t) 48 | insert (c1 : s) v (Branch c2 l m r) = 49 | case compare c1 c2 of 50 | LT -> Branch c2 (insert (c1 : s) v l) m r 51 | EQ -> Branch c2 l (insert s v m) r 52 | GT -> Branch c2 l m (insert (c1 : s) v r) 53 | 54 | prefix :: Ord c => [c] -> TST c a -> [([c], a)] 55 | prefix _ Null = [] 56 | prefix [] (End v t) = ([], v) : prefix [] t 57 | prefix [] (Branch c l m r) = 58 | prefix [] l ++ map (first (c :)) (prefix [] m) ++ prefix [] r 59 | prefix s (End _ t) = prefix s t 60 | prefix (c1 : s) (Branch c2 l m r) = 61 | case compare c1 c2 of 62 | LT -> prefix (c1 : s) l 63 | EQ -> map (first (c1 :)) (prefix s m) 64 | GT -> prefix (c1 : s) r 65 | 66 | matchWL :: Ord c => WildList c -> TST c a -> [([c], a)] 67 | matchWL _ Null = [] 68 | matchWL [] (End v _) = [([], v)] 69 | matchWL [] (Branch _ l _ _) = matchWL [] l 70 | matchWL s (End _ t) = matchWL s t 71 | matchWL (w : s) (Branch c2 l m r) = 72 | let left = matchWL (w : s) l 73 | middle = map (first (c2 :)) (matchWL s m) 74 | right = matchWL (w : s) r 75 | in case w of 76 | Wildcard -> left ++ middle ++ right 77 | El c1 -> case compare c1 c2 of 78 | LT -> left 79 | EQ -> middle 80 | GT -> right 81 | 82 | lookup :: Ord c => [c] -> TST c a -> Maybe a 83 | lookup s t = 84 | case prefix s t of 85 | ((s', v):_) -> if s == s' then Just v else Nothing 86 | _ -> Nothing 87 | -------------------------------------------------------------------------------- /Suggest.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, BangPatterns, TupleSections, PatternGuards #-} 2 | module Main where 3 | 4 | import Blaze.ByteString.Builder.Char.Utf8 (fromString) 5 | import Data.ByteString.UTF8 (toString) 6 | import Data.Char (toLower) 7 | import Data.IORef 8 | import Data.List (intercalate, sortBy) 9 | import Data.Ord (comparing) 10 | import Network.HTTP.Types (status200, status404) 11 | import Network.Wai 12 | import Network.Wai.Handler.Warp 13 | 14 | import Wildcard 15 | import TST 16 | 17 | import Prelude hiding (lookup) 18 | 19 | dictFile :: FilePath 20 | dictFile = "resources/frequency" 21 | 22 | searchPage :: FilePath 23 | searchPage = "resources/search.html" 24 | 25 | wordsLimit :: Int 26 | wordsLimit = 10 27 | 28 | jsonList :: [String] -> String 29 | jsonList ws = "[" ++ intercalate "," (map show ws) ++ "]" 30 | 31 | type Dictionary = TST Char Int 32 | type SuggestCache = TST Char [String] 33 | type CorrectorCache = TST Char String 34 | 35 | lookupSuggestCache :: Dictionary -> IORef SuggestCache -> String -> IO [String] 36 | lookupSuggestCache dict cache w = do 37 | wsm <- fmap (lookup w) (readIORef cache) 38 | case wsm of 39 | Just ws -> return ws 40 | Nothing -> do 41 | let ws = take wordsLimit . map fst . sortBy (comparing snd) . prefix w $ dict 42 | atomicModifyIORef cache ((, ()) . insert w ws) 43 | return ws 44 | 45 | suggest :: Dictionary -> IORef SuggestCache -> String -> IO Response 46 | suggest dict cache w = do 47 | ws <- lookupSuggestCache dict cache w 48 | return $ responseBuilder status200 [("Content-Type", "application/json")] 49 | (fromString . jsonList $ ws) 50 | 51 | lookupCorrectorCache :: Dictionary -> IORef CorrectorCache -> String -> IO String 52 | lookupCorrectorCache dict cache w = do 53 | wsm <- fmap (lookup w) (readIORef cache) 54 | case wsm of 55 | Just wm -> return wm 56 | Nothing -> do 57 | let wm | Just _ <- lookup w dict = w 58 | | (w' : _) <- edits1 = w' 59 | | (w' : _) <- edits2 = w' 60 | | otherwise = w 61 | atomicModifyIORef cache ((, ()) . insert w wm) 62 | return wm 63 | where 64 | process = map fst . sortBy (comparing snd) . concatMap (flip matchWL dict) 65 | edits1 = process . edits . wildList $ w 66 | edits2 = process . concatMap edits . edits . wildList $ w 67 | 68 | correct :: Dictionary -> IORef CorrectorCache -> [String] -> IO Response 69 | correct dict cache ws = do 70 | wm <- mapM (lookupCorrectorCache dict cache) $ ws 71 | return $ responseBuilder status200 [("Content-Type", "application/json")] 72 | (fromString . unwords $ wm) 73 | 74 | search :: Response 75 | search = responseFile status200 [("Content-Type", "text/html")] searchPage Nothing 76 | 77 | e404 :: Response 78 | e404 = responseBuilder status404 [("Content-Type", "text/html")] (fromString "404") 79 | 80 | app :: Dictionary -> IORef SuggestCache -> IORef CorrectorCache -> Application 81 | app dict scache ccache req respond = case rawPathInfo req of 82 | "/suggest.json" -> case queryString req of 83 | [("q", (Just w))] -> respond =<< suggest dict scache (toString w) 84 | _ -> respond e404 85 | "/correct.json" -> case queryString req of 86 | [("q", (Just w))] -> respond =<< correct dict ccache (words $ toString w) 87 | _ -> respond e404 88 | "/" -> respond search 89 | _ -> respond e404 90 | 91 | main :: IO () 92 | main = do 93 | !dict <- fmap (fromList . flip zip [1..] . lines . map toLower) $ readFile dictFile 94 | scache <- newIORef empty 95 | ccache <- newIORef empty 96 | putStrLn "Server ready on port 3000" 97 | run 3000 (app dict scache ccache) 98 | -------------------------------------------------------------------------------- /resources/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Foogle 6 | 7 | 8 | 50 | 51 | 137 | 138 | 139 | 140 |

Foogle

141 |
142 | Maybe you meant “” 143 |
144 | 148 | 149 | 150 | --------------------------------------------------------------------------------