9 | cabal-version: >= 1.6
10 | build-type: Simple
11 |
12 | source-repository head
13 | type: git
14 | location: git://github.com/bitonic/suggest
15 | branch: master
16 |
17 | executable suggest
18 | main-is: Suggest.hs
19 |
20 | build-depends: base >= 4 && < 5
21 | , warp
22 | , wai
23 | , conduit
24 | , blaze-builder
25 | , http-types
26 | , utf8-string
27 | , bytestring
28 | , mtl
29 |
30 | GHC-options: -Wall -O2 -fno-warn-unused-do-bind -threaded
31 |
--------------------------------------------------------------------------------
/Wildcard.hs:
--------------------------------------------------------------------------------
1 | module Wildcard where
2 |
3 | data Wildcard a = Wildcard
4 | | El a
5 | deriving (Eq, Ord)
6 |
7 |
8 | type WildList a = [Wildcard a]
9 |
10 | instance Show a => Show (Wildcard a) where
11 | show Wildcard = "*"
12 | show (El c) = show c
13 |
14 | wildList :: [a] -> WildList a
15 | wildList = map El
16 |
17 | wildString :: String -> WildList Char
18 | wildString = map (\c -> if c == '*' then Wildcard else El c)
19 |
20 | matches :: Eq a => WildList a -> [a] -> Bool
21 | matches [] [] = True
22 | matches [] _ = False
23 | matches _ [] = False
24 | matches (Wildcard : w) (_ : s) = matches w s
25 | matches (El c1 : w) (c2 : s) | c1 == c2 = matches w s
26 | | otherwise = False
27 |
28 | edits :: WildList a -> [WildList a]
29 | edits s' = concatMap init [ deleted s'
30 | , transposes s'
31 | , replaces s'
32 | , inserts s'
33 | ]
34 | where
35 | deleted [] = [[]]
36 | deleted (c : s) = s : map (c :) (deleted s)
37 |
38 | transposes [] = [[]]
39 | transposes [x] = [[x]]
40 | transposes (x : y : s) = (y : x : s) : map (x :) (transposes (y : s))
41 |
42 | replaces [] = [[]]
43 | replaces (c : s) = (Wildcard : s) : map (c :) (replaces s)
44 |
45 | inserts [] = [[]]
46 | inserts (c : s) = (Wildcard : c : s) : map (c :) (inserts s)
47 |
48 |
--------------------------------------------------------------------------------
/TST.hs:
--------------------------------------------------------------------------------
1 | module TST
2 | ( TST
3 | , empty
4 | , singleton
5 | , toList
6 | , fromList
7 | , insert
8 | , prefix
9 | , matchWL
10 | , lookup
11 | ) where
12 |
13 | import Control.Arrow (first)
14 |
15 | import Wildcard
16 |
17 | import Prelude hiding (lookup)
18 |
19 | data TST c a = Branch c (TST c a) (TST c a) (TST c a)
20 | | End a (TST c a)
21 | | Null
22 |
23 | instance (Ord c, Show c, Show a) => Show (TST c a) where
24 | show = ("fromList " ++) . show . toList
25 |
26 | instance (Ord c, Eq c, Eq a) => Eq (TST c a) where
27 | t1 == t2 = toList t1 == toList t2
28 |
29 | empty :: TST l a
30 | empty = Null
31 |
32 | singleton :: [c] -> a -> TST c a
33 | singleton [] v = End v Null
34 | singleton (c : s) v = Branch c Null (singleton s v) Null
35 |
36 | toList :: Ord c => TST c a -> [([c], a)]
37 | toList = prefix []
38 |
39 | fromList :: Ord c => [([c], a)] -> TST c a
40 | fromList = foldr (uncurry insert) empty
41 |
42 | insert :: Ord c => [c] -> a -> TST c a -> TST c a
43 | insert [] v Null = End v Null
44 | insert [] v (End _ t) = End v t
45 | insert [] v (Branch c l m r) = Branch c (insert [] v l) m r
46 | insert s v Null = singleton s v
47 | insert s v1 (End v2 t) = End v2 (insert s v1 t)
48 | insert (c1 : s) v (Branch c2 l m r) =
49 | case compare c1 c2 of
50 | LT -> Branch c2 (insert (c1 : s) v l) m r
51 | EQ -> Branch c2 l (insert s v m) r
52 | GT -> Branch c2 l m (insert (c1 : s) v r)
53 |
54 | prefix :: Ord c => [c] -> TST c a -> [([c], a)]
55 | prefix _ Null = []
56 | prefix [] (End v t) = ([], v) : prefix [] t
57 | prefix [] (Branch c l m r) =
58 | prefix [] l ++ map (first (c :)) (prefix [] m) ++ prefix [] r
59 | prefix s (End _ t) = prefix s t
60 | prefix (c1 : s) (Branch c2 l m r) =
61 | case compare c1 c2 of
62 | LT -> prefix (c1 : s) l
63 | EQ -> map (first (c1 :)) (prefix s m)
64 | GT -> prefix (c1 : s) r
65 |
66 | matchWL :: Ord c => WildList c -> TST c a -> [([c], a)]
67 | matchWL _ Null = []
68 | matchWL [] (End v _) = [([], v)]
69 | matchWL [] (Branch _ l _ _) = matchWL [] l
70 | matchWL s (End _ t) = matchWL s t
71 | matchWL (w : s) (Branch c2 l m r) =
72 | let left = matchWL (w : s) l
73 | middle = map (first (c2 :)) (matchWL s m)
74 | right = matchWL (w : s) r
75 | in case w of
76 | Wildcard -> left ++ middle ++ right
77 | El c1 -> case compare c1 c2 of
78 | LT -> left
79 | EQ -> middle
80 | GT -> right
81 |
82 | lookup :: Ord c => [c] -> TST c a -> Maybe a
83 | lookup s t =
84 | case prefix s t of
85 | ((s', v):_) -> if s == s' then Just v else Nothing
86 | _ -> Nothing
87 |
--------------------------------------------------------------------------------
/Suggest.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedStrings, BangPatterns, TupleSections, PatternGuards #-}
2 | module Main where
3 |
4 | import Blaze.ByteString.Builder.Char.Utf8 (fromString)
5 | import Data.ByteString.UTF8 (toString)
6 | import Data.Char (toLower)
7 | import Data.IORef
8 | import Data.List (intercalate, sortBy)
9 | import Data.Ord (comparing)
10 | import Network.HTTP.Types (status200, status404)
11 | import Network.Wai
12 | import Network.Wai.Handler.Warp
13 |
14 | import Wildcard
15 | import TST
16 |
17 | import Prelude hiding (lookup)
18 |
19 | dictFile :: FilePath
20 | dictFile = "resources/frequency"
21 |
22 | searchPage :: FilePath
23 | searchPage = "resources/search.html"
24 |
25 | wordsLimit :: Int
26 | wordsLimit = 10
27 |
28 | jsonList :: [String] -> String
29 | jsonList ws = "[" ++ intercalate "," (map show ws) ++ "]"
30 |
31 | type Dictionary = TST Char Int
32 | type SuggestCache = TST Char [String]
33 | type CorrectorCache = TST Char String
34 |
35 | lookupSuggestCache :: Dictionary -> IORef SuggestCache -> String -> IO [String]
36 | lookupSuggestCache dict cache w = do
37 | wsm <- fmap (lookup w) (readIORef cache)
38 | case wsm of
39 | Just ws -> return ws
40 | Nothing -> do
41 | let ws = take wordsLimit . map fst . sortBy (comparing snd) . prefix w $ dict
42 | atomicModifyIORef cache ((, ()) . insert w ws)
43 | return ws
44 |
45 | suggest :: Dictionary -> IORef SuggestCache -> String -> IO Response
46 | suggest dict cache w = do
47 | ws <- lookupSuggestCache dict cache w
48 | return $ responseBuilder status200 [("Content-Type", "application/json")]
49 | (fromString . jsonList $ ws)
50 |
51 | lookupCorrectorCache :: Dictionary -> IORef CorrectorCache -> String -> IO String
52 | lookupCorrectorCache dict cache w = do
53 | wsm <- fmap (lookup w) (readIORef cache)
54 | case wsm of
55 | Just wm -> return wm
56 | Nothing -> do
57 | let wm | Just _ <- lookup w dict = w
58 | | (w' : _) <- edits1 = w'
59 | | (w' : _) <- edits2 = w'
60 | | otherwise = w
61 | atomicModifyIORef cache ((, ()) . insert w wm)
62 | return wm
63 | where
64 | process = map fst . sortBy (comparing snd) . concatMap (flip matchWL dict)
65 | edits1 = process . edits . wildList $ w
66 | edits2 = process . concatMap edits . edits . wildList $ w
67 |
68 | correct :: Dictionary -> IORef CorrectorCache -> [String] -> IO Response
69 | correct dict cache ws = do
70 | wm <- mapM (lookupCorrectorCache dict cache) $ ws
71 | return $ responseBuilder status200 [("Content-Type", "application/json")]
72 | (fromString . unwords $ wm)
73 |
74 | search :: Response
75 | search = responseFile status200 [("Content-Type", "text/html")] searchPage Nothing
76 |
77 | e404 :: Response
78 | e404 = responseBuilder status404 [("Content-Type", "text/html")] (fromString "404")
79 |
80 | app :: Dictionary -> IORef SuggestCache -> IORef CorrectorCache -> Application
81 | app dict scache ccache req respond = case rawPathInfo req of
82 | "/suggest.json" -> case queryString req of
83 | [("q", (Just w))] -> respond =<< suggest dict scache (toString w)
84 | _ -> respond e404
85 | "/correct.json" -> case queryString req of
86 | [("q", (Just w))] -> respond =<< correct dict ccache (words $ toString w)
87 | _ -> respond e404
88 | "/" -> respond search
89 | _ -> respond e404
90 |
91 | main :: IO ()
92 | main = do
93 | !dict <- fmap (fromList . flip zip [1..] . lines . map toLower) $ readFile dictFile
94 | scache <- newIORef empty
95 | ccache <- newIORef empty
96 | putStrLn "Server ready on port 3000"
97 | run 3000 (app dict scache ccache)
98 |
--------------------------------------------------------------------------------
/resources/search.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Foogle
6 |
7 |
8 |
50 |
51 |
137 |
138 |
139 |
140 | Foogle
141 |
142 | Maybe you meant “”
143 |
144 |
148 |
149 |
150 |
--------------------------------------------------------------------------------