├── Examples
    ├── align.gr
    ├── chess.gr
    ├── cross.gr
    ├── diam.gr
    ├── digits.gr
    ├── nether.gr
    ├── noq.gr
    ├── prelude.gr
    ├── square.gr
    ├── whole-chess.gr
    └── word.gr
├── Expression.hs
├── LICENSE
├── Matcher.hs
├── Parser.hs
├── PrattParser.hs
├── README.md
├── grime.hs
├── rungrime
└── tutorial.md


/Examples/align.gr:
--------------------------------------------------------------------------------
1 | | Align two #-characters horizontally or vertically.
2 | \#.*\#|\#/./*/\#


--------------------------------------------------------------------------------
/Examples/chess.gr:
--------------------------------------------------------------------------------
1 | | Match a chessboard of _# of size at least 3x3
2 | (\#\#|\#/\#|\_\_|\_/\_)#!&[#_]{3-}


--------------------------------------------------------------------------------
/Examples/cross.gr:
--------------------------------------------------------------------------------
1 | | Match a "cross" of #-chars.
2 | E=\.+/+
3 | F=\#+/+
4 | EFE/F/EFE&E/F/E F E/F/E


--------------------------------------------------------------------------------
/Examples/diam.gr:
--------------------------------------------------------------------------------
 1 | | Match a diamond shape, like this:
 2 | |  X
 3 | | / \
 4 | |X   X
 5 | | \ /
 6 | |  X
 7 | C=./+
 8 | T=\X|CTC/\/.+\\
 9 | B=\X|\\.+\//CBC
10 | CTC/\X.+\X/CBC


--------------------------------------------------------------------------------
/Examples/digits.gr:
--------------------------------------------------------------------------------
1 | | Match a 2x2 or larger rectangle of digits.
2 | d{2-}


--------------------------------------------------------------------------------
/Examples/nether.gr:
--------------------------------------------------------------------------------
1 | | Match a rectangular "nether portal".
2 | .\X+./\X/+\.{2-22,3-22}\X/+/.\X+.


--------------------------------------------------------------------------------
/Examples/noq.gr:
--------------------------------------------------------------------------------
1 | | Match a 4x4 rectangle without q or Q.
2 | [,qQ]{4}


--------------------------------------------------------------------------------
/Examples/prelude.gr:
--------------------------------------------------------------------------------
1 | | Verify Prelude syntax: every column has at most one ( or ),
2 | | and they are balanced if their y-coords are ignored.
3 | A=[,()]/*
4 | P=A*|P(A/\(/A)P(A/\)/A)P
5 | e`P
6 | 


--------------------------------------------------------------------------------
/Examples/square.gr:
--------------------------------------------------------------------------------
1 | | Verify that input is a square.
2 | S=.|S./+/.+
3 | e`S
4 | 


--------------------------------------------------------------------------------
/Examples/whole-chess.gr:
--------------------------------------------------------------------------------
1 | | Verify that input is a chessboard of #_.
2 | e`(\#\#|\#/\#|\_\_|\_/\_)#!&[#_]+/+
3 | 


--------------------------------------------------------------------------------
/Examples/word.gr:
--------------------------------------------------------------------------------
1 | | Find the word GOLF in any orientation.
2 | G=\G
3 | O=\O
4 | L=\L
5 | F=\F
6 | GOLF|FLOG|G/O/L/F|F/L/O/G|G.../.O../..L./...F|...G/..O./.L../F...|F.../.L../..O./...G|...F/..L./.O../G...


--------------------------------------------------------------------------------
/Expression.hs:
--------------------------------------------------------------------------------
  1 | module Expression where
  2 | 
  3 | import Data.Set (Set, toAscList)
  4 | import Data.Map (Map)
  5 | import Data.Monoid (Monoid(..), (<>))
  6 | 
  7 | -- The label of a variable
  8 | type Label = Maybe Char
  9 | 
 10 | -- Context anchor label; begins from 0
 11 | type AnchorLabel = Int
 12 | 
 13 | type Coord = (Int, Int) -- x, y
 14 | type Size = (Int, Int) -- w, h
 15 | type Rect = (Int, Int, Int, Int) -- x, y, w, h
 16 | type Range = (Int, Maybe Int)
 17 | 
 18 | -- Dihedral group D4 of orientations
 19 | -- Rot n is rotation by n*90 degrees
 20 | -- RefRot n is Rot n, then reflection by vertical line
 21 | data D4 = Rot Int | RefRot Int
 22 |   deriving (Eq, Ord)
 23 | 
 24 | instance Show D4 where
 25 |   show (Rot 0) = ""
 26 |   show (Rot n) = "o" ++ show n
 27 |   show (RefRot n) = "o" ++ show (n + 4)
 28 | 
 29 | axisPreserving :: D4 -> Bool
 30 | axisPreserving (Rot 0) = True
 31 | axisPreserving (Rot 2) = True
 32 | axisPreserving (RefRot 0) = True
 33 | axisPreserving (RefRot 2) = True
 34 | axisPreserving _ = False
 35 | 
 36 | instance Monoid D4 where
 37 |   mempty = Rot 0
 38 |   mappend (Rot m) (Rot n) = Rot $ (m + n) `mod` 4
 39 |   mappend (RefRot m) (Rot n) = RefRot $ (m + n) `mod` 4
 40 |   mappend (Rot m) (RefRot n) = RefRot $ (n - m) `mod` 4
 41 |   mappend (RefRot m) (RefRot n) = Rot $ (n - m) `mod` 4
 42 | 
 43 | -- An expression that may or may not match a rectangle of characters
 44 | data Expr = Border                   -- Matches the rectangle border symbol
 45 |           | Edge                     -- Matches an edge segment
 46 |           | AnyRect                  -- Mathces any rectangle
 47 |           | AnyChar                  -- Matches any single character (not border)
 48 |           | SomeChar Bool
 49 |             (Set (Maybe Char))       -- Matches if flag XOR char in set; Nothing matches border
 50 |           | Var D4 Label             -- Matches the given oriented variable
 51 |           | Expr :> Expr             -- Horizontal concatenation
 52 |           | Expr :^ Expr             -- Vertical concatenation
 53 |           | Expr :| Expr             -- Disjunction
 54 |           | Expr :& Expr             -- Conjunction
 55 |           | Expr :~ Expr             -- Exclusive OR
 56 |           | Not Expr                 -- Negation
 57 |           | Sized Range Range Expr   -- Size range
 58 |           | Grid Range Range Expr    -- Grid of repetitions
 59 |           | Count Range Expr         -- Count number of matches
 60 |           | InContext Expr           -- Context brackets
 61 |           | Anchor AnchorLabel       -- Context anchor
 62 |           | Fixed Expr               -- Fixed orientation
 63 |           deriving (Eq)
 64 | 
 65 | instance Show Expr where
 66 |   show Border = "b"
 67 |   show Edge = "e"
 68 |   show AnyRect = "$"
 69 |   show AnyChar = "."
 70 |   show (SomeChar isPos charSet) =
 71 |     if isPos
 72 |     then "[p:" ++ concatMap (maybe "\\b" $ \c -> if c == '\\' then "\\\\" else [c]) (toAscList charSet) ++ "]"
 73 |     else "[n:" ++ concatMap (maybe "\\b" $ \c -> if c == '\\' then "\\\\" else [c]) (toAscList charSet) ++ "]"
 74 |   show (Var rot Nothing) = "_" ++ show rot
 75 |   show (Var rot (Just a)) = [a] ++ show rot
 76 |   show (e1 :> e2) = show e1 ++ show e2
 77 |   show (e1 :^ e2) = "(" ++ show e1 ++ "/" ++ show e2 ++ ")"
 78 |   show (e1 :| e2) = "(" ++ show e1 ++ "|" ++ show e2 ++ ")"
 79 |   show (e1 :& e2) = "(" ++ show e1 ++ "&" ++ show e2 ++ ")"
 80 |   show (e1 :~ e2) = "(" ++ show e1 ++ "~" ++ show e2 ++ ")"
 81 |   show (Not e) = "(" ++ show e ++ ")!"
 82 |   show (Sized (x1,x2) (y1,y2) e) =
 83 |     "(" ++ show e ++ "){" ++ show x1 ++ "-" ++ sx2 ++ "," ++ show y1 ++ "-" ++ sy2 ++ "}"
 84 |     where sx2 = case x2 of Nothing -> ""; Just x -> show x
 85 |           sy2 = case y2 of Nothing -> ""; Just y -> show y
 86 |   show (Grid (x1,x2) (y1,y2) e) =
 87 |     "(" ++ show e ++ "):" ++ show x1 ++ "-" ++ sx2 ++ "," ++ show y1 ++ "-" ++ sy2 ++ "}"
 88 |     where sx2 = case x2 of Nothing -> ""; Just x -> show x
 89 |           sy2 = case y2 of Nothing -> ""; Just y -> show y
 90 |   show (Count (low, high) e) =
 91 |     "(" ++ show e ++ ")#" ++ show low ++ "-" ++ showHigh ++ "}"
 92 |     where showHigh = case high of Nothing -> ""; Just n -> show n
 93 |   show (InContext e) = "<" ++ show e ++ ">"
 94 |   show (Anchor n) = show n
 95 |   show (Fixed e) = "(" ++ show e ++ ")oF"
 96 | 
 97 | -- Rotate and/or reflect an expression
 98 | orient :: Expr -> D4 -> Expr
 99 | orient Border _ = Border
100 | orient Edge _ = Edge
101 | orient AnyRect _ = AnyRect
102 | orient AnyChar _ = AnyChar
103 | orient e@(SomeChar _ _) _ = e
104 | orient (Var rot1 label) rot2 = Var (rot2 <> rot1) label
105 | orient e@(e1 :> e2) rot = case rot of
106 |   Rot 0 -> e
107 |   Rot 1 -> orient e2 rot :^ orient e1 rot
108 |   Rot 2 -> orient e2 rot :> orient e1 rot
109 |   Rot 3 -> orient e1 rot :^ orient e2 rot
110 |   RefRot 0 -> orient e2 rot :> orient e1 rot
111 |   RefRot 1 -> orient e2 rot :^ orient e1 rot
112 |   RefRot 2 -> orient e1 rot :> orient e2 rot
113 |   RefRot 3 -> orient e1 rot :^ orient e2 rot
114 | orient e@(e1 :^ e2) rot = case rot of
115 |   Rot 0 -> e
116 |   Rot 1 -> orient e1 rot :> orient e2 rot
117 |   Rot 2 -> orient e2 rot :^ orient e1 rot
118 |   Rot 3 -> orient e2 rot :> orient e1 rot
119 |   RefRot 0 -> orient e1 rot :^ orient e2 rot
120 |   RefRot 1 -> orient e2 rot :> orient e1 rot
121 |   RefRot 2 -> orient e2 rot :^ orient e1 rot
122 |   RefRot 3 -> orient e1 rot :> orient e2 rot
123 | orient (e1 :| e2) rot = orient e1 rot :| orient e2 rot
124 | orient (e1 :& e2) rot = orient e1 rot :& orient e2 rot
125 | orient (e1 :~ e2) rot = orient e1 rot :~ orient e2 rot
126 | orient (Not e) rot = Not $ orient e rot
127 | orient (Sized (x1,x2) (y1,y2) e) rot =
128 |   if axisPreserving rot
129 |   then Sized (x1,x2) (y1,y2) $ orient e rot
130 |   else Sized (y1,y2) (x1,x2) $ orient e rot
131 | orient (Grid (x1,x2) (y1,y2) e) rot =
132 |   if axisPreserving rot
133 |   then Grid (x1,x2) (y1,y2) $ orient e rot
134 |   else Grid (y1,y2) (x1,x2) $ orient e rot
135 | orient (Count range e) rot = Count range $ orient e rot
136 | orient (InContext e) rot = InContext $ orient e rot
137 | orient e@(Anchor _) _ = e
138 | orient e@(Fixed _) _ = e
139 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Ilkka Törmä
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/Matcher.hs:
--------------------------------------------------------------------------------
  1 | module Matcher where
  2 | 
  3 | import Expression
  4 | import Prelude hiding (lookup)
  5 | import Data.Map.Strict (Map, empty, lookup, insert)
  6 | import qualified Data.Map.Strict as Map (filter, size)
  7 | import Data.Set (member)
  8 | import Data.Monoid (Any(Any), getAny, mempty)
  9 | import Data.Maybe (isNothing)
 10 | import qualified Data.List.Ordered as Asc (member, isect, union, unionAll, nub)
 11 | import Control.Applicative ((<$>), liftA2)
 12 | import Control.Monad.Trans.Class (lift)
 13 | import Control.Monad.Writer.Lazy (WriterT, tell, listens, runWriterT)
 14 | import Control.Monad.Reader (ReaderT, asks, reader, local, runReaderT)
 15 | import Control.Monad.State.Lazy (State, gets, modify, evalState)
 16 | 
 17 | -- A fuzzy match
 18 | data Match = NoMatch
 19 |            | Unknown
 20 |            | Match
 21 |            deriving (Eq,Ord,Show)
 22 | 
 23 | invert :: Match -> Match
 24 | invert Match = NoMatch
 25 | invert NoMatch = Match
 26 | invert Unknown = Unknown
 27 | 
 28 | -- Short-circuiting monadic utility functions for fuzzy matches
 29 | (&?) :: (Monad m) => m Match -> m Match -> m Match
 30 | f &? g = do
 31 |   a <- f
 32 |   case a of
 33 |    NoMatch -> return NoMatch
 34 |    _ -> do
 35 |      b <- g
 36 |      return $ case (a,b) of
 37 |       (_, NoMatch) -> NoMatch
 38 |       (Match, Match) -> Match
 39 |       _ -> Unknown
 40 | 
 41 | (|?) :: (Monad m) => m Match -> m Match -> m Match
 42 | f |? g = do
 43 |   a <- f
 44 |   case a of
 45 |    Match -> return Match
 46 |    _ -> do
 47 |      b <- g
 48 |      return $ case (a,b) of
 49 |       (_, Match) -> Match
 50 |       (NoMatch, NoMatch) -> NoMatch
 51 |       _ -> Unknown
 52 | 
 53 | anyMatch :: (Monad m) => [a] -> (a -> m Match) -> m Match
 54 | anyMatch xs f = foldr (|?) (return NoMatch) $ f <$> xs
 55 | 
 56 | allMatch :: (Monad m) => [a] -> (a -> m Match) -> m Match
 57 | allMatch xs f = foldr (&?) (return Match) $ f <$> xs
 58 | 
 59 | -- Keep those that match, signal if any was unknown
 60 | filterMatch :: (Monad m) => [a] -> (a -> m Match) -> m ([a], Bool)
 61 | filterMatch [] _ = return ([], False)
 62 | filterMatch (x : xs) p = do
 63 |   res <- p x
 64 |   (found, unk) <- filterMatch xs p
 65 |   return $ case res of
 66 |     Match   -> (x:found, unk)
 67 |     Unknown -> (found, True)
 68 |     NoMatch -> (found, unk)
 69 | 
 70 | -- Sum of two strictly ordered lists
 71 | ascSum :: [Int] -> [Int] -> [Int]
 72 | ascSum xs = go
 73 |   where go (y:ys) = Asc.nub . Asc.union [y + x | x <- xs] $ go ys
 74 |         go [] = []
 75 | 
 76 | -- A memoization of matches
 77 | type Classification = Map (Rect, D4, Label) Match
 78 | 
 79 | -- A changing context for matching in a matrix
 80 | data Context = Context {size :: Size,
 81 |                         matrix :: Map Coord Char,
 82 |                         clauses :: Map Label Expr,
 83 |                         hasBorders :: Bool,
 84 |                         anchors :: [Rect],
 85 |                         depth :: Int}
 86 |             deriving (Show)
 87 | 
 88 | -- A monad for performing matching in a matrix
 89 | -- The String is for logging, and the Any for keeping track of new definite matches or non-matches
 90 | type Matcher a = WriterT (String, Any) (ReaderT Context (State Classification)) a
 91 | 
 92 | -- Modify context anchors, increase logging depth
 93 | withAnchors :: ([Rect] -> [Rect]) -> Matcher a -> Matcher a
 94 | withAnchors f = local $ \con -> con{anchors = f $ anchors con, depth = depth con + 1}
 95 | 
 96 | -- Helper function for logging
 97 | logMsg :: String -> Matcher ()
 98 | logMsg message = do
 99 |   level <- asks depth
100 |   tell (replicate level ' ' ++ message, mempty)
101 | 
102 | -- Possible widths and heights of matching rectangles
103 | -- May give false positives, but never false negatives
104 | sizes :: Expr -> Matcher ([Int], [Int])
105 | sizes expr = do
106 |   numVars <- asks $ Map.size . clauses
107 |   go numVars expr
108 |   where
109 |     go :: Int -> Expr -> Matcher ([Int], [Int])
110 |     go _ Border = return ([1], [1])
111 |     go _ Edge = do
112 |       (maxX, maxY) <- asks size
113 |       return ([0..maxX+2], [0..maxY+2])
114 |     go _ AnyRect = do
115 |       (maxX, maxY) <- asks size
116 |       return ([0..maxX+2], [0..maxY+2])
117 |     go _ AnyChar = return ([1], [1])
118 |     go _ (SomeChar _ _) = return ([1], [1])
119 |     go 0 (Var _ _) = do
120 |       (maxX, maxY) <- asks size
121 |       return ([0..maxX+2], [0..maxY+2])
122 |     go n (Var rot label) = do
123 |       Just expr <- asks $ lookup label . clauses
124 |       go (n-1) $ orient expr rot
125 |     go n (e1 :> e2) = do
126 |       (ws1, hs1) <- go n e1
127 |       (ws2, hs2) <- go n e2
128 |       return (ascSum ws1 ws2, Asc.isect hs1 hs2)
129 |     go n (e1 :^ e2) = do
130 |       (ws1, hs1) <- go n e1
131 |       (ws2, hs2) <- go n e2
132 |       return (Asc.isect ws1 ws2, ascSum hs1 hs2)
133 |     go n (e1 :| e2) = do
134 |       (ws1, hs1) <- go n e1
135 |       (ws2, hs2) <- go n e2
136 |       return (Asc.nub $ Asc.union ws1 ws2, Asc.nub $ Asc.union hs1 hs2)
137 |     go n (e1 :& e2) = do
138 |       (ws1, hs1) <- go n e1
139 |       (ws2, hs2) <- go n e2
140 |       return (Asc.isect ws1 ws2, Asc.isect hs1 hs2)
141 |     go n (e1 :~ e2) = do
142 |       (ws1, hs1) <- go n e1
143 |       (ws2, hs2) <- go n e2
144 |       return (Asc.nub $ Asc.union ws1 ws2, Asc.nub $ Asc.union hs1 hs2)
145 |     go _ (Not _) = do
146 |       (maxX, maxY) <- asks size
147 |       return ([0..maxX+2], [0..maxY+2])
148 |     go n (Sized (x1,x2) (y1,y2) e) = do
149 |       (maxX, maxY) <- asks size
150 |       (ws, hs) <- case e of
151 |         Border -> return ([0..maxX+2], [0..maxY+2])
152 |         AnyChar -> return ([0..maxX+2], [0..maxY+2])
153 |         SomeChar _ _ -> return ([0..maxX+2], [0..maxY+2])
154 |         _ -> go n e
155 |       let wRange = case x2 of Just high -> [x1..high]
156 |                               Nothing -> [x1..]
157 |           hRange = case y2 of Just high -> [y1..high]
158 |                               Nothing -> [y1..]
159 |       return (Asc.isect ws wRange, Asc.isect hs hRange)
160 |     go _ (Grid (x1,x2) (y1,y2) e) = do
161 |       (maxX, maxY) <- asks size
162 |       return ([0..maxX+2], [0..maxY+2]) -- TODO: implement
163 |     go _ (Count (low, high) e) = do
164 |       (maxX, maxY) <- asks size
165 |       return ([0..maxX+2], [0..maxY+2])
166 |     go _ (InContext e) = do
167 |       (maxX, maxY) <- asks size
168 |       return ([0..maxX+2], [0..maxY+2])
169 |     go _ (Anchor n) = do
170 |       anchors <- asks anchors
171 |       (maxX, maxY) <- asks size
172 |       return $ if length anchors > n
173 |                then let (_, _, w, h) = anchors !! n in ([w], [h])
174 |                else ([0..maxX+2], [0..maxY+2])
175 |     go n (Fixed e) = go n e
176 | 
177 | -- Does the pattern match? Update all sub-rectangles as needed
178 | matches :: Expr -> Rect -> Matcher Match
179 | 
180 | matches Border (x, y, 1, 1) = do
181 |   ch <- lift $ asks $ lookup (x, y) . matrix
182 |   return $ case ch of
183 |     Nothing -> Match
184 |     Just _ -> NoMatch
185 | matches Border _ = return NoMatch
186 | 
187 | matches Edge (x, y, w, h) = do
188 |   (maxX, maxY) <- asks size
189 |   return $ if (x == 0 || x == maxX) && w == 0 && y >= 0 && y+h <= maxY ||
190 |               (y == 0 || y == maxY) && h == 0 && x >= 0 && x+w <= maxX
191 |            then Match
192 |            else NoMatch
193 | 
194 | matches AnyRect _ = return Match
195 | 
196 | matches AnyChar (x, y, 1, 1) = do
197 |   ch <- lift $ asks $ lookup (x, y) . matrix
198 |   return $ case ch of
199 |     Nothing -> NoMatch
200 |     Just _ -> Match
201 | matches AnyChar _ = return NoMatch
202 | 
203 | matches (SomeChar isPos cs) (x, y, 1, 1) = do
204 |   ch <- lift $ asks $ lookup (x, y) . matrix
205 |   return $ if (ch `member` cs) == isPos then Match else NoMatch
206 | matches (SomeChar _ _) _ = return NoMatch
207 | 
208 | matches (Var rot label) rect = do
209 |   memoed <- gets $ lookup (rect, rot, label)
210 |   case memoed of
211 |     Just b -> do
212 |       logMsg $ "Lookup   " ++ show rot ++ show label ++ " at " ++ show rect ++ ": " ++ show b ++ "\n"
213 |       return b
214 |     Nothing -> do
215 |       modify $ insert (rect, rot, label) Unknown
216 |       logMsg $ "Checking " ++ show rot ++ show label ++ " at " ++ show rect ++ "\n"
217 |       Just expr <- lift $ asks $ lookup label . clauses
218 |       match <- withAnchors (const []) $ matches (orient expr rot) rect
219 |       logMsg $ "Checked  " ++ show rot ++ show label ++ " at " ++ show rect ++ ": " ++ show match ++ "\n"
220 |       tell (mempty, Any $ match /= Unknown)
221 |       modify $ insert (rect, rot, label) match
222 |       return match
223 | 
224 | matches (lExp :> rExp) (x, y, w, h) = do
225 |   (wsl, hsl) <- sizes lExp
226 |   (wsr, hsr) <- sizes rExp
227 |   let cuts = Asc.isect wsl . reverse . map (w -) $ takeWhile (<= w) wsr
228 |   if h `Asc.member` Asc.isect hsl hsr
229 |     then anyMatch cuts $ \i -> matches lExp (x, y, i, h) &? matches rExp (x+i, y, w-i, h)
230 |     else return NoMatch
231 | 
232 | matches (tExp :^ bExp) (x, y, w, h) = do
233 |   (wsl, hsl) <- sizes tExp
234 |   (wsr, hsr) <- sizes bExp
235 |   let cuts = Asc.isect hsl . reverse . map (h -) $ takeWhile (<= h) hsr
236 |   if w `Asc.member` Asc.isect wsl wsr
237 |     then anyMatch cuts $ \j -> matches tExp (x, y, w, j) &? matches bExp (x, y+j, w, h-j)
238 |     else return NoMatch
239 |           
240 | matches (exp1 :| exp2) rect = matches exp1 rect |? matches exp2 rect
241 | 
242 | matches (exp1 :& exp2) rect = matches exp1 rect &? matches exp2 rect
243 | 
244 | matches (exp1 :~ exp2) rect = do
245 |   first <- matches exp1 rect
246 |   second <- matches exp2 rect
247 |   return $ case (first, second) of
248 |     (Unknown, _) -> Unknown
249 |     (_, Unknown) -> Unknown
250 |     _            -> if first /= second then Match else NoMatch
251 | 
252 | matches (Not expr) rect = invert <$> matches expr rect
253 | 
254 | matches (Sized (x1, x2) (y1, y2) expr) r@(x, y, w, h) = do
255 |   let xOk = x1 <= w && case x2 of Nothing -> True; Just x3 -> w <= x3
256 |       yOk = y1 <= h && case y2 of Nothing -> True; Just y3 -> h <= y3
257 |   return (if xOk && yOk then Match else NoMatch) &? case expr of
258 |     Border -> allCells
259 |     AnyChar -> allCells
260 |     SomeChar _ _ -> allCells
261 |     _ -> matches expr r
262 |   where allCells = allMatch [(x+i, y+j, 1, 1) | i <- [0..w-1], j <- [0..h-1]] $
263 |                    matches expr
264 | 
265 | matches (Grid (0, _) _ _) (_, _, 0, _) = return Match
266 | matches (Grid _ (0, _) _) (_, _, _, 0) = return Match
267 | matches (Grid xr@(x1, x2) yr@(y1, y2) expr) r@(x, y, w, h) = do
268 |   (ws, hs) <- sizes expr
269 |   goWith ws hs
270 |   where goWith widths heights = go False False 0 0 [x] [y]
271 |           where
272 |             -- Arguments:
273 |             --  a) Have we placed double vertical lines
274 |             --  b) Have we placed double horizontal lines
275 |             --  c) How many vertical lines have we placed
276 |             --  d) How many horizontal lines have we placed
277 |             --  e) List of vertical cell borders (reversed)
278 |             --  f) List of horizontal cell borders (reversed)
279 |             go :: Bool -> Bool -> Int -> Int -> [Int] -> [Int] -> Matcher Match
280 |             go vOverlap hOverlap numV numH vs@(ver:vers) hs@(hor:hors)
281 |               -- More than allowed vertical lines
282 |               | Just n <- x2, numV > n = return NoMatch
283 |               -- More than allowed horizontal lines
284 |               | Just n <- y2, numH > n = return NoMatch
285 |               -- Reached lower right corner
286 |               | ver == x + w, hor == y + h, vOverlap || x1 <= numV, hOverlap || y1 <= numH = return Match
287 |               -- Incomplete grid => extend
288 |               | otherwise = do
289 |                   let vMin = case () of
290 |                         -- About to reach upper bound on vertical lines => go to end
291 |                         _ | x2 == Just (numV + 1) -> x+w
292 |                         -- No upper bound or already seen overlap => advance at least one cell
293 |                           | isNothing x2 || vOverlap -> ver + 1
294 |                         -- Otherwise may stay in place
295 |                           | otherwise -> ver
296 |                       -- Options for new vertical line
297 |                       vCuts = map (ver +) widths `Asc.isect` [vMin .. x+w]
298 |                       hMin = case () of
299 |                         _ | y2 == Just (numH + 1) -> y+h
300 |                           | isNothing y2 || hOverlap -> hor + 1
301 |                           | otherwise -> hor
302 |                       hCuts = map (hor +) heights `Asc.isect` [hMin .. y+h]
303 |                   -- Only consider vertical lines that produce matches on the right margin
304 |                   (vMargin, unkV) <- filterMatch vCuts $ \newVer ->
305 |                     allMatch [(ver, h1, newVer-ver, h2-h1) | (h1, h2) <- zip (tail hs) hs] $ matches expr
306 |                   -- Same for bottom margin
307 |                   (hMargin, unkH) <- filterMatch hCuts $ \newHor ->
308 |                     allMatch [(v1, hor, v2-v1, newHor-hor) | (v1, v2) <- zip (tail vs) vs] $ matches expr
309 |                   -- Same for corner rectangle
310 |                   (pairs, unkP) <- filterMatch [(newV, newH) | numV == numH, newV <- vMargin, newH <- hMargin] $ \(newV, newH) ->
311 |                     matches expr (ver, hor, newV-ver, newH-hor)
312 |                   -- Recurse for all possible choices of new lines
313 |                   found <- anyMatch ([(numV,   numH+1, vs,        newHor:hs) | numV <= numH, ver == x+w, newHor <- hMargin] ++
314 |                                      [(numV+1, numH,   newVer:vs, hs)        | numV >= numH, hor == y+h, newVer <- vMargin] ++
315 |                                      [(numV+1, numH+1, newVer:vs, newHor:hs) | (newVer, newHor) <- pairs]) $ \(newNumV, newNumH, newVers, newHors) ->
316 |                              go (vOverlap || overlap newVers) (hOverlap || overlap newHors) newNumV newNumH newVers newHors
317 |                   return $ if unkV || unkH || unkP then max Unknown found else found
318 |             overlap (a:b:c) = a == b
319 |             overlap _ = False
320 | 
321 | matches (Count (low, high) expr) (x, y, w, h) = do
322 |   (ws, hs) <- sizes expr
323 |   let allRects = [(x', y', w', h') |
324 |                   w' <- Asc.isect ws [0..w], h' <- Asc.isect hs [0..h],
325 |                   x' <- [x..x+w-w'], y' <- [y..y+h-h']]
326 |       total = length allRects
327 |   go 0 0 total allRects
328 |   where go :: Int -> Int -> Int -> [Rect] -> Matcher Match
329 |         go found unk left _ | found >= low, Nothing <- high = return Match
330 |                             | found >= low, Just n <- high, n >= found + left + unk = return Match
331 |                             | found + unk + left < low = return NoMatch
332 |                             | found + left < low = return Unknown
333 |                             | Just n <- high, found > n = return NoMatch
334 |         go found unk left [] = error "Unreachable state."
335 |         go found unk left (r:rs) = do
336 |           matchR <- matches expr r
337 |           case matchR of
338 |             Match -> go (found+1) unk (left-1) rs
339 |             NoMatch -> go found unk (left-1) rs
340 |             Unknown -> go found (unk+1) (left-1) rs
341 | 
342 | matches (InContext expr) r@(x, y, w, h) = do
343 |   (ws, hs) <- sizes expr
344 |   (maxX', maxY') <- asks size
345 |   addBorders <- asks hasBorders
346 |   let (minX, maxX, minY, maxY) = if addBorders
347 |                                  then (-1, maxX'+1, -1, maxY'+1)
348 |                                  else ( 0, maxX',    0, maxY')
349 |       surrounding = [(x', y', w', h') |
350 |                      w' <- Asc.isect ws [w .. maxX - minX],
351 |                      h' <- Asc.isect hs [h .. maxY - minY],
352 |                      x' <- [max minX $ x + w - w' .. min x $ maxX - w'],
353 |                      y' <- [max minY $ y + h - h' .. min y $ maxY - h']]
354 |   withAnchors (r:) $ anyMatch surrounding $ matches expr
355 | 
356 | matches (Anchor n) r = do
357 |   anchs <- asks anchors
358 |   return $ if anchs !! n == r then Match else NoMatch
359 | 
360 | matches (Fixed e) r = matches e r
361 | 
362 | -- Collect definite matches of Nothing for the given rectangles, possibly looping until no uncertainty remains.
363 | -- Also collect logs for debugging.
364 | matchAllEmpty :: Size -> Bool -> Map Coord Char -> Map Label Expr -> [Rect] -> ([Rect], String)
365 | matchAllEmpty size hasBorders matrix clauses rects =
366 |   flip evalState empty .
367 |   flip runReaderT context .
368 |   fmap (\(a,(b,_)) -> (a,b)) .
369 |   runWriterT $
370 |   go rects
371 |   where context = Context {size = size, matrix = matrix, hasBorders = hasBorders, clauses = clauses, anchors = [], depth = 0}
372 |         selfAndMatch rect = (,) rect <$> matches (Var (Rot 0) Nothing) rect
373 |         go :: [Rect] -> Matcher [Rect]
374 |         go currRects = do
375 |           logMsg "Matching...\n"
376 |           modify $ Map.filter (/= Unknown)
377 |           (currMatches, changed) <- listens (getAny . snd) $ mapM selfAndMatch currRects
378 |           logMsg $ "Matching complete, change = " ++ show changed ++ "\n"
379 |           if and [match /= Unknown | (_, match) <- currMatches] || not changed
380 |             then return $ [rect | (rect, Match) <- currMatches]
381 |             else do
382 |                  remainingMatches <- go [rect | (rect, Unknown) <- currMatches]
383 |                  return $ [rect | (rect, Match) <- currMatches] ++ remainingMatches
384 | 


--------------------------------------------------------------------------------
/Parser.hs:
--------------------------------------------------------------------------------
  1 | module Parser (Option(..), parseGrFile, parseMatFile, parseOptions) where
  2 | 
  3 | import Expression
  4 | import PrattParser
  5 | import Data.Maybe (catMaybes)
  6 | import Data.List ((\\), sort, nub)
  7 | import Data.Set (fromAscList)
  8 | import Data.Map.Strict (Map, empty, insert, fromList)
  9 | import Control.Applicative((<$>), (<*), pure)
 10 | import Text.Parsec (Parsec, ParseError, parse, runParser, try, (<?>), (<|>), between, many, manyTill, many1,
 11 |                     choice, optionMaybe, sepEndBy, notFollowedBy, lookAhead, eof, getState, putState)
 12 | import Text.Parsec.Char (char, oneOf, noneOf, anyChar, string, upper, digit)
 13 | 
 14 | -- Command-line and grammar options
 15 | data Option = Entire
 16 |             | Number
 17 |             | AllMatches
 18 |             | Positions
 19 |             | Patterns
 20 |             | AddBorder
 21 |             | Debug0
 22 |             | Debug1
 23 |             deriving (Show, Eq)
 24 | 
 25 | -- Parse a string of options
 26 | options :: Parsec String () [Option]
 27 | options = concat <$> many (choice option) <?> "option string"
 28 |   where option = [char 'e' >> return [Entire],
 29 |                   char 'n' >> return [Number],
 30 |                   char 'a' >> return [AllMatches],
 31 |                   char 'p' >> return [Positions],
 32 |                   char 's' >> return [Patterns],
 33 |                   char 'b' >> return [AddBorder],
 34 |                   try $ string "d1" >> return [Debug0, Debug1],
 35 |                   char 'd' >> optionMaybe (char '0') >> return [Debug0]]
 36 | 
 37 | -- Parse an end of line
 38 | endOfLine :: Parsec String s String
 39 | endOfLine = try (string "\r\n") <|> (pure <$> oneOf "\n\r") <?> "end of line"
 40 | 
 41 | -- Parse a string with quotes, return a string without them
 42 | quoted :: Parsec String Bool String
 43 | quoted = concat <$> many (quote <|> escQuote <|> slash <|> escSlash <|> escBackslash <|> newLine <|> maybeEscaped)
 44 |   where quote = do
 45 |           char '"'
 46 |           inQuote <- getState
 47 |           putState $ not inQuote
 48 |           return "\""
 49 |         escQuote = try $ string "\\\""
 50 |         slash = char '/' >> return "/"
 51 |         escSlash = try $ string "\\/"
 52 |         escBackslash = try $ string "\\\\"
 53 |         newLine = do
 54 |           eol <- endOfLine
 55 |           inQuote <- getState
 56 |           putState False
 57 |           return $ if inQuote then "\"" ++ eol else eol
 58 |         maybeEscaped = do
 59 |           inQuote <- getState
 60 |           maybeEscape <- optionMaybe $ char '\\'
 61 |           symbol <- anyChar
 62 |           case (inQuote, maybeEscape) of
 63 |             (False, Just _) -> return $ '\\' : [symbol]
 64 |             (False, Nothing) -> return [symbol]
 65 |             (True, Just _) -> return [symbol]
 66 |             (True, Nothing) -> return $ '\\' : [symbol]
 67 | 
 68 | -- Skip this token, or be at the end of a line
 69 | skipOrEnd :: Parsec String () a -> Parsec String () ()
 70 | skipOrEnd p = (p >> return ()) <|> (lookAhead endOfLine >> return ()) <|> eof
 71 | 
 72 | -- Special expressions
 73 | flat :: Expr
 74 | flat = Sized (0, Nothing) (0, Just 0) AnyRect
 75 | thin :: Expr
 76 | thin = Sized (0, Just 0) (0, Nothing) AnyRect
 77 | 
 78 | -- Make a finite character class from string
 79 | mkSomeChar :: Bool -> [Maybe Char] -> Expr
 80 | mkSomeChar isPos = SomeChar isPos . fromAscList . sort
 81 | 
 82 | -- Shorthand for nonterminal in standard orientation
 83 | stdVar :: Label -> Expr
 84 | stdVar = Var (Rot 0)
 85 | 
 86 | -- Single-character expressions
 87 | reservedChars :: [(Char, Expr)]
 88 | reservedChars = [('$', AnyRect),
 89 |                  ('.', AnyChar),
 90 |                  ('f', flat),
 91 |                  ('t', thin),
 92 |                  ('_', stdVar Nothing),
 93 |                  ('b', Border),
 94 |                  ('e', Edge),
 95 |                  ('d', mkSomeChar True $ map Just ['0'..'9']),
 96 |                  ('u', mkSomeChar True $ map Just ['A'..'Z']),
 97 |                  ('l', mkSomeChar True $ map Just ['a'..'z']),
 98 |                  ('a', mkSomeChar True $ map Just $ ['A'..'Z'] ++ ['a'..'z']),
 99 |                  ('n', mkSomeChar True $ map Just $ ['0'..'9'] ++ ['A'..'Z'] ++ ['a'..'z']),
100 |                  ('s', mkSomeChar True $ map Just $ ['!'..'/'] ++ [':'..'@'] ++ ['['..'`'] ++ ['{'..'~'])]
101 | 
102 | -- Parse an escaped literal
103 | escapedLit :: Parsec String () Expr
104 | escapedLit = do
105 |   char '\\'
106 |   c <- anyChar
107 |   return $ mkSomeChar True $ [Just c]
108 | 
109 | -- Parse a nonterminal
110 | nonterm :: Parsec String () Expr
111 | nonterm = do
112 |   label <- upper
113 |   return $ stdVar $ Just label
114 | 
115 | -- Parse a reserved character
116 | reserved :: Parsec String () Expr
117 | reserved = do
118 |   char <- oneOf $ fst <$> reservedChars
119 |   let Just expr = lookup char reservedChars
120 |   return expr
121 | 
122 | -- Parse a character class
123 | charClass :: Parsec String () Expr
124 | charClass = char '[' `between` (skipOrEnd $ char ']') $ do
125 |   include <- many $ try classRange <|> try border <|> pure <$> Just <$> classChar
126 |   maybeExclude <- optionMaybe $ char ',' >> (many $ try classRange <|> try border <|> pure <$> Just <$> classChar)
127 |   return $ case (null include, maybeExclude) of
128 |     (True, Nothing) -> mkSomeChar False []
129 |     (False, Nothing) -> mkSomeChar True $ concat include
130 |     (True, Just exclude) -> mkSomeChar False $ concat exclude
131 |     (False, Just exclude) -> mkSomeChar True $ concat include \\ concat exclude
132 |   where needsEscape = "[]-,\\"
133 |         classChar = noneOf (needsEscape ++ "\n\r") <|> (char '\\' >> oneOf needsEscape)
134 |         border = string "\\b" >> return [Nothing]
135 |         classRange = do
136 |           a <- classChar
137 |           char '-'
138 |           b <- classChar
139 |           return $ map Just [a..b]
140 | 
141 | -- Parse a numeric range with default minimum
142 | numRange :: Int -> Parsec String () Range
143 | numRange m = do
144 |   lower <- many digit
145 |   maybeDash <- optionMaybe $ char '-'
146 |   upper <- many digit
147 |   let lowerNum = if null lower then m else read lower
148 |       upperNum = if null upper then Nothing else Just $ read upper
149 |   return $ case maybeDash of
150 |     Nothing -> (lowerNum, if null lower then Nothing else Just lowerNum)
151 |     Just _ -> (lowerNum, upperNum)
152 | 
153 | -- Parse two numeric ranges with default minima
154 | numRange2D :: Int -> Parsec String () (Range, Range)
155 | numRange2D m = do
156 |   xRange <- numRange 0
157 |   maybeYRange <- optionMaybe $ char ',' >> numRange 0
158 |   return $ case maybeYRange of
159 |     Nothing -> (xRange, xRange)
160 |     Just yRange -> (xRange, yRange)
161 | 
162 | -- Parse a size constraint
163 | sizeConstr :: Parsec String () (Expr -> Expr)
164 | sizeConstr = do
165 |   char '{'
166 |   (xRange, yRange) <- numRange2D 0
167 |   optionMaybe $ char '}'
168 |   return $ Sized xRange yRange
169 | 
170 | -- Encoding of orientations
171 | charToD4 :: Char -> [D4]
172 | charToD4 '0' = [Rot 0]
173 | charToD4 '1' = [Rot 1]
174 | charToD4 '2' = [Rot 2]
175 | charToD4 '3' = [Rot 3]
176 | charToD4 '4' = [RefRot 0]
177 | charToD4 '5' = [RefRot 1]
178 | charToD4 '6' = [RefRot 2]
179 | charToD4 '7' = [RefRot 3]
180 | charToD4 'O' = [Rot 0, Rot 1, Rot 2, Rot 3, RefRot 0, RefRot 1, RefRot 2, RefRot 3]
181 | charToD4 'X' = [Rot 0, Rot 1, Rot 2, Rot 3]
182 | charToD4 'N' = [Rot 0, Rot 2]
183 | charToD4 'T' = [Rot 0, RefRot 0]
184 | charToD4 'K' = [Rot 0, RefRot 2]
185 | charToD4 'H' = [Rot 0, RefRot 0, RefRot 2, Rot 2]
186 | charToD4 'A' = [Rot 0, RefRot 1]
187 | charToD4 'D' = [Rot 0, RefRot 3]
188 | charToD4 'C' = [Rot 0, RefRot 1, RefRot 3, Rot 2]
189 | charToD4 'F' = []
190 | 
191 | -- Parse a set of orientations
192 | orientationSet :: Parsec String () (Expr -> Expr)
193 | orientationSet = do
194 |   char 'o'
195 |   choices <- many1 $ oneOf "01234567OXNTKHADCF"
196 |   optionMaybe $ char '}'
197 |   let transformations = \expr ->
198 |         [Fixed expr | 'F' `elem` choices] ++ nub [orient expr rot | ch <- choices, rot <- charToD4 ch]
199 |   return $ foldr1 (:|) . transformations
200 | 
201 | -- Parse a postfix grid specification
202 | gridSpec :: Parsec String () (Expr -> Expr)
203 | gridSpec = do
204 |   char ':'
205 |   (xRange, yRange) <- numRange2D 1
206 |   optionMaybe $ char '}'
207 |   return $ Grid xRange yRange
208 | 
209 | -- Parse a counting specification
210 | countSpec :: Parsec String () (Expr -> Expr)
211 | countSpec = do
212 |   char '#'
213 |   range <- numRange 1
214 |   optionMaybe $ char '}'
215 |   return $ Count range
216 |   
217 | -- Parse a context anchor
218 | anchor :: Parsec String () Expr
219 | anchor = do
220 |   label <- oneOf ['0'..'9']
221 |   return $ Anchor $ read [label]
222 | 
223 | -- Parse an expression
224 | expression :: Parsec String () Expr
225 | expression = mkPrattParser opTable term <?> "expression"
226 |   where term = parenthesized <|> quoted <|> inContext <|> anchor <|> nonterm <|> reserved <|> escapedLit <|> charClass <?> "term"
227 |         parenthesized = char '(' `between` (skipOrEnd $ char ')') $ expression
228 |         quoted = char '"' `between` char '"' $ expression
229 |         inContext = char '<' `between` (skipOrEnd $ char '>') $ InContext <$> expression
230 |         basicInfixes = [lookAhead term >> return (:>),
231 |                         char '/' >> lookAhead term >> return (:^)] ++
232 |                        map addPostfix
233 |                        [char ' ' >> return (:>),
234 |                         char '&' >> return (:&),
235 |                         char '-' >> return (\e1 e2 -> e1 :& Not e2),
236 |                         char '|' >> return (:|),
237 |                         char '~' >> return (:~)]
238 |         opTable = [[Postfix $ try $ char '^' >> postfix]] ++
239 |                   map (return . InfixR . try . (char '^' >>)) basicInfixes ++
240 |                   [[Postfix $ try $ postfix]] ++
241 |                   map (return . InfixR . try) basicInfixes ++
242 |                   [[Postfix $ try $ char 'v' >> postfix]] ++
243 |                   map (return . InfixR . try . (char 'v' >>)) basicInfixes
244 |         postfixes = [sizeConstr,
245 |                      orientationSet,
246 |                      gridSpec,
247 |                      countSpec,
248 |                      char '\'' >> return Fixed,
249 |                      char '?' >> return (thin :|),
250 |                      try (string "/?") >> return (flat :|),
251 |                      char '+' >> return (Grid (1, Nothing) (1, Just 1)),
252 |                      try (string "/+") >> return (Grid (1, Just 1) (1, Nothing)),
253 |                      char '*' >> return (Grid (0, Nothing) (1, Just 1)),
254 |                      try (string "/*") >> return (Grid (1, Just 1) (0, Nothing)),
255 |                      char '!' >> return Not]
256 |         postfix = do
257 |           operations <- many1 $ choice postfixes
258 |           return $ foldr1 (flip (.)) operations
259 |         addPostfix :: Parsec String () (Expr -> Expr -> Expr) -> Parsec String () (Expr -> Expr -> Expr)
260 |         addPostfix parseOp = do
261 |           op <- parseOp
262 |           pfix <- optionMaybe postfix
263 |           return $ case pfix of
264 |             Just p -> \e1 e2 -> p $ op e1 e2
265 |             Nothing -> op
266 | 
267 | -- Parse a line of a grammar file into options, label, and expression
268 | contentLine :: Parsec String () ([Option], (Label, Expr))
269 | contentLine = do
270 |   opts <- try (options <* char '`') <|> return []
271 |   label <- optionMaybe $ try (upper <* char '=')
272 |   expr <- expression
273 |   return (opts, (label, expr))
274 | 
275 | -- Parse a comment
276 | commentLine :: Parsec String () ()
277 | commentLine = do
278 |   char '|'
279 |   many $ noneOf "\n\r"
280 |   return ()
281 | 
282 | -- Parse either a content line or a comment
283 | grammarLine :: Parsec String () (Maybe ([Option], (Label, Expr)))
284 | grammarLine = try (commentLine >> return Nothing) <|> (Just <$> contentLine)
285 | 
286 | -- File parser
287 | parseGrFile :: String -> String -> Either ParseError ([Option], Map Label Expr)
288 | parseGrFile filename grammar = foldToMap <$> contents
289 |   where contents :: Either ParseError [([Option], (Label, Expr))]
290 |         contents = do
291 |           unquoted <- runParser quoted False filename grammar
292 |           catMaybes <$> parse (grammarLine `sepEndBy` endOfLine <* eof) ("pre-processed " ++ filename) unquoted
293 |         foldToMap triples = (firsts, folded)
294 |           where firsts = concat $ fst <$> triples
295 |                 folded = foldr (\(label, expr) assoc -> insert label expr assoc) empty $ snd <$> triples
296 | 
297 | -- Parse a matrix from newline-delimited string
298 | parseMatFile :: String -> (Size, Map Coord Char)
299 | parseMatFile s = ((w, h), fromList pairs)
300 |   where rows = lines s
301 |         pairs = [((x, y), c) | (y, row) <- zip [0..] rows, (x, c) <- zip [0..] row]
302 |         w = maximum $ 0 : map length rows
303 |         h = length rows
304 | 
305 | -- Parse a chain of options form a string
306 | parseOptions :: String -> [Option]
307 | parseOptions str = case parse options "" str of
308 |   Right result -> result
309 |   Left _ -> []
310 | 


--------------------------------------------------------------------------------
/PrattParser.hs:
--------------------------------------------------------------------------------
 1 | -- Simplified Pratt parser for expressions, adapted from http://stackoverflow.com/a/33534426
 2 | 
 3 | module PrattParser where
 4 | 
 5 | import Text.Parsec (Parsec, choice, (<|>))
 6 | import Data.List (tails)
 7 | import Control.Applicative (pure, (<*>))
 8 | 
 9 | data Operator t = Postfix (Parsec String () (t -> t))
10 |                 | InfixL (Parsec String () (t -> t -> t)) -- Left associative
11 |                 | InfixR (Parsec String () (t -> t -> t)) -- Right associative
12 | 
13 | -- Make a Pratt parser from a precedence table and a term parser
14 | -- Precedence table is given from highest to lowest precedence
15 | mkPrattParser :: [[Operator t]] -> Parsec String () t -> Parsec String () t
16 | mkPrattParser precTable parseTerm = parseExpr precs
17 |   where precs = reverse precTable                                                              -- We go from lowest to highest precedence
18 |         parseExpr operators = do
19 |           term <- parseTerm
20 |           parseOper operators term
21 |         parseOper operators lhs = choice stepParsers <|> return lhs                            -- Choose an operator; if fails, return lhs
22 |           where stepParsers = do
23 |                   newPrecs@(precLev : higherPrecs) <- tails operators                          -- Choose a precedence level and all higher levels
24 |                   operator <- precLev                                                          -- Choose an operator from the level
25 |                   stepParser <- case operator of                                               -- Make a "next step" parser
26 |                     Postfix parseOp -> return $ parseOp <*> pure lhs                           -- For postfix, just grab that
27 |                     InfixL parseOp -> return $ parseOp <*> pure lhs <*> parseExpr higherPrecs  -- For left infix, grab everything with higher precedence
28 |                     InfixR parseOp -> return $ parseOp <*> pure lhs <*> parseExpr newPrecs     -- For right infix, grab everything with same or higher precedence
29 |                   return $ stepParser >>= parseOper operators                                  -- Parse with "next step", then with all operators
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Grime
 2 | 
 3 | ## Introduction
 4 | 
 5 | Grime is a two-dimensional pattern matching language based on Boolean grammars.
 6 | The basic idea is to construct rectangular patterns from smaller components, and check whether they are found in the input matrix.
 7 | 
 8 | You can [try Grime online](http://grime.tryitonline.net/), courtesy of [Dennis from PPCG](http://codegolf.stackexchange.com/users/12012/dennis).
 9 | Be sure to check the [tutorial](https://github.com/iatorm/grime/blob/master/tutorial.md) too.
10 | 
11 | ## Usage
12 | 
13 | A Grime program is called  a _grammar_, and the correct file extension for a grammar is `.gr`, although this is not enforced.
14 | The program is called like
15 | 
16 |     runhaskell grime.hs [options] grammarfile matrixfile
17 | 
18 | where `matrixfile` is a file containing the character matrix.
19 | For example, the digits program would be called like
20 | 
21 |     runhaskell grime.hs digits.gr digit-matrix
22 | 
23 | If you want a speed boost, you should compile Grime with the command
24 | 
25 | 	ghc -O2 grime.hs
26 | 
27 | and then run the resulting executable:
28 | 
29 | 	grime [options] grammarfile matrixfile
30 | 
31 | There's also a bash script `rungrime` that compiles Grime if needed and then calls the executable with the provided arguments.
32 | 
33 | By default, the interpreter prints the first match it finds, but this can be controlled using the option flags:
34 | 
35 | - `-e`: match only the entire matrix, print `1` for match and `0` for no match.
36 | - `-n`: print the number of matches, or the entire matrix if `-e` is also given.
37 | - `-a`: print all matches.
38 | - `-p`: print also the positions of the matches, in the format `(x,y,w,h)`.
39 | - `-s`: do not print the matches themselves.
40 | - `-b`: include borders of width 1.
41 | - `-d` or `-d0`: print debug information.
42 | - `-d1`: print logs from the matcher.
43 | 
44 | These can be inserted from the command line, or in the beginning of any line of a grammar file, separated from the rest of the line by a backtick `` ` ``.
45 | 
46 | ## Syntax and Semantics
47 | 
48 | A Grime grammar consists of one or more _definitions_, each on a separate line.
49 | Each of them defines the value of a _nonterminal_, and one of them must define the anonymous _toplevel nonterminal_.
50 | The syntax of a definition is either `N=E` or `E`, where `N` is an uppercase letter and `E` is an _expression_.
51 | 
52 | Expressions are constructed as follows.
53 | 
54 | - Any character escaped with `\` matches any `1x1` rectangle containing that character.
55 | - `.` matches any single character.
56 | - `b` matches a `1x1` rectangle outside the character matrix. It's only relevant if the `-b` option is used, or if the input is not rectangular.
57 | - `e` matches a 0-height or 0-width segment of the input's edge.
58 | - `$` always matches.
59 | - `f` matches any 0-height (flat) rectangle, and `t` matches any 0-width (thin) rectangle.
60 | - The pre-defined character groups are `d`igit, `u`ppercase, `l`owercase, `a`lphabetic, alpha`n`umeric, and `s`ymbol.
61 | - New character classes can be defined by the syntax `[a-prt-w,d-gu]`. The letters on the left are included, and those on the right are excluded, so this matches exactly the letters `abchijklmnoprtvw`. If the left side is empty, it is taken to contain all characters and the border. The comma can be omitted if the right side is empty. The characters `[],-\` must be escaped with `\`. `\b` matches a border.
62 | - An unescaped uppercase letter is a nonterminal, and matches the expression it is assigned.
63 | - `_` is the anonymous toplevel nonterminal.
64 | - If `P` and `Q` are expressions, then `PQ` is just their horizontal concatenation, and `P/Q` is their vertical concatenation, with `P` on top.
65 | - `P Q` is like `PQ`, but with lower precedence than `/`. This sometimes saves parentheses.
66 | - `P+` is one or more `P`s aligned horizontally, and `P/+` is the same aligned vertically.
67 | - The Boolean operations are denoted `P|Q`, `P&Q` and `P!`. Exclusive OR is `P~Q`, and difference is `P-Q`.
68 | - `P?` is shorthand for `t|P`, `P/?` for `f|P`, `P*` for `t|P+`, and `P/*` for `f|P/+`.
69 | - `P{a-b,c-d}`, where `abcd` are nonnegative integers, is a _size constraint_ on `P`. If `P` is a character class, then the expression matches any `mxn` rectangle containing only those characters, provided that `m` is between `a` and `b` inclusive, and `n` is between `c` and `d` inclusive. In other cases, the expression matches any rectangle that has the correct size and that `P` also matches. If `a` or `c` are omitted, they are taken to be `0`, and if `b` or `d` are omitted, they are infinite. If the hyphen between `a` and `b` is omitted, then we use the same number for both ends of the interval. If the entire `c-d` part is omitted, both axes are constrained. To clarify, `{-b}` is equivalent to `{0-b,0-b}`, and `{a-,c}` is equivalent to `{a-infinity,c-c}`. The `}` can be omitted.
70 | - `P:a-b,c-d}` is a _grid specifier_. It matches a rectangle that can be divided into an `mxn` grid of matches of `P`, where `m` is between `a` and `b` inclusive, and `n` is between `c` and `d` inclusive. The ranges and `}` can be omitted, with the same defaulting behavior as in the size constraint, except that the beginning of a range defaults to `1` instead of `0`.
71 | - `P#a-b}` is a _counting specifier_. It matches a rectangle that contains between `a` and `b` possibly overlapping matches of `P`, inclusive. The `}` is optional, `a` defaults to `1` and `b` to infinity.
72 | - `PoS`, where `S` is a (greedily parsed) string of the characters `01234567`, optionally terminated with `}`, is an _orientation modifier_. Each of the characters stands for a rotation (`0123`) or reflection (`4567`), which is applied to the expression `P`. The transformed expressions are combined with Boolean disjunctions. The characters `OXNTKHADC` encode convenient classes of orientations. The character `F` fixes an expression, so that its orientation cannot be modified.
73 | - `<P>` is a _context bracket_. It matches any rectangle **r** that's contained in a larger rectangle that matches `P`. The rectangle **r** can be matched by a digit in `P`; this is `0` by default, and increases with the nesting depth (`0` matches the anchor of the innermost brackets, `1` the next one etc).
74 | 
75 | You can add the prefix `^` to any infix operator or chain of postfix operators to raise its precedence higher than all other operators, or `v` to lower it; for example, `\a\b^|\cv+` is parsed as `(\a(\b|\c))+`.
76 | If `i` is any infix operator apart from concatenation or `/`, and `p` a chain of postfix operators, then `PipQ` is a shorthand for `(PiQ)p`; for example, `\a|+\b` is equivalent to `(\a|\b)+`.
77 | Double quotes `""` are used for swapping the `\`-escaped status of all characters inside them, except for the characters `"\/`.
78 | They allow text grids like `"this/is a/grid"`.
79 | Non-literal syntax elements, like nonterminals, operations and parentheses, can be used in quoted expressions by escaping them.
80 | Note that quotes define syntax elements, so `"ab/cd"+/E` will be parsed as `(\a\b/\c\d)+/E`.
81 | All open parentheses, brackets and quotes are implicitly closed at the end of a line.
82 | Lines beginning with `|` are comments, and the parser ignores them.
83 | 
84 | ## Notes
85 | 
86 | Grime does allow paradoxical definitions of the form `A=A!`; these will result in no match.
87 | In general, the matching engine can resolve any grammar where no match of a single nonterminal on a single rectangle depends on itself (although for complex grammars, this can take a while).
88 | A nontrivial example of a resolvable grammar is
89 | 
90 |     A=E?\a
91 |     B=[ab]+&A!
92 |     E=A|B
93 |     a`A
94 | 
95 | Given a one-dimensional string of `a`s and `b`s, this grammar will match every substring that ends in `a`.
96 | Note how the nonterminal `A` is self-referential in more than one way (via `E` directly, and via `E` through `B`), but since the `E` in `A` must be a proper substring, the system can always be resolved.
97 | 
98 | Grime supports non-rectangular inputs; the rows are simply aligned to the left, and the gaps can be matched using `b`.
99 | 


--------------------------------------------------------------------------------
/grime.hs:
--------------------------------------------------------------------------------
 1 | -- A two-dimensional language based on Boolean grammars
 2 | 
 3 | import Expression
 4 | import Matcher (matchAllEmpty)
 5 | import Parser (Option(..), parseMatFile, parseGrFile, parseOptions)
 6 | import Data.List (nub)
 7 | import Data.Map.Strict (toList)
 8 | import Control.Monad (forM_, when)
 9 | import System.Environment (getArgs)
10 | import System.IO (hPutStrLn, stderr)
11 | 
12 | 
13 | -- Take a submatrix of a newline-delimited string, possibly with border
14 | submatrix :: Bool -> Rect -> String -> String
15 | submatrix border (x, y, w, h) = unlines . take h . drop y' . map (take w . drop x') . addBorder . lines
16 |   where addBorder matrix = if border
17 |                            then let blankRow = replicate (maximum $ map length matrix) ' '
18 |                                 in blankRow : map (\row -> ' ' : row ++ " ") matrix ++ [blankRow]
19 |                            else matrix
20 |         (x', y') = if border then (x+1, y+1) else (x, y)
21 | 
22 | -- Print a string to STDERR with newline
23 | printErr :: String -> IO ()
24 | printErr = hPutStrLn stderr
25 | 
26 | main :: IO ()
27 | main = do
28 |   args <- getArgs
29 |   let (cmdOpts, grFile, matFile) = case args of
30 |         ['-':a, b, c] -> (parseOptions a, b, c)
31 |         [a, b] -> ([], a, b)
32 |         _ -> error "Incorrect arguments. Usage: grime [opts] grammarfile patternfile"
33 |   parsedGrammar <- fmap (parseGrFile grFile) $ readFile grFile
34 |   case parsedGrammar of
35 |     Left parseError -> printErr $ show parseError
36 |     Right (fileOpts, grammar) -> do
37 |       pict <- readFile matFile
38 |       let opts = [opt | opt <- nub $ cmdOpts ++ fileOpts, elem opt cmdOpts /= elem opt fileOpts]
39 |           (sze@(wMat, hMat), mat) = parseMatFile pict
40 |           (minX, minY, numX, numY) = if elem AddBorder opts then (-1, -1, wMat+2, hMat+2) else (0, 0, wMat, hMat)
41 |           (matches, logs) = matchAllEmpty sze (elem AddBorder opts) mat grammar $
42 |                             if elem Entire opts
43 |                             then [(minX, minY, numX, numY)]
44 |                             else [(x, y, w, h) |
45 |                                   w <- [numX,numX-1..0], h <- [numY,numY-1..0],
46 |                                   x <- [minX..numX-w], y <- [minY..numY-h]]
47 |           finalMatches = if elem AllMatches opts || elem Number opts then matches else take 1 matches
48 |       when (elem Debug0 opts) $ do
49 |         printErr $ "Options: " ++ show opts
50 |         printErr $ "Input dimensions: " ++ show sze
51 |         printErr "Definitions:"
52 |         forM_ (toList grammar) $ \(l, e) ->
53 |           printErr $ " " ++ (case l of Nothing -> "_ = "; Just a -> a:" = ") ++ show e
54 |         when (elem Debug1 opts) $ printErr logs
55 |       if (elem Number opts /= elem Entire opts)
56 |         then print $ length finalMatches
57 |         else forM_ finalMatches $ \rect -> do
58 |         when (elem Positions opts) $ print rect
59 |         when (not $ elem Patterns opts) . putStrLn $ submatrix (elem AddBorder opts) rect pict
60 | 


--------------------------------------------------------------------------------
/rungrime:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ ! -f grime ] ||
 4 |    [ Expression.hs -nt grime ] ||
 5 |    [ PrattParser.hs -nt grime ] ||
 6 |    [ Parser.hs -nt grime ] ||
 7 |    [ Matcher.hs -nt grime ] ||
 8 |    [ grime.hs -nt grime ]; then
 9 |    >&2 echo "Newer source files detected, compiling."
10 |    >&2 ghc -O2 grime.hs
11 | fi
12 | 
13 | ./grime "$@"
14 | 


--------------------------------------------------------------------------------
/tutorial.md:
--------------------------------------------------------------------------------
  1 | # Tutorial
  2 | 
  3 | _This tutorial is incomplete.
  4 | Suggestions are welcome!_
  5 | 
  6 | ## First Principles
  7 | 
  8 | Grime is a language for matching two-dimensional patterns (grids) of characters.
  9 | This is analogous to how regular expressions are used to match strings, which are one-dimensional patterns of characters.
 10 | The idea is that a Grime program, called a _grammar_, defines an _expression_ that may or may not match a given rectangular grid.
 11 | The executable takes such a grammar and a grid, and either tries to find a match in the grid (the default), enumerates all matches, or just tries to match the entire grid.
 12 | 
 13 | As with regular expressions, Grime expressions are constructed from simple parts using different combinators (infix operations) and modifiers (postfix operations).
 14 | For example, take the regular expression `a+ba+`.
 15 | The parts `a` and `b` are _atomic expressions_, which match the respective letters.
 16 | The modifier `+` transforms the expression `a` into `a+`, which matches one or more adjacent copies of `a`.
 17 | The concatenation `a+ba+` can be seen as a combination of `a+`, `b` and `a+`: it matches first `a+`, then `b`, then `a+` again.
 18 | 
 19 | ## A Simple Grammar
 20 | 
 21 | As said, a Grime expression may match a rectangular grid of characters.
 22 | The simplest expression is a _literal_, which matches a **1&times;1** grid containing a fixed character.
 23 | All Grime literals are escaped with a backslash; for example, the expression `\a` matches the character `a`.
 24 | 
 25 | The modifier `+` is _horizontal repetition_.
 26 | If `E` is an expression, then `E+` matches any rectangle formed by gluing one or more `E`s in a row.
 27 | The `E`s may be of different widths, but their heights must match.
 28 | Here's an ASCII art picture of the situation:
 29 | 
 30 |     +----+---+-------+--+
 31 | 	|    |   |       |  |
 32 | 	| E  | E |   E   |E |
 33 | 	|    |   |       |  |
 34 | 	+----+---+-------+--+
 35 | 
 36 | For example, the expression `\a+` matches any **n&times;1** grid of `a`s, where **n &ge; 1**.
 37 | 
 38 | The concatenation of two expressions (which is really an "invisible" binary operator) means horizontal gluing: `EF` matches a rectangle whose left side matches `E` and right side matches `F`.
 39 | The border between `E` and `F` may go anywhere in the rectangle, and the expressions may even have zero width.
 40 | Here's again a picture:
 41 | 
 42 |     +----+-------+
 43 | 	|    |       |
 44 | 	| E  |   F   |
 45 | 	|    |       |
 46 | 	+----+-------+
 47 | 
 48 | Thus, `\a+\b` matches an **n&times;1** rectangle, where **n &ge; 2**, the rightmost character is a `b`, and the other are `a`s.
 49 | The expression `\a+\b\a+` matches one `b` surrounded by one or more `a`s on both sides.
 50 | 
 51 | Let's try out this grammar!
 52 | Grime has an [online interpreter](http://grime.tryitonline.net/) hosted by [Dennis from PPCG](http://codegolf.stackexchange.com/users/12012/dennis).
 53 | Paste the expression `\a+\b\a+` on the Code box (which corresponds to the grammar file), the string `ababaa` to the Input box (which corresponds to the pattern file), and click Run.
 54 | The result in the Output box should be `abaa`, the first matching sub-rectangle that Grime found in the pattern string (it tries large rectangles first, so `abaa` matches before anything else).
 55 | 
 56 | The grammar file may contain _option flags_, which are reparated from the main grammar by a backtick.
 57 | If you modify the grammar into `` a`\a\b+ ``, the output will list all matched sub-rectangles instead of just the first.
 58 | Also, `` n`\a\b+ `` counts the number of matches, and `` e`\a\b+ `` matches the entire pattern against the expression, producing `0` for "no match" in our case.
 59 | 
 60 | ## Vertical Concatenation
 61 | 
 62 | In addition to gluing rectangles side by side, Grime allows them to be glued on top of each other.
 63 | The operator `/` is vertical concatenation: `E/F` matches a rectangle whose top part matches `E` and bottom part matches `F`.
 64 | Also, `E/+` matches one or more `E`s stacked vertically on top of each other.
 65 | Analogously to the horizontal case, the parts may have different heights, but their widths must match.
 66 | 
 67 | Let's put the vertical concatenation operators into action!
 68 | Consider the expression `\a\b+/\d+`.
 69 | The `/` has lower precedence than horizontal concatenation, so it will be parsed as `(\a\b+)/(\d+)`.
 70 | On the left, we have the expression `\a\b+`, which matches an **n&times;1** rectangle consisting of an `a` and then one or more `b`s.
 71 | On the right, we have `\d+`, which matches an **n&times;1** rectangle consisting of one or more `d`s.
 72 | Their vertical concatenation matches an **n&times;2** rectangle like
 73 | 
 74 |     abbb
 75 | 	dddd
 76 | 
 77 | Let's make a vertical repetition of this pattern.
 78 | We can use parentheses for grouping, so `(\a\b+/\d+)/+` does the job: it matches patterns like
 79 | 
 80 | 	abbb
 81 | 	dddd
 82 | 	abbb
 83 | 	dddd
 84 | 	abbb
 85 | 	dddd
 86 | 
 87 | where the above **n&times;2** pattern is repeated one or more times.
 88 | 
 89 | > ### Excercise
 90 | >
 91 | > Write an expression that matches any **n&times;m** rectangle, where **n&ge;3** and **m&ge;3**, that consists entirely of `a`s with the exception of one `b` that doesn't touch the borders.
 92 | > Like this:
 93 | >
 94 | >     aaaaa
 95 | >     aaaba
 96 | >     aaaaa
 97 | >     aaaaa
 98 | 
 99 | ## Character Classes
100 | 
101 | Similarly to regular expressions, Grime supports character classes defined by brackets `[]`.
102 | The expression <code>[&hellip;]</code> matches any **1&times;1** pattern whose character occurs inside the brackets.
103 | It is effectively a disjunction of literals, but with a more terse syntax.<sup>1</sup>
104 | For example `[abcdr]` matches either `a`, `b`, `c`, `d` or `r`.
105 | Hyphens can be used to shorten ranges of characters, so the above is equivalent to `[a-dr]`.
106 | The comma is used to define _negative characters_ that are excluded from the class.
107 | For example, `[a-z,aeiou]` matches all lowercase English consonants, since we excluded the vowels.
108 | If no non-negative characters are listed, as in `[,asdf]`, the resulting class will match all characters except the negative ones.
109 | The characters `[]-,\` have to be escaped with backslashes inside the character class construct.
110 | 
111 | > ### Excercise
112 | >
113 | > Write an expression that matches any **n&times;m** rectangle, where **n&ge;2** and **m&ge;2**, that contains only vowels on its left and bottom borders, and only consonants everywhere else.
114 | > Like this:
115 | >
116 | >     anrbz
117 | >     ejhjp
118 | >     egghw
119 | >     eaaio
120 | 
121 | ## Logical Operators
122 | 
123 | Expressions can also be combined using logical conjunction (`E&F`) or disjunction (`E|F`), which behave as expected.
124 | For example, `\a+|\a/+` matches either an **n&times;1** or **1&times;n** rectangle of `a`s.
125 | Note that the logical operations only work on expressions, not operators, so you cannot do `\a(+|/+)` or something similar.
126 | Disjunction has higher precedence than conjunction, so `E&F|G` is parsed as `(E&F)|G`.
127 | Logical negation is denoted by the exclamation mark: `E!` matches whenever `E` does not match.
128 | 
129 | Logical conjunction is already a quite powerful tool.
130 | For example, we can match a rectangle that's divided into four parts by two lines, where the upper-left and lower-right parts consist of `a`s and the others of `b`s, like this:
131 | 
132 |     aaabbbb
133 | 	aaabbbb
134 | 	bbbaaaa
135 | 	bbbaaaa
136 | 	bbbaaaa
137 | 
138 | Namely, we can get the unbroken horizontal line with the expression `(\a+/+\b+/+)/(\b+/+\a+/+)`, and the unbroken vertical line with the expression `(\a+/+/\b+/+)(\b+/+/\a+/+)`.
139 | Their conjunction
140 | 
141 |     (\a+/+\b+/+)/(\b+/+\a+/+)&(\a+/+/\b+/+)(\b+/+/\a+/+)
142 | 
143 | has both an unbroken horizontal line and unbroken vertical line.
144 | 
145 | > ### Excercise
146 | >
147 | > Write an expression that matches any rectangle of `a`s and `b`s where every horizontal row and vertical column contains at least one `b`, like this:
148 | >
149 | >     aaabb
150 | >     baaab
151 | >     aabab
152 | >     abaab
153 | 
154 | ## Built-in Expressions
155 | 
156 | Grime contains several atomic expressions that are built into the language.
157 | We list some of them here.
158 | 
159 | - The dot `.` is equivalent to `[]`; it matches any single character.<sup>2</sup>
160 | - The dollar `$` is a "wildcard" that matches all rectangles, regardless of size or content.
161 | - The letter `f` (called "flat") matches all rectangles of height 0, and `t` (called "thin") matches all rectangles of width 0.
162 | - The letter `e` matches any flat or thin rectangle on the border of the input.
163 | - There are six built-in character classes: `d`igit, `u`ppercase, `l`owercase, `a`lphabetic, alpha`n`umeric, and `s`ymbol. They all work in the ASCII range only.
164 | 
165 | Some operators are defined in terms of these expressions.
166 | The option operator `E?` is syntactic sugar for `t|E`, and the vertical choice `E/?` is equivalent to `f|E`.
167 | We also have the zero-or-more-repetitions operators `E*` (equivalent to `t|E+`) and `E/*` (equivalent to `f|E/+`).
168 | 
169 | ## Containment
170 | 
171 | The operator `#` denotes _containment_: `E#` matches any pattern that contains a match of `E` as a sub-pattern.
172 | For example, `(\a\b/\b\a)#` matches a pattern that contains a copy of
173 | 
174 |     ab
175 | 	ba
176 | 
177 | The idiom `E#!` denotes _forbidden patterns_ (it's a combination of containment and logical negation), and it is surprisingly powerful.
178 | For example, consider the task of matching an arbitrary-sized "chessboard" of `a`s and `b`s, like this:
179 | 
180 |     abababa
181 | 	bababab
182 | 	abababa
183 | 	bababab
184 | 
185 | The top left corner may also be a `b`.
186 | We could construct an expression by repeating the pattern `\a\b/\b\a` and taking care of the borders in some way, but this quickly becomes complicated.
187 | A much simpler approach is to match a rectangle of `a`s and `b`s that contains no adjacent characters:
188 | 
189 |     [ab]+/+&(\a\a|\a/\a|\b\b|\b/\b)#!
190 | 
191 | As another example, if `P` is an arbitrary expression, then `P&e#` matches any rectangle that touches an edge of the input and is a match of `P`.
192 | 
193 | ## Size Constraints
194 | 
195 | In Grime, there is a simple way for constraining the size of a expression: _size constraints_.
196 | A size constraint is a postfix operator defined with braces `{}`, and it contains one or two numerical ranges.
197 | The most complete form of a size constraint is `E{a-b,c-d}`, and it matches a pattern that matches `E`, and is also of size **n&times;m**, where **a&le;n&le;b** and **c&le;m&le;d**.
198 | For example, a **3&times;3** chessboard can be matched with
199 | 
200 |     ([ab]+/+&(\a\a|\a/\a|\b\b|\b/\b)#!){3-3,3-3}
201 | 
202 | However, if `E` is a literal, a dot, or a character class, then `E{a-b,c-d}` matches a rectangle of the given sizes that only contains characters from that class or literal.
203 | For example, `[aeiou]{2-2,2-4}` matches a rectangle of vowels of width 2 and height 2, 3 or 4.
204 | If you leave out some of the numbers, reasonable defaults will be used instead.
205 | If the second range is missing, the first one is simply dupicated.
206 | A missing lower bound is replaced by `0`, and a missing upper bound by infinity.
207 | A missing dash gives equal upper and lower bounds, so `{2,4}` means `{2-2,4-4}`.
208 | Thus, a more compact way of matching **3&times;3** chessboards is
209 | 
210 |     [ab]{3}&(\a\a|\a/\a|\b\b|\b/\b)#!
211 | 
212 | In particular, for character literals `\a*/*` is equivalent to `\a{}`.
213 | 
214 | ## Nonterminals
215 | 
216 | _Nonterminals_ in Grime are used for two purposes: as shorthands for long expressions, and to implement recursion.
217 | A nonterminal is denoted by an uppercase letter, and it must be defined on a separate line in the grammar file.
218 | The syntax for defining a nonterminal, in this case `A`, is this:
219 | 
220 |     A=\a\b\c
221 | 
222 | This line defines `A` as the expression `\a\b\c`, and it can then be used anywhere in the grammar file.
223 | For example, you could define the main expression `A/A/A`, which would match the pattern
224 | 
225 |     abc
226 | 	abc
227 | 	abc
228 | 
229 | However, the main strength of nonterminals is that they can refer to themselves.
230 | As an example, suppose we want to match all rectangles of size **n&times;n** where **n&ge;1** (also known as squares).
231 | We can build a square recursively: it is either a **1&times;1** rectangle, or it is a smaller rectangle plus a border of width 1 below and to its right.
232 | Let's turn this into a definition:
233 | 
234 |     S=.|S./+/.+
235 | 
236 | As stated above, this defines `S` as either a single character (`.`), or a smaller `S` with a vertical border (`./+`) to its right, and a horizontal border (`.+`) below that.
237 | As a picture, where `A` stands for `./+` and `B` for `.+`:
238 | 
239 |     +---+-+
240 | 	|   | |
241 | 	| S |A|
242 | 	|   | |
243 | 	+---+-+
244 | 	|  B  |
245 | 	+-----+
246 | 
247 | Note how the concatenations force the borders to have the correct lengths.
248 | Now, a grammar file must contain exactly one anonymous "toplevel expression" that's not bound to any nonterminal.
249 | Thus a complete grammar file for matching square-shaped inputs is this:
250 | 
251 |     S=.|S./+/.+
252 | 	e`S
253 | 
254 | As a shorthand, the character `_` refers to the toplevel nonterminal, and can be used in its own definition.
255 | This shortens the above grammar file to the following:
256 | 
257 |     e`.|_./+/.+
258 | 
259 | You can define very complex grammars using nonterminals whose definitions refer to each other.
260 | 
261 | > ### Excercise
262 | >
263 | > Write a grammar that matches an odd-sized square of `a`s whose center point is a `b`, like this:
264 | >
265 | >     aaaaa
266 | >     aaaaa
267 | >     aabaa
268 | >     aaaaa
269 | >     aaaaa
270 | 
271 | > ### Excercise
272 | >
273 | > Write a grammar that matches correctly nested parentheses `()` and `[]`, like this:
274 | >
275 | >     (())[()([])]
276 | 
277 | ## Orientation modifiers
278 | 
279 | Suppose you want to match a rectangle of `0`s with a single `1` in any corner.
280 | The expression `\1\0+/\0+/+` of course matches such a rectangle with `1` in the upper left corner, but to get them all, you'd need to repeat the pattern four times with slight modifications, which is really cumbersome:
281 | 
282 |     \1\0+/\0+/+|\0+\1/\0+/+|\0+/+/\1\0+|\0+/+/\0+\1
283 | 
284 | _Orientation modifiers_, which are postfix operators, allow you to more easily rotate and reflect Grime patterns.
285 | For example, the modifier `oX` means "rotated by any multiple of 90 degrees", so the above expression can be shortened to `(\1\0+/\0+/+)oX`.
286 | 
287 | The full syntax of an orientation modifier is `oS}`, where `S` is a nonempty string of the characters `01234567OXNTKHADCF`, and the final `}` is optional.
288 | Each digit `0123` denotes counterclockwise rotation by the corresponding multiple of 90 degrees, and `4567` are the same followed by reflection around the vertical axis.
289 | Each of the letters (except `F`) corresponds to a subgroup of rotations and reflections, usually the symmetry group of the glyph itself.
290 | For example, `O` stands for all rotations and reflections, `X` for all rotations, and `N` for rotation by 0 or 180 degrees.
291 | The exceptions are `D` (nothing or reflection around diagonal), `A` (nothing or reflection around anti-diagonal) and `C` (nothing or reflection around diagonal and/or anti-diagonal).
292 | If `S` contains several characters, they are combined with disjunctions (logical OR).
293 | For example, `oN4` would mean no change, rotation by 180 degrees or reflection around vertical axis.
294 | Using the `A` option for reflecting around the andi-diagonal, the chessboard example above can be shortened to
295 | 
296 |     [ab]+/+&(\a\a|\b\b)oA#!
297 | 
298 | The `F` option is special, and stands for _fixed orientation_.
299 | Sometimes you want to rotate a pattern as a whole, but keep some things unchanged.
300 | For example, you might want to match the patterns `()` and `[]` side by side or on top of one another, in any orientation.
301 | However, the pattern `(\(\)\[\])oX` is not what we want, since it matches the following rectanges:
302 | 
303 |     ()[]
304 |     
305 |     ]
306 |     [
307 |     )
308 |     (
309 | 
310 |     ][)(
311 |     
312 |     (
313 |     )
314 |     [
315 |     ]
316 | 
317 | What we can do is to fix the patterns `()` and `[]` with the `F` option, and rotate the rest of the expression around them: `((\(\))oF(\[\])oF)oX`.
318 | This correctly matches the following rectangles:
319 | 
320 |     ()[]
321 | 
322 |     []
323 |     ()
324 | 
325 |     []()
326 | 
327 |     ()
328 |     []
329 | 
330 | > ### Excercise
331 | >
332 | > Write a grammar that matches a rectangular "track" lined by `#`-characters with one "runner" represented by `@` at any point of the track, on a background or `.`-characters, like this:
333 | >
334 | >     ##########
335 | >     #........#
336 | >     #.######.#
337 | >     #@#....#.#
338 | >     #.#....#.#
339 | >     #.######.#
340 | >     #........#
341 | >     ##########
342 | 
343 | 
344 | ## Context Brackets
345 | 
346 | _Context brackets_ provide a method for matching a rectangle depending on its surroundings.
347 | The syntax consists of the angle brackets `<>`: the expression `<E>` is read as "in context `E`", and it matches any rectangle **r** that's contained in a larger rectangle that matches `E`.
348 | For example, consider the definition `A=<\a\b+\a>`.
349 | In the one-dimensional string `abbcabba`, the first `b` does not match `A`, since it's not contained in a run of `b`s surrounded by `a`s.
350 | However, the third `b` is, and so it does match `A`.
351 | 
352 | Inside a context bracket, the currently-matched rectangle **r** can be explicitly matched by an _anchor_, which by default is the digit `0`.
353 | For example, the expression `<0\a>` matches any rectangle that has a single `a` as its right neighbor (and thus has height 1).
354 | With nested context brackets, the anchor of the `n`th surrounding bracket is the digit `n`, up to `9`.
355 | This means that the anchor of the innermost bracket is always matched with `0`.
356 | 
357 | Context brackets can be used for many things, but an very useful application is path-finding.
358 | Consider the following problem: we have a grid consisting of the characters `SE#.`, and we want to count the number of `S`s that are connected to an `E` by a path of `.`s.
359 | In the grid
360 | 
361 |     ..#.#..E#
362 | 	S.#...###
363 | 	....#....
364 | 	###.#.##.
365 | 	.S##..#S.
366 | 	....###.#
367 | 
368 | the bottom left `S` should not be counted, while the other two should.
369 | We can achieve this with the following grammar:
370 | 
371 |     R=\E|[S.]&<(0RoF)oX>
372 | 	n`R&\S
373 | 
374 | The nonterminal `R` matches any non-`#` character that contains a path to an `E`: it is either an `E` itself, or one of `S` or `.` that has a match of `R` as a neighbor in any direction (which we achieve with an orientation modifier; the `oF` fixes the orientation of `R`, which doesn't change the semantics, but speeds up the matching).
375 | It should be noted that anchors have lexical scope, so you cannot match an anchor "through" a nonterminal.
376 | 
377 | > ### Excercise
378 | >
379 | > Write a grammar that counts the number of `a`s that have occurrences of `b`s in all four cardinal directions (not necessarily as nearest neighbors).
380 | > The left `a` here is an example, but the right one is not, so the correct result is `1`:
381 | >
382 | >     xxbxxxx
383 | >     bxaxxbx
384 | >     bxxxxax
385 | >     xbbxxbx
386 | 
387 | <sup>1</sup> This is not entirely true, as character classes are given special treatement by the size constraint operator. We'll return to that later.
388 | 
389 | <sup>2</sup> This is not entirely true either, since `[]` can also match special _border characters_.
390 | 


--------------------------------------------------------------------------------