├── .gitignore ├── Setup.hs ├── ChangeLog.md ├── LICENSE ├── alex-tools.cabal └── src ├── AlexToolsBin.hs └── AlexTools.hs /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-newstyle 3 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | # Revision history for alex-tools 2 | 3 | ## 0.2.0.0 -- 2017-03-21 4 | 5 | * Add the `sourceFile` field to `SourceRange` 6 | 7 | ## 0.1.1.0 -- 2016-09-02 8 | 9 | * Add `NFData` instances 10 | 11 | ## 0.1.0.0 -- 2016-09-02 12 | 13 | * Initial version. 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Iavor S. Diatchki 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any purpose 4 | with or without fee is hereby granted, provided that the above copyright notice 5 | and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 8 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 9 | FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 10 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 11 | OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 12 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 13 | THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /alex-tools.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.0 2 | name: alex-tools 3 | version: 0.6.1 4 | synopsis: A set of functions for a common use case of Alex. 5 | description: This captures a common pattern for using Alex. 6 | license: ISC 7 | license-file: LICENSE 8 | author: Iavor S. Diatchki 9 | maintainer: iavor.diatchki@gmail.com 10 | copyright: Iavor S. Diatchki, 2016 11 | category: Development 12 | build-type: Simple 13 | extra-source-files: ChangeLog.md 14 | 15 | source-repository head 16 | type: git 17 | location: https://github.com/GaloisInc/alex-tools 18 | 19 | library 20 | exposed-modules: AlexTools, AlexToolsBin 21 | other-extensions: TemplateHaskell 22 | build-depends: base >=4.7 && <4.22, 23 | text >= 1.2.4 && < 2.2, 24 | bytestring >= 0.9 && <0.13, 25 | deepseq >=1.3 && <1.6, 26 | template-haskell >=2.9.0 && <2.24 27 | hs-source-dirs: src 28 | ghc-options: -Wall 29 | default-language: Haskell2010 30 | -------------------------------------------------------------------------------- /src/AlexToolsBin.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell, CPP #-} 2 | module AlexToolsBin 3 | ( -- * Lexer Basics 4 | initialInput, Input(..), inputFile 5 | , Lexeme(..) 6 | , SourcePos(..), startPos, beforeStartPos, prevPos 7 | , SourceRange(..) 8 | , prettySourcePos, prettySourceRange 9 | , prettySourcePosLong, prettySourceRangeLong 10 | , HasRange(..) 11 | , (<->) 12 | , moveSourcePos 13 | 14 | -- * Writing Lexer Actions 15 | , Action(..) 16 | 17 | -- ** Lexemes 18 | , lexeme 19 | , matchLength 20 | , matchRange 21 | , matchBytes 22 | 23 | -- ** Manipulating the lexer's state 24 | , getLexerState 25 | , setLexerState 26 | 27 | -- ** Access to the lexer's input 28 | , startInput 29 | , endInput 30 | 31 | -- * Interface with Alex 32 | , AlexInput 33 | , alexInputPrevChar 34 | , alexGetByte 35 | , makeLexer 36 | , LexerConfig(..) 37 | , simpleLexer 38 | , Word8 39 | 40 | ) where 41 | 42 | import Control.DeepSeq 43 | import Data.Word(Word8) 44 | import Data.ByteString(ByteString) 45 | import qualified Data.ByteString as BS 46 | import Data.Text(Text) 47 | import qualified Data.Text as Text 48 | import Control.Monad(liftM,ap,replicateM) 49 | import Language.Haskell.TH 50 | #if !MIN_VERSION_base(4,8,0) 51 | import Control.Applicative 52 | #endif 53 | 54 | data Lexeme t = Lexeme 55 | { lexemeBytes :: !ByteString 56 | , lexemeToken :: !t 57 | , lexemeRange :: !SourceRange 58 | } deriving (Show, Eq) 59 | 60 | instance NFData t => NFData (Lexeme t) where 61 | rnf (Lexeme x y z) = rnf (x,y,z) 62 | 63 | data SourcePos = SourcePos 64 | { sourceIndex :: !Int 65 | , sourceFile :: !Text 66 | } deriving (Show, Eq) 67 | 68 | -- | Pretty print the source position without the file name. 69 | prettySourcePos :: SourcePos -> String 70 | prettySourcePos x = show (sourceIndex x) 71 | 72 | -- | Pretty print the source position, including the file name. 73 | prettySourcePosLong :: SourcePos -> String 74 | prettySourcePosLong x = Text.unpack (sourceFile x) ++ ":" ++ show (sourceIndex x) 75 | 76 | instance NFData SourcePos where 77 | rnf (SourcePos {}) = () 78 | 79 | -- | Update a 'SourcePos' for a particular matched character 80 | moveSourcePos :: SourcePos -> SourcePos 81 | moveSourcePos p = p { sourceIndex = sourceIndex p + 1 } 82 | 83 | -- | A range in the source code. 84 | data SourceRange = SourceRange 85 | { sourceFrom :: !SourcePos 86 | , sourceTo :: !SourcePos 87 | } deriving (Show, Eq) 88 | 89 | -- | Pretty print the range, without the file name 90 | prettySourceRange :: SourceRange -> String 91 | prettySourceRange x = prettySourcePos (sourceFrom x) ++ "--" ++ 92 | prettySourcePos (sourceTo x) 93 | 94 | -- | Pretty print the range, including the file name. 95 | prettySourceRangeLong :: SourceRange -> String 96 | prettySourceRangeLong x 97 | | sourceFile pfrom == sourceFile pto = 98 | Text.unpack (sourceFile pfrom) ++ ":" ++ 99 | prettySourcePos pfrom ++ "--" ++ prettySourcePos pto 100 | | otherwise = prettySourcePosLong pfrom ++ "--" ++ 101 | prettySourcePosLong pto 102 | where 103 | pfrom = sourceFrom x 104 | pto = sourceTo x 105 | 106 | 107 | 108 | instance NFData SourceRange where 109 | rnf (SourceRange {}) = () 110 | 111 | class HasRange t where 112 | range :: t -> SourceRange 113 | 114 | instance HasRange SourcePos where 115 | range p = SourceRange { sourceFrom = p, sourceTo = p } 116 | 117 | instance HasRange SourceRange where 118 | range = id 119 | 120 | instance HasRange (Lexeme t) where 121 | range = lexemeRange 122 | 123 | instance (HasRange a, HasRange b) => HasRange (Either a b) where 124 | range (Left x) = range x 125 | range (Right x) = range x 126 | 127 | (<->) :: (HasRange a, HasRange b) => a -> b -> SourceRange 128 | x <-> y = SourceRange { sourceFrom = sourceFrom (range x) 129 | , sourceTo = sourceTo (range y) 130 | } 131 | 132 | -------------------------------------------------------------------------------- 133 | 134 | -- | An action to be taken when a regular expression matchers. 135 | newtype Action s a = A { runA :: Input -> Input -> Int -> s -> (s, a) } 136 | 137 | instance Functor (Action s) where 138 | fmap = liftM 139 | 140 | instance Applicative (Action s) where 141 | pure a = A (\_ _ _ s -> (s,a)) 142 | (<*>) = ap 143 | 144 | instance Monad (Action s) where 145 | return = pure 146 | A m >>= f = A (\i1 i2 l s -> let (s1,a) = m i1 i2 l s 147 | A m1 = f a 148 | in m1 i1 i2 l s1) 149 | 150 | -- | Acces the input just before the regular expression started matching. 151 | startInput :: Action s Input 152 | startInput = A (\i1 _ _ s -> (s,i1)) 153 | 154 | -- | Acces the input just after the regular expression that matched. 155 | endInput :: Action s Input 156 | endInput = A (\_ i2 _ s -> (s,i2)) 157 | 158 | -- | The number of characters in the matching input. 159 | matchLength :: Action s Int 160 | matchLength = A (\_ _ l s -> (s,l)) 161 | 162 | -- | Acces the curent state of the lexer. 163 | getLexerState :: Action s s 164 | getLexerState = A (\_ _ _ s -> (s,s)) 165 | 166 | -- | Change the state of the lexer. 167 | setLexerState :: s -> Action s () 168 | setLexerState s = A (\_ _ _ _ -> (s,())) 169 | 170 | -- | Get the range for the matching input. 171 | matchRange :: Action s SourceRange 172 | matchRange = 173 | do i1 <- startInput 174 | i2 <- endInput 175 | return (inputPos i1 <-> inputPrev i2) 176 | 177 | -- | Get the text associated with the matched input. 178 | matchBytes :: Action s ByteString 179 | matchBytes = 180 | do i1 <- startInput 181 | n <- matchLength 182 | return (BS.take n (inputBytes i1)) 183 | 184 | -- | Use the token and the current match to construct a lexeme. 185 | lexeme :: t -> Action s [Lexeme t] 186 | lexeme tok = 187 | do r <- matchRange 188 | txt <- matchBytes 189 | let l = Lexeme { lexemeRange = r 190 | , lexemeToken = tok 191 | , lexemeBytes = txt 192 | } 193 | l `seq` return [ l ] 194 | 195 | -- | Information about the lexer's input. 196 | data Input = Input 197 | { inputPos :: {-# UNPACK #-} !SourcePos 198 | -- ^ Current input position. 199 | 200 | , inputBytes :: {-# UNPACK #-} !ByteString 201 | -- ^ The text that needs to be lexed. 202 | 203 | , inputPrev :: {-# UNPACK #-} !SourcePos 204 | -- ^ Location of the last consumed character. 205 | 206 | , inputPrevByte :: {-# UNPACK #-} !Word8 207 | -- ^ The last consumed character. 208 | } 209 | 210 | -- | Prepare the text for lexing. 211 | initialInput :: Text {- ^ Where the text came from -} -> 212 | ByteString {- ^ The text to lex -} -> Input 213 | initialInput file str = Input 214 | { inputPos = startPos file 215 | , inputPrev = beforeStartPos file 216 | , inputPrevByte = 0 217 | , inputBytes = str 218 | } 219 | 220 | startPos :: Text {- ^ Name of file/thing containing this -} -> SourcePos 221 | startPos file = SourcePos { sourceIndex = 0, sourceFile = file } 222 | 223 | beforeStartPos :: Text -> SourcePos 224 | beforeStartPos file = SourcePos { sourceIndex = -1, sourceFile = file } 225 | 226 | {- | Move one position back. Assumes that newlines use a single bytes. 227 | 228 | This function is intended to be used when starting the lexing somewhere 229 | in the middle of the input, for example, if we are implementing a quasi 230 | quoter, and the previous part of the input is not in our language. 231 | -} 232 | prevPos :: SourcePos -> SourcePos 233 | prevPos p 234 | | sourceIndex p < 0 = p 235 | | otherwise = p { sourceIndex = sourceIndex p - 1 } 236 | 237 | -- | The file/thing for the current position. 238 | inputFile :: Input -> Text 239 | inputFile = sourceFile . inputPos 240 | 241 | -------------------------------------------------------------------------------- 242 | -- | Lexer configuration. 243 | data LexerConfig s t = LexerConfig 244 | { lexerInitialState :: s 245 | -- ^ State that the lexer starts in 246 | 247 | , lexerStateMode :: s -> Int 248 | -- ^ Determine the current lexer mode from the lexer's state. 249 | 250 | , lexerEOF :: s -> SourcePos -> [Lexeme t] 251 | -- ^ Emit some lexemes at the end of the input. 252 | } 253 | 254 | -- | A lexer that uses no lexer-modes, and does not emit anything at the 255 | -- end of the file. 256 | simpleLexer :: LexerConfig () t 257 | simpleLexer = LexerConfig 258 | { lexerInitialState = () 259 | , lexerStateMode = \_ -> 0 260 | , lexerEOF = \_ _ -> [] 261 | } 262 | 263 | 264 | -- | Generate a function to use an Alex lexer. 265 | -- The expression is of type @LexerConfig s t -> Input -> [Lexeme t]@ 266 | makeLexer :: ExpQ 267 | makeLexer = 268 | do let local = do n <- newName "x" 269 | return (varP n, varE n) 270 | 271 | ([xP,yP,zP], [xE,yE,zE]) <- unzip <$> replicateM 3 local 272 | 273 | let -- Defined by Alex 274 | alexEOF = conP (mkName "AlexEOF") [ ] 275 | alexError = conP (mkName "AlexError") [ wildP ] 276 | alexSkip = conP (mkName "AlexSkip") [ xP, wildP ] 277 | alexToken = conP (mkName "AlexToken") [ xP, yP, zP ] 278 | alexScanUser = varE (mkName "alexScanUser") 279 | 280 | let p ~> e = match p (normalB e) [] 281 | body go mode inp cfg = 282 | caseE [| $alexScanUser $mode $inp (lexerStateMode $cfg $mode) |] 283 | [ alexEOF ~> [| lexerEOF $cfg $mode (inputPrev $inp) |] 284 | , alexError ~> 285 | [| error "internal error in lexer (AlexToolsBin.hs)" |] 286 | , alexSkip ~> [| $go $mode $xE |] 287 | , alexToken ~> [| case runA $zE $inp $xE $yE $mode of 288 | (mode', ts) -> ts ++ $go mode' $xE |] 289 | ] 290 | 291 | [e| \cfg -> let go mode inp = $(body [|go|] [|mode|] [|inp|] [|cfg|]) 292 | in go (lexerInitialState cfg) |] 293 | 294 | type AlexInput = Input 295 | 296 | {-# INLINE alexInputPrevChar #-} 297 | alexInputPrevChar :: AlexInput -> Char 298 | alexInputPrevChar = toEnum . fromEnum . inputPrevByte 299 | 300 | {-# INLINE alexGetByte #-} 301 | alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) 302 | alexGetByte inp = 303 | do (b,bs) <- BS.uncons (inputBytes inp) 304 | let inp1 = Input { inputPrev = inputPos inp 305 | , inputPrevByte = b 306 | , inputPos = moveSourcePos (inputPos inp) 307 | , inputBytes = bs 308 | } 309 | inp1 `seq` pure (b,inp1) 310 | 311 | 312 | -------------------------------------------------------------------------------- /src/AlexTools.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell, CPP, DeriveLift #-} 2 | module AlexTools 3 | ( -- * Lexer Basics 4 | initialInput, initialInputAt, Input(..), inputFile 5 | , Lexeme(..) 6 | , SourcePos(..), startPos, beforeStartPos, prevPos 7 | , SourceRange(..) 8 | , prettySourcePos, prettySourceRange 9 | , prettySourcePosLong, prettySourceRangeLong 10 | , HasRange(..) 11 | , (<->) 12 | , moveSourcePos 13 | 14 | -- * Writing Lexer Actions 15 | , Action 16 | 17 | -- ** Lexemes 18 | , lexeme 19 | , matchLength 20 | , matchRange 21 | , matchText 22 | 23 | -- ** Manipulating the lexer's state 24 | , getLexerState 25 | , setLexerState 26 | 27 | -- ** Access to the lexer's input 28 | , startInput 29 | , endInput 30 | 31 | -- * Interface with Alex 32 | , AlexInput 33 | , alexInputPrevChar 34 | , makeAlexGetByte 35 | , makeLexer 36 | , LexerConfig(..) 37 | , simpleLexer 38 | , Word8 39 | 40 | ) where 41 | 42 | import Control.DeepSeq 43 | import Data.Word(Word8) 44 | import Data.Text(Text) 45 | import qualified Data.Text as Text 46 | import Control.Monad(liftM,ap,replicateM) 47 | import Language.Haskell.TH 48 | import Language.Haskell.TH.Syntax 49 | #if !MIN_VERSION_base(4,8,0) 50 | import Control.Applicative 51 | #endif 52 | 53 | data Lexeme t = Lexeme 54 | { lexemeText :: !Text 55 | , lexemeToken :: !t 56 | , lexemeRange :: !SourceRange 57 | } deriving (Show, Eq) 58 | 59 | instance NFData t => NFData (Lexeme t) where 60 | rnf (Lexeme x y z) = rnf (x,y,z) 61 | 62 | data SourcePos = SourcePos 63 | { sourceIndex :: !Int 64 | , sourceLine :: !Int 65 | , sourceColumn :: !Int 66 | , sourceFile :: !Text 67 | } deriving (Show, Eq, Lift) 68 | 69 | -- | Pretty print the source position without the file name. 70 | prettySourcePos :: SourcePos -> String 71 | prettySourcePos x = show (sourceLine x) ++ ":" ++ show (sourceColumn x) 72 | 73 | -- | Pretty print the source position, including the file name. 74 | prettySourcePosLong :: SourcePos -> String 75 | prettySourcePosLong x = 76 | Text.unpack (sourceFile x) ++ ":" ++ 77 | show (sourceLine x) ++ ":" ++ 78 | show (sourceColumn x) 79 | 80 | 81 | 82 | 83 | instance NFData SourcePos where 84 | rnf (SourcePos {}) = () 85 | 86 | -- | Update a 'SourcePos' for a particular matched character 87 | moveSourcePos :: Char -> SourcePos -> SourcePos 88 | moveSourcePos c p = SourcePos { sourceIndex = sourceIndex p + 1 89 | , sourceLine = newLine 90 | , sourceColumn = newColumn 91 | , sourceFile = sourceFile p 92 | } 93 | where 94 | line = sourceLine p 95 | column = sourceColumn p 96 | 97 | (newLine,newColumn) = case c of 98 | '\t' -> (line, ((column + 7) `div` 8) * 8 + 1) 99 | '\n' -> (line + 1, 1) 100 | _ -> (line, column + 1) 101 | 102 | 103 | -- | A range in the source code. 104 | data SourceRange = SourceRange 105 | { sourceFrom :: !SourcePos 106 | , sourceTo :: !SourcePos 107 | } deriving (Show, Eq, Lift) 108 | 109 | -- | Pretty print the range, without the file name 110 | prettySourceRange :: SourceRange -> String 111 | prettySourceRange x = prettySourcePos (sourceFrom x) ++ "--" ++ 112 | prettySourcePos (sourceTo x) 113 | 114 | -- | Pretty print the range, including the file name. 115 | prettySourceRangeLong :: SourceRange -> String 116 | prettySourceRangeLong x 117 | | sourceFile pfrom == sourceFile pto = 118 | Text.unpack (sourceFile pfrom) ++ ":" ++ 119 | prettySourcePos pfrom ++ "--" ++ 120 | prettySourcePos pto 121 | | otherwise = prettySourcePosLong pfrom ++ "--" ++ 122 | prettySourcePosLong pto 123 | where 124 | pfrom = sourceFrom x 125 | pto = sourceTo x 126 | 127 | 128 | 129 | instance NFData SourceRange where 130 | rnf (SourceRange x y) = rnf (x,y) 131 | 132 | class HasRange t where 133 | range :: t -> SourceRange 134 | 135 | instance HasRange SourcePos where 136 | range p = SourceRange { sourceFrom = p 137 | , sourceTo = p } 138 | 139 | instance HasRange SourceRange where 140 | range = id 141 | 142 | instance HasRange (Lexeme t) where 143 | range = lexemeRange 144 | 145 | instance (HasRange a, HasRange b) => HasRange (Either a b) where 146 | range (Left x) = range x 147 | range (Right x) = range x 148 | 149 | (<->) :: (HasRange a, HasRange b) => a -> b -> SourceRange 150 | x <-> y = SourceRange { sourceFrom = sourceFrom (range x) 151 | , sourceTo = sourceTo (range y) 152 | } 153 | 154 | -------------------------------------------------------------------------------- 155 | 156 | -- | An action to be taken when a regular expression matchers. 157 | newtype Action s a = A { runA :: Input -> Input -> Int -> s -> (s, a) } 158 | 159 | instance Functor (Action s) where 160 | fmap = liftM 161 | 162 | instance Applicative (Action s) where 163 | pure a = A (\_ _ _ s -> (s,a)) 164 | (<*>) = ap 165 | 166 | instance Monad (Action s) where 167 | return = pure 168 | A m >>= f = A (\i1 i2 l s -> let (s1,a) = m i1 i2 l s 169 | A m1 = f a 170 | in m1 i1 i2 l s1) 171 | 172 | -- | Acces the input just before the regular expression started matching. 173 | startInput :: Action s Input 174 | startInput = A (\i1 _ _ s -> (s,i1)) 175 | 176 | -- | Acces the input just after the regular expression that matched. 177 | endInput :: Action s Input 178 | endInput = A (\_ i2 _ s -> (s,i2)) 179 | 180 | -- | The number of characters in the matching input. 181 | matchLength :: Action s Int 182 | matchLength = A (\_ _ l s -> (s,l)) 183 | 184 | -- | Acces the curent state of the lexer. 185 | getLexerState :: Action s s 186 | getLexerState = A (\_ _ _ s -> (s,s)) 187 | 188 | -- | Change the state of the lexer. 189 | setLexerState :: s -> Action s () 190 | setLexerState s = A (\_ _ _ _ -> (s,())) 191 | 192 | -- | Get the range for the matching input. 193 | matchRange :: Action s SourceRange 194 | matchRange = 195 | do i1 <- startInput 196 | i2 <- endInput 197 | return (inputPos i1 <-> inputPrev i2) 198 | 199 | -- | Get the text associated with the matched input. 200 | matchText :: Action s Text 201 | matchText = 202 | do i1 <- startInput 203 | n <- matchLength 204 | return (Text.take n (inputText i1)) 205 | 206 | -- | Use the token and the current match to construct a lexeme. 207 | lexeme :: t -> Action s [Lexeme t] 208 | lexeme tok = 209 | do r <- matchRange 210 | txt <- matchText 211 | let l = Lexeme { lexemeRange = r 212 | , lexemeToken = tok 213 | , lexemeText = txt 214 | } 215 | l `seq` return [ l ] 216 | 217 | -- | Information about the lexer's input. 218 | data Input = Input 219 | { inputPos :: {-# UNPACK #-} !SourcePos 220 | -- ^ Current input position. 221 | 222 | , inputText :: {-# UNPACK #-} !Text 223 | -- ^ The text that needs to be lexed. 224 | 225 | , inputPrev :: {-# UNPACK #-} !SourcePos 226 | -- ^ Location of the last consumed character. 227 | 228 | , inputPrevChar :: {-# UNPACK #-} !Char 229 | -- ^ The last consumed character. 230 | } 231 | 232 | -- | Prepare the text for lexing. 233 | initialInput :: Text {- ^ Where the text came from -} -> 234 | Text {- ^ The text to lex -} -> Input 235 | initialInput file str = Input 236 | { inputPos = startPos file 237 | , inputPrev = beforeStartPos file 238 | , inputPrevChar = '\n' -- end of the virtual previous line 239 | , inputText = str 240 | } 241 | 242 | -- | Prepare the text for lexing, starting at a particular position. 243 | -- This is useful when the document is not at the start of the file. 244 | -- Since: 0.6 245 | initialInputAt :: 246 | SourcePos {- ^ Starting poistion -} -> 247 | Text {- ^ The text to lex, not including any preceeding text -} -> 248 | Input 249 | initialInputAt start str = Input 250 | { inputPos = start 251 | , inputPrev = start { sourceIndex = sourceIndex start - 1 252 | , sourceColumn = sourceColumn start - 1 253 | } 254 | , inputPrevChar = '\n' -- just something 255 | , inputText = str 256 | } 257 | 258 | 259 | 260 | startPos :: Text {- ^ Name of file/thing containing this -} -> SourcePos 261 | startPos file = SourcePos { sourceIndex = 0 262 | , sourceLine = 1 263 | , sourceColumn = 1 264 | , sourceFile = file 265 | } 266 | 267 | beforeStartPos :: Text -> SourcePos 268 | beforeStartPos file = SourcePos { sourceIndex = -1 269 | , sourceLine = 0 270 | , sourceColumn = 0 271 | , sourceFile = file 272 | } 273 | 274 | {- | Move one position back. Assumes that newlines use a single bytes. 275 | 276 | This function is intended to be used when starting the lexing somewhere 277 | in the middle of the input, for example, if we are implementing a quasi 278 | quoter, and the previous part of the input is not in our language. 279 | -} 280 | prevPos :: SourcePos -> SourcePos 281 | prevPos p 282 | | sourceColumn p > 1 = p { sourceColumn = sourceColumn p - 1 283 | , sourceIndex = sourceIndex p - 1 284 | } 285 | 286 | | sourceLine p > 1 = p { sourceLine = sourceLine p - 1 287 | , sourceColumn = 1 288 | , sourceIndex = sourceIndex p - 1 289 | } 290 | 291 | | otherwise = beforeStartPos (sourceFile p) 292 | 293 | 294 | -- | The file/thing for the current position. 295 | inputFile :: Input -> Text 296 | inputFile = sourceFile . inputPos 297 | 298 | -------------------------------------------------------------------------------- 299 | -- | Lexer configuration. 300 | data LexerConfig s t = LexerConfig 301 | { lexerInitialState :: s 302 | -- ^ State that the lexer starts in 303 | 304 | , lexerStateMode :: s -> Int 305 | -- ^ Determine the current lexer mode from the lexer's state. 306 | 307 | , lexerEOF :: s -> SourcePos -> [Lexeme t] 308 | -- ^ Emit some lexemes at the end of the input. 309 | } 310 | 311 | -- | A lexer that uses no lexer-modes, and does not emit anything at the 312 | -- end of the file. 313 | simpleLexer :: LexerConfig () t 314 | simpleLexer = LexerConfig 315 | { lexerInitialState = () 316 | , lexerStateMode = \_ -> 0 317 | , lexerEOF = \_ _ -> [] 318 | } 319 | 320 | 321 | -- | Generate a function to use an Alex lexer. 322 | -- The expression is of type @LexerConfig s t -> Input -> [Lexeme t]@ 323 | makeLexer :: ExpQ 324 | makeLexer = 325 | do let local = do n <- newName "x" 326 | return (varP n, varE n) 327 | 328 | ([xP,yP,zP], [xE,yE,zE]) <- unzip <$> replicateM 3 local 329 | 330 | let -- Defined by Alex 331 | alexEOF = conP (mkName "AlexEOF") [ ] 332 | alexError = conP (mkName "AlexError") [ wildP ] 333 | alexSkip = conP (mkName "AlexSkip") [ xP, wildP ] 334 | alexToken = conP (mkName "AlexToken") [ xP, yP, zP ] 335 | alexScanUser = varE (mkName "alexScanUser") 336 | 337 | let p ~> e = match p (normalB e) [] 338 | body go mode inp cfg = 339 | caseE [| $alexScanUser $mode $inp (lexerStateMode $cfg $mode) |] 340 | [ alexEOF ~> [| lexerEOF $cfg $mode (inputPrev $inp) |] 341 | , alexError ~> [| error "internal error in lexer (AlexTools.hs)" |] 342 | , alexSkip ~> [| $go $mode $xE |] 343 | , alexToken ~> [| case runA $zE $inp $xE $yE $mode of 344 | (mode', ts) -> ts ++ $go mode' $xE |] 345 | ] 346 | 347 | [e| \cfg -> let go mode inp = $(body [|go|] [|mode|] [|inp|] [|cfg|]) 348 | in go (lexerInitialState cfg) |] 349 | 350 | type AlexInput = Input 351 | 352 | alexInputPrevChar :: AlexInput -> Char 353 | alexInputPrevChar = inputPrevChar 354 | 355 | {-# INLINE makeAlexGetByte #-} 356 | makeAlexGetByte :: (Char -> Word8) -> AlexInput -> Maybe (Word8,AlexInput) 357 | makeAlexGetByte charToByte Input { inputPos = p, inputText = text } = 358 | do (c,text') <- Text.uncons text 359 | let p' = moveSourcePos c p 360 | x = charToByte c 361 | inp = Input { inputPrev = p 362 | , inputPrevChar = c 363 | , inputPos = p' 364 | , inputText = text' 365 | } 366 | x `seq` inp `seq` return (x, inp) 367 | 368 | 369 | 370 | --------------------------------------------------------------------------------