├── .travis.yml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── Setup.hs
├── lexer-applicative.cabal
├── src
│   └── Language
│       └── Lexer
│           └── Applicative.hs
├── stack.yaml
└── tests
    └── test.hs

/.travis.yml:
--------------------------------------------------------------------------------
# This is the complex Travis configuration, which is intended for use
# on open source libraries which need compatibility across multiple GHC
# versions, must work with cabal-install, and should be
# cross-platform. For more information and other options, see:
#
# https://docs.haskellstack.org/en/stable/travis_ci/
#
# Copy these contents into the root directory of your GitHub project in a file
# named .travis.yml

# Use new container infrastructure to enable caching
sudo: false

# Do not choose a language; we provide our own build tools.
language: generic

# Caching so the next build will be fast too.
cache:
  directories:
  - $HOME/.ghc
  - $HOME/.cabal
  - $HOME/.stack

# The different configurations we want to test. We have BUILD=cabal which uses
# cabal-install, and BUILD=stack which uses Stack. More documentation on each
# of those below.
#
# We set the compiler values here to tell Travis to use a different
# cache file per set of arguments.
#
# If you need to have different apt packages for each combination in the
# matrix, you can use a line such as:
# addons: {apt: {packages: [libfcgi-dev,libgmp-dev]}}
matrix:
  include:
  # We grab the appropriate GHC and cabal-install versions from hvr's PPA. See:
  # https://github.com/hvr/multi-ghc-travis
  - env: BUILD=cabal GHCVER=8.0.2 CABALVER=1.24 HAPPYVER=1.19.5 ALEXVER=3.1.7
    compiler: ": #GHC 8.0.2"
    addons: {apt: {packages: [cabal-install-1.24,ghc-8.0.2], sources: [hvr-ghc]}}
  - env: BUILD=cabal GHCVER=8.4.3 CABALVER=1.24
    compiler: ": #GHC 8.4.3"
    addons: {apt: {packages: [cabal-install-1.24,ghc-8.4.3], sources: [hvr-ghc]}}

  # The Stack builds. We can pass in arbitrary Stack arguments via the ARGS
  # variable, such as using --stack-yaml to point to a different file.
  - env: BUILD=stack ARGS="--resolver lts-8"
    compiler: ": #stack 8.0.2"
    addons: {apt: {packages: [libgmp-dev]}}

  - env: BUILD=stack ARGS="--resolver lts-10"
    compiler: ": #stack 8.2.2"
    addons: {apt: {packages: [libgmp-dev]}}

  allow_failures:
  - env: BUILD=cabal GHCVER=head CABALVER=head HAPPYVER=1.19.5 ALEXVER=3.1.7
  - env: BUILD=stack ARGS="--resolver nightly"

before_install:
  # Using compiler above sets CC to an invalid value, so unset it
  - unset CC

  # We want to always allow newer versions of packages when building on GHC HEAD
  - CABALARGS=""
  - if [ "x$GHCVER" = "xhead" ]; then CABALARGS=--allow-newer; fi

  # Download and unpack the stack executable
  - export PATH=/opt/ghc/$GHCVER/bin:/opt/cabal/$CABALVER/bin:$HOME/.local/bin:/opt/alex/$ALEXVER/bin:/opt/happy/$HAPPYVER/bin:$HOME/.cabal/bin:$PATH
  - mkdir -p ~/.local/bin
  - |
    if [ `uname` = "Darwin" ]
    then
      travis_retry curl --insecure -L https://www.stackage.org/stack/osx-x86_64 | tar xz --strip-components=1 --include '*/stack' -C ~/.local/bin
    else
      travis_retry curl -L https://www.stackage.org/stack/linux-x86_64 | tar xz --wildcards --strip-components=1 -C ~/.local/bin '*/stack'
    fi

    # Use the more reliable S3 mirror of Hackage
    mkdir -p $HOME/.cabal
    echo 'remote-repo: hackage.haskell.org:http://hackage.fpcomplete.com/' > $HOME/.cabal/config
    echo 'remote-repo-cache: $HOME/.cabal/packages' >> $HOME/.cabal/config

    if [ "$CABALVER" != "1.16" ]
    then
      echo 'jobs: $ncpus' >> $HOME/.cabal/config
    fi

install:
  - echo "$(ghc --version) [$(ghc --print-project-git-commit-id 2> /dev/null || echo '?')]"
  - if [ -f configure.ac ]; then autoreconf -i; fi
  - |
    set -ex
    case "$BUILD" in
      stack)
        stack --no-terminal --install-ghc $ARGS test --bench --only-dependencies
        ;;
      cabal)
        cabal --version
        travis_retry cabal update

        # Get the list of packages from the stack.yaml file
        PACKAGES=$(stack --install-ghc query locals | grep '^ *path' | sed 's@^ *path:@@')

        cabal install --only-dependencies --enable-tests --enable-benchmarks --force-reinstalls --ghc-options=-O0 --reorder-goals --max-backjumps=-1 $CABALARGS $PACKAGES
        ;;
    esac
    set +ex

script:
  - |
    set -ex
    case "$BUILD" in
      stack)
        stack --no-terminal $ARGS test --bench --no-run-benchmarks --haddock --no-haddock-deps
        ;;
      cabal)
        cabal install --enable-tests --enable-benchmarks --force-reinstalls --ghc-options=-O0 --reorder-goals --max-backjumps=-1 $CABALARGS $PACKAGES

        ORIGDIR=$(pwd)
        for dir in $PACKAGES
        do
          cd $dir
          cabal check || [ "$CABALVER" == "1.16" ]
          cabal sdist
          PKGVER=$(cabal info . | awk '{print $2;exit}')
          SRC_TGZ=$PKGVER.tar.gz
          cd dist
          tar zxfv "$SRC_TGZ"
          cd "$PKGVER"
          cabal configure --enable-tests
          cabal build
          cabal test
          cd $ORIGDIR
        done
        ;;
    esac
    set +ex
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
Changelog
=========

2.1.0.2
-------

Fix compatibility with GHC 8.4 and drop support for GHC 7.x

2.1.0.1
-------

Fix a link in the README

2.1
---

* Restore compatibility with older GHCs
* Change the type of `longestShortest`

2.0
---

This is a major redesign of the API. Notable changes:

- The lexer now supports parsing the longest prefix/shortest suffix
  (see `longestShortest`)
- Instead of throwing an exception, we return a stream. The stream can be
  consumed directly, converted to a plain list of tokens, or converted to an
  `Either`-wrapped list that reports the first lexical error.

1.1.1
-----

Add `tokensEither`

1.1
---

Upgrade to srcloc 0.5

1.0.0.1
-------

Signal a lexical error (instead of looping) when a regex does not consume any
characters
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright (c) 2015 Roman Cheplyaka

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
A simple applicative lexer based on the article
[Lexical analysis with parser combinators][1]
and the [regex-applicative][2] library.
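
A minimal usage sketch (the token type and regular expressions below are
illustrative, not part of the package):

```haskell
import Language.Lexer.Applicative
import Text.Regex.Applicative
import Data.Char (isAlpha, isDigit, isSpace)
import Data.Loc (L(..))

data MyToken = Ident String | Number Int
  deriving Show

myLexer :: Lexer MyToken
myLexer = mconcat
  [ token      (longest $ Ident <$> some (psym isAlpha))
  , token      (longest $ Number . read <$> some (psym isDigit))
  , whitespace (longest $ some (psym isSpace))
  ]

-- prints each token together with its source location
main :: IO ()
main = mapM_ (\(L loc t) -> print (loc, t)) . streamToList $
  runLexer myLexer "input" "foo 42 bar"
```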

[1]: https://ro-che.info/articles/2015-01-02-lexical-analysis
[2]: http://hackage.haskell.org/package/regex-applicative-0.3.1/docs/Text-Regex-Applicative.html
--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
import Distribution.Simple
main = defaultMain
--------------------------------------------------------------------------------
/lexer-applicative.cabal:
--------------------------------------------------------------------------------
-- Initial lexer-applicative.cabal generated by cabal init. For further
-- documentation, see http://haskell.org/cabal/users-guide/

name:                lexer-applicative
version:             2.1.0.2
synopsis:            Simple lexer based on applicative regular expressions
description:         Simple lexer based on applicative regular expressions
homepage:            https://github.com/feuerbach/lexer-applicative
license:             MIT
license-file:        LICENSE
author:              Roman Cheplyaka
maintainer:          Roman Cheplyaka
-- copyright:
category:            Language
build-type:          Simple
extra-source-files:
  README.md
  CHANGELOG.md
cabal-version:       >=1.10

source-repository head
  type:     git
  location: git://github.com/feuerbach/lexer-applicative.git

library
  exposed-modules:
    Language.Lexer.Applicative
  -- other-modules:
  -- other-extensions:
  build-depends:
    base >=4.9 && < 5,
    srcloc >= 0.5,
    regex-applicative >= 0.3.1
  hs-source-dirs:      src
  default-language:    Haskell2010
  ghc-options:         -Wall

test-suite test
  default-language:
    Haskell2010
  type:
    exitcode-stdio-1.0
  hs-source-dirs:
    tests
  main-is:
    test.hs
  build-depends:
      base >= 4 && < 5
    , tasty >= 0.10
    , tasty-hunit >= 0.9
    , regex-applicative
    , lexer-applicative
    , srcloc
    , deepseq
--------------------------------------------------------------------------------
/src/Language/Lexer/Applicative.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE ScopedTypeVariables, DeriveDataTypeable, DeriveFunctor #-}
-- | For some background, see
-- <https://ro-che.info/articles/2015-01-02-lexical-analysis>
module Language.Lexer.Applicative
  (
    -- * Building a Lexer
    Lexer(..)
  , token
  , whitespace
    -- ** Building Recognizers
  , Recognizer
  , longest
  , longestShortest
    -- * Running a Lexer
  , runLexer
    -- ** Working with a token stream
  , TokenStream(..)
  , streamToList
  , streamToEitherList
  , LexicalError(..)
  ) where

import Text.Regex.Applicative
import Data.Loc
import Data.List
import Data.Typeable (Typeable)
import Data.Semigroup (Semigroup(..))
import Data.Function
import Control.Exception

----------------------------------------------------------------------
-- Lexer
----------------------------------------------------------------------

-- | A 'Lexer' specification consists of two recognizers: one for
-- meaningful tokens and one for whitespace and comments.
--
-- Although you can construct 'Lexer's directly, it is more convenient to
-- build them with 'token', 'whitespace', and the 'Monoid' instance like this:
--
-- @
-- myLexer :: 'Lexer' MyToken
-- myLexer = 'mconcat'
--   [ 'token'      ('longest' myToken)
--   , 'whitespace' ('longest' myWhiteSpace)
--   , 'whitespace' ('longestShortest' myCommentPrefix myCommentSuffix)
--   ]
-- @
data Lexer tok = Lexer
  { lexerTokenRE      :: Recognizer tok
  , lexerWhitespaceRE :: Recognizer ()
  }
  deriving Functor

instance Semigroup (Lexer tok) where
  Lexer t1 w1 <> Lexer t2 w2 = Lexer (t1 <> t2) (w1 <> w2)

instance Monoid (Lexer tok) where
  mempty = Lexer mempty mempty
  mappend = (<>)

-- | Build a lexer with the given token recognizer and no (i.e. 'mempty')
-- whitespace recognizer.
--
-- 'token' is a monoid homomorphism:
--
-- @'token' a '<>' 'token' b = 'token' (a '<>' b)@
token :: Recognizer tok -> Lexer tok
token r = Lexer r mempty

-- | Build a lexer with the given whitespace recognizer and no (i.e. 'mempty')
-- token recognizer.
--
-- 'whitespace' is a monoid homomorphism:
--
-- @'whitespace' a '<>' 'whitespace' b = 'whitespace' (a '<>' b)@
whitespace :: Recognizer a -> Lexer tok
whitespace r = Lexer mempty (() <$ r)
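
-- An illustrative consequence of the two homomorphism laws above (the
-- names below are hypothetical, not part of this module): recognizers
-- may be merged either before or after being wrapped into a 'Lexer',
--
-- @
-- 'token' recA '<>' 'token' recB = 'token' (recA '<>' recB)
-- @
--
-- which is what makes the 'mconcat' style shown in the 'Lexer'
-- documentation equivalent to combining everything inside a single
-- 'token' call and a single 'whitespace' call.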

----------------------------------------------------------------------
-- Recognizer
----------------------------------------------------------------------

-- | A token recognizer
--
-- 'Recognizer' values are constructed by functions like 'longest' and
-- 'longestShortest', combined with 'mappend', and used by 'token' and
-- 'whitespace'.
--
-- When a recognizer returns without consuming any characters, a lexical
-- error is signaled.
newtype Recognizer tok = Recognizer (RE Char (RE Char tok))
  deriving Functor

instance Semigroup (Recognizer tok) where
  Recognizer r1 <> Recognizer r2 = Recognizer (r1 <|> r2)

instance Monoid (Recognizer tok) where
  mempty = Recognizer empty
  mappend = (<>)

-- | When scanning the next token, the regular expression will compete with
-- the other 'Recognizer's of its 'Lexer'. If it wins, its result
-- will become the next token.
--
-- 'longest' has the following properties:
--
-- * @'longest' (r1 '<|>' r2) = 'longest' r1 '<>' 'longest' r2@
--
-- * @'longest' r = 'longestShortest' r 'pure'@
longest
  :: RE Char tok
  -> Recognizer tok
longest re = longestShortest re pure
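
-- An illustrative competition between two 'longest' recognizers (the
-- names and constructors are hypothetical):
--
-- @
-- kw    = 'longest' (If    '<$'  'string' "if")
-- ident = 'longest' (Ident '<$>' 'some' ('psym' isAlpha))
-- @
--
-- On the input @"iffy"@, the lexer @'token' kw '<>' 'token' ident@
-- produces an @Ident@ token: the identifier regex matches the longer
-- prefix (all four characters), so it beats the two-character keyword.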

-- | This is a more sophisticated recognizer than 'longest'.
--
-- It recognizes a token consisting of a prefix and a suffix, where the
-- prefix is matched as long as possible and the suffix as short as possible.
--
-- An example would be a C block comment
--
-- >/* comment text */
--
-- The naive
--
-- @'longest' ('string' "\/*" '*>' 'many' 'anySym' '*>' 'string' "*\/")@
--
-- doesn't work because it consumes too much: in
--
-- >/* xxx */ yyy /* zzz */
--
-- it will treat the whole line as a comment.
--
-- This is where 'longestShortest' comes in handy:
--
-- @
-- 'longestShortest'
--   ('string' "\/*")
--   (\\_ -> 'many' 'anySym' '*>' 'string' "*\/")
-- @
--
-- Operationally, the prefix regex first competes with other 'Recognizer's
-- for the longest match. If it wins, the suffix regex is computed from the
-- prefix's result, its shortest match is found, and that match's result
-- becomes the token.
--
-- The two regular expressions combined must consume some input, or else a
-- 'LexicalError' is thrown. However, either one of them individually may
-- match without consuming input.
--
-- \* * *
--
-- Once the prefix regex wins, the choice is committed; the suffix regex
-- must match or else a 'LexicalError' is thrown. Therefore,
--
-- @
-- 'longestShortest' pref suff1
--    '<>'
-- 'longestShortest' pref suff2
--    =
-- 'longestShortest' pref suff1
-- @
--
-- and is not the same as
--
-- @'longestShortest' pref (suff1 '<|>' suff2)@
--
-- The following holds, however:
--
-- @
-- 'longestShortest' pref1 suff
--    '<>'
-- 'longestShortest' pref2 suff
--    =
-- 'longestShortest' (pref1 '<|>' pref2) suff
-- @
longestShortest
  :: RE Char pref          -- ^ regex for the longest prefix
  -> (pref -> RE Char tok) -- ^ regex for the shortest suffix
  -> Recognizer tok
longestShortest prefRE suffRE =
  Recognizer $
    suffRE <$> prefRE

----------------------------------------------------------------------
-- Running a Lexer
----------------------------------------------------------------------

-- | The lexical error exception
data LexicalError = LexicalError !Pos
  deriving (Eq, Typeable)

instance Show LexicalError where
  show (LexicalError pos) = "Lexical error at " ++ displayPos pos
instance Exception LexicalError

-- | A stream of tokens
data TokenStream tok
  = TsToken tok (TokenStream tok)
  | TsEof
  | TsError LexicalError
  deriving (Eq, Functor, Show)

-- | Convert a 'TokenStream' to a list of tokens. Turn 'TsError' into
-- a runtime 'LexicalError' exception.
streamToList :: TokenStream tok -> [tok]
streamToList stream =
  case stream of
    TsToken t stream' -> t : streamToList stream'
    TsEof             -> []
    TsError e         -> throw e

-- | Convert a 'TokenStream' into either a token list or a 'LexicalError'.
-- This function may be occasionally useful, but in general its use is
-- discouraged because it needs to force the whole stream before returning
-- a result.
streamToEitherList :: TokenStream tok -> Either LexicalError [tok]
streamToEitherList =
  sequence .
  fix (\rec stream ->
    case stream of
      TsToken t stream' -> Right t : rec stream'
      TsEof             -> []
      TsError e         -> [Left e]
    )
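
-- A small illustration of the difference (@err@ stands for an arbitrary
-- 'LexicalError'; this sketch is not part of the original module):
--
-- @
-- 'streamToList' (TsToken \'x\' TsEof)                  = "x"
-- take 1 ('streamToList' (TsToken \'x\' (TsError err))) = "x"       -- lazy
-- 'streamToEitherList' (TsToken \'x\' (TsError err))    = Left err  -- forces everything
-- @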

-- | Run a lexer on a string and produce a lazy stream of tokens
runLexer
  :: forall tok.
     Lexer tok -- ^ lexer specification
  -> String    -- ^ source file name (used in locations)
  -> String    -- ^ source text
  -> TokenStream (L tok)
runLexer (Lexer (Recognizer pToken) (Recognizer pJunk)) src = go . annotate src
  where
    go l = case l of
      [] -> TsEof
      s@((_, pos1, _):_) ->
        let
          -- last position in the stream
          -- in this branch s is non-empty, so this is safe
          last_pos :: Pos
          last_pos = case last s of (_, p, _) -> p
        in
          case findLongestPrefix re s of

            Nothing -> TsError (LexicalError pos1)

            Just (shortest_re, rest1) ->

              case findShortestPrefix shortest_re rest1 of
                Nothing -> TsError . LexicalError $
                  case rest1 of
                    (_, _, p):_ -> p
                    []          -> last_pos

                -- If the combined match is empty, we have a lexical error
                Just (_, (_, pos1', _):_) | pos1' == pos1 ->
                  TsError $ LexicalError pos1

                Just (Just tok, rest) ->
                  let
                    pos2 =
                      case rest of
                        (_, _, p):_ -> p
                        []          -> last_pos

                  in TsToken (L (Loc pos1 pos2) tok) (go rest)

                Just (Nothing, rest) -> go rest

    extend :: RE Char a -> RE (Char, Pos, Pos) a
    extend = comap (\(c, _, _) -> c)

    re :: RE (Char, Pos, Pos) (RE (Char, Pos, Pos) (Maybe tok))
    re = extend . fmap extend $
      ((Just <$>) <$> pToken) <|> ((Nothing <$) <$> pJunk)

annotate
  :: String             -- ^ source file name
  -> String             -- ^ contents
  -> [(Char, Pos, Pos)] -- ^ the character, its position, and the previous position
annotate src s = snd $ mapAccumL f (startPos src, startPos src) s
  where
    f (pos, prev_pos) ch =
      let pos' = advancePos pos ch
      in pos' `seq` ((pos', pos), (ch, pos, prev_pos))
--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
extra-deps: []
resolver: lts-10.6
packages:
- '.'
--------------------------------------------------------------------------------
/tests/test.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE ScopedTypeVariables #-}

import Test.Tasty
import Test.Tasty.HUnit

import Language.Lexer.Applicative
import Text.Regex.Applicative
import Text.Regex.Applicative.Common
import Data.Char
import Data.Monoid
import Data.Loc
import Control.Exception
import Control.DeepSeq

ws = whitespace $ longest $ some (psym isSpace)
-- this is bad, because it accepts an empty string
badWhitespace = whitespace $ longest $ many (psym isSpace)

word = longestToken $ many $ psym isAlpha

longestToken = token . longest

tokens l n s = streamToList $ runLexer l n s
tokensEither l n s = streamToEitherList $ runLexer l n s

unloc (L l a) = (a, l)

-- This recognizes C-style block comments like /* ... */,
-- but also matching delimiters like /*** ... ***/ (to make it more fun)
blockComment = token $
  longestShortest
    ((++) <$> string "/" <*> many (sym '*'))
    (\start -> (,) start <$> ((++) <$> many anySym <*> string (reverse start)))
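
-- Illustrative walk-through (mirrors the "longestShortest (success)" case
-- below): on "/*** abc ***/", the prefix regex takes its longest match,
-- "/***"; the suffix regex built from that prefix then takes its shortest
-- match, " abc ***/", which ends in reverse "/***" == "***/".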

main = defaultMain $ testGroup "Tests"
  [ testCase "Empty string" $
      tokens (longestToken (empty :: RE Char Int) <> whitespace mempty) "-" "" @?= []
  , testCase "Space- and newline-separated numbers" $
      unloc <$> tokens (longestToken decimal <> ws) "-" "1\n 23 456" @?=
        [ (1,  Loc (Pos "-" 1 1 0) (Pos "-" 1 1 0))
        , (23, Loc (Pos "-" 2 2 3) (Pos "-" 2 3 4))
        , (456,Loc (Pos "-" 2 6 7) (Pos "-" 2 8 9))
        ]
  , testCase "Nullable parser, no error" $ do
      let r = tokensEither (longestToken decimal <> badWhitespace) "-" "31 45"
      case r of
        Right (_ :: [L Int]) -> return ()
        Left e -> assertFailure $ show e
  , testCase "Nullable parser, error" $ do
      let r = tokensEither (longestToken decimal <> badWhitespace) "-" "31? 45"
      case r of
        Right (_ :: [L Int]) -> assertFailure "No error?"
        Left (LexicalError p) -> p @?= Pos "-" 1 3 2
  , testCase "No matches, error" $ do
      tokensEither (longestToken decimal) "-" " " @?= Left (LexicalError (Pos "-" 1 1 0))
  , testCase "No matches after a recognized token" $ do
      fmap unloc (runLexer (longestToken decimal <> ws) "-" "2 x") @?=
        TsToken (2 :: Int, Loc (Pos "-" 1 1 0) (Pos "-" 1 1 0)) (TsError $ LexicalError (Pos "-" 1 3 2))
  , testCase "streamToList throws an exception upon failure" $ do
      r :: Either LexicalError [L ()] <- try . evaluate . force $ tokens mempty "-" " "
      r @?= Left (LexicalError (Pos "-" 1 1 0))
  , testCase "longestShortest (success)" $
      fmap (map unLoc)
        (tokensEither ((Left <$> blockComment) <> (Right <$> word) <> ws)
          "-"
          "/* xxx */ yyy /*** abc ***/ ef")
      @?=
      Right [Left ("/*"," xxx */"),Right "yyy",Left ("/***"," abc ***/"),Right "ef"]
  , testCase "longestShortest (failure of shortest; end of stream)" $
      (tokensEither (whitespace $ longestShortest (string "abc") (const empty))
        "-"
        "abc" :: Either LexicalError [L ()])
      @?=
      Left (LexicalError (Pos "-" 1 3 2))
  , testCase "longestShortest (failure of shortest; not end of stream)" $
      (tokensEither (whitespace $ longestShortest (string "abc") (const empty))
        "-"
        "abc " :: Either LexicalError [L ()])
      @?=
      Left (LexicalError (Pos "-" 1 3 2))
  ]

-- orphan
instance NFData a => NFData (L a) where
  rnf (L loc a) = loc `seq` rnf a
--------------------------------------------------------------------------------