├── cabal.project ├── Setup.hs ├── .ghci ├── .hlint.yaml ├── cabal.haskell-ci ├── .gitignore ├── .github └── workflows │ ├── hlint.yml │ └── haskell-ci.yml ├── README.markdown ├── .vim.custom ├── src └── Text │ └── Parser │ ├── Token │ ├── Highlight.hs │ └── Style.hs │ ├── LookAhead.hs │ ├── Permutation.hs │ ├── Expression.hs │ ├── Char.hs │ ├── Combinators.hs │ └── Token.hs ├── LICENSE ├── tests └── QuickCheck.hs ├── parsers.cabal └── CHANGELOG.markdown /cabal.project: -------------------------------------------------------------------------------- 1 | packages: . 2 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /.ghci: -------------------------------------------------------------------------------- 1 | :set -isrc -idist/build/autogen -optP-include -optPdist/build/autogen/cabal_macros.h 2 | -------------------------------------------------------------------------------- /.hlint.yaml: -------------------------------------------------------------------------------- 1 | - arguments: [-XCPP, --cpp-define=HLINT, --cpp-ansi] 2 | 3 | - ignore: {name: Reduce duplication} 4 | - ignore: {name: Use String} 5 | # Needed for backwards compatibility with old versions of base, where 6 | # replicateM had a Monad constraint instead of an Applicative one. 7 | - ignore: {name: Use replicateM, within: [Text.Parser.Combinators]} 8 | -------------------------------------------------------------------------------- /cabal.haskell-ci: -------------------------------------------------------------------------------- 1 | distribution: jammy 2 | no-tests-no-benchmarks: False 3 | unconstrained: False 4 | -- irc-channels: irc.freenode.org#haskell-lens 5 | irc-if-in-origin-repo: True 6 | 7 | constraint-set no-binary 8 | constraints: parsers -binary 9 | 10 | constraint-set no-parsec 11 | constraints: parsers -parsec 12 | 13 | constraint-set no-attoparsec 14 | constraints: parsers -attoparsec 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-newstyle 3 | .cabal-sandbox 4 | cabal.sandbox.config 5 | docs 6 | wiki 7 | TAGS 8 | tags 9 | wip 10 | .DS_Store 11 | .*.swp 12 | .*.swo 13 | *.o 14 | *.hi 15 | *~ 16 | *# 17 | .stack-work/ 18 | cabal-dev 19 | *.chi 20 | *.chs.h 21 | *.dyn_o 22 | *.dyn_hi 23 | .hpc 24 | .hsenv 25 | .cabal-sandbox/ 26 | cabal.sandbox.config 27 | *.prof 28 | *.aux 29 | *.hp 30 | *.eventlog 31 | cabal.project.local 32 | cabal.project.local~ 33 | .HTF/ 34 | .ghc.environment.* 35 | -------------------------------------------------------------------------------- /.github/workflows/hlint.yml: -------------------------------------------------------------------------------- 1 | name: HLint 2 | on: 3 | - push 4 | - pull_request 5 | jobs: 6 | hlint: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout repository 11 | uses: actions/checkout@v4 12 | 13 | - name: 'Set up HLint' 14 | uses: haskell-actions/hlint-setup@v2 15 | with: 16 | version: '3.8' 17 | 18 | - name: 'Run HLint' 19 | uses: haskell-actions/hlint-run@v2 20 | with: 21 | path: src/ 22 | fail-on: suggestion 23 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | parsers 2 | ======= 3 | 4 | [![Hackage](https://img.shields.io/hackage/v/parsers.svg)](https://hackage.haskell.org/package/parsers) [![Build Status](https://github.com/ekmett/parsers/workflows/Haskell-CI/badge.svg)](https://github.com/ekmett/parsers/actions?query=workflow%3AHaskell-CI) 5 | 6 | Goals 7 | ----- 8 | 9 | This library provides convenient combinators for working with and building parsing combinator libraries. 10 | 11 | Given a few simple instances, you get access to a large number of canned definitions. 12 | 13 | Contact Information 14 | ------------------- 15 | 16 | Contributions and bug reports are welcome! 17 | 18 | Please feel free to contact me through github or on the #haskell IRC channel on irc.freenode.net. 19 | 20 | -Edward Kmett 21 | 22 | -------------------------------------------------------------------------------- /.vim.custom: -------------------------------------------------------------------------------- 1 | " Add the following to your .vimrc to automatically load this on startup 2 | 3 | " if filereadable(".vim.custom") 4 | " so .vim.custom 5 | " endif 6 | 7 | function StripTrailingWhitespace() 8 | let myline=line(".") 9 | let mycolumn = col(".") 10 | silent %s/ *$// 11 | call cursor(myline, mycolumn) 12 | endfunction 13 | 14 | " enable syntax highlighting 15 | syntax on 16 | 17 | " search for the tags file anywhere between here and / 18 | set tags=TAGS;/ 19 | 20 | " highlight tabs and trailing spaces 21 | set listchars=tab:‗‗,trail:‗ 22 | set list 23 | 24 | " f2 runs hasktags 25 | map :exec ":!hasktags -x -c --ignore src" 26 | 27 | " strip trailing whitespace before saving 28 | " au BufWritePre *.hs,*.markdown silent! cal StripTrailingWhitespace() 29 | 30 | " rebuild hasktags after saving 31 | au BufWritePost *.hs silent! :exec ":!hasktags -x -c --ignore src" 32 | -------------------------------------------------------------------------------- /src/Text/Parser/Token/Highlight.hs: -------------------------------------------------------------------------------- 1 | ----------------------------------------------------------------------------- 2 | -- | 3 | -- Module : Text.Parser.Token.Highlight 4 | -- Copyright : (C) 2011 Edward Kmett 5 | -- License : BSD-style (see the file LICENSE) 6 | -- 7 | -- Maintainer : Edward Kmett 8 | -- Stability : experimental 9 | -- Portability : portable 10 | -- 11 | -- Highlighting isn't strictly a parsing concern, but it makes more sense 12 | -- to annotate a parser with highlighting information than to require 13 | -- someone to completely reimplement all of the combinators to add 14 | -- this functionality later when they need it. 15 | -- 16 | ---------------------------------------------------------------------------- 17 | module Text.Parser.Token.Highlight 18 | ( Highlight(..) 19 | ) where 20 | 21 | -- | Tags used by the 'Text.Parser.Token.TokenParsing' 'Text.Parser.Token.highlight' combinator. 22 | data Highlight 23 | = EscapeCode 24 | | Number 25 | | Comment 26 | | CharLiteral 27 | | StringLiteral 28 | | Constant 29 | | Statement 30 | | Special 31 | | Symbol 32 | | Identifier 33 | | ReservedIdentifier 34 | | Operator 35 | | ReservedOperator 36 | | Constructor 37 | | ReservedConstructor 38 | | ConstructorOperator 39 | | ReservedConstructorOperator 40 | | BadInput 41 | | Unbound 42 | | Layout 43 | | MatchedSymbols 44 | | LiterateComment 45 | | LiterateSyntax 46 | deriving (Eq,Ord,Show,Read,Enum,Bounded) 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2011-2013 Edward Kmett 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the author nor the names of his contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 21 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /tests/QuickCheck.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE FlexibleInstances #-} 3 | {-# LANGUAGE RankNTypes #-} 4 | {-# LANGUAGE CPP #-} 5 | 6 | module Main 7 | ( main 8 | ) where 9 | 10 | import Control.Applicative 11 | 12 | #ifdef MIN_VERSION_attoparsec 13 | import Data.Attoparsec.Text (parseOnly) 14 | #endif 15 | import Data.Either 16 | import Data.String 17 | 18 | import Test.QuickCheck 19 | import Test.QuickCheck.Instances () 20 | 21 | #ifdef MIN_VERSION_parsec 22 | import Text.Parsec.Prim as P (parse) 23 | #endif 24 | import Text.Parser.Char 25 | import Text.Parser.Combinators 26 | import Text.ParserCombinators.ReadP (readP_to_S) 27 | 28 | import System.Exit 29 | 30 | -- -------------------------------------------------------------------------- -- 31 | -- Run tests with different parser frameworks 32 | 33 | -- Instead of letting quick check pick the parser framework as a test parameter 34 | -- it may be better to just run all tests for each parser framework. 35 | 36 | newtype P a = P (forall m. (Monad m, CharParsing m) => m a) 37 | 38 | data TestParser a = TestParser String (P a -> String -> Either String a) 39 | 40 | instance Show (TestParser a) where show (TestParser n _) = n 41 | 42 | #ifdef MIN_VERSION_attoparsec 43 | pAtto :: TestParser a 44 | pAtto = TestParser "attoparsec" $ \(P p) -> parseOnly p . fromString 45 | #endif 46 | 47 | #ifdef MIN_VERSION_parsec 48 | pParsec :: TestParser a 49 | pParsec = TestParser "parsec" $ \(P p) -> either (Left . show) Right . parse p "test input" 50 | #endif 51 | 52 | pReadP :: TestParser a 53 | pReadP = TestParser "ReadP" $ \(P p) s -> case readP_to_S p s of 54 | [] -> Left "parseFailed" 55 | (a,_):_ -> Right a 56 | 57 | instance Arbitrary (TestParser a) where 58 | arbitrary = elements ps 59 | where 60 | ps = [pReadP] 61 | #ifdef MIN_VERSION_attoparsec 62 | ++ [pAtto] 63 | #endif 64 | #ifdef MIN_VERSION_parsec 65 | ++ [pParsec] 66 | #endif 67 | 68 | -- -------------------------------------------------------------------------- -- 69 | -- Main 70 | 71 | main :: IO () 72 | main = mapM quickCheckResult tests >>= \x -> case filter (not . passed) x of 73 | [] -> exitSuccess 74 | _ -> exitFailure 75 | where 76 | passed Success{} = True 77 | passed _ = False 78 | 79 | -- -------------------------------------------------------------------------- -- 80 | -- Tests 81 | 82 | tests :: [Property] 83 | tests = 84 | [ property prop_notFollowedBy0 85 | , property prop_notFollowedBy1 86 | , property prop_notFollowedBy2 87 | , property prop_notFollowedBy3 88 | ] 89 | 90 | -- -------------------------------------------------------------------------- -- 91 | -- Properties 92 | 93 | prop_notFollowedBy0 :: TestParser Char -> Char -> Char -> Bool 94 | prop_notFollowedBy0 (TestParser _ p) x y = either (\_ -> x == y) (/= y) 95 | $ p (P (notFollowedBy (char y) *> anyChar)) [x] 96 | 97 | prop_notFollowedBy1 :: TestParser Char -> Char -> Bool 98 | prop_notFollowedBy1 (TestParser _ p) x = either (\_ -> x == x) (/= x) 99 | $ p (P (notFollowedBy (char x) *> anyChar)) [x] 100 | 101 | prop_notFollowedBy2 :: TestParser Char -> String -> Char -> Bool 102 | prop_notFollowedBy2 (TestParser _ p) x y = isLeft 103 | $ p (P (anyChar *> notFollowedBy (char y) *> char y)) x 104 | 105 | prop_notFollowedBy3 :: TestParser () -> Char -> Bool 106 | prop_notFollowedBy3 (TestParser _ p) x = isRight 107 | $ p (P (notFollowedBy (char x) <|> char x *> pure ())) [x] 108 | -------------------------------------------------------------------------------- /parsers.cabal: -------------------------------------------------------------------------------- 1 | name: parsers 2 | category: Text, Parsing 3 | version: 0.12.12 4 | license: BSD3 5 | cabal-version: >= 1.10 6 | license-file: LICENSE 7 | author: Edward A. Kmett 8 | maintainer: Edward A. Kmett 9 | stability: experimental 10 | homepage: http://github.com/ekmett/parsers/ 11 | bug-reports: http://github.com/ekmett/parsers/issues 12 | copyright: Copyright (C) 2010-2013 Edward A. Kmett 13 | synopsis: Parsing combinators 14 | description: 15 | This library provides convenient combinators for working with and building parsing combinator libraries. 16 | . 17 | Given a few simple instances, e.g. for the class 'Text.Parser.Combinators.Parsing' in "Text.Parser.Combinators.Parsing" you 18 | get access to a large number of canned definitions. Instances exist for the parsers provided by @parsec@, 19 | @attoparsec@ and base’s "Text.Read". 20 | build-type: Simple 21 | tested-with: GHC==8.0.2 22 | , GHC==8.2.2 23 | , GHC==8.4.4 24 | , GHC==8.6.5 25 | , GHC==8.8.4 26 | , GHC==8.10.7 27 | , GHC==9.0.2 28 | , GHC==9.2.8 29 | , GHC==9.4.8 30 | , GHC==9.6.6 31 | , GHC==9.8.4 32 | , GHC==9.10.1 33 | , GHC==9.12.1 34 | 35 | extra-source-files: 36 | .hlint.yaml 37 | CHANGELOG.markdown 38 | README.markdown 39 | 40 | source-repository head 41 | type: git 42 | location: https://github.com/ekmett/parsers.git 43 | 44 | flag binary 45 | default: True 46 | description: 47 | You can disable the use of the `binary` package using `-f-binary`. 48 | 49 | flag parsec 50 | default: True 51 | description: 52 | You can disable the use of the `parsec` package using `-f-parsec`. 53 | 54 | flag attoparsec 55 | default: True 56 | description: 57 | You can disable the use of the `attoparsec` package using `-f-attoparsec`. 58 | 59 | library 60 | default-language: Haskell2010 61 | exposed-modules: 62 | Text.Parser.Char 63 | Text.Parser.Combinators 64 | Text.Parser.LookAhead 65 | Text.Parser.Permutation 66 | Text.Parser.Expression 67 | Text.Parser.Token 68 | Text.Parser.Token.Style 69 | Text.Parser.Token.Highlight 70 | 71 | hs-source-dirs: src 72 | 73 | ghc-options: -Wall -Wno-wrong-do-bind -Wmonomorphism-restriction -Wincomplete-record-updates -Widentities -Wincomplete-uni-patterns -Wno-trustworthy-safe 74 | 75 | build-depends: 76 | base >= 4.9 && < 5, 77 | charset >= 0.3 && < 1, 78 | containers >= 0.4 && < 0.9, 79 | text >= 0.10 && < 2.2, 80 | transformers >= 0.2 && < 0.7, 81 | mtl >= 2.0.1 && < 2.4, 82 | scientific >= 0.3 && < 0.4, 83 | unordered-containers >= 0.2 && < 0.3 84 | 85 | if flag(binary) 86 | build-depends: binary >= 0.7.2 && < 1 87 | if flag(parsec) 88 | build-depends: parsec >= 3.1 && < 3.2 89 | if flag(attoparsec) 90 | build-depends: attoparsec >= 0.12.1.4 && < 0.15 91 | if impl(ghc < 8.0) 92 | build-depends: semigroups >= 0.12 && < 1 93 | 94 | test-suite quickcheck 95 | type: exitcode-stdio-1.0 96 | main-is: QuickCheck.hs 97 | default-language: Haskell2010 98 | build-depends: 99 | base == 4.*, 100 | bytestring, 101 | parsers, 102 | QuickCheck, 103 | quickcheck-instances 104 | ghc-options: -Wall -threaded 105 | hs-source-dirs: tests 106 | 107 | if flag(parsec) 108 | build-depends: parsec >= 3 109 | if flag(attoparsec) 110 | build-depends: attoparsec 111 | -------------------------------------------------------------------------------- /src/Text/Parser/LookAhead.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE UndecidableInstances #-} 3 | {-# LANGUAGE TypeFamilies #-} 4 | 5 | ----------------------------------------------------------------------------- 6 | -- | 7 | -- Module : Text.Parser.LookAhead 8 | -- Copyright : (c) Edward Kmett 2011-2013 9 | -- License : BSD3 10 | -- 11 | -- Maintainer : ekmett@gmail.com 12 | -- Stability : experimental 13 | -- Portability : non-portable 14 | -- 15 | -- Parsers that can 'lookAhead'. 16 | ----------------------------------------------------------------------------- 17 | module Text.Parser.LookAhead 18 | ( 19 | -- * Parsing Combinators 20 | LookAheadParsing(..) 21 | ) where 22 | 23 | import Control.Monad (MonadPlus(..)) 24 | import Control.Monad.Trans.State.Lazy as Lazy 25 | import Control.Monad.Trans.State.Strict as Strict 26 | import Control.Monad.Trans.Writer.Lazy as Lazy 27 | import Control.Monad.Trans.Writer.Strict as Strict 28 | import Control.Monad.Trans.RWS.Lazy as Lazy 29 | import Control.Monad.Trans.RWS.Strict as Strict 30 | import Control.Monad.Trans.Reader 31 | import Control.Monad.Trans.Identity 32 | import qualified Text.ParserCombinators.ReadP as ReadP 33 | import Text.Parser.Combinators 34 | 35 | #ifdef MIN_VERSION_parsec 36 | import qualified Text.Parsec as Parsec 37 | #endif 38 | 39 | #ifdef MIN_VERSION_attoparsec 40 | import qualified Data.Attoparsec.Types as Att 41 | import qualified Data.Attoparsec.Combinator as Att 42 | #endif 43 | 44 | #ifdef MIN_VERSION_binary 45 | import qualified Data.Binary.Get as B 46 | #endif 47 | 48 | -- | Additional functionality needed to describe parsers independent of input type. 49 | class Parsing m => LookAheadParsing m where 50 | -- | @lookAhead p@ parses @p@ without consuming any input. 51 | lookAhead :: m a -> m a 52 | 53 | instance (LookAheadParsing m, MonadPlus m) => LookAheadParsing (Lazy.StateT s m) where 54 | lookAhead (Lazy.StateT m) = Lazy.StateT $ lookAhead . m 55 | {-# INLINE lookAhead #-} 56 | 57 | instance (LookAheadParsing m, MonadPlus m) => LookAheadParsing (Strict.StateT s m) where 58 | lookAhead (Strict.StateT m) = Strict.StateT $ lookAhead . m 59 | {-# INLINE lookAhead #-} 60 | 61 | instance (LookAheadParsing m, MonadPlus m) => LookAheadParsing (ReaderT e m) where 62 | lookAhead (ReaderT m) = ReaderT $ lookAhead . m 63 | {-# INLINE lookAhead #-} 64 | 65 | instance (LookAheadParsing m, MonadPlus m, Monoid w) => LookAheadParsing (Strict.WriterT w m) where 66 | lookAhead (Strict.WriterT m) = Strict.WriterT $ lookAhead m 67 | {-# INLINE lookAhead #-} 68 | 69 | instance (LookAheadParsing m, MonadPlus m, Monoid w) => LookAheadParsing (Lazy.WriterT w m) where 70 | lookAhead (Lazy.WriterT m) = Lazy.WriterT $ lookAhead m 71 | {-# INLINE lookAhead #-} 72 | 73 | instance (LookAheadParsing m, MonadPlus m, Monoid w) => LookAheadParsing (Lazy.RWST r w s m) where 74 | lookAhead (Lazy.RWST m) = Lazy.RWST $ \r s -> lookAhead (m r s) 75 | {-# INLINE lookAhead #-} 76 | 77 | instance (LookAheadParsing m, MonadPlus m, Monoid w) => LookAheadParsing (Strict.RWST r w s m) where 78 | lookAhead (Strict.RWST m) = Strict.RWST $ \r s -> lookAhead (m r s) 79 | {-# INLINE lookAhead #-} 80 | 81 | instance (LookAheadParsing m, Monad m) => LookAheadParsing (IdentityT m) where 82 | lookAhead = IdentityT . lookAhead . runIdentityT 83 | {-# INLINE lookAhead #-} 84 | 85 | #ifdef MIN_VERSION_parsec 86 | instance (Parsec.Stream s m t, Show t) => LookAheadParsing (Parsec.ParsecT s u m) where 87 | lookAhead = Parsec.lookAhead 88 | #endif 89 | 90 | #ifdef MIN_VERSION_attoparsec 91 | instance Att.Chunk i => LookAheadParsing (Att.Parser i) where 92 | lookAhead = Att.lookAhead 93 | #endif 94 | 95 | #ifdef MIN_VERSION_binary 96 | instance LookAheadParsing B.Get where 97 | lookAhead = B.lookAhead 98 | #endif 99 | 100 | instance LookAheadParsing ReadP.ReadP where 101 | lookAhead p = ReadP.look >>= \s -> 102 | ReadP.choice $ map (return . fst) $ ReadP.readP_to_S p s 103 | -------------------------------------------------------------------------------- /CHANGELOG.markdown: -------------------------------------------------------------------------------- 1 | 0.12.12 [2024.10.26] 2 | -------------------- 3 | * Support building with `text-2.1.2`. 4 | * Drop support for pre-8.0 versions of GHC. 5 | 6 | 0.12.11 [2022.05.07] 7 | -------------------- 8 | * Allow building with `transformers-0.6.*` and `mtl-2.3.*`. 9 | 10 | 0.12.10 [2019.05.02] 11 | -------------------- 12 | * Make the `parsec` and `attoparsec` dependencies optional with the use of 13 | `Cabal` flags of the same names. 14 | 15 | 0.12.9 [2018.07.04] 16 | ------------------- 17 | * Add instances for the `Get` type from `binary`. 18 | * Add a `surroundedBy` function, as a shorthand for `between bra ket` when 19 | `bra` and `ket` are the same. 20 | 21 | 0.12.8 22 | ------ 23 | * Remove the `doctest` test suite, as there are no actual doctests anywhere 24 | in `parsers`. 25 | 26 | 0.12.7 27 | ------ 28 | * Add `sepByNonEmpty`, `sepEndByNonEmpty`, and `endByNonEmpty` to 29 | `Text.Parser.Combinators` 30 | * Fix sporadic `QuickCheck` test suite failures 31 | 32 | 0.12.6 33 | ------ 34 | * Add a library dependency in the `doctests` test suite 35 | 36 | 0.12.5 37 | ------ 38 | * Allow building with GHC 8.2 39 | * Add `mtl` instances for `Unspaced`, `Unhighlighted`, and `Unlined` 40 | * Revamp `Setup.hs` to use `cabal-doctest`. This makes it build 41 | with `Cabal-2.0`, and makes the `doctest`s work with `cabal new-build` and 42 | sandboxes. 43 | 44 | 0.12.4 45 | ------ 46 | * Allow `transformers` 0.5 47 | 48 | 0.12.3 49 | ------ 50 | * Build without warnings on GHC 7.10 51 | * Add `LookAheadParsing` instance for `attoparsec` 52 | * Documentation fixes 53 | * Fix out-of-bounds error in numeric escapes 54 | * Depend on `base-orphans` for `Applicative ReadP` on old `base` 55 | 56 | 0.12.2 57 | ------ 58 | * Added parsers for `scientific`, so we can parse decimal places without losing precision. 59 | 60 | 0.12.1 61 | ---- 62 | * Fixed the fixed behavior of `notFollowedBy`, which was showing internal state. This had led to unnecessary constraints on internal state that are now removed. 63 | 64 | 0.12 65 | ------ 66 | * Fixed the behavior of `notFollowedBy`. This necessitated removing the default implementation, and therefore required a major version bump. 67 | 68 | 0.11.0.2 69 | -------- 70 | * Allow `attoparsec` 0.12 71 | 72 | 0.11 73 | ---- 74 | * Mikhail Vorozhtsov refactored `attoparsec` to permit `parsers` instances. Instances added. 75 | 76 | 0.10.3 77 | ------ 78 | * Compatibility with ghc 7.8 roles 79 | 80 | 0.10.2 81 | ------ 82 | * Documentation fixes 83 | 84 | 0.10.1.2 85 | -------- 86 | * Updated to work with `text` 1.0 87 | 88 | 0.10.1.1 89 | -------- 90 | * 0.10.1 accidentally prevented the orphan instances for ReadP from compiling. Fxed. 91 | 92 | 0.10.1 93 | ------ 94 | * Fixed an issue with the expression parser, where it didn't `try` hard enough. 95 | * Added `satisfyRange` 96 | * Fixed a longstanding issue with the char escapes that we inherited from parsec, where ^A and the like were returning 0 not 1. 97 | 98 | 0.10 99 | ---- 100 | * Added proper upper bounds for PVP compliance 101 | * Switched to an applicative expression parser 102 | 103 | 0.9 104 | --- 105 | * `instance MonadTrans Unlined` 106 | 107 | 0.8.3 108 | ----- 109 | * Fixed a _major_ performance regression in Text.Parser.Expression 110 | 111 | 0.8.2 112 | ----- 113 | * Added `scalaCommentStyle`. 114 | 115 | 0.8.1 116 | ----- 117 | * Text.Parser.Token.* is now Trustworthy 118 | 119 | 0.8 120 | --- 121 | * Removed the need for `textLiteral`, `textLiteral'` and `identText` by using `fromString`. Use `stringLiteral`, `stringLiteral'`, and `ident` instead respectively. 122 | 123 | 0.7.1 124 | ----- 125 | * Added support for `Text`-based parsing. 126 | 127 | 0.7 128 | --- 129 | * Added `Unlined` to support parsing solely within a line 130 | * Simplified `TokenParsing` instances 131 | 132 | 0.6 133 | --- 134 | * Disallowed nested comments in 'javaCommentStyle' 135 | * More derived instances 136 | 137 | 0.5.2 138 | ----- 139 | * Bugfix in `commaSep1`. 140 | 141 | 0.5.1 142 | ----- 143 | * Taught zeroNumFloat about `0.`. 144 | * Bugfix in `buildExpressionParser`. 145 | 146 | 0.5 147 | --- 148 | * Split out `LookAheadParsing` since it wasn't used by other combinators here and isn't supported by `attoparsec`. 149 | 150 | 0.4.1 151 | ----- 152 | * Added `token` to `TokenParsing`. 153 | 154 | 0.4 155 | ----- 156 | * Updated build system 157 | * Converted various style accessors to lenses and traversals 158 | * More aggressive inlining 159 | * Added CHANGELOG 160 | -------------------------------------------------------------------------------- /src/Text/Parser/Permutation.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ExistentialQuantification #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.Parser.Permutation 6 | -- Copyright : (c) Edward Kmett 2011-2012 7 | -- (c) Paolo Martini 2007 8 | -- (c) Daan Leijen 1999-2001 9 | -- License : BSD-style 10 | -- 11 | -- Maintainer : ekmett@gmail.com 12 | -- Stability : provisional 13 | -- Portability : non-portable 14 | -- 15 | -- This module implements permutation parsers. The algorithm is described in: 16 | -- 17 | -- /Parsing Permutation Phrases,/ 18 | -- by Arthur Baars, Andres Loh and Doaitse Swierstra. 19 | -- Published as a functional pearl at the Haskell Workshop 2001. 20 | -- 21 | ----------------------------------------------------------------------------- 22 | module Text.Parser.Permutation 23 | ( Permutation 24 | , permute 25 | , (<||>), (<$$>) 26 | , (<|?>), (<$?>) 27 | ) where 28 | 29 | import Control.Applicative 30 | import qualified Data.Foldable as F (asum) 31 | 32 | infixl 1 <||>, <|?> 33 | infixl 2 <$$>, <$?> 34 | 35 | ---------------------------------------------------------------- 36 | -- Building a permutation parser 37 | ---------------------------------------------------------------- 38 | 39 | -- | The expression @perm \<||> p@ adds parser @p@ to the permutation 40 | -- parser @perm@. The parser @p@ is not allowed to accept empty input - 41 | -- use the optional combinator ('<|?>') instead. Returns a 42 | -- new permutation parser that includes @p@. 43 | 44 | (<||>) :: Functor m => Permutation m (a -> b) -> m a -> Permutation m b 45 | (<||>) = add 46 | {-# INLINE (<||>) #-} 47 | 48 | -- | The expression @f \<$$> p@ creates a fresh permutation parser 49 | -- consisting of parser @p@. The final result of the permutation 50 | -- parser is the function @f@ applied to the return value of @p@. The 51 | -- parser @p@ is not allowed to accept empty input - use the optional 52 | -- combinator ('<$?>') instead. 53 | -- 54 | -- If the function @f@ takes more than one parameter, the type variable 55 | -- @b@ is instantiated to a functional type which combines nicely with 56 | -- the adds parser @p@ to the ('<||>') combinator. This 57 | -- results in stylized code where a permutation parser starts with a 58 | -- combining function @f@ followed by the parsers. The function @f@ 59 | -- gets its parameters in the order in which the parsers are specified, 60 | -- but actual input can be in any order. 61 | 62 | (<$$>) :: Functor m => (a -> b) -> m a -> Permutation m b 63 | (<$$>) f p = newPermutation f <||> p 64 | {-# INLINE (<$$>) #-} 65 | 66 | -- | The expression @perm \<|?> (x,p)@ adds parser @p@ to the 67 | -- permutation parser @perm@. The parser @p@ is optional - if it can 68 | -- not be applied, the default value @x@ will be used instead. Returns 69 | -- a new permutation parser that includes the optional parser @p@. 70 | 71 | (<|?>) :: Functor m => Permutation m (a -> b) -> (a, m a) -> Permutation m b 72 | (<|?>) perm (x,p) = addOpt perm x p 73 | {-# INLINE (<|?>) #-} 74 | 75 | -- | The expression @f \<$?> (x,p)@ creates a fresh permutation parser 76 | -- consisting of parser @p@. The final result of the permutation 77 | -- parser is the function @f@ applied to the return value of @p@. The 78 | -- parser @p@ is optional - if it can not be applied, the default value 79 | -- @x@ will be used instead. 80 | 81 | (<$?>) :: Functor m => (a -> b) -> (a, m a) -> Permutation m b 82 | (<$?>) f (x,p) = newPermutation f <|?> (x,p) 83 | {-# INLINE (<$?>) #-} 84 | 85 | ---------------------------------------------------------------- 86 | -- The permutation tree 87 | ---------------------------------------------------------------- 88 | 89 | -- | The type @Permutation m a@ denotes a permutation parser that, 90 | -- when converted by the 'permute' function, parses 91 | -- using the base parsing monad @m@ and returns a value of 92 | -- type @a@ on success. 93 | -- 94 | -- Normally, a permutation parser is first build with special operators 95 | -- like ('<||>') and than transformed into a normal parser 96 | -- using 'permute'. 97 | 98 | data Permutation m a = Permutation (Maybe a) [Branch m a] 99 | 100 | instance Functor m => Functor (Permutation m) where 101 | fmap f (Permutation x xs) = Permutation (fmap f x) (fmap f <$> xs) 102 | 103 | data Branch m a = forall b. Branch (Permutation m (b -> a)) (m b) 104 | 105 | instance Functor m => Functor (Branch m) where 106 | fmap f (Branch perm p) = Branch (fmap (f.) perm) p 107 | 108 | -- | The parser @permute perm@ parses a permutation of parser described 109 | -- by @perm@. For example, suppose we want to parse a permutation of: 110 | -- an optional string of @a@'s, the character @b@ and an optional @c@. 111 | -- This can be described by: 112 | -- 113 | -- > test = permute (tuple <$?> ("",some (char 'a')) 114 | -- > <||> char 'b' 115 | -- > <|?> ('_',char 'c')) 116 | -- > where 117 | -- > tuple a b c = (a,b,c) 118 | 119 | -- transform a permutation tree into a normal parser 120 | permute :: forall m a. Alternative m => Permutation m a -> m a 121 | permute (Permutation def xs) 122 | = F.asum (map branch xs ++ e) 123 | where 124 | e :: [m a] 125 | e = maybe [] (pure . pure) def 126 | branch (Branch perm p) = flip id <$> p <*> permute perm 127 | 128 | -- build permutation trees 129 | newPermutation :: (a -> b) -> Permutation m (a -> b) 130 | newPermutation f = Permutation (Just f) [] 131 | {-# INLINE newPermutation #-} 132 | 133 | add :: Functor m => Permutation m (a -> b) -> m a -> Permutation m b 134 | add perm@(Permutation _mf fs) p 135 | = Permutation Nothing (first:map insert fs) 136 | where 137 | first = Branch perm p 138 | insert (Branch perm' p') 139 | = Branch (add (fmap flip perm') p) p' 140 | 141 | addOpt :: Functor m => Permutation m (a -> b) -> a -> m a -> Permutation m b 142 | addOpt perm@(Permutation mf fs) x p 143 | = Permutation (fmap ($ x) mf) (first:map insert fs) 144 | where 145 | first = Branch perm p 146 | insert (Branch perm' p') = Branch (addOpt (fmap flip perm') x p) p' 147 | -------------------------------------------------------------------------------- /src/Text/Parser/Expression.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Text.Parser.Expression 6 | -- Copyright : (c) Edward Kmett 2011-2012 7 | -- (c) Paolo Martini 2007 8 | -- (c) Daan Leijen 1999-2001, 9 | -- License : BSD-style (see the LICENSE file) 10 | -- 11 | -- Maintainer : ekmett@gmail.com 12 | -- Stability : experimental 13 | -- Portability : non-portable 14 | -- 15 | -- A helper module to parse \"expressions\". 16 | -- Builds a parser given a table of operators and associativities. 17 | -- 18 | ----------------------------------------------------------------------------- 19 | 20 | module Text.Parser.Expression 21 | ( Assoc(..), Operator(..), OperatorTable 22 | , buildExpressionParser 23 | ) where 24 | 25 | import Control.Applicative 26 | import Text.Parser.Combinators 27 | import Data.Data hiding (Infix, Prefix) 28 | import Data.Ix 29 | 30 | ----------------------------------------------------------- 31 | -- Assoc and OperatorTable 32 | ----------------------------------------------------------- 33 | 34 | -- | This data type specifies the associativity of operators: left, right 35 | -- or none. 36 | 37 | data Assoc 38 | = AssocNone 39 | | AssocLeft 40 | | AssocRight 41 | deriving (Eq,Ord,Show,Read,Ix,Enum,Bounded,Data) 42 | 43 | -- | This data type specifies operators that work on values of type @a@. 44 | -- An operator is either binary infix or unary prefix or postfix. A 45 | -- binary operator has also an associated associativity. 46 | 47 | data Operator m a 48 | = Infix (m (a -> a -> a)) Assoc 49 | | Prefix (m (a -> a)) 50 | | Postfix (m (a -> a)) 51 | 52 | -- | An @OperatorTable m a@ is a list of @Operator m a@ 53 | -- lists. The list is ordered in descending 54 | -- precedence. All operators in one list have the same precedence (but 55 | -- may have a different associativity). 56 | 57 | type OperatorTable m a = [[Operator m a]] 58 | 59 | ----------------------------------------------------------- 60 | -- Convert an OperatorTable and basic term parser into 61 | -- a full fledged expression parser 62 | ----------------------------------------------------------- 63 | 64 | -- | @buildExpressionParser table term@ builds an expression parser for 65 | -- terms @term@ with operators from @table@, taking the associativity 66 | -- and precedence specified in @table@ into account. Prefix and postfix 67 | -- operators of the same precedence can only occur once (i.e. @--2@ is 68 | -- not allowed if @-@ is prefix negate). Prefix and postfix operators 69 | -- of the same precedence associate to the left (i.e. if @++@ is 70 | -- postfix increment, than @-2++@ equals @-1@, not @-3@). 71 | -- 72 | -- The @buildExpressionParser@ takes care of all the complexity 73 | -- involved in building expression parser. Here is an example of an 74 | -- expression parser that handles prefix signs, postfix increment and 75 | -- basic arithmetic. 76 | -- 77 | -- > import Control.Applicative ((<|>)) 78 | -- > import Text.Parser.Combinators (()) 79 | -- > import Text.Parser.Expression 80 | -- > import Text.Parser.Token (TokenParsing, natural, parens, reserve) 81 | -- > import Text.Parser.Token.Style (emptyOps) 82 | -- > 83 | -- > expr :: (Monad m, TokenParsing m) => m Integer 84 | -- > expr = buildExpressionParser table term 85 | -- > "expression" 86 | -- > 87 | -- > term :: (Monad m, TokenParsing m) => m Integer 88 | -- > term = parens expr 89 | -- > <|> natural 90 | -- > "simple expression" 91 | -- > 92 | -- > table :: (Monad m, TokenParsing m) => [[Operator m Integer]] 93 | -- > table = [ [prefix "-" negate, prefix "+" id ] 94 | -- > , [postfix "++" (+1)] 95 | -- > , [binary "*" (*) AssocLeft, binary "/" (div) AssocLeft ] 96 | -- > , [binary "+" (+) AssocLeft, binary "-" (-) AssocLeft ] 97 | -- > ] 98 | -- > 99 | -- > binary name fun assoc = Infix (fun <$ reservedOp name) assoc 100 | -- > prefix name fun = Prefix (fun <$ reservedOp name) 101 | -- > postfix name fun = Postfix (fun <$ reservedOp name) 102 | -- > 103 | -- > reservedOp name = reserve emptyOps name 104 | 105 | buildExpressionParser :: forall m a. (Parsing m, Applicative m) 106 | => OperatorTable m a 107 | -> m a 108 | -> m a 109 | buildExpressionParser operators simpleExpr 110 | = foldl makeParser simpleExpr operators 111 | where 112 | makeParser term ops 113 | = let rassoc, lassoc, nassoc :: [m (a -> a -> a)] 114 | prefix, postfix :: [m (a -> a)] 115 | (rassoc,lassoc,nassoc,prefix,postfix) = foldr splitOp ([],[],[],[],[]) ops 116 | 117 | rassocOp, lassocOp, nassocOp :: m (a -> a -> a) 118 | rassocOp = choice rassoc 119 | lassocOp = choice lassoc 120 | nassocOp = choice nassoc 121 | 122 | prefixOp, postfixOp :: m (a -> a) 123 | prefixOp = choice prefix "" 124 | postfixOp = choice postfix "" 125 | 126 | ambiguous :: String -> m x -> m y 127 | ambiguous assoc op = try $ op *> empty ("ambiguous use of a " ++ assoc ++ "-associative operator") 128 | 129 | ambiguousRight, ambiguousLeft, ambiguousNon :: m y 130 | ambiguousRight = ambiguous "right" rassocOp 131 | ambiguousLeft = ambiguous "left" lassocOp 132 | ambiguousNon = ambiguous "non" nassocOp 133 | 134 | termP :: m a 135 | termP = (prefixP <*> term) <**> postfixP 136 | 137 | postfixP :: m (a -> a) 138 | postfixP = postfixOp <|> pure id 139 | 140 | prefixP :: m (a -> a) 141 | prefixP = prefixOp <|> pure id 142 | 143 | rassocP, rassocP1, lassocP, lassocP1, nassocP :: m (a -> a) 144 | 145 | rassocP = (flip <$> rassocOp <*> (termP <**> rassocP1) 146 | <|> ambiguousLeft 147 | <|> ambiguousNon) 148 | 149 | rassocP1 = rassocP <|> pure id 150 | 151 | lassocP = ((flip <$> lassocOp <*> termP) <**> ((.) <$> lassocP1) 152 | <|> ambiguousRight 153 | <|> ambiguousNon) 154 | 155 | lassocP1 = lassocP <|> pure id 156 | 157 | nassocP = (flip <$> nassocOp <*> termP) 158 | <**> (ambiguousRight 159 | <|> ambiguousLeft 160 | <|> ambiguousNon 161 | <|> pure id) 162 | in termP <**> (rassocP <|> lassocP <|> nassocP <|> pure id) "operator" 163 | 164 | 165 | splitOp (Infix op assoc) (rassoc,lassoc,nassoc,prefix,postfix) 166 | = case assoc of 167 | AssocNone -> (rassoc,lassoc,op:nassoc,prefix,postfix) 168 | AssocLeft -> (rassoc,op:lassoc,nassoc,prefix,postfix) 169 | AssocRight -> (op:rassoc,lassoc,nassoc,prefix,postfix) 170 | 171 | splitOp (Prefix op) (rassoc,lassoc,nassoc,prefix,postfix) 172 | = (rassoc,lassoc,nassoc,op:prefix,postfix) 173 | 174 | splitOp (Postfix op) (rassoc,lassoc,nassoc,prefix,postfix) 175 | = (rassoc,lassoc,nassoc,prefix,op:postfix) 176 | -------------------------------------------------------------------------------- /src/Text/Parser/Token/Style.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | {-# LANGUAGE Trustworthy #-} 4 | ----------------------------------------------------------------------------- 5 | -- | 6 | -- Module : Text.Parser.Token.Style 7 | -- Copyright : (c) Edward Kmett 2011-2012 8 | -- License : BSD3 9 | -- 10 | -- Maintainer : ekmett@gmail.com 11 | -- Stability : provisional 12 | -- Portability : non-portable 13 | -- 14 | -- A toolbox for specifying comment and identifier styles 15 | -- 16 | -- This must be imported directly as it is not re-exported elsewhere 17 | -- 18 | ----------------------------------------------------------------------------- 19 | module Text.Parser.Token.Style 20 | ( 21 | -- * Comment and white space styles 22 | CommentStyle(..) 23 | -- ** Lenses 24 | , commentStart 25 | , commentEnd 26 | , commentLine 27 | , commentNesting 28 | -- ** Common Comment Styles 29 | , emptyCommentStyle 30 | , javaCommentStyle 31 | , scalaCommentStyle 32 | , haskellCommentStyle 33 | , buildSomeSpaceParser 34 | -- * Identifier Styles 35 | , emptyIdents, haskellIdents, haskell98Idents 36 | -- * Operator Styles 37 | , emptyOps, haskellOps, haskell98Ops 38 | ) where 39 | 40 | import Control.Applicative 41 | import Control.Monad (void) 42 | import qualified Data.HashSet as HashSet 43 | import Data.HashSet (HashSet) 44 | import Data.Data 45 | import Text.Parser.Combinators 46 | import Text.Parser.Char 47 | import Text.Parser.Token 48 | import Text.Parser.Token.Highlight 49 | import Data.List (nub) 50 | 51 | -- | How to deal with comments. 52 | data CommentStyle = CommentStyle 53 | { _commentStart :: String -- ^ String that starts a multiline comment 54 | , _commentEnd :: String -- ^ String that ends a multiline comment 55 | , _commentLine :: String -- ^ String that starts a single line comment 56 | , _commentNesting :: Bool -- ^ Can we nest multiline comments? 57 | } deriving (Eq,Ord,Show,Read,Data) 58 | 59 | -- | This is a lens that can edit the string that starts a multiline comment. 60 | -- 61 | -- @'commentStart' :: Lens' 'CommentStyle' 'String'@ 62 | commentStart :: Functor f => (String -> f String) -> CommentStyle -> f CommentStyle 63 | commentStart f (CommentStyle s e l n) = (\s' -> CommentStyle s' e l n) <$> f s 64 | {-# INLINE commentStart #-} 65 | 66 | -- | This is a lens that can edit the string that ends a multiline comment. 67 | -- 68 | -- @'commentEnd' :: Lens' 'CommentStyle' 'String'@ 69 | commentEnd :: Functor f => (String -> f String) -> CommentStyle -> f CommentStyle 70 | commentEnd f (CommentStyle s e l n) = (\e' -> CommentStyle s e' l n) <$> f e 71 | {-# INLINE commentEnd #-} 72 | 73 | -- | This is a lens that can edit the string that starts a single line comment. 74 | -- 75 | -- @'commentLine' :: Lens' 'CommentStyle' 'String'@ 76 | commentLine :: Functor f => (String -> f String) -> CommentStyle -> f CommentStyle 77 | commentLine f (CommentStyle s e l n) = (\l' -> CommentStyle s e l' n) <$> f l 78 | {-# INLINE commentLine #-} 79 | 80 | -- | This is a lens that can edit whether we can nest multiline comments. 81 | -- 82 | -- @'commentNesting' :: Lens' 'CommentStyle' 'Bool'@ 83 | commentNesting :: Functor f => (Bool -> f Bool) -> CommentStyle -> f CommentStyle 84 | commentNesting f (CommentStyle s e l n) = CommentStyle s e l <$> f n 85 | {-# INLINE commentNesting #-} 86 | 87 | -- | No comments at all 88 | emptyCommentStyle :: CommentStyle 89 | emptyCommentStyle = CommentStyle "" "" "" True 90 | 91 | -- | Use java-style comments 92 | javaCommentStyle :: CommentStyle 93 | javaCommentStyle = CommentStyle "/*" "*/" "//" False 94 | 95 | -- | Use scala-style comments 96 | scalaCommentStyle :: CommentStyle 97 | scalaCommentStyle = CommentStyle "/*" "*/" "//" True 98 | 99 | -- | Use haskell-style comments 100 | haskellCommentStyle :: CommentStyle 101 | haskellCommentStyle = CommentStyle "{-" "-}" "--" True 102 | 103 | -- | Use this to easily build the definition of whiteSpace for your MonadParser 104 | -- given a comment style and an underlying someWhiteSpace parser 105 | buildSomeSpaceParser :: forall m. CharParsing m => m () -> CommentStyle -> m () 106 | buildSomeSpaceParser simpleSpace (CommentStyle startStyle endStyle lineStyle nestingStyle) 107 | | noLine && noMulti = skipSome (simpleSpace "") 108 | | noLine = skipSome (simpleSpace <|> multiLineComment "") 109 | | noMulti = skipSome (simpleSpace <|> oneLineComment "") 110 | | otherwise = skipSome (simpleSpace <|> oneLineComment <|> multiLineComment "") 111 | where 112 | noLine = null lineStyle 113 | noMulti = null startStyle 114 | 115 | oneLineComment, multiLineComment, inComment, inCommentMulti :: m () 116 | oneLineComment = try (string lineStyle) *> skipMany (satisfy (/= '\n')) 117 | multiLineComment = try (string startStyle) *> inComment 118 | inComment = if nestingStyle then inCommentMulti else inCommentSingle 119 | inCommentMulti 120 | = void (try (string endStyle)) 121 | <|> multiLineComment *> inCommentMulti 122 | <|> skipSome (noneOf startEnd) *> inCommentMulti 123 | <|> oneOf startEnd *> inCommentMulti 124 | "end of comment" 125 | 126 | startEnd = nub (endStyle ++ startStyle) 127 | 128 | inCommentSingle :: m () 129 | inCommentSingle 130 | = void (try (string endStyle)) 131 | <|> skipSome (noneOf startEnd) *> inCommentSingle 132 | <|> oneOf startEnd *> inCommentSingle 133 | "end of comment" 134 | 135 | set :: [String] -> HashSet String 136 | set = HashSet.fromList 137 | 138 | -- | A simple operator style based on haskell with no reserved operators 139 | emptyOps :: TokenParsing m => IdentifierStyle m 140 | emptyOps = IdentifierStyle 141 | { _styleName = "operator" 142 | , _styleStart = _styleLetter emptyOps 143 | , _styleLetter = oneOf ":!#$%&*+./<=>?@\\^|-~" 144 | , _styleReserved = mempty 145 | , _styleHighlight = Operator 146 | , _styleReservedHighlight = ReservedOperator 147 | } 148 | -- | A simple operator style based on haskell with the operators from Haskell 98. 149 | haskell98Ops, haskellOps :: TokenParsing m => IdentifierStyle m 150 | haskell98Ops = emptyOps 151 | { _styleReserved = set ["::","..","=","\\","|","<-","->","@","~","=>"] 152 | } 153 | haskellOps = haskell98Ops 154 | 155 | -- | A simple identifier style based on haskell with no reserve words 156 | emptyIdents :: TokenParsing m => IdentifierStyle m 157 | emptyIdents = IdentifierStyle 158 | { _styleName = "identifier" 159 | , _styleStart = letter <|> char '_' 160 | , _styleLetter = alphaNum <|> oneOf "_'" 161 | , _styleReserved = set [] 162 | , _styleHighlight = Identifier 163 | , _styleReservedHighlight = ReservedIdentifier 164 | } 165 | 166 | -- | A simple identifier style based on haskell with only the reserved words from Haskell 98. 167 | haskell98Idents :: TokenParsing m => IdentifierStyle m 168 | haskell98Idents = emptyIdents 169 | { _styleReserved = set haskell98ReservedIdents } 170 | 171 | -- | A simple identifier style based on haskell with the reserved words from Haskell 98 and some common extensions. 172 | haskellIdents :: TokenParsing m => IdentifierStyle m 173 | haskellIdents = haskell98Idents 174 | { _styleLetter = _styleLetter haskell98Idents <|> char '#' 175 | , _styleReserved = set $ haskell98ReservedIdents ++ 176 | ["foreign","import","export","primitive","_ccall_","_casm_" ,"forall"] 177 | } 178 | 179 | haskell98ReservedIdents :: [String] 180 | haskell98ReservedIdents = 181 | ["let","in","case","of","if","then","else","data","type" 182 | ,"class","default","deriving","do","import","infix" 183 | ,"infixl","infixr","instance","module","newtype" 184 | ,"where","primitive" -- "as","qualified","hiding" 185 | ] 186 | -------------------------------------------------------------------------------- /.github/workflows/haskell-ci.yml: -------------------------------------------------------------------------------- 1 | # This GitHub workflow config has been generated by a script via 2 | # 3 | # haskell-ci 'github' 'cabal.project' 4 | # 5 | # To regenerate the script (for example after adjusting tested-with) run 6 | # 7 | # haskell-ci regenerate 8 | # 9 | # For more information, see https://github.com/haskell-CI/haskell-ci 10 | # 11 | # version: 0.19.20250216 12 | # 13 | # REGENDATA ("0.19.20250216",["github","cabal.project"]) 14 | # 15 | name: Haskell-CI 16 | on: 17 | - push 18 | - pull_request 19 | jobs: 20 | linux: 21 | name: Haskell-CI - Linux - ${{ matrix.compiler }} 22 | runs-on: ubuntu-24.04 23 | timeout-minutes: 24 | 60 25 | container: 26 | image: buildpack-deps:jammy 27 | continue-on-error: ${{ matrix.allow-failure }} 28 | strategy: 29 | matrix: 30 | include: 31 | - compiler: ghc-9.12.1 32 | compilerKind: ghc 33 | compilerVersion: 9.12.1 34 | setup-method: ghcup 35 | allow-failure: false 36 | - compiler: ghc-9.10.1 37 | compilerKind: ghc 38 | compilerVersion: 9.10.1 39 | setup-method: ghcup 40 | allow-failure: false 41 | - compiler: ghc-9.8.4 42 | compilerKind: ghc 43 | compilerVersion: 9.8.4 44 | setup-method: ghcup 45 | allow-failure: false 46 | - compiler: ghc-9.6.6 47 | compilerKind: ghc 48 | compilerVersion: 9.6.6 49 | setup-method: ghcup 50 | allow-failure: false 51 | - compiler: ghc-9.4.8 52 | compilerKind: ghc 53 | compilerVersion: 9.4.8 54 | setup-method: ghcup 55 | allow-failure: false 56 | - compiler: ghc-9.2.8 57 | compilerKind: ghc 58 | compilerVersion: 9.2.8 59 | setup-method: ghcup 60 | allow-failure: false 61 | - compiler: ghc-9.0.2 62 | compilerKind: ghc 63 | compilerVersion: 9.0.2 64 | setup-method: ghcup 65 | allow-failure: false 66 | - compiler: ghc-8.10.7 67 | compilerKind: ghc 68 | compilerVersion: 8.10.7 69 | setup-method: ghcup 70 | allow-failure: false 71 | - compiler: ghc-8.8.4 72 | compilerKind: ghc 73 | compilerVersion: 8.8.4 74 | setup-method: ghcup 75 | allow-failure: false 76 | - compiler: ghc-8.6.5 77 | compilerKind: ghc 78 | compilerVersion: 8.6.5 79 | setup-method: ghcup 80 | allow-failure: false 81 | - compiler: ghc-8.4.4 82 | compilerKind: ghc 83 | compilerVersion: 8.4.4 84 | setup-method: ghcup 85 | allow-failure: false 86 | - compiler: ghc-8.2.2 87 | compilerKind: ghc 88 | compilerVersion: 8.2.2 89 | setup-method: ghcup 90 | allow-failure: false 91 | - compiler: ghc-8.0.2 92 | compilerKind: ghc 93 | compilerVersion: 8.0.2 94 | setup-method: ghcup 95 | allow-failure: false 96 | fail-fast: false 97 | steps: 98 | - name: apt-get install 99 | run: | 100 | apt-get update 101 | apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 libnuma-dev 102 | - name: Install GHCup 103 | run: | 104 | mkdir -p "$HOME/.ghcup/bin" 105 | curl -sL https://downloads.haskell.org/ghcup/0.1.30.0/x86_64-linux-ghcup-0.1.30.0 > "$HOME/.ghcup/bin/ghcup" 106 | chmod a+x "$HOME/.ghcup/bin/ghcup" 107 | - name: Install cabal-install 108 | run: | 109 | "$HOME/.ghcup/bin/ghcup" install cabal 3.12.1.0 || (cat "$HOME"/.ghcup/logs/*.* && false) 110 | echo "CABAL=$HOME/.ghcup/bin/cabal-3.12.1.0 -vnormal+nowrap" >> "$GITHUB_ENV" 111 | - name: Install GHC (GHCup) 112 | if: matrix.setup-method == 'ghcup' 113 | run: | 114 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 115 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 116 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 117 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 118 | echo "HC=$HC" >> "$GITHUB_ENV" 119 | echo "HCPKG=$HCPKG" >> "$GITHUB_ENV" 120 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 121 | env: 122 | HCKIND: ${{ matrix.compilerKind }} 123 | HCNAME: ${{ matrix.compiler }} 124 | HCVER: ${{ matrix.compilerVersion }} 125 | - name: Set PATH and environment variables 126 | run: | 127 | echo "$HOME/.cabal/bin" >> $GITHUB_PATH 128 | echo "LANG=C.UTF-8" >> "$GITHUB_ENV" 129 | echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV" 130 | echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV" 131 | HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))') 132 | echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV" 133 | echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" 134 | echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV" 135 | echo "HEADHACKAGE=false" >> "$GITHUB_ENV" 136 | echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV" 137 | env: 138 | HCKIND: ${{ matrix.compilerKind }} 139 | HCNAME: ${{ matrix.compiler }} 140 | HCVER: ${{ matrix.compilerVersion }} 141 | - name: env 142 | run: | 143 | env 144 | - name: write cabal config 145 | run: | 146 | mkdir -p $CABAL_DIR 147 | cat >> $CABAL_CONFIG <> $CABAL_CONFIG < cabal-plan.xz 180 | echo 'f62ccb2971567a5f638f2005ad3173dba14693a45154c1508645c52289714cb2 cabal-plan.xz' | sha256sum -c - 181 | xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan 182 | rm -f cabal-plan.xz 183 | chmod a+x $HOME/.cabal/bin/cabal-plan 184 | cabal-plan --version 185 | - name: checkout 186 | uses: actions/checkout@v4 187 | with: 188 | path: source 189 | - name: initial cabal.project for sdist 190 | run: | 191 | touch cabal.project 192 | echo "packages: $GITHUB_WORKSPACE/source/." >> cabal.project 193 | cat cabal.project 194 | - name: sdist 195 | run: | 196 | mkdir -p sdist 197 | $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist 198 | - name: unpack 199 | run: | 200 | mkdir -p unpacked 201 | find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \; 202 | - name: generate cabal.project 203 | run: | 204 | PKGDIR_parsers="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/parsers-[0-9.]*')" 205 | echo "PKGDIR_parsers=${PKGDIR_parsers}" >> "$GITHUB_ENV" 206 | rm -f cabal.project cabal.project.local 207 | touch cabal.project 208 | touch cabal.project.local 209 | echo "packages: ${PKGDIR_parsers}" >> cabal.project 210 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "package parsers" >> cabal.project ; fi 211 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo " ghc-options: -Werror=missing-methods" >> cabal.project ; fi 212 | cat >> cabal.project <> cabal.project.local 215 | cat cabal.project 216 | cat cabal.project.local 217 | - name: dump install plan 218 | run: | 219 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all 220 | cabal-plan 221 | - name: restore cache 222 | uses: actions/cache/restore@v4 223 | with: 224 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 225 | path: ~/.cabal/store 226 | restore-keys: ${{ runner.os }}-${{ matrix.compiler }}- 227 | - name: install dependencies 228 | run: | 229 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 230 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 231 | - name: build 232 | run: | 233 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 234 | - name: tests 235 | run: | 236 | $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct 237 | - name: cabal check 238 | run: | 239 | cd ${PKGDIR_parsers} || false 240 | ${CABAL} -vnormal check 241 | - name: haddock 242 | run: | 243 | $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all 244 | - name: prepare for constraint sets 245 | run: | 246 | rm -f cabal.project.local 247 | - name: constraint set no-attoparsec 248 | run: | 249 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='parsers -attoparsec' all --dry-run 250 | cabal-plan topo | sort 251 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='parsers -attoparsec' --dependencies-only -j2 all 252 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='parsers -attoparsec' all 253 | - name: constraint set no-parsec 254 | run: | 255 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='parsers -parsec' all --dry-run 256 | cabal-plan topo | sort 257 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='parsers -parsec' --dependencies-only -j2 all 258 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='parsers -parsec' all 259 | - name: constraint set no-binary 260 | run: | 261 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='parsers -binary' all --dry-run 262 | cabal-plan topo | sort 263 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='parsers -binary' --dependencies-only -j2 all 264 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='parsers -binary' all 265 | - name: save cache 266 | if: always() 267 | uses: actions/cache/save@v4 268 | with: 269 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 270 | path: ~/.cabal/store 271 | -------------------------------------------------------------------------------- /src/Text/Parser/Char.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE DefaultSignatures #-} 3 | {-# LANGUAGE FlexibleContexts #-} 4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | {-# LANGUAGE TypeFamilies #-} 6 | {-# LANGUAGE TypeOperators #-} 7 | {-# OPTIONS_GHC -fspec-constr -fspec-constr-count=8 #-} 8 | ----------------------------------------------------------------------------- 9 | -- | 10 | -- Module : Text.Parser.Char 11 | -- Copyright : (c) Edward Kmett 2011 12 | -- License : BSD3 13 | -- 14 | -- Maintainer : ekmett@gmail.com 15 | -- Stability : experimental 16 | -- Portability : non-portable 17 | -- 18 | -- Parsers for character streams 19 | -- 20 | ----------------------------------------------------------------------------- 21 | module Text.Parser.Char 22 | ( 23 | -- * Combinators 24 | oneOf -- :: CharParsing m => [Char] -> m Char 25 | , noneOf -- :: CharParsing m => [Char] -> m Char 26 | , oneOfSet -- :: CharParsing m => CharSet -> m Char 27 | , noneOfSet -- :: CharParsing m => CharSet -> m Char 28 | , spaces -- :: CharParsing m => m () 29 | , space -- :: CharParsing m => m Char 30 | , newline -- :: CharParsing m => m Char 31 | , tab -- :: CharParsing m => m Char 32 | , upper -- :: CharParsing m => m Char 33 | , lower -- :: CharParsing m => m Char 34 | , alphaNum -- :: CharParsing m => m Char 35 | , letter -- :: CharParsing m => m Char 36 | , digit -- :: CharParsing m => m Char 37 | , hexDigit -- :: CharParsing m => m Char 38 | , octDigit -- :: CharParsing m => m Char 39 | , satisfyRange -- :: CharParsing m => Char -> Char -> m Char 40 | -- * Class 41 | , CharParsing(..) 42 | ) where 43 | 44 | import Control.Monad.Trans.Class 45 | import Control.Monad.Trans.State.Lazy as Lazy 46 | import Control.Monad.Trans.State.Strict as Strict 47 | import Control.Monad.Trans.Writer.Lazy as Lazy 48 | import Control.Monad.Trans.Writer.Strict as Strict 49 | import Control.Monad.Trans.RWS.Lazy as Lazy 50 | import Control.Monad.Trans.RWS.Strict as Strict 51 | import Control.Monad.Trans.Reader 52 | import Control.Monad.Trans.Identity 53 | import Control.Monad (MonadPlus(..)) 54 | import Data.Char 55 | import Data.CharSet (CharSet(..)) 56 | import qualified Data.CharSet as CharSet 57 | import Data.Foldable 58 | import qualified Data.IntSet as IntSet 59 | import qualified Data.Text as Text 60 | import Data.Text (Text) 61 | import qualified Text.ParserCombinators.ReadP as ReadP 62 | import Text.Parser.Combinators 63 | 64 | #ifdef MIN_VERSION_parsec 65 | import qualified Text.Parsec as Parsec 66 | #endif 67 | 68 | #ifdef MIN_VERSION_attoparsec 69 | import qualified Data.Attoparsec.Types as Att 70 | import qualified Data.Attoparsec.Combinator as Att 71 | #endif 72 | 73 | -- | @oneOf cs@ succeeds if the current character is in the supplied 74 | -- list of characters @cs@. Returns the parsed character. See also 75 | -- 'satisfy'. 76 | -- 77 | -- > vowel = oneOf "aeiou" 78 | oneOf :: CharParsing m => [Char] -> m Char 79 | oneOf xs = oneOfSet (CharSet.fromList xs) 80 | {-# INLINE oneOf #-} 81 | {-# ANN oneOf "HLint: ignore Use String" #-} 82 | 83 | -- | As the dual of 'oneOf', @noneOf cs@ succeeds if the current 84 | -- character is /not/ in the supplied list of characters @cs@. Returns the 85 | -- parsed character. 86 | -- 87 | -- > consonant = noneOf "aeiou" 88 | noneOf :: CharParsing m => [Char] -> m Char 89 | noneOf xs = noneOfSet (CharSet.fromList xs) 90 | {-# INLINE noneOf #-} 91 | {-# ANN noneOf "HLint: ignore Use String" #-} 92 | 93 | -- | @oneOfSet cs@ succeeds if the current character is in the supplied 94 | -- set of characters @cs@. Returns the parsed character. See also 95 | -- 'satisfy'. 96 | -- 97 | -- > vowel = oneOf "aeiou" 98 | oneOfSet :: CharParsing m => CharSet -> m Char 99 | oneOfSet (CharSet True _ is) = satisfy (\c -> IntSet.member (fromEnum c) is) 100 | oneOfSet (CharSet False _ is) = satisfy (\c -> not (IntSet.member (fromEnum c) is)) 101 | {-# INLINE oneOfSet #-} 102 | 103 | -- | As the dual of 'oneOf', @noneOf cs@ succeeds if the current 104 | -- character is /not/ in the supplied list of characters @cs@. Returns the 105 | -- parsed character. 106 | -- 107 | -- > consonant = noneOf "aeiou" 108 | noneOfSet :: CharParsing m => CharSet -> m Char 109 | noneOfSet s = oneOfSet (CharSet.complement s) 110 | {-# INLINE noneOfSet #-} 111 | 112 | -- | Skips /zero/ or more white space characters. See also 'skipMany'. 113 | spaces :: CharParsing m => m () 114 | spaces = skipMany space "white space" 115 | {-# INLINE spaces #-} 116 | 117 | -- | Parses a white space character (any character which satisfies 'isSpace') 118 | -- Returns the parsed character. 119 | space :: CharParsing m => m Char 120 | space = satisfy isSpace "space" 121 | {-# INLINE space #-} 122 | 123 | -- | Parses a newline character (\'\\n\'). Returns a newline character. 124 | newline :: CharParsing m => m Char 125 | newline = char '\n' "new-line" 126 | {-# INLINE newline #-} 127 | 128 | -- | Parses a tab character (\'\\t\'). Returns a tab character. 129 | tab :: CharParsing m => m Char 130 | tab = char '\t' "tab" 131 | {-# INLINE tab #-} 132 | 133 | -- | Parses an upper case letter. Returns the parsed character. 134 | upper :: CharParsing m => m Char 135 | upper = satisfy isUpper "uppercase letter" 136 | {-# INLINE upper #-} 137 | 138 | -- | Parses a lower case character. Returns the parsed character. 139 | lower :: CharParsing m => m Char 140 | lower = satisfy isLower "lowercase letter" 141 | {-# INLINE lower #-} 142 | 143 | -- | Parses a letter or digit. Returns the parsed character. 144 | alphaNum :: CharParsing m => m Char 145 | alphaNum = satisfy isAlphaNum "letter or digit" 146 | {-# INLINE alphaNum #-} 147 | 148 | -- | Parses a letter (an upper case or lower case character). Returns the 149 | -- parsed character. 150 | letter :: CharParsing m => m Char 151 | letter = satisfy isAlpha "letter" 152 | {-# INLINE letter #-} 153 | 154 | -- | Parses a digit. Returns the parsed character. 155 | digit :: CharParsing m => m Char 156 | digit = satisfy isDigit "digit" 157 | {-# INLINE digit #-} 158 | 159 | -- | Parses a hexadecimal digit (a digit or a letter between \'a\' and 160 | -- \'f\' or \'A\' and \'F\'). Returns the parsed character. 161 | hexDigit :: CharParsing m => m Char 162 | hexDigit = satisfy isHexDigit "hexadecimal digit" 163 | {-# INLINE hexDigit #-} 164 | 165 | -- | Parses an octal digit (a character between \'0\' and \'7\'). Returns 166 | -- the parsed character. 167 | octDigit :: CharParsing m => m Char 168 | octDigit = satisfy isOctDigit "octal digit" 169 | {-# INLINE octDigit #-} 170 | 171 | satisfyRange :: CharParsing m => Char -> Char -> m Char 172 | satisfyRange a z = satisfy (\c -> c >= a && c <= z) 173 | {-# INLINE satisfyRange #-} 174 | 175 | -- | Additional functionality needed to parse character streams. 176 | class Parsing m => CharParsing m where 177 | -- | Parse a single character of the input, with UTF-8 decoding 178 | satisfy :: (Char -> Bool) -> m Char 179 | default satisfy :: (MonadTrans t, CharParsing n, Monad n, m ~ t n) => 180 | (Char -> Bool) -> 181 | m Char 182 | satisfy = lift . satisfy 183 | 184 | -- | @char c@ parses a single character @c@. Returns the parsed 185 | -- character (i.e. @c@). 186 | -- 187 | -- /e.g./ 188 | -- 189 | -- @semiColon = 'char' ';'@ 190 | char :: Char -> m Char 191 | char c = satisfy (c ==) show [c] 192 | {-# INLINE char #-} 193 | 194 | -- | @notChar c@ parses any single character other than @c@. Returns the parsed 195 | -- character. 196 | notChar :: Char -> m Char 197 | notChar c = satisfy (c /=) 198 | {-# INLINE notChar #-} 199 | 200 | -- | This parser succeeds for any character. Returns the parsed character. 201 | anyChar :: m Char 202 | anyChar = satisfy (const True) 203 | {-# INLINE anyChar #-} 204 | 205 | -- | @string s@ parses a sequence of characters given by @s@. Returns 206 | -- the parsed string (i.e. @s@). 207 | -- 208 | -- > divOrMod = string "div" 209 | -- > <|> string "mod" 210 | string :: String -> m String 211 | string s = s <$ try (traverse_ char s) show s 212 | {-# INLINE string #-} 213 | 214 | -- | @text t@ parses a sequence of characters determined by the text @t@ Returns 215 | -- the parsed text fragment (i.e. @t@). 216 | -- 217 | -- Using @OverloadedStrings@: 218 | -- 219 | -- > divOrMod = text "div" 220 | -- > <|> text "mod" 221 | text :: Text -> m Text 222 | text t = t <$ string (Text.unpack t) 223 | {-# INLINE text #-} 224 | 225 | instance (CharParsing m, MonadPlus m) => CharParsing (Lazy.StateT s m) where 226 | satisfy = lift . satisfy 227 | {-# INLINE satisfy #-} 228 | char = lift . char 229 | {-# INLINE char #-} 230 | notChar = lift . notChar 231 | {-# INLINE notChar #-} 232 | anyChar = lift anyChar 233 | {-# INLINE anyChar #-} 234 | string = lift . string 235 | {-# INLINE string #-} 236 | text = lift . text 237 | {-# INLINE text #-} 238 | 239 | instance (CharParsing m, MonadPlus m) => CharParsing (Strict.StateT s m) where 240 | satisfy = lift . satisfy 241 | {-# INLINE satisfy #-} 242 | char = lift . char 243 | {-# INLINE char #-} 244 | notChar = lift . notChar 245 | {-# INLINE notChar #-} 246 | anyChar = lift anyChar 247 | {-# INLINE anyChar #-} 248 | string = lift . string 249 | {-# INLINE string #-} 250 | text = lift . text 251 | {-# INLINE text #-} 252 | 253 | instance (CharParsing m, MonadPlus m) => CharParsing (ReaderT e m) where 254 | satisfy = lift . satisfy 255 | {-# INLINE satisfy #-} 256 | char = lift . char 257 | {-# INLINE char #-} 258 | notChar = lift . notChar 259 | {-# INLINE notChar #-} 260 | anyChar = lift anyChar 261 | {-# INLINE anyChar #-} 262 | string = lift . string 263 | {-# INLINE string #-} 264 | text = lift . text 265 | {-# INLINE text #-} 266 | 267 | instance (CharParsing m, MonadPlus m, Monoid w) => CharParsing (Strict.WriterT w m) where 268 | satisfy = lift . satisfy 269 | {-# INLINE satisfy #-} 270 | char = lift . char 271 | {-# INLINE char #-} 272 | notChar = lift . notChar 273 | {-# INLINE notChar #-} 274 | anyChar = lift anyChar 275 | {-# INLINE anyChar #-} 276 | string = lift . string 277 | {-# INLINE string #-} 278 | text = lift . text 279 | {-# INLINE text #-} 280 | 281 | instance (CharParsing m, MonadPlus m, Monoid w) => CharParsing (Lazy.WriterT w m) where 282 | satisfy = lift . satisfy 283 | {-# INLINE satisfy #-} 284 | char = lift . char 285 | {-# INLINE char #-} 286 | notChar = lift . notChar 287 | {-# INLINE notChar #-} 288 | anyChar = lift anyChar 289 | {-# INLINE anyChar #-} 290 | string = lift . string 291 | {-# INLINE string #-} 292 | text = lift . text 293 | {-# INLINE text #-} 294 | 295 | instance (CharParsing m, MonadPlus m, Monoid w) => CharParsing (Lazy.RWST r w s m) where 296 | satisfy = lift . satisfy 297 | {-# INLINE satisfy #-} 298 | char = lift . char 299 | {-# INLINE char #-} 300 | notChar = lift . notChar 301 | {-# INLINE notChar #-} 302 | anyChar = lift anyChar 303 | {-# INLINE anyChar #-} 304 | string = lift . string 305 | {-# INLINE string #-} 306 | text = lift . text 307 | {-# INLINE text #-} 308 | 309 | instance (CharParsing m, MonadPlus m, Monoid w) => CharParsing (Strict.RWST r w s m) where 310 | satisfy = lift . satisfy 311 | {-# INLINE satisfy #-} 312 | char = lift . char 313 | {-# INLINE char #-} 314 | notChar = lift . notChar 315 | {-# INLINE notChar #-} 316 | anyChar = lift anyChar 317 | {-# INLINE anyChar #-} 318 | string = lift . string 319 | {-# INLINE string #-} 320 | text = lift . text 321 | {-# INLINE text #-} 322 | 323 | instance (CharParsing m, MonadPlus m) => CharParsing (IdentityT m) where 324 | satisfy = lift . satisfy 325 | {-# INLINE satisfy #-} 326 | char = lift . char 327 | {-# INLINE char #-} 328 | notChar = lift . notChar 329 | {-# INLINE notChar #-} 330 | anyChar = lift anyChar 331 | {-# INLINE anyChar #-} 332 | string = lift . string 333 | {-# INLINE string #-} 334 | text = lift . text 335 | {-# INLINE text #-} 336 | 337 | #ifdef MIN_VERSION_parsec 338 | instance Parsec.Stream s m Char => CharParsing (Parsec.ParsecT s u m) where 339 | satisfy = Parsec.satisfy 340 | char = Parsec.char 341 | notChar c = Parsec.satisfy (/= c) 342 | anyChar = Parsec.anyChar 343 | string = Parsec.string 344 | #endif 345 | 346 | #ifdef MIN_VERSION_attoparsec 347 | instance Att.Chunk t => CharParsing (Att.Parser t) where 348 | satisfy p = fmap e2c $ Att.satisfyElem $ p . e2c 349 | where e2c = Att.chunkElemToChar (undefined :: t) 350 | {-# INLINE satisfy #-} 351 | #endif 352 | 353 | instance CharParsing ReadP.ReadP where 354 | satisfy = ReadP.satisfy 355 | char = ReadP.char 356 | notChar c = ReadP.satisfy (/= c) 357 | anyChar = ReadP.get 358 | string = ReadP.string 359 | -------------------------------------------------------------------------------- /src/Text/Parser/Combinators.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE DefaultSignatures #-} 3 | {-# LANGUAGE TypeFamilies #-} 4 | {-# LANGUAGE TypeOperators #-} 5 | {-# LANGUAGE UndecidableInstances #-} 6 | 7 | ----------------------------------------------------------------------------- 8 | -- | 9 | -- Module : Text.Parser.Combinators 10 | -- Copyright : (c) Edward Kmett 2011-2012 11 | -- License : BSD3 12 | -- 13 | -- Maintainer : ekmett@gmail.com 14 | -- Stability : experimental 15 | -- Portability : non-portable 16 | -- 17 | -- Alternative parser combinators 18 | -- 19 | ----------------------------------------------------------------------------- 20 | module Text.Parser.Combinators 21 | ( 22 | -- * Parsing Combinators 23 | choice 24 | , option 25 | , optional -- from Control.Applicative, parsec optionMaybe 26 | , skipOptional -- parsec optional 27 | , between 28 | , surroundedBy 29 | , some -- from Control.Applicative, parsec many1 30 | , many -- from Control.Applicative 31 | , sepBy 32 | , sepBy1 33 | , sepByNonEmpty 34 | , sepEndBy1 35 | , sepEndByNonEmpty 36 | , sepEndBy 37 | , endBy1 38 | , endByNonEmpty 39 | , endBy 40 | , count 41 | , chainl 42 | , chainr 43 | , chainl1 44 | , chainr1 45 | , manyTill 46 | -- * Parsing Class 47 | , Parsing(..) 48 | ) where 49 | 50 | import Control.Applicative 51 | import Control.Monad (MonadPlus(..), replicateM, void) 52 | import Control.Monad.Trans.Class 53 | import Control.Monad.Trans.State.Lazy as Lazy 54 | import Control.Monad.Trans.State.Strict as Strict 55 | import Control.Monad.Trans.Writer.Lazy as Lazy 56 | import Control.Monad.Trans.Writer.Strict as Strict 57 | import Control.Monad.Trans.RWS.Lazy as Lazy 58 | import Control.Monad.Trans.RWS.Strict as Strict 59 | import Control.Monad.Trans.Reader 60 | import Control.Monad.Trans.Identity 61 | import qualified Data.Foldable as F 62 | import qualified Data.List.NonEmpty as NonEmpty 63 | import Data.List.NonEmpty (NonEmpty(..)) 64 | 65 | #ifdef MIN_VERSION_parsec 66 | import qualified Text.Parsec as Parsec 67 | #endif 68 | 69 | #ifdef MIN_VERSION_attoparsec 70 | import qualified Data.Attoparsec.Types as Att 71 | import qualified Data.Attoparsec.Combinator as Att 72 | #endif 73 | 74 | import qualified Text.ParserCombinators.ReadP as ReadP 75 | 76 | #ifdef MIN_VERSION_binary 77 | import Control.Monad (when, unless) 78 | import qualified Data.Binary.Get as B 79 | #endif 80 | 81 | -- | @choice ps@ tries to apply the parsers in the list @ps@ in order, 82 | -- until one of them succeeds. Returns the value of the succeeding 83 | -- parser. 84 | choice :: Alternative m => [m a] -> m a 85 | choice = F.asum 86 | {-# INLINE choice #-} 87 | 88 | -- | @option x p@ tries to apply parser @p@. If @p@ fails without 89 | -- consuming input, it returns the value @x@, otherwise the value 90 | -- returned by @p@. 91 | -- 92 | -- > priority = option 0 (digitToInt <$> digit) 93 | option :: Alternative m => a -> m a -> m a 94 | option x p = p <|> pure x 95 | {-# INLINE option #-} 96 | 97 | -- | @skipOptional p@ tries to apply parser @p@. It will parse @p@ or nothing. 98 | -- It only fails if @p@ fails after consuming input. It discards the result 99 | -- of @p@. (Plays the role of parsec's optional, which conflicts with Applicative's optional) 100 | skipOptional :: Alternative m => m a -> m () 101 | skipOptional p = void p <|> pure () 102 | {-# INLINE skipOptional #-} 103 | 104 | -- | @between open close p@ parses @open@, followed by @p@ and @close@. 105 | -- Returns the value returned by @p@. 106 | -- 107 | -- > braces = between (symbol "{") (symbol "}") 108 | between :: Applicative m => m bra -> m ket -> m a -> m a 109 | between bra ket p = bra *> p <* ket 110 | {-# INLINE between #-} 111 | 112 | -- | @p \`surroundedBy\` f@ is @p@ surrounded by @f@. Shortcut for @between f f p@. 113 | -- As in @between@, returns the value returned by @p@. 114 | surroundedBy :: Applicative m => m a -> m sur -> m a 115 | surroundedBy p bound = between bound bound p 116 | {-# INLINE surroundedBy #-} 117 | 118 | -- | @sepBy p sep@ parses /zero/ or more occurrences of @p@, separated 119 | -- by @sep@. Returns a list of values returned by @p@. 120 | -- 121 | -- > commaSep p = p `sepBy` (symbol ",") 122 | sepBy :: Alternative m => m a -> m sep -> m [a] 123 | sepBy p sep = sepBy1 p sep <|> pure [] 124 | {-# INLINE sepBy #-} 125 | 126 | -- | @sepBy1 p sep@ parses /one/ or more occurrences of @p@, separated 127 | -- by @sep@. Returns a list of values returned by @p@. 128 | sepBy1 :: Alternative m => m a -> m sep -> m [a] 129 | sepBy1 p sep = F.toList <$> sepByNonEmpty p sep 130 | {-# INLINE sepBy1 #-} 131 | 132 | -- | @sepByNonEmpty p sep@ parses /one/ or more occurrences of @p@, separated 133 | -- by @sep@. Returns a non-empty list of values returned by @p@. 134 | sepByNonEmpty :: Alternative m => m a -> m sep -> m (NonEmpty a) 135 | sepByNonEmpty p sep = (:|) <$> p <*> many (sep *> p) 136 | {-# INLINE sepByNonEmpty #-} 137 | 138 | -- | @sepEndBy1 p sep@ parses /one/ or more occurrences of @p@, 139 | -- separated and optionally ended by @sep@. Returns a list of values 140 | -- returned by @p@. 141 | sepEndBy1 :: Alternative m => m a -> m sep -> m [a] 142 | sepEndBy1 p sep = F.toList <$> sepEndByNonEmpty p sep 143 | 144 | -- | @sepEndByNonEmpty p sep@ parses /one/ or more occurrences of @p@, 145 | -- separated and optionally ended by @sep@. Returns a non-empty list of values 146 | -- returned by @p@. 147 | sepEndByNonEmpty :: Alternative m => m a -> m sep -> m (NonEmpty a) 148 | sepEndByNonEmpty p sep = (:|) <$> p <*> ((sep *> sepEndBy p sep) <|> pure []) 149 | 150 | -- | @sepEndBy p sep@ parses /zero/ or more occurrences of @p@, 151 | -- separated and optionally ended by @sep@, ie. haskell style 152 | -- statements. Returns a list of values returned by @p@. 153 | -- 154 | -- > haskellStatements = haskellStatement `sepEndBy` semi 155 | sepEndBy :: Alternative m => m a -> m sep -> m [a] 156 | sepEndBy p sep = sepEndBy1 p sep <|> pure [] 157 | {-# INLINE sepEndBy #-} 158 | 159 | -- | @endBy1 p sep@ parses /one/ or more occurrences of @p@, separated 160 | -- and ended by @sep@. Returns a list of values returned by @p@. 161 | endBy1 :: Alternative m => m a -> m sep -> m [a] 162 | endBy1 p sep = some (p <* sep) 163 | {-# INLINE endBy1 #-} 164 | 165 | -- | @endByNonEmpty p sep@ parses /one/ or more occurrences of @p@, separated 166 | -- and ended by @sep@. Returns a non-empty list of values returned by @p@. 167 | endByNonEmpty :: Alternative m => m a -> m sep -> m (NonEmpty a) 168 | endByNonEmpty p sep = NonEmpty.some1 (p <* sep) 169 | {-# INLINE endByNonEmpty #-} 170 | 171 | -- | @endBy p sep@ parses /zero/ or more occurrences of @p@, separated 172 | -- and ended by @sep@. Returns a list of values returned by @p@. 173 | -- 174 | -- > cStatements = cStatement `endBy` semi 175 | endBy :: Alternative m => m a -> m sep -> m [a] 176 | endBy p sep = many (p <* sep) 177 | {-# INLINE endBy #-} 178 | 179 | -- | @count n p@ parses @n@ occurrences of @p@. If @n@ is smaller or 180 | -- equal to zero, the parser equals to @return []@. Returns a list of 181 | -- @n@ values returned by @p@. 182 | count :: Applicative m => Int -> m a -> m [a] 183 | count = replicateM 184 | {-# INLINE count #-} 185 | 186 | -- | @chainr p op x@ parses /zero/ or more occurrences of @p@, 187 | -- separated by @op@ Returns a value obtained by a /right/ associative 188 | -- application of all functions returned by @op@ to the values returned 189 | -- by @p@. If there are no occurrences of @p@, the value @x@ is 190 | -- returned. 191 | chainr :: Alternative m => m a -> m (a -> a -> a) -> a -> m a 192 | chainr p op x = chainr1 p op <|> pure x 193 | {-# INLINE chainr #-} 194 | 195 | -- | @chainl p op x@ parses /zero/ or more occurrences of @p@, 196 | -- separated by @op@. Returns a value obtained by a /left/ associative 197 | -- application of all functions returned by @op@ to the values returned 198 | -- by @p@. If there are zero occurrences of @p@, the value @x@ is 199 | -- returned. 200 | chainl :: Alternative m => m a -> m (a -> a -> a) -> a -> m a 201 | chainl p op x = chainl1 p op <|> pure x 202 | {-# INLINE chainl #-} 203 | 204 | -- | @chainl1 p op x@ parses /one/ or more occurrences of @p@, 205 | -- separated by @op@ Returns a value obtained by a /left/ associative 206 | -- application of all functions returned by @op@ to the values returned 207 | -- by @p@. . This parser can for example be used to eliminate left 208 | -- recursion which typically occurs in expression grammars. 209 | -- 210 | -- > expr = term `chainl1` addop 211 | -- > term = factor `chainl1` mulop 212 | -- > factor = parens expr <|> integer 213 | -- > 214 | -- > mulop = (*) <$ symbol "*" 215 | -- > <|> div <$ symbol "/" 216 | -- > 217 | -- > addop = (+) <$ symbol "+" 218 | -- > <|> (-) <$ symbol "-" 219 | chainl1 :: Alternative m => m a -> m (a -> a -> a) -> m a 220 | chainl1 p op = scan where 221 | scan = p <**> rst 222 | rst = (\f y g x -> g (f x y)) <$> op <*> p <*> rst <|> pure id 223 | {-# INLINE chainl1 #-} 224 | 225 | -- | @chainr1 p op x@ parses /one/ or more occurrences of @p@, 226 | -- separated by @op@ Returns a value obtained by a /right/ associative 227 | -- application of all functions returned by @op@ to the values returned 228 | -- by @p@. 229 | chainr1 :: Alternative m => m a -> m (a -> a -> a) -> m a 230 | chainr1 p op = scan where 231 | scan = p <**> rst 232 | rst = (flip <$> op <*> scan) <|> pure id 233 | {-# INLINE chainr1 #-} 234 | 235 | -- | @manyTill p end@ applies parser @p@ /zero/ or more times until 236 | -- parser @end@ succeeds. Returns the list of values returned by @p@. 237 | -- This parser can be used to scan comments: 238 | -- 239 | -- > simpleComment = do{ string "")) 241 | -- > } 242 | -- 243 | -- Note the overlapping parsers @anyChar@ and @string \"-->\"@, and 244 | -- therefore the use of the 'try' combinator. 245 | manyTill :: Alternative m => m a -> m end -> m [a] 246 | manyTill p end = go where go = ([] <$ end) <|> ((:) <$> p <*> go) 247 | {-# INLINE manyTill #-} 248 | 249 | infixr 0 250 | 251 | -- | Additional functionality needed to describe parsers independent of input type. 252 | class Alternative m => Parsing m where 253 | -- | Take a parser that may consume input, and on failure, go back to 254 | -- where we started and fail as if we didn't consume input. 255 | try :: m a -> m a 256 | 257 | -- | Give a parser a name 258 | () :: m a -> String -> m a 259 | 260 | -- | A version of many that discards its input. Specialized because it 261 | -- can often be implemented more cheaply. 262 | skipMany :: m a -> m () 263 | skipMany p = void (many p) 264 | {-# INLINE skipMany #-} 265 | 266 | -- | @skipSome p@ applies the parser @p@ /one/ or more times, skipping 267 | -- its result. (aka skipMany1 in parsec) 268 | skipSome :: m a -> m () 269 | skipSome p = p *> skipMany p 270 | {-# INLINE skipSome #-} 271 | 272 | -- | Used to emit an error on an unexpected token 273 | unexpected :: String -> m a 274 | default unexpected :: (MonadTrans t, Monad n, Parsing n, m ~ t n) => 275 | String -> m a 276 | unexpected = lift . unexpected 277 | {-# INLINE unexpected #-} 278 | 279 | -- | This parser only succeeds at the end of the input. This is not a 280 | -- primitive parser but it is defined using 'notFollowedBy'. 281 | -- 282 | -- > eof = notFollowedBy anyChar "end of input" 283 | eof :: m () 284 | default eof :: (MonadTrans t, Monad n, Parsing n, m ~ t n) => m () 285 | eof = lift eof 286 | {-# INLINE eof #-} 287 | 288 | -- | @notFollowedBy p@ only succeeds when parser @p@ fails. This parser 289 | -- does not consume any input. This parser can be used to implement the 290 | -- \'longest match\' rule. For example, when recognizing keywords (for 291 | -- example @let@), we want to make sure that a keyword is not followed 292 | -- by a legal identifier character, in which case the keyword is 293 | -- actually an identifier (for example @lets@). We can program this 294 | -- behaviour as follows: 295 | -- 296 | -- > keywordLet = try $ string "let" <* notFollowedBy alphaNum 297 | notFollowedBy :: Show a => m a -> m () 298 | 299 | instance (Parsing m, MonadPlus m) => Parsing (Lazy.StateT s m) where 300 | try (Lazy.StateT m) = Lazy.StateT $ try . m 301 | {-# INLINE try #-} 302 | Lazy.StateT m l = Lazy.StateT $ \s -> m s l 303 | {-# INLINE () #-} 304 | unexpected = lift . unexpected 305 | {-# INLINE unexpected #-} 306 | eof = lift eof 307 | {-# INLINE eof #-} 308 | notFollowedBy (Lazy.StateT m) = Lazy.StateT 309 | $ \s -> notFollowedBy (fst <$> m s) >> return ((),s) 310 | {-# INLINE notFollowedBy #-} 311 | 312 | instance (Parsing m, MonadPlus m) => Parsing (Strict.StateT s m) where 313 | try (Strict.StateT m) = Strict.StateT $ try . m 314 | {-# INLINE try #-} 315 | Strict.StateT m l = Strict.StateT $ \s -> m s l 316 | {-# INLINE () #-} 317 | unexpected = lift . unexpected 318 | {-# INLINE unexpected #-} 319 | eof = lift eof 320 | {-# INLINE eof #-} 321 | notFollowedBy (Strict.StateT m) = Strict.StateT 322 | $ \s -> notFollowedBy (fst <$> m s) >> return ((),s) 323 | {-# INLINE notFollowedBy #-} 324 | 325 | instance (Parsing m, MonadPlus m) => Parsing (ReaderT e m) where 326 | try (ReaderT m) = ReaderT $ try . m 327 | {-# INLINE try #-} 328 | ReaderT m l = ReaderT $ \e -> m e l 329 | {-# INLINE () #-} 330 | skipMany (ReaderT m) = ReaderT $ skipMany . m 331 | {-# INLINE skipMany #-} 332 | unexpected = lift . unexpected 333 | {-# INLINE unexpected #-} 334 | eof = lift eof 335 | {-# INLINE eof #-} 336 | notFollowedBy (ReaderT m) = ReaderT $ notFollowedBy . m 337 | {-# INLINE notFollowedBy #-} 338 | 339 | instance (Parsing m, MonadPlus m, Monoid w) => Parsing (Strict.WriterT w m) where 340 | try (Strict.WriterT m) = Strict.WriterT $ try m 341 | {-# INLINE try #-} 342 | Strict.WriterT m l = Strict.WriterT (m l) 343 | {-# INLINE () #-} 344 | unexpected = lift . unexpected 345 | {-# INLINE unexpected #-} 346 | eof = lift eof 347 | {-# INLINE eof #-} 348 | notFollowedBy (Strict.WriterT m) = Strict.WriterT 349 | $ notFollowedBy (fst <$> m) >>= \x -> return (x, mempty) 350 | {-# INLINE notFollowedBy #-} 351 | 352 | instance (Parsing m, MonadPlus m, Monoid w) => Parsing (Lazy.WriterT w m) where 353 | try (Lazy.WriterT m) = Lazy.WriterT $ try m 354 | {-# INLINE try #-} 355 | Lazy.WriterT m l = Lazy.WriterT (m l) 356 | {-# INLINE () #-} 357 | unexpected = lift . unexpected 358 | {-# INLINE unexpected #-} 359 | eof = lift eof 360 | {-# INLINE eof #-} 361 | notFollowedBy (Lazy.WriterT m) = Lazy.WriterT 362 | $ notFollowedBy (fst <$> m) >>= \x -> return (x, mempty) 363 | {-# INLINE notFollowedBy #-} 364 | 365 | instance (Parsing m, MonadPlus m, Monoid w) => Parsing (Lazy.RWST r w s m) where 366 | try (Lazy.RWST m) = Lazy.RWST $ \r s -> try (m r s) 367 | {-# INLINE try #-} 368 | Lazy.RWST m l = Lazy.RWST $ \r s -> m r s l 369 | {-# INLINE () #-} 370 | unexpected = lift . unexpected 371 | {-# INLINE unexpected #-} 372 | eof = lift eof 373 | {-# INLINE eof #-} 374 | notFollowedBy (Lazy.RWST m) = Lazy.RWST 375 | $ \r s -> notFollowedBy ((\(a,_,_) -> a) <$> m r s) >>= \x -> return (x, s, mempty) 376 | {-# INLINE notFollowedBy #-} 377 | 378 | instance (Parsing m, MonadPlus m, Monoid w) => Parsing (Strict.RWST r w s m) where 379 | try (Strict.RWST m) = Strict.RWST $ \r s -> try (m r s) 380 | {-# INLINE try #-} 381 | Strict.RWST m l = Strict.RWST $ \r s -> m r s l 382 | {-# INLINE () #-} 383 | unexpected = lift . unexpected 384 | {-# INLINE unexpected #-} 385 | eof = lift eof 386 | {-# INLINE eof #-} 387 | notFollowedBy (Strict.RWST m) = Strict.RWST 388 | $ \r s -> notFollowedBy ((\(a,_,_) -> a) <$> m r s) >>= \x -> return (x, s, mempty) 389 | {-# INLINE notFollowedBy #-} 390 | 391 | instance (Parsing m, Monad m) => Parsing (IdentityT m) where 392 | try = IdentityT . try . runIdentityT 393 | {-# INLINE try #-} 394 | IdentityT m l = IdentityT (m l) 395 | {-# INLINE () #-} 396 | skipMany = IdentityT . skipMany . runIdentityT 397 | {-# INLINE skipMany #-} 398 | unexpected = lift . unexpected 399 | {-# INLINE unexpected #-} 400 | eof = lift eof 401 | {-# INLINE eof #-} 402 | notFollowedBy (IdentityT m) = IdentityT $ notFollowedBy m 403 | {-# INLINE notFollowedBy #-} 404 | 405 | #ifdef MIN_VERSION_parsec 406 | instance (Parsec.Stream s m t, Show t) => Parsing (Parsec.ParsecT s u m) where 407 | try = Parsec.try 408 | () = (Parsec.) 409 | skipMany = Parsec.skipMany 410 | skipSome = Parsec.skipMany1 411 | unexpected = Parsec.unexpected 412 | eof = Parsec.eof 413 | notFollowedBy = Parsec.notFollowedBy 414 | #endif 415 | 416 | #ifdef MIN_VERSION_attoparsec 417 | instance Att.Chunk t => Parsing (Att.Parser t) where 418 | try = Att.try 419 | () = (Att.) 420 | skipMany = Att.skipMany 421 | skipSome = Att.skipMany1 422 | unexpected = fail 423 | eof = Att.endOfInput 424 | notFollowedBy p = optional p >>= maybe (pure ()) (unexpected . show) 425 | #endif 426 | 427 | #ifdef MIN_VERSION_binary 428 | instance Parsing B.Get where 429 | try = id 430 | () = flip B.label 431 | skipMany p = do skipped <- True <$ p <|> pure False 432 | when skipped $ skipMany p 433 | unexpected = fail 434 | eof = do isEof <- B.isEmpty 435 | unless isEof $ fail "Parsing.eof" 436 | notFollowedBy p = optional p >>= maybe (pure ()) (unexpected . show) 437 | #endif 438 | 439 | instance Parsing ReadP.ReadP where 440 | try = id 441 | () = const 442 | skipMany = ReadP.skipMany 443 | skipSome = ReadP.skipMany1 444 | unexpected = const ReadP.pfail 445 | eof = ReadP.eof 446 | notFollowedBy p = ((Just <$> p) ReadP.<++ pure Nothing) 447 | >>= maybe (pure ()) (unexpected . show) 448 | -------------------------------------------------------------------------------- /src/Text/Parser/Token.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE FlexibleContexts #-} 3 | {-# LANGUAGE FlexibleInstances #-} 4 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 5 | {-# LANGUAGE MultiParamTypeClasses #-} 6 | {-# LANGUAGE ScopedTypeVariables #-} 7 | {-# LANGUAGE Trustworthy #-} 8 | {-# LANGUAGE UndecidableInstances #-} 9 | {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} 10 | {-# OPTIONS_GHC -fspec-constr -fspec-constr-count=8 #-} 11 | ----------------------------------------------------------------------------- 12 | -- | 13 | -- Module : Text.Parser.Token 14 | -- Copyright : (c) Edward Kmett 2011 15 | -- (c) Daan Leijen 1999-2001 16 | -- License : BSD3 17 | -- 18 | -- Maintainer : ekmett@gmail.com 19 | -- Stability : experimental 20 | -- Portability : non-portable 21 | -- 22 | -- Parsers that comprehend whitespace and identifier styles 23 | -- 24 | -- > idStyle = haskellIdents { styleReserved = ... } 25 | -- > identifier = ident idStyle 26 | -- > reserved = reserve idStyle 27 | -- 28 | ----------------------------------------------------------------------------- 29 | module Text.Parser.Token 30 | ( 31 | -- * Token Parsing 32 | whiteSpace -- :: TokenParsing m => m () 33 | , charLiteral -- :: TokenParsing m => m Char 34 | , stringLiteral -- :: (TokenParsing m, IsString s) => m s 35 | , stringLiteral' -- :: (TokenParsing m, IsString s) => m s 36 | , natural -- :: TokenParsing m => m Integer 37 | , integer -- :: TokenParsing m => m Integer 38 | , double -- :: TokenParsing m => m Double 39 | , naturalOrDouble -- :: TokenParsing m => m (Either Integer Double) 40 | , integerOrDouble -- :: TokenParsing m => m (Either Integer Double) 41 | , scientific -- :: TokenParsing m => m Scientific 42 | , naturalOrScientific -- :: TokenParsing m => m (Either Integer Scientific) 43 | , integerOrScientific -- :: TokenParsing m => m (Either Integer Scientific) 44 | , symbol -- :: TokenParsing m => String -> m String 45 | , textSymbol -- :: TokenParsing m => Text -> m Text 46 | , symbolic -- :: TokenParsing m => Char -> m Char 47 | , parens -- :: TokenParsing m => m a -> m a 48 | , braces -- :: TokenParsing m => m a -> m a 49 | , angles -- :: TokenParsing m => m a -> m a 50 | , brackets -- :: TokenParsing m => m a -> m a 51 | , comma -- :: TokenParsing m => m Char 52 | , colon -- :: TokenParsing m => m Char 53 | , dot -- :: TokenParsing m => m Char 54 | , semiSep -- :: TokenParsing m => m a -> m [a] 55 | , semiSep1 -- :: TokenParsing m => m a -> m [a] 56 | , commaSep -- :: TokenParsing m => m a -> m [a] 57 | , commaSep1 -- :: TokenParsing m => m a -> m [a] 58 | -- ** Token Parsing Class 59 | , TokenParsing(..) 60 | -- ** Token Parsing Transformers 61 | , Unspaced(..) 62 | , Unlined(..) 63 | , Unhighlighted(..) 64 | -- ** /Non-Token/ Parsers 65 | , decimal -- :: TokenParsing m => m Integer 66 | , hexadecimal -- :: TokenParsing m => m Integer 67 | , octal -- :: TokenParsing m => m Integer 68 | , characterChar -- :: TokenParsing m => m Char 69 | , integer' -- :: TokenParsing m => m Integer 70 | -- * Identifiers 71 | , IdentifierStyle(..) 72 | , liftIdentifierStyle -- :: (MonadTrans t, Monad m) => IdentifierStyle m -> IdentifierStyle (t m) 73 | , ident -- :: (TokenParsing m, IsString s) => IdentifierStyle m -> m s 74 | , reserve -- :: TokenParsing m => IdentifierStyle m -> String -> m () 75 | , reserveText -- :: TokenParsing m => IdentifierStyle m -> Text -> m () 76 | -- ** Lenses and Traversals 77 | , styleName 78 | , styleStart 79 | , styleLetter 80 | , styleChars 81 | , styleReserved 82 | , styleHighlight 83 | , styleReservedHighlight 84 | , styleHighlights 85 | ) where 86 | 87 | import Control.Applicative 88 | import Control.Monad (MonadPlus(..), when) 89 | import Control.Monad.Trans.Class 90 | import Control.Monad.Trans.State.Lazy as Lazy 91 | import Control.Monad.Trans.State.Strict as Strict 92 | import Control.Monad.Trans.Writer.Lazy as Lazy 93 | import Control.Monad.Trans.Writer.Strict as Strict 94 | import Control.Monad.Trans.RWS.Lazy as Lazy 95 | import Control.Monad.Trans.RWS.Strict as Strict 96 | import Control.Monad.Trans.Reader 97 | import Control.Monad.Trans.Identity 98 | import Control.Monad.State.Class as Class 99 | import Control.Monad.Reader.Class as Class 100 | import Control.Monad.Writer.Class as Class 101 | import Data.Char 102 | import Data.Functor.Identity 103 | import qualified Data.HashSet as HashSet 104 | import Data.HashSet (HashSet) 105 | import qualified Data.List as List (foldl', transpose) 106 | import Data.Scientific ( Scientific ) 107 | import qualified Data.Scientific as Sci 108 | import Data.String 109 | import Data.Text (Text) 110 | import Numeric (showIntAtBase) 111 | import qualified Text.ParserCombinators.ReadP as ReadP 112 | import Text.Parser.Char 113 | import Text.Parser.Combinators 114 | import Text.Parser.Token.Highlight 115 | 116 | #ifdef MIN_VERSION_parsec 117 | import qualified Text.Parsec as Parsec 118 | #endif 119 | 120 | #ifdef MIN_VERSION_attoparsec 121 | import qualified Data.Attoparsec.Types as Att 122 | #endif 123 | 124 | -- | Skip zero or more bytes worth of white space. More complex parsers are 125 | -- free to consider comments as white space. 126 | whiteSpace :: TokenParsing m => m () 127 | whiteSpace = someSpace <|> pure () 128 | {-# INLINE whiteSpace #-} 129 | 130 | -- | This token parser parses a single literal character. Returns the 131 | -- literal character value. This parsers deals correctly with escape 132 | -- sequences. The literal character is parsed according to the grammar 133 | -- rules defined in the Haskell report (which matches most programming 134 | -- languages quite closely). 135 | charLiteral :: forall m. TokenParsing m => m Char 136 | charLiteral = token (highlight CharLiteral lit) where 137 | lit :: m Char 138 | lit = between (char '\'') (char '\'' "end of character") characterChar 139 | "character" 140 | {-# INLINE charLiteral #-} 141 | 142 | -- | This token parser parses a literal string. Returns the literal 143 | -- string value. This parsers deals correctly with escape sequences and 144 | -- gaps. The literal string is parsed according to the grammar rules 145 | -- defined in the Haskell report (which matches most programming 146 | -- languages quite closely). 147 | stringLiteral :: forall m s. (TokenParsing m, IsString s) => m s 148 | stringLiteral = fromString <$> token (highlight StringLiteral lit) where 149 | lit :: m [Char] 150 | lit = Prelude.foldr (maybe id (:)) "" 151 | <$> between (char '"') (char '"' "end of string") (many stringChar) 152 | "string" 153 | 154 | stringChar :: m (Maybe Char) 155 | stringChar = Just <$> stringLetter 156 | <|> stringEscape 157 | "string character" 158 | 159 | stringLetter :: m Char 160 | stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026')) 161 | 162 | stringEscape :: m (Maybe Char) 163 | stringEscape = highlight EscapeCode $ char '\\' *> esc where 164 | esc :: m (Maybe Char) 165 | esc = Nothing <$ escapeGap 166 | <|> Nothing <$ escapeEmpty 167 | <|> Just <$> escapeCode 168 | 169 | escapeEmpty, escapeGap :: m Char 170 | escapeEmpty = char '&' 171 | escapeGap = skipSome space *> (char '\\' "end of string gap") 172 | {-# INLINE stringLiteral #-} 173 | 174 | -- | This token parser behaves as 'stringLiteral', but for single-quoted 175 | -- strings. 176 | stringLiteral' :: forall m s. (TokenParsing m, IsString s) => m s 177 | stringLiteral' = fromString <$> token (highlight StringLiteral lit) where 178 | lit :: m [Char] 179 | lit = Prelude.foldr (maybe id (:)) "" 180 | <$> between (char '\'') (char '\'' "end of string") (many stringChar) 181 | "string" 182 | 183 | stringChar :: m (Maybe Char) 184 | stringChar = Just <$> stringLetter 185 | <|> stringEscape 186 | "string character" 187 | 188 | stringLetter :: m Char 189 | stringLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026')) 190 | 191 | stringEscape :: m (Maybe Char) 192 | stringEscape = highlight EscapeCode $ char '\\' *> esc where 193 | esc :: m (Maybe Char) 194 | esc = Nothing <$ escapeGap 195 | <|> Nothing <$ escapeEmpty 196 | <|> Just <$> escapeCode 197 | 198 | escapeEmpty, escapeGap :: m Char 199 | escapeEmpty = char '&' 200 | escapeGap = skipSome space *> (char '\\' "end of string gap") 201 | {-# INLINE stringLiteral' #-} 202 | 203 | -- | This token parser parses a natural number (a non-negative whole 204 | -- number). Returns the value of the number. The number can be 205 | -- specified in 'decimal', 'hexadecimal' or 206 | -- 'octal'. The number is parsed according to the grammar 207 | -- rules in the Haskell report. 208 | natural :: TokenParsing m => m Integer 209 | natural = token natural' 210 | {-# INLINE natural #-} 211 | 212 | -- | This token parser parses an integer (a whole number). This parser 213 | -- is like 'natural' except that it can be prefixed with 214 | -- sign (i.e. \'-\' or \'+\'). Returns the value of the number. The 215 | -- number can be specified in 'decimal', 'hexadecimal' 216 | -- or 'octal'. The number is parsed according 217 | -- to the grammar rules in the Haskell report. 218 | integer :: forall m. TokenParsing m => m Integer 219 | integer = token (token (highlight Operator sgn <*> natural')) "integer" 220 | where 221 | sgn :: m (Integer -> Integer) 222 | sgn = negate <$ char '-' 223 | <|> id <$ char '+' 224 | <|> pure id 225 | {-# INLINE integer #-} 226 | 227 | -- | This token parser parses a floating point value. Returns the value 228 | -- of the number. The number is parsed according to the grammar rules 229 | -- defined in the Haskell report. 230 | double :: TokenParsing m => m Double 231 | double = token (highlight Number (Sci.toRealFloat <$> floating) "double") 232 | {-# INLINE double #-} 233 | 234 | -- | This token parser parses either 'natural' or a 'float'. 235 | -- Returns the value of the number. This parsers deals with 236 | -- any overlap in the grammar rules for naturals and floats. The number 237 | -- is parsed according to the grammar rules defined in the Haskell report. 238 | naturalOrDouble :: TokenParsing m => m (Either Integer Double) 239 | naturalOrDouble = fmap Sci.toRealFloat <$> naturalOrScientific 240 | {-# INLINE naturalOrDouble #-} 241 | 242 | -- | This token parser is like 'naturalOrDouble', but handles 243 | -- leading @-@ or @+@. 244 | integerOrDouble :: TokenParsing m => m (Either Integer Double) 245 | integerOrDouble = fmap Sci.toRealFloat <$> integerOrScientific 246 | {-# INLINE integerOrDouble #-} 247 | 248 | -- | This token parser parses a floating point value. Returns the value 249 | -- of the number. The number is parsed according to the grammar rules 250 | -- defined in the Haskell report. 251 | scientific :: TokenParsing m => m Scientific 252 | scientific = token (highlight Number floating "scientific") 253 | {-# INLINE scientific #-} 254 | 255 | -- | This token parser parses either 'natural' or a 'scientific'. 256 | -- Returns the value of the number. This parsers deals with 257 | -- any overlap in the grammar rules for naturals and floats. The number 258 | -- is parsed according to the grammar rules defined in the Haskell report. 259 | naturalOrScientific :: TokenParsing m => m (Either Integer Scientific) 260 | naturalOrScientific = token (highlight Number natFloating "number") 261 | {-# INLINE naturalOrScientific #-} 262 | 263 | -- | This token parser is like 'naturalOrScientific', but handles 264 | -- leading @-@ or @+@. 265 | integerOrScientific :: forall m. TokenParsing m => m (Either Integer Scientific) 266 | integerOrScientific = token (highlight Number ios "number") 267 | where ios :: m (Either Integer Scientific) 268 | ios = mneg <$> optional (oneOf "+-") <*> natFloating 269 | 270 | mneg (Just '-') nd = either (Left . negate) (Right . negate) nd 271 | mneg _ nd = nd 272 | {-# INLINE integerOrScientific #-} 273 | 274 | 275 | -- | Token parser @symbol s@ parses 'string' @s@ and skips 276 | -- trailing white space. 277 | symbol :: TokenParsing m => String -> m String 278 | symbol name = token (highlight Symbol (string name)) 279 | {-# INLINE symbol #-} 280 | 281 | -- | Token parser @textSymbol t@ parses 'text' @s@ and skips 282 | -- trailing white space. 283 | textSymbol :: TokenParsing m => Text -> m Text 284 | textSymbol name = token (highlight Symbol (text name)) 285 | {-# INLINE textSymbol #-} 286 | 287 | -- | Token parser @symbolic s@ parses 'char' @s@ and skips 288 | -- trailing white space. 289 | symbolic :: TokenParsing m => Char -> m Char 290 | symbolic name = token (highlight Symbol (char name)) 291 | {-# INLINE symbolic #-} 292 | 293 | -- | Token parser @parens p@ parses @p@ enclosed in parenthesis, 294 | -- returning the value of @p@. 295 | parens :: TokenParsing m => m a -> m a 296 | parens = nesting . between (symbolic '(') (symbolic ')') 297 | {-# INLINE parens #-} 298 | 299 | -- | Token parser @braces p@ parses @p@ enclosed in braces (\'{\' and 300 | -- \'}\'), returning the value of @p@. 301 | braces :: TokenParsing m => m a -> m a 302 | braces = nesting . between (symbolic '{') (symbolic '}') 303 | {-# INLINE braces #-} 304 | 305 | -- | Token parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\' 306 | -- and \'>\'), returning the value of @p@. 307 | angles :: TokenParsing m => m a -> m a 308 | angles = nesting . between (symbolic '<') (symbolic '>') 309 | {-# INLINE angles #-} 310 | 311 | -- | Token parser @brackets p@ parses @p@ enclosed in brackets (\'[\' 312 | -- and \']\'), returning the value of @p@. 313 | brackets :: TokenParsing m => m a -> m a 314 | brackets = nesting . between (symbolic '[') (symbolic ']') 315 | {-# INLINE brackets #-} 316 | 317 | -- | Token parser @comma@ parses the character \',\' and skips any 318 | -- trailing white space. Returns the string \",\". 319 | comma :: TokenParsing m => m Char 320 | comma = symbolic ',' 321 | {-# INLINE comma #-} 322 | 323 | -- | Token parser @colon@ parses the character \':\' and skips any 324 | -- trailing white space. Returns the string \":\". 325 | colon :: TokenParsing m => m Char 326 | colon = symbolic ':' 327 | {-# INLINE colon #-} 328 | 329 | -- | Token parser @dot@ parses the character \'.\' and skips any 330 | -- trailing white space. Returns the string \".\". 331 | dot :: TokenParsing m => m Char 332 | dot = symbolic '.' 333 | {-# INLINE dot #-} 334 | 335 | -- | Token parser @semiSep p@ parses /zero/ or more occurrences of @p@ 336 | -- separated by 'semi'. Returns a list of values returned by @p@. 337 | semiSep :: TokenParsing m => m a -> m [a] 338 | semiSep p = sepBy p semi 339 | {-# INLINE semiSep #-} 340 | 341 | -- | Token parser @semiSep1 p@ parses /one/ or more occurrences of @p@ 342 | -- separated by 'semi'. Returns a list of values returned by @p@. 343 | semiSep1 :: TokenParsing m => m a -> m [a] 344 | semiSep1 p = sepBy1 p semi 345 | {-# INLINE semiSep1 #-} 346 | 347 | -- | Token parser @commaSep p@ parses /zero/ or more occurrences of 348 | -- @p@ separated by 'comma'. Returns a list of values returned 349 | -- by @p@. 350 | commaSep :: TokenParsing m => m a -> m [a] 351 | commaSep p = sepBy p comma 352 | {-# INLINE commaSep #-} 353 | 354 | -- | Token parser @commaSep1 p@ parses /one/ or more occurrences of 355 | -- @p@ separated by 'comma'. Returns a list of values returned 356 | -- by @p@. 357 | commaSep1 :: TokenParsing m => m a -> m [a] 358 | commaSep1 p = sepBy1 p comma 359 | {-# INLINE commaSep1 #-} 360 | 361 | -- | Additional functionality that is needed to tokenize input while ignoring whitespace. 362 | class CharParsing m => TokenParsing m where 363 | -- | Usually, someSpace consists of /one/ or more occurrences of a 'space'. 364 | -- Some parsers may choose to recognize line comments or block (multi line) 365 | -- comments as white space as well. 366 | someSpace :: m () 367 | someSpace = skipSome (satisfy isSpace) 368 | {-# INLINE someSpace #-} 369 | 370 | -- | Called when we enter a nested pair of symbols. 371 | -- Overloadable to enable disabling layout 372 | nesting :: m a -> m a 373 | nesting = id 374 | {-# INLINE nesting #-} 375 | 376 | -- | The token parser |semi| parses the character \';\' and skips 377 | -- any trailing white space. Returns the character \';\'. Overloadable to 378 | -- permit automatic semicolon insertion or Haskell-style layout. 379 | semi :: m Char 380 | semi = token (satisfy (';'==) ";") 381 | {-# INLINE semi #-} 382 | 383 | -- | Tag a region of parsed text with a bit of semantic information. 384 | -- Most parsers won't use this, but it is indispensible for highlighters. 385 | highlight :: Highlight -> m a -> m a 386 | highlight _ a = a 387 | {-# INLINE highlight #-} 388 | 389 | -- | @token p@ first applies parser @p@ and then the 'whiteSpace' 390 | -- parser, returning the value of @p@. Every lexical 391 | -- token (token) is defined using @token@, this way every parse 392 | -- starts at a point without white space. Parsers that use @token@ are 393 | -- called /token/ parsers in this document. 394 | -- 395 | -- The only point where the 'whiteSpace' parser should be 396 | -- called explicitly is the start of the main parser in order to skip 397 | -- any leading white space. 398 | -- 399 | -- Alternatively, one might define 'token' as first parsing 'whiteSpace' 400 | -- and then parser @p@. By parsing whiteSpace first, the parser is able 401 | -- to return before parsing additional whiteSpace, improving laziness. 402 | -- 403 | -- > mainParser = sum <$ whiteSpace <*> many (token digit) <* eof 404 | token :: m a -> m a 405 | token p = p <* (someSpace <|> pure ()) 406 | 407 | instance (TokenParsing m, MonadPlus m) => TokenParsing (Lazy.StateT s m) where 408 | nesting (Lazy.StateT m) = Lazy.StateT $ nesting . m 409 | {-# INLINE nesting #-} 410 | someSpace = lift someSpace 411 | {-# INLINE someSpace #-} 412 | semi = lift semi 413 | {-# INLINE semi #-} 414 | highlight h (Lazy.StateT m) = Lazy.StateT $ highlight h . m 415 | {-# INLINE highlight #-} 416 | 417 | instance (TokenParsing m, MonadPlus m) => TokenParsing (Strict.StateT s m) where 418 | nesting (Strict.StateT m) = Strict.StateT $ nesting . m 419 | {-# INLINE nesting #-} 420 | someSpace = lift someSpace 421 | {-# INLINE someSpace #-} 422 | semi = lift semi 423 | {-# INLINE semi #-} 424 | highlight h (Strict.StateT m) = Strict.StateT $ highlight h . m 425 | {-# INLINE highlight #-} 426 | 427 | instance (TokenParsing m, MonadPlus m) => TokenParsing (ReaderT e m) where 428 | nesting (ReaderT m) = ReaderT $ nesting . m 429 | {-# INLINE nesting #-} 430 | someSpace = lift someSpace 431 | {-# INLINE someSpace #-} 432 | semi = lift semi 433 | {-# INLINE semi #-} 434 | highlight h (ReaderT m) = ReaderT $ highlight h . m 435 | {-# INLINE highlight #-} 436 | 437 | instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Strict.WriterT w m) where 438 | nesting (Strict.WriterT m) = Strict.WriterT $ nesting m 439 | {-# INLINE nesting #-} 440 | someSpace = lift someSpace 441 | {-# INLINE someSpace #-} 442 | semi = lift semi 443 | {-# INLINE semi #-} 444 | highlight h (Strict.WriterT m) = Strict.WriterT $ highlight h m 445 | {-# INLINE highlight #-} 446 | 447 | instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Lazy.WriterT w m) where 448 | nesting (Lazy.WriterT m) = Lazy.WriterT $ nesting m 449 | {-# INLINE nesting #-} 450 | someSpace = lift someSpace 451 | {-# INLINE someSpace #-} 452 | semi = lift semi 453 | {-# INLINE semi #-} 454 | highlight h (Lazy.WriterT m) = Lazy.WriterT $ highlight h m 455 | {-# INLINE highlight #-} 456 | 457 | instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Lazy.RWST r w s m) where 458 | nesting (Lazy.RWST m) = Lazy.RWST $ \r s -> nesting (m r s) 459 | {-# INLINE nesting #-} 460 | someSpace = lift someSpace 461 | {-# INLINE someSpace #-} 462 | semi = lift semi 463 | {-# INLINE semi #-} 464 | highlight h (Lazy.RWST m) = Lazy.RWST $ \r s -> highlight h (m r s) 465 | {-# INLINE highlight #-} 466 | 467 | instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Strict.RWST r w s m) where 468 | nesting (Strict.RWST m) = Strict.RWST $ \r s -> nesting (m r s) 469 | {-# INLINE nesting #-} 470 | someSpace = lift someSpace 471 | {-# INLINE someSpace #-} 472 | semi = lift semi 473 | {-# INLINE semi #-} 474 | highlight h (Strict.RWST m) = Strict.RWST $ \r s -> highlight h (m r s) 475 | {-# INLINE highlight #-} 476 | 477 | instance (TokenParsing m, MonadPlus m) => TokenParsing (IdentityT m) where 478 | nesting = IdentityT . nesting . runIdentityT 479 | {-# INLINE nesting #-} 480 | someSpace = lift someSpace 481 | {-# INLINE someSpace #-} 482 | semi = lift semi 483 | {-# INLINE semi #-} 484 | highlight h = IdentityT . highlight h . runIdentityT 485 | {-# INLINE highlight #-} 486 | 487 | -- | Used to describe an input style for constructors, values, operators, etc. 488 | data IdentifierStyle m = IdentifierStyle 489 | { _styleName :: String 490 | , _styleStart :: m Char 491 | , _styleLetter :: m Char 492 | , _styleReserved :: HashSet String 493 | , _styleHighlight :: Highlight 494 | , _styleReservedHighlight :: Highlight 495 | } 496 | 497 | -- | This lens can be used to update the name for this style of identifier. 498 | -- 499 | -- @'styleName' :: Lens' ('IdentifierStyle' m) 'String'@ 500 | styleName :: Functor f => (String -> f String) -> IdentifierStyle m -> f (IdentifierStyle m) 501 | styleName f is = (\n -> is { _styleName = n }) <$> f (_styleName is) 502 | {-# INLINE styleName #-} 503 | 504 | -- | This lens can be used to update the action used to recognize the first letter in an identifier. 505 | -- 506 | -- @'styleStart' :: Lens' ('IdentifierStyle' m) (m 'Char')@ 507 | styleStart :: Functor f => (m Char -> f (m Char)) -> IdentifierStyle m -> f (IdentifierStyle m) 508 | styleStart f is = (\n -> is { _styleStart = n }) <$> f (_styleStart is) 509 | {-# INLINE styleStart #-} 510 | 511 | -- | This lens can be used to update the action used to recognize subsequent letters in an identifier. 512 | -- 513 | -- @'styleLetter' :: Lens' ('IdentifierStyle' m) (m 'Char')@ 514 | styleLetter :: Functor f => (m Char -> f (m Char)) -> IdentifierStyle m -> f (IdentifierStyle m) 515 | styleLetter f is = (\n -> is { _styleLetter = n }) <$> f (_styleLetter is) 516 | {-# INLINE styleLetter #-} 517 | 518 | -- | This is a traversal of both actions in contained in an 'IdentifierStyle'. 519 | -- 520 | -- @'styleChars' :: Traversal ('IdentifierStyle' m) ('IdentifierStyle' n) (m 'Char') (n 'Char')@ 521 | styleChars :: Applicative f => (m Char -> f (n Char)) -> IdentifierStyle m -> f (IdentifierStyle n) 522 | styleChars f is = (\n m -> is { _styleStart = n, _styleLetter = m }) <$> f (_styleStart is) <*> f (_styleLetter is) 523 | {-# INLINE styleChars #-} 524 | 525 | -- | This is a lens that can be used to modify the reserved identifier set. 526 | -- 527 | -- @'styleReserved' :: Lens' ('IdentifierStyle' m) ('HashSet' 'String')@ 528 | styleReserved :: Functor f => (HashSet String -> f (HashSet String)) -> IdentifierStyle m -> f (IdentifierStyle m) 529 | styleReserved f is = (\n -> is { _styleReserved = n }) <$> f (_styleReserved is) 530 | {-# INLINE styleReserved #-} 531 | 532 | -- | This is a lens that can be used to modify the highlight used for this identifier set. 533 | -- 534 | -- @'styleHighlight' :: Lens' ('IdentifierStyle' m) 'Highlight'@ 535 | styleHighlight :: Functor f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m) 536 | styleHighlight f is = (\n -> is { _styleHighlight = n }) <$> f (_styleHighlight is) 537 | {-# INLINE styleHighlight #-} 538 | 539 | -- | This is a lens that can be used to modify the highlight used for reserved identifiers in this identifier set. 540 | -- 541 | -- @'styleReservedHighlight' :: Lens' ('IdentifierStyle' m) 'Highlight'@ 542 | styleReservedHighlight :: Functor f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m) 543 | styleReservedHighlight f is = (\n -> is { _styleReservedHighlight = n }) <$> f (_styleReservedHighlight is) 544 | {-# INLINE styleReservedHighlight #-} 545 | 546 | -- | This is a traversal that can be used to modify the highlights used for both non-reserved and reserved identifiers in this identifier set. 547 | -- 548 | -- @'styleHighlights' :: Traversal' ('IdentifierStyle' m) 'Highlight'@ 549 | styleHighlights :: Applicative f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m) 550 | styleHighlights f is = (\n m -> is { _styleHighlight = n, _styleReservedHighlight = m }) <$> f (_styleHighlight is) <*> f (_styleReservedHighlight is) 551 | {-# INLINE styleHighlights #-} 552 | 553 | -- | Lift an identifier style into a monad transformer 554 | -- 555 | -- Using @over@ from the @lens@ package: 556 | -- 557 | -- @'liftIdentifierStyle' = over 'styleChars' 'lift'@ 558 | liftIdentifierStyle :: (MonadTrans t, Monad m) => IdentifierStyle m -> IdentifierStyle (t m) 559 | liftIdentifierStyle = runIdentity . styleChars (Identity . lift) 560 | {-# INLINE liftIdentifierStyle #-} 561 | 562 | -- | parse a reserved operator or identifier using a given style 563 | reserve :: (TokenParsing m, Monad m) => IdentifierStyle m -> String -> m () 564 | reserve s name = token $ try $ do 565 | _ <- highlight (_styleReservedHighlight s) $ string name 566 | notFollowedBy (_styleLetter s) "end of " ++ show name 567 | {-# INLINE reserve #-} 568 | 569 | -- | parse a reserved operator or identifier using a given style given 'Text'. 570 | reserveText :: (TokenParsing m, Monad m) => IdentifierStyle m -> Text -> m () 571 | reserveText s name = token $ try $ do 572 | _ <- highlight (_styleReservedHighlight s) $ text name 573 | notFollowedBy (_styleLetter s) "end of " ++ show name 574 | {-# INLINE reserveText #-} 575 | 576 | -- | Parse a non-reserved identifier or symbol 577 | ident :: (TokenParsing m, Monad m, IsString s) => IdentifierStyle m -> m s 578 | ident s = fmap fromString $ token $ try $ do 579 | name <- highlight (_styleHighlight s) 580 | ((:) <$> _styleStart s <*> many (_styleLetter s) _styleName s) 581 | when (HashSet.member name (_styleReserved s)) $ unexpected $ "reserved " ++ _styleName s ++ " " ++ show name 582 | return name 583 | {-# INLINE ident #-} 584 | 585 | -- * Utilities 586 | 587 | -- | This parser parses a character literal without the surrounding quotation marks. 588 | -- 589 | -- This parser does NOT swallow trailing whitespace 590 | 591 | characterChar :: TokenParsing m => m Char 592 | 593 | charEscape, charLetter :: TokenParsing m => m Char 594 | characterChar = charLetter <|> charEscape "literal character" 595 | {-# INLINE characterChar #-} 596 | 597 | charEscape = highlight EscapeCode $ char '\\' *> escapeCode 598 | {-# INLINE charEscape #-} 599 | 600 | charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026')) 601 | {-# INLINE charLetter #-} 602 | 603 | -- | This parser parses a literal string. Returns the literal 604 | -- string value. This parsers deals correctly with escape sequences and 605 | -- gaps. The literal string is parsed according to the grammar rules 606 | -- defined in the Haskell report (which matches most programming 607 | -- languages quite closely). 608 | -- 609 | -- This parser does NOT swallow trailing whitespace 610 | 611 | escapeCode :: forall m. TokenParsing m => m Char 612 | escapeCode = (charEsc <|> charNum <|> charAscii <|> charControl) "escape code" 613 | where 614 | charControl, charNum :: m Char 615 | charControl = (\c -> toEnum (fromEnum c - fromEnum '@')) <$> (char '^' *> (upper <|> char '@')) 616 | charNum = toEnum <$> num 617 | where 618 | num :: m Int 619 | num = bounded 10 maxchar 620 | <|> (char 'o' *> bounded 8 maxchar) 621 | <|> (char 'x' *> bounded 16 maxchar) 622 | maxchar = fromEnum (maxBound :: Char) 623 | 624 | bounded :: Int -> Int -> m Int 625 | bounded base bnd = List.foldl' (\x d -> base * x + digitToInt d) 0 626 | <$> bounded' (take base thedigits) (map digitToInt $ showIntAtBase base intToDigit bnd "") 627 | where 628 | thedigits :: [m Char] 629 | thedigits = map char ['0'..'9'] ++ map oneOf (List.transpose [['A'..'F'],['a'..'f']]) 630 | 631 | toomuch :: m a 632 | toomuch = unexpected "out-of-range numeric escape sequence" 633 | 634 | bounded', bounded'' :: [m Char] -> [Int] -> m [Char] 635 | bounded' dps@(zero:_) bds = skipSome zero *> ([] <$ notFollowedBy (choice dps) <|> bounded'' dps bds) 636 | <|> bounded'' dps bds 637 | bounded' [] _ = error "bounded called with base 0" 638 | bounded'' dps [] = [] <$ notFollowedBy (choice dps) <|> toomuch 639 | bounded'' dps (bd : bds) = let anyd :: m Char 640 | anyd = choice dps 641 | 642 | nomore :: m () 643 | nomore = notFollowedBy anyd <|> toomuch 644 | (low, ex : high) = splitAt bd dps 645 | in ((:) <$> choice low <*> atMost (length bds) anyd) <* nomore 646 | <|> ((:) <$> ex <*> ([] <$ nomore <|> bounded'' dps bds)) 647 | <|> if not (null bds) 648 | then (:) <$> choice high <*> atMost (length bds - 1) anyd <* nomore 649 | else empty 650 | atMost n p | n <= 0 = pure [] 651 | | otherwise = ((:) <$> p <*> atMost (n - 1) p) <|> pure [] 652 | 653 | charEsc :: m Char 654 | charEsc = choice $ parseEsc <$> escMap 655 | 656 | parseEsc (c,code) = code <$ char c 657 | escMap = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'" 658 | 659 | charAscii :: m Char 660 | charAscii = choice $ parseAscii <$> asciiMap 661 | 662 | parseAscii (asc,code) = try $ code <$ string asc 663 | asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2) 664 | ascii2codes, ascii3codes :: [String] 665 | ascii2codes = [ "BS","HT","LF","VT","FF","CR","SO" 666 | , "SI","EM","FS","GS","RS","US","SP"] 667 | ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK" 668 | ,"BEL","DLE","DC1","DC2","DC3","DC4","NAK" 669 | ,"SYN","ETB","CAN","SUB","ESC","DEL"] 670 | ascii2, ascii3 :: String 671 | ascii2 = "\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP" 672 | ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL" 673 | 674 | -- | This parser parses a natural number (a non-negative whole 675 | -- number). Returns the value of the number. The number can be 676 | -- specified in 'decimal', 'hexadecimal' or 677 | -- 'octal'. The number is parsed according to the grammar 678 | -- rules in the Haskell report. 679 | -- 680 | -- This parser does NOT swallow trailing whitespace. 681 | natural' :: TokenParsing m => m Integer 682 | natural' = highlight Number nat "natural" 683 | 684 | number :: TokenParsing m => Integer -> m Char -> m Integer 685 | number base baseDigit = 686 | List.foldl' (\x d -> base*x + toInteger (digitToInt d)) 0 <$> some baseDigit 687 | 688 | -- | This parser parses an integer (a whole number). This parser 689 | -- is like 'natural' except that it can be prefixed with 690 | -- sign (i.e. \'-\' or \'+\'). Returns the value of the number. The 691 | -- number can be specified in 'decimal', 'hexadecimal' 692 | -- or 'octal'. The number is parsed according 693 | -- to the grammar rules in the Haskell report. 694 | -- 695 | -- This parser does NOT swallow trailing whitespace. 696 | -- 697 | -- Also, unlike the 'integer' parser, this parser does not admit spaces 698 | -- between the sign and the number. 699 | 700 | integer' :: TokenParsing m => m Integer 701 | integer' = int "integer" 702 | {-# INLINE integer' #-} 703 | 704 | sign :: TokenParsing m => m (Integer -> Integer) 705 | sign = highlight Operator 706 | $ negate <$ char '-' 707 | <|> id <$ char '+' 708 | <|> pure id 709 | 710 | int :: TokenParsing m => m Integer 711 | int = {-token-} sign <*> highlight Number nat 712 | nat, zeroNumber :: TokenParsing m => m Integer 713 | nat = zeroNumber <|> decimal 714 | zeroNumber = char '0' *> (hexadecimal <|> octal <|> decimal <|> pure 0) "" 715 | 716 | floating :: TokenParsing m => m Scientific 717 | floating = decimal <**> fractExponent 718 | {-# INLINE floating #-} 719 | 720 | fractExponent :: forall m. TokenParsing m => m (Integer -> Scientific) 721 | fractExponent = (\fract expo n -> (fromInteger n + fract) * expo) <$> fraction <*> option 1 exponent' 722 | <|> (\expo n -> fromInteger n * expo) <$> exponent' 723 | where 724 | fraction :: m Scientific 725 | fraction = List.foldl' op 0 <$> (char '.' *> (some digit "fraction")) 726 | 727 | op f d = f + Sci.scientific (fromIntegral (digitToInt d)) (Sci.base10Exponent f - 1) 728 | 729 | exponent' :: m Scientific 730 | exponent' = ((\f e -> power (f e)) <$ oneOf "eE" <*> sign <*> (decimal "exponent")) "exponent" 731 | 732 | power = Sci.scientific 1 . fromInteger 733 | 734 | 735 | natFloating, zeroNumFloat, decimalFloat :: TokenParsing m => m (Either Integer Scientific) 736 | natFloating 737 | = char '0' *> zeroNumFloat 738 | <|> decimalFloat 739 | zeroNumFloat 740 | = Left <$> (hexadecimal <|> octal) 741 | <|> decimalFloat 742 | <|> pure 0 <**> try fractFloat 743 | <|> pure (Left 0) 744 | decimalFloat = decimal <**> option Left (try fractFloat) 745 | 746 | fractFloat :: TokenParsing m => m (Integer -> Either Integer Scientific) 747 | fractFloat = (Right .) <$> fractExponent 748 | {-# INLINE fractFloat #-} 749 | 750 | -- | Parses a non-negative whole number in the decimal system. Returns the 751 | -- value of the number. 752 | -- 753 | -- This parser does NOT swallow trailing whitespace 754 | decimal :: TokenParsing m => m Integer 755 | decimal = number 10 digit 756 | {-# INLINE decimal #-} 757 | 758 | -- | Parses a non-negative whole number in the hexadecimal system. The number 759 | -- should be prefixed with \"x\" or \"X\". Returns the value of the 760 | -- number. 761 | -- 762 | -- This parser does NOT swallow trailing whitespace 763 | hexadecimal :: TokenParsing m => m Integer 764 | hexadecimal = oneOf "xX" *> number 16 hexDigit 765 | {-# INLINE hexadecimal #-} 766 | 767 | -- | Parses a non-negative whole number in the octal system. The number 768 | -- should be prefixed with \"o\" or \"O\". Returns the value of the 769 | -- number. 770 | -- 771 | -- This parser does NOT swallow trailing whitespace 772 | octal :: TokenParsing m => m Integer 773 | octal = oneOf "oO" *> number 8 octDigit 774 | {-# INLINE octal #-} 775 | 776 | -- | This is a parser transformer you can use to disable syntax highlighting 777 | -- over a range of text you are parsing. 778 | newtype Unhighlighted m a = Unhighlighted { runUnhighlighted :: m a } 779 | deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing) 780 | 781 | instance Parsing m => Parsing (Unhighlighted m) where 782 | try (Unhighlighted m) = Unhighlighted $ try m 783 | {-# INLINE try #-} 784 | Unhighlighted m l = Unhighlighted $ m l 785 | {-# INLINE () #-} 786 | unexpected = Unhighlighted . unexpected 787 | {-# INLINE unexpected #-} 788 | eof = Unhighlighted eof 789 | {-# INLINE eof #-} 790 | notFollowedBy (Unhighlighted m) = Unhighlighted $ notFollowedBy m 791 | {-# INLINE notFollowedBy #-} 792 | 793 | 794 | instance MonadTrans Unhighlighted where 795 | lift = Unhighlighted 796 | {-# INLINE lift #-} 797 | 798 | instance MonadState s m => MonadState s (Unhighlighted m) where 799 | get = lift Class.get 800 | {-# INLINE get #-} 801 | put = lift . Class.put 802 | {-# INLINE put #-} 803 | 804 | instance MonadReader e m => MonadReader e (Unhighlighted m) where 805 | ask = lift Class.ask 806 | {-# INLINE ask #-} 807 | local f = Unhighlighted . Class.local f . runUnhighlighted 808 | {-# INLINE local #-} 809 | 810 | instance MonadWriter e m => MonadWriter e (Unhighlighted m) where 811 | tell = lift . Class.tell 812 | {-# INLINE tell #-} 813 | listen = Unhighlighted . Class.listen . runUnhighlighted 814 | {-# INLINE listen #-} 815 | pass = Unhighlighted . Class.pass . runUnhighlighted 816 | {-# INLINE pass #-} 817 | 818 | instance TokenParsing m => TokenParsing (Unhighlighted m) where 819 | nesting (Unhighlighted m) = Unhighlighted (nesting m) 820 | {-# INLINE nesting #-} 821 | someSpace = Unhighlighted someSpace 822 | {-# INLINE someSpace #-} 823 | semi = Unhighlighted semi 824 | {-# INLINE semi #-} 825 | highlight _ m = m 826 | {-# INLINE highlight #-} 827 | 828 | -- | This is a parser transformer you can use to disable the automatic trailing 829 | -- space consumption of a Token parser. 830 | newtype Unspaced m a = Unspaced { runUnspaced :: m a } 831 | deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing) 832 | 833 | instance Parsing m => Parsing (Unspaced m) where 834 | try (Unspaced m) = Unspaced $ try m 835 | {-# INLINE try #-} 836 | Unspaced m l = Unspaced $ m l 837 | {-# INLINE () #-} 838 | unexpected = Unspaced . unexpected 839 | {-# INLINE unexpected #-} 840 | eof = Unspaced eof 841 | {-# INLINE eof #-} 842 | notFollowedBy (Unspaced m) = Unspaced $ notFollowedBy m 843 | {-# INLINE notFollowedBy #-} 844 | 845 | instance MonadTrans Unspaced where 846 | lift = Unspaced 847 | {-# INLINE lift #-} 848 | 849 | instance MonadState s m => MonadState s (Unspaced m) where 850 | get = lift Class.get 851 | {-# INLINE get #-} 852 | put = lift . Class.put 853 | {-# INLINE put #-} 854 | 855 | instance MonadReader e m => MonadReader e (Unspaced m) where 856 | ask = lift Class.ask 857 | {-# INLINE ask #-} 858 | local f = Unspaced . Class.local f . runUnspaced 859 | {-# INLINE local #-} 860 | 861 | instance MonadWriter e m => MonadWriter e (Unspaced m) where 862 | tell = lift . Class.tell 863 | {-# INLINE tell #-} 864 | listen = Unspaced . Class.listen . runUnspaced 865 | {-# INLINE listen #-} 866 | pass = Unspaced . Class.pass . runUnspaced 867 | {-# INLINE pass #-} 868 | 869 | instance TokenParsing m => TokenParsing (Unspaced m) where 870 | nesting (Unspaced m) = Unspaced (nesting m) 871 | {-# INLINE nesting #-} 872 | someSpace = empty 873 | {-# INLINE someSpace #-} 874 | semi = Unspaced semi 875 | {-# INLINE semi #-} 876 | highlight h (Unspaced m) = Unspaced (highlight h m) 877 | {-# INLINE highlight #-} 878 | 879 | -- | This is a parser transformer you can use to disable the automatic trailing 880 | -- newline (but not whitespace-in-general) consumption of a Token parser. 881 | newtype Unlined m a = Unlined { runUnlined :: m a } 882 | deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing) 883 | 884 | instance Parsing m => Parsing (Unlined m) where 885 | try (Unlined m) = Unlined $ try m 886 | {-# INLINE try #-} 887 | Unlined m l = Unlined $ m l 888 | {-# INLINE () #-} 889 | unexpected = Unlined . unexpected 890 | {-# INLINE unexpected #-} 891 | eof = Unlined eof 892 | {-# INLINE eof #-} 893 | notFollowedBy (Unlined m) = Unlined $ notFollowedBy m 894 | {-# INLINE notFollowedBy #-} 895 | 896 | instance MonadTrans Unlined where 897 | lift = Unlined 898 | {-# INLINE lift #-} 899 | 900 | instance MonadState s m => MonadState s (Unlined m) where 901 | get = lift Class.get 902 | {-# INLINE get #-} 903 | put = lift . Class.put 904 | {-# INLINE put #-} 905 | 906 | instance MonadReader e m => MonadReader e (Unlined m) where 907 | ask = lift Class.ask 908 | {-# INLINE ask #-} 909 | local f = Unlined . Class.local f . runUnlined 910 | {-# INLINE local #-} 911 | 912 | instance MonadWriter e m => MonadWriter e (Unlined m) where 913 | tell = lift . Class.tell 914 | {-# INLINE tell #-} 915 | listen = Unlined . Class.listen . runUnlined 916 | {-# INLINE listen #-} 917 | pass = Unlined . Class.pass . runUnlined 918 | {-# INLINE pass #-} 919 | 920 | instance TokenParsing m => TokenParsing (Unlined m) where 921 | nesting (Unlined m) = Unlined (nesting m) 922 | {-# INLINE nesting #-} 923 | someSpace = skipMany (satisfy $ \c -> c /= '\n' && isSpace c) 924 | {-# INLINE someSpace #-} 925 | semi = Unlined semi 926 | {-# INLINE semi #-} 927 | highlight h (Unlined m) = Unlined (highlight h m) 928 | {-# INLINE highlight #-} 929 | 930 | #ifdef MIN_VERSION_parsec 931 | instance Parsec.Stream s m Char => TokenParsing (Parsec.ParsecT s u m) 932 | #endif 933 | 934 | #ifdef MIN_VERSION_attoparsec 935 | instance Att.Chunk t => TokenParsing (Att.Parser t) 936 | #endif 937 | 938 | instance TokenParsing ReadP.ReadP 939 | --------------------------------------------------------------------------------