├── .gitignore ├── ChangeLog.md ├── LICENSE ├── README.md ├── Setup.hs ├── bin ├── make-pkg └── test-all ├── docs ├── IntPair_submachine.png ├── MyChar_ATN.png ├── PExpression_ATN.png ├── PTuple_1.png ├── PTuple_2.png ├── PTuple_3.png ├── PTuple_4.png └── pads_to_atn_conversion.md ├── lang_demo.hs ├── package.yaml ├── src ├── Data │ └── Set │ │ └── Monad.hs ├── Language │ ├── ANTLR4.hs │ └── ANTLR4 │ │ ├── Boot │ │ ├── Parser.hs.boot │ │ ├── Quote.hs │ │ ├── SplicedParser.hs │ │ └── Syntax.hs │ │ ├── FileOpener.hs │ │ ├── G4.hs │ │ ├── Parser.hs │ │ ├── Parser.hs.bak │ │ ├── Regex.hs.boot │ │ └── Syntax.hs └── Text │ └── ANTLR │ ├── Allstar.hs │ ├── Allstar │ ├── ATN.hs │ ├── ParserGenerator.hs │ └── Stacks.hs │ ├── Common.hs │ ├── Grammar.hs │ ├── LL1.hs │ ├── LR.hs │ ├── Language.hs │ ├── Lex.hs │ ├── Lex │ ├── Automata.hs │ ├── DFA.hs │ ├── NFA.hs │ ├── Regex.hs │ └── Tokenizer.hs │ ├── MultiMap.hs │ ├── Parser.hs │ ├── Pretty.hs │ └── Set.hs ├── stack.yaml └── test ├── GrammarReader ├── GrammarReader.hs ├── output │ └── generated_atn_lookup.txt └── sample_grammars │ └── sample_bnf_grammar.txt ├── allstar ├── AllStarTests.hs ├── ConvertDFA.hs ├── ConvertP.hs ├── Main.hs └── README.md ├── atn ├── ATN.hs └── Main.hs ├── c ├── C.g4 ├── CParser.hs └── Main.hs ├── chisel ├── Language │ └── Chisel │ │ ├── Grammar.hs │ │ ├── Parser.hs │ │ └── Syntax.hs └── Main.hs ├── coreg4 ├── G4.hs ├── G4Fast.hs ├── G4Parser.hs ├── Hello.hs ├── HelloParser.hs └── Main.hs ├── g4 ├── DoubleSemi.hs ├── DoubleSemiP.hs ├── Empty.hs ├── EmptyP.hs ├── Main.hs ├── Optional.hs └── OptionalParser.hs ├── lexer └── Main.hs ├── ll └── Main.hs ├── lr ├── EOF.hs ├── EOFGrammar.hs ├── GLRInc.hs ├── GLRIncGrammar.hs ├── GLRPartial.hs ├── GLRPartialGrammar.hs └── Main.hs ├── sexpression ├── Grammar.hs ├── Parser.hs └── sexpression.hs ├── shared-hunit └── Text │ └── ANTLR │ └── HUnit.hs ├── shared └── Example │ └── Grammar.hs ├── simple ├── Grammar.hs └── Main.hs ├── swift ├── Grammar.hs ├── Parser.hs ├── Swift.g4 └── swift.hs ├── template └── Main.hs ├── unit ├── Main.hs └── PlusBug0.hs └── unit0 ├── DupTerms.hs ├── DupTermsGrammar.hs ├── Main.hs ├── Star0.hs ├── Star0Grammar.hs ├── Star1.hs └── Star1Grammar.hs /.gitignore: -------------------------------------------------------------------------------- 1 | /*.local 2 | /stack.yaml.lock 3 | cabal.config 4 | antlr-haskell.cabal 5 | /books 6 | .stack-work/ 7 | dist 8 | /dist-* 9 | cabal-dev 10 | *.o 11 | *.hi 12 | *.chi 13 | *.chs.h 14 | *.dyn_o 15 | *.dyn_hi 16 | .virtualenv 17 | .hpc 18 | .hsenv 19 | .cabal-sandbox/ 20 | cabal.sandbox.config 21 | *.prof 22 | *.aux 23 | *.hp 24 | *~ 25 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | * April 14, 2018: Moved to hpack's package.yaml format instead of native cabal 4 | file. 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2018, Karl Cronburg & Sam Lasser 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 
10 | 
11 | * Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 | 
16 | * Neither the name of Karl Cronburg, Sam Lasser, nor the names of other
17 | contributors may be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # antlr-haskell
2 | A Haskell implementation of ANTLR.
3 | 
4 | In implementing ANTLR we referenced the behavior of the original Java version
5 | (ANTLR4):
6 | [The Definitive ANTLR 4 Reference.](https://pragprog.com/book/tpantlr2/the-definitive-antlr-4-reference)
7 | However, we have taken considerable liberty in the design of this library
8 | compared to the workflow of the original Java version. In particular, the
9 | Haskell implementation follows these principles:
10 | 
11 | - Parsing backends should be interchangeable
12 | - GLR, LR, SLR, LL, ALL(\*)
13 | - Code should be first class and declarative
14 | - The implementation of G4 is metacircular
15 | - Regular expressions are interpreted
16 | - Implement algorithms from first principles
17 | - Set notation is used in implementing LL and LR algorithms.
18 | - Pure functional implementations of parsing algorithms can eventually support
19 | embedding of arbitrary (including IO) actions without breaking the predictive
20 | parsing abstraction.
21 | 
22 | More info can be found here:
23 | [https://www.cronburg.com/2018/antlr-haskell-project/](https://www.cronburg.com/2018/antlr-haskell-project/)
24 | 
25 | ## Build instructions
26 | 
27 | The library can be built with:
28 | 
29 | ```bash
30 | $ stack build # stack version 2.3.3
31 | $ stack test :simple
32 | ```
33 | 
34 | Or with cabal (tested on 3.0.0.0) like so:
35 | 
36 | ```bash
37 | $ hpack
38 | $ cabal update
39 | $ cabal configure
40 | $ cabal new-build
41 | $ cabal test sexpression
42 | ...
43 | Test suite sexpression: RUNNING...
44 | Test suite sexpression: PASS
45 | Test suite logged to:
46 | /antlr-haskell/dist-newstyle/build/x86_64-linux/ghc-8.6.5/antlr-haskell-0.1.0.1/t/sexpression/test/antlr-haskell-0.1.0.1-sexpression.log
47 | 1 of 1 test suites (1 of 1 test cases) passed. 
48 | ```
49 | 
50 | Here's a good set of tests to run when making changes to the library and
51 | you're unsure what those changes may affect:
52 | 
53 | ```bash
54 | stack test :simple :atn :ll :lr :sexpression :allstar :c
55 | ```
56 | 
57 | Then compare the results with those of the upstream branch. Some of the
58 | GLR features (incremental and partial tokenization, notably) are still experimental,
59 | and so there are known test cases which currently fail.
60 | 
61 | ## Version History
62 | 
63 | - September 25th, 2020. Released version 0.1.0.1: bug fixes, documentation, and
64 | library versioning updates.
65 | 
66 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple
2 | main = defaultMain
3 | -------------------------------------------------------------------------------- /bin/make-pkg: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 | SCRIPTDIR="$(cd "`dirname $0`"; pwd)"
3 | GITDIR="$(dirname "$SCRIPTDIR")"
4 | 
5 | # =============================================================================
6 | # MODIFY THESE PARAMETERS EACH TIME YOU PUBLISH A NEW VERSION:
7 | arch=x86_64-linux-tinfo6
8 | cVersion=3.0.1.0
9 | pVersion=0.1.0.1
10 | # =============================================================================
11 | 
12 | gzDir=$GITDIR/.stack-work/dist/$arch/Cabal-$cVersion
13 | gzFile=antlr-haskell-$pVersion.tar.gz
14 | 
15 | echo "Changing to directory $GITDIR"
16 | cd "$GITDIR"
17 | 
18 | # Generates .stack-work/dist/[arch]/Cabal-[version]/antlr-haskell-[version].tar.gz :
19 | echo "Generating $gzFile"
20 | stack sdist --pvp-bounds both
21 | 
22 | # Generates .stack-work/dist/[arch]/Cabal-[version]/doc/html/antlr-haskell/* :
23 | echo "Generating haddock documentation"
24 | stack haddock --haddock-hyperlink-source # --no-haddock-deps
25 | 
26 | # Copy source gzipped tar file to dist:
27 | mkdir -p dist
28 | cp $gzDir/$gzFile dist/
29 | 
30 | # Copy over the haddock documentation:
31 | destDir=dist/antlr-haskell-$pVersion/antlr-haskell-$pVersion-docs
32 | mkdir -p $destDir
33 | cp -r $gzDir/doc/html/antlr-haskell $destDir
34 | 
35 | # Generate tar file to upload to hackage:
36 | echo "Making haddock tar file for *manual* upload to https://hackage.haskell.org/package/antlr-haskell-$pVersion/maintain/docs"
37 | tarFile=antlr-haskell-$pVersion-docs.tar
38 | tar --format=ustar -cvf dist/$tarFile $destDir | grep index.html
39 | 
40 | -------------------------------------------------------------------------------- /bin/test-all: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 | 
3 | # && stack test antlr-haskell:Stacks \
4 | 
5 | OPTS=--ghc-options="-ddump-splices"
6 | 
7 | #TESTS="simple template atn coreg4 ll lr lexer g4 sexpression simple allstar chisel"
8 | TESTS=$(cd test
9 | for f in *; do
10 | if ! [[ "$f" = "shared"* ]]; then
11 | echo -n "$f "
12 | fi
13 | done)
14 | count=0; failures=""
15 | echo "Running tests: $TESTS"
16 | 
17 | for t in $TESTS; do
18 | stack test antlr-haskell:$t $OPTS
19 | if [ $? 
-eq 1 ]; then 20 | count=$((count + 1)) 21 | failures="$t $failures" 22 | fi 23 | done 24 | echo "Total number of failures: $count" 25 | echo "Failed on: " $failures 26 | 27 | -------------------------------------------------------------------------------- /docs/IntPair_submachine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/IntPair_submachine.png -------------------------------------------------------------------------------- /docs/MyChar_ATN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/MyChar_ATN.png -------------------------------------------------------------------------------- /docs/PExpression_ATN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PExpression_ATN.png -------------------------------------------------------------------------------- /docs/PTuple_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_1.png -------------------------------------------------------------------------------- /docs/PTuple_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_2.png -------------------------------------------------------------------------------- /docs/PTuple_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_3.png -------------------------------------------------------------------------------- /docs/PTuple_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cronburg/antlr-haskell/7a9367038eaa58f9764f2ff694269245fbebc155/docs/PTuple_4.png -------------------------------------------------------------------------------- /lang_demo.hs: -------------------------------------------------------------------------------- 1 | {-#LANGUAGE TypeFamilies #-} 2 | module LangDemo where 3 | 4 | -- If you never change the base value and are only adding expressions 5 | data BaseVal = IntV Int 6 | 7 | data BaseExp = ValE BaseVal 8 | | AddE BaseExp BaseExp 9 | 10 | data ExtExp = BaseE BaseExp 11 | | SubE ExtExp ExtExp 12 | 13 | 14 | 15 | class Lang lang where 16 | eval :: lang -> BaseVal 17 | 18 | instance Lang BaseExp where 19 | eval (ValE i) = i 20 | eval (AddE be1 be2) = 21 | let IntV i1 = eval be1 22 | IntV i2 = eval be2 23 | in IntV $ i1 + i2 24 | 25 | instance Lang ExtExp where 26 | eval (BaseE b) = eval b 27 | eval (SubE ee1 ee2) = 28 | let IntV i1 = eval ee1 29 | IntV i2 = eval ee2 30 | in IntV $ i1 - i2 31 | 32 | -- if you want to be able to change the set of values you can evaluate to when extending the language 33 | 34 | data BaseVal2 = IntV2 Int 35 | 36 | data BaseExp2 = ValBE2 BaseVal2 37 | | AddE2 BaseExp2 BaseExp2 38 | 39 | data ExtVal2 = StringV2 String 40 | 41 | data ExtExp2 = BaseE2 BaseExp2 42 | | ValEE2 ExtVal2 43 | | ConcatE2 ExtExp2 ExtExp2 44 | 45 | -- can use type 
families to suspend the value kind until later, indexing the family
46 | -- by the expression type so each language picks its own value representation.
47 | data family Val2 lang
48 | 
49 | class Lang2 lang where
50 | eval2 :: lang -> Val2 lang
51 | 
52 | -- define at some time t1 for the base language
53 | data instance Val2 BaseExp2 = BV2 BaseVal2
54 | 
55 | instance Lang2 BaseExp2 where
56 | eval2 (ValBE2 bv2) = BV2 bv2
57 | eval2 (AddE2 be1 be2) =
58 | let BV2 (IntV2 i1) = eval2 be1
59 | BV2 (IntV2 i2) = eval2 be2
60 | in BV2 $ IntV2 $ i1 + i2
61 | 
62 | -- extend at some later time t2 for the extended language (base values must
63 | -- still be representable, hence the BaseV2 constructor)
64 | data instance Val2 ExtExp2 = BaseV2 BaseVal2 | EV2 ExtVal2
65 | 
66 | instance Lang2 ExtExp2 where
67 | eval2 (BaseE2 be2) = case eval2 be2 of BV2 bv2 -> BaseV2 bv2
68 | eval2 (ValEE2 ev2) = EV2 ev2
69 | eval2 (ConcatE2 ee1 ee2) =
70 | case (eval2 ee1, eval2 ee2) of
71 | (EV2 (StringV2 s1), EV2 (StringV2 s2)) -> EV2 $ StringV2 $ s1 ++ s2
72 | _ -> error "trying to concat something other than strings!"
73 | 
-------------------------------------------------------------------------------- /src/Language/ANTLR4.hs: -------------------------------------------------------------------------------- 1 | {-|
2 | Module : Language.ANTLR4
3 | Description : Primary entrypoint for top-level antlr-haskell users
4 | Copyright : (c) Karl Cronburg, 2018
5 | License : BSD3
6 | Maintainer : karl@cs.tufts.edu
7 | Stability : experimental
8 | Portability : POSIX
9 | -}
10 | module Language.ANTLR4 (
11 | -- * Functions
12 | -- | Compile-time support for expanding LR-specific data types:
13 | mkLRParser
14 | -- | Other basic functions used in generated code:
15 | , (&&&)
16 | -- * Module exports
17 | -- | Most importantly for the Grammar type so that the quasiquoter can generate
18 | -- new grammars itself:
19 | , module Text.ANTLR.Grammar
20 | -- | Supporting data types and instances so that the spliced AST translator
21 | -- functions can talk about parse events, tokens, and EOF:
22 | , module Text.ANTLR.Parser
23 | -- | Regular expressions used during tokenization, as opposed to
24 | -- 'Language.ANTLR4.Regex' which are regexes used for G4 parsing:
25 | , module Text.ANTLR.Lex.Regex
26 | -- | The G4 quasiquoter and accompanying grammar:
27 | , g4, g4_parsers
28 | -- | For defining pretty-printable instances of quasiquoter-generated data types:
29 | , module Text.ANTLR.Pretty
30 | -- | Tokenizer:
31 | , module T
32 | -- * Type exports
33 | -- | Typeclass instances for quasiquoter-generated data types:
34 | , Hashable(..), Generic(..), Data(..), Lift(..)
35 | -- | Parser interface data types:
36 | , S.Set(..), T.Token(..), LRResult(..)
37 | -- | Grammar interface data types:
38 | , Directive(..), PRHS(..), TermAnnot(..) 
39 | ) 40 | where 41 | 42 | import Text.ANTLR.Grammar 43 | import Text.ANTLR.Parser 44 | 45 | import Text.ANTLR.LR as LR 46 | import Text.ANTLR.Lex.Tokenizer as T 47 | import Text.ANTLR.Set as S 48 | 49 | import Text.ANTLR.Set (Hashable(..), Generic(..)) 50 | import Text.ANTLR.Pretty 51 | import Control.Arrow ( (&&&) ) 52 | import Text.ANTLR.Lex.Regex 53 | 54 | import Language.ANTLR4.G4 55 | import Language.ANTLR4.Parser 56 | import Language.ANTLR4.Boot.Quote (mkLRParser, g4_parsers) 57 | 58 | import Data.Data (Data(..)) 59 | import Language.Haskell.TH.Lift (Lift(..)) 60 | 61 | import Language.ANTLR4.Boot.Syntax 62 | (Directive(..), PRHS(..), TermAnnot(..), ProdElem(..)) 63 | 64 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/Boot/Parser.hs.boot: -------------------------------------------------------------------------------- 1 | {-#LANGUAGE QuasiQuotes, TemplateHaskell#-} 2 | {-| 3 | Module : Language.ANTLR4.Boot.Parser 4 | Description : ANTLR4 boot parser written with parsec 5 | Copyright : (c) Karl Cronburg, 2018 6 | License : BSD3 7 | Maintainer : karl@cs.tufts.edu 8 | Stability : experimental 9 | Portability : POSIX 10 | -} 11 | module Language.ANTLR4.Boot.Parser where 12 | -- syntax (Exp) 13 | import Language.Haskell.TH 14 | import Language.Haskell.TH.Syntax 15 | import qualified Debug.Trace as D (trace, traceM) 16 | 17 | import qualified Language.Haskell.Meta as LHM 18 | 19 | -- monadic ops 20 | import Control.Monad (mapM) 21 | -- parsec 22 | import Text.ParserCombinators.Parsec 23 | import qualified Text.Parsec.String as PS 24 | import qualified Text.Parsec.Prim as PP 25 | import qualified Text.Parsec.Token as PT 26 | import qualified Text.Parsec.Expr as PE 27 | import qualified Text.Parsec.Combinator as PC 28 | import Text.ParserCombinators.Parsec.Language (haskellStyle, reservedOpNames, reservedNames 29 | , commentLine, commentStart, commentEnd) 30 | import Text.ParserCombinators.Parsec.Pos (newPos) 31 | -- text munging 32 | import Data.Char 33 | 34 | import Language.ANTLR4.Boot.Syntax 35 | import Language.ANTLR4.Regex (parseRegex, regexP) 36 | 37 | --traceM s = D.traceM ("[ANTLR4.Boot.Parser] " ++ s) 38 | traceM = return 39 | 40 | ------------------------------------------------------------------------------ 41 | -- Or-Try Combinator (tries two parsers, one after the other) 42 | (<||>) a b = try a <|> try b 43 | 44 | parseANTLR :: SourceName -> Line -> Column -> String -> Either ParseError [G4] 45 | parseANTLR fileName line column input = 46 | PP.parse result fileName input 47 | where 48 | 49 | result = do 50 | setPosition (newPos fileName line column) 51 | whiteSpace 52 | x <- gExps 53 | traceM $ show x 54 | eof <|> errorParse 55 | return x 56 | 57 | errorParse = do 58 | rest <- manyTill anyToken eof 59 | unexpected $ '"' : rest ++ "\"" 60 | 61 | gExps :: PS.Parser [G4] 62 | gExps = concat <$> many1 gExp 63 | 64 | gExp :: PS.Parser [G4] 65 | gExp = do 66 | traceM "gExp" 67 | whiteSpace 68 | xs <- grammarP <||> lexerP <||> prodP 69 | traceM $ show xs 70 | return xs 71 | 72 | grammarP :: PS.Parser [G4] 73 | grammarP = do 74 | reserved "grammar" 75 | h <- upper 76 | t <- manyTill anyToken (reservedOp ";") 77 | traceM $ show $ Grammar (h : t) 78 | return [Grammar (h : t)] 79 | 80 | -- | Assumptions: 81 | -- 82 | -- * Directives must be on a single line. 
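--   (For example, a directive such as @-> G4S.Grammar@ in a rule like
--   @decl1 : 'grammar' UpperID -> G4S.Grammar@ must not wrap onto a second
--   line, since 'directiveP' below consumes characters up to the first newline.)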
83 | -- * 84 | prodP :: PS.Parser [G4] 85 | prodP = do 86 | h <- lower 87 | t <- manyTill anyChar (reservedOp ":") 88 | traceM $ "[prodP] " ++ trim (h : t) 89 | rhsList <- sepBy1 rhsP (traceM "rhsList..." >> reservedOp "|") 90 | traceM $ "[prodP.rhsList] " ++ show rhsList 91 | reservedOp ";" 92 | traceM "prodP returning..." 93 | return [Prod (trim (h : t)) (concat rhsList)] 94 | where 95 | rhsP = do 96 | mPred <- optionMaybe predP 97 | traceM $ "[rhsP0] " ++ show mPred 98 | alphaList <- many alphaP 99 | traceM $ "[rhsP] " ++ show alphaList 100 | mMute <- optionMaybe muteP 101 | pDir <- optionMaybe directiveP 102 | whiteSpace 103 | return [PRHS alphaList mPred mMute pDir] 104 | alphaP = termP <||> nonTermP 105 | termP = do 106 | whiteSpace 107 | char '\'' 108 | traceM "[prodP.termP.s] " 109 | s <- manyTill anyChar $ char '\'' 110 | whiteSpace 111 | traceM $ "[prodP.termP.s] " ++ show s 112 | return $ GTerm NoAnnot s 113 | nonTermP = do 114 | s <- identifier 115 | traceM $ "[nonTermP] " ++ s 116 | return $ GNonTerm NoAnnot s 117 | predP = do 118 | traceM "[predP]" 119 | reservedOp "{" 120 | haskellParseExpTill "}?" 121 | muteP = do 122 | traceM "[muteP]" 123 | reservedOp "{" 124 | haskellParseExpTill "}" 125 | directiveP = do 126 | whiteSpace 127 | symbol "->" 128 | whiteSpace 129 | str <- manyTill anyChar (char '\n') 130 | whiteSpace 131 | traceM $ "[directiveP]" ++ show str 132 | return (toDirective $ trim $ str) 133 | 134 | -- TODO: not use getInput 135 | rEOF = do 136 | y <- getInput 137 | return (case y of 138 | '-':'>':_ -> True 139 | ';':_ -> True 140 | _ -> False) 141 | 142 | toDirective [] = LowerD [] 143 | toDirective s@(h:rst) 144 | | isUpper h = UpperD s 145 | | otherwise = LowerD s 146 | 147 | lexerP :: PS.Parser [G4] 148 | lexerP = do 149 | mAnnot <- optionMaybe annot 150 | h <- upper 151 | t <- manyTill anyChar (reservedOp ":") 152 | traceM $ "Lexeme Name: " ++ (h:t) 153 | r <- regexP rEOF 154 | traceM $ "Regex: " ++ show r 155 | optionMaybe $ symbol "->" 156 | mDir <- optionMaybe $ manyTill anyToken (reservedOp ";") 157 | return $ [Lex mAnnot (trim (h : t)) (LRHS r (toDirective <$> trim <$> mDir))] 158 | where 159 | annot = fragment -- <||> .... 
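-- (Further lexical annotations, if ever added, would be chained onto 'annot'
-- above with <||>; 'Fragment' is currently the only annotation recognized.)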
160 | fragment = do
161 | reserved "fragment"
162 | return Fragment
163 | 
164 | -- Parser combinators end
165 | haskellParseExpTill :: String -> PS.Parser Exp
166 | haskellParseExpTill op = do {
167 | _ <- whiteSpace
168 | ; str <- manyTill anyChar (reservedOp op)
169 | ; haskellParseExp str
170 | }
171 | haskellParseExp :: String -> PS.Parser Exp
172 | haskellParseExp str =
173 | case LHM.parseExp str of
174 | Left err -> error err -- PP.parserZero
175 | Right expTH -> return expTH
176 | 
177 | whiteSpaceOrComment = comment <||> whiteSpace
178 | where
179 | comment = do
180 | whiteSpace
181 | reservedOp "//"
182 | (manyTill anyChar $ try $ string "\n") <||> (manyTill anyChar $ try $ string "\r")
183 | return ()
184 | 
185 | ------------------------------------------------------------------------------
186 | -- Lexer
187 | lexer :: PT.TokenParser ()
188 | lexer = PT.makeTokenParser $ haskellStyle
189 | { reservedOpNames = [";", "|", ":", "{", "}", "}?", "'"]
190 | , reservedNames = ["grammar"]
191 | , commentLine = "//"
192 | , commentStart = "/*"
193 | , commentEnd = "*/"
194 | }
195 | 
196 | whiteSpace = PT.whiteSpace lexer
197 | identifier = PT.identifier lexer
198 | operator = PT.operator lexer
199 | reserved = PT.reserved lexer
200 | reservedOp = PT.reservedOp lexer
201 | charLiteral = PT.charLiteral lexer
202 | stringLiteral = PT.stringLiteral lexer
203 | integer = PT.integer lexer
204 | natural = PT.natural lexer
205 | commaSep1 = PT.commaSep1 lexer
206 | parens = PT.parens lexer
207 | braces = PT.braces lexer
208 | brackets = PT.brackets lexer
209 | symbol = PT.symbol lexer
210 | 
211 | expr = PE.buildExpressionParser table term
212 | <?> "expression"
213 | term = natural
214 | <?> "simple expression"
215 | table = [ [prefix "-" negate, prefix "+" id ] ]
216 | prefix name fun = PE.Prefix $ reservedOp name >> return fun
217 | 
218 | -- http://stackoverflow.com/a/6270382
219 | trim xs = dropSpaceTail "" $ dropWhile isSpace xs
220 | 
221 | dropSpaceTail maybeStuff "" = ""
222 | dropSpaceTail maybeStuff (x:xs)
223 | | isSpace x = dropSpaceTail (x:maybeStuff) xs
224 | | null maybeStuff = x : dropSpaceTail "" xs
225 | | otherwise = reverse maybeStuff ++ x : dropSpaceTail "" xs
226 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/Boot/Syntax.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveLift, DeriveAnyClass, DeriveGeneric, OverloadedStrings #-}
2 | {-|
3 | Module : Language.ANTLR4.Boot.Syntax
4 | Description : Both the boot and core syntax data types for G4
5 | Copyright : (c) Karl Cronburg, 2018
6 | License : BSD3
7 | Maintainer : karl@cs.tufts.edu
8 | Stability : experimental
9 | Portability : POSIX
10 | -}
11 | module Language.ANTLR4.Boot.Syntax
12 | ( G4(..), PRHS(..), ProdElem(..), GAnnot(..)
13 | , Directive(..)
14 | , LRHS(..), Regex(..), isGTerm, isGNonTerm
15 | , TermAnnot(..), isMaybeAnnot, isNoAnnot, annot
16 | , prodElemSymbol
17 | ) where
18 | import Text.ANTLR.Grammar ()
19 | import Language.Haskell.TH.Lift (Lift(..))
20 | 
21 | import Language.Haskell.TH.Syntax (Exp)
22 | import qualified Language.Haskell.TH.Syntax as S
23 | 
24 | import Text.ANTLR.Set ( Hashable(..), Generic(..) 
) 25 | import Text.ANTLR.Pretty -- (rshow, Prettify(..), pStr, pshow, pStr') 26 | 27 | -- | .g4 style syntax representation 28 | data G4 = -- | Grammar name declaration in g4 29 | Grammar { gName :: String -- ^ Name 30 | } 31 | -- | One or more g4 productions 32 | | Prod { pName :: String -- ^ Production's name 33 | , patterns :: [PRHS] -- ^ List of rules to match on 34 | } 35 | -- | A single, possibly annotated, g4 lexical rule 36 | | Lex { annotation :: Maybe GAnnot -- ^ Lexical annotation (@fragment@) 37 | , lName :: String -- ^ Lexical rule name 38 | , pattern :: LRHS -- ^ The regex to match on 39 | } 40 | deriving (Show, Eq, Lift, Generic, Hashable) 41 | 42 | instance Prettify G4 where 43 | prettify (Grammar gn) = do 44 | pStr "grammar " 45 | pStr' gn 46 | prettify (Prod n ps) = do 47 | pStr' n 48 | pStr " -> " 49 | incrIndent $ length n + 4 50 | pListLines ps 51 | incrIndent $ 0 - (length n + 4) 52 | prettify (Lex annot ln (LRHS regex dir)) = do 53 | pStr' ln 54 | pStr " -> " 55 | incrIndent $ length ln + 4 56 | prettify regex 57 | incrIndent $ 0 - (length ln + 4) 58 | pStr "(" 59 | prettify dir 60 | pStr ")" 61 | 62 | 63 | instance Lift Exp 64 | 65 | -- | The right-hand side of a G4 production rule. 66 | data PRHS = PRHS 67 | { alphas :: [ProdElem] -- ^ In-order list of elements defining this rule 68 | , pred :: Maybe Exp -- ^ Arbitrary boolean predicate to test whether or not this rule should fire 69 | , mutator :: Maybe Exp -- ^ Arbitrary mutator to run when this rule fires 70 | , pDirective :: Maybe Directive -- ^ How to construct a Haskell type when this rules fires 71 | } deriving (Show, Eq, Lift, Generic) 72 | 73 | instance Prettify PRHS where 74 | prettify (PRHS as pred mut pDir) = do 75 | prettify as 76 | pStr "(" 77 | pStr' $ show pred; pStr "," 78 | pStr' $ show mut; pStr "," 79 | prettify pDir; pStr ")" 80 | 81 | -- | Antiquoted (or g4-embedded) string that goes to the right of an arrow in 82 | -- a g4 production rule. This specifies how to construct a Haskell type. 83 | data Directive = 84 | UpperD String -- ^ Probably a Haskell data constructor 85 | | LowerD String -- ^ Probably just a Haskell function to call 86 | | HaskellD String -- ^ Arbitrary antiquoted Haskell code embedded in the G4 grammar 87 | deriving (Show, Eq, Ord, Lift, Generic, Hashable) 88 | 89 | instance Prettify Directive where prettify = rshow 90 | 91 | instance Hashable PRHS where 92 | hashWithSalt salt prhs = salt `hashWithSalt` alphas prhs 93 | 94 | -- | Annotations on a term (nonterminal or terminal) for extending our G4 95 | -- BNF-like syntax with regular expression modifiers. 96 | data TermAnnot = 97 | Regular Char -- ^ Regular expression modifier (e.g. +, ?, *) 98 | | NoAnnot -- ^ Term is not annotated with anything 99 | deriving (Show, Eq, Ord, Lift, Generic, Hashable) 100 | 101 | instance Prettify TermAnnot where 102 | prettify NoAnnot = return () 103 | prettify (Regular c) = pStr' [c] 104 | 105 | -- | Get the annotation from a 'ProdElem' 106 | annot :: ProdElem -> TermAnnot 107 | annot (GTerm a _) = a 108 | annot (GNonTerm a _) = a 109 | 110 | -- | Is this 'TermAnnot' a maybe? 111 | isMaybeAnnot :: TermAnnot -> Bool 112 | isMaybeAnnot (Regular '?') = True 113 | isMaybeAnnot _ = False 114 | 115 | -- | Does this 'TermAnnot' have no annotation? 
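--
-- For example, @isNoAnnot NoAnnot == True@ while
-- @isNoAnnot (Regular '*') == False@.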
116 | isNoAnnot :: TermAnnot -> Bool 117 | isNoAnnot NoAnnot = True 118 | isNoAnnot _ = False 119 | 120 | -- | A single production element with any accompanying regex annotation 121 | data ProdElem = 122 | GTerm TermAnnot String -- ^ G4 terminal 123 | | GNonTerm TermAnnot String -- ^ G4 nonterminal 124 | deriving (Show, Eq, Ord, Lift, Generic, Hashable) 125 | 126 | instance Prettify ProdElem where 127 | prettify (GTerm annot s) = do 128 | pStr' s 129 | prettify annot 130 | prettify (GNonTerm annot s) = do 131 | pStr' s 132 | prettify annot 133 | 134 | prodElemSymbol (GTerm _ s) = s 135 | prodElemSymbol (GNonTerm _ s) = s 136 | 137 | -- | Is this a terminal G4 element? 138 | isGTerm (GTerm _ _) = True 139 | isGTerm _ = False 140 | 141 | -- | Is this a nonterminal G4 element? 142 | isGNonTerm (GNonTerm _ _) = True 143 | isGNonTerm _ = False 144 | 145 | -- | Allowable annotations on a lexical production rule 146 | data GAnnot = Fragment -- ^ For now the only annotation is @fragment@. 147 | deriving (Show, Eq, Lift, Generic, Hashable) 148 | 149 | -- | Right-hand side of a lexical G4 rule 150 | data LRHS = LRHS 151 | { regex :: Regex Char -- ^ A regular expression over characters as tokens. 152 | , directive :: Maybe Directive -- ^ Optional directive: @Nothing@ is equivalent to @(Just "String")@. 153 | } 154 | deriving (Show, Eq, Lift, Generic, Hashable) 155 | 156 | -- | G4 representation of a regex (G4 regex syntax, not regexs used by tokenizer) 157 | data Regex s = 158 | Epsilon -- ^ Consume no input 159 | | Literal [s] -- ^ Match on a literal string (sequence of characters) 160 | | Union [Regex s] -- ^ Match on any 161 | | Concat [Regex s] -- ^ Match in sequence 162 | | Kleene (Regex s) -- ^ Match zero or more times 163 | | PosClos (Regex s) -- ^ Match one or more times 164 | | Question (Regex s) -- ^ Match zero or one time. 165 | | CharSet [s] -- ^ Match once on any of the characters 166 | | Negation (Regex s) -- ^ Match anything that doesn't match this 167 | | Named String -- ^ A reference to some other regex (need to track an environment) 168 | deriving (Lift, Eq, Show, Generic, Hashable) 169 | -- TODO: Lex regexs (e.g. complement sets, escape chars, ...) 170 | -- TODO: Set s, and ranges of characters 171 | 172 | instance (Show s, Prettify s) => Prettify (Regex s) where 173 | prettify Epsilon = pStr "ε" 174 | prettify (Literal ss) = do 175 | pStr "\"" 176 | mapM prettify ss 177 | pStr "\"" 178 | prettify rest = pStr' $ show rest 179 | 180 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/FileOpener.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, TemplateHaskell, ScopedTypeVariables, 2 | OverloadedStrings #-} 3 | {-| 4 | Module : Language.ANTLR4.FileOpener 5 | Description : Quasiquoter for reading files by name at compile time 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | Just do the following. 
It'll make sense: 13 | 14 | @ 15 | foo = id 16 | file_contents = [open| test/file.foo |] 17 | @ 18 | -} 19 | module Language.ANTLR4.FileOpener ( 20 | -- * File opening quasiquoter 21 | open 22 | ) where 23 | import qualified Language.Haskell.TH as TH 24 | import Language.Haskell.TH.Syntax (Exp(..), addDependentFile) 25 | import Language.Haskell.TH.Quote (QuasiQuoter(..)) 26 | 27 | import Data.Text (strip, splitOn, pack, unpack) 28 | 29 | -- | A quasiquoter for opening a file on disk, reading its contents, and running 30 | -- a function by the same name as the file extension. e.g.: 31 | -- 32 | -- @ 33 | -- foo = id 34 | -- file_contents = [open| test/file.foo |] 35 | -- @ 36 | -- 37 | -- @foo@ gets called on the contents of files with the extension @.foo@. 38 | open :: QuasiQuoter 39 | open = QuasiQuoter 40 | { quoteExp = openExp 41 | , quotePat = error "parse pattern" 42 | , quoteType = error "parse type" 43 | , quoteDec = error "parse decl?" 44 | } 45 | 46 | -- | Reads a file and runs a function with the name of the file extension, 47 | -- returning the result for use by a quasiquoter. 48 | openExp :: String -> TH.Q TH.Exp 49 | openExp s = let 50 | fn = unpack $ strip $ pack s 51 | ext = unpack $ last $ splitOn "." $ pack fn 52 | in do 53 | file_contents <- TH.runIO (readFile fn) 54 | addDependentFile fn 55 | [| $(return $ TH.VarE $ TH.mkName ext) file_contents |] 56 | 57 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/G4.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable 4 | , TemplateHaskell #-} 5 | {-| 6 | Module : Language.ANTLR4.G4 7 | Description : Core G4 quasiquoter for antlr-haskell 8 | Copyright : (c) Karl Cronburg, 2018 9 | License : BSD3 10 | Maintainer : karl@cs.tufts.edu 11 | Stability : experimental 12 | Portability : POSIX 13 | 14 | Until better haddock integration is developed, you'll need to look 15 | at the source for this module to see the G4 grammar for G4. 
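
A minimal, illustrative quasiquote (this toy grammar is made up for the sake
of example and is not part of the library):

@
[g4| grammar Hello;
     greeting : 'hello' World -> Greeting;
     World : [a-zA-Z]+ -> String;
|]
@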
16 | -} 17 | module Language.ANTLR4.G4 where 18 | 19 | import Control.Arrow ( (&&&) ) 20 | import Data.Char (isUpper) 21 | 22 | import Text.ANTLR.Common 23 | import Text.ANTLR.Grammar 24 | import Text.ANTLR.Parser 25 | import qualified Text.ANTLR.LR as LR 26 | import Text.ANTLR.Lex.Tokenizer as T 27 | import qualified Text.ANTLR.Set as S 28 | import Text.ANTLR.Set (Hashable(..), Generic(..)) 29 | import Text.ANTLR.Pretty 30 | import Text.ANTLR.Lex.Regex (regex2dfa) 31 | import Data.Data (Data(..)) 32 | import Language.Haskell.TH.Lift (Lift(..)) 33 | 34 | import Language.Haskell.TH.Quote (QuasiQuoter(..)) 35 | import qualified Language.Haskell.TH as TH 36 | import Language.ANTLR4.Boot.Quote (antlr4) 37 | import Language.ANTLR4.Syntax 38 | import qualified Language.ANTLR4.Boot.Syntax as G4S 39 | import qualified Language.ANTLR4.Boot.Quote as G4Q 40 | 41 | import Debug.Trace as D 42 | 43 | char :: String -> Char 44 | char = head 45 | 46 | append :: String -> String -> String 47 | append = (++) 48 | 49 | list a = [a] 50 | cons = (:) 51 | lexemeDirective r d = G4S.LRHS r (Just d) 52 | lexemeNoDir r = G4S.LRHS r Nothing 53 | lexDecl = G4S.Lex Nothing 54 | lexFragment = G4S.Lex (Just G4S.Fragment) 55 | 56 | literalRegex :: String -> G4S.Regex Char 57 | literalRegex = G4S.Literal 58 | 59 | prodDirective as d = G4S.PRHS as Nothing Nothing (Just d) 60 | prodNoDir as = G4S.PRHS as Nothing Nothing Nothing 61 | prodNoAlphas d = G4S.PRHS [] Nothing Nothing (Just d) 62 | prodNothing = G4S.PRHS [] Nothing Nothing Nothing 63 | 64 | list2 a b = [a,b] 65 | range a b = [a .. b] 66 | 67 | gterm = G4S.GTerm G4S.NoAnnot 68 | gnonTerm = G4S.GNonTerm G4S.NoAnnot 69 | 70 | maybeGTerm = G4S.GTerm (G4S.Regular '?') 71 | maybeGNonTerm = G4S.GNonTerm (G4S.Regular '?') 72 | 73 | starGTerm = G4S.GTerm (G4S.Regular '*') 74 | starGNonTerm = G4S.GNonTerm (G4S.Regular '*') 75 | 76 | plusGTerm = G4S.GTerm (G4S.Regular '+') 77 | plusGNonTerm = G4S.GNonTerm (G4S.Regular '+') 78 | 79 | regexAnyChar = G4S.Negation (G4S.CharSet []) 80 | 81 | dQual [] = G4S.UpperD [] 82 | dQual xs = case last xs of 83 | [] -> G4S.UpperD $ concatWith "." xs 84 | (a:as) 85 | | isUpper a -> G4S.UpperD $ concatWith "." xs 86 | | otherwise -> G4S.LowerD $ concatWith "." xs 87 | 88 | qDir l u = [l,u] 89 | 90 | haskellD = G4S.HaskellD 91 | 92 | -- Force the above declarations (and their types) into scope: 93 | $( return [] ) 94 | 95 | [antlr4| 96 | grammar G4; 97 | 98 | decls : decl1 ';' -> list 99 | | decl1 ';' decls -> cons 100 | ; 101 | 102 | decl1 : 'grammar' UpperID -> G4S.Grammar 103 | | LowerID ':' prods -> G4S.Prod 104 | | UpperID ':' lexemeRHS -> lexDecl 105 | | 'fragment' UpperID ':' lexemeRHS -> lexFragment 106 | ; 107 | 108 | prods : prodRHS -> list 109 | | prodRHS '|' prods -> cons 110 | ; 111 | 112 | lexemeRHS : regexes1 '->' directive -> lexemeDirective 113 | | regexes1 -> lexemeNoDir 114 | ; 115 | 116 | prodRHS : alphas '->' directive -> prodDirective 117 | | alphas -> prodNoDir 118 | | '->' directive -> prodNoAlphas 119 | | -> prodNothing 120 | ; 121 | 122 | directive : qDirective -> dQual 123 | | UpperID -> G4S.UpperD 124 | | LowerID -> G4S.LowerD 125 | | '${' HaskellExp '}' -> haskellD 126 | ; 127 | 128 | qDirective : UpperID '.' qDot -> qDir 129 | ; 130 | 131 | qDot : UpperID 132 | | LowerID 133 | ; 134 | 135 | alphas : alpha -> list 136 | | alpha alphas -> cons 137 | | '(' alphas ')' 138 | | '(' alphas ')' '?' 139 | | '(' alphas ')' '*' 140 | | '(' alphas ')' '+' 141 | ; 142 | 143 | alpha : Literal '?' 
-> maybeGTerm 144 | | LowerID '?' -> maybeGNonTerm 145 | | UpperID '?' -> maybeGNonTerm 146 | | Literal '*' -> starGTerm 147 | | LowerID '*' -> starGNonTerm 148 | | UpperID '*' -> starGNonTerm 149 | | Literal '+' -> plusGTerm 150 | | LowerID '+' -> plusGNonTerm 151 | | UpperID '+' -> plusGNonTerm 152 | | Literal -> gterm 153 | | LowerID -> gnonTerm 154 | | UpperID -> gnonTerm 155 | ; 156 | 157 | // Regex Stuff: 158 | 159 | regexes1 : regexes -> G4S.Concat 160 | ; 161 | 162 | regexes : regex -> list 163 | | regex regexes -> cons 164 | ; 165 | 166 | regex : regex1 '?' -> G4S.Question 167 | | regex1 '*' -> G4S.Kleene 168 | | regex1 '+' -> G4S.PosClos 169 | | '~' regex1 -> G4S.Negation 170 | | regex1 -> id 171 | ; 172 | 173 | regex1 : '[' charSet ']' -> G4S.CharSet 174 | | Literal -> literalRegex 175 | | UpperID -> G4S.Named 176 | | '(' regexes1 ')' 177 | | unionR -> G4S.Union 178 | | '.' -> regexAnyChar 179 | ; 180 | 181 | unionR : regex '|' regex -> list2 182 | | regex '|' unionR -> cons 183 | ; 184 | 185 | charSet : charSet1 -> id 186 | | charSet1 charSet -> append 187 | ; 188 | 189 | charSet1 : SetChar '-' SetChar -> range 190 | | SetChar -> list 191 | | EscapedChar -> list 192 | ; 193 | 194 | UpperID : [A-Z][a-zA-Z0-9_]* -> String; 195 | LowerID : [a-z][a-zA-Z0-9_]* -> String; 196 | Literal : '\'' ( ( '\\\'' ) | (~ ( '\'' ) ) )+ '\'' -> stripQuotesReadEscape; 197 | LineComment : '//' (~ '\n')* '\n' -> String; 198 | 199 | HaskellExp : ( ~ '}' )+ -> String; 200 | 201 | SetChar : ~ ']' -> char ; 202 | WS : [ \t\n\r\f\v]+ -> String; 203 | EscapedChar : '\\' [tnrfv] -> readEscape ; 204 | 205 | |] 206 | 207 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable 4 | , TemplateHaskell #-} 5 | {-| 6 | Module : Language.ANTLR4.Parser 7 | Description : Core G4 quasiquoter (with parsers) for antlr-haskell 8 | Copyright : (c) Karl Cronburg, 2019 9 | License : BSD3 10 | Maintainer : karl@cs.tufts.edu 11 | Stability : experimental 12 | Portability : POSIX 13 | 14 | -} 15 | module Language.ANTLR4.Parser (g4) where 16 | 17 | import Control.Arrow ( (&&&) ) 18 | import Data.Char (isUpper) 19 | 20 | import Text.ANTLR.Common 21 | import Text.ANTLR.Grammar 22 | import Text.ANTLR.Parser 23 | import qualified Text.ANTLR.LR as LR 24 | import Text.ANTLR.Lex.Tokenizer as T hiding (tokenize) 25 | import qualified Text.ANTLR.Set as S 26 | import Text.ANTLR.Set (Hashable(..), Generic(..)) 27 | import Text.ANTLR.Pretty 28 | import Text.ANTLR.Lex.Regex (regex2dfa) 29 | import Data.Data (Data(..)) 30 | import Language.Haskell.TH.Lift (Lift(..)) 31 | 32 | import Language.Haskell.TH.Quote (QuasiQuoter(..)) 33 | import qualified Language.Haskell.TH as TH 34 | import Language.ANTLR4.Boot.Quote (antlr4, g4_parsers) 35 | import Language.ANTLR4.Syntax 36 | import qualified Language.ANTLR4.Boot.Syntax as G4S 37 | import qualified Language.ANTLR4.Boot.Quote as G4Q 38 | import Language.ANTLR4.G4 39 | 40 | import Debug.Trace as D 41 | 42 | -- Splice the parsers for the grammar we defined in Language.ANTLR4.G4 43 | $(g4_parsers g4AST g4Grammar) 44 | 45 | {- isWhitespace (Just T_LineComment) = True 46 | isWhitespace (Just T_WS) = True 47 | isWhitespace Nothing = True 48 | 
isWhitespace _ = False -} 49 | 50 | isWhitespace T_LineComment = True 51 | isWhitespace T_WS = True 52 | isWhitespace _ = False 53 | 54 | g4_codeGen :: String -> TH.Q [TH.Dec] 55 | g4_codeGen input = do 56 | loc <- TH.location 57 | let fileName = TH.loc_filename loc 58 | let (line,column) = TH.loc_start loc 59 | 60 | {- case allstarParse (filter (not . isWhitespace . stripEOF . getSymbol) (tokenize input)) of 61 | Left err -> error err 62 | Right ast -> codeGen ast -} 63 | case glrParse isWhitespace input of 64 | (LR.ResultAccept ast) -> codeGen ast 65 | LR.ResultSet s -> 66 | if S.size s == 1 67 | then codeGen $ fromAccept (S.findMin s) 68 | else D.trace (pshow' s) $ codeGen $ fromAccept (S.findMin s) 69 | err -> error $ pshow' err 70 | 71 | fromAccept (LR.ResultAccept ast) = ast 72 | fromAccept err = error $ pshow' err 73 | 74 | codeGen ast = G4Q.g4_decls $ ast2decls ast 75 | 76 | -- | Entrypoint to the G4 quasiquoter. Currently only supports declaration-level 77 | -- Haskell generation of G4 grammars using a GLR parser. The output grammars 78 | -- need not use a GLR parser themselves. 79 | g4 :: QuasiQuoter 80 | g4 = QuasiQuoter 81 | (error "parse exp") 82 | (error "parse pattern") 83 | (error "parse type") 84 | g4_codeGen 85 | 86 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/Regex.hs.boot: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveLift, DeriveGeneric, DeriveAnyClass #-} 2 | {-| 3 | Module : Language.ANTLR4.Regex 4 | Description : Parsec parser for G4 regular expressions 5 | Copyright : (c) Karl Cronburg, 2018 6 | License : BSD3 7 | Maintainer : karl@cs.tufts.edu 8 | Stability : experimental 9 | Portability : POSIX 10 | -} 11 | module Language.ANTLR4.Regex (parseRegex, regexP) where 12 | import Language.Haskell.TH.Lift (Lift(..)) 13 | import Text.ParserCombinators.Parsec 14 | import qualified Text.Parsec.String as PS 15 | import qualified Text.Parsec.Prim as PP 16 | import qualified Text.Parsec.Token as PT 17 | import qualified Text.Parsec.Expr as PE 18 | import qualified Text.Parsec.Combinator as PC 19 | import Data.Char (ord) 20 | import Text.ParserCombinators.Parsec.Language 21 | import qualified Debug.Trace as D -- trace, traceM 22 | 23 | import Text.ANTLR.Set ( Hashable(..), Generic(..) ) 24 | import Language.ANTLR4.Boot.Syntax ( Regex(..) ) 25 | 26 | --traceM s = D.traceM ("[Regex] " ++ s) 27 | traceM = return 28 | 29 | (<||>) a b = try a <|> try b 30 | 31 | rEOF' = do 32 | (eof >>= return . const True) 33 | <||> 34 | (return False) 35 | 36 | -- | Entrypoint for parsing a G4 regex. This does not get called 37 | -- by the spliced parser, and is here for posterity (and debugging / 38 | -- backwards compatibility). 39 | parseRegex :: String -> Either ParseError (Regex Char) 40 | parseRegex input = PP.parse (regexP rEOF') "" input 41 | 42 | type RegexC = Regex Char 43 | 44 | -- convert list of sequential regexes into a single regex 45 | list2regex [] = Epsilon 46 | list2regex [x] = x 47 | list2regex xs = Concat xs 48 | 49 | -- | G4 regex parser, as used exclusively by the boot parser. 
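--
-- For example, @parseRegex "[0-9]+"@ should yield
-- @PosClos (CharSet "0123456789")@ (illustrative, from reading the
-- combinators below).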
50 | --
51 | -- rEOF is a parser to indicate when it's okay to stop parsing the regex.
52 | regexP :: PS.Parser Bool -> PS.Parser RegexC
53 | regexP rEOF = let
54 | 
55 | regexP' :: PS.Parser [RegexC]
56 | regexP' = do
57 | traceM $ "regexP0"
58 | r <- extendedRegex regexElement
59 | traceM $ "regexP: " ++ show r
60 | whiteSpace
61 | b <- try rEOF
62 | traceM $ "regexP2: " ++ show b
63 | y <- getInput
64 | traceM $ "regexP3: " ++ show y
65 | if b
66 | then return [r]
67 | else do
68 | rs <- regexP'
69 | return $ r:rs
70 | <?> "regexP"
71 | 
72 | in do
73 | xs <- regexP'
74 | return $ list2regex xs
75 | 
76 | extendedRegex foo = do
77 | whiteSpace
78 | negation <- optionMaybe (symbol "~")
79 | whiteSpace
80 | r <- foo
81 | whiteSpace
82 | p <- optionMaybe (try $ satisfy (`elem` "+*?"))
83 | whiteSpace
84 | let fncn = (case negation of
85 | (Just _) -> Negation
86 | _ -> id)
87 | return $ fncn (case p of
88 | Nothing -> r
89 | Just '+' -> PosClos r
90 | Just '*' -> Kleene r
91 | Just '?' -> Question r
92 | Just _ -> undefined)
93 | 
94 | 
95 | regexElement :: PS.Parser RegexC
96 | regexElement = do
97 | whiteSpace
98 | r <- extendedRegex (charSet <||> literal <||> concatR <||> namedR <||> parens unionR)
99 | y <- getInput
100 | traceM $ show y
101 | return r
102 | 
103 | {-
104 | return (case rs of
105 | [] -> r
106 | _ -> Concat (r:rs))
107 | -}
108 | 
109 | many2 p = do { x <- p; xs <- many p; return (x:xs) }
110 | 
111 | -- Named regex (upper-case identifier)
112 | namedR :: PS.Parser RegexC
113 | namedR = do
114 | s <- identifier
115 | return $ Named s
116 | 
117 | unionR :: PS.Parser RegexC
118 | unionR = do
119 | traceM "<unionR>"
120 | u <- sepBy1 regexElement (traceM "OR" >> whiteSpace >> symbol "|" >> traceM "OR2")
121 | traceM "</unionR>"
122 | return $ Union u
123 | 
124 | concatR :: PS.Parser RegexC
125 | concatR = do
126 | traceM "<concatR>"
127 | c <- many1 (extendedRegex (charSet <||> literal <||> parens regexElement)) >>= return . Concat
128 | traceM "</concatR>"
129 | return c
130 | 
131 | parseEscape :: String -> String
132 | parseEscape s = (read $ "\"" ++ s ++ "\"") :: String
133 | 
134 | -- regex string literal uses single quotes
135 | literal :: PS.Parser RegexC
136 | literal = do
137 | r <- PC.between (char '\'') (char '\'') (many singleChar >>= (return . Literal . parseEscape))
138 | y <- getInput
139 | traceM $ "literal: " ++ show y
140 | return r
141 | 
142 | charSet :: PS.Parser RegexC
143 | charSet = do
144 | traceM "<charSet>"
145 | whiteSpace
146 | cset <- PC.between (char '[') (char ']') (charSetBody >>= (return . CharSet . 
parseEscape))
147 | whiteSpace
148 | traceM $ "</charSet>: " ++ show cset
149 | return cset
150 | 
151 | charSetBody :: PS.Parser [Char]
152 | charSetBody = do
153 | traceM $ "<charSetBody>"
154 | xs <- many $ charSetRange <||> (singleChar >>= (\c -> return [c]))
155 | traceM $ "charSetBody: " ++ (show $ concat xs)
156 | return $ concat xs
157 | 
158 | charSetRange :: PS.Parser [Char]
159 | charSetRange = do
160 | start <- singleChar
161 | char '-'
162 | end <- singleChar
163 | if ord end <= ord start
164 | then unexpected [end]
165 | else return [start..end]
166 | 
167 | singleChar =
168 | escapedChar
169 | <||>
170 | satisfy (\c -> not (c `elem` ['\'', ']']))
171 | 
172 | escapedChar :: PS.Parser Char
173 | escapedChar = (do
174 | char '\\'
175 | char '\''
176 | return '\'')
177 | <||> (do
178 | char '\\'
179 | char ']'
180 | return ']')
181 | <||> (do
182 | char '\\'
183 | char '['
184 | return '[')
185 | <||> (do
186 | char '\\'
187 | char '\\'
188 | return '\\')
189 | 
190 | regexLexer :: PT.TokenParser ()
191 | regexLexer = PT.makeTokenParser $ haskellStyle
192 | { reservedOpNames = ["[", "]", "//", "-", "+"] }
193 | 
194 | whiteSpace = PT.whiteSpace regexLexer
195 | identifier = PT.identifier regexLexer
196 | operator = PT.operator regexLexer
197 | reserved = PT.reserved regexLexer
198 | reservedOp = PT.reservedOp regexLexer
199 | charLiteral = PT.charLiteral regexLexer
200 | stringLiteral = PT.stringLiteral regexLexer
201 | integer = PT.integer regexLexer
202 | natural = PT.natural regexLexer
203 | commaSep1 = PT.commaSep1 regexLexer
204 | parens = PT.parens regexLexer
205 | braces = PT.braces regexLexer
206 | brackets = PT.brackets regexLexer
207 | symbol = PT.symbol regexLexer
208 | 
209 | -------------------------------------------------------------------------------- /src/Language/ANTLR4/Syntax.hs: -------------------------------------------------------------------------------- 1 | {-|
2 | Module : Language.ANTLR4.Syntax
3 | Description : Helper syntax functions used by core G4 parser
4 | Copyright : (c) Karl Cronburg, 2018
5 | License : BSD3
6 | Maintainer : karl@cs.tufts.edu
7 | Stability : experimental
8 | Portability : POSIX
9 | -}
10 | module Language.ANTLR4.Syntax where
11 | import Language.ANTLR4.Boot.Syntax
12 | import Data.Char (readLitChar)
13 | 
14 | import qualified Debug.Trace as D
15 | 
16 | -- | Debugging support
17 | trace s = D.trace ("[Language.ANTLR4.Syntax] " ++ s)
18 | 
19 | -- | Parse the escape characters allowable in G4:
20 | readEscape :: String -> Char
21 | readEscape s = let
22 | eC ('\\':'n':xs) = '\n'
23 | eC ('\\':'r':xs) = '\r'
24 | eC ('\\':'t':xs) = '\t'
25 | eC ('\\':'b':xs) = '\b'
26 | eC ('\\':'f':xs) = '\f'
27 | eC ('\\':'v':xs) = '\v'
28 | eC ('\\':'"':xs) = '\"'
29 | eC ('\\':'\'':xs) = '\''
30 | eC ('\\':'\\':xs) = '\\'
31 | in eC s
32 | 
33 | -- | Parse a literal String by stripping the quotes at the beginning and end of
34 | -- the String, and replacing all escaped characters with the actual escape
35 | -- character code. 
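--
-- For example (illustrative):
--
-- > stripQuotesReadEscape "'a\\nb'" == "a\nb"
--
-- where the input is the raw source text of the G4 literal and the result
-- contains a real newline character.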
36 | stripQuotesReadEscape :: String -> String
37 | stripQuotesReadEscape s = let
38 | 
39 | eC [] = error $ "String ended in a single escape '\\': '" ++ s ++ "'"
40 | eC ('n':xs) = "\n" ++ sQRE xs
41 | eC ('r':xs) = "\r" ++ sQRE xs
42 | eC ('t':xs) = "\t" ++ sQRE xs
43 | eC ('b':xs) = "\b" ++ sQRE xs
44 | eC ('f':xs) = "\f" ++ sQRE xs
45 | eC ('v':xs) = "\v" ++ sQRE xs
46 | eC ('"':xs) = "\"" ++ sQRE xs
47 | eC ('\'':xs) = "\'" ++ sQRE xs
48 | eC ('\\':xs) = "\\" ++ sQRE xs
49 | eC ('u':a:b:c:d:xs) =
50 | case (readLitChar $ '\\' : 'x' : a : b : c : d : []) of
51 | ((hex, "") : []) -> hex : sQRE xs
52 | _ -> error $ "Invalid unicode character '\\u" ++ [a,b,c,d] ++ "' in string '" ++ s ++ "'"
53 | eC (x:xs) = error $ "Invalid escape character '" ++ [x] ++ "' in string '" ++ s ++ "'"
54 | 
55 | sQRE [] = []
56 | sQRE ('\\':xs) = eC xs
57 | sQRE (x:xs) = x : sQRE xs
58 | 
59 | --in trace s $ (sQRE . init . tail) s
60 | in (sQRE . init . tail) s
61 | --read $ "\"" ++ (init . tail) s ++ "\"" :: String
62 | 
-------------------------------------------------------------------------------- /src/Text/ANTLR/Allstar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, FlexibleContexts #-}
2 | {-|
3 | Module : Text.ANTLR.Allstar
4 | Description : Entrypoint for using the ALL(*) parsing algorithm
5 | Copyright : (c) Karl Cronburg, 2018
6 | License : BSD3
7 | Maintainer : karl@cs.tufts.edu
8 | Stability : experimental
9 | Portability : POSIX
10 | 
11 | This module contains the glue code for hooking Sam's
12 | 'Text.ANTLR.Allstar.ParserGenerator' implementation into the rest of
13 | this package.
14 | -}
15 | module Text.ANTLR.Allstar
16 | ( parse, parse', atnOf
17 | , ALL.GrammarSymbol(..)
18 | , ALL.ATNEnv
19 | ) where
20 | 
21 | import qualified Text.ANTLR.Allstar.ParserGenerator as ALL
22 | 
23 | import qualified Text.ANTLR.Parser as P
24 | import qualified Text.ANTLR.Grammar as G
25 | import qualified Text.ANTLR.Allstar.ATN as ATN
26 | 
27 | import qualified Data.Set as DS
28 | import qualified Text.ANTLR.Set as S
29 | 
30 | import Text.ANTLR.Pretty (Prettify(..))
31 | 
32 | -- | Go from an Allstar AST to the AST type used internally in this package
33 | fromAllstarAST :: ALL.AST nts t -> P.AST nts t
34 | fromAllstarAST (ALL.Node nt ruleFired asts) = P.AST nt (map fromAllstarSymbol ruleFired) (map fromAllstarAST asts)
35 | fromAllstarAST (ALL.Leaf tok) = P.Leaf tok
36 | 
37 | -- TODO: Handle predicate and mutator state during the conversion
38 | -- | Go from an antlr-haskell Grammar to an Allstar ATNEnv. ALL(*) does not
39 | -- currently support predicates and mutators.
40 | atnOf :: (Ord nt, Ord t, S.Hashable nt, S.Hashable t) => G.Grammar s nt t dt -> ALL.ATNEnv nt t
41 | atnOf g = DS.fromList (map convTrans (S.toList (ATN._Δ (ATN.atnOf g))))
42 | 
43 | -- | ATN Transition to AllStar version
44 | convTrans (st0, e, st1) = (convState st0, convEdge e, convState st1)
45 | 
46 | -- | ATN State to AllStar version
47 | convState (ATN.Start nt) = ALL.Init nt
48 | convState (ATN.Middle nt i0 i1) = ALL.Middle nt i0 i1
49 | convState (ATN.Accept nt) = ALL.Final nt
50 | 
51 | -- | ATN Edge to AllStar version
52 | convEdge (ATN.NTE nt) = ALL.GS (ALL.NT nt)
53 | convEdge (ATN.TE t) = ALL.GS (ALL.T t)
54 | convEdge (ATN.PE p) = ALL.PRED True -- TODO
55 | convEdge (ATN.ME m) = ALL.PRED True -- TODO
56 | convEdge ATN.Epsilon = ALL.GS ALL.EPS
57 | 
58 | -- | Entrypoint to the ALL(*) parsing algorithm. 
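--
-- A typical invocation (a sketch; @g@, @toks@, and @S0@ are hypothetical
-- stand-ins for a grammar, a token stream, and a start nonterminal):
--
-- > parse toks (NT S0) (atnOf g) True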
59 | parse' 60 | :: ( P.CanParse nts tok, Prettify chr ) 61 | => ALL.Tokenizer chr tok 62 | -> [chr] 63 | -> ALL.GrammarSymbol nts (ALL.Label tok) 64 | -> ALL.ATNEnv nts (ALL.Label tok) 65 | -> Bool 66 | -> Either String (P.AST nts tok) 67 | parse' tokenizer inp s0 atns cache = fromAllstarAST <$> ALL.parse tokenizer inp s0 atns cache 68 | 69 | -- | No tokenizer required (chr == tok): 70 | parse 71 | :: ( P.CanParse nts tok ) 72 | => [tok] 73 | -> ALL.GrammarSymbol nts (ALL.Label tok) 74 | -> ALL.ATNEnv nts (ALL.Label tok) 75 | -> Bool 76 | -> Either String (P.AST nts tok) 77 | parse = let 78 | tokenizer [] = [] 79 | tokenizer (t:ts) = [(t,ts)] 80 | in parse' tokenizer 81 | 82 | convSymbol s = ALL.NT s 83 | 84 | toAllstarSymbol :: G.ProdElem nts ts -> ALL.GrammarSymbol nts ts 85 | toAllstarSymbol (G.NT nts) = ALL.NT nts 86 | toAllstarSymbol (G.T ts) = ALL.T ts 87 | toAllstarSymbol (G.Eps) = ALL.EPS 88 | 89 | fromAllstarSymbol :: ALL.GrammarSymbol nts ts -> G.ProdElem nts ts 90 | fromAllstarSymbol (ALL.NT nts) = (G.NT nts) 91 | fromAllstarSymbol (ALL.T ts) = (G.T ts) 92 | fromAllstarSymbol ALL.EPS = G.Eps 93 | 94 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Allstar/ATN.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, DeriveAnyClass, DeriveGeneric 2 | , FlexibleContexts, UndecidableInstances, StandaloneDeriving 3 | , OverloadedStrings #-} 4 | {-| 5 | Module : Text.ANTLR.Allstar.ATN 6 | Description : Augmented recursive transition network algorithms 7 | Copyright : (c) Karl Cronburg, 2018 8 | License : BSD3 9 | Maintainer : karl@cs.tufts.edu 10 | Stability : experimental 11 | Portability : POSIX 12 | 13 | -} 14 | module Text.ANTLR.Allstar.ATN where 15 | -- Augmented recursive Transition Network 16 | import Text.ANTLR.Grammar 17 | --import Text.ANTLR.Allstar.GSS hiding (Edge, Node) 18 | import Text.ANTLR.Allstar.Stacks 19 | import Text.ANTLR.Set (Set(..), empty, fromList, toList, Hashable, Generic) 20 | import Text.ANTLR.Pretty 21 | 22 | -- | Graph-structured stack over ATN states. 23 | type Gamma nt = Stacks (ATNState nt) 24 | 25 | -- | An ATN defining some language we wish to parse 26 | data ATN s nt t = ATN 27 | { _Δ :: Set (Transition s nt t) -- ^ The transition function 28 | } deriving (Eq, Ord, Show) 29 | 30 | instance (Prettify s, Prettify nt, Prettify t, Hashable nt, Hashable t, Eq nt, Eq t) => Prettify (ATN s nt t) where 31 | prettify atn = do 32 | pLine "_Δ:" 33 | incrIndent 4 34 | prettify $ _Δ atn 35 | incrIndent (-4) 36 | 37 | -- | Tuple corresponding to a distinct transition in the ATN: 38 | type Transition s nt t = (ATNState nt, Edge s nt t, ATNState nt) 39 | 40 | -- | The possible subscripts from Figure 8 of the ALL(*) paper 41 | data ATNState nt = Start nt 42 | | Middle nt Int Int 43 | | Accept nt 44 | deriving (Eq, Generic, Hashable, Ord, Show) 45 | 46 | -- | LaTeX style ATN states. TODO: check length of NT printed and put curly braces 47 | -- around it if more than one character. 48 | instance (Prettify nt) => Prettify (ATNState nt) where 49 | prettify (Start nt) = pStr "p_" >> prettify nt 50 | prettify (Accept nt) = pStr "p'_" >> prettify nt 51 | prettify (Middle nt i j) = do 52 | pStr "p_{" 53 | prettify i 54 | pStr "," 55 | prettify j 56 | pStr "}" 57 | 58 | -- | An edge in an ATN. 
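--
-- For intuition: 'atnOf' below encodes a production @A -> b C@ as a @TE b@
-- edge followed by an @NTE C@ edge, bracketed by 'Epsilon' edges out of
-- @Start A@ and into @Accept A@.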
59 | data Edge s nt t = 60 | NTE nt -- ^ Nonterminal edge 61 | | TE t -- ^ Terminal edge 62 | | PE (Predicate ()) -- ^ Predicated edge with no state 63 | | ME (Mutator ()) -- ^ Mutator edge with no state 64 | | Epsilon -- ^ Nondeterministic edge parsing nothing 65 | deriving (Eq, Generic, Hashable, Ord, Show) 66 | 67 | instance (Prettify s, Prettify nt, Prettify t) => Prettify (Edge s nt t) where 68 | prettify x = do 69 | pStr "--" 70 | case x of 71 | NTE nt -> prettify nt 72 | TE t -> prettify t 73 | PE p -> prettify p 74 | ME m -> prettify m 75 | Epsilon -> pStr "ε" 76 | pStr "-->" 77 | 78 | -- | Convert a G4 grammar into an ATN for parsing with ALL(*) 79 | atnOf 80 | :: forall nt t s dt. (Eq nt, Eq t, Hashable nt, Hashable t) 81 | => Grammar s nt t dt -> ATN s nt t 82 | atnOf g = let 83 | 84 | _Δ :: Int -> Production s nt t dt -> [Transition s nt t] 85 | _Δ i (Production lhs rhs _) = let 86 | --(Prod _α)) = let 87 | 88 | -- Construct an internal production state from the given ATN identifier 89 | st :: nt -> Int -> Int -> ATNState nt 90 | st = Middle 91 | 92 | -- Create the transition for the k^th production element in the i^th 93 | -- production: 94 | _Δ' :: Int -> ProdElem nt t -> Transition s nt t 95 | _Δ' k (NT nt) = (st lhs i (k - 1), NTE nt, st lhs i k) 96 | _Δ' k (T t) = (st lhs i (k - 1), TE t, st lhs i k) 97 | 98 | -- The epsilon (or mu) transition for the accepting / final state: 99 | sϵ = (Start lhs, Epsilon, Middle lhs i 0) 100 | fϵ _α = (Middle lhs i (length _α), Epsilon, Accept lhs) 101 | 102 | sem_state _α = Middle lhs i (length _α + 1) 103 | sϵ_sem _π _α = [(Start lhs, Epsilon, sem_state _α), (sem_state _α, PE _π, Middle lhs i 0)] 104 | fϵ_sem = fϵ 105 | 106 | sϵ_mut = sϵ 107 | fϵ_mut _μ = (Middle lhs i 0, ME _μ, Accept lhs) 108 | 109 | in (case rhs of 110 | (Prod Pass _α) -> [sϵ, fϵ _α] ++ zipWith _Δ' [1..(length _α)] _α 111 | (Prod (Sem _π) _α) -> sϵ_sem _π _α ++ [fϵ_sem _α] ++ zipWith _Δ' [1..(length _α)] _α 112 | (Prod (Action _μ) _) -> [sϵ_mut, fϵ_mut _μ] 113 | ) 114 | 115 | in ATN 116 | { _Δ = fromList $ concat $ zipWith _Δ [0..length (ps g)] $ ps g 117 | } 118 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Allstar/Stacks.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, DeriveAnyClass, DeriveGeneric, 2 | OverloadedStrings #-} 3 | {-| 4 | Module : Text.ANTLR.Allstar.Stacks 5 | Description : Graph-structured stack (GSS) for the ALL(*) algorithm 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.Allstar.Stacks 14 | ( Stacks(..) 15 | , (#) 16 | , merge 17 | , push 18 | , pop 19 | ) where 20 | import qualified Prelude as P 21 | import Prelude hiding (map, foldr, filter) 22 | import Text.ANTLR.Set 23 | ( union, Set(..), foldr, map, filter 24 | , fromList, singleton, Hashable(..), Generic(..) 
25 | ) 26 | import qualified Text.ANTLR.Set as Set 27 | import Data.List (nub) 28 | import Text.ANTLR.Pretty 29 | 30 | -- | Graph-structured stack representation 31 | data Stacks a = 32 | Empty 33 | | Wildcard 34 | | Stacks (Set [a]) 35 | deriving (Eq, Ord, Generic, Hashable, Show) 36 | 37 | instance (Prettify a, Hashable a, Eq a) => Prettify (Stacks a) where 38 | prettify Empty = pStr "[]" 39 | prettify Wildcard = pStr "#" 40 | prettify (Stacks s) = prettify s 41 | 42 | -- | Represents the set of __all__ stacks 43 | (#) = Wildcard 44 | 45 | -- | Combine two GSSs 46 | merge :: (Eq a, Hashable a) => Stacks a -> Stacks a -> Stacks a 47 | merge Wildcard _ = Wildcard 48 | merge _ Wildcard = Wildcard 49 | merge Empty Empty = Empty 50 | merge (Stacks _Γ) Empty = Stacks $ _Γ `union` fromList [[]] 51 | merge Empty (Stacks _Γ) = Stacks $ _Γ `union` fromList [[]] 52 | merge (Stacks _Γ) (Stacks _Γ') = Stacks $ _Γ `union` _Γ' 53 | 54 | -- | Push a state onto all the leaves of the given GSS 55 | push :: (Eq a, Hashable a) => a -> Stacks a -> Stacks a 56 | push a Empty = Stacks $ singleton [a] 57 | push a Wildcard = Wildcard 58 | push a (Stacks _Γ) = Stacks $ map ((:) a) _Γ 59 | 60 | -- | Get heads of non-empty stacks / lists: 61 | heads :: (Eq a, Hashable a) => Set [a] -> [a] 62 | heads = let 63 | heads' :: [a] -> [a] -> [a] 64 | heads' [] bs = bs 65 | heads' (a:as) bs = a:bs 66 | in foldr heads' [] 67 | 68 | -- | Pop off all the current states from the given GSS 69 | pop :: (Eq a, Hashable a) => Stacks a -> [(a, Stacks a)] 70 | pop Empty = [] 71 | pop Wildcard = [] 72 | pop (Stacks _Γ) = let 73 | -- ss :: a -> Stacks a 74 | ss a = Stacks $ map tail $ filter (\as -> (not . null) as && ((== a) . head) as) _Γ 75 | in P.map (\a -> (a, ss a)) (nub . heads $ _Γ) 76 | 77 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Common.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Text.ANTLR.Common 3 | Description : Haskell-level helper functions used throughout Text.ANTLR 4 | Copyright : (c) Karl Cronburg, 2018 5 | License : BSD3 6 | Maintainer : karl@cs.tufts.edu 7 | Stability : experimental 8 | Portability : POSIX 9 | 10 | -} 11 | module Text.ANTLR.Common where 12 | 13 | concatWith cs [] = [] 14 | concatWith cs [x] = x 15 | concatWith cs (x:xs) = x ++ cs ++ concatWith cs xs 16 | 17 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Language.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Text.ANTLR.Language 3 | Description : Viewing a language as a set of words accepted 4 | Copyright : (c) Karl Cronburg, 2018 5 | License : BSD3 6 | Maintainer : karl@cs.tufts.edu 7 | Stability : experimental 8 | Portability : POSIX 9 | 10 | -} 11 | module Text.ANTLR.Language 12 | ( Alphabet(..), ascii, isASCII 13 | ) where 14 | import Prelude hiding (Word) 15 | import Data.Set.Monad (Set(..)) 16 | import qualified Data.Set.Monad as Set 17 | 18 | import Data.Char 19 | 20 | type Alphabet a = Set a 21 | 22 | ascii :: Alphabet Char 23 | ascii = Set.fromList $ map chr [0 .. 
127] 24 | 25 | isASCII :: Char -> Bool 26 | isASCII c = ord c < 128 27 | 28 | type Word a = [a] 29 | 30 | type Language a = Set (Word a) 31 | 32 | union :: (Ord a) => Set a -> Set a -> Set a 33 | union = Set.union 34 | 35 | concat :: (Ord a) => Language a -> Language a -> Language a 36 | concat a b = Set.fromList 37 | [ s ++ t 38 | | s <- Set.toList a 39 | , t <- Set.toList b 40 | ] 41 | 42 | kleene = undefined 43 | 44 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleInstances, InstanceSigs, DeriveDataTypeable 2 | , ScopedTypeVariables #-} 3 | {-| 4 | Module : Text.ANTLR.Lex 5 | Description : Entrypoint for lexical and tokenization algorithms 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.Lex 14 | ( tokenize 15 | , Token(..) 16 | , tokenName, tokenValue 17 | ) where 18 | 19 | import Text.ANTLR.Lex.Tokenizer 20 | 21 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex/Automata.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, MonadComprehensions #-} 2 | {-| 3 | Module : Text.ANTLR.Lex.Automata 4 | Description : Automata and algorithms as used during tokenization 5 | Copyright : (c) Karl Cronburg, 2018 6 | License : BSD3 7 | Maintainer : karl@cs.tufts.edu 8 | Stability : experimental 9 | Portability : POSIX 10 | 11 | -} 12 | module Text.ANTLR.Lex.Automata where 13 | import Text.ANTLR.Set (Set(..), member, toList, union, notMember, Hashable(..), fromList) 14 | import qualified Text.ANTLR.Set as Set 15 | 16 | -- | An automaton with edges @e@, symbols @s@, and state indices @i@ 17 | data Automata e s i = Automata 18 | { _S :: Set i -- ^ Finite set of states. 19 | , _Σ :: Set s -- ^ Input (edge) alphabet 20 | , _Δ :: Set (Transition e i) -- ^ Transition function 21 | , s0 :: i -- ^ Start state 22 | , _F :: Set i -- ^ Accepting states 23 | } deriving (Eq) 24 | 25 | instance (Eq e, Eq s, Eq i, Hashable e, Hashable s, Hashable i, Show e, Show s, Show i) => Show (Automata e s i) where 26 | show (Automata s sigma delta s0 f) = 27 | show s 28 | ++ "\n Σ: " ++ show sigma 29 | ++ "\n Δ: " ++ show delta 30 | ++ "\n s0: " ++ show s0 31 | ++ "\n F: " ++ show f 32 | ++ "\n" 33 | 34 | -- | Edge label of an automaton, on which we traverse if we match 35 | -- on one of the tokens @t@ in the set. The boolean is for negation 36 | -- of the set. 37 | type AutomataEdge t = (Bool, Set t) 38 | 39 | -- | A triplet with an edge alphabet of @e@ and node states of @i@. 40 | type Transition e i = (i, AutomataEdge e, i) 41 | 42 | -- | The from-node component of a 'Transition' 43 | tFrom :: Transition e i -> i 44 | tFrom (a,b,c) = a 45 | 46 | -- | The to-node component of a 'Transition' 47 | tTo :: Transition e i -> i 48 | tTo (a,b,c) = c 49 | 50 | -- | The set of edge characters in @e@ of a 'Transition' 51 | tEdge :: Transition e i -> Set e 52 | tEdge (a,(comp, b),c) = b 53 | 54 | -- | Determine the edge-label alphabet of a set of transitions. 55 | transitionAlphabet __Δ = 56 | [ e 57 | | (_, (c, es), _) <- toList __Δ 58 | , e <- toList es 59 | ] 60 | 61 | -- | Compress a set of transitions such that each (start state, complement flag, 62 | -- end state) combination appears at most once in the set.
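--
-- A sketch of the idea on 'Int' states and 'Char' edges (illustrative only;
-- the printed result is shown up to the unordered nature of hash sets):
--
-- >>> compress (fromList [(0, (False, fromList "a"), 1), (0, (False, fromList "b"), 1)])
-- fromList [(0,(False,fromList "ab"),1)]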
63 | compress :: 64 | (Eq i, Eq e, Hashable i, Hashable e) 65 | => Set (Transition e i) -> Set (Transition e i) 66 | compress __Δ = fromList 67 | [ ( a, (c, fromList [ e 68 | | (a', (c', es'), b') <- toList __Δ 69 | , a' == a && b' == b && c' == c 70 | , e <- toList es' 71 | ]) 72 | , b) 73 | | (a, (c, es), b) <- toList __Δ 74 | ] 75 | 76 | -- | XOR helper function over booleans. 77 | xor a b = (not a && b) || (not b && a) 78 | 79 | -- | Is the given transition triplet (with a single @e@ character as the 80 | -- edge label) in some set of transitions? Note that we need to handle complement 81 | -- sets here, in case the given @e@ is in the complement of one of the 82 | -- transitions in the set. 83 | transitionMember :: 84 | (Eq i, Hashable e, Eq e) 85 | => (i, e, i) -> Set (Transition e i) -> Bool 86 | transitionMember (a, e, b) _Δ = 87 | or 88 | [ xor complement (e `member` es) 89 | | (a', (complement, es), b') <- toList _Δ 90 | , a' == a 91 | , b' == b 92 | ] 93 | 94 | -- | Is the given character @s@ accepted by the given edge label? 95 | edgeMember s (complement, es) = xor complement (s `member` es) 96 | 97 | -- | An automaton must either 'Accept' or 'Reject'. 98 | data Result = Accept | Reject 99 | 100 | -- | Is the start state valid? 101 | validStartState nfa = s0 nfa `member` _S nfa 102 | 103 | -- | Are all of the ending states valid? 104 | validFinalStates nfa = and [s `member` _S nfa | s <- toList $ _F nfa] 105 | 106 | -- | Can all of the nodes as defined by the set of transitions be found 107 | -- in the set of allowable states '_S'? 108 | validTransitions :: 109 | forall e s i. (Hashable e, Hashable i, Eq e, Eq i) 110 | => Automata e s i -> Bool 111 | validTransitions nfa = let 112 | vT :: [Transition e i] -> Bool 113 | vT [] = True 114 | vT ((s1, es, s2):rest) = 115 | s1 `member` _S nfa 116 | && s2 `member` _S nfa 117 | && vT rest 118 | in vT $ (toList . _Δ) nfa 119 | 120 | -- | An automaton configuration is the set of states (indices) you 121 | -- can currently be in. 122 | type Config i = Set i 123 | 124 | -- | Generic closure function so that *someone* never asks "what's a closure?" ever 125 | -- again. For an epsilon-closure the given @fncn@ needs to return 'True' when 126 | -- given an @e@ that is an epsilon, and 'False' in all other cases. 127 | closureWith 128 | :: forall e s i. (Hashable e, Hashable i, Eq e, Eq i) 129 | => (e -> Bool) -> Automata e s i -> Config i -> Config i 130 | closureWith fncn Automata{_S = _S, _Δ = _Δ'} states = let 131 | 132 | -- Check which edges are "epsilons" (or something else). 133 | _Δ = Set.map (\(a,(comp, b),c) -> (a, (comp, Set.map fncn b), c)) _Δ' 134 | 135 | cl :: Config i -> Config i -> Config i 136 | cl busy ss 137 | | Set.null ss = Set.empty 138 | | otherwise = let 139 | ret = fromList 140 | [ s' | s <- toList ss 141 | , s' <- toList _S 142 | , s' `notMember` busy 143 | , (s, True, s') `transitionMember` _Δ ] 144 | in ret `union` cl (ret `union` busy) ret 145 | in states `union` cl Set.empty states 146 | --in Set.foldr (\a b -> union (cl a) b) Set.empty states 147 | 148 | -- | Consume the @e@ character given, based on the fact that we are currently 149 | -- in some 'Config i' of states, resulting in a new config consisting of the 150 | -- states that we can get to by doing so. 151 | move 152 | :: forall e s i.
(Hashable e, Hashable i, Eq i, Eq e) 153 | => Automata e s i -> Config i -> e -> Config i 154 | move Automata{_S = _S, _Δ = _Δ} _T a = fromList 155 | [ s' | s <- toList _T 156 | , s' <- toList _S 157 | , (s, a, s') `transitionMember` _Δ ] 158 | 159 | -- | Whether or not (a, (True, _), b) is a transition in our set of transitions. 160 | complementMember 161 | :: (Hashable i, Eq i, Hashable e, Eq e) 162 | => (i, i) -> Set (Transition e i) -> Bool 163 | complementMember (a, b) = 164 | not . null . Set.filter (\(a', (c, _), b') -> a' == a && b' == b && c) 165 | 166 | -- | Set of states you can move to if you see a character not in the alphabet. 167 | moveComplement 168 | :: forall e s i. (Hashable e, Hashable i, Eq i, Eq e) 169 | => Automata e s i -> Config i -> Config i 170 | moveComplement Automata{_S = _S, _Δ = _Δ} _T = fromList 171 | [ s' | s <- toList _T 172 | , s' <- toList _S 173 | , (s, s') `complementMember` _Δ ] 174 | 175 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex/DFA.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Text.ANTLR.Lex.DFA 3 | Description : Deterministic finite automaton types 4 | Copyright : (c) Karl Cronburg, 2018 5 | License : BSD3 6 | Maintainer : karl@cs.tufts.edu 7 | Stability : experimental 8 | Portability : POSIX 9 | 10 | -} 11 | module Text.ANTLR.Lex.DFA where 12 | import Text.ANTLR.Lex.Automata 13 | 14 | -- | DFA edges are just the symbols of our alphabet. 15 | type Edge s = s 16 | 17 | -- | DFA states are just some Eq-able value, likely integers @i@ 18 | type State i = i 19 | 20 | -- | A DFA is an automata with edges labeled by symbols @s@ and nodes representing 21 | -- states labeled by some type @i@. 22 | type DFA s i = Automata (Edge s) s (State i) 23 | 24 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex/NFA.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, MonadComprehensions, DeriveAnyClass, 2 | DeriveGeneric #-} 3 | {-| 4 | Module : Text.ANTLR.Lex.NFA 5 | Description : Nondeterministic finite automatons and algorithms to compute DFAs 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.Lex.NFA where 14 | import Text.ANTLR.Lex.Automata 15 | import Text.ANTLR.Lex.DFA (DFA(..)) 16 | import qualified Text.ANTLR.Lex.DFA as DFA 17 | 18 | import Text.ANTLR.Set (singleton, notMember, union, Set(..), member, Hashable) 19 | import qualified Text.ANTLR.Set as Set 20 | import Text.ANTLR.Set (fromList, toList) 21 | 22 | import Data.List (maximumBy) 23 | import GHC.Generics (Generic) 24 | 25 | -- | NFA edges can be labeled with either a symbol in symbol alphabet @s@, 26 | -- or an epsilon. 27 | data Edge s = Edge s | NFAEpsilon 28 | deriving (Ord, Eq, Hashable, Generic) 29 | 30 | instance (Show s) => Show (Edge s) where 31 | show NFAEpsilon = "ϵ" 32 | show (Edge s) = "E(" ++ show s ++ ")" 33 | 34 | -- | Is this an edge (not an epsilon)? 35 | isEdge :: Edge s -> Bool 36 | isEdge (Edge _) = True 37 | isEdge _ = False 38 | 39 | -- | An NFA is an automata with edges @'Edge' s@ and nodes @i@. 40 | type NFA s i = Automata (Edge s) s i 41 | 42 | -- | NFA states 43 | type State i = i 44 | 45 | -- | DFA states as constructed from an NFA is a set (config) of NFA states. 
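--
-- For intuition: if a hypothetical NFA has ϵ-edges @0 -> 1@ and @1 -> 2@, then
-- the constructed DFA starts in the ϵ-closure of NFA state 0, i.e. the
-- 'Config' @fromList [0, 1, 2]@ (computed by 'epsClosure' below).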
46 | type DFAState i = Config (State i) 47 | 48 | -- | Epsilon closure of an NFA is a closure where we can traverse epsilons. 49 | epsClosure :: 50 | (Ord i, Hashable i, Hashable s, Eq s) 51 | => Automata (Edge s) s i -> Config i -> Config i 52 | epsClosure = closureWith (NFAEpsilon ==) 53 | 54 | -- | Subset construction algorithm for constructing a DFA from an NFA. 55 | nfa2dfa_slow :: forall s i. (Hashable s, Eq s, Hashable i, Eq i, Ord i) 56 | => NFA s i -> DFA s (Set (State i)) 57 | nfa2dfa_slow nfa@Automata{s0 = s0, _Σ = _Σ, _F = _F0} = let 58 | 59 | epsCl = epsClosure nfa 60 | mv = move nfa 61 | 62 | dS :: Config (DFAState i) -> Config (DFAState i) -> Set (Transition (DFA.Edge s) (DFAState i)) 63 | dS marked ts 64 | | Set.null ts = Set.empty 65 | | otherwise = let 66 | 67 | _Δ = fromList 68 | [ (_T, (False, singleton a), epsCl (mv _T (Edge a))) 69 | | _T <- toList ts 70 | , _T `notMember` marked 71 | , a <- toList _Σ 72 | ] 73 | 74 | _Us = Set.map (\(a,b,c) -> c) _Δ 75 | fromStates = Set.map (\(a,b,c) -> a) _Δ 76 | 77 | in _Δ `union` dS (fromStates `union` marked) _Us 78 | 79 | _Δ' :: Set (Transition (DFA.Edge s) (DFAState i)) 80 | _Δ' = dS Set.empty (singleton s0') 81 | 82 | s0' = epsCl $ singleton s0 83 | 84 | in Automata 85 | { _S = fromList [ tFrom x | x <- toList _Δ' ] `union` fromList [ tTo x | x <- toList _Δ' ] 86 | , _Σ = _Σ 87 | , _Δ = _Δ' 88 | , s0 = s0' 89 | , _F = fromList [nfaState | (_,_,nfaState) <- toList _Δ', c <- toList nfaState, c `member` _F0] 90 | } 91 | 92 | -- | Subset construction but where we compress our sets of transitions along the way. 93 | nfa2dfa :: forall s i. (Hashable s, Eq s, Hashable i, Eq i, Ord i) 94 | => NFA s i -> DFA s (Set (State i)) 95 | nfa2dfa nfa@Automata{s0 = s0, _Σ = _Σ, _S = _S, _F = _F0} = let 96 | 97 | epsCl = epsClosure nfa 98 | mv = move nfa 99 | 100 | dS :: Config (DFAState i) -> Config (DFAState i) -> Set (Transition (DFA.Edge s) (DFAState i)) 101 | dS marked ts 102 | | Set.null ts = Set.empty 103 | | otherwise = let 104 | 105 | _Δ = 106 | Set.fromList 107 | [ (_T, (False, singleton a), epsCl (mv _T (Edge a))) 108 | | _T <- Set.toList ts 109 | , _T `notMember` marked 110 | , a <- Set.toList _Σ 111 | ] 112 | `union` 113 | Set.fromList 114 | [ (_T, (True, _Σ), epsCl $ moveComplement nfa _T) 115 | | _T <- Set.toList ts 116 | , _T `notMember` marked 117 | ] 118 | 119 | _Us = fromList [ c | (a,b,c) <- toList _Δ ] 120 | fromStates = fromList [ a | (a,b,c) <- toList _Δ ] 121 | 122 | in _Δ `union` dS (fromStates `union` marked) _Us 123 | 124 | _Δ' :: Set (Transition (DFA.Edge s) (DFAState i)) 125 | _Δ' = let run_dS = dS Set.empty (singleton s0') 126 | in Set.filter (\(_, _, b) -> not $ Set.null b) $ compress run_dS 127 | 128 | s0' = epsCl $ singleton s0 129 | 130 | in Automata 131 | { _S = fromList [ tFrom x | x <- toList _Δ' ] `union` fromList [ tTo x | x <- toList _Δ' ] 132 | , _Σ = _Σ 133 | , _Δ = _Δ' 134 | , s0 = s0' 135 | , _F = fromList [nfaState | (_,_,nfaState) <- toList _Δ', c <- toList nfaState, c `member` _F0] 136 | } 137 | 138 | -- | Compute all the states statically used in a particular set of transitions. 139 | allStates :: forall s i. 
(Hashable i, Eq i) => Set (Transition (Edge s) i) -> Set (State i) 140 | allStates ts = fromList [ n | (n, _, _) <- toList ts ] `union` fromList [ n | (_, _, n) <- toList ts ] 141 | 142 | -- | Converts the given list of transitions into a complete NFA / Automata 143 | -- structure, assuming two things: 144 | -- 145 | -- > The first node of the first edge is the start state 146 | -- > The last node of the last edge is the (only) final state 147 | -- 148 | list2nfa :: forall s i. (Hashable i, Eq i, Hashable s, Eq s) => [Transition (Edge s) i] -> NFA s i 149 | list2nfa [] = undefined 150 | list2nfa ((t@(n1,_,_)):ts) = Automata 151 | { _S = allStates $ Set.fromList (t:ts) 152 | , _Σ = Set.fromList [ e 153 | | (_, es, _) <- t:ts 154 | , Edge e <- filter isEdge (Set.toList $ snd es) 155 | ] 156 | , s0 = n1 157 | , _F = Set.fromList [ (\(_,_,c) -> c) $ last (t:ts) ] 158 | , _Δ = Set.fromList $ t:ts 159 | } 160 | 161 | -- | Rename the states in the second NFA such that they start at the index 162 | -- one greater than the maximum index of the first NFA. 163 | shiftAllStates :: 164 | forall s i. (Hashable i, Eq i, Ord i, Hashable s, Eq s) 165 | => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i -> NFA s i 166 | shiftAllStates from to 167 | n1 (n2@Automata{_Δ = _Δ2, _S = _S2, _F = _F2, s0 = s2_0}) 168 | = n2 { _Δ = fromList [ (to $ from i0 + shift, e, to $ from i1 + shift) | (i0, e, i1) <- toList _Δ2 ] 169 | , _S = fromList [ to $ from i + shift | i <- toList _S2 ] 170 | , _F = fromList [ to $ from i + shift | i <- toList _F2 ] 171 | , s0 = to $ from s2_0 + shift 172 | } 173 | where 174 | shift = 1 + foldr (\(i0, _, i1) i -> from $ maximum [to i, i0, i1]) 0 (_Δ n1) 175 | 176 | -- | Take the union of two NFAs, renaming states according to 'shiftAllStates'. 177 | nfaUnion :: 178 | forall s i. (Ord i, Hashable i, Eq i, Hashable s, Eq s) 179 | => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i -> NFA s i 180 | nfaUnion from to 181 | (n1@Automata{_Δ = _Δ1, _S = _S1, _F = _F1, s0 = s1_0}) n2 182 | = let 183 | 184 | Automata{_Δ = _Δ2, _S = _S2, _F = _F2, s0 = s2_0} = shiftAllStates from to n1 n2 185 | mx2 = 1 + foldr (\(i0, _, i1) i -> from $ maximum [to i, i0, i1]) 0 _Δ2 186 | 187 | _Δ' = _Δ1 188 | `union` _Δ2 189 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), s1_0) 190 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), s2_0) 191 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), f0') | f1_0 <- toList _F1 ] 192 | `union` fromList [ (f2_0, (False, singleton NFAEpsilon), f0') | f2_0 <- toList _F2 ] 193 | 194 | s0' = to mx2 195 | f0' = to $ mx2 + 1 196 | 197 | in Automata 198 | { _S = allStates _Δ' 199 | , _Σ = fromList [ e 200 | | (_, es, _) <- toList _Δ' 201 | , Edge e <- toList $ Set.filter isEdge $ snd es 202 | ] 203 | , s0 = s0' 204 | , _F = Set.fromList [f0'] 205 | , _Δ = _Δ' 206 | } 207 | 208 | -- | Concatenate two NFAs, renaming states in the second NFA according to 'shiftAllStates'. 209 | nfaConcat :: 210 | forall s i. 
(Hashable i, Eq i, Ord i, Hashable s, Eq s) => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i -> NFA s i 211 | nfaConcat from to 212 | (n1@Automata{_Δ = _Δ1, _S = _S1, _F = _F1, s0 = s1_0}) n2 213 | = let 214 | Automata{_Δ = _Δ2, _S = _S2, _F = _F2, s0 = s2_0} = shiftAllStates from to n1 n2 215 | 216 | _Δ' = _Δ1 217 | `union` _Δ2 218 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), s2_0) | f1_0 <- toList _F1 ] 219 | 220 | in Automata 221 | { _S = allStates _Δ' 222 | , _Σ = fromList [ e 223 | | (_, es, _) <- toList _Δ' 224 | , Edge e <- toList $ Set.filter isEdge $ snd es 225 | ] 226 | , s0 = s1_0 227 | , _F = _F2 228 | , _Δ = _Δ' 229 | } 230 | 231 | -- | Take the Kleene-star of an NFA, adding epsilons as needed. 232 | nfaKleene :: forall s i. (Ord i, Hashable i, Eq i, Hashable s, Eq s) => (i -> Int) -> (Int -> i) -> NFA s i -> NFA s i 233 | nfaKleene from to 234 | (n1@Automata{_Δ = _Δ1, _S = _S1, _F = _F1, s0 = s1_0}) 235 | = let 236 | mx1 = 1 + foldr (\(i0, _, i1) i -> from $ maximum [to i, i0, i1]) 0 _Δ1 237 | 238 | s0' = to mx1 239 | f0' = to $ mx1 + 1 240 | 241 | _Δ' = _Δ1 242 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), s1_0) 243 | `union` Set.singleton (s0', (False, singleton NFAEpsilon), f0') 244 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), s1_0) | f1_0 <- toList _F1 ] 245 | `union` fromList [ (f1_0, (False, singleton NFAEpsilon), f0') | f1_0 <- toList _F1 ] 246 | 247 | in Automata 248 | { _S = allStates _Δ' 249 | , _Σ = fromList [ e 250 | | (_, es, _) <- toList _Δ' 251 | , Edge e <- toList $ Set.filter isEdge $ snd es 252 | ] 253 | , s0 = s0' 254 | , _F = Set.fromList [f0'] 255 | , _Δ = _Δ' 256 | } 257 | 258 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex/Regex.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, DeriveLift #-} 2 | {-| 3 | Module : Text.ANTLR.Lex.Regex 4 | Description : Regular expressions as used during tokenization 5 | Copyright : (c) Karl Cronburg, 2018 6 | License : BSD3 7 | Maintainer : karl@cs.tufts.edu 8 | Stability : experimental 9 | Portability : POSIX 10 | 11 | -} 12 | module Text.ANTLR.Lex.Regex where 13 | 14 | import Text.ANTLR.Set (Hashable, singleton, fromList) 15 | import Text.ANTLR.Lex.NFA 16 | import qualified Text.ANTLR.Lex.DFA as DFA 17 | import Language.Haskell.TH.Syntax (Lift(..)) 18 | 19 | -- | Regular expression data representation as used by the tokenizer. 
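--
-- For example, a lexer rule for C-style identifiers could be expressed as
-- (an illustrative sketch, not a rule shipped with this library):
--
-- > idRegex :: Regex Char
-- > idRegex = Concat
-- >   [ Class ('_' : ['a'..'z'] ++ ['A'..'Z'])
-- >   , Kleene (Class ('_' : ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9']))
-- >   ]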
20 | data Regex s = 21 | Epsilon -- ^ Regex accepting the empty string 22 | | Symbol s -- ^ An individual symbol in the alphabet 23 | | Literal [s] -- ^ A literal sequence of symbols (concatenated together) 24 | | Class [s] -- ^ A set of alternative symbols (unioned together) 25 | | Union (Regex s) (Regex s) -- ^ Union of two arbitrary regular expressions 26 | | Concat [Regex s] -- ^ Concatenation of two or more regular expressions 27 | | Kleene (Regex s) -- ^ Kleene closure of a regex 28 | | PosClos (Regex s) -- ^ Positive closure 29 | | Question (Regex s) -- ^ 0 or 1 instances 30 | | MultiUnion [Regex s] -- ^ Union of two or more arbitrary regexes 31 | | NotClass [s] -- ^ Complement of a character class 32 | deriving (Lift) 33 | 34 | instance (Show s) => Show (Regex s) where 35 | show Epsilon = "ϵ" 36 | show (Symbol s) = show s 37 | show (Literal s) = show s 38 | show (Class s) = "[" ++ show s ++ "]" 39 | show (Union r1 r2) = "(" ++ show r1 ++ "|" ++ show r2 ++ ")" 40 | show (Concat rs) = concatMap show rs 41 | show (Kleene r) = "(" ++ show r ++ ")*" 42 | show (PosClos r) = "(" ++ show r ++ ")+" 43 | show (Question r) = "(" ++ show r ++ ")?" 44 | show (MultiUnion rs) = tail $ concatMap (\r -> "|" ++ show r) rs 45 | show (NotClass rs) = "[^" ++ tail (concatMap show rs) ++ "]" 46 | 47 | -- | Translation code of a regular expression to an NFA. 48 | regex2nfa' :: 49 | forall s i. (Hashable i, Ord i, Hashable s, Eq s) 50 | => (i -> Int) -> (Int -> i) -> Regex s -> NFA s i 51 | regex2nfa' from to r = let 52 | r2n :: Regex s -> NFA s i 53 | r2n Epsilon = list2nfa [ (to 0, (False, singleton NFAEpsilon), to 1) ] 54 | r2n (Symbol s) = list2nfa [ (to 0, (False, singleton $ Edge s), to 1) ] 55 | r2n (Union r1 r2) = nfaUnion from to (r2n r1) (r2n r2) 56 | r2n (Concat []) = r2n Epsilon -- TODO: empty concat 57 | r2n (Concat (r:rs)) = foldl (nfaConcat from to) (r2n r) (map r2n rs) 58 | r2n (Kleene r1) = nfaKleene from to (r2n r1) 59 | r2n (PosClos r1) = r2n $ Concat [r1, Kleene r1] 60 | r2n (Question r1) = nfaUnion from to (r2n r1) (r2n Epsilon) 61 | r2n (Class []) = r2n Epsilon -- TODO: empty character class shouldn't accept empty string? 62 | r2n (Class (s:ss)) = list2nfa [ (to 0, (False, fromList $ map Edge $ s:ss), to 1) ] --r2n $ foldl Union (Symbol s) (map Symbol ss) 63 | r2n (MultiUnion []) = r2n Epsilon 64 | r2n (MultiUnion (r:rs)) = r2n $ foldl Union r rs 65 | r2n (Literal ss) = list2nfa $ map (\(s,i) -> (to i, (False, singleton $ Edge s), to $ i + 1)) (zip ss [0..length ss - 1]) 66 | r2n (NotClass []) = list2nfa $ [ (to 0, (True, fromList []), to 1) ] -- Not nothing = everything 67 | r2n (NotClass (s:ss)) = list2nfa $ [ (to 0, (True, fromList $ map Edge $ s:ss), to 1) ] 68 | in r2n r 69 | 70 | -- | Entrypoint for translating a regular expression into an 'NFA' with integer indices. 71 | regex2nfa :: (Hashable s, Ord s) => Regex s -> NFA s Int 72 | regex2nfa = regex2nfa' id id 73 | 74 | -- | Entrypoint for translating a regular expression into a 'DFA.DFA' with integer indices. 75 | regex2dfa :: (Hashable s, Ord s) => Regex s -> DFA.DFA s (DFAState Int) 76 | regex2dfa = nfa2dfa .
regex2nfa 77 | 78 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Lex/Tokenizer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, DeriveGeneric, DeriveAnyClass 2 | , OverloadedStrings #-} 3 | {-| 4 | Module : Text.ANTLR.Lex.Tokenizer 5 | Description : Tokenization algorithms 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.Lex.Tokenizer where 14 | import Text.ANTLR.Lex.Automata 15 | import Text.ANTLR.Lex.DFA 16 | 17 | import qualified Text.ANTLR.Set as Set 18 | import Text.ANTLR.Set (Hashable, member, Generic(..), Set(..)) 19 | 20 | import Text.ANTLR.Pretty 21 | import qualified Debug.Trace as D 22 | import Data.List (find) 23 | import qualified Data.Text as T 24 | 25 | -- | Token with names @n@, values @v@, and number of input symbols consumed to match 26 | -- it. 27 | data Token n v = 28 | Token n v Int -- ^ Tokenized a token 29 | | EOF -- ^ The end-of-file token 30 | | Error T.Text -- ^ Error encountered while tokenizing 31 | deriving (Show, Ord, Generic, Hashable) 32 | 33 | instance (Prettify n, Prettify v) => Prettify (Token n v) where 34 | prettify EOF = pStr "EOF" 35 | prettify (Error s) = pStr "Token Error: " >> pStr s 36 | prettify (Token n v i) = 37 | prettify v 38 | 39 | instance Eq n => Eq (Token n v) where 40 | Token s _ _ == Token s1 _ _ = s == s1 41 | EOF == EOF = True 42 | Error s == Error s1 = s == s1 43 | _ == _ = False 44 | 45 | -- | Token Names are Input Symbols to the parser. 46 | tokenName :: Token n v -> n 47 | tokenName (Token n v _) = n 48 | 49 | -- | Get the value of a token, ignoring its name. 50 | tokenValue :: Token n v -> v 51 | tokenValue (Token n v _) = v 52 | 53 | -- | Get the number of characters from the input that this token matched on. 54 | tokenSize :: Token n v -> Int 55 | tokenSize (Token _ _ i) = i 56 | tokenSize EOF = 0 57 | tokenSize (Error _) = 0 58 | -- | A Lexeme is a sequence of zero or more (matched) input symbols 59 | type Lexeme s = [s] 60 | 61 | -- | A named DFA over symbols @s@, indices @i@, and names @n@. 62 | type NDFA s i n = (n, DFA s i) 63 | 64 | -- | Entrypoint for tokenizing an input stream given a list of named DFAs that 65 | -- we can match on. 66 | -- 67 | -- > @dfaTuples@: associates each DFA with the name given to it in 68 | -- the specification of the lexer. 69 | -- 70 | -- > @fncn@: function for constructing the value of a token from the lexeme 71 | -- matched (e.g. @varName@) and the associated token name (e.g. @id@) 72 | -- 73 | tokenize :: 74 | forall s i n v. (Eq i, Ord s, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s) 75 | => [(n, DFA s i)] -- ^ Association list of named DFAs. 76 | -> (Lexeme s -> n -> v) -- ^ Constructs the value of a token from lexeme matched. 77 | -> [s] -- ^ The input string. 78 | -> [Token n v] -- ^ The tokenized tokens.
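-- For example (an illustrative sketch, not a test from this repository; the
-- token name @"num"@ is made up, and 'regex2dfa' comes from
-- "Text.ANTLR.Lex.Regex"):
--
-- > tokenize [("num", regex2dfa (PosClos (Class ['0'..'9'])))]
-- >          (\lexeme _name -> lexeme)
-- >          "42"
--
-- should evaluate to @[Token "num" "42" 2, EOF]@.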
79 | tokenize dfaTuples fncn input0 = let 80 | 81 | dfas0 = map snd dfaTuples 82 | 83 | allTok :: [(NDFA s i n, State i)] -> [s] -> [Token n v] 84 | allTok dfaSims0 currInput = let 85 | oneTok :: [(NDFA s i n, State i)] -> [s] -> Maybe (Lexeme s, NDFA s i n) 86 | oneTok dfaSims [] = Nothing 87 | oneTok [] ss = Nothing 88 | oneTok dfaSims (s:ss) = let 89 | dfaSims' = 90 | [ ((n, dfa), stop) 91 | | ((n, dfa), cursor) <- dfaSims 92 | , (start, es, stop) <- Set.toList $ _Δ dfa 93 | , start == cursor && s `edgeMember` es ] 94 | 95 | accepting = [ (n,dfa) | ((n, dfa), cursor) <- dfaSims', cursor `member` _F dfa ] 96 | 97 | in (case (oneTok dfaSims' ss, accepting) of 98 | (Nothing, []) -> Nothing 99 | (Nothing, d:ds) -> Just ([s], d) 100 | (Just (l,d), _) -> Just (s:l, d)) 101 | in case (currInput, oneTok dfaSims0 currInput) of 102 | ([], _) -> [EOF] 103 | (ss, Nothing) -> [Error $ T.pack $ show ss] 104 | (ss, Just (l, (name,d))) -> 105 | Token name (fncn l name) (length l) 106 | : allTok dfaSims0 (drop (length l) currInput) 107 | in allTok (zip dfaTuples (map s0 dfas0)) input0 108 | 109 | -- | Incremental tokenizer takes in the same list of DFAs and AST value 110 | -- constructor function, but instead returns an incremental tokenizer function 111 | -- that expects a set of names that we currently expect to tokenize on, 112 | -- the current input stream, and returns a single tokenized token along 113 | -- with the modified input stream to iteratively call 'tokenizeInc' on. 114 | tokenizeInc 115 | :: forall s i n v. (Eq i, Ord s, Eq n, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s, Hashable n) 116 | => (n -> Bool) -- ^ Function that returns True on DFA names we wish to filter __out__ of the results. 117 | -> [(n, DFA s i)] -- ^ Closure over association list of named DFAs. 118 | -> (Lexeme s -> n -> v) -- ^ Token value constructor from lexemes. 119 | -> (Set n -> [s] -> (Token n v, [s])) -- ^ The incremental tokenizer closure. 120 | tokenizeInc filterF dfaTuples fncn = let 121 | 122 | tI :: Set n -> [s] -> (Token n v, [s]) 123 | tI ns input = let 124 | 125 | dfaTuples' = filter (\(n,_) -> n `Set.member` ns || filterF n) dfaTuples 126 | tokenized = tokenize dfaTuples' fncn input 127 | 128 | filterF' (Token n _ _) = filterF n 129 | filterF' _ = False 130 | 131 | ignored = takeWhile filterF' tokenized 132 | nextTokens = dropWhile filterF' tokenized 133 | -- Yayy lazy function evaluation. 134 | next = case nextTokens of 135 | [] -> EOF 136 | (t:_) -> t --D.traceShowId t 137 | 138 | in (next, drop (sum $ map tokenSize $ next : ignored) input) 139 | in tI 140 | 141 | tokenizeIncAll 142 | :: forall s i n v. (Eq i, Ord s, Eq n, Eq s, Show s, Show i, Show n, Show v, Hashable i, Hashable s, Hashable n) 143 | => (n -> Bool) -- ^ Function that returns True on DFA names we wish to filter __out__ of the results. 144 | -> [(n, DFA s i)] 145 | -> (Lexeme s -> n -> v) 146 | -> (Set n -> [s] -> [(Token n v, [s])]) 147 | tokenizeIncAll filterF dfaTuples fncn = let 148 | 149 | tI :: Set n -> [s] -> [(Token n v, [s])] 150 | tI ns input = let 151 | 152 | dfaTuples' = filter (\(n,_) -> n `Set.member` ns || filterF n) dfaTuples 153 | tokenized = tokenize dfaTuples' fncn input 154 | 155 | filterF' (Token n _ _) = filterF n 156 | filterF' _ = False 157 | 158 | ignored = takeWhile filterF' tokenized 159 | nextTokens = dropWhile filterF' tokenized 160 | -- Yayy lazy function evaluation. 
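-- (Laziness is doing real work here: although 'tokenize' nominally returns
-- the token list for the entire remaining input, only the prefix demanded
-- by 'ignored' and 'next' is ever actually computed.)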
161 | next = case nextTokens of 162 | [] -> EOF 163 | (t:_) -> t --D.traceShowId t 164 | 165 | input' = drop (sum $ map tokenSize $ next : ignored) input 166 | 167 | ns' = case next of 168 | Token n _ _ -> n `Set.delete` ns 169 | _ -> Set.empty 170 | 171 | in (next, input') : tI ns' input' 172 | in tI 173 | 174 | -------------------------------------------------------------------------------- /src/Text/ANTLR/MultiMap.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass, MonadComprehensions, DeriveLift, 2 | DeriveDataTypeable #-} 3 | {-| 4 | Module : Text.ANTLR.MultiMap 5 | Description : A one-to-many key value map 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.MultiMap where 14 | import qualified Data.Map.Strict as M 15 | import Data.Maybe (fromMaybe) 16 | import Text.ANTLR.Set (Generic(..), Hashable(..), Set(..)) 17 | import qualified Text.ANTLR.Set as S 18 | import Prelude hiding (lookup) 19 | import Text.ANTLR.Pretty 20 | 21 | import Data.Data (Data(..)) 22 | import Language.Haskell.TH.Syntax (Lift(..)) 23 | 24 | instance (Lift k, Lift v, Data k, Data v, Ord k, Ord v) => Lift (M.Map k v) 25 | 26 | -- | A multi 'Map' is a mapping from keys @k@ to sets of values @v@. A nice 27 | -- invariant to maintain while using a multi-map is to never have empty 28 | -- sets mapped to by some key. 29 | newtype Map k v = Map (M.Map k (Set v)) 30 | deriving (Generic, Hashable, Eq, Show, Lift) 31 | 32 | instance (Prettify k, Prettify v, Hashable v, Eq v) => Prettify (Map k v) where 33 | prettify (Map m) = prettify m 34 | 35 | -- | The singleton multimap, given a single key and a __single__ value. 36 | singleton :: (Hashable v, Eq v) => k -> v -> Map k v 37 | singleton k v = Map (M.singleton k (S.singleton v)) 38 | 39 | -- | Construct a multi 'Map' from a list of key-value pairs. 40 | fromList :: (Hashable v, Ord k, Eq k, Eq v) => [(k, v)] -> Map k v 41 | fromList kvs = Map (M.fromList 42 | [ (k1, S.fromList [v2 | (k2, v2) <- kvs, k1 == k2]) 43 | | (k1, _) <- kvs]) 44 | 45 | -- | Same as 'fromList' but where the values in the key-value tuples are already in sets. 46 | fromList' :: (Ord k, Eq k, Hashable v, Eq v) => [(k, Set v)] -> Map k v 47 | fromList' kvs = fromList [(k, v) | (k, vs) <- kvs, v <- S.toList vs] 48 | 49 | -- | Inverse of 'fromList\''. 50 | toList :: Map k v -> [(k, Set v)] 51 | toList (Map m) = M.toList m 52 | 53 | -- | Take the union of two maps. 54 | union :: (Ord k, Eq k, Hashable v, Eq v) => Map k v -> Map k v -> Map k v 55 | union m1 m2 = fromList' (toList m1 ++ toList m2) 56 | 57 | -- | The empty multi-map. 58 | empty :: Map k v 59 | empty = Map M.empty 60 | 61 | -- | Get the set of values mapped to by some key @k@. 62 | lookup :: (Ord k, Hashable v, Eq v) => k -> Map k v -> Set v 63 | lookup k (Map m) = fromMaybe S.empty (M.lookup k m) 64 | 65 | -- | Number of keys in the multi-map. 66 | size (Map m) = M.size m 67 | 68 | -- | Map difference of two multi-maps, deleting individual key-value pairs 69 | -- rather than deleting the entire key. Invariant maintained is that 70 | -- input maps with non-null value sets will result in an output with 71 | -- non-null value sets. 72 | difference (Map m1) m2 = Map $ M.fromList 73 | [ (k1, vs) 74 | | (k1, vs1) <- M.toList m1 75 | , let vs2 = k1 `lookup` m2 76 | , let vs = vs1 `S.difference` vs2 77 | , (not . 
S.null) vs 78 | ] 79 | 80 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass, FlexibleContexts, InstanceSigs 2 | , UndecidableInstances, StandaloneDeriving, TypeFamilies 3 | , ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses 4 | , OverloadedStrings, DeriveDataTypeable, ConstraintKinds #-} 5 | {-| 6 | Module : Text.ANTLR.Parser 7 | Description : Parsing API for constructing Haskell data types from lists of tokens 8 | Copyright : (c) Karl Cronburg, 2018 9 | License : BSD3 10 | Maintainer : karl@cs.tufts.edu 11 | Stability : experimental 12 | Portability : POSIX 13 | 14 | -} 15 | module Text.ANTLR.Parser where 16 | import Text.ANTLR.Grammar hiding (Action) 17 | import Text.ANTLR.Pretty 18 | import Text.ANTLR.Set (Generic(..)) 19 | import Text.ANTLR.Lex.Tokenizer (Token(..)) 20 | import Data.Data (Data(..)) 21 | import Language.Haskell.TH.Lift (Lift(..)) 22 | import Text.ANTLR.Set (Hashable) 23 | 24 | -- | Nonterminals in a grammar are tabular, terminal symbols are tabular (as are 25 | -- the EOF-stripped version), terminals are referenceable (can be symbolized), 26 | -- and terminals are also tabular. 27 | type CanParse nts t = 28 | ( Tabular nts 29 | , Tabular (Sym t) 30 | , Tabular (StripEOF (Sym t)) 31 | , HasEOF (Sym t) 32 | , Ref t 33 | , Tabular t) 34 | 35 | -- | Same as 'CanParse' but with the second formal parameter representing (StripEOF (Sym t)), 36 | -- aka "sts" (stripped terminal symbol). 37 | type CanParse' nts sts = ( Tabular nts, Tabular sts ) 38 | 39 | type IsAST ast = ( Ord ast, Eq ast, Hashable ast ) 40 | 41 | type IsState st = ( Ord st, Hashable st, Prettify st ) 42 | type Tabular sym = ( Ord sym, Hashable sym, Prettify sym, Eq sym ) 43 | 44 | -- | Action functions triggered during parsing are given the nonterminal we just matched on, the 45 | -- corresponding list of production elements (grammar symbols) in the RHS of the matched production 46 | -- alternative, and the results of recursively parsing those production elements. 47 | -- 48 | -- A 'ParseEvent' may also be just a terminal matched on, or an epsilon event, 49 | -- depending heavily on which parsing algorithm is being run. 50 | -- 51 | -- __This__ data type is one of the data types that tie together terminal (token) types 52 | -- and terminal symbol types. When the parser produces a terminal event, you're 53 | -- seeing a __token__, but when the parser produces a nonterminal event, you're 54 | -- seeing a production in the grammar firing which contains terminal __symbols__, 55 | -- not tokens. 56 | data ParseEvent ast nts t = 57 | TermE t -- ^ A terminal was seen in the input 58 | | NonTE (nts, ProdElems nts (StripEOF (Sym t)), [ast]) -- ^ A non-terminal was seen in the input 59 | | EpsE -- ^ Epsilon event 60 | 61 | deriving instance (Show ast, Show nts, Show (StripEOF (Sym t)), Show t) => Show (ParseEvent ast nts t) 62 | 63 | instance (Prettify ast, Prettify nts, Prettify (StripEOF (Sym t)), Prettify t) => Prettify (ParseEvent ast nts t) where 64 | prettify (TermE t) = pStr "Terminal Event: " >> incrIndent 2 >> prettify t >> incrIndent (-2) 65 | prettify (NonTE (nts, _, asts)) = pStr "Nonterminal Event: " >> incrIndent 2 >> prettify nts >> pStr " " >> prettify asts >> incrIndent (-2) 66 | prettify EpsE = pStr "Epsilon Event" 67 | 68 | 69 | 70 | -- | An Action as seen by the host language (Haskell) is a function from parse 71 | -- events to an abstract-syntax tree that the function constructs based on which 72 | -- non-terminal or terminal symbol was seen.
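--
-- For example, an action that merely counts the terminal leaves of a parse
-- could be written as (an illustrative sketch, not part of this module):
--
-- > countTerms :: Action Int nts t
-- > countTerms (TermE _)          = 1
-- > countTerms (NonTE (_, _, ns)) = sum ns
-- > countTerms EpsE               = 0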
73 | type Action ast nts t = ParseEvent ast nts t -> ast 74 | 75 | -- | An Icon (as used in first and follow sets of the LL1 parser and the 76 | -- shift-reduce table of the LR1 parser) is just a terminal symbol taken from 77 | -- the grammar, or it's an epsilon or EOF. 78 | data Icon ts = 79 | Icon ts -- ^ Terminal symbol icon 80 | | IconEps -- ^ Epsilon icon 81 | | IconEOF -- ^ EOF (end of file / input) icon 82 | deriving (Generic, Hashable, Show, Eq, Ord, Data, Lift) 83 | 84 | -- | __This__ is the function defining the (n == Sym t == ts) relationship between 85 | -- the __name__ type of a token, the __symbol__ type of a terminal token (as 86 | -- constructed by the tokenizer), and the __terminal symbol__ type as used by the 87 | -- parser. When a parser wants to compare the symbol of an input token to a 88 | -- terminal symbol found in the grammar, it should convert the token to a symbol 89 | -- using this function and then compare symbols using Eq, because symbols throw away 90 | -- the value of a token, leaving only the Eq-able piece that we care about. 91 | token2symbol :: Token n v -> TokenSymbol n 92 | token2symbol (Token n v _) = TokenSymbol n 93 | token2symbol EOF = EOFSymbol 94 | token2symbol (Error s) = EOFSymbol 95 | 96 | -- | Tokens are symbolized by a 'TokenSymbol' containing their name. 97 | instance Ref (Token n v) where 98 | type Sym (Token n v) = TokenSymbol n 99 | getSymbol = token2symbol 100 | 101 | -- | The symbol for a token is either just its name @n@ or the special EOF symbol. 102 | data TokenSymbol n = 103 | TokenSymbol n -- ^ Named symbol 104 | | EOFSymbol -- ^ End-of-file symbol 105 | deriving (Eq, Ord, Show, Hashable, Generic) 106 | 107 | instance (Prettify n) => Prettify (TokenSymbol n) where 108 | prettify (TokenSymbol n) = do 109 | pStr "TokenSymbol " 110 | prettify n 111 | prettify EOFSymbol = pStr "EOFSymbol" 112 | 113 | -- | A data type with an EOF constructor. There are two things you can do with a 114 | -- data type that has an EOF: 115 | -- 116 | -- > Ask for the type *without* the EOF at compile time 117 | -- > Ask whether or not an instance is the EOF symbol at runtime 118 | -- 119 | class HasEOF t where 120 | -- | The unwrapped type (without the EOF data constructor alternative) 121 | type StripEOF t :: * 122 | -- | Whether or not the given value of type t is the EOF value 123 | isEOF :: t -> Bool 124 | -- | Take a token and try to unwrap its name (an EOF should result in Nothing) 125 | stripEOF :: t -> Maybe (StripEOF t) 126 | 127 | instance HasEOF (TokenSymbol n) where 128 | type StripEOF (TokenSymbol n) = n 129 | 130 | isEOF EOFSymbol = True 131 | isEOF _ = False 132 | 133 | stripEOF EOFSymbol = Nothing 134 | stripEOF (TokenSymbol n) = Just n 135 | 136 | instance HasEOF String where 137 | type StripEOF String = String 138 | 139 | isEOF "" = True 140 | isEOF _ = False 141 | 142 | stripEOF "" = Nothing 143 | stripEOF x = Just x 144 | 145 | instance (Prettify ts) => Prettify (Icon ts) where 146 | prettify IconEps = pStr "iϵ" 147 | prettify IconEOF = pStr "iEOF" 148 | prettify (Icon ts) = do 149 | pStr "Icon " 150 | prettify ts 151 | 152 | -- | Is this a terminal-symbol icon? 153 | isIcon Icon{} = True 154 | isIcon _ = False 155 | 156 | -- | Is this an epsilon icon? 157 | isIconEps IconEps = True 158 | isIconEps _ = False 159 | 160 | -- | Is this the EOF icon? 161 | isIconEOF IconEOF = True 162 | isIconEOF _ = False 163 | 164 | -- | Universal Abstract Syntax Tree data type.
All internal AST "nodes" have a 165 | -- nonterminal, the grammar production symbols it reduced from, and the 166 | -- resulting recursively defined AST nodes acquired from the parser. Leaf AST 167 | -- nodes can be either an epsilon (when explicit epsilons are used in the 168 | -- grammar) or more importantly a terminal symbol. 169 | -- __This__ is another type that defines the relationship between the terminal 170 | -- token type @t@ and the terminal symbol type @(ts == Sym t)@ where the AST tells 171 | -- you the production rule that fired containing @ts@ as well as the tokens @t@ 172 | -- contained in leaves of the AST. 173 | data AST nts t = 174 | LeafEps -- ^ Epsilon leaf AST node 175 | | Leaf t -- ^ Terminal token leaf in the AST 176 | | AST nts (ProdElems nts (StripEOF (Sym t))) [AST nts t] -- ^ Internal AST node 177 | deriving (Generic) 178 | 179 | deriving instance (Eq (StripEOF (Sym t)), Eq nts, Eq t) => Eq (AST nts t) 180 | deriving instance (Ord (StripEOF (Sym t)), Ord nts, Ord t) => Ord (AST nts t) 181 | deriving instance (Show (StripEOF (Sym t)), Show nts, Show t) => Show (AST nts t) 182 | deriving instance (Hashable (StripEOF (Sym t)), Hashable nts, Hashable t) => Hashable (AST nts t) 183 | 184 | instance (Prettify nts, Prettify t) => Prettify (AST nts t) where 185 | prettify LeafEps = pStr "ϵ" 186 | prettify (Leaf t) = prettify t 187 | prettify (AST nts ps asts) = do 188 | prettify nts 189 | pStr "{" 190 | prettify asts 191 | pStr "}" 192 | 193 | -- | Default AST-constructor function which just copies over the contents of 194 | -- some parse event into an 'AST'. 195 | event2ast :: ParseEvent (AST nts t) nts t -> AST nts t 196 | event2ast (TermE t) = Leaf t 197 | event2ast (NonTE (nts, ss, asts)) = AST nts ss asts 198 | event2ast EpsE = LeafEps 199 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Pretty.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleInstances, DefaultSignatures, UndecidableInstances 2 | , OverloadedStrings #-} 3 | {-| 4 | Module : Text.ANTLR.Pretty 5 | Description : A pretty-printing type class to be used across antlr-haskell modules 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | I want to have something like Show whereby every time I add a new type to the 13 | system, I can implement a function that gets called by existing code which 14 | happens to have types that get parametrized by that type. I don't want to 15 | modify an existing file, and centralizing all of the types in my system into a 16 | single file makes little sense, because then that one file becomes a hub and a 17 | single point of failure. 18 | 19 | * I need a typeclass (no modifying existing files, but they need to call my 20 | new code without passing around a new show function) 21 | 22 | * The prettify function of that typeclass needs to return a state monad so 23 | that recursive calls keep the state 24 | 25 | * A pshow function needs to run execState on the prettify function with an 26 | initial indentation of zero (along with any other future state values...)
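For example, a minimal user-defined instance might look like this (an
illustrative sketch, not code from this library):

> data Coord = Coord Int Int
> instance Prettify Coord where
>   prettify (Coord x y) = do
>     pStr' "Coord "
>     pParens (prettify x >> pStr' ", " >> prettify y)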
27 | 28 | -} 29 | module Text.ANTLR.Pretty where 30 | import Control.Monad.Trans.State.Lazy 31 | import qualified Data.Map.Strict as M 32 | import Data.Data (toConstr, Data(..)) 33 | 34 | import qualified Data.Text as T 35 | 36 | -- | Pretty-printing state 37 | data PState = PState 38 | { indent :: Int -- ^ current indentation level 39 | , vis_chrs :: Int -- ^ number of visible characters consumed so far 40 | , str :: T.Text -- ^ the string, 'T.Text', that we've constructed so far 41 | , columns_soft :: Int -- ^ soft limit on number of columns to consume per row 42 | , columns_hard :: Int -- ^ hard limit on number of columns to consume per row 43 | , curr_col :: Int -- ^ column number we're on in the current row of 'str' 44 | , curr_row :: Int -- ^ number of rows (newlines) we've printed to 'str' 45 | } 46 | 47 | -- | The pretty state monad 48 | type PrettyM val = State PState val 49 | 50 | -- | No value being threaded through the monad (because result is in 'str') 51 | type Pretty = PrettyM () 52 | 53 | -- | Define the 'Prettify' type class for your pretty-printable type @t@. 54 | class Prettify t where 55 | {-# MINIMAL prettify #-} 56 | 57 | -- | Defines how to pretty-print some type. 58 | prettify :: t -> Pretty 59 | default prettify :: (Show t) => t -> Pretty 60 | prettify = rshow 61 | 62 | -- | Lists are pretty-printed specially. 63 | prettifyList :: [t] -> Pretty 64 | prettifyList = prettifyList_ 65 | 66 | -- | Initial Pretty state with safe soft and hard column defaults. 67 | initPState = PState 68 | { indent = 0 -- Indentation level 69 | , vis_chrs = 0 -- Number of visible characters consumed. 70 | , str = T.empty -- The string 71 | , columns_soft = 100 -- Soft limit on terminal width. 72 | , columns_hard = 120 -- Hard limit on terminal width. 73 | , curr_col = 0 -- Column position in the current row. 74 | , curr_row = 0 -- Number of newlines seen 75 | } 76 | 77 | -- | Prettify a string by putting it on the end of the current string state 78 | pLine :: T.Text -> Pretty 79 | pLine s = do 80 | pStr s 81 | _pNewLine 82 | 83 | -- | Pretty print a literal string by just printing the string. 84 | pStr' :: String -> Pretty 85 | pStr' = pStr . T.pack 86 | 87 | -- | This currently assumes all input strings contain no newlines, and that this is 88 | -- only called on relatively small strings because strings running over the end 89 | -- of the hard column limit get dumped onto the next line __no matter what__. 90 | -- T.Texts can run over the soft limit, but hitting the soft limit after a call 91 | -- to 'pStr' forces a newline. 92 | pStr :: T.Text -> Pretty 93 | pStr s = do 94 | pstate <- get 95 | _doIf _pNewLine (T.length s + curr_col pstate > columns_hard pstate && curr_col pstate /= 0) 96 | pstate <- get 97 | _doIf _pIndent (curr_col pstate == 0 && indent pstate > 0) 98 | pstate <- get 99 | put $ pstate 100 | { str = T.append (str pstate) s 101 | , curr_col = (curr_col pstate) + T.length s 102 | } 103 | pstate <- get 104 | _doIf _pNewLine (curr_col pstate > columns_soft pstate) 105 | 106 | -- | Print a single character to the output. 107 | pChr :: Char -> Pretty 108 | pChr c = pStr $ T.singleton c 109 | 110 | -- | Gets rid of if-then-else lines in the Pretty monad code: 111 | _doIf fncn True = fncn 112 | _doIf fncn False = return () 113 | 114 | -- | Indent by the number of spaces specified in the state. 
115 | _pIndent :: Pretty 116 | _pIndent = do 117 | pstate <- get 118 | put $ pstate 119 | { str = str pstate `T.append` T.replicate (indent pstate) (T.singleton ' ') 120 | , curr_col = curr_col pstate + indent pstate 121 | , vis_chrs = vis_chrs pstate + indent pstate 122 | } 123 | 124 | -- | Insert a newline 125 | _pNewLine :: Pretty 126 | _pNewLine = do 127 | pstate <- get 128 | put $ pstate 129 | { str = T.snoc (str pstate) '\n' 130 | , curr_col = 0 131 | , curr_row = curr_row pstate + 1 132 | } 133 | 134 | -- | Run the pretty-printer, returning a 'T.Text'. 135 | pshow :: (Prettify t) => t -> T.Text 136 | pshow t = str $ execState (prettify t) initPState 137 | 138 | -- | Run the pretty-printer, returning a 'String'. 139 | pshow' :: (Prettify t) => t -> String 140 | pshow' = T.unpack . pshow 141 | 142 | pshowList :: (Prettify t) => [t] -> T.Text 143 | pshowList t = str $ execState (prettifyList t) initPState 144 | 145 | pshowList' :: (Prettify t) => [t] -> String 146 | pshowList' = T.unpack . pshowList 147 | 148 | -- | Run the pretty-printer with a specific indentation level. 149 | pshowIndent :: (Prettify t) => Int -> t -> T.Text 150 | pshowIndent i t = str $ execState (prettify t) $ initPState { indent = i } 151 | 152 | -- | Plain-vanilla show of something in the 'Pretty' state monad. 153 | rshow :: (Show t) => t -> Pretty 154 | rshow t = do 155 | pstate <- get 156 | let s = show t 157 | put $ pstate 158 | { str = str pstate `T.append` T.pack s 159 | , curr_row = curr_row pstate + (T.length . T.filter (== '\n')) (T.pack s) 160 | , curr_col = curr_col pstate -- TODO 161 | } 162 | 163 | -- | Parenthesize something in 'Pretty'. 164 | pParens fncn = do 165 | pChr '(' 166 | fncn 167 | pChr ')' 168 | 169 | -- | Increment the indentation level by modifying the pretty-printer state. 170 | incrIndent :: Int -> Pretty 171 | incrIndent n = do 172 | pstate <- get 173 | put $ pstate { indent = indent pstate + n } 174 | 175 | -- | Like 'incrIndent' but set indentation level instead of incrementing. 176 | setIndent :: Int -> Pretty 177 | setIndent n = do 178 | pstate <- get 179 | put $ pstate { indent = n } 180 | 181 | -- | Prettify the given value and compute the number of characters consumed as a 182 | -- result. 183 | pCount :: (Prettify v) => v -> PrettyM Int 184 | pCount v = do 185 | n0 <- (T.length . str) <$> get 186 | prettify v 187 | n1 <- (T.length . str) <$> get 188 | return (n1 - n0) 189 | 190 | -- | Pretty-print a list with one entry per line. 191 | pListLines :: (Prettify v) => [v] -> Pretty 192 | pListLines vs = do 193 | pStr $ T.pack "[ " 194 | col0 <- curr_col <$> get 195 | i0 <- indent <$> get 196 | setIndent (col0 - 2) 197 | sepBy (pLine T.empty >> (pStr $ T.pack ", ")) (map prettify vs) 198 | pLine T.empty >> pChr ']' 199 | setIndent i0 -- Reset indentation back to what it was 200 | 201 | instance (Prettify k, Prettify v) => Prettify (M.Map k v) where 202 | prettify m = do 203 | -- (5 == length of "Map: ") ==> TODO: indentation "discipline" 204 | pStr "Map: "; incrIndent 5 205 | prettify $ M.toList m -- TODO: prettier map 206 | incrIndent (-5) 207 | 208 | instance (Prettify v) => Prettify (Maybe v) where 209 | prettify Nothing = pStr "Nope" 210 | prettify (Just v) = pStr "Yep" >> pParens (prettify v) 211 | 212 | -- | Prettify a list with possibly more than one entry per line.
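--
-- For example, via the default 'prettifyList' (illustrative):
--
-- >>> pshow' [1, 2, 3 :: Int]
-- "[1, 2, 3]"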
213 | prettifyList_ [] = pStr "[]" 214 | prettifyList_ vs = do 215 | pChr '[' 216 | sepBy (pStr ", ") (map prettify vs) 217 | pChr ']' 218 | 219 | instance (Prettify v) => Prettify [v] where 220 | prettify = prettifyList 221 | 222 | -- TODO: template haskell-ify for larger tuples 223 | instance (Prettify a, Prettify b) => Prettify (a,b) where 224 | prettify (a,b) = do 225 | pChr '(' 226 | prettify a 227 | pChr ',' 228 | prettify b 229 | pChr ')' 230 | 231 | instance (Prettify a, Prettify b, Prettify c) => Prettify (a,b,c) where 232 | prettify (a,b,c) = do 233 | pChr '(' 234 | prettify a 235 | pChr ',' 236 | prettify b 237 | pChr ',' 238 | prettify c 239 | pChr ')' 240 | 241 | instance (Prettify a, Prettify b, Prettify c, Prettify d) => Prettify (a,b,c,d) where 242 | prettify (a,b,c,d) = do 243 | pChr '(' 244 | prettify a 245 | pChr ',' 246 | prettify b 247 | pChr ',' 248 | prettify c 249 | pChr ',' 250 | prettify d 251 | pChr ')' 252 | 253 | -- | Pretty-print a list of values, separated by some other pretty-printer. 254 | sepBy s [] = return () 255 | sepBy s (v:vs) = foldl (_sepBy s) v vs 256 | 257 | -- | Reorder pretty-printer bind. 258 | _sepBy s ma mb = ma >> s >> mb 259 | 260 | instance Prettify Char where 261 | prettify = pChr 262 | prettifyList = pStr . T.pack 263 | 264 | instance Prettify () where prettify = rshow 265 | instance Prettify Bool where prettify = rshow 266 | instance Prettify Int where prettify = rshow 267 | 268 | instance Prettify Double where prettify = rshow 269 | 270 | -------------------------------------------------------------------------------- /src/Text/ANTLR/Set.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs, DeriveAnyClass, DeriveGeneric, OverloadedStrings, DeriveLift 2 | , QuasiQuotes, TemplateHaskell, DeriveDataTypeable #-} 3 | {-| 4 | Module : Text.ANTLR.Set 5 | Description : Entrypoint for swapping out different underlying set representations 6 | Copyright : (c) Karl Cronburg, 2018 7 | License : BSD3 8 | Maintainer : karl@cs.tufts.edu 9 | Stability : experimental 10 | Portability : POSIX 11 | 12 | -} 13 | module Text.ANTLR.Set 14 | ( Set, null, size, member, notMember 15 | , empty, singleton, insert, delete, union, unions 16 | , difference, intersection, filter, map, foldr, foldl', fold 17 | , toList, fromList, (\\), findMin, maybeMin 18 | , Hashable(..), Generic(..) 19 | ) where 20 | import Text.ANTLR.Pretty 21 | 22 | import GHC.Generics (Generic, Rep) 23 | import Data.Hashable (Hashable(..)) 24 | import Language.Haskell.TH.Syntax (Lift(..)) 25 | 26 | import qualified Data.Functor as F 27 | import qualified Control.Applicative as A 28 | import qualified Data.Foldable as Foldable 29 | 30 | import Data.Map ( Map(..) ) 31 | import qualified Data.Map as M 32 | 33 | import qualified Data.HashSet as S 34 | import Data.HashSet as S 35 | ( HashSet(..), member, toList, union 36 | , null, empty, map, size, singleton, insert 37 | , delete, unions, difference, intersection, foldl' 38 | , fromList 39 | ) 40 | 41 | import Prelude hiding (null, filter, map, foldr, foldl) 42 | 43 | -- | Use a hash-based set (hashable keys) for our internal set representation 44 | -- during parsing. 45 | type Set = S.HashSet 46 | 47 | -- | Is @e@ not a member of the set @s@. 48 | notMember e s = not $ member e s 49 | 50 | -- | Set fold 51 | fold = S.foldr 52 | 53 | -- | Set fold 54 | foldr = S.foldr 55 | 56 | -- | Find the minimum value of an orderable set. 
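--
-- For example (illustrative):
--
-- >>> findMin (fromList [3, 1, 2 :: Int])
-- 1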
57 | findMin :: (Ord a, Hashable a) => Set a -> a 58 | findMin = minimum . toList 59 | 60 | --maybeMin :: (Ord a, Hashable a) => Set a -> Maybe a 61 | -- | Get minimum of a set without erroring out on empty set. 62 | maybeMin as 63 | | S.size as == 0 = Nothing 64 | | otherwise = Just $ findMin as 65 | 66 | infixl 9 \\ 67 | 68 | -- | Set difference 69 | (\\) :: (Hashable a, Eq a) => Set a -> Set a -> Set a 70 | m1 \\ m2 = difference m1 m2 71 | 72 | instance (Hashable a, Eq a, Lift a) => Lift (S.HashSet a) where 73 | lift set = [| fromList $(lift $ toList set) |] 74 | 75 | instance (Hashable k, Hashable v) => Hashable (Map k v) where 76 | hashWithSalt salt mp = salt `hashWithSalt` M.toList mp 77 | 78 | instance (Prettify a, Hashable a, Eq a) => Prettify (S.HashSet a) where 79 | prettify s = do 80 | pStr "Set: "; incrIndent 5 81 | pListLines $ toList s 82 | incrIndent (-5) 83 | pLine "" 84 | 85 | --filter :: (Hashable a, Eq a) => (a -> Bool) -> Set a -> Set a 86 | -- | Set filter 87 | filter f s = S.filter f s 88 | 89 | --instance (Hashable a, Eq a) => Hashable (S.HashSet a) where 90 | -- hashWithSalt salt set = salt `hashWithSalt` S.toList (run set) 91 | 92 | 93 | {- 94 | 95 | --import Data.Set.Monad (Set(..), member, toList, union, notMember) 96 | --import qualified Data.Set.Monad as Set 97 | 98 | import Prelude hiding (null, filter, map, foldr, foldl) 99 | import qualified Data.List as L 100 | --import qualified Data.Set as S 101 | import qualified Data.Functor as F 102 | import qualified Control.Applicative as A 103 | import qualified Data.Foldable as Foldable 104 | 105 | import Data.Monoid 106 | import Data.Foldable (Foldable) 107 | import Control.Arrow 108 | import Control.Monad 109 | import Control.DeepSeq 110 | 111 | import Data.Hashable (Hashable(..)) 112 | import GHC.Generics (Generic, Rep) 113 | import Control.DeepSeq (NFData(..)) 114 | import Language.Haskell.TH.Syntax (Lift(..)) 115 | import Data.Data (Data(..)) 116 | 117 | import Data.Map ( Map(..) ) 118 | import qualified Data.Map as M 119 | 120 | import Text.ANTLR.Pretty 121 | 122 | instance (Hashable k, Hashable v) => Hashable (Map k v) where 123 | hashWithSalt salt mp = salt `hashWithSalt` M.toList mp 124 | 125 | instance (Hashable a, Eq a) => Hashable (Set a) where 126 | hashWithSalt salt set = salt `hashWithSalt` S.toList (run set) 127 | 128 | instance (Hashable a, Ord a) => Ord (Set a) where 129 | s1 <= s2 = S.toList (run s1) <= S.toList (run s2) 130 | 131 | data Set a where 132 | Prim :: (Hashable a, Eq a) => S.HashSet a -> Set a 133 | Return :: a -> Set a 134 | Bind :: Set a -> (a -> Set b) -> Set b 135 | Zero :: Set a 136 | Plus :: Set a -> Set a -> Set a 137 | 138 | instance (Data a) => Data (Set a) 139 | 140 | instance (Hashable a, Eq a, Lift a) => Lift (Set a) where 141 | lift set = [| fromList $(lift $ toList set) |] 142 | 143 | run :: (Hashable a, Eq a) => Set a -> S.HashSet a 144 | run (Prim s) = s 145 | run (Return a) = S.singleton a 146 | run (Zero) = S.empty 147 | run (Plus ma mb) = run ma `S.union` run mb 148 | run (Bind (Prim s) f) = S.foldl' S.union S.empty (S.map (run . 
f) s) 149 | run (Bind (Return a) f) = run (f a) 150 | run (Bind Zero _) = S.empty 151 | run (Bind (Plus (Prim s) ma) f) = run (Bind (Prim (s `S.union` run ma)) f) 152 | run (Bind (Plus ma (Prim s)) f) = run (Bind (Prim (run ma `S.union` s)) f) 153 | run (Bind (Plus (Return a) ma) f) = run (Plus (f a) (Bind ma f)) 154 | run (Bind (Plus ma (Return a)) f) = run (Plus (Bind ma f) (f a)) 155 | run (Bind (Plus Zero ma) f) = run (Bind ma f) 156 | run (Bind (Plus ma Zero) f) = run (Bind ma f) 157 | run (Bind (Plus (Plus ma mb) mc) f) = run (Bind (Plus ma (Plus mb mc)) f) 158 | run (Bind (Plus ma mb) f) = run (Plus (Bind ma f) (Bind mb f)) 159 | run (Bind (Bind ma f) g) = run (Bind ma (\a -> Bind (f a) g)) 160 | 161 | instance F.Functor Set where 162 | fmap = liftM 163 | 164 | instance A.Applicative Set where 165 | pure = return 166 | (<*>) = ap 167 | 168 | instance A.Alternative Set where 169 | empty = Zero 170 | (<|>) = Plus 171 | 172 | instance Monad Set where 173 | return = Return 174 | (>>=) = Bind 175 | 176 | instance MonadPlus Set where 177 | mzero = Zero 178 | mplus = Plus 179 | 180 | instance (Hashable a, Eq a) => Monoid (Set a) where 181 | mempty = empty 182 | mappend = union 183 | mconcat = unions 184 | 185 | instance Foldable Set where 186 | foldr f def m = 187 | case m of 188 | Prim s -> S.foldr f def s 189 | Return a -> f a def 190 | Zero -> def 191 | Plus ma mb -> Foldable.foldr f (Foldable.foldr f def ma) mb 192 | Bind s g -> Foldable.foldr f' def s 193 | where f' x b = Foldable.foldr f b (g x) 194 | 195 | instance (Hashable a, Eq a) => Eq (Set a) where 196 | s1 == s2 = run s1 == run s2 197 | 198 | --instance (Hashable a, Eq a, Ord a) => Ord (Set a) where 199 | -- compare s1 s2 = compare (run s1) (run s2) 200 | 201 | instance (Show a, Hashable a, Eq a) => Show (Set a) where 202 | show = show . run 203 | 204 | instance (Prettify a, Hashable a, Eq a) => Prettify (Set a) where 205 | prettify s = do 206 | pStr "Set: "; incrIndent 5 207 | pListLines $ toList s 208 | incrIndent (-5) 209 | pLine "" 210 | 211 | instance (Read a, Hashable a, Eq a) => Read (Set a) where 212 | readsPrec i s = L.map (first Prim) (readsPrec i s) 213 | 214 | instance (NFData a, Hashable a, Eq a) => NFData (Set a) where 215 | rnf = rnf . run 216 | 217 | infixl 9 \\ 218 | 219 | (\\) :: (Hashable a, Eq a) => Set a -> Set a -> Set a 220 | m1 \\ m2 = difference m1 m2 221 | 222 | null :: (Hashable a, Eq a) => Set a -> Bool 223 | null = S.null . run 224 | 225 | size :: (Hashable a, Eq a) => Set a -> Int 226 | size = S.size . 
run 227 | 228 | member :: (Hashable a, Eq a) => a -> Set a -> Bool 229 | member a s = S.member a (run s) 230 | 231 | notMember :: (Hashable a, Eq a) => a -> Set a -> Bool 232 | notMember a t = not (member a t) 233 | 234 | empty :: (Hashable a, Eq a) => Set a 235 | empty = Prim S.empty 236 | 237 | singleton :: (Hashable a, Eq a) => a -> Set a 238 | singleton a = Prim (S.singleton a) 239 | 240 | insert :: (Hashable a, Eq a) => a -> Set a -> Set a 241 | insert a s = Prim (S.insert a (run s)) 242 | 243 | delete :: (Hashable a, Eq a) => a -> Set a -> Set a 244 | delete a s = Prim (S.delete a (run s)) 245 | 246 | union :: (Hashable a, Eq a) => Set a -> Set a -> Set a 247 | union s1 s2 = Prim (run s1 `S.union` run s2) 248 | 249 | unions :: (Hashable a, Eq a) => [Set a] -> Set a 250 | unions ss = Prim (S.unions (L.map run ss)) 251 | 252 | difference :: (Hashable a, Eq a) => Set a -> Set a -> Set a 253 | difference s1 s2 = Prim (S.difference (run s1) (run s2)) 254 | 255 | intersection :: (Hashable a, Eq a) => Set a -> Set a -> Set a 256 | intersection s1 s2 = Prim (S.intersection (run s1) (run s2)) 257 | 258 | filter :: (Hashable a, Eq a) => (a -> Bool) -> Set a -> Set a 259 | filter f s = Prim (S.filter f (run s)) 260 | 261 | map :: (Hashable a, Eq a, Hashable b, Eq b) => (a -> b) -> Set a -> Set b 262 | map f s = Prim (S.map f (run s)) 263 | 264 | foldr :: (Hashable a, Eq a) => (a -> b -> b) -> b -> Set a -> b 265 | foldr f z s = S.foldr f z (run s) 266 | 267 | fold :: (Hashable a, Eq a) => (a -> b -> b) -> b -> Set a -> b 268 | fold f z s = S.foldr f z (run s) 269 | 270 | foldl' :: (Hashable a, Eq a) => (b -> a -> b) -> b -> Set a -> b 271 | foldl' f z s = S.foldl' f z (run s) 272 | 273 | toList :: (Hashable a, Eq a) => Set a -> [a] 274 | toList = S.toList . run 275 | 276 | fromList :: (Hashable a, Eq a) => [a] -> Set a 277 | fromList as = Prim (S.fromList as) 278 | 279 | findMin :: (Ord a, Hashable a) => Set a -> a 280 | findMin = minimum . toList 281 | 282 | maybeMin :: (Ord a, Hashable a) => Set a -> Maybe a 283 | maybeMin as 284 | | size as == 0 = Nothing 285 | | otherwise = Just $ findMin as 286 | 287 | -} 288 | 289 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | # resolver: nightly-2019-04-10 2 | resolver: lts-16.15 3 | 4 | packages: 5 | - . 
6 | 7 | -------------------------------------------------------------------------------- /test/GrammarReader/GrammarReader.hs: 1 | import Text.ParserCombinators.Parsec as Parsec 2 | import Data.Char 3 | import ParserGenerator.AllStar 4 | 5 | -- Functions for parsing a context-free grammar in BNF form 6 | 7 | grammarFile = endBy production eol 8 | 9 | production = 10 | do l <- nonterminal 11 | skipMany spacesAndTabs 12 | string "::=" 13 | skipMany spacesAndTabs 14 | r <- rightSide 15 | return (l, r) 16 | 17 | rightSide = sepBy expansion expansionSep 18 | 19 | expansion = many symbol 20 | 21 | expansionSep = 22 | do skipMany spacesAndTabs 23 | char '|' 24 | skipMany spacesAndTabs 25 | 26 | symbol = nonterminal <|> terminal 27 | 28 | nonterminal = upper 29 | 30 | terminal = lower 31 | 32 | spacesAndTabs = oneOf " \t" 33 | 34 | eol = char '\n' 35 | 36 | 37 | parseGrammar :: String -> Either ParseError [(Char, [String])] 38 | parseGrammar input = Parsec.parse grammarFile "(unknown)" input 39 | 40 | 41 | -- Functions for converting a parsed grammar to an ATN environment 42 | 43 | grammarToATNEnv g = 44 | case parseGrammar g of 45 | Left e -> error ("could not parse grammar: " ++ show e) 46 | Right productions -> 47 | let (nonterminals, _) = unzip productions 48 | in zip nonterminals (map productionToATN productions) 49 | 50 | 51 | productionToATN (nt, expansions) = 52 | map (\(n, exp) -> expansionToATNPath nt exp n) (zip [1..] expansions) 53 | 54 | expansionToATNPath nt exp choiceNum = 55 | let buildMiddleAndFinalStates n symbols = 56 | case symbols of 57 | [] -> [(MIDDLE n, EPS, FINAL nt)] 58 | s : symbols' -> 59 | let edgeLabel = if isUpper s then NT s else T s 60 | in (MIDDLE n, edgeLabel, MIDDLE (n + 1)) : buildMiddleAndFinalStates (n + 1) symbols' 61 | in (INIT nt, EPS, CHOICE nt choiceNum) : 62 | (CHOICE nt choiceNum, EPS, MIDDLE 1) : 63 | buildMiddleAndFinalStates 1 exp 64 | 65 | main = 66 | do g <- getContents 67 | print (grammarToATNEnv g) 68 | -------------------------------------------------------------------------------- /test/GrammarReader/output/generated_atn_lookup.txt: 1 | [('S',[[(INIT 'S',EPS,CHOICE 'S' 1),(CHOICE 'S' 1,EPS,MIDDLE 1),(MIDDLE 1,NT 'A',MIDDLE 2),(MIDDLE 2,T 'c',MIDDLE 3),(MIDDLE 3,EPS,FINAL 'S')],[(INIT 'S',EPS,CHOICE 'S' 2),(CHOICE 'S' 2,EPS,MIDDLE 1),(MIDDLE 1,NT 'A',MIDDLE 2),(MIDDLE 2,T 'd',MIDDLE 3),(MIDDLE 3,EPS,FINAL 'S')]]),('A',[[(INIT 'A',EPS,CHOICE 'A' 1),(CHOICE 'A' 1,EPS,MIDDLE 1),(MIDDLE 1,T 'a',MIDDLE 2),(MIDDLE 2,NT 'A',MIDDLE 3),(MIDDLE 3,EPS,FINAL 'A')],[(INIT 'A',EPS,CHOICE 'A' 2),(CHOICE 'A' 2,EPS,MIDDLE 1),(MIDDLE 1,T 'b',MIDDLE 2),(MIDDLE 2,EPS,FINAL 'A')]])] 2 | -------------------------------------------------------------------------------- /test/GrammarReader/sample_grammars/sample_bnf_grammar.txt: 1 | S ::= Ac | Ad 2 | A ::= aA | b 3 | 4 | -------------------------------------------------------------------------------- /test/allstar/AllStarTests.hs: 1 | {-# LANGUAGE TypeFamilies #-} 2 | 3 | module AllStarTests where 4 | 5 | --import Test.HUnit 6 | import Text.ANTLR.Allstar.ParserGenerator 7 | import qualified Data.Set as DS 8 | import Text.ANTLR.Parser (HasEOF(..)) 9 | import Text.ANTLR.Grammar (Ref(..)) 10 | 11 | import 
Test.Framework 12 | import Test.Framework.Providers.HUnit 13 | import Text.ANTLR.HUnit 14 | import Text.ANTLR.Pretty 15 | 16 | --------------------------------TESTING----------------------------------------- 17 | 18 | {- instance Token Char where 19 | type Label Char = Char 20 | type Literal Char = Char 21 | getLabel c = c 22 | getLiteral c = c 23 | 24 | instance Token (a, b) where 25 | type Label (a, b) = a 26 | type Literal (a, b) = b 27 | getLabel (a, b) = a 28 | getLiteral (a, b) = b -} 29 | 30 | instance (Show a, Show b) => Prettify (Either a b) where prettify = rshow 31 | 32 | instance Ref Char where 33 | type Sym Char = Char 34 | getSymbol = id 35 | 36 | instance HasEOF Char where 37 | type StripEOF Char = Char 38 | isEOF c = False 39 | stripEOF c = Just c 40 | 41 | dumbTokenizer [] = [] 42 | dumbTokenizer (t:ts) = [(t,ts)] 43 | 44 | atnEnv = DS.fromList [ -- First path through the 'S' ATN 45 | (Init 'S', GS EPS, Middle 'S' 0 0), 46 | (Middle 'S' 0 0, GS (NT 'A'), Middle 'S' 0 1), 47 | (Middle 'S' 0 1, GS (T 'c'), Middle 'S' 0 2), 48 | (Middle 'S' 0 2, GS EPS, Final 'S'), 49 | 50 | -- Second path through the 'S' ATN 51 | (Init 'S', GS EPS, Middle 'S' 1 0), 52 | (Middle 'S' 1 0, GS (NT 'A'), Middle 'S' 1 1), 53 | (Middle 'S' 1 1, GS (T 'd'), Middle 'S' 1 2), 54 | (Middle 'S' 1 2, GS EPS, Final 'S'), 55 | 56 | -- First path through the 'A' ATN 57 | (Init 'A', GS EPS, Middle 'A' 0 0), 58 | (Middle 'A' 0 0, GS (T 'a'), Middle 'A' 0 1), 59 | (Middle 'A' 0 1, GS (NT 'A'), Middle 'A' 0 2), 60 | (Middle 'A' 0 2, GS EPS, Final 'A'), 61 | 62 | -- Second path through the 'A' ATN 63 | (Init 'A', GS EPS, Middle 'A' 1 0), 64 | (Middle 'A' 1 0, GS (T 'b'), Middle 'A' 1 1), 65 | (Middle 'A' 1 1, GS EPS, Final 'A')] 66 | 67 | 68 | -- For now, I'm only checking whether the input was accepted--not checking the derivation. 69 | 70 | -- Example from the manual trace of ALL(*)'s execution 71 | parseTest1 = ((@=?) --"for parse dumbTokenizer [a, b, c]," 72 | (Right (Node 'S' [NT 'A', T 'c'] 73 | [Node 'A' [T 'a', NT 'A'] 74 | [Leaf 'a', 75 | Node 'A' [T 'b'] 76 | [Leaf 'b']], 77 | Leaf 'c'])) 78 | (parse dumbTokenizer ['a', 'b', 'c'] (NT 'S') atnEnv True)) 79 | 80 | -- Example #1 from the ALL(*) paper 81 | parseTest2 = ((@=?) --"for parse dumbTokenizer [b, c]," 82 | (Right (Node 'S' [NT 'A', T 'c'] 83 | [Node 'A' [T 'b'] 84 | [Leaf 'b'], 85 | Leaf 'c'])) 86 | (parse dumbTokenizer ['b', 'c'] (NT 'S') atnEnv True)) 87 | 88 | -- Example #2 from the ALL(*) paper 89 | parseTest3 = ((@=?) --"for parse dumbTokenizer [b, d]," 90 | (Right (Node 'S' [NT 'A', T 'd'] 91 | [Node 'A' [T 'b'] 92 | [Leaf 'b'], 93 | Leaf 'd'])) 94 | (parse dumbTokenizer ['b', 'd'] (NT 'S') atnEnv True)) 95 | 96 | -- Input that requires more recursive traversals of the A ATN 97 | parseTest4 = ((@=?) --"for parse dumbTokenizer [a a a b c]," 98 | (Right (Node 'S' [NT 'A', T 'c'] 99 | [Node 'A' [T 'a', NT 'A'] 100 | [Leaf 'a', 101 | Node 'A' [T 'a', NT 'A'] 102 | [Leaf 'a', 103 | Node 'A' [T 'a', NT 'A'] 104 | [Leaf 'a', 105 | Node 'A' [T 'b'] 106 | [Leaf 'b']]]], 107 | Leaf 'c'])) 108 | (parse dumbTokenizer ['a', 'a', 'a', 'b', 'c'] (NT 'S') atnEnv True)) 109 | 110 | -- Make sure that the result of parsing an out-of-language string has a Left tag. 111 | parseTest5 = ((@=?) 
--"for parse dumbTokenizer [a b a c]," 112 | True 113 | (let parseResult = parse dumbTokenizer ['a', 'b', 'a', 'c'] (NT 'S') atnEnv True 114 | isLeft pr = case pr of 115 | Left _ -> True 116 | _ -> False 117 | in isLeft parseResult)) 118 | 119 | -- To do: Update these tests so that they use the new ATN state representation. 120 | {- 121 | 122 | conflictsTest = ((@=?) --"for getConflictSetsPerLoc()" 123 | 124 | ([[(MIDDLE 5, 1, []), (MIDDLE 5, 2, []),(MIDDLE 5, 3, [])], 125 | [(MIDDLE 5, 1, [MIDDLE 1]), (MIDDLE 5, 2, [MIDDLE 1])], 126 | [(MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])]] :: [[ATNConfig Char]]) 127 | 128 | (getConflictSetsPerLoc (D [(MIDDLE 5, 1, []), 129 | (MIDDLE 5, 2, []), 130 | (MIDDLE 5, 3, []), 131 | (MIDDLE 5, 1, [MIDDLE 1]), 132 | (MIDDLE 5, 2, [MIDDLE 1]), 133 | (MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])]))) 134 | 135 | prodsTest = ((@=?) --"for getProdSetsPerState()" 136 | 137 | ([[(MIDDLE 5, 1, []), 138 | (MIDDLE 5, 2, []), 139 | (MIDDLE 5, 3, []), 140 | (MIDDLE 5, 1, [MIDDLE 1]), 141 | (MIDDLE 5, 2, [MIDDLE 1])], 142 | 143 | [(MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])]] :: [[ATNConfig Char]]) 144 | 145 | (getProdSetsPerState (D [(MIDDLE 5, 1, []), 146 | (MIDDLE 5, 2, []), 147 | (MIDDLE 5, 3, []), 148 | (MIDDLE 5, 1, [MIDDLE 1]), 149 | (MIDDLE 5, 2, [MIDDLE 1]), 150 | (MIDDLE 7, 2, [MIDDLE 6, MIDDLE 1])]))) 151 | 152 | -} 153 | 154 | 155 | ambigATNEnv = DS.fromList [(Init 'S', GS EPS, Middle 'S' 0 0), 156 | (Middle 'S' 0 0, GS (T 'a'), Middle 'S' 0 1), 157 | (Middle 'S' 0 1, GS EPS, Final 'S'), 158 | 159 | (Init 'S', GS EPS, Middle 'S' 1 0), 160 | (Middle 'S' 1 0, GS (T 'a'), Middle 'S' 1 1), 161 | (Middle 'S' 1 1, GS EPS, Final 'S'), 162 | 163 | (Init 'S', GS EPS, Middle 'S' 2 0), 164 | (Middle 'S' 2 0, GS (T 'a'), Middle 'S' 2 1), 165 | (Middle 'S' 2 1, GS (T 'b'), Middle 'S' 2 2), 166 | (Middle 'S' 2 2, GS EPS, Final 'S')] 167 | 168 | ambigParseTest1 = ((@=?) --"for parse dumbTokenizer [a]," 169 | True 170 | (let parseResult = parse dumbTokenizer ['a'] (NT 'S') ambigATNEnv True 171 | isLeft pr = case pr of 172 | Left _ -> True 173 | _ -> False 174 | in isLeft parseResult)) 175 | 176 | ambigParseTest2 = ((@=?) 
--"for parse dumbTokenizer [a b]," 177 | (Right (Node 'S' [T 'a', T 'b'] 178 | [Leaf 'a', 179 | Leaf 'b'])) 180 | (parse dumbTokenizer ['a', 'b'] (NT 'S') ambigATNEnv True)) 181 | 182 | 183 | tests = [testCase "parseTest1" parseTest1, 184 | testCase "parseTest2" parseTest2, 185 | testCase "parseTest3" parseTest3, 186 | testCase "parseTest4" parseTest4, 187 | testCase "parseTest5" parseTest5, 188 | 189 | --testCase "conflictsTest" conflictsTest, 190 | --testCase "prodsTest" prodsTest, 191 | 192 | testCase "ambigParseTest1" ambigParseTest1, 193 | testCase "ambigParseTest2" ambigParseTest2] 194 | 195 | --main = runTestTT (TestList tests) 196 | -------------------------------------------------------------------------------- /test/allstar/ConvertDFA.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-} 4 | module ConvertDFA where 5 | import Language.ANTLR4 6 | 7 | mkInt :: String -> Int 8 | mkInt _ = 3 9 | 10 | data ConvertIt = 11 | Start String 12 | | End Int 13 | deriving (Eq, Ord, Show) 14 | 15 | $( return [] ) 16 | 17 | [g4| 18 | grammar Convert; 19 | 20 | root : 'START' LexemeA -> Start 21 | | 'END' LexemeB -> End 22 | ; 23 | 24 | LexemeA : 'abc' -> String ; 25 | LexemeB : 'efg' -> mkInt ; 26 | |] 27 | 28 | -------------------------------------------------------------------------------- /test/allstar/ConvertP.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-} 4 | module ConvertP 5 | ( module ConvertDFA 6 | , module ConvertP 7 | ) where 8 | import Language.ANTLR4 9 | import ConvertDFA 10 | 11 | $(g4_parsers convertAST convertGrammar) 12 | 13 | -------------------------------------------------------------------------------- /test/allstar/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO.Unsafe (unsafePerformIO) 4 | import Data.Monoid 5 | import Test.Framework 6 | import Test.Framework.Providers.HUnit 7 | import Test.Framework.Providers.QuickCheck2 8 | import Test.HUnit 9 | import Test.QuickCheck (Property, quickCheck, (==>)) 10 | import qualified Test.QuickCheck.Monadic as TQM 11 | 12 | import qualified AllStarTests 13 | import qualified ConvertP 14 | import ConvertP (ConvertIt(..)) 15 | 16 | convertP_simple = 17 | case ConvertP.allstarParse (const False) "STARTabc" of 18 | (Right ast) -> ConvertP.ast2root ast @=? (Start "abc") 19 | (Left err) -> assertFailure err 20 | 21 | convertP_simple2 = 22 | case ConvertP.allstarParse (const False) "ENDefg" of 23 | (Right ast) -> ConvertP.ast2root ast @=? 
(End 3) 24 | (Left err) -> assertFailure err 25 | 26 | tests = 27 | [ testCase "convertP_simple" convertP_simple 28 | , testCase "convertP_simple2" convertP_simple2 29 | ] 30 | 31 | main :: IO () 32 | main = defaultMainWithOpts 33 | (AllStarTests.tests ++ tests) 34 | mempty 35 | 36 | -------------------------------------------------------------------------------- /test/allstar/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Sample grammar for ALL(\*) 3 | 4 | ``` 5 | S -> Ac | Ad 6 | 7 | A -> aA | b 8 | ``` 9 | 10 | ### ALL(\*) Input/output examples 11 | 12 | ```haskell 13 | *Test.AllStarTests> parse ['a', 'b', 'c'] (NT 'S') atnEnv 14 | (Just True, Node 'S' [Node 'A' [Leaf 'a', Node 'A' [Leaf 'b']], Leaf 'c']) 15 | ``` 16 | 17 | ```haskell 18 | *Test.AllStarTests> parse ['b', 'd'] (NT 'S') atnEnv 19 | (Just True, Node 'S' [Node 'A' [Leaf 'b'], Leaf 'd']) 20 | ``` 21 | 22 | ```haskell 23 | *Test.AllStarTests> parse ['a', 'a', 'a', 'a', 'b', 'c'] (NT 'S') atnEnv 24 | (Just True, Node 'S' [Node 'A' [Leaf 'a', Node 'A' [Leaf 'a', Node 'A' [Leaf 'a', Node 'A' [Leaf 'a', Nod 25 | ``` 26 | 27 | -------------------------------------------------------------------------------- /test/atn/ATN.hs: -------------------------------------------------------------------------------- 1 | module ATN where 2 | 3 | import Text.ANTLR.Grammar 4 | import Text.ANTLR.Allstar.ATN 5 | import Example.Grammar 6 | import Text.ANTLR.Set (fromList, union) 7 | import System.IO.Unsafe (unsafePerformIO) 8 | 9 | -- This is the Grammar from page 6 of the 10 | -- 'Adaptive LL(*) Parsing: The Power of Dynamic Analysis' 11 | -- paper, namely the expected transitions based on Figures 12 | -- 5 through 8: 13 | paperATNGrammar = (defaultGrammar "C" :: Grammar () String String ()) 14 | { ns = fromList ["S", "A"] 15 | , ts = fromList ["a", "b", "c", "d"] 16 | , s0 = "C" 17 | , ps = 18 | [ production "S" $ Prod Pass [NT "A", T "c"] 19 | , production "S" $ Prod Pass [NT "A", T "d"] 20 | , production "A" $ Prod Pass [T "a", NT "A"] 21 | , production "A" $ Prod Pass [T "b"] 22 | ] 23 | } 24 | 25 | s i = ("S", i) 26 | 27 | -- Names as shown in paper: 28 | pS = Start "S" 29 | pS1 = Middle "S" 0 0 30 | p1 = Middle "S" 0 1 31 | p2 = Middle "S" 0 2 32 | pS2 = Middle "S" 1 0 33 | p3 = Middle "S" 1 1 34 | p4 = Middle "S" 1 2 35 | pS' = Accept "S" 36 | 37 | pA = Start "A" 38 | pA1 = Middle "A" 2 0 39 | p5 = Middle "A" 2 1 40 | p6 = Middle "A" 2 2 41 | pA2 = Middle "A" 3 0 42 | p7 = Middle "A" 3 1 43 | pA' = Accept "A" 44 | 45 | exp_paperATN = ATN 46 | { _Δ = fromList 47 | -- Submachine for S: 48 | [ (pS, Epsilon, pS1) 49 | , (pS1, NTE "A", p1) 50 | , (p1, TE "c", p2) 51 | , (p2, Epsilon, pS') 52 | , (pS, Epsilon, pS2) 53 | , (pS2, NTE "A", p3) 54 | , (p3, TE "d", p4) 55 | , (p4, Epsilon, pS') 56 | -- Submachine for A: 57 | , (pA, Epsilon, pA1) 58 | , (pA1, TE "a", p5) 59 | , (p5, NTE "A", p6) 60 | , (p6, Epsilon, pA') 61 | , (pA, Epsilon, pA2) 62 | , (pA2, TE "b", p7) 63 | , (p7, Epsilon, pA') 64 | ] 65 | } 66 | 67 | --always _ = True 68 | --never _ = False 69 | 70 | addPredicates = paperATNGrammar 71 | { ps = 72 | ps paperATNGrammar ++ 73 | [ production "A" $ Prod (Sem (Predicate "always" ())) [T "a"] 74 | , production "A" $ Prod (Sem (Predicate "never" ())) [] 75 | , production "A" $ Prod (Sem (Predicate "always2" ())) [NT "A", T "a"] 76 | ] 77 | } 78 | 79 | (pX,pY,pZ) = (Start "A", Start "A", Start "A") 80 | pX1 = Middle "A" 4 2 81 | pX2 = Middle "A" 4 0 82 | pX3 = Middle "A" 4 1 83 | pX4 = 
Accept "A" 84 | 85 | pY1 = Middle "A" 5 1 86 | pY2 = Middle "A" 5 0 87 | pY3 = Accept "A" 88 | 89 | pZ1 = Middle "A" 6 3 90 | pZ2 = Middle "A" 6 0 91 | pZ3 = Middle "A" 6 1 92 | pZ4 = Middle "A" 6 2 93 | pZ5 = Accept "A" 94 | 95 | exp_addPredicates = ATN 96 | { _Δ = union (_Δ exp_paperATN) $ fromList 97 | [ (pX, Epsilon, pX1) 98 | , (pX1, PE $ Predicate "always" (), pX2) 99 | , (pX2, TE "a", pX3) 100 | , (pX3, Epsilon, pX4) 101 | 102 | , (pY, Epsilon, pY1) 103 | , (pY1, PE $ Predicate "never" (), pY2) 104 | , (pY2, Epsilon, pY3) 105 | 106 | , (pZ, Epsilon, pZ1) 107 | , (pZ1, PE $ Predicate "always2" (), pZ2) 108 | , (pZ2, NTE "A", pZ3) 109 | , (pZ3, TE "a", pZ4) 110 | , (pZ4, Epsilon, pZ5) 111 | ] 112 | } 113 | 114 | fireZeMissiles state = seq 115 | (unsafePerformIO $ putStrLn "Missiles fired.") 116 | undefined 117 | 118 | addMutators = addPredicates 119 | { ps = ps addPredicates ++ 120 | [ production "A" $ Prod (Action (Mutator "fireZeMissiles" ())) [] 121 | , production "S" $ Prod (Action (Mutator "identity" ())) [] 122 | ] 123 | } 124 | 125 | exp_addMutators = ATN 126 | { _Δ = union (_Δ exp_addPredicates) $ fromList 127 | [ (Start "A", Epsilon, Middle "A" 7 0) 128 | , (Middle "A" 7 0, ME $ Mutator "fireZeMissiles" (), Accept "A") 129 | , (Start "S", Epsilon, Middle "S" 8 0) 130 | , (Middle "S" 8 0, ME $ Mutator "identity" (), Accept "S") 131 | ] 132 | } 133 | 134 | 135 | -------------------------------------------------------------------------------- /test/atn/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | import Text.ANTLR.Allstar.ATN 3 | import ATN 4 | import Text.ANTLR.Set (fromList, (\\)) 5 | 6 | import System.IO.Unsafe (unsafePerformIO) 7 | import Data.Monoid 8 | import Test.Framework 9 | import Test.Framework.Providers.HUnit 10 | import Test.Framework.Providers.QuickCheck2 11 | import Test.HUnit 12 | import Test.QuickCheck (Property, quickCheck, (==>)) 13 | import qualified Test.QuickCheck.Monadic as TQM 14 | 15 | -- ATNs should be same: 16 | test_paperATNGrammar = 17 | atnOf paperATNGrammar 18 | @?= 19 | exp_paperATN 20 | 21 | -- Set difference to make debugging easier: 22 | test_paperATNGrammar2 = 23 | ((_Δ . atnOf) paperATNGrammar \\ _Δ exp_paperATN) 24 | @?= 25 | fromList [] 26 | 27 | test_addPredicates = 28 | atnOf addPredicates 29 | @?= 30 | exp_addPredicates 31 | 32 | test_addPredicates2 = 33 | ((_Δ . 
atnOf) addPredicates \\ _Δ exp_addPredicates) 34 | @?= 35 | fromList [] 36 | 37 | test_addMutators = 38 | atnOf addMutators 39 | @?= 40 | exp_addMutators 41 | 42 | main :: IO () 43 | main = defaultMainWithOpts 44 | [ testCase "paper_ATN_Grammar" test_paperATNGrammar 45 | , testCase "paper_ATN_Grammar2" test_paperATNGrammar2 46 | , testCase "paper_ATN_Predicates2" test_addPredicates2 47 | , testCase "paper_ATN_Predicates" test_addPredicates 48 | , testCase "paper_ATN_Mutators" test_addMutators 49 | ] mempty 50 | 51 | -------------------------------------------------------------------------------- /test/c/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | import Language.ANTLR4 3 | import CParser 4 | 5 | main :: IO () 6 | main = print "done" 7 | 8 | -------------------------------------------------------------------------------- /test/chisel/Language/Chisel/Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances #-} 4 | module Language.Chisel.Grammar 5 | ( ChiselNTSymbol(..), ChiselTSymbol(..), ChiselAST 6 | , lowerID, upperID, prim, int, arrow, lparen, rparen, pound 7 | , vertbar, colon, comma, atsymbol, carrot, dot, linecomm, ws 8 | , Primitive(..), chiselGrammar, TokenValue(..) 9 | , chiselAST, TokenName(..), chiselDFAs, lexeme2value, isWhitespace 10 | , ChiselToken(..), list, cons, append 11 | ) where 12 | import Language.ANTLR4 13 | import Language.Chisel.Syntax as S 14 | 15 | list a = [a] 16 | cons = (:) 17 | append = (++) 18 | 19 | [g4| 20 | grammar Chisel; 21 | chiselProd : prodSimple 22 | | '(' prodSimple ')' 23 | ; 24 | 25 | prodSimple : prodID formals magnitude alignment '->' group -> S.prodFMA 26 | | prodID formals '->' group -> S.prodF 27 | | prodID magnitude alignment '->' group -> S.prodMA 28 | | prodID magnitude '->' group -> S.prodM 29 | | LowerID prodID magnitude alignment '->' group -> S.prodNMA 30 | ; 31 | 32 | formals : LowerID formals -> cons 33 | | LowerID -> list 34 | ; 35 | 36 | magnitude : '|' '#' sizeArith '|' -> magWild 37 | | '|' sizeArith '|' -> magNorm 38 | | '|' prodID '|' -> magID 39 | ; 40 | 41 | alignment : '@' '(' sizeArith ')'; 42 | 43 | group : groupExp1 -> list 44 | | '(' groupExp ')' 45 | ; 46 | 47 | groupExp : groupExp1 -> list 48 | | groupExp1 ',' groupExp -> cons 49 | ; 50 | 51 | groupExp1 : '#' chiselProd -> gProdWild 52 | | '#' sizeArith -> gSizeWild 53 | | '(' flags ')' -> GFlags 54 | | chiselProd -> gProdNorm 55 | | sizeArith -> gSizeNorm 56 | | label -> GLabel 57 | | arith chiselProd -> gProdArith 58 | | arith prodApp -> GProdApp 59 | | '(' labels ')' -> GLabels 60 | ; 61 | 62 | flags : prodID -> list 63 | | prodID '|' flags -> cons 64 | ; 65 | 66 | labels : label -> list 67 | | label '|' labels -> cons 68 | ; 69 | 70 | label : LowerID ':' labelExp -> Label 71 | ; 72 | 73 | labelExp : '#' chiselProd -> lProdWild 74 | | '#' prodApp -> lProdAppWild 75 | | '#' sizeArith -> lSizeWild 76 | | chiselProd -> lProd 77 | | prodApp -> lProdApp 78 | | sizeArith -> lSize 79 | ; 80 | 81 | prodApp : prodID prodApp -> cons 82 | | prodID -> list 83 | ; 84 | 85 | sizeArith : arith Prim -> SizeArith 86 | | Prim -> singleArith 87 | ; 88 | 89 | arith : INT -> SizeInt 90 | | LowerID -> SizeID 91 | | arith '^' arith -> SizeExp 92 | ; 93 | 94 | prodID : UpperID -> id 95 | | 
UpperID '.' prodID -> append 96 | ; 97 | 98 | Prim : ( 'bit' | 'byte' ) 's'? -> Primitive; 99 | ArchPrim : ( 'page' | 'word' ) 's'? -> Primitive; 100 | UpperID : [A-Z][a-zA-Z0-9_]* -> String; 101 | LowerID : [a-z][a-zA-Z0-9_]* -> String; 102 | INT : [0-9]+ -> Int; 103 | LineComment : '//' (~ '\n')* '\n' -> String; 104 | WS : [ \t\n\r\f\v]+ -> String; 105 | |] 106 | 107 | -- Types used to the right of the '->' directive must instance Read 108 | 109 | isWhitespace T_LineComment = True 110 | isWhitespace T_WS = True 111 | isWhitespace _ = False 112 | 113 | {- Helper functions to construct all the various Tokens from either the desired 114 | - (arbitrary) lexeme or by looking it up based on the static lexeme it always 115 | - matches. -} 116 | lowerID x = Token T_LowerID (V_LowerID x) (length x) 117 | upperID x = Token T_UpperID (V_UpperID x) (length x) 118 | prim x = Token T_Prim (V_Prim x) (length $ show x) 119 | int x = Token T_INT (V_INT x) (length $ show x) 120 | arrow = lookupToken "->" 121 | lparen = lookupToken "(" 122 | rparen = lookupToken ")" 123 | pound = lookupToken "#" 124 | vertbar = lookupToken "|" 125 | colon = lookupToken ":" 126 | comma = lookupToken "," 127 | atsymbol = lookupToken "@" 128 | carrot = lookupToken "^" 129 | dot = lookupToken "." 130 | linecomm x = Token T_LineComment (V_LineComment x) (length x) 131 | ws x = Token T_WS (V_WS x) (length x) 132 | 133 | -------------------------------------------------------------------------------- /test/chisel/Language/Chisel/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-} 4 | module Language.Chisel.Parser 5 | ( module Language.Chisel.Grammar 6 | , Language.Chisel.Parser.tokenize, glrParseFast 7 | , parse, glrParse 8 | ) where 9 | import Language.ANTLR4 10 | import Language.Chisel.Syntax as S 11 | import Language.Chisel.Grammar 12 | 13 | $(g4_parsers chiselAST chiselGrammar) 14 | 15 | $(mkLRParser chiselAST chiselGrammar) 16 | 17 | parse = glrParse isWhitespace 18 | 19 | 20 | -------------------------------------------------------------------------------- /test/chisel/Language/Chisel/Syntax.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric #-} 2 | module Language.Chisel.Syntax where 3 | import Language.ANTLR4 (Data(..)) 4 | import Text.ANTLR.Pretty 5 | import Text.ANTLR.Set (Hashable(..), Generic(..)) 6 | 7 | data ChiselProd = ChiselProd 8 | { prodID :: UpperID 9 | , count :: Maybe LowerID 10 | , formals :: Maybe [Formal] 11 | , magnitude :: Maybe Magnitude 12 | , alignment :: Maybe SizeArith 13 | , rhs :: Group 14 | } 15 | 16 | prodFMA s f m a = ChiselProd s Nothing (Just f) (Just m) (Just a) 17 | prodF s f = ChiselProd s Nothing (Just f) Nothing Nothing 18 | prodMA s m a = ChiselProd s Nothing Nothing (Just m) (Just a) 19 | prodM s m = ChiselProd s Nothing Nothing (Just m) Nothing 20 | prodNMA s n m a = ChiselProd s (Just n) Nothing (Just m) (Just a) 21 | 22 | type Formal = String 23 | 24 | -- Whether or not '#' was used: 25 | type Wild = Bool 26 | 27 | data Magnitude = Mag Wild SizeArith 28 | 29 | magWild = Mag True -- Variable magnitude 30 | magNorm = Mag False -- Fixed magnitude 31 | magID = Mag False . 
SizeID 32 | 33 | type Alignment = SizeArith 34 | 35 | type Group = [GroupExp] 36 | 37 | -- Left False == no size annotation 38 | -- Left True == '#' annotation 39 | -- Right sz == fixed size annotation 40 | type Count = Either Wild SizeArith 41 | 42 | data GroupExp = 43 | GProd Count ChiselProd 44 | | GSize Wild SizeArith 45 | | GFlags [Flag] 46 | | GLabel Label 47 | | GLabels [Label] 48 | | GProdApp SizeArith ProdApp 49 | 50 | gProdWild = GProd (Left True) 51 | gProdNorm = GProd (Left False) 52 | gProdArith a = GProd (Right a) 53 | 54 | gSizeWild = GSize True 55 | gSizeNorm = GSize False 56 | 57 | type Flag = ProdID 58 | 59 | type ProdApp = [ProdID] 60 | 61 | type UpperID = String 62 | type LowerID = String 63 | type ProdID = String 64 | type LabelID = String 65 | 66 | data Label = Label LabelID LabelExp 67 | 68 | data LabelExp = 69 | LProd Wild ChiselProd 70 | | LProdApp Wild ProdApp 71 | | LSize Wild SizeArith 72 | 73 | lProdWild = LProd True 74 | lProd = LProd False 75 | 76 | lProdAppWild = LProdApp True 77 | lProdApp = LProdApp False 78 | 79 | lSizeWild = LSize True 80 | lSize = LSize False 81 | 82 | data SizeArith = 83 | SizeInt Int 84 | | SizeID LowerID 85 | | SizeExp SizeArith SizeArith 86 | | SizeArith SizeArith Primitive 87 | 88 | singleArith = SizeArith (SizeInt 1) 89 | 90 | data Primitive = Page | Word | Byte | Bit 91 | deriving (Show, Eq, Ord, Generic, Hashable, Data) 92 | 93 | lexeme2prim "page" = Just Page 94 | lexeme2prim "pages" = Just Page 95 | lexeme2prim "word" = Just Word 96 | lexeme2prim "words" = Just Word 97 | lexeme2prim "byte" = Just Byte 98 | lexeme2prim "bytes" = Just Byte 99 | lexeme2prim "bit" = Just Bit 100 | lexeme2prim "bits" = Just Bit 101 | lexeme2prim _ = Nothing 102 | 103 | instance Read Primitive where 104 | readsPrec _ input = case lexeme2prim input of 105 | Just x -> [(x,"")] 106 | Nothing -> [] 107 | 108 | instance Prettify Primitive where prettify = rshow 109 | 110 | -------------------------------------------------------------------------------- /test/chisel/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, TemplateHaskell, ScopedTypeVariables #-} 2 | module Main where 3 | -- Project imports go here, e.g.: 4 | --import Language.Chisel.Tokenizer 5 | import Text.ANTLR.Lex.Tokenizer (Token(..)) 6 | import Text.ANTLR.Parser (AST(..)) 7 | import Language.Chisel.Parser 8 | import Language.Chisel.Syntax 9 | import Text.ANTLR.Grammar (Grammar(..), ProdElem(..)) 10 | import Language.ANTLR4.FileOpener (open) 11 | import Language.ANTLR4.Boot.Syntax (Directive(..)) 12 | 13 | import System.IO.Unsafe (unsafePerformIO) 14 | import Data.Monoid 15 | import Test.Framework 16 | import Test.Framework.Providers.HUnit 17 | import Test.Framework.Providers.QuickCheck2 18 | import Test.HUnit hiding ((@?=), assertEqual) 19 | import Test.QuickCheck (Property, quickCheck, (==>)) 20 | import qualified Test.QuickCheck.Monadic as TQM 21 | 22 | import Text.ANTLR.HUnit 23 | import Debug.Trace as D 24 | import qualified Text.ANTLR.LR as LR 25 | import Text.ANTLR.Pretty (pshow) 26 | import qualified Data.Text as T 27 | 28 | chi = id 29 | 30 | ghc_val = [open| test/chisel/Language/Chisel/Examples/GHC.chi |] 31 | tokenizeGHC_val = tokenize ghc_val 32 | 33 | tokenizeGHC_exp = 34 | [ upperID "Heap", ws " ", lowerID "m", ws " ", lowerID "k", ws " ", arrow 35 | , ws "\n ", pound, ws " ", upperID "MegaBlock", ws " " 36 | , vertbar, int 2, carrot, lowerID "m", ws " ", prim Byte, vertbar, ws " " 37 | , atsymbol, lparen, 
int 2, carrot, lowerID "m", ws " ", prim Byte, rparen 38 | , ws " ", arrow, ws "\n ", linecomm "// -------Megablock \"Header\"------------------" 39 | , ws "\n ", lparen, ws " ", upperID "Descrs", ws " " 40 | , vertbar, int 2, carrot, lowerID "k", ws " ", prim Byte, vertbar, ws " " 41 | , arrow, ws "\n ", lparen, ws " ", lowerID "padMB", ws " ", prim Byte 42 | , ws "\n ", linecomm "// +++++++++++++++++++++++++++++++++++++++++\n" 43 | , ws " ", comma, ws " ", lowerID "bds", ws " " 44 | , colon, ws " ", lowerID "n", ws " ", upperID "BlockDescr", ws " " 45 | , vertbar, int 2, carrot, lowerID "d", ws " ", prim Byte, vertbar 46 | , ws " ", atsymbol, lparen, int 2, carrot, lowerID "k", ws " ", prim Byte, rparen 47 | , ws " ", arrow, ws "\n " 48 | , lparen, ws " ", lowerID "start", ws " ", colon, ws " " 49 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", ws "\n " 50 | , comma, ws " ", lowerID "free", ws " ", colon, ws " " 51 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", ws "\n " 52 | , comma, ws " ", lowerID "link", ws " ", colon, ws " " 53 | , upperID "Ptr", ws " ", upperID "BlockDescr", ws "\n " 54 | , comma, ws " ", lparen, ws " ", lowerID "back", ws " ", colon, ws " " 55 | , upperID "Ptr", ws " ", upperID "BlockDescr", ws "\n " 56 | , vertbar, ws " ", lowerID "bitmap", ws " ", colon, ws " " 57 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", ws "\n " 58 | , vertbar, ws " ", lowerID "scan", ws " ", colon, ws " " 59 | , upperID "Ptr", ws " ", upperID "Stg", dot, upperID "Word", rparen, ws "\n " 60 | , comma, ws " ", lowerID "gen", ws " ", colon, ws " " 61 | , upperID "Ptr", ws " ", upperID "Generation", ws "\n ", comma, ws " " 62 | , lowerID "gen_no", ws " ", colon, ws " " 63 | , upperID "Stg", dot, upperID "Word16", ws "\n " 64 | , comma, ws " ", lowerID "dest_no", ws " ", colon, ws " " 65 | , upperID "Stg", dot, upperID "Word16", ws "\n " 66 | , comma, ws " ", lowerID "node", ws " ", colon, ws " " 67 | , upperID "Stg", dot, upperID "Word16", ws "\n " 68 | , comma, ws " ", upperID "Flags", ws " " 69 | , vertbar, upperID "Stg", dot, upperID "Word16", vertbar, ws " ", arrow, ws "\n " 70 | , lparen, ws " ", upperID "LARGE", ws " ", vertbar, ws " " 71 | , upperID "EVACUATED", ws "", vertbar, ws " " 72 | , upperID "FREE", ws "\n ", vertbar, ws " " 73 | , upperID "PINNED", ws " ", vertbar, ws " " 74 | , upperID "MARKED", ws " ", vertbar, ws " " 75 | , upperID "KNOWN", ws "\n ", vertbar, ws " " 76 | , upperID "EXEC", ws " ", vertbar, ws " " 77 | , upperID "FRAGMENTED", ws " ", vertbar, ws " " 78 | , upperID "SWEPT", ws "\n ", vertbar, ws " " 79 | , upperID "COMPACT", ws " ", rparen, ws "\n ", comma, ws " " 80 | , lowerID "n_blocks", ws " ", colon, ws " " 81 | , upperID "Stg", dot, upperID "Word32", ws "\n " 82 | , comma, ws " ", lowerID "padD", ws " ", prim Byte, rparen, rparen, ws "\n " 83 | , linecomm "// +++++++++++++++++++++++++++++++++++++++++\n", ws " " 84 | , linecomm "// -------Megablock payload-------------------\n", ws " ", comma, ws " " 85 | , lowerID "blocks", ws " ", colon, ws " ", lowerID "n", ws " " 86 | , upperID "Block", ws " ", vertbar, int 2, carrot 87 | , lowerID "k", ws " ", prim Byte, vertbar, ws " " 88 | , atsymbol, lparen, int 2, carrot, lowerID "k", ws " ", prim Byte, rparen 89 | , ws " ", arrow, ws "\n ", lparen, ws " " 90 | , lowerID "closures", ws " ", colon, ws " ", pound, ws " " 91 | , upperID "Stg", dot, upperID "Closures", ws "\n ", comma, ws " " 92 | , lowerID "free", ws " ", colon, ws " ", pound, ws " ", prim Byte, rparen 
93 | , ws "\n ", rparen, ws "\n", EOF 94 | ] 95 | 96 | tokenizeGHC = 97 | tokenizeGHC_val 98 | @?= 99 | tokenizeGHC_exp 100 | 101 | tokenizeGHC2 = 102 | dropWhile (\(a,b) -> a == b) (zip tokenizeGHC_val tokenizeGHC_exp) 103 | @?= 104 | [] 105 | 106 | tokenizeSmall = tokenize "Foo x -> x Bar" 107 | 108 | parseTestSmall = 109 | D.traceShowId (parse "Foo x -> x Bar") 110 | @?= 111 | LR.ResultAccept 112 | ( AST NT_chiselProd [NT NT_prodSimple] 113 | [ AST NT_prodSimple [NT NT_prodID, NT NT_formals, T T_2, NT NT_group] 114 | [ AST NT_prodID [T T_UpperID] [Leaf $ upperID "Foo"] 115 | , AST NT_formals [T T_LowerID] 116 | [ Leaf $ lowerID "x"] 117 | , Leaf arrow 118 | , AST NT_group [NT NT_groupExp1] 119 | [ AST NT_groupExp1 [NT NT_arith, NT NT_prodApp] 120 | [ AST NT_arith [T T_LowerID] [Leaf $ lowerID "x"] 121 | , AST NT_prodApp [NT NT_prodID] 122 | [ AST NT_prodID [T T_UpperID] [Leaf $ upperID "Bar"] ] 123 | ]]]]) 124 | 125 | tokenizeSmallTest = 126 | tokenizeSmall 127 | @?= 128 | [upperID "Foo", ws " ", lowerID "x", ws " ", arrow, ws " ", lowerID "x", ws " ", upperID "Bar", EOF] 129 | 130 | parseGHCTestBig = 131 | case parse ghc_val of 132 | (LR.ResultAccept _) -> (1 :: Int) @?= 1 133 | e -> e @?= LR.ResultAccept LeafEps 134 | 135 | testPrettify = 136 | unsafePerformIO (putStr $ T.unpack $ pshow (chiselGrammar :: Grammar () ChiselNTSymbol ChiselTSymbol Directive)) 137 | @?= () 138 | 139 | testFast = 140 | glrParseFast isWhitespace "Foo x -> x Bar" 141 | @?= 142 | glrParse isWhitespace "Foo x -> x Bar" 143 | 144 | main :: IO () 145 | main = defaultMainWithOpts 146 | [ testCase "Tokenize GHC" tokenizeGHC 147 | , testCase "Tokenize GHC2" tokenizeGHC2 148 | , testCase "Tokenize Small" tokenizeSmallTest 149 | , testCase "Parse Test (Small)" parseTestSmall 150 | , testCase "Parse GHC" parseGHCTestBig 151 | , testCase "Prettify speed" testPrettify 152 | , testCase "Run glrParseFast build" testFast 153 | ] mempty 154 | 155 | -------------------------------------------------------------------------------- /test/coreg4/G4.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, DeriveDataTypeable #-} 4 | module G4 where 5 | import Language.ANTLR4 6 | 7 | [g4| 8 | grammar G4Basic; 9 | exp : '1' 10 | | '2' 11 | | '3' 12 | ; 13 | |] 14 | 15 | -------------------------------------------------------------------------------- /test/coreg4/G4Fast.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | module G4Fast where 3 | import Language.ANTLR4 4 | import G4 5 | import G4Parser 6 | 7 | $(mkLRParser g4BasicAST g4BasicGrammar) 8 | 9 | {- 10 | import System.IO.Unsafe (unsafePerformIO) 11 | import Data.Monoid 12 | import Test.Framework 13 | import Test.Framework.Providers.HUnit 14 | import Test.Framework.Providers.QuickCheck2 15 | import Test.HUnit 16 | import Test.QuickCheck (Property, quickCheck, (==>)) 17 | import qualified Test.QuickCheck.Monadic as TQM 18 | -} 19 | 20 | {- 21 | import Language.ANTLR4 hiding (tokenize) 22 | import Text.ANTLR.Grammar 23 | import qualified Language.ANTLR4.Example.Optionals as Opt 24 | import Language.ANTLR4.Example.G4 as G4 25 | --import Language.ANTLR4.Example.G4 (g4BasicGrammar, G4BasicNTSymbol, G4BasicTSymbol, G4BasicAST) 26 | import Text.ANTLR.Parser (AST(..)) 27 
| import qualified Text.ANTLR.LR as LR 28 | import qualified Text.ANTLR.Lex.Tokenizer as T 29 | -} 30 | 31 | {- 32 | import Data.Map.Internal (fromList) 33 | import qualified Text.ANTLR.MultiMap as M 34 | import Text.ANTLR.MultiMap (Map(..)) 35 | import qualified Text.ANTLR.Set as S 36 | import qualified Data.HashSet as H 37 | -} 38 | 39 | -------------------------------------------------------------------------------- /test/coreg4/G4Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-} 4 | module G4Parser where 5 | import Language.ANTLR4 6 | import G4 7 | 8 | $(g4_parsers g4BasicAST g4BasicGrammar) 9 | 10 | -------------------------------------------------------------------------------- /test/coreg4/Hello.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable #-} 4 | module Hello where 5 | import Language.ANTLR4 6 | 7 | [g4| 8 | // Hello World grammar 9 | // https://github.com/antlr/grammars-v4/blob/master/antlr4/examples/Hello.g4 10 | grammar Hello; 11 | r : 'hello' WS ID; 12 | ID : [a-zA-Z]+ -> String; 13 | WS : [ \t\r\n]+ -> String; 14 | |] 15 | 16 | -------------------------------------------------------------------------------- /test/coreg4/HelloParser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, TemplateHaskell, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, DeriveDataTypeable, FlexibleContexts #-} 4 | module HelloParser where 5 | import Language.ANTLR4 6 | import Hello 7 | 8 | $(g4_parsers helloAST helloGrammar) 9 | 10 | -------------------------------------------------------------------------------- /test/coreg4/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO.Unsafe (unsafePerformIO) 4 | import Data.Monoid 5 | import Test.Framework 6 | import Test.Framework.Providers.HUnit 7 | import Test.Framework.Providers.QuickCheck2 8 | import Test.HUnit 9 | import Test.QuickCheck (Property, quickCheck, (==>)) 10 | import qualified Test.QuickCheck.Monadic as TQM 11 | 12 | import Language.ANTLR4 hiding (tokenize, Regex(..)) 13 | import qualified G4Parser as G4P 14 | import qualified G4 as G4 15 | import Hello 16 | import HelloParser 17 | import Text.ANTLR.Parser (AST(..)) 18 | import qualified Text.ANTLR.LR as LR 19 | import Language.ANTLR4.Boot.Syntax (Regex(..)) 20 | --import Language.ANTLR4.Regex (parseRegex) 21 | 22 | import qualified Language.ANTLR4.G4 as P -- Parser 23 | 24 | import qualified G4Fast as Fast 25 | 26 | test_g4_basic_type_check = do 27 | let _ = G4.g4BasicGrammar 28 | 1 @?= 1 29 | 30 | hello_g4_test_type_check = do 31 | let _ = helloGrammar 32 | 1 @?= 1 33 | 34 | {- 35 | regex_test = do 36 | parseRegex "[ab]* 'a' 'b' 'b'" 37 | @?= Right 38 | (Concat 39 | [ Kleene $ CharSet "ab" 40 | , Literal "a" 41 | , Literal "b" 42 | , Literal "b" 43 | ]) 44 | -} 45 | 46 | _1 = G4.lookupToken "1" 47 
| 48 | -- TODO: implement 'read' instance for TokenValue type so that I don't have to 49 | -- hardcode the name for literal terminals (e.g. '1' == T_0 below) 50 | test_g4 = 51 | G4P.slrParse (G4P.tokenize "1") 52 | @?= 53 | LR.ResultAccept (AST G4.NT_exp [T G4.T_0] [Leaf _1]) 54 | 55 | test_hello = 56 | slrParse (tokenize "hello Matt") 57 | @?= 58 | (LR.ResultAccept $ 59 | AST NT_r [T T_0, T T_WS, T T_ID] 60 | [ Leaf (Token T_0 V_0 1) 61 | , Leaf (Token T_WS (V_WS " ") 1) 62 | , Leaf (Token T_ID (V_ID "Matt") 4) 63 | ] 64 | ) 65 | 66 | test_hello_allstar = 67 | allstarParse (const False) ("hello Matt") 68 | @?= 69 | Right (AST NT_r [T T_0, T T_WS, T T_ID] [Leaf (Token T_0 V_0 5),Leaf (Token T_WS (V_WS " ") 1),Leaf 70 | (Token T_ID (V_ID "Matt") 4)]) 71 | --Right (AST NT_r [] []) 72 | 73 | testFastGLR = 74 | Fast.glrParseFast (const False) "3" 75 | @?= 76 | G4P.glrParse (const False) "3" 77 | 78 | main :: IO () 79 | main = defaultMainWithOpts 80 | [ testCase "g4_basic_compilation_type_check" test_g4_basic_type_check 81 | , testCase "hello_parse_type_check" hello_g4_test_type_check 82 | -- , testCase "regex_test" regex_test 83 | , testCase "test_g4" test_g4 84 | , testCase "test_hello" test_hello 85 | , testCase "test_hello_allstar" test_hello_allstar 86 | , testCase "testFastGLR" testFastGLR 87 | ] mempty 88 | 89 | -------------------------------------------------------------------------------- /test/g4/DoubleSemi.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-} 4 | module DoubleSemi where 5 | import Language.ANTLR4 6 | 7 | $( return [] ) 8 | 9 | [g4| 10 | grammar Dbl; 11 | dbl : 'a' | 'f' ; // ; 12 | WS : [ \t\r\n]+ -> String; 13 | |] 14 | 15 | isWS T_WS = True 16 | isWS _ = False 17 | 18 | -------------------------------------------------------------------------------- /test/g4/DoubleSemiP.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-} 4 | module DoubleSemiP 5 | ( module DoubleSemiP 6 | , module DoubleSemi 7 | ) where 8 | import Language.ANTLR4 9 | import DoubleSemi 10 | 11 | $(g4_parsers dblAST dblGrammar) 12 | 13 | -------------------------------------------------------------------------------- /test/g4/Empty.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-} 4 | module Empty where 5 | import Language.ANTLR4 6 | 7 | $( return [] ) 8 | 9 | [g4| 10 | grammar Empty; 11 | emp : 'a' b | 'f' d ; 12 | b : 'c' 13 | | // empty! 14 | ; 15 | 16 | d : // empty! 
17 | | 'd' 18 | ; 19 | 20 | WS : [ \t\r\n]+ -> String; 21 | |] 22 | 23 | isWS T_WS = True 24 | isWS _ = False 25 | 26 | -------------------------------------------------------------------------------- /test/g4/EmptyP.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-} 4 | module EmptyP 5 | ( module Empty 6 | , module EmptyP 7 | ) where 8 | import Language.ANTLR4 9 | import Empty 10 | 11 | $(g4_parsers emptyAST emptyGrammar) 12 | 13 | -------------------------------------------------------------------------------- /test/g4/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell, FlexibleContexts, TypeFamilies #-} 2 | module Main where 3 | 4 | import System.IO.Unsafe (unsafePerformIO) 5 | import Data.Monoid 6 | import Test.Framework 7 | import Test.Framework.Providers.HUnit 8 | import Test.Framework.Providers.QuickCheck2 9 | import Test.HUnit 10 | import Test.QuickCheck (Property, quickCheck, (==>)) 11 | import qualified Test.QuickCheck.Monadic as TQM 12 | 13 | import Language.ANTLR4 hiding (tokenize, Regex(..)) 14 | import Text.ANTLR.Grammar 15 | --import Language.ANTLR4.Regex 16 | import Text.ANTLR.Parser (AST(..)) 17 | import qualified Text.ANTLR.LR as LR 18 | import qualified Text.ANTLR.Lex.Tokenizer as T 19 | 20 | import qualified Language.ANTLR4.G4 as P -- Parser 21 | 22 | import qualified Optional as Opt 23 | import qualified OptionalParser as Opt 24 | 25 | import qualified EmptyP as E 26 | import qualified Empty as E 27 | 28 | test_optional = 29 | Opt.glrParse Opt.isWS "a" 30 | @?= 31 | (LR.ResultAccept $ AST Opt.NT_r [NT Opt.NT_a] [AST Opt.NT_a [T Opt.T_1] [Leaf (Token Opt.T_1 Opt.V_1 1)]]) 32 | 33 | test_optional2 = 34 | case Opt.glrParse Opt.isWS "a a e b c d" of 35 | LR.ResultAccept ast -> Opt.ast2r ast @?= "accept" 36 | err -> error $ show err 37 | 38 | test_optional3 = 39 | case Opt.glrParse Opt.isWS "a e b b b c c d" of 40 | LR.ResultAccept ast -> Opt.ast2r ast @?= "accept" 41 | err -> error $ show err 42 | 43 | test_optional4 = 44 | case Opt.glrParse Opt.isWS "a" of 45 | LR.ResultAccept ast -> Opt.ast2r ast @?= "reject" 46 | err -> error $ show err 47 | 48 | test_e v = 49 | case v of 50 | LR.ResultAccept ast -> E.ast2emp ast @?= () 51 | err -> error $ show err 52 | test_e_fail v = 53 | case v of 54 | LR.ResultAccept ast -> assertFailure $ show ast 55 | err -> () @?= () 56 | 57 | test_empty = test_e (E.slrParse (E.tokenize "a")) 58 | test_empty2 = test_e (E.slrParse (E.tokenize "f")) 59 | test_empty3 = test_e (E.slrParse (E.tokenize "ac")) 60 | test_empty4 = test_e (E.slrParse (E.tokenize "fd")) 61 | test_empty5 = test_e_fail (E.slrParse (E.tokenize "fc")) 62 | test_empty6 = test_e_fail (E.slrParse (E.tokenize "ad")) 63 | 64 | main :: IO () 65 | main = defaultMainWithOpts 66 | [ testCase "test_optional" test_optional 67 | , testCase "test_optional2" test_optional2 68 | , testCase "test_optional3" test_optional3 69 | , testCase "test_optional4" test_optional4 70 | , testCase "test_empty" test_empty 71 | , testCase "test_empty2" test_empty2 72 | , testCase "test_empty3" test_empty3 73 | , testCase "test_empty4" test_empty4 74 | , testCase "test_empty5" test_empty5 75 | , testCase "test_empty6" test_empty6 76 | ] mempty 77 | 78 | 
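-- A minimal usage sketch (our addition, not one of the test cases above):
-- driving the generated GLR parser by hand and printing the outcome. The name
-- 'demo' is hypothetical; 'Opt.glrParse', 'Opt.isWS', and 'Opt.ast2r' are the
-- same generated/imported functions the tests above exercise.
demo :: String -> IO ()
demo input =
  case Opt.glrParse Opt.isWS input of
    LR.ResultAccept ast -> putStrLn ("parsed: " ++ Opt.ast2r ast)
    err                 -> putStrLn ("parse failed: " ++ show err)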
-------------------------------------------------------------------------------- /test/g4/Optional.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE QuasiQuotes, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell #-} 4 | module Optional where 5 | import Language.ANTLR4 6 | 7 | opt a b c d = (1, 'b', 2.0, [1,2,3]) 8 | 9 | foo :: () -> Maybe (Int, Char, Double, [Int]) -> String 10 | foo a1 (Just (a,b,c,d)) = "accept" 11 | foo a1 Nothing = "reject" 12 | 13 | $( return [] ) 14 | 15 | [g4| 16 | grammar Optional; 17 | r : a s? -> foo; 18 | s : a? 'e' b* c+ d -> opt; 19 | a : 'a'; 20 | b : 'b'; 21 | c : 'c'; 22 | d : 'd'; 23 | 24 | ID : [a-zA-Z]+ -> String; 25 | WS : [ \t\r\n]+ -> String; 26 | |] 27 | 28 | isWS T_WS = True 29 | isWS _ = False 30 | 31 | -------------------------------------------------------------------------------- /test/g4/OptionalParser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell, FlexibleContexts #-} 4 | module OptionalParser where 5 | import Language.ANTLR4 6 | import Optional 7 | 8 | $(g4_parsers optionalAST optionalGrammar) 9 | 10 | -------------------------------------------------------------------------------- /test/lexer/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Text.ANTLR.Lex 4 | import Text.ANTLR.Lex.Automata 5 | import Text.ANTLR.Lex.NFA as NFA 6 | import qualified Text.ANTLR.Lex.DFA as DFA 7 | 8 | import Text.ANTLR.Lex.Regex 9 | 10 | import System.IO.Unsafe (unsafePerformIO) 11 | import Data.Monoid 12 | import Test.Framework 13 | import Test.Framework.Providers.HUnit 14 | import Test.Framework.Providers.QuickCheck2 15 | import Test.HUnit 16 | import Test.QuickCheck (Property, quickCheck, (==>)) 17 | import qualified Test.QuickCheck.Monadic as TQM 18 | 19 | import Text.ANTLR.Set (fromList, Set(..), singleton, Hashable) 20 | 21 | singleEdge s = (False, singleton s) 22 | 23 | fL :: (Hashable a, Eq a) => [a] -> Set a 24 | fL = fromList 25 | 26 | nfa0 :: NFA Char Int 27 | nfa0 = Automata 28 | { _S = fL [0, 1, 2, 3] 29 | , _Σ = fL "ab" 30 | , s0 = 0 31 | , _F = fL [3] 32 | , _Δ = fL 33 | [ (0, singleEdge $ Edge 'a', 0) 34 | , (0, singleEdge $ Edge 'a', 1) 35 | , (0, singleEdge $ Edge 'b', 0) 36 | , (1, singleEdge $ Edge 'b', 2) 37 | , (2, singleEdge $ Edge 'b', 3) 38 | ] 39 | } 40 | 41 | testValid0 = 42 | ( validStartState nfa0 43 | && validFinalStates nfa0 44 | && validTransitions nfa0 45 | ) 46 | @?= 47 | True 48 | 49 | testClosureWith0 = 50 | closureWith (Edge 'a' ==) nfa0 (singleton 0) 51 | @?= 52 | fromList [0, 1] 53 | 54 | testClosureWith1 = 55 | closureWith (NFAEpsilon ==) nfa0 (singleton 0) 56 | @?= 57 | fromList [0] 58 | 59 | testClosureWith2 = 60 | closureWith (const True) nfa0 (singleton 0) 61 | @?= 62 | fromList [0, 1, 2, 3] 63 | 64 | testClosureWith3 = 65 | closureWith (Edge 'b' ==) nfa0 (singleton 0) 66 | @?= 67 | fromList [0] 68 | 69 | testMove0 = 70 | move nfa0 (fromList [0,1,2]) (Edge 'a') 71 | @?= 72 | fromList [0,1] 73 | 74 | nfa334 :: NFA Char Int 75 | nfa334 = Automata 76 | { _S = fL [0 .. 
10] 77 | , _Σ = fL "ab" 78 | , s0 = 0 79 | , _F = fL [10] 80 | , _Δ = fL 81 | [ (0, singleEdge NFAEpsilon, 1) 82 | , (0, singleEdge NFAEpsilon, 7) 83 | , (1, singleEdge NFAEpsilon, 2) 84 | , (1, singleEdge NFAEpsilon, 4) 85 | , (2, singleEdge $ Edge 'a', 3) 86 | , (3, singleEdge NFAEpsilon, 6) 87 | , (4, singleEdge $ Edge 'b', 5) 88 | , (5, singleEdge NFAEpsilon, 6) 89 | , (6, singleEdge NFAEpsilon, 1) 90 | , (6, singleEdge NFAEpsilon, 7) 91 | , (7, singleEdge $ Edge 'a', 8) 92 | , (8, singleEdge $ Edge 'b', 9) 93 | , (9, singleEdge $ Edge 'b', 10) 94 | ] 95 | } 96 | 97 | _A = fromList [0,1,2,4,7] 98 | _B = fromList [1,2,3,4,6,7,8] 99 | _C = fromList [1,2,4,5,6,7] 100 | _D = fromList [1,2,4,5,6,7,9] 101 | _E = fromList [1,2,4,5,6,7,10] 102 | 103 | a = singleEdge 'a' 104 | b = singleEdge 'b' 105 | 106 | dfa336 :: DFA.DFA Char (Set Int) 107 | dfa336 = Automata 108 | { _S = fL [_A, _B, _C, _D, _E] 109 | , _Σ = fL "ab" 110 | , s0 = _A 111 | , _F = fL [_E] 112 | , _Δ = fL 113 | [ (_A, a, _B), (_A, b, _C) 114 | , (_B, a, _B), (_B, b, _D) 115 | , (_C, a, _B), (_C, b, _C) 116 | , (_D, a, _B), (_D, b, _E) 117 | , (_E, a, _B), (_E, b, _C) 118 | ] 119 | } 120 | 121 | nfa2dfa0 = 122 | nfa2dfa nfa334 123 | @?= 124 | dfa336 125 | 126 | nfa334Eps0 = 127 | NFA.epsClosure nfa334 128 | 129 | epsilonNFA = 130 | Automata 131 | { _S = fL [0, 1] 132 | , _Σ = fL "" 133 | , s0 = 0 134 | , _F = fL [1] 135 | , _Δ = fL [ (0, singleEdge NFAEpsilon, 1) ] 136 | } 137 | 138 | regexTest0 = 139 | regex2nfa Epsilon 140 | @?= 141 | epsilonNFA 142 | 143 | regexTest1 = 144 | regex2nfa (Symbol 'a') 145 | @?= 146 | epsilonNFA { _Σ = fL "a", _Δ = fL [ (0, singleEdge $ Edge 'a', 1) ] } 147 | 148 | regexTestUnion = 149 | regex2nfa (Union (Symbol 'a') (Symbol 'b')) 150 | @?= Automata 151 | { _S = fL [0..5] 152 | , _Σ = fL "ab" 153 | , s0 = 4 154 | , _F = fL [5] 155 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1) 156 | , (2, singleEdge $ Edge 'b', 3) 157 | , (4, singleEdge NFAEpsilon, 0) 158 | , (4, singleEdge NFAEpsilon, 2) 159 | , (1, singleEdge NFAEpsilon, 5) 160 | , (3, singleEdge NFAEpsilon, 5) ] 161 | } 162 | 163 | regexTestConcat = 164 | regex2nfa (Concat [Symbol 'a', Symbol 'b']) 165 | @?= Automata 166 | { _S = fL [0..3] 167 | , _Σ = fL "ab" 168 | , s0 = 0 169 | , _F = fL [3] 170 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1) 171 | , (1, singleEdge NFAEpsilon, 2) 172 | , (2, singleEdge $ Edge 'b', 3) ] 173 | } 174 | 175 | regexTestKleene = 176 | regex2nfa (Kleene (Concat [Symbol 'a', Symbol 'b'])) 177 | @?= Automata 178 | { _S = fL [0..5] 179 | , _Σ = fL "ab" 180 | , s0 = 4 181 | , _F = fL [5] 182 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1) 183 | , (1, singleEdge NFAEpsilon, 2) 184 | , (2, singleEdge $ Edge 'b', 3) 185 | , (4, singleEdge NFAEpsilon, 0) 186 | , (4, singleEdge NFAEpsilon, 5) 187 | , (3, singleEdge NFAEpsilon, 0) 188 | , (3, singleEdge NFAEpsilon, 5)] 189 | } 190 | 191 | regexTestPosclos = 192 | regex2nfa (PosClos (Concat [Symbol 'a', Symbol 'b'])) 193 | @?= Automata 194 | { _S = fL [0..9] 195 | , _Σ = fL "ab" 196 | , s0 = 0 197 | , _F = fL [9] 198 | , _Δ = fL [ (0, singleEdge $ Edge 'a', 1) 199 | , (1, singleEdge NFAEpsilon, 2) 200 | , (2, singleEdge $ Edge 'b', 3) 201 | , (3, singleEdge NFAEpsilon, 8) 202 | , (4, singleEdge $ Edge 'a', 5) 203 | , (5, singleEdge NFAEpsilon, 6) 204 | , (6, singleEdge $ Edge 'b', 7) 205 | , (8, singleEdge NFAEpsilon, 4) 206 | , (8, singleEdge NFAEpsilon, 9) 207 | , (7, singleEdge NFAEpsilon, 4) 208 | , (7, singleEdge NFAEpsilon, 9)] 209 | } 210 | 211 | dfaABPlus = regex2dfa (PosClos 
(Concat [Symbol 'a', Symbol 'b'])) 212 | dfaWS = regex2dfa (PosClos (Symbol ' ')) 213 | 214 | {- 215 | dfaGetName x 216 | | x == dfaWS = "ws" 217 | | x == dfaABPlus = "ab+" 218 | | otherwise = "Error" 219 | -} 220 | 221 | tokenizeTest0 = 222 | tokenize [("ab+", dfaABPlus), ("ws", dfaWS)] const "abab ab ababab" 223 | @?= 224 | [ Token "ab+" "abab" 4 225 | , Token "ws" " " 1 226 | , Token "ab+" "ab" 2 227 | , Token "ws" " " 1 228 | , Token "ab+" "ababab" 6 229 | , EOF 230 | ] 231 | 232 | {- 233 | dfaID = regex2dfa 234 | (PosClos $ MultiUnion 235 | [ Class ['a' .. 'z'] 236 | , Class ['A' .. 'Z'] 237 | , Symbol '_' 238 | ]) 239 | -} 240 | 241 | dfaID = regex2dfa 242 | (PosClos $ Class $ '_' : ['a' .. 'z'] ++ ['A' .. 'Z']) 243 | 244 | -- For profiling runtime of DFA subset construction (nfa2dfa): 245 | dfaIDTest = 246 | dfaID 247 | @?= 248 | dfaID 249 | 250 | dfaEQ = regex2dfa (Symbol '=') 251 | dfaSEMI = regex2dfa (Symbol ';') 252 | 253 | dfaINT = regex2dfa (PosClos $ Class [ '0' .. '9' ]) 254 | 255 | data TermSymbol = T_ID | T_INT | T_WS | T_EQ | T_SEMI 256 | deriving (Eq, Ord, Show) 257 | 258 | data TermValue = 259 | ID String 260 | | INT Int 261 | | WS String 262 | | EQSIGN 263 | | SEMI 264 | deriving (Eq, Ord, Show) 265 | 266 | lexeme2value lexeme T_WS = WS lexeme 267 | lexeme2value lexeme T_ID = ID lexeme 268 | lexeme2value lexeme T_INT = INT $ read lexeme 269 | lexeme2value lexeme T_EQ = EQSIGN 270 | lexeme2value lexeme T_SEMI = SEMI 271 | 272 | tokenizeTest1 = 273 | tokenize 274 | [ (T_WS, dfaWS), (T_ID, dfaID), (T_INT, dfaINT) 275 | , (T_EQ, dfaEQ), (T_SEMI, dfaSEMI) ] 276 | lexeme2value "_matt = 0;" 277 | @?= 278 | [ Token T_ID (ID "_matt") 5 279 | , Token T_WS (WS " ") 1 280 | , Token T_EQ EQSIGN 1 281 | , Token T_WS (WS " ") 1 282 | , Token T_INT (INT 0) 1 283 | , Token T_SEMI SEMI 1 284 | , EOF 285 | ] 286 | 287 | lineCommentDFA = regex2dfa $ Concat [Literal "//", Kleene $ NotClass ['\n'], Symbol '\n'] 288 | 289 | lineCommentTest = 290 | tokenize [("LineComment", lineCommentDFA)] const 291 | "// This is a line comment.\n" 292 | @?= 293 | [ Token "LineComment" "// This is a line comment.\n" 27 294 | , EOF 295 | ] 296 | 297 | main :: IO () 298 | main = defaultMainWithOpts 299 | [ testCase "testValid0" testValid0 300 | , testCase "testClosureWith0" testClosureWith0 301 | , testCase "testClosureWith1" testClosureWith1 302 | , testCase "testClosureWith2" testClosureWith2 303 | , testCase "testClosureWith3" testClosureWith3 304 | , testCase "testMove0" testMove0 305 | , testCase "nfa2dfa0" nfa2dfa0 306 | , testCase "regexTest0" regexTest0 307 | , testCase "regexTest1" regexTest1 308 | , testCase "regexTestUnion" regexTestUnion 309 | , testCase "regexTestConcat" regexTestConcat 310 | , testCase "regexTestKleene" regexTestKleene 311 | , testCase "regexTestPosclos" regexTestPosclos 312 | , testCase "tokenizeTest0" tokenizeTest0 313 | , testCase "dfaIDTest" dfaIDTest 314 | , testCase "tokenizeTest1" tokenizeTest1 315 | , testCase "lineCommentTest" lineCommentTest 316 | ] mempty 317 | 318 | -------------------------------------------------------------------------------- /test/ll/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | module Main where 3 | import Example.Grammar 4 | import Text.ANTLR.Grammar 5 | import Text.ANTLR.Parser 6 | import Text.ANTLR.Pretty 7 | import qualified Data.Text as T 8 | import Text.ANTLR.LL1 9 | 10 | import Text.ANTLR.Set (fromList, union, empty, Set(..)) 11 | import qualified 
Text.ANTLR.Set as Set 12 | 13 | import qualified Data.Map.Strict as M 14 | 15 | import System.IO.Unsafe (unsafePerformIO) 16 | import Data.Monoid 17 | import Test.Framework 18 | import Test.Framework (defaultMainWithOpts) 19 | import Test.Framework.Providers.HUnit 20 | import Test.Framework.Providers.QuickCheck2 21 | import Test.HUnit hiding ((@?=), assertEqual) 22 | import Test.QuickCheck (Property, quickCheck, (==>)) 23 | import qualified Test.QuickCheck.Monadic as TQM 24 | 25 | import Text.ANTLR.HUnit 26 | 27 | type LL1NonTerminal = String 28 | type LL1Terminal = String 29 | 30 | uPIO = unsafePerformIO 31 | 32 | grm :: Grammar () LL1NonTerminal LL1Terminal () 33 | grm = dragonBook428 34 | 35 | termination = first grm [NT "E"] @?= first grm [NT "E"] 36 | 37 | firstF = first grm [NT "F"] @?= fromList [Icon "(", Icon "id"] 38 | 39 | noEps = first grm [NT "E"] @?= fromList [Icon "(", Icon "id"] 40 | 41 | firstT' = 42 | first grm [NT "T'"] 43 | @?= 44 | fromList [Icon "*", IconEps] 45 | 46 | foldEpsTest = foldWhileEpsilon union empty 47 | [ fromList [Icon "(", Icon "id"] 48 | , fromList [Icon ")"] 49 | ] 50 | @?= 51 | fromList [Icon "(", Icon "id"] 52 | 53 | firstAll = 54 | ( Set.map ((\nt -> (nt, first grm [nt])) . NT) (ns grm) 55 | `union` 56 | Set.map ((\t -> (t, first grm [t])) . T) (ts grm) 57 | ) 58 | @?= 59 | fromList 60 | [ (NT "E", fromList [Icon "(", Icon "id"]) 61 | , (NT "E'", fromList [Icon "+", IconEps]) 62 | , (NT "F", fromList [Icon "(", Icon "id"]) 63 | , (NT "T", fromList [Icon "(", Icon "id"]) 64 | , (NT "T'", fromList [Icon "*", IconEps]) 65 | , (T "(", fromList [Icon "("]) 66 | , (T ")", fromList [Icon ")"]) 67 | , (T "*", fromList [Icon "*"]) 68 | , (T "+", fromList [Icon "+"]) 69 | , (T "id", fromList [Icon "id"]) 70 | ] 71 | 72 | grm' :: Grammar () LL1NonTerminal LL1Terminal () 73 | grm' = grm 74 | 75 | followAll :: IO () 76 | followAll = let 77 | fncn :: LL1NonTerminal -> (ProdElem LL1NonTerminal LL1Terminal, Set (Icon LL1Terminal)) 78 | fncn nt = (NT nt, follow grm' nt) 79 | in Set.map fncn (ns grm') 80 | @?= 81 | fromList 82 | [ (NT "E", fromList [Icon ")", IconEOF]) 83 | , (NT "E'", fromList [Icon ")", IconEOF]) 84 | , (NT "T", fromList [Icon ")", Icon "+", IconEOF]) 85 | , (NT "T'", fromList [Icon ")", Icon "+", IconEOF]) 86 | , (NT "F", fromList [Icon ")", Icon "*", Icon "+", IconEOF]) 87 | ] 88 | 89 | parseTableTest = 90 | parseTable grm 91 | @?= 92 | M.fromList (map (\((a,b),c) -> ((a,b), Set.singleton c)) 93 | -- Figure 4.17 of dragon book: 94 | [ (("E", Icon "id"), [NT "T", NT "E'"]) 95 | , (("E", Icon "("), [NT "T", NT "E'"]) 96 | , (("E'", Icon "+"), [T "+", NT "T", NT "E'"]) 97 | , (("E'", Icon ")"), [Eps]) 98 | , (("E'", IconEOF), [Eps]) 99 | , (("T", Icon "id"), [NT "F", NT "T'"]) 100 | , (("T", Icon "("), [NT "F", NT "T'"]) 101 | , (("T'", Icon "+"), [Eps]) 102 | , (("T'", Icon "*"), [T "*", NT "F", NT "T'"]) 103 | , (("T'", Icon ")"), [Eps]) 104 | , (("T'", IconEOF), [Eps]) 105 | , (("F", Icon "id"), [T "id"]) 106 | , (("F", Icon "("), [T "(", NT "E", T ")"]) 107 | ]) 108 | 109 | type LLAST = AST LL1NonTerminal LL1Terminal 110 | 111 | action0 EpsE = LeafEps 112 | action0 (TermE t) = Leaf t 113 | action0 (NonTE (nt, ss, us)) = AST nt ss us 114 | 115 | action1 :: 116 | (Prettify t, Prettify (StripEOF (Sym t)), Prettify nts) 117 | => ParseEvent (AST nts t) nts t -> AST nts t 118 | action1 (NonTE (nt, ss, trees)) = uPIO (putStrLn $ T.unpack $ pshow ("Act:", nt, ss, trees)) `seq` action0 $ NonTE (nt,ss,trees) 119 | action1 (TermE x) = uPIO (putStrLn $ 
T.unpack $ pshow ("Act:", x)) `seq` action0 $ TermE x 120 | action1 EpsE = action0 EpsE 121 | 122 | dragonPredParse = 123 | predictiveParse grm action0 ["id", "+", "id", "*", "id", ""] 124 | @?= 125 | (Just $ AST "E" [NT "T", NT "E'"] 126 | [ AST "T" [NT "F", NT "T'"] 127 | [ AST "F" [T "id"] [Leaf "id"] 128 | , AST "T'" [Eps] [LeafEps] 129 | ] 130 | , AST "E'" [T "+", NT "T", NT "E'"] 131 | [ Leaf "+" 132 | , AST "T" [NT "F", NT "T'"] 133 | [ AST "F" [T "id"] [Leaf "id"] 134 | , AST "T'" [T "*", NT "F", NT "T'"] 135 | [ Leaf "*" 136 | , AST "F" [T "id"] [Leaf "id"] 137 | , AST "T'" [Eps] [LeafEps] 138 | ] 139 | ] 140 | , AST "E'" [Eps] [LeafEps] 141 | ] 142 | ]) 143 | 144 | singleLang :: Grammar () String Char () 145 | singleLang = (defaultGrammar "S" :: Grammar () String Char ()) 146 | { s0 = "S" 147 | , ns = fromList ["S", "X"] 148 | , ts = fromList ['a'] 149 | , ps = [ production "S" $ Prod Pass [NT "X", T 'a'] 150 | , production "X" $ Prod Pass [Eps] 151 | ] 152 | } 153 | 154 | testRemoveEpsilons = 155 | removeEpsilons singleLang 156 | @?= singleLang 157 | { ps = [ production "S" $ Prod Pass [NT "X", T 'a'] 158 | , production "S" $ Prod Pass [T 'a'] 159 | ] 160 | } 161 | 162 | singleLang2 :: Grammar () String Char () 163 | singleLang2 = singleLang 164 | { ts = fromList ['a', 'b'] 165 | , ps = [ production "S" $ Prod Pass [NT "X", T 'a', NT "X", T 'b', NT "X"] 166 | , production "X" $ Prod Pass [Eps] 167 | ] 168 | } 169 | 170 | testRemoveEpsilons2 = 171 | (Set.fromList . ps . removeEpsilons) singleLang2 172 | @?= 173 | fromList 174 | [ production "S" $ Prod Pass [ T 'a', T 'b' ] 175 | , production "S" $ Prod Pass [ T 'a', T 'b', NT "X"] 176 | , production "S" $ Prod Pass [ T 'a', NT "X", T 'b' ] 177 | , production "S" $ Prod Pass [ T 'a', NT "X", T 'b', NT "X"] 178 | , production "S" $ Prod Pass [NT "X", T 'a', T 'b' ] 179 | , production "S" $ Prod Pass [NT "X", T 'a', T 'b', NT "X"] 180 | , production "S" $ Prod Pass [NT "X", T 'a', NT "X", T 'b' ] 181 | , production "S" $ Prod Pass [NT "X", T 'a', NT "X", T 'b', NT "X"] 182 | ] 183 | 184 | testRemoveEpsilons3 = 185 | removeEpsilons dragonBook428 186 | @?= (defaultGrammar "E" :: Grammar () String String ()) 187 | { ns = fromList ["E", "E'", "T", "T'", "F"] 188 | , ts = fromList ["+", "*", "(", ")", "id"] 189 | , s0 = "E" 190 | , ps = [ production "E" $ Prod Pass [NT "T", NT "E'"] 191 | , production "E'" $ Prod Pass [T "+", NT "T", NT "E'"] 192 | , production "E'" $ Prod Pass [Eps] -- Implicitly epsilon 193 | , production "T" $ Prod Pass [NT "F", NT "T'"] 194 | , production "T'" $ Prod Pass [T "*", NT "F", NT "T'"] 195 | , production "T'" $ Prod Pass [Eps] 196 | , production "F" $ Prod Pass [T "(", NT "E", T ")"] 197 | , production "F" $ Prod Pass [T "id"] 198 | ] 199 | } 200 | 201 | leftGrammar0 :: Grammar () Char Char () 202 | leftGrammar0 = (defaultGrammar 'S' :: Grammar () Char Char ()) 203 | { ns = fromList "SABC" 204 | , ts = fromList "defg" 205 | , s0 = 'S' 206 | , ps = [ production 'S' $ Prod Pass [NT 'A'] 207 | , production 'A' $ Prod Pass [T 'd', T 'e', NT 'B'] 208 | , production 'A' $ Prod Pass [T 'd', T 'e', NT 'C'] 209 | , production 'B' $ Prod Pass [T 'f'] 210 | , production 'C' $ Prod Pass [T 'g'] 211 | ] 212 | } 213 | 214 | testLeftFactor = 215 | leftFactor leftGrammar0 216 | @?= G 217 | { ns = fromList $ map Prime [('S', 0), ('A', 0), ('B', 0), ('C', 0)] 218 | , ts = fromList "defg" 219 | , s0 = Prime ('S', 0) 220 | , ps = [ production (Prime ('S', 0)) $ Prod Pass [NT $ Prime ('A', 0)] 221 | , production (Prime ('A', 
0)) $ Prod Pass [T 'd', T 'e', NT $ Prime ('A', 1)] 222 | , production (Prime ('A', 1)) $ Prod Pass [NT $ Prime ('B', 0)] 223 | , production (Prime ('A', 1)) $ Prod Pass [NT $ Prime ('C', 0)] 224 | , production (Prime ('B', 0)) $ Prod Pass [T 'f'] 225 | , production (Prime ('C', 0)) $ Prod Pass [T 'g'] 226 | ] 227 | , _πs = fromList [] 228 | , _μs = fromList [] 229 | } 230 | 231 | main :: IO () 232 | main = defaultMainWithOpts 233 | [ testCase "fold_epsilon" foldEpsTest 234 | , testCase "termination" termination 235 | , testCase "no_epsilon" noEps 236 | , testCase "firstF" firstF 237 | , testCase "firstT'" firstT' 238 | , testCase "firstAll" firstAll 239 | , testCase "followAll" followAll 240 | , testCase "dragonHasAllNonTerms" $ hasAllNonTerms grm @?= True 241 | , testCase "dragonHasAllTerms" $ hasAllTerms grm @?= True 242 | , testCase "dragonStartIsNonTerm" $ startIsNonTerm grm @?= True 243 | , testCase "dragonIsValid" $ validGrammar grm @?= True 244 | , testCase "dragonIsLL1" $ isLL1 grm @?= True 245 | , testCase "dragonParseTable" parseTableTest 246 | , testCase "dragonPredParse" dragonPredParse 247 | , testCase "testRemoveEpsilons" testRemoveEpsilons 248 | , testCase "testRemoveEpsilons2" testRemoveEpsilons2 249 | , testCase "testLeftFactor" testLeftFactor 250 | ] mempty 251 | 252 | -------------------------------------------------------------------------------- /test/lr/EOF.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module EOF where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import Language.ANTLR4.Syntax (stripQuotesReadEscape) 19 | 20 | import EOFGrammar 21 | 22 | $(g4_parsers eOFAST eOFGrammar) 23 | 24 | test_eof = case glrParse (== T_WS) "anything that is not a comma" of 25 | (ResultAccept ast) -> ast2words ast @?= ["anything", "that", "is", "not", "a", "comma"] 26 | (ResultSet xs) -> assertFailure $ "Ambiguous parse: " ++ pshow' xs 27 | rest -> assertFailure $ stripQuotesReadEscape $ "\"" ++ pshow' rest ++ "\"" 28 | 29 | -------------------------------------------------------------------------------- /test/lr/EOFGrammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module EOFGrammar where 6 | import Language.ANTLR4 7 | 8 | data Plus = Plus String String | Minus String String 9 | deriving (Eq, Show) 10 | 11 | [g4| 12 | grammar EOF; 13 | 14 | words : word* ; 15 | 16 | word : WORD ; 17 | 18 | WORD : (~ [, ])* -> String; 19 | 20 | WS : [ \t\n\r\f\v]+ -> String; 21 | |] 22 | 23 | -------------------------------------------------------------------------------- /test/lr/GLRInc.hs: 
-------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module GLRInc where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import Language.ANTLR4.Syntax (stripQuotesReadEscape) 19 | 20 | import GLRIncGrammar 21 | 22 | $(g4_parsers gLRIncAST gLRIncGrammar) 23 | 24 | test_GLRInc = case glrParse (== T_WS) "word - foo" of 25 | (ResultAccept ast) -> ast2plus ast @?= Minus "word" "foo" 26 | (ResultSet xs) -> assertFailure $ "Ambiguous parse: " ++ pshow' xs 27 | rest -> assertFailure $ stripQuotesReadEscape $ "\"" ++ pshow' rest ++ "\"" 28 | 29 | -------------------------------------------------------------------------------- /test/lr/GLRIncGrammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module GLRIncGrammar where 6 | import Language.ANTLR4 7 | 8 | data Plus = Plus String String | Minus String String 9 | deriving (Eq, Show) 10 | 11 | [g4| 12 | grammar GLRInc; 13 | 14 | plus : LowerID '+' Prim -> Plus 15 | | Prim '-' LowerID -> Minus 16 | ; 17 | 18 | LowerID : [a-z][a-zA-Z0-9_]* -> String; 19 | Prim : 'word' -> String; 20 | 21 | WS : [ \t\n\r\f\v]+ -> String; 22 | |] 23 | 24 | -------------------------------------------------------------------------------- /test/lr/GLRPartial.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module GLRPartial where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | import Language.ANTLR4.Syntax (stripQuotesReadEscape) 20 | 21 | import GLRPartialGrammar 22 | 23 | $(g4_parsers gLRPartialAST gLRPartialGrammar) 24 | 25 | test_GLRPartial = case glrParse (== T_WS) "word\nword\nword\nw0rd" of 26 | (ResultAccept ast) -> assertFailure $ "Was not supposed to parse: " ++ pshow' (ast2words ast) 27 | (ResultSet xs) -> assertFailure $ "Ambiguous parse: " ++ pshow' xs 28 | (ErrorNoAction cfg asts _) -> return () -- Correct, should not have parsed 29 | rest -> assertFailure $ stripQuotesReadEscape $ "\"" ++ pshow' rest ++ "\"" 30 | 31 |
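32 | -- The three-way case split above recurs throughout these GLR tests: a parse
33 | -- either accepts, yields an ambiguous 'ResultSet', or halts with no viable
34 | -- action. A minimal sketch of a helper collapsing the result to a Maybe
35 | -- ('maybeAst' is hypothetical, not part of the library; compare 'getAST' in
36 | -- test/sexpression/sexpression.hs, which calls 'error' on non-accepts):
37 | maybeAst (ResultAccept ast) = Just ast
38 | maybeAst _                  = Nothing
39 |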
-------------------------------------------------------------------------------- /test/lr/GLRPartialGrammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module GLRPartialGrammar where 6 | import Language.ANTLR4 7 | 8 | [g4| 9 | grammar GLRPartial; 10 | 11 | words : word+ ; 12 | 13 | word : Prim ; 14 | 15 | LowerID : [a-z][a-zA-Z0-9_]* -> String; 16 | Prim : 'word' -> String; 17 | 18 | WS : [ \t\n\r\f\v]+ -> String; 19 | |] 20 | 21 | -------------------------------------------------------------------------------- /test/sexpression/Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-} 4 | module Grammar where 5 | import Language.ANTLR4 6 | 7 | {- 8 | The MIT License 9 | Copyright (c) 2008 Robert Stehwien 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the "Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | 26 | Port to Antlr4 by Tom Everett 27 | Subsequent port to antlr-haskell by Karl Cronburg 28 | -} 29 | 30 | data Atom 31 | = Str String 32 | | Symb String 33 | | Number Double 34 | deriving (Eq, Ord, Show) 35 | 36 | data Item 37 | = Atm Atom 38 | | List [Item] 39 | | Field Item Item 40 | deriving (Eq, Ord, Show) 41 | 42 | [g4| 43 | grammar Sexpression; 44 | 45 | sexpr 46 | : item* 47 | ; 48 | 49 | item 50 | : atom -> ${\a -> Atm a} 51 | | list -> List 52 | | '(' item '.' item ')' -> Field 53 | ; 54 | 55 | list 56 | : '(' item* ')' 57 | ; 58 | 59 | atom 60 | : STRING -> Str 61 | | SYMBOL -> Symb 62 | | NUMBER -> Number 63 | ; 64 | 65 | STRING : ["] ( ('\\' .) | ~ ["\\] )* ["] -> String; 66 | 67 | WHITESPACE : [ \n\t\r]+ -> String; 68 | 69 | NUMBER : [-+]? DIGIT+ ([\.] DIGIT+)? 
-> Double; 70 | 71 | SYMBOL : SYMBOL_START (SYMBOL_START | DIGIT)* -> String; 72 | 73 | fragment SYMBOL_START : [a-zA-Z+\-*/] ; 74 | 75 | fragment DIGIT : [0-9] ; 76 | 77 | |] 78 | 79 | isWS T_WHITESPACE = True 80 | isWS _ = False 81 | 82 | -------------------------------------------------------------------------------- /test/sexpression/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-} 4 | module Parser 5 | ( module Grammar 6 | , glrParse, ast2sexpr 7 | -- , glrParseFast 8 | ) where 9 | import Language.ANTLR4 10 | import Grammar 11 | 12 | --import qualified GHC.Types as G 13 | import qualified Text.ANTLR.LR as LR 14 | 15 | $(g4_parsers sexpressionAST sexpressionGrammar) 16 | 17 | -- $(mkLRParser the_ast sexpressionGrammar) 18 | 19 | -------------------------------------------------------------------------------- /test/sexpression/sexpression.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | import Language.ANTLR4 3 | import Parser 4 | import qualified Text.ANTLR.Set as S 5 | 6 | getAST (ResultAccept ast) = ast 7 | getAST _ = error "non-AST in ResultSet" 8 | 9 | main = 10 | case glrParse isWS "((m1lk ju1ce 3.1) . (h0ney marmalade \"jam\"))" of 11 | (ResultAccept ast) -> print $ ast2sexpr ast 12 | (ResultSet xs) -> mapM_ (print . ast2sexpr . getAST) (S.toList xs) 13 | 14 | -------------------------------------------------------------------------------- /test/shared-hunit/Text/ANTLR/HUnit.hs: -------------------------------------------------------------------------------- 1 | module Text.ANTLR.HUnit where 2 | import Control.DeepSeq 3 | import Control.Exception as E 4 | import Control.Monad 5 | import Data.List 6 | import Data.Typeable 7 | import Data.CallStack 8 | import Test.HUnit.Lang hiding (assertEqual, (@?=), (@=?)) 9 | 10 | import Text.ANTLR.Pretty 11 | import qualified Data.Text as T 12 | 13 | location :: HasCallStack => Maybe SrcLoc 14 | location = case reverse callStack of 15 | (_, loc) : _ -> Just loc 16 | [] -> Nothing 17 | 18 | -- | Asserts that the specified actual value is equal to the expected value. 19 | -- The output message will contain the prefix, the expected value, and the 20 | -- actual value. 21 | -- 22 | -- If the prefix is the empty string (i.e., @\"\"@), then the prefix is omitted 23 | -- and only the expected and actual values are output. 24 | assertEqual :: (HasCallStack, Eq a, Prettify a) 25 | => String -- ^ The message prefix 26 | -> a -- ^ The expected value 27 | -> a -- ^ The actual value 28 | -> Assertion 29 | assertEqual preface expected actual = 30 | unless (actual == expected) $ do 31 | (prefaceMsg `deepseq` expectedMsg `deepseq` actualMsg `deepseq` E.throwIO (HUnitFailure location $ ExpectedButGot prefaceMsg expectedMsg actualMsg)) 32 | where 33 | prefaceMsg 34 | | null preface = Nothing 35 | | otherwise = Just preface 36 | expectedMsg = '\n' : T.unpack (pshowIndent 4 expected) 37 | actualMsg = '\n' : T.unpack (pshowIndent 4 actual) 38 | 39 | -- | Asserts that the specified actual value is equal to the expected value 40 | -- (with the actual value on the left-hand side). 
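--
-- A usage sketch drawn from this test suite: 'firstF' in test/ll/Main.hs above
-- applies this operator to 'Icon' sets, and on failure both sides are rendered
-- with 'pshowIndent' rather than 'show', which is the point of this
-- Prettify-based variant of HUnit's operator:
--
-- > firstF = first grm [NT "F"] @?= fromList [Icon "(", Icon "id"]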
41 | (@?=) :: (HasCallStack, Eq a, Prettify a) 42 | => a -- ^ The actual value 43 | -> a -- ^ The expected value 44 | -> Assertion 45 | actual @?= expected = assertEqual "" expected actual 46 | 47 | (@=?) a b = (@?=) b a 48 | 49 | -------------------------------------------------------------------------------- /test/shared/Example/Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ExplicitForAll, DeriveAnyClass, DeriveGeneric, TypeFamilies 2 | , DeriveDataTypeable #-} 3 | module Example.Grammar where 4 | import Text.ANTLR.Set (fromList, member, (\\), empty, Generic(..), Hashable(..)) 5 | import Text.ANTLR.Grammar 6 | import Text.ANTLR.Pretty 7 | import Data.Data (toConstr, Data(..)) 8 | 9 | data NS0 = A | B | C deriving (Eq, Ord, Generic, Hashable, Bounded, Enum, Show, Data) 10 | data TS0 = A_ | B_ | C_ deriving (Eq, Ord, Generic, Hashable, Bounded, Enum, Show, Data) 11 | a = A_ 12 | b = B_ 13 | c = C_ 14 | 15 | -- TODO: boilerplate identity type classes for bounded enums 16 | instance Ref NS0 where 17 | type Sym NS0 = NS0 18 | getSymbol = id 19 | instance Ref TS0 where 20 | type Sym TS0 = TS0 21 | getSymbol = id 22 | instance Prettify NS0 where prettify = rshow . toConstr 23 | instance Prettify TS0 where prettify = rshow . toConstr 24 | dG :: Grammar () NS0 TS0 () 25 | dG = defaultGrammar C 26 | 27 | production :: nts -> (ProdRHS s nts ts) -> Production s nts ts dt 28 | production lhs rhs = Production lhs rhs Nothing 29 | 30 | mattToolG :: Grammar () NS0 TS0 () 31 | mattToolG = dG 32 | { ns = fromList [A, B, C] 33 | , ts = fromList [a, b, c] 34 | , s0 = C 35 | , ps = 36 | [ production A $ Prod Pass [T a, T b] 37 | , production A $ Prod Pass [T a] 38 | , production B $ Prod Pass [NT A, T b] 39 | , production B $ Prod Pass [T b] 40 | , production C $ Prod Pass [NT A, NT B, NT C] 41 | ] 42 | } 43 | 44 | dG' :: Grammar () String String () 45 | dG' = defaultGrammar "A" 46 | 47 | dragonBook428 :: Grammar () String String () 48 | dragonBook428 = dG' 49 | { ns = fromList ["E", "E'", "T", "T'", "F"] 50 | , ts = fromList ["+", "*", "(", ")", "id"] 51 | , s0 = "E" 52 | , ps = [ production "E" $ Prod Pass [NT "T", NT "E'"] 53 | , production "E'" $ Prod Pass [T "+", NT "T", NT "E'"] 54 | , production "E'" $ Prod Pass [Eps] -- Implicitly epsilon 55 | , production "T" $ Prod Pass [NT "F", NT "T'"] 56 | , production "T'" $ Prod Pass [T "*", NT "F", NT "T'"] 57 | , production "T'" $ Prod Pass [Eps] 58 | , production "F" $ Prod Pass [T "(", NT "E", T ")"] 59 | , production "F" $ Prod Pass [T "id"] 60 | ] 61 | } 62 | 63 | dragonBook41 :: Grammar () String String () 64 | dragonBook41 = dG' 65 | { ns = fromList ["E'", "E", "T", "F"] 66 | , ts = fromList ["+", "*", "(", ")", "id"] 67 | , s0 = "E" 68 | , ps = [ production "E" $ Prod Pass [NT "E", T "+", NT "T"] 69 | , production "E" $ Prod Pass [NT "T"] 70 | , production "T" $ Prod Pass [NT "T", T "*", NT "F"] 71 | , production "T" $ Prod Pass [NT "F"] 72 | , production "F" $ Prod Pass [T "(", NT "E", T ")"] 73 | , production "F" $ Prod Pass [T "id"] 74 | ] 75 | } 76 | 77 | dragonBook455 :: Grammar () String String () 78 | dragonBook455 = dG' 79 | { ns = fromList ["S", "C"] 80 | , ts = fromList ["c", "d"] 81 | , s0 = "S" 82 | , ps = [ production "S" $ Prod Pass [NT "C", NT "C"] 83 | , production "C" $ Prod Pass [T "c", NT "C"] 84 | , production "C" $ Prod Pass [T "d"] 85 | ] 86 | } 87 | 88 | dumbGrammar :: Grammar () String String () 89 | dumbGrammar = dG' 90 | { ns = fromList ["S", "A", "B", "I", "D"] 
91 | , ts = fromList ["1","2","3","+","-","*"] 92 | , s0 = "S" 93 | , ps = [ production "S" $ Prod Pass [NT "A"] 94 | , production "S" $ Prod Pass [NT "B"] 95 | , production "S" $ Prod Pass [NT "D"] 96 | , production "A" $ Prod Pass [NT "I", T "+", NT "I"] 97 | , production "B" $ Prod Pass [NT "I", T "-", NT "I"] 98 | , production "I" $ Prod Pass [T "1"] 99 | , production "I" $ Prod Pass [T "2"] 100 | , production "I" $ Prod Pass [T "3"] 101 | , production "D" $ Prod Pass [NT "I", T "*", NT "I"] 102 | ] 103 | --, us = [(\_ -> True)] 104 | } 105 | 106 | -------------------------------------------------------------------------------- /test/simple/Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Grammar where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | 19 | data Attr = A | B 20 | 21 | data Decl = Foo | Bar 22 | 23 | [g4| 24 | grammar Simple; 25 | 26 | attrDecl : attr* decl ; 27 | 28 | attrDecl2 : attr? decl ; 29 | 30 | attrDecl3 : attr+ decl ; 31 | 32 | attr : 'a' ';' -> A 33 | | 'b' ';' -> B 34 | ; 35 | 36 | decl : 'foo' -> Foo 37 | | 'bar' -> Bar 38 | ; 39 | 40 | UNICODE : '\u0008' -> String ; 41 | 42 | |] 43 | 44 | -------------------------------------------------------------------------------- /test/simple/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Main where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | 20 | import Grammar 21 | 22 | --foo = [ $(lift $ LR.lr1Table simpleGrammar) ] 23 | 24 | --test_star = foo @?= [] 25 | test_star = () @?= () 26 | 27 | main :: IO () 28 | main = defaultMainWithOpts 29 | [ testCase "test_star" test_star 30 | ] mempty 31 | 32 | -------------------------------------------------------------------------------- /test/swift/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, TemplateHaskell #-} 4 | module Parser 5 | ( module Grammar 6 | , glrParse, ast2sexpr 7 | ) where 8 | import Language.ANTLR4 9 | import Grammar 10 | 11 | import qualified 
Text.ANTLR.LR as LR 12 | 13 | $(g4_parsers swiftAST swiftGrammar) 14 | -- $(mkLRParser the_ast sexpressionGrammar) 15 | 16 | -------------------------------------------------------------------------------- /test/swift/swift.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | import Language.ANTLR4 3 | import Parser 4 | import qualified Text.ANTLR.Set as S 5 | 6 | getAST (ResultAccept ast) = ast 7 | getAST _ = error "non-AST in ResultSet" 8 | 9 | main = 10 | case glrParse isWS "var i = 0;" of 11 | (ResultAccept ast) -> print $ ast2topLevel ast 12 | (ResultSet xs) -> mapM_ (print . ast2topLevel . getAST) (S.toList xs) 13 | 14 | -------------------------------------------------------------------------------- /test/template/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO.Unsafe (unsafePerformIO) 4 | import Data.Monoid 5 | import Test.Framework 6 | import Test.Framework.Providers.HUnit 7 | import Test.Framework.Providers.QuickCheck2 8 | import Test.HUnit 9 | import Test.QuickCheck (Property, quickCheck, (==>)) 10 | import qualified Test.QuickCheck.Monadic as TQM 11 | 12 | main :: IO () 13 | main = defaultMainWithOpts 14 | [ 15 | ] mempty 16 | 17 | -------------------------------------------------------------------------------- /test/unit/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Main where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | 20 | import PlusBug0 21 | $(g4_parsers plusBug0AST plusBug0Grammar) 22 | 23 | test_plusBug0 = case glrParse (== T_WS) "foo bar baz" of 24 | (ResultAccept ast) -> ast2plus ast @?= Plus [ "foo", "bar", "baz" ] 25 | _ -> assertFailure "Ambiguous parse" 26 | 27 | main :: IO () 28 | main = defaultMainWithOpts 29 | [ testCase "test_plusBug0" test_plusBug0 30 | ] mempty 31 | 32 | -------------------------------------------------------------------------------- /test/unit/PlusBug0.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module PlusBug0 where 6 | import Language.ANTLR4 7 | 8 | data Plus = Plus [String] | NotPlus [String] 9 | deriving (Eq, Show) 10 | 11 | [g4| 12 | grammar PlusBug0; 13 | 14 | plus : LowerID+ -> Plus ; 15 | 16 | LowerID : [a-z][a-zA-Z0-9_]* -> String; 17 | WS : [ \t\n\r\f\v]+ -> String; 18 | |] 19 | 20 | -------------------------------------------------------------------------------- /test/unit0/DupTerms.hs: -------------------------------------------------------------------------------- 1 | {-#
LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module DupTerms where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | 20 | import DupTermsGrammar 21 | 22 | $(g4_parsers dupTermsAST dupTermsGrammar) 23 | 24 | test_dup_terms = case glrParse (== T_WS) "(" of 25 | (ResultAccept ast) -> ast2justParen ast @?= 3 26 | rest -> assertFailure $ "Did not parse: " ++ pshow' rest 27 | 28 | -------------------------------------------------------------------------------- /test/unit0/DupTermsGrammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module DupTermsGrammar where 6 | import Language.ANTLR4 7 | 8 | doesThisFire :: Int 9 | doesThisFire = 3 10 | 11 | orDoesThisFire :: String -> Int 12 | orDoesThisFire _ = 4 13 | 14 | $( return [] ) 15 | 16 | [g4| 17 | grammar DupTerms; 18 | 19 | justParen : '(' -> doesThisFire ; 20 | 21 | LParen : '(' -> orDoesThisFire ; 22 | 23 | WS : [ \t\n\r\f\v]+ -> String; 24 | |] 25 | 26 | -------------------------------------------------------------------------------- /test/unit0/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Main where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | 20 | import Star0 21 | import Star1 22 | import DupTerms 23 | 24 | main :: IO () 25 | main = defaultMainWithOpts 26 | [ testCase "test_star0" test_star0 27 | , testCase "test_star1" test_star1 28 | , testCase "duplicate_terminals" test_dup_terms 29 | ] mempty 30 | 31 | -------------------------------------------------------------------------------- /test/unit0/Star0.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Star0 where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe 
(unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | 20 | import Star0Grammar 21 | 22 | $(g4_parsers star0AST star0Grammar) 23 | 24 | test_star0 = case glrParse (== T_WS) "page page" of 25 | (ResultAccept ast) -> ast2words ast @?= ["page", "page"] 26 | rest -> assertFailure $ "Did not parse: " ++ pshow' rest 27 | 28 | -------------------------------------------------------------------------------- /test/unit0/Star0Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Star0Grammar where 6 | import Language.ANTLR4 7 | 8 | [g4| 9 | grammar Star0; 10 | 11 | words : page* -> ${\ps -> ps} ; 12 | 13 | page : Page ; 14 | 15 | Page : 'page' -> String ; 16 | 17 | WS : [ \t\n\r\f\v]+ -> String; 18 | |] 19 | 20 | -------------------------------------------------------------------------------- /test/unit0/Star1.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Star1 where 6 | import Language.ANTLR4 7 | 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import Data.Monoid 10 | import Test.Framework 11 | import Test.Framework.Providers.HUnit 12 | import Test.Framework.Providers.QuickCheck2 13 | import Test.HUnit 14 | import Test.QuickCheck (Property, quickCheck, (==>)) 15 | import qualified Test.QuickCheck.Monadic as TQM 16 | 17 | import Language.Haskell.TH.Syntax (lift) 18 | import qualified Text.ANTLR.LR as LR 19 | import Debug.Trace as D 20 | 21 | import Star1Grammar 22 | 23 | $(g4_parsers star1AST star1Grammar) 24 | 25 | test_star1 = D.trace (pshow' star1Grammar) $ 26 | case glrParse (== T_WS) "me page you { byte, byte }" of 27 | (ResultAccept ast) -> ast2words ast @?= Frst (Yep ("me", "page")) [] [Byte, Byte] 28 | rest -> assertFailure $ "Did not parse: " ++ pshow' rest 29 | 30 | -------------------------------------------------------------------------------- /test/unit0/Star1Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes 2 | , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances 3 | , FlexibleInstances, UndecidableInstances, FlexibleContexts, TemplateHaskell 4 | , DeriveDataTypeable #-} 5 | module Star1Grammar where 6 | import Language.ANTLR4 7 | 8 | data MyMaybe x = Nope | Yep x 9 | deriving (Eq, Ord, Show) 10 | 11 | data Mem = Byte 12 | deriving (Eq, Ord, Show) 13 | 14 | data Words = 15 | Frst (MyMaybe (String, String)) [String] [Mem] 16 | | Snd 17 | | Thrd (MyMaybe (String, String)) 18 | deriving (Eq, Ord, Show) 19 | 20 | [g4| 21 | grammar Star1; 22 | 23 | words : me 'you' page* '{' bytes '}' -> Frst 24 | | me -> Thrd 
25 | | 'woops' -> Snd 26 | ; 27 | 28 | bytes : 'byte' -> ${ [Byte] } 29 | | bytes ',' 'byte' -> ${\bs -> Byte : bs} 30 | ; 31 | 32 | me : me2? -> ${\m -> case m of Nothing -> Nope ; Just x -> Yep x}; 33 | me2 : me3 ; 34 | me3 : Me page ; 35 | Me : 'me' -> String ; 36 | 37 | page : Page ; 38 | Page : 'page' -> String ; 39 | 40 | WS : [ \t\n\r\f\v]+ -> String; 41 | |] 42 | 43 | --------------------------------------------------------------------------------